1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-pragma.h"
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
57 /* Forward definitions of types. */
58 typedef struct minipool_node Mnode;
59 typedef struct minipool_fixup Mfix;
61 const struct attribute_spec arm_attribute_table[];
63 void (*arm_lang_output_object_attributes_hook)(void);
65 /* Forward function declarations. */
66 static int arm_compute_static_chain_stack_bytes (void);
67 static arm_stack_offsets *arm_get_frame_offsets (void);
68 static void arm_add_gc_roots (void);
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
70 HOST_WIDE_INT, rtx, rtx, int, int);
71 static unsigned bit_count (unsigned long);
72 static int arm_address_register_rtx_p (rtx, int);
73 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
74 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
75 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
76 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
77 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
78 inline static int thumb1_index_register_rtx_p (rtx, int);
79 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
80 static int thumb_far_jump_used_p (void);
81 static bool thumb_force_lr_save (void);
82 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
83 static rtx emit_sfm (int, int);
84 static unsigned arm_size_return_regs (void);
85 static bool arm_assemble_integer (rtx, unsigned int, int);
86 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
87 static arm_cc get_arm_condition_code (rtx);
88 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
89 static rtx is_jump_table (rtx);
90 static const char *output_multi_immediate (rtx *, const char *, const char *,
91 int, HOST_WIDE_INT);
92 static const char *shift_op (rtx, HOST_WIDE_INT *);
93 static struct machine_function *arm_init_machine_status (void);
94 static void thumb_exit (FILE *, int);
95 static rtx is_jump_table (rtx);
96 static HOST_WIDE_INT get_jump_table_size (rtx);
97 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
98 static Mnode *add_minipool_forward_ref (Mfix *);
99 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
100 static Mnode *add_minipool_backward_ref (Mfix *);
101 static void assign_minipool_offsets (Mfix *);
102 static void arm_print_value (FILE *, rtx);
103 static void dump_minipool (rtx);
104 static int arm_barrier_cost (rtx);
105 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
106 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
107 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
108 rtx);
109 static void arm_reorg (void);
110 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
111 static unsigned long arm_compute_save_reg0_reg12_mask (void);
112 static unsigned long arm_compute_save_reg_mask (void);
113 static unsigned long arm_isr_value (tree);
114 static unsigned long arm_compute_func_type (void);
115 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
116 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
117 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
118 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
119 #endif
120 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
121 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
122 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
123 static int arm_comp_type_attributes (const_tree, const_tree);
124 static void arm_set_default_type_attributes (tree);
125 static int arm_adjust_cost (rtx, rtx, rtx, int);
126 static int count_insns_for_constant (HOST_WIDE_INT, int);
127 static int arm_get_strip_length (int);
128 static bool arm_function_ok_for_sibcall (tree, tree);
129 static void arm_internal_label (FILE *, const char *, unsigned long);
130 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
131 tree);
132 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
133 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
134 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
135 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
136 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
137 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
138 static bool arm_rtx_costs (rtx, int, int, int *, bool);
139 static int arm_address_cost (rtx, bool);
140 static bool arm_memory_load_p (rtx);
141 static bool arm_cirrus_insn_p (rtx);
142 static void cirrus_reorg (rtx);
143 static void arm_init_builtins (void);
144 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
145 static void arm_init_iwmmxt_builtins (void);
146 static rtx safe_vector_operand (rtx, enum machine_mode);
147 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
148 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
149 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
150 static void emit_constant_insn (rtx cond, rtx pattern);
151 static rtx emit_set_insn (rtx, rtx);
152 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
153 tree, bool);
155 #ifdef OBJECT_FORMAT_ELF
156 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
157 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
158 #endif
159 #ifndef ARM_PE
160 static void arm_encode_section_info (tree, rtx, int);
161 #endif
163 static void arm_file_end (void);
164 static void arm_file_start (void);
166 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
167 tree, int *, int);
168 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
169 enum machine_mode, const_tree, bool);
170 static bool arm_promote_prototypes (const_tree);
171 static bool arm_default_short_enums (void);
172 static bool arm_align_anon_bitfield (void);
173 static bool arm_return_in_msb (const_tree);
174 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 #ifdef TARGET_UNWIND_INFO
177 static void arm_unwind_emit (FILE *, rtx);
178 static bool arm_output_ttype (rtx);
179 #endif
180 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
182 static tree arm_cxx_guard_type (void);
183 static bool arm_cxx_guard_mask_bit (void);
184 static tree arm_get_cookie_size (tree);
185 static bool arm_cookie_has_size (void);
186 static bool arm_cxx_cdtor_returns_this (void);
187 static bool arm_cxx_key_method_may_be_inline (void);
188 static void arm_cxx_determine_class_data_visibility (tree);
189 static bool arm_cxx_class_data_always_comdat (void);
190 static bool arm_cxx_use_aeabi_atexit (void);
191 static void arm_init_libfuncs (void);
192 static tree arm_build_builtin_va_list (void);
193 static void arm_expand_builtin_va_start (tree, rtx);
194 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
195 static bool arm_handle_option (size_t, const char *, int);
196 static void arm_target_help (void);
197 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
198 static bool arm_cannot_copy_insn_p (rtx);
199 static bool arm_tls_symbol_p (rtx x);
200 static int arm_issue_rate (void);
201 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
202 static bool arm_allocate_stack_slots_for_args (void);
205 /* Initialize the GCC target structure. */
206 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
207 #undef TARGET_MERGE_DECL_ATTRIBUTES
208 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
209 #endif
211 #undef TARGET_LEGITIMIZE_ADDRESS
212 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
214 #undef TARGET_ATTRIBUTE_TABLE
215 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
217 #undef TARGET_ASM_FILE_START
218 #define TARGET_ASM_FILE_START arm_file_start
219 #undef TARGET_ASM_FILE_END
220 #define TARGET_ASM_FILE_END arm_file_end
222 #undef TARGET_ASM_ALIGNED_SI_OP
223 #define TARGET_ASM_ALIGNED_SI_OP NULL
224 #undef TARGET_ASM_INTEGER
225 #define TARGET_ASM_INTEGER arm_assemble_integer
227 #undef TARGET_ASM_FUNCTION_PROLOGUE
228 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
230 #undef TARGET_ASM_FUNCTION_EPILOGUE
231 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
233 #undef TARGET_DEFAULT_TARGET_FLAGS
234 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
235 #undef TARGET_HANDLE_OPTION
236 #define TARGET_HANDLE_OPTION arm_handle_option
237 #undef TARGET_HELP
238 #define TARGET_HELP arm_target_help
240 #undef TARGET_COMP_TYPE_ATTRIBUTES
241 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
243 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
244 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
246 #undef TARGET_SCHED_ADJUST_COST
247 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
249 #undef TARGET_ENCODE_SECTION_INFO
250 #ifdef ARM_PE
251 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
252 #else
253 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
254 #endif
256 #undef TARGET_STRIP_NAME_ENCODING
257 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
259 #undef TARGET_ASM_INTERNAL_LABEL
260 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
262 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
263 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
265 #undef TARGET_ASM_OUTPUT_MI_THUNK
266 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
267 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
268 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
270 #undef TARGET_RTX_COSTS
271 #define TARGET_RTX_COSTS arm_rtx_costs
272 #undef TARGET_ADDRESS_COST
273 #define TARGET_ADDRESS_COST arm_address_cost
275 #undef TARGET_SHIFT_TRUNCATION_MASK
276 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
277 #undef TARGET_VECTOR_MODE_SUPPORTED_P
278 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
280 #undef TARGET_MACHINE_DEPENDENT_REORG
281 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
283 #undef TARGET_INIT_BUILTINS
284 #define TARGET_INIT_BUILTINS arm_init_builtins
285 #undef TARGET_EXPAND_BUILTIN
286 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
288 #undef TARGET_INIT_LIBFUNCS
289 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
291 #undef TARGET_PROMOTE_FUNCTION_ARGS
292 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
293 #undef TARGET_PROMOTE_FUNCTION_RETURN
294 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
295 #undef TARGET_PROMOTE_PROTOTYPES
296 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
297 #undef TARGET_PASS_BY_REFERENCE
298 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
299 #undef TARGET_ARG_PARTIAL_BYTES
300 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
302 #undef TARGET_SETUP_INCOMING_VARARGS
303 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
305 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
306 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
308 #undef TARGET_DEFAULT_SHORT_ENUMS
309 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
311 #undef TARGET_ALIGN_ANON_BITFIELD
312 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
314 #undef TARGET_NARROW_VOLATILE_BITFIELD
315 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
317 #undef TARGET_CXX_GUARD_TYPE
318 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
320 #undef TARGET_CXX_GUARD_MASK_BIT
321 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
323 #undef TARGET_CXX_GET_COOKIE_SIZE
324 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
326 #undef TARGET_CXX_COOKIE_HAS_SIZE
327 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
329 #undef TARGET_CXX_CDTOR_RETURNS_THIS
330 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
332 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
333 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
335 #undef TARGET_CXX_USE_AEABI_ATEXIT
336 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
338 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
339 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
340 arm_cxx_determine_class_data_visibility
342 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
343 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
345 #undef TARGET_RETURN_IN_MSB
346 #define TARGET_RETURN_IN_MSB arm_return_in_msb
348 #undef TARGET_RETURN_IN_MEMORY
349 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
351 #undef TARGET_MUST_PASS_IN_STACK
352 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
354 #ifdef TARGET_UNWIND_INFO
355 #undef TARGET_UNWIND_EMIT
356 #define TARGET_UNWIND_EMIT arm_unwind_emit
358 /* EABI unwinding tables use a different format for the typeinfo tables. */
359 #undef TARGET_ASM_TTYPE
360 #define TARGET_ASM_TTYPE arm_output_ttype
362 #undef TARGET_ARM_EABI_UNWINDER
363 #define TARGET_ARM_EABI_UNWINDER true
364 #endif /* TARGET_UNWIND_INFO */
366 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
367 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
369 #undef TARGET_CANNOT_COPY_INSN_P
370 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
372 #ifdef HAVE_AS_TLS
373 #undef TARGET_HAVE_TLS
374 #define TARGET_HAVE_TLS true
375 #endif
377 #undef TARGET_CANNOT_FORCE_CONST_MEM
378 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
380 #undef TARGET_MAX_ANCHOR_OFFSET
381 #define TARGET_MAX_ANCHOR_OFFSET 4095
383 /* The minimum is set such that the total size of the block
384 for a particular anchor is 4088 + 1 + 4095 bytes, which is
385 divisible by eight, ensuring natural spacing of anchors. */
386 #undef TARGET_MIN_ANCHOR_OFFSET
387 #define TARGET_MIN_ANCHOR_OFFSET -4088
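/* Illustrative arithmetic (a sketch spelling out the numbers above): the
   anchor block spans [-4088, 4095], so its total size is
   4088 + 1 + 4095 = 8184 bytes = 8 * 1023, which keeps successive anchors
   naturally spaced on eight-byte boundaries. */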
389 #undef TARGET_SCHED_ISSUE_RATE
390 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
392 #undef TARGET_MANGLE_TYPE
393 #define TARGET_MANGLE_TYPE arm_mangle_type
395 #undef TARGET_BUILD_BUILTIN_VA_LIST
396 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
397 #undef TARGET_EXPAND_BUILTIN_VA_START
398 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
399 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
400 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
402 #ifdef HAVE_AS_TLS
403 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
404 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
405 #endif
407 #undef TARGET_LEGITIMATE_ADDRESS_P
408 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
410 struct gcc_target targetm = TARGET_INITIALIZER;
412 /* Obstack for minipool constant handling. */
413 static struct obstack minipool_obstack;
414 static char * minipool_startobj;
416 /* The maximum number of insns skipped which
417 will be conditionalised if possible. */
418 static int max_insns_skipped = 5;
420 extern FILE * asm_out_file;
422 /* True if we are currently building a constant table. */
423 int making_const_table;
425 /* The processor for which instructions should be scheduled. */
426 enum processor_type arm_tune = arm_none;
428 /* The default processor used if not overridden by commandline. */
429 static enum processor_type arm_default_cpu = arm_none;
431 /* Which floating point model to use. */
432 enum arm_fp_model arm_fp_model;
434 /* Which floating point hardware is available. */
435 enum fputype arm_fpu_arch;
437 /* Which floating point hardware to schedule for. */
438 enum fputype arm_fpu_tune;
440 /* Whether to use floating point hardware. */
441 enum float_abi_type arm_float_abi;
443 /* Which ABI to use. */
444 enum arm_abi_type arm_abi;
446 /* Which thread pointer model to use. */
447 enum arm_tp_type target_thread_pointer = TP_AUTO;
449 /* Used to parse -mstructure_size_boundary command line option. */
450 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
452 /* Used for Thumb call_via trampolines. */
453 rtx thumb_call_via_label[14];
454 static int thumb_call_reg_needed;
456 /* Bit values used to identify processor capabilities. */
457 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
458 #define FL_ARCH3M (1 << 1) /* Extended multiply */
459 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
460 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
461 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
462 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
463 #define FL_THUMB (1 << 6) /* Thumb aware */
464 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
465 #define FL_STRONG (1 << 8) /* StrongARM */
466 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
467 #define FL_XSCALE (1 << 10) /* XScale */
468 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
469 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
470 media instructions. */
471 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
472 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
473 Note: ARM6 & 7 derivatives only. */
474 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
475 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
476 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
477 profile. */
478 #define FL_DIV (1 << 18) /* Hardware divide. */
479 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
480 #define FL_NEON (1 << 20) /* Neon instructions. */
482 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
484 #define FL_FOR_ARCH2 FL_NOTM
485 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
486 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
487 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
488 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
489 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
490 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
491 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
492 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
493 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
494 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
495 #define FL_FOR_ARCH6J FL_FOR_ARCH6
496 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
497 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
498 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
499 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
500 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
501 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
502 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
503 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
504 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
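/* Illustrative expansion (a sketch of how the nested masks unfold; no flags
   beyond those defined above are assumed): for example,
   FL_FOR_ARCH5TE == FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                     | FL_ARCH5 | FL_ARCH5E | FL_THUMB
   while FL_FOR_ARCH6M is FL_FOR_ARCH6 with FL_NOTM masked back out, as
   required for M-profile parts. */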
506 /* The bits in this mask specify which
507 instructions we are allowed to generate. */
508 static unsigned long insn_flags = 0;
510 /* The bits in this mask specify which instruction scheduling options should
511 be used. */
512 static unsigned long tune_flags = 0;
514 /* The following are used in the arm.md file as equivalents to bits
515 in the above two flag variables. */
517 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
518 int arm_arch3m = 0;
520 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
521 int arm_arch4 = 0;
523 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
524 int arm_arch4t = 0;
526 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
527 int arm_arch5 = 0;
529 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
530 int arm_arch5e = 0;
532 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
533 int arm_arch6 = 0;
535 /* Nonzero if this chip supports the ARM 6K extensions. */
536 int arm_arch6k = 0;
538 /* Nonzero if instructions not present in the 'M' profile can be used. */
539 int arm_arch_notm = 0;
541 /* Nonzero if this chip can benefit from load scheduling. */
542 int arm_ld_sched = 0;
544 /* Nonzero if this chip is a StrongARM. */
545 int arm_tune_strongarm = 0;
547 /* Nonzero if this chip is a Cirrus variant. */
548 int arm_arch_cirrus = 0;
550 /* Nonzero if this chip supports Intel Wireless MMX technology. */
551 int arm_arch_iwmmxt = 0;
553 /* Nonzero if this chip is an XScale. */
554 int arm_arch_xscale = 0;
556 /* Nonzero if tuning for XScale */
557 int arm_tune_xscale = 0;
559 /* Nonzero if we want to tune for stores that access the write-buffer.
560 This typically means an ARM6 or ARM7 with MMU or MPU. */
561 int arm_tune_wbuf = 0;
563 /* Nonzero if tuning for Cortex-A9. */
564 int arm_tune_cortex_a9 = 0;
566 /* Nonzero if generating Thumb instructions. */
567 int thumb_code = 0;
569 /* Nonzero if we should define __THUMB_INTERWORK__ in the
570 preprocessor.
571 XXX This is a bit of a hack, it's intended to help work around
572 problems in GLD which doesn't understand that armv5t code is
573 interworking clean. */
574 int arm_cpp_interwork = 0;
576 /* Nonzero if chip supports Thumb 2. */
577 int arm_arch_thumb2;
579 /* Nonzero if chip supports integer division instruction. */
580 int arm_arch_hwdiv;
582 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
583 must report the mode of the memory reference from PRINT_OPERAND to
584 PRINT_OPERAND_ADDRESS. */
585 enum machine_mode output_memory_reference_mode;
587 /* The register number to be used for the PIC offset register. */
588 unsigned arm_pic_register = INVALID_REGNUM;
590 /* Set to 1 after arm_reorg has started. Reset at the start of
591 the next function. */
592 static int after_arm_reorg = 0;
594 /* The maximum number of insns to be used when loading a constant. */
595 static int arm_constant_limit = 3;
597 /* For an explanation of these variables, see final_prescan_insn below. */
598 int arm_ccfsm_state;
599 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
600 enum arm_cond_code arm_current_cc;
601 rtx arm_target_insn;
602 int arm_target_label;
603 /* The number of conditionally executed insns, including the current insn. */
604 int arm_condexec_count = 0;
605 /* A bitmask specifying the patterns for the IT block.
606 Zero means do not output an IT block before this insn. */
607 int arm_condexec_mask = 0;
608 /* The number of bits used in arm_condexec_mask. */
609 int arm_condexec_masklen = 0;
611 /* The condition codes of the ARM, and the inverse function. */
612 static const char * const arm_condition_codes[] =
613 {
614 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
615 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
616 };
618 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
619 #define streq(string1, string2) (strcmp (string1, string2) == 0)
621 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
622 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
623 | (1 << PIC_OFFSET_TABLE_REGNUM)))
625 /* Initialization code. */
627 struct processors
629 const char *const name;
630 enum processor_type core;
631 const char *arch;
632 const unsigned long flags;
633 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
636 /* Not all of these give usefully different compilation alternatives,
637 but there is no simple way of generalizing them. */
638 static const struct processors all_cores[] =
640 /* ARM Cores */
641 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
642 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
643 #include "arm-cores.def"
644 #undef ARM_CORE
645 {NULL, arm_none, NULL, 0, NULL}
648 static const struct processors all_architectures[] =
650 /* ARM Architectures */
651 /* We don't specify rtx_costs here as it will be figured out
652 from the core. */
654 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
655 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
656 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
657 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
658 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
659 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
660 implementations that support it, so we will leave it out for now. */
661 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
662 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
663 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
664 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
665 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
666 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
667 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
668 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
669 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
670 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
671 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
672 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
673 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
674 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
675 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
676 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
677 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
678 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
679 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
680 {NULL, arm_none, NULL, 0 , NULL}
683 struct arm_cpu_select
685 const char * string;
686 const char * name;
687 const struct processors * processors;
690 /* The 'string' field of each entry is filled in by arm_handle_option
691 with a pointer to the value specified by the user on the command line,
692 if the user specified such a value. */
694 static struct arm_cpu_select arm_select[] =
696 /* string name processors */
697 { NULL, "-mcpu=", all_cores },
698 { NULL, "-march=", all_architectures },
699 { NULL, "-mtune=", all_cores }
702 /* Defines representing the indexes into the above table. */
703 #define ARM_OPT_SET_CPU 0
704 #define ARM_OPT_SET_ARCH 1
705 #define ARM_OPT_SET_TUNE 2
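/* Illustrative flow (a sketch; the command line below is hypothetical):
   given "-mcpu=arm926ej-s -mtune=cortex-a8", arm_handle_option records the
   strings in arm_select[ARM_OPT_SET_CPU].string and
   arm_select[ARM_OPT_SET_TUNE].string, and arm_override_options later
   matches each one by name against the all_cores table. */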
707 /* The name of the preprocessor macro to define for this architecture. */
709 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
711 struct fpu_desc
713 const char * name;
714 enum fputype fpu;
718 /* Available values for -mfpu=. */
720 static const struct fpu_desc all_fpus[] =
722 {"fpa", FPUTYPE_FPA},
723 {"fpe2", FPUTYPE_FPA_EMU2},
724 {"fpe3", FPUTYPE_FPA_EMU2},
725 {"maverick", FPUTYPE_MAVERICK},
726 {"vfp", FPUTYPE_VFP},
727 {"vfp3", FPUTYPE_VFP3},
728 {"vfpv3", FPUTYPE_VFP3},
729 {"vfpv3-d16", FPUTYPE_VFP3D16},
730 {"neon", FPUTYPE_NEON}
734 /* Floating point models used by the different hardware.
735 See fputype in arm.h. */
737 static const enum arm_fp_model fp_model_for_fpu[] =
739 /* No FP hardware. */
740 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
741 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
742 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
743 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
744 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
745 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
746 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
747 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
748 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
752 struct float_abi
754 const char * name;
755 enum float_abi_type abi_type;
759 /* Available values for -mfloat-abi=. */
761 static const struct float_abi all_float_abis[] =
763 {"soft", ARM_FLOAT_ABI_SOFT},
764 {"softfp", ARM_FLOAT_ABI_SOFTFP},
765 {"hard", ARM_FLOAT_ABI_HARD}
769 struct abi_name
771 const char *name;
772 enum arm_abi_type abi_type;
776 /* Available values for -mabi=. */
778 static const struct abi_name arm_all_abis[] =
780 {"apcs-gnu", ARM_ABI_APCS},
781 {"atpcs", ARM_ABI_ATPCS},
782 {"aapcs", ARM_ABI_AAPCS},
783 {"iwmmxt", ARM_ABI_IWMMXT},
784 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
787 /* Supported TLS relocations. */
789 enum tls_reloc {
790 TLS_GD32,
791 TLS_LDM32,
792 TLS_LDO32,
793 TLS_IE32,
794 TLS_LE32
797 /* Emit an insn that's a simple single-set. Both the operands must be known
798 to be valid. */
799 inline static rtx
800 emit_set_insn (rtx x, rtx y)
801 {
802 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
803 }
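/* Usage sketch (illustrative only; the operands are whatever single-set the
   caller needs):
     emit_set_insn (stack_pointer_rtx, plus_constant (stack_pointer_rtx, -8));
   emits the insn (set (reg sp) (plus (reg sp) (const_int -8))). */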
805 /* Return the number of bits set in VALUE. */
806 static unsigned
807 bit_count (unsigned long value)
808 {
809 unsigned long count = 0;
811 while (value)
812 {
813 count++;
814 value &= value - 1; /* Clear the least-significant set bit. */
815 }
817 return count;
818 }
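/* Worked example (illustrative): bit_count (0x29) iterates
   0x29 -> 0x28 -> 0x20 -> 0, clearing one set bit per pass, and returns 3. */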
820 /* Set up library functions unique to ARM. */
822 static void
823 arm_init_libfuncs (void)
825 /* There are no special library functions unless we are using the
826 ARM BPABI. */
827 if (!TARGET_BPABI)
828 return;
830 /* The functions below are described in Section 4 of the "Run-Time
831 ABI for the ARM architecture", Version 1.0. */
833 /* Double-precision floating-point arithmetic. Table 2. */
834 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
835 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
836 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
837 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
838 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
840 /* Double-precision comparisons. Table 3. */
841 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
842 set_optab_libfunc (ne_optab, DFmode, NULL);
843 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
844 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
845 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
846 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
847 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
849 /* Single-precision floating-point arithmetic. Table 4. */
850 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
851 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
852 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
853 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
854 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
856 /* Single-precision comparisons. Table 5. */
857 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
858 set_optab_libfunc (ne_optab, SFmode, NULL);
859 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
860 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
861 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
862 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
863 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
865 /* Floating-point to integer conversions. Table 6. */
866 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
867 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
868 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
869 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
870 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
871 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
872 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
873 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
875 /* Conversions between floating types. Table 7. */
876 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
877 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
879 /* Integer to floating-point conversions. Table 8. */
880 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
881 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
882 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
883 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
884 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
885 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
886 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
887 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
889 /* Long long. Table 9. */
890 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
891 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
892 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
893 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
894 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
895 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
896 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
897 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
899 /* Integer (32/32->32) division. \S 4.3.1. */
900 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
901 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
903 /* The divmod functions are designed so that they can be used for
904 plain division, even though they return both the quotient and the
905 remainder. The quotient is returned in the usual location (i.e.,
906 r0 for SImode, {r0, r1} for DImode), just as would be expected
907 for an ordinary division routine. Because the AAPCS calling
908 conventions specify that all of { r0, r1, r2, r3 } are
909 caller-saved (call-clobbered) registers, there is no need to tell the compiler
910 explicitly that those registers are clobbered by these
911 routines. */
912 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
913 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
915 /* For SImode division the ABI provides div-without-mod routines,
916 which are faster. */
917 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
918 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
920 /* We don't have mod libcalls. Fortunately gcc knows how to use the
921 divmod libcalls instead. */
922 set_optab_libfunc (smod_optab, DImode, NULL);
923 set_optab_libfunc (umod_optab, DImode, NULL);
924 set_optab_libfunc (smod_optab, SImode, NULL);
925 set_optab_libfunc (umod_optab, SImode, NULL);
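/* Illustrative effect (a sketch, assuming a BPABI target; the C fragments
   below are hypothetical): with the optabs registered above,

     long long q = a / b;      becomes a call to __aeabi_ldivmod
     unsigned int r = x % y;   becomes a call to __aeabi_uidivmod

   and, as described above, the quotient (or the remainder picked out of the
   divmod pair) comes back in the usual AAPCS result registers. */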
928 /* On AAPCS systems, this is the "struct __va_list". */
929 static GTY(()) tree va_list_type;
931 /* Return the type to use as __builtin_va_list. */
932 static tree
933 arm_build_builtin_va_list (void)
935 tree va_list_name;
936 tree ap_field;
938 if (!TARGET_AAPCS_BASED)
939 return std_build_builtin_va_list ();
941 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
942 defined as:
944 struct __va_list
945 {
946 void *__ap;
947 };
949 The C Library ABI further reinforces this definition in \S
950 4.1.
952 We must follow this definition exactly. The structure tag
953 name is visible in C++ mangled names, and thus forms a part
954 of the ABI. The field name may be used by people who
955 #include <stdarg.h>. */
956 /* Create the type. */
957 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
958 /* Give it the required name. */
959 va_list_name = build_decl (TYPE_DECL,
960 get_identifier ("__va_list"),
961 va_list_type);
962 DECL_ARTIFICIAL (va_list_name) = 1;
963 TYPE_NAME (va_list_type) = va_list_name;
964 /* Create the __ap field. */
965 ap_field = build_decl (FIELD_DECL,
966 get_identifier ("__ap"),
967 ptr_type_node);
968 DECL_ARTIFICIAL (ap_field) = 1;
969 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
970 TYPE_FIELDS (va_list_type) = ap_field;
971 /* Compute its layout. */
972 layout_type (va_list_type);
974 return va_list_type;
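/* Illustrative equivalent (a sketch restating the AAPCS comment above, not a
   declaration the compiler emits): on an AAPCS target the type built here
   acts as if <stdarg.h> had provided

     typedef struct __va_list { void *__ap; } va_list;

   while non-AAPCS targets keep the generic std_build_builtin_va_list type. */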
977 /* Return an expression of type "void *" pointing to the next
978 available argument in a variable-argument list. VALIST is the
979 user-level va_list object, of type __builtin_va_list. */
980 static tree
981 arm_extract_valist_ptr (tree valist)
983 if (TREE_TYPE (valist) == error_mark_node)
984 return error_mark_node;
986 /* On an AAPCS target, the pointer is stored within "struct
987 va_list". */
988 if (TARGET_AAPCS_BASED)
990 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
991 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
992 valist, ap_field, NULL_TREE);
995 return valist;
998 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
999 static void
1000 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1002 valist = arm_extract_valist_ptr (valist);
1003 std_expand_builtin_va_start (valist, nextarg);
1006 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1007 static tree
1008 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1009 gimple_seq *post_p)
1011 valist = arm_extract_valist_ptr (valist);
1012 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1015 /* Implement TARGET_HANDLE_OPTION. */
1017 static bool
1018 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1020 switch (code)
1022 case OPT_march_:
1023 arm_select[1].string = arg;
1024 return true;
1026 case OPT_mcpu_:
1027 arm_select[0].string = arg;
1028 return true;
1030 case OPT_mhard_float:
1031 target_float_abi_name = "hard";
1032 return true;
1034 case OPT_msoft_float:
1035 target_float_abi_name = "soft";
1036 return true;
1038 case OPT_mtune_:
1039 arm_select[2].string = arg;
1040 return true;
1042 default:
1043 return true;
1047 static void
1048 arm_target_help (void)
1050 int i;
1051 static int columns = 0;
1052 int remaining;
1054 /* If we have not done so already, obtain the desired maximum width of
1055 the output. Note - this is a duplication of the code at the start of
1056 gcc/opts.c:print_specific_help() - the two copies should probably be
1057 replaced by a single function. */
1058 if (columns == 0)
1060 const char *p;
1062 GET_ENVIRONMENT (p, "COLUMNS");
1063 if (p != NULL)
1065 int value = atoi (p);
1067 if (value > 0)
1068 columns = value;
1071 if (columns == 0)
1072 /* Use a reasonable default. */
1073 columns = 80;
1076 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1078 /* The - 2 is because we know that the last entry in the array is NULL. */
1079 i = ARRAY_SIZE (all_cores) - 2;
1080 gcc_assert (i > 0);
1081 printf (" %s", all_cores[i].name);
1082 remaining = columns - (strlen (all_cores[i].name) + 4);
1083 gcc_assert (remaining >= 0);
1085 while (i--)
1087 int len = strlen (all_cores[i].name);
1089 if (remaining > len + 2)
1091 printf (", %s", all_cores[i].name);
1092 remaining -= len + 2;
1094 else
1096 if (remaining > 0)
1097 printf (",");
1098 printf ("\n %s", all_cores[i].name);
1099 remaining = columns - (len + 4);
1103 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1105 i = ARRAY_SIZE (all_architectures) - 2;
1106 gcc_assert (i > 0);
1108 printf (" %s", all_architectures[i].name);
1109 remaining = columns - (strlen (all_architectures[i].name) + 4);
1110 gcc_assert (remaining >= 0);
1112 while (i--)
1114 int len = strlen (all_architectures[i].name);
1116 if (remaining > len + 2)
1118 printf (", %s", all_architectures[i].name);
1119 remaining -= len + 2;
1121 else
1123 if (remaining > 0)
1124 printf (",");
1125 printf ("\n %s", all_architectures[i].name);
1126 remaining = columns - (len + 4);
1129 printf ("\n");
1133 /* Fix up any incompatible options that the user has specified.
1134 This has now turned into a maze. */
1135 void
1136 arm_override_options (void)
1138 unsigned i;
1139 enum processor_type target_arch_cpu = arm_none;
1140 enum processor_type selected_cpu = arm_none;
1142 /* Set up the flags based on the cpu/architecture selected by the user. */
1143 for (i = ARRAY_SIZE (arm_select); i--;)
1145 struct arm_cpu_select * ptr = arm_select + i;
1147 if (ptr->string != NULL && ptr->string[0] != '\0')
1149 const struct processors * sel;
1151 for (sel = ptr->processors; sel->name != NULL; sel++)
1152 if (streq (ptr->string, sel->name))
1154 /* Set the architecture define. */
1155 if (i != ARM_OPT_SET_TUNE)
1156 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1158 /* Determine the processor core for which we should
1159 tune code-generation. */
1160 if (/* -mcpu= is a sensible default. */
1161 i == ARM_OPT_SET_CPU
1162 /* -mtune= overrides -mcpu= and -march=. */
1163 || i == ARM_OPT_SET_TUNE)
1164 arm_tune = (enum processor_type) (sel - ptr->processors);
1166 /* Remember the CPU associated with this architecture.
1167 If no other option is used to set the CPU type,
1168 we'll use this to guess the most suitable tuning
1169 options. */
1170 if (i == ARM_OPT_SET_ARCH)
1171 target_arch_cpu = sel->core;
1173 if (i == ARM_OPT_SET_CPU)
1174 selected_cpu = (enum processor_type) (sel - ptr->processors);
1176 if (i != ARM_OPT_SET_TUNE)
1178 /* If we have been given an architecture and a processor
1179 make sure that they are compatible. We only generate
1180 a warning though, and we prefer the CPU over the
1181 architecture. */
1182 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1183 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1184 ptr->string);
1186 insn_flags = sel->flags;
1189 break;
1192 if (sel->name == NULL)
1193 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
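/* Illustrative interaction (a sketch; the options below are hypothetical):
   with "-mcpu=arm7tdmi -march=armv6" this loop runs from ARM_OPT_SET_TUNE
   down to ARM_OPT_SET_CPU, so the -march entry fills insn_flags first; when
   the -mcpu entry is reached its flags differ, the "conflicts with -march=
   switch" warning is issued, and the CPU's flags replace the architecture's. */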
1197 /* Guess the tuning options from the architecture if necessary. */
1198 if (arm_tune == arm_none)
1199 arm_tune = target_arch_cpu;
1201 /* If the user did not specify a processor, choose one for them. */
1202 if (insn_flags == 0)
1204 const struct processors * sel;
1205 unsigned int sought;
1207 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1208 if (selected_cpu == arm_none)
1210 #ifdef SUBTARGET_CPU_DEFAULT
1211 /* Use the subtarget default CPU if none was specified by
1212 configure. */
1213 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1214 #endif
1215 /* Default to ARM6. */
1216 if (selected_cpu == arm_none)
1217 selected_cpu = arm6;
1219 sel = &all_cores[selected_cpu];
1221 insn_flags = sel->flags;
1223 /* Now check to see if the user has specified some command line
1224 switches that require certain abilities from the cpu. */
1225 sought = 0;
1227 if (TARGET_INTERWORK || TARGET_THUMB)
1229 sought |= (FL_THUMB | FL_MODE32);
1231 /* There are no ARM processors that support both APCS-26 and
1232 interworking. Therefore we force FL_MODE26 to be removed
1233 from insn_flags here (if it was set), so that the search
1234 below will always be able to find a compatible processor. */
1235 insn_flags &= ~FL_MODE26;
1238 if (sought != 0 && ((sought & insn_flags) != sought))
1240 /* Try to locate a CPU type that supports all of the abilities
1241 of the default CPU, plus the extra abilities requested by
1242 the user. */
1243 for (sel = all_cores; sel->name != NULL; sel++)
1244 if ((sel->flags & sought) == (sought | insn_flags))
1245 break;
1247 if (sel->name == NULL)
1249 unsigned current_bit_count = 0;
1250 const struct processors * best_fit = NULL;
1252 /* Ideally we would like to issue an error message here
1253 saying that it was not possible to find a CPU compatible
1254 with the default CPU, but which also supports the command
1255 line options specified by the programmer, and so they
1256 ought to use the -mcpu=<name> command line option to
1257 override the default CPU type.
1259 If we cannot find a cpu that has both the
1260 characteristics of the default cpu and the given
1261 command line options we scan the array again looking
1262 for a best match. */
1263 for (sel = all_cores; sel->name != NULL; sel++)
1264 if ((sel->flags & sought) == sought)
1266 unsigned count;
1268 count = bit_count (sel->flags & insn_flags);
1270 if (count >= current_bit_count)
1272 best_fit = sel;
1273 current_bit_count = count;
1277 gcc_assert (best_fit);
1278 sel = best_fit;
1281 insn_flags = sel->flags;
1283 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1284 arm_default_cpu = (enum processor_type) (sel - all_cores);
1285 if (arm_tune == arm_none)
1286 arm_tune = arm_default_cpu;
1289 /* The processor for which we should tune should now have been
1290 chosen. */
1291 gcc_assert (arm_tune != arm_none);
1293 tune_flags = all_cores[(int)arm_tune].flags;
1295 if (target_abi_name)
1297 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1299 if (streq (arm_all_abis[i].name, target_abi_name))
1301 arm_abi = arm_all_abis[i].abi_type;
1302 break;
1305 if (i == ARRAY_SIZE (arm_all_abis))
1306 error ("invalid ABI option: -mabi=%s", target_abi_name);
1308 else
1309 arm_abi = ARM_DEFAULT_ABI;
1311 /* Make sure that the processor choice does not conflict with any of the
1312 other command line choices. */
1313 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1314 error ("target CPU does not support ARM mode");
1316 /* BPABI targets use linker tricks to allow interworking on cores
1317 without thumb support. */
1318 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1320 warning (0, "target CPU does not support interworking" );
1321 target_flags &= ~MASK_INTERWORK;
1324 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1326 warning (0, "target CPU does not support THUMB instructions");
1327 target_flags &= ~MASK_THUMB;
1330 if (TARGET_APCS_FRAME && TARGET_THUMB)
1332 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1333 target_flags &= ~MASK_APCS_FRAME;
1336 /* Callee super interworking implies thumb interworking. Adding
1337 this to the flags here simplifies the logic elsewhere. */
1338 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1339 target_flags |= MASK_INTERWORK;
1341 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1342 from here where no function is being compiled currently. */
1343 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1344 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1346 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1347 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1349 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1350 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1352 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1354 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1355 target_flags |= MASK_APCS_FRAME;
1358 if (TARGET_POKE_FUNCTION_NAME)
1359 target_flags |= MASK_APCS_FRAME;
1361 if (TARGET_APCS_REENT && flag_pic)
1362 error ("-fpic and -mapcs-reent are incompatible");
1364 if (TARGET_APCS_REENT)
1365 warning (0, "APCS reentrant code not supported. Ignored");
1367 /* If this target is normally configured to use APCS frames, warn if they
1368 are turned off and debugging is turned on. */
1369 if (TARGET_ARM
1370 && write_symbols != NO_DEBUG
1371 && !TARGET_APCS_FRAME
1372 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1373 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1375 if (TARGET_APCS_FLOAT)
1376 warning (0, "passing floating point arguments in fp regs not yet supported");
1378 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1379 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1380 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1381 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1382 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1383 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1384 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1385 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1386 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1387 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1388 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1389 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1391 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1392 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1393 thumb_code = (TARGET_ARM == 0);
1394 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1395 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1396 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1397 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1398 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1400 /* If we are not using the default (ARM mode) section anchor offset
1401 ranges, then set the correct ranges now. */
1402 if (TARGET_THUMB1)
1404 /* Thumb-1 LDR instructions cannot have negative offsets.
1405 Permissible positive offset ranges are 5-bit (for byte loads),
1406 6-bit (for halfword loads), or 7-bit (for word loads).
1407 Empirical results suggest a 7-bit anchor range gives the best
1408 overall code size. */
1409 targetm.min_anchor_offset = 0;
1410 targetm.max_anchor_offset = 127;
1412 else if (TARGET_THUMB2)
1414 /* The minimum is set such that the total size of the block
1415 for a particular anchor is 248 + 1 + 4095 bytes, which is
1416 divisible by eight, ensuring natural spacing of anchors. */
1417 targetm.min_anchor_offset = -248;
1418 targetm.max_anchor_offset = 4095;
1421 /* V5 code we generate is completely interworking capable, so we turn off
1422 TARGET_INTERWORK here to avoid many tests later on. */
1424 /* XXX However, we must pass the right pre-processor defines to CPP
1425 or GLD can get confused. This is a hack. */
1426 if (TARGET_INTERWORK)
1427 arm_cpp_interwork = 1;
1429 if (arm_arch5)
1430 target_flags &= ~MASK_INTERWORK;
1432 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1433 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1435 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1436 error ("iwmmxt abi requires an iwmmxt capable cpu");
1438 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1439 if (target_fpu_name == NULL && target_fpe_name != NULL)
1441 if (streq (target_fpe_name, "2"))
1442 target_fpu_name = "fpe2";
1443 else if (streq (target_fpe_name, "3"))
1444 target_fpu_name = "fpe3";
1445 else
1446 error ("invalid floating point emulation option: -mfpe=%s",
1447 target_fpe_name);
1449 if (target_fpu_name != NULL)
1451 /* The user specified a FPU. */
1452 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1454 if (streq (all_fpus[i].name, target_fpu_name))
1456 arm_fpu_arch = all_fpus[i].fpu;
1457 arm_fpu_tune = arm_fpu_arch;
1458 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1459 break;
1462 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1463 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1465 else
1467 #ifdef FPUTYPE_DEFAULT
1468 /* Use the default if it is specified for this platform. */
1469 arm_fpu_arch = FPUTYPE_DEFAULT;
1470 arm_fpu_tune = FPUTYPE_DEFAULT;
1471 #else
1472 /* Pick one based on CPU type. */
1473 /* ??? Some targets assume FPA is the default.
1474 if ((insn_flags & FL_VFP) != 0)
1475 arm_fpu_arch = FPUTYPE_VFP;
1476 else
1477 */
1478 if (arm_arch_cirrus)
1479 arm_fpu_arch = FPUTYPE_MAVERICK;
1480 else
1481 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1482 #endif
1483 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1484 arm_fpu_tune = FPUTYPE_FPA;
1485 else
1486 arm_fpu_tune = arm_fpu_arch;
1487 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1488 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1491 if (target_float_abi_name != NULL)
1493 /* The user specified a FP ABI. */
1494 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1496 if (streq (all_float_abis[i].name, target_float_abi_name))
1498 arm_float_abi = all_float_abis[i].abi_type;
1499 break;
1502 if (i == ARRAY_SIZE (all_float_abis))
1503 error ("invalid floating point abi: -mfloat-abi=%s",
1504 target_float_abi_name);
1506 else
1507 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1509 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1510 sorry ("-mfloat-abi=hard and VFP");
1512 if (TARGET_AAPCS_BASED
1513 && (arm_fp_model == ARM_FP_MODEL_FPA))
1514 error ("FPA is unsupported in the AAPCS");
1516 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1517 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1518 will ever exist. GCC makes no attempt to support this combination. */
1519 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1520 sorry ("iWMMXt and hardware floating point");
1522 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1523 if (TARGET_THUMB2 && TARGET_IWMMXT)
1524 sorry ("Thumb-2 iWMMXt");
1526 /* If soft-float is specified then don't use FPU. */
1527 if (TARGET_SOFT_FLOAT)
1528 arm_fpu_arch = FPUTYPE_NONE;
1530 /* For arm2/3 there is no need to do any scheduling if there is only
1531 a floating point emulator, or we are doing software floating-point. */
1532 if ((TARGET_SOFT_FLOAT
1533 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1534 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1535 && (tune_flags & FL_MODE32) == 0)
1536 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1538 if (target_thread_switch)
1540 if (strcmp (target_thread_switch, "soft") == 0)
1541 target_thread_pointer = TP_SOFT;
1542 else if (strcmp (target_thread_switch, "auto") == 0)
1543 target_thread_pointer = TP_AUTO;
1544 else if (strcmp (target_thread_switch, "cp15") == 0)
1545 target_thread_pointer = TP_CP15;
1546 else
1547 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1550 /* Use the cp15 method if it is available. */
1551 if (target_thread_pointer == TP_AUTO)
1553 if (arm_arch6k && !TARGET_THUMB)
1554 target_thread_pointer = TP_CP15;
1555 else
1556 target_thread_pointer = TP_SOFT;
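/* Illustrative outcome (a sketch; the helper name is an assumption about the
   soft implementation, not stated here): compiling ARM-state code for an
   FL_ARCH6K core resolves TP_AUTO to TP_CP15 and reads the hardware thread
   register, while Thumb or pre-6K targets fall back to TP_SOFT, i.e. a call
   such as __aeabi_read_tp to fetch the thread pointer. */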
1559 if (TARGET_HARD_TP && TARGET_THUMB1)
1560 error ("can not use -mtp=cp15 with 16-bit Thumb");
1562 /* Override the default structure alignment for AAPCS ABI. */
1563 if (TARGET_AAPCS_BASED)
1564 arm_structure_size_boundary = 8;
1566 if (structure_size_string != NULL)
1568 int size = strtol (structure_size_string, NULL, 0);
1570 if (size == 8 || size == 32
1571 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1572 arm_structure_size_boundary = size;
1573 else
1574 warning (0, "structure size boundary can only be set to %s",
1575 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1578 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1580 error ("RTP PIC is incompatible with Thumb");
1581 flag_pic = 0;
1584 /* If stack checking is disabled, we can use r10 as the PIC register,
1585 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1586 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1588 if (TARGET_VXWORKS_RTP)
1589 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1590 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1593 if (flag_pic && TARGET_VXWORKS_RTP)
1594 arm_pic_register = 9;
1596 if (arm_pic_register_string != NULL)
1598 int pic_register = decode_reg_name (arm_pic_register_string);
1600 if (!flag_pic)
1601 warning (0, "-mpic-register= is useless without -fpic");
1603 /* Prevent the user from choosing an obviously stupid PIC register. */
1604 else if (pic_register < 0 || call_used_regs[pic_register]
1605 || pic_register == HARD_FRAME_POINTER_REGNUM
1606 || pic_register == STACK_POINTER_REGNUM
1607 || pic_register >= PC_REGNUM
1608 || (TARGET_VXWORKS_RTP
1609 && (unsigned int) pic_register != arm_pic_register))
1610 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1611 else
1612 arm_pic_register = pic_register;
1615 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1616 if (fix_cm3_ldrd == 2)
1618 if (selected_cpu == cortexm3)
1619 fix_cm3_ldrd = 1;
1620 else
1621 fix_cm3_ldrd = 0;
1624 /* ??? We might want scheduling for thumb2. */
1625 if (TARGET_THUMB && flag_schedule_insns)
1627 /* Don't warn since it's on by default in -O2. */
1628 flag_schedule_insns = 0;
1631 if (optimize_size)
1633 arm_constant_limit = 1;
1635 /* If optimizing for size, bump the number of instructions that we
1636 are prepared to conditionally execute (even on a StrongARM). */
1637 max_insns_skipped = 6;
1639 else
1641 /* For processors with load scheduling, it never costs more than
1642 2 cycles to load a constant, and the load scheduler may well
1643 reduce that to 1. */
1644 if (arm_ld_sched)
1645 arm_constant_limit = 1;
1647 /* On XScale the longer latency of a load makes it more difficult
1648 to achieve a good schedule, so it's faster to synthesize
1649 constants that can be done in two insns. */
1650 if (arm_tune_xscale)
1651 arm_constant_limit = 2;
1653 /* StrongARM has early execution of branches, so a sequence
1654 that is worth skipping is shorter. */
1655 if (arm_tune_strongarm)
1656 max_insns_skipped = 3;
1659 /* Register global variables with the garbage collector. */
1660 arm_add_gc_roots ();
1663 static void
1664 arm_add_gc_roots (void)
1666 gcc_obstack_init(&minipool_obstack);
1667 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1670 /* A table of known ARM exception types.
1671 For use with the interrupt function attribute. */
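/* For illustration, a handler declared as
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   is matched against the "IRQ" entry below and receives the ARM_FT_ISR
   prologue/epilogue treatment. */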
1673 typedef struct
1675 const char *const arg;
1676 const unsigned long return_value;
1678 isr_attribute_arg;
1680 static const isr_attribute_arg isr_attribute_args [] =
1682 { "IRQ", ARM_FT_ISR },
1683 { "irq", ARM_FT_ISR },
1684 { "FIQ", ARM_FT_FIQ },
1685 { "fiq", ARM_FT_FIQ },
1686 { "ABORT", ARM_FT_ISR },
1687 { "abort", ARM_FT_ISR },
1688 { "ABORT", ARM_FT_ISR },
1689 { "abort", ARM_FT_ISR },
1690 { "UNDEF", ARM_FT_EXCEPTION },
1691 { "undef", ARM_FT_EXCEPTION },
1692 { "SWI", ARM_FT_EXCEPTION },
1693 { "swi", ARM_FT_EXCEPTION },
1694 { NULL, ARM_FT_NORMAL }
1697 /* Returns the (interrupt) function type of the current
1698 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1700 static unsigned long
1701 arm_isr_value (tree argument)
1703 const isr_attribute_arg * ptr;
1704 const char * arg;
1706 if (!arm_arch_notm)
1707 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1709 /* No argument - default to IRQ. */
1710 if (argument == NULL_TREE)
1711 return ARM_FT_ISR;
1713 /* Get the value of the argument. */
1714 if (TREE_VALUE (argument) == NULL_TREE
1715 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1716 return ARM_FT_UNKNOWN;
1718 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1720 /* Check it against the list of known arguments. */
1721 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1722 if (streq (arg, ptr->arg))
1723 return ptr->return_value;
1725 /* An unrecognized interrupt type. */
1726 return ARM_FT_UNKNOWN;
1729 /* Computes the type of the current function. */
1731 static unsigned long
1732 arm_compute_func_type (void)
1734 unsigned long type = ARM_FT_UNKNOWN;
1735 tree a;
1736 tree attr;
1738 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1740 /* Decide if the current function is volatile. Such functions
1741 never return, and many memory cycles can be saved by not storing
1742 register values that will never be needed again. This optimization
1743 was added to speed up context switching in a kernel application. */
1744 if (optimize > 0
1745 && (TREE_NOTHROW (current_function_decl)
1746 || !(flag_unwind_tables
1747 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1748 && TREE_THIS_VOLATILE (current_function_decl))
1749 type |= ARM_FT_VOLATILE;
1751 if (cfun->static_chain_decl != NULL)
1752 type |= ARM_FT_NESTED;
1754 attr = DECL_ATTRIBUTES (current_function_decl);
1756 a = lookup_attribute ("naked", attr);
1757 if (a != NULL_TREE)
1758 type |= ARM_FT_NAKED;
1760 a = lookup_attribute ("isr", attr);
1761 if (a == NULL_TREE)
1762 a = lookup_attribute ("interrupt", attr);
1764 if (a == NULL_TREE)
1765 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1766 else
1767 type |= arm_isr_value (TREE_VALUE (a));
1769 return type;
1772 /* Returns the type of the current function. */
1774 unsigned long
1775 arm_current_func_type (void)
1777 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1778 cfun->machine->func_type = arm_compute_func_type ();
1780 return cfun->machine->func_type;
1783 bool
1784 arm_allocate_stack_slots_for_args (void)
1786 /* Naked functions should not allocate stack slots for arguments. */
1787 return !IS_NAKED (arm_current_func_type ());
1791 /* Return 1 if it is possible to return using a single instruction.
1792 If SIBLING is non-null, this is a test for a return before a sibling
1793 call. SIBLING is the call insn, so we can examine its register usage. */
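/* A single-instruction return is typically a plain "bx lr" (or "mov pc, lr")
   for a function that saved nothing, or one "ldmfd sp!, {..., pc}" that
   restores the saved registers and returns in the same instruction; the
   checks below reject every case where neither form is sufficient. */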
1796 use_return_insn (int iscond, rtx sibling)
1798 int regno;
1799 unsigned int func_type;
1800 unsigned long saved_int_regs;
1801 unsigned HOST_WIDE_INT stack_adjust;
1802 arm_stack_offsets *offsets;
1804 /* Never use a return instruction before reload has run. */
1805 if (!reload_completed)
1806 return 0;
1808 func_type = arm_current_func_type ();
1810 /* Naked, volatile and stack alignment functions need special
1811 consideration. */
1812 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1813 return 0;
1815 /* So do interrupt functions that use the frame pointer and Thumb
1816 interrupt functions. */
1817 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1818 return 0;
1820 offsets = arm_get_frame_offsets ();
1821 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1823 /* As do variadic functions. */
1824 if (crtl->args.pretend_args_size
1825 || cfun->machine->uses_anonymous_args
1826 /* Or if the function calls __builtin_eh_return () */
1827 || crtl->calls_eh_return
1828 /* Or if the function calls alloca */
1829 || cfun->calls_alloca
1830 /* Or if there is a stack adjustment. However, if the stack pointer
1831 is saved on the stack, we can use a pre-incrementing stack load. */
1832 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1833 && stack_adjust == 4)))
1834 return 0;
1836 saved_int_regs = offsets->saved_regs_mask;
1838 /* Unfortunately, the insn
1840 ldmib sp, {..., sp, ...}
1842 triggers a bug on most SA-110 based devices, such that the stack
1843 pointer won't be correctly restored if the instruction takes a
1844 page fault. We work around this problem by popping r3 along with
1845 the other registers, since that is never slower than executing
1846 another instruction.
1848 We test for !arm_arch5 here, because code for any architecture
1849 less than this could potentially be run on one of the buggy
1850 chips. */
1851 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1853 /* Validate that r3 is a call-clobbered register (always true in
1854 the default abi) ... */
1855 if (!call_used_regs[3])
1856 return 0;
1858 /* ... that it isn't being used for a return value ... */
1859 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1860 return 0;
1862 /* ... or for a tail-call argument ... */
1863 if (sibling)
1865 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1867 if (find_regno_fusage (sibling, USE, 3))
1868 return 0;
1871 /* ... and that there are no call-saved registers in r0-r2
1872 (always true in the default ABI). */
1873 if (saved_int_regs & 0x7)
1874 return 0;
1877 /* Can't be done if interworking with Thumb, and any registers have been
1878 stacked. */
1879 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1880 return 0;
1882 /* On StrongARM, conditional returns are expensive if they aren't
1883 taken and multiple registers have been stacked. */
1884 if (iscond && arm_tune_strongarm)
1886 /* Conditional return when just the LR is stored is a simple
1887 conditional-load instruction, that's not expensive. */
1888 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1889 return 0;
1891 if (flag_pic
1892 && arm_pic_register != INVALID_REGNUM
1893 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1894 return 0;
1897 /* If there are saved registers but the LR isn't saved, then we need
1898 two instructions for the return. */
1899 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1900 return 0;
1902 /* Can't be done if any of the FPA regs are pushed,
1903 since this also requires an insn. */
1904 if (TARGET_HARD_FLOAT && TARGET_FPA)
1905 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1906 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1907 return 0;
1909 /* Likewise VFP regs. */
1910 if (TARGET_HARD_FLOAT && TARGET_VFP)
1911 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1912 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1913 return 0;
1915 if (TARGET_REALLY_IWMMXT)
1916 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1917 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1918 return 0;
1920 return 1;
1923 /* Return TRUE if int I is a valid immediate ARM constant. */
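/* An ARM-mode immediate is an 8-bit value rotated right by an even number
   of bit positions (0, 2, ..., 30): for example 0xff, 0x3fc and 0xff000000
   are encodable, while 0x101 and 0x1fe are not. Thumb-2 relaxes the
   rotation restriction and also accepts replicated byte patterns such as
   0x00ab00ab and 0xabababab, which the repeated-pattern check below
   recognizes. */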
1926 const_ok_for_arm (HOST_WIDE_INT i)
1928 int lowbit;
1930 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1931 be all zero, or all one. */
1932 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1933 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1934 != ((~(unsigned HOST_WIDE_INT) 0)
1935 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1936 return FALSE;
1938 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1940 /* Fast return for 0 and small values. We must do this for zero, since
1941 the code below can't handle that one case. */
1942 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1943 return TRUE;
1945 /* Get the number of trailing zeros. */
1946 lowbit = ffs((int) i) - 1;
1948 /* Only even shifts are allowed in ARM mode so round down to the
1949 nearest even number. */
1950 if (TARGET_ARM)
1951 lowbit &= ~1;
1953 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1954 return TRUE;
1956 if (TARGET_ARM)
1958 /* Allow rotated constants in ARM mode. */
1959 if (lowbit <= 4
1960 && ((i & ~0xc000003f) == 0
1961 || (i & ~0xf000000f) == 0
1962 || (i & ~0xfc000003) == 0))
1963 return TRUE;
1965 else
1967 HOST_WIDE_INT v;
1969 /* Allow repeated pattern. */
1970 v = i & 0xff;
1971 v |= v << 16;
1972 if (i == v || i == (v | (v << 8)))
1973 return TRUE;
1976 return FALSE;
1979 /* Return true if I is a valid constant for the operation CODE. */
1980 static int
1981 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1983 if (const_ok_for_arm (i))
1984 return 1;
1986 switch (code)
1988 case PLUS:
1989 case COMPARE:
1990 case EQ:
1991 case NE:
1992 case GT:
1993 case LE:
1994 case LT:
1995 case GE:
1996 case GEU:
1997 case LTU:
1998 case GTU:
1999 case LEU:
2000 case UNORDERED:
2001 case ORDERED:
2002 case UNEQ:
2003 case UNGE:
2004 case UNLT:
2005 case UNGT:
2006 case UNLE:
2007 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2009 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2010 case XOR:
2011 return 0;
2013 case IOR:
2014 if (TARGET_THUMB2)
2015 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2016 return 0;
2018 case AND:
2019 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2021 default:
2022 gcc_unreachable ();
2026 /* Emit a sequence of insns to handle a large constant.
2027 CODE is the code of the operation required; it can be any of SET, PLUS,
2028 IOR, AND, XOR, MINUS;
2029 MODE is the mode in which the operation is being performed;
2030 VAL is the integer to operate on;
2031 SOURCE is the other operand (a register, or a null-pointer for SET);
2032 SUBTARGETS means it is safe to create scratch registers if that will
2033 either produce a simpler sequence, or we will want to cse the values.
2034 Return value is the number of insns emitted. */
2036 /* ??? Tweak this for thumb2. */
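/* For example, a SET of 0x12345678 has no single-immediate encoding, so
   building it without a constant-pool load takes roughly
     mov   rd, #0x12000000
     orr   rd, rd, #0x00340000
     orr   rd, rd, #0x00005600
     orr   rd, rd, #0x00000078
   (four insns), whereas TARGET_USE_MOVT cores get a two-instruction
   movw/movt pair from arm_emit_movpair below. */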
2038 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2039 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2041 rtx cond;
2043 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2044 cond = COND_EXEC_TEST (PATTERN (insn));
2045 else
2046 cond = NULL_RTX;
2048 if (subtargets || code == SET
2049 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2050 && REGNO (target) != REGNO (source)))
2052 /* After arm_reorg has been called, we can't fix up expensive
2053 constants by pushing them into memory so we must synthesize
2054 them in-line, regardless of the cost. This is only likely to
2055 be more costly on chips that have load delay slots and we are
2056 compiling without running the scheduler (so no splitting
2057 occurred before the final instruction emission).
2059 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c */
2061 if (!after_arm_reorg
2062 && !cond
2063 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2064 1, 0)
2065 > arm_constant_limit + (code != SET)))
2067 if (code == SET)
2069 /* Currently SET is the only monadic value for CODE; all
2070 the rest are dyadic. */
2071 if (TARGET_USE_MOVT)
2072 arm_emit_movpair (target, GEN_INT (val));
2073 else
2074 emit_set_insn (target, GEN_INT (val));
2076 return 1;
2078 else
2080 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2082 if (TARGET_USE_MOVT)
2083 arm_emit_movpair (temp, GEN_INT (val));
2084 else
2085 emit_set_insn (temp, GEN_INT (val));
2087 /* For MINUS, the value is subtracted from, since we never
2088 have subtraction of a constant. */
2089 if (code == MINUS)
2090 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2091 else
2092 emit_set_insn (target,
2093 gen_rtx_fmt_ee (code, mode, source, temp));
2094 return 2;
2099 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2103 /* Return the number of ARM instructions required to synthesize the given
2104 constant. */
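/* I is the bit position at which to start scanning (the same best_start
   value used by the emission loop at the end of arm_gen_constant); the loop
   below mirrors that emission loop, consuming up to eight bits of REMAINDER
   per counted instruction. */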
2105 static int
2106 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2108 HOST_WIDE_INT temp1;
2109 int num_insns = 0;
2112 int end;
2114 if (i <= 0)
2115 i += 32;
2116 if (remainder & (3 << (i - 2)))
2118 end = i - 8;
2119 if (end < 0)
2120 end += 32;
2121 temp1 = remainder & ((0x0ff << end)
2122 | ((i < end) ? (0xff >> (32 - end)) : 0));
2123 remainder &= ~temp1;
2124 num_insns++;
2125 i -= 6;
2127 i -= 2;
2128 } while (remainder);
2129 return num_insns;
2132 /* Emit an instruction with the indicated PATTERN. If COND is
2133 non-NULL, conditionalize the execution of the instruction on COND
2134 being true. */
2136 static void
2137 emit_constant_insn (rtx cond, rtx pattern)
2139 if (cond)
2140 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2141 emit_insn (pattern);
2144 /* As above, but extra parameter GENERATE which, if clear, suppresses
2145 RTL generation. */
2146 /* ??? This needs more work for thumb2. */
2148 static int
2149 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2150 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2151 int generate)
2153 int can_invert = 0;
2154 int can_negate = 0;
2155 int can_negate_initial = 0;
2156 int can_shift = 0;
2157 int i;
2158 int num_bits_set = 0;
2159 int set_sign_bit_copies = 0;
2160 int clear_sign_bit_copies = 0;
2161 int clear_zero_bit_copies = 0;
2162 int set_zero_bit_copies = 0;
2163 int insns = 0;
2164 unsigned HOST_WIDE_INT temp1, temp2;
2165 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2167 /* Find out which operations are safe for a given CODE. Also do a quick
2168 check for degenerate cases; these can occur when DImode operations
2169 are split. */
2170 switch (code)
2172 case SET:
2173 can_invert = 1;
2174 can_shift = 1;
2175 can_negate = 1;
2176 break;
2178 case PLUS:
2179 can_negate = 1;
2180 can_negate_initial = 1;
2181 break;
2183 case IOR:
2184 if (remainder == 0xffffffff)
2186 if (generate)
2187 emit_constant_insn (cond,
2188 gen_rtx_SET (VOIDmode, target,
2189 GEN_INT (ARM_SIGN_EXTEND (val))));
2190 return 1;
2193 if (remainder == 0)
2195 if (reload_completed && rtx_equal_p (target, source))
2196 return 0;
2198 if (generate)
2199 emit_constant_insn (cond,
2200 gen_rtx_SET (VOIDmode, target, source));
2201 return 1;
2204 if (TARGET_THUMB2)
2205 can_invert = 1;
2206 break;
2208 case AND:
2209 if (remainder == 0)
2211 if (generate)
2212 emit_constant_insn (cond,
2213 gen_rtx_SET (VOIDmode, target, const0_rtx));
2214 return 1;
2216 if (remainder == 0xffffffff)
2218 if (reload_completed && rtx_equal_p (target, source))
2219 return 0;
2220 if (generate)
2221 emit_constant_insn (cond,
2222 gen_rtx_SET (VOIDmode, target, source));
2223 return 1;
2225 can_invert = 1;
2226 break;
2228 case XOR:
2229 if (remainder == 0)
2231 if (reload_completed && rtx_equal_p (target, source))
2232 return 0;
2233 if (generate)
2234 emit_constant_insn (cond,
2235 gen_rtx_SET (VOIDmode, target, source));
2236 return 1;
2239 /* We don't know how to handle other cases yet. */
2240 gcc_assert (remainder == 0xffffffff);
2242 if (generate)
2243 emit_constant_insn (cond,
2244 gen_rtx_SET (VOIDmode, target,
2245 gen_rtx_NOT (mode, source)));
2246 return 1;
2248 case MINUS:
2249 /* We treat MINUS as (val - source), since (source - val) is always
2250 passed as (source + (-val)). */
2251 if (remainder == 0)
2253 if (generate)
2254 emit_constant_insn (cond,
2255 gen_rtx_SET (VOIDmode, target,
2256 gen_rtx_NEG (mode, source)));
2257 return 1;
2259 if (const_ok_for_arm (val))
2261 if (generate)
2262 emit_constant_insn (cond,
2263 gen_rtx_SET (VOIDmode, target,
2264 gen_rtx_MINUS (mode, GEN_INT (val),
2265 source)));
2266 return 1;
2268 can_negate = 1;
2270 break;
2272 default:
2273 gcc_unreachable ();
2276 /* If we can do it in one insn get out quickly. */
2277 if (const_ok_for_arm (val)
2278 || (can_negate_initial && const_ok_for_arm (-val))
2279 || (can_invert && const_ok_for_arm (~val)))
2281 if (generate)
2282 emit_constant_insn (cond,
2283 gen_rtx_SET (VOIDmode, target,
2284 (source
2285 ? gen_rtx_fmt_ee (code, mode, source,
2286 GEN_INT (val))
2287 : GEN_INT (val))));
2288 return 1;
2291 /* Calculate a few attributes that may be useful for specific
2292 optimizations. */
2293 /* Count number of leading zeros. */
2294 for (i = 31; i >= 0; i--)
2296 if ((remainder & (1 << i)) == 0)
2297 clear_sign_bit_copies++;
2298 else
2299 break;
2302 /* Count number of leading 1's. */
2303 for (i = 31; i >= 0; i--)
2305 if ((remainder & (1 << i)) != 0)
2306 set_sign_bit_copies++;
2307 else
2308 break;
2311 /* Count number of trailing zeros. */
2312 for (i = 0; i <= 31; i++)
2314 if ((remainder & (1 << i)) == 0)
2315 clear_zero_bit_copies++;
2316 else
2317 break;
2320 /* Count number of trailing 1's. */
2321 for (i = 0; i <= 31; i++)
2323 if ((remainder & (1 << i)) != 0)
2324 set_zero_bit_copies++;
2325 else
2326 break;
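/* For example, remainder == 0xfff00000 gives clear_sign_bit_copies == 0,
   set_sign_bit_copies == 12, clear_zero_bit_copies == 20 and
   set_zero_bit_copies == 0; the cases below use these counts to choose
   shift-based sequences. */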
2329 switch (code)
2331 case SET:
2332 /* See if we can use movw. */
2333 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2335 if (generate)
2336 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2337 GEN_INT (val)));
2338 return 1;
2341 /* See if we can do this by sign_extending a constant that is known
2342 to be negative. This is a good way of doing it, since the shift
2343 may well merge into a subsequent insn. */
2344 if (set_sign_bit_copies > 1)
2346 if (const_ok_for_arm
2347 (temp1 = ARM_SIGN_EXTEND (remainder
2348 << (set_sign_bit_copies - 1))))
2350 if (generate)
2352 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2353 emit_constant_insn (cond,
2354 gen_rtx_SET (VOIDmode, new_src,
2355 GEN_INT (temp1)));
2356 emit_constant_insn (cond,
2357 gen_ashrsi3 (target, new_src,
2358 GEN_INT (set_sign_bit_copies - 1)));
2360 return 2;
2362 /* For an inverted constant, we will need to set the low bits,
2363 these will be shifted out of harm's way. */
2364 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2365 if (const_ok_for_arm (~temp1))
2367 if (generate)
2369 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2370 emit_constant_insn (cond,
2371 gen_rtx_SET (VOIDmode, new_src,
2372 GEN_INT (temp1)));
2373 emit_constant_insn (cond,
2374 gen_ashrsi3 (target, new_src,
2375 GEN_INT (set_sign_bit_copies - 1)));
2377 return 2;
2381 /* See if we can calculate the value as the difference between two
2382 valid immediates. */
2383 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2385 int topshift = clear_sign_bit_copies & ~1;
2387 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2388 & (0xff000000 >> topshift));
2390 /* If temp1 is zero, then that means the 9 most significant
2391 bits of remainder were 1 and we've caused it to overflow.
2392 When topshift is 0 we don't need to do anything since we
2393 can borrow from 'bit 32'. */
2394 if (temp1 == 0 && topshift != 0)
2395 temp1 = 0x80000000 >> (topshift - 1);
2397 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2399 if (const_ok_for_arm (temp2))
2401 if (generate)
2403 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2404 emit_constant_insn (cond,
2405 gen_rtx_SET (VOIDmode, new_src,
2406 GEN_INT (temp1)));
2407 emit_constant_insn (cond,
2408 gen_addsi3 (target, new_src,
2409 GEN_INT (-temp2)));
2412 return 2;
2416 /* See if we can generate this by setting the bottom (or the top)
2417 16 bits, and then shifting these into the other half of the
2418 word. We only look for the simplest cases; to do more would cost
2419 too much. Be careful, however, not to generate this when the
2420 alternative would take fewer insns. */
2421 if (val & 0xffff0000)
2423 temp1 = remainder & 0xffff0000;
2424 temp2 = remainder & 0x0000ffff;
2426 /* Overlaps outside this range are best done using other methods. */
2427 for (i = 9; i < 24; i++)
2429 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2430 && !const_ok_for_arm (temp2))
2432 rtx new_src = (subtargets
2433 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2434 : target);
2435 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2436 source, subtargets, generate);
2437 source = new_src;
2438 if (generate)
2439 emit_constant_insn
2440 (cond,
2441 gen_rtx_SET
2442 (VOIDmode, target,
2443 gen_rtx_IOR (mode,
2444 gen_rtx_ASHIFT (mode, source,
2445 GEN_INT (i)),
2446 source)));
2447 return insns + 1;
2451 /* Don't duplicate cases already considered. */
2452 for (i = 17; i < 24; i++)
2454 if (((temp1 | (temp1 >> i)) == remainder)
2455 && !const_ok_for_arm (temp1))
2457 rtx new_src = (subtargets
2458 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2459 : target);
2460 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2461 source, subtargets, generate);
2462 source = new_src;
2463 if (generate)
2464 emit_constant_insn
2465 (cond,
2466 gen_rtx_SET (VOIDmode, target,
2467 gen_rtx_IOR
2468 (mode,
2469 gen_rtx_LSHIFTRT (mode, source,
2470 GEN_INT (i)),
2471 source)));
2472 return insns + 1;
2476 break;
2478 case IOR:
2479 case XOR:
2480 /* If we have IOR or XOR, and the constant can be loaded in a
2481 single instruction, and we can find a temporary to put it in,
2482 then this can be done in two instructions instead of 3-4. */
2483 if (subtargets
2484 /* TARGET can't be NULL if SUBTARGETS is 0 */
2485 || (reload_completed && !reg_mentioned_p (target, source)))
2487 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2489 if (generate)
2491 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2493 emit_constant_insn (cond,
2494 gen_rtx_SET (VOIDmode, sub,
2495 GEN_INT (val)));
2496 emit_constant_insn (cond,
2497 gen_rtx_SET (VOIDmode, target,
2498 gen_rtx_fmt_ee (code, mode,
2499 source, sub)));
2501 return 2;
2505 if (code == XOR)
2506 break;
2508 /* Convert
2509 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2510 and the remainder 0s, e.g. 0xfff00000) into
2511 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2513 This can be done in 2 instructions by using shifts with mov or mvn.
2514 E.g. for
2515 x = x | 0xfff00000;
2516 we generate:
2517 mvn r0, r0, asl #12
2518 mvn r0, r0, lsr #12 */
2519 if (set_sign_bit_copies > 8
2520 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2522 if (generate)
2524 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2525 rtx shift = GEN_INT (set_sign_bit_copies);
2527 emit_constant_insn
2528 (cond,
2529 gen_rtx_SET (VOIDmode, sub,
2530 gen_rtx_NOT (mode,
2531 gen_rtx_ASHIFT (mode,
2532 source,
2533 shift))));
2534 emit_constant_insn
2535 (cond,
2536 gen_rtx_SET (VOIDmode, target,
2537 gen_rtx_NOT (mode,
2538 gen_rtx_LSHIFTRT (mode, sub,
2539 shift))));
2541 return 2;
2544 /* Convert
2545 x = y | constant (which has set_zero_bit_copies number of trailing ones) into
2547 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2549 E.g. for r0 = r0 | 0xfff we generate:
2550 mvn r0, r0, lsr #12
2551 mvn r0, r0, asl #12 */
2554 if (set_zero_bit_copies > 8
2555 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2557 if (generate)
2559 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2560 rtx shift = GEN_INT (set_zero_bit_copies);
2562 emit_constant_insn
2563 (cond,
2564 gen_rtx_SET (VOIDmode, sub,
2565 gen_rtx_NOT (mode,
2566 gen_rtx_LSHIFTRT (mode,
2567 source,
2568 shift))));
2569 emit_constant_insn
2570 (cond,
2571 gen_rtx_SET (VOIDmode, target,
2572 gen_rtx_NOT (mode,
2573 gen_rtx_ASHIFT (mode, sub,
2574 shift))));
2576 return 2;
2579 /* This will never be reached for Thumb2 because orn is a valid
2580 instruction. This is for Thumb1 and the 32-bit ARM cases.
2582 x = y | constant (such that ~constant is a valid constant)
2583 Transform this to
2584 x = ~(~y & ~constant). */
2586 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2588 if (generate)
2590 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2591 emit_constant_insn (cond,
2592 gen_rtx_SET (VOIDmode, sub,
2593 gen_rtx_NOT (mode, source)));
2594 source = sub;
2595 if (subtargets)
2596 sub = gen_reg_rtx (mode);
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, sub,
2599 gen_rtx_AND (mode, source,
2600 GEN_INT (temp1))));
2601 emit_constant_insn (cond,
2602 gen_rtx_SET (VOIDmode, target,
2603 gen_rtx_NOT (mode, sub)));
2605 return 3;
2607 break;
2609 case AND:
2610 /* See if two shifts will do 2 or more insns' worth of work. */
2611 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2613 HOST_WIDE_INT shift_mask = ((0xffffffff
2614 << (32 - clear_sign_bit_copies))
2615 & 0xffffffff);
2617 if ((remainder | shift_mask) != 0xffffffff)
2619 if (generate)
2621 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2622 insns = arm_gen_constant (AND, mode, cond,
2623 remainder | shift_mask,
2624 new_src, source, subtargets, 1);
2625 source = new_src;
2627 else
2629 rtx targ = subtargets ? NULL_RTX : target;
2630 insns = arm_gen_constant (AND, mode, cond,
2631 remainder | shift_mask,
2632 targ, source, subtargets, 0);
2636 if (generate)
2638 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2639 rtx shift = GEN_INT (clear_sign_bit_copies);
2641 emit_insn (gen_ashlsi3 (new_src, source, shift));
2642 emit_insn (gen_lshrsi3 (target, new_src, shift));
2645 return insns + 2;
2648 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2650 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2652 if ((remainder | shift_mask) != 0xffffffff)
2654 if (generate)
2656 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2658 insns = arm_gen_constant (AND, mode, cond,
2659 remainder | shift_mask,
2660 new_src, source, subtargets, 1);
2661 source = new_src;
2663 else
2665 rtx targ = subtargets ? NULL_RTX : target;
2667 insns = arm_gen_constant (AND, mode, cond,
2668 remainder | shift_mask,
2669 targ, source, subtargets, 0);
2673 if (generate)
2675 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2676 rtx shift = GEN_INT (clear_zero_bit_copies);
2678 emit_insn (gen_lshrsi3 (new_src, source, shift));
2679 emit_insn (gen_ashlsi3 (target, new_src, shift));
2682 return insns + 2;
2685 break;
2687 default:
2688 break;
2691 for (i = 0; i < 32; i++)
2692 if (remainder & (1 << i))
2693 num_bits_set++;
2695 if ((code == AND)
2696 || (code != IOR && can_invert && num_bits_set > 16))
2697 remainder = (~remainder) & 0xffffffff;
2698 else if (code == PLUS && num_bits_set > 16)
2699 remainder = (-remainder) & 0xffffffff;
2700 else
2702 can_invert = 0;
2703 can_negate = 0;
2706 /* Now try to find a way of doing the job in either two or three
2707 instructions.
2708 We start by looking for the largest block of zeros that is aligned on
2709 a 2-bit boundary; we then fill up the temps, wrapping around to the
2710 top of the word when we drop off the bottom.
2711 In the worst case this code should produce no more than four insns.
2712 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2713 best place to start. */
2715 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2716 the same. */
2718 int best_start = 0;
2719 if (!TARGET_THUMB2)
2721 int best_consecutive_zeros = 0;
2723 for (i = 0; i < 32; i += 2)
2725 int consecutive_zeros = 0;
2727 if (!(remainder & (3 << i)))
2729 while ((i < 32) && !(remainder & (3 << i)))
2731 consecutive_zeros += 2;
2732 i += 2;
2734 if (consecutive_zeros > best_consecutive_zeros)
2736 best_consecutive_zeros = consecutive_zeros;
2737 best_start = i - consecutive_zeros;
2739 i -= 2;
2743 /* So long as it won't require any more insns to do so, it's
2744 desirable to emit a small constant (in bits 0...9) in the last
2745 insn. This way there is more chance that it can be combined with
2746 a later addressing insn to form a pre-indexed load or store
2747 operation. Consider:
2749 *((volatile int *)0xe0000100) = 1;
2750 *((volatile int *)0xe0000110) = 2;
2752 We want this to wind up as:
2754 mov rA, #0xe0000000
2755 mov rB, #1
2756 str rB, [rA, #0x100]
2757 mov rB, #2
2758 str rB, [rA, #0x110]
2760 rather than having to synthesize both large constants from scratch.
2762 Therefore, we calculate how many insns would be required to emit
2763 the constant starting from `best_start', and also starting from
2764 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2765 yield a shorter sequence, we may as well use zero. */
2766 if (best_start != 0
2767 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2768 && (count_insns_for_constant (remainder, 0) <=
2769 count_insns_for_constant (remainder, best_start)))
2770 best_start = 0;
2773 /* Now start emitting the insns. */
2774 i = best_start;
2777 int end;
2779 if (i <= 0)
2780 i += 32;
2781 if (remainder & (3 << (i - 2)))
2783 end = i - 8;
2784 if (end < 0)
2785 end += 32;
2786 temp1 = remainder & ((0x0ff << end)
2787 | ((i < end) ? (0xff >> (32 - end)) : 0));
2788 remainder &= ~temp1;
2790 if (generate)
2792 rtx new_src, temp1_rtx;
2794 if (code == SET || code == MINUS)
2796 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2797 if (can_invert && code != MINUS)
2798 temp1 = ~temp1;
2800 else
2802 if (remainder && subtargets)
2803 new_src = gen_reg_rtx (mode);
2804 else
2805 new_src = target;
2806 if (can_invert)
2807 temp1 = ~temp1;
2808 else if (can_negate)
2809 temp1 = -temp1;
2812 temp1 = trunc_int_for_mode (temp1, mode);
2813 temp1_rtx = GEN_INT (temp1);
2815 if (code == SET)
2817 else if (code == MINUS)
2818 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2819 else
2820 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2822 emit_constant_insn (cond,
2823 gen_rtx_SET (VOIDmode, new_src,
2824 temp1_rtx));
2825 source = new_src;
2828 if (code == SET)
2830 can_invert = 0;
2831 code = PLUS;
2833 else if (code == MINUS)
2834 code = PLUS;
2836 insns++;
2837 if (TARGET_ARM)
2838 i -= 6;
2839 else
2840 i -= 7;
2842 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
2843 shifts. */
2844 if (TARGET_ARM)
2845 i -= 2;
2846 else
2847 i--;
2849 while (remainder);
2852 return insns;
2855 /* Canonicalize a comparison so that we are more likely to recognize it.
2856 This can be done for a few constant compares, where we can make the
2857 immediate value easier to load. */
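/* For example, (x > 0xfff) is rewritten as (x >= 0x1000): 0xfff is not a
   valid ARM immediate but 0x1000 is, so the comparison becomes a single
   cmp instruction with no constant-synthesis overhead. */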
2859 enum rtx_code
2860 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2861 rtx * op1)
2863 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2864 unsigned HOST_WIDE_INT maxval;
2865 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2867 switch (code)
2869 case EQ:
2870 case NE:
2871 return code;
2873 case GT:
2874 case LE:
2875 if (i != maxval
2876 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2878 *op1 = GEN_INT (i + 1);
2879 return code == GT ? GE : LT;
2881 break;
2883 case GE:
2884 case LT:
2885 if (i != ~maxval
2886 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2888 *op1 = GEN_INT (i - 1);
2889 return code == GE ? GT : LE;
2891 break;
2893 case GTU:
2894 case LEU:
2895 if (i != ~((unsigned HOST_WIDE_INT) 0)
2896 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2898 *op1 = GEN_INT (i + 1);
2899 return code == GTU ? GEU : LTU;
2901 break;
2903 case GEU:
2904 case LTU:
2905 if (i != 0
2906 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2908 *op1 = GEN_INT (i - 1);
2909 return code == GEU ? GTU : LEU;
2911 break;
2913 default:
2914 gcc_unreachable ();
2917 return code;
2921 /* Define how to find the value returned by a function. */
2924 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2926 enum machine_mode mode;
2927 int unsignedp ATTRIBUTE_UNUSED;
2928 rtx r ATTRIBUTE_UNUSED;
2930 mode = TYPE_MODE (type);
2931 /* Promote integer types. */
2932 if (INTEGRAL_TYPE_P (type))
2933 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2935 /* Promotes small structs returned in a register to full-word size
2936 for big-endian AAPCS. */
2937 if (arm_return_in_msb (type))
2939 HOST_WIDE_INT size = int_size_in_bytes (type);
2940 if (size % UNITS_PER_WORD != 0)
2942 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2943 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2947 return LIBCALL_VALUE(mode);
2950 /* Determine the amount of memory needed to store the possible return
2951 registers of an untyped call. */
2953 arm_apply_result_size (void)
2955 int size = 16;
2957 if (TARGET_ARM)
2959 if (TARGET_HARD_FLOAT_ABI)
2961 if (TARGET_FPA)
2962 size += 12;
2963 if (TARGET_MAVERICK)
2964 size += 8;
2966 if (TARGET_IWMMXT_ABI)
2967 size += 8;
2970 return size;
2973 /* Decide whether a type should be returned in memory (true)
2974 or in a register (false). This is called as the target hook
2975 TARGET_RETURN_IN_MEMORY. */
2976 static bool
2977 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2979 HOST_WIDE_INT size;
2981 size = int_size_in_bytes (type);
2983 /* Vector values should be returned using ARM registers, not memory (unless
2984 they're over 16 bytes, which will break since we only have four
2985 call-clobbered registers to play with). */
2986 if (TREE_CODE (type) == VECTOR_TYPE)
2987 return (size < 0 || size > (4 * UNITS_PER_WORD));
2989 if (!AGGREGATE_TYPE_P (type) &&
2990 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2991 /* All simple types are returned in registers.
2992 For AAPCS, complex types are treated the same as aggregates. */
2993 return 0;
2995 if (arm_abi != ARM_ABI_APCS)
2997 /* ATPCS and later return aggregate types in memory only if they are
2998 larger than a word (or are variable size). */
2999 return (size < 0 || size > UNITS_PER_WORD);
3002 /* For the arm-wince targets we choose to be compatible with Microsoft's
3003 ARM and Thumb compilers, which always return aggregates in memory. */
3004 #ifndef ARM_WINCE
3005 /* All structures/unions bigger than one word are returned in memory.
3006 Also catch the case where int_size_in_bytes returns -1. In this case
3007 the aggregate is either huge or of variable size, and in either case
3008 we will want to return it via memory and not in a register. */
3009 if (size < 0 || size > UNITS_PER_WORD)
3010 return 1;
3012 if (TREE_CODE (type) == RECORD_TYPE)
3014 tree field;
3016 /* For a struct the APCS says that we only return in a register
3017 if the type is 'integer like' and every addressable element
3018 has an offset of zero. For practical purposes this means
3019 that the structure can have at most one non bit-field element
3020 and that this element must be the first one in the structure. */
3022 /* Find the first field, ignoring non FIELD_DECL things which will
3023 have been created by C++. */
3024 for (field = TYPE_FIELDS (type);
3025 field && TREE_CODE (field) != FIELD_DECL;
3026 field = TREE_CHAIN (field))
3027 continue;
3029 if (field == NULL)
3030 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
3032 /* Check that the first field is valid for returning in a register. */
3034 /* ... Floats are not allowed. */
3035 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3036 return 1;
3038 /* ... Aggregates that are not themselves valid for returning in
3039 a register are not allowed. */
3040 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3041 return 1;
3043 /* Now check the remaining fields, if any. Only bitfields are allowed,
3044 since they are not addressable. */
3045 for (field = TREE_CHAIN (field);
3046 field;
3047 field = TREE_CHAIN (field))
3049 if (TREE_CODE (field) != FIELD_DECL)
3050 continue;
3052 if (!DECL_BIT_FIELD_TYPE (field))
3053 return 1;
3056 return 0;
3059 if (TREE_CODE (type) == UNION_TYPE)
3061 tree field;
3063 /* Unions can be returned in registers if every element is
3064 integral, or can be returned in an integer register. */
3065 for (field = TYPE_FIELDS (type);
3066 field;
3067 field = TREE_CHAIN (field))
3069 if (TREE_CODE (field) != FIELD_DECL)
3070 continue;
3072 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3073 return 1;
3075 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3076 return 1;
3079 return 0;
3081 #endif /* not ARM_WINCE */
3083 /* Return all other types in memory. */
3084 return 1;
3087 /* Indicate whether or not words of a double are in big-endian order. */
3090 arm_float_words_big_endian (void)
3092 if (TARGET_MAVERICK)
3093 return 0;
3095 /* For FPA, float words are always big-endian. For VFP, floats words
3096 follow the memory system mode. */
3098 if (TARGET_FPA)
3100 return 1;
3103 if (TARGET_VFP)
3104 return (TARGET_BIG_END ? 1 : 0);
3106 return 1;
3109 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3110 for a call to a function whose data type is FNTYPE.
3111 For a library call, FNTYPE is NULL. */
3112 void
3113 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
3114 rtx libname ATTRIBUTE_UNUSED,
3115 tree fndecl ATTRIBUTE_UNUSED)
3117 /* On the ARM, the offset starts at 0. */
3118 pcum->nregs = 0;
3119 pcum->iwmmxt_nregs = 0;
3120 pcum->can_split = true;
3122 /* Varargs vectors are treated the same as long long.
3123 named_count avoids having to change the way arm handles 'named' */
3124 pcum->named_count = 0;
3125 pcum->nargs = 0;
3127 if (TARGET_REALLY_IWMMXT && fntype)
3129 tree fn_arg;
3131 for (fn_arg = TYPE_ARG_TYPES (fntype);
3132 fn_arg;
3133 fn_arg = TREE_CHAIN (fn_arg))
3134 pcum->named_count += 1;
3136 if (! pcum->named_count)
3137 pcum->named_count = INT_MAX;
3142 /* Return true if mode/type need doubleword alignment. */
3143 bool
3144 arm_needs_doubleword_align (enum machine_mode mode, tree type)
3146 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
3147 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
3151 /* Determine where to put an argument to a function.
3152 Value is zero to push the argument on the stack,
3153 or a hard register in which to store the argument.
3155 MODE is the argument's machine mode.
3156 TYPE is the data type of the argument (as a tree).
3157 This is null for libcalls where that information may
3158 not be available.
3159 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3160 the preceding args and about the function being called.
3161 NAMED is nonzero if this argument is a named parameter
3162 (otherwise it is an extra parameter matching an ellipsis). */
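/* Under AAPCS, for example, a long long or double argument must start in an
   even-numbered core register (r0/r1 or r2/r3); the nregs adjustment below
   skips an odd register so that such arguments land on an aligned pair. */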
3165 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3166 tree type, int named)
3168 int nregs;
3170 /* Varargs vectors are treated the same as long long.
3171 named_count avoids having to change the way arm handles 'named'. */
3172 if (TARGET_IWMMXT_ABI
3173 && arm_vector_mode_supported_p (mode)
3174 && pcum->named_count > pcum->nargs + 1)
3176 if (pcum->iwmmxt_nregs <= 9)
3177 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
3178 else
3180 pcum->can_split = false;
3181 return NULL_RTX;
3185 /* Put doubleword aligned quantities in even register pairs. */
3186 if (pcum->nregs & 1
3187 && ARM_DOUBLEWORD_ALIGN
3188 && arm_needs_doubleword_align (mode, type))
3189 pcum->nregs++;
3191 if (mode == VOIDmode)
3192 /* Pick an arbitrary value for operand 2 of the call insn. */
3193 return const0_rtx;
3195 /* Only allow splitting an arg between regs and memory if all preceding
3196 args were allocated to regs. For args passed by reference we only count
3197 the reference pointer. */
3198 if (pcum->can_split)
3199 nregs = 1;
3200 else
3201 nregs = ARM_NUM_REGS2 (mode, type);
3203 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
3204 return NULL_RTX;
3206 return gen_rtx_REG (mode, pcum->nregs);
3209 static int
3210 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3211 tree type, bool named ATTRIBUTE_UNUSED)
3213 int nregs = pcum->nregs;
3215 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3216 return 0;
3218 if (NUM_ARG_REGS > nregs
3219 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3220 && pcum->can_split)
3221 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3223 return 0;
3226 /* Variable-sized types are passed by reference. This is a GCC
3227 extension to the ARM ABI. */
3229 static bool
3230 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3231 enum machine_mode mode ATTRIBUTE_UNUSED,
3232 const_tree type, bool named ATTRIBUTE_UNUSED)
3234 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3237 /* Encode the current state of the #pragma [no_]long_calls. */
3238 typedef enum
3240 OFF, /* No #pragma [no_]long_calls is in effect. */
3241 LONG, /* #pragma long_calls is in effect. */
3242 SHORT /* #pragma no_long_calls is in effect. */
3243 } arm_pragma_enum;
3245 static arm_pragma_enum arm_pragma_long_calls = OFF;
3247 void
3248 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3250 arm_pragma_long_calls = LONG;
3253 void
3254 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3256 arm_pragma_long_calls = SHORT;
3259 void
3260 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3262 arm_pragma_long_calls = OFF;
3265 /* Table of machine attributes. */
3266 const struct attribute_spec arm_attribute_table[] =
3268 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3269 /* Function calls made to this symbol must be done indirectly, because
3270 it may lie outside of the 26-bit addressing range of a normal function
3271 call. */
3272 { "long_call", 0, 0, false, true, true, NULL },
3273 /* Whereas these functions are always known to reside within the 26-bit
3274 addressing range. */
3275 { "short_call", 0, 0, false, true, true, NULL },
3276 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3277 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3278 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3279 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3280 #ifdef ARM_PE
3281 /* ARM/PE has three new attributes:
3282 interfacearm - ?
3283 dllexport - for exporting a function/variable that will live in a dll
3284 dllimport - for importing a function/variable from a dll
3286 Microsoft allows multiple declspecs in one __declspec, separating
3287 them with spaces. We do NOT support this. Instead, use __declspec
3288 multiple times. */
3290 { "dllimport", 0, 0, true, false, false, NULL },
3291 { "dllexport", 0, 0, true, false, false, NULL },
3292 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3293 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3294 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3295 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3296 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3297 #endif
3298 { NULL, 0, 0, false, false, false, NULL }
3301 /* Handle an attribute requiring a FUNCTION_DECL;
3302 arguments as in struct attribute_spec.handler. */
3303 static tree
3304 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3305 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3307 if (TREE_CODE (*node) != FUNCTION_DECL)
3309 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3310 name);
3311 *no_add_attrs = true;
3314 return NULL_TREE;
3317 /* Handle an "interrupt" or "isr" attribute;
3318 arguments as in struct attribute_spec.handler. */
3319 static tree
3320 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3321 bool *no_add_attrs)
3323 if (DECL_P (*node))
3325 if (TREE_CODE (*node) != FUNCTION_DECL)
3327 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3328 name);
3329 *no_add_attrs = true;
3331 /* FIXME: the argument if any is checked for type attributes;
3332 should it be checked for decl ones? */
3334 else
3336 if (TREE_CODE (*node) == FUNCTION_TYPE
3337 || TREE_CODE (*node) == METHOD_TYPE)
3339 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3341 warning (OPT_Wattributes, "%qE attribute ignored",
3342 name);
3343 *no_add_attrs = true;
3346 else if (TREE_CODE (*node) == POINTER_TYPE
3347 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3348 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3349 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3351 *node = build_variant_type_copy (*node);
3352 TREE_TYPE (*node) = build_type_attribute_variant
3353 (TREE_TYPE (*node),
3354 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3355 *no_add_attrs = true;
3357 else
3359 /* Possibly pass this attribute on from the type to a decl. */
3360 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3361 | (int) ATTR_FLAG_FUNCTION_NEXT
3362 | (int) ATTR_FLAG_ARRAY_NEXT))
3364 *no_add_attrs = true;
3365 return tree_cons (name, args, NULL_TREE);
3367 else
3369 warning (OPT_Wattributes, "%qE attribute ignored",
3370 name);
3375 return NULL_TREE;
3378 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3379 /* Handle the "notshared" attribute. This attribute is another way of
3380 requesting hidden visibility. ARM's compiler supports
3381 "__declspec(notshared)"; we support the same thing via an
3382 attribute. */
3384 static tree
3385 arm_handle_notshared_attribute (tree *node,
3386 tree name ATTRIBUTE_UNUSED,
3387 tree args ATTRIBUTE_UNUSED,
3388 int flags ATTRIBUTE_UNUSED,
3389 bool *no_add_attrs)
3391 tree decl = TYPE_NAME (*node);
3393 if (decl)
3395 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3396 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3397 *no_add_attrs = false;
3399 return NULL_TREE;
3401 #endif
3403 /* Return 0 if the attributes for two types are incompatible, 1 if they
3404 are compatible, and 2 if they are nearly compatible (which causes a
3405 warning to be generated). */
3406 static int
3407 arm_comp_type_attributes (const_tree type1, const_tree type2)
3409 int l1, l2, s1, s2;
3411 /* Check for mismatch of non-default calling convention. */
3412 if (TREE_CODE (type1) != FUNCTION_TYPE)
3413 return 1;
3415 /* Check for mismatched call attributes. */
3416 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3417 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3418 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3419 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3421 /* Only bother to check if an attribute is defined. */
3422 if (l1 | l2 | s1 | s2)
3424 /* If one type has an attribute, the other must have the same attribute. */
3425 if ((l1 != l2) || (s1 != s2))
3426 return 0;
3428 /* Disallow mixed attributes. */
3429 if ((l1 & s2) || (l2 & s1))
3430 return 0;
3433 /* Check for mismatched ISR attribute. */
3434 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3435 if (! l1)
3436 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3437 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3438 if (! l2)
3439 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3440 if (l1 != l2)
3441 return 0;
3443 return 1;
3446 /* Assign default attributes to a newly defined type. This is used to
3447 set short_call/long_call attributes for function types of
3448 functions defined inside corresponding #pragma scopes. */
3449 static void
3450 arm_set_default_type_attributes (tree type)
3452 /* Add __attribute__ ((long_call)) to all functions when inside
3453 #pragma long_calls, or __attribute__ ((short_call)) when inside
3454 #pragma no_long_calls. */
3455 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3457 tree type_attr_list, attr_name;
3458 type_attr_list = TYPE_ATTRIBUTES (type);
3460 if (arm_pragma_long_calls == LONG)
3461 attr_name = get_identifier ("long_call");
3462 else if (arm_pragma_long_calls == SHORT)
3463 attr_name = get_identifier ("short_call");
3464 else
3465 return;
3467 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3468 TYPE_ATTRIBUTES (type) = type_attr_list;
3472 /* Return true if DECL is known to be linked into section SECTION. */
3474 static bool
3475 arm_function_in_section_p (tree decl, section *section)
3477 /* We can only be certain about functions defined in the same
3478 compilation unit. */
3479 if (!TREE_STATIC (decl))
3480 return false;
3482 /* Make sure that SYMBOL always binds to the definition in this
3483 compilation unit. */
3484 if (!targetm.binds_local_p (decl))
3485 return false;
3487 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3488 if (!DECL_SECTION_NAME (decl))
3490 /* Make sure that we will not create a unique section for DECL. */
3491 if (flag_function_sections || DECL_ONE_ONLY (decl))
3492 return false;
3495 return function_section (decl) == section;
3498 /* Return nonzero if a 32-bit "long_call" should be generated for
3499 a call from the current function to DECL. We generate a long_call
3500 if the function:
3502 a. has an __attribute__ ((long_call))
3503 or b. is within the scope of a #pragma long_calls
3504 or c. the -mlong-calls command line switch has been specified
3506 However we do not generate a long call if the function:
3508 d. has an __attribute__ ((short_call))
3509 or e. is inside the scope of a #pragma no_long_calls
3510 or f. is defined in the same section as the current function. */
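/* A long call materializes the callee's address in a register and branches
   indirectly (for instance via ldr followed by bx/blx), so it is not limited
   to the roughly +/-32MB reach of a direct bl instruction. */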
3512 bool
3513 arm_is_long_call_p (tree decl)
3515 tree attrs;
3517 if (!decl)
3518 return TARGET_LONG_CALLS;
3520 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3521 if (lookup_attribute ("short_call", attrs))
3522 return false;
3524 /* For "f", be conservative, and only cater for cases in which the
3525 whole of the current function is placed in the same section. */
3526 if (!flag_reorder_blocks_and_partition
3527 && TREE_CODE (decl) == FUNCTION_DECL
3528 && arm_function_in_section_p (decl, current_function_section ()))
3529 return false;
3531 if (lookup_attribute ("long_call", attrs))
3532 return true;
3534 return TARGET_LONG_CALLS;
3537 /* Return nonzero if it is ok to make a tail-call to DECL. */
3538 static bool
3539 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3541 unsigned long func_type;
3543 if (cfun->machine->sibcall_blocked)
3544 return false;
3546 /* Never tailcall something for which we have no decl, or if we
3547 are in Thumb mode. */
3548 if (decl == NULL || TARGET_THUMB)
3549 return false;
3551 /* The PIC register is live on entry to VxWorks PLT entries, so we
3552 must make the call before restoring the PIC register. */
3553 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3554 return false;
3556 /* Cannot tail-call to long calls, since these are out of range of
3557 a branch instruction. */
3558 if (arm_is_long_call_p (decl))
3559 return false;
3561 /* If we are interworking and the function is not declared static
3562 then we can't tail-call it unless we know that it exists in this
3563 compilation unit (since it might be a Thumb routine). */
3564 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3565 return false;
3567 func_type = arm_current_func_type ();
3568 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3569 if (IS_INTERRUPT (func_type))
3570 return false;
3572 /* Never tailcall if function may be called with a misaligned SP. */
3573 if (IS_STACKALIGN (func_type))
3574 return false;
3576 /* Everything else is ok. */
3577 return true;
3581 /* Addressing mode support functions. */
3583 /* Return nonzero if X is a legitimate immediate operand when compiling
3584 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
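/* Under PIC, a SYMBOL_REF (or a CONST offset from one) must be loaded
   through the GOT rather than used directly as an immediate operand, so
   those forms are rejected below; plain integer constants remain
   legitimate. */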
3586 legitimate_pic_operand_p (rtx x)
3588 if (GET_CODE (x) == SYMBOL_REF
3589 || (GET_CODE (x) == CONST
3590 && GET_CODE (XEXP (x, 0)) == PLUS
3591 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3592 return 0;
3594 return 1;
3597 /* Record that the current function needs a PIC register. Initialize
3598 cfun->machine->pic_reg if we have not already done so. */
3600 static void
3601 require_pic_register (void)
3603 /* A lot of the logic here is made obscure by the fact that this
3604 routine gets called as part of the rtx cost estimation process.
3605 We don't want those calls to affect any assumptions about the real
3606 function; and further, we can't call entry_of_function() until we
3607 start the real expansion process. */
3608 if (!crtl->uses_pic_offset_table)
3610 gcc_assert (can_create_pseudo_p ());
3611 if (arm_pic_register != INVALID_REGNUM)
3613 if (!cfun->machine->pic_reg)
3614 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3616 /* Play games to avoid marking the function as needing pic
3617 if we are being called as part of the cost-estimation
3618 process. */
3619 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3620 crtl->uses_pic_offset_table = 1;
3622 else
3624 rtx seq;
3626 if (!cfun->machine->pic_reg)
3627 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3629 /* Play games to avoid marking the function as needing pic
3630 if we are being called as part of the cost-estimation
3631 process. */
3632 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3634 crtl->uses_pic_offset_table = 1;
3635 start_sequence ();
3637 arm_load_pic_register (0UL);
3639 seq = get_insns ();
3640 end_sequence ();
3641 /* We can be called during expansion of PHI nodes, where
3642 we can't yet emit instructions directly in the final
3643 insn stream. Queue the insns on the entry edge, they will
3644 be committed after everything else is expanded. */
3645 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
3652 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3654 if (GET_CODE (orig) == SYMBOL_REF
3655 || GET_CODE (orig) == LABEL_REF)
3657 rtx pic_ref, address;
3658 rtx insn;
3659 int subregs = 0;
3661 /* If this function doesn't have a pic register, create one now. */
3662 require_pic_register ();
3664 if (reg == 0)
3666 gcc_assert (can_create_pseudo_p ());
3667 reg = gen_reg_rtx (Pmode);
3669 subregs = 1;
3672 if (subregs)
3673 address = gen_reg_rtx (Pmode);
3674 else
3675 address = reg;
3677 if (TARGET_ARM)
3678 emit_insn (gen_pic_load_addr_arm (address, orig));
3679 else if (TARGET_THUMB2)
3680 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3681 else /* TARGET_THUMB1 */
3682 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3684 /* VxWorks does not impose a fixed gap between segments; the run-time
3685 gap can be different from the object-file gap. We therefore can't
3686 use GOTOFF unless we are absolutely sure that the symbol is in the
3687 same segment as the GOT. Unfortunately, the flexibility of linker
3688 scripts means that we can't be sure of that in general, so assume
3689 that GOTOFF is never valid on VxWorks. */
3690 if ((GET_CODE (orig) == LABEL_REF
3691 || (GET_CODE (orig) == SYMBOL_REF &&
3692 SYMBOL_REF_LOCAL_P (orig)))
3693 && NEED_GOT_RELOC
3694 && !TARGET_VXWORKS_RTP)
3695 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3696 else
3698 pic_ref = gen_const_mem (Pmode,
3699 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3700 address));
3703 insn = emit_move_insn (reg, pic_ref);
3705 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3706 by the loop optimizer. */
3707 set_unique_reg_note (insn, REG_EQUAL, orig);
3709 return reg;
3711 else if (GET_CODE (orig) == CONST)
3713 rtx base, offset;
3715 if (GET_CODE (XEXP (orig, 0)) == PLUS
3716 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3717 return orig;
3719 /* Handle the case where we have: const (UNSPEC_TLS). */
3720 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3721 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3722 return orig;
3724 /* Handle the case where we have:
3725 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
3726 CONST_INT. */
3727 if (GET_CODE (XEXP (orig, 0)) == PLUS
3728 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
3729 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
3731 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
3732 return orig;
3735 if (reg == 0)
3737 gcc_assert (can_create_pseudo_p ());
3738 reg = gen_reg_rtx (Pmode);
3741 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3743 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3744 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3745 base == reg ? 0 : reg);
3747 if (GET_CODE (offset) == CONST_INT)
3749 /* The base register doesn't really matter, we only want to
3750 test the index for the appropriate mode. */
3751 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3753 gcc_assert (can_create_pseudo_p ());
3754 offset = force_reg (Pmode, offset);
3757 if (GET_CODE (offset) == CONST_INT)
3758 return plus_constant (base, INTVAL (offset));
3761 if (GET_MODE_SIZE (mode) > 4
3762 && (GET_MODE_CLASS (mode) == MODE_INT
3763 || TARGET_SOFT_FLOAT))
3765 emit_insn (gen_addsi3 (reg, base, offset));
3766 return reg;
3769 return gen_rtx_PLUS (Pmode, base, offset);
3772 return orig;
3776 /* Find a spare register to use during the prolog of a function. */
3778 static int
3779 thumb_find_work_register (unsigned long pushed_regs_mask)
3781 int reg;
3783 /* Check the argument registers first as these are call-used. The
3784 register allocation order means that sometimes r3 might be used
3785 but earlier argument registers might not, so check them all. */
3786 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3787 if (!df_regs_ever_live_p (reg))
3788 return reg;
3790 /* Before going on to check the call-saved registers we can try a couple
3791 more ways of deducing that r3 is available. The first is when we are
3792 pushing anonymous arguments onto the stack and we have less than 4
3793 registers worth of fixed arguments(*). In this case r3 will be part of
3794 the variable argument list and so we can be sure that it will be
3795 pushed right at the start of the function. Hence it will be available
3796 for the rest of the prologue.
3797 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
3798 if (cfun->machine->uses_anonymous_args
3799 && crtl->args.pretend_args_size > 0)
3800 return LAST_ARG_REGNUM;
3802 /* The other case is when we have fixed arguments but less than 4 registers
3803 worth. In this case r3 might be used in the body of the function, but
3804 it is not being used to convey an argument into the function. In theory
3805 we could just check crtl->args.size to see how many bytes are
3806 being passed in argument registers, but it seems that it is unreliable.
3807 Sometimes it will have the value 0 when in fact arguments are being
3808 passed. (See testcase execute/20021111-1.c for an example). So we also
3809 check the args_info.nregs field as well. The problem with this field is
3810 that it makes no allowances for arguments that are passed to the
3811 function but which are not used. Hence we could miss an opportunity
3812 when a function has an unused argument in r3. But it is better to be
3813 safe than to be sorry. */
3814 if (! cfun->machine->uses_anonymous_args
3815 && crtl->args.size >= 0
3816 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3817 && crtl->args.info.nregs < 4)
3818 return LAST_ARG_REGNUM;
3820 /* Otherwise look for a call-saved register that is going to be pushed. */
3821 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3822 if (pushed_regs_mask & (1 << reg))
3823 return reg;
3825 if (TARGET_THUMB2)
3827 /* Thumb-2 can use high regs. */
3828 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3829 if (pushed_regs_mask & (1 << reg))
3830 return reg;
3832 /* Something went wrong - thumb_compute_save_reg_mask()
3833 should have arranged for a suitable register to be pushed. */
3834 gcc_unreachable ();
3837 static GTY(()) int pic_labelno;
3839 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3840 low register. */
3842 void
3843 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3845 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
3847 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3848 return;
3850 gcc_assert (flag_pic);
3852 pic_reg = cfun->machine->pic_reg;
3853 if (TARGET_VXWORKS_RTP)
3855 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3856 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3857 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3859 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3861 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3862 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3864 else
3866 /* We use an UNSPEC rather than a LABEL_REF because this label
3867 never appears in the code stream. */
3869 labelno = GEN_INT (pic_labelno++);
3870 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3871 l1 = gen_rtx_CONST (VOIDmode, l1);
3873 /* On the ARM the PC register contains 'dot + 8' at the time of the
3874 addition, on the Thumb it is 'dot + 4'. */
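/* A rough sketch of what the code below emits for ARM state (label and
   register names are illustrative only):
       ldr     rPIC, .LCPIC      @ .LCPIC: .word _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
   .LPIC0:
       add     rPIC, pc, rPIC    @ rPIC now points at the GOT
   The Thumb variants use the same idea with a bias of 4.  */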
3875 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
3876 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
3877 UNSPEC_GOTSYM_OFF);
3878 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3880 if (TARGET_ARM)
3882 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3883 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3885 else if (TARGET_THUMB2)
3887 /* Thumb-2 only allows very limited access to the PC. Calculate the
3888 address in a temporary register. */
3889 if (arm_pic_register != INVALID_REGNUM)
3891 pic_tmp = gen_rtx_REG (SImode,
3892 thumb_find_work_register (saved_regs));
3894 else
3896 gcc_assert (can_create_pseudo_p ());
3897 pic_tmp = gen_reg_rtx (Pmode);
3900 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3901 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3902 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3904 else /* TARGET_THUMB1 */
3906 if (arm_pic_register != INVALID_REGNUM
3907 && REGNO (pic_reg) > LAST_LO_REGNUM)
3909 /* We will have pushed the pic register, so we should always be
3910 able to find a work register. */
3911 pic_tmp = gen_rtx_REG (SImode,
3912 thumb_find_work_register (saved_regs));
3913 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3914 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3916 else
3917 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3918 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3922 /* Need to emit this whether or not we obey regdecls,
3923 since setjmp/longjmp can cause life info to screw up. */
3924 emit_use (pic_reg);
3928 /* Return nonzero if X is valid as an ARM state addressing register. */
3929 static int
3930 arm_address_register_rtx_p (rtx x, int strict_p)
3932 int regno;
3934 if (GET_CODE (x) != REG)
3935 return 0;
3937 regno = REGNO (x);
3939 if (strict_p)
3940 return ARM_REGNO_OK_FOR_BASE_P (regno);
3942 return (regno <= LAST_ARM_REGNUM
3943 || regno >= FIRST_PSEUDO_REGISTER
3944 || regno == FRAME_POINTER_REGNUM
3945 || regno == ARG_POINTER_REGNUM);
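/* In the non-strict case any pseudo (and the soft frame/arg pointers) is
   accepted as well, since reload may still place it in a valid base
   register later.  */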
3948 /* Return TRUE if this rtx is the difference of a symbol and a label,
3949 and will reduce to a PC-relative relocation in the object file.
3950 Expressions like this can be left alone when generating PIC, rather
3951 than forced through the GOT. */
3952 static int
3953 pcrel_constant_p (rtx x)
3955 if (GET_CODE (x) == MINUS)
3956 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3958 return FALSE;
3961 /* Return nonzero if X is a valid ARM state address operand. */
3963 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3964 int strict_p)
3966 bool use_ldrd;
3967 enum rtx_code code = GET_CODE (x);
3969 if (arm_address_register_rtx_p (x, strict_p))
3970 return 1;
3972 use_ldrd = (TARGET_LDRD
3973 && (mode == DImode
3974 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3976 if (code == POST_INC || code == PRE_DEC
3977 || ((code == PRE_INC || code == POST_DEC)
3978 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3979 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3981 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3982 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3983 && GET_CODE (XEXP (x, 1)) == PLUS
3984 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3986 rtx addend = XEXP (XEXP (x, 1), 1);
3988 /* Don't allow ldrd post increment by register because it's hard
3989 to fix up invalid register choices. */
3990 if (use_ldrd
3991 && GET_CODE (x) == POST_MODIFY
3992 && GET_CODE (addend) == REG)
3993 return 0;
3995 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3996 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3999 /* After reload constants split into minipools will have addresses
4000 from a LABEL_REF. */
4001 else if (reload_completed
4002 && (code == LABEL_REF
4003 || (code == CONST
4004 && GET_CODE (XEXP (x, 0)) == PLUS
4005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4006 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4007 return 1;
4009 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4010 return 0;
4012 else if (code == PLUS)
4014 rtx xop0 = XEXP (x, 0);
4015 rtx xop1 = XEXP (x, 1);
4017 return ((arm_address_register_rtx_p (xop0, strict_p)
4018 && GET_CODE(xop1) == CONST_INT
4019 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
4020 || (arm_address_register_rtx_p (xop1, strict_p)
4021 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
4024 #if 0
4025 /* Reload currently can't handle MINUS, so disable this for now */
4026 else if (GET_CODE (x) == MINUS)
4028 rtx xop0 = XEXP (x, 0);
4029 rtx xop1 = XEXP (x, 1);
4031 return (arm_address_register_rtx_p (xop0, strict_p)
4032 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
4034 #endif
4036 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4037 && code == SYMBOL_REF
4038 && CONSTANT_POOL_ADDRESS_P (x)
4039 && ! (flag_pic
4040 && symbol_mentioned_p (get_pool_constant (x))
4041 && ! pcrel_constant_p (get_pool_constant (x))))
4042 return 1;
4044 return 0;
4047 /* Return nonzero if X is a valid Thumb-2 address operand. */
4048 static int
4049 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4051 bool use_ldrd;
4052 enum rtx_code code = GET_CODE (x);
4054 if (arm_address_register_rtx_p (x, strict_p))
4055 return 1;
4057 use_ldrd = (TARGET_LDRD
4058 && (mode == DImode
4059 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4061 if (code == POST_INC || code == PRE_DEC
4062 || ((code == PRE_INC || code == POST_DEC)
4063 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4064 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4066 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4067 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4068 && GET_CODE (XEXP (x, 1)) == PLUS
4069 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4071 /* Thumb-2 only has autoincrement by constant. */
4072 rtx addend = XEXP (XEXP (x, 1), 1);
4073 HOST_WIDE_INT offset;
4075 if (GET_CODE (addend) != CONST_INT)
4076 return 0;
4078 offset = INTVAL(addend);
4079 if (GET_MODE_SIZE (mode) <= 4)
4080 return (offset > -256 && offset < 256);
4082 return (use_ldrd && offset > -1024 && offset < 1024
4083 && (offset & 3) == 0);
4086 /* After reload constants split into minipools will have addresses
4087 from a LABEL_REF. */
4088 else if (reload_completed
4089 && (code == LABEL_REF
4090 || (code == CONST
4091 && GET_CODE (XEXP (x, 0)) == PLUS
4092 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4093 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4094 return 1;
4096 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4097 return 0;
4099 else if (code == PLUS)
4101 rtx xop0 = XEXP (x, 0);
4102 rtx xop1 = XEXP (x, 1);
4104 return ((arm_address_register_rtx_p (xop0, strict_p)
4105 && thumb2_legitimate_index_p (mode, xop1, strict_p))
4106 || (arm_address_register_rtx_p (xop1, strict_p)
4107 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
4110 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4111 && code == SYMBOL_REF
4112 && CONSTANT_POOL_ADDRESS_P (x)
4113 && ! (flag_pic
4114 && symbol_mentioned_p (get_pool_constant (x))
4115 && ! pcrel_constant_p (get_pool_constant (x))))
4116 return 1;
4118 return 0;
4121 /* Return nonzero if INDEX is valid for an address index operand in
4122 ARM state. */
4123 static int
4124 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
4125 int strict_p)
4127 HOST_WIDE_INT range;
4128 enum rtx_code code = GET_CODE (index);
4130 /* Standard coprocessor addressing modes. */
4131 if (TARGET_HARD_FLOAT
4132 && (TARGET_FPA || TARGET_MAVERICK)
4133 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4134 || (TARGET_MAVERICK && mode == DImode)))
4135 return (code == CONST_INT && INTVAL (index) < 1024
4136 && INTVAL (index) > -1024
4137 && (INTVAL (index) & 3) == 0);
4139 if (TARGET_NEON
4140 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4141 return (code == CONST_INT
4142 && INTVAL (index) < 1016
4143 && INTVAL (index) > -1024
4144 && (INTVAL (index) & 3) == 0);
4146 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4147 return (code == CONST_INT
4148 && INTVAL (index) < 1024
4149 && INTVAL (index) > -1024
4150 && (INTVAL (index) & 3) == 0);
4152 if (arm_address_register_rtx_p (index, strict_p)
4153 && (GET_MODE_SIZE (mode) <= 4))
4154 return 1;
4156 if (mode == DImode || mode == DFmode)
4158 if (code == CONST_INT)
4160 HOST_WIDE_INT val = INTVAL (index);
4162 if (TARGET_LDRD)
4163 return val > -256 && val < 256;
4164 else
4165 return val > -4096 && val < 4092;
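/* Without LDRD the DImode/DFmode value is moved as two word loads, so the
   second word at offset+4 must still fit the 12-bit immediate range;
   hence the 4092 bound above.  */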
4168 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
4171 if (GET_MODE_SIZE (mode) <= 4
4172 && ! (arm_arch4
4173 && (mode == HImode
4174 || (mode == QImode && outer == SIGN_EXTEND))))
4176 if (code == MULT)
4178 rtx xiop0 = XEXP (index, 0);
4179 rtx xiop1 = XEXP (index, 1);
4181 return ((arm_address_register_rtx_p (xiop0, strict_p)
4182 && power_of_two_operand (xiop1, SImode))
4183 || (arm_address_register_rtx_p (xiop1, strict_p)
4184 && power_of_two_operand (xiop0, SImode)));
4186 else if (code == LSHIFTRT || code == ASHIFTRT
4187 || code == ASHIFT || code == ROTATERT)
4189 rtx op = XEXP (index, 1);
4191 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4192 && GET_CODE (op) == CONST_INT
4193 && INTVAL (op) > 0
4194 && INTVAL (op) <= 31);
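/* e.g. the index of a scaled-register address such as [rB, rI, lsl #2]
   reaches this function as (mult rI 4) or (ashift rI 2); both forms are
   accepted above (registers shown are hypothetical).  */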
4198 /* For ARM v4 we may be doing a sign-extend operation during the
4199 load. */
4200 if (arm_arch4)
4202 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
4203 range = 256;
4204 else
4205 range = 4096;
4207 else
4208 range = (mode == HImode) ? 4095 : 4096;
4210 return (code == CONST_INT
4211 && INTVAL (index) < range
4212 && INTVAL (index) > -range);
4215 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4216 index operand, i.e. 1, 2, 4 or 8. */
4217 static bool
4218 thumb2_index_mul_operand (rtx op)
4220 HOST_WIDE_INT val;
4222 if (GET_CODE(op) != CONST_INT)
4223 return false;
4225 val = INTVAL(op);
4226 return (val == 1 || val == 2 || val == 4 || val == 8);
4229 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4230 static int
4231 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4233 enum rtx_code code = GET_CODE (index);
4235 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4236 /* Standard coprocessor addressing modes. */
4237 if (TARGET_HARD_FLOAT
4238 && (TARGET_FPA || TARGET_MAVERICK)
4239 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4240 || (TARGET_MAVERICK && mode == DImode)))
4241 return (code == CONST_INT && INTVAL (index) < 1024
4242 && INTVAL (index) > -1024
4243 && (INTVAL (index) & 3) == 0);
4245 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4247 /* For DImode assume values will usually live in core regs
4248 and only allow LDRD addressing modes. */
4249 if (!TARGET_LDRD || mode != DImode)
4250 return (code == CONST_INT
4251 && INTVAL (index) < 1024
4252 && INTVAL (index) > -1024
4253 && (INTVAL (index) & 3) == 0);
4256 if (TARGET_NEON
4257 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4258 return (code == CONST_INT
4259 && INTVAL (index) < 1016
4260 && INTVAL (index) > -1024
4261 && (INTVAL (index) & 3) == 0);
4263 if (arm_address_register_rtx_p (index, strict_p)
4264 && (GET_MODE_SIZE (mode) <= 4))
4265 return 1;
4267 if (mode == DImode || mode == DFmode)
4269 HOST_WIDE_INT val = INTVAL (index);
4270 /* ??? Can we assume ldrd for thumb2? */
4271 /* Thumb-2 ldrd only has reg+const addressing modes. */
4272 if (code != CONST_INT)
4273 return 0;
4275 /* ldrd supports offsets of +-1020.
4276 However the ldr fallback does not. */
4277 return val > -256 && val < 256 && (val & 3) == 0;
4280 if (code == MULT)
4282 rtx xiop0 = XEXP (index, 0);
4283 rtx xiop1 = XEXP (index, 1);
4285 return ((arm_address_register_rtx_p (xiop0, strict_p)
4286 && thumb2_index_mul_operand (xiop1))
4287 || (arm_address_register_rtx_p (xiop1, strict_p)
4288 && thumb2_index_mul_operand (xiop0)));
4290 else if (code == ASHIFT)
4292 rtx op = XEXP (index, 1);
4294 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4295 && GET_CODE (op) == CONST_INT
4296 && INTVAL (op) > 0
4297 && INTVAL (op) <= 3);
4300 return (code == CONST_INT
4301 && INTVAL (index) < 4096
4302 && INTVAL (index) > -256);
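/* These bounds mirror the plain Thumb-2 load/store encodings: a 12-bit
   positive immediate offset, or an 8-bit immediate for the negative
   form.  */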
4305 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4306 static int
4307 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4309 int regno;
4311 if (GET_CODE (x) != REG)
4312 return 0;
4314 regno = REGNO (x);
4316 if (strict_p)
4317 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4319 return (regno <= LAST_LO_REGNUM
4320 || regno > LAST_VIRTUAL_REGISTER
4321 || regno == FRAME_POINTER_REGNUM
4322 || (GET_MODE_SIZE (mode) >= 4
4323 && (regno == STACK_POINTER_REGNUM
4324 || regno >= FIRST_PSEUDO_REGISTER
4325 || x == hard_frame_pointer_rtx
4326 || x == arg_pointer_rtx)));
4329 /* Return nonzero if x is a legitimate index register. This is the case
4330 for any base register that can access a QImode object. */
4331 inline static int
4332 thumb1_index_register_rtx_p (rtx x, int strict_p)
4334 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4337 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4339 The AP may be eliminated to either the SP or the FP, so we use the
4340 least common denominator, e.g. SImode, and offsets from 0 to 64.
4342 ??? Verify whether the above is the right approach.
4344 ??? Also, the FP may be eliminated to the SP, so perhaps that
4345 needs special handling also.
4347 ??? Look at how the mips16 port solves this problem. It probably uses
4348 better ways to solve some of these problems.
4350 Although it is not incorrect, we don't accept QImode and HImode
4351 addresses based on the frame pointer or arg pointer until the
4352 reload pass starts. This is so that eliminating such addresses
4353 into stack based ones won't produce impossible code. */
4354 static int
4355 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4357 /* ??? Not clear if this is right. Experiment. */
4358 if (GET_MODE_SIZE (mode) < 4
4359 && !(reload_in_progress || reload_completed)
4360 && (reg_mentioned_p (frame_pointer_rtx, x)
4361 || reg_mentioned_p (arg_pointer_rtx, x)
4362 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4363 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4364 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4365 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4366 return 0;
4368 /* Accept any base register. SP only in SImode or larger. */
4369 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4370 return 1;
4372 /* This is PC relative data before arm_reorg runs. */
4373 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4374 && GET_CODE (x) == SYMBOL_REF
4375 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4376 return 1;
4378 /* This is PC relative data after arm_reorg runs. */
4379 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4380 && (GET_CODE (x) == LABEL_REF
4381 || (GET_CODE (x) == CONST
4382 && GET_CODE (XEXP (x, 0)) == PLUS
4383 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4384 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4385 return 1;
4387 /* Post-inc indexing only supported for SImode and larger. */
4388 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4389 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4390 return 1;
4392 else if (GET_CODE (x) == PLUS)
4394 /* REG+REG address can be any two index registers. */
4395 /* We disallow FRAME+REG addressing since we know that FRAME
4396 will be replaced with STACK, and SP relative addressing only
4397 permits SP+OFFSET. */
4398 if (GET_MODE_SIZE (mode) <= 4
4399 && XEXP (x, 0) != frame_pointer_rtx
4400 && XEXP (x, 1) != frame_pointer_rtx
4401 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4402 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4403 return 1;
4405 /* REG+const has 5-7 bit offset for non-SP registers. */
4406 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4407 || XEXP (x, 0) == arg_pointer_rtx)
4408 && GET_CODE (XEXP (x, 1)) == CONST_INT
4409 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4410 return 1;
4412 /* REG+const has 10-bit offset for SP, but only SImode and
4413 larger are supported. */
4414 /* ??? Should probably check for DI/DFmode overflow here
4415 just like GO_IF_LEGITIMATE_OFFSET does. */
4416 else if (GET_CODE (XEXP (x, 0)) == REG
4417 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4418 && GET_MODE_SIZE (mode) >= 4
4419 && GET_CODE (XEXP (x, 1)) == CONST_INT
4420 && INTVAL (XEXP (x, 1)) >= 0
4421 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4422 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4423 return 1;
4425 else if (GET_CODE (XEXP (x, 0)) == REG
4426 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4427 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4428 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4429 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4430 && GET_MODE_SIZE (mode) >= 4
4431 && GET_CODE (XEXP (x, 1)) == CONST_INT
4432 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4433 return 1;
4436 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4437 && GET_MODE_SIZE (mode) == 4
4438 && GET_CODE (x) == SYMBOL_REF
4439 && CONSTANT_POOL_ADDRESS_P (x)
4440 && ! (flag_pic
4441 && symbol_mentioned_p (get_pool_constant (x))
4442 && ! pcrel_constant_p (get_pool_constant (x))))
4443 return 1;
4445 return 0;
4448 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4449 instruction of mode MODE. */
4451 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4453 switch (GET_MODE_SIZE (mode))
4455 case 1:
4456 return val >= 0 && val < 32;
4458 case 2:
4459 return val >= 0 && val < 64 && (val & 1) == 0;
4461 default:
4462 return (val >= 0
4463 && (val + GET_MODE_SIZE (mode)) <= 128
4464 && (val & 3) == 0);
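/* Summarized (Thumb-1 reg+imm5 forms): byte accesses allow offsets 0..31,
   halfword accesses 0..62 (even), and word or larger accesses must be
   word-aligned with the whole access ending at or below 128.  */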
4468 bool
4469 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
4471 if (TARGET_ARM)
4472 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
4473 else if (TARGET_THUMB2)
4474 return thumb2_legitimate_address_p (mode, x, strict_p);
4475 else /* if (TARGET_THUMB1) */
4476 return thumb1_legitimate_address_p (mode, x, strict_p);
4479 /* Build the SYMBOL_REF for __tls_get_addr. */
4481 static GTY(()) rtx tls_get_addr_libfunc;
4483 static rtx
4484 get_tls_get_addr (void)
4486 if (!tls_get_addr_libfunc)
4487 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4488 return tls_get_addr_libfunc;
4491 static rtx
4492 arm_load_tp (rtx target)
4494 if (!target)
4495 target = gen_reg_rtx (SImode);
4497 if (TARGET_HARD_TP)
4499 /* Can return in any reg. */
4500 emit_insn (gen_load_tp_hard (target));
4502 else
4504 /* Always returned in r0. Immediately copy the result into a pseudo,
4505 otherwise other uses of r0 (e.g. setting up function arguments) may
4506 clobber the value. */
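/* (Sketch: the hard-TP path above is a single CP15 read, e.g.
   "mrc p15, 0, rD, c13, c0, 3", while the soft path calls a helper such
   as __aeabi_read_tp that returns in r0 - hence the copy below.
   Mnemonic and helper name given for illustration only.)  */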
4508 rtx tmp;
4510 emit_insn (gen_load_tp_soft ());
4512 tmp = gen_rtx_REG (SImode, 0);
4513 emit_move_insn (target, tmp);
4515 return target;
4518 static rtx
4519 load_tls_operand (rtx x, rtx reg)
4521 rtx tmp;
4523 if (reg == NULL_RTX)
4524 reg = gen_reg_rtx (SImode);
4526 tmp = gen_rtx_CONST (SImode, x);
4528 emit_move_insn (reg, tmp);
4530 return reg;
4533 static rtx
4534 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4536 rtx insns, label, labelno, sum;
4538 start_sequence ();
4540 labelno = GEN_INT (pic_labelno++);
4541 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4542 label = gen_rtx_CONST (VOIDmode, label);
4544 sum = gen_rtx_UNSPEC (Pmode,
4545 gen_rtvec (4, x, GEN_INT (reloc), label,
4546 GEN_INT (TARGET_ARM ? 8 : 4)),
4547 UNSPEC_TLS);
4548 reg = load_tls_operand (sum, reg);
4550 if (TARGET_ARM)
4551 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4552 else if (TARGET_THUMB2)
4554 rtx tmp;
4555 /* Thumb-2 only allows very limited access to the PC. Calculate
4556 the address in a temporary register. */
4557 tmp = gen_reg_rtx (SImode);
4558 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4559 emit_insn (gen_addsi3(reg, reg, tmp));
4561 else /* TARGET_THUMB1 */
4562 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4564 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4565 Pmode, 1, reg, Pmode);
4567 insns = get_insns ();
4568 end_sequence ();
4570 return insns;
4574 legitimize_tls_address (rtx x, rtx reg)
4576 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4577 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
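/* Rough shape of each model handled below (a sketch, not exact RTL):
     global-dynamic: addr = __tls_get_addr (GD32 reference)
     local-dynamic:  base = __tls_get_addr (LDM32); addr = base + LDO32(x)
     initial-exec:   addr = tp + GOT-loaded IE32 offset of x
     local-exec:     addr = tp + LE32 offset of x  */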
4579 switch (model)
4581 case TLS_MODEL_GLOBAL_DYNAMIC:
4582 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4583 dest = gen_reg_rtx (Pmode);
4584 emit_libcall_block (insns, dest, ret, x);
4585 return dest;
4587 case TLS_MODEL_LOCAL_DYNAMIC:
4588 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4590 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4591 share the LDM result with other LD model accesses. */
4592 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4593 UNSPEC_TLS);
4594 dest = gen_reg_rtx (Pmode);
4595 emit_libcall_block (insns, dest, ret, eqv);
4597 /* Load the addend. */
4598 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4599 UNSPEC_TLS);
4600 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4601 return gen_rtx_PLUS (Pmode, dest, addend);
4603 case TLS_MODEL_INITIAL_EXEC:
4604 labelno = GEN_INT (pic_labelno++);
4605 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4606 label = gen_rtx_CONST (VOIDmode, label);
4607 sum = gen_rtx_UNSPEC (Pmode,
4608 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4609 GEN_INT (TARGET_ARM ? 8 : 4)),
4610 UNSPEC_TLS);
4611 reg = load_tls_operand (sum, reg);
4613 if (TARGET_ARM)
4614 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4615 else if (TARGET_THUMB2)
4617 rtx tmp;
4618 /* Thumb-2 only allows very limited access to the PC. Calculate
4619 the address in a temporary register. */
4620 tmp = gen_reg_rtx (SImode);
4621 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4622 emit_insn (gen_addsi3(reg, reg, tmp));
4623 emit_move_insn (reg, gen_const_mem (SImode, reg));
4625 else
4627 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4628 emit_move_insn (reg, gen_const_mem (SImode, reg));
4631 tp = arm_load_tp (NULL_RTX);
4633 return gen_rtx_PLUS (Pmode, tp, reg);
4635 case TLS_MODEL_LOCAL_EXEC:
4636 tp = arm_load_tp (NULL_RTX);
4638 reg = gen_rtx_UNSPEC (Pmode,
4639 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4640 UNSPEC_TLS);
4641 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4643 return gen_rtx_PLUS (Pmode, tp, reg);
4645 default:
4646 abort ();
4650 /* Try machine-dependent ways of modifying an illegitimate address
4651 to be legitimate. If we find one, return the new, valid address. */
4653 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4655 if (!TARGET_ARM)
4657 /* TODO: legitimize_address for Thumb2. */
4658 if (TARGET_THUMB2)
4659 return x;
4660 return thumb_legitimize_address (x, orig_x, mode);
4663 if (arm_tls_symbol_p (x))
4664 return legitimize_tls_address (x, NULL_RTX);
4666 if (GET_CODE (x) == PLUS)
4668 rtx xop0 = XEXP (x, 0);
4669 rtx xop1 = XEXP (x, 1);
4671 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4672 xop0 = force_reg (SImode, xop0);
4674 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4675 xop1 = force_reg (SImode, xop1);
4677 if (ARM_BASE_REGISTER_RTX_P (xop0)
4678 && GET_CODE (xop1) == CONST_INT)
4680 HOST_WIDE_INT n, low_n;
4681 rtx base_reg, val;
4682 n = INTVAL (xop1);
4684 /* VFP addressing modes actually allow greater offsets, but for
4685 now we just stick with the lowest common denominator. */
4686 if (mode == DImode
4687 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4689 low_n = n & 0x0f;
4690 n &= ~0x0f;
4691 if (low_n > 4)
4693 n += 16;
4694 low_n -= 16;
4697 else
4699 low_n = ((mode) == TImode ? 0
4700 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4701 n -= low_n;
4704 base_reg = gen_reg_rtx (SImode);
4705 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4706 emit_move_insn (base_reg, val);
4707 x = plus_constant (base_reg, low_n);
4709 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4710 x = gen_rtx_PLUS (SImode, xop0, xop1);
4713 /* XXX We don't allow MINUS any more -- see comment in
4714 arm_legitimate_address_outer_p (). */
4715 else if (GET_CODE (x) == MINUS)
4717 rtx xop0 = XEXP (x, 0);
4718 rtx xop1 = XEXP (x, 1);
4720 if (CONSTANT_P (xop0))
4721 xop0 = force_reg (SImode, xop0);
4723 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4724 xop1 = force_reg (SImode, xop1);
4726 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4727 x = gen_rtx_MINUS (SImode, xop0, xop1);
4730 /* Make sure to take full advantage of the pre-indexed addressing mode
4731 with absolute addresses which often allows for the base register to
4732 be factorized for multiple adjacent memory references, and it might
4733 even allow for the mini pool to be avoided entirely. */
4734 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4736 unsigned int bits;
4737 HOST_WIDE_INT mask, base, index;
4738 rtx base_reg;
4740 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4741 use an 8-bit index. So let's use a 12-bit index for SImode only and
4742 hope that arm_gen_constant will enable ldrb to use more bits. */
4743 bits = (mode == SImode) ? 12 : 8;
4744 mask = (1 << bits) - 1;
4745 base = INTVAL (x) & ~mask;
4746 index = INTVAL (x) & mask;
4747 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4749 /* It'll most probably be more efficient to generate the base
4750 with more bits set and use a negative index instead. */
4751 base |= mask;
4752 index -= mask;
4754 base_reg = force_reg (SImode, GEN_INT (base));
4755 x = plus_constant (base_reg, index);
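/* Worked example (hypothetical constant): for SImode and x = 0x12345,
   bits = 12 and mask = 0xfff, so base = 0x12000 and index = 0x345; the
   base is loaded once and nearby accesses become [rBASE, #index].  */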
4758 if (flag_pic)
4760 /* We need to find and carefully transform any SYMBOL and LABEL
4761 references; so go back to the original address expression. */
4762 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4764 if (new_x != orig_x)
4765 x = new_x;
4768 return x;
4772 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4773 to be legitimate. If we find one, return the new, valid address. */
4775 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4777 if (arm_tls_symbol_p (x))
4778 return legitimize_tls_address (x, NULL_RTX);
4780 if (GET_CODE (x) == PLUS
4781 && GET_CODE (XEXP (x, 1)) == CONST_INT
4782 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4783 || INTVAL (XEXP (x, 1)) < 0))
4785 rtx xop0 = XEXP (x, 0);
4786 rtx xop1 = XEXP (x, 1);
4787 HOST_WIDE_INT offset = INTVAL (xop1);
4789 /* Try and fold the offset into a biasing of the base register and
4790 then offsetting that. Don't do this when optimizing for space
4791 since it can cause too many CSEs. */
4792 if (optimize_size && offset >= 0
4793 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4795 HOST_WIDE_INT delta;
4797 if (offset >= 256)
4798 delta = offset - (256 - GET_MODE_SIZE (mode));
4799 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4800 delta = 31 * GET_MODE_SIZE (mode);
4801 else
4802 delta = offset & (~31 * GET_MODE_SIZE (mode));
4804 xop0 = force_operand (plus_constant (xop0, offset - delta),
4805 NULL_RTX);
4806 x = plus_constant (xop0, delta);
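/* Worked example (illustrative numbers): for SImode with offset = 260,
   delta = 260 - (256 - 4) = 8, so the base is advanced by 252 and the
   access becomes base+252 plus an in-range #8 offset.  */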
4808 else if (offset < 0 && offset > -256)
4809 /* Small negative offsets are best done with a subtract before the
4810 dereference; forcing these into a register normally takes two
4811 instructions. */
4812 x = force_operand (x, NULL_RTX);
4813 else
4815 /* For the remaining cases, force the constant into a register. */
4816 xop1 = force_reg (SImode, xop1);
4817 x = gen_rtx_PLUS (SImode, xop0, xop1);
4820 else if (GET_CODE (x) == PLUS
4821 && s_register_operand (XEXP (x, 1), SImode)
4822 && !s_register_operand (XEXP (x, 0), SImode))
4824 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4826 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4829 if (flag_pic)
4831 /* We need to find and carefully transform any SYMBOL and LABEL
4832 references; so go back to the original address expression. */
4833 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4835 if (new_x != orig_x)
4836 x = new_x;
4839 return x;
4843 thumb_legitimize_reload_address (rtx *x_p,
4844 enum machine_mode mode,
4845 int opnum, int type,
4846 int ind_levels ATTRIBUTE_UNUSED)
4848 rtx x = *x_p;
4850 if (GET_CODE (x) == PLUS
4851 && GET_MODE_SIZE (mode) < 4
4852 && REG_P (XEXP (x, 0))
4853 && XEXP (x, 0) == stack_pointer_rtx
4854 && GET_CODE (XEXP (x, 1)) == CONST_INT
4855 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4857 rtx orig_x = x;
4859 x = copy_rtx (x);
4860 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4861 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4862 return x;
4865 /* If both registers are hi-regs, then it's better to reload the
4866 entire expression rather than each register individually. That
4867 only requires one reload register rather than two. */
4868 if (GET_CODE (x) == PLUS
4869 && REG_P (XEXP (x, 0))
4870 && REG_P (XEXP (x, 1))
4871 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4872 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4874 rtx orig_x = x;
4876 x = copy_rtx (x);
4877 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4878 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4879 return x;
4882 return NULL;
4885 /* Test for various thread-local symbols. */
4887 /* Return TRUE if X is a thread-local symbol. */
4889 static bool
4890 arm_tls_symbol_p (rtx x)
4892 if (! TARGET_HAVE_TLS)
4893 return false;
4895 if (GET_CODE (x) != SYMBOL_REF)
4896 return false;
4898 return SYMBOL_REF_TLS_MODEL (x) != 0;
4901 /* Helper for arm_tls_referenced_p. */
4903 static int
4904 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4906 if (GET_CODE (*x) == SYMBOL_REF)
4907 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4909 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4910 TLS offsets, not real symbol references. */
4911 if (GET_CODE (*x) == UNSPEC
4912 && XINT (*x, 1) == UNSPEC_TLS)
4913 return -1;
4915 return 0;
4918 /* Return TRUE if X contains any TLS symbol references. */
4920 bool
4921 arm_tls_referenced_p (rtx x)
4923 if (! TARGET_HAVE_TLS)
4924 return false;
4926 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4929 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4931 bool
4932 arm_cannot_force_const_mem (rtx x)
4934 rtx base, offset;
4936 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4938 split_const (x, &base, &offset);
4939 if (GET_CODE (base) == SYMBOL_REF
4940 && !offset_within_block_p (base, INTVAL (offset)))
4941 return true;
4943 return arm_tls_referenced_p (x);
4946 #define REG_OR_SUBREG_REG(X) \
4947 (GET_CODE (X) == REG \
4948 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4950 #define REG_OR_SUBREG_RTX(X) \
4951 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4953 #ifndef COSTS_N_INSNS
4954 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4955 #endif
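/* (The fallback above only applies if rtl.h has not already provided
   COSTS_N_INSNS; normally the definition from rtl.h is in effect.)  */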
4956 static inline int
4957 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4959 enum machine_mode mode = GET_MODE (x);
4961 switch (code)
4963 case ASHIFT:
4964 case ASHIFTRT:
4965 case LSHIFTRT:
4966 case ROTATERT:
4967 case PLUS:
4968 case MINUS:
4969 case COMPARE:
4970 case NEG:
4971 case NOT:
4972 return COSTS_N_INSNS (1);
4974 case MULT:
4975 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4977 int cycles = 0;
4978 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4980 while (i)
4982 i >>= 2;
4983 cycles++;
4985 return COSTS_N_INSNS (2) + cycles;
4987 return COSTS_N_INSNS (1) + 16;
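/* Worked example for the constant case above (illustrative value):
   multiplying by 100 shifts i right by two bits four times before it
   reaches zero, giving COSTS_N_INSNS (2) + 4 - an approximation of a
   2-bits-per-cycle multiplier.  */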
4989 case SET:
4990 return (COSTS_N_INSNS (1)
4991 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4992 + GET_CODE (SET_DEST (x)) == MEM));
4994 case CONST_INT:
4995 if (outer == SET)
4997 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4998 return 0;
4999 if (thumb_shiftable_const (INTVAL (x)))
5000 return COSTS_N_INSNS (2);
5001 return COSTS_N_INSNS (3);
5003 else if ((outer == PLUS || outer == COMPARE)
5004 && INTVAL (x) < 256 && INTVAL (x) > -256)
5005 return 0;
5006 else if (outer == AND
5007 && INTVAL (x) < 256 && INTVAL (x) >= -256)
5008 return COSTS_N_INSNS (1);
5009 else if (outer == ASHIFT || outer == ASHIFTRT
5010 || outer == LSHIFTRT)
5011 return 0;
5012 return COSTS_N_INSNS (2);
5014 case CONST:
5015 case CONST_DOUBLE:
5016 case LABEL_REF:
5017 case SYMBOL_REF:
5018 return COSTS_N_INSNS (3);
5020 case UDIV:
5021 case UMOD:
5022 case DIV:
5023 case MOD:
5024 return 100;
5026 case TRUNCATE:
5027 return 99;
5029 case AND:
5030 case XOR:
5031 case IOR:
5032 /* XXX guess. */
5033 return 8;
5035 case MEM:
5036 /* XXX another guess. */
5037 /* Memory costs quite a lot for the first word, but subsequent words
5038 load at the equivalent of a single insn each. */
5039 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
5040 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5041 ? 4 : 0));
5043 case IF_THEN_ELSE:
5044 /* XXX a guess. */
5045 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5046 return 14;
5047 return 2;
5049 case ZERO_EXTEND:
5050 /* XXX still guessing. */
5051 switch (GET_MODE (XEXP (x, 0)))
5053 case QImode:
5054 return (1 + (mode == DImode ? 4 : 0)
5055 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5057 case HImode:
5058 return (4 + (mode == DImode ? 4 : 0)
5059 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5061 case SImode:
5062 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5064 default:
5065 return 99;
5068 default:
5069 return 99;
5073 static inline bool
5074 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
5076 enum machine_mode mode = GET_MODE (x);
5077 enum rtx_code subcode;
5078 rtx operand;
5079 enum rtx_code code = GET_CODE (x);
5080 int extra_cost;
5081 *total = 0;
5083 switch (code)
5085 case MEM:
5086 /* Memory costs quite a lot for the first word, but subsequent words
5087 load at the equivalent of a single insn each. */
5088 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
5089 return true;
5091 case DIV:
5092 case MOD:
5093 case UDIV:
5094 case UMOD:
5095 if (TARGET_HARD_FLOAT && mode == SFmode)
5096 *total = COSTS_N_INSNS (2);
5097 else if (TARGET_HARD_FLOAT && mode == DFmode)
5098 *total = COSTS_N_INSNS (4);
5099 else
5100 *total = COSTS_N_INSNS (20);
5101 return false;
5103 case ROTATE:
5104 if (GET_CODE (XEXP (x, 1)) == REG)
5105 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
5106 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5107 *total = rtx_cost (XEXP (x, 1), code, speed);
5109 /* Fall through */
5110 case ROTATERT:
5111 if (mode != SImode)
5113 *total += COSTS_N_INSNS (4);
5114 return true;
5117 /* Fall through */
5118 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
5119 *total += rtx_cost (XEXP (x, 0), code, speed);
5120 if (mode == DImode)
5122 *total += COSTS_N_INSNS (3);
5123 return true;
5126 *total += COSTS_N_INSNS (1);
5127 /* Increase the cost of complex shifts because they aren't any faster,
5128 and reduce dual issue opportunities. */
5129 if (arm_tune_cortex_a9
5130 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
5131 ++*total;
5133 return true;
5135 case MINUS:
5136 if (TARGET_THUMB2)
5138 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5140 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5141 *total = COSTS_N_INSNS (1);
5142 else
5143 *total = COSTS_N_INSNS (20);
5145 else
5146 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5147 /* Thumb2 does not have RSB, so all arguments must be
5148 registers (subtracting a constant is canonicalized as
5149 addition of the negated constant). */
5150 return false;
5153 if (mode == DImode)
5155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5156 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5157 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5159 *total += rtx_cost (XEXP (x, 1), code, speed);
5160 return true;
5163 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5164 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
5166 *total += rtx_cost (XEXP (x, 0), code, speed);
5167 return true;
5170 return false;
5173 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5175 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5177 *total = COSTS_N_INSNS (1);
5178 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5179 && arm_const_double_rtx (XEXP (x, 0)))
5181 *total += rtx_cost (XEXP (x, 1), code, speed);
5182 return true;
5185 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5186 && arm_const_double_rtx (XEXP (x, 1)))
5188 *total += rtx_cost (XEXP (x, 0), code, speed);
5189 return true;
5192 return false;
5194 *total = COSTS_N_INSNS (20);
5195 return false;
5198 *total = COSTS_N_INSNS (1);
5199 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5200 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5202 *total += rtx_cost (XEXP (x, 1), code, speed);
5203 return true;
5206 subcode = GET_CODE (XEXP (x, 1));
5207 if (subcode == ASHIFT || subcode == ASHIFTRT
5208 || subcode == LSHIFTRT
5209 || subcode == ROTATE || subcode == ROTATERT)
5211 *total += rtx_cost (XEXP (x, 0), code, speed);
5212 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5213 return true;
5216 /* A shift as a part of RSB costs no more than RSB itself. */
5217 if (GET_CODE (XEXP (x, 0)) == MULT
5218 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5220 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
5221 *total += rtx_cost (XEXP (x, 1), code, speed);
5222 return true;
5225 if (subcode == MULT
5226 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
5228 *total += rtx_cost (XEXP (x, 0), code, speed);
5229 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5230 return true;
5233 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
5234 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
5236 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5237 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
5238 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
5239 *total += COSTS_N_INSNS (1);
5241 return true;
5244 /* Fall through */
5246 case PLUS:
5247 if (code == PLUS && arm_arch6 && mode == SImode
5248 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5249 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5251 *total = COSTS_N_INSNS (1);
5252 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
5253 speed);
5254 *total += rtx_cost (XEXP (x, 1), code, speed);
5255 return true;
5258 /* MLA: All arguments must be registers. We filter out
5259 multiplication by a power of two, so that we fall down into
5260 the code below. */
5261 if (GET_CODE (XEXP (x, 0)) == MULT
5262 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5264 /* The cost comes from the cost of the multiply. */
5265 return false;
5268 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5270 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5272 *total = COSTS_N_INSNS (1);
5273 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5274 && arm_const_double_rtx (XEXP (x, 1)))
5276 *total += rtx_cost (XEXP (x, 0), code, speed);
5277 return true;
5280 return false;
5283 *total = COSTS_N_INSNS (20);
5284 return false;
5287 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
5288 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
5290 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
5291 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5292 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
5293 *total += COSTS_N_INSNS (1);
5294 return true;
5297 /* Fall through */
5299 case AND: case XOR: case IOR:
5300 extra_cost = 0;
5302 /* Normally the frame registers will be spilt into reg+const during
5303 reload, so it is a bad idea to combine them with other instructions,
5304 since then they might not be moved outside of loops. As a compromise
5305 we allow integration with ops that have a constant as their second
5306 operand. */
5307 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5308 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5309 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5310 || (REG_OR_SUBREG_REG (XEXP (x, 1))
5311 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
5312 *total = 4;
5314 if (mode == DImode)
5316 *total += COSTS_N_INSNS (2);
5317 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5318 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5320 *total += rtx_cost (XEXP (x, 0), code, speed);
5321 return true;
5324 return false;
5327 *total += COSTS_N_INSNS (1);
5328 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5329 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5331 *total += rtx_cost (XEXP (x, 0), code, speed);
5332 return true;
5334 subcode = GET_CODE (XEXP (x, 0));
5335 if (subcode == ASHIFT || subcode == ASHIFTRT
5336 || subcode == LSHIFTRT
5337 || subcode == ROTATE || subcode == ROTATERT)
5339 *total += rtx_cost (XEXP (x, 1), code, speed);
5340 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5341 return true;
5344 if (subcode == MULT
5345 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5347 *total += rtx_cost (XEXP (x, 1), code, speed);
5348 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5349 return true;
5352 if (subcode == UMIN || subcode == UMAX
5353 || subcode == SMIN || subcode == SMAX)
5355 *total = COSTS_N_INSNS (3);
5356 return true;
5359 return false;
5361 case MULT:
5362 /* This should have been handled by the CPU specific routines. */
5363 gcc_unreachable ();
5365 case TRUNCATE:
5366 if (arm_arch3m && mode == SImode
5367 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5368 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5369 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5370 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5371 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5372 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5374 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
5375 return true;
5377 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
5378 return false;
5380 case NEG:
5381 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5383 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5385 *total = COSTS_N_INSNS (1);
5386 return false;
5388 *total = COSTS_N_INSNS (2);
5389 return false;
5392 /* Fall through */
5393 case NOT:
5394 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
5395 if (mode == SImode && code == NOT)
5397 subcode = GET_CODE (XEXP (x, 0));
5398 if (subcode == ASHIFT || subcode == ASHIFTRT
5399 || subcode == LSHIFTRT
5400 || subcode == ROTATE || subcode == ROTATERT
5401 || (subcode == MULT
5402 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
5404 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5405 /* Register shifts cost an extra cycle. */
5406 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
5407 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
5408 subcode, speed);
5409 return true;
5413 return false;
5415 case IF_THEN_ELSE:
5416 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5418 *total = COSTS_N_INSNS (4);
5419 return true;
5422 operand = XEXP (x, 0);
5424 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
5425 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
5426 && GET_CODE (XEXP (operand, 0)) == REG
5427 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
5428 *total += COSTS_N_INSNS (1);
5429 *total += (rtx_cost (XEXP (x, 1), code, speed)
5430 + rtx_cost (XEXP (x, 2), code, speed));
5431 return true;
5433 case NE:
5434 if (mode == SImode && XEXP (x, 1) == const0_rtx)
5436 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5437 return true;
5439 goto scc_insn;
5441 case GE:
5442 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5443 && mode == SImode && XEXP (x, 1) == const0_rtx)
5445 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5446 return true;
5448 goto scc_insn;
5450 case LT:
5451 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5452 && mode == SImode && XEXP (x, 1) == const0_rtx)
5454 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5455 return true;
5457 goto scc_insn;
5459 case EQ:
5460 case GT:
5461 case LE:
5462 case GEU:
5463 case LTU:
5464 case GTU:
5465 case LEU:
5466 case UNORDERED:
5467 case ORDERED:
5468 case UNEQ:
5469 case UNGE:
5470 case UNLT:
5471 case UNGT:
5472 case UNLE:
5473 scc_insn:
5474 /* SCC insns. If the comparison has already been performed, they
5475 cost 2 instructions. Otherwise they need an additional comparison
5476 before them. */
5477 *total = COSTS_N_INSNS (2);
5478 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5480 return true;
5483 /* Fall through */
5484 case COMPARE:
5485 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5487 *total = 0;
5488 return true;
5491 *total += COSTS_N_INSNS (1);
5492 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5493 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5495 *total += rtx_cost (XEXP (x, 0), code, speed);
5496 return true;
5499 subcode = GET_CODE (XEXP (x, 0));
5500 if (subcode == ASHIFT || subcode == ASHIFTRT
5501 || subcode == LSHIFTRT
5502 || subcode == ROTATE || subcode == ROTATERT)
5504 *total += rtx_cost (XEXP (x, 1), code, speed);
5505 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5506 return true;
5509 if (subcode == MULT
5510 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5512 *total += rtx_cost (XEXP (x, 1), code, speed);
5513 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5514 return true;
5517 return false;
5519 case UMIN:
5520 case UMAX:
5521 case SMIN:
5522 case SMAX:
5523 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5524 if (GET_CODE (XEXP (x, 1)) != CONST_INT
5525 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
5526 *total += rtx_cost (XEXP (x, 1), code, speed);
5527 return true;
5529 case ABS:
5530 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5532 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5534 *total = COSTS_N_INSNS (1);
5535 return false;
5537 *total = COSTS_N_INSNS (20);
5538 return false;
5540 *total = COSTS_N_INSNS (1);
5541 if (mode == DImode)
5542 *total += COSTS_N_INSNS (3);
5543 return false;
5545 case SIGN_EXTEND:
5546 if (GET_MODE_CLASS (mode) == MODE_INT)
5548 *total = 0;
5549 if (mode == DImode)
5550 *total += COSTS_N_INSNS (1);
5552 if (GET_MODE (XEXP (x, 0)) != SImode)
5554 if (arm_arch6)
5556 if (GET_CODE (XEXP (x, 0)) != MEM)
5557 *total += COSTS_N_INSNS (1);
5559 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5560 *total += COSTS_N_INSNS (2);
5563 return false;
5566 /* Fall through */
5567 case ZERO_EXTEND:
5568 *total = 0;
5569 if (GET_MODE_CLASS (mode) == MODE_INT)
5571 if (mode == DImode)
5572 *total += COSTS_N_INSNS (1);
5574 if (GET_MODE (XEXP (x, 0)) != SImode)
5576 if (arm_arch6)
5578 if (GET_CODE (XEXP (x, 0)) != MEM)
5579 *total += COSTS_N_INSNS (1);
5581 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5582 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
5583 1 : 2);
5586 return false;
5589 switch (GET_MODE (XEXP (x, 0)))
5591 case V8QImode:
5592 case V4HImode:
5593 case V2SImode:
5594 case V4QImode:
5595 case V2HImode:
5596 *total = COSTS_N_INSNS (1);
5597 return false;
5599 default:
5600 gcc_unreachable ();
5602 gcc_unreachable ();
5604 case ZERO_EXTRACT:
5605 case SIGN_EXTRACT:
5606 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5607 return true;
5609 case CONST_INT:
5610 if (const_ok_for_arm (INTVAL (x))
5611 || const_ok_for_arm (~INTVAL (x)))
5612 *total = COSTS_N_INSNS (1);
5613 else
5614 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
5615 INTVAL (x), NULL_RTX,
5616 NULL_RTX, 0, 0));
5617 return true;
5619 case CONST:
5620 case LABEL_REF:
5621 case SYMBOL_REF:
5622 *total = COSTS_N_INSNS (3);
5623 return true;
5625 case HIGH:
5626 *total = COSTS_N_INSNS (1);
5627 return true;
5629 case LO_SUM:
5630 *total = COSTS_N_INSNS (1);
5631 *total += rtx_cost (XEXP (x, 0), code, speed);
5632 return true;
5634 case CONST_DOUBLE:
5635 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
5636 *total = COSTS_N_INSNS (1);
5637 else
5638 *total = COSTS_N_INSNS (4);
5639 return true;
5641 default:
5642 *total = COSTS_N_INSNS (4);
5643 return false;
5647 /* RTX costs when optimizing for size. */
5648 static bool
5649 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5650 int *total)
5652 enum machine_mode mode = GET_MODE (x);
5653 if (TARGET_THUMB1)
5655 /* XXX TBD. For now, use the standard costs. */
5656 *total = thumb1_rtx_costs (x, code, outer_code);
5657 return true;
5660 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5661 switch (code)
5663 case MEM:
5664 /* A memory access costs 1 insn if the mode is small, or the address is
5665 a single register, otherwise it costs one insn per word. */
5666 if (REG_P (XEXP (x, 0)))
5667 *total = COSTS_N_INSNS (1);
5668 else
5669 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5670 return true;
5672 case DIV:
5673 case MOD:
5674 case UDIV:
5675 case UMOD:
5676 /* Needs a libcall, so it costs about this. */
5677 *total = COSTS_N_INSNS (2);
5678 return false;
5680 case ROTATE:
5681 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5683 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5684 return true;
5686 /* Fall through */
5687 case ROTATERT:
5688 case ASHIFT:
5689 case LSHIFTRT:
5690 case ASHIFTRT:
5691 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5693 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5694 return true;
5696 else if (mode == SImode)
5698 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5699 /* Slightly disparage register shifts, but not by much. */
5700 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5701 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5702 return true;
5705 /* Needs a libcall. */
5706 *total = COSTS_N_INSNS (2);
5707 return false;
5709 case MINUS:
5710 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5712 *total = COSTS_N_INSNS (1);
5713 return false;
5716 if (mode == SImode)
5718 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5719 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5721 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5722 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5723 || subcode1 == ROTATE || subcode1 == ROTATERT
5724 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5725 || subcode1 == ASHIFTRT)
5727 /* It's just the cost of the two operands. */
5728 *total = 0;
5729 return false;
5732 *total = COSTS_N_INSNS (1);
5733 return false;
5736 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5737 return false;
5739 case PLUS:
5740 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5742 *total = COSTS_N_INSNS (1);
5743 return false;
5746 /* A shift as a part of ADD costs nothing. */
5747 if (GET_CODE (XEXP (x, 0)) == MULT
5748 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5750 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
5751 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
5752 *total += rtx_cost (XEXP (x, 1), code, false);
5753 return true;
5756 /* Fall through */
5757 case AND: case XOR: case IOR:
5758 if (mode == SImode)
5760 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5762 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5763 || subcode == LSHIFTRT || subcode == ASHIFTRT
5764 || (code == AND && subcode == NOT))
5766 /* It's just the cost of the two operands. */
5767 *total = 0;
5768 return false;
5772 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5773 return false;
5775 case MULT:
5776 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5777 return false;
5779 case NEG:
5780 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5782 *total = COSTS_N_INSNS (1);
5783 return false;
5786 /* Fall through */
5787 case NOT:
5788 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5790 return false;
5792 case IF_THEN_ELSE:
5793 *total = 0;
5794 return false;
5796 case COMPARE:
5797 if (cc_register (XEXP (x, 0), VOIDmode))
5798 *total = 0;
5799 else
5800 *total = COSTS_N_INSNS (1);
5801 return false;
5803 case ABS:
5804 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5805 *total = COSTS_N_INSNS (1);
5806 else
5807 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5808 return false;
5810 case SIGN_EXTEND:
5811 *total = 0;
5812 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5814 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5815 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5817 if (mode == DImode)
5818 *total += COSTS_N_INSNS (1);
5819 return false;
5821 case ZERO_EXTEND:
5822 *total = 0;
5823 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5825 switch (GET_MODE (XEXP (x, 0)))
5827 case QImode:
5828 *total += COSTS_N_INSNS (1);
5829 break;
5831 case HImode:
5832 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5834 case SImode:
5835 break;
5837 default:
5838 *total += COSTS_N_INSNS (2);
5842 if (mode == DImode)
5843 *total += COSTS_N_INSNS (1);
5845 return false;
5847 case CONST_INT:
5848 if (const_ok_for_arm (INTVAL (x)))
5849 /* A multiplication by a constant requires another instruction
5850 to load the constant to a register. */
5851 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
5852 ? 1 : 0);
5853 else if (const_ok_for_arm (~INTVAL (x)))
5854 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5855 else if (const_ok_for_arm (-INTVAL (x)))
5857 if (outer_code == COMPARE || outer_code == PLUS
5858 || outer_code == MINUS)
5859 *total = 0;
5860 else
5861 *total = COSTS_N_INSNS (1);
5863 else
5864 *total = COSTS_N_INSNS (2);
5865 return true;
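/* For illustration of the CONST_INT handling above (not part of the original
   comments): 0xff is a valid ARM immediate, so it costs one extra insn when
   it is the source of a SET but nothing as an operand of a PLUS; -256 is not
   directly encodable, but its bitwise NOT (0xff) is, so as the second operand
   of an AND it costs nothing (BIC can be used) and one insn elsewhere.  */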
5867 case CONST:
5868 case LABEL_REF:
5869 case SYMBOL_REF:
5870 *total = COSTS_N_INSNS (2);
5871 return true;
5873 case CONST_DOUBLE:
5874 *total = COSTS_N_INSNS (4);
5875 return true;
5877 case HIGH:
5878 case LO_SUM:
5879 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5880 cost of these slightly. */
5881 *total = COSTS_N_INSNS (1) + 1;
5882 return true;
5884 default:
5885 if (mode != VOIDmode)
5886 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5887 else
5888 *total = COSTS_N_INSNS (4); /* Who knows? */
5889 return false;
5893 /* RTX costs.  Dispatch to the size-optimized or per-core speed variant. */
5894 static bool
5895 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
5896 bool speed)
5898 if (!speed)
5899 return arm_size_rtx_costs (x, (enum rtx_code) code,
5900 (enum rtx_code) outer_code, total);
5901 else
5902 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
5903 (enum rtx_code) outer_code,
5904 total, speed);
5907 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5908 supported on any "slowmul" cores, so it can be ignored. */
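/* Illustrative example for the CONST_INT case below (not from the original
   sources): multiplying by 100 (0x64) needs four 2-bit Booth steps, so with
   the const_ok base cost of 4 the estimate is COSTS_N_INSNS (8) plus the
   cost of the other operand.  */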
5910 static bool
5911 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5912 int *total, bool speed)
5914 enum machine_mode mode = GET_MODE (x);
5916 if (TARGET_THUMB)
5918 *total = thumb1_rtx_costs (x, code, outer_code);
5919 return true;
5922 switch (code)
5924 case MULT:
5925 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5926 || mode == DImode)
5928 *total = COSTS_N_INSNS (20);
5929 return false;
5932 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5934 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5935 & (unsigned HOST_WIDE_INT) 0xffffffff);
5936 int cost, const_ok = const_ok_for_arm (i);
5937 int j, booth_unit_size;
5939 /* Tune as appropriate. */
5940 cost = const_ok ? 4 : 8;
5941 booth_unit_size = 2;
5942 for (j = 0; i && j < 32; j += booth_unit_size)
5944 i >>= booth_unit_size;
5945 cost++;
5948 *total = COSTS_N_INSNS (cost);
5949 *total += rtx_cost (XEXP (x, 0), code, speed);
5950 return true;
5953 *total = COSTS_N_INSNS (20);
5954 return false;
5956 default:
5957 return arm_rtx_costs_1 (x, outer_code, total, speed);
5962 /* RTX cost for cores with a fast multiply unit (M variants). */
5964 static bool
5965 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5966 int *total, bool speed)
5968 enum machine_mode mode = GET_MODE (x);
5970 if (TARGET_THUMB1)
5972 *total = thumb1_rtx_costs (x, code, outer_code);
5973 return true;
5976 /* ??? should thumb2 use different costs? */
5977 switch (code)
5979 case MULT:
5980 /* There is no point basing this on the tuning, since it is always the
5981 fast variant if it exists at all. */
5982 if (mode == DImode
5983 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5984 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5985 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5987 *total = COSTS_N_INSNS(2);
5988 return false;
5992 if (mode == DImode)
5994 *total = COSTS_N_INSNS (5);
5995 return false;
5998 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6000 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6001 & (unsigned HOST_WIDE_INT) 0xffffffff);
6002 int cost, const_ok = const_ok_for_arm (i);
6003 int j, booth_unit_size;
6005 /* Tune as appropriate. */
6006 cost = const_ok ? 4 : 8;
6007 booth_unit_size = 8;
6008 for (j = 0; i && j < 32; j += booth_unit_size)
6010 i >>= booth_unit_size;
6011 cost++;
6014 *total = COSTS_N_INSNS(cost);
6015 return false;
6018 if (mode == SImode)
6020 *total = COSTS_N_INSNS (4);
6021 return false;
6024 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6026 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6028 *total = COSTS_N_INSNS (1);
6029 return false;
6033 /* Requires a lib call */
6034 *total = COSTS_N_INSNS (20);
6035 return false;
6037 default:
6038 return arm_rtx_costs_1 (x, outer_code, total, speed);
6043 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
6044 so it can be ignored. */
6046 static bool
6047 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
6049 enum machine_mode mode = GET_MODE (x);
6051 if (TARGET_THUMB)
6053 *total = thumb1_rtx_costs (x, code, outer_code);
6054 return true;
6057 switch (code)
6059 case COMPARE:
6060 if (GET_CODE (XEXP (x, 0)) != MULT)
6061 return arm_rtx_costs_1 (x, outer_code, total, speed);
6063 /* A COMPARE of a MULT is slow on XScale; the muls instruction
6064 will stall until the multiplication is complete. */
6065 *total = COSTS_N_INSNS (3);
6066 return false;
6068 case MULT:
6069 /* There is no point basing this on the tuning, since it is always the
6070 fast variant if it exists at all. */
6071 if (mode == DImode
6072 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6073 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6074 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6076 *total = COSTS_N_INSNS (2);
6077 return false;
6081 if (mode == DImode)
6083 *total = COSTS_N_INSNS (5);
6084 return false;
6087 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6089 /* If operand 1 is a constant we can more accurately
6090 calculate the cost of the multiply. The multiplier can
6091 retire 15 bits on the first cycle and a further 12 on the
6092 second. We do, of course, have to load the constant into
6093 a register first. */
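/* Illustrative example (an assumption, not from the original comments): a
   multiplier of 100 (0x64) fits in the low 15 bits, so only the one-cycle
   overhead applies and the cost works out as COSTS_N_INSNS (1); 0x12345678
   has bits set in both the 0xffff8000 and 0xf8000000 ranges, giving
   COSTS_N_INSNS (3).  */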
6094 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6095 /* There's a general overhead of one cycle. */
6096 int cost = 1;
6097 unsigned HOST_WIDE_INT masked_const;
6099 if (i & 0x80000000)
6100 i = ~i;
6102 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
6104 masked_const = i & 0xffff8000;
6105 if (masked_const != 0)
6107 cost++;
6108 masked_const = i & 0xf8000000;
6109 if (masked_const != 0)
6110 cost++;
6112 *total = COSTS_N_INSNS (cost);
6113 return false;
6116 if (mode == SImode)
6118 *total = COSTS_N_INSNS (3);
6119 return false;
6122 /* Requires a lib call */
6123 *total = COSTS_N_INSNS (20);
6124 return false;
6126 default:
6127 return arm_rtx_costs_1 (x, outer_code, total, speed);
6132 /* RTX costs for 9e (and later) cores. */
6134 static bool
6135 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6136 int *total, bool speed)
6138 enum machine_mode mode = GET_MODE (x);
6140 if (TARGET_THUMB1)
6142 switch (code)
6144 case MULT:
6145 *total = COSTS_N_INSNS (3);
6146 return true;
6148 default:
6149 *total = thumb1_rtx_costs (x, code, outer_code);
6150 return true;
6154 switch (code)
6156 case MULT:
6157 /* There is no point basing this on the tuning, since it is always the
6158 fast variant if it exists at all. */
6159 if (mode == DImode
6160 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6161 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6162 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6164 *total = COSTS_N_INSNS (2);
6165 return false;
6169 if (mode == DImode)
6171 *total = COSTS_N_INSNS (5);
6172 return false;
6175 if (mode == SImode)
6177 *total = COSTS_N_INSNS (2);
6178 return false;
6181 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6183 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6185 *total = COSTS_N_INSNS (1);
6186 return false;
6190 *total = COSTS_N_INSNS (20);
6191 return false;
6193 default:
6194 return arm_rtx_costs_1 (x, outer_code, total, speed);
6197 /* All address computations that can be done are free, but rtx cost returns
6198 the same for practically all of them. So we weight the different types
6199 of address here in the order (most pref first):
6200 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
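/* Illustrative weights produced by the function below: an auto-increment
   address costs 0, a PLUS/MINUS whose first operand is a CONST_INT costs 2,
   one containing another arithmetic subexpression (e.g. a shifted index)
   costs 3, other sums cost 4, a bare register costs 6, and a direct MEM,
   LABEL_REF or SYMBOL_REF costs 10; lower is preferred.  */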
6201 static inline int
6202 arm_arm_address_cost (rtx x)
6204 enum rtx_code c = GET_CODE (x);
6206 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
6207 return 0;
6208 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
6209 return 10;
6211 if (c == PLUS || c == MINUS)
6213 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6214 return 2;
6216 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
6217 return 3;
6219 return 4;
6222 return 6;
6225 static inline int
6226 arm_thumb_address_cost (rtx x)
6228 enum rtx_code c = GET_CODE (x);
6230 if (c == REG)
6231 return 1;
6232 if (c == PLUS
6233 && GET_CODE (XEXP (x, 0)) == REG
6234 && GET_CODE (XEXP (x, 1)) == CONST_INT)
6235 return 1;
6237 return 2;
6240 static int
6241 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
6243 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
6246 static int
6247 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
6249 rtx i_pat, d_pat;
6251 /* Some true dependencies can have a higher cost depending
6252 on precisely how certain input operands are used. */
6253 if (arm_tune_xscale
6254 && REG_NOTE_KIND (link) == 0
6255 && recog_memoized (insn) >= 0
6256 && recog_memoized (dep) >= 0)
6258 int shift_opnum = get_attr_shift (insn);
6259 enum attr_type attr_type = get_attr_type (dep);
6261 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
6262 operand for INSN. If we have a shifted input operand and the
6263 instruction we depend on is another ALU instruction, then we may
6264 have to account for an additional stall. */
6265 if (shift_opnum != 0
6266 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
6268 rtx shifted_operand;
6269 int opno;
6271 /* Get the shifted operand. */
6272 extract_insn (insn);
6273 shifted_operand = recog_data.operand[shift_opnum];
6275 /* Iterate over all the operands in DEP. If we write an operand
6276 that overlaps with SHIFTED_OPERAND, then we have to increase the
6277 cost of this dependency. */
6278 extract_insn (dep);
6279 preprocess_constraints ();
6280 for (opno = 0; opno < recog_data.n_operands; opno++)
6282 /* We can ignore strict inputs. */
6283 if (recog_data.operand_type[opno] == OP_IN)
6284 continue;
6286 if (reg_overlap_mentioned_p (recog_data.operand[opno],
6287 shifted_operand))
6288 return 2;
6293 /* XXX This is not strictly true for the FPA. */
6294 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
6295 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
6296 return 0;
6298 /* Call insns don't incur a stall, even if they follow a load. */
6299 if (REG_NOTE_KIND (link) == 0
6300 && GET_CODE (insn) == CALL_INSN)
6301 return 1;
6303 if ((i_pat = single_set (insn)) != NULL
6304 && GET_CODE (SET_SRC (i_pat)) == MEM
6305 && (d_pat = single_set (dep)) != NULL
6306 && GET_CODE (SET_DEST (d_pat)) == MEM)
6308 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
6309 /* This is a load after a store; there is no conflict if the load reads
6310 from a cached area. Assume that loads from the stack, and from the
6311 constant pool are cached, and that others will miss. This is a
6312 hack. */
6314 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
6315 || reg_mentioned_p (stack_pointer_rtx, src_mem)
6316 || reg_mentioned_p (frame_pointer_rtx, src_mem)
6317 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
6318 return 1;
6321 return cost;
6324 static int fp_consts_inited = 0;
6326 /* Only zero is valid for VFP. Other values are also valid for FPA. */
6327 static const char * const strings_fp[8] =
6329 "0", "1", "2", "3",
6330 "4", "5", "0.5", "10"
6333 static REAL_VALUE_TYPE values_fp[8];
6335 static void
6336 init_fp_table (void)
6338 int i;
6339 REAL_VALUE_TYPE r;
6341 if (TARGET_VFP)
6342 fp_consts_inited = 1;
6343 else
6344 fp_consts_inited = 8;
6346 for (i = 0; i < fp_consts_inited; i++)
6348 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
6349 values_fp[i] = r;
6353 /* Return TRUE if rtx X is a valid immediate FP constant. */
6354 int
6355 arm_const_double_rtx (rtx x)
6357 REAL_VALUE_TYPE r;
6358 int i;
6360 if (!fp_consts_inited)
6361 init_fp_table ();
6363 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6364 if (REAL_VALUE_MINUS_ZERO (r))
6365 return 0;
6367 for (i = 0; i < fp_consts_inited; i++)
6368 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6369 return 1;
6371 return 0;
6374 /* Return TRUE if rtx X is a valid immediate FPA constant. */
6375 int
6376 neg_const_double_rtx_ok_for_fpa (rtx x)
6378 REAL_VALUE_TYPE r;
6379 int i;
6381 if (!fp_consts_inited)
6382 init_fp_table ();
6384 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6385 r = REAL_VALUE_NEGATE (r);
6386 if (REAL_VALUE_MINUS_ZERO (r))
6387 return 0;
6389 for (i = 0; i < 8; i++)
6390 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6391 return 1;
6393 return 0;
6397 /* VFPv3 has a fairly wide range of representable immediates, formed from
6398 "quarter-precision" floating-point values. These can be evaluated using this
6399 formula (with ^ for exponentiation):
6401 -1^s * n * 2^-r
6403 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
6404 16 <= n <= 31 and 0 <= r <= 7.
6406 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
6408 - A (most-significant) is the sign bit.
6409 - BCD are the exponent (encoded as r XOR 3).
6410 - EFGH are the mantissa (encoded as n - 16).
6411 */
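/* Worked example (illustrative): 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16,
   r = 4, which encodes as A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 =
   0b0000, giving the 8-bit pattern 0b01110000 (0x70).  Likewise 0.5 uses
   n = 16, r = 5 and encodes as 0x60.  */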
6413 /* Return an integer index for a VFPv3 immediate operand X suitable for the
6414 fconst[sd] instruction, or -1 if X isn't suitable. */
6415 static int
6416 vfp3_const_double_index (rtx x)
6418 REAL_VALUE_TYPE r, m;
6419 int sign, exponent;
6420 unsigned HOST_WIDE_INT mantissa, mant_hi;
6421 unsigned HOST_WIDE_INT mask;
6422 HOST_WIDE_INT m1, m2;
6423 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6425 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
6426 return -1;
6428 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6430 /* We can't represent these things, so detect them first. */
6431 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
6432 return -1;
6434 /* Extract sign, exponent and mantissa. */
6435 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6436 r = REAL_VALUE_ABS (r);
6437 exponent = REAL_EXP (&r);
6438 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6439 highest (sign) bit, with a fixed binary point at bit point_pos.
6440 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
6441 bits for the mantissa, this may fail (low bits would be lost). */
6442 real_ldexp (&m, &r, point_pos - exponent);
6443 REAL_VALUE_TO_INT (&m1, &m2, m);
6444 mantissa = m1;
6445 mant_hi = m2;
6447 /* If there are bits set in the low part of the mantissa, we can't
6448 represent this value. */
6449 if (mantissa != 0)
6450 return -1;
6452 /* Now make it so that mantissa contains the most-significant bits, and move
6453 the point_pos to indicate that the least-significant bits have been
6454 discarded. */
6455 point_pos -= HOST_BITS_PER_WIDE_INT;
6456 mantissa = mant_hi;
6458 /* We can permit four significant bits of mantissa only, plus a high bit
6459 which is always 1. */
6460 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6461 if ((mantissa & mask) != 0)
6462 return -1;
6464 /* Now we know the mantissa is in range, chop off the unneeded bits. */
6465 mantissa >>= point_pos - 5;
6467 /* The mantissa may be zero. Disallow that case. (It's possible to load the
6468 floating-point immediate zero with Neon using an integer-zero load, but
6469 that case is handled elsewhere.) */
6470 if (mantissa == 0)
6471 return -1;
6473 gcc_assert (mantissa >= 16 && mantissa <= 31);
6475 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
6476 normalized significands are in the range [1, 2). (Our mantissa is shifted
6477 left 4 places at this point relative to normalized IEEE754 values). GCC
6478 internally uses [0.5, 1) (see real.c), so the exponent returned from
6479 REAL_EXP must be altered. */
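/* For example (illustrative): GCC represents 1.0 internally as 0.5 * 2^1, so
   REAL_EXP returns 1 and the adjustment below yields exponent = 4 -- the
   value of 'r' in the quarter-precision formula above.  */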
6480 exponent = 5 - exponent;
6482 if (exponent < 0 || exponent > 7)
6483 return -1;
6485 /* Sign, mantissa and exponent are now in the correct form to plug into the
6486 formula described in the comment above. */
6487 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
6490 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
6491 int
6492 vfp3_const_double_rtx (rtx x)
6494 if (!TARGET_VFP3)
6495 return 0;
6497 return vfp3_const_double_index (x) != -1;
6500 /* Recognize immediates which can be used in various Neon instructions. Legal
6501 immediates are described by the following table (for VMVN variants, the
6502 bitwise inverse of the constant shown is recognized. In either case, VMOV
6503 is output and the correct instruction to use for a given constant is chosen
6504 by the assembler). The constant shown is replicated across all elements of
6505 the destination vector.
6507 insn elems variant constant (binary)
6508 ---- ----- ------- -----------------
6509 vmov i32 0 00000000 00000000 00000000 abcdefgh
6510 vmov i32 1 00000000 00000000 abcdefgh 00000000
6511 vmov i32 2 00000000 abcdefgh 00000000 00000000
6512 vmov i32 3 abcdefgh 00000000 00000000 00000000
6513 vmov i16 4 00000000 abcdefgh
6514 vmov i16 5 abcdefgh 00000000
6515 vmvn i32 6 00000000 00000000 00000000 abcdefgh
6516 vmvn i32 7 00000000 00000000 abcdefgh 00000000
6517 vmvn i32 8 00000000 abcdefgh 00000000 00000000
6518 vmvn i32 9 abcdefgh 00000000 00000000 00000000
6519 vmvn i16 10 00000000 abcdefgh
6520 vmvn i16 11 abcdefgh 00000000
6521 vmov i32 12 00000000 00000000 abcdefgh 11111111
6522 vmvn i32 13 00000000 00000000 abcdefgh 11111111
6523 vmov i32 14 00000000 abcdefgh 11111111 11111111
6524 vmvn i32 15 00000000 abcdefgh 11111111 11111111
6525 vmov i8 16 abcdefgh
6526 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
6527 eeeeeeee ffffffff gggggggg hhhhhhhh
6528 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6530 For case 18, B = !b. Representable values are exactly those accepted by
6531 vfp3_const_double_index, but are output as floating-point numbers rather
6532 than indices.
6534 Variants 0-5 (inclusive) may also be used as immediates for the second
6535 operand of VORR/VBIC instructions.
6537 The INVERSE argument causes the bitwise inverse of the given operand to be
6538 recognized instead (used for recognizing legal immediates for the VAND/VORN
6539 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6540 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6541 output, rather than the real insns vbic/vorr).
6543 INVERSE makes no difference to the recognition of float vectors.
6545 The return value is the variant of immediate as shown in the above table, or
6546 -1 if the given value doesn't match any of the listed patterns.
6547 */
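/* Worked example (illustrative): a V4SI constant with every element equal to
   0x0000ab00 splats to the byte sequence 00 ab 00 00 (least-significant byte
   first) in each group of four, which matches variant 1 above with
   abcdefgh = 0xab and an element width of 32.  */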
6548 static int
6549 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6550 rtx *modconst, int *elementwidth)
6552 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6553 matches = 1; \
6554 for (i = 0; i < idx; i += (STRIDE)) \
6555 if (!(TEST)) \
6556 matches = 0; \
6557 if (matches) \
6559 immtype = (CLASS); \
6560 elsize = (ELSIZE); \
6561 break; \
6564 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6565 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6566 unsigned char bytes[16];
6567 int immtype = -1, matches;
6568 unsigned int invmask = inverse ? 0xff : 0;
6570 /* Vectors of float constants. */
6571 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6573 rtx el0 = CONST_VECTOR_ELT (op, 0);
6574 REAL_VALUE_TYPE r0;
6576 if (!vfp3_const_double_rtx (el0))
6577 return -1;
6579 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6581 for (i = 1; i < n_elts; i++)
6583 rtx elt = CONST_VECTOR_ELT (op, i);
6584 REAL_VALUE_TYPE re;
6586 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6588 if (!REAL_VALUES_EQUAL (r0, re))
6589 return -1;
6592 if (modconst)
6593 *modconst = CONST_VECTOR_ELT (op, 0);
6595 if (elementwidth)
6596 *elementwidth = 0;
6598 return 18;
6601 /* Splat vector constant out into a byte vector. */
6602 for (i = 0; i < n_elts; i++)
6604 rtx el = CONST_VECTOR_ELT (op, i);
6605 unsigned HOST_WIDE_INT elpart;
6606 unsigned int part, parts;
6608 if (GET_CODE (el) == CONST_INT)
6610 elpart = INTVAL (el);
6611 parts = 1;
6613 else if (GET_CODE (el) == CONST_DOUBLE)
6615 elpart = CONST_DOUBLE_LOW (el);
6616 parts = 2;
6618 else
6619 gcc_unreachable ();
6621 for (part = 0; part < parts; part++)
6623 unsigned int byte;
6624 for (byte = 0; byte < innersize; byte++)
6626 bytes[idx++] = (elpart & 0xff) ^ invmask;
6627 elpart >>= BITS_PER_UNIT;
6629 if (GET_CODE (el) == CONST_DOUBLE)
6630 elpart = CONST_DOUBLE_HIGH (el);
6634 /* Sanity check. */
6635 gcc_assert (idx == GET_MODE_SIZE (mode));
6637 do
6639 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6640 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6642 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6643 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6645 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6646 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6648 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6649 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6651 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6653 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6655 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6656 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6658 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6659 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6661 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6662 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6664 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6665 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6667 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6669 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6671 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6672 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6674 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6675 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6677 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6678 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6680 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6681 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6683 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6685 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6686 && bytes[i] == bytes[(i + 8) % idx]);
6688 while (0);
6690 if (immtype == -1)
6691 return -1;
6693 if (elementwidth)
6694 *elementwidth = elsize;
6696 if (modconst)
6698 unsigned HOST_WIDE_INT imm = 0;
6700 /* Un-invert bytes of recognized vector, if necessary. */
6701 if (invmask != 0)
6702 for (i = 0; i < idx; i++)
6703 bytes[i] ^= invmask;
6705 if (immtype == 17)
6707 /* FIXME: Broken on 32-bit H_W_I hosts. */
6708 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6710 for (i = 0; i < 8; i++)
6711 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6712 << (i * BITS_PER_UNIT);
6714 *modconst = GEN_INT (imm);
6716 else
6718 unsigned HOST_WIDE_INT imm = 0;
6720 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6721 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6723 *modconst = GEN_INT (imm);
6727 return immtype;
6728 #undef CHECK
6731 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6732 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6733 float elements), and a modified constant (whatever should be output for a
6734 VMOV) in *MODCONST. */
6736 int
6737 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6738 rtx *modconst, int *elementwidth)
6740 rtx tmpconst;
6741 int tmpwidth;
6742 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6744 if (retval == -1)
6745 return 0;
6747 if (modconst)
6748 *modconst = tmpconst;
6750 if (elementwidth)
6751 *elementwidth = tmpwidth;
6753 return 1;
6756 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6757 the immediate is valid, write a constant suitable for using as an operand
6758 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6759 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6761 int
6762 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6763 rtx *modconst, int *elementwidth)
6765 rtx tmpconst;
6766 int tmpwidth;
6767 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6769 if (retval < 0 || retval > 5)
6770 return 0;
6772 if (modconst)
6773 *modconst = tmpconst;
6775 if (elementwidth)
6776 *elementwidth = tmpwidth;
6778 return 1;
6781 /* Return a string suitable for output of Neon immediate logic operation
6782 MNEM. */
6784 char *
6785 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6786 int inverse, int quad)
6788 int width, is_valid;
6789 static char templ[40];
6791 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6793 gcc_assert (is_valid != 0);
6795 if (quad)
6796 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6797 else
6798 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6800 return templ;
6803 /* Output a sequence of pairwise operations to implement a reduction.
6804 NOTE: We do "too much work" here, because pairwise operations work on two
6805 registers-worth of operands in one go. Unfortunately we can't exploit those
6806 extra calculations to do the full operation in fewer steps, I don't think.
6807 Although all vector elements of the result but the first are ignored, we
6808 actually calculate the same result in each of the elements. An alternative
6809 such as initially loading a vector with zero to use as each of the second
6810 operands would use up an additional register and take an extra instruction,
6811 for no particular gain. */
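/* Illustrative sketch, assuming REDUC is a pairwise add such as vpadd: for a
   four-element vector {a, b, c, d} the loop below runs twice,
     step 1:  t    = reduc (v, v)  ->  { a+b, c+d, a+b, c+d }
     step 2:  dest = reduc (t, t)  ->  { a+b+c+d, ... }
   i.e. log2(parts) operations, with the wanted result in element 0.  */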
6813 void
6814 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6815 rtx (*reduc) (rtx, rtx, rtx))
6817 enum machine_mode inner = GET_MODE_INNER (mode);
6818 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6819 rtx tmpsum = op1;
6821 for (i = parts / 2; i >= 1; i /= 2)
6823 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6824 emit_insn (reduc (dest, tmpsum, tmpsum));
6825 tmpsum = dest;
6829 /* Initialize a vector with non-constant elements. FIXME: We can do better
6830 than the current implementation (building a vector on the stack and then
6831 loading it) in many cases. See rs6000.c. */
6833 void
6834 neon_expand_vector_init (rtx target, rtx vals)
6836 enum machine_mode mode = GET_MODE (target);
6837 enum machine_mode inner = GET_MODE_INNER (mode);
6838 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6839 rtx mem;
6841 gcc_assert (VECTOR_MODE_P (mode));
6843 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6844 for (i = 0; i < n_elts; i++)
6845 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6846 XVECEXP (vals, 0, i));
6848 emit_move_insn (target, mem);
6851 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6852 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6853 reported source locations are bogus. */
6855 static void
6856 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6857 const char *err)
6859 HOST_WIDE_INT lane;
6861 gcc_assert (GET_CODE (operand) == CONST_INT);
6863 lane = INTVAL (operand);
6865 if (lane < low || lane >= high)
6866 error (err);
6869 /* Bounds-check lanes. */
6871 void
6872 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6874 bounds_check (operand, low, high, "lane out of range");
6877 /* Bounds-check constants. */
6879 void
6880 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6882 bounds_check (operand, low, high, "constant out of range");
6885 HOST_WIDE_INT
6886 neon_element_bits (enum machine_mode mode)
6888 if (mode == DImode)
6889 return GET_MODE_BITSIZE (mode);
6890 else
6891 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6895 /* Predicates for `match_operand' and `match_operator'. */
6897 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6898 int
6899 cirrus_memory_offset (rtx op)
6901 /* Reject eliminable registers. */
6902 if (! (reload_in_progress || reload_completed)
6903 && ( reg_mentioned_p (frame_pointer_rtx, op)
6904 || reg_mentioned_p (arg_pointer_rtx, op)
6905 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6906 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6907 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6908 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6909 return 0;
6911 if (GET_CODE (op) == MEM)
6913 rtx ind;
6915 ind = XEXP (op, 0);
6917 /* Match: (mem (reg)). */
6918 if (GET_CODE (ind) == REG)
6919 return 1;
6921 /* Match:
6922 (mem (plus (reg)
6923 (const))). */
6924 if (GET_CODE (ind) == PLUS
6925 && GET_CODE (XEXP (ind, 0)) == REG
6926 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6927 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6928 return 1;
6931 return 0;
6934 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6935 WB is true if full writeback address modes are allowed and is false
6936 if limited writeback address modes (POST_INC and PRE_DEC) are
6937 allowed. */
6939 int
6940 arm_coproc_mem_operand (rtx op, bool wb)
6942 rtx ind;
6944 /* Reject eliminable registers. */
6945 if (! (reload_in_progress || reload_completed)
6946 && ( reg_mentioned_p (frame_pointer_rtx, op)
6947 || reg_mentioned_p (arg_pointer_rtx, op)
6948 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6949 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6950 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6951 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6952 return FALSE;
6954 /* Constants are converted into offsets from labels. */
6955 if (GET_CODE (op) != MEM)
6956 return FALSE;
6958 ind = XEXP (op, 0);
6960 if (reload_completed
6961 && (GET_CODE (ind) == LABEL_REF
6962 || (GET_CODE (ind) == CONST
6963 && GET_CODE (XEXP (ind, 0)) == PLUS
6964 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6965 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6966 return TRUE;
6968 /* Match: (mem (reg)). */
6969 if (GET_CODE (ind) == REG)
6970 return arm_address_register_rtx_p (ind, 0);
6972 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
6973 acceptable in any case (subject to verification by
6974 arm_address_register_rtx_p). We need WB to be true to accept
6975 PRE_INC and POST_DEC. */
6976 if (GET_CODE (ind) == POST_INC
6977 || GET_CODE (ind) == PRE_DEC
6978 || (wb
6979 && (GET_CODE (ind) == PRE_INC
6980 || GET_CODE (ind) == POST_DEC)))
6981 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6983 if (wb
6984 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6985 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6986 && GET_CODE (XEXP (ind, 1)) == PLUS
6987 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6988 ind = XEXP (ind, 1);
6990 /* Match:
6991 (plus (reg)
6992 (const)). */
6993 if (GET_CODE (ind) == PLUS
6994 && GET_CODE (XEXP (ind, 0)) == REG
6995 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6996 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6997 && INTVAL (XEXP (ind, 1)) > -1024
6998 && INTVAL (XEXP (ind, 1)) < 1024
6999 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7000 return TRUE;
7002 return FALSE;
7005 /* Return TRUE if OP is a memory operand which we can load or store a vector
7006 to/from. TYPE is one of the following values:
7007 0 - Vector load/store (vldr)
7008 1 - Core registers (ldm)
7009 2 - Element/structure loads (vld1)
7010 */
7011 int
7012 neon_vector_mem_operand (rtx op, int type)
7014 rtx ind;
7016 /* Reject eliminable registers. */
7017 if (! (reload_in_progress || reload_completed)
7018 && ( reg_mentioned_p (frame_pointer_rtx, op)
7019 || reg_mentioned_p (arg_pointer_rtx, op)
7020 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7021 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7022 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7023 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7024 return FALSE;
7026 /* Constants are converted into offsets from labels. */
7027 if (GET_CODE (op) != MEM)
7028 return FALSE;
7030 ind = XEXP (op, 0);
7032 if (reload_completed
7033 && (GET_CODE (ind) == LABEL_REF
7034 || (GET_CODE (ind) == CONST
7035 && GET_CODE (XEXP (ind, 0)) == PLUS
7036 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7037 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7038 return TRUE;
7040 /* Match: (mem (reg)). */
7041 if (GET_CODE (ind) == REG)
7042 return arm_address_register_rtx_p (ind, 0);
7044 /* Allow post-increment with Neon registers. */
7045 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
7046 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7048 /* FIXME: vld1 allows register post-modify. */
7050 /* Match:
7051 (plus (reg)
7052 (const)). */
7053 if (type == 0
7054 && GET_CODE (ind) == PLUS
7055 && GET_CODE (XEXP (ind, 0)) == REG
7056 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7057 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7058 && INTVAL (XEXP (ind, 1)) > -1024
7059 && INTVAL (XEXP (ind, 1)) < 1016
7060 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7061 return TRUE;
7063 return FALSE;
7066 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
7067 type. */
7068 int
7069 neon_struct_mem_operand (rtx op)
7071 rtx ind;
7073 /* Reject eliminable registers. */
7074 if (! (reload_in_progress || reload_completed)
7075 && ( reg_mentioned_p (frame_pointer_rtx, op)
7076 || reg_mentioned_p (arg_pointer_rtx, op)
7077 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7078 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7079 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7080 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7081 return FALSE;
7083 /* Constants are converted into offsets from labels. */
7084 if (GET_CODE (op) != MEM)
7085 return FALSE;
7087 ind = XEXP (op, 0);
7089 if (reload_completed
7090 && (GET_CODE (ind) == LABEL_REF
7091 || (GET_CODE (ind) == CONST
7092 && GET_CODE (XEXP (ind, 0)) == PLUS
7093 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7094 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7095 return TRUE;
7097 /* Match: (mem (reg)). */
7098 if (GET_CODE (ind) == REG)
7099 return arm_address_register_rtx_p (ind, 0);
7101 return FALSE;
7104 /* Return true if X is a register that will be eliminated later on. */
7105 int
7106 arm_eliminable_register (rtx x)
7108 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
7109 || REGNO (x) == ARG_POINTER_REGNUM
7110 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
7111 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
7114 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
7115 coprocessor registers. Otherwise return NO_REGS. */
7117 enum reg_class
7118 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
7120 if (TARGET_NEON
7121 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7122 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7123 && neon_vector_mem_operand (x, 0))
7124 return NO_REGS;
7126 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
7127 return NO_REGS;
7129 return GENERAL_REGS;
7132 /* Values which must be returned in the most-significant end of the return
7133 register. */
7135 static bool
7136 arm_return_in_msb (const_tree valtype)
7138 return (TARGET_AAPCS_BASED
7139 && BYTES_BIG_ENDIAN
7140 && (AGGREGATE_TYPE_P (valtype)
7141 || TREE_CODE (valtype) == COMPLEX_TYPE));
7144 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
7145 Used by the Cirrus Maverick code which has to work around
7146 a hardware bug triggered by such instructions. */
7147 static bool
7148 arm_memory_load_p (rtx insn)
7150 rtx body, lhs, rhs;
7152 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
7153 return false;
7155 body = PATTERN (insn);
7157 if (GET_CODE (body) != SET)
7158 return false;
7160 lhs = XEXP (body, 0);
7161 rhs = XEXP (body, 1);
7163 lhs = REG_OR_SUBREG_RTX (lhs);
7165 /* If the destination is not a general purpose
7166 register we do not have to worry. */
7167 if (GET_CODE (lhs) != REG
7168 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
7169 return false;
7171 /* As well as loads from memory we also have to react
7172 to loads of invalid constants which will be turned
7173 into loads from the minipool. */
7174 return (GET_CODE (rhs) == MEM
7175 || GET_CODE (rhs) == SYMBOL_REF
7176 || note_invalid_constants (insn, -1, false));
7179 /* Return TRUE if INSN is a Cirrus instruction. */
7180 static bool
7181 arm_cirrus_insn_p (rtx insn)
7183 enum attr_cirrus attr;
7185 /* get_attr cannot accept USE or CLOBBER. */
7186 if (!insn
7187 || GET_CODE (insn) != INSN
7188 || GET_CODE (PATTERN (insn)) == USE
7189 || GET_CODE (PATTERN (insn)) == CLOBBER)
7190 return 0;
7192 attr = get_attr_cirrus (insn);
7194 return attr != CIRRUS_NOT;
7197 /* Cirrus reorg for invalid instruction combinations. */
7198 static void
7199 cirrus_reorg (rtx first)
7201 enum attr_cirrus attr;
7202 rtx body = PATTERN (first);
7203 rtx t;
7204 int nops;
7206 /* Any branch must be followed by 2 non Cirrus instructions. */
7207 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
7209 nops = 0;
7210 t = next_nonnote_insn (first);
7212 if (arm_cirrus_insn_p (t))
7213 ++ nops;
7215 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7216 ++ nops;
7218 while (nops --)
7219 emit_insn_after (gen_nop (), first);
7221 return;
7224 /* (float (blah)) is in parallel with a clobber. */
7225 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
7226 body = XVECEXP (body, 0, 0);
7228 if (GET_CODE (body) == SET)
7230 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
7232 /* cfldrd, cfldr64, cfstrd, cfstr64 must
7233 be followed by a non Cirrus insn. */
7234 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
7236 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
7237 emit_insn_after (gen_nop (), first);
7239 return;
7241 else if (arm_memory_load_p (first))
7243 unsigned int arm_regno;
7245 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
7246 ldr/cfmv64hr combination where the Rd field is the same
7247 in both instructions must be split with a non Cirrus
7248 insn. Example:
7250 ldr r0, blah
7252 cfmvsr mvf0, r0. */
7254 /* Get Arm register number for ldr insn. */
7255 if (GET_CODE (lhs) == REG)
7256 arm_regno = REGNO (lhs);
7257 else
7259 gcc_assert (GET_CODE (rhs) == REG);
7260 arm_regno = REGNO (rhs);
7263 /* Next insn. */
7264 first = next_nonnote_insn (first);
7266 if (! arm_cirrus_insn_p (first))
7267 return;
7269 body = PATTERN (first);
7271 /* (float (blah)) is in parallel with a clobber. */
7272 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
7273 body = XVECEXP (body, 0, 0);
7275 if (GET_CODE (body) == FLOAT)
7276 body = XEXP (body, 0);
7278 if (get_attr_cirrus (first) == CIRRUS_MOVE
7279 && GET_CODE (XEXP (body, 1)) == REG
7280 && arm_regno == REGNO (XEXP (body, 1)))
7281 emit_insn_after (gen_nop (), first);
7283 return;
7287 /* get_attr cannot accept USE or CLOBBER. */
7288 if (!first
7289 || GET_CODE (first) != INSN
7290 || GET_CODE (PATTERN (first)) == USE
7291 || GET_CODE (PATTERN (first)) == CLOBBER)
7292 return;
7294 attr = get_attr_cirrus (first);
7296 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
7297 must be followed by a non-coprocessor instruction. */
7298 if (attr == CIRRUS_COMPARE)
7300 nops = 0;
7302 t = next_nonnote_insn (first);
7304 if (arm_cirrus_insn_p (t))
7305 ++ nops;
7307 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7308 ++ nops;
7310 while (nops --)
7311 emit_insn_after (gen_nop (), first);
7313 return;
7317 /* Return TRUE if X references a SYMBOL_REF. */
7318 int
7319 symbol_mentioned_p (rtx x)
7321 const char * fmt;
7322 int i;
7324 if (GET_CODE (x) == SYMBOL_REF)
7325 return 1;
7327 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
7328 are constant offsets, not symbols. */
7329 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7330 return 0;
7332 fmt = GET_RTX_FORMAT (GET_CODE (x));
7334 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7336 if (fmt[i] == 'E')
7338 int j;
7340 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7341 if (symbol_mentioned_p (XVECEXP (x, i, j)))
7342 return 1;
7344 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
7345 return 1;
7348 return 0;
7351 /* Return TRUE if X references a LABEL_REF. */
7352 int
7353 label_mentioned_p (rtx x)
7355 const char * fmt;
7356 int i;
7358 if (GET_CODE (x) == LABEL_REF)
7359 return 1;
7361 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
7362 instruction, but they are constant offsets, not symbols. */
7363 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7364 return 0;
7366 fmt = GET_RTX_FORMAT (GET_CODE (x));
7367 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7369 if (fmt[i] == 'E')
7371 int j;
7373 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7374 if (label_mentioned_p (XVECEXP (x, i, j)))
7375 return 1;
7377 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
7378 return 1;
7381 return 0;
7384 int
7385 tls_mentioned_p (rtx x)
7387 switch (GET_CODE (x))
7389 case CONST:
7390 return tls_mentioned_p (XEXP (x, 0));
7392 case UNSPEC:
7393 if (XINT (x, 1) == UNSPEC_TLS)
7394 return 1;
7396 default:
7397 return 0;
7401 /* Must not copy a SET whose source operand is PC-relative. */
7403 static bool
7404 arm_cannot_copy_insn_p (rtx insn)
7406 rtx pat = PATTERN (insn);
7408 if (GET_CODE (pat) == SET)
7410 rtx rhs = SET_SRC (pat);
7412 if (GET_CODE (rhs) == UNSPEC
7413 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
7414 return TRUE;
7416 if (GET_CODE (rhs) == MEM
7417 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
7418 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
7419 return TRUE;
7422 return FALSE;
7425 enum rtx_code
7426 minmax_code (rtx x)
7428 enum rtx_code code = GET_CODE (x);
7430 switch (code)
7432 case SMAX:
7433 return GE;
7434 case SMIN:
7435 return LE;
7436 case UMIN:
7437 return LEU;
7438 case UMAX:
7439 return GEU;
7440 default:
7441 gcc_unreachable ();
7445 /* Return 1 if memory locations are adjacent. */
7446 int
7447 adjacent_mem_locations (rtx a, rtx b)
7449 /* We don't guarantee to preserve the order of these memory refs. */
7450 if (volatile_refs_p (a) || volatile_refs_p (b))
7451 return 0;
7453 if ((GET_CODE (XEXP (a, 0)) == REG
7454 || (GET_CODE (XEXP (a, 0)) == PLUS
7455 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
7456 && (GET_CODE (XEXP (b, 0)) == REG
7457 || (GET_CODE (XEXP (b, 0)) == PLUS
7458 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
7460 HOST_WIDE_INT val0 = 0, val1 = 0;
7461 rtx reg0, reg1;
7462 int val_diff;
7464 if (GET_CODE (XEXP (a, 0)) == PLUS)
7466 reg0 = XEXP (XEXP (a, 0), 0);
7467 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
7469 else
7470 reg0 = XEXP (a, 0);
7472 if (GET_CODE (XEXP (b, 0)) == PLUS)
7474 reg1 = XEXP (XEXP (b, 0), 0);
7475 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
7477 else
7478 reg1 = XEXP (b, 0);
7480 /* Don't accept any offset that will require multiple
7481 instructions to handle, since this would cause the
7482 arith_adjacentmem pattern to output an overlong sequence. */
7483 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
7484 return 0;
7486 /* Don't allow an eliminable register: register elimination can make
7487 the offset too large. */
7488 if (arm_eliminable_register (reg0))
7489 return 0;
7491 val_diff = val1 - val0;
7493 if (arm_ld_sched)
7495 /* If the target has load delay slots, then there's no benefit
7496 to using an ldm instruction unless the offset is zero and
7497 we are optimizing for size. */
7498 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
7499 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
7500 && (val_diff == 4 || val_diff == -4));
7503 return ((REGNO (reg0) == REGNO (reg1))
7504 && (val_diff == 4 || val_diff == -4));
7507 return 0;
7510 int
7511 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7512 HOST_WIDE_INT *load_offset)
7514 int unsorted_regs[4];
7515 HOST_WIDE_INT unsorted_offsets[4];
7516 int order[4];
7517 int base_reg = -1;
7518 int i;
7520 /* Can only handle 2, 3, or 4 insns at present,
7521 though could be easily extended if required. */
7522 gcc_assert (nops >= 2 && nops <= 4);
7524 memset (order, 0, 4 * sizeof (int));
7526 /* Loop over the operands and check that the memory references are
7527 suitable (i.e. immediate offsets from the same base register). At
7528 the same time, extract the target register, and the memory
7529 offsets. */
7530 for (i = 0; i < nops; i++)
7532 rtx reg;
7533 rtx offset;
7535 /* Convert a subreg of a mem into the mem itself. */
7536 if (GET_CODE (operands[nops + i]) == SUBREG)
7537 operands[nops + i] = alter_subreg (operands + (nops + i));
7539 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7541 /* Don't reorder volatile memory references; it doesn't seem worth
7542 looking for the case where the order is ok anyway. */
7543 if (MEM_VOLATILE_P (operands[nops + i]))
7544 return 0;
7546 offset = const0_rtx;
7548 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7549 || (GET_CODE (reg) == SUBREG
7550 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7551 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7552 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7553 == REG)
7554 || (GET_CODE (reg) == SUBREG
7555 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7556 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7557 == CONST_INT)))
7559 if (i == 0)
7561 base_reg = REGNO (reg);
7562 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7563 ? REGNO (operands[i])
7564 : REGNO (SUBREG_REG (operands[i])));
7565 order[0] = 0;
7567 else
7569 if (base_reg != (int) REGNO (reg))
7570 /* Not addressed from the same base register. */
7571 return 0;
7573 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7574 ? REGNO (operands[i])
7575 : REGNO (SUBREG_REG (operands[i])));
7576 if (unsorted_regs[i] < unsorted_regs[order[0]])
7577 order[0] = i;
7580 /* If it isn't an integer register, or if it overwrites the
7581 base register but isn't the last insn in the list, then
7582 we can't do this. */
7583 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7584 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7585 return 0;
7587 unsorted_offsets[i] = INTVAL (offset);
7589 else
7590 /* Not a suitable memory address. */
7591 return 0;
7594 /* All the useful information has now been extracted from the
7595 operands into unsorted_regs and unsorted_offsets; additionally,
7596 order[0] has been set to the lowest numbered register in the
7597 list. Sort the registers into order, and check that the memory
7598 offsets are ascending and adjacent. */
7600 for (i = 1; i < nops; i++)
7602 int j;
7604 order[i] = order[i - 1];
7605 for (j = 0; j < nops; j++)
7606 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7607 && (order[i] == order[i - 1]
7608 || unsorted_regs[j] < unsorted_regs[order[i]]))
7609 order[i] = j;
7611 /* Have we found a suitable register? If not, one must be used more
7612 than once. */
7613 if (order[i] == order[i - 1])
7614 return 0;
7616 /* Is the memory address adjacent and ascending? */
7617 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7618 return 0;
7621 if (base)
7623 *base = base_reg;
7625 for (i = 0; i < nops; i++)
7626 regs[i] = unsorted_regs[order[i]];
7628 *load_offset = unsorted_offsets[order[0]];
7631 if (unsorted_offsets[order[0]] == 0)
7632 return 1; /* ldmia */
7634 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7635 return 2; /* ldmib */
7637 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7638 return 3; /* ldmda */
7640 if (unsorted_offsets[order[nops - 1]] == -4)
7641 return 4; /* ldmdb */
7643 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7644 if the offset isn't small enough. The reason 2 ldrs are faster
7645 is because these ARMs are able to do more than one cache access
7646 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7647 whilst the ARM8 has a double bandwidth cache. This means that
7648 these cores can do both an instruction fetch and a data fetch in
7649 a single cycle, so the trick of calculating the address into a
7650 scratch register (one of the result regs) and then doing a load
7651 multiple actually becomes slower (and no smaller in code size).
7652 That is the transformation
7654 ldr rd1, [rbase + offset]
7655 ldr rd2, [rbase + offset + 4]
7659 add rd1, rbase, offset
7660 ldmia rd1, {rd1, rd2}
7662 produces worse code -- '3 cycles + any stalls on rd2' instead of
7663 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7664 access per cycle, the first sequence could never complete in less
7665 than 6 cycles, whereas the ldm sequence would only take 5 and
7666 would make better use of sequential accesses if not hitting the
7667 cache.
7669 We cheat here and test 'arm_ld_sched' which we currently know to
7670 only be true for the ARM8, ARM9 and StrongARM. If this ever
7671 changes, then the test below needs to be reworked. */
7672 if (nops == 2 && arm_ld_sched)
7673 return 0;
7675 /* Can't do it without setting up the offset, only do this if it takes
7676 no more than one insn. */
7677 return (const_ok_for_arm (unsorted_offsets[order[0]])
7678 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7681 const char *
7682 emit_ldm_seq (rtx *operands, int nops)
7684 int regs[4];
7685 int base_reg;
7686 HOST_WIDE_INT offset;
7687 char buf[100];
7688 int i;
7690 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7692 case 1:
7693 strcpy (buf, "ldm%(ia%)\t");
7694 break;
7696 case 2:
7697 strcpy (buf, "ldm%(ib%)\t");
7698 break;
7700 case 3:
7701 strcpy (buf, "ldm%(da%)\t");
7702 break;
7704 case 4:
7705 strcpy (buf, "ldm%(db%)\t");
7706 break;
7708 case 5:
7709 if (offset >= 0)
7710 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7711 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7712 (long) offset);
7713 else
7714 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7715 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7716 (long) -offset);
7717 output_asm_insn (buf, operands);
7718 base_reg = regs[0];
7719 strcpy (buf, "ldm%(ia%)\t");
7720 break;
7722 default:
7723 gcc_unreachable ();
7726 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7727 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7729 for (i = 1; i < nops; i++)
7730 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7731 reg_names[regs[i]]);
7733 strcat (buf, "}\t%@ phole ldm");
7735 output_asm_insn (buf, operands);
7736 return "";
7739 int
7740 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7741 HOST_WIDE_INT * load_offset)
7743 int unsorted_regs[4];
7744 HOST_WIDE_INT unsorted_offsets[4];
7745 int order[4];
7746 int base_reg = -1;
7747 int i;
7749 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7750 extended if required. */
7751 gcc_assert (nops >= 2 && nops <= 4);
7753 memset (order, 0, 4 * sizeof (int));
7755 /* Loop over the operands and check that the memory references are
7756 suitable (i.e. immediate offsets from the same base register). At
7757 the same time, extract the target register, and the memory
7758 offsets. */
7759 for (i = 0; i < nops; i++)
7761 rtx reg;
7762 rtx offset;
7764 /* Convert a subreg of a mem into the mem itself. */
7765 if (GET_CODE (operands[nops + i]) == SUBREG)
7766 operands[nops + i] = alter_subreg (operands + (nops + i));
7768 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7770 /* Don't reorder volatile memory references; it doesn't seem worth
7771 looking for the case where the order is ok anyway. */
7772 if (MEM_VOLATILE_P (operands[nops + i]))
7773 return 0;
7775 offset = const0_rtx;
7777 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7778 || (GET_CODE (reg) == SUBREG
7779 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7780 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7781 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7782 == REG)
7783 || (GET_CODE (reg) == SUBREG
7784 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7785 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7786 == CONST_INT)))
7788 if (i == 0)
7790 base_reg = REGNO (reg);
7791 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7792 ? REGNO (operands[i])
7793 : REGNO (SUBREG_REG (operands[i])));
7794 order[0] = 0;
7796 else
7798 if (base_reg != (int) REGNO (reg))
7799 /* Not addressed from the same base register. */
7800 return 0;
7802 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7803 ? REGNO (operands[i])
7804 : REGNO (SUBREG_REG (operands[i])));
7805 if (unsorted_regs[i] < unsorted_regs[order[0]])
7806 order[0] = i;
7809 /* If it isn't an integer register, then we can't do this. */
7810 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7811 return 0;
7813 unsorted_offsets[i] = INTVAL (offset);
7815 else
7816 /* Not a suitable memory address. */
7817 return 0;
7820 /* All the useful information has now been extracted from the
7821 operands into unsorted_regs and unsorted_offsets; additionally,
7822 order[0] has been set to the lowest numbered register in the
7823 list. Sort the registers into order, and check that the memory
7824 offsets are ascending and adjacent. */
7826 for (i = 1; i < nops; i++)
7828 int j;
7830 order[i] = order[i - 1];
7831 for (j = 0; j < nops; j++)
7832 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7833 && (order[i] == order[i - 1]
7834 || unsorted_regs[j] < unsorted_regs[order[i]]))
7835 order[i] = j;
7837 /* Have we found a suitable register? If not, one must be used more
7838 than once. */
7839 if (order[i] == order[i - 1])
7840 return 0;
7842 /* Is the memory address adjacent and ascending? */
7843 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7844 return 0;
7847 if (base)
7849 *base = base_reg;
7851 for (i = 0; i < nops; i++)
7852 regs[i] = unsorted_regs[order[i]];
7854 *load_offset = unsorted_offsets[order[0]];
7857 if (unsorted_offsets[order[0]] == 0)
7858 return 1; /* stmia */
7860 if (unsorted_offsets[order[0]] == 4)
7861 return 2; /* stmib */
7863 if (unsorted_offsets[order[nops - 1]] == 0)
7864 return 3; /* stmda */
7866 if (unsorted_offsets[order[nops - 1]] == -4)
7867 return 4; /* stmdb */
7869 return 0;
7872 const char *
7873 emit_stm_seq (rtx *operands, int nops)
7875 int regs[4];
7876 int base_reg;
7877 HOST_WIDE_INT offset;
7878 char buf[100];
7879 int i;
7881 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7883 case 1:
7884 strcpy (buf, "stm%(ia%)\t");
7885 break;
7887 case 2:
7888 strcpy (buf, "stm%(ib%)\t");
7889 break;
7891 case 3:
7892 strcpy (buf, "stm%(da%)\t");
7893 break;
7895 case 4:
7896 strcpy (buf, "stm%(db%)\t");
7897 break;
7899 default:
7900 gcc_unreachable ();
7903 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7904 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7906 for (i = 1; i < nops; i++)
7907 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7908 reg_names[regs[i]]);
7910 strcat (buf, "}\t%@ phole stm");
7912 output_asm_insn (buf, operands);
7913 return "";
7916 /* Routines for use in generating RTL. */
7918 rtx
7919 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7920 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7922 HOST_WIDE_INT offset = *offsetp;
7923 int i = 0, j;
7924 rtx result;
7925 int sign = up ? 1 : -1;
7926 rtx mem, addr;
7928 /* XScale has load-store double instructions, but they have stricter
7929 alignment requirements than load-store multiple, so we cannot
7930 use them.
7932 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7933 the pipeline until completion.
7935 NREGS CYCLES
7936 1 3
7937 2 4
7938 3 5
7939 4 6
7941 An ldr instruction takes 1-3 cycles, but does not block the
7942 pipeline.
7944 NREGS CYCLES
7945 1 1-3
7946 2 2-6
7947 3 3-9
7948 4 4-12
7950 Best case ldr will always win. However, the more ldr instructions
7951 we issue, the less likely we are to be able to schedule them well.
7952 Using ldr instructions also increases code size.
7954 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7955 for counts of 3 or 4 regs. */
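/* Worked illustration of the compromise above: copying 2 words costs one ldm
   of 2 + 2 = 4 blocking cycles, versus two independent ldr instructions of
   1-3 cycles each that the scheduler can overlap with other work; copying 4
   words costs one 6-cycle ldm versus four ldr instructions (4-12 cycles) and
   extra code size, so ldm is kept for 3 or 4 registers.  */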
7956 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7958 rtx seq;
7960 start_sequence ();
7962 for (i = 0; i < count; i++)
7964 addr = plus_constant (from, i * 4 * sign);
7965 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7966 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7967 offset += 4 * sign;
7970 if (write_back)
7972 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7973 *offsetp = offset;
7976 seq = get_insns ();
7977 end_sequence ();
7979 return seq;
7982 result = gen_rtx_PARALLEL (VOIDmode,
7983 rtvec_alloc (count + (write_back ? 1 : 0)));
7984 if (write_back)
7986 XVECEXP (result, 0, 0)
7987 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7988 i = 1;
7989 count++;
7992 for (j = 0; i < count; i++, j++)
7994 addr = plus_constant (from, j * 4 * sign);
7995 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7996 XVECEXP (result, 0, i)
7997 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7998 offset += 4 * sign;
8001 if (write_back)
8002 *offsetp = offset;
8004 return result;
8007 rtx
8008 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
8009 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8011 HOST_WIDE_INT offset = *offsetp;
8012 int i = 0, j;
8013 rtx result;
8014 int sign = up ? 1 : -1;
8015 rtx mem, addr;
8017 /* See arm_gen_load_multiple for discussion of
8018 the pros/cons of ldm/stm usage for XScale. */
8019 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8021 rtx seq;
8023 start_sequence ();
8025 for (i = 0; i < count; i++)
8027 addr = plus_constant (to, i * 4 * sign);
8028 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8029 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
8030 offset += 4 * sign;
8033 if (write_back)
8035 emit_move_insn (to, plus_constant (to, count * 4 * sign));
8036 *offsetp = offset;
8039 seq = get_insns ();
8040 end_sequence ();
8042 return seq;
8045 result = gen_rtx_PARALLEL (VOIDmode,
8046 rtvec_alloc (count + (write_back ? 1 : 0)));
8047 if (write_back)
8049 XVECEXP (result, 0, 0)
8050 = gen_rtx_SET (VOIDmode, to,
8051 plus_constant (to, count * 4 * sign));
8052 i = 1;
8053 count++;
8056 for (j = 0; i < count; i++, j++)
8058 addr = plus_constant (to, j * 4 * sign);
8059 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8060 XVECEXP (result, 0, i)
8061 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
8062 offset += 4 * sign;
8065 if (write_back)
8066 *offsetp = offset;
8068 return result;
8071 int
8072 arm_gen_movmemqi (rtx *operands)
8074 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
8075 HOST_WIDE_INT srcoffset, dstoffset;
8076 int i;
8077 rtx src, dst, srcbase, dstbase;
8078 rtx part_bytes_reg = NULL;
8079 rtx mem;
8081 if (GET_CODE (operands[2]) != CONST_INT
8082 || GET_CODE (operands[3]) != CONST_INT
8083 || INTVAL (operands[2]) > 64
8084 || INTVAL (operands[3]) & 3)
8085 return 0;
8087 dstbase = operands[0];
8088 srcbase = operands[1];
8090 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
8091 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
8093 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
8094 out_words_to_go = INTVAL (operands[2]) / 4;
8095 last_bytes = INTVAL (operands[2]) & 3;
8096 dstoffset = srcoffset = 0;
8098 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
8099 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
8101 for (i = 0; in_words_to_go >= 2; i+=4)
8103 if (in_words_to_go > 4)
8104 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
8105 srcbase, &srcoffset));
8106 else
8107 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
8108 FALSE, srcbase, &srcoffset));
8110 if (out_words_to_go)
8112 if (out_words_to_go > 4)
8113 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
8114 dstbase, &dstoffset));
8115 else if (out_words_to_go != 1)
8116 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
8117 dst, TRUE,
8118 (last_bytes == 0
8119 ? FALSE : TRUE),
8120 dstbase, &dstoffset));
8121 else
8123 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8124 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
8125 if (last_bytes != 0)
8127 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
8128 dstoffset += 4;
8133 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
8134 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
8137 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
8138 if (out_words_to_go)
8140 rtx sreg;
8142 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8143 sreg = copy_to_reg (mem);
8145 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8146 emit_move_insn (mem, sreg);
8147 in_words_to_go--;
8149 gcc_assert (!in_words_to_go); /* Sanity check */
8152 if (in_words_to_go)
8154 gcc_assert (in_words_to_go > 0);
8156 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8157 part_bytes_reg = copy_to_mode_reg (SImode, mem);
8160 gcc_assert (!last_bytes || part_bytes_reg);
8162 if (BYTES_BIG_ENDIAN && last_bytes)
8164 rtx tmp = gen_reg_rtx (SImode);
8166 /* The bytes we want are in the top end of the word. */
8167 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
8168 GEN_INT (8 * (4 - last_bytes))));
8169 part_bytes_reg = tmp;
8171 while (last_bytes)
8173 mem = adjust_automodify_address (dstbase, QImode,
8174 plus_constant (dst, last_bytes - 1),
8175 dstoffset + last_bytes - 1);
8176 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8178 if (--last_bytes)
8180 tmp = gen_reg_rtx (SImode);
8181 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
8182 part_bytes_reg = tmp;
8187 else
8189 if (last_bytes > 1)
8191 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
8192 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
8193 last_bytes -= 2;
8194 if (last_bytes)
8196 rtx tmp = gen_reg_rtx (SImode);
8197 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
8198 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
8199 part_bytes_reg = tmp;
8200 dstoffset += 2;
8204 if (last_bytes)
8206 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
8207 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8211 return 1;
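/* Example of the expansion (an illustration, not emitted verbatim): a 16-byte
   word-aligned copy has in_words_to_go == out_words_to_go == 4 and
   last_bytes == 0, so the loop above emits a single 4-register load-multiple
   from the source followed by a single 4-register store-multiple to the
   destination, with no writeback and no trailing byte stores.  */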
8214 /* Select a dominance comparison mode if possible for a test of the general
8215 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
8216 COND_OR == DOM_CC_X_AND_Y => (X && Y)
8217 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
8218 COND_OR == DOM_CC_X_OR_Y => (X || Y)
8219 In all cases OP will be either EQ or NE, but we don't need to know which
8220 here. If we are unable to support a dominance comparison we return
8221 CCmode. This will then fail to match for the RTL expressions that
8222 generate this call. */
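/* For example (an illustration of the mapping below): for the OR forms,
   combining an LT comparison with an LE comparison yields CC_DLEmode and
   combining GTU with NE yields CC_DNEmode; two EQ comparisons yield
   CC_DEQmode in every form; pairs with no dominance relation (e.g. LT with
   GTU) fall back to CCmode.  */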
8223 enum machine_mode
8224 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
8226 enum rtx_code cond1, cond2;
8227 int swapped = 0;
8229 /* Currently we will probably get the wrong result if the individual
8230 comparisons are not simple. This also ensures that it is safe to
8231 reverse a comparison if necessary. */
8232 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
8233 != CCmode)
8234 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
8235 != CCmode))
8236 return CCmode;
8238 /* The if_then_else variant of this tests the second condition if the
8239 first passes, but is true if the first fails. Reverse the first
8240 condition to get a true "inclusive-or" expression. */
8241 if (cond_or == DOM_CC_NX_OR_Y)
8242 cond1 = reverse_condition (cond1);
8244 /* If the comparisons are not equal, and one doesn't dominate the other,
8245 then we can't do this. */
8246 if (cond1 != cond2
8247 && !comparison_dominates_p (cond1, cond2)
8248 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
8249 return CCmode;
8251 if (swapped)
8253 enum rtx_code temp = cond1;
8254 cond1 = cond2;
8255 cond2 = temp;
8258 switch (cond1)
8260 case EQ:
8261 if (cond_or == DOM_CC_X_AND_Y)
8262 return CC_DEQmode;
8264 switch (cond2)
8266 case EQ: return CC_DEQmode;
8267 case LE: return CC_DLEmode;
8268 case LEU: return CC_DLEUmode;
8269 case GE: return CC_DGEmode;
8270 case GEU: return CC_DGEUmode;
8271 default: gcc_unreachable ();
8274 case LT:
8275 if (cond_or == DOM_CC_X_AND_Y)
8276 return CC_DLTmode;
8278 switch (cond2)
8280 case LT:
8281 return CC_DLTmode;
8282 case LE:
8283 return CC_DLEmode;
8284 case NE:
8285 return CC_DNEmode;
8286 default:
8287 gcc_unreachable ();
8290 case GT:
8291 if (cond_or == DOM_CC_X_AND_Y)
8292 return CC_DGTmode;
8294 switch (cond2)
8296 case GT:
8297 return CC_DGTmode;
8298 case GE:
8299 return CC_DGEmode;
8300 case NE:
8301 return CC_DNEmode;
8302 default:
8303 gcc_unreachable ();
8306 case LTU:
8307 if (cond_or == DOM_CC_X_AND_Y)
8308 return CC_DLTUmode;
8310 switch (cond2)
8312 case LTU:
8313 return CC_DLTUmode;
8314 case LEU:
8315 return CC_DLEUmode;
8316 case NE:
8317 return CC_DNEmode;
8318 default:
8319 gcc_unreachable ();
8322 case GTU:
8323 if (cond_or == DOM_CC_X_AND_Y)
8324 return CC_DGTUmode;
8326 switch (cond2)
8328 case GTU:
8329 return CC_DGTUmode;
8330 case GEU:
8331 return CC_DGEUmode;
8332 case NE:
8333 return CC_DNEmode;
8334 default:
8335 gcc_unreachable ();
8338 /* The remaining cases only occur when both comparisons are the
8339 same. */
8340 case NE:
8341 gcc_assert (cond1 == cond2);
8342 return CC_DNEmode;
8344 case LE:
8345 gcc_assert (cond1 == cond2);
8346 return CC_DLEmode;
8348 case GE:
8349 gcc_assert (cond1 == cond2);
8350 return CC_DGEmode;
8352 case LEU:
8353 gcc_assert (cond1 == cond2);
8354 return CC_DLEUmode;
8356 case GEU:
8357 gcc_assert (cond1 == cond2);
8358 return CC_DGEUmode;
8360 default:
8361 gcc_unreachable ();
8365 enum machine_mode
8366 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
8368 /* All floating point compares return CCFPmode, except for the ordered
8369 inequalities LT, LE, GT and GE, which normally return CCFPEmode. */
8370 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
8372 switch (op)
8374 case EQ:
8375 case NE:
8376 case UNORDERED:
8377 case ORDERED:
8378 case UNLT:
8379 case UNLE:
8380 case UNGT:
8381 case UNGE:
8382 case UNEQ:
8383 case LTGT:
8384 return CCFPmode;
8386 case LT:
8387 case LE:
8388 case GT:
8389 case GE:
8390 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
8391 return CCFPmode;
8392 return CCFPEmode;
8394 default:
8395 gcc_unreachable ();
8399 /* A compare with a shifted operand. Because of canonicalization, the
8400 comparison will have to be swapped when we emit the assembler. */
8401 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
8402 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8403 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
8404 || GET_CODE (x) == ROTATERT))
8405 return CC_SWPmode;
8407 /* This operation is performed swapped, but since we only rely on the Z
8408 flag we don't need an additional mode. */
8409 if (GET_MODE (y) == SImode && REG_P (y)
8410 && GET_CODE (x) == NEG
8411 && (op == EQ || op == NE))
8412 return CC_Zmode;
8414 /* This is a special case that is used by combine to allow a
8415 comparison of a shifted byte load to be split into a zero-extend
8416 followed by a comparison of the shifted integer (only valid for
8417 equalities and unsigned inequalities). */
8418 if (GET_MODE (x) == SImode
8419 && GET_CODE (x) == ASHIFT
8420 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
8421 && GET_CODE (XEXP (x, 0)) == SUBREG
8422 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
8423 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
8424 && (op == EQ || op == NE
8425 || op == GEU || op == GTU || op == LTU || op == LEU)
8426 && GET_CODE (y) == CONST_INT)
8427 return CC_Zmode;
8429 /* A construct for a conditional compare: if the false arm contains
8430 0, then both conditions must be true; otherwise either condition
8431 must be true. Not all conditions are possible, so CCmode is
8432 returned if it can't be done. */
8433 if (GET_CODE (x) == IF_THEN_ELSE
8434 && (XEXP (x, 2) == const0_rtx
8435 || XEXP (x, 2) == const1_rtx)
8436 && COMPARISON_P (XEXP (x, 0))
8437 && COMPARISON_P (XEXP (x, 1)))
8438 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8439 INTVAL (XEXP (x, 2)));
8441 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
8442 if (GET_CODE (x) == AND
8443 && COMPARISON_P (XEXP (x, 0))
8444 && COMPARISON_P (XEXP (x, 1)))
8445 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8446 DOM_CC_X_AND_Y);
8448 if (GET_CODE (x) == IOR
8449 && COMPARISON_P (XEXP (x, 0))
8450 && COMPARISON_P (XEXP (x, 1)))
8451 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8452 DOM_CC_X_OR_Y);
8454 /* An operation (on Thumb) where we want to test for a single bit.
8455 This is done by shifting that bit up into the top bit of a
8456 scratch register; we can then branch on the sign bit. */
8457 if (TARGET_THUMB1
8458 && GET_MODE (x) == SImode
8459 && (op == EQ || op == NE)
8460 && GET_CODE (x) == ZERO_EXTRACT
8461 && XEXP (x, 1) == const1_rtx)
8462 return CC_Nmode;
8464 /* For an operation that sets the condition codes as a side-effect, the
8465 V flag is not set correctly, so we can only use comparisons where
8466 this doesn't matter. (For LT and GE we can use "mi" and "pl"
8467 instead.) */
8468 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
8469 if (GET_MODE (x) == SImode
8470 && y == const0_rtx
8471 && (op == EQ || op == NE || op == LT || op == GE)
8472 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
8473 || GET_CODE (x) == AND || GET_CODE (x) == IOR
8474 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
8475 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
8476 || GET_CODE (x) == LSHIFTRT
8477 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8478 || GET_CODE (x) == ROTATERT
8479 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
8480 return CC_NOOVmode;
8482 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
8483 return CC_Zmode;
8485 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
8486 && GET_CODE (x) == PLUS
8487 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
8488 return CC_Cmode;
8490 return CCmode;
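/* A few illustrative selections (register operands assumed): an EQ or NE test
   of (plus a b) against zero gives CC_NOOVmode; comparing a register against
   a shifted register such as (ashift b (const_int 2)) gives CC_SWPmode; an
   LTU or GEU test of (plus a b) against one of its own operands gives
   CC_Cmode; everything else defaults to CCmode.  */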
8493 /* X and Y are two things to compare using CODE. Emit the compare insn and
8494 return the rtx for register 0 in the proper mode.  (An FP flag, marking a
8495 floating point compare, does not seem to be needed on the ARM.) */
8496 rtx
8497 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
8499 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
8500 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
8502 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
8504 return cc_reg;
8507 /* Generate a sequence of insns that will generate the correct return
8508 address mask depending on the physical architecture that the program
8509 is running on. */
8510 rtx
8511 arm_gen_return_addr_mask (void)
8513 rtx reg = gen_reg_rtx (Pmode);
8515 emit_insn (gen_return_addr_mask (reg));
8516 return reg;
8519 void
8520 arm_reload_in_hi (rtx *operands)
8522 rtx ref = operands[1];
8523 rtx base, scratch;
8524 HOST_WIDE_INT offset = 0;
8526 if (GET_CODE (ref) == SUBREG)
8528 offset = SUBREG_BYTE (ref);
8529 ref = SUBREG_REG (ref);
8532 if (GET_CODE (ref) == REG)
8534 /* We have a pseudo which has been spilled onto the stack; there
8535 are two cases here: the first where there is a simple
8536 stack-slot replacement and a second where the stack-slot is
8537 out of range, or is used as a subreg. */
8538 if (reg_equiv_mem[REGNO (ref)])
8540 ref = reg_equiv_mem[REGNO (ref)];
8541 base = find_replacement (&XEXP (ref, 0));
8543 else
8544 /* The slot is out of range, or was dressed up in a SUBREG. */
8545 base = reg_equiv_address[REGNO (ref)];
8547 else
8548 base = find_replacement (&XEXP (ref, 0));
8550 /* Handle the case where the address is too complex to be offset by 1. */
8551 if (GET_CODE (base) == MINUS
8552 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8554 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8556 emit_set_insn (base_plus, base);
8557 base = base_plus;
8559 else if (GET_CODE (base) == PLUS)
8561 /* The addend must be CONST_INT, or we would have dealt with it above. */
8562 HOST_WIDE_INT hi, lo;
8564 offset += INTVAL (XEXP (base, 1));
8565 base = XEXP (base, 0);
8567 /* Rework the address into a legal sequence of insns. */
8568 /* Valid range for lo is -4095 -> 4095 */
8569 lo = (offset >= 0
8570 ? (offset & 0xfff)
8571 : -((-offset) & 0xfff));
8573 /* Corner case: if lo is the max offset then we would be out of range
8574 once we have added the additional 1 below, so bump the msb into the
8575 pre-loading insn(s). */
8576 if (lo == 4095)
8577 lo &= 0x7ff;
8579 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8580 ^ (HOST_WIDE_INT) 0x80000000)
8581 - (HOST_WIDE_INT) 0x80000000);
8583 gcc_assert (hi + lo == offset);
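/* Worked example of the split above (numbers hypothetical): an offset of
   0x1234 gives lo = 0x234 and hi = 0x1000, so the base is advanced by
   0x1000 and the two byte loads use offsets 0x234 and 0x235.  The corner
   case offset 4095 gives lo = 2047 and hi = 2048, keeping lo + 1 within
   the +/-4095 addressing range.  */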
8585 if (hi != 0)
8587 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8589 /* Get the base address; addsi3 knows how to handle constants
8590 that require more than one insn. */
8591 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8592 base = base_plus;
8593 offset = lo;
8597 /* Operands[2] may overlap operands[0] (though it won't overlap
8598 operands[1]); that's why we asked for a DImode reg -- so that we can
8599 use the half that does not overlap. */
8600 if (REGNO (operands[2]) == REGNO (operands[0]))
8601 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8602 else
8603 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8605 emit_insn (gen_zero_extendqisi2 (scratch,
8606 gen_rtx_MEM (QImode,
8607 plus_constant (base,
8608 offset))));
8609 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8610 gen_rtx_MEM (QImode,
8611 plus_constant (base,
8612 offset + 1))));
8613 if (!BYTES_BIG_ENDIAN)
8614 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8615 gen_rtx_IOR (SImode,
8616 gen_rtx_ASHIFT
8617 (SImode,
8618 gen_rtx_SUBREG (SImode, operands[0], 0),
8619 GEN_INT (8)),
8620 scratch));
8621 else
8622 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8623 gen_rtx_IOR (SImode,
8624 gen_rtx_ASHIFT (SImode, scratch,
8625 GEN_INT (8)),
8626 gen_rtx_SUBREG (SImode, operands[0], 0)));
8629 /* Handle storing a half-word to memory during reload by synthesizing as two
8630 byte stores. Take care not to clobber the input values until after we
8631 have moved them somewhere safe. This code assumes that if the DImode
8632 scratch in operands[2] overlaps either the input value or output address
8633 in some way, then that value must die in this insn (we absolutely need
8634 two scratch registers for some corner cases). */
8635 void
8636 arm_reload_out_hi (rtx *operands)
8638 rtx ref = operands[0];
8639 rtx outval = operands[1];
8640 rtx base, scratch;
8641 HOST_WIDE_INT offset = 0;
8643 if (GET_CODE (ref) == SUBREG)
8645 offset = SUBREG_BYTE (ref);
8646 ref = SUBREG_REG (ref);
8649 if (GET_CODE (ref) == REG)
8651 /* We have a pseudo which has been spilled onto the stack; there
8652 are two cases here: the first where there is a simple
8653 stack-slot replacement and a second where the stack-slot is
8654 out of range, or is used as a subreg. */
8655 if (reg_equiv_mem[REGNO (ref)])
8657 ref = reg_equiv_mem[REGNO (ref)];
8658 base = find_replacement (&XEXP (ref, 0));
8660 else
8661 /* The slot is out of range, or was dressed up in a SUBREG. */
8662 base = reg_equiv_address[REGNO (ref)];
8664 else
8665 base = find_replacement (&XEXP (ref, 0));
8667 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8669 /* Handle the case where the address is too complex to be offset by 1. */
8670 if (GET_CODE (base) == MINUS
8671 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8673 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8675 /* Be careful not to destroy OUTVAL. */
8676 if (reg_overlap_mentioned_p (base_plus, outval))
8678 /* Updating base_plus might destroy outval; see if we can
8679 swap the scratch and base_plus. */
8680 if (!reg_overlap_mentioned_p (scratch, outval))
8682 rtx tmp = scratch;
8683 scratch = base_plus;
8684 base_plus = tmp;
8686 else
8688 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8690 /* Be conservative and copy OUTVAL into the scratch now,
8691 this should only be necessary if outval is a subreg
8692 of something larger than a word. */
8693 /* XXX Might this clobber base? I can't see how it can,
8694 since scratch is known to overlap with OUTVAL, and
8695 must be wider than a word. */
8696 emit_insn (gen_movhi (scratch_hi, outval));
8697 outval = scratch_hi;
8701 emit_set_insn (base_plus, base);
8702 base = base_plus;
8704 else if (GET_CODE (base) == PLUS)
8706 /* The addend must be CONST_INT, or we would have dealt with it above. */
8707 HOST_WIDE_INT hi, lo;
8709 offset += INTVAL (XEXP (base, 1));
8710 base = XEXP (base, 0);
8712 /* Rework the address into a legal sequence of insns. */
8713 /* Valid range for lo is -4095 -> 4095 */
8714 lo = (offset >= 0
8715 ? (offset & 0xfff)
8716 : -((-offset) & 0xfff));
8718 /* Corner case: if lo is the max offset then we would be out of range
8719 once we have added the additional 1 below, so bump the msb into the
8720 pre-loading insn(s). */
8721 if (lo == 4095)
8722 lo &= 0x7ff;
8724 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8725 ^ (HOST_WIDE_INT) 0x80000000)
8726 - (HOST_WIDE_INT) 0x80000000);
8728 gcc_assert (hi + lo == offset);
8730 if (hi != 0)
8732 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8734 /* Be careful not to destroy OUTVAL. */
8735 if (reg_overlap_mentioned_p (base_plus, outval))
8737 /* Updating base_plus might destroy outval; see if we
8738 can swap the scratch and base_plus. */
8739 if (!reg_overlap_mentioned_p (scratch, outval))
8741 rtx tmp = scratch;
8742 scratch = base_plus;
8743 base_plus = tmp;
8745 else
8747 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8749 /* Be conservative and copy outval into scratch now,
8750 this should only be necessary if outval is a
8751 subreg of something larger than a word. */
8752 /* XXX Might this clobber base? I can't see how it
8753 can, since scratch is known to overlap with
8754 outval. */
8755 emit_insn (gen_movhi (scratch_hi, outval));
8756 outval = scratch_hi;
8760 /* Get the base address; addsi3 knows how to handle constants
8761 that require more than one insn. */
8762 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8763 base = base_plus;
8764 offset = lo;
8768 if (BYTES_BIG_ENDIAN)
8770 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8771 plus_constant (base, offset + 1)),
8772 gen_lowpart (QImode, outval)));
8773 emit_insn (gen_lshrsi3 (scratch,
8774 gen_rtx_SUBREG (SImode, outval, 0),
8775 GEN_INT (8)));
8776 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8777 gen_lowpart (QImode, scratch)));
8779 else
8781 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8782 gen_lowpart (QImode, outval)));
8783 emit_insn (gen_lshrsi3 (scratch,
8784 gen_rtx_SUBREG (SImode, outval, 0),
8785 GEN_INT (8)));
8786 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8787 plus_constant (base, offset + 1)),
8788 gen_lowpart (QImode, scratch)));
8792 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8793 (padded to the size of a word) should be passed in a register. */
8795 static bool
8796 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8798 if (TARGET_AAPCS_BASED)
8799 return must_pass_in_stack_var_size (mode, type);
8800 else
8801 return must_pass_in_stack_var_size_or_pad (mode, type);
8805 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8806 Return true if an argument passed on the stack should be padded upwards,
8807 i.e. if the least-significant byte has useful data.
8808 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8809 aggregate types are placed in the lowest memory address. */
8811 bool
8812 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8814 if (!TARGET_AAPCS_BASED)
8815 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8817 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8818 return false;
8820 return true;
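/* For instance, on a big-endian AAPCS target an integral argument such as a
   'short' passed on the stack is padded downward (the function returns
   false), while a small structure is padded upward; on little-endian AAPCS
   targets everything is padded upward.  */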
8824 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8825 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8826 byte of the register has useful data, and return the opposite if the
8827 most significant byte does.
8828 For AAPCS, small aggregates and small complex types are always padded
8829 upwards. */
8831 bool
8832 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8833 tree type, int first ATTRIBUTE_UNUSED)
8835 if (TARGET_AAPCS_BASED
8836 && BYTES_BIG_ENDIAN
8837 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8838 && int_size_in_bytes (type) <= 4)
8839 return true;
8841 /* Otherwise, use default padding. */
8842 return !BYTES_BIG_ENDIAN;
8846 /* Print a symbolic form of X to the debug file, F. */
8847 static void
8848 arm_print_value (FILE *f, rtx x)
8850 switch (GET_CODE (x))
8852 case CONST_INT:
8853 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8854 return;
8856 case CONST_DOUBLE:
8857 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8858 return;
8860 case CONST_VECTOR:
8862 int i;
8864 fprintf (f, "<");
8865 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8867 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8868 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8869 fputc (',', f);
8871 fprintf (f, ">");
8873 return;
8875 case CONST_STRING:
8876 fprintf (f, "\"%s\"", XSTR (x, 0));
8877 return;
8879 case SYMBOL_REF:
8880 fprintf (f, "`%s'", XSTR (x, 0));
8881 return;
8883 case LABEL_REF:
8884 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8885 return;
8887 case CONST:
8888 arm_print_value (f, XEXP (x, 0));
8889 return;
8891 case PLUS:
8892 arm_print_value (f, XEXP (x, 0));
8893 fprintf (f, "+");
8894 arm_print_value (f, XEXP (x, 1));
8895 return;
8897 case PC:
8898 fprintf (f, "pc");
8899 return;
8901 default:
8902 fprintf (f, "????");
8903 return;
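/* Example of the debug output produced above: the rtx
   (const (plus (symbol_ref "foo") (const_int 4))) is printed as `foo'+0x4,
   and a label reference is printed as e.g. L42 (the uid of the label insn).  */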
8907 /* Routines for manipulation of the constant pool. */
8909 /* Arm instructions cannot load a large constant directly into a
8910 register; they have to come from a pc relative load. The constant
8911 must therefore be placed in the addressable range of the pc
8912 relative load. Depending on the precise pc relative load
8913 instruction the range is somewhere between 256 bytes and 4k. This
8914 means that we often have to dump a constant inside a function, and
8915 generate code to branch around it.
8917 It is important to minimize this, since the branches will slow
8918 things down and make the code larger.
8920 Normally we can hide the table after an existing unconditional
8921 branch so that there is no interruption of the flow, but in the
8922 worst case the code looks like this:
8924 ldr rn, L1
8926 b L2
8927 align
8928 L1: .long value
8932 ldr rn, L3
8934 b L4
8935 align
8936 L3: .long value
8940 We fix this by performing a scan after scheduling, which notices
8941 which instructions need to have their operands fetched from the
8942 constant table and builds the table.
8944 The algorithm starts by building a table of all the constants that
8945 need fixing up and all the natural barriers in the function (places
8946 where a constant table can be dropped without breaking the flow).
8947 For each fixup we note how far the pc-relative replacement will be
8948 able to reach and the offset of the instruction into the function.
8950 Having built the table we then group the fixes together to form
8951 tables that are as large as possible (subject to addressing
8952 constraints) and emit each table of constants after the last
8953 barrier that is within range of all the instructions in the group.
8954 If a group does not contain a barrier, then we forcibly create one
8955 by inserting a jump instruction into the flow. Once the table has
8956 been inserted, the insns are then modified to reference the
8957 relevant entry in the pool.
8959 Possible enhancements to the algorithm (not implemented) are:
8961 1) For some processors and object formats, there may be benefit in
8962 aligning the pools to the start of cache lines; this alignment
8963 would need to be taken into account when calculating addressability
8964 of a pool. */
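/* A small worked example of the grouping (addresses hypothetical): a load at
   address 0x100 whose pool_range is 4096 can use a pool placed up to about
   0x100 + 4096 - minipool_pad, while a load at 0x900 with the same range can
   reach up to about 0x1900; both fixes can share one pool provided it is
   dumped after a barrier that lies below the smaller of the two limits.  */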
8966 /* These typedefs are located at the start of this file, so that
8967 they can be used in the prototypes there. This comment is to
8968 remind readers of that fact so that the following structures
8969 can be understood more easily.
8971 typedef struct minipool_node Mnode;
8972 typedef struct minipool_fixup Mfix; */
8974 struct minipool_node
8976 /* Doubly linked chain of entries. */
8977 Mnode * next;
8978 Mnode * prev;
8979 /* The maximum offset into the code that this entry can be placed. While
8980 pushing fixes for forward references, all entries are sorted in order
8981 of increasing max_address. */
8982 HOST_WIDE_INT max_address;
8983 /* Similarly for an entry inserted for a backwards ref. */
8984 HOST_WIDE_INT min_address;
8985 /* The number of fixes referencing this entry. This can become zero
8986 if we "unpush" an entry. In this case we ignore the entry when we
8987 come to emit the code. */
8988 int refcount;
8989 /* The offset from the start of the minipool. */
8990 HOST_WIDE_INT offset;
8991 /* The value in the table. */
8992 rtx value;
8993 /* The mode of value. */
8994 enum machine_mode mode;
8995 /* The size of the value. With iWMMXt enabled
8996 sizes > 4 also imply an alignment of 8 bytes. */
8997 int fix_size;
9000 struct minipool_fixup
9002 Mfix * next;
9003 rtx insn;
9004 HOST_WIDE_INT address;
9005 rtx * loc;
9006 enum machine_mode mode;
9007 int fix_size;
9008 rtx value;
9009 Mnode * minipool;
9010 HOST_WIDE_INT forwards;
9011 HOST_WIDE_INT backwards;
9014 /* Fixes less than a word need padding out to a word boundary. */
9015 #define MINIPOOL_FIX_SIZE(mode) \
9016 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
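/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (SImode) are
   both 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */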
9018 static Mnode * minipool_vector_head;
9019 static Mnode * minipool_vector_tail;
9020 static rtx minipool_vector_label;
9021 static int minipool_pad;
9023 /* The linked list of all minipool fixes required for this function. */
9024 Mfix * minipool_fix_head;
9025 Mfix * minipool_fix_tail;
9026 /* The fix entry for the current minipool, once it has been placed. */
9027 Mfix * minipool_barrier;
9029 /* Determines if INSN is the start of a jump table. Returns the end
9030 of the TABLE or NULL_RTX. */
9031 static rtx
9032 is_jump_table (rtx insn)
9034 rtx table;
9036 if (GET_CODE (insn) == JUMP_INSN
9037 && JUMP_LABEL (insn) != NULL
9038 && ((table = next_real_insn (JUMP_LABEL (insn)))
9039 == next_real_insn (insn))
9040 && table != NULL
9041 && GET_CODE (table) == JUMP_INSN
9042 && (GET_CODE (PATTERN (table)) == ADDR_VEC
9043 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
9044 return table;
9046 return NULL_RTX;
9049 #ifndef JUMP_TABLES_IN_TEXT_SECTION
9050 #define JUMP_TABLES_IN_TEXT_SECTION 0
9051 #endif
9053 static HOST_WIDE_INT
9054 get_jump_table_size (rtx insn)
9056 /* ADDR_VECs only take room if read-only data goes into the text
9057 section. */
9058 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
9060 rtx body = PATTERN (insn);
9061 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
9062 HOST_WIDE_INT size;
9063 HOST_WIDE_INT modesize;
9065 modesize = GET_MODE_SIZE (GET_MODE (body));
9066 size = modesize * XVECLEN (body, elt);
9067 switch (modesize)
9069 case 1:
9070 /* Round up size of TBB table to a halfword boundary. */
9071 size = (size + 1) & ~(HOST_WIDE_INT)1;
9072 break;
9073 case 2:
9074 /* No padding necessary for TBH. */
9075 break;
9076 case 4:
9077 /* Add two bytes for alignment on Thumb. */
9078 if (TARGET_THUMB)
9079 size += 2;
9080 break;
9081 default:
9082 gcc_unreachable ();
9084 return size;
9087 return 0;
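/* For illustration: a QImode (TBB) dispatch table with 5 entries occupies 5
   bytes, rounded up to 6 for the halfword boundary; an SImode table with 3
   entries on Thumb occupies 3 * 4 + 2 = 14 bytes including the alignment
   padding.  When the jump tables live in a separate read-only data section
   the function returns 0.  */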
9090 /* Move a minipool fix MP from its current location to before MAX_MP.
9091 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
9092 constraints may need updating. */
9093 static Mnode *
9094 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
9095 HOST_WIDE_INT max_address)
9097 /* The code below assumes these are different. */
9098 gcc_assert (mp != max_mp);
9100 if (max_mp == NULL)
9102 if (max_address < mp->max_address)
9103 mp->max_address = max_address;
9105 else
9107 if (max_address > max_mp->max_address - mp->fix_size)
9108 mp->max_address = max_mp->max_address - mp->fix_size;
9109 else
9110 mp->max_address = max_address;
9112 /* Unlink MP from its current position. Since max_mp is non-null,
9113 mp->prev must be non-null. */
9114 mp->prev->next = mp->next;
9115 if (mp->next != NULL)
9116 mp->next->prev = mp->prev;
9117 else
9118 minipool_vector_tail = mp->prev;
9120 /* Re-insert it before MAX_MP. */
9121 mp->next = max_mp;
9122 mp->prev = max_mp->prev;
9123 max_mp->prev = mp;
9125 if (mp->prev != NULL)
9126 mp->prev->next = mp;
9127 else
9128 minipool_vector_head = mp;
9131 /* Save the new entry. */
9132 max_mp = mp;
9134 /* Scan over the preceding entries and adjust their addresses as
9135 required. */
9136 while (mp->prev != NULL
9137 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9139 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9140 mp = mp->prev;
9143 return max_mp;
9146 /* Add a constant to the minipool for a forward reference. Returns the
9147 node added or NULL if the constant will not fit in this pool. */
9148 static Mnode *
9149 add_minipool_forward_ref (Mfix *fix)
9151 /* If set, max_mp is the first pool_entry that has a lower
9152 constraint than the one we are trying to add. */
9153 Mnode * max_mp = NULL;
9154 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
9155 Mnode * mp;
9157 /* If the minipool starts before the end of FIX->INSN then this FIX
9158 cannot be placed into the current pool. Furthermore, adding the
9159 new constant pool entry may cause the pool to start FIX_SIZE bytes
9160 earlier. */
9161 if (minipool_vector_head &&
9162 (fix->address + get_attr_length (fix->insn)
9163 >= minipool_vector_head->max_address - fix->fix_size))
9164 return NULL;
9166 /* Scan the pool to see if a constant with the same value has
9167 already been added. While we are doing this, also note the
9168 location where we must insert the constant if it doesn't already
9169 exist. */
9170 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9172 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9173 && fix->mode == mp->mode
9174 && (GET_CODE (fix->value) != CODE_LABEL
9175 || (CODE_LABEL_NUMBER (fix->value)
9176 == CODE_LABEL_NUMBER (mp->value)))
9177 && rtx_equal_p (fix->value, mp->value))
9179 /* More than one fix references this entry. */
9180 mp->refcount++;
9181 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
9184 /* Note the insertion point if necessary. */
9185 if (max_mp == NULL
9186 && mp->max_address > max_address)
9187 max_mp = mp;
9189 /* If we are inserting an 8-byte aligned quantity and
9190 we have not already found an insertion point, then
9191 make sure that all such 8-byte aligned quantities are
9192 placed at the start of the pool. */
9193 if (ARM_DOUBLEWORD_ALIGN
9194 && max_mp == NULL
9195 && fix->fix_size >= 8
9196 && mp->fix_size < 8)
9198 max_mp = mp;
9199 max_address = mp->max_address;
9203 /* The value is not currently in the minipool, so we need to create
9204 a new entry for it. If MAX_MP is NULL, the entry will be put on
9205 the end of the list since the placement is less constrained than
9206 any existing entry. Otherwise, we insert the new fix before
9207 MAX_MP and, if necessary, adjust the constraints on the other
9208 entries. */
9209 mp = XNEW (Mnode);
9210 mp->fix_size = fix->fix_size;
9211 mp->mode = fix->mode;
9212 mp->value = fix->value;
9213 mp->refcount = 1;
9214 /* Not yet required for a backwards ref. */
9215 mp->min_address = -65536;
9217 if (max_mp == NULL)
9219 mp->max_address = max_address;
9220 mp->next = NULL;
9221 mp->prev = minipool_vector_tail;
9223 if (mp->prev == NULL)
9225 minipool_vector_head = mp;
9226 minipool_vector_label = gen_label_rtx ();
9228 else
9229 mp->prev->next = mp;
9231 minipool_vector_tail = mp;
9233 else
9235 if (max_address > max_mp->max_address - mp->fix_size)
9236 mp->max_address = max_mp->max_address - mp->fix_size;
9237 else
9238 mp->max_address = max_address;
9240 mp->next = max_mp;
9241 mp->prev = max_mp->prev;
9242 max_mp->prev = mp;
9243 if (mp->prev != NULL)
9244 mp->prev->next = mp;
9245 else
9246 minipool_vector_head = mp;
9249 /* Save the new entry. */
9250 max_mp = mp;
9252 /* Scan over the preceding entries and adjust their addresses as
9253 required. */
9254 while (mp->prev != NULL
9255 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9257 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9258 mp = mp->prev;
9261 return max_mp;
9264 static Mnode *
9265 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
9266 HOST_WIDE_INT min_address)
9268 HOST_WIDE_INT offset;
9270 /* The code below assumes these are different. */
9271 gcc_assert (mp != min_mp);
9273 if (min_mp == NULL)
9275 if (min_address > mp->min_address)
9276 mp->min_address = min_address;
9278 else
9280 /* We will adjust this below if it is too loose. */
9281 mp->min_address = min_address;
9283 /* Unlink MP from its current position. Since min_mp is non-null,
9284 mp->next must be non-null. */
9285 mp->next->prev = mp->prev;
9286 if (mp->prev != NULL)
9287 mp->prev->next = mp->next;
9288 else
9289 minipool_vector_head = mp->next;
9291 /* Reinsert it after MIN_MP. */
9292 mp->prev = min_mp;
9293 mp->next = min_mp->next;
9294 min_mp->next = mp;
9295 if (mp->next != NULL)
9296 mp->next->prev = mp;
9297 else
9298 minipool_vector_tail = mp;
9301 min_mp = mp;
9303 offset = 0;
9304 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9306 mp->offset = offset;
9307 if (mp->refcount > 0)
9308 offset += mp->fix_size;
9310 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
9311 mp->next->min_address = mp->min_address + mp->fix_size;
9314 return min_mp;
9317 /* Add a constant to the minipool for a backward reference. Returns the
9318 node added or NULL if the constant will not fit in this pool.
9320 Note that the code for insertion for a backwards reference can be
9321 somewhat confusing because the calculated offsets for each fix do
9322 not take into account the size of the pool (which is still under
9323 construction). */
9324 static Mnode *
9325 add_minipool_backward_ref (Mfix *fix)
9327 /* If set, min_mp is the last pool_entry that has a lower constraint
9328 than the one we are trying to add. */
9329 Mnode *min_mp = NULL;
9330 /* This can be negative, since it is only a constraint. */
9331 HOST_WIDE_INT min_address = fix->address - fix->backwards;
9332 Mnode *mp;
9334 /* If we can't reach the current pool from this insn, or if we can't
9335 insert this entry at the end of the pool without pushing other
9336 fixes out of range, then we don't try. This ensures that we
9337 can't fail later on. */
9338 if (min_address >= minipool_barrier->address
9339 || (minipool_vector_tail->min_address + fix->fix_size
9340 >= minipool_barrier->address))
9341 return NULL;
9343 /* Scan the pool to see if a constant with the same value has
9344 already been added. While we are doing this, also note the
9345 location where we must insert the constant if it doesn't already
9346 exist. */
9347 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
9349 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9350 && fix->mode == mp->mode
9351 && (GET_CODE (fix->value) != CODE_LABEL
9352 || (CODE_LABEL_NUMBER (fix->value)
9353 == CODE_LABEL_NUMBER (mp->value)))
9354 && rtx_equal_p (fix->value, mp->value)
9355 /* Check that there is enough slack to move this entry to the
9356 end of the table (this is conservative). */
9357 && (mp->max_address
9358 > (minipool_barrier->address
9359 + minipool_vector_tail->offset
9360 + minipool_vector_tail->fix_size)))
9362 mp->refcount++;
9363 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
9366 if (min_mp != NULL)
9367 mp->min_address += fix->fix_size;
9368 else
9370 /* Note the insertion point if necessary. */
9371 if (mp->min_address < min_address)
9373 /* For now, we do not allow the insertion of 8-byte alignment
9374 requiring nodes anywhere but at the start of the pool. */
9375 if (ARM_DOUBLEWORD_ALIGN
9376 && fix->fix_size >= 8 && mp->fix_size < 8)
9377 return NULL;
9378 else
9379 min_mp = mp;
9381 else if (mp->max_address
9382 < minipool_barrier->address + mp->offset + fix->fix_size)
9384 /* Inserting before this entry would push the fix beyond
9385 its maximum address (which can happen if we have
9386 re-located a forwards fix); force the new fix to come
9387 after it. */
9388 if (ARM_DOUBLEWORD_ALIGN
9389 && fix->fix_size >= 8 && mp->fix_size < 8)
9390 return NULL;
9391 else
9393 min_mp = mp;
9394 min_address = mp->min_address + fix->fix_size;
9397 /* Do not insert a non-8-byte aligned quantity before 8-byte
9398 aligned quantities. */
9399 else if (ARM_DOUBLEWORD_ALIGN
9400 && fix->fix_size < 8
9401 && mp->fix_size >= 8)
9403 min_mp = mp;
9404 min_address = mp->min_address + fix->fix_size;
9409 /* We need to create a new entry. */
9410 mp = XNEW (Mnode);
9411 mp->fix_size = fix->fix_size;
9412 mp->mode = fix->mode;
9413 mp->value = fix->value;
9414 mp->refcount = 1;
9415 mp->max_address = minipool_barrier->address + 65536;
9417 mp->min_address = min_address;
9419 if (min_mp == NULL)
9421 mp->prev = NULL;
9422 mp->next = minipool_vector_head;
9424 if (mp->next == NULL)
9426 minipool_vector_tail = mp;
9427 minipool_vector_label = gen_label_rtx ();
9429 else
9430 mp->next->prev = mp;
9432 minipool_vector_head = mp;
9434 else
9436 mp->next = min_mp->next;
9437 mp->prev = min_mp;
9438 min_mp->next = mp;
9440 if (mp->next != NULL)
9441 mp->next->prev = mp;
9442 else
9443 minipool_vector_tail = mp;
9446 /* Save the new entry. */
9447 min_mp = mp;
9449 if (mp->prev)
9450 mp = mp->prev;
9451 else
9452 mp->offset = 0;
9454 /* Scan over the following entries and adjust their offsets. */
9455 while (mp->next != NULL)
9457 if (mp->next->min_address < mp->min_address + mp->fix_size)
9458 mp->next->min_address = mp->min_address + mp->fix_size;
9460 if (mp->refcount)
9461 mp->next->offset = mp->offset + mp->fix_size;
9462 else
9463 mp->next->offset = mp->offset;
9465 mp = mp->next;
9468 return min_mp;
9471 static void
9472 assign_minipool_offsets (Mfix *barrier)
9474 HOST_WIDE_INT offset = 0;
9475 Mnode *mp;
9477 minipool_barrier = barrier;
9479 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9481 mp->offset = offset;
9483 if (mp->refcount > 0)
9484 offset += mp->fix_size;
9488 /* Output the literal table */
9489 static void
9490 dump_minipool (rtx scan)
9492 Mnode * mp;
9493 Mnode * nmp;
9494 int align64 = 0;
9496 if (ARM_DOUBLEWORD_ALIGN)
9497 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9498 if (mp->refcount > 0 && mp->fix_size >= 8)
9500 align64 = 1;
9501 break;
9504 if (dump_file)
9505 fprintf (dump_file,
9506 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
9507 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
9509 scan = emit_label_after (gen_label_rtx (), scan);
9510 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
9511 scan = emit_label_after (minipool_vector_label, scan);
9513 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
9515 if (mp->refcount > 0)
9517 if (dump_file)
9519 fprintf (dump_file,
9520 ";; Offset %u, min %ld, max %ld ",
9521 (unsigned) mp->offset, (unsigned long) mp->min_address,
9522 (unsigned long) mp->max_address);
9523 arm_print_value (dump_file, mp->value);
9524 fputc ('\n', dump_file);
9527 switch (mp->fix_size)
9529 #ifdef HAVE_consttable_1
9530 case 1:
9531 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9532 break;
9534 #endif
9535 #ifdef HAVE_consttable_2
9536 case 2:
9537 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9538 break;
9540 #endif
9541 #ifdef HAVE_consttable_4
9542 case 4:
9543 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9544 break;
9546 #endif
9547 #ifdef HAVE_consttable_8
9548 case 8:
9549 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9550 break;
9552 #endif
9553 #ifdef HAVE_consttable_16
9554 case 16:
9555 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9556 break;
9558 #endif
9559 default:
9560 gcc_unreachable ();
9564 nmp = mp->next;
9565 free (mp);
9568 minipool_vector_head = minipool_vector_tail = NULL;
9569 scan = emit_insn_after (gen_consttable_end (), scan);
9570 scan = emit_barrier_after (scan);
9573 /* Return the cost of forcibly inserting a barrier after INSN. */
9574 static int
9575 arm_barrier_cost (rtx insn)
9577 /* Basing the location of the pool on the loop depth is preferable,
9578 but at the moment, the basic block information seems to be
9579 corrupt by this stage of the compilation. */
9580 int base_cost = 50;
9581 rtx next = next_nonnote_insn (insn);
9583 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9584 base_cost -= 20;
9586 switch (GET_CODE (insn))
9588 case CODE_LABEL:
9589 /* It will always be better to place the table before the label, rather
9590 than after it. */
9591 return 50;
9593 case INSN:
9594 case CALL_INSN:
9595 return base_cost;
9597 case JUMP_INSN:
9598 return base_cost - 10;
9600 default:
9601 return base_cost + 10;
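/* Example costs (lower is preferred by create_fix_barrier): a plain insn or
   call scores 50 and a jump 40, and either scores 20 less again when the
   following insn is a code label; a code label itself always scores 50 so
   the pool tends to land before it rather than after.  */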
9605 /* Find the best place in the insn stream in the range
9606 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9607 Create the barrier by inserting a jump and add a new fix entry for
9608 it. */
9609 static Mfix *
9610 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9612 HOST_WIDE_INT count = 0;
9613 rtx barrier;
9614 rtx from = fix->insn;
9615 /* The instruction after which we will insert the jump. */
9616 rtx selected = NULL;
9617 int selected_cost;
9618 /* The address at which the jump instruction will be placed. */
9619 HOST_WIDE_INT selected_address;
9620 Mfix * new_fix;
9621 HOST_WIDE_INT max_count = max_address - fix->address;
9622 rtx label = gen_label_rtx ();
9624 selected_cost = arm_barrier_cost (from);
9625 selected_address = fix->address;
9627 while (from && count < max_count)
9629 rtx tmp;
9630 int new_cost;
9632 /* This code shouldn't have been called if there was a natural barrier
9633 within range. */
9634 gcc_assert (GET_CODE (from) != BARRIER);
9636 /* Count the length of this insn. */
9637 count += get_attr_length (from);
9639 /* If there is a jump table, add its length. */
9640 tmp = is_jump_table (from);
9641 if (tmp != NULL)
9643 count += get_jump_table_size (tmp);
9645 /* Jump tables aren't in a basic block, so base the cost on
9646 the dispatch insn. If we select this location, we will
9647 still put the pool after the table. */
9648 new_cost = arm_barrier_cost (from);
9650 if (count < max_count
9651 && (!selected || new_cost <= selected_cost))
9653 selected = tmp;
9654 selected_cost = new_cost;
9655 selected_address = fix->address + count;
9658 /* Continue after the dispatch table. */
9659 from = NEXT_INSN (tmp);
9660 continue;
9663 new_cost = arm_barrier_cost (from);
9665 if (count < max_count
9666 && (!selected || new_cost <= selected_cost))
9668 selected = from;
9669 selected_cost = new_cost;
9670 selected_address = fix->address + count;
9673 from = NEXT_INSN (from);
9676 /* Make sure that we found a place to insert the jump. */
9677 gcc_assert (selected);
9679 /* Create a new JUMP_INSN that branches around a barrier. */
9680 from = emit_jump_insn_after (gen_jump (label), selected);
9681 JUMP_LABEL (from) = label;
9682 barrier = emit_barrier_after (from);
9683 emit_label_after (label, barrier);
9685 /* Create a minipool barrier entry for the new barrier. */
9686 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9687 new_fix->insn = barrier;
9688 new_fix->address = selected_address;
9689 new_fix->next = fix->next;
9690 fix->next = new_fix;
9692 return new_fix;
9695 /* Record that there is a natural barrier in the insn stream at
9696 ADDRESS. */
9697 static void
9698 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9700 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9702 fix->insn = insn;
9703 fix->address = address;
9705 fix->next = NULL;
9706 if (minipool_fix_head != NULL)
9707 minipool_fix_tail->next = fix;
9708 else
9709 minipool_fix_head = fix;
9711 minipool_fix_tail = fix;
9714 /* Record INSN, which will need fixing up to load a value from the
9715 minipool. ADDRESS is the offset of the insn since the start of the
9716 function; LOC is a pointer to the part of the insn which requires
9717 fixing; VALUE is the constant that must be loaded, which is of type
9718 MODE. */
9719 static void
9720 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9721 enum machine_mode mode, rtx value)
9723 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9725 fix->insn = insn;
9726 fix->address = address;
9727 fix->loc = loc;
9728 fix->mode = mode;
9729 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9730 fix->value = value;
9731 fix->forwards = get_attr_pool_range (insn);
9732 fix->backwards = get_attr_neg_pool_range (insn);
9733 fix->minipool = NULL;
9735 /* If an insn doesn't have a range defined for it, then it isn't
9736 expecting to be reworked by this code. Better to stop now than
9737 to generate duff assembly code. */
9738 gcc_assert (fix->forwards || fix->backwards);
9740 /* If an entry requires 8-byte alignment then assume all constant pools
9741 require 4 bytes of padding. Trying to do this later on a per-pool
9742 basis is awkward because existing pool entries have to be modified. */
9743 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9744 minipool_pad = 4;
9746 if (dump_file)
9748 fprintf (dump_file,
9749 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9750 GET_MODE_NAME (mode),
9751 INSN_UID (insn), (unsigned long) address,
9752 -1 * (long)fix->backwards, (long)fix->forwards);
9753 arm_print_value (dump_file, fix->value);
9754 fprintf (dump_file, "\n");
9757 /* Add it to the chain of fixes. */
9758 fix->next = NULL;
9760 if (minipool_fix_head != NULL)
9761 minipool_fix_tail->next = fix;
9762 else
9763 minipool_fix_head = fix;
9765 minipool_fix_tail = fix;
9768 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9769 Returns the number of insns needed, or 99 if we don't know how to
9770 do it. */
9771 int
9772 arm_const_double_inline_cost (rtx val)
9774 rtx lowpart, highpart;
9775 enum machine_mode mode;
9777 mode = GET_MODE (val);
9779 if (mode == VOIDmode)
9780 mode = DImode;
9782 gcc_assert (GET_MODE_SIZE (mode) == 8);
9784 lowpart = gen_lowpart (SImode, val);
9785 highpart = gen_highpart_mode (SImode, mode, val);
9787 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9788 gcc_assert (GET_CODE (highpart) == CONST_INT);
9790 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9791 NULL_RTX, NULL_RTX, 0, 0)
9792 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9793 NULL_RTX, NULL_RTX, 0, 0));
9796 /* Return true if it is worthwhile to split a 64-bit constant into two
9797 32-bit operations. This is the case if optimizing for size, or
9798 if we have load delay slots, or if one 32-bit part can be done with
9799 a single data operation. */
9800 bool
9801 arm_const_double_by_parts (rtx val)
9803 enum machine_mode mode = GET_MODE (val);
9804 rtx part;
9806 if (optimize_size || arm_ld_sched)
9807 return true;
9809 if (mode == VOIDmode)
9810 mode = DImode;
9812 part = gen_highpart_mode (SImode, mode, val);
9814 gcc_assert (GET_CODE (part) == CONST_INT);
9816 if (const_ok_for_arm (INTVAL (part))
9817 || const_ok_for_arm (~INTVAL (part)))
9818 return true;
9820 part = gen_lowpart (SImode, val);
9822 gcc_assert (GET_CODE (part) == CONST_INT);
9824 if (const_ok_for_arm (INTVAL (part))
9825 || const_ok_for_arm (~INTVAL (part)))
9826 return true;
9828 return false;
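/* For instance, a DImode value whose high word is zero always returns true,
   since 0 is a valid ARM immediate; something like 0x123456789abcdef0, where
   neither half (nor its complement) is a valid immediate, returns false
   unless we are optimizing for size or the core has load delay slots.  */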
9831 /* Scan INSN and note any of its operands that need fixing.
9832 If DO_PUSHES is false we do not actually push any of the fixups
9833 needed. The function returns TRUE if any fixups were needed/pushed.
9834 This is used by arm_memory_load_p() which needs to know about loads
9835 of constants that will be converted into minipool loads. */
9836 static bool
9837 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9839 bool result = false;
9840 int opno;
9842 extract_insn (insn);
9844 if (!constrain_operands (1))
9845 fatal_insn_not_found (insn);
9847 if (recog_data.n_alternatives == 0)
9848 return false;
9850 /* Fill in recog_op_alt with information about the constraints of
9851 this insn. */
9852 preprocess_constraints ();
9854 for (opno = 0; opno < recog_data.n_operands; opno++)
9856 /* Things we need to fix can only occur in inputs. */
9857 if (recog_data.operand_type[opno] != OP_IN)
9858 continue;
9860 /* If this alternative is a memory reference, then any mention
9861 of constants in this alternative is really to fool reload
9862 into allowing us to accept one there. We need to fix them up
9863 now so that we output the right code. */
9864 if (recog_op_alt[opno][which_alternative].memory_ok)
9866 rtx op = recog_data.operand[opno];
9868 if (CONSTANT_P (op))
9870 if (do_pushes)
9871 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9872 recog_data.operand_mode[opno], op);
9873 result = true;
9875 else if (GET_CODE (op) == MEM
9876 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9877 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9879 if (do_pushes)
9881 rtx cop = avoid_constant_pool_reference (op);
9883 /* Casting the address of something to a mode narrower
9884 than a word can cause avoid_constant_pool_reference()
9885 to return the pool reference itself. That's no good to
9886 us here. Let's just hope that we can use the
9887 constant pool value directly. */
9888 if (op == cop)
9889 cop = get_pool_constant (XEXP (op, 0));
9891 push_minipool_fix (insn, address,
9892 recog_data.operand_loc[opno],
9893 recog_data.operand_mode[opno], cop);
9896 result = true;
9901 return result;
9904 /* Gcc puts the pool in the wrong place for ARM, since we can only
9905 load addresses a limited distance around the pc. We do some
9906 special munging to move the constant pool values to the correct
9907 point in the code. */
9908 static void
9909 arm_reorg (void)
9911 rtx insn;
9912 HOST_WIDE_INT address = 0;
9913 Mfix * fix;
9915 minipool_fix_head = minipool_fix_tail = NULL;
9917 /* The first insn must always be a note, or the code below won't
9918 scan it properly. */
9919 insn = get_insns ();
9920 gcc_assert (GET_CODE (insn) == NOTE);
9921 minipool_pad = 0;
9923 /* Scan all the insns and record the operands that will need fixing. */
9924 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9926 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9927 && (arm_cirrus_insn_p (insn)
9928 || GET_CODE (insn) == JUMP_INSN
9929 || arm_memory_load_p (insn)))
9930 cirrus_reorg (insn);
9932 if (GET_CODE (insn) == BARRIER)
9933 push_minipool_barrier (insn, address);
9934 else if (INSN_P (insn))
9936 rtx table;
9938 note_invalid_constants (insn, address, true);
9939 address += get_attr_length (insn);
9941 /* If the insn is a vector jump, add the size of the table
9942 and skip the table. */
9943 if ((table = is_jump_table (insn)) != NULL)
9945 address += get_jump_table_size (table);
9946 insn = table;
9951 fix = minipool_fix_head;
9953 /* Now scan the fixups and perform the required changes. */
9954 while (fix)
9956 Mfix * ftmp;
9957 Mfix * fdel;
9958 Mfix * last_added_fix;
9959 Mfix * last_barrier = NULL;
9960 Mfix * this_fix;
9962 /* Skip any further barriers before the next fix. */
9963 while (fix && GET_CODE (fix->insn) == BARRIER)
9964 fix = fix->next;
9966 /* No more fixes. */
9967 if (fix == NULL)
9968 break;
9970 last_added_fix = NULL;
9972 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9974 if (GET_CODE (ftmp->insn) == BARRIER)
9976 if (ftmp->address >= minipool_vector_head->max_address)
9977 break;
9979 last_barrier = ftmp;
9981 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9982 break;
9984 last_added_fix = ftmp; /* Keep track of the last fix added. */
9987 /* If we found a barrier, drop back to that; any fixes that we
9988 could have reached but come after the barrier will now go in
9989 the next mini-pool. */
9990 if (last_barrier != NULL)
9992 /* Reduce the refcount for those fixes that won't go into this
9993 pool after all. */
9994 for (fdel = last_barrier->next;
9995 fdel && fdel != ftmp;
9996 fdel = fdel->next)
9998 fdel->minipool->refcount--;
9999 fdel->minipool = NULL;
10002 ftmp = last_barrier;
10004 else
10006 /* ftmp is the first fix that we can't fit into this pool and
10007 there are no natural barriers that we could use. Insert a
10008 new barrier in the code somewhere between the previous
10009 fix and this one, and arrange to jump around it. */
10010 HOST_WIDE_INT max_address;
10012 /* The last item on the list of fixes must be a barrier, so
10013 we can never run off the end of the list of fixes without
10014 last_barrier being set. */
10015 gcc_assert (ftmp);
10017 max_address = minipool_vector_head->max_address;
10018 /* Check that there isn't another fix that is in range that
10019 we couldn't fit into this pool because the pool was
10020 already too large: we need to put the pool before such an
10021 instruction. The pool itself may come just after the
10022 fix because create_fix_barrier also allows space for a
10023 jump instruction. */
10024 if (ftmp->address < max_address)
10025 max_address = ftmp->address + 1;
10027 last_barrier = create_fix_barrier (last_added_fix, max_address);
10030 assign_minipool_offsets (last_barrier);
10032 while (ftmp)
10034 if (GET_CODE (ftmp->insn) != BARRIER
10035 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
10036 == NULL))
10037 break;
10039 ftmp = ftmp->next;
10042 /* Scan over the fixes we have identified for this pool, fixing them
10043 up and adding the constants to the pool itself. */
10044 for (this_fix = fix; this_fix && ftmp != this_fix;
10045 this_fix = this_fix->next)
10046 if (GET_CODE (this_fix->insn) != BARRIER)
10048 rtx addr
10049 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
10050 minipool_vector_label),
10051 this_fix->minipool->offset);
10052 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
10055 dump_minipool (last_barrier->insn);
10056 fix = ftmp;
10059 /* From now on we must synthesize any constants that we can't handle
10060 directly. This can happen if the RTL gets split during final
10061 instruction generation. */
10062 after_arm_reorg = 1;
10064 /* Free the minipool memory. */
10065 obstack_free (&minipool_obstack, minipool_startobj);
10068 /* Routines to output assembly language. */
10070 /* If the rtx is the correct value then return the string of the number.
10071 In this way we can ensure that valid double constants are generated even
10072 when cross compiling. */
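/* For reference (hedged: the table itself is set up by init_fp_table,
   defined earlier in this file), the eight FPA immediate constants are
   conventionally 0, 1, 2, 3, 4, 5, 0.5 and 10, so an rtx holding the
   value 2.0 would map to the string "2".  */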
10073 const char *
10074 fp_immediate_constant (rtx x)
10076 REAL_VALUE_TYPE r;
10077 int i;
10079 if (!fp_consts_inited)
10080 init_fp_table ();
10082 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10083 for (i = 0; i < 8; i++)
10084 if (REAL_VALUES_EQUAL (r, values_fp[i]))
10085 return strings_fp[i];
10087 gcc_unreachable ();
10090 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
10091 static const char *
10092 fp_const_from_val (REAL_VALUE_TYPE *r)
10094 int i;
10096 if (!fp_consts_inited)
10097 init_fp_table ();
10099 for (i = 0; i < 8; i++)
10100 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
10101 return strings_fp[i];
10103 gcc_unreachable ();
10106 /* Output the operands of a LDM/STM instruction to STREAM.
10107 MASK is the ARM register set mask of which only bits 0-15 are important.
10108 REG is the base register, either the frame pointer or the stack pointer,
10109 INSTR is the possibly suffixed load or store instruction.
10110 RFE is nonzero if the instruction should also copy spsr to cpsr. */
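/* Illustrative example only: with an instruction template along the
   lines of "ldmfd\t%r!, " (an assumption, not a quote from a caller),
   REG = SP_REGNUM and MASK covering r4, r5 and lr, this would emit

	ldmfd	sp!, {r4, r5, lr}

   and with RFE nonzero the closing brace becomes "}^" so that SPSR is
   copied back into CPSR.  */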
10112 static void
10113 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
10114 unsigned long mask, int rfe)
10116 unsigned i;
10117 bool not_first = FALSE;
10119 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
10120 fputc ('\t', stream);
10121 asm_fprintf (stream, instr, reg);
10122 fputc ('{', stream);
10124 for (i = 0; i <= LAST_ARM_REGNUM; i++)
10125 if (mask & (1 << i))
10127 if (not_first)
10128 fprintf (stream, ", ");
10130 asm_fprintf (stream, "%r", i);
10131 not_first = TRUE;
10134 if (rfe)
10135 fprintf (stream, "}^\n");
10136 else
10137 fprintf (stream, "}\n");
10141 /* Output a FLDMD instruction to STREAM.
10142 BASE is the register containing the address.
10143 REG and COUNT specify the register range.
10144 Extra registers may be added to avoid hardware bugs.
10146 We output FLDMD even for ARMv5 VFP implementations. Although
10147 FLDMD is technically not supported until ARMv6, it is believed
10148 that all VFP implementations support its use in this context. */
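/* As an illustration: BASE = SP_REGNUM, REG = 8 and COUNT = 2 on a
   pre-ARMv6 core is widened to three registers by the ARM10 VFPr1
   workaround below and emits approximately

	fldmfdd	sp!, {d8, d9, d10}
*/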
10150 static void
10151 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
10153 int i;
10155 /* Workaround ARM10 VFPr1 bug. */
10156 if (count == 2 && !arm_arch6)
10158 if (reg == 15)
10159 reg--;
10160 count++;
10163 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
10164 load into multiple parts if we have to handle more than 16 registers. */
10165 if (count > 16)
10167 vfp_output_fldmd (stream, base, reg, 16);
10168 vfp_output_fldmd (stream, base, reg + 16, count - 16);
10169 return;
10172 fputc ('\t', stream);
10173 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
10175 for (i = reg; i < reg + count; i++)
10177 if (i > reg)
10178 fputs (", ", stream);
10179 asm_fprintf (stream, "d%d", i);
10181 fputs ("}\n", stream);
10186 /* Output the assembly for a store multiple. */
10188 const char *
10189 vfp_output_fstmd (rtx * operands)
10191 char pattern[100];
10192 int p;
10193 int base;
10194 int i;
10196 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
10197 p = strlen (pattern);
10199 gcc_assert (GET_CODE (operands[1]) == REG);
10201 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
10202 for (i = 1; i < XVECLEN (operands[2], 0); i++)
10204 p += sprintf (&pattern[p], ", d%d", base + i);
10206 strcpy (&pattern[p], "}");
10208 output_asm_insn (pattern, operands);
10209 return "";
10213 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
10214 number of bytes pushed. */
10216 static int
10217 vfp_emit_fstmd (int base_reg, int count)
10219 rtx par;
10220 rtx dwarf;
10221 rtx tmp, reg;
10222 int i;
10224 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
10225 register pairs are stored by a store multiple insn. We avoid this
10226 by pushing an extra pair. */
10227 if (count == 2 && !arm_arch6)
10229 if (base_reg == LAST_VFP_REGNUM - 3)
10230 base_reg -= 2;
10231 count++;
10234 /* FSTMD may not store more than 16 doubleword registers at once. Split
10235 larger stores into multiple parts (up to a maximum of two, in
10236 practice). */
10237 if (count > 16)
10239 int saved;
10240 /* NOTE: base_reg is an internal register number, so each D register
10241 counts as 2. */
10242 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
10243 saved += vfp_emit_fstmd (base_reg, 16);
10244 return saved;
10247 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10248 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10250 reg = gen_rtx_REG (DFmode, base_reg);
10251 base_reg += 2;
10253 XVECEXP (par, 0, 0)
10254 = gen_rtx_SET (VOIDmode,
10255 gen_frame_mem (BLKmode,
10256 gen_rtx_PRE_DEC (BLKmode,
10257 stack_pointer_rtx)),
10258 gen_rtx_UNSPEC (BLKmode,
10259 gen_rtvec (1, reg),
10260 UNSPEC_PUSH_MULT));
10262 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10263 plus_constant (stack_pointer_rtx, -(count * 8)));
10264 RTX_FRAME_RELATED_P (tmp) = 1;
10265 XVECEXP (dwarf, 0, 0) = tmp;
10267 tmp = gen_rtx_SET (VOIDmode,
10268 gen_frame_mem (DFmode, stack_pointer_rtx),
10269 reg);
10270 RTX_FRAME_RELATED_P (tmp) = 1;
10271 XVECEXP (dwarf, 0, 1) = tmp;
10273 for (i = 1; i < count; i++)
10275 reg = gen_rtx_REG (DFmode, base_reg);
10276 base_reg += 2;
10277 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10279 tmp = gen_rtx_SET (VOIDmode,
10280 gen_frame_mem (DFmode,
10281 plus_constant (stack_pointer_rtx,
10282 i * 8)),
10283 reg);
10284 RTX_FRAME_RELATED_P (tmp) = 1;
10285 XVECEXP (dwarf, 0, i + 1) = tmp;
10288 par = emit_insn (par);
10289 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
10290 RTX_FRAME_RELATED_P (par) = 1;
10292 return count * 8;
10295 /* Emit a call instruction with pattern PAT. ADDR is the address of
10296 the call target. */
10298 void
10299 arm_emit_call_insn (rtx pat, rtx addr)
10301 rtx insn;
10303 insn = emit_call_insn (pat);
10305 /* The PIC register is live on entry to VxWorks PIC PLT entries.
10306 If the call might use such an entry, add a use of the PIC register
10307 to the instruction's CALL_INSN_FUNCTION_USAGE. */
10308 if (TARGET_VXWORKS_RTP
10309 && flag_pic
10310 && GET_CODE (addr) == SYMBOL_REF
10311 && (SYMBOL_REF_DECL (addr)
10312 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
10313 : !SYMBOL_REF_LOCAL_P (addr)))
10315 require_pic_register ();
10316 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
10320 /* Output a 'call' insn. */
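/* Illustrative expansion (pre-ARMv5 only, since later cores use blx):
   with the target address in r3 this emits roughly

	mov	lr, pc
	mov	pc, r3		@ or "bx r3" with interworking/ARMv4T

   Reading the pc in ARM state yields the address two instructions ahead,
   so lr ends up holding the correct return address.  */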
10321 const char *
10322 output_call (rtx *operands)
10324 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
10326 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
10327 if (REGNO (operands[0]) == LR_REGNUM)
10329 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
10330 output_asm_insn ("mov%?\t%0, %|lr", operands);
10333 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10335 if (TARGET_INTERWORK || arm_arch4t)
10336 output_asm_insn ("bx%?\t%0", operands);
10337 else
10338 output_asm_insn ("mov%?\t%|pc, %0", operands);
10340 return "";
10343 /* Output a 'call' insn that is a reference in memory. */
10344 const char *
10345 output_call_mem (rtx *operands)
10347 if (TARGET_INTERWORK && !arm_arch5)
10349 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10350 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10351 output_asm_insn ("bx%?\t%|ip", operands);
10353 else if (regno_use_in (LR_REGNUM, operands[0]))
10355 /* LR is used in the memory address. We load the address in the
10356 first instruction. It's safe to use IP as the target of the
10357 load since the call will kill it anyway. */
10358 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10359 if (arm_arch5)
10360 output_asm_insn ("blx%?\t%|ip", operands);
10361 else
10363 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10364 if (arm_arch4t)
10365 output_asm_insn ("bx%?\t%|ip", operands);
10366 else
10367 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
10370 else
10372 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10373 output_asm_insn ("ldr%?\t%|pc, %0", operands);
10376 return "";
10380 /* Output a move from arm registers to an fpa register.
10381 OPERANDS[0] is an fpa register.
10382 OPERANDS[1] is the first register of an arm register pair. */
10383 const char *
10384 output_mov_long_double_fpa_from_arm (rtx *operands)
10386 int arm_reg0 = REGNO (operands[1]);
10387 rtx ops[3];
10389 gcc_assert (arm_reg0 != IP_REGNUM);
10391 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10392 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10393 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10395 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10396 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
10398 return "";
10401 /* Output a move from an fpa register to arm registers.
10402 OPERANDS[0] is the first register of an arm register pair.
10403 OPERANDS[1] is an fpa register. */
10404 const char *
10405 output_mov_long_double_arm_from_fpa (rtx *operands)
10407 int arm_reg0 = REGNO (operands[0]);
10408 rtx ops[3];
10410 gcc_assert (arm_reg0 != IP_REGNUM);
10412 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10413 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10414 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10416 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
10417 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10418 return "";
10421 /* Output a move from arm registers to arm registers of a long double.
10422 OPERANDS[0] is the destination.
10423 OPERANDS[1] is the source. */
10424 const char *
10425 output_mov_long_double_arm_from_arm (rtx *operands)
10427 /* We have to be careful here because the two might overlap. */
10428 int dest_start = REGNO (operands[0]);
10429 int src_start = REGNO (operands[1]);
10430 rtx ops[2];
10431 int i;
10433 if (dest_start < src_start)
10435 for (i = 0; i < 3; i++)
10437 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10438 ops[1] = gen_rtx_REG (SImode, src_start + i);
10439 output_asm_insn ("mov%?\t%0, %1", ops);
10442 else
10444 for (i = 2; i >= 0; i--)
10446 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10447 ops[1] = gen_rtx_REG (SImode, src_start + i);
10448 output_asm_insn ("mov%?\t%0, %1", ops);
10452 return "";
10456 /* Emit a MOVW/MOVT pair. */
10457 void arm_emit_movpair (rtx dest, rtx src)
10459 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
10460 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
10464 /* Output a move from arm registers to an fpa register.
10465 OPERANDS[0] is an fpa register.
10466 OPERANDS[1] is the first register of an arm register pair. */
10467 const char *
10468 output_mov_double_fpa_from_arm (rtx *operands)
10470 int arm_reg0 = REGNO (operands[1]);
10471 rtx ops[2];
10473 gcc_assert (arm_reg0 != IP_REGNUM);
10475 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10476 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10477 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
10478 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
10479 return "";
10482 /* Output a move from an fpa register to arm registers.
10483 OPERANDS[0] is the first register of an arm register pair.
10484 OPERANDS[1] is an fpa register. */
10485 const char *
10486 output_mov_double_arm_from_fpa (rtx *operands)
10488 int arm_reg0 = REGNO (operands[0]);
10489 rtx ops[2];
10491 gcc_assert (arm_reg0 != IP_REGNUM);
10493 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10494 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10495 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
10496 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
10497 return "";
10500 /* Output a move between double words.
10501 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
10502 or MEM<-REG and all MEMs must be offsettable addresses. */
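/* Two illustrative cases (not exhaustive): for REG<-MEM with a plain
   register address this emits roughly

	ldrd	r0, [r2]		@ when TARGET_LDRD is available
	ldmia	r2, {r0, r1}		@ otherwise

   and the MEM<-REG direction uses strd/stmia in the same way.  */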
10503 const char *
10504 output_move_double (rtx *operands)
10506 enum rtx_code code0 = GET_CODE (operands[0]);
10507 enum rtx_code code1 = GET_CODE (operands[1]);
10508 rtx otherops[3];
10510 if (code0 == REG)
10512 unsigned int reg0 = REGNO (operands[0]);
10514 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10516 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
10518 switch (GET_CODE (XEXP (operands[1], 0)))
10520 case REG:
10521 if (TARGET_LDRD
10522 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
10523 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
10524 else
10525 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10526 break;
10528 case PRE_INC:
10529 gcc_assert (TARGET_LDRD);
10530 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10531 break;
10533 case PRE_DEC:
10534 if (TARGET_LDRD)
10535 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10536 else
10537 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10538 break;
10540 case POST_INC:
10541 if (TARGET_LDRD)
10542 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10543 else
10544 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10545 break;
10547 case POST_DEC:
10548 gcc_assert (TARGET_LDRD);
10549 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10550 break;
10552 case PRE_MODIFY:
10553 case POST_MODIFY:
10554 /* Autoincrement addressing modes should never have overlapping
10555 base and destination registers, and overlapping index registers
10556 are already prohibited, so this doesn't need to worry about
10557 fix_cm3_ldrd. */
10558 otherops[0] = operands[0];
10559 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10560 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10562 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10564 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10566 /* Registers overlap so split out the increment. */
10567 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10568 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10570 else
10572 /* Use a single insn if we can.
10573 FIXME: IWMMXT allows offsets larger than ldrd can
10574 handle, fix these up with a pair of ldr. */
10575 if (TARGET_THUMB2
10576 || GET_CODE (otherops[2]) != CONST_INT
10577 || (INTVAL (otherops[2]) > -256
10578 && INTVAL (otherops[2]) < 256))
10579 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10580 else
10582 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10583 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10587 else
10589 /* Use a single insn if we can.
10590 FIXME: IWMMXT allows offsets larger than ldrd can handle,
10591 fix these up with a pair of ldr. */
10592 if (TARGET_THUMB2
10593 || GET_CODE (otherops[2]) != CONST_INT
10594 || (INTVAL (otherops[2]) > -256
10595 && INTVAL (otherops[2]) < 256))
10596 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10597 else
10599 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10600 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10603 break;
10605 case LABEL_REF:
10606 case CONST:
10607 /* We might be able to use ldrd %0, %1 here. However the range is
10608 different to ldr/adr, and it is broken on some ARMv7-M
10609 implementations. */
10610 /* Use the second register of the pair to avoid problematic
10611 overlap. */
10612 otherops[1] = operands[1];
10613 output_asm_insn ("adr%?\t%0, %1", otherops);
10614 operands[1] = otherops[0];
10615 if (TARGET_LDRD)
10616 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10617 else
10618 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10619 break;
10621 /* ??? This needs checking for thumb2. */
10622 default:
10623 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10624 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10626 otherops[0] = operands[0];
10627 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10628 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10630 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10632 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10634 switch ((int) INTVAL (otherops[2]))
10636 case -8:
10637 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10638 return "";
10639 case -4:
10640 if (TARGET_THUMB2)
10641 break;
10642 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10643 return "";
10644 case 4:
10645 if (TARGET_THUMB2)
10646 break;
10647 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10648 return "";
10651 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
10652 operands[1] = otherops[0];
10653 if (TARGET_LDRD
10654 && (GET_CODE (otherops[2]) == REG
10655 || TARGET_THUMB2
10656 || (GET_CODE (otherops[2]) == CONST_INT
10657 && INTVAL (otherops[2]) > -256
10658 && INTVAL (otherops[2]) < 256)))
10660 if (reg_overlap_mentioned_p (operands[0],
10661 otherops[2]))
10663 rtx tmp;
10664 /* Swap base and index registers over to
10665 avoid a conflict. */
10666 tmp = otherops[1];
10667 otherops[1] = otherops[2];
10668 otherops[2] = tmp;
10670 /* If both registers conflict, it will usually
10671 have been fixed by a splitter. */
10672 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10673 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10675 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10676 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10678 else
10680 otherops[0] = operands[0];
10681 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10683 return "";
10686 if (GET_CODE (otherops[2]) == CONST_INT)
10688 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10689 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10690 else
10691 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10693 else
10694 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10696 else
10697 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10699 if (TARGET_LDRD)
10700 return "ldr%(d%)\t%0, [%1]";
10702 return "ldm%(ia%)\t%1, %M0";
10704 else
10706 otherops[1] = adjust_address (operands[1], SImode, 4);
10707 /* Take care of overlapping base/data reg. */
10708 if (reg_mentioned_p (operands[0], operands[1]))
10710 output_asm_insn ("ldr%?\t%0, %1", otherops);
10711 output_asm_insn ("ldr%?\t%0, %1", operands);
10713 else
10715 output_asm_insn ("ldr%?\t%0, %1", operands);
10716 output_asm_insn ("ldr%?\t%0, %1", otherops);
10721 else
10723 /* Constraints should ensure this. */
10724 gcc_assert (code0 == MEM && code1 == REG);
10725 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10727 switch (GET_CODE (XEXP (operands[0], 0)))
10729 case REG:
10730 if (TARGET_LDRD)
10731 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10732 else
10733 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10734 break;
10736 case PRE_INC:
10737 gcc_assert (TARGET_LDRD);
10738 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10739 break;
10741 case PRE_DEC:
10742 if (TARGET_LDRD)
10743 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10744 else
10745 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10746 break;
10748 case POST_INC:
10749 if (TARGET_LDRD)
10750 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10751 else
10752 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10753 break;
10755 case POST_DEC:
10756 gcc_assert (TARGET_LDRD);
10757 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10758 break;
10760 case PRE_MODIFY:
10761 case POST_MODIFY:
10762 otherops[0] = operands[1];
10763 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10764 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10766 /* IWMMXT allows offsets larger than strd can handle,
10767 fix these up with a pair of str. */
10768 if (!TARGET_THUMB2
10769 && GET_CODE (otherops[2]) == CONST_INT
10770 && (INTVAL(otherops[2]) <= -256
10771 || INTVAL(otherops[2]) >= 256))
10773 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10775 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
10776 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
10778 else
10780 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
10781 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
10784 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10785 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10786 else
10787 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10788 break;
10790 case PLUS:
10791 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10792 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10794 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10796 case -8:
10797 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10798 return "";
10800 case -4:
10801 if (TARGET_THUMB2)
10802 break;
10803 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10804 return "";
10806 case 4:
10807 if (TARGET_THUMB2)
10808 break;
10809 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10810 return "";
10813 if (TARGET_LDRD
10814 && (GET_CODE (otherops[2]) == REG
10815 || TARGET_THUMB2
10816 || (GET_CODE (otherops[2]) == CONST_INT
10817 && INTVAL (otherops[2]) > -256
10818 && INTVAL (otherops[2]) < 256)))
10820 otherops[0] = operands[1];
10821 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10822 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10823 return "";
10825 /* Fall through */
10827 default:
10828 otherops[0] = adjust_address (operands[0], SImode, 4);
10829 otherops[1] = operands[1];
10830 output_asm_insn ("str%?\t%1, %0", operands);
10831 output_asm_insn ("str%?\t%H1, %0", otherops);
10835 return "";
10838 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10839 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
10841 const char *
10842 output_move_quad (rtx *operands)
10844 if (REG_P (operands[0]))
10846 /* Load, or reg->reg move. */
10848 if (MEM_P (operands[1]))
10850 switch (GET_CODE (XEXP (operands[1], 0)))
10852 case REG:
10853 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10854 break;
10856 case LABEL_REF:
10857 case CONST:
10858 output_asm_insn ("adr%?\t%0, %1", operands);
10859 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10860 break;
10862 default:
10863 gcc_unreachable ();
10866 else
10868 rtx ops[2];
10869 int dest, src, i;
10871 gcc_assert (REG_P (operands[1]));
10873 dest = REGNO (operands[0]);
10874 src = REGNO (operands[1]);
10876 /* This seems pretty dumb, but hopefully GCC won't try to do it
10877 very often. */
10878 if (dest < src)
10879 for (i = 0; i < 4; i++)
10881 ops[0] = gen_rtx_REG (SImode, dest + i);
10882 ops[1] = gen_rtx_REG (SImode, src + i);
10883 output_asm_insn ("mov%?\t%0, %1", ops);
10885 else
10886 for (i = 3; i >= 0; i--)
10888 ops[0] = gen_rtx_REG (SImode, dest + i);
10889 ops[1] = gen_rtx_REG (SImode, src + i);
10890 output_asm_insn ("mov%?\t%0, %1", ops);
10894 else
10896 gcc_assert (MEM_P (operands[0]));
10897 gcc_assert (REG_P (operands[1]));
10898 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10900 switch (GET_CODE (XEXP (operands[0], 0)))
10902 case REG:
10903 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10904 break;
10906 default:
10907 gcc_unreachable ();
10911 return "";
10914 /* Output a VFP load or store instruction. */
10916 const char *
10917 output_move_vfp (rtx *operands)
10919 rtx reg, mem, addr, ops[2];
10920 int load = REG_P (operands[0]);
10921 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10922 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10923 const char *templ;
10924 char buff[50];
10925 enum machine_mode mode;
10927 reg = operands[!load];
10928 mem = operands[load];
10930 mode = GET_MODE (reg);
10932 gcc_assert (REG_P (reg));
10933 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10934 gcc_assert (mode == SFmode
10935 || mode == DFmode
10936 || mode == SImode
10937 || mode == DImode
10938 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10939 gcc_assert (MEM_P (mem));
10941 addr = XEXP (mem, 0);
10943 switch (GET_CODE (addr))
10945 case PRE_DEC:
10946 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10947 ops[0] = XEXP (addr, 0);
10948 ops[1] = reg;
10949 break;
10951 case POST_INC:
10952 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10953 ops[0] = XEXP (addr, 0);
10954 ops[1] = reg;
10955 break;
10957 default:
10958 templ = "f%s%c%%?\t%%%s0, %%1%s";
10959 ops[0] = reg;
10960 ops[1] = mem;
10961 break;
10964 sprintf (buff, templ,
10965 load ? "ld" : "st",
10966 dp ? 'd' : 's',
10967 dp ? "P" : "",
10968 integer_p ? "\t%@ int" : "");
10969 output_asm_insn (buff, ops);
10971 return "";
10974 /* Output a Neon quad-word load or store, or a load or store for
10975 larger structure modes.
10977 WARNING: The ordering of elements is weird in big-endian mode,
10978 because we use VSTM, as required by the EABI. GCC RTL defines
10979 element ordering based on in-memory order. This can differ
10980 from the architectural ordering of elements within a NEON register.
10981 The intrinsics defined in arm_neon.h use the NEON register element
10982 ordering, not the GCC RTL element ordering.
10984 For example, the in-memory ordering of a big-endian quadword
10985 vector with 16-bit elements when stored from register pair {d0,d1}
10986 will be (lowest address first, d0[N] is NEON register element N):
10988 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
10990 When necessary, quadword registers (dN, dN+1) are moved to ARM
10991 registers from rN in the order:
10993 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10995 So that STM/LDM can be used on vectors in ARM registers, and the
10996 same memory layout will result as if VSTM/VLDM were used. */
10998 const char *
10999 output_move_neon (rtx *operands)
11001 rtx reg, mem, addr, ops[2];
11002 int regno, load = REG_P (operands[0]);
11003 const char *templ;
11004 char buff[50];
11005 enum machine_mode mode;
11007 reg = operands[!load];
11008 mem = operands[load];
11010 mode = GET_MODE (reg);
11012 gcc_assert (REG_P (reg));
11013 regno = REGNO (reg);
11014 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
11015 || NEON_REGNO_OK_FOR_QUAD (regno));
11016 gcc_assert (VALID_NEON_DREG_MODE (mode)
11017 || VALID_NEON_QREG_MODE (mode)
11018 || VALID_NEON_STRUCT_MODE (mode));
11019 gcc_assert (MEM_P (mem));
11021 addr = XEXP (mem, 0);
11023 /* Strip off const from addresses like (const (plus (...))). */
11024 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
11025 addr = XEXP (addr, 0);
11027 switch (GET_CODE (addr))
11029 case POST_INC:
11030 templ = "v%smia%%?\t%%0!, %%h1";
11031 ops[0] = XEXP (addr, 0);
11032 ops[1] = reg;
11033 break;
11035 case PRE_DEC:
11036 /* FIXME: We should be using vld1/vst1 here in BE mode? */
11037 templ = "v%smdb%%?\t%%0!, %%h1";
11038 ops[0] = XEXP (addr, 0);
11039 ops[1] = reg;
11040 break;
11042 case POST_MODIFY:
11043 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
11044 gcc_unreachable ();
11046 case LABEL_REF:
11047 case PLUS:
11049 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
11050 int i;
11051 int overlap = -1;
11052 for (i = 0; i < nregs; i++)
11054 /* We're only using DImode here because it's a convenient size. */
11055 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
11056 ops[1] = adjust_address (mem, SImode, 8 * i);
11057 if (reg_overlap_mentioned_p (ops[0], mem))
11059 gcc_assert (overlap == -1);
11060 overlap = i;
11062 else
11064 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11065 output_asm_insn (buff, ops);
11068 if (overlap != -1)
11070 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
11071 ops[1] = adjust_address (mem, SImode, 8 * overlap);
11072 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11073 output_asm_insn (buff, ops);
11076 return "";
11079 default:
11080 templ = "v%smia%%?\t%%m0, %%h1";
11081 ops[0] = mem;
11082 ops[1] = reg;
11085 sprintf (buff, templ, load ? "ld" : "st");
11086 output_asm_insn (buff, ops);
11088 return "";
11091 /* Output an ADD r, s, #n where n may be too big for one instruction.
11092 If adding zero to one register, output nothing. */
11093 const char *
11094 output_add_immediate (rtx *operands)
11096 HOST_WIDE_INT n = INTVAL (operands[2]);
11098 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
11100 if (n < 0)
11101 output_multi_immediate (operands,
11102 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
11103 -n);
11104 else
11105 output_multi_immediate (operands,
11106 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
11110 return "";
11113 /* Output a multiple immediate operation.
11114 OPERANDS is the vector of operands referred to in the output patterns.
11115 INSTR1 is the output pattern to use for the first constant.
11116 INSTR2 is the output pattern to use for subsequent constants.
11117 IMMED_OP is the index of the constant slot in OPERANDS.
11118 N is the constant value. */
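/* Worked example (illustrative): N = 0x10004 cannot be encoded as a
   single ARM immediate, so it is split into 8-bit chunks at even bit
   positions and emitted low chunk first; for an addition this gives

	add	r0, r1, #4
	add	r0, r0, #65536
*/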
11119 static const char *
11120 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
11121 int immed_op, HOST_WIDE_INT n)
11123 #if HOST_BITS_PER_WIDE_INT > 32
11124 n &= 0xffffffff;
11125 #endif
11127 if (n == 0)
11129 /* Quick and easy output. */
11130 operands[immed_op] = const0_rtx;
11131 output_asm_insn (instr1, operands);
11133 else
11135 int i;
11136 const char * instr = instr1;
11138 /* Note that n is never zero here (which would give no output). */
11139 for (i = 0; i < 32; i += 2)
11141 if (n & (3 << i))
11143 operands[immed_op] = GEN_INT (n & (255 << i));
11144 output_asm_insn (instr, operands);
11145 instr = instr2;
11146 i += 6;
11151 return "";
11154 /* Return the name of a shifter operation. */
11155 static const char *
11156 arm_shift_nmem(enum rtx_code code)
11158 switch (code)
11160 case ASHIFT:
11161 return ARM_LSL_NAME;
11163 case ASHIFTRT:
11164 return "asr";
11166 case LSHIFTRT:
11167 return "lsr";
11169 case ROTATERT:
11170 return "ror";
11172 default:
11173 abort();
11177 /* Return the appropriate ARM instruction for the operation code.
11178 The returned result should not be overwritten. OP is the rtx of the
11179 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
11180 was shifted. */
11181 const char *
11182 arithmetic_instr (rtx op, int shift_first_arg)
11184 switch (GET_CODE (op))
11186 case PLUS:
11187 return "add";
11189 case MINUS:
11190 return shift_first_arg ? "rsb" : "sub";
11192 case IOR:
11193 return "orr";
11195 case XOR:
11196 return "eor";
11198 case AND:
11199 return "and";
11201 case ASHIFT:
11202 case ASHIFTRT:
11203 case LSHIFTRT:
11204 case ROTATERT:
11205 return arm_shift_nmem(GET_CODE(op));
11207 default:
11208 gcc_unreachable ();
11212 /* Ensure valid constant shifts and return the appropriate shift mnemonic
11213 for the operation code. The returned result should not be overwritten.
11214 OP is the rtx code of the shift.
11215 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
11216 shift. */
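/* For example (illustrative): (ashiftrt x (const_int 3)) yields "asr"
   with *AMOUNTP = 3; (mult x (const_int 8)) is treated as a left shift
   and yields "lsl" with *AMOUNTP = 3; (rotate x (const_int 8)) is
   converted into "ror" with *AMOUNTP = 24.  */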
11217 static const char *
11218 shift_op (rtx op, HOST_WIDE_INT *amountp)
11220 const char * mnem;
11221 enum rtx_code code = GET_CODE (op);
11223 switch (GET_CODE (XEXP (op, 1)))
11225 case REG:
11226 case SUBREG:
11227 *amountp = -1;
11228 break;
11230 case CONST_INT:
11231 *amountp = INTVAL (XEXP (op, 1));
11232 break;
11234 default:
11235 gcc_unreachable ();
11238 switch (code)
11240 case ROTATE:
11241 gcc_assert (*amountp != -1);
11242 *amountp = 32 - *amountp;
11243 code = ROTATERT;
11245 /* Fall through. */
11247 case ASHIFT:
11248 case ASHIFTRT:
11249 case LSHIFTRT:
11250 case ROTATERT:
11251 mnem = arm_shift_nmem(code);
11252 break;
11254 case MULT:
11255 /* We never have to worry about the amount being other than a
11256 power of 2, since this case can never be reloaded from a reg. */
11257 gcc_assert (*amountp != -1);
11258 *amountp = int_log2 (*amountp);
11259 return ARM_LSL_NAME;
11261 default:
11262 gcc_unreachable ();
11265 if (*amountp != -1)
11267 /* This is not 100% correct, but follows from the desire to merge
11268 multiplication by a power of 2 with the recognizer for a
11269 shift. >=32 is not a valid shift for "lsl", so we must try and
11270 output a shift that produces the correct arithmetical result.
11271 Using lsr #32 is identical except for the fact that the carry bit
11272 is not set correctly if we set the flags; but we never use the
11273 carry bit from such an operation, so we can ignore that. */
11274 if (code == ROTATERT)
11275 /* Rotate is just modulo 32. */
11276 *amountp &= 31;
11277 else if (*amountp != (*amountp & 31))
11279 if (code == ASHIFT)
11280 mnem = "lsr";
11281 *amountp = 32;
11284 /* Shifts of 0 are no-ops. */
11285 if (*amountp == 0)
11286 return NULL;
11289 return mnem;
11292 /* Obtain the shift from the POWER of two. */
11294 static HOST_WIDE_INT
11295 int_log2 (HOST_WIDE_INT power)
11297 HOST_WIDE_INT shift = 0;
11299 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
11301 gcc_assert (shift <= 31);
11302 shift++;
11305 return shift;
11308 /* Output a .ascii pseudo-op, keeping track of lengths. This is
11309 because /bin/as is horribly restrictive. The judgement about
11310 whether or not each character is 'printable' (and can be output as
11311 is) or not (and must be printed with an octal escape) must be made
11312 with reference to the *host* character set -- the situation is
11313 similar to that discussed in the comments above pp_c_char in
11314 c-pretty-print.c. */
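/* For instance (illustrative), the bytes 'h', 'i', '"', '\n' would be
   emitted as

	.ascii	"hi\"\012"

   with a fresh .ascii directive started once roughly MAX_ASCII_LEN
   characters have been written on the current line.  */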
11316 #define MAX_ASCII_LEN 51
11318 void
11319 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
11321 int i;
11322 int len_so_far = 0;
11324 fputs ("\t.ascii\t\"", stream);
11326 for (i = 0; i < len; i++)
11328 int c = p[i];
11330 if (len_so_far >= MAX_ASCII_LEN)
11332 fputs ("\"\n\t.ascii\t\"", stream);
11333 len_so_far = 0;
11336 if (ISPRINT (c))
11338 if (c == '\\' || c == '\"')
11340 putc ('\\', stream);
11341 len_so_far++;
11343 putc (c, stream);
11344 len_so_far++;
11346 else
11348 fprintf (stream, "\\%03o", c);
11349 len_so_far += 4;
11353 fputs ("\"\n", stream);
11356 /* Compute the register save mask for registers 0 through 12
11357 inclusive. This code is used by arm_compute_save_reg_mask. */
11359 static unsigned long
11360 arm_compute_save_reg0_reg12_mask (void)
11362 unsigned long func_type = arm_current_func_type ();
11363 unsigned long save_reg_mask = 0;
11364 unsigned int reg;
11366 if (IS_INTERRUPT (func_type))
11368 unsigned int max_reg;
11369 /* Interrupt functions must not corrupt any registers,
11370 even call clobbered ones. If this is a leaf function
11371 we can just examine the registers used by the RTL, but
11372 otherwise we have to assume that whatever function is
11373 called might clobber anything, and so we have to save
11374 all the call-clobbered registers as well. */
11375 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
11376 /* FIQ handlers have registers r8 - r12 banked, so
11377 we only need to check r0 - r7. Normal ISRs only
11378 bank r14 and r15, so we must check up to r12.
11379 r13 is the stack pointer which is always preserved,
11380 so we do not need to consider it here. */
11381 max_reg = 7;
11382 else
11383 max_reg = 12;
11385 for (reg = 0; reg <= max_reg; reg++)
11386 if (df_regs_ever_live_p (reg)
11387 || (! current_function_is_leaf && call_used_regs[reg]))
11388 save_reg_mask |= (1 << reg);
11390 /* Also save the pic base register if necessary. */
11391 if (flag_pic
11392 && !TARGET_SINGLE_PIC_BASE
11393 && arm_pic_register != INVALID_REGNUM
11394 && crtl->uses_pic_offset_table)
11395 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11397 else
11399 /* In the normal case we only need to save those registers
11400 which are call saved and which are used by this function. */
11401 for (reg = 0; reg <= 11; reg++)
11402 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
11403 save_reg_mask |= (1 << reg);
11405 /* Handle the frame pointer as a special case. */
11406 if (frame_pointer_needed)
11407 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
11409 /* If we aren't loading the PIC register,
11410 don't stack it even though it may be live. */
11411 if (flag_pic
11412 && !TARGET_SINGLE_PIC_BASE
11413 && arm_pic_register != INVALID_REGNUM
11414 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
11415 || crtl->uses_pic_offset_table))
11416 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11418 /* The prologue will copy SP into R0, so save it. */
11419 if (IS_STACKALIGN (func_type))
11420 save_reg_mask |= 1;
11423 /* Save registers so the exception handler can modify them. */
11424 if (crtl->calls_eh_return)
11426 unsigned int i;
11428 for (i = 0; ; i++)
11430 reg = EH_RETURN_DATA_REGNO (i);
11431 if (reg == INVALID_REGNUM)
11432 break;
11433 save_reg_mask |= 1 << reg;
11437 return save_reg_mask;
11441 /* Compute the number of bytes used to store the static chain register on the
11442 stack, above the stack frame. We need to know this accurately to get the
11443 alignment of the rest of the stack frame correct. */
11445 static int arm_compute_static_chain_stack_bytes (void)
11447 unsigned long func_type = arm_current_func_type ();
11448 int static_chain_stack_bytes = 0;
11450 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
11451 IS_NESTED (func_type) &&
11452 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
11453 static_chain_stack_bytes = 4;
11455 return static_chain_stack_bytes;
11459 /* Compute a bit mask of which registers need to be
11460 saved on the stack for the current function.
11461 This is used by arm_get_frame_offsets, which may add extra registers. */
11463 static unsigned long
11464 arm_compute_save_reg_mask (void)
11466 unsigned int save_reg_mask = 0;
11467 unsigned long func_type = arm_current_func_type ();
11468 unsigned int reg;
11470 if (IS_NAKED (func_type))
11471 /* This should never really happen. */
11472 return 0;
11474 /* If we are creating a stack frame, then we must save the frame pointer,
11475 IP (which will hold the old stack pointer), LR and the PC. */
11476 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11477 save_reg_mask |=
11478 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
11479 | (1 << IP_REGNUM)
11480 | (1 << LR_REGNUM)
11481 | (1 << PC_REGNUM);
11483 /* Volatile functions do not return, so there
11484 is no need to save any other registers. */
11485 if (IS_VOLATILE (func_type))
11486 return save_reg_mask;
11488 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
11490 /* Decide if we need to save the link register.
11491 Interrupt routines have their own banked link register,
11492 so they never need to save it.
11493 Otherwise if we do not use the link register we do not need to save
11494 it. If we are pushing other registers onto the stack however, we
11495 can save an instruction in the epilogue by pushing the link register
11496 now and then popping it back into the PC. This incurs extra memory
11497 accesses though, so we only do it when optimizing for size, and only
11498 if we know that we will not need a fancy return sequence. */
11499 if (df_regs_ever_live_p (LR_REGNUM)
11500 || (save_reg_mask
11501 && optimize_size
11502 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11503 && !crtl->calls_eh_return))
11504 save_reg_mask |= 1 << LR_REGNUM;
11506 if (cfun->machine->lr_save_eliminated)
11507 save_reg_mask &= ~ (1 << LR_REGNUM);
11509 if (TARGET_REALLY_IWMMXT
11510 && ((bit_count (save_reg_mask)
11511 + ARM_NUM_INTS (crtl->args.pretend_args_size +
11512 arm_compute_static_chain_stack_bytes())
11513 ) % 2) != 0)
11515 /* The total number of registers that are going to be pushed
11516 onto the stack is odd. We need to ensure that the stack
11517 is 64-bit aligned before we start to save iWMMXt registers,
11518 and also before we start to create locals. (A local variable
11519 might be a double or long long which we will load/store using
11520 an iWMMXt instruction). Therefore we need to push another
11521 ARM register, so that the stack will be 64-bit aligned. We
11522 try to avoid using the arg registers (r0 - r3) as they might be
11523 used to pass values in a tail call. */
11524 for (reg = 4; reg <= 12; reg++)
11525 if ((save_reg_mask & (1 << reg)) == 0)
11526 break;
11528 if (reg <= 12)
11529 save_reg_mask |= (1 << reg);
11530 else
11532 cfun->machine->sibcall_blocked = 1;
11533 save_reg_mask |= (1 << 3);
11537 /* We may need to push an additional register for use in initializing the
11538 PIC base register. */
11539 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
11540 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
11542 reg = thumb_find_work_register (1 << 4);
11543 if (!call_used_regs[reg])
11544 save_reg_mask |= (1 << reg);
11547 return save_reg_mask;
11551 /* Compute a bit mask of which registers need to be
11552 saved on the stack for the current function. */
11553 static unsigned long
11554 thumb1_compute_save_reg_mask (void)
11556 unsigned long mask;
11557 unsigned reg;
11559 mask = 0;
11560 for (reg = 0; reg < 12; reg ++)
11561 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11562 mask |= 1 << reg;
11564 if (flag_pic
11565 && !TARGET_SINGLE_PIC_BASE
11566 && arm_pic_register != INVALID_REGNUM
11567 && crtl->uses_pic_offset_table)
11568 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11570 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
11571 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
11572 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
11574 /* LR will also be pushed if any lo regs are pushed. */
11575 if (mask & 0xff || thumb_force_lr_save ())
11576 mask |= (1 << LR_REGNUM);
11578 /* Make sure we have a low work register if we need one.
11579 We will need one if we are going to push a high register,
11580 but we are not currently intending to push a low register. */
11581 if ((mask & 0xff) == 0
11582 && ((mask & 0x0f00) || TARGET_BACKTRACE))
11584 /* Use thumb_find_work_register to choose which register
11585 we will use. If the register is live then we will
11586 have to push it. Use LAST_LO_REGNUM as our fallback
11587 choice for the register to select. */
11588 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
11589 /* Make sure the register returned by thumb_find_work_register is
11590 not part of the return value. */
11591 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
11592 reg = LAST_LO_REGNUM;
11594 if (! call_used_regs[reg])
11595 mask |= 1 << reg;
11598 /* The 504 below is 8 bytes less than 512 because there are two possible
11599 alignment words. We can't tell here if they will be present or not so we
11600 have to play it safe and assume that they are. */
11601 if ((CALLER_INTERWORKING_SLOT_SIZE +
11602 ROUND_UP_WORD (get_frame_size ()) +
11603 crtl->outgoing_args_size) >= 504)
11605 /* This is the same as the code in thumb1_expand_prologue() which
11606 determines which register to use for stack decrement. */
11607 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
11608 if (mask & (1 << reg))
11609 break;
11611 if (reg > LAST_LO_REGNUM)
11613 /* Make sure we have a register available for stack decrement. */
11614 mask |= 1 << LAST_LO_REGNUM;
11618 return mask;
11622 /* Return the number of bytes required to save VFP registers. */
11623 static int
11624 arm_get_vfp_saved_size (void)
11626 unsigned int regno;
11627 int count;
11628 int saved;
11630 saved = 0;
11631 /* Space for saved VFP registers. */
11632 if (TARGET_HARD_FLOAT && TARGET_VFP)
11634 count = 0;
11635 for (regno = FIRST_VFP_REGNUM;
11636 regno < LAST_VFP_REGNUM;
11637 regno += 2)
11639 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
11640 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
11642 if (count > 0)
11644 /* Workaround ARM10 VFPr1 bug. */
11645 if (count == 2 && !arm_arch6)
11646 count++;
11647 saved += count * 8;
11649 count = 0;
11651 else
11652 count++;
11654 if (count > 0)
11656 if (count == 2 && !arm_arch6)
11657 count++;
11658 saved += count * 8;
11661 return saved;
11665 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
11666 everything bar the final return instruction. */
11667 const char *
11668 output_return_instruction (rtx operand, int really_return, int reverse)
11670 char conditional[10];
11671 char instr[100];
11672 unsigned reg;
11673 unsigned long live_regs_mask;
11674 unsigned long func_type;
11675 arm_stack_offsets *offsets;
11677 func_type = arm_current_func_type ();
11679 if (IS_NAKED (func_type))
11680 return "";
11682 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11684 /* If this function was declared non-returning, and we have
11685 found a tail call, then we have to trust that the called
11686 function won't return. */
11687 if (really_return)
11689 rtx ops[2];
11691 /* Otherwise, trap an attempted return by aborting. */
11692 ops[0] = operand;
11693 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11694 : "abort");
11695 assemble_external_libcall (ops[1]);
11696 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11699 return "";
11702 gcc_assert (!cfun->calls_alloca || really_return);
11704 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11706 cfun->machine->return_used_this_function = 1;
11708 offsets = arm_get_frame_offsets ();
11709 live_regs_mask = offsets->saved_regs_mask;
11711 if (live_regs_mask)
11713 const char * return_reg;
11715 /* If we do not have any special requirements for function exit
11716 (e.g. interworking) then we can load the return address
11717 directly into the PC. Otherwise we must load it into LR. */
11718 if (really_return
11719 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11720 return_reg = reg_names[PC_REGNUM];
11721 else
11722 return_reg = reg_names[LR_REGNUM];
11724 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11726 /* There are three possible reasons for the IP register
11727 being saved. 1) a stack frame was created, in which case
11728 IP contains the old stack pointer, or 2) an ISR routine
11729 corrupted it, or 3) it was saved to align the stack on
11730 iWMMXt. In case 1, restore IP into SP, otherwise just
11731 restore IP. */
11732 if (frame_pointer_needed)
11734 live_regs_mask &= ~ (1 << IP_REGNUM);
11735 live_regs_mask |= (1 << SP_REGNUM);
11737 else
11738 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11741 /* On some ARM architectures it is faster to use LDR rather than
11742 LDM to load a single register. On other architectures, the
11743 cost is the same. In 26 bit mode, or for exception handlers,
11744 we have to use LDM to load the PC so that the CPSR is also
11745 restored. */
11746 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11747 if (live_regs_mask == (1U << reg))
11748 break;
11750 if (reg <= LAST_ARM_REGNUM
11751 && (reg != LR_REGNUM
11752 || ! really_return
11753 || ! IS_INTERRUPT (func_type)))
11755 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11756 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11758 else
11760 char *p;
11761 int first = 1;
11763 /* Generate the load multiple instruction to restore the
11764 registers. Note we can get here, even if
11765 frame_pointer_needed is true, but only if sp already
11766 points to the base of the saved core registers. */
11767 if (live_regs_mask & (1 << SP_REGNUM))
11769 unsigned HOST_WIDE_INT stack_adjust;
11771 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11772 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11774 if (stack_adjust && arm_arch5 && TARGET_ARM)
11775 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11776 else
11778 /* If we can't use ldmib (SA110 bug),
11779 then try to pop r3 instead. */
11780 if (stack_adjust)
11781 live_regs_mask |= 1 << 3;
11782 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11785 else
11786 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11788 p = instr + strlen (instr);
11790 for (reg = 0; reg <= SP_REGNUM; reg++)
11791 if (live_regs_mask & (1 << reg))
11793 int l = strlen (reg_names[reg]);
11795 if (first)
11796 first = 0;
11797 else
11799 memcpy (p, ", ", 2);
11800 p += 2;
11803 memcpy (p, "%|", 2);
11804 memcpy (p + 2, reg_names[reg], l);
11805 p += l + 2;
11808 if (live_regs_mask & (1 << LR_REGNUM))
11810 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11811 /* If returning from an interrupt, restore the CPSR. */
11812 if (IS_INTERRUPT (func_type))
11813 strcat (p, "^");
11815 else
11816 strcpy (p, "}");
11819 output_asm_insn (instr, & operand);
11821 /* See if we need to generate an extra instruction to
11822 perform the actual function return. */
11823 if (really_return
11824 && func_type != ARM_FT_INTERWORKED
11825 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11827 /* The return has already been handled
11828 by loading the LR into the PC. */
11829 really_return = 0;
11833 if (really_return)
11835 switch ((int) ARM_FUNC_TYPE (func_type))
11837 case ARM_FT_ISR:
11838 case ARM_FT_FIQ:
11839 /* ??? This is wrong for unified assembly syntax. */
11840 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11841 break;
11843 case ARM_FT_INTERWORKED:
11844 sprintf (instr, "bx%s\t%%|lr", conditional);
11845 break;
11847 case ARM_FT_EXCEPTION:
11848 /* ??? This is wrong for unified assembly syntax. */
11849 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11850 break;
11852 default:
11853 /* Use bx if it's available. */
11854 if (arm_arch5 || arm_arch4t)
11855 sprintf (instr, "bx%s\t%%|lr", conditional);
11856 else
11857 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11858 break;
11861 output_asm_insn (instr, & operand);
11864 return "";
11867 /* Write the function name into the code section, directly preceding
11868 the function prologue.
11870 Code will be output similar to this:
11872 .ascii "arm_poke_function_name", 0
11873 .align
11875 .word 0xff000000 + (t1 - t0)
11876 arm_poke_function_name
11877 mov ip, sp
11878 stmfd sp!, {fp, ip, lr, pc}
11879 sub fp, ip, #4
11881 When performing a stack backtrace, code can inspect the value
11882 of 'pc' stored at 'fp' + 0. If the trace function then looks
11883 at location pc - 12 and the top 8 bits are set, then we know
11884 that there is a function name embedded immediately preceding this
11885 location, whose length is ((pc[-3]) & ~0xff000000).
11887 We assume that pc is declared as a pointer to an unsigned long.
11889 It is of no benefit to output the function name if we are assembling
11890 a leaf function. These function types will not contain a stack
11891 backtrace structure, therefore it is not possible to determine the
11892 function name. */
11893 void
11894 arm_poke_function_name (FILE *stream, const char *name)
11896 unsigned long alignlength;
11897 unsigned long length;
11898 rtx x;
11900 length = strlen (name) + 1;
11901 alignlength = ROUND_UP_WORD (length);
11903 ASM_OUTPUT_ASCII (stream, name, length);
11904 ASM_OUTPUT_ALIGN (stream, 2);
11905 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11906 assemble_aligned_integer (UNITS_PER_WORD, x);
11909 /* Place some comments into the assembler stream
11910 describing the current function. */
11911 static void
11912 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11914 unsigned long func_type;
11916 if (TARGET_THUMB1)
11918 thumb1_output_function_prologue (f, frame_size);
11919 return;
11922 /* Sanity check. */
11923 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11925 func_type = arm_current_func_type ();
11927 switch ((int) ARM_FUNC_TYPE (func_type))
11929 default:
11930 case ARM_FT_NORMAL:
11931 break;
11932 case ARM_FT_INTERWORKED:
11933 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11934 break;
11935 case ARM_FT_ISR:
11936 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11937 break;
11938 case ARM_FT_FIQ:
11939 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11940 break;
11941 case ARM_FT_EXCEPTION:
11942 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11943 break;
11946 if (IS_NAKED (func_type))
11947 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11949 if (IS_VOLATILE (func_type))
11950 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11952 if (IS_NESTED (func_type))
11953 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11954 if (IS_STACKALIGN (func_type))
11955 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11957 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11958 crtl->args.size,
11959 crtl->args.pretend_args_size, frame_size);
11961 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11962 frame_pointer_needed,
11963 cfun->machine->uses_anonymous_args);
11965 if (cfun->machine->lr_save_eliminated)
11966 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11968 if (crtl->calls_eh_return)
11969 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11973 const char *
11974 arm_output_epilogue (rtx sibling)
11976 int reg;
11977 unsigned long saved_regs_mask;
11978 unsigned long func_type;
11979 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11980 frame that is $fp + 4 for a non-variadic function. */
11981 int floats_offset = 0;
11982 rtx operands[3];
11983 FILE * f = asm_out_file;
11984 unsigned int lrm_count = 0;
11985 int really_return = (sibling == NULL);
11986 int start_reg;
11987 arm_stack_offsets *offsets;
11989 /* If we have already generated the return instruction
11990 then it is futile to generate anything else. */
11991 if (use_return_insn (FALSE, sibling) &&
11992 (cfun->machine->return_used_this_function != 0))
11993 return "";
11995 func_type = arm_current_func_type ();
11997 if (IS_NAKED (func_type))
11998 /* Naked functions don't have epilogues. */
11999 return "";
12001 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
12003 rtx op;
12005 /* A volatile function should never return. Call abort. */
12006 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
12007 assemble_external_libcall (op);
12008 output_asm_insn ("bl\t%a0", &op);
12010 return "";
12013 /* If we are throwing an exception, then we really must be doing a
12014 return, so we can't tail-call. */
12015 gcc_assert (!crtl->calls_eh_return || really_return);
12017 offsets = arm_get_frame_offsets ();
12018 saved_regs_mask = offsets->saved_regs_mask;
12020 if (TARGET_IWMMXT)
12021 lrm_count = bit_count (saved_regs_mask);
12023 floats_offset = offsets->saved_args;
12024 /* Compute how far away the floats will be. */
12025 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12026 if (saved_regs_mask & (1 << reg))
12027 floats_offset += 4;
12029 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12031 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
12032 int vfp_offset = offsets->frame;
12034 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12036 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12037 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12039 floats_offset += 12;
12040 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
12041 reg, FP_REGNUM, floats_offset - vfp_offset);
12044 else
12046 start_reg = LAST_FPA_REGNUM;
12048 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12050 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12052 floats_offset += 12;
12054 /* We can't unstack more than four registers at once. */
12055 if (start_reg - reg == 3)
12057 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
12058 reg, FP_REGNUM, floats_offset - vfp_offset);
12059 start_reg = reg - 1;
12062 else
12064 if (reg != start_reg)
12065 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12066 reg + 1, start_reg - reg,
12067 FP_REGNUM, floats_offset - vfp_offset);
12068 start_reg = reg - 1;
12072 /* Just in case the last register checked also needs unstacking. */
12073 if (reg != start_reg)
12074 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12075 reg + 1, start_reg - reg,
12076 FP_REGNUM, floats_offset - vfp_offset);
12079 if (TARGET_HARD_FLOAT && TARGET_VFP)
12081 int saved_size;
12083 /* The fldmd insns do not have base+offset addressing
12084 modes, so we use IP to hold the address. */
12085 saved_size = arm_get_vfp_saved_size ();
12087 if (saved_size > 0)
12089 floats_offset += saved_size;
12090 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
12091 FP_REGNUM, floats_offset - vfp_offset);
12093 start_reg = FIRST_VFP_REGNUM;
12094 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12096 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12097 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12099 if (start_reg != reg)
12100 vfp_output_fldmd (f, IP_REGNUM,
12101 (start_reg - FIRST_VFP_REGNUM) / 2,
12102 (reg - start_reg) / 2);
12103 start_reg = reg + 2;
12106 if (start_reg != reg)
12107 vfp_output_fldmd (f, IP_REGNUM,
12108 (start_reg - FIRST_VFP_REGNUM) / 2,
12109 (reg - start_reg) / 2);
12112 if (TARGET_IWMMXT)
12114 /* The frame pointer is guaranteed to be non-double-word aligned.
12115 This is because it is set to (old_stack_pointer - 4) and the
12116 old_stack_pointer was double word aligned. Thus the offset to
12117 the iWMMXt registers to be loaded must also be non-double-word
12118 sized, so that the resultant address *is* double-word aligned.
12119 We can ignore floats_offset since that was already included in
12120 the live_regs_mask. */
12121 lrm_count += (lrm_count % 2 ? 2 : 1);
12123 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12124 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12126 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
12127 reg, FP_REGNUM, lrm_count * 4);
12128 lrm_count += 2;
12132 /* saved_regs_mask should contain the IP, which at the time of stack
12133 frame generation actually contains the old stack pointer. So a
12134 quick way to unwind the stack is just to pop the IP register directly
12135 into the stack pointer. */
12136 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
12137 saved_regs_mask &= ~ (1 << IP_REGNUM);
12138 saved_regs_mask |= (1 << SP_REGNUM);
12140 /* There are two registers left in saved_regs_mask - LR and PC. We
12141 only need to restore the LR register (the return address), but to
12142 save time we can load it directly into the PC, unless we need a
12143 special function exit sequence, or we are not really returning. */
12144 if (really_return
12145 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12146 && !crtl->calls_eh_return)
12147 /* Delete the LR from the register mask, so that the LR on
12148 the stack is loaded into the PC in the register mask. */
12149 saved_regs_mask &= ~ (1 << LR_REGNUM);
12150 else
12151 saved_regs_mask &= ~ (1 << PC_REGNUM);
12153 /* We must use SP as the base register, because SP is one of the
12154 registers being restored. If an interrupt or page fault
12155 happens in the ldm instruction, the SP might or might not
12156 have been restored. That would be bad, as then SP will no
12157 longer indicate the safe area of stack, and we can get stack
12158 corruption. Using SP as the base register means that it will
12159 be reset correctly to the original value, should an interrupt
12160 occur. If the stack pointer already points at the right
12161 place, then omit the subtraction. */
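/* A minimal sketch (register choice illustrative only) of what the code
   just below emits for an APCS frame whose prologue pushed {fp, ip, lr, pc}:

	sub	sp, fp, #12
	ldmfd	sp, {fp, sp, pc}

   The stacked IP value (the old stack pointer) reloads SP and the stacked
   LR value reloads the PC, so no writeback and no separate return
   instruction are needed.  */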
12162 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
12163 || cfun->calls_alloca)
12164 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
12165 4 * bit_count (saved_regs_mask));
12166 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
12168 if (IS_INTERRUPT (func_type))
12169 /* Interrupt handlers will have pushed the
12170 IP onto the stack, so restore it now. */
12171 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
12173 else
12175 /* This branch is executed for ARM mode (non-apcs frames) and
12176 Thumb-2 mode. Frame layout is essentially the same for those
12177 cases, except that in ARM mode the frame pointer points to the
12178 first saved register, while in Thumb-2 mode the frame pointer points
12179 to the last saved register.
12181 It is possible to make the frame pointer point to the last saved
12182 register in both cases, and remove some conditionals below.
12183 That means that fp setup in prologue would be just "mov fp, sp"
12184 and sp restore in epilogue would be just "mov sp, fp", whereas
12185 now we have to use add/sub in those cases. However, the value
12186 of that would be marginal, as both mov and add/sub are 32-bit
12187 in ARM mode, and it would require extra conditionals
12188 in arm_expand_prologue to distinguish ARM-apcs-frame case
12189 (where frame pointer is required to point at first register)
12190 and ARM-non-apcs-frame. Therefore, such change is postponed
12191 until a real need arises. */
12192 unsigned HOST_WIDE_INT amount;
12193 int rfe;
12194 /* Restore stack pointer if necessary. */
12195 if (TARGET_ARM && frame_pointer_needed)
12197 operands[0] = stack_pointer_rtx;
12198 operands[1] = hard_frame_pointer_rtx;
12200 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
12201 output_add_immediate (operands);
12203 else
12205 if (frame_pointer_needed)
12207 /* For Thumb-2 restore sp from the frame pointer.
12208 Operand restrictions mean we have to increment FP, then copy
12209 to SP. */
12210 amount = offsets->locals_base - offsets->saved_regs;
12211 operands[0] = hard_frame_pointer_rtx;
12213 else
12215 unsigned long count;
12216 operands[0] = stack_pointer_rtx;
12217 amount = offsets->outgoing_args - offsets->saved_regs;
12218 /* pop call clobbered registers if it avoids a
12219 separate stack adjustment. */
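/* Illustrative example (not from the source): with a word-sized return
   value in r0, an 8-byte stack adjustment and r2/r3 neither saved nor
   live, the mask computed below gains {r2, r3}, so the final pop restores
   two scratch registers instead of emitting a separate "add sp, sp, #8".  */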
12220 count = offsets->saved_regs - offsets->saved_args;
12221 if (optimize_size
12222 && count != 0
12223 && !crtl->calls_eh_return
12224 && bit_count(saved_regs_mask) * 4 == count
12225 && !IS_INTERRUPT (func_type)
12226 && !crtl->tail_call_emit)
12228 unsigned long mask;
12229 mask = (1 << (arm_size_return_regs() / 4)) - 1;
12230 mask ^= 0xf;
12231 mask &= ~saved_regs_mask;
12232 reg = 0;
12233 while (bit_count (mask) * 4 > amount)
12235 while ((mask & (1 << reg)) == 0)
12236 reg++;
12237 mask &= ~(1 << reg);
12239 if (bit_count (mask) * 4 == amount) {
12240 amount = 0;
12241 saved_regs_mask |= mask;
12246 if (amount)
12248 operands[1] = operands[0];
12249 operands[2] = GEN_INT (amount);
12250 output_add_immediate (operands);
12252 if (frame_pointer_needed)
12253 asm_fprintf (f, "\tmov\t%r, %r\n",
12254 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
12257 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12259 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12260 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12261 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
12262 reg, SP_REGNUM);
12264 else
12266 start_reg = FIRST_FPA_REGNUM;
12268 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12270 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12272 if (reg - start_reg == 3)
12274 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
12275 start_reg, SP_REGNUM);
12276 start_reg = reg + 1;
12279 else
12281 if (reg != start_reg)
12282 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12283 start_reg, reg - start_reg,
12284 SP_REGNUM);
12286 start_reg = reg + 1;
12290 /* Just in case the last register checked also needs unstacking. */
12291 if (reg != start_reg)
12292 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12293 start_reg, reg - start_reg, SP_REGNUM);
12296 if (TARGET_HARD_FLOAT && TARGET_VFP)
12298 start_reg = FIRST_VFP_REGNUM;
12299 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12301 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12302 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12304 if (start_reg != reg)
12305 vfp_output_fldmd (f, SP_REGNUM,
12306 (start_reg - FIRST_VFP_REGNUM) / 2,
12307 (reg - start_reg) / 2);
12308 start_reg = reg + 2;
12311 if (start_reg != reg)
12312 vfp_output_fldmd (f, SP_REGNUM,
12313 (start_reg - FIRST_VFP_REGNUM) / 2,
12314 (reg - start_reg) / 2);
12316 if (TARGET_IWMMXT)
12317 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
12318 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12319 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
12321 /* If we can, restore the LR into the PC. */
12322 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
12323 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
12324 && !IS_STACKALIGN (func_type)
12325 && really_return
12326 && crtl->args.pretend_args_size == 0
12327 && saved_regs_mask & (1 << LR_REGNUM)
12328 && !crtl->calls_eh_return)
12330 saved_regs_mask &= ~ (1 << LR_REGNUM);
12331 saved_regs_mask |= (1 << PC_REGNUM);
12332 rfe = IS_INTERRUPT (func_type);
12334 else
12335 rfe = 0;
12337 /* Load the registers off the stack. If we only have one register
12338 to load use the LDR instruction - it is faster. For Thumb-2
12339 always use pop and the assembler will pick the best instruction. */
12340 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
12341 && !IS_INTERRUPT(func_type))
12343 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
12345 else if (saved_regs_mask)
12347 if (saved_regs_mask & (1 << SP_REGNUM))
12348 /* Note - write back to the stack register is not enabled
12349 (i.e. "ldmfd sp!..."). We know that the stack pointer is
12350 in the list of registers and if we add writeback the
12351 instruction becomes UNPREDICTABLE. */
12352 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
12353 rfe);
12354 else if (TARGET_ARM)
12355 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
12356 rfe);
12357 else
12358 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
12361 if (crtl->args.pretend_args_size)
12363 /* Unwind the pre-pushed regs. */
12364 operands[0] = operands[1] = stack_pointer_rtx;
12365 operands[2] = GEN_INT (crtl->args.pretend_args_size);
12366 output_add_immediate (operands);
12370 /* We may have already restored PC directly from the stack. */
12371 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
12372 return "";
12374 /* Stack adjustment for exception handler. */
12375 if (crtl->calls_eh_return)
12376 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
12377 ARM_EH_STACKADJ_REGNUM);
12379 /* Generate the return instruction. */
12380 switch ((int) ARM_FUNC_TYPE (func_type))
12382 case ARM_FT_ISR:
12383 case ARM_FT_FIQ:
12384 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
12385 break;
12387 case ARM_FT_EXCEPTION:
12388 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12389 break;
12391 case ARM_FT_INTERWORKED:
12392 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12393 break;
12395 default:
12396 if (IS_STACKALIGN (func_type))
12398 /* See comment in arm_expand_prologue. */
12399 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
12401 if (arm_arch5 || arm_arch4t)
12402 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12403 else
12404 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12405 break;
12408 return "";
12411 static void
12412 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
12413 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
12415 arm_stack_offsets *offsets;
12417 if (TARGET_THUMB1)
12419 int regno;
12421 /* Emit any call-via-reg trampolines that are needed for v4t support
12422 of call_reg and call_value_reg type insns. */
12423 for (regno = 0; regno < LR_REGNUM; regno++)
12425 rtx label = cfun->machine->call_via[regno];
12427 if (label != NULL)
12429 switch_to_section (function_section (current_function_decl));
12430 targetm.asm_out.internal_label (asm_out_file, "L",
12431 CODE_LABEL_NUMBER (label));
12432 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
12436 /* ??? Probably not safe to set this here, since it assumes that a
12437 function will be emitted as assembly immediately after we generate
12438 RTL for it. This does not happen for inline functions. */
12439 cfun->machine->return_used_this_function = 0;
12441 else /* TARGET_32BIT */
12443 /* We need to take into account any stack-frame rounding. */
12444 offsets = arm_get_frame_offsets ();
12446 gcc_assert (!use_return_insn (FALSE, NULL)
12447 || (cfun->machine->return_used_this_function != 0)
12448 || offsets->saved_regs == offsets->outgoing_args
12449 || frame_pointer_needed);
12451 /* Reset the ARM-specific per-function variables. */
12452 after_arm_reorg = 0;
12456 /* Generate and emit an insn that we will recognize as a push_multi.
12457 Unfortunately, since this insn does not reflect very well the actual
12458 semantics of the operation, we need to annotate the insn for the benefit
12459 of DWARF2 frame unwind information. */
12460 static rtx
12461 emit_multi_reg_push (unsigned long mask)
12463 int num_regs = 0;
12464 int num_dwarf_regs;
12465 int i, j;
12466 rtx par;
12467 rtx dwarf;
12468 int dwarf_par_index;
12469 rtx tmp, reg;
12471 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12472 if (mask & (1 << i))
12473 num_regs++;
12475 gcc_assert (num_regs && num_regs <= 16);
12477 /* We don't record the PC in the dwarf frame information. */
12478 num_dwarf_regs = num_regs;
12479 if (mask & (1 << PC_REGNUM))
12480 num_dwarf_regs--;
12482 /* For the body of the insn we are going to generate an UNSPEC in
12483 parallel with several USEs. This allows the insn to be recognized
12484 by the push_multi pattern in the arm.md file. The insn looks
12485 something like this:
12487 (parallel [
12488 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
12489 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
12490 (use (reg:SI 11 fp))
12491 (use (reg:SI 12 ip))
12492 (use (reg:SI 14 lr))
12493 (use (reg:SI 15 pc))
12496 For the frame note however, we try to be more explicit and actually
12497 show each register being stored into the stack frame, plus a (single)
12498 decrement of the stack pointer. We do it this way in order to be
12499 friendly to the stack unwinding code, which only wants to see a single
12500 stack decrement per instruction. The RTL we generate for the note looks
12501 something like this:
12503 (sequence [
12504 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
12505 (set (mem:SI (reg:SI sp)) (reg:SI r4))
12506 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
12507 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
12508 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
12511 This sequence is used both by the code to support stack unwinding for
12512 exception handlers and the code to generate dwarf2 frame debugging. */
12514 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
12515 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
12516 dwarf_par_index = 1;
12518 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12520 if (mask & (1 << i))
12522 reg = gen_rtx_REG (SImode, i);
12524 XVECEXP (par, 0, 0)
12525 = gen_rtx_SET (VOIDmode,
12526 gen_frame_mem (BLKmode,
12527 gen_rtx_PRE_DEC (BLKmode,
12528 stack_pointer_rtx)),
12529 gen_rtx_UNSPEC (BLKmode,
12530 gen_rtvec (1, reg),
12531 UNSPEC_PUSH_MULT));
12533 if (i != PC_REGNUM)
12535 tmp = gen_rtx_SET (VOIDmode,
12536 gen_frame_mem (SImode, stack_pointer_rtx),
12537 reg);
12538 RTX_FRAME_RELATED_P (tmp) = 1;
12539 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
12540 dwarf_par_index++;
12543 break;
12547 for (j = 1, i++; j < num_regs; i++)
12549 if (mask & (1 << i))
12551 reg = gen_rtx_REG (SImode, i);
12553 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
12555 if (i != PC_REGNUM)
12558 = gen_rtx_SET (VOIDmode,
12559 gen_frame_mem (SImode,
12560 plus_constant (stack_pointer_rtx,
12561 4 * j)),
12562 reg);
12563 RTX_FRAME_RELATED_P (tmp) = 1;
12564 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
12567 j++;
12571 par = emit_insn (par);
12573 tmp = gen_rtx_SET (VOIDmode,
12574 stack_pointer_rtx,
12575 plus_constant (stack_pointer_rtx, -4 * num_regs));
12576 RTX_FRAME_RELATED_P (tmp) = 1;
12577 XVECEXP (dwarf, 0, 0) = tmp;
12579 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12581 return par;
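/* Hypothetical usage sketch: emit_multi_reg_push ((1 << 4) | (1 << LR_REGNUM))
   emits one push_multi insn saving r4 and lr, annotated with a
   REG_FRAME_RELATED_EXPR note describing a single 8-byte SP decrement and
   the two individual stores.  */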
12584 /* Calculate the size of the return value that is passed in registers. */
12585 static unsigned
12586 arm_size_return_regs (void)
12588 enum machine_mode mode;
12590 if (crtl->return_rtx != 0)
12591 mode = GET_MODE (crtl->return_rtx);
12592 else
12593 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12595 return GET_MODE_SIZE (mode);
12598 static rtx
12599 emit_sfm (int base_reg, int count)
12601 rtx par;
12602 rtx dwarf;
12603 rtx tmp, reg;
12604 int i;
12606 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12607 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12609 reg = gen_rtx_REG (XFmode, base_reg++);
12611 XVECEXP (par, 0, 0)
12612 = gen_rtx_SET (VOIDmode,
12613 gen_frame_mem (BLKmode,
12614 gen_rtx_PRE_DEC (BLKmode,
12615 stack_pointer_rtx)),
12616 gen_rtx_UNSPEC (BLKmode,
12617 gen_rtvec (1, reg),
12618 UNSPEC_PUSH_MULT));
12619 tmp = gen_rtx_SET (VOIDmode,
12620 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
12621 RTX_FRAME_RELATED_P (tmp) = 1;
12622 XVECEXP (dwarf, 0, 1) = tmp;
12624 for (i = 1; i < count; i++)
12626 reg = gen_rtx_REG (XFmode, base_reg++);
12627 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12629 tmp = gen_rtx_SET (VOIDmode,
12630 gen_frame_mem (XFmode,
12631 plus_constant (stack_pointer_rtx,
12632 i * 12)),
12633 reg);
12634 RTX_FRAME_RELATED_P (tmp) = 1;
12635 XVECEXP (dwarf, 0, i + 1) = tmp;
12638 tmp = gen_rtx_SET (VOIDmode,
12639 stack_pointer_rtx,
12640 plus_constant (stack_pointer_rtx, -12 * count));
12642 RTX_FRAME_RELATED_P (tmp) = 1;
12643 XVECEXP (dwarf, 0, 0) = tmp;
12645 par = emit_insn (par);
12646 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12648 return par;
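/* Illustrative usage: emit_sfm (FIRST_FPA_REGNUM, 3) would emit one store
   multiple of three consecutive FPA registers, with a dwarf note holding
   one 36-byte SP decrement plus three 12-byte-spaced stores.  */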
12652 /* Return true if the current function needs to save/restore LR. */
12654 static bool
12655 thumb_force_lr_save (void)
12657 return !cfun->machine->lr_save_eliminated
12658 && (!leaf_function_p ()
12659 || thumb_far_jump_used_p ()
12660 || df_regs_ever_live_p (LR_REGNUM));
12664 /* Compute the distance from register FROM to register TO.
12665 These can be the arg pointer (26), the soft frame pointer (25),
12666 the stack pointer (13) or the hard frame pointer (11).
12667 In thumb mode r7 is used as the soft frame pointer, if needed.
12668 Typical stack layout looks like this:
12670 old stack pointer -> | |
12671 ----
12672 | | \
12673 | | saved arguments for
12674 | | vararg functions
12675 | | /
12677 hard FP & arg pointer -> | | \
12678 | | stack
12679 | | frame
12680 | | /
12682 | | \
12683 | | call saved
12684 | | registers
12685 soft frame pointer -> | | /
12687 | | \
12688 | | local
12689 | | variables
12690 locals base pointer -> | | /
12692 | | \
12693 | | outgoing
12694 | | arguments
12695 current stack pointer -> | | /
12698 For a given function some or all of these stack components
12699 may not be needed, giving rise to the possibility of
12700 eliminating some of the registers.
12702 The values returned by this function must reflect the behavior
12703 of arm_expand_prologue() and arm_compute_save_reg_mask().
12705 The sign of the number returned reflects the direction of stack
12706 growth, so the values are positive for all eliminations except
12707 from the soft frame pointer to the hard frame pointer.
12709 SFP may point just inside the local variables block to ensure correct
12710 alignment. */
12713 /* Calculate stack offsets. These are used to calculate register elimination
12714 offsets and in prologue/epilogue code. Also calculates which registers
12715 should be saved. */
12717 static arm_stack_offsets *
12718 arm_get_frame_offsets (void)
12720 struct arm_stack_offsets *offsets;
12721 unsigned long func_type;
12722 int leaf;
12723 int saved;
12724 int core_saved;
12725 HOST_WIDE_INT frame_size;
12726 int i;
12728 offsets = &cfun->machine->stack_offsets;
12730 /* We need to know if we are a leaf function. Unfortunately, it
12731 is possible to be called after start_sequence has been called,
12732 which causes get_insns to return the insns for the sequence,
12733 not the function, which will cause leaf_function_p to return
12734 the incorrect result.
12736 However, we only need to know about leaf functions once reload has completed, and the
12737 frame size cannot be changed after that time, so we can safely
12738 use the cached value. */
12740 if (reload_completed)
12741 return offsets;
12743 /* Initially this is the size of the local variables. It will be translated
12744 into an offset once we have determined the size of preceding data. */
12745 frame_size = ROUND_UP_WORD (get_frame_size ());
12747 leaf = leaf_function_p ();
12749 /* Space for variadic functions. */
12750 offsets->saved_args = crtl->args.pretend_args_size;
12752 /* In Thumb mode this is incorrect, but never used. */
12753 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
12754 arm_compute_static_chain_stack_bytes();
12756 if (TARGET_32BIT)
12758 unsigned int regno;
12760 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12761 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12762 saved = core_saved;
12764 /* We know that SP will be doubleword aligned on entry, and we must
12765 preserve that condition at any subroutine call. We also require the
12766 soft frame pointer to be doubleword aligned. */
12768 if (TARGET_REALLY_IWMMXT)
12770 /* Check for the call-saved iWMMXt registers. */
12771 for (regno = FIRST_IWMMXT_REGNUM;
12772 regno <= LAST_IWMMXT_REGNUM;
12773 regno++)
12774 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12775 saved += 8;
12778 func_type = arm_current_func_type ();
12779 if (! IS_VOLATILE (func_type))
12781 /* Space for saved FPA registers. */
12782 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12783 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12784 saved += 12;
12786 /* Space for saved VFP registers. */
12787 if (TARGET_HARD_FLOAT && TARGET_VFP)
12788 saved += arm_get_vfp_saved_size ();
12791 else /* TARGET_THUMB1 */
12793 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12794 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12795 saved = core_saved;
12796 if (TARGET_BACKTRACE)
12797 saved += 16;
12800 /* Saved registers include the stack frame. */
12801 offsets->saved_regs = offsets->saved_args + saved +
12802 arm_compute_static_chain_stack_bytes();
12803 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12804 /* A leaf function does not need any stack alignment if it has nothing
12805 on the stack. */
12806 if (leaf && frame_size == 0)
12808 offsets->outgoing_args = offsets->soft_frame;
12809 offsets->locals_base = offsets->soft_frame;
12810 return offsets;
12813 /* Ensure SFP has the correct alignment. */
12814 if (ARM_DOUBLEWORD_ALIGN
12815 && (offsets->soft_frame & 7))
12817 offsets->soft_frame += 4;
12818 /* Try to align stack by pushing an extra reg. Don't bother doing this
12819 when there is a stack frame as the alignment will be rolled into
12820 the normal stack adjustment. */
12821 if (frame_size + crtl->outgoing_args_size == 0)
12823 int reg = -1;
12825 /* If it is safe to use r3, then do so. This sometimes
12826 generates better code on Thumb-2 by avoiding the need to
12827 use 32-bit push/pop instructions. */
12828 if (!crtl->tail_call_emit
12829 && arm_size_return_regs () <= 12)
12831 reg = 3;
12833 else
12834 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12836 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12838 reg = i;
12839 break;
12843 if (reg != -1)
12845 offsets->saved_regs += 4;
12846 offsets->saved_regs_mask |= (1 << reg);
12851 offsets->locals_base = offsets->soft_frame + frame_size;
12852 offsets->outgoing_args = (offsets->locals_base
12853 + crtl->outgoing_args_size);
12855 if (ARM_DOUBLEWORD_ALIGN)
12857 /* Ensure SP remains doubleword aligned. */
12858 if (offsets->outgoing_args & 7)
12859 offsets->outgoing_args += 4;
12860 gcc_assert (!(offsets->outgoing_args & 7));
12863 return offsets;
12867 /* Calculate the relative offsets for the different stack pointers. Positive
12868 offsets are in the direction of stack growth. */
12870 HOST_WIDE_INT
12871 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12873 arm_stack_offsets *offsets;
12875 offsets = arm_get_frame_offsets ();
12877 /* OK, now we have enough information to compute the distances.
12878 There must be an entry in these switch tables for each pair
12879 of registers in ELIMINABLE_REGS, even if some of the entries
12880 seem to be redundant or useless. */
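/* Purely illustrative numbers, assuming CALLER_INTERWORKING_SLOT_SIZE is
   zero: with saved_args == 0, soft_frame == 8, locals_base == 24 and
   outgoing_args == 32, the cases below yield
   ARG_POINTER -> FRAME_POINTER = 8, ARG_POINTER -> STACK_POINTER = 28 and
   FRAME_POINTER -> STACK_POINTER = 24.  */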
12881 switch (from)
12883 case ARG_POINTER_REGNUM:
12884 switch (to)
12886 case THUMB_HARD_FRAME_POINTER_REGNUM:
12887 return 0;
12889 case FRAME_POINTER_REGNUM:
12890 /* This is the reverse of the soft frame pointer
12891 to hard frame pointer elimination below. */
12892 return offsets->soft_frame - offsets->saved_args;
12894 case ARM_HARD_FRAME_POINTER_REGNUM:
12895 /* This is only non-zero in the case where the static chain register
12896 is stored above the frame. */
12897 return offsets->frame - offsets->saved_args - 4;
12899 case STACK_POINTER_REGNUM:
12900 /* If nothing has been pushed on the stack at all
12901 then this will return -4. This *is* correct! */
12902 return offsets->outgoing_args - (offsets->saved_args + 4);
12904 default:
12905 gcc_unreachable ();
12907 gcc_unreachable ();
12909 case FRAME_POINTER_REGNUM:
12910 switch (to)
12912 case THUMB_HARD_FRAME_POINTER_REGNUM:
12913 return 0;
12915 case ARM_HARD_FRAME_POINTER_REGNUM:
12916 /* The hard frame pointer points to the top entry in the
12917 stack frame. The soft frame pointer to the bottom entry
12918 in the stack frame. If there is no stack frame at all,
12919 then they are identical. */
12921 return offsets->frame - offsets->soft_frame;
12923 case STACK_POINTER_REGNUM:
12924 return offsets->outgoing_args - offsets->soft_frame;
12926 default:
12927 gcc_unreachable ();
12929 gcc_unreachable ();
12931 default:
12932 /* You cannot eliminate from the stack pointer.
12933 In theory you could eliminate from the hard frame
12934 pointer to the stack pointer, but this will never
12935 happen, since if a stack frame is not needed the
12936 hard frame pointer will never be used. */
12937 gcc_unreachable ();
12942 /* Emit RTL to save coprocessor registers on function entry. Returns the
12943 number of bytes pushed. */
12945 static int
12946 arm_save_coproc_regs(void)
12948 int saved_size = 0;
12949 unsigned reg;
12950 unsigned start_reg;
12951 rtx insn;
12953 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12954 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12956 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12957 insn = gen_rtx_MEM (V2SImode, insn);
12958 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12959 RTX_FRAME_RELATED_P (insn) = 1;
12960 saved_size += 8;
12963 /* Save any floating point call-saved registers used by this
12964 function. */
12965 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12967 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12968 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12970 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12971 insn = gen_rtx_MEM (XFmode, insn);
12972 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12973 RTX_FRAME_RELATED_P (insn) = 1;
12974 saved_size += 12;
12977 else
12979 start_reg = LAST_FPA_REGNUM;
12981 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12983 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12985 if (start_reg - reg == 3)
12987 insn = emit_sfm (reg, 4);
12988 RTX_FRAME_RELATED_P (insn) = 1;
12989 saved_size += 48;
12990 start_reg = reg - 1;
12993 else
12995 if (start_reg != reg)
12997 insn = emit_sfm (reg + 1, start_reg - reg);
12998 RTX_FRAME_RELATED_P (insn) = 1;
12999 saved_size += (start_reg - reg) * 12;
13001 start_reg = reg - 1;
13005 if (start_reg != reg)
13007 insn = emit_sfm (reg + 1, start_reg - reg);
13008 saved_size += (start_reg - reg) * 12;
13009 RTX_FRAME_RELATED_P (insn) = 1;
13012 if (TARGET_HARD_FLOAT && TARGET_VFP)
13014 start_reg = FIRST_VFP_REGNUM;
13016 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13018 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13019 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13021 if (start_reg != reg)
13022 saved_size += vfp_emit_fstmd (start_reg,
13023 (reg - start_reg) / 2);
13024 start_reg = reg + 2;
13027 if (start_reg != reg)
13028 saved_size += vfp_emit_fstmd (start_reg,
13029 (reg - start_reg) / 2);
13031 return saved_size;
13035 /* Set the Thumb frame pointer from the stack pointer. */
13037 static void
13038 thumb_set_frame_pointer (arm_stack_offsets *offsets)
13040 HOST_WIDE_INT amount;
13041 rtx insn, dwarf;
13043 amount = offsets->outgoing_args - offsets->locals_base;
13044 if (amount < 1024)
13045 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13046 stack_pointer_rtx, GEN_INT (amount)));
13047 else
13049 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
13050 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
13051 expects the first two operands to be the same. */
13052 if (TARGET_THUMB2)
13054 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13055 stack_pointer_rtx,
13056 hard_frame_pointer_rtx));
13058 else
13060 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13061 hard_frame_pointer_rtx,
13062 stack_pointer_rtx));
13064 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
13065 plus_constant (stack_pointer_rtx, amount));
13066 RTX_FRAME_RELATED_P (dwarf) = 1;
13067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13070 RTX_FRAME_RELATED_P (insn) = 1;
13073 /* Generate the prologue instructions for entry into an ARM or Thumb-2
13074 function. */
13075 void
13076 arm_expand_prologue (void)
13078 rtx amount;
13079 rtx insn;
13080 rtx ip_rtx;
13081 unsigned long live_regs_mask;
13082 unsigned long func_type;
13083 int fp_offset = 0;
13084 int saved_pretend_args = 0;
13085 int saved_regs = 0;
13086 unsigned HOST_WIDE_INT args_to_push;
13087 arm_stack_offsets *offsets;
13089 func_type = arm_current_func_type ();
13091 /* Naked functions don't have prologues. */
13092 if (IS_NAKED (func_type))
13093 return;
13095 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
13096 args_to_push = crtl->args.pretend_args_size;
13098 /* Compute which registers we will have to save onto the stack. */
13099 offsets = arm_get_frame_offsets ();
13100 live_regs_mask = offsets->saved_regs_mask;
13102 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
13104 if (IS_STACKALIGN (func_type))
13106 rtx dwarf;
13107 rtx r0;
13108 rtx r1;
13109 /* Handle a word-aligned stack pointer. We generate the following:
13111 mov r0, sp
13112 bic r1, r0, #7
13113 mov sp, r1
13114 <save and restore r0 in normal prologue/epilogue>
13115 mov sp, r0
13116 bx lr
13118 The unwinder doesn't need to know about the stack realignment.
13119 Just tell it we saved SP in r0. */
13120 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
13122 r0 = gen_rtx_REG (SImode, 0);
13123 r1 = gen_rtx_REG (SImode, 1);
13124 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
13125 compiler won't choke. */
13126 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
13127 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
13128 insn = gen_movsi (r0, stack_pointer_rtx);
13129 RTX_FRAME_RELATED_P (insn) = 1;
13130 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13131 emit_insn (insn);
13132 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
13133 emit_insn (gen_movsi (stack_pointer_rtx, r1));
13136 /* For APCS frames, if the IP register is clobbered
13137 when creating the frame, save that register in a special
13138 way. */
13139 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13141 if (IS_INTERRUPT (func_type))
13143 /* Interrupt functions must not corrupt any registers.
13144 Creating a frame pointer however, corrupts the IP
13145 register, so we must push it first. */
13146 insn = emit_multi_reg_push (1 << IP_REGNUM);
13148 /* Do not set RTX_FRAME_RELATED_P on this insn.
13149 The dwarf stack unwinding code only wants to see one
13150 stack decrement per function, and this is not it. If
13151 this instruction is labeled as being part of the frame
13152 creation sequence then dwarf2out_frame_debug_expr will
13153 die when it encounters the assignment of IP to FP
13154 later on, since the use of SP here establishes SP as
13155 the CFA register and not IP.
13157 Anyway this instruction is not really part of the stack
13158 frame creation although it is part of the prologue. */
13160 else if (IS_NESTED (func_type))
13162 /* The Static chain register is the same as the IP register
13163 used as a scratch register during stack frame creation.
13164 To get around this need to find somewhere to store IP
13165 whilst the frame is being created. We try the following
13166 places in order:
13168 1. The last argument register.
13169 2. A slot on the stack above the frame. (This only
13170 works if the function is not a varargs function).
13171 3. Register r3, after pushing the argument registers
13172 onto the stack.
13174 Note - we only need to tell the dwarf2 backend about the SP
13175 adjustment in the second variant; the static chain register
13176 doesn't need to be unwound, as it doesn't contain a value
13177 inherited from the caller. */
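/* Sketch of the second alternative below (offsets illustrative): IP is
   stored with a pre-decrement of SP, roughly "str ip, [sp, #-4]!", and
   only the resulting 4-byte SP adjustment is reported to the dwarf
   backend.  */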
13179 if (df_regs_ever_live_p (3) == false)
13180 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13181 else if (args_to_push == 0)
13183 rtx dwarf;
13185 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
13186 saved_regs += 4;
13188 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
13189 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
13190 fp_offset = 4;
13192 /* Just tell the dwarf backend that we adjusted SP. */
13193 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13194 plus_constant (stack_pointer_rtx,
13195 -fp_offset));
13196 RTX_FRAME_RELATED_P (insn) = 1;
13197 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13199 else
13201 /* Store the args on the stack. */
13202 if (cfun->machine->uses_anonymous_args)
13203 insn = emit_multi_reg_push
13204 ((0xf0 >> (args_to_push / 4)) & 0xf);
13205 else
13206 insn = emit_insn
13207 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13208 GEN_INT (- args_to_push)));
13210 RTX_FRAME_RELATED_P (insn) = 1;
13212 saved_pretend_args = 1;
13213 fp_offset = args_to_push;
13214 args_to_push = 0;
13216 /* Now reuse r3 to preserve IP. */
13217 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13221 insn = emit_set_insn (ip_rtx,
13222 plus_constant (stack_pointer_rtx, fp_offset));
13223 RTX_FRAME_RELATED_P (insn) = 1;
13226 if (args_to_push)
13228 /* Push the argument registers, or reserve space for them. */
13229 if (cfun->machine->uses_anonymous_args)
13230 insn = emit_multi_reg_push
13231 ((0xf0 >> (args_to_push / 4)) & 0xf);
13232 else
13233 insn = emit_insn
13234 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13235 GEN_INT (- args_to_push)));
13236 RTX_FRAME_RELATED_P (insn) = 1;
13239 /* If this is an interrupt service routine, and the link register
13240 is going to be pushed, and we're not generating extra
13241 push of IP (needed when a frame is needed and the frame layout is APCS),
13242 subtracting four from LR now will mean that the function return
13243 can be done with a single instruction. */
13244 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
13245 && (live_regs_mask & (1 << LR_REGNUM)) != 0
13246 && !(frame_pointer_needed && TARGET_APCS_FRAME)
13247 && TARGET_ARM)
13249 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
13251 emit_set_insn (lr, plus_constant (lr, -4));
13254 if (live_regs_mask)
13256 saved_regs += bit_count (live_regs_mask) * 4;
13257 if (optimize_size && !frame_pointer_needed
13258 && saved_regs == offsets->saved_regs - offsets->saved_args)
13260 /* If no coprocessor registers are being pushed and we don't have
13261 to worry about a frame pointer then push extra registers to
13262 create the stack frame. This is done in a way that does not
13263 alter the frame layout, so is independent of the epilogue. */
13264 int n;
13265 int frame;
13266 n = 0;
13267 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
13268 n++;
13269 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
13270 if (frame && n * 4 >= frame)
13272 n = frame / 4;
13273 live_regs_mask |= (1 << n) - 1;
13274 saved_regs += frame;
13277 insn = emit_multi_reg_push (live_regs_mask);
13278 RTX_FRAME_RELATED_P (insn) = 1;
13281 if (! IS_VOLATILE (func_type))
13282 saved_regs += arm_save_coproc_regs ();
13284 if (frame_pointer_needed && TARGET_ARM)
13286 /* Create the new frame pointer. */
13287 if (TARGET_APCS_FRAME)
13289 insn = GEN_INT (-(4 + args_to_push + fp_offset));
13290 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
13291 RTX_FRAME_RELATED_P (insn) = 1;
13293 if (IS_NESTED (func_type))
13295 /* Recover the static chain register. */
13296 if (!df_regs_ever_live_p (3)
13297 || saved_pretend_args)
13298 insn = gen_rtx_REG (SImode, 3);
13299 else /* if (crtl->args.pretend_args_size == 0) */
13301 insn = plus_constant (hard_frame_pointer_rtx, 4);
13302 insn = gen_frame_mem (SImode, insn);
13304 emit_set_insn (ip_rtx, insn);
13305 /* Add a USE to stop propagate_one_insn() from barfing. */
13306 emit_insn (gen_prologue_use (ip_rtx));
13309 else
13311 insn = GEN_INT (saved_regs - 4);
13312 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13313 stack_pointer_rtx, insn));
13314 RTX_FRAME_RELATED_P (insn) = 1;
13318 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
13320 /* This add can produce multiple insns for a large constant, so we
13321 need to get tricky. */
13322 rtx last = get_last_insn ();
13324 amount = GEN_INT (offsets->saved_args + saved_regs
13325 - offsets->outgoing_args);
13327 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13328 amount));
13331 last = last ? NEXT_INSN (last) : get_insns ();
13332 RTX_FRAME_RELATED_P (last) = 1;
13334 while (last != insn);
13336 /* If the frame pointer is needed, emit a special barrier that
13337 will prevent the scheduler from moving stores to the frame
13338 before the stack adjustment. */
13339 if (frame_pointer_needed)
13340 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
13341 hard_frame_pointer_rtx));
13345 if (frame_pointer_needed && TARGET_THUMB2)
13346 thumb_set_frame_pointer (offsets);
13348 if (flag_pic && arm_pic_register != INVALID_REGNUM)
13350 unsigned long mask;
13352 mask = live_regs_mask;
13353 mask &= THUMB2_WORK_REGS;
13354 if (!IS_NESTED (func_type))
13355 mask |= (1 << IP_REGNUM);
13356 arm_load_pic_register (mask);
13359 /* If we are profiling, make sure no instructions are scheduled before
13360 the call to mcount. Similarly if the user has requested no
13361 scheduling in the prolog. Similarly if we want non-call exceptions
13362 using the EABI unwinder, to prevent faulting instructions from being
13363 swapped with a stack adjustment. */
13364 if (crtl->profile || !TARGET_SCHED_PROLOG
13365 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13366 emit_insn (gen_blockage ());
13368 /* If the link register is being kept alive, with the return address in it,
13369 then make sure that it does not get reused by the ce2 pass. */
13370 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
13371 cfun->machine->lr_save_eliminated = 1;
13374 /* Print condition code to STREAM. Helper function for arm_print_operand. */
13375 static void
13376 arm_print_condition (FILE *stream)
13378 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
13380 /* Branch conversion is not implemented for Thumb-2. */
13381 if (TARGET_THUMB)
13383 output_operand_lossage ("predicated Thumb instruction");
13384 return;
13386 if (current_insn_predicate != NULL)
13388 output_operand_lossage
13389 ("predicated instruction in conditional sequence");
13390 return;
13393 fputs (arm_condition_codes[arm_current_cc], stream);
13395 else if (current_insn_predicate)
13397 enum arm_cond_code code;
13399 if (TARGET_THUMB1)
13401 output_operand_lossage ("predicated Thumb instruction");
13402 return;
13405 code = get_arm_condition_code (current_insn_predicate);
13406 fputs (arm_condition_codes[code], stream);
13411 /* If CODE is 'd', then X is a condition operand and the instruction
13412 should only be executed if the condition is true.
13413 If CODE is 'D', then X is a condition operand and the instruction
13414 should only be executed if the condition is false: however, if the mode
13415 of the comparison is CCFPEmode, then always execute the instruction -- we
13416 do this because in these circumstances !GE does not necessarily imply LT;
13417 in these cases the instruction pattern will take care to make sure that
13418 an instruction containing %d will follow, thereby undoing the effects of
13419 doing this instruction unconditionally.
13420 If CODE is 'N' then X is a floating point operand that must be negated
13421 before output.
13422 If CODE is 'B' then output a bitwise inverted value of X (a const int).
13423 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
13424 void
13425 arm_print_operand (FILE *stream, rtx x, int code)
13427 switch (code)
13429 case '@':
13430 fputs (ASM_COMMENT_START, stream);
13431 return;
13433 case '_':
13434 fputs (user_label_prefix, stream);
13435 return;
13437 case '|':
13438 fputs (REGISTER_PREFIX, stream);
13439 return;
13441 case '?':
13442 arm_print_condition (stream);
13443 return;
13445 case '(':
13446 /* Nothing in unified syntax, otherwise the current condition code. */
13447 if (!TARGET_UNIFIED_ASM)
13448 arm_print_condition (stream);
13449 break;
13451 case ')':
13452 /* The current condition code in unified syntax, otherwise nothing. */
13453 if (TARGET_UNIFIED_ASM)
13454 arm_print_condition (stream);
13455 break;
13457 case '.':
13458 /* The current condition code for a condition code setting instruction.
13459 Preceded by 's' in unified syntax, otherwise followed by 's'. */
13460 if (TARGET_UNIFIED_ASM)
13462 fputc('s', stream);
13463 arm_print_condition (stream);
13465 else
13467 arm_print_condition (stream);
13468 fputc('s', stream);
13470 return;
13472 case '!':
13473 /* If the instruction is conditionally executed then print
13474 the current condition code, otherwise print 's'. */
13475 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
13476 if (current_insn_predicate)
13477 arm_print_condition (stream);
13478 else
13479 fputc('s', stream);
13480 break;
13482 /* %# is a "break" sequence. It doesn't output anything, but is used to
13483 separate e.g. operand numbers from following text, if that text consists
13484 of further digits which we don't want to be part of the operand
13485 number. */
13486 case '#':
13487 return;
13489 case 'N':
13491 REAL_VALUE_TYPE r;
13492 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13493 r = REAL_VALUE_NEGATE (r);
13494 fprintf (stream, "%s", fp_const_from_val (&r));
13496 return;
13498 /* An integer or symbol address without a preceding # sign. */
13499 case 'c':
13500 switch (GET_CODE (x))
13502 case CONST_INT:
13503 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13504 break;
13506 case SYMBOL_REF:
13507 output_addr_const (stream, x);
13508 break;
13510 default:
13511 gcc_unreachable ();
13513 return;
13515 case 'B':
13516 if (GET_CODE (x) == CONST_INT)
13518 HOST_WIDE_INT val;
13519 val = ARM_SIGN_EXTEND (~INTVAL (x));
13520 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
13522 else
13524 putc ('~', stream);
13525 output_addr_const (stream, x);
13527 return;
13529 case 'L':
13530 /* The low 16 bits of an immediate constant. */
13531 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
13532 return;
13534 case 'i':
13535 fprintf (stream, "%s", arithmetic_instr (x, 1));
13536 return;
13538 /* Truncate Cirrus shift counts. */
13539 case 's':
13540 if (GET_CODE (x) == CONST_INT)
13542 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
13543 return;
13545 arm_print_operand (stream, x, 0);
13546 return;
13548 case 'I':
13549 fprintf (stream, "%s", arithmetic_instr (x, 0));
13550 return;
13552 case 'S':
13554 HOST_WIDE_INT val;
13555 const char *shift;
13557 if (!shift_operator (x, SImode))
13559 output_operand_lossage ("invalid shift operand");
13560 break;
13563 shift = shift_op (x, &val);
13565 if (shift)
13567 fprintf (stream, ", %s ", shift);
13568 if (val == -1)
13569 arm_print_operand (stream, XEXP (x, 1), 0);
13570 else
13571 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
13574 return;
13576 /* An explanation of the 'Q', 'R' and 'H' register operands:
13578 In a pair of registers containing a DI or DF value the 'Q'
13579 operand returns the register number of the register containing
13580 the least significant part of the value. The 'R' operand returns
13581 the register number of the register containing the most
13582 significant part of the value.
13584 The 'H' operand returns the higher of the two register numbers.
13585 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
13586 same as the 'Q' operand, since the most significant part of the
13587 value is held in the lower number register. The reverse is true
13588 on systems where WORDS_BIG_ENDIAN is false.
13590 The purpose of these operands is to distinguish between cases
13591 where the endian-ness of the values is important (for example
13592 when they are added together), and cases where the endian-ness
13593 is irrelevant, but the order of register operations is important.
13594 For example when loading a value from memory into a register
13595 pair, the endian-ness does not matter. Provided that the value
13596 from the lower memory address is put into the lower numbered
13597 register, and the value from the higher address is put into the
13598 higher numbered register, the load will work regardless of whether
13599 the value being loaded is big-wordian or little-wordian. The
13600 order of the two register loads can matter however, if the address
13601 of the memory location is actually held in one of the registers
13602 being overwritten by the load. */
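/* For example, on a little-endian target (WORDS_BIG_ENDIAN false) a
   DImode value held in r0/r1 prints r0 for %Q (least significant word),
   r1 for %R (most significant word) and r1 for %H (higher register
   number).  */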
13603 case 'Q':
13604 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13606 output_operand_lossage ("invalid operand for code '%c'", code);
13607 return;
13610 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13611 return;
13613 case 'R':
13614 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13616 output_operand_lossage ("invalid operand for code '%c'", code);
13617 return;
13620 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13621 return;
13623 case 'H':
13624 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13626 output_operand_lossage ("invalid operand for code '%c'", code);
13627 return;
13630 asm_fprintf (stream, "%r", REGNO (x) + 1);
13631 return;
13633 case 'J':
13634 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13636 output_operand_lossage ("invalid operand for code '%c'", code);
13637 return;
13640 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13641 return;
13643 case 'K':
13644 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13646 output_operand_lossage ("invalid operand for code '%c'", code);
13647 return;
13650 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13651 return;
13653 case 'm':
13654 asm_fprintf (stream, "%r",
13655 GET_CODE (XEXP (x, 0)) == REG
13656 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13657 return;
13659 case 'M':
13660 asm_fprintf (stream, "{%r-%r}",
13661 REGNO (x),
13662 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13663 return;
13665 /* Like 'M', but writing doubleword vector registers, for use by Neon
13666 insns. */
13667 case 'h':
13669 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13670 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13671 if (numregs == 1)
13672 asm_fprintf (stream, "{d%d}", regno);
13673 else
13674 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13676 return;
13678 case 'd':
13679 /* CONST_TRUE_RTX means always -- that's the default. */
13680 if (x == const_true_rtx)
13681 return;
13683 if (!COMPARISON_P (x))
13685 output_operand_lossage ("invalid operand for code '%c'", code);
13686 return;
13689 fputs (arm_condition_codes[get_arm_condition_code (x)],
13690 stream);
13691 return;
13693 case 'D':
13694 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13695 want to do that. */
13696 if (x == const_true_rtx)
13698 output_operand_lossage ("instruction never executed");
13699 return;
13701 if (!COMPARISON_P (x))
13703 output_operand_lossage ("invalid operand for code '%c'", code);
13704 return;
13707 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13708 (get_arm_condition_code (x))],
13709 stream);
13710 return;
13712 /* Cirrus registers can be accessed in a variety of ways:
13713 single floating point (f)
13714 double floating point (d)
13715 32bit integer (fx)
13716 64bit integer (dx). */
13717 case 'W': /* Cirrus register in F mode. */
13718 case 'X': /* Cirrus register in D mode. */
13719 case 'Y': /* Cirrus register in FX mode. */
13720 case 'Z': /* Cirrus register in DX mode. */
13721 gcc_assert (GET_CODE (x) == REG
13722 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13724 fprintf (stream, "mv%s%s",
13725 code == 'W' ? "f"
13726 : code == 'X' ? "d"
13727 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13729 return;
13731 /* Print cirrus register in the mode specified by the register's mode. */
13732 case 'V':
13734 int mode = GET_MODE (x);
13736 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13738 output_operand_lossage ("invalid operand for code '%c'", code);
13739 return;
13742 fprintf (stream, "mv%s%s",
13743 mode == DFmode ? "d"
13744 : mode == SImode ? "fx"
13745 : mode == DImode ? "dx"
13746 : "f", reg_names[REGNO (x)] + 2);
13748 return;
13751 case 'U':
13752 if (GET_CODE (x) != REG
13753 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13754 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13755 /* Bad value for wCG register number. */
13757 output_operand_lossage ("invalid operand for code '%c'", code);
13758 return;
13761 else
13762 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13763 return;
13765 /* Print an iWMMXt control register name. */
13766 case 'w':
13767 if (GET_CODE (x) != CONST_INT
13768 || INTVAL (x) < 0
13769 || INTVAL (x) >= 16)
13770 /* Bad value for wC register number. */
13772 output_operand_lossage ("invalid operand for code '%c'", code);
13773 return;
13776 else
13778 static const char * wc_reg_names [16] =
13780 "wCID", "wCon", "wCSSF", "wCASF",
13781 "wC4", "wC5", "wC6", "wC7",
13782 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13783 "wC12", "wC13", "wC14", "wC15"
13786 fprintf (stream, wc_reg_names [INTVAL (x)]);
13788 return;
13790 /* Print a VFP/Neon double precision or quad precision register name. */
13791 case 'P':
13792 case 'q':
13794 int mode = GET_MODE (x);
13795 int is_quad = (code == 'q');
13796 int regno;
13798 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13800 output_operand_lossage ("invalid operand for code '%c'", code);
13801 return;
13804 if (GET_CODE (x) != REG
13805 || !IS_VFP_REGNUM (REGNO (x)))
13807 output_operand_lossage ("invalid operand for code '%c'", code);
13808 return;
13811 regno = REGNO (x);
13812 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13813 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13815 output_operand_lossage ("invalid operand for code '%c'", code);
13816 return;
13819 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13820 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13822 return;
13824 /* These two codes print the low/high doubleword register of a Neon quad
13825 register, respectively. For pair-structure types, can also print
13826 low/high quadword registers. */
13827 case 'e':
13828 case 'f':
13830 int mode = GET_MODE (x);
13831 int regno;
13833 if ((GET_MODE_SIZE (mode) != 16
13834 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13836 output_operand_lossage ("invalid operand for code '%c'", code);
13837 return;
13840 regno = REGNO (x);
13841 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13843 output_operand_lossage ("invalid operand for code '%c'", code);
13844 return;
13847 if (GET_MODE_SIZE (mode) == 16)
13848 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13849 + (code == 'f' ? 1 : 0));
13850 else
13851 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13852 + (code == 'f' ? 1 : 0));
13854 return;
13856 /* Print a VFPv3 floating-point constant, represented as an integer
13857 index. */
13858 case 'G':
13860 int index = vfp3_const_double_index (x);
13861 gcc_assert (index != -1);
13862 fprintf (stream, "%d", index);
13864 return;
13866 /* Print bits representing opcode features for Neon.
13868 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13869 and polynomials as unsigned.
13871 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13873 Bit 2 is 1 for rounding functions, 0 otherwise. */
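/* For instance, a bits value of 3 (signed, float) makes %T and %F both
   print 'f', while 0 (unsigned integer) prints 'u' for %T and 'i' for
   %F.  */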
13875 /* Identify the type as 's', 'u', 'p' or 'f'. */
13876 case 'T':
13878 HOST_WIDE_INT bits = INTVAL (x);
13879 fputc ("uspf"[bits & 3], stream);
13881 return;
13883 /* Likewise, but signed and unsigned integers are both 'i'. */
13884 case 'F':
13886 HOST_WIDE_INT bits = INTVAL (x);
13887 fputc ("iipf"[bits & 3], stream);
13889 return;
13891 /* As for 'T', but emit 'u' instead of 'p'. */
13892 case 't':
13894 HOST_WIDE_INT bits = INTVAL (x);
13895 fputc ("usuf"[bits & 3], stream);
13897 return;
13899 /* Bit 2: rounding (vs none). */
13900 case 'O':
13902 HOST_WIDE_INT bits = INTVAL (x);
13903 fputs ((bits & 4) != 0 ? "r" : "", stream);
13905 return;
13907 /* Memory operand for vld1/vst1 instruction. */
13908 case 'A':
13910 rtx addr;
13911 bool postinc = FALSE;
13912 gcc_assert (GET_CODE (x) == MEM);
13913 addr = XEXP (x, 0);
13914 if (GET_CODE (addr) == POST_INC)
13916 postinc = 1;
13917 addr = XEXP (addr, 0);
13919 asm_fprintf (stream, "[%r]", REGNO (addr));
13920 if (postinc)
13921 fputs("!", stream);
13923 return;
13925 default:
13926 if (x == 0)
13928 output_operand_lossage ("missing operand");
13929 return;
13932 switch (GET_CODE (x))
13934 case REG:
13935 asm_fprintf (stream, "%r", REGNO (x));
13936 break;
13938 case MEM:
13939 output_memory_reference_mode = GET_MODE (x);
13940 output_address (XEXP (x, 0));
13941 break;
13943 case CONST_DOUBLE:
13944 if (TARGET_NEON)
13946 char fpstr[20];
13947 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13948 sizeof (fpstr), 0, 1);
13949 fprintf (stream, "#%s", fpstr);
13951 else
13952 fprintf (stream, "#%s", fp_immediate_constant (x));
13953 break;
13955 default:
13956 gcc_assert (GET_CODE (x) != NEG);
13957 fputc ('#', stream);
13958 output_addr_const (stream, x);
13959 break;
13964 /* Target hook for assembling integer objects. The ARM version needs to
13965 handle word-sized values specially. */
13966 static bool
13967 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13969 enum machine_mode mode;
13971 if (size == UNITS_PER_WORD && aligned_p)
13973 fputs ("\t.word\t", asm_out_file);
13974 output_addr_const (asm_out_file, x);
13976 /* Mark symbols as position independent. We only do this in the
13977 .text segment, not in the .data segment. */
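/* Illustrative output (symbol names are placeholders): ".word foo(GOT)"
   for a global symbol, ".word bar(GOTOFF)" for one known to be local.  */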
13978 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13979 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13981 /* See legitimize_pic_address for an explanation of the
13982 TARGET_VXWORKS_RTP check. */
13983 if (TARGET_VXWORKS_RTP
13984 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13985 fputs ("(GOT)", asm_out_file);
13986 else
13987 fputs ("(GOTOFF)", asm_out_file);
13989 fputc ('\n', asm_out_file);
13990 return true;
13993 mode = GET_MODE (x);
13995 if (arm_vector_mode_supported_p (mode))
13997 int i, units;
13999 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14001 units = CONST_VECTOR_NUNITS (x);
14002 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14004 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14005 for (i = 0; i < units; i++)
14007 rtx elt = CONST_VECTOR_ELT (x, i);
14008 assemble_integer
14009 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
14011 else
14012 for (i = 0; i < units; i++)
14014 rtx elt = CONST_VECTOR_ELT (x, i);
14015 REAL_VALUE_TYPE rval;
14017 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
14019 assemble_real
14020 (rval, GET_MODE_INNER (mode),
14021 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
14024 return true;
14027 return default_assemble_integer (x, size, aligned_p);
14030 static void
14031 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
14033 section *s;
14035 if (!TARGET_AAPCS_BASED)
14037 (is_ctor ?
14038 default_named_section_asm_out_constructor
14039 : default_named_section_asm_out_destructor) (symbol, priority);
14040 return;
14043 /* Put these in the .init_array section, using a special relocation. */
14044 if (priority != DEFAULT_INIT_PRIORITY)
14046 char buf[18];
14047 sprintf (buf, "%s.%.5u",
14048 is_ctor ? ".init_array" : ".fini_array",
14049 priority);
14050 s = get_section (buf, SECTION_WRITE, NULL_TREE);
14052 else if (is_ctor)
14053 s = ctors_section;
14054 else
14055 s = dtors_section;
14057 switch_to_section (s);
14058 assemble_align (POINTER_SIZE);
14059 fputs ("\t.word\t", asm_out_file);
14060 output_addr_const (asm_out_file, symbol);
14061 fputs ("(target1)\n", asm_out_file);
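/* Illustrative sketch of what the code above emits for a constructor
   with priority 101 on an AAPCS target (the symbol name is
   hypothetical; the section name, the .word directive and the
   "(target1)" suffix come from the code above, and section flags are
   omitted here):

        .section .init_array.00101
        .word    __hypothetical_ctor(target1)

   The (target1) annotation produces an R_ARM_TARGET1 relocation, which
   the linker resolves as absolute or relative according to the
   platform's convention.  */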
14064 /* Add a function to the list of static constructors. */
14066 static void
14067 arm_elf_asm_constructor (rtx symbol, int priority)
14069 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
14072 /* Add a function to the list of static destructors. */
14074 static void
14075 arm_elf_asm_destructor (rtx symbol, int priority)
14077 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
14080 /* A finite state machine takes care of noticing whether or not instructions
14081 can be conditionally executed, and thus decrease execution time and code
14082 size by deleting branch instructions. The fsm is controlled by
14083 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
14085 /* The states of the fsm controlling condition codes are:
14086 0: normal, do nothing special
14087 1: make ASM_OUTPUT_OPCODE not output this instruction
14088 2: make ASM_OUTPUT_OPCODE not output this instruction
14089 3: make instructions conditional
14090 4: make instructions conditional
14092 State transitions (state->state by whom under condition):
14093 0 -> 1 final_prescan_insn if the `target' is a label
14094 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
14095 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
14096 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
14097 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
14098 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
14099 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
14100 (the target insn is arm_target_insn).
14102 If the jump clobbers the conditions then we use states 2 and 4.
14104 A similar thing can be done with conditional return insns.
14106 XXX In case the `target' is an unconditional branch, this conditionalising
14107 of the instructions always reduces code size, but not always execution
14108 time. But then, I want to reduce the code size to somewhere near what
14109 /bin/cc produces. */
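/* A hand-written illustration (not generated from this file) of the
   transformation the fsm enables.  A short forward branch such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
   .L1:

   can instead be emitted as

        cmp     r0, #0
        addne   r1, r1, #1

   i.e. the branch is deleted and the skipped instruction is made
   conditional on the inverse of the branch condition.  */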
14111 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
14112 instructions. When a COND_EXEC instruction is seen the subsequent
14113 instructions are scanned so that multiple conditional instructions can be
14114 combined into a single IT block. arm_condexec_count and arm_condexec_mask
14115 specify the length and true/false mask for the IT block. These will be
14116 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
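/* Worked illustration of the mask encoding (derived from
   thumb2_asm_output_opcode below): with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == 0b011, the bits
   map to 't', 't', 'e' and the IT instruction printed is

        itte    eq

   so the first two instructions of the block execute when EQ holds and
   the third executes when NE holds.  */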
14118 /* Returns the index of the ARM condition code string in
14119 `arm_condition_codes'. COMPARISON should be an rtx like
14120 `(eq (...) (...))'. */
14121 static enum arm_cond_code
14122 get_arm_condition_code (rtx comparison)
14124 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
14125 enum arm_cond_code code;
14126 enum rtx_code comp_code = GET_CODE (comparison);
14128 if (GET_MODE_CLASS (mode) != MODE_CC)
14129 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
14130 XEXP (comparison, 1));
14132 switch (mode)
14134 case CC_DNEmode: code = ARM_NE; goto dominance;
14135 case CC_DEQmode: code = ARM_EQ; goto dominance;
14136 case CC_DGEmode: code = ARM_GE; goto dominance;
14137 case CC_DGTmode: code = ARM_GT; goto dominance;
14138 case CC_DLEmode: code = ARM_LE; goto dominance;
14139 case CC_DLTmode: code = ARM_LT; goto dominance;
14140 case CC_DGEUmode: code = ARM_CS; goto dominance;
14141 case CC_DGTUmode: code = ARM_HI; goto dominance;
14142 case CC_DLEUmode: code = ARM_LS; goto dominance;
14143 case CC_DLTUmode: code = ARM_CC;
14145 dominance:
14146 gcc_assert (comp_code == EQ || comp_code == NE);
14148 if (comp_code == EQ)
14149 return ARM_INVERSE_CONDITION_CODE (code);
14150 return code;
14152 case CC_NOOVmode:
14153 switch (comp_code)
14155 case NE: return ARM_NE;
14156 case EQ: return ARM_EQ;
14157 case GE: return ARM_PL;
14158 case LT: return ARM_MI;
14159 default: gcc_unreachable ();
14162 case CC_Zmode:
14163 switch (comp_code)
14165 case NE: return ARM_NE;
14166 case EQ: return ARM_EQ;
14167 default: gcc_unreachable ();
14170 case CC_Nmode:
14171 switch (comp_code)
14173 case NE: return ARM_MI;
14174 case EQ: return ARM_PL;
14175 default: gcc_unreachable ();
14178 case CCFPEmode:
14179 case CCFPmode:
14180 /* These encodings assume that AC=1 in the FPA system control
14181 byte. This allows us to handle all cases except UNEQ and
14182 LTGT. */
14183 switch (comp_code)
14185 case GE: return ARM_GE;
14186 case GT: return ARM_GT;
14187 case LE: return ARM_LS;
14188 case LT: return ARM_MI;
14189 case NE: return ARM_NE;
14190 case EQ: return ARM_EQ;
14191 case ORDERED: return ARM_VC;
14192 case UNORDERED: return ARM_VS;
14193 case UNLT: return ARM_LT;
14194 case UNLE: return ARM_LE;
14195 case UNGT: return ARM_HI;
14196 case UNGE: return ARM_PL;
14197 /* UNEQ and LTGT do not have a representation. */
14198 case UNEQ: /* Fall through. */
14199 case LTGT: /* Fall through. */
14200 default: gcc_unreachable ();
14203 case CC_SWPmode:
14204 switch (comp_code)
14206 case NE: return ARM_NE;
14207 case EQ: return ARM_EQ;
14208 case GE: return ARM_LE;
14209 case GT: return ARM_LT;
14210 case LE: return ARM_GE;
14211 case LT: return ARM_GT;
14212 case GEU: return ARM_LS;
14213 case GTU: return ARM_CC;
14214 case LEU: return ARM_CS;
14215 case LTU: return ARM_HI;
14216 default: gcc_unreachable ();
14219 case CC_Cmode:
14220 switch (comp_code)
14222 case LTU: return ARM_CS;
14223 case GEU: return ARM_CC;
14224 default: gcc_unreachable ();
14227 case CCmode:
14228 switch (comp_code)
14230 case NE: return ARM_NE;
14231 case EQ: return ARM_EQ;
14232 case GE: return ARM_GE;
14233 case GT: return ARM_GT;
14234 case LE: return ARM_LE;
14235 case LT: return ARM_LT;
14236 case GEU: return ARM_CS;
14237 case GTU: return ARM_HI;
14238 case LEU: return ARM_LS;
14239 case LTU: return ARM_CC;
14240 default: gcc_unreachable ();
14243 default: gcc_unreachable ();
14247 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
14248 instructions. */
14249 void
14250 thumb2_final_prescan_insn (rtx insn)
14252 rtx first_insn = insn;
14253 rtx body = PATTERN (insn);
14254 rtx predicate;
14255 enum arm_cond_code code;
14256 int n;
14257 int mask;
14259 /* Remove the previous insn from the count of insns to be output. */
14260 if (arm_condexec_count)
14261 arm_condexec_count--;
14263 /* Nothing to do if we are already inside a conditional block. */
14264 if (arm_condexec_count)
14265 return;
14267 if (GET_CODE (body) != COND_EXEC)
14268 return;
14270 /* Conditional jumps are implemented directly. */
14271 if (GET_CODE (insn) == JUMP_INSN)
14272 return;
14274 predicate = COND_EXEC_TEST (body);
14275 arm_current_cc = get_arm_condition_code (predicate);
14277 n = get_attr_ce_count (insn);
14278 arm_condexec_count = 1;
14279 arm_condexec_mask = (1 << n) - 1;
14280 arm_condexec_masklen = n;
14281 /* See if subsequent instructions can be combined into the same block. */
14282 for (;;)
14284 insn = next_nonnote_insn (insn);
14286 /* Jumping into the middle of an IT block is illegal, so a label or
14287 barrier terminates the block. */
14288 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
14289 break;
14291 body = PATTERN (insn);
14292 /* USE and CLOBBER aren't really insns, so just skip them. */
14293 if (GET_CODE (body) == USE
14294 || GET_CODE (body) == CLOBBER)
14295 continue;
14297 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
14298 if (GET_CODE (body) != COND_EXEC)
14299 break;
14300 /* Allow up to 4 conditionally executed instructions in a block. */
14301 n = get_attr_ce_count (insn);
14302 if (arm_condexec_masklen + n > 4)
14303 break;
14305 predicate = COND_EXEC_TEST (body);
14306 code = get_arm_condition_code (predicate);
14307 mask = (1 << n) - 1;
14308 if (arm_current_cc == code)
14309 arm_condexec_mask |= (mask << arm_condexec_masklen);
14310 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
14311 break;
14313 arm_condexec_count++;
14314 arm_condexec_masklen += n;
14316 /* A jump must be the last instruction in a conditional block. */
14317 if (GET_CODE(insn) == JUMP_INSN)
14318 break;
14320 /* Restore recog_data (getting the attributes of other insns can
14321 destroy this array, but final.c assumes that it remains intact
14322 across this call). */
14323 extract_constrain_insn_cached (first_insn);
14326 void
14327 arm_final_prescan_insn (rtx insn)
14329 /* BODY will hold the body of INSN. */
14330 rtx body = PATTERN (insn);
14332 /* This will be 1 if trying to repeat the trick, and things need to be
14333 reversed if it appears to fail. */
14334 int reverse = 0;
14336 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when
14337 a branch is taken, even if the rtl suggests otherwise. It also
14338 means that we have to grub around within the jump expression to find
14339 out what the conditions are when the jump isn't taken. */
14340 int jump_clobbers = 0;
14342 /* If we start with a return insn, we only succeed if we find another one. */
14343 int seeking_return = 0;
14345 /* START_INSN will hold the insn from where we start looking. This is the
14346 first insn after the following code_label if REVERSE is true. */
14347 rtx start_insn = insn;
14349 /* If in state 4, check if the target branch is reached, in order to
14350 change back to state 0. */
14351 if (arm_ccfsm_state == 4)
14353 if (insn == arm_target_insn)
14355 arm_target_insn = NULL;
14356 arm_ccfsm_state = 0;
14358 return;
14361 /* If in state 3, it is possible to repeat the trick, if this insn is an
14362 unconditional branch to a label, and immediately following this branch
14363 is the previous target label which is only used once, and the label this
14364 branch jumps to is not too far off. */
14365 if (arm_ccfsm_state == 3)
14367 if (simplejump_p (insn))
14369 start_insn = next_nonnote_insn (start_insn);
14370 if (GET_CODE (start_insn) == BARRIER)
14372 /* XXX Isn't this always a barrier? */
14373 start_insn = next_nonnote_insn (start_insn);
14375 if (GET_CODE (start_insn) == CODE_LABEL
14376 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14377 && LABEL_NUSES (start_insn) == 1)
14378 reverse = TRUE;
14379 else
14380 return;
14382 else if (GET_CODE (body) == RETURN)
14384 start_insn = next_nonnote_insn (start_insn);
14385 if (GET_CODE (start_insn) == BARRIER)
14386 start_insn = next_nonnote_insn (start_insn);
14387 if (GET_CODE (start_insn) == CODE_LABEL
14388 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14389 && LABEL_NUSES (start_insn) == 1)
14391 reverse = TRUE;
14392 seeking_return = 1;
14394 else
14395 return;
14397 else
14398 return;
14401 gcc_assert (!arm_ccfsm_state || reverse);
14402 if (GET_CODE (insn) != JUMP_INSN)
14403 return;
14405 /* This jump might be paralleled with a clobber of the condition codes;
14406 the jump should always come first. */
14407 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
14408 body = XVECEXP (body, 0, 0);
14410 if (reverse
14411 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
14412 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
14414 int insns_skipped;
14415 int fail = FALSE, succeed = FALSE;
14416 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
14417 int then_not_else = TRUE;
14418 rtx this_insn = start_insn, label = 0;
14420 /* If the jump cannot be done with one instruction, we cannot
14421 conditionally execute the instruction in the inverse case. */
14422 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
14424 jump_clobbers = 1;
14425 return;
14428 /* Register the insn jumped to. */
14429 if (reverse)
14431 if (!seeking_return)
14432 label = XEXP (SET_SRC (body), 0);
14434 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
14435 label = XEXP (XEXP (SET_SRC (body), 1), 0);
14436 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
14438 label = XEXP (XEXP (SET_SRC (body), 2), 0);
14439 then_not_else = FALSE;
14441 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
14442 seeking_return = 1;
14443 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
14445 seeking_return = 1;
14446 then_not_else = FALSE;
14448 else
14449 gcc_unreachable ();
14451 /* See how many insns this branch skips, and what kind of insns. If all
14452 insns are okay, and the label or unconditional branch to the same
14453 label is not too far away, succeed. */
14454 for (insns_skipped = 0;
14455 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
14457 rtx scanbody;
14459 this_insn = next_nonnote_insn (this_insn);
14460 if (!this_insn)
14461 break;
14463 switch (GET_CODE (this_insn))
14465 case CODE_LABEL:
14466 /* Succeed if it is the target label, otherwise fail since
14467 control falls in from somewhere else. */
14468 if (this_insn == label)
14470 if (jump_clobbers)
14472 arm_ccfsm_state = 2;
14473 this_insn = next_nonnote_insn (this_insn);
14475 else
14476 arm_ccfsm_state = 1;
14477 succeed = TRUE;
14479 else
14480 fail = TRUE;
14481 break;
14483 case BARRIER:
14484 /* Succeed if the following insn is the target label.
14485 Otherwise fail.
14486 If return insns are used then the last insn in a function
14487 will be a barrier. */
14488 this_insn = next_nonnote_insn (this_insn);
14489 if (this_insn && this_insn == label)
14491 if (jump_clobbers)
14493 arm_ccfsm_state = 2;
14494 this_insn = next_nonnote_insn (this_insn);
14496 else
14497 arm_ccfsm_state = 1;
14498 succeed = TRUE;
14500 else
14501 fail = TRUE;
14502 break;
14504 case CALL_INSN:
14505 /* The AAPCS says that conditional calls should not be
14506 used since they make interworking inefficient (the
14507 linker can't transform BL<cond> into BLX). That's
14508 only a problem if the machine has BLX. */
14509 if (arm_arch5)
14511 fail = TRUE;
14512 break;
14515 /* Succeed if the following insn is the target label, or
14516 if the following two insns are a barrier and the
14517 target label. */
14518 this_insn = next_nonnote_insn (this_insn);
14519 if (this_insn && GET_CODE (this_insn) == BARRIER)
14520 this_insn = next_nonnote_insn (this_insn);
14522 if (this_insn && this_insn == label
14523 && insns_skipped < max_insns_skipped)
14525 if (jump_clobbers)
14527 arm_ccfsm_state = 2;
14528 this_insn = next_nonnote_insn (this_insn);
14530 else
14531 arm_ccfsm_state = 1;
14532 succeed = TRUE;
14534 else
14535 fail = TRUE;
14536 break;
14538 case JUMP_INSN:
14539 /* If this is an unconditional branch to the same label, succeed.
14540 If it is to another label, do nothing. If it is conditional,
14541 fail. */
14542 /* XXX Probably, the tests for SET and the PC are
14543 unnecessary. */
14545 scanbody = PATTERN (this_insn);
14546 if (GET_CODE (scanbody) == SET
14547 && GET_CODE (SET_DEST (scanbody)) == PC)
14549 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14550 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14552 arm_ccfsm_state = 2;
14553 succeed = TRUE;
14555 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14556 fail = TRUE;
14558 /* Fail if a conditional return is undesirable (e.g. on a
14559 StrongARM), but still allow this if optimizing for size. */
14560 else if (GET_CODE (scanbody) == RETURN
14561 && !use_return_insn (TRUE, NULL)
14562 && !optimize_size)
14563 fail = TRUE;
14564 else if (GET_CODE (scanbody) == RETURN
14565 && seeking_return)
14567 arm_ccfsm_state = 2;
14568 succeed = TRUE;
14570 else if (GET_CODE (scanbody) == PARALLEL)
14572 switch (get_attr_conds (this_insn))
14574 case CONDS_NOCOND:
14575 break;
14576 default:
14577 fail = TRUE;
14578 break;
14581 else
14582 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14584 break;
14586 case INSN:
14587 /* Instructions using or affecting the condition codes make it
14588 fail. */
14589 scanbody = PATTERN (this_insn);
14590 if (!(GET_CODE (scanbody) == SET
14591 || GET_CODE (scanbody) == PARALLEL)
14592 || get_attr_conds (this_insn) != CONDS_NOCOND)
14593 fail = TRUE;
14595 /* A conditional Cirrus instruction must be followed by
14596 a non-Cirrus instruction. We conditionalize
14597 instructions in this function, but by the time we get
14598 here we can no longer add instructions (nops), because
14599 shorten_branches() has already been called; so, to be
14600 safe, we disable conditionalizing Cirrus instructions
14601 altogether. */
14602 if (GET_CODE (scanbody) != USE
14603 && GET_CODE (scanbody) != CLOBBER
14604 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14605 fail = TRUE;
14606 break;
14608 default:
14609 break;
14612 if (succeed)
14614 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14615 arm_target_label = CODE_LABEL_NUMBER (label);
14616 else
14618 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14620 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14622 this_insn = next_nonnote_insn (this_insn);
14623 gcc_assert (!this_insn
14624 || (GET_CODE (this_insn) != BARRIER
14625 && GET_CODE (this_insn) != CODE_LABEL));
14627 if (!this_insn)
14629 /* Oh dear!  We ran off the end; give up. */
14630 extract_constrain_insn_cached (insn);
14631 arm_ccfsm_state = 0;
14632 arm_target_insn = NULL;
14633 return;
14635 arm_target_insn = this_insn;
14637 if (jump_clobbers)
14639 gcc_assert (!reverse);
14640 arm_current_cc =
14641 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14642 0), 0), 1));
14643 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14644 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14645 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14646 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14648 else
14650 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
14651 what it was. */
14652 if (!reverse)
14653 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
14654 0));
14657 if (reverse || then_not_else)
14658 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14661 /* Restore recog_data (getting the attributes of other insns can
14662 destroy this array, but final.c assumes that it remains intact
14663 across this call). */
14664 extract_constrain_insn_cached (insn);
14668 /* Output IT instructions. */
14669 void
14670 thumb2_asm_output_opcode (FILE * stream)
14672 char buff[5];
14673 int n;
14675 if (arm_condexec_mask)
14677 for (n = 0; n < arm_condexec_masklen; n++)
14678 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14679 buff[n] = 0;
14680 asm_fprintf(stream, "i%s\t%s\n\t", buff,
14681 arm_condition_codes[arm_current_cc]);
14682 arm_condexec_mask = 0;
14686 /* Returns true if REGNO is a valid register
14687 for holding a quantity of type MODE. */
14688 int
14689 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14691 if (GET_MODE_CLASS (mode) == MODE_CC)
14692 return (regno == CC_REGNUM
14693 || (TARGET_HARD_FLOAT && TARGET_VFP
14694 && regno == VFPCC_REGNUM));
14696 if (TARGET_THUMB1)
14697 /* For the Thumb we only allow values bigger than SImode in
14698 registers 0 - 6, so that there is always a second low
14699 register available to hold the upper part of the value.
14700 We probably ought to ensure that the register is the
14701 start of an even-numbered register pair. */
14702 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14704 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14705 && IS_CIRRUS_REGNUM (regno))
14706 /* We have outlawed SI values in Cirrus registers because they
14707 reside in the lower 32 bits, but SF values reside in the
14708 upper 32 bits. This causes gcc all sorts of grief. We can't
14709 even split the registers into pairs because Cirrus SI values
14710 get sign-extended to 64 bits -- aldyh. */
14711 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14713 if (TARGET_HARD_FLOAT && TARGET_VFP
14714 && IS_VFP_REGNUM (regno))
14716 if (mode == SFmode || mode == SImode)
14717 return VFP_REGNO_OK_FOR_SINGLE (regno);
14719 if (mode == DFmode)
14720 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14722 if (TARGET_NEON)
14723 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14724 || (VALID_NEON_QREG_MODE (mode)
14725 && NEON_REGNO_OK_FOR_QUAD (regno))
14726 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14727 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14728 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14729 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14730 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14732 return FALSE;
14735 if (TARGET_REALLY_IWMMXT)
14737 if (IS_IWMMXT_GR_REGNUM (regno))
14738 return mode == SImode;
14740 if (IS_IWMMXT_REGNUM (regno))
14741 return VALID_IWMMXT_REG_MODE (mode);
14744 /* We allow any value to be stored in the general registers.
14745 Restrict doubleword quantities to even register pairs so that we can
14746 use ldrd. Do not allow Neon structure opaque modes in general registers;
14747 they would use too many. */
14748 if (regno <= LAST_ARM_REGNUM)
14749 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14750 && !VALID_NEON_STRUCT_MODE (mode);
14752 if (regno == FRAME_POINTER_REGNUM
14753 || regno == ARG_POINTER_REGNUM)
14754 /* We only allow integers in the fake hard registers. */
14755 return GET_MODE_CLASS (mode) == MODE_INT;
14757 /* The only registers left are the FPA registers
14758 which we only allow to hold FP values. */
14759 return (TARGET_HARD_FLOAT && TARGET_FPA
14760 && GET_MODE_CLASS (mode) == MODE_FLOAT
14761 && regno >= FIRST_FPA_REGNUM
14762 && regno <= LAST_FPA_REGNUM);
14765 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14766 not used in arm mode. */
14768 enum reg_class
14769 arm_regno_class (int regno)
14771 if (TARGET_THUMB1)
14773 if (regno == STACK_POINTER_REGNUM)
14774 return STACK_REG;
14775 if (regno == CC_REGNUM)
14776 return CC_REG;
14777 if (regno < 8)
14778 return LO_REGS;
14779 return HI_REGS;
14782 if (TARGET_THUMB2 && regno < 8)
14783 return LO_REGS;
14785 if ( regno <= LAST_ARM_REGNUM
14786 || regno == FRAME_POINTER_REGNUM
14787 || regno == ARG_POINTER_REGNUM)
14788 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14790 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14791 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14793 if (IS_CIRRUS_REGNUM (regno))
14794 return CIRRUS_REGS;
14796 if (IS_VFP_REGNUM (regno))
14798 if (regno <= D7_VFP_REGNUM)
14799 return VFP_D0_D7_REGS;
14800 else if (regno <= LAST_LO_VFP_REGNUM)
14801 return VFP_LO_REGS;
14802 else
14803 return VFP_HI_REGS;
14806 if (IS_IWMMXT_REGNUM (regno))
14807 return IWMMXT_REGS;
14809 if (IS_IWMMXT_GR_REGNUM (regno))
14810 return IWMMXT_GR_REGS;
14812 return FPA_REGS;
14815 /* Handle a special case when computing the offset
14816 of an argument from the frame pointer. */
14817 int
14818 arm_debugger_arg_offset (int value, rtx addr)
14820 rtx insn;
14822 /* We are only interested if dbxout_parms() failed to compute the offset. */
14823 if (value != 0)
14824 return 0;
14826 /* We can only cope with the case where the address is held in a register. */
14827 if (GET_CODE (addr) != REG)
14828 return 0;
14830 /* If we are using the frame pointer to point at the argument, then
14831 an offset of 0 is correct. */
14832 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14833 return 0;
14835 /* If we are using the stack pointer to point at the
14836 argument, then an offset of 0 is correct. */
14837 /* ??? Check this is consistent with thumb2 frame layout. */
14838 if ((TARGET_THUMB || !frame_pointer_needed)
14839 && REGNO (addr) == SP_REGNUM)
14840 return 0;
14842 /* Oh dear. The argument is pointed to by a register rather
14843 than being held in a register, or being stored at a known
14844 offset from the frame pointer. Since GDB only understands
14845 those two kinds of argument we must translate the address
14846 held in the register into an offset from the frame pointer.
14847 We do this by searching through the insns for the function
14848 looking to see where this register gets its value. If the
14849 register is initialized from the frame pointer plus an offset
14850 then we are in luck and we can continue, otherwise we give up.
14852 This code is exercised by producing debugging information
14853 for a function with arguments like this:
14855 double func (double a, double b, int c, double d) {return d;}
14857 Without this code the stab for parameter 'd' will be set to
14858 an offset of 0 from the frame pointer, rather than 8. */
14860 /* The if() statement says:
14862 If the insn is a normal instruction
14863 and if the insn is setting the value in a register
14864 and if the register being set is the register holding the address of the argument
14865 and if the address is computed by an addition
14866 that involves adding to a register
14867 which is the frame pointer
14868 a constant integer
14870 then... */
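/* In RTL terms the loop below looks for an insn of the shape
   (illustrative sketch):

     (set (reg ADDR)
          (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int OFFSET)))

   where ADDR is the register holding the argument's address; OFFSET
   then becomes the value returned.  */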
14872 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14874 if ( GET_CODE (insn) == INSN
14875 && GET_CODE (PATTERN (insn)) == SET
14876 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14877 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14878 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14879 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14880 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14883 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14885 break;
14889 if (value == 0)
14891 debug_rtx (addr);
14892 warning (0, "unable to compute real location of stacked parameter");
14893 value = 8; /* XXX magic hack */
14896 return value;
14899 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14900 do \
14902 if ((MASK) & insn_flags) \
14903 add_builtin_function ((NAME), (TYPE), (CODE), \
14904 BUILT_IN_MD, NULL, NULL_TREE); \
14906 while (0)
14908 struct builtin_description
14910 const unsigned int mask;
14911 const enum insn_code icode;
14912 const char * const name;
14913 const enum arm_builtins code;
14914 const enum rtx_code comparison;
14915 const unsigned int flag;
14918 static const struct builtin_description bdesc_2arg[] =
14920 #define IWMMXT_BUILTIN(code, string, builtin) \
14921 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14922 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
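/* For example, the entry  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   below expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   so each line ties an insn pattern to the user-visible
   __builtin_arm_* name and its ARM_BUILTIN_* code.  */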
14924 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14925 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14926 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14927 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14928 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14929 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14930 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14931 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14932 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14933 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14934 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14935 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14936 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14937 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14938 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14939 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14940 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14941 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14942 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14943 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14944 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14945 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14946 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14947 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14948 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14949 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14950 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14951 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14952 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14953 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14954 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14955 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14956 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14957 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14958 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14959 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14960 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14961 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14962 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14963 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14964 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14965 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14966 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14967 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14968 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14969 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14970 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14971 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14972 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14973 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14974 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14975 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14976 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14977 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14978 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14979 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14980 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14981 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14983 #define IWMMXT_BUILTIN2(code, builtin) \
14984 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
14986 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14987 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14988 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14989 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14990 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14991 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14992 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14993 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14994 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14995 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14996 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14997 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14998 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14999 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
15000 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
15001 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
15002 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
15003 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
15004 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
15005 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
15006 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
15007 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
15008 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
15009 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
15010 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
15011 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
15012 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
15013 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
15014 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
15015 IWMMXT_BUILTIN2 (rordi3, WRORDI)
15016 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
15017 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
15020 static const struct builtin_description bdesc_1arg[] =
15022 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
15023 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
15024 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
15025 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
15026 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
15027 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
15028 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
15029 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
15030 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
15031 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
15032 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
15033 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
15034 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
15035 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
15036 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
15037 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
15038 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
15039 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
15042 /* Set up all the iWMMXt builtins. This is
15043 not called if TARGET_IWMMXT is zero. */
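/* From a user's point of view the two-operand builtins registered
   here behave like ordinary vector functions.  An illustrative
   sketch (the typedef is an assumption; the real declarations live
   in mmintrin.h):

     typedef int __v2si __attribute__ ((vector_size (8)));
     __v2si sum (__v2si a, __v2si b)
     {
       return __builtin_arm_waddw (a, b);
     }

   Each def_mbuiltin call below only registers its builtin when the
   FL_IWMMXT capability bit is present in insn_flags.  */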
15045 static void
15046 arm_init_iwmmxt_builtins (void)
15048 const struct builtin_description * d;
15049 size_t i;
15050 tree endlink = void_list_node;
15052 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15053 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15054 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15056 tree int_ftype_int
15057 = build_function_type (integer_type_node,
15058 tree_cons (NULL_TREE, integer_type_node, endlink));
15059 tree v8qi_ftype_v8qi_v8qi_int
15060 = build_function_type (V8QI_type_node,
15061 tree_cons (NULL_TREE, V8QI_type_node,
15062 tree_cons (NULL_TREE, V8QI_type_node,
15063 tree_cons (NULL_TREE,
15064 integer_type_node,
15065 endlink))));
15066 tree v4hi_ftype_v4hi_int
15067 = build_function_type (V4HI_type_node,
15068 tree_cons (NULL_TREE, V4HI_type_node,
15069 tree_cons (NULL_TREE, integer_type_node,
15070 endlink)));
15071 tree v2si_ftype_v2si_int
15072 = build_function_type (V2SI_type_node,
15073 tree_cons (NULL_TREE, V2SI_type_node,
15074 tree_cons (NULL_TREE, integer_type_node,
15075 endlink)));
15076 tree v2si_ftype_di_di
15077 = build_function_type (V2SI_type_node,
15078 tree_cons (NULL_TREE, long_long_integer_type_node,
15079 tree_cons (NULL_TREE, long_long_integer_type_node,
15080 endlink)));
15081 tree di_ftype_di_int
15082 = build_function_type (long_long_integer_type_node,
15083 tree_cons (NULL_TREE, long_long_integer_type_node,
15084 tree_cons (NULL_TREE, integer_type_node,
15085 endlink)));
15086 tree di_ftype_di_int_int
15087 = build_function_type (long_long_integer_type_node,
15088 tree_cons (NULL_TREE, long_long_integer_type_node,
15089 tree_cons (NULL_TREE, integer_type_node,
15090 tree_cons (NULL_TREE,
15091 integer_type_node,
15092 endlink))));
15093 tree int_ftype_v8qi
15094 = build_function_type (integer_type_node,
15095 tree_cons (NULL_TREE, V8QI_type_node,
15096 endlink));
15097 tree int_ftype_v4hi
15098 = build_function_type (integer_type_node,
15099 tree_cons (NULL_TREE, V4HI_type_node,
15100 endlink));
15101 tree int_ftype_v2si
15102 = build_function_type (integer_type_node,
15103 tree_cons (NULL_TREE, V2SI_type_node,
15104 endlink));
15105 tree int_ftype_v8qi_int
15106 = build_function_type (integer_type_node,
15107 tree_cons (NULL_TREE, V8QI_type_node,
15108 tree_cons (NULL_TREE, integer_type_node,
15109 endlink)));
15110 tree int_ftype_v4hi_int
15111 = build_function_type (integer_type_node,
15112 tree_cons (NULL_TREE, V4HI_type_node,
15113 tree_cons (NULL_TREE, integer_type_node,
15114 endlink)));
15115 tree int_ftype_v2si_int
15116 = build_function_type (integer_type_node,
15117 tree_cons (NULL_TREE, V2SI_type_node,
15118 tree_cons (NULL_TREE, integer_type_node,
15119 endlink)));
15120 tree v8qi_ftype_v8qi_int_int
15121 = build_function_type (V8QI_type_node,
15122 tree_cons (NULL_TREE, V8QI_type_node,
15123 tree_cons (NULL_TREE, integer_type_node,
15124 tree_cons (NULL_TREE,
15125 integer_type_node,
15126 endlink))));
15127 tree v4hi_ftype_v4hi_int_int
15128 = build_function_type (V4HI_type_node,
15129 tree_cons (NULL_TREE, V4HI_type_node,
15130 tree_cons (NULL_TREE, integer_type_node,
15131 tree_cons (NULL_TREE,
15132 integer_type_node,
15133 endlink))));
15134 tree v2si_ftype_v2si_int_int
15135 = build_function_type (V2SI_type_node,
15136 tree_cons (NULL_TREE, V2SI_type_node,
15137 tree_cons (NULL_TREE, integer_type_node,
15138 tree_cons (NULL_TREE,
15139 integer_type_node,
15140 endlink))));
15141 /* Miscellaneous. */
15142 tree v8qi_ftype_v4hi_v4hi
15143 = build_function_type (V8QI_type_node,
15144 tree_cons (NULL_TREE, V4HI_type_node,
15145 tree_cons (NULL_TREE, V4HI_type_node,
15146 endlink)));
15147 tree v4hi_ftype_v2si_v2si
15148 = build_function_type (V4HI_type_node,
15149 tree_cons (NULL_TREE, V2SI_type_node,
15150 tree_cons (NULL_TREE, V2SI_type_node,
15151 endlink)));
15152 tree v2si_ftype_v4hi_v4hi
15153 = build_function_type (V2SI_type_node,
15154 tree_cons (NULL_TREE, V4HI_type_node,
15155 tree_cons (NULL_TREE, V4HI_type_node,
15156 endlink)));
15157 tree v2si_ftype_v8qi_v8qi
15158 = build_function_type (V2SI_type_node,
15159 tree_cons (NULL_TREE, V8QI_type_node,
15160 tree_cons (NULL_TREE, V8QI_type_node,
15161 endlink)));
15162 tree v4hi_ftype_v4hi_di
15163 = build_function_type (V4HI_type_node,
15164 tree_cons (NULL_TREE, V4HI_type_node,
15165 tree_cons (NULL_TREE,
15166 long_long_integer_type_node,
15167 endlink)));
15168 tree v2si_ftype_v2si_di
15169 = build_function_type (V2SI_type_node,
15170 tree_cons (NULL_TREE, V2SI_type_node,
15171 tree_cons (NULL_TREE,
15172 long_long_integer_type_node,
15173 endlink)));
15174 tree void_ftype_int_int
15175 = build_function_type (void_type_node,
15176 tree_cons (NULL_TREE, integer_type_node,
15177 tree_cons (NULL_TREE, integer_type_node,
15178 endlink)));
15179 tree di_ftype_void
15180 = build_function_type (long_long_unsigned_type_node, endlink);
15181 tree di_ftype_v8qi
15182 = build_function_type (long_long_integer_type_node,
15183 tree_cons (NULL_TREE, V8QI_type_node,
15184 endlink));
15185 tree di_ftype_v4hi
15186 = build_function_type (long_long_integer_type_node,
15187 tree_cons (NULL_TREE, V4HI_type_node,
15188 endlink));
15189 tree di_ftype_v2si
15190 = build_function_type (long_long_integer_type_node,
15191 tree_cons (NULL_TREE, V2SI_type_node,
15192 endlink));
15193 tree v2si_ftype_v4hi
15194 = build_function_type (V2SI_type_node,
15195 tree_cons (NULL_TREE, V4HI_type_node,
15196 endlink));
15197 tree v4hi_ftype_v8qi
15198 = build_function_type (V4HI_type_node,
15199 tree_cons (NULL_TREE, V8QI_type_node,
15200 endlink));
15202 tree di_ftype_di_v4hi_v4hi
15203 = build_function_type (long_long_unsigned_type_node,
15204 tree_cons (NULL_TREE,
15205 long_long_unsigned_type_node,
15206 tree_cons (NULL_TREE, V4HI_type_node,
15207 tree_cons (NULL_TREE,
15208 V4HI_type_node,
15209 endlink))));
15211 tree di_ftype_v4hi_v4hi
15212 = build_function_type (long_long_unsigned_type_node,
15213 tree_cons (NULL_TREE, V4HI_type_node,
15214 tree_cons (NULL_TREE, V4HI_type_node,
15215 endlink)));
15217 /* Normal vector binops. */
15218 tree v8qi_ftype_v8qi_v8qi
15219 = build_function_type (V8QI_type_node,
15220 tree_cons (NULL_TREE, V8QI_type_node,
15221 tree_cons (NULL_TREE, V8QI_type_node,
15222 endlink)));
15223 tree v4hi_ftype_v4hi_v4hi
15224 = build_function_type (V4HI_type_node,
15225 tree_cons (NULL_TREE, V4HI_type_node,
15226 tree_cons (NULL_TREE, V4HI_type_node,
15227 endlink)));
15228 tree v2si_ftype_v2si_v2si
15229 = build_function_type (V2SI_type_node,
15230 tree_cons (NULL_TREE, V2SI_type_node,
15231 tree_cons (NULL_TREE, V2SI_type_node,
15232 endlink)));
15233 tree di_ftype_di_di
15234 = build_function_type (long_long_unsigned_type_node,
15235 tree_cons (NULL_TREE, long_long_unsigned_type_node,
15236 tree_cons (NULL_TREE,
15237 long_long_unsigned_type_node,
15238 endlink)));
15240 /* Add all builtins that are more or less simple operations on two
15241 operands. */
15242 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15244 /* Use one of the operands; the target can have a different mode for
15245 mask-generating compares. */
15246 enum machine_mode mode;
15247 tree type;
15249 if (d->name == 0)
15250 continue;
15252 mode = insn_data[d->icode].operand[1].mode;
15254 switch (mode)
15256 case V8QImode:
15257 type = v8qi_ftype_v8qi_v8qi;
15258 break;
15259 case V4HImode:
15260 type = v4hi_ftype_v4hi_v4hi;
15261 break;
15262 case V2SImode:
15263 type = v2si_ftype_v2si_v2si;
15264 break;
15265 case DImode:
15266 type = di_ftype_di_di;
15267 break;
15269 default:
15270 gcc_unreachable ();
15273 def_mbuiltin (d->mask, d->name, type, d->code);
15276 /* Add the remaining MMX insns with somewhat more complicated types. */
15277 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
15278 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
15279 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
15281 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
15282 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
15283 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
15284 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
15285 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
15286 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
15288 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
15289 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
15290 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
15291 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
15292 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
15293 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
15295 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
15296 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
15297 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
15298 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
15299 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
15300 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
15302 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
15303 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
15304 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
15305 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
15306 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
15307 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
15309 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
15311 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
15312 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
15313 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
15314 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
15316 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
15317 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
15318 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
15319 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
15320 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
15321 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
15322 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
15323 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
15324 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
15326 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
15327 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
15328 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
15330 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
15331 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
15332 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
15334 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
15335 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
15336 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
15337 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
15338 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
15339 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
15341 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
15342 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
15343 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
15344 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
15345 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
15346 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
15347 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
15348 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
15349 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
15350 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
15351 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
15352 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
15354 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
15355 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
15356 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
15357 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
15359 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
15360 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
15361 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
15362 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
15363 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
15364 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
15365 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
15368 static void
15369 arm_init_tls_builtins (void)
15371 tree ftype, decl;
15373 ftype = build_function_type (ptr_type_node, void_list_node);
15374 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
15375 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
15376 NULL, NULL_TREE);
15377 TREE_NOTHROW (decl) = 1;
15378 TREE_READONLY (decl) = 1;
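/* The builtin registered above is usable directly from C, e.g.
   (illustrative):

     void *tp = __builtin_thread_pointer ();

   It is marked nothrow and readonly, so repeated calls can be
   commoned by the optimizers.  */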
15381 enum neon_builtin_type_bits {
15382 T_V8QI = 0x0001,
15383 T_V4HI = 0x0002,
15384 T_V2SI = 0x0004,
15385 T_V2SF = 0x0008,
15386 T_DI = 0x0010,
15387 T_V16QI = 0x0020,
15388 T_V8HI = 0x0040,
15389 T_V4SI = 0x0080,
15390 T_V4SF = 0x0100,
15391 T_V2DI = 0x0200,
15392 T_TI = 0x0400,
15393 T_EI = 0x0800,
15394 T_OI = 0x1000
15397 #define v8qi_UP T_V8QI
15398 #define v4hi_UP T_V4HI
15399 #define v2si_UP T_V2SI
15400 #define v2sf_UP T_V2SF
15401 #define di_UP T_DI
15402 #define v16qi_UP T_V16QI
15403 #define v8hi_UP T_V8HI
15404 #define v4si_UP T_V4SI
15405 #define v4sf_UP T_V4SF
15406 #define v2di_UP T_V2DI
15407 #define ti_UP T_TI
15408 #define ei_UP T_EI
15409 #define oi_UP T_OI
15411 #define UP(X) X##_UP
15413 #define T_MAX 13
15415 typedef enum {
15416 NEON_BINOP,
15417 NEON_TERNOP,
15418 NEON_UNOP,
15419 NEON_GETLANE,
15420 NEON_SETLANE,
15421 NEON_CREATE,
15422 NEON_DUP,
15423 NEON_DUPLANE,
15424 NEON_COMBINE,
15425 NEON_SPLIT,
15426 NEON_LANEMUL,
15427 NEON_LANEMULL,
15428 NEON_LANEMULH,
15429 NEON_LANEMAC,
15430 NEON_SCALARMUL,
15431 NEON_SCALARMULL,
15432 NEON_SCALARMULH,
15433 NEON_SCALARMAC,
15434 NEON_CONVERT,
15435 NEON_FIXCONV,
15436 NEON_SELECT,
15437 NEON_RESULTPAIR,
15438 NEON_REINTERP,
15439 NEON_VTBL,
15440 NEON_VTBX,
15441 NEON_LOAD1,
15442 NEON_LOAD1LANE,
15443 NEON_STORE1,
15444 NEON_STORE1LANE,
15445 NEON_LOADSTRUCT,
15446 NEON_LOADSTRUCTLANE,
15447 NEON_STORESTRUCT,
15448 NEON_STORESTRUCTLANE,
15449 NEON_LOGICBINOP,
15450 NEON_SHIFTINSERT,
15451 NEON_SHIFTIMM,
15452 NEON_SHIFTACC
15453 } neon_itype;
15455 typedef struct {
15456 const char *name;
15457 const neon_itype itype;
15458 const int bits;
15459 const enum insn_code codes[T_MAX];
15460 const unsigned int num_vars;
15461 unsigned int base_fcode;
15462 } neon_builtin_datum;
15464 #define CF(N,X) CODE_FOR_neon_##N##X
15466 #define VAR1(T, N, A) \
15467 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
15468 #define VAR2(T, N, A, B) \
15469 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
15470 #define VAR3(T, N, A, B, C) \
15471 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
15472 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
15473 #define VAR4(T, N, A, B, C, D) \
15474 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
15475 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
15476 #define VAR5(T, N, A, B, C, D, E) \
15477 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
15478 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
15479 #define VAR6(T, N, A, B, C, D, E, F) \
15480 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
15481 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
15482 #define VAR7(T, N, A, B, C, D, E, F, G) \
15483 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
15484 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15485 CF (N, G) }, 7, 0
15486 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
15487 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15488 | UP (H), \
15489 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15490 CF (N, G), CF (N, H) }, 8, 0
15491 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
15492 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15493 | UP (H) | UP (I), \
15494 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15495 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
15496 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
15497 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15498 | UP (H) | UP (I) | UP (J), \
15499 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15500 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
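/* For example, the table entry  { VAR2 (BINOP, vqdmull, v4hi, v2si) }
   below expands to

     { "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
       { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0 },

   i.e. one insn code per "key" mode, with the modes flagged in the
   bits field.  */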
15502 /* The mode entries in the following table correspond to the "key" type of the
15503 instruction variant, i.e. equivalent to that which would be specified after
15504 the assembler mnemonic, which usually refers to the last vector operand.
15505 (Signed/unsigned/polynomial types are not differentiated between though, and
15506 are all mapped onto the same mode for a given element size.) The modes
15507 listed per instruction should be the same as those defined for that
15508 instruction's pattern in neon.md.
15509 WARNING: Variants should be listed in the same increasing order as
15510 neon_builtin_type_bits. */
15512 static neon_builtin_datum neon_builtin_data[] =
15514 { VAR10 (BINOP, vadd,
15515 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15516 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
15517 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
15518 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15519 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15520 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
15521 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15522 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15523 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
15524 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15525 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
15526 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
15527 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
15528 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
15529 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
15530 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
15531 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
15532 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
15533 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
15534 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
15535 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
15536 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
15537 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15538 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15539 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15540 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
15541 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
15542 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
15543 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15544 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15545 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15546 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
15547 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15548 { VAR10 (BINOP, vsub,
15549 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15550 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15551 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15552 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15553 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15554 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15555 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15556 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15557 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15558 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15559 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15560 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15561 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15562 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15563 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15564 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15565 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15566 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15567 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15568 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15569 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15570 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15571 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15572 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15573 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15574 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15575 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15576 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15577 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15578 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15579 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15580 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15581 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15582 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15583 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15584 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15585 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15586 /* FIXME: vget_lane supports more variants than this! */
15587 { VAR10 (GETLANE, vget_lane,
15588 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15589 { VAR10 (SETLANE, vset_lane,
15590 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15591 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15592 { VAR10 (DUP, vdup_n,
15593 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15594 { VAR10 (DUPLANE, vdup_lane,
15595 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15596 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15597 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15598 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15599 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15600 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15601 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15602 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15603 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15604 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15605 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15606 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15607 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15608 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15609 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15610 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15611 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15612 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15613 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15614 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15615 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15616 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15617 { VAR10 (BINOP, vext,
15618 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15619 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15620 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15621 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15622 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15623 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15624 { VAR10 (SELECT, vbsl,
15625 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15626 { VAR1 (VTBL, vtbl1, v8qi) },
15627 { VAR1 (VTBL, vtbl2, v8qi) },
15628 { VAR1 (VTBL, vtbl3, v8qi) },
15629 { VAR1 (VTBL, vtbl4, v8qi) },
15630 { VAR1 (VTBX, vtbx1, v8qi) },
15631 { VAR1 (VTBX, vtbx2, v8qi) },
15632 { VAR1 (VTBX, vtbx3, v8qi) },
15633 { VAR1 (VTBX, vtbx4, v8qi) },
15634 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15635 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15636 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15637 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15638 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15639 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15640 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15641 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15642 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15643 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15644 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15645 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15646 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15647 { VAR10 (LOAD1, vld1,
15648 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15649 { VAR10 (LOAD1LANE, vld1_lane,
15650 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15651 { VAR10 (LOAD1, vld1_dup,
15652 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15653 { VAR10 (STORE1, vst1,
15654 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15655 { VAR10 (STORE1LANE, vst1_lane,
15656 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15657 { VAR9 (LOADSTRUCT,
15658 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15659 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15660 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15661 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15662 { VAR9 (STORESTRUCT, vst2,
15663 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15664 { VAR7 (STORESTRUCTLANE, vst2_lane,
15665 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15666 { VAR9 (LOADSTRUCT,
15667 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15668 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15669 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15670 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15671 { VAR9 (STORESTRUCT, vst3,
15672 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15673 { VAR7 (STORESTRUCTLANE, vst3_lane,
15674 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15675 { VAR9 (LOADSTRUCT, vld4,
15676 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15677 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15678 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15679 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15680 { VAR9 (STORESTRUCT, vst4,
15681 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15682 { VAR7 (STORESTRUCTLANE, vst4_lane,
15683 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15684 { VAR10 (LOGICBINOP, vand,
15685 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15686 { VAR10 (LOGICBINOP, vorr,
15687 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15688 { VAR10 (BINOP, veor,
15689 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15690 { VAR10 (LOGICBINOP, vbic,
15691 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15692 { VAR10 (LOGICBINOP, vorn,
15693 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15696 #undef CF
15697 #undef VAR1
15698 #undef VAR2
15699 #undef VAR3
15700 #undef VAR4
15701 #undef VAR5
15702 #undef VAR6
15703 #undef VAR7
15704 #undef VAR8
15705 #undef VAR9
15706 #undef VAR10
15708 static void
15709 arm_init_neon_builtins (void)
15711 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15713 tree neon_intQI_type_node;
15714 tree neon_intHI_type_node;
15715 tree neon_polyQI_type_node;
15716 tree neon_polyHI_type_node;
15717 tree neon_intSI_type_node;
15718 tree neon_intDI_type_node;
15719 tree neon_float_type_node;
15721 tree intQI_pointer_node;
15722 tree intHI_pointer_node;
15723 tree intSI_pointer_node;
15724 tree intDI_pointer_node;
15725 tree float_pointer_node;
15727 tree const_intQI_node;
15728 tree const_intHI_node;
15729 tree const_intSI_node;
15730 tree const_intDI_node;
15731 tree const_float_node;
15733 tree const_intQI_pointer_node;
15734 tree const_intHI_pointer_node;
15735 tree const_intSI_pointer_node;
15736 tree const_intDI_pointer_node;
15737 tree const_float_pointer_node;
15739 tree V8QI_type_node;
15740 tree V4HI_type_node;
15741 tree V2SI_type_node;
15742 tree V2SF_type_node;
15743 tree V16QI_type_node;
15744 tree V8HI_type_node;
15745 tree V4SI_type_node;
15746 tree V4SF_type_node;
15747 tree V2DI_type_node;
15749 tree intUQI_type_node;
15750 tree intUHI_type_node;
15751 tree intUSI_type_node;
15752 tree intUDI_type_node;
15754 tree intEI_type_node;
15755 tree intOI_type_node;
15756 tree intCI_type_node;
15757 tree intXI_type_node;
15759 tree V8QI_pointer_node;
15760 tree V4HI_pointer_node;
15761 tree V2SI_pointer_node;
15762 tree V2SF_pointer_node;
15763 tree V16QI_pointer_node;
15764 tree V8HI_pointer_node;
15765 tree V4SI_pointer_node;
15766 tree V4SF_pointer_node;
15767 tree V2DI_pointer_node;
15769 tree void_ftype_pv8qi_v8qi_v8qi;
15770 tree void_ftype_pv4hi_v4hi_v4hi;
15771 tree void_ftype_pv2si_v2si_v2si;
15772 tree void_ftype_pv2sf_v2sf_v2sf;
15773 tree void_ftype_pdi_di_di;
15774 tree void_ftype_pv16qi_v16qi_v16qi;
15775 tree void_ftype_pv8hi_v8hi_v8hi;
15776 tree void_ftype_pv4si_v4si_v4si;
15777 tree void_ftype_pv4sf_v4sf_v4sf;
15778 tree void_ftype_pv2di_v2di_v2di;
15780 tree reinterp_ftype_dreg[5][5];
15781 tree reinterp_ftype_qreg[5][5];
15782 tree dreg_types[5], qreg_types[5];
15784 /* Create distinguished type nodes for NEON vector element types,
15785 and pointers to values of such types, so we can detect them later. */
15786 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15787 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15788 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15789 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15790 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15791 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15792 neon_float_type_node = make_node (REAL_TYPE);
15793 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15794 layout_type (neon_float_type_node);
15796 /* Define typedefs which exactly correspond to the modes we are basing vector
15797 types on. If you change these names you'll need to change
15798 the table used by arm_mangle_type too. */
15799 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15800 "__builtin_neon_qi");
15801 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15802 "__builtin_neon_hi");
15803 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15804 "__builtin_neon_si");
15805 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15806 "__builtin_neon_sf");
15807 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15808 "__builtin_neon_di");
15809 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15810 "__builtin_neon_poly8");
15811 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15812 "__builtin_neon_poly16");
15814 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15815 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15816 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15817 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15818 float_pointer_node = build_pointer_type (neon_float_type_node);
15820 /* Next create constant-qualified versions of the above types. */
15821 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15822 TYPE_QUAL_CONST);
15823 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15824 TYPE_QUAL_CONST);
15825 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15826 TYPE_QUAL_CONST);
15827 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15828 TYPE_QUAL_CONST);
15829 const_float_node = build_qualified_type (neon_float_type_node,
15830 TYPE_QUAL_CONST);
15832 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15833 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15834 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15835 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15836 const_float_pointer_node = build_pointer_type (const_float_node);
15838 /* Now create vector types based on our NEON element types. */
15839 /* 64-bit vectors. */
15840 V8QI_type_node =
15841 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15842 V4HI_type_node =
15843 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15844 V2SI_type_node =
15845 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15846 V2SF_type_node =
15847 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15848 /* 128-bit vectors. */
15849 V16QI_type_node =
15850 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15851 V8HI_type_node =
15852 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15853 V4SI_type_node =
15854 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15855 V4SF_type_node =
15856 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15857 V2DI_type_node =
15858 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15860 /* Unsigned integer types for various mode sizes. */
15861 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15862 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15863 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15864 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15866 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15867 "__builtin_neon_uqi");
15868 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15869 "__builtin_neon_uhi");
15870 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15871 "__builtin_neon_usi");
15872 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15873 "__builtin_neon_udi");
15875 /* Opaque integer types for structures of vectors. */
15876 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15877 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15878 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15879 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15881 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15882 "__builtin_neon_ti");
15883 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15884 "__builtin_neon_ei");
15885 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15886 "__builtin_neon_oi");
15887 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15888 "__builtin_neon_ci");
15889 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15890 "__builtin_neon_xi");
15892 /* Pointers to vector types. */
15893 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15894 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15895 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15896 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15897 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15898 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15899 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15900 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15901 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15903 /* Operations which return results as pairs. */
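/* These types serve the NEON_RESULTPAIR builtins (vtrn, vzip and vuzp in the
   table above): the two-register result is written through the pointer
   argument rather than returned; see neon_emit_pair_result_insn below.  */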
15904 void_ftype_pv8qi_v8qi_v8qi =
15905 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15906 V8QI_type_node, NULL);
15907 void_ftype_pv4hi_v4hi_v4hi =
15908 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15909 V4HI_type_node, NULL);
15910 void_ftype_pv2si_v2si_v2si =
15911 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15912 V2SI_type_node, NULL);
15913 void_ftype_pv2sf_v2sf_v2sf =
15914 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15915 V2SF_type_node, NULL);
15916 void_ftype_pdi_di_di =
15917 build_function_type_list (void_type_node, intDI_pointer_node,
15918 neon_intDI_type_node, neon_intDI_type_node, NULL);
15919 void_ftype_pv16qi_v16qi_v16qi =
15920 build_function_type_list (void_type_node, V16QI_pointer_node,
15921 V16QI_type_node, V16QI_type_node, NULL);
15922 void_ftype_pv8hi_v8hi_v8hi =
15923 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15924 V8HI_type_node, NULL);
15925 void_ftype_pv4si_v4si_v4si =
15926 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15927 V4SI_type_node, NULL);
15928 void_ftype_pv4sf_v4sf_v4sf =
15929 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15930 V4SF_type_node, NULL);
15931 void_ftype_pv2di_v2di_v2di =
15932 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15933 V2DI_type_node, NULL);
15935 dreg_types[0] = V8QI_type_node;
15936 dreg_types[1] = V4HI_type_node;
15937 dreg_types[2] = V2SI_type_node;
15938 dreg_types[3] = V2SF_type_node;
15939 dreg_types[4] = neon_intDI_type_node;
15941 qreg_types[0] = V16QI_type_node;
15942 qreg_types[1] = V8HI_type_node;
15943 qreg_types[2] = V4SI_type_node;
15944 qreg_types[3] = V4SF_type_node;
15945 qreg_types[4] = V2DI_type_node;
15947 for (i = 0; i < 5; i++)
15949 int j;
15950 for (j = 0; j < 5; j++)
15952 reinterp_ftype_dreg[i][j]
15953 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15954 reinterp_ftype_qreg[i][j]
15955 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15959 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15961 neon_builtin_datum *d = &neon_builtin_data[i];
15962 unsigned int j, codeidx = 0;
15964 d->base_fcode = fcode;
15966 for (j = 0; j < T_MAX; j++)
15968 const char* const modenames[] = {
15969 "v8qi", "v4hi", "v2si", "v2sf", "di",
15970 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15972 char namebuf[60];
15973 tree ftype = NULL;
15974 enum insn_code icode;
15975 int is_load = 0, is_store = 0;
15977 if ((d->bits & (1 << j)) == 0)
15978 continue;
15980 icode = d->codes[codeidx++];
15982 switch (d->itype)
15984 case NEON_LOAD1:
15985 case NEON_LOAD1LANE:
15986 case NEON_LOADSTRUCT:
15987 case NEON_LOADSTRUCTLANE:
15988 is_load = 1;
15989 /* Fall through. */
15990 case NEON_STORE1:
15991 case NEON_STORE1LANE:
15992 case NEON_STORESTRUCT:
15993 case NEON_STORESTRUCTLANE:
15994 if (!is_load)
15995 is_store = 1;
15996 /* Fall through. */
15997 case NEON_UNOP:
15998 case NEON_BINOP:
15999 case NEON_LOGICBINOP:
16000 case NEON_SHIFTINSERT:
16001 case NEON_TERNOP:
16002 case NEON_GETLANE:
16003 case NEON_SETLANE:
16004 case NEON_CREATE:
16005 case NEON_DUP:
16006 case NEON_DUPLANE:
16007 case NEON_SHIFTIMM:
16008 case NEON_SHIFTACC:
16009 case NEON_COMBINE:
16010 case NEON_SPLIT:
16011 case NEON_CONVERT:
16012 case NEON_FIXCONV:
16013 case NEON_LANEMUL:
16014 case NEON_LANEMULL:
16015 case NEON_LANEMULH:
16016 case NEON_LANEMAC:
16017 case NEON_SCALARMUL:
16018 case NEON_SCALARMULL:
16019 case NEON_SCALARMULH:
16020 case NEON_SCALARMAC:
16021 case NEON_SELECT:
16022 case NEON_VTBL:
16023 case NEON_VTBX:
16025 int k;
16026 tree return_type = void_type_node, args = void_list_node;
16028 /* Build a function type directly from the insn_data for this
16029 builtin. The build_function_type() function takes care of
16030 removing duplicates for us. */
16031 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
16033 tree eltype;
16035 if (is_load && k == 1)
16037 /* Neon load patterns always have the memory operand
16038 (a SImode pointer) in the operand 1 position. We
16039 want a const pointer to the element type in that
16040 position. */
16041 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16043 switch (1 << j)
16045 case T_V8QI:
16046 case T_V16QI:
16047 eltype = const_intQI_pointer_node;
16048 break;
16050 case T_V4HI:
16051 case T_V8HI:
16052 eltype = const_intHI_pointer_node;
16053 break;
16055 case T_V2SI:
16056 case T_V4SI:
16057 eltype = const_intSI_pointer_node;
16058 break;
16060 case T_V2SF:
16061 case T_V4SF:
16062 eltype = const_float_pointer_node;
16063 break;
16065 case T_DI:
16066 case T_V2DI:
16067 eltype = const_intDI_pointer_node;
16068 break;
16070 default: gcc_unreachable ();
16073 else if (is_store && k == 0)
16075 /* Similarly, Neon store patterns use operand 0 as
16076 the memory location to store to (a SImode pointer).
16077 Use a pointer to the element type of the store in
16078 that position. */
16079 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16081 switch (1 << j)
16083 case T_V8QI:
16084 case T_V16QI:
16085 eltype = intQI_pointer_node;
16086 break;
16088 case T_V4HI:
16089 case T_V8HI:
16090 eltype = intHI_pointer_node;
16091 break;
16093 case T_V2SI:
16094 case T_V4SI:
16095 eltype = intSI_pointer_node;
16096 break;
16098 case T_V2SF:
16099 case T_V4SF:
16100 eltype = float_pointer_node;
16101 break;
16103 case T_DI:
16104 case T_V2DI:
16105 eltype = intDI_pointer_node;
16106 break;
16108 default: gcc_unreachable ();
16111 else
16113 switch (insn_data[icode].operand[k].mode)
16115 case VOIDmode: eltype = void_type_node; break;
16116 /* Scalars. */
16117 case QImode: eltype = neon_intQI_type_node; break;
16118 case HImode: eltype = neon_intHI_type_node; break;
16119 case SImode: eltype = neon_intSI_type_node; break;
16120 case SFmode: eltype = neon_float_type_node; break;
16121 case DImode: eltype = neon_intDI_type_node; break;
16122 case TImode: eltype = intTI_type_node; break;
16123 case EImode: eltype = intEI_type_node; break;
16124 case OImode: eltype = intOI_type_node; break;
16125 case CImode: eltype = intCI_type_node; break;
16126 case XImode: eltype = intXI_type_node; break;
16127 /* 64-bit vectors. */
16128 case V8QImode: eltype = V8QI_type_node; break;
16129 case V4HImode: eltype = V4HI_type_node; break;
16130 case V2SImode: eltype = V2SI_type_node; break;
16131 case V2SFmode: eltype = V2SF_type_node; break;
16132 /* 128-bit vectors. */
16133 case V16QImode: eltype = V16QI_type_node; break;
16134 case V8HImode: eltype = V8HI_type_node; break;
16135 case V4SImode: eltype = V4SI_type_node; break;
16136 case V4SFmode: eltype = V4SF_type_node; break;
16137 case V2DImode: eltype = V2DI_type_node; break;
16138 default: gcc_unreachable ();
16142 if (k == 0 && !is_store)
16143 return_type = eltype;
16144 else
16145 args = tree_cons (NULL_TREE, eltype, args);
16148 ftype = build_function_type (return_type, args);
16150 break;
16152 case NEON_RESULTPAIR:
16154 switch (insn_data[icode].operand[1].mode)
16156 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
16157 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
16158 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
16159 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
16160 case DImode: ftype = void_ftype_pdi_di_di; break;
16161 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
16162 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
16163 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
16164 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
16165 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
16166 default: gcc_unreachable ();
16169 break;
16171 case NEON_REINTERP:
16173 /* We iterate over 5 doubleword types, then 5 quadword
16174 types. */
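/* For example, the v4hi variant of vreinterpretv8qi (j == 1) should end up
   with reinterp_ftype_dreg[0][1], i.e. a function taking a v4hi and
   returning a v8qi.  */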
16175 int rhs = j % 5;
16176 switch (insn_data[icode].operand[0].mode)
16178 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
16179 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
16180 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
16181 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
16182 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
16183 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
16184 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
16185 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
16186 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
16187 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
16188 default: gcc_unreachable ();
16191 break;
16193 default:
16194 gcc_unreachable ();
16197 gcc_assert (ftype != NULL);
16199 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
16201 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
16202 NULL_TREE);
16207 static void
16208 arm_init_builtins (void)
16210 arm_init_tls_builtins ();
16212 if (TARGET_REALLY_IWMMXT)
16213 arm_init_iwmmxt_builtins ();
16215 if (TARGET_NEON)
16216 arm_init_neon_builtins ();
16219 /* Errors in the source file can cause expand_expr to return const0_rtx
16220 where we expect a vector. To avoid crashing, use one of the vector
16221 clear instructions. */
16223 static rtx
16224 safe_vector_operand (rtx x, enum machine_mode mode)
16226 if (x != const0_rtx)
16227 return x;
16228 x = gen_reg_rtx (mode);
16230 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
16231 : gen_rtx_SUBREG (DImode, x, 0)));
16232 return x;
16235 /* Subroutine of arm_expand_builtin to take care of binop insns. */
16237 static rtx
16238 arm_expand_binop_builtin (enum insn_code icode,
16239 tree exp, rtx target)
16241 rtx pat;
16242 tree arg0 = CALL_EXPR_ARG (exp, 0);
16243 tree arg1 = CALL_EXPR_ARG (exp, 1);
16244 rtx op0 = expand_normal (arg0);
16245 rtx op1 = expand_normal (arg1);
16246 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16247 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16248 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16250 if (VECTOR_MODE_P (mode0))
16251 op0 = safe_vector_operand (op0, mode0);
16252 if (VECTOR_MODE_P (mode1))
16253 op1 = safe_vector_operand (op1, mode1);
16255 if (! target
16256 || GET_MODE (target) != tmode
16257 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16258 target = gen_reg_rtx (tmode);
16260 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
16262 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16263 op0 = copy_to_mode_reg (mode0, op0);
16264 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16265 op1 = copy_to_mode_reg (mode1, op1);
16267 pat = GEN_FCN (icode) (target, op0, op1);
16268 if (! pat)
16269 return 0;
16270 emit_insn (pat);
16271 return target;
16274 /* Subroutine of arm_expand_builtin to take care of unop insns. */
16276 static rtx
16277 arm_expand_unop_builtin (enum insn_code icode,
16278 tree exp, rtx target, int do_load)
16280 rtx pat;
16281 tree arg0 = CALL_EXPR_ARG (exp, 0);
16282 rtx op0 = expand_normal (arg0);
16283 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16284 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16286 if (! target
16287 || GET_MODE (target) != tmode
16288 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16289 target = gen_reg_rtx (tmode);
16290 if (do_load)
16291 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16292 else
16294 if (VECTOR_MODE_P (mode0))
16295 op0 = safe_vector_operand (op0, mode0);
16297 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16298 op0 = copy_to_mode_reg (mode0, op0);
16301 pat = GEN_FCN (icode) (target, op0);
16302 if (! pat)
16303 return 0;
16304 emit_insn (pat);
16305 return target;
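/* Comparison function for the bsearch in locate_neon_builtin_icode below.
   A is the key, whose base_fcode field holds the function code being looked
   up; B is a table entry.  Return 0 when that code falls within B's range of
   variants, i.e. [base_fcode, base_fcode + num_vars).  */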
16308 static int
16309 neon_builtin_compare (const void *a, const void *b)
16311 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
16312 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
16313 unsigned int soughtcode = key->base_fcode;
16315 if (soughtcode >= memb->base_fcode
16316 && soughtcode < memb->base_fcode + memb->num_vars)
16317 return 0;
16318 else if (soughtcode < memb->base_fcode)
16319 return -1;
16320 else
16321 return 1;
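/* Map the function code FCODE of a Neon builtin back to the insn code of the
   variant it refers to (and, if ITYPE is non-null, to its itype) by binary
   searching neon_builtin_data.  */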
16324 static enum insn_code
16325 locate_neon_builtin_icode (int fcode, neon_itype *itype)
16327 neon_builtin_datum key, *found;
16328 int idx;
16330 key.base_fcode = fcode;
16331 found = (neon_builtin_datum *)
16332 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
16333 sizeof (neon_builtin_data[0]), neon_builtin_compare);
16334 gcc_assert (found);
16335 idx = fcode - (int) found->base_fcode;
16336 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
16338 if (itype)
16339 *itype = found->itype;
16341 return found->codes[idx];
16344 typedef enum {
16345 NEON_ARG_COPY_TO_REG,
16346 NEON_ARG_CONSTANT,
16347 NEON_ARG_STOP
16348 } builtin_arg;
16350 #define NEON_MAX_BUILTIN_ARGS 5
16352 /* Expand a Neon builtin.  The variable arguments after EXP give the kind of each operand (builtin_arg values), terminated by NEON_ARG_STOP.  */
16353 static rtx
16354 arm_expand_neon_args (rtx target, int icode, int have_retval,
16355 tree exp, ...)
16357 va_list ap;
16358 rtx pat;
16359 tree arg[NEON_MAX_BUILTIN_ARGS];
16360 rtx op[NEON_MAX_BUILTIN_ARGS];
16361 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16362 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
16363 int argc = 0;
16365 if (have_retval
16366 && (!target
16367 || GET_MODE (target) != tmode
16368 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
16369 target = gen_reg_rtx (tmode);
16371 va_start (ap, exp);
16373 for (;;)
16375 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
16377 if (thisarg == NEON_ARG_STOP)
16378 break;
16379 else
16381 arg[argc] = CALL_EXPR_ARG (exp, argc);
16382 op[argc] = expand_normal (arg[argc]);
16383 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
16385 switch (thisarg)
16387 case NEON_ARG_COPY_TO_REG:
16388 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
16389 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16390 (op[argc], mode[argc]))
16391 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
16392 break;
16394 case NEON_ARG_CONSTANT:
16395 /* FIXME: This error message is somewhat unhelpful. */
16396 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16397 (op[argc], mode[argc]))
16398 error ("argument must be a constant");
16399 break;
16401 case NEON_ARG_STOP:
16402 gcc_unreachable ();
16405 argc++;
16409 va_end (ap);
16411 if (have_retval)
16412 switch (argc)
16414 case 1:
16415 pat = GEN_FCN (icode) (target, op[0]);
16416 break;
16418 case 2:
16419 pat = GEN_FCN (icode) (target, op[0], op[1]);
16420 break;
16422 case 3:
16423 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
16424 break;
16426 case 4:
16427 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
16428 break;
16430 case 5:
16431 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
16432 break;
16434 default:
16435 gcc_unreachable ();
16437 else
16438 switch (argc)
16440 case 1:
16441 pat = GEN_FCN (icode) (op[0]);
16442 break;
16444 case 2:
16445 pat = GEN_FCN (icode) (op[0], op[1]);
16446 break;
16448 case 3:
16449 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
16450 break;
16452 case 4:
16453 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
16454 break;
16456 case 5:
16457 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
16458 break;
16460 default:
16461 gcc_unreachable ();
16464 if (!pat)
16465 return 0;
16467 emit_insn (pat);
16469 return target;
16472 /* Expand a Neon builtin. These are "special" because they don't have symbolic
16473 constants defined per-instruction or per instruction-variant. Instead, the
16474 required info is looked up in the table neon_builtin_data. */
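/* For example, a NEON_BINOP builtin such as vadd is expanded with its two
   vector operands copied to registers plus one trailing NEON_ARG_CONSTANT
   operand; the latter is presumably the extra constant argument that
   arm_neon.h passes to select the variant.  */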
16475 static rtx
16476 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
16478 neon_itype itype;
16479 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
16481 switch (itype)
16483 case NEON_UNOP:
16484 case NEON_CONVERT:
16485 case NEON_DUPLANE:
16486 return arm_expand_neon_args (target, icode, 1, exp,
16487 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16489 case NEON_BINOP:
16490 case NEON_SETLANE:
16491 case NEON_SCALARMUL:
16492 case NEON_SCALARMULL:
16493 case NEON_SCALARMULH:
16494 case NEON_SHIFTINSERT:
16495 case NEON_LOGICBINOP:
16496 return arm_expand_neon_args (target, icode, 1, exp,
16497 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16498 NEON_ARG_STOP);
16500 case NEON_TERNOP:
16501 return arm_expand_neon_args (target, icode, 1, exp,
16502 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16503 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16505 case NEON_GETLANE:
16506 case NEON_FIXCONV:
16507 case NEON_SHIFTIMM:
16508 return arm_expand_neon_args (target, icode, 1, exp,
16509 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
16510 NEON_ARG_STOP);
16512 case NEON_CREATE:
16513 return arm_expand_neon_args (target, icode, 1, exp,
16514 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16516 case NEON_DUP:
16517 case NEON_SPLIT:
16518 case NEON_REINTERP:
16519 return arm_expand_neon_args (target, icode, 1, exp,
16520 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16522 case NEON_COMBINE:
16523 case NEON_VTBL:
16524 return arm_expand_neon_args (target, icode, 1, exp,
16525 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16527 case NEON_RESULTPAIR:
16528 return arm_expand_neon_args (target, icode, 0, exp,
16529 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16530 NEON_ARG_STOP);
16532 case NEON_LANEMUL:
16533 case NEON_LANEMULL:
16534 case NEON_LANEMULH:
16535 return arm_expand_neon_args (target, icode, 1, exp,
16536 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16537 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16539 case NEON_LANEMAC:
16540 return arm_expand_neon_args (target, icode, 1, exp,
16541 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16542 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16544 case NEON_SHIFTACC:
16545 return arm_expand_neon_args (target, icode, 1, exp,
16546 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16547 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16549 case NEON_SCALARMAC:
16550 return arm_expand_neon_args (target, icode, 1, exp,
16551 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16552 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16554 case NEON_SELECT:
16555 case NEON_VTBX:
16556 return arm_expand_neon_args (target, icode, 1, exp,
16557 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16558 NEON_ARG_STOP);
16560 case NEON_LOAD1:
16561 case NEON_LOADSTRUCT:
16562 return arm_expand_neon_args (target, icode, 1, exp,
16563 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16565 case NEON_LOAD1LANE:
16566 case NEON_LOADSTRUCTLANE:
16567 return arm_expand_neon_args (target, icode, 1, exp,
16568 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16569 NEON_ARG_STOP);
16571 case NEON_STORE1:
16572 case NEON_STORESTRUCT:
16573 return arm_expand_neon_args (target, icode, 0, exp,
16574 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16576 case NEON_STORE1LANE:
16577 case NEON_STORESTRUCTLANE:
16578 return arm_expand_neon_args (target, icode, 0, exp,
16579 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16580 NEON_ARG_STOP);
16583 gcc_unreachable ();
16586 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16587 void
16588 neon_reinterpret (rtx dest, rtx src)
16590 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16593 /* Emit code to place a Neon pair result in memory locations (with equal
16594 registers). */
16595 void
16596 neon_emit_pair_result_insn (enum machine_mode mode,
16597 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16598 rtx op1, rtx op2)
16600 rtx mem = gen_rtx_MEM (mode, destaddr);
16601 rtx tmp1 = gen_reg_rtx (mode);
16602 rtx tmp2 = gen_reg_rtx (mode);
16604 emit_insn (intfn (tmp1, op1, tmp2, op2));
16606 emit_move_insn (mem, tmp1);
16607 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16608 emit_move_insn (mem, tmp2);
16611 /* Set up operands for a register copy from src to dest, taking care not to
16612 clobber registers in the process.
16613 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16614 be called with a large N, so that should be OK. */
16616 void
16617 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16619 unsigned int copied = 0, opctr = 0;
16620 unsigned int done = (1 << count) - 1;
16621 unsigned int i, j;
16623 while (copied != done)
16625 for (i = 0; i < count; i++)
16627 int good = 1;
16629 for (j = 0; good && j < count; j++)
16630 if (i != j && (copied & (1 << j)) == 0
16631 && reg_overlap_mentioned_p (src[j], dest[i]))
16632 good = 0;
16634 if (good)
16636 operands[opctr++] = dest[i];
16637 operands[opctr++] = src[i];
16638 copied |= 1 << i;
16643 gcc_assert (opctr == count * 2);
16646 /* Expand an expression EXP that calls a built-in function,
16647 with result going to TARGET if that's convenient
16648 (and in mode MODE if that's convenient).
16649 SUBTARGET may be used as the target for computing one of EXP's operands.
16650 IGNORE is nonzero if the value is to be ignored. */
16652 static rtx
16653 arm_expand_builtin (tree exp,
16654 rtx target,
16655 rtx subtarget ATTRIBUTE_UNUSED,
16656 enum machine_mode mode ATTRIBUTE_UNUSED,
16657 int ignore ATTRIBUTE_UNUSED)
16659 const struct builtin_description * d;
16660 enum insn_code icode;
16661 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16662 tree arg0;
16663 tree arg1;
16664 tree arg2;
16665 rtx op0;
16666 rtx op1;
16667 rtx op2;
16668 rtx pat;
16669 int fcode = DECL_FUNCTION_CODE (fndecl);
16670 size_t i;
16671 enum machine_mode tmode;
16672 enum machine_mode mode0;
16673 enum machine_mode mode1;
16674 enum machine_mode mode2;
16676 if (fcode >= ARM_BUILTIN_NEON_BASE)
16677 return arm_expand_neon_builtin (fcode, exp, target);
16679 switch (fcode)
16681 case ARM_BUILTIN_TEXTRMSB:
16682 case ARM_BUILTIN_TEXTRMUB:
16683 case ARM_BUILTIN_TEXTRMSH:
16684 case ARM_BUILTIN_TEXTRMUH:
16685 case ARM_BUILTIN_TEXTRMSW:
16686 case ARM_BUILTIN_TEXTRMUW:
16687 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16688 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16689 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16690 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16691 : CODE_FOR_iwmmxt_textrmw);
16693 arg0 = CALL_EXPR_ARG (exp, 0);
16694 arg1 = CALL_EXPR_ARG (exp, 1);
16695 op0 = expand_normal (arg0);
16696 op1 = expand_normal (arg1);
16697 tmode = insn_data[icode].operand[0].mode;
16698 mode0 = insn_data[icode].operand[1].mode;
16699 mode1 = insn_data[icode].operand[2].mode;
16701 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16702 op0 = copy_to_mode_reg (mode0, op0);
16703 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16705 /* @@@ better error message */
16706 error ("selector must be an immediate");
16707 return gen_reg_rtx (tmode);
16709 if (target == 0
16710 || GET_MODE (target) != tmode
16711 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16712 target = gen_reg_rtx (tmode);
16713 pat = GEN_FCN (icode) (target, op0, op1);
16714 if (! pat)
16715 return 0;
16716 emit_insn (pat);
16717 return target;
16719 case ARM_BUILTIN_TINSRB:
16720 case ARM_BUILTIN_TINSRH:
16721 case ARM_BUILTIN_TINSRW:
16722 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16723 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16724 : CODE_FOR_iwmmxt_tinsrw);
16725 arg0 = CALL_EXPR_ARG (exp, 0);
16726 arg1 = CALL_EXPR_ARG (exp, 1);
16727 arg2 = CALL_EXPR_ARG (exp, 2);
16728 op0 = expand_normal (arg0);
16729 op1 = expand_normal (arg1);
16730 op2 = expand_normal (arg2);
16731 tmode = insn_data[icode].operand[0].mode;
16732 mode0 = insn_data[icode].operand[1].mode;
16733 mode1 = insn_data[icode].operand[2].mode;
16734 mode2 = insn_data[icode].operand[3].mode;
16736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16737 op0 = copy_to_mode_reg (mode0, op0);
16738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16739 op1 = copy_to_mode_reg (mode1, op1);
16740 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16742 /* @@@ better error message */
16743 error ("selector must be an immediate");
16744 return const0_rtx;
16746 if (target == 0
16747 || GET_MODE (target) != tmode
16748 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16749 target = gen_reg_rtx (tmode);
16750 pat = GEN_FCN (icode) (target, op0, op1, op2);
16751 if (! pat)
16752 return 0;
16753 emit_insn (pat);
16754 return target;
16756 case ARM_BUILTIN_SETWCX:
16757 arg0 = CALL_EXPR_ARG (exp, 0);
16758 arg1 = CALL_EXPR_ARG (exp, 1);
16759 op0 = force_reg (SImode, expand_normal (arg0));
16760 op1 = expand_normal (arg1);
16761 emit_insn (gen_iwmmxt_tmcr (op1, op0));
16762 return 0;
16764 case ARM_BUILTIN_GETWCX:
16765 arg0 = CALL_EXPR_ARG (exp, 0);
16766 op0 = expand_normal (arg0);
16767 target = gen_reg_rtx (SImode);
16768 emit_insn (gen_iwmmxt_tmrc (target, op0));
16769 return target;
16771 case ARM_BUILTIN_WSHUFH:
16772 icode = CODE_FOR_iwmmxt_wshufh;
16773 arg0 = CALL_EXPR_ARG (exp, 0);
16774 arg1 = CALL_EXPR_ARG (exp, 1);
16775 op0 = expand_normal (arg0);
16776 op1 = expand_normal (arg1);
16777 tmode = insn_data[icode].operand[0].mode;
16778 mode1 = insn_data[icode].operand[1].mode;
16779 mode2 = insn_data[icode].operand[2].mode;
16781 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16782 op0 = copy_to_mode_reg (mode1, op0);
16783 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16785 /* @@@ better error message */
16786 error ("mask must be an immediate");
16787 return const0_rtx;
16789 if (target == 0
16790 || GET_MODE (target) != tmode
16791 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16792 target = gen_reg_rtx (tmode);
16793 pat = GEN_FCN (icode) (target, op0, op1);
16794 if (! pat)
16795 return 0;
16796 emit_insn (pat);
16797 return target;
16799 case ARM_BUILTIN_WSADB:
16800 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16801 case ARM_BUILTIN_WSADH:
16802 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16803 case ARM_BUILTIN_WSADBZ:
16804 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16805 case ARM_BUILTIN_WSADHZ:
16806 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16808 /* Several three-argument builtins. */
16809 case ARM_BUILTIN_WMACS:
16810 case ARM_BUILTIN_WMACU:
16811 case ARM_BUILTIN_WALIGN:
16812 case ARM_BUILTIN_TMIA:
16813 case ARM_BUILTIN_TMIAPH:
16814 case ARM_BUILTIN_TMIATT:
16815 case ARM_BUILTIN_TMIATB:
16816 case ARM_BUILTIN_TMIABT:
16817 case ARM_BUILTIN_TMIABB:
16818 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16819 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16820 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16821 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16822 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16823 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16824 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16825 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16826 : CODE_FOR_iwmmxt_walign);
16827 arg0 = CALL_EXPR_ARG (exp, 0);
16828 arg1 = CALL_EXPR_ARG (exp, 1);
16829 arg2 = CALL_EXPR_ARG (exp, 2);
16830 op0 = expand_normal (arg0);
16831 op1 = expand_normal (arg1);
16832 op2 = expand_normal (arg2);
16833 tmode = insn_data[icode].operand[0].mode;
16834 mode0 = insn_data[icode].operand[1].mode;
16835 mode1 = insn_data[icode].operand[2].mode;
16836 mode2 = insn_data[icode].operand[3].mode;
16838 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16839 op0 = copy_to_mode_reg (mode0, op0);
16840 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16841 op1 = copy_to_mode_reg (mode1, op1);
16842 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16843 op2 = copy_to_mode_reg (mode2, op2);
16844 if (target == 0
16845 || GET_MODE (target) != tmode
16846 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16847 target = gen_reg_rtx (tmode);
16848 pat = GEN_FCN (icode) (target, op0, op1, op2);
16849 if (! pat)
16850 return 0;
16851 emit_insn (pat);
16852 return target;
16854 case ARM_BUILTIN_WZERO:
16855 target = gen_reg_rtx (DImode);
16856 emit_insn (gen_iwmmxt_clrdi (target));
16857 return target;
16859 case ARM_BUILTIN_THREAD_POINTER:
16860 return arm_load_tp (target);
16862 default:
16863 break;
16866 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16867 if (d->code == (const enum arm_builtins) fcode)
16868 return arm_expand_binop_builtin (d->icode, exp, target);
16870 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16871 if (d->code == (const enum arm_builtins) fcode)
16872 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16874 /* @@@ Should really do something sensible here. */
16875 return NULL_RTX;
16878 /* Return the number (counting from 0) of
16879 the least significant set bit in MASK. */
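/* For example, a MASK of 0x14 (binary 10100) yields 2.  */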
16881 inline static int
16882 number_of_first_bit_set (unsigned mask)
16884 int bit;
16886 for (bit = 0;
16887 (mask & (1 << bit)) == 0;
16888 ++bit)
16889 continue;
16891 return bit;
16894 /* Emit code to push or pop registers to or from the stack. F is the
16895 assembly file. MASK is the registers to push or pop. PUSH is
16896 nonzero if we should push, and zero if we should pop. For debugging
16897 output, if pushing, adjust CFA_OFFSET by the amount of space added
16898 to the stack. REAL_REGS should have the same number of bits set as
16899 MASK, and will be used instead (in the same order) to describe which
16900 registers were saved - this is used to mark the save slots when we
16901 push high registers after moving them to low registers. */
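/* For example, pushing r4, r5 and lr should come out as "push {r4, r5, lr}",
   preceded by ".save {r4, r5, lr}" when EABI unwind tables are in use.  */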
16902 static void
16903 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16904 unsigned long real_regs)
16906 int regno;
16907 int lo_mask = mask & 0xFF;
16908 int pushed_words = 0;
16910 gcc_assert (mask);
16912 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16914 /* Special case.  Do not generate a POP PC statement here; do it in
16915 thumb_exit().  */
16916 thumb_exit (f, -1);
16917 return;
16920 if (ARM_EABI_UNWIND_TABLES && push)
16922 fprintf (f, "\t.save\t{");
16923 for (regno = 0; regno < 15; regno++)
16925 if (real_regs & (1 << regno))
16927 if (real_regs & ((1 << regno) -1))
16928 fprintf (f, ", ");
16929 asm_fprintf (f, "%r", regno);
16932 fprintf (f, "}\n");
16935 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16937 /* Look at the low registers first. */
16938 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16940 if (lo_mask & 1)
16942 asm_fprintf (f, "%r", regno);
16944 if ((lo_mask & ~1) != 0)
16945 fprintf (f, ", ");
16947 pushed_words++;
16951 if (push && (mask & (1 << LR_REGNUM)))
16953 /* Catch pushing the LR. */
16954 if (mask & 0xFF)
16955 fprintf (f, ", ");
16957 asm_fprintf (f, "%r", LR_REGNUM);
16959 pushed_words++;
16961 else if (!push && (mask & (1 << PC_REGNUM)))
16963 /* Catch popping the PC. */
16964 if (TARGET_INTERWORK || TARGET_BACKTRACE
16965 || crtl->calls_eh_return)
16967 /* The PC is never popped directly; instead
16968 it is popped into r3 and then BX is used. */
16969 fprintf (f, "}\n");
16971 thumb_exit (f, -1);
16973 return;
16975 else
16977 if (mask & 0xFF)
16978 fprintf (f, ", ");
16980 asm_fprintf (f, "%r", PC_REGNUM);
16984 fprintf (f, "}\n");
16986 if (push && pushed_words && dwarf2out_do_frame ())
16988 char *l = dwarf2out_cfi_label (false);
16989 int pushed_mask = real_regs;
16991 *cfa_offset += pushed_words * 4;
16992 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16994 pushed_words = 0;
16995 pushed_mask = real_regs;
16996 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16998 if (pushed_mask & 1)
16999 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
17004 /* Generate code to return from a thumb function.
17005 If 'reg_containing_return_addr' is -1, then the return address is
17006 actually on the stack, at the stack pointer. */
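/* In the simplest case, where nothing needs popping and the return address is
   already in a register (say r3), this reduces to a single "bx r3".  */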
17007 static void
17008 thumb_exit (FILE *f, int reg_containing_return_addr)
17010 unsigned regs_available_for_popping;
17011 unsigned regs_to_pop;
17012 int pops_needed;
17013 unsigned available;
17014 unsigned required;
17015 int mode;
17016 int size;
17017 int restore_a4 = FALSE;
17019 /* Compute the registers we need to pop. */
17020 regs_to_pop = 0;
17021 pops_needed = 0;
17023 if (reg_containing_return_addr == -1)
17025 regs_to_pop |= 1 << LR_REGNUM;
17026 ++pops_needed;
17029 if (TARGET_BACKTRACE)
17031 /* Restore the (ARM) frame pointer and stack pointer. */
17032 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
17033 pops_needed += 2;
17036 /* If there is nothing to pop then just emit the BX instruction and
17037 return. */
17038 if (pops_needed == 0)
17040 if (crtl->calls_eh_return)
17041 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17043 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17044 return;
17046 /* Otherwise if we are not supporting interworking and we have not created
17047 a backtrace structure and the function was not entered in ARM mode then
17048 just pop the return address straight into the PC. */
17049 else if (!TARGET_INTERWORK
17050 && !TARGET_BACKTRACE
17051 && !is_called_in_ARM_mode (current_function_decl)
17052 && !crtl->calls_eh_return)
17054 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
17055 return;
17058 /* Find out how many of the (return) argument registers we can corrupt. */
17059 regs_available_for_popping = 0;
17061 /* If returning via __builtin_eh_return, the bottom three registers
17062 all contain information needed for the return. */
17063 if (crtl->calls_eh_return)
17064 size = 12;
17065 else
17067 /* We can deduce the registers used from the function's
17068 return value.  This is more reliable than examining
17069 df_regs_ever_live_p () because that will be set if the register is
17070 ever used in the function, not just if the register is used
17071 to hold a return value. */
17073 if (crtl->return_rtx != 0)
17074 mode = GET_MODE (crtl->return_rtx);
17075 else
17076 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17078 size = GET_MODE_SIZE (mode);
17080 if (size == 0)
17082 /* In a void function we can use any argument register.
17083 In a function that returns a structure on the stack
17084 we can use the second and third argument registers. */
17085 if (mode == VOIDmode)
17086 regs_available_for_popping =
17087 (1 << ARG_REGISTER (1))
17088 | (1 << ARG_REGISTER (2))
17089 | (1 << ARG_REGISTER (3));
17090 else
17091 regs_available_for_popping =
17092 (1 << ARG_REGISTER (2))
17093 | (1 << ARG_REGISTER (3));
17095 else if (size <= 4)
17096 regs_available_for_popping =
17097 (1 << ARG_REGISTER (2))
17098 | (1 << ARG_REGISTER (3));
17099 else if (size <= 8)
17100 regs_available_for_popping =
17101 (1 << ARG_REGISTER (3));
17104 /* Match registers to be popped with registers into which we pop them. */
17105 for (available = regs_available_for_popping,
17106 required = regs_to_pop;
17107 required != 0 && available != 0;
17108 available &= ~(available & - available),
17109 required &= ~(required & - required))
17110 -- pops_needed;
17112 /* If we have any popping registers left over, remove them. */
17113 if (available > 0)
17114 regs_available_for_popping &= ~available;
17116 /* Otherwise if we need another popping register we can use
17117 the fourth argument register. */
17118 else if (pops_needed)
17120 /* If we have not found any free argument registers and
17121 reg a4 contains the return address, we must move it. */
17122 if (regs_available_for_popping == 0
17123 && reg_containing_return_addr == LAST_ARG_REGNUM)
17125 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17126 reg_containing_return_addr = LR_REGNUM;
17128 else if (size > 12)
17130 /* Register a4 is being used to hold part of the return value,
17131 but we have dire need of a free, low register. */
17132 restore_a4 = TRUE;
17134 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
17137 if (reg_containing_return_addr != LAST_ARG_REGNUM)
17139 /* The fourth argument register is available. */
17140 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
17142 --pops_needed;
17146 /* Pop as many registers as we can. */
17147 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17148 regs_available_for_popping);
17150 /* Process the registers we popped. */
17151 if (reg_containing_return_addr == -1)
17153 /* The return address was popped into the lowest numbered register. */
17154 regs_to_pop &= ~(1 << LR_REGNUM);
17156 reg_containing_return_addr =
17157 number_of_first_bit_set (regs_available_for_popping);
17159 /* Remove this register from the mask of available registers, so that
17160 the return address will not be corrupted by further pops. */
17161 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
17164 /* If we popped other registers then handle them here. */
17165 if (regs_available_for_popping)
17167 int frame_pointer;
17169 /* Work out which register currently contains the frame pointer. */
17170 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
17172 /* Move it into the correct place. */
17173 asm_fprintf (f, "\tmov\t%r, %r\n",
17174 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
17176 /* (Temporarily) remove it from the mask of popped registers. */
17177 regs_available_for_popping &= ~(1 << frame_pointer);
17178 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
17180 if (regs_available_for_popping)
17182 int stack_pointer;
17184 /* We popped the stack pointer as well;
17185 find the register that contains it. */
17186 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
17188 /* Move it into the stack register. */
17189 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
17191 /* At this point we have popped all necessary registers, so
17192 do not worry about restoring regs_available_for_popping
17193 to its correct value:
17195 assert (pops_needed == 0)
17196 assert (regs_available_for_popping == (1 << frame_pointer))
17197 assert (regs_to_pop == (1 << STACK_POINTER)) */
17199 else
17201 /* Since we have just moved the popped value into the frame
17202 pointer, the popping register is available for reuse, and
17203 we know that we still have the stack pointer left to pop. */
17204 regs_available_for_popping |= (1 << frame_pointer);
17208 /* If we still have registers left on the stack, but we no longer have
17209 any registers into which we can pop them, then we must move the return
17210 address into the link register and make available the register that
17211 contained it. */
17212 if (regs_available_for_popping == 0 && pops_needed > 0)
17214 regs_available_for_popping |= 1 << reg_containing_return_addr;
17216 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
17217 reg_containing_return_addr);
17219 reg_containing_return_addr = LR_REGNUM;
17222 /* If we have registers left on the stack then pop some more.
17223 We know that at most we will want to pop FP and SP. */
17224 if (pops_needed > 0)
17226 int popped_into;
17227 int move_to;
17229 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17230 regs_available_for_popping);
17232 /* We have popped either FP or SP.
17233 Move whichever one it is into the correct register. */
17234 popped_into = number_of_first_bit_set (regs_available_for_popping);
17235 move_to = number_of_first_bit_set (regs_to_pop);
17237 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
17239 regs_to_pop &= ~(1 << move_to);
17241 --pops_needed;
17244 /* If we still have not popped everything then we must have only
17245 had one register available to us and we are now popping the SP. */
17246 if (pops_needed > 0)
17248 int popped_into;
17250 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17251 regs_available_for_popping);
17253 popped_into = number_of_first_bit_set (regs_available_for_popping);
17255 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
17256 /*
17257 assert (regs_to_pop == (1 << STACK_POINTER))
17258 assert (pops_needed == 1)
17259 */
17262 /* If necessary restore the a4 register. */
17263 if (restore_a4)
17265 if (reg_containing_return_addr != LR_REGNUM)
17267 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17268 reg_containing_return_addr = LR_REGNUM;
17271 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
17274 if (crtl->calls_eh_return)
17275 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17277 /* Return to caller. */
17278 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17282 void
17283 thumb1_final_prescan_insn (rtx insn)
17285 if (flag_print_asm_name)
17286 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
17287 INSN_ADDRESSES (INSN_UID (insn)));
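/* Return nonzero if VAL is an 8-bit value shifted left by 0 to 24 bits, i.e.
   (presumably) something that a Thumb move of an 8-bit immediate followed by
   a shift can produce.  */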
17290 int
17291 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
17293 unsigned HOST_WIDE_INT mask = 0xff;
17294 int i;
17296 if (val == 0) /* XXX */
17297 return 0;
17299 for (i = 0; i < 25; i++)
17300 if ((val & (mask << i)) == val)
17301 return 1;
17303 return 0;
17306 /* Returns nonzero if the current function contains,
17307 or might contain a far jump. */
17308 static int
17309 thumb_far_jump_used_p (void)
17311 rtx insn;
17313 /* This test is only important for leaf functions. */
17314 /* assert (!leaf_function_p ()); */
17316 /* If we have already decided that far jumps may be used,
17317 do not bother checking again, and always return true even if
17318 it turns out that they are not being used. Once we have made
17319 the decision that far jumps are present (and that hence the link
17320 register will be pushed onto the stack) we cannot go back on it. */
17321 if (cfun->machine->far_jump_used)
17322 return 1;
17324 /* If this function is not being called from the prologue/epilogue
17325 generation code then it must be being called from the
17326 INITIAL_ELIMINATION_OFFSET macro. */
17327 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
17329 /* In this case we know that we are being asked about the elimination
17330 of the arg pointer register. If that register is not being used,
17331 then there are no arguments on the stack, and we do not have to
17332 worry that a far jump might force the prologue to push the link
17333 register, changing the stack offsets. In this case we can just
17334 return false, since the presence of far jumps in the function will
17335 not affect stack offsets.
17337 If the arg pointer is live (or if it was live, but has now been
17338 eliminated and so set to dead) then we do have to test to see if
17339 the function might contain a far jump. This test can lead to some
17340 false negatives, since before reload is completed, the length of
17341 branch instructions is not known, so gcc defaults to returning their
17342 longest length, which in turn sets the far jump attribute to true.
17344 A false negative will not result in bad code being generated, but it
17345 will result in a needless push and pop of the link register. We
17346 hope that this does not occur too often.
17348 If we need doubleword stack alignment this could affect the other
17349 elimination offsets so we can't risk getting it wrong. */
17350 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
17351 cfun->machine->arg_pointer_live = 1;
17352 else if (!cfun->machine->arg_pointer_live)
17353 return 0;
17356 /* Check to see if the function contains a branch
17357 insn with the far jump attribute set. */
17358 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17360 if (GET_CODE (insn) == JUMP_INSN
17361 /* Ignore tablejump patterns. */
17362 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17363 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
17364 && get_attr_far_jump (insn) == FAR_JUMP_YES
17367 /* Record the fact that we have decided that
17368 the function does use far jumps. */
17369 cfun->machine->far_jump_used = 1;
17370 return 1;
17374 return 0;
17377 /* Return nonzero if FUNC must be entered in ARM mode. */
17379 is_called_in_ARM_mode (tree func)
17381 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
17383 /* Ignore the problem about functions whose address is taken. */
17384 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
17385 return TRUE;
17387 #ifdef ARM_PE
17388 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
17389 #else
17390 return FALSE;
17391 #endif
17394 /* The bits which aren't usefully expanded as rtl. */
17395 const char *
17396 thumb_unexpanded_epilogue (void)
17398 arm_stack_offsets *offsets;
17399 int regno;
17400 unsigned long live_regs_mask = 0;
17401 int high_regs_pushed = 0;
17402 int had_to_push_lr;
17403 int size;
17405 if (cfun->machine->return_used_this_function != 0)
17406 return "";
17408 if (IS_NAKED (arm_current_func_type ()))
17409 return "";
17411 offsets = arm_get_frame_offsets ();
17412 live_regs_mask = offsets->saved_regs_mask;
17413 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17415 /* We can deduce the registers used from the function's return value.
17416 This is more reliable than examining df_regs_ever_live_p () because that
17417 will be set if the register is ever used in the function, not just if
17418 the register is used to hold a return value. */
17419 size = arm_size_return_regs ();
17421 /* The prolog may have pushed some high registers to use as
17422 work registers. e.g. the testsuite file:
17423 gcc/testsuite/gcc.c-torture/execute/complex-2.c
17424 compiles to produce:
17425 push {r4, r5, r6, r7, lr}
17426 mov r7, r9
17427 mov r6, r8
17428 push {r6, r7}
17429 as part of the prolog. We have to undo that pushing here. */
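/* Sketch of the matching undo sequence this epilogue would emit for the
   example above (illustrative only; the exact low registers depend on the
   free registers determined below):
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
   i.e. pop the saved values into free low registers and then move them
   back into the high registers they came from.  */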
17431 if (high_regs_pushed)
17433 unsigned long mask = live_regs_mask & 0xff;
17434 int next_hi_reg;
17436 /* The available low registers depend on the size of the value we are
17437 returning. */
17438 if (size <= 12)
17439 mask |= 1 << 3;
17440 if (size <= 8)
17441 mask |= 1 << 2;
17443 if (mask == 0)
17444 /* Oh dear! We have no low registers into which we can pop
17445 high registers! */
17446 internal_error
17447 ("no low registers available for popping high registers");
17449 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
17450 if (live_regs_mask & (1 << next_hi_reg))
17451 break;
17453 while (high_regs_pushed)
17455 /* Find lo register(s) into which the high register(s) can
17456 be popped. */
17457 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17459 if (mask & (1 << regno))
17460 high_regs_pushed--;
17461 if (high_regs_pushed == 0)
17462 break;
17465 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
17467 /* Pop the values into the low register(s). */
17468 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
17470 /* Move the value(s) into the high registers. */
17471 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17473 if (mask & (1 << regno))
17475 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
17476 regno);
17478 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
17479 if (live_regs_mask & (1 << next_hi_reg))
17480 break;
17484 live_regs_mask &= ~0x0f00;
17487 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
17488 live_regs_mask &= 0xff;
17490 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
17492 /* Pop the return address into the PC. */
17493 if (had_to_push_lr)
17494 live_regs_mask |= 1 << PC_REGNUM;
17496 /* Either no argument registers were pushed or a backtrace
17497 structure was created which includes an adjusted stack
17498 pointer, so just pop everything. */
17499 if (live_regs_mask)
17500 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17501 live_regs_mask);
17503 /* We have either just popped the return address into the
17504 PC or it was kept in LR for the entire function. */
17505 if (!had_to_push_lr)
17506 thumb_exit (asm_out_file, LR_REGNUM);
17508 else
17510 /* Pop everything but the return address. */
17511 if (live_regs_mask)
17512 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17513 live_regs_mask);
17515 if (had_to_push_lr)
17517 if (size > 12)
17519 /* We have no free low regs, so save one. */
17520 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
17521 LAST_ARG_REGNUM);
17524 /* Get the return address into a temporary register. */
17525 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
17526 1 << LAST_ARG_REGNUM);
17528 if (size > 12)
17530 /* Move the return address to lr. */
17531 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
17532 LAST_ARG_REGNUM);
17533 /* Restore the low register. */
17534 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
17535 IP_REGNUM);
17536 regno = LR_REGNUM;
17538 else
17539 regno = LAST_ARG_REGNUM;
17541 else
17542 regno = LR_REGNUM;
17544 /* Remove the argument registers that were pushed onto the stack. */
17545 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
17546 SP_REGNUM, SP_REGNUM,
17547 crtl->args.pretend_args_size);
17549 thumb_exit (asm_out_file, regno);
17552 return "";
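/* By way of example (illustrative, not taken from actual compiler output):
   a function whose prologue pushed {r4, r5, lr} and which has no pretend
   args falls into the first branch above and simply emits
	pop	{r4, r5, pc}
   whereas a function with pretend args pops {r4, r5}, retrieves the return
   address into a scratch register, discards the pretend args with an add
   to sp, and returns through thumb_exit.  */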
17555 /* Functions to save and restore machine-specific function data. */
17556 static struct machine_function *
17557 arm_init_machine_status (void)
17559 struct machine_function *machine;
17560 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17562 #if ARM_FT_UNKNOWN != 0
17563 machine->func_type = ARM_FT_UNKNOWN;
17564 #endif
17565 return machine;
17568 /* Return an RTX indicating where the return address to the
17569 calling function can be found. */
17571 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17573 if (count != 0)
17574 return NULL_RTX;
17576 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17579 /* Do anything needed before RTL is emitted for each function. */
17580 void
17581 arm_init_expanders (void)
17583 /* Arrange to initialize and mark the machine per-function status. */
17584 init_machine_status = arm_init_machine_status;
17586 /* This is to stop the combine pass optimizing away the alignment
17587 adjustment of va_arg. */
17588 /* ??? It is claimed that this should not be necessary. */
17589 if (cfun)
17590 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17594 /* Like arm_compute_initial_elimination_offset. Simpler because there
17595 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17596 to point at the base of the local variables after static stack
17597 space for a function has been allocated. */
17599 HOST_WIDE_INT
17600 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17602 arm_stack_offsets *offsets;
17604 offsets = arm_get_frame_offsets ();
17606 switch (from)
17608 case ARG_POINTER_REGNUM:
17609 switch (to)
17611 case STACK_POINTER_REGNUM:
17612 return offsets->outgoing_args - offsets->saved_args;
17614 case FRAME_POINTER_REGNUM:
17615 return offsets->soft_frame - offsets->saved_args;
17617 case ARM_HARD_FRAME_POINTER_REGNUM:
17618 return offsets->saved_regs - offsets->saved_args;
17620 case THUMB_HARD_FRAME_POINTER_REGNUM:
17621 return offsets->locals_base - offsets->saved_args;
17623 default:
17624 gcc_unreachable ();
17626 break;
17628 case FRAME_POINTER_REGNUM:
17629 switch (to)
17631 case STACK_POINTER_REGNUM:
17632 return offsets->outgoing_args - offsets->soft_frame;
17634 case ARM_HARD_FRAME_POINTER_REGNUM:
17635 return offsets->saved_regs - offsets->soft_frame;
17637 case THUMB_HARD_FRAME_POINTER_REGNUM:
17638 return offsets->locals_base - offsets->soft_frame;
17640 default:
17641 gcc_unreachable ();
17643 break;
17645 default:
17646 gcc_unreachable ();
17650 /* Generate the rest of a function's prologue. */
17651 void
17652 thumb1_expand_prologue (void)
17654 rtx insn, dwarf;
17656 HOST_WIDE_INT amount;
17657 arm_stack_offsets *offsets;
17658 unsigned long func_type;
17659 int regno;
17660 unsigned long live_regs_mask;
17662 func_type = arm_current_func_type ();
17664 /* Naked functions don't have prologues. */
17665 if (IS_NAKED (func_type))
17666 return;
17668 if (IS_INTERRUPT (func_type))
17670 error ("interrupt Service Routines cannot be coded in Thumb mode");
17671 return;
17674 offsets = arm_get_frame_offsets ();
17675 live_regs_mask = offsets->saved_regs_mask;
17676 /* Load the pic register before setting the frame pointer,
17677 so we can use r7 as a temporary work register. */
17678 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17679 arm_load_pic_register (live_regs_mask);
17681 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17682 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17683 stack_pointer_rtx);
17685 amount = offsets->outgoing_args - offsets->saved_regs;
17686 if (amount)
17688 if (amount < 512)
17690 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17691 GEN_INT (- amount)));
17692 RTX_FRAME_RELATED_P (insn) = 1;
17694 else
17696 rtx reg;
17698 /* The stack decrement is too big for an immediate value in a single
17699 insn. In theory we could issue multiple subtracts, but after
17700 three of them it becomes more space efficient to place the full
17701 value in the constant pool and load into a register. (Also the
17702 ARM debugger really likes to see only one stack decrement per
17703 function). So instead we look for a scratch register into which
17704 we can load the decrement, and then we subtract this from the
17705 stack pointer. Unfortunately on the thumb the only available
17706 scratch registers are the argument registers, and we cannot use
17707 these as they may hold arguments to the function. Instead we
17708 attempt to locate a call preserved register which is used by this
17709 function. If we can find one, then we know that it will have
17710 been pushed at the start of the prologue and so we can corrupt
17711 it now. */
17712 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17713 if (live_regs_mask & (1 << regno))
17714 break;
17716 gcc_assert(regno <= LAST_LO_REGNUM);
17718 reg = gen_rtx_REG (SImode, regno);
17720 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17722 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17723 stack_pointer_rtx, reg));
17724 RTX_FRAME_RELATED_P (insn) = 1;
17725 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17726 plus_constant (stack_pointer_rtx,
17727 -amount));
17728 RTX_FRAME_RELATED_P (dwarf) = 1;
17729 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17733 if (frame_pointer_needed)
17734 thumb_set_frame_pointer (offsets);
17736 /* If we are profiling, make sure no instructions are scheduled before
17737 the call to mcount. Similarly if the user has requested no
17738 scheduling in the prolog. Similarly if we want non-call exceptions
17739 using the EABI unwinder, to prevent faulting instructions from being
17740 swapped with a stack adjustment. */
17741 if (crtl->profile || !TARGET_SCHED_PROLOG
17742 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17743 emit_insn (gen_blockage ());
17745 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17746 if (live_regs_mask & 0xff)
17747 cfun->machine->lr_save_eliminated = 0;
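/* Rough sketch of the two stack-decrement strategies above (illustrative
   only, not actual compiler output):
	sub	sp, #488		@ amount < 512: single immediate
   versus, for a large frame, with a call-saved low register such as r4
   already pushed by the prologue and therefore free to corrupt:
	ldr	r4, .Lframe_size	@ load -amount from the literal pool
	add	sp, r4
   (.Lframe_size is a hypothetical literal-pool label used for the sketch).  */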
17751 void
17752 thumb1_expand_epilogue (void)
17754 HOST_WIDE_INT amount;
17755 arm_stack_offsets *offsets;
17756 int regno;
17758 /* Naked functions don't have epilogues. */
17759 if (IS_NAKED (arm_current_func_type ()))
17760 return;
17762 offsets = arm_get_frame_offsets ();
17763 amount = offsets->outgoing_args - offsets->saved_regs;
17765 if (frame_pointer_needed)
17767 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17768 amount = offsets->locals_base - offsets->saved_regs;
17771 gcc_assert (amount >= 0);
17772 if (amount)
17774 if (amount < 512)
17775 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17776 GEN_INT (amount)));
17777 else
17779 /* r3 is always free in the epilogue. */
17780 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17782 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17783 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17787 /* Emit a USE (stack_pointer_rtx), so that
17788 the stack adjustment will not be deleted. */
17789 emit_insn (gen_prologue_use (stack_pointer_rtx));
17791 if (crtl->profile || !TARGET_SCHED_PROLOG)
17792 emit_insn (gen_blockage ());
17794 /* Emit a clobber for each register that will be restored in the epilogue,
17795 so that flow2 will get register lifetimes correct. */
17796 for (regno = 0; regno < 13; regno++)
17797 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17798 emit_clobber (gen_rtx_REG (SImode, regno));
17800 if (! df_regs_ever_live_p (LR_REGNUM))
17801 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
17804 static void
17805 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17807 arm_stack_offsets *offsets;
17808 unsigned long live_regs_mask = 0;
17809 unsigned long l_mask;
17810 unsigned high_regs_pushed = 0;
17811 int cfa_offset = 0;
17812 int regno;
17814 if (IS_NAKED (arm_current_func_type ()))
17815 return;
17817 if (is_called_in_ARM_mode (current_function_decl))
17819 const char * name;
17821 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17822 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17823 == SYMBOL_REF);
17824 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17826 /* Generate code sequence to switch us into Thumb mode. */
17827 /* The .code 32 directive has already been emitted by
17828 ASM_DECLARE_FUNCTION_NAME. */
17829 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17830 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17832 /* Generate a label, so that the debugger will notice the
17833 change in instruction sets. This label is also used by
17834 the assembler to bypass the ARM code when this function
17835 is called from a Thumb encoded function elsewhere in the
17836 same file. Hence the definition of STUB_NAME here must
17837 agree with the definition in gas/config/tc-arm.c. */
17839 #define STUB_NAME ".real_start_of"
17841 fprintf (f, "\t.code\t16\n");
17842 #ifdef ARM_PE
17843 if (arm_dllexport_name_p (name))
17844 name = arm_strip_name_encoding (name);
17845 #endif
17846 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17847 fprintf (f, "\t.thumb_func\n");
17848 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17851 if (crtl->args.pretend_args_size)
17853 /* Output unwind directive for the stack adjustment. */
17854 if (ARM_EABI_UNWIND_TABLES)
17855 fprintf (f, "\t.pad #%d\n",
17856 crtl->args.pretend_args_size);
17858 if (cfun->machine->uses_anonymous_args)
17860 int num_pushes;
17862 fprintf (f, "\tpush\t{");
17864 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17866 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17867 regno <= LAST_ARG_REGNUM;
17868 regno++)
17869 asm_fprintf (f, "%r%s", regno,
17870 regno == LAST_ARG_REGNUM ? "" : ", ");
17872 fprintf (f, "}\n");
17874 else
17875 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17876 SP_REGNUM, SP_REGNUM,
17877 crtl->args.pretend_args_size);
17879 /* We don't need to record the stores for unwinding (would it
17880 help the debugger any if we did?), but record the change in
17881 the stack pointer. */
17882 if (dwarf2out_do_frame ())
17884 char *l = dwarf2out_cfi_label (false);
17886 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17887 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17891 /* Get the registers we are going to push. */
17892 offsets = arm_get_frame_offsets ();
17893 live_regs_mask = offsets->saved_regs_mask;
17894 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17895 l_mask = live_regs_mask & 0x40ff;
17896 /* Then count how many other high registers will need to be pushed. */
17897 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17899 if (TARGET_BACKTRACE)
17901 unsigned offset;
17902 unsigned work_register;
17904 /* We have been asked to create a stack backtrace structure.
17905 The code looks like this:
17907 0 .align 2
17908 0 func:
17909 0 sub SP, #16 Reserve space for 4 registers.
17910 2 push {R7} Push low registers.
17911 4 add R7, SP, #20 Get the stack pointer before the push.
17912 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17913 8 mov R7, PC Get hold of the start of this code plus 12.
17914 10 str R7, [SP, #16] Store it.
17915 12 mov R7, FP Get hold of the current frame pointer.
17916 14 str R7, [SP, #4] Store it.
17917 16 mov R7, LR Get hold of the current return address.
17918 18 str R7, [SP, #12] Store it.
17919 20 add R7, SP, #16 Point at the start of the backtrace structure.
17920 22 mov FP, R7 Put this value into the frame pointer. */
17922 work_register = thumb_find_work_register (live_regs_mask);
17924 if (ARM_EABI_UNWIND_TABLES)
17925 asm_fprintf (f, "\t.pad #16\n");
17927 asm_fprintf
17928 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17929 SP_REGNUM, SP_REGNUM);
17931 if (dwarf2out_do_frame ())
17933 char *l = dwarf2out_cfi_label (false);
17935 cfa_offset = cfa_offset + 16;
17936 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17939 if (l_mask)
17941 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17942 offset = bit_count (l_mask) * UNITS_PER_WORD;
17944 else
17945 offset = 0;
17947 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17948 offset + 16 + crtl->args.pretend_args_size);
17950 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17951 offset + 4);
17953 /* Make sure that the instruction fetching the PC is in the right place
17954 to calculate "start of backtrace creation code + 12". */
17955 if (l_mask)
17957 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17958 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17959 offset + 12);
17960 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17961 ARM_HARD_FRAME_POINTER_REGNUM);
17962 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17963 offset);
17965 else
17967 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17968 ARM_HARD_FRAME_POINTER_REGNUM);
17969 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17970 offset);
17971 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17972 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17973 offset + 12);
17976 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17977 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17978 offset + 8);
17979 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17980 offset + 12);
17981 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17982 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17984 /* Optimization: If we are not pushing any low registers but we are going
17985 to push some high registers then delay our first push. This will just
17986 be a push of LR and we can combine it with the push of the first high
17987 register. */
17988 else if ((l_mask & 0xff) != 0
17989 || (high_regs_pushed == 0 && l_mask))
17990 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17992 if (high_regs_pushed)
17994 unsigned pushable_regs;
17995 unsigned next_hi_reg;
17997 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17998 if (live_regs_mask & (1 << next_hi_reg))
17999 break;
18001 pushable_regs = l_mask & 0xff;
18003 if (pushable_regs == 0)
18004 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
18006 while (high_regs_pushed > 0)
18008 unsigned long real_regs_mask = 0;
18010 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
18012 if (pushable_regs & (1 << regno))
18014 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
18016 high_regs_pushed --;
18017 real_regs_mask |= (1 << next_hi_reg);
18019 if (high_regs_pushed)
18021 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
18022 next_hi_reg --)
18023 if (live_regs_mask & (1 << next_hi_reg))
18024 break;
18026 else
18028 pushable_regs &= ~((1 << regno) - 1);
18029 break;
18034 /* If we had to find a work register and we have not yet
18035 saved the LR then add it to the list of regs to push. */
18036 if (l_mask == (1 << LR_REGNUM))
18038 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
18039 1, &cfa_offset,
18040 real_regs_mask | (1 << LR_REGNUM));
18041 l_mask = 0;
18043 else
18044 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
18049 /* Handle the case of a double word load into a low register from
18050 a computed memory address. The computed address may involve a
18051 register which is overwritten by the load. */
18052 const char *
18053 thumb_load_double_from_address (rtx *operands)
18055 rtx addr;
18056 rtx base;
18057 rtx offset;
18058 rtx arg1;
18059 rtx arg2;
18061 gcc_assert (GET_CODE (operands[0]) == REG);
18062 gcc_assert (GET_CODE (operands[1]) == MEM);
18064 /* Get the memory address. */
18065 addr = XEXP (operands[1], 0);
18067 /* Work out how the memory address is computed. */
18068 switch (GET_CODE (addr))
18070 case REG:
18071 operands[2] = adjust_address (operands[1], SImode, 4);
18073 if (REGNO (operands[0]) == REGNO (addr))
18075 output_asm_insn ("ldr\t%H0, %2", operands);
18076 output_asm_insn ("ldr\t%0, %1", operands);
18078 else
18080 output_asm_insn ("ldr\t%0, %1", operands);
18081 output_asm_insn ("ldr\t%H0, %2", operands);
18083 break;
18085 case CONST:
18086 /* Compute <address> + 4 for the high order load. */
18087 operands[2] = adjust_address (operands[1], SImode, 4);
18089 output_asm_insn ("ldr\t%0, %1", operands);
18090 output_asm_insn ("ldr\t%H0, %2", operands);
18091 break;
18093 case PLUS:
18094 arg1 = XEXP (addr, 0);
18095 arg2 = XEXP (addr, 1);
18097 if (CONSTANT_P (arg1))
18098 base = arg2, offset = arg1;
18099 else
18100 base = arg1, offset = arg2;
18102 gcc_assert (GET_CODE (base) == REG);
18104 /* Catch the case of <address> = <reg> + <reg> */
18105 if (GET_CODE (offset) == REG)
18107 int reg_offset = REGNO (offset);
18108 int reg_base = REGNO (base);
18109 int reg_dest = REGNO (operands[0]);
18111 /* Add the base and offset registers together into the
18112 higher destination register. */
18113 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
18114 reg_dest + 1, reg_base, reg_offset);
18116 /* Load the lower destination register from the address in
18117 the higher destination register. */
18118 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
18119 reg_dest, reg_dest + 1);
18121 /* Load the higher destination register from its own address
18122 plus 4. */
18123 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
18124 reg_dest + 1, reg_dest + 1);
18126 else
18128 /* Compute <address> + 4 for the high order load. */
18129 operands[2] = adjust_address (operands[1], SImode, 4);
18131 /* If the computed address is held in the low order register
18132 then load the high order register first, otherwise always
18133 load the low order register first. */
18134 if (REGNO (operands[0]) == REGNO (base))
18136 output_asm_insn ("ldr\t%H0, %2", operands);
18137 output_asm_insn ("ldr\t%0, %1", operands);
18139 else
18141 output_asm_insn ("ldr\t%0, %1", operands);
18142 output_asm_insn ("ldr\t%H0, %2", operands);
18145 break;
18147 case LABEL_REF:
18148 /* With no registers to worry about we can just load the value
18149 directly. */
18150 operands[2] = adjust_address (operands[1], SImode, 4);
18152 output_asm_insn ("ldr\t%H0, %2", operands);
18153 output_asm_insn ("ldr\t%0, %1", operands);
18154 break;
18156 default:
18157 gcc_unreachable ();
18160 return "";
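/* Example of the ordering constraint handled above (illustrative): for a
   doubleword load of [r0] into r0/r1 the low word must be loaded last,
   because loading it first would destroy the base register:
	ldr	r1, [r0, #4]
	ldr	r0, [r0]
   whereas loading [r2] into r0/r1 can proceed in the natural order:
	ldr	r0, [r2]
	ldr	r1, [r2, #4]  */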
18163 const char *
18164 thumb_output_move_mem_multiple (int n, rtx *operands)
18166 rtx tmp;
18168 switch (n)
18170 case 2:
18171 if (REGNO (operands[4]) > REGNO (operands[5]))
18173 tmp = operands[4];
18174 operands[4] = operands[5];
18175 operands[5] = tmp;
18177 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
18178 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
18179 break;
18181 case 3:
18182 if (REGNO (operands[4]) > REGNO (operands[5]))
18184 tmp = operands[4];
18185 operands[4] = operands[5];
18186 operands[5] = tmp;
18188 if (REGNO (operands[5]) > REGNO (operands[6]))
18190 tmp = operands[5];
18191 operands[5] = operands[6];
18192 operands[6] = tmp;
18194 if (REGNO (operands[4]) > REGNO (operands[5]))
18196 tmp = operands[4];
18197 operands[4] = operands[5];
18198 operands[5] = tmp;
18201 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
18202 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
18203 break;
18205 default:
18206 gcc_unreachable ();
18209 return "";
18212 /* Output a call-via instruction for thumb state. */
18213 const char *
18214 thumb_call_via_reg (rtx reg)
18216 int regno = REGNO (reg);
18217 rtx *labelp;
18219 gcc_assert (regno < LR_REGNUM);
18221 /* If we are in the normal text section we can use a single instance
18222 per compilation unit. If we are doing function sections, then we need
18223 an entry per section, since we can't rely on reachability. */
18224 if (in_section == text_section)
18226 thumb_call_reg_needed = 1;
18228 if (thumb_call_via_label[regno] == NULL)
18229 thumb_call_via_label[regno] = gen_label_rtx ();
18230 labelp = thumb_call_via_label + regno;
18232 else
18234 if (cfun->machine->call_via[regno] == NULL)
18235 cfun->machine->call_via[regno] = gen_label_rtx ();
18236 labelp = cfun->machine->call_via + regno;
18239 output_asm_insn ("bl\t%a0", labelp);
18240 return "";
18243 /* Routines for generating rtl. */
18244 void
18245 thumb_expand_movmemqi (rtx *operands)
18247 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
18248 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
18249 HOST_WIDE_INT len = INTVAL (operands[2]);
18250 HOST_WIDE_INT offset = 0;
18252 while (len >= 12)
18254 emit_insn (gen_movmem12b (out, in, out, in));
18255 len -= 12;
18258 if (len >= 8)
18260 emit_insn (gen_movmem8b (out, in, out, in));
18261 len -= 8;
18264 if (len >= 4)
18266 rtx reg = gen_reg_rtx (SImode);
18267 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
18268 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
18269 len -= 4;
18270 offset += 4;
18273 if (len >= 2)
18275 rtx reg = gen_reg_rtx (HImode);
18276 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
18277 plus_constant (in, offset))));
18278 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
18279 reg));
18280 len -= 2;
18281 offset += 2;
18284 if (len)
18286 rtx reg = gen_reg_rtx (QImode);
18287 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
18288 plus_constant (in, offset))));
18289 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
18290 reg));
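/* Illustrative helper (a sketch, not part of the original file): the chunk
   sizes the expander above selects for a given byte count, e.g. a 30-byte
   copy becomes 12 + 12 + 4 + 2.  */
#if 0 /* sketch only */
static int
example_movmem_chunks (int len, int *chunks)
{
  int n = 0;
  while (len >= 12) { chunks[n++] = 12; len -= 12; }
  if (len >= 8)  { chunks[n++] = 8; len -= 8; }
  if (len >= 4)  { chunks[n++] = 4; len -= 4; }
  if (len >= 2)  { chunks[n++] = 2; len -= 2; }
  if (len)       chunks[n++] = 1;
  return n;
}
#endif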
18294 void
18295 thumb_reload_out_hi (rtx *operands)
18297 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
18300 /* Handle reading a half-word from memory during reload. */
18301 void
18302 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
18304 gcc_unreachable ();
18307 /* Return the length of a function name prefix
18308 that starts with the character 'c'. */
18309 static int
18310 arm_get_strip_length (int c)
18312 switch (c)
18314 ARM_NAME_ENCODING_LENGTHS
18315 default: return 0;
18319 /* Return a pointer to a function's name with any
18320 and all prefix encodings stripped from it. */
18321 const char *
18322 arm_strip_name_encoding (const char *name)
18324 int skip;
18326 while ((skip = arm_get_strip_length (* name)))
18327 name += skip;
18329 return name;
18332 /* If there is a '*' anywhere in the name's prefix, then
18333 emit the stripped name verbatim, otherwise prepend an
18334 underscore if leading underscores are being used. */
18335 void
18336 arm_asm_output_labelref (FILE *stream, const char *name)
18338 int skip;
18339 int verbatim = 0;
18341 while ((skip = arm_get_strip_length (* name)))
18343 verbatim |= (*name == '*');
18344 name += skip;
18347 if (verbatim)
18348 fputs (name, stream);
18349 else
18350 asm_fprintf (stream, "%U%s", name);
18353 static void
18354 arm_file_start (void)
18356 int val;
18358 if (TARGET_UNIFIED_ASM)
18359 asm_fprintf (asm_out_file, "\t.syntax unified\n");
18361 if (TARGET_BPABI)
18363 const char *fpu_name;
18364 if (arm_select[0].string)
18365 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
18366 else if (arm_select[1].string)
18367 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
18368 else
18369 asm_fprintf (asm_out_file, "\t.cpu %s\n",
18370 all_cores[arm_default_cpu].name);
18372 if (TARGET_SOFT_FLOAT)
18374 if (TARGET_VFP)
18375 fpu_name = "softvfp";
18376 else
18377 fpu_name = "softfpa";
18379 else
18381 int set_float_abi_attributes = 0;
18382 switch (arm_fpu_arch)
18384 case FPUTYPE_FPA:
18385 fpu_name = "fpa";
18386 break;
18387 case FPUTYPE_FPA_EMU2:
18388 fpu_name = "fpe2";
18389 break;
18390 case FPUTYPE_FPA_EMU3:
18391 fpu_name = "fpe3";
18392 break;
18393 case FPUTYPE_MAVERICK:
18394 fpu_name = "maverick";
18395 break;
18396 case FPUTYPE_VFP:
18397 fpu_name = "vfp";
18398 set_float_abi_attributes = 1;
18399 break;
18400 case FPUTYPE_VFP3D16:
18401 fpu_name = "vfpv3-d16";
18402 set_float_abi_attributes = 1;
18403 break;
18404 case FPUTYPE_VFP3:
18405 fpu_name = "vfpv3";
18406 set_float_abi_attributes = 1;
18407 break;
18408 case FPUTYPE_NEON:
18409 fpu_name = "neon";
18410 set_float_abi_attributes = 1;
18411 break;
18412 default:
18413 abort();
18415 if (set_float_abi_attributes)
18417 if (TARGET_HARD_FLOAT)
18418 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
18419 if (TARGET_HARD_FLOAT_ABI)
18420 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
18423 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
18425 /* Some of these attributes only apply when the corresponding features
18426 are used. However we don't have any easy way of figuring this out.
18427 Conservatively record the setting that would have been used. */
18429 /* Tag_ABI_FP_rounding. */
18430 if (flag_rounding_math)
18431 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
18432 if (!flag_unsafe_math_optimizations)
18434 /* Tag_ABI_FP_denormal. */
18435 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
18436 /* Tag_ABI_FP_exceptions. */
18437 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
18439 /* Tag_ABI_FP_user_exceptions. */
18440 if (flag_signaling_nans)
18441 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
18442 /* Tag_ABI_FP_number_model. */
18443 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
18444 flag_finite_math_only ? 1 : 3);
18446 /* Tag_ABI_align8_needed. */
18447 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
18448 /* Tag_ABI_align8_preserved. */
18449 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
18450 /* Tag_ABI_enum_size. */
18451 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
18452 flag_short_enums ? 1 : 2);
18454 /* Tag_ABI_optimization_goals. */
18455 if (optimize_size)
18456 val = 4;
18457 else if (optimize >= 2)
18458 val = 2;
18459 else if (optimize)
18460 val = 1;
18461 else
18462 val = 6;
18463 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
18465 if (arm_lang_output_object_attributes_hook)
18466 arm_lang_output_object_attributes_hook();
18468 default_file_start();
18471 static void
18472 arm_file_end (void)
18474 int regno;
18476 if (NEED_INDICATE_EXEC_STACK)
18477 /* Add .note.GNU-stack. */
18478 file_end_indicate_exec_stack ();
18480 if (! thumb_call_reg_needed)
18481 return;
18483 switch_to_section (text_section);
18484 asm_fprintf (asm_out_file, "\t.code 16\n");
18485 ASM_OUTPUT_ALIGN (asm_out_file, 1);
18487 for (regno = 0; regno < LR_REGNUM; regno++)
18489 rtx label = thumb_call_via_label[regno];
18491 if (label != 0)
18493 targetm.asm_out.internal_label (asm_out_file, "L",
18494 CODE_LABEL_NUMBER (label));
18495 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18500 #ifndef ARM_PE
18501 /* Symbols in the text segment can be accessed without indirecting via the
18502 constant pool; it may take an extra binary operation, but this is still
18503 faster than indirecting via memory. Don't do this when not optimizing,
18504 since we won't be calculating all of the offsets necessary to do this
18505 simplification. */
18507 static void
18508 arm_encode_section_info (tree decl, rtx rtl, int first)
18510 if (optimize > 0 && TREE_CONSTANT (decl))
18511 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
18513 default_encode_section_info (decl, rtl, first);
18515 #endif /* !ARM_PE */
18517 static void
18518 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
18520 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
18521 && !strcmp (prefix, "L"))
18523 arm_ccfsm_state = 0;
18524 arm_target_insn = NULL;
18526 default_internal_label (stream, prefix, labelno);
18529 /* Output code to add DELTA to the first argument, and then jump
18530 to FUNCTION. Used for C++ multiple inheritance. */
18531 static void
18532 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
18533 HOST_WIDE_INT delta,
18534 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
18535 tree function)
18537 static int thunk_label = 0;
18538 char label[256];
18539 char labelpc[256];
18540 int mi_delta = delta;
18541 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
18542 int shift = 0;
18543 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
18544 ? 1 : 0);
18545 if (mi_delta < 0)
18546 mi_delta = - mi_delta;
18548 if (TARGET_THUMB1)
18550 int labelno = thunk_label++;
18551 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18552 /* Thunks are entered in ARM mode when available. */
18553 if (TARGET_THUMB1_ONLY)
18555 /* push r3 so we can use it as a temporary. */
18556 /* TODO: Omit this save if r3 is not used. */
18557 fputs ("\tpush {r3}\n", file);
18558 fputs ("\tldr\tr3, ", file);
18560 else
18562 fputs ("\tldr\tr12, ", file);
18564 assemble_name (file, label);
18565 fputc ('\n', file);
18566 if (flag_pic)
18568 /* If we are generating PIC, the ldr instruction below loads
18569 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18570 the address of the add + 8, so we have:
18572 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
18573 = target + 1.
18575 Note that we have "+ 1" because some versions of GNU ld
18576 don't set the low bit of the result for R_ARM_REL32
18577 relocations against thumb function symbols.
18578 On ARMv6M this is +4, not +8. */
18579 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18580 assemble_name (file, labelpc);
18581 fputs (":\n", file);
18582 if (TARGET_THUMB1_ONLY)
18584 /* This is 2 insns after the start of the thunk, so we know it
18585 is 4-byte aligned. */
18586 fputs ("\tadd\tr3, pc, r3\n", file);
18587 fputs ("\tmov r12, r3\n", file);
18589 else
18590 fputs ("\tadd\tr12, pc, r12\n", file);
18592 else if (TARGET_THUMB1_ONLY)
18593 fputs ("\tmov r12, r3\n", file);
18595 if (TARGET_THUMB1_ONLY)
18597 if (mi_delta > 255)
18599 fputs ("\tldr\tr3, ", file);
18600 assemble_name (file, label);
18601 fputs ("+4\n", file);
18602 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18603 mi_op, this_regno, this_regno);
18605 else if (mi_delta != 0)
18607 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18608 mi_op, this_regno, this_regno,
18609 mi_delta);
18612 else
18614 /* TODO: Use movw/movt for large constants when available. */
18615 while (mi_delta != 0)
18617 if ((mi_delta & (3 << shift)) == 0)
18618 shift += 2;
18619 else
18621 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18622 mi_op, this_regno, this_regno,
18623 mi_delta & (0xff << shift));
18624 mi_delta &= ~(0xff << shift);
18625 shift += 8;
18629 if (TARGET_THUMB1)
18631 if (TARGET_THUMB1_ONLY)
18632 fputs ("\tpop\t{r3}\n", file);
18634 fprintf (file, "\tbx\tr12\n");
18635 ASM_OUTPUT_ALIGN (file, 2);
18636 assemble_name (file, label);
18637 fputs (":\n", file);
18638 if (flag_pic)
18640 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18641 rtx tem = XEXP (DECL_RTL (function), 0);
18642 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18643 tem = gen_rtx_MINUS (GET_MODE (tem),
18644 tem,
18645 gen_rtx_SYMBOL_REF (Pmode,
18646 ggc_strdup (labelpc)));
18647 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18649 else
18650 /* Output ".word .LTHUNKn". */
18651 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18653 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18654 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
18656 else
18658 fputs ("\tb\t", file);
18659 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18660 if (NEED_PLT_RELOC)
18661 fputs ("(PLT)", file);
18662 fputc ('\n', file);
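/* For illustration (not actual compiler output): with delta = 0x1234 and
   "this" in r0, the add-immediate loop above splits the constant into
   8-bit fields at even bit positions, giving
	add	r0, r0, #564	@ 0x234
	add	r0, r0, #4096	@ 0x1000
   each of which is a legal ARM rotated immediate.  */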
18667 arm_emit_vector_const (FILE *file, rtx x)
18669 int i;
18670 const char * pattern;
18672 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18674 switch (GET_MODE (x))
18676 case V2SImode: pattern = "%08x"; break;
18677 case V4HImode: pattern = "%04x"; break;
18678 case V8QImode: pattern = "%02x"; break;
18679 default: gcc_unreachable ();
18682 fprintf (file, "0x");
18683 for (i = CONST_VECTOR_NUNITS (x); i--;)
18685 rtx element;
18687 element = CONST_VECTOR_ELT (x, i);
18688 fprintf (file, pattern, INTVAL (element));
18691 return 1;
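/* Example (illustrative): a V4HImode constant whose elements are 1, 2, 3
   and 4 is emitted as 0x0004000300020001 -- the elements are printed from
   the highest-numbered down, four hex digits each.  */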
18694 const char *
18695 arm_output_load_gr (rtx *operands)
18697 rtx reg;
18698 rtx offset;
18699 rtx wcgr;
18700 rtx sum;
18702 if (GET_CODE (operands [1]) != MEM
18703 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18704 || GET_CODE (reg = XEXP (sum, 0)) != REG
18705 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18706 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18707 return "wldrw%?\t%0, %1";
18709 /* Fix up an out-of-range load of a GR register. */
18710 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18711 wcgr = operands[0];
18712 operands[0] = reg;
18713 output_asm_insn ("ldr%?\t%0, %1", operands);
18715 operands[0] = wcgr;
18716 operands[1] = reg;
18717 output_asm_insn ("tmcr%?\t%0, %1", operands);
18718 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18720 return "";
18723 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18725 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18726 named arg and all anonymous args onto the stack.
18727 XXX I know the prologue shouldn't be pushing registers, but it is faster
18728 that way. */
18730 static void
18731 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18732 enum machine_mode mode,
18733 tree type,
18734 int *pretend_size,
18735 int second_time ATTRIBUTE_UNUSED)
18737 int nregs = cum->nregs;
18738 if (nregs & 1
18739 && ARM_DOUBLEWORD_ALIGN
18740 && arm_needs_doubleword_align (mode, type))
18741 nregs++;
18743 cfun->machine->uses_anonymous_args = 1;
18744 if (nregs < NUM_ARG_REGS)
18745 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
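/* Worked example (illustrative): for a variadic function declared as
   f (int fmt, ...) the named argument occupies r0, so nregs is 1 and
   *pretend_size becomes 3 * UNITS_PER_WORD, i.e. the prologue flushes
   r1-r3 to the stack just below the caller's stack arguments.  */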
18748 /* Return nonzero if the CONSUMER instruction (a store) does not need
18749 PRODUCER's value to calculate the address. */
18752 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18754 rtx value = PATTERN (producer);
18755 rtx addr = PATTERN (consumer);
18757 if (GET_CODE (value) == COND_EXEC)
18758 value = COND_EXEC_CODE (value);
18759 if (GET_CODE (value) == PARALLEL)
18760 value = XVECEXP (value, 0, 0);
18761 value = XEXP (value, 0);
18762 if (GET_CODE (addr) == COND_EXEC)
18763 addr = COND_EXEC_CODE (addr);
18764 if (GET_CODE (addr) == PARALLEL)
18765 addr = XVECEXP (addr, 0, 0);
18766 addr = XEXP (addr, 0);
18768 return !reg_overlap_mentioned_p (value, addr);
18771 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18772 have an early register shift value or amount dependency on the
18773 result of PRODUCER. */
18776 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18778 rtx value = PATTERN (producer);
18779 rtx op = PATTERN (consumer);
18780 rtx early_op;
18782 if (GET_CODE (value) == COND_EXEC)
18783 value = COND_EXEC_CODE (value);
18784 if (GET_CODE (value) == PARALLEL)
18785 value = XVECEXP (value, 0, 0);
18786 value = XEXP (value, 0);
18787 if (GET_CODE (op) == COND_EXEC)
18788 op = COND_EXEC_CODE (op);
18789 if (GET_CODE (op) == PARALLEL)
18790 op = XVECEXP (op, 0, 0);
18791 op = XEXP (op, 1);
18793 early_op = XEXP (op, 0);
18794 /* This is either an actual independent shift, or a shift applied to
18795 the first operand of another operation. We want the whole shift
18796 operation. */
18797 if (GET_CODE (early_op) == REG)
18798 early_op = op;
18800 return !reg_overlap_mentioned_p (value, early_op);
18803 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18804 have an early register shift value dependency on the result of
18805 PRODUCER. */
18808 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18810 rtx value = PATTERN (producer);
18811 rtx op = PATTERN (consumer);
18812 rtx early_op;
18814 if (GET_CODE (value) == COND_EXEC)
18815 value = COND_EXEC_CODE (value);
18816 if (GET_CODE (value) == PARALLEL)
18817 value = XVECEXP (value, 0, 0);
18818 value = XEXP (value, 0);
18819 if (GET_CODE (op) == COND_EXEC)
18820 op = COND_EXEC_CODE (op);
18821 if (GET_CODE (op) == PARALLEL)
18822 op = XVECEXP (op, 0, 0);
18823 op = XEXP (op, 1);
18825 early_op = XEXP (op, 0);
18827 /* This is either an actual independent shift, or a shift applied to
18828 the first operand of another operation. We want the value being
18829 shifted, in either case. */
18830 if (GET_CODE (early_op) != REG)
18831 early_op = XEXP (early_op, 0);
18833 return !reg_overlap_mentioned_p (value, early_op);
18836 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18837 have an early register mult dependency on the result of
18838 PRODUCER. */
18841 arm_no_early_mul_dep (rtx producer, rtx consumer)
18843 rtx value = PATTERN (producer);
18844 rtx op = PATTERN (consumer);
18846 if (GET_CODE (value) == COND_EXEC)
18847 value = COND_EXEC_CODE (value);
18848 if (GET_CODE (value) == PARALLEL)
18849 value = XVECEXP (value, 0, 0);
18850 value = XEXP (value, 0);
18851 if (GET_CODE (op) == COND_EXEC)
18852 op = COND_EXEC_CODE (op);
18853 if (GET_CODE (op) == PARALLEL)
18854 op = XVECEXP (op, 0, 0);
18855 op = XEXP (op, 1);
18857 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
18859 if (GET_CODE (XEXP (op, 0)) == MULT)
18860 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
18861 else
18862 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
18865 return 0;
18868 /* We can't rely on the caller doing the proper promotion when
18869 using APCS or ATPCS. */
18871 static bool
18872 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18874 return !TARGET_AAPCS_BASED;
18878 /* AAPCS based ABIs use short enums by default. */
18880 static bool
18881 arm_default_short_enums (void)
18883 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18887 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18889 static bool
18890 arm_align_anon_bitfield (void)
18892 return TARGET_AAPCS_BASED;
18896 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18898 static tree
18899 arm_cxx_guard_type (void)
18901 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18904 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18905 has an accumulator dependency on the result of the producer (a
18906 multiplication instruction) and no other dependency on that result. */
18908 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18910 rtx mul = PATTERN (producer);
18911 rtx mac = PATTERN (consumer);
18912 rtx mul_result;
18913 rtx mac_op0, mac_op1, mac_acc;
18915 if (GET_CODE (mul) == COND_EXEC)
18916 mul = COND_EXEC_CODE (mul);
18917 if (GET_CODE (mac) == COND_EXEC)
18918 mac = COND_EXEC_CODE (mac);
18920 /* Check that mul is of the form (set (...) (mult ...))
18921 and mla is of the form (set (...) (plus (mult ...) (...))). */
18922 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18923 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18924 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18925 return 0;
18927 mul_result = XEXP (mul, 0);
18928 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18929 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18930 mac_acc = XEXP (XEXP (mac, 1), 1);
18932 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18933 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18934 && !reg_overlap_mentioned_p (mul_result, mac_op1));
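/* Example (illustrative): for the sequence
	mul	r1, r2, r3
	mla	r4, r5, r6, r1
   the accumulator of the mla is exactly the mul result and neither
   multiplicand overlaps it, so the function returns nonzero; if the mul
   result were instead used as one of the mla's multiplicands the function
   would return zero.  */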
18938 /* The EABI says test the least significant bit of a guard variable. */
18940 static bool
18941 arm_cxx_guard_mask_bit (void)
18943 return TARGET_AAPCS_BASED;
18947 /* The EABI specifies that all array cookies are 8 bytes long. */
18949 static tree
18950 arm_get_cookie_size (tree type)
18952 tree size;
18954 if (!TARGET_AAPCS_BASED)
18955 return default_cxx_get_cookie_size (type);
18957 size = build_int_cst (sizetype, 8);
18958 return size;
18962 /* The EABI says that array cookies should also contain the element size. */
18964 static bool
18965 arm_cookie_has_size (void)
18967 return TARGET_AAPCS_BASED;
18971 /* The EABI says constructors and destructors should return a pointer to
18972 the object constructed/destroyed. */
18974 static bool
18975 arm_cxx_cdtor_returns_this (void)
18977 return TARGET_AAPCS_BASED;
18980 /* The EABI says that an inline function may never be the key
18981 method. */
18983 static bool
18984 arm_cxx_key_method_may_be_inline (void)
18986 return !TARGET_AAPCS_BASED;
18989 static void
18990 arm_cxx_determine_class_data_visibility (tree decl)
18992 if (!TARGET_AAPCS_BASED
18993 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
18994 return;
18996 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18997 is exported. However, on systems without dynamic vague linkage,
18998 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18999 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
19000 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
19001 else
19002 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
19003 DECL_VISIBILITY_SPECIFIED (decl) = 1;
19006 static bool
19007 arm_cxx_class_data_always_comdat (void)
19009 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
19010 vague linkage if the class has no key function. */
19011 return !TARGET_AAPCS_BASED;
19015 /* The EABI says __aeabi_atexit should be used to register static
19016 destructors. */
19018 static bool
19019 arm_cxx_use_aeabi_atexit (void)
19021 return TARGET_AAPCS_BASED;
19025 void
19026 arm_set_return_address (rtx source, rtx scratch)
19028 arm_stack_offsets *offsets;
19029 HOST_WIDE_INT delta;
19030 rtx addr;
19031 unsigned long saved_regs;
19033 offsets = arm_get_frame_offsets ();
19034 saved_regs = offsets->saved_regs_mask;
19036 if ((saved_regs & (1 << LR_REGNUM)) == 0)
19037 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19038 else
19040 if (frame_pointer_needed)
19041 addr = plus_constant(hard_frame_pointer_rtx, -4);
19042 else
19044 /* LR will be the first saved register. */
19045 delta = offsets->outgoing_args - (offsets->frame + 4);
19048 if (delta >= 4096)
19050 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
19051 GEN_INT (delta & ~4095)));
19052 addr = scratch;
19053 delta &= 4095;
19055 else
19056 addr = stack_pointer_rtx;
19058 addr = plus_constant (addr, delta);
19060 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19065 void
19066 thumb_set_return_address (rtx source, rtx scratch)
19068 arm_stack_offsets *offsets;
19069 HOST_WIDE_INT delta;
19070 HOST_WIDE_INT limit;
19071 int reg;
19072 rtx addr;
19073 unsigned long mask;
19075 emit_use (source);
19077 offsets = arm_get_frame_offsets ();
19078 mask = offsets->saved_regs_mask;
19079 if (mask & (1 << LR_REGNUM))
19081 limit = 1024;
19082 /* Find the saved regs. */
19083 if (frame_pointer_needed)
19085 delta = offsets->soft_frame - offsets->saved_args;
19086 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
19087 if (TARGET_THUMB1)
19088 limit = 128;
19090 else
19092 delta = offsets->outgoing_args - offsets->saved_args;
19093 reg = SP_REGNUM;
19095 /* Allow for the stack frame. */
19096 if (TARGET_THUMB1 && TARGET_BACKTRACE)
19097 delta -= 16;
19098 /* The link register is always the first saved register. */
19099 delta -= 4;
19101 /* Construct the address. */
19102 addr = gen_rtx_REG (SImode, reg);
19103 if (delta > limit)
19105 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
19106 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
19107 addr = scratch;
19109 else
19110 addr = plus_constant (addr, delta);
19112 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19114 else
19115 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19118 /* Implements target hook vector_mode_supported_p. */
19119 bool
19120 arm_vector_mode_supported_p (enum machine_mode mode)
19122 /* Neon also supports V2SImode, etc. listed in the clause below. */
19123 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
19124 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
19125 return true;
19127 if ((mode == V2SImode)
19128 || (mode == V4HImode)
19129 || (mode == V8QImode))
19130 return true;
19132 return false;
19135 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
19136 ARM insns and therefore guarantee that the shift count is modulo 256.
19137 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
19138 guarantee no particular behavior for out-of-range counts. */
19140 static unsigned HOST_WIDE_INT
19141 arm_shift_truncation_mask (enum machine_mode mode)
19143 return mode == SImode ? 255 : 0;
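/* Example (illustrative): with the mask above an SImode shift by 257 is
   known to behave like a shift by 1 (257 & 255), so the truncation can be
   optimized away; for DImode the mask is 0 and no such assumption is made.  */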
19147 /* Map internal gcc register numbers to DWARF2 register numbers. */
19149 unsigned int
19150 arm_dbx_register_number (unsigned int regno)
19152 if (regno < 16)
19153 return regno;
19155 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
19156 compatibility. The EABI defines them as registers 96-103. */
19157 if (IS_FPA_REGNUM (regno))
19158 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
19160 /* FIXME: VFPv3 register numbering. */
19161 if (IS_VFP_REGNUM (regno))
19162 return 64 + regno - FIRST_VFP_REGNUM;
19164 if (IS_IWMMXT_GR_REGNUM (regno))
19165 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
19167 if (IS_IWMMXT_REGNUM (regno))
19168 return 112 + regno - FIRST_IWMMXT_REGNUM;
19170 gcc_unreachable ();
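/* Examples of the mapping above (illustrative): r0-r15 map to DWARF
   registers 0-15, the first VFP register to 64, the first FPA register to
   96 under AAPCS (16 for legacy targets), the first iWMMXt GR register to
   104 and the first iWMMXt data register to 112.  */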
19174 #ifdef TARGET_UNWIND_INFO
19175 /* Emit unwind directives for a store-multiple instruction or stack pointer
19176 push during alignment.
19177 These should only ever be generated by the function prologue code, so
19178 expect them to have a particular form. */
19180 static void
19181 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
19183 int i;
19184 HOST_WIDE_INT offset;
19185 HOST_WIDE_INT nregs;
19186 int reg_size;
19187 unsigned reg;
19188 unsigned lastreg;
19189 rtx e;
19191 e = XVECEXP (p, 0, 0);
19192 if (GET_CODE (e) != SET)
19193 abort ();
19195 /* First insn will adjust the stack pointer. */
19196 if (GET_CODE (e) != SET
19197 || GET_CODE (XEXP (e, 0)) != REG
19198 || REGNO (XEXP (e, 0)) != SP_REGNUM
19199 || GET_CODE (XEXP (e, 1)) != PLUS)
19200 abort ();
19202 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
19203 nregs = XVECLEN (p, 0) - 1;
19205 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
19206 if (reg < 16)
19208 /* The function prologue may also push pc, but not annotate it as it is
19209 never restored. We turn this into a stack pointer adjustment. */
19210 if (nregs * 4 == offset - 4)
19212 fprintf (asm_out_file, "\t.pad #4\n");
19213 offset -= 4;
19215 reg_size = 4;
19216 fprintf (asm_out_file, "\t.save {");
19218 else if (IS_VFP_REGNUM (reg))
19220 reg_size = 8;
19221 fprintf (asm_out_file, "\t.vsave {");
19223 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
19225 /* FPA registers are done differently. */
19226 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
19227 return;
19229 else
19230 /* Unknown register type. */
19231 abort ();
19233 /* If the stack increment doesn't match the size of the saved registers,
19234 something has gone horribly wrong. */
19235 if (offset != nregs * reg_size)
19236 abort ();
19238 offset = 0;
19239 lastreg = 0;
19240 /* The remaining insns will describe the stores. */
19241 for (i = 1; i <= nregs; i++)
19243 /* Expect (set (mem <addr>) (reg)).
19244 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
19245 e = XVECEXP (p, 0, i);
19246 if (GET_CODE (e) != SET
19247 || GET_CODE (XEXP (e, 0)) != MEM
19248 || GET_CODE (XEXP (e, 1)) != REG)
19249 abort ();
19251 reg = REGNO (XEXP (e, 1));
19252 if (reg < lastreg)
19253 abort ();
19255 if (i != 1)
19256 fprintf (asm_out_file, ", ");
19257 /* We can't use %r for vfp because we need to use the
19258 double precision register names. */
19259 if (IS_VFP_REGNUM (reg))
19260 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
19261 else
19262 asm_fprintf (asm_out_file, "%r", reg);
19264 #ifdef ENABLE_CHECKING
19265 /* Check that the addresses are consecutive. */
19266 e = XEXP (XEXP (e, 0), 0);
19267 if (GET_CODE (e) == PLUS)
19269 offset += reg_size;
19270 if (GET_CODE (XEXP (e, 0)) != REG
19271 || REGNO (XEXP (e, 0)) != SP_REGNUM
19272 || GET_CODE (XEXP (e, 1)) != CONST_INT
19273 || offset != INTVAL (XEXP (e, 1)))
19274 abort ();
19276 else if (i != 1
19277 || GET_CODE (e) != REG
19278 || REGNO (e) != SP_REGNUM)
19279 abort ();
19280 #endif
19282 fprintf (asm_out_file, "}\n");
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf (asm_out_file, "{d%d}\n",
                     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || GET_CODE (XEXP (e1, 0)) != REG
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (GET_CODE (XEXP (e1, 0)) != REG
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           INTVAL (XEXP (e1, 1)));
            }
          else if (GET_CODE (e1) == REG)
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && GET_CODE (XEXP (e1, 0)) == REG
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && GET_CODE (XEXP (e1, 1)) == CONST_INT)
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
        }
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
        {
          /* Stack pointer save before alignment.  */
          reg = REGNO (e0);
          asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                       reg + 0x90, reg);
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}

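/* For example, a frame-related (set (reg sp) (plus (reg sp) (const_int -16)))
   is annotated above as "\t.pad #16", while a frame pointer established as
   sp + 8 produces a ".setfp <fp>, sp, #8" directive.  */
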
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}

/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

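/* For example, a catch clause for class Foo makes the exception table
   reference its type_info object as "\t.word\t_ZTI3Foo(TARGET2)".  */
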
#endif /* TARGET_UNWIND_INFO */

/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
                              SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}

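/* For example, a function known not to throw, compiled without
   -funwind-tables and not using an LSDA, is bracketed as
        .fnstart
        ...
        .cantunwind
        .fnend
   while other functions get only the .fnstart/.fnend pair, with the
   frame directives from arm_unwind_emit in between.  */
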
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}

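/* For example, an initial-exec (TLS_IE32) reference to a symbol "x" is
   printed roughly as "x(gottpoff) + (. - <label1> - <label2>)", where
   the two labels come from operands 2 and 3 of the UNSPEC.  */
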
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT (val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}

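/* For example, with TARGET_UNIFIED_ASM, SET_FLAGS == 0 and a constant
   shift amount, shift_op supplies the mnemonic and amount, and the
   pattern handed to output_asm_insn has the form
   "<shift>%?\t%0, %1, %2"; without unified asm everything is folded
   into a single "mov%?\t%0, %1%S3".  */
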
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";

    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";

    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }

    default:
      gcc_unreachable ();
    }
}

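/* For example, a byte-offset (QImode) dispatch table yields roughly
        cmp     r0, #<bound>
        bhi     .Ldefault
        tbb     [pc, r0]
   with the offset table itself emitted immediately after the tbb.  */
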
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}

/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}

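/* For example, the NEON type int8x8_t (V8QImode with element type
   __builtin_neon_qi) is mangled via the table above as
   "15__simd64_int8_t", so "void f (int8x8_t)" becomes
   "_Z1f15__simd64_int8_t".  */
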
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}

/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}

#include "gt-arm.h"