1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-pragma.h"
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
57 /* Forward definitions of types. */
58 typedef struct minipool_node Mnode;
59 typedef struct minipool_fixup Mfix;
61 const struct attribute_spec arm_attribute_table[];
63 void (*arm_lang_output_object_attributes_hook)(void);
65 /* Forward function declarations. */
66 static int arm_compute_static_chain_stack_bytes (void);
67 static arm_stack_offsets *arm_get_frame_offsets (void);
68 static void arm_add_gc_roots (void);
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
70 HOST_WIDE_INT, rtx, rtx, int, int);
71 static unsigned bit_count (unsigned long);
72 static int arm_address_register_rtx_p (rtx, int);
73 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
74 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
75 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
76 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
77 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
78 inline static int thumb1_index_register_rtx_p (rtx, int);
79 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
80 static int thumb_far_jump_used_p (void);
81 static bool thumb_force_lr_save (void);
82 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
83 static rtx emit_sfm (int, int);
84 static unsigned arm_size_return_regs (void);
85 static bool arm_assemble_integer (rtx, unsigned int, int);
86 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
87 static arm_cc get_arm_condition_code (rtx);
88 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
89 static rtx is_jump_table (rtx);
90 static const char *output_multi_immediate (rtx *, const char *, const char *,
91 int, HOST_WIDE_INT);
92 static const char *shift_op (rtx, HOST_WIDE_INT *);
93 static struct machine_function *arm_init_machine_status (void);
94 static void thumb_exit (FILE *, int);
96 static HOST_WIDE_INT get_jump_table_size (rtx);
97 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
98 static Mnode *add_minipool_forward_ref (Mfix *);
99 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
100 static Mnode *add_minipool_backward_ref (Mfix *);
101 static void assign_minipool_offsets (Mfix *);
102 static void arm_print_value (FILE *, rtx);
103 static void dump_minipool (rtx);
104 static int arm_barrier_cost (rtx);
105 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
106 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
107 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
108 rtx);
109 static void arm_reorg (void);
110 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
111 static unsigned long arm_compute_save_reg0_reg12_mask (void);
112 static unsigned long arm_compute_save_reg_mask (void);
113 static unsigned long arm_isr_value (tree);
114 static unsigned long arm_compute_func_type (void);
115 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
116 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
117 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
118 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
119 #endif
120 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
121 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
122 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
123 static int arm_comp_type_attributes (const_tree, const_tree);
124 static void arm_set_default_type_attributes (tree);
125 static int arm_adjust_cost (rtx, rtx, rtx, int);
126 static int count_insns_for_constant (HOST_WIDE_INT, int);
127 static int arm_get_strip_length (int);
128 static bool arm_function_ok_for_sibcall (tree, tree);
129 static void arm_internal_label (FILE *, const char *, unsigned long);
130 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
131 tree);
132 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
133 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
134 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
135 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
136 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
137 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
138 static bool arm_rtx_costs (rtx, int, int, int *, bool);
139 static int arm_address_cost (rtx, bool);
140 static bool arm_memory_load_p (rtx);
141 static bool arm_cirrus_insn_p (rtx);
142 static void cirrus_reorg (rtx);
143 static void arm_init_builtins (void);
144 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
145 static void arm_init_iwmmxt_builtins (void);
146 static rtx safe_vector_operand (rtx, enum machine_mode);
147 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
148 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
150 static void emit_constant_insn (rtx cond, rtx pattern);
151 static rtx emit_set_insn (rtx, rtx);
152 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
153 tree, bool);
155 #ifdef OBJECT_FORMAT_ELF
156 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
157 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
158 #endif
159 #ifndef ARM_PE
160 static void arm_encode_section_info (tree, rtx, int);
161 #endif
163 static void arm_file_end (void);
164 static void arm_file_start (void);
166 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
167 tree, int *, int);
168 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
169 enum machine_mode, const_tree, bool);
170 static bool arm_promote_prototypes (const_tree);
171 static bool arm_default_short_enums (void);
172 static bool arm_align_anon_bitfield (void);
173 static bool arm_return_in_msb (const_tree);
174 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 #ifdef TARGET_UNWIND_INFO
177 static void arm_unwind_emit (FILE *, rtx);
178 static bool arm_output_ttype (rtx);
179 #endif
180 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
182 static tree arm_cxx_guard_type (void);
183 static bool arm_cxx_guard_mask_bit (void);
184 static tree arm_get_cookie_size (tree);
185 static bool arm_cookie_has_size (void);
186 static bool arm_cxx_cdtor_returns_this (void);
187 static bool arm_cxx_key_method_may_be_inline (void);
188 static void arm_cxx_determine_class_data_visibility (tree);
189 static bool arm_cxx_class_data_always_comdat (void);
190 static bool arm_cxx_use_aeabi_atexit (void);
191 static void arm_init_libfuncs (void);
192 static tree arm_build_builtin_va_list (void);
193 static void arm_expand_builtin_va_start (tree, rtx);
194 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
195 static bool arm_handle_option (size_t, const char *, int);
196 static void arm_target_help (void);
197 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
198 static bool arm_cannot_copy_insn_p (rtx);
199 static bool arm_tls_symbol_p (rtx x);
200 static int arm_issue_rate (void);
201 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
202 static bool arm_allocate_stack_slots_for_args (void);
205 /* Initialize the GCC target structure. */
206 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
207 #undef TARGET_MERGE_DECL_ATTRIBUTES
208 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
209 #endif
211 #undef TARGET_LEGITIMIZE_ADDRESS
212 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
214 #undef TARGET_ATTRIBUTE_TABLE
215 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
217 #undef TARGET_ASM_FILE_START
218 #define TARGET_ASM_FILE_START arm_file_start
219 #undef TARGET_ASM_FILE_END
220 #define TARGET_ASM_FILE_END arm_file_end
222 #undef TARGET_ASM_ALIGNED_SI_OP
223 #define TARGET_ASM_ALIGNED_SI_OP NULL
224 #undef TARGET_ASM_INTEGER
225 #define TARGET_ASM_INTEGER arm_assemble_integer
227 #undef TARGET_ASM_FUNCTION_PROLOGUE
228 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
230 #undef TARGET_ASM_FUNCTION_EPILOGUE
231 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
233 #undef TARGET_DEFAULT_TARGET_FLAGS
234 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
235 #undef TARGET_HANDLE_OPTION
236 #define TARGET_HANDLE_OPTION arm_handle_option
237 #undef TARGET_HELP
238 #define TARGET_HELP arm_target_help
240 #undef TARGET_COMP_TYPE_ATTRIBUTES
241 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
243 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
244 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
246 #undef TARGET_SCHED_ADJUST_COST
247 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
249 #undef TARGET_ENCODE_SECTION_INFO
250 #ifdef ARM_PE
251 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
252 #else
253 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
254 #endif
256 #undef TARGET_STRIP_NAME_ENCODING
257 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
259 #undef TARGET_ASM_INTERNAL_LABEL
260 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
262 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
263 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
265 #undef TARGET_ASM_OUTPUT_MI_THUNK
266 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
267 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
268 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
270 #undef TARGET_RTX_COSTS
271 #define TARGET_RTX_COSTS arm_rtx_costs
272 #undef TARGET_ADDRESS_COST
273 #define TARGET_ADDRESS_COST arm_address_cost
275 #undef TARGET_SHIFT_TRUNCATION_MASK
276 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
277 #undef TARGET_VECTOR_MODE_SUPPORTED_P
278 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
280 #undef TARGET_MACHINE_DEPENDENT_REORG
281 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
283 #undef TARGET_INIT_BUILTINS
284 #define TARGET_INIT_BUILTINS arm_init_builtins
285 #undef TARGET_EXPAND_BUILTIN
286 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
288 #undef TARGET_INIT_LIBFUNCS
289 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
291 #undef TARGET_PROMOTE_FUNCTION_ARGS
292 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
293 #undef TARGET_PROMOTE_FUNCTION_RETURN
294 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
295 #undef TARGET_PROMOTE_PROTOTYPES
296 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
297 #undef TARGET_PASS_BY_REFERENCE
298 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
299 #undef TARGET_ARG_PARTIAL_BYTES
300 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
302 #undef TARGET_SETUP_INCOMING_VARARGS
303 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
305 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
306 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
308 #undef TARGET_DEFAULT_SHORT_ENUMS
309 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
311 #undef TARGET_ALIGN_ANON_BITFIELD
312 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
314 #undef TARGET_NARROW_VOLATILE_BITFIELD
315 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
317 #undef TARGET_CXX_GUARD_TYPE
318 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
320 #undef TARGET_CXX_GUARD_MASK_BIT
321 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
323 #undef TARGET_CXX_GET_COOKIE_SIZE
324 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
326 #undef TARGET_CXX_COOKIE_HAS_SIZE
327 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
329 #undef TARGET_CXX_CDTOR_RETURNS_THIS
330 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
332 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
333 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
335 #undef TARGET_CXX_USE_AEABI_ATEXIT
336 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
338 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
339 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
340 arm_cxx_determine_class_data_visibility
342 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
343 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
345 #undef TARGET_RETURN_IN_MSB
346 #define TARGET_RETURN_IN_MSB arm_return_in_msb
348 #undef TARGET_RETURN_IN_MEMORY
349 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
351 #undef TARGET_MUST_PASS_IN_STACK
352 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
354 #ifdef TARGET_UNWIND_INFO
355 #undef TARGET_UNWIND_EMIT
356 #define TARGET_UNWIND_EMIT arm_unwind_emit
358 /* EABI unwinding tables use a different format for the typeinfo tables. */
359 #undef TARGET_ASM_TTYPE
360 #define TARGET_ASM_TTYPE arm_output_ttype
362 #undef TARGET_ARM_EABI_UNWINDER
363 #define TARGET_ARM_EABI_UNWINDER true
364 #endif /* TARGET_UNWIND_INFO */
366 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
367 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
369 #undef TARGET_CANNOT_COPY_INSN_P
370 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
372 #ifdef HAVE_AS_TLS
373 #undef TARGET_HAVE_TLS
374 #define TARGET_HAVE_TLS true
375 #endif
377 #undef TARGET_CANNOT_FORCE_CONST_MEM
378 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
380 #undef TARGET_MAX_ANCHOR_OFFSET
381 #define TARGET_MAX_ANCHOR_OFFSET 4095
383 /* The minimum is set such that the total size of the block
384 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
385 divisible by eight, ensuring natural spacing of anchors. */
386 #undef TARGET_MIN_ANCHOR_OFFSET
387 #define TARGET_MIN_ANCHOR_OFFSET -4088
389 #undef TARGET_SCHED_ISSUE_RATE
390 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
392 #undef TARGET_MANGLE_TYPE
393 #define TARGET_MANGLE_TYPE arm_mangle_type
395 #undef TARGET_BUILD_BUILTIN_VA_LIST
396 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
397 #undef TARGET_EXPAND_BUILTIN_VA_START
398 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
399 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
400 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
402 #ifdef HAVE_AS_TLS
403 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
404 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
405 #endif
407 #undef TARGET_LEGITIMATE_ADDRESS_P
408 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
410 struct gcc_target targetm = TARGET_INITIALIZER;
412 /* Obstack for minipool constant handling. */
413 static struct obstack minipool_obstack;
414 static char * minipool_startobj;
416 /* The maximum number of insns skipped which
417 will be conditionalised if possible. */
418 static int max_insns_skipped = 5;
420 extern FILE * asm_out_file;
422 /* True if we are currently building a constant table. */
423 int making_const_table;
425 /* The processor for which instructions should be scheduled. */
426 enum processor_type arm_tune = arm_none;
428 /* The default processor used if not overridden by commandline. */
429 static enum processor_type arm_default_cpu = arm_none;
431 /* Which floating point model to use. */
432 enum arm_fp_model arm_fp_model;
434 /* Which floating point hardware is available. */
435 enum fputype arm_fpu_arch;
437 /* Which floating point hardware to schedule for. */
438 enum fputype arm_fpu_tune;
440 /* Whether to use floating point hardware. */
441 enum float_abi_type arm_float_abi;
443 /* Which ABI to use. */
444 enum arm_abi_type arm_abi;
446 /* Which thread pointer model to use. */
447 enum arm_tp_type target_thread_pointer = TP_AUTO;
449 /* Used to parse -mstructure_size_boundary command line option. */
450 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
452 /* Used for Thumb call_via trampolines. */
453 rtx thumb_call_via_label[14];
454 static int thumb_call_reg_needed;
456 /* Bit values used to identify processor capabilities. */
457 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
458 #define FL_ARCH3M (1 << 1) /* Extended multiply */
459 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
460 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
461 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
462 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
463 #define FL_THUMB (1 << 6) /* Thumb aware */
464 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
465 #define FL_STRONG (1 << 8) /* StrongARM */
466 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
467 #define FL_XSCALE (1 << 10) /* XScale */
468 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
469 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
470 media instructions. */
471 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
472 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
473 Note: ARM6 & 7 derivatives only. */
474 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
475 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
476 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
477 profile. */
478 #define FL_DIV (1 << 18) /* Hardware divide. */
479 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
480 #define FL_NEON (1 << 20) /* Neon instructions. */
482 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
484 #define FL_FOR_ARCH2 FL_NOTM
485 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
486 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
487 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
488 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
489 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
490 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
491 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
492 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
493 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
494 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
495 #define FL_FOR_ARCH6J FL_FOR_ARCH6
496 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
497 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
498 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
499 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
500 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
501 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
502 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
503 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
504 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
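/* To illustrate how these compose, expanding the definitions above gives,
   for example:

     FL_FOR_ARCH5TE == (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                        | FL_ARCH5 | FL_ARCH5E | FL_THUMB)

   Each architecture inherits the capability bits of its predecessors and
   adds the bits named by its suffix; the M-profile entries (FL_FOR_ARCH6M,
   FL_FOR_ARCH7) instead clear FL_NOTM again.  */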
506 /* The bits in this mask specify which
507 instructions we are allowed to generate. */
508 static unsigned long insn_flags = 0;
510 /* The bits in this mask specify which instruction scheduling options should
511 be used. */
512 static unsigned long tune_flags = 0;
514 /* The following are used in the arm.md file as equivalents to bits
515 in the above two flag variables. */
517 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
518 int arm_arch3m = 0;
520 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
521 int arm_arch4 = 0;
523 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
524 int arm_arch4t = 0;
526 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
527 int arm_arch5 = 0;
529 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
530 int arm_arch5e = 0;
532 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
533 int arm_arch6 = 0;
535 /* Nonzero if this chip supports the ARM 6K extensions. */
536 int arm_arch6k = 0;
538 /* Nonzero if instructions not present in the 'M' profile can be used. */
539 int arm_arch_notm = 0;
541 /* Nonzero if this chip can benefit from load scheduling. */
542 int arm_ld_sched = 0;
544 /* Nonzero if this chip is a StrongARM. */
545 int arm_tune_strongarm = 0;
547 /* Nonzero if this chip is a Cirrus variant. */
548 int arm_arch_cirrus = 0;
550 /* Nonzero if this chip supports Intel Wireless MMX technology. */
551 int arm_arch_iwmmxt = 0;
553 /* Nonzero if this chip is an XScale. */
554 int arm_arch_xscale = 0;
556 /* Nonzero if tuning for XScale */
557 int arm_tune_xscale = 0;
559 /* Nonzero if we want to tune for stores that access the write-buffer.
560 This typically means an ARM6 or ARM7 with MMU or MPU. */
561 int arm_tune_wbuf = 0;
563 /* Nonzero if tuning for Cortex-A9. */
564 int arm_tune_cortex_a9 = 0;
566 /* Nonzero if generating Thumb instructions. */
567 int thumb_code = 0;
569 /* Nonzero if we should define __THUMB_INTERWORK__ in the
570 preprocessor.
571 XXX This is a bit of a hack; it is intended to help work around
572 problems in GLD, which does not understand that armv5t code is
573 interworking clean. */
574 int arm_cpp_interwork = 0;
576 /* Nonzero if chip supports Thumb 2. */
577 int arm_arch_thumb2;
579 /* Nonzero if chip supports integer division instruction. */
580 int arm_arch_hwdiv;
582 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
583 must report the mode of the memory reference from PRINT_OPERAND to
584 PRINT_OPERAND_ADDRESS. */
585 enum machine_mode output_memory_reference_mode;
587 /* The register number to be used for the PIC offset register. */
588 unsigned arm_pic_register = INVALID_REGNUM;
590 /* Set to 1 after arm_reorg has started. Reset to start at the start of
591 the next function. */
592 static int after_arm_reorg = 0;
594 /* The maximum number of insns to be used when loading a constant. */
595 static int arm_constant_limit = 3;
597 /* For an explanation of these variables, see final_prescan_insn below. */
598 int arm_ccfsm_state;
599 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
600 enum arm_cond_code arm_current_cc;
601 rtx arm_target_insn;
602 int arm_target_label;
603 /* The number of conditionally executed insns, including the current insn. */
604 int arm_condexec_count = 0;
605 /* A bitmask specifying the patterns for the IT block.
606 Zero means do not output an IT block before this insn. */
607 int arm_condexec_mask = 0;
608 /* The number of bits used in arm_condexec_mask. */
609 int arm_condexec_masklen = 0;
611 /* The condition codes of the ARM, and the inverse function. */
612 static const char * const arm_condition_codes[] =
614 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
615 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
618 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
619 #define streq(string1, string2) (strcmp (string1, string2) == 0)
621 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
622 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
623 | (1 << PIC_OFFSET_TABLE_REGNUM)))
625 /* Initialization code. */
627 struct processors
629 const char *const name;
630 enum processor_type core;
631 const char *arch;
632 const unsigned long flags;
633 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
636 /* Not all of these give usefully different compilation alternatives,
637 but there is no simple way of generalizing them. */
638 static const struct processors all_cores[] =
640 /* ARM Cores */
641 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
642 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
643 #include "arm-cores.def"
644 #undef ARM_CORE
645 {NULL, arm_none, NULL, 0, NULL}
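/* For example, a hypothetical arm-cores.def entry of the form

     ARM_CORE ("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)

   would expand through the macro above to

     {"arm926ej-s", arm_none, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
      arm_9e_rtx_costs},

   the ARCH argument being both stringized and pasted onto FL_FOR_ARCH,
   and the COSTS argument selecting one of the arm_*_rtx_costs routines
   declared near the top of this file.  */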
648 static const struct processors all_architectures[] =
650 /* ARM Architectures */
651 /* We don't specify rtx_costs here as it will be figured out
652 from the core. */
654 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
655 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
656 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
657 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
658 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
659 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
660 implementations that support it, so we will leave it out for now. */
661 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
662 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
663 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
664 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
665 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
666 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
667 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
668 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
669 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
670 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
671 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
672 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
673 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
674 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
675 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
676 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
677 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
678 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
679 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
680 {NULL, arm_none, NULL, 0 , NULL}
683 struct arm_cpu_select
685 const char * string;
686 const char * name;
687 const struct processors * processors;
690 /* This structure is filled in by arm_handle_option: the 'string' field
691 is set to point at the value that the user specified on the command
692 line, if such a value was given. */
694 static struct arm_cpu_select arm_select[] =
696 /* string name processors */
697 { NULL, "-mcpu=", all_cores },
698 { NULL, "-march=", all_architectures },
699 { NULL, "-mtune=", all_cores }
702 /* Defines representing the indexes into the above table. */
703 #define ARM_OPT_SET_CPU 0
704 #define ARM_OPT_SET_ARCH 1
705 #define ARM_OPT_SET_TUNE 2
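/* So, for instance, a hypothetical command line containing
   "-mcpu=xscale -mtune=arm926ej-s" leaves
   arm_select[ARM_OPT_SET_CPU].string == "xscale" and
   arm_select[ARM_OPT_SET_TUNE].string == "arm926ej-s", with the
   ARM_OPT_SET_ARCH entry still NULL; arm_override_options below walks
   this table to derive insn_flags and arm_tune.  */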
707 /* The name of the preprocessor macro to define for this architecture. */
709 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
711 struct fpu_desc
713 const char * name;
714 enum fputype fpu;
718 /* Available values for -mfpu=. */
720 static const struct fpu_desc all_fpus[] =
722 {"fpa", FPUTYPE_FPA},
723 {"fpe2", FPUTYPE_FPA_EMU2},
724 {"fpe3", FPUTYPE_FPA_EMU2},
725 {"maverick", FPUTYPE_MAVERICK},
726 {"vfp", FPUTYPE_VFP},
727 {"vfp3", FPUTYPE_VFP3},
728 {"vfpv3", FPUTYPE_VFP3},
729 {"vfpv3-d16", FPUTYPE_VFP3D16},
730 {"neon", FPUTYPE_NEON}
734 /* Floating point models used by the different hardware.
735 See fputype in arm.h. */
737 static const enum arm_fp_model fp_model_for_fpu[] =
739 /* No FP hardware. */
740 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
741 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
742 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
743 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
744 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
745 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
746 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
747 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
748 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
752 struct float_abi
754 const char * name;
755 enum float_abi_type abi_type;
759 /* Available values for -mfloat-abi=. */
761 static const struct float_abi all_float_abis[] =
763 {"soft", ARM_FLOAT_ABI_SOFT},
764 {"softfp", ARM_FLOAT_ABI_SOFTFP},
765 {"hard", ARM_FLOAT_ABI_HARD}
769 struct abi_name
771 const char *name;
772 enum arm_abi_type abi_type;
776 /* Available values for -mabi=. */
778 static const struct abi_name arm_all_abis[] =
780 {"apcs-gnu", ARM_ABI_APCS},
781 {"atpcs", ARM_ABI_ATPCS},
782 {"aapcs", ARM_ABI_AAPCS},
783 {"iwmmxt", ARM_ABI_IWMMXT},
784 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
787 /* Supported TLS relocations. */
789 enum tls_reloc {
790 TLS_GD32,
791 TLS_LDM32,
792 TLS_LDO32,
793 TLS_IE32,
794 TLS_LE32
797 /* Emit an insn that's a simple single-set. Both the operands must be known
798 to be valid. */
799 inline static rtx
800 emit_set_insn (rtx x, rtx y)
802 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
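/* For example, emit_set_insn (reg, const0_rtx) emits the single insn
   (set (reg) (const_int 0)); it is simply shorthand for
   emit_insn (gen_rtx_SET (VOIDmode, reg, const0_rtx)).  */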
805 /* Return the number of bits set in VALUE. */
806 static unsigned
807 bit_count (unsigned long value)
809 unsigned long count = 0;
811 while (value)
813 count++;
814 value &= value - 1; /* Clear the least-significant set bit. */
817 return count;
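/* For example, bit_count (0x29) returns 3: the "value &= value - 1" step
   (Kernighan's trick) takes 0b101001 to 0b101000, then 0b100000, then 0,
   removing one set bit per iteration.  */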
820 /* Set up library functions unique to ARM. */
822 static void
823 arm_init_libfuncs (void)
825 /* There are no special library functions unless we are using the
826 ARM BPABI. */
827 if (!TARGET_BPABI)
828 return;
830 /* The functions below are described in Section 4 of the "Run-Time
831 ABI for the ARM architecture", Version 1.0. */
833 /* Double-precision floating-point arithmetic. Table 2. */
834 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
835 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
836 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
837 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
838 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
840 /* Double-precision comparisons. Table 3. */
841 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
842 set_optab_libfunc (ne_optab, DFmode, NULL);
843 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
844 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
845 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
846 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
847 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
849 /* Single-precision floating-point arithmetic. Table 4. */
850 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
851 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
852 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
853 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
854 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
856 /* Single-precision comparisons. Table 5. */
857 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
858 set_optab_libfunc (ne_optab, SFmode, NULL);
859 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
860 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
861 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
862 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
863 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
865 /* Floating-point to integer conversions. Table 6. */
866 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
867 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
868 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
869 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
870 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
871 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
872 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
873 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
875 /* Conversions between floating types. Table 7. */
876 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
877 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
879 /* Integer to floating-point conversions. Table 8. */
880 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
881 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
882 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
883 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
884 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
885 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
886 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
887 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
889 /* Long long. Table 9. */
890 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
891 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
892 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
893 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
894 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
895 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
896 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
897 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
899 /* Integer (32/32->32) division. \S 4.3.1. */
900 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
901 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
903 /* The divmod functions are designed so that they can be used for
904 plain division, even though they return both the quotient and the
905 remainder. The quotient is returned in the usual location (i.e.,
906 r0 for SImode, {r0, r1} for DImode), just as would be expected
907 for an ordinary division routine. Because the AAPCS calling
908 conventions specify that all of { r0, r1, r2, r3 } are
909 call-clobbered registers, there is no need to tell the compiler
910 explicitly that those registers are clobbered by these
911 routines. */
912 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
913 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
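/* For reference: the Run-time ABI places the remainder in the registers
   immediately following the quotient, i.e. r1 for __aeabi_idivmod and
   { r2, r3 } for the 64-bit divmod helpers, so using these entry points
   for plain division simply leaves the remainder unused.  */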
915 /* For SImode division the ABI provides div-without-mod routines,
916 which are faster. */
917 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
918 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
920 /* We don't have mod libcalls. Fortunately gcc knows how to use the
921 divmod libcalls instead. */
922 set_optab_libfunc (smod_optab, DImode, NULL);
923 set_optab_libfunc (umod_optab, DImode, NULL);
924 set_optab_libfunc (smod_optab, SImode, NULL);
925 set_optab_libfunc (umod_optab, SImode, NULL);
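/* The net effect on a BPABI target is that an SImode division such as
   "a / b" becomes a call to __aeabi_idiv rather than libgcc's default
   __divsi3, while "a % b" falls back to the __aeabi_idivmod helper
   because the smod_optab entry has been cleared.  */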
928 /* On AAPCS systems, this is the "struct __va_list". */
929 static GTY(()) tree va_list_type;
931 /* Return the type to use as __builtin_va_list. */
932 static tree
933 arm_build_builtin_va_list (void)
935 tree va_list_name;
936 tree ap_field;
938 if (!TARGET_AAPCS_BASED)
939 return std_build_builtin_va_list ();
941 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
942 defined as:
944 struct __va_list
946 void *__ap;
949 The C Library ABI further reinforces this definition in \S
950 4.1.
952 We must follow this definition exactly. The structure tag
953 name is visible in C++ mangled names, and thus forms a part
954 of the ABI. The field name may be used by people who
955 #include <stdarg.h>. */
956 /* Create the type. */
957 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
958 /* Give it the required name. */
959 va_list_name = build_decl (TYPE_DECL,
960 get_identifier ("__va_list"),
961 va_list_type);
962 DECL_ARTIFICIAL (va_list_name) = 1;
963 TYPE_NAME (va_list_type) = va_list_name;
964 /* Create the __ap field. */
965 ap_field = build_decl (FIELD_DECL,
966 get_identifier ("__ap"),
967 ptr_type_node);
968 DECL_ARTIFICIAL (ap_field) = 1;
969 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
970 TYPE_FIELDS (va_list_type) = ap_field;
971 /* Compute its layout. */
972 layout_type (va_list_type);
974 return va_list_type;
977 /* Return an expression of type "void *" pointing to the next
978 available argument in a variable-argument list. VALIST is the
979 user-level va_list object, of type __builtin_va_list. */
980 static tree
981 arm_extract_valist_ptr (tree valist)
983 if (TREE_TYPE (valist) == error_mark_node)
984 return error_mark_node;
986 /* On an AAPCS target, the pointer is stored within "struct
987 va_list". */
988 if (TARGET_AAPCS_BASED)
990 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
991 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
992 valist, ap_field, NULL_TREE);
995 return valist;
998 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
999 static void
1000 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1002 valist = arm_extract_valist_ptr (valist);
1003 std_expand_builtin_va_start (valist, nextarg);
1006 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1007 static tree
1008 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1009 gimple_seq *post_p)
1011 valist = arm_extract_valist_ptr (valist);
1012 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1015 /* Implement TARGET_HANDLE_OPTION. */
1017 static bool
1018 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1020 switch (code)
1022 case OPT_march_:
1023 arm_select[1].string = arg;
1024 return true;
1026 case OPT_mcpu_:
1027 arm_select[0].string = arg;
1028 return true;
1030 case OPT_mhard_float:
1031 target_float_abi_name = "hard";
1032 return true;
1034 case OPT_msoft_float:
1035 target_float_abi_name = "soft";
1036 return true;
1038 case OPT_mtune_:
1039 arm_select[2].string = arg;
1040 return true;
1042 default:
1043 return true;
1047 static void
1048 arm_target_help (void)
1050 int i;
1051 static int columns = 0;
1052 int remaining;
1054 /* If we have not done so already, obtain the desired maximum width of
1055 the output. Note - this is a duplication of the code at the start of
1056 gcc/opts.c:print_specific_help() - the two copies should probably be
1057 replaced by a single function. */
1058 if (columns == 0)
1060 const char *p;
1062 GET_ENVIRONMENT (p, "COLUMNS");
1063 if (p != NULL)
1065 int value = atoi (p);
1067 if (value > 0)
1068 columns = value;
1071 if (columns == 0)
1072 /* Use a reasonable default. */
1073 columns = 80;
1076 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1078 /* The - 2 is because we know that the last entry in the array is NULL. */
1079 i = ARRAY_SIZE (all_cores) - 2;
1080 gcc_assert (i > 0);
1081 printf (" %s", all_cores[i].name);
1082 remaining = columns - (strlen (all_cores[i].name) + 4);
1083 gcc_assert (remaining >= 0);
1085 while (i--)
1087 int len = strlen (all_cores[i].name);
1089 if (remaining > len + 2)
1091 printf (", %s", all_cores[i].name);
1092 remaining -= len + 2;
1094 else
1096 if (remaining > 0)
1097 printf (",");
1098 printf ("\n %s", all_cores[i].name);
1099 remaining = columns - (len + 4);
1103 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1105 i = ARRAY_SIZE (all_architectures) - 2;
1106 gcc_assert (i > 0);
1108 printf (" %s", all_architectures[i].name);
1109 remaining = columns - (strlen (all_architectures[i].name) + 4);
1110 gcc_assert (remaining >= 0);
1112 while (i--)
1114 int len = strlen (all_architectures[i].name);
1116 if (remaining > len + 2)
1118 printf (", %s", all_architectures[i].name);
1119 remaining -= len + 2;
1121 else
1123 if (remaining > 0)
1124 printf (",");
1125 printf ("\n %s", all_architectures[i].name);
1126 remaining = columns - (len + 4);
1129 printf ("\n");
1133 /* Fix up any incompatible options that the user has specified.
1134 This has now turned into a maze. */
1135 void
1136 arm_override_options (void)
1138 unsigned i;
1139 enum processor_type target_arch_cpu = arm_none;
1140 enum processor_type selected_cpu = arm_none;
1142 /* Set up the flags based on the cpu/architecture selected by the user. */
1143 for (i = ARRAY_SIZE (arm_select); i--;)
1145 struct arm_cpu_select * ptr = arm_select + i;
1147 if (ptr->string != NULL && ptr->string[0] != '\0')
1149 const struct processors * sel;
1151 for (sel = ptr->processors; sel->name != NULL; sel++)
1152 if (streq (ptr->string, sel->name))
1154 /* Set the architecture define. */
1155 if (i != ARM_OPT_SET_TUNE)
1156 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1158 /* Determine the processor core for which we should
1159 tune code-generation. */
1160 if (/* -mcpu= is a sensible default. */
1161 i == ARM_OPT_SET_CPU
1162 /* -mtune= overrides -mcpu= and -march=. */
1163 || i == ARM_OPT_SET_TUNE)
1164 arm_tune = (enum processor_type) (sel - ptr->processors);
1166 /* Remember the CPU associated with this architecture.
1167 If no other option is used to set the CPU type,
1168 we'll use this to guess the most suitable tuning
1169 options. */
1170 if (i == ARM_OPT_SET_ARCH)
1171 target_arch_cpu = sel->core;
1173 if (i == ARM_OPT_SET_CPU)
1174 selected_cpu = (enum processor_type) (sel - ptr->processors);
1176 if (i != ARM_OPT_SET_TUNE)
1178 /* If we have been given an architecture and a processor
1179 make sure that they are compatible. We only generate
1180 a warning though, and we prefer the CPU over the
1181 architecture. */
1182 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1183 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1184 ptr->string);
1186 insn_flags = sel->flags;
1189 break;
1192 if (sel->name == NULL)
1193 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1197 /* Guess the tuning options from the architecture if necessary. */
1198 if (arm_tune == arm_none)
1199 arm_tune = target_arch_cpu;
1201 /* If the user did not specify a processor, choose one for them. */
1202 if (insn_flags == 0)
1204 const struct processors * sel;
1205 unsigned int sought;
1207 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1208 if (selected_cpu == arm_none)
1210 #ifdef SUBTARGET_CPU_DEFAULT
1211 /* Use the subtarget default CPU if none was specified by
1212 configure. */
1213 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1214 #endif
1215 /* Default to ARM6. */
1216 if (selected_cpu == arm_none)
1217 selected_cpu = arm6;
1219 sel = &all_cores[selected_cpu];
1221 insn_flags = sel->flags;
1223 /* Now check to see if the user has specified some command line
1224 switches that require certain abilities from the cpu. */
1225 sought = 0;
1227 if (TARGET_INTERWORK || TARGET_THUMB)
1229 sought |= (FL_THUMB | FL_MODE32);
1231 /* There are no ARM processors that support both APCS-26 and
1232 interworking. Therefore we force FL_MODE26 to be removed
1233 from insn_flags here (if it was set), so that the search
1234 below will always be able to find a compatible processor. */
1235 insn_flags &= ~FL_MODE26;
1238 if (sought != 0 && ((sought & insn_flags) != sought))
1240 /* Try to locate a CPU type that supports all of the abilities
1241 of the default CPU, plus the extra abilities requested by
1242 the user. */
1243 for (sel = all_cores; sel->name != NULL; sel++)
1244 if ((sel->flags & sought) == (sought | insn_flags))
1245 break;
1247 if (sel->name == NULL)
1249 unsigned current_bit_count = 0;
1250 const struct processors * best_fit = NULL;
1252 /* Ideally we would like to issue an error message here
1253 saying that it was not possible to find a CPU compatible
1254 with the default CPU, but which also supports the command
1255 line options specified by the programmer, and so they
1256 ought to use the -mcpu=<name> command line option to
1257 override the default CPU type.
1259 If we cannot find a cpu that has both the
1260 characteristics of the default cpu and the given
1261 command line options we scan the array again looking
1262 for a best match. */
1263 for (sel = all_cores; sel->name != NULL; sel++)
1264 if ((sel->flags & sought) == sought)
1266 unsigned count;
1268 count = bit_count (sel->flags & insn_flags);
1270 if (count >= current_bit_count)
1272 best_fit = sel;
1273 current_bit_count = count;
1277 gcc_assert (best_fit);
1278 sel = best_fit;
1281 insn_flags = sel->flags;
1283 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1284 arm_default_cpu = (enum processor_type) (sel - all_cores);
1285 if (arm_tune == arm_none)
1286 arm_tune = arm_default_cpu;
1289 /* The processor for which we should tune should now have been
1290 chosen. */
1291 gcc_assert (arm_tune != arm_none);
1293 tune_flags = all_cores[(int)arm_tune].flags;
1295 if (target_abi_name)
1297 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1299 if (streq (arm_all_abis[i].name, target_abi_name))
1301 arm_abi = arm_all_abis[i].abi_type;
1302 break;
1305 if (i == ARRAY_SIZE (arm_all_abis))
1306 error ("invalid ABI option: -mabi=%s", target_abi_name);
1308 else
1309 arm_abi = ARM_DEFAULT_ABI;
1311 /* Make sure that the processor choice does not conflict with any of the
1312 other command line choices. */
1313 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1314 error ("target CPU does not support ARM mode");
1316 /* BPABI targets use linker tricks to allow interworking on cores
1317 without thumb support. */
1318 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1320 warning (0, "target CPU does not support interworking" );
1321 target_flags &= ~MASK_INTERWORK;
1324 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1326 warning (0, "target CPU does not support THUMB instructions");
1327 target_flags &= ~MASK_THUMB;
1330 if (TARGET_APCS_FRAME && TARGET_THUMB)
1332 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1333 target_flags &= ~MASK_APCS_FRAME;
1336 /* Callee super interworking implies thumb interworking. Adding
1337 this to the flags here simplifies the logic elsewhere. */
1338 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1339 target_flags |= MASK_INTERWORK;
1341 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
1342 from here, where no function is currently being compiled. */
1343 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1344 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1346 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1347 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1349 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1350 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1352 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1354 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1355 target_flags |= MASK_APCS_FRAME;
1358 if (TARGET_POKE_FUNCTION_NAME)
1359 target_flags |= MASK_APCS_FRAME;
1361 if (TARGET_APCS_REENT && flag_pic)
1362 error ("-fpic and -mapcs-reent are incompatible");
1364 if (TARGET_APCS_REENT)
1365 warning (0, "APCS reentrant code not supported. Ignored");
1367 /* If this target is normally configured to use APCS frames, warn if they
1368 are turned off and debugging is turned on. */
1369 if (TARGET_ARM
1370 && write_symbols != NO_DEBUG
1371 && !TARGET_APCS_FRAME
1372 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1373 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1375 if (TARGET_APCS_FLOAT)
1376 warning (0, "passing floating point arguments in fp regs not yet supported");
1378 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1379 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1380 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1381 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1382 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1383 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1384 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1385 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1386 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1387 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1388 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1389 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1391 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1392 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1393 thumb_code = (TARGET_ARM == 0);
1394 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1395 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1396 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1397 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1398 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1400 /* If we are not using the default (ARM mode) section anchor offset
1401 ranges, then set the correct ranges now. */
1402 if (TARGET_THUMB1)
1404 /* Thumb-1 LDR instructions cannot have negative offsets.
1405 Permissible positive offset ranges are 5-bit (for byte loads),
1406 6-bit (for halfword loads), or 7-bit (for word loads).
1407 Empirical results suggest a 7-bit anchor range gives the best
1408 overall code size. */
1409 targetm.min_anchor_offset = 0;
1410 targetm.max_anchor_offset = 127;
1412 else if (TARGET_THUMB2)
1414 /* The minimum is set such that the total size of the block
1415 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
1416 divisible by eight, ensuring natural spacing of anchors. */
1417 targetm.min_anchor_offset = -248;
1418 targetm.max_anchor_offset = 4095;
1421 /* V5 code we generate is completely interworking capable, so we turn off
1422 TARGET_INTERWORK here to avoid many tests later on. */
1424 /* XXX However, we must pass the right pre-processor defines to CPP
1425 or GLD can get confused. This is a hack. */
1426 if (TARGET_INTERWORK)
1427 arm_cpp_interwork = 1;
1429 if (arm_arch5)
1430 target_flags &= ~MASK_INTERWORK;
1432 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1433 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1435 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1436 error ("iwmmxt abi requires an iwmmxt capable cpu");
1438 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1439 if (target_fpu_name == NULL && target_fpe_name != NULL)
1441 if (streq (target_fpe_name, "2"))
1442 target_fpu_name = "fpe2";
1443 else if (streq (target_fpe_name, "3"))
1444 target_fpu_name = "fpe3";
1445 else
1446 error ("invalid floating point emulation option: -mfpe=%s",
1447 target_fpe_name);
1449 if (target_fpu_name != NULL)
1451 /* The user specified a FPU. */
1452 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1454 if (streq (all_fpus[i].name, target_fpu_name))
1456 arm_fpu_arch = all_fpus[i].fpu;
1457 arm_fpu_tune = arm_fpu_arch;
1458 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1459 break;
1462 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1463 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1465 else
1467 #ifdef FPUTYPE_DEFAULT
1468 /* Use the default if it is specified for this platform. */
1469 arm_fpu_arch = FPUTYPE_DEFAULT;
1470 arm_fpu_tune = FPUTYPE_DEFAULT;
1471 #else
1472 /* Pick one based on CPU type. */
1473 /* ??? Some targets assume FPA is the default.
1474 if ((insn_flags & FL_VFP) != 0)
1475 arm_fpu_arch = FPUTYPE_VFP;
1476 else
1478 if (arm_arch_cirrus)
1479 arm_fpu_arch = FPUTYPE_MAVERICK;
1480 else
1481 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1482 #endif
1483 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1484 arm_fpu_tune = FPUTYPE_FPA;
1485 else
1486 arm_fpu_tune = arm_fpu_arch;
1487 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1488 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1491 if (target_float_abi_name != NULL)
1493 /* The user specified a FP ABI. */
1494 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1496 if (streq (all_float_abis[i].name, target_float_abi_name))
1498 arm_float_abi = all_float_abis[i].abi_type;
1499 break;
1502 if (i == ARRAY_SIZE (all_float_abis))
1503 error ("invalid floating point abi: -mfloat-abi=%s",
1504 target_float_abi_name);
1506 else
1507 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1509 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1510 sorry ("-mfloat-abi=hard and VFP");
1512 if (TARGET_AAPCS_BASED
1513 && (arm_fp_model == ARM_FP_MODEL_FPA))
1514 error ("FPA is unsupported in the AAPCS");
1516 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1517 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1518 will ever exist. GCC makes no attempt to support this combination. */
1519 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1520 sorry ("iWMMXt and hardware floating point");
1522 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1523 if (TARGET_THUMB2 && TARGET_IWMMXT)
1524 sorry ("Thumb-2 iWMMXt");
1526 /* If soft-float is specified then don't use FPU. */
1527 if (TARGET_SOFT_FLOAT)
1528 arm_fpu_arch = FPUTYPE_NONE;
1530 /* For arm2/3 there is no need to do any scheduling if there is only
1531 a floating point emulator, or we are doing software floating-point. */
1532 if ((TARGET_SOFT_FLOAT
1533 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1534 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1535 && (tune_flags & FL_MODE32) == 0)
1536 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1538 if (target_thread_switch)
1540 if (strcmp (target_thread_switch, "soft") == 0)
1541 target_thread_pointer = TP_SOFT;
1542 else if (strcmp (target_thread_switch, "auto") == 0)
1543 target_thread_pointer = TP_AUTO;
1544 else if (strcmp (target_thread_switch, "cp15") == 0)
1545 target_thread_pointer = TP_CP15;
1546 else
1547 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1550 /* Use the cp15 method if it is available. */
1551 if (target_thread_pointer == TP_AUTO)
1553 if (arm_arch6k && !TARGET_THUMB)
1554 target_thread_pointer = TP_CP15;
1555 else
1556 target_thread_pointer = TP_SOFT;
1559 if (TARGET_HARD_TP && TARGET_THUMB1)
1560 error ("can not use -mtp=cp15 with 16-bit Thumb");
1562 /* Override the default structure alignment for AAPCS ABI. */
1563 if (TARGET_AAPCS_BASED)
1564 arm_structure_size_boundary = 8;
1566 if (structure_size_string != NULL)
1568 int size = strtol (structure_size_string, NULL, 0);
1570 if (size == 8 || size == 32
1571 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1572 arm_structure_size_boundary = size;
1573 else
1574 warning (0, "structure size boundary can only be set to %s",
1575 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
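/* For example, a hypothetical -mstructure-size-boundary=32 makes the
   strtol call above yield 32, so structure sizes are rounded up to a
   multiple of 32 bits; an unsupported value such as 16 only produces the
   warning and leaves the existing boundary in place.  */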
1578 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1580 error ("RTP PIC is incompatible with Thumb");
1581 flag_pic = 0;
1584 /* If stack checking is disabled, we can use r10 as the PIC register,
1585 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1586 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1588 if (TARGET_VXWORKS_RTP)
1589 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1590 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1593 if (flag_pic && TARGET_VXWORKS_RTP)
1594 arm_pic_register = 9;
1596 if (arm_pic_register_string != NULL)
1598 int pic_register = decode_reg_name (arm_pic_register_string);
1600 if (!flag_pic)
1601 warning (0, "-mpic-register= is useless without -fpic");
1603 /* Prevent the user from choosing an obviously stupid PIC register. */
1604 else if (pic_register < 0 || call_used_regs[pic_register]
1605 || pic_register == HARD_FRAME_POINTER_REGNUM
1606 || pic_register == STACK_POINTER_REGNUM
1607 || pic_register >= PC_REGNUM
1608 || (TARGET_VXWORKS_RTP
1609 && (unsigned int) pic_register != arm_pic_register))
1610 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1611 else
1612 arm_pic_register = pic_register;
1615 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1616 if (fix_cm3_ldrd == 2)
1618 if (selected_cpu == cortexm3)
1619 fix_cm3_ldrd = 1;
1620 else
1621 fix_cm3_ldrd = 0;
1624 /* ??? We might want scheduling for thumb2. */
1625 if (TARGET_THUMB && flag_schedule_insns)
1627 /* Don't warn since it's on by default in -O2. */
1628 flag_schedule_insns = 0;
1631 if (optimize_size)
1633 arm_constant_limit = 1;
1635 /* If optimizing for size, bump the number of instructions that we
1636 are prepared to conditionally execute (even on a StrongARM). */
1637 max_insns_skipped = 6;
1639 else
1641 /* For processors with load scheduling, it never costs more than
1642 2 cycles to load a constant, and the load scheduler may well
1643 reduce that to 1. */
1644 if (arm_ld_sched)
1645 arm_constant_limit = 1;
1647 /* On XScale the longer latency of a load makes it more difficult
1648 to achieve a good schedule, so it's faster to synthesize
1649 constants that can be done in two insns. */
1650 if (arm_tune_xscale)
1651 arm_constant_limit = 2;
1653 /* StrongARM has early execution of branches, so a sequence
1654 that is worth skipping is shorter. */
1655 if (arm_tune_strongarm)
1656 max_insns_skipped = 3;
1659 /* Register global variables with the garbage collector. */
1660 arm_add_gc_roots ();
1663 static void
1664 arm_add_gc_roots (void)
1666 gcc_obstack_init(&minipool_obstack);
1667 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1670 /* A table of known ARM exception types.
1671 For use with the interrupt function attribute. */
1673 typedef struct
1675 const char *const arg;
1676 const unsigned long return_value;
1678 isr_attribute_arg;
1680 static const isr_attribute_arg isr_attribute_args [] =
1682 { "IRQ", ARM_FT_ISR },
1683 { "irq", ARM_FT_ISR },
1684 { "FIQ", ARM_FT_FIQ },
1685 { "fiq", ARM_FT_FIQ },
1686 { "ABORT", ARM_FT_ISR },
1687 { "abort", ARM_FT_ISR },
1688 { "ABORT", ARM_FT_ISR },
1689 { "abort", ARM_FT_ISR },
1690 { "UNDEF", ARM_FT_EXCEPTION },
1691 { "undef", ARM_FT_EXCEPTION },
1692 { "SWI", ARM_FT_EXCEPTION },
1693 { "swi", ARM_FT_EXCEPTION },
1694 { NULL, ARM_FT_NORMAL }
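/* Typical use of these argument strings (illustrative):
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   An unrecognized string maps to ARM_FT_UNKNOWN in arm_isr_value
   below, and the attribute is then ignored with a warning.  */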
1697 /* Returns the (interrupt) function type of the current
1698 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1700 static unsigned long
1701 arm_isr_value (tree argument)
1703 const isr_attribute_arg * ptr;
1704 const char * arg;
1706 if (!arm_arch_notm)
1707 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1709 /* No argument - default to IRQ. */
1710 if (argument == NULL_TREE)
1711 return ARM_FT_ISR;
1713 /* Get the value of the argument. */
1714 if (TREE_VALUE (argument) == NULL_TREE
1715 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1716 return ARM_FT_UNKNOWN;
1718 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1720 /* Check it against the list of known arguments. */
1721 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1722 if (streq (arg, ptr->arg))
1723 return ptr->return_value;
1725 /* An unrecognized interrupt type. */
1726 return ARM_FT_UNKNOWN;
1729 /* Computes the type of the current function. */
1731 static unsigned long
1732 arm_compute_func_type (void)
1734 unsigned long type = ARM_FT_UNKNOWN;
1735 tree a;
1736 tree attr;
1738 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1740 /* Decide if the current function is volatile. Such functions
1741 never return, and many memory cycles can be saved by not storing
1742 register values that will never be needed again. This optimization
1743 was added to speed up context switching in a kernel application. */
1744 if (optimize > 0
1745 && (TREE_NOTHROW (current_function_decl)
1746 || !(flag_unwind_tables
1747 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1748 && TREE_THIS_VOLATILE (current_function_decl))
1749 type |= ARM_FT_VOLATILE;
1751 if (cfun->static_chain_decl != NULL)
1752 type |= ARM_FT_NESTED;
1754 attr = DECL_ATTRIBUTES (current_function_decl);
1756 a = lookup_attribute ("naked", attr);
1757 if (a != NULL_TREE)
1758 type |= ARM_FT_NAKED;
1760 a = lookup_attribute ("isr", attr);
1761 if (a == NULL_TREE)
1762 a = lookup_attribute ("interrupt", attr);
1764 if (a == NULL_TREE)
1765 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1766 else
1767 type |= arm_isr_value (TREE_VALUE (a));
1769 return type;
1772 /* Returns the type of the current function. */
1774 unsigned long
1775 arm_current_func_type (void)
1777 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1778 cfun->machine->func_type = arm_compute_func_type ();
1780 return cfun->machine->func_type;
1783 bool
1784 arm_allocate_stack_slots_for_args (void)
1786 /* Naked functions should not allocate stack slots for arguments. */
1787 return !IS_NAKED (arm_current_func_type ());
1791 /* Return 1 if it is possible to return using a single instruction.
1792 If SIBLING is non-null, this is a test for a return before a sibling
1793 call. SIBLING is the call insn, so we can examine its register usage. */
1796 use_return_insn (int iscond, rtx sibling)
1798 int regno;
1799 unsigned int func_type;
1800 unsigned long saved_int_regs;
1801 unsigned HOST_WIDE_INT stack_adjust;
1802 arm_stack_offsets *offsets;
1804 /* Never use a return instruction before reload has run. */
1805 if (!reload_completed)
1806 return 0;
1808 func_type = arm_current_func_type ();
1810 /* Naked, volatile and stack alignment functions need special
1811 consideration. */
1812 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1813 return 0;
1815 /* So do interrupt functions that use the frame pointer and Thumb
1816 interrupt functions. */
1817 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1818 return 0;
1820 offsets = arm_get_frame_offsets ();
1821 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1823 /* As do variadic functions. */
1824 if (crtl->args.pretend_args_size
1825 || cfun->machine->uses_anonymous_args
1826 /* Or if the function calls __builtin_eh_return () */
1827 || crtl->calls_eh_return
1828 /* Or if the function calls alloca */
1829 || cfun->calls_alloca
1830 /* Or if there is a stack adjustment. However, if the stack pointer
1831 is saved on the stack, we can use a pre-incrementing stack load. */
1832 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1833 && stack_adjust == 4)))
1834 return 0;
1836 saved_int_regs = offsets->saved_regs_mask;
1838 /* Unfortunately, the insn
1840 ldmib sp, {..., sp, ...}
1842 triggers a bug on most SA-110 based devices, such that the stack
1843 pointer won't be correctly restored if the instruction takes a
1844 page fault. We work around this problem by popping r3 along with
1845 the other registers, since that is never slower than executing
1846 another instruction.
1848 We test for !arm_arch5 here, because code for any architecture
1849 less than this could potentially be run on one of the buggy
1850 chips. */
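/* Sketch of the workaround (illustrative): the 4-byte stack adjustment
   is absorbed by also popping r3 and simply discarding its value, so
   the return can stay a single load-multiple without resorting to the
   problematic "ldmib sp, {..., sp, ...}" form.  The checks below make
   sure r3 really is free for this.  */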
1851 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1853 /* Validate that r3 is a call-clobbered register (always true in
1854 the default abi) ... */
1855 if (!call_used_regs[3])
1856 return 0;
1858 /* ... that it isn't being used for a return value ... */
1859 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1860 return 0;
1862 /* ... or for a tail-call argument ... */
1863 if (sibling)
1865 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1867 if (find_regno_fusage (sibling, USE, 3))
1868 return 0;
1871 /* ... and that there are no call-saved registers in r0-r2
1872 (always true in the default ABI). */
1873 if (saved_int_regs & 0x7)
1874 return 0;
1877 /* Can't be done if interworking with Thumb, and any registers have been
1878 stacked. */
1879 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1880 return 0;
1882 /* On StrongARM, conditional returns are expensive if they aren't
1883 taken and multiple registers have been stacked. */
1884 if (iscond && arm_tune_strongarm)
1886 /* Conditional return when just the LR is stored is a simple
1887 conditional-load instruction, that's not expensive. */
1888 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1889 return 0;
1891 if (flag_pic
1892 && arm_pic_register != INVALID_REGNUM
1893 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1894 return 0;
1897 /* If there are saved registers but the LR isn't saved, then we need
1898 two instructions for the return. */
1899 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1900 return 0;
1902 /* Can't be done if any of the FPA regs are pushed,
1903 since this also requires an insn. */
1904 if (TARGET_HARD_FLOAT && TARGET_FPA)
1905 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1906 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1907 return 0;
1909 /* Likewise VFP regs. */
1910 if (TARGET_HARD_FLOAT && TARGET_VFP)
1911 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1912 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1913 return 0;
1915 if (TARGET_REALLY_IWMMXT)
1916 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1917 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1918 return 0;
1920 return 1;
1923 /* Return TRUE if int I is a valid immediate ARM constant. */
1926 const_ok_for_arm (HOST_WIDE_INT i)
1928 int lowbit;
1930 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1931 be all zero, or all one. */
1932 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1933 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1934 != ((~(unsigned HOST_WIDE_INT) 0)
1935 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1936 return FALSE;
1938 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1940 /* Fast return for 0 and small values. We must do this for zero, since
1941 the code below can't handle that one case. */
1942 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1943 return TRUE;
1945 /* Get the number of trailing zeros. */
1946 lowbit = ffs((int) i) - 1;
1948 /* Only even shifts are allowed in ARM mode so round down to the
1949 nearest even number. */
1950 if (TARGET_ARM)
1951 lowbit &= ~1;
1953 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1954 return TRUE;
1956 if (TARGET_ARM)
1958 /* Allow rotated constants in ARM mode. */
1959 if (lowbit <= 4
1960 && ((i & ~0xc000003f) == 0
1961 || (i & ~0xf000000f) == 0
1962 || (i & ~0xfc000003) == 0))
1963 return TRUE;
1965 else
1967 HOST_WIDE_INT v;
1969 /* Allow repeated pattern. */
1970 v = i & 0xff;
1971 v |= v << 16;
1972 if (i == v || i == (v | (v << 8)))
1973 return TRUE;
1976 return FALSE;
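/* Worked examples (illustrative): 0x000000ff and 0xff000000 are valid
   ARM immediates (an 8-bit value rotated right by an even amount),
   while 0x00ff00ff is rejected for ARM but accepted above for Thumb-2
   as the replicated byte pattern "00XY00XY".  */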
1979 /* Return true if I is a valid constant for the operation CODE. */
1980 static int
1981 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1983 if (const_ok_for_arm (i))
1984 return 1;
1986 switch (code)
1988 case PLUS:
1989 case COMPARE:
1990 case EQ:
1991 case NE:
1992 case GT:
1993 case LE:
1994 case LT:
1995 case GE:
1996 case GEU:
1997 case LTU:
1998 case GTU:
1999 case LEU:
2000 case UNORDERED:
2001 case ORDERED:
2002 case UNEQ:
2003 case UNGE:
2004 case UNLT:
2005 case UNGT:
2006 case UNLE:
2007 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2009 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2010 case XOR:
2011 case IOR:
2012 return 0;
2014 case AND:
2015 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2017 default:
2018 gcc_unreachable ();
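/* For example (illustrative): -255 has no direct encoding, but for
   PLUS it is accepted because the insn can be emitted as a subtract of
   255; similarly an AND with 0xffffff00 can be emitted as BIC with
   0xff.  */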
2022 /* Emit a sequence of insns to handle a large constant.
2023 CODE is the code of the operation required, it can be any of SET, PLUS,
2024 IOR, AND, XOR, MINUS;
2025 MODE is the mode in which the operation is being performed;
2026 VAL is the integer to operate on;
2027 SOURCE is the other operand (a register, or a null-pointer for SET);
2028 SUBTARGETS means it is safe to create scratch registers if that will
2029 either produce a simpler sequence, or we will want to cse the values.
2030 Return value is the number of insns emitted. */
2032 /* ??? Tweak this for thumb2. */
2034 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2035 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2037 rtx cond;
2039 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2040 cond = COND_EXEC_TEST (PATTERN (insn));
2041 else
2042 cond = NULL_RTX;
2044 if (subtargets || code == SET
2045 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2046 && REGNO (target) != REGNO (source)))
2048 /* After arm_reorg has been called, we can't fix up expensive
2049 constants by pushing them into memory so we must synthesize
2050 them in-line, regardless of the cost. This is only likely to
2051 be more costly on chips that have load delay slots and we are
2052 compiling without running the scheduler (so no splitting
2053 occurred before the final instruction emission).
2055 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2057 if (!after_arm_reorg
2058 && !cond
2059 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2060 1, 0)
2061 > arm_constant_limit + (code != SET)))
2063 if (code == SET)
2065 /* Currently SET is the only monadic value for CODE, all
2066 the rest are dyadic. */
2067 if (TARGET_USE_MOVT)
2068 arm_emit_movpair (target, GEN_INT (val));
2069 else
2070 emit_set_insn (target, GEN_INT (val));
2072 return 1;
2074 else
2076 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2078 if (TARGET_USE_MOVT)
2079 arm_emit_movpair (temp, GEN_INT (val));
2080 else
2081 emit_set_insn (temp, GEN_INT (val));
2083 /* For MINUS, the value is subtracted from, since we never
2084 have subtraction of a constant. */
2085 if (code == MINUS)
2086 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2087 else
2088 emit_set_insn (target,
2089 gen_rtx_fmt_ee (code, mode, source, temp));
2090 return 2;
2095 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
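/* Worked example (illustrative): a SET of 0x0000ff0f has no single-insn
   encoding, so it is typically synthesized as two instructions, e.g.
     mov  rD, #0xff00
     add  rD, rD, #0x0f
   When TARGET_USE_MOVT is set the value is loaded with arm_emit_movpair
   instead, as above.  */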
2099 /* Return the number of ARM instructions required to synthesize the given
2100 constant. */
2101 static int
2102 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2104 HOST_WIDE_INT temp1;
2105 int num_insns = 0;
2108 int end;
2110 if (i <= 0)
2111 i += 32;
2112 if (remainder & (3 << (i - 2)))
2114 end = i - 8;
2115 if (end < 0)
2116 end += 32;
2117 temp1 = remainder & ((0x0ff << end)
2118 | ((i < end) ? (0xff >> (32 - end)) : 0));
2119 remainder &= ~temp1;
2120 num_insns++;
2121 i -= 6;
2123 i -= 2;
2124 } while (remainder);
2125 return num_insns;
2128 /* Emit an instruction with the indicated PATTERN. If COND is
2129 non-NULL, conditionalize the execution of the instruction on COND
2130 being true. */
2132 static void
2133 emit_constant_insn (rtx cond, rtx pattern)
2135 if (cond)
2136 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2137 emit_insn (pattern);
2140 /* As above, but extra parameter GENERATE which, if clear, suppresses
2141 RTL generation. */
2142 /* ??? This needs more work for thumb2. */
2144 static int
2145 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2146 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2147 int generate)
2149 int can_invert = 0;
2150 int can_negate = 0;
2151 int can_negate_initial = 0;
2152 int can_shift = 0;
2153 int i;
2154 int num_bits_set = 0;
2155 int set_sign_bit_copies = 0;
2156 int clear_sign_bit_copies = 0;
2157 int clear_zero_bit_copies = 0;
2158 int set_zero_bit_copies = 0;
2159 int insns = 0;
2160 unsigned HOST_WIDE_INT temp1, temp2;
2161 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2163 /* Find out which operations are safe for a given CODE. Also do a quick
2164 check for degenerate cases; these can occur when DImode operations
2165 are split. */
2166 switch (code)
2168 case SET:
2169 can_invert = 1;
2170 can_shift = 1;
2171 can_negate = 1;
2172 break;
2174 case PLUS:
2175 can_negate = 1;
2176 can_negate_initial = 1;
2177 break;
2179 case IOR:
2180 if (remainder == 0xffffffff)
2182 if (generate)
2183 emit_constant_insn (cond,
2184 gen_rtx_SET (VOIDmode, target,
2185 GEN_INT (ARM_SIGN_EXTEND (val))));
2186 return 1;
2188 if (remainder == 0)
2190 if (reload_completed && rtx_equal_p (target, source))
2191 return 0;
2192 if (generate)
2193 emit_constant_insn (cond,
2194 gen_rtx_SET (VOIDmode, target, source));
2195 return 1;
2197 break;
2199 case AND:
2200 if (remainder == 0)
2202 if (generate)
2203 emit_constant_insn (cond,
2204 gen_rtx_SET (VOIDmode, target, const0_rtx));
2205 return 1;
2207 if (remainder == 0xffffffff)
2209 if (reload_completed && rtx_equal_p (target, source))
2210 return 0;
2211 if (generate)
2212 emit_constant_insn (cond,
2213 gen_rtx_SET (VOIDmode, target, source));
2214 return 1;
2216 can_invert = 1;
2217 break;
2219 case XOR:
2220 if (remainder == 0)
2222 if (reload_completed && rtx_equal_p (target, source))
2223 return 0;
2224 if (generate)
2225 emit_constant_insn (cond,
2226 gen_rtx_SET (VOIDmode, target, source));
2227 return 1;
2230 /* We don't know how to handle other cases yet. */
2231 gcc_assert (remainder == 0xffffffff);
2233 if (generate)
2234 emit_constant_insn (cond,
2235 gen_rtx_SET (VOIDmode, target,
2236 gen_rtx_NOT (mode, source)));
2237 return 1;
2239 case MINUS:
2240 /* We treat MINUS as (val - source), since (source - val) is always
2241 passed as (source + (-val)). */
2242 if (remainder == 0)
2244 if (generate)
2245 emit_constant_insn (cond,
2246 gen_rtx_SET (VOIDmode, target,
2247 gen_rtx_NEG (mode, source)));
2248 return 1;
2250 if (const_ok_for_arm (val))
2252 if (generate)
2253 emit_constant_insn (cond,
2254 gen_rtx_SET (VOIDmode, target,
2255 gen_rtx_MINUS (mode, GEN_INT (val),
2256 source)));
2257 return 1;
2259 can_negate = 1;
2261 break;
2263 default:
2264 gcc_unreachable ();
2267 /* If we can do it in one insn get out quickly. */
2268 if (const_ok_for_arm (val)
2269 || (can_negate_initial && const_ok_for_arm (-val))
2270 || (can_invert && const_ok_for_arm (~val)))
2272 if (generate)
2273 emit_constant_insn (cond,
2274 gen_rtx_SET (VOIDmode, target,
2275 (source
2276 ? gen_rtx_fmt_ee (code, mode, source,
2277 GEN_INT (val))
2278 : GEN_INT (val))));
2279 return 1;
2282 /* Calculate a few attributes that may be useful for specific
2283 optimizations. */
2284 for (i = 31; i >= 0; i--)
2286 if ((remainder & (1 << i)) == 0)
2287 clear_sign_bit_copies++;
2288 else
2289 break;
2292 for (i = 31; i >= 0; i--)
2294 if ((remainder & (1 << i)) != 0)
2295 set_sign_bit_copies++;
2296 else
2297 break;
2300 for (i = 0; i <= 31; i++)
2302 if ((remainder & (1 << i)) == 0)
2303 clear_zero_bit_copies++;
2304 else
2305 break;
2308 for (i = 0; i <= 31; i++)
2310 if ((remainder & (1 << i)) != 0)
2311 set_zero_bit_copies++;
2312 else
2313 break;
2316 switch (code)
2318 case SET:
2319 /* See if we can use movw. */
2320 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2322 if (generate)
2323 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2324 GEN_INT (val)));
2325 return 1;
2328 /* See if we can do this by sign-extending a constant that is known
2329 to be negative. This is a good way of doing it, since the shift
2330 may well merge into a subsequent insn. */
2331 if (set_sign_bit_copies > 1)
2333 if (const_ok_for_arm
2334 (temp1 = ARM_SIGN_EXTEND (remainder
2335 << (set_sign_bit_copies - 1))))
2337 if (generate)
2339 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2340 emit_constant_insn (cond,
2341 gen_rtx_SET (VOIDmode, new_src,
2342 GEN_INT (temp1)));
2343 emit_constant_insn (cond,
2344 gen_ashrsi3 (target, new_src,
2345 GEN_INT (set_sign_bit_copies - 1)));
2347 return 2;
2349 /* For an inverted constant, we will need to set the low bits,
2350 these will be shifted out of harm's way. */
2351 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2352 if (const_ok_for_arm (~temp1))
2354 if (generate)
2356 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2357 emit_constant_insn (cond,
2358 gen_rtx_SET (VOIDmode, new_src,
2359 GEN_INT (temp1)));
2360 emit_constant_insn (cond,
2361 gen_ashrsi3 (target, new_src,
2362 GEN_INT (set_sign_bit_copies - 1)));
2364 return 2;
2368 /* See if we can calculate the value as the difference between two
2369 valid immediates. */
2370 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2372 int topshift = clear_sign_bit_copies & ~1;
2374 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2375 & (0xff000000 >> topshift));
2377 /* If temp1 is zero, then that means the 9 most significant
2378 bits of remainder were 1 and we've caused it to overflow.
2379 When topshift is 0 we don't need to do anything since we
2380 can borrow from 'bit 32'. */
2381 if (temp1 == 0 && topshift != 0)
2382 temp1 = 0x80000000 >> (topshift - 1);
2384 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2386 if (const_ok_for_arm (temp2))
2388 if (generate)
2390 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2391 emit_constant_insn (cond,
2392 gen_rtx_SET (VOIDmode, new_src,
2393 GEN_INT (temp1)));
2394 emit_constant_insn (cond,
2395 gen_addsi3 (target, new_src,
2396 GEN_INT (-temp2)));
2399 return 2;
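/* Worked example (illustrative): 0x000ffff0 is not a valid immediate,
   but it equals 0x00100000 - 0x10 and both of those are, so the value
   is built here as a MOV followed by an ADD of a negative constant,
   in effect a subtract.  */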
2403 /* See if we can generate this by setting the bottom (or the top)
2404 16 bits, and then shifting these into the other half of the
2405 word. We only look for the simplest cases, to do more would cost
2406 too much. Be careful, however, not to generate this when the
2407 alternative would take fewer insns. */
2408 if (val & 0xffff0000)
2410 temp1 = remainder & 0xffff0000;
2411 temp2 = remainder & 0x0000ffff;
2413 /* Overlaps outside this range are best done using other methods. */
2414 for (i = 9; i < 24; i++)
2416 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2417 && !const_ok_for_arm (temp2))
2419 rtx new_src = (subtargets
2420 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2421 : target);
2422 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2423 source, subtargets, generate);
2424 source = new_src;
2425 if (generate)
2426 emit_constant_insn
2427 (cond,
2428 gen_rtx_SET
2429 (VOIDmode, target,
2430 gen_rtx_IOR (mode,
2431 gen_rtx_ASHIFT (mode, source,
2432 GEN_INT (i)),
2433 source)));
2434 return insns + 1;
2438 /* Don't duplicate cases already considered. */
2439 for (i = 17; i < 24; i++)
2441 if (((temp1 | (temp1 >> i)) == remainder)
2442 && !const_ok_for_arm (temp1))
2444 rtx new_src = (subtargets
2445 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2446 : target);
2447 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2448 source, subtargets, generate);
2449 source = new_src;
2450 if (generate)
2451 emit_constant_insn
2452 (cond,
2453 gen_rtx_SET (VOIDmode, target,
2454 gen_rtx_IOR
2455 (mode,
2456 gen_rtx_LSHIFTRT (mode, source,
2457 GEN_INT (i)),
2458 source)));
2459 return insns + 1;
2463 break;
2465 case IOR:
2466 case XOR:
2467 /* If we have IOR or XOR, and the constant can be loaded in a
2468 single instruction, and we can find a temporary to put it in,
2469 then this can be done in two instructions instead of 3-4. */
2470 if (subtargets
2471 /* TARGET can't be NULL if SUBTARGETS is 0 */
2472 || (reload_completed && !reg_mentioned_p (target, source)))
2474 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2476 if (generate)
2478 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2480 emit_constant_insn (cond,
2481 gen_rtx_SET (VOIDmode, sub,
2482 GEN_INT (val)));
2483 emit_constant_insn (cond,
2484 gen_rtx_SET (VOIDmode, target,
2485 gen_rtx_fmt_ee (code, mode,
2486 source, sub)));
2488 return 2;
2492 if (code == XOR)
2493 break;
2495 if (set_sign_bit_copies > 8
2496 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2498 if (generate)
2500 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2501 rtx shift = GEN_INT (set_sign_bit_copies);
2503 emit_constant_insn
2504 (cond,
2505 gen_rtx_SET (VOIDmode, sub,
2506 gen_rtx_NOT (mode,
2507 gen_rtx_ASHIFT (mode,
2508 source,
2509 shift))));
2510 emit_constant_insn
2511 (cond,
2512 gen_rtx_SET (VOIDmode, target,
2513 gen_rtx_NOT (mode,
2514 gen_rtx_LSHIFTRT (mode, sub,
2515 shift))));
2517 return 2;
2520 if (set_zero_bit_copies > 8
2521 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2523 if (generate)
2525 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2526 rtx shift = GEN_INT (set_zero_bit_copies);
2528 emit_constant_insn
2529 (cond,
2530 gen_rtx_SET (VOIDmode, sub,
2531 gen_rtx_NOT (mode,
2532 gen_rtx_LSHIFTRT (mode,
2533 source,
2534 shift))));
2535 emit_constant_insn
2536 (cond,
2537 gen_rtx_SET (VOIDmode, target,
2538 gen_rtx_NOT (mode,
2539 gen_rtx_ASHIFT (mode, sub,
2540 shift))));
2542 return 2;
2545 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2547 if (generate)
2549 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2550 emit_constant_insn (cond,
2551 gen_rtx_SET (VOIDmode, sub,
2552 gen_rtx_NOT (mode, source)));
2553 source = sub;
2554 if (subtargets)
2555 sub = gen_reg_rtx (mode);
2556 emit_constant_insn (cond,
2557 gen_rtx_SET (VOIDmode, sub,
2558 gen_rtx_AND (mode, source,
2559 GEN_INT (temp1))));
2560 emit_constant_insn (cond,
2561 gen_rtx_SET (VOIDmode, target,
2562 gen_rtx_NOT (mode, sub)));
2564 return 3;
2566 break;
2568 case AND:
2569 /* See if two shifts will do 2 or more insn's worth of work. */
2570 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2572 HOST_WIDE_INT shift_mask = ((0xffffffff
2573 << (32 - clear_sign_bit_copies))
2574 & 0xffffffff);
2576 if ((remainder | shift_mask) != 0xffffffff)
2578 if (generate)
2580 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2581 insns = arm_gen_constant (AND, mode, cond,
2582 remainder | shift_mask,
2583 new_src, source, subtargets, 1);
2584 source = new_src;
2586 else
2588 rtx targ = subtargets ? NULL_RTX : target;
2589 insns = arm_gen_constant (AND, mode, cond,
2590 remainder | shift_mask,
2591 targ, source, subtargets, 0);
2595 if (generate)
2597 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2598 rtx shift = GEN_INT (clear_sign_bit_copies);
2600 emit_insn (gen_ashlsi3 (new_src, source, shift));
2601 emit_insn (gen_lshrsi3 (target, new_src, shift));
2604 return insns + 2;
2607 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2609 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2611 if ((remainder | shift_mask) != 0xffffffff)
2613 if (generate)
2615 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2617 insns = arm_gen_constant (AND, mode, cond,
2618 remainder | shift_mask,
2619 new_src, source, subtargets, 1);
2620 source = new_src;
2622 else
2624 rtx targ = subtargets ? NULL_RTX : target;
2626 insns = arm_gen_constant (AND, mode, cond,
2627 remainder | shift_mask,
2628 targ, source, subtargets, 0);
2632 if (generate)
2634 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2635 rtx shift = GEN_INT (clear_zero_bit_copies);
2637 emit_insn (gen_lshrsi3 (new_src, source, shift));
2638 emit_insn (gen_ashlsi3 (target, new_src, shift));
2641 return insns + 2;
2644 break;
2646 default:
2647 break;
2650 for (i = 0; i < 32; i++)
2651 if (remainder & (1 << i))
2652 num_bits_set++;
2654 if (code == AND || (can_invert && num_bits_set > 16))
2655 remainder = (~remainder) & 0xffffffff;
2656 else if (code == PLUS && num_bits_set > 16)
2657 remainder = (-remainder) & 0xffffffff;
2658 else
2660 can_invert = 0;
2661 can_negate = 0;
2664 /* Now try and find a way of doing the job in either two or three
2665 instructions.
2666 We start by looking for the largest block of zeros that are aligned on
2667 a 2-bit boundary, we then fill up the temps, wrapping around to the
2668 top of the word when we drop off the bottom.
2669 In the worst case this code should produce no more than four insns.
2670 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2671 best place to start. */
2673 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2674 the same. */
2676 int best_start = 0;
2677 if (!TARGET_THUMB2)
2679 int best_consecutive_zeros = 0;
2681 for (i = 0; i < 32; i += 2)
2683 int consecutive_zeros = 0;
2685 if (!(remainder & (3 << i)))
2687 while ((i < 32) && !(remainder & (3 << i)))
2689 consecutive_zeros += 2;
2690 i += 2;
2692 if (consecutive_zeros > best_consecutive_zeros)
2694 best_consecutive_zeros = consecutive_zeros;
2695 best_start = i - consecutive_zeros;
2697 i -= 2;
2701 /* So long as it won't require any more insns to do so, it's
2702 desirable to emit a small constant (in bits 0...9) in the last
2703 insn. This way there is more chance that it can be combined with
2704 a later addressing insn to form a pre-indexed load or store
2705 operation. Consider:
2707 *((volatile int *)0xe0000100) = 1;
2708 *((volatile int *)0xe0000110) = 2;
2710 We want this to wind up as:
2712 mov rA, #0xe0000000
2713 mov rB, #1
2714 str rB, [rA, #0x100]
2715 mov rB, #2
2716 str rB, [rA, #0x110]
2718 rather than having to synthesize both large constants from scratch.
2720 Therefore, we calculate how many insns would be required to emit
2721 the constant starting from `best_start', and also starting from
2722 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2723 yield a shorter sequence, we may as well use zero. */
2724 if (best_start != 0
2725 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2726 && (count_insns_for_constant (remainder, 0) <=
2727 count_insns_for_constant (remainder, best_start)))
2728 best_start = 0;
2731 /* Now start emitting the insns. */
2732 i = best_start;
2735 int end;
2737 if (i <= 0)
2738 i += 32;
2739 if (remainder & (3 << (i - 2)))
2741 end = i - 8;
2742 if (end < 0)
2743 end += 32;
2744 temp1 = remainder & ((0x0ff << end)
2745 | ((i < end) ? (0xff >> (32 - end)) : 0));
2746 remainder &= ~temp1;
2748 if (generate)
2750 rtx new_src, temp1_rtx;
2752 if (code == SET || code == MINUS)
2754 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2755 if (can_invert && code != MINUS)
2756 temp1 = ~temp1;
2758 else
2760 if (remainder && subtargets)
2761 new_src = gen_reg_rtx (mode);
2762 else
2763 new_src = target;
2764 if (can_invert)
2765 temp1 = ~temp1;
2766 else if (can_negate)
2767 temp1 = -temp1;
2770 temp1 = trunc_int_for_mode (temp1, mode);
2771 temp1_rtx = GEN_INT (temp1);
2773 if (code == SET)
2775 else if (code == MINUS)
2776 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2777 else
2778 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2780 emit_constant_insn (cond,
2781 gen_rtx_SET (VOIDmode, new_src,
2782 temp1_rtx));
2783 source = new_src;
2786 if (code == SET)
2788 can_invert = 0;
2789 code = PLUS;
2791 else if (code == MINUS)
2792 code = PLUS;
2794 insns++;
2795 if (TARGET_ARM)
2796 i -= 6;
2797 else
2798 i -= 7;
2800 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2801 shifts. */
2802 if (TARGET_ARM)
2803 i -= 2;
2804 else
2805 i--;
2807 while (remainder);
2810 return insns;
2813 /* Canonicalize a comparison so that we are more likely to recognize it.
2814 This can be done for a few constant compares, where we can make the
2815 immediate value easier to load. */
2817 enum rtx_code
2818 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2819 rtx * op1)
2821 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2822 unsigned HOST_WIDE_INT maxval;
2823 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2825 switch (code)
2827 case EQ:
2828 case NE:
2829 return code;
2831 case GT:
2832 case LE:
2833 if (i != maxval
2834 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2836 *op1 = GEN_INT (i + 1);
2837 return code == GT ? GE : LT;
2839 break;
2841 case GE:
2842 case LT:
2843 if (i != ~maxval
2844 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2846 *op1 = GEN_INT (i - 1);
2847 return code == GE ? GT : LE;
2849 break;
2851 case GTU:
2852 case LEU:
2853 if (i != ~((unsigned HOST_WIDE_INT) 0)
2854 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2856 *op1 = GEN_INT (i + 1);
2857 return code == GTU ? GEU : LTU;
2859 break;
2861 case GEU:
2862 case LTU:
2863 if (i != 0
2864 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2866 *op1 = GEN_INT (i - 1);
2867 return code == GEU ? GTU : LEU;
2869 break;
2871 default:
2872 gcc_unreachable ();
2875 return code;
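/* Example (illustrative): (x > 0xfff) cannot use #0xfff as an ARM
   immediate, so it is rewritten above as (x >= 0x1000), whose constant
   is a single rotated byte.  */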
2879 /* Define how to find the value returned by a function. */
2882 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2884 enum machine_mode mode;
2885 int unsignedp ATTRIBUTE_UNUSED;
2886 rtx r ATTRIBUTE_UNUSED;
2888 mode = TYPE_MODE (type);
2889 /* Promote integer types. */
2890 if (INTEGRAL_TYPE_P (type))
2891 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2893 /* Promotes small structs returned in a register to full-word size
2894 for big-endian AAPCS. */
2895 if (arm_return_in_msb (type))
2897 HOST_WIDE_INT size = int_size_in_bytes (type);
2898 if (size % UNITS_PER_WORD != 0)
2900 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2901 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2905 return LIBCALL_VALUE(mode);
2908 /* Determine the amount of memory needed to store the possible return
2909 registers of an untyped call. */
2911 arm_apply_result_size (void)
2913 int size = 16;
2915 if (TARGET_ARM)
2917 if (TARGET_HARD_FLOAT_ABI)
2919 if (TARGET_FPA)
2920 size += 12;
2921 if (TARGET_MAVERICK)
2922 size += 8;
2924 if (TARGET_IWMMXT_ABI)
2925 size += 8;
2928 return size;
2931 /* Decide whether a type should be returned in memory (true)
2932 or in a register (false). This is called as the target hook
2933 TARGET_RETURN_IN_MEMORY. */
2934 static bool
2935 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2937 HOST_WIDE_INT size;
2939 size = int_size_in_bytes (type);
2941 /* Vector values should be returned using ARM registers, not memory (unless
2942 they're over 16 bytes, which will break since we only have four
2943 call-clobbered registers to play with). */
2944 if (TREE_CODE (type) == VECTOR_TYPE)
2945 return (size < 0 || size > (4 * UNITS_PER_WORD));
2947 if (!AGGREGATE_TYPE_P (type) &&
2948 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2949 /* All simple types are returned in registers.
2950 For AAPCS, complex types are treated the same as aggregates. */
2951 return 0;
2953 if (arm_abi != ARM_ABI_APCS)
2955 /* ATPCS and later return aggregate types in memory only if they are
2956 larger than a word (or are variable size). */
2957 return (size < 0 || size > UNITS_PER_WORD);
2960 /* For the arm-wince targets we choose to be compatible with Microsoft's
2961 ARM and Thumb compilers, which always return aggregates in memory. */
2962 #ifndef ARM_WINCE
2963 /* All structures/unions bigger than one word are returned in memory.
2964 Also catch the case where int_size_in_bytes returns -1. In this case
2965 the aggregate is either huge or of variable size, and in either case
2966 we will want to return it via memory and not in a register. */
2967 if (size < 0 || size > UNITS_PER_WORD)
2968 return 1;
2970 if (TREE_CODE (type) == RECORD_TYPE)
2972 tree field;
2974 /* For a struct the APCS says that we only return in a register
2975 if the type is 'integer like' and every addressable element
2976 has an offset of zero. For practical purposes this means
2977 that the structure can have at most one non bit-field element
2978 and that this element must be the first one in the structure. */
2980 /* Find the first field, ignoring non FIELD_DECL things which will
2981 have been created by C++. */
2982 for (field = TYPE_FIELDS (type);
2983 field && TREE_CODE (field) != FIELD_DECL;
2984 field = TREE_CHAIN (field))
2985 continue;
2987 if (field == NULL)
2988 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2990 /* Check that the first field is valid for returning in a register. */
2992 /* ... Floats are not allowed */
2993 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2994 return 1;
2996 /* ... Aggregates that are not themselves valid for returning in
2997 a register are not allowed. */
2998 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
2999 return 1;
3001 /* Now check the remaining fields, if any. Only bitfields are allowed,
3002 since they are not addressable. */
3003 for (field = TREE_CHAIN (field);
3004 field;
3005 field = TREE_CHAIN (field))
3007 if (TREE_CODE (field) != FIELD_DECL)
3008 continue;
3010 if (!DECL_BIT_FIELD_TYPE (field))
3011 return 1;
3014 return 0;
3017 if (TREE_CODE (type) == UNION_TYPE)
3019 tree field;
3021 /* Unions can be returned in registers if every element is
3022 integral, or can be returned in an integer register. */
3023 for (field = TYPE_FIELDS (type);
3024 field;
3025 field = TREE_CHAIN (field))
3027 if (TREE_CODE (field) != FIELD_DECL)
3028 continue;
3030 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3031 return 1;
3033 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3034 return 1;
3037 return 0;
3039 #endif /* not ARM_WINCE */
3041 /* Return all other types in memory. */
3042 return 1;
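/* Examples under the old APCS rules (illustrative): "struct { int i; }"
   is returned in r0, being word-sized and integer-like, while
   "struct { float f; }" and any aggregate larger than a word go back
   in memory.  Under ATPCS/AAPCS only the size test above applies.  */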
3045 /* Indicate whether or not words of a double are in big-endian order. */
3048 arm_float_words_big_endian (void)
3050 if (TARGET_MAVERICK)
3051 return 0;
3053 /* For FPA, float words are always big-endian. For VFP, floats words
3054 follow the memory system mode. */
3056 if (TARGET_FPA)
3058 return 1;
3061 if (TARGET_VFP)
3062 return (TARGET_BIG_END ? 1 : 0);
3064 return 1;
3067 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3068 for a call to a function whose data type is FNTYPE.
3069 For a library call, FNTYPE is NULL. */
3070 void
3071 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
3072 rtx libname ATTRIBUTE_UNUSED,
3073 tree fndecl ATTRIBUTE_UNUSED)
3075 /* On the ARM, the offset starts at 0. */
3076 pcum->nregs = 0;
3077 pcum->iwmmxt_nregs = 0;
3078 pcum->can_split = true;
3080 /* Varargs vectors are treated the same as long long.
3081 named_count avoids having to change the way arm handles 'named' */
3082 pcum->named_count = 0;
3083 pcum->nargs = 0;
3085 if (TARGET_REALLY_IWMMXT && fntype)
3087 tree fn_arg;
3089 for (fn_arg = TYPE_ARG_TYPES (fntype);
3090 fn_arg;
3091 fn_arg = TREE_CHAIN (fn_arg))
3092 pcum->named_count += 1;
3094 if (! pcum->named_count)
3095 pcum->named_count = INT_MAX;
3100 /* Return true if mode/type need doubleword alignment. */
3101 bool
3102 arm_needs_doubleword_align (enum machine_mode mode, tree type)
3104 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
3105 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
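/* For example (illustrative): a "long long" or "double" argument has
   64-bit alignment, which exceeds PARM_BOUNDARY (32 on ARM), so with
   ARM_DOUBLEWORD_ALIGN it must start in an even-numbered core register
   or at a doubleword-aligned stack slot.  */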
3109 /* Determine where to put an argument to a function.
3110 Value is zero to push the argument on the stack,
3111 or a hard register in which to store the argument.
3113 MODE is the argument's machine mode.
3114 TYPE is the data type of the argument (as a tree).
3115 This is null for libcalls where that information may
3116 not be available.
3117 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3118 the preceding args and about the function being called.
3119 NAMED is nonzero if this argument is a named parameter
3120 (otherwise it is an extra parameter matching an ellipsis). */
3123 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3124 tree type, int named)
3126 int nregs;
3128 /* Varargs vectors are treated the same as long long.
3129 named_count avoids having to change the way arm handles 'named' */
3130 if (TARGET_IWMMXT_ABI
3131 && arm_vector_mode_supported_p (mode)
3132 && pcum->named_count > pcum->nargs + 1)
3134 if (pcum->iwmmxt_nregs <= 9)
3135 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
3136 else
3138 pcum->can_split = false;
3139 return NULL_RTX;
3143 /* Put doubleword aligned quantities in even register pairs. */
3144 if (pcum->nregs & 1
3145 && ARM_DOUBLEWORD_ALIGN
3146 && arm_needs_doubleword_align (mode, type))
3147 pcum->nregs++;
3149 if (mode == VOIDmode)
3150 /* Pick an arbitrary value for operand 2 of the call insn. */
3151 return const0_rtx;
3153 /* Only allow splitting an arg between regs and memory if all preceding
3154 args were allocated to regs. For args passed by reference we only count
3155 the reference pointer. */
3156 if (pcum->can_split)
3157 nregs = 1;
3158 else
3159 nregs = ARM_NUM_REGS2 (mode, type);
3161 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
3162 return NULL_RTX;
3164 return gen_rtx_REG (mode, pcum->nregs);
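/* Example with the soft-float AAPCS layout (illustrative): for
   "void f (int a, double d)" the int is passed in r0, r1 is skipped
   for alignment, and the double occupies the r2/r3 pair.  */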
3167 static int
3168 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3169 tree type, bool named ATTRIBUTE_UNUSED)
3171 int nregs = pcum->nregs;
3173 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3174 return 0;
3176 if (NUM_ARG_REGS > nregs
3177 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3178 && pcum->can_split)
3179 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3181 return 0;
3184 /* Variable sized types are passed by reference. This is a GCC
3185 extension to the ARM ABI. */
3187 static bool
3188 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3189 enum machine_mode mode ATTRIBUTE_UNUSED,
3190 const_tree type, bool named ATTRIBUTE_UNUSED)
3192 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3195 /* Encode the current state of the #pragma [no_]long_calls. */
3196 typedef enum
3198 OFF, /* No #pragma [no_]long_calls is in effect. */
3199 LONG, /* #pragma long_calls is in effect. */
3200 SHORT /* #pragma no_long_calls is in effect. */
3201 } arm_pragma_enum;
3203 static arm_pragma_enum arm_pragma_long_calls = OFF;
3205 void
3206 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3208 arm_pragma_long_calls = LONG;
3211 void
3212 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3214 arm_pragma_long_calls = SHORT;
3217 void
3218 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3220 arm_pragma_long_calls = OFF;
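/* Typical use (illustrative):
     #pragma long_calls
     void far_away (void);     -- implicitly treated as long_call
     #pragma long_calls_off
   The recorded state is applied by arm_set_default_type_attributes
   further down.  */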
3223 /* Table of machine attributes. */
3224 const struct attribute_spec arm_attribute_table[] =
3226 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3227 /* Function calls made to this symbol must be done indirectly, because
3228 it may lie outside of the 26 bit addressing range of a normal function
3229 call. */
3230 { "long_call", 0, 0, false, true, true, NULL },
3231 /* Whereas these functions are always known to reside within the 26 bit
3232 addressing range. */
3233 { "short_call", 0, 0, false, true, true, NULL },
3234 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3235 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3236 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3237 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3238 #ifdef ARM_PE
3239 /* ARM/PE has three new attributes:
3240 interfacearm - ?
3241 dllexport - for exporting a function/variable that will live in a dll
3242 dllimport - for importing a function/variable from a dll
3244 Microsoft allows multiple declspecs in one __declspec, separating
3245 them with spaces. We do NOT support this. Instead, use __declspec
3246 multiple times.
3248 { "dllimport", 0, 0, true, false, false, NULL },
3249 { "dllexport", 0, 0, true, false, false, NULL },
3250 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3251 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3252 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3253 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3254 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3255 #endif
3256 { NULL, 0, 0, false, false, false, NULL }
3259 /* Handle an attribute requiring a FUNCTION_DECL;
3260 arguments as in struct attribute_spec.handler. */
3261 static tree
3262 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3263 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3265 if (TREE_CODE (*node) != FUNCTION_DECL)
3267 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3268 name);
3269 *no_add_attrs = true;
3272 return NULL_TREE;
3275 /* Handle an "interrupt" or "isr" attribute;
3276 arguments as in struct attribute_spec.handler. */
3277 static tree
3278 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3279 bool *no_add_attrs)
3281 if (DECL_P (*node))
3283 if (TREE_CODE (*node) != FUNCTION_DECL)
3285 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3286 name);
3287 *no_add_attrs = true;
3289 /* FIXME: the argument if any is checked for type attributes;
3290 should it be checked for decl ones? */
3292 else
3294 if (TREE_CODE (*node) == FUNCTION_TYPE
3295 || TREE_CODE (*node) == METHOD_TYPE)
3297 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3299 warning (OPT_Wattributes, "%qE attribute ignored",
3300 name);
3301 *no_add_attrs = true;
3304 else if (TREE_CODE (*node) == POINTER_TYPE
3305 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3306 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3307 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3309 *node = build_variant_type_copy (*node);
3310 TREE_TYPE (*node) = build_type_attribute_variant
3311 (TREE_TYPE (*node),
3312 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3313 *no_add_attrs = true;
3315 else
3317 /* Possibly pass this attribute on from the type to a decl. */
3318 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3319 | (int) ATTR_FLAG_FUNCTION_NEXT
3320 | (int) ATTR_FLAG_ARRAY_NEXT))
3322 *no_add_attrs = true;
3323 return tree_cons (name, args, NULL_TREE);
3325 else
3327 warning (OPT_Wattributes, "%qE attribute ignored",
3328 name);
3333 return NULL_TREE;
3336 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3337 /* Handle the "notshared" attribute. This attribute is another way of
3338 requesting hidden visibility. ARM's compiler supports
3339 "__declspec(notshared)"; we support the same thing via an
3340 attribute. */
3342 static tree
3343 arm_handle_notshared_attribute (tree *node,
3344 tree name ATTRIBUTE_UNUSED,
3345 tree args ATTRIBUTE_UNUSED,
3346 int flags ATTRIBUTE_UNUSED,
3347 bool *no_add_attrs)
3349 tree decl = TYPE_NAME (*node);
3351 if (decl)
3353 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3354 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3355 *no_add_attrs = false;
3357 return NULL_TREE;
3359 #endif
3361 /* Return 0 if the attributes for two types are incompatible, 1 if they
3362 are compatible, and 2 if they are nearly compatible (which causes a
3363 warning to be generated). */
3364 static int
3365 arm_comp_type_attributes (const_tree type1, const_tree type2)
3367 int l1, l2, s1, s2;
3369 /* Check for mismatch of non-default calling convention. */
3370 if (TREE_CODE (type1) != FUNCTION_TYPE)
3371 return 1;
3373 /* Check for mismatched call attributes. */
3374 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3375 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3376 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3377 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3379 /* Only bother to check if an attribute is defined. */
3380 if (l1 | l2 | s1 | s2)
3382 /* If one type has an attribute, the other must have the same attribute. */
3383 if ((l1 != l2) || (s1 != s2))
3384 return 0;
3386 /* Disallow mixed attributes. */
3387 if ((l1 & s2) || (l2 & s1))
3388 return 0;
3391 /* Check for mismatched ISR attribute. */
3392 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3393 if (! l1)
3394 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3395 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3396 if (! l2)
3397 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3398 if (l1 != l2)
3399 return 0;
3401 return 1;
3404 /* Assigns default attributes to newly defined type. This is used to
3405 set short_call/long_call attributes for function types of
3406 functions defined inside corresponding #pragma scopes. */
3407 static void
3408 arm_set_default_type_attributes (tree type)
3410 /* Add __attribute__ ((long_call)) to all functions when inside
3411 #pragma long_calls, or __attribute__ ((short_call)) when inside
3412 #pragma no_long_calls. */
3413 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3415 tree type_attr_list, attr_name;
3416 type_attr_list = TYPE_ATTRIBUTES (type);
3418 if (arm_pragma_long_calls == LONG)
3419 attr_name = get_identifier ("long_call");
3420 else if (arm_pragma_long_calls == SHORT)
3421 attr_name = get_identifier ("short_call");
3422 else
3423 return;
3425 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3426 TYPE_ATTRIBUTES (type) = type_attr_list;
3430 /* Return true if DECL is known to be linked into section SECTION. */
3432 static bool
3433 arm_function_in_section_p (tree decl, section *section)
3435 /* We can only be certain about functions defined in the same
3436 compilation unit. */
3437 if (!TREE_STATIC (decl))
3438 return false;
3440 /* Make sure that SYMBOL always binds to the definition in this
3441 compilation unit. */
3442 if (!targetm.binds_local_p (decl))
3443 return false;
3445 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3446 if (!DECL_SECTION_NAME (decl))
3448 /* Make sure that we will not create a unique section for DECL. */
3449 if (flag_function_sections || DECL_ONE_ONLY (decl))
3450 return false;
3453 return function_section (decl) == section;
3456 /* Return nonzero if a 32-bit "long_call" should be generated for
3457 a call from the current function to DECL. We generate a long_call
3458 if the function:
3460 a. has an __attribute__((long_call))
3461 or b. is within the scope of a #pragma long_calls
3462 or c. the -mlong-calls command line switch has been specified
3464 However we do not generate a long call if the function:
3466 d. has an __attribute__ ((short_call))
3467 or e. is inside the scope of a #pragma no_long_calls
3468 or f. is defined in the same section as the current function. */
3470 bool
3471 arm_is_long_call_p (tree decl)
3473 tree attrs;
3475 if (!decl)
3476 return TARGET_LONG_CALLS;
3478 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3479 if (lookup_attribute ("short_call", attrs))
3480 return false;
3482 /* For "f", be conservative, and only cater for cases in which the
3483 whole of the current function is placed in the same section. */
3484 if (!flag_reorder_blocks_and_partition
3485 && TREE_CODE (decl) == FUNCTION_DECL
3486 && arm_function_in_section_p (decl, current_function_section ()))
3487 return false;
3489 if (lookup_attribute ("long_call", attrs))
3490 return true;
3492 return TARGET_LONG_CALLS;
3495 /* Return nonzero if it is ok to make a tail-call to DECL. */
3496 static bool
3497 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3499 unsigned long func_type;
3501 if (cfun->machine->sibcall_blocked)
3502 return false;
3504 /* Never tailcall something for which we have no decl, or if we
3505 are in Thumb mode. */
3506 if (decl == NULL || TARGET_THUMB)
3507 return false;
3509 /* The PIC register is live on entry to VxWorks PLT entries, so we
3510 must make the call before restoring the PIC register. */
3511 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3512 return false;
3514 /* Cannot tail-call to long calls, since these are out of range of
3515 a branch instruction. */
3516 if (arm_is_long_call_p (decl))
3517 return false;
3519 /* If we are interworking and the function is not declared static
3520 then we can't tail-call it unless we know that it exists in this
3521 compilation unit (since it might be a Thumb routine). */
3522 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3523 return false;
3525 func_type = arm_current_func_type ();
3526 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3527 if (IS_INTERRUPT (func_type))
3528 return false;
3530 /* Never tailcall if function may be called with a misaligned SP. */
3531 if (IS_STACKALIGN (func_type))
3532 return false;
3534 /* Everything else is ok. */
3535 return true;
3539 /* Addressing mode support functions. */
3541 /* Return nonzero if X is a legitimate immediate operand when compiling
3542 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3544 legitimate_pic_operand_p (rtx x)
3546 if (GET_CODE (x) == SYMBOL_REF
3547 || (GET_CODE (x) == CONST
3548 && GET_CODE (XEXP (x, 0)) == PLUS
3549 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3550 return 0;
3552 return 1;
3555 /* Record that the current function needs a PIC register. Initialize
3556 cfun->machine->pic_reg if we have not already done so. */
3558 static void
3559 require_pic_register (void)
3561 /* A lot of the logic here is made obscure by the fact that this
3562 routine gets called as part of the rtx cost estimation process.
3563 We don't want those calls to affect any assumptions about the real
3564 function; and further, we can't call entry_of_function() until we
3565 start the real expansion process. */
3566 if (!crtl->uses_pic_offset_table)
3568 gcc_assert (can_create_pseudo_p ());
3569 if (arm_pic_register != INVALID_REGNUM)
3571 if (!cfun->machine->pic_reg)
3572 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3574 /* Play games to avoid marking the function as needing pic
3575 if we are being called as part of the cost-estimation
3576 process. */
3577 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3578 crtl->uses_pic_offset_table = 1;
3580 else
3582 rtx seq;
3584 if (!cfun->machine->pic_reg)
3585 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3587 /* Play games to avoid marking the function as needing pic
3588 if we are being called as part of the cost-estimation
3589 process. */
3590 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3592 crtl->uses_pic_offset_table = 1;
3593 start_sequence ();
3595 arm_load_pic_register (0UL);
3597 seq = get_insns ();
3598 end_sequence ();
3599 /* We can be called during expansion of PHI nodes, where
3600 we can't yet emit instructions directly in the final
3601 insn stream. Queue the insns on the entry edge, they will
3602 be committed after everything else is expanded. */
3603 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
3610 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3612 if (GET_CODE (orig) == SYMBOL_REF
3613 || GET_CODE (orig) == LABEL_REF)
3615 rtx pic_ref, address;
3616 rtx insn;
3617 int subregs = 0;
3619 /* If this function doesn't have a pic register, create one now. */
3620 require_pic_register ();
3622 if (reg == 0)
3624 gcc_assert (can_create_pseudo_p ());
3625 reg = gen_reg_rtx (Pmode);
3627 subregs = 1;
3630 if (subregs)
3631 address = gen_reg_rtx (Pmode);
3632 else
3633 address = reg;
3635 if (TARGET_ARM)
3636 emit_insn (gen_pic_load_addr_arm (address, orig));
3637 else if (TARGET_THUMB2)
3638 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3639 else /* TARGET_THUMB1 */
3640 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3642 /* VxWorks does not impose a fixed gap between segments; the run-time
3643 gap can be different from the object-file gap. We therefore can't
3644 use GOTOFF unless we are absolutely sure that the symbol is in the
3645 same segment as the GOT. Unfortunately, the flexibility of linker
3646 scripts means that we can't be sure of that in general, so assume
3647 that GOTOFF is never valid on VxWorks. */
3648 if ((GET_CODE (orig) == LABEL_REF
3649 || (GET_CODE (orig) == SYMBOL_REF &&
3650 SYMBOL_REF_LOCAL_P (orig)))
3651 && NEED_GOT_RELOC
3652 && !TARGET_VXWORKS_RTP)
3653 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3654 else
3656 pic_ref = gen_const_mem (Pmode,
3657 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3658 address));
3661 insn = emit_move_insn (reg, pic_ref);
3663 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3664 by the loop optimizer. */
3665 set_unique_reg_note (insn, REG_EQUAL, orig);
3667 return reg;
3669 else if (GET_CODE (orig) == CONST)
3671 rtx base, offset;
3673 if (GET_CODE (XEXP (orig, 0)) == PLUS
3674 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3675 return orig;
3677 /* Handle the case where we have: const (UNSPEC_TLS). */
3678 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3679 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3680 return orig;
3682 /* Handle the case where we have:
3683 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
3684 CONST_INT. */
3685 if (GET_CODE (XEXP (orig, 0)) == PLUS
3686 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
3687 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
3689 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
3690 return orig;
3693 if (reg == 0)
3695 gcc_assert (can_create_pseudo_p ());
3696 reg = gen_reg_rtx (Pmode);
3699 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3701 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3702 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3703 base == reg ? 0 : reg);
3705 if (GET_CODE (offset) == CONST_INT)
3707 /* The base register doesn't really matter, we only want to
3708 test the index for the appropriate mode. */
3709 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3711 gcc_assert (can_create_pseudo_p ());
3712 offset = force_reg (Pmode, offset);
3715 if (GET_CODE (offset) == CONST_INT)
3716 return plus_constant (base, INTVAL (offset));
3719 if (GET_MODE_SIZE (mode) > 4
3720 && (GET_MODE_CLASS (mode) == MODE_INT
3721 || TARGET_SOFT_FLOAT))
3723 emit_insn (gen_addsi3 (reg, base, offset));
3724 return reg;
3727 return gen_rtx_PLUS (Pmode, base, offset);
3730 return orig;
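/* Sketch of the code produced for a global symbol in ARM state
   (illustrative; register names are only an example):
     ldr  rA, .LCn            @ GOT offset of the symbol
     ldr  rD, [r9, rA]        @ fetch its address from the GOT
   A locally-binding symbol instead takes the PLUS path above and adds
   the offset to the PIC base directly, avoiding the GOT load.  */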
3734 /* Find a spare register to use during the prolog of a function. */
3736 static int
3737 thumb_find_work_register (unsigned long pushed_regs_mask)
3739 int reg;
3741 /* Check the argument registers first as these are call-used. The
3742 register allocation order means that sometimes r3 might be used
3743 but earlier argument registers might not, so check them all. */
3744 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3745 if (!df_regs_ever_live_p (reg))
3746 return reg;
3748 /* Before going on to check the call-saved registers we can try a couple
3749 more ways of deducing that r3 is available. The first is when we are
3750 pushing anonymous arguments onto the stack and we have fewer than 4
3751 registers' worth of fixed arguments (*). In this case r3 will be part of
3752 the variable argument list and so we can be sure that it will be
3753 pushed right at the start of the function. Hence it will be available
3754 for the rest of the prologue.
3755 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
3756 if (cfun->machine->uses_anonymous_args
3757 && crtl->args.pretend_args_size > 0)
3758 return LAST_ARG_REGNUM;
3760 /* The other case is when we have fixed arguments but fewer than 4 registers'
3761 worth. In this case r3 might be used in the body of the function, but
3762 it is not being used to convey an argument into the function. In theory
3763 we could just check crtl->args.size to see how many bytes are
3764 being passed in argument registers, but it seems to be unreliable.
3765 Sometimes it will have the value 0 when in fact arguments are being
3766 passed. (See testcase execute/20021111-1.c for an example.) So we
3767 also check the crtl->args.info.nregs field. The problem with this field is
3768 that it makes no allowances for arguments that are passed to the
3769 function but which are not used. Hence we could miss an opportunity
3770 when a function has an unused argument in r3. But it is better to be
3771 safe than to be sorry. */
3772 if (! cfun->machine->uses_anonymous_args
3773 && crtl->args.size >= 0
3774 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3775 && crtl->args.info.nregs < 4)
3776 return LAST_ARG_REGNUM;
3778 /* Otherwise look for a call-saved register that is going to be pushed. */
3779 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3780 if (pushed_regs_mask & (1 << reg))
3781 return reg;
3783 if (TARGET_THUMB2)
3785 /* Thumb-2 can use high regs. */
3786 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3787 if (pushed_regs_mask & (1 << reg))
3788 return reg;
3790 /* Something went wrong - thumb_compute_save_reg_mask()
3791 should have arranged for a suitable register to be pushed. */
3792 gcc_unreachable ();
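/* Worked example (editorial): for a function such as
       int f (int a, int b) { ... }
   only r0 and r1 carry incoming arguments, so crtl->args.size is 8 and
   crtl->args.info.nregs is 2.  Even if r3 happens to be written somewhere in
   the body (defeating the df_regs_ever_live_p scan above), the fixed-argument
   check still hands back r3, because nothing is passed to the function in it.  */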
3795 static GTY(()) int pic_labelno;
3797 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3798 low register. */
3800 void
3801 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3803 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
3805 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3806 return;
3808 gcc_assert (flag_pic);
3810 pic_reg = cfun->machine->pic_reg;
3811 if (TARGET_VXWORKS_RTP)
3813 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3814 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3815 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3817 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3819 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3820 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3822 else
3824 /* We use an UNSPEC rather than a LABEL_REF because this label
3825 never appears in the code stream. */
3827 labelno = GEN_INT (pic_labelno++);
3828 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3829 l1 = gen_rtx_CONST (VOIDmode, l1);
3831 /* On the ARM the PC register contains 'dot + 8' at the time of the
3832 addition; on the Thumb it is 'dot + 4'. */
3833 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
3834 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
3835 UNSPEC_GOTSYM_OFF);
3836 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3838 if (TARGET_ARM)
3840 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3841 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3843 else if (TARGET_THUMB2)
3845 /* Thumb-2 only allows very limited access to the PC. Calculate the
3846 address in a temporary register. */
3847 if (arm_pic_register != INVALID_REGNUM)
3849 pic_tmp = gen_rtx_REG (SImode,
3850 thumb_find_work_register (saved_regs));
3852 else
3854 gcc_assert (can_create_pseudo_p ());
3855 pic_tmp = gen_reg_rtx (Pmode);
3858 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3859 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3860 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3862 else /* TARGET_THUMB1 */
3864 if (arm_pic_register != INVALID_REGNUM
3865 && REGNO (pic_reg) > LAST_LO_REGNUM)
3867 /* We will have pushed the pic register, so we should always be
3868 able to find a work register. */
3869 pic_tmp = gen_rtx_REG (SImode,
3870 thumb_find_work_register (saved_regs));
3871 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3872 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3874 else
3875 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3876 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3880 /* Need to emit this whether or not we obey regdecls,
3881 since setjmp/longjmp can invalidate the liveness information. */
3882 emit_use (pic_reg);
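/* Illustrative sketch (editorial) of the ARM-state sequence emitted above;
   the label names are made up for the example:

	ldr	rPIC, .LCn		@ _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)
   .LPICn:
	add	rPIC, pc, rPIC		@ pc reads as .LPICn + 8 here

   so rPIC = (GOT - (.LPICn + 8)) + (.LPICn + 8) = &_GLOBAL_OFFSET_TABLE_.
   This is exactly why the UNSPEC_GOTSYM_OFF constant is biased by 'dot + 8'
   ('dot + 4' in Thumb state, where the pc reads only four bytes ahead).  */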
3886 /* Return nonzero if X is valid as an ARM state addressing register. */
3887 static int
3888 arm_address_register_rtx_p (rtx x, int strict_p)
3890 int regno;
3892 if (GET_CODE (x) != REG)
3893 return 0;
3895 regno = REGNO (x);
3897 if (strict_p)
3898 return ARM_REGNO_OK_FOR_BASE_P (regno);
3900 return (regno <= LAST_ARM_REGNUM
3901 || regno >= FIRST_PSEUDO_REGISTER
3902 || regno == FRAME_POINTER_REGNUM
3903 || regno == ARG_POINTER_REGNUM);
3906 /* Return TRUE if this rtx is the difference of a symbol and a label,
3907 and will reduce to a PC-relative relocation in the object file.
3908 Expressions like this can be left alone when generating PIC, rather
3909 than forced through the GOT. */
3910 static int
3911 pcrel_constant_p (rtx x)
3913 if (GET_CODE (x) == MINUS)
3914 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3916 return FALSE;
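/* For example (editorial), the constant
       (minus (symbol_ref "sym") (label_ref L))
   assembles to "sym - .L", a fixed pc-relative difference, so under -fPIC it
   can be used as-is instead of being forced through the GOT.  */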
3919 /* Return nonzero if X is a valid ARM state address operand. */
3921 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3922 int strict_p)
3924 bool use_ldrd;
3925 enum rtx_code code = GET_CODE (x);
3927 if (arm_address_register_rtx_p (x, strict_p))
3928 return 1;
3930 use_ldrd = (TARGET_LDRD
3931 && (mode == DImode
3932 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3934 if (code == POST_INC || code == PRE_DEC
3935 || ((code == PRE_INC || code == POST_DEC)
3936 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3937 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3939 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3940 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3941 && GET_CODE (XEXP (x, 1)) == PLUS
3942 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3944 rtx addend = XEXP (XEXP (x, 1), 1);
3946 /* Don't allow ldrd post-increment by register because it's hard
3947 to fix up invalid register choices. */
3948 if (use_ldrd
3949 && GET_CODE (x) == POST_MODIFY
3950 && GET_CODE (addend) == REG)
3951 return 0;
3953 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3954 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3957 /* After reload constants split into minipools will have addresses
3958 from a LABEL_REF. */
3959 else if (reload_completed
3960 && (code == LABEL_REF
3961 || (code == CONST
3962 && GET_CODE (XEXP (x, 0)) == PLUS
3963 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3964 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3965 return 1;
3967 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3968 return 0;
3970 else if (code == PLUS)
3972 rtx xop0 = XEXP (x, 0);
3973 rtx xop1 = XEXP (x, 1);
3975 return ((arm_address_register_rtx_p (xop0, strict_p)
3976 && GET_CODE(xop1) == CONST_INT
3977 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3978 || (arm_address_register_rtx_p (xop1, strict_p)
3979 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3982 #if 0
3983 /* Reload currently can't handle MINUS, so disable this for now */
3984 else if (GET_CODE (x) == MINUS)
3986 rtx xop0 = XEXP (x, 0);
3987 rtx xop1 = XEXP (x, 1);
3989 return (arm_address_register_rtx_p (xop0, strict_p)
3990 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3992 #endif
3994 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3995 && code == SYMBOL_REF
3996 && CONSTANT_POOL_ADDRESS_P (x)
3997 && ! (flag_pic
3998 && symbol_mentioned_p (get_pool_constant (x))
3999 && ! pcrel_constant_p (get_pool_constant (x))))
4000 return 1;
4002 return 0;
4005 /* Return nonzero if X is a valid Thumb-2 address operand. */
4006 static int
4007 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4009 bool use_ldrd;
4010 enum rtx_code code = GET_CODE (x);
4012 if (arm_address_register_rtx_p (x, strict_p))
4013 return 1;
4015 use_ldrd = (TARGET_LDRD
4016 && (mode == DImode
4017 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4019 if (code == POST_INC || code == PRE_DEC
4020 || ((code == PRE_INC || code == POST_DEC)
4021 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4022 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4024 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4025 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4026 && GET_CODE (XEXP (x, 1)) == PLUS
4027 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4029 /* Thumb-2 only has autoincrement by constant. */
4030 rtx addend = XEXP (XEXP (x, 1), 1);
4031 HOST_WIDE_INT offset;
4033 if (GET_CODE (addend) != CONST_INT)
4034 return 0;
4036 offset = INTVAL(addend);
4037 if (GET_MODE_SIZE (mode) <= 4)
4038 return (offset > -256 && offset < 256);
4040 return (use_ldrd && offset > -1024 && offset < 1024
4041 && (offset & 3) == 0);
4044 /* After reload constants split into minipools will have addresses
4045 from a LABEL_REF. */
4046 else if (reload_completed
4047 && (code == LABEL_REF
4048 || (code == CONST
4049 && GET_CODE (XEXP (x, 0)) == PLUS
4050 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4051 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4052 return 1;
4054 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4055 return 0;
4057 else if (code == PLUS)
4059 rtx xop0 = XEXP (x, 0);
4060 rtx xop1 = XEXP (x, 1);
4062 return ((arm_address_register_rtx_p (xop0, strict_p)
4063 && thumb2_legitimate_index_p (mode, xop1, strict_p))
4064 || (arm_address_register_rtx_p (xop1, strict_p)
4065 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
4068 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4069 && code == SYMBOL_REF
4070 && CONSTANT_POOL_ADDRESS_P (x)
4071 && ! (flag_pic
4072 && symbol_mentioned_p (get_pool_constant (x))
4073 && ! pcrel_constant_p (get_pool_constant (x))))
4074 return 1;
4076 return 0;
4079 /* Return nonzero if INDEX is valid for an address index operand in
4080 ARM state. */
4081 static int
4082 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
4083 int strict_p)
4085 HOST_WIDE_INT range;
4086 enum rtx_code code = GET_CODE (index);
4088 /* Standard coprocessor addressing modes. */
4089 if (TARGET_HARD_FLOAT
4090 && (TARGET_FPA || TARGET_MAVERICK)
4091 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4092 || (TARGET_MAVERICK && mode == DImode)))
4093 return (code == CONST_INT && INTVAL (index) < 1024
4094 && INTVAL (index) > -1024
4095 && (INTVAL (index) & 3) == 0);
4097 if (TARGET_NEON
4098 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4099 return (code == CONST_INT
4100 && INTVAL (index) < 1016
4101 && INTVAL (index) > -1024
4102 && (INTVAL (index) & 3) == 0);
4104 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4105 return (code == CONST_INT
4106 && INTVAL (index) < 1024
4107 && INTVAL (index) > -1024
4108 && (INTVAL (index) & 3) == 0);
4110 if (arm_address_register_rtx_p (index, strict_p)
4111 && (GET_MODE_SIZE (mode) <= 4))
4112 return 1;
4114 if (mode == DImode || mode == DFmode)
4116 if (code == CONST_INT)
4118 HOST_WIDE_INT val = INTVAL (index);
4120 if (TARGET_LDRD)
4121 return val > -256 && val < 256;
4122 else
4123 return val > -4096 && val < 4092;
4126 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
4129 if (GET_MODE_SIZE (mode) <= 4
4130 && ! (arm_arch4
4131 && (mode == HImode
4132 || (mode == QImode && outer == SIGN_EXTEND))))
4134 if (code == MULT)
4136 rtx xiop0 = XEXP (index, 0);
4137 rtx xiop1 = XEXP (index, 1);
4139 return ((arm_address_register_rtx_p (xiop0, strict_p)
4140 && power_of_two_operand (xiop1, SImode))
4141 || (arm_address_register_rtx_p (xiop1, strict_p)
4142 && power_of_two_operand (xiop0, SImode)));
4144 else if (code == LSHIFTRT || code == ASHIFTRT
4145 || code == ASHIFT || code == ROTATERT)
4147 rtx op = XEXP (index, 1);
4149 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4150 && GET_CODE (op) == CONST_INT
4151 && INTVAL (op) > 0
4152 && INTVAL (op) <= 31);
4156 /* For ARM v4 we may be doing a sign-extend operation during the
4157 load. */
4158 if (arm_arch4)
4160 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
4161 range = 256;
4162 else
4163 range = 4096;
4165 else
4166 range = (mode == HImode) ? 4095 : 4096;
4168 return (code == CONST_INT
4169 && INTVAL (index) < range
4170 && INTVAL (index) > -range);
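/* Some concrete ARM-state cases of the checks above (editorial examples):

       ldr  r0, [r1, #4092]        SImode, 12-bit offset: accepted
       ldrd r0, [r1, #-252]        DImode with LDRD: |offset| < 256
       ldr  r0, [r1, #-300]        DImode, only accepted via the LDR-pair
                                   fallback, i.e. when LDRD is unavailable
       ldr  r0, [r1, r2, lsl #2]   scaled register index: accepted for SImode
       ldrh r0, [r1, r2, lsl #2]   rejected on ARMv4+; LDRH only allows a
                                   plain register or an 8-bit constant offset.  */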
4173 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4174 index operand, i.e. 1, 2, 4 or 8. */
4175 static bool
4176 thumb2_index_mul_operand (rtx op)
4178 HOST_WIDE_INT val;
4180 if (GET_CODE(op) != CONST_INT)
4181 return false;
4183 val = INTVAL(op);
4184 return (val == 1 || val == 2 || val == 4 || val == 8);
4187 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4188 static int
4189 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4191 enum rtx_code code = GET_CODE (index);
4193 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4194 /* Standard coprocessor addressing modes. */
4195 if (TARGET_HARD_FLOAT
4196 && (TARGET_FPA || TARGET_MAVERICK)
4197 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4198 || (TARGET_MAVERICK && mode == DImode)))
4199 return (code == CONST_INT && INTVAL (index) < 1024
4200 && INTVAL (index) > -1024
4201 && (INTVAL (index) & 3) == 0);
4203 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4205 /* For DImode assume values will usually live in core regs
4206 and only allow LDRD addressing modes. */
4207 if (!TARGET_LDRD || mode != DImode)
4208 return (code == CONST_INT
4209 && INTVAL (index) < 1024
4210 && INTVAL (index) > -1024
4211 && (INTVAL (index) & 3) == 0);
4214 if (TARGET_NEON
4215 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4216 return (code == CONST_INT
4217 && INTVAL (index) < 1016
4218 && INTVAL (index) > -1024
4219 && (INTVAL (index) & 3) == 0);
4221 if (arm_address_register_rtx_p (index, strict_p)
4222 && (GET_MODE_SIZE (mode) <= 4))
4223 return 1;
4225 if (mode == DImode || mode == DFmode)
4227 HOST_WIDE_INT val = (code == CONST_INT) ? INTVAL (index) : 0;
4228 /* ??? Can we assume ldrd for thumb2? */
4229 /* Thumb-2 ldrd only has reg+const addressing modes. */
4230 if (code != CONST_INT)
4231 return 0;
4233 /* ldrd supports offsets of +-1020.
4234 However the ldr fallback does not. */
4235 return val > -256 && val < 256 && (val & 3) == 0;
4238 if (code == MULT)
4240 rtx xiop0 = XEXP (index, 0);
4241 rtx xiop1 = XEXP (index, 1);
4243 return ((arm_address_register_rtx_p (xiop0, strict_p)
4244 && thumb2_index_mul_operand (xiop1))
4245 || (arm_address_register_rtx_p (xiop1, strict_p)
4246 && thumb2_index_mul_operand (xiop0)));
4248 else if (code == ASHIFT)
4250 rtx op = XEXP (index, 1);
4252 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4253 && GET_CODE (op) == CONST_INT
4254 && INTVAL (op) > 0
4255 && INTVAL (op) <= 3);
4258 return (code == CONST_INT
4259 && INTVAL (index) < 4096
4260 && INTVAL (index) > -256);
4263 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4264 static int
4265 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4267 int regno;
4269 if (GET_CODE (x) != REG)
4270 return 0;
4272 regno = REGNO (x);
4274 if (strict_p)
4275 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4277 return (regno <= LAST_LO_REGNUM
4278 || regno > LAST_VIRTUAL_REGISTER
4279 || regno == FRAME_POINTER_REGNUM
4280 || (GET_MODE_SIZE (mode) >= 4
4281 && (regno == STACK_POINTER_REGNUM
4282 || regno >= FIRST_PSEUDO_REGISTER
4283 || x == hard_frame_pointer_rtx
4284 || x == arg_pointer_rtx)));
4287 /* Return nonzero if x is a legitimate index register. This is the case
4288 for any base register that can access a QImode object. */
4289 inline static int
4290 thumb1_index_register_rtx_p (rtx x, int strict_p)
4292 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4295 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4297 The AP may be eliminated to either the SP or the FP, so we use the
4298 least common denominator, e.g. SImode, and offsets from 0 to 64.
4300 ??? Verify whether the above is the right approach.
4302 ??? Also, the FP may be eliminated to the SP, so perhaps that
4303 needs special handling also.
4305 ??? Look at how the mips16 port solves this problem. It probably uses
4306 better ways to solve some of these problems.
4308 Although it is not incorrect, we don't accept QImode and HImode
4309 addresses based on the frame pointer or arg pointer until the
4310 reload pass starts. This is so that eliminating such addresses
4311 into stack based ones won't produce impossible code. */
4312 static int
4313 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4315 /* ??? Not clear if this is right. Experiment. */
4316 if (GET_MODE_SIZE (mode) < 4
4317 && !(reload_in_progress || reload_completed)
4318 && (reg_mentioned_p (frame_pointer_rtx, x)
4319 || reg_mentioned_p (arg_pointer_rtx, x)
4320 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4321 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4322 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4323 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4324 return 0;
4326 /* Accept any base register. SP only in SImode or larger. */
4327 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4328 return 1;
4330 /* This is PC relative data before arm_reorg runs. */
4331 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4332 && GET_CODE (x) == SYMBOL_REF
4333 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4334 return 1;
4336 /* This is PC relative data after arm_reorg runs. */
4337 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4338 && (GET_CODE (x) == LABEL_REF
4339 || (GET_CODE (x) == CONST
4340 && GET_CODE (XEXP (x, 0)) == PLUS
4341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4342 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4343 return 1;
4345 /* Post-inc indexing only supported for SImode and larger. */
4346 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4347 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4348 return 1;
4350 else if (GET_CODE (x) == PLUS)
4352 /* REG+REG address can be any two index registers. */
4353 /* We disallow FRAME+REG addressing since we know that FRAME
4354 will be replaced with STACK, and SP relative addressing only
4355 permits SP+OFFSET. */
4356 if (GET_MODE_SIZE (mode) <= 4
4357 && XEXP (x, 0) != frame_pointer_rtx
4358 && XEXP (x, 1) != frame_pointer_rtx
4359 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4360 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4361 return 1;
4363 /* REG+const has 5-7 bit offset for non-SP registers. */
4364 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4365 || XEXP (x, 0) == arg_pointer_rtx)
4366 && GET_CODE (XEXP (x, 1)) == CONST_INT
4367 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4368 return 1;
4370 /* REG+const has 10-bit offset for SP, but only SImode and
4371 larger are supported. */
4372 /* ??? Should probably check for DI/DFmode overflow here
4373 just like GO_IF_LEGITIMATE_OFFSET does. */
4374 else if (GET_CODE (XEXP (x, 0)) == REG
4375 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4376 && GET_MODE_SIZE (mode) >= 4
4377 && GET_CODE (XEXP (x, 1)) == CONST_INT
4378 && INTVAL (XEXP (x, 1)) >= 0
4379 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4380 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4381 return 1;
4383 else if (GET_CODE (XEXP (x, 0)) == REG
4384 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4385 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4386 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4387 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4388 && GET_MODE_SIZE (mode) >= 4
4389 && GET_CODE (XEXP (x, 1)) == CONST_INT
4390 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4391 return 1;
4394 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4395 && GET_MODE_SIZE (mode) == 4
4396 && GET_CODE (x) == SYMBOL_REF
4397 && CONSTANT_POOL_ADDRESS_P (x)
4398 && ! (flag_pic
4399 && symbol_mentioned_p (get_pool_constant (x))
4400 && ! pcrel_constant_p (get_pool_constant (x))))
4401 return 1;
4403 return 0;
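/* Some concrete Thumb-1 cases of the tests above (editorial examples):

       [r0, r1]      QImode   accepted - any two low (index) registers
       [r2, #124]    SImode   accepted - 5-bit scaled immediate (31 * 4)
       [r2, #128]    SImode   rejected - has to be split or reloaded
       [sp, #1020]   SImode   accepted - SP gets the 10-bit word-offset form
       [sp, #4]      QImode   rejected - byte accesses off SP have to go
                              through a low register instead.  */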
4406 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4407 instruction of mode MODE. */
4409 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4411 switch (GET_MODE_SIZE (mode))
4413 case 1:
4414 return val >= 0 && val < 32;
4416 case 2:
4417 return val >= 0 && val < 64 && (val & 1) == 0;
4419 default:
4420 return (val >= 0
4421 && (val + GET_MODE_SIZE (mode)) <= 128
4422 && (val & 3) == 0);
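/* These ranges are just the 5-bit immediate field of the Thumb-1 load/store
   instructions scaled by the access size: bytes reach 0..31, halfwords 0..62
   and words 0..124.  The "val + size <= 128" form in the word case also keeps
   multi-word accesses in range; for instance (editorial example) a DImode
   access at offset 124 is rejected because its second word would sit at 128,
   while offset 120 (second word at 124) is fine.  */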
4426 bool
4427 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
4429 if (TARGET_ARM)
4430 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
4431 else if (TARGET_THUMB2)
4432 return thumb2_legitimate_address_p (mode, x, strict_p);
4433 else /* if (TARGET_THUMB1) */
4434 return thumb1_legitimate_address_p (mode, x, strict_p);
4437 /* Build the SYMBOL_REF for __tls_get_addr. */
4439 static GTY(()) rtx tls_get_addr_libfunc;
4441 static rtx
4442 get_tls_get_addr (void)
4444 if (!tls_get_addr_libfunc)
4445 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4446 return tls_get_addr_libfunc;
4449 static rtx
4450 arm_load_tp (rtx target)
4452 if (!target)
4453 target = gen_reg_rtx (SImode);
4455 if (TARGET_HARD_TP)
4457 /* Can return in any reg. */
4458 emit_insn (gen_load_tp_hard (target));
4460 else
4462 /* Always returned in r0. Immediately copy the result into a pseudo,
4463 otherwise other uses of r0 (e.g. setting up function arguments) may
4464 clobber the value. */
4466 rtx tmp;
4468 emit_insn (gen_load_tp_soft ());
4470 tmp = gen_rtx_REG (SImode, 0);
4471 emit_move_insn (target, tmp);
4473 return target;
4476 static rtx
4477 load_tls_operand (rtx x, rtx reg)
4479 rtx tmp;
4481 if (reg == NULL_RTX)
4482 reg = gen_reg_rtx (SImode);
4484 tmp = gen_rtx_CONST (SImode, x);
4486 emit_move_insn (reg, tmp);
4488 return reg;
4491 static rtx
4492 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4494 rtx insns, label, labelno, sum;
4496 start_sequence ();
4498 labelno = GEN_INT (pic_labelno++);
4499 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4500 label = gen_rtx_CONST (VOIDmode, label);
4502 sum = gen_rtx_UNSPEC (Pmode,
4503 gen_rtvec (4, x, GEN_INT (reloc), label,
4504 GEN_INT (TARGET_ARM ? 8 : 4)),
4505 UNSPEC_TLS);
4506 reg = load_tls_operand (sum, reg);
4508 if (TARGET_ARM)
4509 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4510 else if (TARGET_THUMB2)
4512 rtx tmp;
4513 /* Thumb-2 only allows very limited access to the PC. Calculate
4514 the address in a temporary register. */
4515 tmp = gen_reg_rtx (SImode);
4516 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4517 emit_insn (gen_addsi3(reg, reg, tmp));
4519 else /* TARGET_THUMB1 */
4520 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4522 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4523 Pmode, 1, reg, Pmode);
4525 insns = get_insns ();
4526 end_sequence ();
4528 return insns;
4532 legitimize_tls_address (rtx x, rtx reg)
4534 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4535 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4537 switch (model)
4539 case TLS_MODEL_GLOBAL_DYNAMIC:
4540 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4541 dest = gen_reg_rtx (Pmode);
4542 emit_libcall_block (insns, dest, ret, x);
4543 return dest;
4545 case TLS_MODEL_LOCAL_DYNAMIC:
4546 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4548 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4549 share the LDM result with other LD model accesses. */
4550 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4551 UNSPEC_TLS);
4552 dest = gen_reg_rtx (Pmode);
4553 emit_libcall_block (insns, dest, ret, eqv);
4555 /* Load the addend. */
4556 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4557 UNSPEC_TLS);
4558 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4559 return gen_rtx_PLUS (Pmode, dest, addend);
4561 case TLS_MODEL_INITIAL_EXEC:
4562 labelno = GEN_INT (pic_labelno++);
4563 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4564 label = gen_rtx_CONST (VOIDmode, label);
4565 sum = gen_rtx_UNSPEC (Pmode,
4566 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4567 GEN_INT (TARGET_ARM ? 8 : 4)),
4568 UNSPEC_TLS);
4569 reg = load_tls_operand (sum, reg);
4571 if (TARGET_ARM)
4572 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4573 else if (TARGET_THUMB2)
4575 rtx tmp;
4576 /* Thumb-2 only allows very limited access to the PC. Calculate
4577 the address in a temporary register. */
4578 tmp = gen_reg_rtx (SImode);
4579 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4580 emit_insn (gen_addsi3(reg, reg, tmp));
4581 emit_move_insn (reg, gen_const_mem (SImode, reg));
4583 else
4585 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4586 emit_move_insn (reg, gen_const_mem (SImode, reg));
4589 tp = arm_load_tp (NULL_RTX);
4591 return gen_rtx_PLUS (Pmode, tp, reg);
4593 case TLS_MODEL_LOCAL_EXEC:
4594 tp = arm_load_tp (NULL_RTX);
4596 reg = gen_rtx_UNSPEC (Pmode,
4597 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4598 UNSPEC_TLS);
4599 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4601 return gen_rtx_PLUS (Pmode, tp, reg);
4603 default:
4604 abort ();
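/* Rough shape of the sequences produced above (editorial sketch; the
   descriptions are indicative only):

     global dynamic:  load the pc-relative address of the GD32 GOT entry,
                      call __tls_get_addr; the result is the variable's address.
     local dynamic:   one __tls_get_addr call yields the module base (shared
                      between accesses via the REG_EQUIV UNSPEC), then each
                      variable adds its link-time LDO32 offset.
     initial exec:    load the variable's TP offset from the GOT with a
                      pc-relative IE32 load, then add the thread pointer.
     local exec:      add a link-time LE32 offset directly to the thread
                      pointer returned by arm_load_tp.  */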
4608 /* Try machine-dependent ways of modifying an illegitimate address
4609 to be legitimate. If we find one, return the new, valid address. */
4611 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4613 if (!TARGET_ARM)
4615 /* TODO: legitimize_address for Thumb2. */
4616 if (TARGET_THUMB2)
4617 return x;
4618 return thumb_legitimize_address (x, orig_x, mode);
4621 if (arm_tls_symbol_p (x))
4622 return legitimize_tls_address (x, NULL_RTX);
4624 if (GET_CODE (x) == PLUS)
4626 rtx xop0 = XEXP (x, 0);
4627 rtx xop1 = XEXP (x, 1);
4629 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4630 xop0 = force_reg (SImode, xop0);
4632 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4633 xop1 = force_reg (SImode, xop1);
4635 if (ARM_BASE_REGISTER_RTX_P (xop0)
4636 && GET_CODE (xop1) == CONST_INT)
4638 HOST_WIDE_INT n, low_n;
4639 rtx base_reg, val;
4640 n = INTVAL (xop1);
4642 /* VFP addressing modes actually allow greater offsets, but for
4643 now we just stick with the lowest common denominator. */
4644 if (mode == DImode
4645 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4647 low_n = n & 0x0f;
4648 n &= ~0x0f;
4649 if (low_n > 4)
4651 n += 16;
4652 low_n -= 16;
4655 else
4657 low_n = ((mode) == TImode ? 0
4658 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4659 n -= low_n;
4662 base_reg = gen_reg_rtx (SImode);
4663 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4664 emit_move_insn (base_reg, val);
4665 x = plus_constant (base_reg, low_n);
4667 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4668 x = gen_rtx_PLUS (SImode, xop0, xop1);
4671 /* XXX We don't allow MINUS any more -- see comment in
4672 arm_legitimate_address_outer_p (). */
4673 else if (GET_CODE (x) == MINUS)
4675 rtx xop0 = XEXP (x, 0);
4676 rtx xop1 = XEXP (x, 1);
4678 if (CONSTANT_P (xop0))
4679 xop0 = force_reg (SImode, xop0);
4681 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4682 xop1 = force_reg (SImode, xop1);
4684 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4685 x = gen_rtx_MINUS (SImode, xop0, xop1);
4688 /* Make sure to take full advantage of the pre-indexed addressing mode
4689 with absolute addresses, which often allows the base register to
4690 be factorized across multiple adjacent memory references, and might
4691 even allow the minipool to be avoided entirely. */
4692 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4694 unsigned int bits;
4695 HOST_WIDE_INT mask, base, index;
4696 rtx base_reg;
4698 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4699 use an 8-bit index. So let's use a 12-bit index for SImode only and
4700 hope that arm_gen_constant will enable ldrb to use more bits. */
4701 bits = (mode == SImode) ? 12 : 8;
4702 mask = (1 << bits) - 1;
4703 base = INTVAL (x) & ~mask;
4704 index = INTVAL (x) & mask;
4705 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4707 /* It'll most probably be more efficient to generate the base
4708 with more bits set and use a negative index instead. */
4709 base |= mask;
4710 index -= mask;
4712 base_reg = force_reg (SImode, GEN_INT (base));
4713 x = plus_constant (base_reg, index);
4716 if (flag_pic)
4718 /* We need to find and carefully transform any SYMBOL and LABEL
4719 references; so go back to the original address expression. */
4720 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4722 if (new_x != orig_x)
4723 x = new_x;
4726 return x;
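/* Worked example of the absolute-address split above (editorial): for an
   SImode load from the literal address 0x00123456, mask is 0xfff, so
   base = 0x123000 and index = 0x456; the base has only 4 bits set, so it is
   kept as-is and the access becomes "ldr rX, [rBase, #0x456]".  For
   0x0ffff678 the base 0x0ffff000 has 16 bits set (more than (32-12)/2), so
   the code uses base = 0x0ffffffff & 0x0fffffff = 0x0fffffff (a single MVN
   of 0xf0000000) together with the negative index -0x987 instead, which is
   cheaper to materialise.  */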
4730 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4731 to be legitimate. If we find one, return the new, valid address. */
4733 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4735 if (arm_tls_symbol_p (x))
4736 return legitimize_tls_address (x, NULL_RTX);
4738 if (GET_CODE (x) == PLUS
4739 && GET_CODE (XEXP (x, 1)) == CONST_INT
4740 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4741 || INTVAL (XEXP (x, 1)) < 0))
4743 rtx xop0 = XEXP (x, 0);
4744 rtx xop1 = XEXP (x, 1);
4745 HOST_WIDE_INT offset = INTVAL (xop1);
4747 /* Try and fold the offset into a biasing of the base register and
4748 then offsetting that. Don't do this when optimizing for space
4749 since it can cause too many CSEs. */
4750 if (optimize_size && offset >= 0
4751 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4753 HOST_WIDE_INT delta;
4755 if (offset >= 256)
4756 delta = offset - (256 - GET_MODE_SIZE (mode));
4757 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4758 delta = 31 * GET_MODE_SIZE (mode);
4759 else
4760 delta = offset & (~31 * GET_MODE_SIZE (mode));
4762 xop0 = force_operand (plus_constant (xop0, offset - delta),
4763 NULL_RTX);
4764 x = plus_constant (xop0, delta);
4766 else if (offset < 0 && offset > -256)
4767 /* Small negative offsets are best done with a subtract before the
4768 dereference; forcing these into a register normally takes two
4769 instructions. */
4770 x = force_operand (x, NULL_RTX);
4771 else
4773 /* For the remaining cases, force the constant into a register. */
4774 xop1 = force_reg (SImode, xop1);
4775 x = gen_rtx_PLUS (SImode, xop0, xop1);
4778 else if (GET_CODE (x) == PLUS
4779 && s_register_operand (XEXP (x, 1), SImode)
4780 && !s_register_operand (XEXP (x, 0), SImode))
4782 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4784 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4787 if (flag_pic)
4789 /* We need to find and carefully transform any SYMBOL and LABEL
4790 references; so go back to the original address expression. */
4791 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4793 if (new_x != orig_x)
4794 x = new_x;
4797 return x;
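/* Worked example of the offset folding above (editorial): an SImode access
   at base + 300 cannot be encoded directly (the 5-bit field only reaches
   124), so when the folding path fires delta becomes 300 - (256 - 4) = 48
   and the code emits "tmp = base + 252" (a single 8-bit add) followed by
   "[tmp, #48]", which does fit the scaled immediate form.  */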
4801 thumb_legitimize_reload_address (rtx *x_p,
4802 enum machine_mode mode,
4803 int opnum, int type,
4804 int ind_levels ATTRIBUTE_UNUSED)
4806 rtx x = *x_p;
4808 if (GET_CODE (x) == PLUS
4809 && GET_MODE_SIZE (mode) < 4
4810 && REG_P (XEXP (x, 0))
4811 && XEXP (x, 0) == stack_pointer_rtx
4812 && GET_CODE (XEXP (x, 1)) == CONST_INT
4813 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4815 rtx orig_x = x;
4817 x = copy_rtx (x);
4818 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4819 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4820 return x;
4823 /* If both registers are hi-regs, then it's better to reload the
4824 entire expression rather than each register individually. That
4825 only requires one reload register rather than two. */
4826 if (GET_CODE (x) == PLUS
4827 && REG_P (XEXP (x, 0))
4828 && REG_P (XEXP (x, 1))
4829 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4830 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4832 rtx orig_x = x;
4834 x = copy_rtx (x);
4835 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4836 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4837 return x;
4840 return NULL;
4843 /* Test for various thread-local symbols. */
4845 /* Return TRUE if X is a thread-local symbol. */
4847 static bool
4848 arm_tls_symbol_p (rtx x)
4850 if (! TARGET_HAVE_TLS)
4851 return false;
4853 if (GET_CODE (x) != SYMBOL_REF)
4854 return false;
4856 return SYMBOL_REF_TLS_MODEL (x) != 0;
4859 /* Helper for arm_tls_referenced_p. */
4861 static int
4862 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4864 if (GET_CODE (*x) == SYMBOL_REF)
4865 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4867 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4868 TLS offsets, not real symbol references. */
4869 if (GET_CODE (*x) == UNSPEC
4870 && XINT (*x, 1) == UNSPEC_TLS)
4871 return -1;
4873 return 0;
4876 /* Return TRUE if X contains any TLS symbol references. */
4878 bool
4879 arm_tls_referenced_p (rtx x)
4881 if (! TARGET_HAVE_TLS)
4882 return false;
4884 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4887 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4889 bool
4890 arm_cannot_force_const_mem (rtx x)
4892 rtx base, offset;
4894 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4896 split_const (x, &base, &offset);
4897 if (GET_CODE (base) == SYMBOL_REF
4898 && !offset_within_block_p (base, INTVAL (offset)))
4899 return true;
4901 return arm_tls_referenced_p (x);
4904 #define REG_OR_SUBREG_REG(X) \
4905 (GET_CODE (X) == REG \
4906 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4908 #define REG_OR_SUBREG_RTX(X) \
4909 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4911 #ifndef COSTS_N_INSNS
4912 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4913 #endif
4914 static inline int
4915 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4917 enum machine_mode mode = GET_MODE (x);
4919 switch (code)
4921 case ASHIFT:
4922 case ASHIFTRT:
4923 case LSHIFTRT:
4924 case ROTATERT:
4925 case PLUS:
4926 case MINUS:
4927 case COMPARE:
4928 case NEG:
4929 case NOT:
4930 return COSTS_N_INSNS (1);
4932 case MULT:
4933 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4935 int cycles = 0;
4936 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4938 while (i)
4940 i >>= 2;
4941 cycles++;
4943 return COSTS_N_INSNS (2) + cycles;
4945 return COSTS_N_INSNS (1) + 16;
4947 case SET:
4948 return (COSTS_N_INSNS (1)
4949 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4950 + (GET_CODE (SET_DEST (x)) == MEM)));
4952 case CONST_INT:
4953 if (outer == SET)
4955 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4956 return 0;
4957 if (thumb_shiftable_const (INTVAL (x)))
4958 return COSTS_N_INSNS (2);
4959 return COSTS_N_INSNS (3);
4961 else if ((outer == PLUS || outer == COMPARE)
4962 && INTVAL (x) < 256 && INTVAL (x) > -256)
4963 return 0;
4964 else if (outer == AND
4965 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4966 return COSTS_N_INSNS (1);
4967 else if (outer == ASHIFT || outer == ASHIFTRT
4968 || outer == LSHIFTRT)
4969 return 0;
4970 return COSTS_N_INSNS (2);
4972 case CONST:
4973 case CONST_DOUBLE:
4974 case LABEL_REF:
4975 case SYMBOL_REF:
4976 return COSTS_N_INSNS (3);
4978 case UDIV:
4979 case UMOD:
4980 case DIV:
4981 case MOD:
4982 return 100;
4984 case TRUNCATE:
4985 return 99;
4987 case AND:
4988 case XOR:
4989 case IOR:
4990 /* XXX guess. */
4991 return 8;
4993 case MEM:
4994 /* XXX another guess. */
4995 /* Memory costs quite a lot for the first word, but subsequent words
4996 load at the equivalent of a single insn each. */
4997 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4998 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4999 ? 4 : 0));
5001 case IF_THEN_ELSE:
5002 /* XXX a guess. */
5003 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5004 return 14;
5005 return 2;
5007 case ZERO_EXTEND:
5008 /* XXX still guessing. */
5009 switch (GET_MODE (XEXP (x, 0)))
5011 case QImode:
5012 return (1 + (mode == DImode ? 4 : 0)
5013 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5015 case HImode:
5016 return (4 + (mode == DImode ? 4 : 0)
5017 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5019 case SImode:
5020 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5022 default:
5023 return 99;
5026 default:
5027 return 99;
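/* Worked example of the MULT estimate above (editorial): for a multiply by
   the constant 100 (0x64), the loop shifts the value right two bits at a
   time and needs four iterations before it reaches zero, giving
   COSTS_N_INSNS (2) + 4.  A multiply by a non-constant falls back to the
   flat, deliberately pessimistic COSTS_N_INSNS (1) + 16.  */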
5031 static inline bool
5032 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
5034 enum machine_mode mode = GET_MODE (x);
5035 enum rtx_code subcode;
5036 rtx operand;
5037 enum rtx_code code = GET_CODE (x);
5038 int extra_cost;
5039 *total = 0;
5041 switch (code)
5043 case MEM:
5044 /* Memory costs quite a lot for the first word, but subsequent words
5045 load at the equivalent of a single insn each. */
5046 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
5047 return true;
5049 case DIV:
5050 case MOD:
5051 case UDIV:
5052 case UMOD:
5053 if (TARGET_HARD_FLOAT && mode == SFmode)
5054 *total = COSTS_N_INSNS (2);
5055 else if (TARGET_HARD_FLOAT && mode == DFmode)
5056 *total = COSTS_N_INSNS (4);
5057 else
5058 *total = COSTS_N_INSNS (20);
5059 return false;
5061 case ROTATE:
5062 if (GET_CODE (XEXP (x, 1)) == REG)
5063 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
5064 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5065 *total = rtx_cost (XEXP (x, 1), code, speed);
5067 /* Fall through */
5068 case ROTATERT:
5069 if (mode != SImode)
5071 *total += COSTS_N_INSNS (4);
5072 return true;
5075 /* Fall through */
5076 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
5077 *total += rtx_cost (XEXP (x, 0), code, speed);
5078 if (mode == DImode)
5080 *total += COSTS_N_INSNS (3);
5081 return true;
5084 *total += COSTS_N_INSNS (1);
5085 /* Increase the cost of complex shifts because they aren't any faster,
5086 and they reduce dual-issue opportunities. */
5087 if (arm_tune_cortex_a9
5088 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
5089 ++*total;
5091 return true;
5093 case MINUS:
5094 if (TARGET_THUMB2)
5096 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5098 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5099 *total = COSTS_N_INSNS (1);
5100 else
5101 *total = COSTS_N_INSNS (20);
5103 else
5104 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5105 /* Thumb2 does not have RSB, so all arguments must be
5106 registers (subtracting a constant is canonicalized as
5107 addition of the negated constant). */
5108 return false;
5111 if (mode == DImode)
5113 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5114 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5115 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5117 *total += rtx_cost (XEXP (x, 1), code, speed);
5118 return true;
5121 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5122 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
5124 *total += rtx_cost (XEXP (x, 0), code, speed);
5125 return true;
5128 return false;
5131 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5133 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5135 *total = COSTS_N_INSNS (1);
5136 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5137 && arm_const_double_rtx (XEXP (x, 0)))
5139 *total += rtx_cost (XEXP (x, 1), code, speed);
5140 return true;
5143 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5144 && arm_const_double_rtx (XEXP (x, 1)))
5146 *total += rtx_cost (XEXP (x, 0), code, speed);
5147 return true;
5150 return false;
5152 *total = COSTS_N_INSNS (20);
5153 return false;
5156 *total = COSTS_N_INSNS (1);
5157 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5158 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5160 *total += rtx_cost (XEXP (x, 1), code, speed);
5161 return true;
5164 subcode = GET_CODE (XEXP (x, 1));
5165 if (subcode == ASHIFT || subcode == ASHIFTRT
5166 || subcode == LSHIFTRT
5167 || subcode == ROTATE || subcode == ROTATERT)
5169 *total += rtx_cost (XEXP (x, 0), code, speed);
5170 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5171 return true;
5174 /* A shift as a part of RSB costs no more than RSB itself. */
5175 if (GET_CODE (XEXP (x, 0)) == MULT
5176 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5178 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
5179 *total += rtx_cost (XEXP (x, 1), code, speed);
5180 return true;
5183 if (subcode == MULT
5184 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
5186 *total += rtx_cost (XEXP (x, 0), code, speed);
5187 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5188 return true;
5191 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
5192 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
5194 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5195 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
5196 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
5197 *total += COSTS_N_INSNS (1);
5199 return true;
5202 /* Fall through */
5204 case PLUS:
5205 if (code == PLUS && arm_arch6 && mode == SImode
5206 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5207 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5209 *total = COSTS_N_INSNS (1);
5210 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
5211 speed);
5212 *total += rtx_cost (XEXP (x, 1), code, speed);
5213 return true;
5216 /* MLA: All arguments must be registers. We filter out
5217 multiplication by a power of two, so that we fall through to
5218 the code below. */
5219 if (GET_CODE (XEXP (x, 0)) == MULT
5220 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5222 /* The cost comes from the cost of the multiply. */
5223 return false;
5226 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5228 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5230 *total = COSTS_N_INSNS (1);
5231 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5232 && arm_const_double_rtx (XEXP (x, 1)))
5234 *total += rtx_cost (XEXP (x, 0), code, speed);
5235 return true;
5238 return false;
5241 *total = COSTS_N_INSNS (20);
5242 return false;
5245 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
5246 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
5248 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
5249 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5250 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
5251 *total += COSTS_N_INSNS (1);
5252 return true;
5255 /* Fall through */
5257 case AND: case XOR: case IOR:
5258 extra_cost = 0;
5260 /* Normally the frame registers will be spilt into reg+const during
5261 reload, so it is a bad idea to combine them with other instructions,
5262 since then they might not be moved outside of loops. As a compromise
5263 we allow integration with ops that have a constant as their second
5264 operand. */
5265 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5266 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5267 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5268 || (REG_OR_SUBREG_REG (XEXP (x, 1))
5269 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
5270 *total = 4;
5272 if (mode == DImode)
5274 *total += COSTS_N_INSNS (2);
5275 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5276 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5278 *total += rtx_cost (XEXP (x, 0), code, speed);
5279 return true;
5282 return false;
5285 *total += COSTS_N_INSNS (1);
5286 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5287 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5289 *total += rtx_cost (XEXP (x, 0), code, speed);
5290 return true;
5292 subcode = GET_CODE (XEXP (x, 0));
5293 if (subcode == ASHIFT || subcode == ASHIFTRT
5294 || subcode == LSHIFTRT
5295 || subcode == ROTATE || subcode == ROTATERT)
5297 *total += rtx_cost (XEXP (x, 1), code, speed);
5298 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5299 return true;
5302 if (subcode == MULT
5303 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5305 *total += rtx_cost (XEXP (x, 1), code, speed);
5306 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5307 return true;
5310 if (subcode == UMIN || subcode == UMAX
5311 || subcode == SMIN || subcode == SMAX)
5313 *total = COSTS_N_INSNS (3);
5314 return true;
5317 return false;
5319 case MULT:
5320 /* This should have been handled by the CPU specific routines. */
5321 gcc_unreachable ();
5323 case TRUNCATE:
5324 if (arm_arch3m && mode == SImode
5325 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5326 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5327 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5328 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5329 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5330 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5332 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
5333 return true;
5335 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
5336 return false;
5338 case NEG:
5339 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5341 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5343 *total = COSTS_N_INSNS (1);
5344 return false;
5346 *total = COSTS_N_INSNS (2);
5347 return false;
5350 /* Fall through */
5351 case NOT:
5352 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
5353 if (mode == SImode && code == NOT)
5355 subcode = GET_CODE (XEXP (x, 0));
5356 if (subcode == ASHIFT || subcode == ASHIFTRT
5357 || subcode == LSHIFTRT
5358 || subcode == ROTATE || subcode == ROTATERT
5359 || (subcode == MULT
5360 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
5362 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5363 /* Register shifts cost an extra cycle. */
5364 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
5365 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
5366 subcode, speed);
5367 return true;
5371 return false;
5373 case IF_THEN_ELSE:
5374 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5376 *total = COSTS_N_INSNS (4);
5377 return true;
5380 operand = XEXP (x, 0);
5382 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
5383 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
5384 && GET_CODE (XEXP (operand, 0)) == REG
5385 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
5386 *total += COSTS_N_INSNS (1);
5387 *total += (rtx_cost (XEXP (x, 1), code, speed)
5388 + rtx_cost (XEXP (x, 2), code, speed));
5389 return true;
5391 case NE:
5392 if (mode == SImode && XEXP (x, 1) == const0_rtx)
5394 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5395 return true;
5397 goto scc_insn;
5399 case GE:
5400 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5401 && mode == SImode && XEXP (x, 1) == const0_rtx)
5403 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5404 return true;
5406 goto scc_insn;
5408 case LT:
5409 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5410 && mode == SImode && XEXP (x, 1) == const0_rtx)
5412 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5413 return true;
5415 goto scc_insn;
5417 case EQ:
5418 case GT:
5419 case LE:
5420 case GEU:
5421 case LTU:
5422 case GTU:
5423 case LEU:
5424 case UNORDERED:
5425 case ORDERED:
5426 case UNEQ:
5427 case UNGE:
5428 case UNLT:
5429 case UNGT:
5430 case UNLE:
5431 scc_insn:
5432 /* SCC insns. If the comparison has already been performed, they
5433 cost 2 instructions. Otherwise they need an additional comparison
5434 before them. */
5435 *total = COSTS_N_INSNS (2);
5436 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5438 return true;
5441 /* Fall through */
5442 case COMPARE:
5443 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5445 *total = 0;
5446 return true;
5449 *total += COSTS_N_INSNS (1);
5450 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5451 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5453 *total += rtx_cost (XEXP (x, 0), code, speed);
5454 return true;
5457 subcode = GET_CODE (XEXP (x, 0));
5458 if (subcode == ASHIFT || subcode == ASHIFTRT
5459 || subcode == LSHIFTRT
5460 || subcode == ROTATE || subcode == ROTATERT)
5462 *total += rtx_cost (XEXP (x, 1), code, speed);
5463 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5464 return true;
5467 if (subcode == MULT
5468 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5470 *total += rtx_cost (XEXP (x, 1), code, speed);
5471 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5472 return true;
5475 return false;
5477 case UMIN:
5478 case UMAX:
5479 case SMIN:
5480 case SMAX:
5481 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5482 if (GET_CODE (XEXP (x, 1)) != CONST_INT
5483 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
5484 *total += rtx_cost (XEXP (x, 1), code, speed);
5485 return true;
5487 case ABS:
5488 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5490 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5492 *total = COSTS_N_INSNS (1);
5493 return false;
5495 *total = COSTS_N_INSNS (20);
5496 return false;
5498 *total = COSTS_N_INSNS (1);
5499 if (mode == DImode)
5500 *total += COSTS_N_INSNS (3);
5501 return false;
5503 case SIGN_EXTEND:
5504 if (GET_MODE_CLASS (mode) == MODE_INT)
5506 *total = 0;
5507 if (mode == DImode)
5508 *total += COSTS_N_INSNS (1);
5510 if (GET_MODE (XEXP (x, 0)) != SImode)
5512 if (arm_arch6)
5514 if (GET_CODE (XEXP (x, 0)) != MEM)
5515 *total += COSTS_N_INSNS (1);
5517 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5518 *total += COSTS_N_INSNS (2);
5521 return false;
5524 /* Fall through */
5525 case ZERO_EXTEND:
5526 *total = 0;
5527 if (GET_MODE_CLASS (mode) == MODE_INT)
5529 if (mode == DImode)
5530 *total += COSTS_N_INSNS (1);
5532 if (GET_MODE (XEXP (x, 0)) != SImode)
5534 if (arm_arch6)
5536 if (GET_CODE (XEXP (x, 0)) != MEM)
5537 *total += COSTS_N_INSNS (1);
5539 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5540 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
5541 1 : 2);
5544 return false;
5547 switch (GET_MODE (XEXP (x, 0)))
5549 case V8QImode:
5550 case V4HImode:
5551 case V2SImode:
5552 case V4QImode:
5553 case V2HImode:
5554 *total = COSTS_N_INSNS (1);
5555 return false;
5557 default:
5558 gcc_unreachable ();
5560 gcc_unreachable ();
5562 case ZERO_EXTRACT:
5563 case SIGN_EXTRACT:
5564 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5565 return true;
5567 case CONST_INT:
5568 if (const_ok_for_arm (INTVAL (x))
5569 || const_ok_for_arm (~INTVAL (x)))
5570 *total = COSTS_N_INSNS (1);
5571 else
5572 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
5573 INTVAL (x), NULL_RTX,
5574 NULL_RTX, 0, 0));
5575 return true;
5577 case CONST:
5578 case LABEL_REF:
5579 case SYMBOL_REF:
5580 *total = COSTS_N_INSNS (3);
5581 return true;
5583 case HIGH:
5584 *total = COSTS_N_INSNS (1);
5585 return true;
5587 case LO_SUM:
5588 *total = COSTS_N_INSNS (1);
5589 *total += rtx_cost (XEXP (x, 0), code, speed);
5590 return true;
5592 case CONST_DOUBLE:
5593 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
5594 *total = COSTS_N_INSNS (1);
5595 else
5596 *total = COSTS_N_INSNS (4);
5597 return true;
5599 default:
5600 *total = COSTS_N_INSNS (4);
5601 return false;
5605 /* RTX costs when optimizing for size. */
5606 static bool
5607 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5608 int *total)
5610 enum machine_mode mode = GET_MODE (x);
5611 if (TARGET_THUMB1)
5613 /* XXX TBD. For now, use the standard costs. */
5614 *total = thumb1_rtx_costs (x, code, outer_code);
5615 return true;
5618 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5619 switch (code)
5621 case MEM:
5622 /* A memory access costs 1 insn if the mode is small, or the address is
5623 a single register, otherwise it costs one insn per word. */
5624 if (REG_P (XEXP (x, 0)))
5625 *total = COSTS_N_INSNS (1);
5626 else
5627 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5628 return true;
5630 case DIV:
5631 case MOD:
5632 case UDIV:
5633 case UMOD:
5634 /* Needs a libcall, so it costs about this. */
5635 *total = COSTS_N_INSNS (2);
5636 return false;
5638 case ROTATE:
5639 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5641 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5642 return true;
5644 /* Fall through */
5645 case ROTATERT:
5646 case ASHIFT:
5647 case LSHIFTRT:
5648 case ASHIFTRT:
5649 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5651 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5652 return true;
5654 else if (mode == SImode)
5656 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5657 /* Slightly disparage register shifts, but not by much. */
5658 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5659 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5660 return true;
5663 /* Needs a libcall. */
5664 *total = COSTS_N_INSNS (2);
5665 return false;
5667 case MINUS:
5668 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5670 *total = COSTS_N_INSNS (1);
5671 return false;
5674 if (mode == SImode)
5676 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5677 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5679 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5680 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5681 || subcode1 == ROTATE || subcode1 == ROTATERT
5682 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5683 || subcode1 == ASHIFTRT)
5685 /* It's just the cost of the two operands. */
5686 *total = 0;
5687 return false;
5690 *total = COSTS_N_INSNS (1);
5691 return false;
5694 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5695 return false;
5697 case PLUS:
5698 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5700 *total = COSTS_N_INSNS (1);
5701 return false;
5704 /* A shift as a part of ADD costs nothing. */
5705 if (GET_CODE (XEXP (x, 0)) == MULT
5706 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5708 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
5709 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
5710 *total += rtx_cost (XEXP (x, 1), code, false);
5711 return true;
5714 /* Fall through */
5715 case AND: case XOR: case IOR:
5716 if (mode == SImode)
5718 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5720 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5721 || subcode == LSHIFTRT || subcode == ASHIFTRT
5722 || (code == AND && subcode == NOT))
5724 /* It's just the cost of the two operands. */
5725 *total = 0;
5726 return false;
5730 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5731 return false;
5733 case MULT:
5734 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5735 return false;
5737 case NEG:
5738 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5740 *total = COSTS_N_INSNS (1);
5741 return false;
5744 /* Fall through */
5745 case NOT:
5746 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5748 return false;
5750 case IF_THEN_ELSE:
5751 *total = 0;
5752 return false;
5754 case COMPARE:
5755 if (cc_register (XEXP (x, 0), VOIDmode))
5756 * total = 0;
5757 else
5758 *total = COSTS_N_INSNS (1);
5759 return false;
5761 case ABS:
5762 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5763 *total = COSTS_N_INSNS (1);
5764 else
5765 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5766 return false;
5768 case SIGN_EXTEND:
5769 *total = 0;
5770 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5772 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5773 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5775 if (mode == DImode)
5776 *total += COSTS_N_INSNS (1);
5777 return false;
5779 case ZERO_EXTEND:
5780 *total = 0;
5781 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5783 switch (GET_MODE (XEXP (x, 0)))
5785 case QImode:
5786 *total += COSTS_N_INSNS (1);
5787 break;
5789 case HImode:
5790 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5792 case SImode:
5793 break;
5795 default:
5796 *total += COSTS_N_INSNS (2);
5800 if (mode == DImode)
5801 *total += COSTS_N_INSNS (1);
5803 return false;
5805 case CONST_INT:
5806 if (const_ok_for_arm (INTVAL (x)))
5807 /* A multiplication by a constant requires another instruction
5808 to load the constant to a register. */
5809 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
5810 ? 1 : 0);
5811 else if (const_ok_for_arm (~INTVAL (x)))
5812 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5813 else if (const_ok_for_arm (-INTVAL (x)))
5815 if (outer_code == COMPARE || outer_code == PLUS
5816 || outer_code == MINUS)
5817 *total = 0;
5818 else
5819 *total = COSTS_N_INSNS (1);
5821 else
5822 *total = COSTS_N_INSNS (2);
5823 return true;
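/* For example, assuming the usual ARM rule that a valid immediate is an
   8-bit value rotated right by an even amount: 0x1000 is directly
   encodable and therefore free (or one insn under SET or MULT),
   0xffffff00 costs at most one insn because its bitwise inverse 0xff is
   encodable (free under AND, via BIC), -5 is free as a comparison or
   add/subtract operand because its negation is encodable, and a value
   such as 0x12345 falls through to the two-insn estimate.  */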
5825 case CONST:
5826 case LABEL_REF:
5827 case SYMBOL_REF:
5828 *total = COSTS_N_INSNS (2);
5829 return true;
5831 case CONST_DOUBLE:
5832 *total = COSTS_N_INSNS (4);
5833 return true;
5835 case HIGH:
5836 case LO_SUM:
5837 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5838 cost of these slightly. */
5839 *total = COSTS_N_INSNS (1) + 1;
5840 return true;
5842 default:
5843 if (mode != VOIDmode)
5844 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5845 else
5846 *total = COSTS_N_INSNS (4); /* Who knows? */
5847 return false;
5851 /* RTX costs when optimizing for size. */
5852 static bool
5853 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
5854 bool speed)
5856 if (!speed)
5857 return arm_size_rtx_costs (x, (enum rtx_code) code,
5858 (enum rtx_code) outer_code, total);
5859 else
5860 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
5861 (enum rtx_code) outer_code,
5862 total, speed);
5865 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5866 supported on any "slowmul" cores, so it can be ignored. */
5868 static bool
5869 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5870 int *total, bool speed)
5872 enum machine_mode mode = GET_MODE (x);
5874 if (TARGET_THUMB)
5876 *total = thumb1_rtx_costs (x, code, outer_code);
5877 return true;
5880 switch (code)
5882 case MULT:
5883 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5884 || mode == DImode)
5886 *total = COSTS_N_INSNS (20);
5887 return false;
5890 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5892 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5893 & (unsigned HOST_WIDE_INT) 0xffffffff);
5894 int cost, const_ok = const_ok_for_arm (i);
5895 int j, booth_unit_size;
5897 /* Tune as appropriate. */
5898 cost = const_ok ? 4 : 8;
5899 booth_unit_size = 2;
5900 for (j = 0; i && j < 32; j += booth_unit_size)
5902 i >>= booth_unit_size;
5903 cost++;
5906 *total = COSTS_N_INSNS (cost);
5907 *total += rtx_cost (XEXP (x, 0), code, speed);
5908 return true;
5911 *total = COSTS_N_INSNS (20);
5912 return false;
5914 default:
5915 return arm_rtx_costs_1 (x, outer_code, total, speed);
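/* As a worked example of the Booth-step loop above: multiplying by the
   encodable constant 5 starts from a base cost of 4 and consumes its two
   significant bit-pairs in two iterations, giving COSTS_N_INSNS (6); a
   non-encodable constant with bits up to the top of the word starts from
   8 and can accumulate up to 16 further steps.  */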
5920 /* RTX cost for cores with a fast multiply unit (M variants). */
5922 static bool
5923 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5924 int *total, bool speed)
5926 enum machine_mode mode = GET_MODE (x);
5928 if (TARGET_THUMB1)
5930 *total = thumb1_rtx_costs (x, code, outer_code);
5931 return true;
5934 /* ??? Should Thumb-2 use different costs? */
5935 switch (code)
5937 case MULT:
5938 /* There is no point basing this on the tuning, since it is always the
5939 fast variant if it exists at all. */
5940 if (mode == DImode
5941 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5942 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5943 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5945 *total = COSTS_N_INSNS (2);
5946 return false;
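/* That is, a widening multiply such as
   (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
   maps onto a single smull/umull-style instruction, hence the two-insn
   estimate above rather than the generic DImode multiply cost below.  */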
5950 if (mode == DImode)
5952 *total = COSTS_N_INSNS (5);
5953 return false;
5956 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5958 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5959 & (unsigned HOST_WIDE_INT) 0xffffffff);
5960 int cost, const_ok = const_ok_for_arm (i);
5961 int j, booth_unit_size;
5963 /* Tune as appropriate. */
5964 cost = const_ok ? 4 : 8;
5965 booth_unit_size = 8;
5966 for (j = 0; i && j < 32; j += booth_unit_size)
5968 i >>= booth_unit_size;
5969 cost++;
5972 *total = COSTS_N_INSNS (cost);
5973 return false;
5976 if (mode == SImode)
5978 *total = COSTS_N_INSNS (4);
5979 return false;
5982 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5984 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5986 *total = COSTS_N_INSNS (1);
5987 return false;
5991 /* Requires a libcall.  */
5992 *total = COSTS_N_INSNS (20);
5993 return false;
5995 default:
5996 return arm_rtx_costs_1 (x, outer_code, total, speed);
6001 /* RTX cost for XScale CPUs.  Thumb-2 is not supported on any XScale cores,
6002 so it can be ignored. */
6004 static bool
6005 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
6007 enum machine_mode mode = GET_MODE (x);
6009 if (TARGET_THUMB)
6011 *total = thumb1_rtx_costs (x, code, outer_code);
6012 return true;
6015 switch (code)
6017 case COMPARE:
6018 if (GET_CODE (XEXP (x, 0)) != MULT)
6019 return arm_rtx_costs_1 (x, outer_code, total, speed);
6021 /* A COMPARE of a MULT is slow on XScale; the muls instruction
6022 will stall until the multiplication is complete. */
6023 *total = COSTS_N_INSNS (3);
6024 return false;
6026 case MULT:
6027 /* There is no point basing this on the tuning, since it is always the
6028 fast variant if it exists at all. */
6029 if (mode == DImode
6030 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6031 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6032 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6034 *total = COSTS_N_INSNS (2);
6035 return false;
6039 if (mode == DImode)
6041 *total = COSTS_N_INSNS (5);
6042 return false;
6045 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6047 /* If operand 1 is a constant we can more accurately
6048 calculate the cost of the multiply. The multiplier can
6049 retire 15 bits on the first cycle and a further 12 on the
6050 second. We do, of course, have to load the constant into
6051 a register first. */
6052 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6053 /* There's a general overhead of one cycle. */
6054 int cost = 1;
6055 unsigned HOST_WIDE_INT masked_const;
6057 if (i & 0x80000000)
6058 i = ~i;
6060 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
6062 masked_const = i & 0xffff8000;
6063 if (masked_const != 0)
6065 cost++;
6066 masked_const = i & 0xf8000000;
6067 if (masked_const != 0)
6068 cost++;
6070 *total = COSTS_N_INSNS (cost);
6071 return false;
6074 if (mode == SImode)
6076 *total = COSTS_N_INSNS (3);
6077 return false;
6080 /* Requires a libcall.  */
6081 *total = COSTS_N_INSNS (20);
6082 return false;
6084 default:
6085 return arm_rtx_costs_1 (x, outer_code, total, speed);
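/* As an illustration of the XScale multiply costing above: a constant
   such as 0x12345 has bits at or above bit 15 but none at or above bit
   27, so only the first mask adds a cycle and the multiply is costed at
   COSTS_N_INSNS (2); a constant below 0x8000 stays at the base
   COSTS_N_INSNS (1).  */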
6090 /* RTX costs for 9e (and later) cores. */
6092 static bool
6093 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6094 int *total, bool speed)
6096 enum machine_mode mode = GET_MODE (x);
6098 if (TARGET_THUMB1)
6100 switch (code)
6102 case MULT:
6103 *total = COSTS_N_INSNS (3);
6104 return true;
6106 default:
6107 *total = thumb1_rtx_costs (x, code, outer_code);
6108 return true;
6112 switch (code)
6114 case MULT:
6115 /* There is no point basing this on the tuning, since it is always the
6116 fast variant if it exists at all. */
6117 if (mode == DImode
6118 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6119 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6120 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6122 *total = COSTS_N_INSNS (2);
6123 return false;
6127 if (mode == DImode)
6129 *total = COSTS_N_INSNS (5);
6130 return false;
6133 if (mode == SImode)
6135 *total = COSTS_N_INSNS (2);
6136 return false;
6139 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6141 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6143 *total = COSTS_N_INSNS (1);
6144 return false;
6148 *total = COSTS_N_INSNS (20);
6149 return false;
6151 default:
6152 return arm_rtx_costs_1 (x, outer_code, total, speed);
6155 /* All address computations that can be done are free, but rtx cost returns
6156 the same for practically all of them. So we weight the different types
6157 of address here in the order (most pref first):
6158 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
6159 static inline int
6160 arm_arm_address_cost (rtx x)
6162 enum rtx_code c = GET_CODE (x);
6164 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
6165 return 0;
6166 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
6167 return 10;
6169 if (c == PLUS || c == MINUS)
6171 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6172 return 2;
6174 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
6175 return 3;
6177 return 4;
6180 return 6;
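/* So, for example, a post-incremented base register costs 0, an address
   such as (plus reg (mult reg 4)) costs 3, a register plus constant
   offset costs 4, a bare register costs 6 and a symbolic address costs
   10, matching the preference order given above.  */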
6183 static inline int
6184 arm_thumb_address_cost (rtx x)
6186 enum rtx_code c = GET_CODE (x);
6188 if (c == REG)
6189 return 1;
6190 if (c == PLUS
6191 && GET_CODE (XEXP (x, 0)) == REG
6192 && GET_CODE (XEXP (x, 1)) == CONST_INT)
6193 return 1;
6195 return 2;
6198 static int
6199 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
6201 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
6204 static int
6205 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
6207 rtx i_pat, d_pat;
6209 /* Some true dependencies can have a higher cost depending
6210 on precisely how certain input operands are used. */
6211 if (arm_tune_xscale
6212 && REG_NOTE_KIND (link) == 0
6213 && recog_memoized (insn) >= 0
6214 && recog_memoized (dep) >= 0)
6216 int shift_opnum = get_attr_shift (insn);
6217 enum attr_type attr_type = get_attr_type (dep);
6219 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
6220 operand for INSN. If we have a shifted input operand and the
6221 instruction we depend on is another ALU instruction, then we may
6222 have to account for an additional stall. */
6223 if (shift_opnum != 0
6224 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
6226 rtx shifted_operand;
6227 int opno;
6229 /* Get the shifted operand. */
6230 extract_insn (insn);
6231 shifted_operand = recog_data.operand[shift_opnum];
6233 /* Iterate over all the operands in DEP. If we write an operand
6234 that overlaps with SHIFTED_OPERAND, then we have to increase the
6235 cost of this dependency. */
6236 extract_insn (dep);
6237 preprocess_constraints ();
6238 for (opno = 0; opno < recog_data.n_operands; opno++)
6240 /* We can ignore strict inputs. */
6241 if (recog_data.operand_type[opno] == OP_IN)
6242 continue;
6244 if (reg_overlap_mentioned_p (recog_data.operand[opno],
6245 shifted_operand))
6246 return 2;
6251 /* XXX This is not strictly true for the FPA. */
6252 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
6253 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
6254 return 0;
6256 /* Call insns don't incur a stall, even if they follow a load. */
6257 if (REG_NOTE_KIND (link) == 0
6258 && GET_CODE (insn) == CALL_INSN)
6259 return 1;
6261 if ((i_pat = single_set (insn)) != NULL
6262 && GET_CODE (SET_SRC (i_pat)) == MEM
6263 && (d_pat = single_set (dep)) != NULL
6264 && GET_CODE (SET_DEST (d_pat)) == MEM)
6266 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
6267 /* This is a load after a store; there is no conflict if the load reads
6268 from a cached area. Assume that loads from the stack, and from the
6269 constant pool are cached, and that others will miss. This is a
6270 hack. */
6272 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
6273 || reg_mentioned_p (stack_pointer_rtx, src_mem)
6274 || reg_mentioned_p (frame_pointer_rtx, src_mem)
6275 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
6276 return 1;
6279 return cost;
6282 static int fp_consts_inited = 0;
6284 /* Only zero is valid for VFP. Other values are also valid for FPA. */
6285 static const char * const strings_fp[8] =
6287 "0", "1", "2", "3",
6288 "4", "5", "0.5", "10"
6291 static REAL_VALUE_TYPE values_fp[8];
6293 static void
6294 init_fp_table (void)
6296 int i;
6297 REAL_VALUE_TYPE r;
6299 if (TARGET_VFP)
6300 fp_consts_inited = 1;
6301 else
6302 fp_consts_inited = 8;
6304 for (i = 0; i < fp_consts_inited; i++)
6306 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
6307 values_fp[i] = r;
6311 /* Return TRUE if rtx X is a valid immediate FP constant. */
6313 arm_const_double_rtx (rtx x)
6315 REAL_VALUE_TYPE r;
6316 int i;
6318 if (!fp_consts_inited)
6319 init_fp_table ();
6321 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6322 if (REAL_VALUE_MINUS_ZERO (r))
6323 return 0;
6325 for (i = 0; i < fp_consts_inited; i++)
6326 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6327 return 1;
6329 return 0;
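/* So, with the table above, constants such as 1.0 or 10.0 are accepted
   as immediates when generating FPA code, whereas this check admits only
   (positive) zero for VFP; VFPv3 immediates are handled separately
   below.  */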
6332 /* Return TRUE if rtx X is a valid immediate FPA constant. */
6334 neg_const_double_rtx_ok_for_fpa (rtx x)
6336 REAL_VALUE_TYPE r;
6337 int i;
6339 if (!fp_consts_inited)
6340 init_fp_table ();
6342 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6343 r = REAL_VALUE_NEGATE (r);
6344 if (REAL_VALUE_MINUS_ZERO (r))
6345 return 0;
6347 for (i = 0; i < 8; i++)
6348 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6349 return 1;
6351 return 0;
6355 /* VFPv3 has a fairly wide range of representable immediates, formed from
6356 "quarter-precision" floating-point values. These can be evaluated using this
6357 formula (with ^ for exponentiation):
6359 -1^s * n * 2^-r
6361 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
6362 16 <= n <= 31 and 0 <= r <= 7.
6364 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
6366 - A (most-significant) is the sign bit.
6367 - BCD are the exponent (encoded as r XOR 3).
6368 - EFGH are the mantissa (encoded as n - 16).
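/* As an illustration of the encoding just described (a minimal sketch,
   purely for exposition and not used elsewhere): given a value already
   decomposed as -1^s * n * 2^-r with 16 <= n <= 31 and 0 <= r <= 7, the
   immediate byte is assembled as below.  For 1.0 = 16 * 2^-4 this gives
   0x70, the fconst[sd] encoding of 1.0.  */
#if 0
static int
vfp3_quarter_precision_immediate (int s, int n, int r)
{
  /* A is the sign, BCD is r XOR 3, EFGH is n - 16.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif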
6371 /* Return an integer index for a VFPv3 immediate operand X suitable for the
6372 fconst[sd] instruction, or -1 if X isn't suitable. */
6373 static int
6374 vfp3_const_double_index (rtx x)
6376 REAL_VALUE_TYPE r, m;
6377 int sign, exponent;
6378 unsigned HOST_WIDE_INT mantissa, mant_hi;
6379 unsigned HOST_WIDE_INT mask;
6380 HOST_WIDE_INT m1, m2;
6381 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6383 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
6384 return -1;
6386 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6388 /* We can't represent these things, so detect them first. */
6389 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
6390 return -1;
6392 /* Extract sign, exponent and mantissa. */
6393 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6394 r = REAL_VALUE_ABS (r);
6395 exponent = REAL_EXP (&r);
6396 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6397 highest (sign) bit, with a fixed binary point at bit point_pos.
6398 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
6399 bits for the mantissa, this may fail (low bits would be lost). */
6400 real_ldexp (&m, &r, point_pos - exponent);
6401 REAL_VALUE_TO_INT (&m1, &m2, m);
6402 mantissa = m1;
6403 mant_hi = m2;
6405 /* If there are bits set in the low part of the mantissa, we can't
6406 represent this value. */
6407 if (mantissa != 0)
6408 return -1;
6410 /* Now make it so that mantissa contains the most-significant bits, and move
6411 the point_pos to indicate that the least-significant bits have been
6412 discarded. */
6413 point_pos -= HOST_BITS_PER_WIDE_INT;
6414 mantissa = mant_hi;
6416 /* We can permit four significant bits of mantissa only, plus a high bit
6417 which is always 1. */
6418 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6419 if ((mantissa & mask) != 0)
6420 return -1;
6422 /* Now we know the mantissa is in range, chop off the unneeded bits. */
6423 mantissa >>= point_pos - 5;
6425 /* The mantissa may be zero. Disallow that case. (It's possible to load the
6426 floating-point immediate zero with Neon using an integer-zero load, but
6427 that case is handled elsewhere.) */
6428 if (mantissa == 0)
6429 return -1;
6431 gcc_assert (mantissa >= 16 && mantissa <= 31);
6433 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
6434 normalized significands are in the range [1, 2). (Our mantissa is shifted
6435 left 4 places at this point relative to normalized IEEE754 values). GCC
6436 internally uses [0.5, 1) (see real.c), so the exponent returned from
6437 REAL_EXP must be altered. */
6438 exponent = 5 - exponent;
6440 if (exponent < 0 || exponent > 7)
6441 return -1;
6443 /* Sign, mantissa and exponent are now in the correct form to plug into the
6444 formula described in the comment above. */
6445 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
6448 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
6450 vfp3_const_double_rtx (rtx x)
6452 if (!TARGET_VFP3)
6453 return 0;
6455 return vfp3_const_double_index (x) != -1;
6458 /* Recognize immediates which can be used in various Neon instructions. Legal
6459 immediates are described by the following table (for VMVN variants, the
6460 bitwise inverse of the constant shown is recognized. In either case, VMOV
6461 is output and the correct instruction to use for a given constant is chosen
6462 by the assembler). The constant shown is replicated across all elements of
6463 the destination vector.
6465 insn elems variant constant (binary)
6466 ---- ----- ------- -----------------
6467 vmov i32 0 00000000 00000000 00000000 abcdefgh
6468 vmov i32 1 00000000 00000000 abcdefgh 00000000
6469 vmov i32 2 00000000 abcdefgh 00000000 00000000
6470 vmov i32 3 abcdefgh 00000000 00000000 00000000
6471 vmov i16 4 00000000 abcdefgh
6472 vmov i16 5 abcdefgh 00000000
6473 vmvn i32 6 00000000 00000000 00000000 abcdefgh
6474 vmvn i32 7 00000000 00000000 abcdefgh 00000000
6475 vmvn i32 8 00000000 abcdefgh 00000000 00000000
6476 vmvn i32 9 abcdefgh 00000000 00000000 00000000
6477 vmvn i16 10 00000000 abcdefgh
6478 vmvn i16 11 abcdefgh 00000000
6479 vmov i32 12 00000000 00000000 abcdefgh 11111111
6480 vmvn i32 13 00000000 00000000 abcdefgh 11111111
6481 vmov i32 14 00000000 abcdefgh 11111111 11111111
6482 vmvn i32 15 00000000 abcdefgh 11111111 11111111
6483 vmov i8 16 abcdefgh
6484 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
6485 eeeeeeee ffffffff gggggggg hhhhhhhh
6486 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6488 For case 18, B = !b. Representable values are exactly those accepted by
6489 vfp3_const_double_index, but are output as floating-point numbers rather
6490 than indices.
6492 Variants 0-5 (inclusive) may also be used as immediates for the second
6493 operand of VORR/VBIC instructions.
6495 The INVERSE argument causes the bitwise inverse of the given operand to be
6496 recognized instead (used for recognizing legal immediates for the VAND/VORN
6497 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6498 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6499 output, rather than the real insns vbic/vorr).
6501 INVERSE makes no difference to the recognition of float vectors.
6503 The return value is the variant of immediate as shown in the above table, or
6504 -1 if the given value doesn't match any of the listed patterns.
6506 static int
6507 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6508 rtx *modconst, int *elementwidth)
6510 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6511 matches = 1; \
6512 for (i = 0; i < idx; i += (STRIDE)) \
6513 if (!(TEST)) \
6514 matches = 0; \
6515 if (matches) \
6517 immtype = (CLASS); \
6518 elsize = (ELSIZE); \
6519 break; \
6522 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6523 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6524 unsigned char bytes[16];
6525 int immtype = -1, matches;
6526 unsigned int invmask = inverse ? 0xff : 0;
6528 /* Vectors of float constants. */
6529 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6531 rtx el0 = CONST_VECTOR_ELT (op, 0);
6532 REAL_VALUE_TYPE r0;
6534 if (!vfp3_const_double_rtx (el0))
6535 return -1;
6537 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6539 for (i = 1; i < n_elts; i++)
6541 rtx elt = CONST_VECTOR_ELT (op, i);
6542 REAL_VALUE_TYPE re;
6544 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6546 if (!REAL_VALUES_EQUAL (r0, re))
6547 return -1;
6550 if (modconst)
6551 *modconst = CONST_VECTOR_ELT (op, 0);
6553 if (elementwidth)
6554 *elementwidth = 0;
6556 return 18;
6559 /* Splat vector constant out into a byte vector. */
6560 for (i = 0; i < n_elts; i++)
6562 rtx el = CONST_VECTOR_ELT (op, i);
6563 unsigned HOST_WIDE_INT elpart;
6564 unsigned int part, parts;
6566 if (GET_CODE (el) == CONST_INT)
6568 elpart = INTVAL (el);
6569 parts = 1;
6571 else if (GET_CODE (el) == CONST_DOUBLE)
6573 elpart = CONST_DOUBLE_LOW (el);
6574 parts = 2;
6576 else
6577 gcc_unreachable ();
6579 for (part = 0; part < parts; part++)
6581 unsigned int byte;
6582 for (byte = 0; byte < innersize; byte++)
6584 bytes[idx++] = (elpart & 0xff) ^ invmask;
6585 elpart >>= BITS_PER_UNIT;
6587 if (GET_CODE (el) == CONST_DOUBLE)
6588 elpart = CONST_DOUBLE_HIGH (el);
6592 /* Sanity check. */
6593 gcc_assert (idx == GET_MODE_SIZE (mode));
6597 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6598 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6600 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6601 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6603 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6604 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6606 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6607 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6609 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6611 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6613 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6614 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6616 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6617 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6619 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6620 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6622 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6623 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6625 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6627 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6629 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6630 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6632 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6633 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6635 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6636 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6638 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6639 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6641 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6643 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6644 && bytes[i] == bytes[(i + 8) % idx]);
6646 while (0);
6648 if (immtype == -1)
6649 return -1;
6651 if (elementwidth)
6652 *elementwidth = elsize;
6654 if (modconst)
6656 unsigned HOST_WIDE_INT imm = 0;
6658 /* Un-invert bytes of recognized vector, if necessary. */
6659 if (invmask != 0)
6660 for (i = 0; i < idx; i++)
6661 bytes[i] ^= invmask;
6663 if (immtype == 17)
6665 /* FIXME: Broken on 32-bit H_W_I hosts. */
6666 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6668 for (i = 0; i < 8; i++)
6669 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6670 << (i * BITS_PER_UNIT);
6672 *modconst = GEN_INT (imm);
6674 else
6676 unsigned HOST_WIDE_INT imm = 0;
6678 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6679 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6681 *modconst = GEN_INT (imm);
6685 return immtype;
6686 #undef CHECK
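/* For example, a V4SI CONST_VECTOR whose elements are all 0x0000ff00
   splats to the repeating byte pattern 00 ff 00 00, which matches
   variant 1 in the table above: the function returns 1 with
   *ELEMENTWIDTH set to 32 and *MODCONST set to 0x0000ff00, and the
   value can be materialized with a single VMOV.I32.  */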
6689 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6690 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6691 float elements), and a modified constant (whatever should be output for a
6692 VMOV) in *MODCONST. */
6695 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6696 rtx *modconst, int *elementwidth)
6698 rtx tmpconst;
6699 int tmpwidth;
6700 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6702 if (retval == -1)
6703 return 0;
6705 if (modconst)
6706 *modconst = tmpconst;
6708 if (elementwidth)
6709 *elementwidth = tmpwidth;
6711 return 1;
6714 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6715 the immediate is valid, write a constant suitable for using as an operand
6716 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6717 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6720 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6721 rtx *modconst, int *elementwidth)
6723 rtx tmpconst;
6724 int tmpwidth;
6725 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6727 if (retval < 0 || retval > 5)
6728 return 0;
6730 if (modconst)
6731 *modconst = tmpconst;
6733 if (elementwidth)
6734 *elementwidth = tmpwidth;
6736 return 1;
6739 /* Return a string suitable for output of Neon immediate logic operation
6740 MNEM. */
6742 char *
6743 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6744 int inverse, int quad)
6746 int width, is_valid;
6747 static char templ[40];
6749 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6751 gcc_assert (is_valid != 0);
6753 if (quad)
6754 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6755 else
6756 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6758 return templ;
6761 /* Output a sequence of pairwise operations to implement a reduction.
6762 NOTE: We do "too much work" here, because pairwise operations work on two
6763 registers-worth of operands in one go.  Unfortunately I don't think we can
6764 exploit those extra calculations to do the full operation in fewer steps.
6765 Although all vector elements of the result but the first are ignored, we
6766 actually calculate the same result in each of the elements. An alternative
6767 such as initially loading a vector with zero to use as each of the second
6768 operands would use up an additional register and take an extra instruction,
6769 for no particular gain. */
6771 void
6772 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6773 rtx (*reduc) (rtx, rtx, rtx))
6775 enum machine_mode inner = GET_MODE_INNER (mode);
6776 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6777 rtx tmpsum = op1;
6779 for (i = parts / 2; i >= 1; i /= 2)
6781 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6782 emit_insn (reduc (dest, tmpsum, tmpsum));
6783 tmpsum = dest;
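/* For a V4SI reduction this emits two pairwise operations: the first
   combines neighbouring lanes of the four-element input, the second
   combines the two partial results, leaving the full reduction
   replicated in every lane of OP0 (normally only lane 0 is used).  */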
6787 /* Initialize a vector with non-constant elements. FIXME: We can do better
6788 than the current implementation (building a vector on the stack and then
6789 loading it) in many cases. See rs6000.c. */
6791 void
6792 neon_expand_vector_init (rtx target, rtx vals)
6794 enum machine_mode mode = GET_MODE (target);
6795 enum machine_mode inner = GET_MODE_INNER (mode);
6796 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6797 rtx mem;
6799 gcc_assert (VECTOR_MODE_P (mode));
6801 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6802 for (i = 0; i < n_elts; i++)
6803 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6804 XVECEXP (vals, 0, i));
6806 emit_move_insn (target, mem);
6809 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6810 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6811 reported source locations are bogus. */
6813 static void
6814 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6815 const char *err)
6817 HOST_WIDE_INT lane;
6819 gcc_assert (GET_CODE (operand) == CONST_INT);
6821 lane = INTVAL (operand);
6823 if (lane < low || lane >= high)
6824 error (err);
6827 /* Bounds-check lanes. */
6829 void
6830 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6832 bounds_check (operand, low, high, "lane out of range");
6835 /* Bounds-check constants. */
6837 void
6838 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6840 bounds_check (operand, low, high, "constant out of range");
6843 HOST_WIDE_INT
6844 neon_element_bits (enum machine_mode mode)
6846 if (mode == DImode)
6847 return GET_MODE_BITSIZE (mode);
6848 else
6849 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6853 /* Predicates for `match_operand' and `match_operator'. */
6855 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6857 cirrus_memory_offset (rtx op)
6859 /* Reject eliminable registers. */
6860 if (! (reload_in_progress || reload_completed)
6861 && ( reg_mentioned_p (frame_pointer_rtx, op)
6862 || reg_mentioned_p (arg_pointer_rtx, op)
6863 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6864 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6865 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6866 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6867 return 0;
6869 if (GET_CODE (op) == MEM)
6871 rtx ind;
6873 ind = XEXP (op, 0);
6875 /* Match: (mem (reg)). */
6876 if (GET_CODE (ind) == REG)
6877 return 1;
6879 /* Match:
6880 (mem (plus (reg)
6881 (const))). */
6882 if (GET_CODE (ind) == PLUS
6883 && GET_CODE (XEXP (ind, 0)) == REG
6884 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6885 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6886 return 1;
6889 return 0;
6892 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6893 WB is true if full writeback address modes are allowed and is false
6894 if limited writeback address modes (POST_INC and PRE_DEC) are
6895 allowed. */
6898 arm_coproc_mem_operand (rtx op, bool wb)
6900 rtx ind;
6902 /* Reject eliminable registers. */
6903 if (! (reload_in_progress || reload_completed)
6904 && ( reg_mentioned_p (frame_pointer_rtx, op)
6905 || reg_mentioned_p (arg_pointer_rtx, op)
6906 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6907 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6908 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6909 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6910 return FALSE;
6912 /* Constants are converted into offsets from labels. */
6913 if (GET_CODE (op) != MEM)
6914 return FALSE;
6916 ind = XEXP (op, 0);
6918 if (reload_completed
6919 && (GET_CODE (ind) == LABEL_REF
6920 || (GET_CODE (ind) == CONST
6921 && GET_CODE (XEXP (ind, 0)) == PLUS
6922 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6923 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6924 return TRUE;
6926 /* Match: (mem (reg)). */
6927 if (GET_CODE (ind) == REG)
6928 return arm_address_register_rtx_p (ind, 0);
6930 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
6931 acceptable in any case (subject to verification by
6932 arm_address_register_rtx_p). We need WB to be true to accept
6933 PRE_INC and POST_DEC. */
6934 if (GET_CODE (ind) == POST_INC
6935 || GET_CODE (ind) == PRE_DEC
6936 || (wb
6937 && (GET_CODE (ind) == PRE_INC
6938 || GET_CODE (ind) == POST_DEC)))
6939 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6941 if (wb
6942 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6943 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6944 && GET_CODE (XEXP (ind, 1)) == PLUS
6945 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6946 ind = XEXP (ind, 1);
6948 /* Match:
6949 (plus (reg)
6950 (const)). */
6951 if (GET_CODE (ind) == PLUS
6952 && GET_CODE (XEXP (ind, 0)) == REG
6953 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6954 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6955 && INTVAL (XEXP (ind, 1)) > -1024
6956 && INTVAL (XEXP (ind, 1)) < 1024
6957 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6958 return TRUE;
6960 return FALSE;
6963 /* Return TRUE if OP is a memory operand which we can load or store a vector
6964 to/from.  If CORE is true, we're moving from ARM registers, not Neon
6965 registers. */
6967 neon_vector_mem_operand (rtx op, bool core)
6969 rtx ind;
6971 /* Reject eliminable registers. */
6972 if (! (reload_in_progress || reload_completed)
6973 && ( reg_mentioned_p (frame_pointer_rtx, op)
6974 || reg_mentioned_p (arg_pointer_rtx, op)
6975 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6976 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6977 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6978 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6979 return FALSE;
6981 /* Constants are converted into offsets from labels. */
6982 if (GET_CODE (op) != MEM)
6983 return FALSE;
6985 ind = XEXP (op, 0);
6987 if (reload_completed
6988 && (GET_CODE (ind) == LABEL_REF
6989 || (GET_CODE (ind) == CONST
6990 && GET_CODE (XEXP (ind, 0)) == PLUS
6991 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6992 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6993 return TRUE;
6995 /* Match: (mem (reg)). */
6996 if (GET_CODE (ind) == REG)
6997 return arm_address_register_rtx_p (ind, 0);
6999 /* Allow post-increment with Neon registers. */
7000 if (!core && GET_CODE (ind) == POST_INC)
7001 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7003 #if 0
7004 /* FIXME: We can support this too if we use VLD1/VST1. */
7005 if (!core
7006 && GET_CODE (ind) == POST_MODIFY
7007 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
7008 && GET_CODE (XEXP (ind, 1)) == PLUS
7009 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
7010 ind = XEXP (ind, 1);
7011 #endif
7013 /* Match:
7014 (plus (reg)
7015 (const)). */
7016 if (!core
7017 && GET_CODE (ind) == PLUS
7018 && GET_CODE (XEXP (ind, 0)) == REG
7019 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7020 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7021 && INTVAL (XEXP (ind, 1)) > -1024
7022 && INTVAL (XEXP (ind, 1)) < 1016
7023 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7024 return TRUE;
7026 return FALSE;
7029 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
7030 type. */
7032 neon_struct_mem_operand (rtx op)
7034 rtx ind;
7036 /* Reject eliminable registers. */
7037 if (! (reload_in_progress || reload_completed)
7038 && ( reg_mentioned_p (frame_pointer_rtx, op)
7039 || reg_mentioned_p (arg_pointer_rtx, op)
7040 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7041 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7042 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7043 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7044 return FALSE;
7046 /* Constants are converted into offsets from labels. */
7047 if (GET_CODE (op) != MEM)
7048 return FALSE;
7050 ind = XEXP (op, 0);
7052 if (reload_completed
7053 && (GET_CODE (ind) == LABEL_REF
7054 || (GET_CODE (ind) == CONST
7055 && GET_CODE (XEXP (ind, 0)) == PLUS
7056 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7057 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7058 return TRUE;
7060 /* Match: (mem (reg)). */
7061 if (GET_CODE (ind) == REG)
7062 return arm_address_register_rtx_p (ind, 0);
7064 return FALSE;
7067 /* Return true if X is a register that will be eliminated later on. */
7069 arm_eliminable_register (rtx x)
7071 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
7072 || REGNO (x) == ARG_POINTER_REGNUM
7073 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
7074 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
7077 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
7078 coprocessor registers. Otherwise return NO_REGS. */
7080 enum reg_class
7081 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
7083 if (TARGET_NEON
7084 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7085 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7086 && neon_vector_mem_operand (x, FALSE))
7087 return NO_REGS;
7089 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
7090 return NO_REGS;
7092 return GENERAL_REGS;
7095 /* Values which must be returned in the most-significant end of the return
7096 register. */
7098 static bool
7099 arm_return_in_msb (const_tree valtype)
7101 return (TARGET_AAPCS_BASED
7102 && BYTES_BIG_ENDIAN
7103 && (AGGREGATE_TYPE_P (valtype)
7104 || TREE_CODE (valtype) == COMPLEX_TYPE));
7107 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
7108 Used by the Cirrus Maverick code, which has to work around
7109 a hardware bug triggered by such instructions. */
7110 static bool
7111 arm_memory_load_p (rtx insn)
7113 rtx body, lhs, rhs;
7115 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
7116 return false;
7118 body = PATTERN (insn);
7120 if (GET_CODE (body) != SET)
7121 return false;
7123 lhs = XEXP (body, 0);
7124 rhs = XEXP (body, 1);
7126 lhs = REG_OR_SUBREG_RTX (lhs);
7128 /* If the destination is not a general purpose
7129 register we do not have to worry. */
7130 if (GET_CODE (lhs) != REG
7131 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
7132 return false;
7134 /* As well as loads from memory we also have to react
7135 to loads of invalid constants which will be turned
7136 into loads from the minipool. */
7137 return (GET_CODE (rhs) == MEM
7138 || GET_CODE (rhs) == SYMBOL_REF
7139 || note_invalid_constants (insn, -1, false));
7142 /* Return TRUE if INSN is a Cirrus instruction. */
7143 static bool
7144 arm_cirrus_insn_p (rtx insn)
7146 enum attr_cirrus attr;
7148 /* get_attr cannot accept USE or CLOBBER. */
7149 if (!insn
7150 || GET_CODE (insn) != INSN
7151 || GET_CODE (PATTERN (insn)) == USE
7152 || GET_CODE (PATTERN (insn)) == CLOBBER)
7153 return 0;
7155 attr = get_attr_cirrus (insn);
7157 return attr != CIRRUS_NOT;
7160 /* Cirrus reorg for invalid instruction combinations. */
7161 static void
7162 cirrus_reorg (rtx first)
7164 enum attr_cirrus attr;
7165 rtx body = PATTERN (first);
7166 rtx t;
7167 int nops;
7169 /* Any branch must be followed by 2 non-Cirrus instructions. */
7170 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
7172 nops = 0;
7173 t = next_nonnote_insn (first);
7175 if (arm_cirrus_insn_p (t))
7176 ++ nops;
7178 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7179 ++ nops;
7181 while (nops --)
7182 emit_insn_after (gen_nop (), first);
7184 return;
7187 /* (float (blah)) is in parallel with a clobber. */
7188 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
7189 body = XVECEXP (body, 0, 0);
7191 if (GET_CODE (body) == SET)
7193 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
7195 /* cfldrd, cfldr64, cfstrd, cfstr64 must
7196 be followed by a non-Cirrus insn. */
7197 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
7199 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
7200 emit_insn_after (gen_nop (), first);
7202 return;
7204 else if (arm_memory_load_p (first))
7206 unsigned int arm_regno;
7208 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
7209 ldr/cfmv64hr combination where the Rd field is the same
7210 in both instructions must be split with a non-Cirrus
7211 insn. Example:
7213 ldr r0, blah
7215 cfmvsr mvf0, r0. */
7217 /* Get Arm register number for ldr insn. */
7218 if (GET_CODE (lhs) == REG)
7219 arm_regno = REGNO (lhs);
7220 else
7222 gcc_assert (GET_CODE (rhs) == REG);
7223 arm_regno = REGNO (rhs);
7226 /* Next insn. */
7227 first = next_nonnote_insn (first);
7229 if (! arm_cirrus_insn_p (first))
7230 return;
7232 body = PATTERN (first);
7234 /* (float (blah)) is in parallel with a clobber. */
7235 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
7236 body = XVECEXP (body, 0, 0);
7238 if (GET_CODE (body) == FLOAT)
7239 body = XEXP (body, 0);
7241 if (get_attr_cirrus (first) == CIRRUS_MOVE
7242 && GET_CODE (XEXP (body, 1)) == REG
7243 && arm_regno == REGNO (XEXP (body, 1)))
7244 emit_insn_after (gen_nop (), first);
7246 return;
7250 /* get_attr cannot accept USE or CLOBBER. */
7251 if (!first
7252 || GET_CODE (first) != INSN
7253 || GET_CODE (PATTERN (first)) == USE
7254 || GET_CODE (PATTERN (first)) == CLOBBER)
7255 return;
7257 attr = get_attr_cirrus (first);
7259 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
7260 must be followed by a non-coprocessor instruction. */
7261 if (attr == CIRRUS_COMPARE)
7263 nops = 0;
7265 t = next_nonnote_insn (first);
7267 if (arm_cirrus_insn_p (t))
7268 ++ nops;
7270 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7271 ++ nops;
7273 while (nops --)
7274 emit_insn_after (gen_nop (), first);
7276 return;
7280 /* Return TRUE if X references a SYMBOL_REF. */
7282 symbol_mentioned_p (rtx x)
7284 const char * fmt;
7285 int i;
7287 if (GET_CODE (x) == SYMBOL_REF)
7288 return 1;
7290 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
7291 are constant offsets, not symbols. */
7292 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7293 return 0;
7295 fmt = GET_RTX_FORMAT (GET_CODE (x));
7297 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7299 if (fmt[i] == 'E')
7301 int j;
7303 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7304 if (symbol_mentioned_p (XVECEXP (x, i, j)))
7305 return 1;
7307 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
7308 return 1;
7311 return 0;
7314 /* Return TRUE if X references a LABEL_REF. */
7316 label_mentioned_p (rtx x)
7318 const char * fmt;
7319 int i;
7321 if (GET_CODE (x) == LABEL_REF)
7322 return 1;
7324 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
7325 instruction, but they are constant offsets, not symbols. */
7326 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7327 return 0;
7329 fmt = GET_RTX_FORMAT (GET_CODE (x));
7330 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7332 if (fmt[i] == 'E')
7334 int j;
7336 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7337 if (label_mentioned_p (XVECEXP (x, i, j)))
7338 return 1;
7340 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
7341 return 1;
7344 return 0;
7348 tls_mentioned_p (rtx x)
7350 switch (GET_CODE (x))
7352 case CONST:
7353 return tls_mentioned_p (XEXP (x, 0));
7355 case UNSPEC:
7356 if (XINT (x, 1) == UNSPEC_TLS)
7357 return 1;
7359 default:
7360 return 0;
7364 /* Must not copy a SET whose source operand is PC-relative. */
7366 static bool
7367 arm_cannot_copy_insn_p (rtx insn)
7369 rtx pat = PATTERN (insn);
7371 if (GET_CODE (pat) == SET)
7373 rtx rhs = SET_SRC (pat);
7375 if (GET_CODE (rhs) == UNSPEC
7376 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
7377 return TRUE;
7379 if (GET_CODE (rhs) == MEM
7380 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
7381 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
7382 return TRUE;
7385 return FALSE;
7388 enum rtx_code
7389 minmax_code (rtx x)
7391 enum rtx_code code = GET_CODE (x);
7393 switch (code)
7395 case SMAX:
7396 return GE;
7397 case SMIN:
7398 return LE;
7399 case UMIN:
7400 return LEU;
7401 case UMAX:
7402 return GEU;
7403 default:
7404 gcc_unreachable ();
7408 /* Return 1 if memory locations are adjacent. */
7410 adjacent_mem_locations (rtx a, rtx b)
7412 /* We don't guarantee to preserve the order of these memory refs. */
7413 if (volatile_refs_p (a) || volatile_refs_p (b))
7414 return 0;
7416 if ((GET_CODE (XEXP (a, 0)) == REG
7417 || (GET_CODE (XEXP (a, 0)) == PLUS
7418 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
7419 && (GET_CODE (XEXP (b, 0)) == REG
7420 || (GET_CODE (XEXP (b, 0)) == PLUS
7421 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
7423 HOST_WIDE_INT val0 = 0, val1 = 0;
7424 rtx reg0, reg1;
7425 int val_diff;
7427 if (GET_CODE (XEXP (a, 0)) == PLUS)
7429 reg0 = XEXP (XEXP (a, 0), 0);
7430 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
7432 else
7433 reg0 = XEXP (a, 0);
7435 if (GET_CODE (XEXP (b, 0)) == PLUS)
7437 reg1 = XEXP (XEXP (b, 0), 0);
7438 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
7440 else
7441 reg1 = XEXP (b, 0);
7443 /* Don't accept any offset that will require multiple
7444 instructions to handle, since this would cause the
7445 arith_adjacentmem pattern to output an overlong sequence. */
7446 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
7447 return 0;
7449 /* Don't allow an eliminable register: register elimination can make
7450 the offset too large. */
7451 if (arm_eliminable_register (reg0))
7452 return 0;
7454 val_diff = val1 - val0;
7456 if (arm_ld_sched)
7458 /* If the target has load delay slots, then there's no benefit
7459 to using an ldm instruction unless the offset is zero and
7460 we are optimizing for size. */
7461 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
7462 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
7463 && (val_diff == 4 || val_diff == -4));
7466 return ((REGNO (reg0) == REGNO (reg1))
7467 && (val_diff == 4 || val_diff == -4));
7470 return 0;
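/* Thus two SImode references [r4] and [r4, #4] are considered adjacent
   (same base, offsets differing by exactly 4), although on cores with
   load scheduling (currently ARM8, ARM9 and StrongARM) the combination
   is only accepted when optimizing for size; references based on an
   eliminable register are always rejected, since elimination may later
   push the offsets out of range.  */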
7474 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7475 HOST_WIDE_INT *load_offset)
7477 int unsorted_regs[4];
7478 HOST_WIDE_INT unsorted_offsets[4];
7479 int order[4];
7480 int base_reg = -1;
7481 int i;
7483 /* Can only handle 2, 3, or 4 insns at present,
7484 though could be easily extended if required. */
7485 gcc_assert (nops >= 2 && nops <= 4);
7487 memset (order, 0, 4 * sizeof (int));
7489 /* Loop over the operands and check that the memory references are
7490 suitable (i.e. immediate offsets from the same base register). At
7491 the same time, extract the target register, and the memory
7492 offsets. */
7493 for (i = 0; i < nops; i++)
7495 rtx reg;
7496 rtx offset;
7498 /* Convert a subreg of a mem into the mem itself. */
7499 if (GET_CODE (operands[nops + i]) == SUBREG)
7500 operands[nops + i] = alter_subreg (operands + (nops + i));
7502 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7504 /* Don't reorder volatile memory references; it doesn't seem worth
7505 looking for the case where the order is ok anyway. */
7506 if (MEM_VOLATILE_P (operands[nops + i]))
7507 return 0;
7509 offset = const0_rtx;
7511 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7512 || (GET_CODE (reg) == SUBREG
7513 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7514 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7515 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7516 == REG)
7517 || (GET_CODE (reg) == SUBREG
7518 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7519 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7520 == CONST_INT)))
7522 if (i == 0)
7524 base_reg = REGNO (reg);
7525 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7526 ? REGNO (operands[i])
7527 : REGNO (SUBREG_REG (operands[i])));
7528 order[0] = 0;
7530 else
7532 if (base_reg != (int) REGNO (reg))
7533 /* Not addressed from the same base register. */
7534 return 0;
7536 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7537 ? REGNO (operands[i])
7538 : REGNO (SUBREG_REG (operands[i])));
7539 if (unsorted_regs[i] < unsorted_regs[order[0]])
7540 order[0] = i;
7543 /* If it isn't an integer register, or if it overwrites the
7544 base register but isn't the last insn in the list, then
7545 we can't do this. */
7546 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7547 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7548 return 0;
7550 unsorted_offsets[i] = INTVAL (offset);
7552 else
7553 /* Not a suitable memory address. */
7554 return 0;
7557 /* All the useful information has now been extracted from the
7558 operands into unsorted_regs and unsorted_offsets; additionally,
7559 order[0] has been set to the lowest numbered register in the
7560 list. Sort the registers into order, and check that the memory
7561 offsets are ascending and adjacent. */
7563 for (i = 1; i < nops; i++)
7565 int j;
7567 order[i] = order[i - 1];
7568 for (j = 0; j < nops; j++)
7569 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7570 && (order[i] == order[i - 1]
7571 || unsorted_regs[j] < unsorted_regs[order[i]]))
7572 order[i] = j;
7574 /* Have we found a suitable register?  If not, one must be used more
7575 than once. */
7576 if (order[i] == order[i - 1])
7577 return 0;
7579 /* Is the memory address adjacent and ascending? */
7580 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7581 return 0;
7584 if (base)
7586 *base = base_reg;
7588 for (i = 0; i < nops; i++)
7589 regs[i] = unsorted_regs[order[i]];
7591 *load_offset = unsorted_offsets[order[0]];
7594 if (unsorted_offsets[order[0]] == 0)
7595 return 1; /* ldmia */
7597 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7598 return 2; /* ldmib */
7600 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7601 return 3; /* ldmda */
7603 if (unsorted_offsets[order[nops - 1]] == -4)
7604 return 4; /* ldmdb */
7606 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7607 if the offset isn't small enough. The reason 2 ldrs are faster
7608 is because these ARMs are able to do more than one cache access
7609 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7610 whilst the ARM8 has a double bandwidth cache. This means that
7611 these cores can do both an instruction fetch and a data fetch in
7612 a single cycle, so the trick of calculating the address into a
7613 scratch register (one of the result regs) and then doing a load
7614 multiple actually becomes slower (and no smaller in code size).
7615 That is the transformation
7617 ldr rd1, [rbase + offset]
7618 ldr rd2, [rbase + offset + 4]
to

7622 add rd1, rbase, offset
7623 ldmia rd1, {rd1, rd2}
7625 produces worse code -- '3 cycles + any stalls on rd2' instead of
7626 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7627 access per cycle, the first sequence could never complete in less
7628 than 6 cycles, whereas the ldm sequence would only take 5 and
7629 would make better use of sequential accesses if not hitting the
7630 cache.
7632 We cheat here and test 'arm_ld_sched' which we currently know to
7633 only be true for the ARM8, ARM9 and StrongARM. If this ever
7634 changes, then the test below needs to be reworked. */
7635 if (nops == 2 && arm_ld_sched)
7636 return 0;
7638 /* Can't do it without setting up the offset, only do this if it takes
7639 no more than one insn. */
7640 return (const_ok_for_arm (unsorted_offsets[order[0]])
7641 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7644 const char *
7645 emit_ldm_seq (rtx *operands, int nops)
7647 int regs[4];
7648 int base_reg;
7649 HOST_WIDE_INT offset;
7650 char buf[100];
7651 int i;
7653 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7655 case 1:
7656 strcpy (buf, "ldm%(ia%)\t");
7657 break;
7659 case 2:
7660 strcpy (buf, "ldm%(ib%)\t");
7661 break;
7663 case 3:
7664 strcpy (buf, "ldm%(da%)\t");
7665 break;
7667 case 4:
7668 strcpy (buf, "ldm%(db%)\t");
7669 break;
7671 case 5:
7672 if (offset >= 0)
7673 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7674 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7675 (long) offset);
7676 else
7677 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7678 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7679 (long) -offset);
7680 output_asm_insn (buf, operands);
7681 base_reg = regs[0];
7682 strcpy (buf, "ldm%(ia%)\t");
7683 break;
7685 default:
7686 gcc_unreachable ();
7689 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7690 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7692 for (i = 1; i < nops; i++)
7693 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7694 reg_names[regs[i]]);
7696 strcat (buf, "}\t%@ phole ldm");
7698 output_asm_insn (buf, operands);
7699 return "";
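/* For example, loading r0 from [r4] and r1 from [r4, #4] is recognized
   as case 1 above and emitted as "ldmia r4, {r0, r1}" (plus the
   peephole comment); when the common offset first needs an add or sub,
   case 5 materializes the base address into the first destination
   register and then issues the ldmia from there.  */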
7703 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7704 HOST_WIDE_INT * load_offset)
7706 int unsorted_regs[4];
7707 HOST_WIDE_INT unsorted_offsets[4];
7708 int order[4];
7709 int base_reg = -1;
7710 int i;
7712 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7713 extended if required. */
7714 gcc_assert (nops >= 2 && nops <= 4);
7716 memset (order, 0, 4 * sizeof (int));
7718 /* Loop over the operands and check that the memory references are
7719 suitable (i.e. immediate offsets from the same base register). At
7720 the same time, extract the target register, and the memory
7721 offsets. */
7722 for (i = 0; i < nops; i++)
7724 rtx reg;
7725 rtx offset;
7727 /* Convert a subreg of a mem into the mem itself. */
7728 if (GET_CODE (operands[nops + i]) == SUBREG)
7729 operands[nops + i] = alter_subreg (operands + (nops + i));
7731 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7733 /* Don't reorder volatile memory references; it doesn't seem worth
7734 looking for the case where the order is ok anyway. */
7735 if (MEM_VOLATILE_P (operands[nops + i]))
7736 return 0;
7738 offset = const0_rtx;
7740 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7741 || (GET_CODE (reg) == SUBREG
7742 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7743 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7744 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7745 == REG)
7746 || (GET_CODE (reg) == SUBREG
7747 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7748 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7749 == CONST_INT)))
7751 if (i == 0)
7753 base_reg = REGNO (reg);
7754 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7755 ? REGNO (operands[i])
7756 : REGNO (SUBREG_REG (operands[i])));
7757 order[0] = 0;
7759 else
7761 if (base_reg != (int) REGNO (reg))
7762 /* Not addressed from the same base register. */
7763 return 0;
7765 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7766 ? REGNO (operands[i])
7767 : REGNO (SUBREG_REG (operands[i])));
7768 if (unsorted_regs[i] < unsorted_regs[order[0]])
7769 order[0] = i;
7772 /* If it isn't an integer register, then we can't do this. */
7773 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7774 return 0;
7776 unsorted_offsets[i] = INTVAL (offset);
7778 else
7779 /* Not a suitable memory address. */
7780 return 0;
7783 /* All the useful information has now been extracted from the
7784 operands into unsorted_regs and unsorted_offsets; additionally,
7785 order[0] has been set to the lowest numbered register in the
7786 list. Sort the registers into order, and check that the memory
7787 offsets are ascending and adjacent. */
7789 for (i = 1; i < nops; i++)
7791 int j;
7793 order[i] = order[i - 1];
7794 for (j = 0; j < nops; j++)
7795 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7796 && (order[i] == order[i - 1]
7797 || unsorted_regs[j] < unsorted_regs[order[i]]))
7798 order[i] = j;
7800 /* Have we found a suitable register?  If not, one must be used more
7801 than once. */
7802 if (order[i] == order[i - 1])
7803 return 0;
7805 /* Is the memory address adjacent and ascending? */
7806 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7807 return 0;
7810 if (base)
7812 *base = base_reg;
7814 for (i = 0; i < nops; i++)
7815 regs[i] = unsorted_regs[order[i]];
7817 *load_offset = unsorted_offsets[order[0]];
7820 if (unsorted_offsets[order[0]] == 0)
7821 return 1; /* stmia */
7823 if (unsorted_offsets[order[0]] == 4)
7824 return 2; /* stmib */
7826 if (unsorted_offsets[order[nops - 1]] == 0)
7827 return 3; /* stmda */
7829 if (unsorted_offsets[order[nops - 1]] == -4)
7830 return 4; /* stmdb */
7832 return 0;
7835 const char *
7836 emit_stm_seq (rtx *operands, int nops)
7838 int regs[4];
7839 int base_reg;
7840 HOST_WIDE_INT offset;
7841 char buf[100];
7842 int i;
7844 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7846 case 1:
7847 strcpy (buf, "stm%(ia%)\t");
7848 break;
7850 case 2:
7851 strcpy (buf, "stm%(ib%)\t");
7852 break;
7854 case 3:
7855 strcpy (buf, "stm%(da%)\t");
7856 break;
7858 case 4:
7859 strcpy (buf, "stm%(db%)\t");
7860 break;
7862 default:
7863 gcc_unreachable ();
7866 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7867 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7869 for (i = 1; i < nops; i++)
7870 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7871 reg_names[regs[i]]);
7873 strcat (buf, "}\t%@ phole stm");
7875 output_asm_insn (buf, operands);
7876 return "";
7879 /* Routines for use in generating RTL. */
7882 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7883 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7885 HOST_WIDE_INT offset = *offsetp;
7886 int i = 0, j;
7887 rtx result;
7888 int sign = up ? 1 : -1;
7889 rtx mem, addr;
7891 /* XScale has load-store double instructions, but they have stricter
7892 alignment requirements than load-store multiple, so we cannot
7893 use them.
7895 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7896 the pipeline until completion.
7898 NREGS CYCLES
1 3
2 4
3 5
4 6
7904 An ldr instruction takes 1-3 cycles, but does not block the
7905 pipeline.
7907 NREGS CYCLES
7908 1 1-3
7909 2 2-6
7910 3 3-9
7911 4 4-12
7913 Best case ldr will always win. However, the more ldr instructions
7914 we issue, the less likely we are to be able to schedule them well.
7915 Using ldr instructions also increases code size.
7917 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7918 for counts of 3 or 4 regs. */
7919 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7921 rtx seq;
7923 start_sequence ();
7925 for (i = 0; i < count; i++)
7927 addr = plus_constant (from, i * 4 * sign);
7928 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7929 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7930 offset += 4 * sign;
7933 if (write_back)
7935 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7936 *offsetp = offset;
7939 seq = get_insns ();
7940 end_sequence ();
7942 return seq;
7945 result = gen_rtx_PARALLEL (VOIDmode,
7946 rtvec_alloc (count + (write_back ? 1 : 0)));
7947 if (write_back)
7949 XVECEXP (result, 0, 0)
7950 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7951 i = 1;
7952 count++;
7955 for (j = 0; i < count; i++, j++)
7957 addr = plus_constant (from, j * 4 * sign);
7958 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7959 XVECEXP (result, 0, i)
7960 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7961 offset += 4 * sign;
7964 if (write_back)
7965 *offsetp = offset;
7967 return result;
rtx
7971 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7972 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7974 HOST_WIDE_INT offset = *offsetp;
7975 int i = 0, j;
7976 rtx result;
7977 int sign = up ? 1 : -1;
7978 rtx mem, addr;
7980 /* See arm_gen_load_multiple for discussion of
7981 the pros/cons of ldm/stm usage for XScale. */
7982 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7984 rtx seq;
7986 start_sequence ();
7988 for (i = 0; i < count; i++)
7990 addr = plus_constant (to, i * 4 * sign);
7991 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7992 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7993 offset += 4 * sign;
7996 if (write_back)
7998 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7999 *offsetp = offset;
8002 seq = get_insns ();
8003 end_sequence ();
8005 return seq;
8008 result = gen_rtx_PARALLEL (VOIDmode,
8009 rtvec_alloc (count + (write_back ? 1 : 0)));
8010 if (write_back)
8012 XVECEXP (result, 0, 0)
8013 = gen_rtx_SET (VOIDmode, to,
8014 plus_constant (to, count * 4 * sign));
8015 i = 1;
8016 count++;
8019 for (j = 0; i < count; i++, j++)
8021 addr = plus_constant (to, j * 4 * sign);
8022 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8023 XVECEXP (result, 0, i)
8024 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
8025 offset += 4 * sign;
8028 if (write_back)
8029 *offsetp = offset;
8031 return result;
int
8035 arm_gen_movmemqi (rtx *operands)
8037 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
8038 HOST_WIDE_INT srcoffset, dstoffset;
8039 int i;
8040 rtx src, dst, srcbase, dstbase;
8041 rtx part_bytes_reg = NULL;
8042 rtx mem;
8044 if (GET_CODE (operands[2]) != CONST_INT
8045 || GET_CODE (operands[3]) != CONST_INT
8046 || INTVAL (operands[2]) > 64
8047 || INTVAL (operands[3]) & 3)
8048 return 0;
8050 dstbase = operands[0];
8051 srcbase = operands[1];
8053 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
8054 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
8056 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
8057 out_words_to_go = INTVAL (operands[2]) / 4;
8058 last_bytes = INTVAL (operands[2]) & 3;
8059 dstoffset = srcoffset = 0;
8061 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
8062 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
8064 for (i = 0; in_words_to_go >= 2; i+=4)
8066 if (in_words_to_go > 4)
8067 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
8068 srcbase, &srcoffset));
8069 else
8070 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
8071 FALSE, srcbase, &srcoffset));
8073 if (out_words_to_go)
8075 if (out_words_to_go > 4)
8076 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
8077 dstbase, &dstoffset));
8078 else if (out_words_to_go != 1)
8079 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
8080 dst, TRUE,
8081 (last_bytes == 0
8082 ? FALSE : TRUE),
8083 dstbase, &dstoffset));
8084 else
8086 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8087 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
8088 if (last_bytes != 0)
8090 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
8091 dstoffset += 4;
8096 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
8097 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
8100 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
8101 if (out_words_to_go)
8103 rtx sreg;
8105 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8106 sreg = copy_to_reg (mem);
8108 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8109 emit_move_insn (mem, sreg);
8110 in_words_to_go--;
8112 gcc_assert (!in_words_to_go); /* Sanity check */
8115 if (in_words_to_go)
8117 gcc_assert (in_words_to_go > 0);
8119 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8120 part_bytes_reg = copy_to_mode_reg (SImode, mem);
8123 gcc_assert (!last_bytes || part_bytes_reg);
8125 if (BYTES_BIG_ENDIAN && last_bytes)
8127 rtx tmp = gen_reg_rtx (SImode);
8129 /* The bytes we want are in the top end of the word. */
8130 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
8131 GEN_INT (8 * (4 - last_bytes))));
8132 part_bytes_reg = tmp;
8134 while (last_bytes)
8136 mem = adjust_automodify_address (dstbase, QImode,
8137 plus_constant (dst, last_bytes - 1),
8138 dstoffset + last_bytes - 1);
8139 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8141 if (--last_bytes)
8143 tmp = gen_reg_rtx (SImode);
8144 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
8145 part_bytes_reg = tmp;
8150 else
8152 if (last_bytes > 1)
8154 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
8155 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
8156 last_bytes -= 2;
8157 if (last_bytes)
8159 rtx tmp = gen_reg_rtx (SImode);
8160 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
8161 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
8162 part_bytes_reg = tmp;
8163 dstoffset += 2;
8167 if (last_bytes)
8169 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
8170 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8174 return 1;
8177 /* Select a dominance comparison mode if possible for a test of the general
8178 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
8179 COND_OR == DOM_CC_X_AND_Y => (X && Y)
8180 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
8181 COND_OR == DOM_CC_X_OR_Y => (X || Y)
8182 In all cases OP will be either EQ or NE, but we don't need to know which
8183 here. If we are unable to support a dominance comparison we return
8184 CC mode. This will then fail to match for the RTL expressions that
8185 generate this call. */
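/* As an illustration (not an exhaustive list): a condition such as
   (x == 0 && y == 0) arrives here as two EQ comparisons combined with
   DOM_CC_X_AND_Y and yields CC_DEQmode, whereas (x < y || x == y)
   combines LT and EQ, neither of which dominates the other, so CCmode
   is returned and the combined form is not used.  */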
8186 enum machine_mode
8187 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
8189 enum rtx_code cond1, cond2;
8190 int swapped = 0;
8192 /* Currently we will probably get the wrong result if the individual
8193 comparisons are not simple. This also ensures that it is safe to
8194 reverse a comparison if necessary. */
8195 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
8196 != CCmode)
8197 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
8198 != CCmode))
8199 return CCmode;
8201 /* The if_then_else variant of this tests the second condition if the
8202 first passes, but is true if the first fails. Reverse the first
8203 condition to get a true "inclusive-or" expression. */
8204 if (cond_or == DOM_CC_NX_OR_Y)
8205 cond1 = reverse_condition (cond1);
8207 /* If the comparisons are not equal, and one doesn't dominate the other,
8208 then we can't do this. */
8209 if (cond1 != cond2
8210 && !comparison_dominates_p (cond1, cond2)
8211 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
8212 return CCmode;
8214 if (swapped)
8216 enum rtx_code temp = cond1;
8217 cond1 = cond2;
8218 cond2 = temp;
8221 switch (cond1)
8223 case EQ:
8224 if (cond_or == DOM_CC_X_AND_Y)
8225 return CC_DEQmode;
8227 switch (cond2)
8229 case EQ: return CC_DEQmode;
8230 case LE: return CC_DLEmode;
8231 case LEU: return CC_DLEUmode;
8232 case GE: return CC_DGEmode;
8233 case GEU: return CC_DGEUmode;
8234 default: gcc_unreachable ();
8237 case LT:
8238 if (cond_or == DOM_CC_X_AND_Y)
8239 return CC_DLTmode;
8241 switch (cond2)
8243 case LT:
8244 return CC_DLTmode;
8245 case LE:
8246 return CC_DLEmode;
8247 case NE:
8248 return CC_DNEmode;
8249 default:
8250 gcc_unreachable ();
8253 case GT:
8254 if (cond_or == DOM_CC_X_AND_Y)
8255 return CC_DGTmode;
8257 switch (cond2)
8259 case GT:
8260 return CC_DGTmode;
8261 case GE:
8262 return CC_DGEmode;
8263 case NE:
8264 return CC_DNEmode;
8265 default:
8266 gcc_unreachable ();
8269 case LTU:
8270 if (cond_or == DOM_CC_X_AND_Y)
8271 return CC_DLTUmode;
8273 switch (cond2)
8275 case LTU:
8276 return CC_DLTUmode;
8277 case LEU:
8278 return CC_DLEUmode;
8279 case NE:
8280 return CC_DNEmode;
8281 default:
8282 gcc_unreachable ();
8285 case GTU:
8286 if (cond_or == DOM_CC_X_AND_Y)
8287 return CC_DGTUmode;
8289 switch (cond2)
8291 case GTU:
8292 return CC_DGTUmode;
8293 case GEU:
8294 return CC_DGEUmode;
8295 case NE:
8296 return CC_DNEmode;
8297 default:
8298 gcc_unreachable ();
8301 /* The remaining cases only occur when both comparisons are the
8302 same. */
8303 case NE:
8304 gcc_assert (cond1 == cond2);
8305 return CC_DNEmode;
8307 case LE:
8308 gcc_assert (cond1 == cond2);
8309 return CC_DLEmode;
8311 case GE:
8312 gcc_assert (cond1 == cond2);
8313 return CC_DGEmode;
8315 case LEU:
8316 gcc_assert (cond1 == cond2);
8317 return CC_DLEUmode;
8319 case GEU:
8320 gcc_assert (cond1 == cond2);
8321 return CC_DGEUmode;
8323 default:
8324 gcc_unreachable ();
8328 enum machine_mode
8329 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
8331 /* All floating point compares return CCFP if it is an equality
8332 comparison, and CCFPE otherwise. */
8333 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
8335 switch (op)
8337 case EQ:
8338 case NE:
8339 case UNORDERED:
8340 case ORDERED:
8341 case UNLT:
8342 case UNLE:
8343 case UNGT:
8344 case UNGE:
8345 case UNEQ:
8346 case LTGT:
8347 return CCFPmode;
8349 case LT:
8350 case LE:
8351 case GT:
8352 case GE:
8353 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
8354 return CCFPmode;
8355 return CCFPEmode;
8357 default:
8358 gcc_unreachable ();
8362 /* A compare with a shifted operand. Because of canonicalization, the
8363 comparison will have to be swapped when we emit the assembler. */
8364 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
8365 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8366 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
8367 || GET_CODE (x) == ROTATERT))
8368 return CC_SWPmode;
8370 /* This operation is performed swapped, but since we only rely on the Z
8371 flag we don't need an additional mode. */
8372 if (GET_MODE (y) == SImode && REG_P (y)
8373 && GET_CODE (x) == NEG
8374 && (op == EQ || op == NE))
8375 return CC_Zmode;
8377 /* This is a special case that is used by combine to allow a
8378 comparison of a shifted byte load to be split into a zero-extend
8379 followed by a comparison of the shifted integer (only valid for
8380 equalities and unsigned inequalities). */
8381 if (GET_MODE (x) == SImode
8382 && GET_CODE (x) == ASHIFT
8383 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
8384 && GET_CODE (XEXP (x, 0)) == SUBREG
8385 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
8386 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
8387 && (op == EQ || op == NE
8388 || op == GEU || op == GTU || op == LTU || op == LEU)
8389 && GET_CODE (y) == CONST_INT)
8390 return CC_Zmode;
8392 /* A construct for a conditional compare, if the false arm contains
8393 0, then both conditions must be true, otherwise either condition
8394 must be true. Not all conditions are possible, so CCmode is
8395 returned if it can't be done. */
8396 if (GET_CODE (x) == IF_THEN_ELSE
8397 && (XEXP (x, 2) == const0_rtx
8398 || XEXP (x, 2) == const1_rtx)
8399 && COMPARISON_P (XEXP (x, 0))
8400 && COMPARISON_P (XEXP (x, 1)))
8401 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8402 INTVAL (XEXP (x, 2)));
8404 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
8405 if (GET_CODE (x) == AND
8406 && COMPARISON_P (XEXP (x, 0))
8407 && COMPARISON_P (XEXP (x, 1)))
8408 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8409 DOM_CC_X_AND_Y);
8411 if (GET_CODE (x) == IOR
8412 && COMPARISON_P (XEXP (x, 0))
8413 && COMPARISON_P (XEXP (x, 1)))
8414 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8415 DOM_CC_X_OR_Y);
8417 /* An operation (on Thumb) where we want to test for a single bit.
8418 This is done by shifting that bit up into the top bit of a
8419 scratch register; we can then branch on the sign bit. */
8420 if (TARGET_THUMB1
8421 && GET_MODE (x) == SImode
8422 && (op == EQ || op == NE)
8423 && GET_CODE (x) == ZERO_EXTRACT
8424 && XEXP (x, 1) == const1_rtx)
8425 return CC_Nmode;
8427 /* An operation that sets the condition codes as a side-effect, the
8428 V flag is not set correctly, so we can only use comparisons where
8429 this doesn't matter. (For LT and GE we can use "mi" and "pl"
8430 instead.) */
8431 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
8432 if (GET_MODE (x) == SImode
8433 && y == const0_rtx
8434 && (op == EQ || op == NE || op == LT || op == GE)
8435 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
8436 || GET_CODE (x) == AND || GET_CODE (x) == IOR
8437 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
8438 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
8439 || GET_CODE (x) == LSHIFTRT
8440 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8441 || GET_CODE (x) == ROTATERT
8442 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
8443 return CC_NOOVmode;
8445 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
8446 return CC_Zmode;
8448 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
8449 && GET_CODE (x) == PLUS
8450 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
8451 return CC_Cmode;
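  /* The case just above covers, for example, the unsigned overflow test
     (a + b < a): the PLUS is compared against one of its own operands
     with LTU, and only the carry flag needs to be valid.  */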
8453 return CCmode;
8456 /* X and Y are two things to compare using CODE. Emit the compare insn and
8457 return the rtx for register 0 in the proper mode. FP means this is a
8458 floating point compare: I don't think that it is needed on the arm. */
rtx
8460 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
8462 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
8463 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
8465 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
8467 return cc_reg;
8470 /* Generate a sequence of insns that will generate the correct return
8471 address mask depending on the physical architecture that the program
8472 is running on. */
rtx
8474 arm_gen_return_addr_mask (void)
8476 rtx reg = gen_reg_rtx (Pmode);
8478 emit_insn (gen_return_addr_mask (reg));
8479 return reg;
8482 void
8483 arm_reload_in_hi (rtx *operands)
8485 rtx ref = operands[1];
8486 rtx base, scratch;
8487 HOST_WIDE_INT offset = 0;
8489 if (GET_CODE (ref) == SUBREG)
8491 offset = SUBREG_BYTE (ref);
8492 ref = SUBREG_REG (ref);
8495 if (GET_CODE (ref) == REG)
8497 /* We have a pseudo which has been spilt onto the stack; there
8498 are two cases here: the first where there is a simple
8499 stack-slot replacement and a second where the stack-slot is
8500 out of range, or is used as a subreg. */
8501 if (reg_equiv_mem[REGNO (ref)])
8503 ref = reg_equiv_mem[REGNO (ref)];
8504 base = find_replacement (&XEXP (ref, 0));
8506 else
8507 /* The slot is out of range, or was dressed up in a SUBREG. */
8508 base = reg_equiv_address[REGNO (ref)];
8510 else
8511 base = find_replacement (&XEXP (ref, 0));
8513 /* Handle the case where the address is too complex to be offset by 1. */
8514 if (GET_CODE (base) == MINUS
8515 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8517 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8519 emit_set_insn (base_plus, base);
8520 base = base_plus;
8522 else if (GET_CODE (base) == PLUS)
8524 /* The addend must be CONST_INT, or we would have dealt with it above. */
8525 HOST_WIDE_INT hi, lo;
8527 offset += INTVAL (XEXP (base, 1));
8528 base = XEXP (base, 0);
8530 /* Rework the address into a legal sequence of insns. */
8531 /* Valid range for lo is -4095 -> 4095 */
8532 lo = (offset >= 0
8533 ? (offset & 0xfff)
8534 : -((-offset) & 0xfff));
8536 /* Corner case, if lo is the max offset then we would be out of range
8537 once we have added the additional 1 below, so bump the msb into the
8538 pre-loading insn(s). */
8539 if (lo == 4095)
8540 lo &= 0x7ff;
8542 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8543 ^ (HOST_WIDE_INT) 0x80000000)
8544 - (HOST_WIDE_INT) 0x80000000);
8546 gcc_assert (hi + lo == offset);
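  /* Worked example of the split above (illustrative): offset 0x1234
     gives lo = 0x234 and hi = 0x1000; for the corner case offset == 4095
     the extra mask drops lo to 0x7ff and hi becomes 0x800, so that both
     lo and lo + 1 stay within the +/-4095 range of the byte loads
     emitted below.  */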
8548 if (hi != 0)
8550 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8552 /* Get the base address; addsi3 knows how to handle constants
8553 that require more than one insn. */
8554 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8555 base = base_plus;
8556 offset = lo;
8560 /* Operands[2] may overlap operands[0] (though it won't overlap
8561 operands[1]), that's why we asked for a DImode reg -- so we can
8562 use the bit that does not overlap. */
8563 if (REGNO (operands[2]) == REGNO (operands[0]))
8564 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8565 else
8566 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8568 emit_insn (gen_zero_extendqisi2 (scratch,
8569 gen_rtx_MEM (QImode,
8570 plus_constant (base,
8571 offset))));
8572 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8573 gen_rtx_MEM (QImode,
8574 plus_constant (base,
8575 offset + 1))));
8576 if (!BYTES_BIG_ENDIAN)
8577 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8578 gen_rtx_IOR (SImode,
8579 gen_rtx_ASHIFT
8580 (SImode,
8581 gen_rtx_SUBREG (SImode, operands[0], 0),
8582 GEN_INT (8)),
8583 scratch));
8584 else
8585 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8586 gen_rtx_IOR (SImode,
8587 gen_rtx_ASHIFT (SImode, scratch,
8588 GEN_INT (8)),
8589 gen_rtx_SUBREG (SImode, operands[0], 0)));
8592 /* Handle storing a half-word to memory during reload by synthesizing as two
8593 byte stores. Take care not to clobber the input values until after we
8594 have moved them somewhere safe. This code assumes that if the DImode
8595 scratch in operands[2] overlaps either the input value or output address
8596 in some way, then that value must die in this insn (we absolutely need
8597 two scratch registers for some corner cases). */
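/* On a little-endian target the sequence below amounts to (illustrative
   pseudo-assembly):

	strb	outval, [base, #offset]
	mov	scratch, outval, lsr #8
	strb	scratch, [base, #offset + 1]

   with the two byte addresses swapped on big-endian targets.  */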
8598 void
8599 arm_reload_out_hi (rtx *operands)
8601 rtx ref = operands[0];
8602 rtx outval = operands[1];
8603 rtx base, scratch;
8604 HOST_WIDE_INT offset = 0;
8606 if (GET_CODE (ref) == SUBREG)
8608 offset = SUBREG_BYTE (ref);
8609 ref = SUBREG_REG (ref);
8612 if (GET_CODE (ref) == REG)
8614 /* We have a pseudo which has been spilt onto the stack; there
8615 are two cases here: the first where there is a simple
8616 stack-slot replacement and a second where the stack-slot is
8617 out of range, or is used as a subreg. */
8618 if (reg_equiv_mem[REGNO (ref)])
8620 ref = reg_equiv_mem[REGNO (ref)];
8621 base = find_replacement (&XEXP (ref, 0));
8623 else
8624 /* The slot is out of range, or was dressed up in a SUBREG. */
8625 base = reg_equiv_address[REGNO (ref)];
8627 else
8628 base = find_replacement (&XEXP (ref, 0));
8630 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8632 /* Handle the case where the address is too complex to be offset by 1. */
8633 if (GET_CODE (base) == MINUS
8634 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8636 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8638 /* Be careful not to destroy OUTVAL. */
8639 if (reg_overlap_mentioned_p (base_plus, outval))
8641 /* Updating base_plus might destroy outval, see if we can
8642 swap the scratch and base_plus. */
8643 if (!reg_overlap_mentioned_p (scratch, outval))
8645 rtx tmp = scratch;
8646 scratch = base_plus;
8647 base_plus = tmp;
8649 else
8651 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8653 /* Be conservative and copy OUTVAL into the scratch now,
8654 this should only be necessary if outval is a subreg
8655 of something larger than a word. */
8656 /* XXX Might this clobber base? I can't see how it can,
8657 since scratch is known to overlap with OUTVAL, and
8658 must be wider than a word. */
8659 emit_insn (gen_movhi (scratch_hi, outval));
8660 outval = scratch_hi;
8664 emit_set_insn (base_plus, base);
8665 base = base_plus;
8667 else if (GET_CODE (base) == PLUS)
8669 /* The addend must be CONST_INT, or we would have dealt with it above. */
8670 HOST_WIDE_INT hi, lo;
8672 offset += INTVAL (XEXP (base, 1));
8673 base = XEXP (base, 0);
8675 /* Rework the address into a legal sequence of insns. */
8676 /* Valid range for lo is -4095 -> 4095 */
8677 lo = (offset >= 0
8678 ? (offset & 0xfff)
8679 : -((-offset) & 0xfff));
8681 /* Corner case, if lo is the max offset then we would be out of range
8682 once we have added the additional 1 below, so bump the msb into the
8683 pre-loading insn(s). */
8684 if (lo == 4095)
8685 lo &= 0x7ff;
8687 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8688 ^ (HOST_WIDE_INT) 0x80000000)
8689 - (HOST_WIDE_INT) 0x80000000);
8691 gcc_assert (hi + lo == offset);
8693 if (hi != 0)
8695 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8697 /* Be careful not to destroy OUTVAL. */
8698 if (reg_overlap_mentioned_p (base_plus, outval))
8700 /* Updating base_plus might destroy outval, see if we
8701 can swap the scratch and base_plus. */
8702 if (!reg_overlap_mentioned_p (scratch, outval))
8704 rtx tmp = scratch;
8705 scratch = base_plus;
8706 base_plus = tmp;
8708 else
8710 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8712 /* Be conservative and copy outval into scratch now,
8713 this should only be necessary if outval is a
8714 subreg of something larger than a word. */
8715 /* XXX Might this clobber base? I can't see how it
8716 can, since scratch is known to overlap with
8717 outval. */
8718 emit_insn (gen_movhi (scratch_hi, outval));
8719 outval = scratch_hi;
8723 /* Get the base address; addsi3 knows how to handle constants
8724 that require more than one insn. */
8725 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8726 base = base_plus;
8727 offset = lo;
8731 if (BYTES_BIG_ENDIAN)
8733 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8734 plus_constant (base, offset + 1)),
8735 gen_lowpart (QImode, outval)));
8736 emit_insn (gen_lshrsi3 (scratch,
8737 gen_rtx_SUBREG (SImode, outval, 0),
8738 GEN_INT (8)));
8739 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8740 gen_lowpart (QImode, scratch)));
8742 else
8744 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8745 gen_lowpart (QImode, outval)));
8746 emit_insn (gen_lshrsi3 (scratch,
8747 gen_rtx_SUBREG (SImode, outval, 0),
8748 GEN_INT (8)));
8749 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8750 plus_constant (base, offset + 1)),
8751 gen_lowpart (QImode, scratch)));
8755 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8756 (padded to the size of a word) should be passed in a register. */
8758 static bool
8759 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8761 if (TARGET_AAPCS_BASED)
8762 return must_pass_in_stack_var_size (mode, type);
8763 else
8764 return must_pass_in_stack_var_size_or_pad (mode, type);
8768 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8769 Return true if an argument passed on the stack should be padded upwards,
8770 i.e. if the least-significant byte has useful data.
8771 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8772 aggregate types are placed in the lowest memory address. */
8774 bool
8775 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8777 if (!TARGET_AAPCS_BASED)
8778 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8780 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8781 return false;
8783 return true;
8787 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8788 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8789 byte of the register has useful data, and return the opposite if the
8790 most significant byte does.
8791 For AAPCS, small aggregates and small complex types are always padded
8792 upwards. */
8794 bool
8795 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8796 tree type, int first ATTRIBUTE_UNUSED)
8798 if (TARGET_AAPCS_BASED
8799 && BYTES_BIG_ENDIAN
8800 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8801 && int_size_in_bytes (type) <= 4)
8802 return true;
8804 /* Otherwise, use default padding. */
8805 return !BYTES_BIG_ENDIAN;
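/* As an example of the AAPCS rule above (illustrative): on a big-endian
   AAPCS target a 3-byte structure held in a register is padded upwards,
   just as it would be on a little-endian target, whereas the non-AAPCS
   default (!BYTES_BIG_ENDIAN) would pad it downwards.  */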
8809 /* Print a symbolic form of X to the debug file, F. */
8810 static void
8811 arm_print_value (FILE *f, rtx x)
8813 switch (GET_CODE (x))
8815 case CONST_INT:
8816 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8817 return;
8819 case CONST_DOUBLE:
8820 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8821 return;
8823 case CONST_VECTOR:
8825 int i;
8827 fprintf (f, "<");
8828 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8830 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8831 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8832 fputc (',', f);
8834 fprintf (f, ">");
8836 return;
8838 case CONST_STRING:
8839 fprintf (f, "\"%s\"", XSTR (x, 0));
8840 return;
8842 case SYMBOL_REF:
8843 fprintf (f, "`%s'", XSTR (x, 0));
8844 return;
8846 case LABEL_REF:
8847 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8848 return;
8850 case CONST:
8851 arm_print_value (f, XEXP (x, 0));
8852 return;
8854 case PLUS:
8855 arm_print_value (f, XEXP (x, 0));
8856 fprintf (f, "+");
8857 arm_print_value (f, XEXP (x, 1));
8858 return;
8860 case PC:
8861 fprintf (f, "pc");
8862 return;
8864 default:
8865 fprintf (f, "????");
8866 return;
8870 /* Routines for manipulation of the constant pool. */
8872 /* Arm instructions cannot load a large constant directly into a
8873 register; they have to come from a pc relative load. The constant
8874 must therefore be placed in the addressable range of the pc
8875 relative load. Depending on the precise pc relative load
8876 instruction the range is somewhere between 256 bytes and 4k. This
8877 means that we often have to dump a constant inside a function, and
8878 generate code to branch around it.
8880 It is important to minimize this, since the branches will slow
8881 things down and make the code larger.
8883 Normally we can hide the table after an existing unconditional
8884 branch so that there is no interruption of the flow, but in the
8885 worst case the code looks like this:
8887 ldr rn, L1
8889 b L2
8890 align
8891 L1: .long value
8895 ldr rn, L3
8897 b L4
8898 align
8899 L3: .long value
8903 We fix this by performing a scan after scheduling, which notices
8904 which instructions need to have their operands fetched from the
8905 constant table and builds the table.
8907 The algorithm starts by building a table of all the constants that
8908 need fixing up and all the natural barriers in the function (places
8909 where a constant table can be dropped without breaking the flow).
8910 For each fixup we note how far the pc-relative replacement will be
8911 able to reach and the offset of the instruction into the function.
8913 Having built the table we then group the fixes together to form
8914 tables that are as large as possible (subject to addressing
8915 constraints) and emit each table of constants after the last
8916 barrier that is within range of all the instructions in the group.
8917 If a group does not contain a barrier, then we forcibly create one
8918 by inserting a jump instruction into the flow. Once the table has
8919 been inserted, the insns are then modified to reference the
8920 relevant entry in the pool.
8922 Possible enhancements to the algorithm (not implemented) are:
8924 1) For some processors and object formats, there may be benefit in
8925 aligning the pools to the start of cache lines; this alignment
8926 would need to be taken into account when calculating addressability
8927 of a pool. */
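/* The reachable range for each fix is taken from the pool_range and
   neg_pool_range insn attributes (see push_minipool_fix below).  As a
   rough guide, an ARM-mode word load can reach about 4K bytes in either
   direction, while a Thumb-1 literal load reaches only forwards and only
   about 1K bytes, which is why pools must sometimes be dumped in the
   middle of a function.  */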
8929 /* These typedefs are located at the start of this file, so that
8930 they can be used in the prototypes there. This comment is to
8931 remind readers of that fact so that the following structures
8932 can be understood more easily.
8934 typedef struct minipool_node Mnode;
8935 typedef struct minipool_fixup Mfix; */
8937 struct minipool_node
8939 /* Doubly linked chain of entries. */
8940 Mnode * next;
8941 Mnode * prev;
8942 /* The maximum offset into the code that this entry can be placed. While
8943 pushing fixes for forward references, all entries are sorted in order
8944 of increasing max_address. */
8945 HOST_WIDE_INT max_address;
8946 /* Similarly for an entry inserted for a backwards ref. */
8947 HOST_WIDE_INT min_address;
8948 /* The number of fixes referencing this entry. This can become zero
8949 if we "unpush" an entry. In this case we ignore the entry when we
8950 come to emit the code. */
8951 int refcount;
8952 /* The offset from the start of the minipool. */
8953 HOST_WIDE_INT offset;
8954 /* The value in table. */
8955 rtx value;
8956 /* The mode of value. */
8957 enum machine_mode mode;
8958 /* The size of the value. With iWMMXt enabled
8959 sizes > 4 also imply an alignment of 8 bytes. */
8960 int fix_size;
8963 struct minipool_fixup
8965 Mfix * next;
8966 rtx insn;
8967 HOST_WIDE_INT address;
8968 rtx * loc;
8969 enum machine_mode mode;
8970 int fix_size;
8971 rtx value;
8972 Mnode * minipool;
8973 HOST_WIDE_INT forwards;
8974 HOST_WIDE_INT backwards;
8977 /* Fixes less than a word need padding out to a word boundary. */
8978 #define MINIPOOL_FIX_SIZE(mode) \
8979 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
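/* For example, MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) and MINIPOOL_FIX_SIZE
   (DFmode) are both 8.  */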
8981 static Mnode * minipool_vector_head;
8982 static Mnode * minipool_vector_tail;
8983 static rtx minipool_vector_label;
8984 static int minipool_pad;
8986 /* The linked list of all minipool fixes required for this function. */
8987 Mfix * minipool_fix_head;
8988 Mfix * minipool_fix_tail;
8989 /* The fix entry for the current minipool, once it has been placed. */
8990 Mfix * minipool_barrier;
8992 /* Determines if INSN is the start of a jump table. Returns the end
8993 of the TABLE or NULL_RTX. */
8994 static rtx
8995 is_jump_table (rtx insn)
8997 rtx table;
8999 if (GET_CODE (insn) == JUMP_INSN
9000 && JUMP_LABEL (insn) != NULL
9001 && ((table = next_real_insn (JUMP_LABEL (insn)))
9002 == next_real_insn (insn))
9003 && table != NULL
9004 && GET_CODE (table) == JUMP_INSN
9005 && (GET_CODE (PATTERN (table)) == ADDR_VEC
9006 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
9007 return table;
9009 return NULL_RTX;
9012 #ifndef JUMP_TABLES_IN_TEXT_SECTION
9013 #define JUMP_TABLES_IN_TEXT_SECTION 0
9014 #endif
9016 static HOST_WIDE_INT
9017 get_jump_table_size (rtx insn)
9019 /* ADDR_VECs only take room if read-only data goes into the text
9020 section. */
9021 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
9023 rtx body = PATTERN (insn);
9024 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
9025 HOST_WIDE_INT size;
9026 HOST_WIDE_INT modesize;
9028 modesize = GET_MODE_SIZE (GET_MODE (body));
9029 size = modesize * XVECLEN (body, elt);
9030 switch (modesize)
9032 case 1:
9033 /* Round up size of TBB table to a halfword boundary. */
9034 size = (size + 1) & ~(HOST_WIDE_INT)1;
9035 break;
9036 case 2:
9037 /* No padding necessary for TBH. */
9038 break;
9039 case 4:
9040 /* Add two bytes for alignment on Thumb. */
9041 if (TARGET_THUMB)
9042 size += 2;
9043 break;
9044 default:
9045 gcc_unreachable ();
9047 return size;
9050 return 0;
9053 /* Move a minipool fix MP from its current location to before MAX_MP.
9054 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
9055 constraints may need updating. */
9056 static Mnode *
9057 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
9058 HOST_WIDE_INT max_address)
9060 /* The code below assumes these are different. */
9061 gcc_assert (mp != max_mp);
9063 if (max_mp == NULL)
9065 if (max_address < mp->max_address)
9066 mp->max_address = max_address;
9068 else
9070 if (max_address > max_mp->max_address - mp->fix_size)
9071 mp->max_address = max_mp->max_address - mp->fix_size;
9072 else
9073 mp->max_address = max_address;
9075 /* Unlink MP from its current position. Since max_mp is non-null,
9076 mp->prev must be non-null. */
9077 mp->prev->next = mp->next;
9078 if (mp->next != NULL)
9079 mp->next->prev = mp->prev;
9080 else
9081 minipool_vector_tail = mp->prev;
9083 /* Re-insert it before MAX_MP. */
9084 mp->next = max_mp;
9085 mp->prev = max_mp->prev;
9086 max_mp->prev = mp;
9088 if (mp->prev != NULL)
9089 mp->prev->next = mp;
9090 else
9091 minipool_vector_head = mp;
9094 /* Save the new entry. */
9095 max_mp = mp;
9097 /* Scan over the preceding entries and adjust their addresses as
9098 required. */
9099 while (mp->prev != NULL
9100 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9102 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9103 mp = mp->prev;
9106 return max_mp;
9109 /* Add a constant to the minipool for a forward reference. Returns the
9110 node added or NULL if the constant will not fit in this pool. */
9111 static Mnode *
9112 add_minipool_forward_ref (Mfix *fix)
9114 /* If set, max_mp is the first pool_entry that has a lower
9115 constraint than the one we are trying to add. */
9116 Mnode * max_mp = NULL;
9117 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
9118 Mnode * mp;
9120 /* If the minipool starts before the end of FIX->INSN then this FIX
9121 can not be placed into the current pool. Furthermore, adding the
9122 new constant pool entry may cause the pool to start FIX_SIZE bytes
9123 earlier. */
9124 if (minipool_vector_head &&
9125 (fix->address + get_attr_length (fix->insn)
9126 >= minipool_vector_head->max_address - fix->fix_size))
9127 return NULL;
9129 /* Scan the pool to see if a constant with the same value has
9130 already been added. While we are doing this, also note the
9131 location where we must insert the constant if it doesn't already
9132 exist. */
9133 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9135 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9136 && fix->mode == mp->mode
9137 && (GET_CODE (fix->value) != CODE_LABEL
9138 || (CODE_LABEL_NUMBER (fix->value)
9139 == CODE_LABEL_NUMBER (mp->value)))
9140 && rtx_equal_p (fix->value, mp->value))
9142 /* More than one fix references this entry. */
9143 mp->refcount++;
9144 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
9147 /* Note the insertion point if necessary. */
9148 if (max_mp == NULL
9149 && mp->max_address > max_address)
9150 max_mp = mp;
9152 /* If we are inserting an 8-byte aligned quantity and
9153 we have not already found an insertion point, then
9154 make sure that all such 8-byte aligned quantities are
9155 placed at the start of the pool. */
9156 if (ARM_DOUBLEWORD_ALIGN
9157 && max_mp == NULL
9158 && fix->fix_size >= 8
9159 && mp->fix_size < 8)
9161 max_mp = mp;
9162 max_address = mp->max_address;
9166 /* The value is not currently in the minipool, so we need to create
9167 a new entry for it. If MAX_MP is NULL, the entry will be put on
9168 the end of the list since the placement is less constrained than
9169 any existing entry. Otherwise, we insert the new fix before
9170 MAX_MP and, if necessary, adjust the constraints on the other
9171 entries. */
9172 mp = XNEW (Mnode);
9173 mp->fix_size = fix->fix_size;
9174 mp->mode = fix->mode;
9175 mp->value = fix->value;
9176 mp->refcount = 1;
9177 /* Not yet required for a backwards ref. */
9178 mp->min_address = -65536;
9180 if (max_mp == NULL)
9182 mp->max_address = max_address;
9183 mp->next = NULL;
9184 mp->prev = minipool_vector_tail;
9186 if (mp->prev == NULL)
9188 minipool_vector_head = mp;
9189 minipool_vector_label = gen_label_rtx ();
9191 else
9192 mp->prev->next = mp;
9194 minipool_vector_tail = mp;
9196 else
9198 if (max_address > max_mp->max_address - mp->fix_size)
9199 mp->max_address = max_mp->max_address - mp->fix_size;
9200 else
9201 mp->max_address = max_address;
9203 mp->next = max_mp;
9204 mp->prev = max_mp->prev;
9205 max_mp->prev = mp;
9206 if (mp->prev != NULL)
9207 mp->prev->next = mp;
9208 else
9209 minipool_vector_head = mp;
9212 /* Save the new entry. */
9213 max_mp = mp;
9215 /* Scan over the preceding entries and adjust their addresses as
9216 required. */
9217 while (mp->prev != NULL
9218 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9220 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9221 mp = mp->prev;
9224 return max_mp;
9227 static Mnode *
9228 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
9229 HOST_WIDE_INT min_address)
9231 HOST_WIDE_INT offset;
9233 /* The code below assumes these are different. */
9234 gcc_assert (mp != min_mp);
9236 if (min_mp == NULL)
9238 if (min_address > mp->min_address)
9239 mp->min_address = min_address;
9241 else
9243 /* We will adjust this below if it is too loose. */
9244 mp->min_address = min_address;
9246 /* Unlink MP from its current position. Since min_mp is non-null,
9247 mp->next must be non-null. */
9248 mp->next->prev = mp->prev;
9249 if (mp->prev != NULL)
9250 mp->prev->next = mp->next;
9251 else
9252 minipool_vector_head = mp->next;
9254 /* Reinsert it after MIN_MP. */
9255 mp->prev = min_mp;
9256 mp->next = min_mp->next;
9257 min_mp->next = mp;
9258 if (mp->next != NULL)
9259 mp->next->prev = mp;
9260 else
9261 minipool_vector_tail = mp;
9264 min_mp = mp;
9266 offset = 0;
9267 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9269 mp->offset = offset;
9270 if (mp->refcount > 0)
9271 offset += mp->fix_size;
9273 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
9274 mp->next->min_address = mp->min_address + mp->fix_size;
9277 return min_mp;
9280 /* Add a constant to the minipool for a backward reference. Returns the
9281 node added or NULL if the constant will not fit in this pool.
9283 Note that the code for insertion for a backwards reference can be
9284 somewhat confusing because the calculated offsets for each fix do
9285 not take into account the size of the pool (which is still under
9286 construction). */
9287 static Mnode *
9288 add_minipool_backward_ref (Mfix *fix)
9290 /* If set, min_mp is the last pool_entry that has a lower constraint
9291 than the one we are trying to add. */
9292 Mnode *min_mp = NULL;
9293 /* This can be negative, since it is only a constraint. */
9294 HOST_WIDE_INT min_address = fix->address - fix->backwards;
9295 Mnode *mp;
9297 /* If we can't reach the current pool from this insn, or if we can't
9298 insert this entry at the end of the pool without pushing other
9299 fixes out of range, then we don't try. This ensures that we
9300 can't fail later on. */
9301 if (min_address >= minipool_barrier->address
9302 || (minipool_vector_tail->min_address + fix->fix_size
9303 >= minipool_barrier->address))
9304 return NULL;
9306 /* Scan the pool to see if a constant with the same value has
9307 already been added. While we are doing this, also note the
9308 location where we must insert the constant if it doesn't already
9309 exist. */
9310 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
9312 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9313 && fix->mode == mp->mode
9314 && (GET_CODE (fix->value) != CODE_LABEL
9315 || (CODE_LABEL_NUMBER (fix->value)
9316 == CODE_LABEL_NUMBER (mp->value)))
9317 && rtx_equal_p (fix->value, mp->value)
9318 /* Check that there is enough slack to move this entry to the
9319 end of the table (this is conservative). */
9320 && (mp->max_address
9321 > (minipool_barrier->address
9322 + minipool_vector_tail->offset
9323 + minipool_vector_tail->fix_size)))
9325 mp->refcount++;
9326 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
9329 if (min_mp != NULL)
9330 mp->min_address += fix->fix_size;
9331 else
9333 /* Note the insertion point if necessary. */
9334 if (mp->min_address < min_address)
9336 /* For now, we do not allow the insertion of 8-byte alignment
9337 requiring nodes anywhere but at the start of the pool. */
9338 if (ARM_DOUBLEWORD_ALIGN
9339 && fix->fix_size >= 8 && mp->fix_size < 8)
9340 return NULL;
9341 else
9342 min_mp = mp;
9344 else if (mp->max_address
9345 < minipool_barrier->address + mp->offset + fix->fix_size)
9347 /* Inserting before this entry would push the fix beyond
9348 its maximum address (which can happen if we have
9349 re-located a forwards fix); force the new fix to come
9350 after it. */
9351 if (ARM_DOUBLEWORD_ALIGN
9352 && fix->fix_size >= 8 && mp->fix_size < 8)
9353 return NULL;
9354 else
9356 min_mp = mp;
9357 min_address = mp->min_address + fix->fix_size;
9360 /* Do not insert a non-8-byte aligned quantity before 8-byte
9361 aligned quantities. */
9362 else if (ARM_DOUBLEWORD_ALIGN
9363 && fix->fix_size < 8
9364 && mp->fix_size >= 8)
9366 min_mp = mp;
9367 min_address = mp->min_address + fix->fix_size;
9372 /* We need to create a new entry. */
9373 mp = XNEW (Mnode);
9374 mp->fix_size = fix->fix_size;
9375 mp->mode = fix->mode;
9376 mp->value = fix->value;
9377 mp->refcount = 1;
9378 mp->max_address = minipool_barrier->address + 65536;
9380 mp->min_address = min_address;
9382 if (min_mp == NULL)
9384 mp->prev = NULL;
9385 mp->next = minipool_vector_head;
9387 if (mp->next == NULL)
9389 minipool_vector_tail = mp;
9390 minipool_vector_label = gen_label_rtx ();
9392 else
9393 mp->next->prev = mp;
9395 minipool_vector_head = mp;
9397 else
9399 mp->next = min_mp->next;
9400 mp->prev = min_mp;
9401 min_mp->next = mp;
9403 if (mp->next != NULL)
9404 mp->next->prev = mp;
9405 else
9406 minipool_vector_tail = mp;
9409 /* Save the new entry. */
9410 min_mp = mp;
9412 if (mp->prev)
9413 mp = mp->prev;
9414 else
9415 mp->offset = 0;
9417 /* Scan over the following entries and adjust their offsets. */
9418 while (mp->next != NULL)
9420 if (mp->next->min_address < mp->min_address + mp->fix_size)
9421 mp->next->min_address = mp->min_address + mp->fix_size;
9423 if (mp->refcount)
9424 mp->next->offset = mp->offset + mp->fix_size;
9425 else
9426 mp->next->offset = mp->offset;
9428 mp = mp->next;
9431 return min_mp;
9434 static void
9435 assign_minipool_offsets (Mfix *barrier)
9437 HOST_WIDE_INT offset = 0;
9438 Mnode *mp;
9440 minipool_barrier = barrier;
9442 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9444 mp->offset = offset;
9446 if (mp->refcount > 0)
9447 offset += mp->fix_size;
9451 /* Output the literal table */
9452 static void
9453 dump_minipool (rtx scan)
9455 Mnode * mp;
9456 Mnode * nmp;
9457 int align64 = 0;
9459 if (ARM_DOUBLEWORD_ALIGN)
9460 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9461 if (mp->refcount > 0 && mp->fix_size >= 8)
9463 align64 = 1;
9464 break;
9467 if (dump_file)
9468 fprintf (dump_file,
9469 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
9470 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
9472 scan = emit_label_after (gen_label_rtx (), scan);
9473 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
9474 scan = emit_label_after (minipool_vector_label, scan);
9476 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
9478 if (mp->refcount > 0)
9480 if (dump_file)
9482 fprintf (dump_file,
9483 ";; Offset %u, min %ld, max %ld ",
9484 (unsigned) mp->offset, (unsigned long) mp->min_address,
9485 (unsigned long) mp->max_address);
9486 arm_print_value (dump_file, mp->value);
9487 fputc ('\n', dump_file);
9490 switch (mp->fix_size)
9492 #ifdef HAVE_consttable_1
9493 case 1:
9494 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9495 break;
9497 #endif
9498 #ifdef HAVE_consttable_2
9499 case 2:
9500 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9501 break;
9503 #endif
9504 #ifdef HAVE_consttable_4
9505 case 4:
9506 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9507 break;
9509 #endif
9510 #ifdef HAVE_consttable_8
9511 case 8:
9512 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9513 break;
9515 #endif
9516 #ifdef HAVE_consttable_16
9517 case 16:
9518 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9519 break;
9521 #endif
9522 default:
9523 gcc_unreachable ();
9527 nmp = mp->next;
9528 free (mp);
9531 minipool_vector_head = minipool_vector_tail = NULL;
9532 scan = emit_insn_after (gen_consttable_end (), scan);
9533 scan = emit_barrier_after (scan);
9536 /* Return the cost of forcibly inserting a barrier after INSN. */
9537 static int
9538 arm_barrier_cost (rtx insn)
9540 /* Basing the location of the pool on the loop depth is preferable,
9541 but at the moment, the basic block information seems to be
9542 corrupt by this stage of the compilation. */
9543 int base_cost = 50;
9544 rtx next = next_nonnote_insn (insn);
9546 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9547 base_cost -= 20;
9549 switch (GET_CODE (insn))
9551 case CODE_LABEL:
9552 /* It will always be better to place the table before the label, rather
9553 than after it. */
9554 return 50;
9556 case INSN:
9557 case CALL_INSN:
9558 return base_cost;
9560 case JUMP_INSN:
9561 return base_cost - 10;
9563 default:
9564 return base_cost + 10;
9568 /* Find the best place in the insn stream in the range
9569 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9570 Create the barrier by inserting a jump and add a new fix entry for
9571 it. */
9572 static Mfix *
9573 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9575 HOST_WIDE_INT count = 0;
9576 rtx barrier;
9577 rtx from = fix->insn;
9578 /* The instruction after which we will insert the jump. */
9579 rtx selected = NULL;
9580 int selected_cost;
9581 /* The address at which the jump instruction will be placed. */
9582 HOST_WIDE_INT selected_address;
9583 Mfix * new_fix;
9584 HOST_WIDE_INT max_count = max_address - fix->address;
9585 rtx label = gen_label_rtx ();
9587 selected_cost = arm_barrier_cost (from);
9588 selected_address = fix->address;
9590 while (from && count < max_count)
9592 rtx tmp;
9593 int new_cost;
9595 /* This code shouldn't have been called if there was a natural barrier
9596 within range. */
9597 gcc_assert (GET_CODE (from) != BARRIER);
9599 /* Count the length of this insn. */
9600 count += get_attr_length (from);
9602 /* If there is a jump table, add its length. */
9603 tmp = is_jump_table (from);
9604 if (tmp != NULL)
9606 count += get_jump_table_size (tmp);
9608 /* Jump tables aren't in a basic block, so base the cost on
9609 the dispatch insn. If we select this location, we will
9610 still put the pool after the table. */
9611 new_cost = arm_barrier_cost (from);
9613 if (count < max_count
9614 && (!selected || new_cost <= selected_cost))
9616 selected = tmp;
9617 selected_cost = new_cost;
9618 selected_address = fix->address + count;
9621 /* Continue after the dispatch table. */
9622 from = NEXT_INSN (tmp);
9623 continue;
9626 new_cost = arm_barrier_cost (from);
9628 if (count < max_count
9629 && (!selected || new_cost <= selected_cost))
9631 selected = from;
9632 selected_cost = new_cost;
9633 selected_address = fix->address + count;
9636 from = NEXT_INSN (from);
9639 /* Make sure that we found a place to insert the jump. */
9640 gcc_assert (selected);
9642 /* Create a new JUMP_INSN that branches around a barrier. */
9643 from = emit_jump_insn_after (gen_jump (label), selected);
9644 JUMP_LABEL (from) = label;
9645 barrier = emit_barrier_after (from);
9646 emit_label_after (label, barrier);
9648 /* Create a minipool barrier entry for the new barrier. */
9649 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9650 new_fix->insn = barrier;
9651 new_fix->address = selected_address;
9652 new_fix->next = fix->next;
9653 fix->next = new_fix;
9655 return new_fix;
9658 /* Record that there is a natural barrier in the insn stream at
9659 ADDRESS. */
9660 static void
9661 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9663 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9665 fix->insn = insn;
9666 fix->address = address;
9668 fix->next = NULL;
9669 if (minipool_fix_head != NULL)
9670 minipool_fix_tail->next = fix;
9671 else
9672 minipool_fix_head = fix;
9674 minipool_fix_tail = fix;
9677 /* Record INSN, which will need fixing up to load a value from the
9678 minipool. ADDRESS is the offset of the insn since the start of the
9679 function; LOC is a pointer to the part of the insn which requires
9680 fixing; VALUE is the constant that must be loaded, which is of type
9681 MODE. */
9682 static void
9683 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9684 enum machine_mode mode, rtx value)
9686 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9688 fix->insn = insn;
9689 fix->address = address;
9690 fix->loc = loc;
9691 fix->mode = mode;
9692 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9693 fix->value = value;
9694 fix->forwards = get_attr_pool_range (insn);
9695 fix->backwards = get_attr_neg_pool_range (insn);
9696 fix->minipool = NULL;
9698 /* If an insn doesn't have a range defined for it, then it isn't
9699 expecting to be reworked by this code. Better to stop now than
9700 to generate duff assembly code. */
9701 gcc_assert (fix->forwards || fix->backwards);
9703 /* If an entry requires 8-byte alignment then assume all constant pools
9704 require 4 bytes of padding. Trying to do this later on a per-pool
9705 basis is awkward because existing pool entries have to be modified. */
9706 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9707 minipool_pad = 4;
9709 if (dump_file)
9711 fprintf (dump_file,
9712 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9713 GET_MODE_NAME (mode),
9714 INSN_UID (insn), (unsigned long) address,
9715 -1 * (long)fix->backwards, (long)fix->forwards);
9716 arm_print_value (dump_file, fix->value);
9717 fprintf (dump_file, "\n");
9720 /* Add it to the chain of fixes. */
9721 fix->next = NULL;
9723 if (minipool_fix_head != NULL)
9724 minipool_fix_tail->next = fix;
9725 else
9726 minipool_fix_head = fix;
9728 minipool_fix_tail = fix;
9731 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9732 Returns the number of insns needed, or 99 if we don't know how to
9733 do it. */
int
9735 arm_const_double_inline_cost (rtx val)
9737 rtx lowpart, highpart;
9738 enum machine_mode mode;
9740 mode = GET_MODE (val);
9742 if (mode == VOIDmode)
9743 mode = DImode;
9745 gcc_assert (GET_MODE_SIZE (mode) == 8);
9747 lowpart = gen_lowpart (SImode, val);
9748 highpart = gen_highpart_mode (SImode, mode, val);
9750 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9751 gcc_assert (GET_CODE (highpart) == CONST_INT);
9753 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9754 NULL_RTX, NULL_RTX, 0, 0)
9755 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9756 NULL_RTX, NULL_RTX, 0, 0));
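/* For instance, the DImode constant 0x0000000100000001 splits into a low
   part and a high part of 1 each; 1 is a valid ARM immediate, so each
   half costs one insn and the inline cost returned is 2.  */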
9759 /* Return true if it is worthwhile to split a 64-bit constant into two
9760 32-bit operations. This is the case if optimizing for size, or
9761 if we have load delay slots, or if one 32-bit part can be done with
9762 a single data operation. */
9763 bool
9764 arm_const_double_by_parts (rtx val)
9766 enum machine_mode mode = GET_MODE (val);
9767 rtx part;
9769 if (optimize_size || arm_ld_sched)
9770 return true;
9772 if (mode == VOIDmode)
9773 mode = DImode;
9775 part = gen_highpart_mode (SImode, mode, val);
9777 gcc_assert (GET_CODE (part) == CONST_INT);
9779 if (const_ok_for_arm (INTVAL (part))
9780 || const_ok_for_arm (~INTVAL (part)))
9781 return true;
9783 part = gen_lowpart (SImode, val);
9785 gcc_assert (GET_CODE (part) == CONST_INT);
9787 if (const_ok_for_arm (INTVAL (part))
9788 || const_ok_for_arm (~INTVAL (part)))
9789 return true;
9791 return false;
9794 /* Scan INSN and note any of its operands that need fixing.
9795 If DO_PUSHES is false we do not actually push any of the fixups
9796 needed. The function returns TRUE if any fixups were needed/pushed.
9797 This is used by arm_memory_load_p() which needs to know about loads
9798 of constants that will be converted into minipool loads. */
9799 static bool
9800 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9802 bool result = false;
9803 int opno;
9805 extract_insn (insn);
9807 if (!constrain_operands (1))
9808 fatal_insn_not_found (insn);
9810 if (recog_data.n_alternatives == 0)
9811 return false;
9813 /* Fill in recog_op_alt with information about the constraints of
9814 this insn. */
9815 preprocess_constraints ();
9817 for (opno = 0; opno < recog_data.n_operands; opno++)
9819 /* Things we need to fix can only occur in inputs. */
9820 if (recog_data.operand_type[opno] != OP_IN)
9821 continue;
9823 /* If this alternative is a memory reference, then any mention
9824 of constants in this alternative is really to fool reload
9825 into allowing us to accept one there. We need to fix them up
9826 now so that we output the right code. */
9827 if (recog_op_alt[opno][which_alternative].memory_ok)
9829 rtx op = recog_data.operand[opno];
9831 if (CONSTANT_P (op))
9833 if (do_pushes)
9834 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9835 recog_data.operand_mode[opno], op);
9836 result = true;
9838 else if (GET_CODE (op) == MEM
9839 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9840 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9842 if (do_pushes)
9844 rtx cop = avoid_constant_pool_reference (op);
9846 /* Casting the address of something to a mode narrower
9847 than a word can cause avoid_constant_pool_reference()
9848 to return the pool reference itself. That's no good to
9849 us here. Let's just hope that we can use the
9850 constant pool value directly. */
9851 if (op == cop)
9852 cop = get_pool_constant (XEXP (op, 0));
9854 push_minipool_fix (insn, address,
9855 recog_data.operand_loc[opno],
9856 recog_data.operand_mode[opno], cop);
9859 result = true;
9864 return result;
9867 /* GCC puts the pool in the wrong place for ARM, since we can only
9868 load addresses a limited distance around the pc. We do some
9869 special munging to move the constant pool values to the correct
9870 point in the code. */
9871 static void
9872 arm_reorg (void)
9874 rtx insn;
9875 HOST_WIDE_INT address = 0;
9876 Mfix * fix;
9878 minipool_fix_head = minipool_fix_tail = NULL;
9880 /* The first insn must always be a note, or the code below won't
9881 scan it properly. */
9882 insn = get_insns ();
9883 gcc_assert (GET_CODE (insn) == NOTE);
9884 minipool_pad = 0;
9886 /* Scan all the insns and record the operands that will need fixing. */
9887 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9889 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9890 && (arm_cirrus_insn_p (insn)
9891 || GET_CODE (insn) == JUMP_INSN
9892 || arm_memory_load_p (insn)))
9893 cirrus_reorg (insn);
9895 if (GET_CODE (insn) == BARRIER)
9896 push_minipool_barrier (insn, address);
9897 else if (INSN_P (insn))
9899 rtx table;
9901 note_invalid_constants (insn, address, true);
9902 address += get_attr_length (insn);
9904 /* If the insn is a vector jump, add the size of the table
9905 and skip the table. */
9906 if ((table = is_jump_table (insn)) != NULL)
9908 address += get_jump_table_size (table);
9909 insn = table;
9914 fix = minipool_fix_head;
9916 /* Now scan the fixups and perform the required changes. */
9917 while (fix)
9919 Mfix * ftmp;
9920 Mfix * fdel;
9921 Mfix * last_added_fix;
9922 Mfix * last_barrier = NULL;
9923 Mfix * this_fix;
9925 /* Skip any further barriers before the next fix. */
9926 while (fix && GET_CODE (fix->insn) == BARRIER)
9927 fix = fix->next;
9929 /* No more fixes. */
9930 if (fix == NULL)
9931 break;
9933 last_added_fix = NULL;
9935 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9937 if (GET_CODE (ftmp->insn) == BARRIER)
9939 if (ftmp->address >= minipool_vector_head->max_address)
9940 break;
9942 last_barrier = ftmp;
9944 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9945 break;
9947 last_added_fix = ftmp; /* Keep track of the last fix added. */
9950 /* If we found a barrier, drop back to that; any fixes that we
9951 could have reached but come after the barrier will now go in
9952 the next mini-pool. */
9953 if (last_barrier != NULL)
9955 /* Reduce the refcount for those fixes that won't go into this
9956 pool after all. */
9957 for (fdel = last_barrier->next;
9958 fdel && fdel != ftmp;
9959 fdel = fdel->next)
9961 fdel->minipool->refcount--;
9962 fdel->minipool = NULL;
9965 ftmp = last_barrier;
9967 else
9969 /* ftmp is the first fix that we can't fit into this pool and
9970 there are no natural barriers that we could use. Insert a
9971 new barrier in the code somewhere between the previous
9972 fix and this one, and arrange to jump around it. */
9973 HOST_WIDE_INT max_address;
9975 /* The last item on the list of fixes must be a barrier, so
9976 we can never run off the end of the list of fixes without
9977 last_barrier being set. */
9978 gcc_assert (ftmp);
9980 max_address = minipool_vector_head->max_address;
9981 /* Check that there isn't another fix that is in range that
9982 we couldn't fit into this pool because the pool was
9983 already too large: we need to put the pool before such an
9984 instruction. The pool itself may come just after the
9985 fix because create_fix_barrier also allows space for a
9986 jump instruction. */
9987 if (ftmp->address < max_address)
9988 max_address = ftmp->address + 1;
9990 last_barrier = create_fix_barrier (last_added_fix, max_address);
9993 assign_minipool_offsets (last_barrier);
9995 while (ftmp)
9997 if (GET_CODE (ftmp->insn) != BARRIER
9998 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9999 == NULL))
10000 break;
10002 ftmp = ftmp->next;
10005 /* Scan over the fixes we have identified for this pool, fixing them
10006 up and adding the constants to the pool itself. */
10007 for (this_fix = fix; this_fix && ftmp != this_fix;
10008 this_fix = this_fix->next)
10009 if (GET_CODE (this_fix->insn) != BARRIER)
10011 rtx addr
10012 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
10013 minipool_vector_label),
10014 this_fix->minipool->offset);
10015 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
10018 dump_minipool (last_barrier->insn);
10019 fix = ftmp;
10022 /* From now on we must synthesize any constants that we can't handle
10023 directly. This can happen if the RTL gets split during final
10024 instruction generation. */
10025 after_arm_reorg = 1;
10027 /* Free the minipool memory. */
10028 obstack_free (&minipool_obstack, minipool_startobj);
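/* A purely illustrative sketch of the transformation (not taken from any
   real compilation): a constant that cannot be synthesized inline ends up
   being loaded pc-relative from a nearby minipool, with a branch inserted
   around the pool when no natural barrier is available:

	ldr	r0, .L5		@ fix recorded by note_invalid_constants
	...
	b	.L6		@ barrier created by create_fix_barrier
   .L5:
	.word	0x12345678	@ minipool entry dumped by dump_minipool
   .L6:
	...  */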
10031 /* Routines to output assembly language. */
10033 /* If the rtx is the correct value then return the string of the number.
10034 In this way we can ensure that valid double constants are generated even
10035 when cross compiling. */
10036 const char *
10037 fp_immediate_constant (rtx x)
10039 REAL_VALUE_TYPE r;
10040 int i;
10042 if (!fp_consts_inited)
10043 init_fp_table ();
10045 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10046 for (i = 0; i < 8; i++)
10047 if (REAL_VALUES_EQUAL (r, values_fp[i]))
10048 return strings_fp[i];
10050 gcc_unreachable ();
10053 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
10054 static const char *
10055 fp_const_from_val (REAL_VALUE_TYPE *r)
10057 int i;
10059 if (!fp_consts_inited)
10060 init_fp_table ();
10062 for (i = 0; i < 8; i++)
10063 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
10064 return strings_fp[i];
10066 gcc_unreachable ();
10069 /* Output the operands of a LDM/STM instruction to STREAM.
10070 MASK is the ARM register set mask of which only bits 0-15 are important.
10071 REG is the base register, either the frame pointer or the stack pointer.
10072 INSTR is the possibly suffixed load or store instruction.
10073 RFE is nonzero if the instruction should also copy spsr to cpsr. */
10075 static void
10076 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
10077 unsigned long mask, int rfe)
10079 unsigned i;
10080 bool not_first = FALSE;
10082 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
10083 fputc ('\t', stream);
10084 asm_fprintf (stream, instr, reg);
10085 fputc ('{', stream);
10087 for (i = 0; i <= LAST_ARM_REGNUM; i++)
10088 if (mask & (1 << i))
10090 if (not_first)
10091 fprintf (stream, ", ");
10093 asm_fprintf (stream, "%r", i);
10094 not_first = TRUE;
10097 if (rfe)
10098 fprintf (stream, "}^\n");
10099 else
10100 fprintf (stream, "}\n");
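/* For example (illustrative only), a call such as

	print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, mask, 0);

   with MASK covering r4, r5 and lr would emit

	ldmfd	sp!, {r4, r5, lr}

   and a nonzero RFE (which requires PC_REGNUM in MASK) would close the
   register list with "}^" so that SPSR is copied back into CPSR.  */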
10104 /* Output a FLDMD instruction to STREAM.
10105 BASE is the register containing the address.
10106 REG and COUNT specify the register range.
10107 Extra registers may be added to avoid hardware bugs.
10109 We output FLDMD even for ARMv5 VFP implementations. Although
10110 FLDMD is technically not supported until ARMv6, it is believed
10111 that all VFP implementations support its use in this context. */
10113 static void
10114 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
10116 int i;
10118 /* Workaround ARM10 VFPr1 bug. */
10119 if (count == 2 && !arm_arch6)
10121 if (reg == 15)
10122 reg--;
10123 count++;
10126 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
10127 load into multiple parts if we have to handle more than 16 registers. */
10128 if (count > 16)
10130 vfp_output_fldmd (stream, base, reg, 16);
10131 vfp_output_fldmd (stream, base, reg + 16, count - 16);
10132 return;
10135 fputc ('\t', stream);
10136 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
10138 for (i = reg; i < reg + count; i++)
10140 if (i > reg)
10141 fputs (", ", stream);
10142 asm_fprintf (stream, "d%d", i);
10144 fputs ("}\n", stream);
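/* Illustrative example: vfp_output_fldmd (stream, SP_REGNUM, 8, 3) would
   emit

	fldmfdd	sp!, {d8, d9, d10}

   and note that on a pre-ARMv6 core a request for exactly two registers is
   widened to three by the VFPr1 workaround above.  */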
10149 /* Output the assembly for a store multiple. */
10151 const char *
10152 vfp_output_fstmd (rtx * operands)
10154 char pattern[100];
10155 int p;
10156 int base;
10157 int i;
10159 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
10160 p = strlen (pattern);
10162 gcc_assert (GET_CODE (operands[1]) == REG);
10164 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
10165 for (i = 1; i < XVECLEN (operands[2], 0); i++)
10167 p += sprintf (&pattern[p], ", d%d", base + i);
10169 strcpy (&pattern[p], "}");
10171 output_asm_insn (pattern, operands);
10172 return "";
10176 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
10177 number of bytes pushed. */
10179 static int
10180 vfp_emit_fstmd (int base_reg, int count)
10182 rtx par;
10183 rtx dwarf;
10184 rtx tmp, reg;
10185 int i;
10187 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
10188 register pairs are stored by a store multiple insn. We avoid this
10189 by pushing an extra pair. */
10190 if (count == 2 && !arm_arch6)
10192 if (base_reg == LAST_VFP_REGNUM - 3)
10193 base_reg -= 2;
10194 count++;
10197 /* FSTMD may not store more than 16 doubleword registers at once. Split
10198 larger stores into multiple parts (up to a maximum of two, in
10199 practice). */
10200 if (count > 16)
10202 int saved;
10203 /* NOTE: base_reg is an internal register number, so each D register
10204 counts as 2. */
10205 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
10206 saved += vfp_emit_fstmd (base_reg, 16);
10207 return saved;
10210 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10211 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10213 reg = gen_rtx_REG (DFmode, base_reg);
10214 base_reg += 2;
10216 XVECEXP (par, 0, 0)
10217 = gen_rtx_SET (VOIDmode,
10218 gen_frame_mem (BLKmode,
10219 gen_rtx_PRE_DEC (BLKmode,
10220 stack_pointer_rtx)),
10221 gen_rtx_UNSPEC (BLKmode,
10222 gen_rtvec (1, reg),
10223 UNSPEC_PUSH_MULT));
10225 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10226 plus_constant (stack_pointer_rtx, -(count * 8)));
10227 RTX_FRAME_RELATED_P (tmp) = 1;
10228 XVECEXP (dwarf, 0, 0) = tmp;
10230 tmp = gen_rtx_SET (VOIDmode,
10231 gen_frame_mem (DFmode, stack_pointer_rtx),
10232 reg);
10233 RTX_FRAME_RELATED_P (tmp) = 1;
10234 XVECEXP (dwarf, 0, 1) = tmp;
10236 for (i = 1; i < count; i++)
10238 reg = gen_rtx_REG (DFmode, base_reg);
10239 base_reg += 2;
10240 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10242 tmp = gen_rtx_SET (VOIDmode,
10243 gen_frame_mem (DFmode,
10244 plus_constant (stack_pointer_rtx,
10245 i * 8)),
10246 reg);
10247 RTX_FRAME_RELATED_P (tmp) = 1;
10248 XVECEXP (dwarf, 0, i + 1) = tmp;
10251 par = emit_insn (par);
10252 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
10253 RTX_FRAME_RELATED_P (par) = 1;
10255 return count * 8;
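/* A rough example of the effect (illustrative): asking to save the pair
   d8/d9 on a pre-ARMv6 core is widened to three registers by the VFPr1
   workaround, so the prologue ends up containing something like

	fstmfdd	sp!, {d8, d9, d10}

   and this function reports 24 bytes pushed, with a REG_FRAME_RELATED_EXPR
   note describing the stack adjustment and the individual register saves
   for the unwinder.  */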
10258 /* Emit a call instruction with pattern PAT. ADDR is the address of
10259 the call target. */
10261 void
10262 arm_emit_call_insn (rtx pat, rtx addr)
10264 rtx insn;
10266 insn = emit_call_insn (pat);
10268 /* The PIC register is live on entry to VxWorks PIC PLT entries.
10269 If the call might use such an entry, add a use of the PIC register
10270 to the instruction's CALL_INSN_FUNCTION_USAGE. */
10271 if (TARGET_VXWORKS_RTP
10272 && flag_pic
10273 && GET_CODE (addr) == SYMBOL_REF
10274 && (SYMBOL_REF_DECL (addr)
10275 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
10276 : !SYMBOL_REF_LOCAL_P (addr)))
10278 require_pic_register ();
10279 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
10283 /* Output a 'call' insn. */
10284 const char *
10285 output_call (rtx *operands)
10287 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
10289 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
10290 if (REGNO (operands[0]) == LR_REGNUM)
10292 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
10293 output_asm_insn ("mov%?\t%0, %|lr", operands);
10296 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10298 if (TARGET_INTERWORK || arm_arch4t)
10299 output_asm_insn ("bx%?\t%0", operands);
10300 else
10301 output_asm_insn ("mov%?\t%|pc, %0", operands);
10303 return "";
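/* For instance (illustrative), a pre-ARMv5 indirect call through r2 on an
   interworking-capable core comes out as

	mov	lr, pc
	bx	r2

   which relies on the PC reading ahead in ARM state, so LR ends up holding
   the address of the instruction after the BX.  */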
10306 /* Output a 'call' insn that is a reference in memory. */
10307 const char *
10308 output_call_mem (rtx *operands)
10310 if (TARGET_INTERWORK && !arm_arch5)
10312 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10313 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10314 output_asm_insn ("bx%?\t%|ip", operands);
10316 else if (regno_use_in (LR_REGNUM, operands[0]))
10318 /* LR is used in the memory address. We load the address in the
10319 first instruction. It's safe to use IP as the target of the
10320 load since the call will kill it anyway. */
10321 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10322 if (arm_arch5)
10323 output_asm_insn ("blx%?\t%|ip", operands);
10324 else
10326 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10327 if (arm_arch4t)
10328 output_asm_insn ("bx%?\t%|ip", operands);
10329 else
10330 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
10333 else
10335 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10336 output_asm_insn ("ldr%?\t%|pc, %0", operands);
10339 return "";
10343 /* Output a move from ARM registers to an FPA register.
10344 OPERANDS[0] is an FPA register.
10345 OPERANDS[1] is the first register of an ARM register pair. */
10346 const char *
10347 output_mov_long_double_fpa_from_arm (rtx *operands)
10349 int arm_reg0 = REGNO (operands[1]);
10350 rtx ops[3];
10352 gcc_assert (arm_reg0 != IP_REGNUM);
10354 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10355 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10356 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10358 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10359 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
10361 return "";
10364 /* Output a move from an FPA register to ARM registers.
10365 OPERANDS[0] is the first register of an ARM register pair.
10366 OPERANDS[1] is an FPA register. */
10367 const char *
10368 output_mov_long_double_arm_from_fpa (rtx *operands)
10370 int arm_reg0 = REGNO (operands[0]);
10371 rtx ops[3];
10373 gcc_assert (arm_reg0 != IP_REGNUM);
10375 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10376 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10377 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10379 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
10380 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10381 return "";
10384 /* Output a move of a long double between ARM registers.
10385 OPERANDS[0] is the destination.
10386 OPERANDS[1] is the source. */
10387 const char *
10388 output_mov_long_double_arm_from_arm (rtx *operands)
10390 /* We have to be careful here because the two might overlap. */
10391 int dest_start = REGNO (operands[0]);
10392 int src_start = REGNO (operands[1]);
10393 rtx ops[2];
10394 int i;
10396 if (dest_start < src_start)
10398 for (i = 0; i < 3; i++)
10400 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10401 ops[1] = gen_rtx_REG (SImode, src_start + i);
10402 output_asm_insn ("mov%?\t%0, %1", ops);
10405 else
10407 for (i = 2; i >= 0; i--)
10409 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10410 ops[1] = gen_rtx_REG (SImode, src_start + i);
10411 output_asm_insn ("mov%?\t%0, %1", ops);
10415 return "";
10419 /* Emit a MOVW/MOVT pair. */
10420 void arm_emit_movpair (rtx dest, rtx src)
10422 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
10423 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
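/* The two sets above are intended to match the movw/movt patterns in
   arm.md, so for a symbolic SRC the assembler output is the usual pair
   (illustrative):

	movw	r0, #:lower16:symbol
	movt	r0, #:upper16:symbol  */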
10427 /* Output a move from ARM registers to an FPA register.
10428 OPERANDS[0] is an FPA register.
10429 OPERANDS[1] is the first register of an ARM register pair. */
10430 const char *
10431 output_mov_double_fpa_from_arm (rtx *operands)
10433 int arm_reg0 = REGNO (operands[1]);
10434 rtx ops[2];
10436 gcc_assert (arm_reg0 != IP_REGNUM);
10438 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10439 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10440 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
10441 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
10442 return "";
10445 /* Output a move from an FPA register to ARM registers.
10446 OPERANDS[0] is the first register of an ARM register pair.
10447 OPERANDS[1] is an FPA register. */
10448 const char *
10449 output_mov_double_arm_from_fpa (rtx *operands)
10451 int arm_reg0 = REGNO (operands[0]);
10452 rtx ops[2];
10454 gcc_assert (arm_reg0 != IP_REGNUM);
10456 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10457 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10458 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
10459 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
10460 return "";
10463 /* Output a move between double words.
10464 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
10465 or MEM<-REG and all MEMs must be offsettable addresses. */
10466 const char *
10467 output_move_double (rtx *operands)
10469 enum rtx_code code0 = GET_CODE (operands[0]);
10470 enum rtx_code code1 = GET_CODE (operands[1]);
10471 rtx otherops[3];
10473 if (code0 == REG)
10475 unsigned int reg0 = REGNO (operands[0]);
10477 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10479 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
10481 switch (GET_CODE (XEXP (operands[1], 0)))
10483 case REG:
10484 if (TARGET_LDRD
10485 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
10486 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
10487 else
10488 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10489 break;
10491 case PRE_INC:
10492 gcc_assert (TARGET_LDRD);
10493 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10494 break;
10496 case PRE_DEC:
10497 if (TARGET_LDRD)
10498 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10499 else
10500 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10501 break;
10503 case POST_INC:
10504 if (TARGET_LDRD)
10505 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10506 else
10507 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10508 break;
10510 case POST_DEC:
10511 gcc_assert (TARGET_LDRD);
10512 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10513 break;
10515 case PRE_MODIFY:
10516 case POST_MODIFY:
10517 /* Autoincrement addressing modes should never have overlapping
10518 base and destination registers, and overlapping index registers
10519 are already prohibited, so this doesn't need to worry about
10520 fix_cm3_ldrd. */
10521 otherops[0] = operands[0];
10522 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10523 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10525 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10527 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10529 /* Registers overlap so split out the increment. */
10530 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10531 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10533 else
10535 /* Use a single insn if we can.
10536 FIXME: IWMMXT allows offsets larger than ldrd can
10537 handle, fix these up with a pair of ldr. */
10538 if (TARGET_THUMB2
10539 || GET_CODE (otherops[2]) != CONST_INT
10540 || (INTVAL (otherops[2]) > -256
10541 && INTVAL (otherops[2]) < 256))
10542 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10543 else
10545 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10546 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10550 else
10552 /* Use a single insn if we can.
10553 FIXME: IWMMXT allows offsets larger than ldrd can handle,
10554 fix these up with a pair of ldr. */
10555 if (TARGET_THUMB2
10556 || GET_CODE (otherops[2]) != CONST_INT
10557 || (INTVAL (otherops[2]) > -256
10558 && INTVAL (otherops[2]) < 256))
10559 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10560 else
10562 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10563 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10566 break;
10568 case LABEL_REF:
10569 case CONST:
10570 /* We might be able to use ldrd %0, %1 here. However the range is
10571 different to ldr/adr, and it is broken on some ARMv7-M
10572 implementations. */
10573 /* Use the second register of the pair to avoid problematic
10574 overlap. */
10575 otherops[1] = operands[1];
10576 output_asm_insn ("adr%?\t%0, %1", otherops);
10577 operands[1] = otherops[0];
10578 if (TARGET_LDRD)
10579 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10580 else
10581 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10582 break;
10584 /* ??? This needs checking for thumb2. */
10585 default:
10586 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10587 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10589 otherops[0] = operands[0];
10590 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10591 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10593 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10595 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10597 switch ((int) INTVAL (otherops[2]))
10599 case -8:
10600 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10601 return "";
10602 case -4:
10603 if (TARGET_THUMB2)
10604 break;
10605 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10606 return "";
10607 case 4:
10608 if (TARGET_THUMB2)
10609 break;
10610 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10611 return "";
10614 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
10615 operands[1] = otherops[0];
10616 if (TARGET_LDRD
10617 && (GET_CODE (otherops[2]) == REG
10618 || TARGET_THUMB2
10619 || (GET_CODE (otherops[2]) == CONST_INT
10620 && INTVAL (otherops[2]) > -256
10621 && INTVAL (otherops[2]) < 256)))
10623 if (reg_overlap_mentioned_p (operands[0],
10624 otherops[2]))
10626 rtx tmp;
10627 /* Swap base and index registers over to
10628 avoid a conflict. */
10629 tmp = otherops[1];
10630 otherops[1] = otherops[2];
10631 otherops[2] = tmp;
10633 /* If both registers conflict, it will usually
10634 have been fixed by a splitter. */
10635 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10636 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10638 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10639 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10641 else
10643 otherops[0] = operands[0];
10644 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10646 return "";
10649 if (GET_CODE (otherops[2]) == CONST_INT)
10651 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10652 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10653 else
10654 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10656 else
10657 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10659 else
10660 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10662 if (TARGET_LDRD)
10663 return "ldr%(d%)\t%0, [%1]";
10665 return "ldm%(ia%)\t%1, %M0";
10667 else
10669 otherops[1] = adjust_address (operands[1], SImode, 4);
10670 /* Take care of overlapping base/data reg. */
10671 if (reg_mentioned_p (operands[0], operands[1]))
10673 output_asm_insn ("ldr%?\t%0, %1", otherops);
10674 output_asm_insn ("ldr%?\t%0, %1", operands);
10676 else
10678 output_asm_insn ("ldr%?\t%0, %1", operands);
10679 output_asm_insn ("ldr%?\t%0, %1", otherops);
10684 else
10686 /* Constraints should ensure this. */
10687 gcc_assert (code0 == MEM && code1 == REG);
10688 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10690 switch (GET_CODE (XEXP (operands[0], 0)))
10692 case REG:
10693 if (TARGET_LDRD)
10694 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10695 else
10696 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10697 break;
10699 case PRE_INC:
10700 gcc_assert (TARGET_LDRD);
10701 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10702 break;
10704 case PRE_DEC:
10705 if (TARGET_LDRD)
10706 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10707 else
10708 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10709 break;
10711 case POST_INC:
10712 if (TARGET_LDRD)
10713 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10714 else
10715 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10716 break;
10718 case POST_DEC:
10719 gcc_assert (TARGET_LDRD);
10720 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10721 break;
10723 case PRE_MODIFY:
10724 case POST_MODIFY:
10725 otherops[0] = operands[1];
10726 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10727 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10729 /* IWMMXT allows offsets larger than strd can handle,
10730 fix these up with a pair of str. */
10731 if (!TARGET_THUMB2
10732 && GET_CODE (otherops[2]) == CONST_INT
10733 && (INTVAL(otherops[2]) <= -256
10734 || INTVAL(otherops[2]) >= 256))
10736 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10738 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10739 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10741 else
10743 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10744 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10747 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10748 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10749 else
10750 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10751 break;
10753 case PLUS:
10754 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10755 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10757 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10759 case -8:
10760 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10761 return "";
10763 case -4:
10764 if (TARGET_THUMB2)
10765 break;
10766 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10767 return "";
10769 case 4:
10770 if (TARGET_THUMB2)
10771 break;
10772 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10773 return "";
10776 if (TARGET_LDRD
10777 && (GET_CODE (otherops[2]) == REG
10778 || TARGET_THUMB2
10779 || (GET_CODE (otherops[2]) == CONST_INT
10780 && INTVAL (otherops[2]) > -256
10781 && INTVAL (otherops[2]) < 256)))
10783 otherops[0] = operands[1];
10784 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10785 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10786 return "";
10788 /* Fall through */
10790 default:
10791 otherops[0] = adjust_address (operands[0], SImode, 4);
10792 otherops[1] = operands[1];
10793 output_asm_insn ("str%?\t%1, %0", operands);
10794 output_asm_insn ("str%?\t%H1, %0", otherops);
10798 return "";
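/* Two illustrative cases for the REG <- MEM direction: with TARGET_LDRD a
   simple register-indirect load of a doubleword comes out as

	ldrd	r0, [r2]

   while without ldrd support the same move falls back to

	ldmia	r2, {r0, r1}

   (the %(...%) markers in the templates handle the placement differences
   between divided and unified assembler syntax).  */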
10801 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10802 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10804 const char *
10805 output_move_quad (rtx *operands)
10807 if (REG_P (operands[0]))
10809 /* Load, or reg->reg move. */
10811 if (MEM_P (operands[1]))
10813 switch (GET_CODE (XEXP (operands[1], 0)))
10815 case REG:
10816 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10817 break;
10819 case LABEL_REF:
10820 case CONST:
10821 output_asm_insn ("adr%?\t%0, %1", operands);
10822 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10823 break;
10825 default:
10826 gcc_unreachable ();
10829 else
10831 rtx ops[2];
10832 int dest, src, i;
10834 gcc_assert (REG_P (operands[1]));
10836 dest = REGNO (operands[0]);
10837 src = REGNO (operands[1]);
10839 /* This seems pretty dumb, but hopefully GCC won't try to do it
10840 very often. */
10841 if (dest < src)
10842 for (i = 0; i < 4; i++)
10844 ops[0] = gen_rtx_REG (SImode, dest + i);
10845 ops[1] = gen_rtx_REG (SImode, src + i);
10846 output_asm_insn ("mov%?\t%0, %1", ops);
10848 else
10849 for (i = 3; i >= 0; i--)
10851 ops[0] = gen_rtx_REG (SImode, dest + i);
10852 ops[1] = gen_rtx_REG (SImode, src + i);
10853 output_asm_insn ("mov%?\t%0, %1", ops);
10857 else
10859 gcc_assert (MEM_P (operands[0]));
10860 gcc_assert (REG_P (operands[1]));
10861 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10863 switch (GET_CODE (XEXP (operands[0], 0)))
10865 case REG:
10866 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10867 break;
10869 default:
10870 gcc_unreachable ();
10874 return "";
10877 /* Output a VFP load or store instruction. */
10879 const char *
10880 output_move_vfp (rtx *operands)
10882 rtx reg, mem, addr, ops[2];
10883 int load = REG_P (operands[0]);
10884 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10885 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10886 const char *templ;
10887 char buff[50];
10888 enum machine_mode mode;
10890 reg = operands[!load];
10891 mem = operands[load];
10893 mode = GET_MODE (reg);
10895 gcc_assert (REG_P (reg));
10896 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10897 gcc_assert (mode == SFmode
10898 || mode == DFmode
10899 || mode == SImode
10900 || mode == DImode
10901 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10902 gcc_assert (MEM_P (mem));
10904 addr = XEXP (mem, 0);
10906 switch (GET_CODE (addr))
10908 case PRE_DEC:
10909 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10910 ops[0] = XEXP (addr, 0);
10911 ops[1] = reg;
10912 break;
10914 case POST_INC:
10915 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10916 ops[0] = XEXP (addr, 0);
10917 ops[1] = reg;
10918 break;
10920 default:
10921 templ = "f%s%c%%?\t%%%s0, %%1%s";
10922 ops[0] = reg;
10923 ops[1] = mem;
10924 break;
10927 sprintf (buff, templ,
10928 load ? "ld" : "st",
10929 dp ? 'd' : 's',
10930 dp ? "P" : "",
10931 integer_p ? "\t%@ int" : "");
10932 output_asm_insn (buff, ops);
10934 return "";
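/* Expanding the template by hand for a couple of cases (illustrative): a
   DFmode load from [sp, #16] gives

	fldd	d8, [sp, #16]

   and an SImode store to a register-indirect address gives something like

	fsts	s0, [r4]	@ int

   where the trailing comment marks a VFP register being used to hold an
   integer value.  */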
10937 /* Output a Neon quad-word load or store, or a load or store for
10938 larger structure modes.
10940 WARNING: The ordering of elements is weird in big-endian mode,
10941 because we use VSTM, as required by the EABI. GCC RTL defines
10942 element ordering based on in-memory order. This can differ
10943 from the architectural ordering of elements within a NEON register.
10944 The intrinsics defined in arm_neon.h use the NEON register element
10945 ordering, not the GCC RTL element ordering.
10947 For example, the in-memory ordering of a big-endian quadword
10948 vector with 16-bit elements when stored from register pair {d0,d1}
10949 will be (lowest address first, d0[N] is NEON register element N):
10951 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
10953 When necessary, quadword registers (dN, dN+1) are moved to ARM
10954 registers from rN in the order:
10956 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10958 So that STM/LDM can be used on vectors in ARM registers, and the
10959 same memory layout will result as if VSTM/VLDM were used. */
10961 const char *
10962 output_move_neon (rtx *operands)
10964 rtx reg, mem, addr, ops[2];
10965 int regno, load = REG_P (operands[0]);
10966 const char *templ;
10967 char buff[50];
10968 enum machine_mode mode;
10970 reg = operands[!load];
10971 mem = operands[load];
10973 mode = GET_MODE (reg);
10975 gcc_assert (REG_P (reg));
10976 regno = REGNO (reg);
10977 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10978 || NEON_REGNO_OK_FOR_QUAD (regno));
10979 gcc_assert (VALID_NEON_DREG_MODE (mode)
10980 || VALID_NEON_QREG_MODE (mode)
10981 || VALID_NEON_STRUCT_MODE (mode));
10982 gcc_assert (MEM_P (mem));
10984 addr = XEXP (mem, 0);
10986 /* Strip off const from addresses like (const (plus (...))). */
10987 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10988 addr = XEXP (addr, 0);
10990 switch (GET_CODE (addr))
10992 case POST_INC:
10993 templ = "v%smia%%?\t%%0!, %%h1";
10994 ops[0] = XEXP (addr, 0);
10995 ops[1] = reg;
10996 break;
10998 case POST_MODIFY:
10999 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
11000 gcc_unreachable ();
11002 case LABEL_REF:
11003 case PLUS:
11005 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
11006 int i;
11007 int overlap = -1;
11008 for (i = 0; i < nregs; i++)
11010 /* We're only using DImode here because it's a convenient size. */
11011 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
11012 ops[1] = adjust_address (mem, SImode, 8 * i);
11013 if (reg_overlap_mentioned_p (ops[0], mem))
11015 gcc_assert (overlap == -1);
11016 overlap = i;
11018 else
11020 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11021 output_asm_insn (buff, ops);
11024 if (overlap != -1)
11026 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
11027 ops[1] = adjust_address (mem, SImode, 8 * overlap);
11028 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11029 output_asm_insn (buff, ops);
11032 return "";
11035 default:
11036 templ = "v%smia%%?\t%%m0, %%h1";
11037 ops[0] = mem;
11038 ops[1] = reg;
11041 sprintf (buff, templ, load ? "ld" : "st");
11042 output_asm_insn (buff, ops);
11044 return "";
11047 /* Output an ADD r, s, #n where n may be too big for one instruction.
11048 If adding zero to one register, output nothing. */
11049 const char *
11050 output_add_immediate (rtx *operands)
11052 HOST_WIDE_INT n = INTVAL (operands[2]);
11054 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
11056 if (n < 0)
11057 output_multi_immediate (operands,
11058 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
11059 -n);
11060 else
11061 output_multi_immediate (operands,
11062 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
11066 return "";
11069 /* Output a multiple immediate operation.
11070 OPERANDS is the vector of operands referred to in the output patterns.
11071 INSTR1 is the output pattern to use for the first constant.
11072 INSTR2 is the output pattern to use for subsequent constants.
11073 IMMED_OP is the index of the constant slot in OPERANDS.
11074 N is the constant value. */
11075 static const char *
11076 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
11077 int immed_op, HOST_WIDE_INT n)
11079 #if HOST_BITS_PER_WIDE_INT > 32
11080 n &= 0xffffffff;
11081 #endif
11083 if (n == 0)
11085 /* Quick and easy output. */
11086 operands[immed_op] = const0_rtx;
11087 output_asm_insn (instr1, operands);
11089 else
11091 int i;
11092 const char * instr = instr1;
11094 /* Note that n is never zero here (which would give no output). */
11095 for (i = 0; i < 32; i += 2)
11097 if (n & (3 << i))
11099 operands[immed_op] = GEN_INT (n & (255 << i));
11100 output_asm_insn (instr, operands);
11101 instr = instr2;
11102 i += 6;
11107 return "";
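/* A worked example (illustrative): output_add_immediate for r0 = r1 + 0x10004
   cannot use a single add, so the constant is split into 8-bit chunks at
   even bit positions and emitted as

	add	r0, r1, #4
	add	r0, r0, #65536  */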
11110 /* Return the name of a shifter operation. */
11111 static const char *
11112 arm_shift_nmem(enum rtx_code code)
11114 switch (code)
11116 case ASHIFT:
11117 return ARM_LSL_NAME;
11119 case ASHIFTRT:
11120 return "asr";
11122 case LSHIFTRT:
11123 return "lsr";
11125 case ROTATERT:
11126 return "ror";
11128 default:
11129 abort();
11133 /* Return the appropriate ARM instruction for the operation code.
11134 The returned result should not be overwritten. OP is the rtx of the
11135 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
11136 was shifted. */
11137 const char *
11138 arithmetic_instr (rtx op, int shift_first_arg)
11140 switch (GET_CODE (op))
11142 case PLUS:
11143 return "add";
11145 case MINUS:
11146 return shift_first_arg ? "rsb" : "sub";
11148 case IOR:
11149 return "orr";
11151 case XOR:
11152 return "eor";
11154 case AND:
11155 return "and";
11157 case ASHIFT:
11158 case ASHIFTRT:
11159 case LSHIFTRT:
11160 case ROTATERT:
11161 return arm_shift_nmem(GET_CODE(op));
11163 default:
11164 gcc_unreachable ();
11168 /* Ensure valid constant shifts and return the appropriate shift mnemonic
11169 for the operation code. The returned result should not be overwritten.
11170 OP is the rtx code of the shift.
11171 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
11172 will be the constant shift amount. */
11173 static const char *
11174 shift_op (rtx op, HOST_WIDE_INT *amountp)
11176 const char * mnem;
11177 enum rtx_code code = GET_CODE (op);
11179 switch (GET_CODE (XEXP (op, 1)))
11181 case REG:
11182 case SUBREG:
11183 *amountp = -1;
11184 break;
11186 case CONST_INT:
11187 *amountp = INTVAL (XEXP (op, 1));
11188 break;
11190 default:
11191 gcc_unreachable ();
11194 switch (code)
11196 case ROTATE:
11197 gcc_assert (*amountp != -1);
11198 *amountp = 32 - *amountp;
11199 code = ROTATERT;
11201 /* Fall through. */
11203 case ASHIFT:
11204 case ASHIFTRT:
11205 case LSHIFTRT:
11206 case ROTATERT:
11207 mnem = arm_shift_nmem(code);
11208 break;
11210 case MULT:
11211 /* We never have to worry about the amount being other than a
11212 power of 2, since this case can never be reloaded from a reg. */
11213 gcc_assert (*amountp != -1);
11214 *amountp = int_log2 (*amountp);
11215 return ARM_LSL_NAME;
11217 default:
11218 gcc_unreachable ();
11221 if (*amountp != -1)
11223 /* This is not 100% correct, but follows from the desire to merge
11224 multiplication by a power of 2 with the recognizer for a
11225 shift. >=32 is not a valid shift for "lsl", so we must try and
11226 output a shift that produces the correct arithmetical result.
11227 Using lsr #32 is identical except for the fact that the carry bit
11228 is not set correctly if we set the flags; but we never use the
11229 carry bit from such an operation, so we can ignore that. */
11230 if (code == ROTATERT)
11231 /* Rotate is just modulo 32. */
11232 *amountp &= 31;
11233 else if (*amountp != (*amountp & 31))
11235 if (code == ASHIFT)
11236 mnem = "lsr";
11237 *amountp = 32;
11240 /* Shifts of 0 are no-ops. */
11241 if (*amountp == 0)
11242 return NULL;
11245 return mnem;
11248 /* Obtain the shift from the POWER of two. */
11250 static HOST_WIDE_INT
11251 int_log2 (HOST_WIDE_INT power)
11253 HOST_WIDE_INT shift = 0;
11255 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
11257 gcc_assert (shift <= 31);
11258 shift++;
11261 return shift;
11264 /* Output a .ascii pseudo-op, keeping track of lengths. This is
11265 because /bin/as is horribly restrictive. The judgement about
11266 whether or not each character is 'printable' (and can be output as
11267 is) or not (and must be printed with an octal escape) must be made
11268 with reference to the *host* character set -- the situation is
11269 similar to that discussed in the comments above pp_c_char in
11270 c-pretty-print.c. */
11272 #define MAX_ASCII_LEN 51
11274 void
11275 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
11277 int i;
11278 int len_so_far = 0;
11280 fputs ("\t.ascii\t\"", stream);
11282 for (i = 0; i < len; i++)
11284 int c = p[i];
11286 if (len_so_far >= MAX_ASCII_LEN)
11288 fputs ("\"\n\t.ascii\t\"", stream);
11289 len_so_far = 0;
11292 if (ISPRINT (c))
11294 if (c == '\\' || c == '\"')
11296 putc ('\\', stream);
11297 len_so_far++;
11299 putc (c, stream);
11300 len_so_far++;
11302 else
11304 fprintf (stream, "\\%03o", c);
11305 len_so_far += 4;
11309 fputs ("\"\n", stream);
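/* As an illustration, the bytes of the C string literal "a \"b\"\n"
   (including its terminating NUL) would be emitted as

	.ascii	"a \"b\"\012\000"

   with quotes and backslashes escaped, non-printable characters written as
   three-digit octal escapes, and a new .ascii directive started whenever
   the current line reaches MAX_ASCII_LEN output characters.  */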
11312 /* Compute the register save mask for registers 0 through 12
11313 inclusive. This code is used by arm_compute_save_reg_mask. */
11315 static unsigned long
11316 arm_compute_save_reg0_reg12_mask (void)
11318 unsigned long func_type = arm_current_func_type ();
11319 unsigned long save_reg_mask = 0;
11320 unsigned int reg;
11322 if (IS_INTERRUPT (func_type))
11324 unsigned int max_reg;
11325 /* Interrupt functions must not corrupt any registers,
11326 even call clobbered ones. If this is a leaf function
11327 we can just examine the registers used by the RTL, but
11328 otherwise we have to assume that whatever function is
11329 called might clobber anything, and so we have to save
11330 all the call-clobbered registers as well. */
11331 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
11332 /* FIQ handlers have registers r8 - r12 banked, so
11333 we only need to check r0 - r7. Normal ISRs only
11334 bank r14 and r15, so we must check up to r12.
11335 r13 is the stack pointer which is always preserved,
11336 so we do not need to consider it here. */
11337 max_reg = 7;
11338 else
11339 max_reg = 12;
11341 for (reg = 0; reg <= max_reg; reg++)
11342 if (df_regs_ever_live_p (reg)
11343 || (! current_function_is_leaf && call_used_regs[reg]))
11344 save_reg_mask |= (1 << reg);
11346 /* Also save the pic base register if necessary. */
11347 if (flag_pic
11348 && !TARGET_SINGLE_PIC_BASE
11349 && arm_pic_register != INVALID_REGNUM
11350 && crtl->uses_pic_offset_table)
11351 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11353 else
11355 /* In the normal case we only need to save those registers
11356 which are call saved and which are used by this function. */
11357 for (reg = 0; reg <= 11; reg++)
11358 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
11359 save_reg_mask |= (1 << reg);
11361 /* Handle the frame pointer as a special case. */
11362 if (frame_pointer_needed)
11363 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
11365 /* If we aren't loading the PIC register,
11366 don't stack it even though it may be live. */
11367 if (flag_pic
11368 && !TARGET_SINGLE_PIC_BASE
11369 && arm_pic_register != INVALID_REGNUM
11370 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
11371 || crtl->uses_pic_offset_table))
11372 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11374 /* The prologue will copy SP into R0, so save it. */
11375 if (IS_STACKALIGN (func_type))
11376 save_reg_mask |= 1;
11379 /* Save registers so the exception handler can modify them. */
11380 if (crtl->calls_eh_return)
11382 unsigned int i;
11384 for (i = 0; ; i++)
11386 reg = EH_RETURN_DATA_REGNO (i);
11387 if (reg == INVALID_REGNUM)
11388 break;
11389 save_reg_mask |= 1 << reg;
11393 return save_reg_mask;
11397 /* Compute the number of bytes used to store the static chain register on the
11398 stack, above the stack frame. We need to know this accurately to get the
11399 alignment of the rest of the stack frame correct. */
11401 static int arm_compute_static_chain_stack_bytes (void)
11403 unsigned long func_type = arm_current_func_type ();
11404 int static_chain_stack_bytes = 0;
11406 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
11407 IS_NESTED (func_type) &&
11408 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
11409 static_chain_stack_bytes = 4;
11411 return static_chain_stack_bytes;
11415 /* Compute a bit mask of which registers need to be
11416 saved on the stack for the current function.
11417 This is used by arm_get_frame_offsets, which may add extra registers. */
11419 static unsigned long
11420 arm_compute_save_reg_mask (void)
11422 unsigned int save_reg_mask = 0;
11423 unsigned long func_type = arm_current_func_type ();
11424 unsigned int reg;
11426 if (IS_NAKED (func_type))
11427 /* This should never really happen. */
11428 return 0;
11430 /* If we are creating a stack frame, then we must save the frame pointer,
11431 IP (which will hold the old stack pointer), LR and the PC. */
11432 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11433 save_reg_mask |=
11434 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
11435 | (1 << IP_REGNUM)
11436 | (1 << LR_REGNUM)
11437 | (1 << PC_REGNUM);
11439 /* Volatile functions do not return, so there
11440 is no need to save any other registers. */
11441 if (IS_VOLATILE (func_type))
11442 return save_reg_mask;
11444 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
11446 /* Decide if we need to save the link register.
11447 Interrupt routines have their own banked link register,
11448 so they never need to save it.
11449 Otherwise if we do not use the link register we do not need to save
11450 it. If we are pushing other registers onto the stack however, we
11451 can save an instruction in the epilogue by pushing the link register
11452 now and then popping it back into the PC. This incurs extra memory
11453 accesses though, so we only do it when optimizing for size, and only
11454 if we know that we will not need a fancy return sequence. */
11455 if (df_regs_ever_live_p (LR_REGNUM)
11456 || (save_reg_mask
11457 && optimize_size
11458 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11459 && !crtl->calls_eh_return))
11460 save_reg_mask |= 1 << LR_REGNUM;
11462 if (cfun->machine->lr_save_eliminated)
11463 save_reg_mask &= ~ (1 << LR_REGNUM);
11465 if (TARGET_REALLY_IWMMXT
11466 && ((bit_count (save_reg_mask)
11467 + ARM_NUM_INTS (crtl->args.pretend_args_size +
11468 arm_compute_static_chain_stack_bytes())
11469 ) % 2) != 0)
11471 /* The total number of registers that are going to be pushed
11472 onto the stack is odd. We need to ensure that the stack
11473 is 64-bit aligned before we start to save iWMMXt registers,
11474 and also before we start to create locals. (A local variable
11475 might be a double or long long which we will load/store using
11476 an iWMMXt instruction). Therefore we need to push another
11477 ARM register, so that the stack will be 64-bit aligned. We
11478 try to avoid using the arg registers (r0 - r3) as they might be
11479 used to pass values in a tail call. */
11480 for (reg = 4; reg <= 12; reg++)
11481 if ((save_reg_mask & (1 << reg)) == 0)
11482 break;
11484 if (reg <= 12)
11485 save_reg_mask |= (1 << reg);
11486 else
11488 cfun->machine->sibcall_blocked = 1;
11489 save_reg_mask |= (1 << 3);
11493 /* We may need to push an additional register for use initializing the
11494 PIC base register. */
11495 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
11496 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
11498 reg = thumb_find_work_register (1 << 4);
11499 if (!call_used_regs[reg])
11500 save_reg_mask |= (1 << reg);
11503 return save_reg_mask;
11507 /* Compute a bit mask of which registers need to be
11508 saved on the stack for the current function. */
11509 static unsigned long
11510 thumb1_compute_save_reg_mask (void)
11512 unsigned long mask;
11513 unsigned reg;
11515 mask = 0;
11516 for (reg = 0; reg < 12; reg ++)
11517 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11518 mask |= 1 << reg;
11520 if (flag_pic
11521 && !TARGET_SINGLE_PIC_BASE
11522 && arm_pic_register != INVALID_REGNUM
11523 && crtl->uses_pic_offset_table)
11524 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11526 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
11527 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
11528 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
11530 /* LR will also be pushed if any lo regs are pushed. */
11531 if (mask & 0xff || thumb_force_lr_save ())
11532 mask |= (1 << LR_REGNUM);
11534 /* Make sure we have a low work register if we need one.
11535 We will need one if we are going to push a high register,
11536 but we are not currently intending to push a low register. */
11537 if ((mask & 0xff) == 0
11538 && ((mask & 0x0f00) || TARGET_BACKTRACE))
11540 /* Use thumb_find_work_register to choose which register
11541 we will use. If the register is live then we will
11542 have to push it. Use LAST_LO_REGNUM as our fallback
11543 choice for the register to select. */
11544 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
11545 /* Make sure the register returned by thumb_find_work_register is
11546 not part of the return value. */
11547 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
11548 reg = LAST_LO_REGNUM;
11550 if (! call_used_regs[reg])
11551 mask |= 1 << reg;
11554 /* The 504 below is 8 bytes less than 512 because there are two possible
11555 alignment words. We can't tell here if they will be present or not so we
11556 have to play it safe and assume that they are. */
11557 if ((CALLER_INTERWORKING_SLOT_SIZE +
11558 ROUND_UP_WORD (get_frame_size ()) +
11559 crtl->outgoing_args_size) >= 504)
11561 /* This is the same as the code in thumb1_expand_prologue() which
11562 determines which register to use for stack decrement. */
11563 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
11564 if (mask & (1 << reg))
11565 break;
11567 if (reg > LAST_LO_REGNUM)
11569 /* Make sure we have a register available for stack decrement. */
11570 mask |= 1 << LAST_LO_REGNUM;
11574 return mask;
11578 /* Return the number of bytes required to save VFP registers. */
11579 static int
11580 arm_get_vfp_saved_size (void)
11582 unsigned int regno;
11583 int count;
11584 int saved;
11586 saved = 0;
11587 /* Space for saved VFP registers. */
11588 if (TARGET_HARD_FLOAT && TARGET_VFP)
11590 count = 0;
11591 for (regno = FIRST_VFP_REGNUM;
11592 regno < LAST_VFP_REGNUM;
11593 regno += 2)
11595 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
11596 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
11598 if (count > 0)
11600 /* Workaround ARM10 VFPr1 bug. */
11601 if (count == 2 && !arm_arch6)
11602 count++;
11603 saved += count * 8;
11605 count = 0;
11607 else
11608 count++;
11610 if (count > 0)
11612 if (count == 2 && !arm_arch6)
11613 count++;
11614 saved += count * 8;
11617 return saved;
11621 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
11622 everything bar the final return instruction. */
11623 const char *
11624 output_return_instruction (rtx operand, int really_return, int reverse)
11626 char conditional[10];
11627 char instr[100];
11628 unsigned reg;
11629 unsigned long live_regs_mask;
11630 unsigned long func_type;
11631 arm_stack_offsets *offsets;
11633 func_type = arm_current_func_type ();
11635 if (IS_NAKED (func_type))
11636 return "";
11638 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11640 /* If this function was declared non-returning, and we have
11641 found a tail call, then we have to trust that the called
11642 function won't return. */
11643 if (really_return)
11645 rtx ops[2];
11647 /* Otherwise, trap an attempted return by aborting. */
11648 ops[0] = operand;
11649 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11650 : "abort");
11651 assemble_external_libcall (ops[1]);
11652 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11655 return "";
11658 gcc_assert (!cfun->calls_alloca || really_return);
11660 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11662 cfun->machine->return_used_this_function = 1;
11664 offsets = arm_get_frame_offsets ();
11665 live_regs_mask = offsets->saved_regs_mask;
11667 if (live_regs_mask)
11669 const char * return_reg;
11671 /* If we do not have any special requirements for function exit
11672 (e.g. interworking) then we can load the return address
11673 directly into the PC. Otherwise we must load it into LR. */
11674 if (really_return
11675 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11676 return_reg = reg_names[PC_REGNUM];
11677 else
11678 return_reg = reg_names[LR_REGNUM];
11680 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11682 /* There are three possible reasons for the IP register
11683 being saved. 1) a stack frame was created, in which case
11684 IP contains the old stack pointer, or 2) an ISR routine
11685 corrupted it, or 3) it was saved to align the stack on
11686 iWMMXt. In case 1, restore IP into SP, otherwise just
11687 restore IP. */
11688 if (frame_pointer_needed)
11690 live_regs_mask &= ~ (1 << IP_REGNUM);
11691 live_regs_mask |= (1 << SP_REGNUM);
11693 else
11694 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11697 /* On some ARM architectures it is faster to use LDR rather than
11698 LDM to load a single register. On other architectures, the
11699 cost is the same. In 26 bit mode, or for exception handlers,
11700 we have to use LDM to load the PC so that the CPSR is also
11701 restored. */
11702 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11703 if (live_regs_mask == (1U << reg))
11704 break;
11706 if (reg <= LAST_ARM_REGNUM
11707 && (reg != LR_REGNUM
11708 || ! really_return
11709 || ! IS_INTERRUPT (func_type)))
11711 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11712 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11714 else
11716 char *p;
11717 int first = 1;
11719 /* Generate the load multiple instruction to restore the
11720 registers. Note we can get here, even if
11721 frame_pointer_needed is true, but only if sp already
11722 points to the base of the saved core registers. */
11723 if (live_regs_mask & (1 << SP_REGNUM))
11725 unsigned HOST_WIDE_INT stack_adjust;
11727 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11728 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11730 if (stack_adjust && arm_arch5 && TARGET_ARM)
11731 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11732 else
11734 /* If we can't use ldmib (SA110 bug),
11735 then try to pop r3 instead. */
11736 if (stack_adjust)
11737 live_regs_mask |= 1 << 3;
11738 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11741 else
11742 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11744 p = instr + strlen (instr);
11746 for (reg = 0; reg <= SP_REGNUM; reg++)
11747 if (live_regs_mask & (1 << reg))
11749 int l = strlen (reg_names[reg]);
11751 if (first)
11752 first = 0;
11753 else
11755 memcpy (p, ", ", 2);
11756 p += 2;
11759 memcpy (p, "%|", 2);
11760 memcpy (p + 2, reg_names[reg], l);
11761 p += l + 2;
11764 if (live_regs_mask & (1 << LR_REGNUM))
11766 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11767 /* If returning from an interrupt, restore the CPSR. */
11768 if (IS_INTERRUPT (func_type))
11769 strcat (p, "^");
11771 else
11772 strcpy (p, "}");
11775 output_asm_insn (instr, & operand);
11777 /* See if we need to generate an extra instruction to
11778 perform the actual function return. */
11779 if (really_return
11780 && func_type != ARM_FT_INTERWORKED
11781 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11783 /* The return has already been handled
11784 by loading the LR into the PC. */
11785 really_return = 0;
11789 if (really_return)
11791 switch ((int) ARM_FUNC_TYPE (func_type))
11793 case ARM_FT_ISR:
11794 case ARM_FT_FIQ:
11795 /* ??? This is wrong for unified assembly syntax. */
11796 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11797 break;
11799 case ARM_FT_INTERWORKED:
11800 sprintf (instr, "bx%s\t%%|lr", conditional);
11801 break;
11803 case ARM_FT_EXCEPTION:
11804 /* ??? This is wrong for unified assembly syntax. */
11805 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11806 break;
11808 default:
11809 /* Use bx if it's available. */
11810 if (arm_arch5 || arm_arch4t)
11811 sprintf (instr, "bx%s\t%%|lr", conditional);
11812 else
11813 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11814 break;
11817 output_asm_insn (instr, & operand);
11820 return "";
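/* Illustrative examples of the sequences built here: a normal ARM function
   that saved {r4, r5, lr} and needs no special exit returns with the single
   instruction

	ldmfd	sp!, {r4, r5, pc}

   while a leaf function that saved nothing returns with "bx lr" on
   architectures that have BX, or "mov pc, lr" otherwise.  */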
11823 /* Write the function name into the code section, directly preceding
11824 the function prologue.
11826 Code will be output similar to this:
11828 .ascii "arm_poke_function_name", 0
11829 .align
11831 .word 0xff000000 + (t1 - t0)
11832 arm_poke_function_name
11833 mov ip, sp
11834 stmfd sp!, {fp, ip, lr, pc}
11835 sub fp, ip, #4
11837 When performing a stack backtrace, code can inspect the value
11838 of 'pc' stored at 'fp' + 0. If the trace function then looks
11839 at location pc - 12 and the top 8 bits are set, then we know
11840 that there is a function name embedded immediately preceding this
11841 location, whose length is ((pc[-3]) & 0x00ffffff).
11843 We assume that pc is declared as a pointer to an unsigned long.
11845 It is of no benefit to output the function name if we are assembling
11846 a leaf function. These function types will not contain a stack
11847 backtrace structure, therefore it is not possible to determine the
11848 function name. */
11849 void
11850 arm_poke_function_name (FILE *stream, const char *name)
11852 unsigned long alignlength;
11853 unsigned long length;
11854 rtx x;
11856 length = strlen (name) + 1;
11857 alignlength = ROUND_UP_WORD (length);
11859 ASM_OUTPUT_ASCII (stream, name, length);
11860 ASM_OUTPUT_ALIGN (stream, 2);
11861 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11862 assemble_aligned_integer (UNITS_PER_WORD, x);
11865 /* Place some comments into the assembler stream
11866 describing the current function. */
11867 static void
11868 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11870 unsigned long func_type;
11872 if (TARGET_THUMB1)
11874 thumb1_output_function_prologue (f, frame_size);
11875 return;
11878 /* Sanity check. */
11879 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11881 func_type = arm_current_func_type ();
11883 switch ((int) ARM_FUNC_TYPE (func_type))
11885 default:
11886 case ARM_FT_NORMAL:
11887 break;
11888 case ARM_FT_INTERWORKED:
11889 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11890 break;
11891 case ARM_FT_ISR:
11892 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11893 break;
11894 case ARM_FT_FIQ:
11895 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11896 break;
11897 case ARM_FT_EXCEPTION:
11898 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11899 break;
11902 if (IS_NAKED (func_type))
11903 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11905 if (IS_VOLATILE (func_type))
11906 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11908 if (IS_NESTED (func_type))
11909 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11910 if (IS_STACKALIGN (func_type))
11911 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11913 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11914 crtl->args.size,
11915 crtl->args.pretend_args_size, frame_size);
11917 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11918 frame_pointer_needed,
11919 cfun->machine->uses_anonymous_args);
11921 if (cfun->machine->lr_save_eliminated)
11922 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11924 if (crtl->calls_eh_return)
11925 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11929 const char *
11930 arm_output_epilogue (rtx sibling)
11932 int reg;
11933 unsigned long saved_regs_mask;
11934 unsigned long func_type;
11935 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11936 frame that is $fp + 4 for a non-variadic function. */
11937 int floats_offset = 0;
11938 rtx operands[3];
11939 FILE * f = asm_out_file;
11940 unsigned int lrm_count = 0;
11941 int really_return = (sibling == NULL);
11942 int start_reg;
11943 arm_stack_offsets *offsets;
11945 /* If we have already generated the return instruction
11946 then it is futile to generate anything else. */
11947 if (use_return_insn (FALSE, sibling) &&
11948 (cfun->machine->return_used_this_function != 0))
11949 return "";
11951 func_type = arm_current_func_type ();
11953 if (IS_NAKED (func_type))
11954 /* Naked functions don't have epilogues. */
11955 return "";
11957 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11959 rtx op;
11961 /* A volatile function should never return. Call abort. */
11962 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11963 assemble_external_libcall (op);
11964 output_asm_insn ("bl\t%a0", &op);
11966 return "";
11969 /* If we are throwing an exception, then we really must be doing a
11970 return, so we can't tail-call. */
11971 gcc_assert (!crtl->calls_eh_return || really_return);
11973 offsets = arm_get_frame_offsets ();
11974 saved_regs_mask = offsets->saved_regs_mask;
11976 if (TARGET_IWMMXT)
11977 lrm_count = bit_count (saved_regs_mask);
11979 floats_offset = offsets->saved_args;
11980 /* Compute how far away the floats will be. */
11981 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11982 if (saved_regs_mask & (1 << reg))
11983 floats_offset += 4;
11985 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11987 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11988 int vfp_offset = offsets->frame;
11990 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11992 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11993 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11995 floats_offset += 12;
11996 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11997 reg, FP_REGNUM, floats_offset - vfp_offset);
12000 else
12002 start_reg = LAST_FPA_REGNUM;
12004 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12006 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12008 floats_offset += 12;
12010 /* We can't unstack more than four registers at once. */
12011 if (start_reg - reg == 3)
12013 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
12014 reg, FP_REGNUM, floats_offset - vfp_offset);
12015 start_reg = reg - 1;
12018 else
12020 if (reg != start_reg)
12021 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12022 reg + 1, start_reg - reg,
12023 FP_REGNUM, floats_offset - vfp_offset);
12024 start_reg = reg - 1;
12028 /* Just in case the last register checked also needs unstacking. */
12029 if (reg != start_reg)
12030 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12031 reg + 1, start_reg - reg,
12032 FP_REGNUM, floats_offset - vfp_offset);
12035 if (TARGET_HARD_FLOAT && TARGET_VFP)
12037 int saved_size;
12039 /* The fldmd insns do not have base+offset addressing
12040 modes, so we use IP to hold the address. */
12041 saved_size = arm_get_vfp_saved_size ();
12043 if (saved_size > 0)
12045 floats_offset += saved_size;
12046 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
12047 FP_REGNUM, floats_offset - vfp_offset);
12049 start_reg = FIRST_VFP_REGNUM;
12050 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12052 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12053 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12055 if (start_reg != reg)
12056 vfp_output_fldmd (f, IP_REGNUM,
12057 (start_reg - FIRST_VFP_REGNUM) / 2,
12058 (reg - start_reg) / 2);
12059 start_reg = reg + 2;
12062 if (start_reg != reg)
12063 vfp_output_fldmd (f, IP_REGNUM,
12064 (start_reg - FIRST_VFP_REGNUM) / 2,
12065 (reg - start_reg) / 2);
12068 if (TARGET_IWMMXT)
12070 /* The frame pointer is guaranteed to be non-double-word aligned.
12071 This is because it is set to (old_stack_pointer - 4) and the
12072 old_stack_pointer was double word aligned. Thus the offset to
12073 the iWMMXt registers to be loaded must also be non-double-word
12074 sized, so that the resultant address *is* double-word aligned.
12075 We can ignore floats_offset since that was already included in
12076 the live_regs_mask. */
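/* Worked example (numbers purely illustrative): if LRM_COUNT comes in
   as 4 it becomes 5, so the first wldrd below uses an offset of 20;
   FP is 4 mod 8, so FP - 20 is doubleword aligned as required.  */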
12077 lrm_count += (lrm_count % 2 ? 2 : 1);
12079 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12080 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12082 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
12083 reg, FP_REGNUM, lrm_count * 4);
12084 lrm_count += 2;
12088 /* saved_regs_mask should contain the IP, which at the time of stack
12089 frame generation actually contains the old stack pointer. So a
12090 quick way to unwind the stack is just to pop the IP register directly
12091 into the stack pointer. */
12092 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
12093 saved_regs_mask &= ~ (1 << IP_REGNUM);
12094 saved_regs_mask |= (1 << SP_REGNUM);
12096 /* There are two registers left in saved_regs_mask - LR and PC. We
12097 only need to restore the LR register (the return address), but to
12098 save time we can load it directly into the PC, unless we need a
12099 special function exit sequence, or we are not really returning. */
12100 if (really_return
12101 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12102 && !crtl->calls_eh_return)
12103 /* Delete the LR from the register mask, so that the LR on
12104 the stack is loaded into the PC in the register mask. */
12105 saved_regs_mask &= ~ (1 << LR_REGNUM);
12106 else
12107 saved_regs_mask &= ~ (1 << PC_REGNUM);
12109 /* We must use SP as the base register, because SP is one of the
12110 registers being restored. If an interrupt or page fault
12111 happens in the ldm instruction, the SP might or might not
12112 have been restored. That would be bad, as then SP will no
12113 longer indicate the safe area of stack, and we can get stack
12114 corruption. Using SP as the base register means that it will
12115 be reset correctly to the original value, should an interrupt
12116 occur. If the stack pointer already points at the right
12117 place, then omit the subtraction. */
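/* A minimal sketch of what this path emits (register choice purely
   illustrative):

	sub	sp, fp, #16
	ldmfd	sp, {r4, fp, sp, pc}

   The ldm deliberately has no writeback, because SP itself is in the
   register list.  */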
12118 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
12119 || cfun->calls_alloca)
12120 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
12121 4 * bit_count (saved_regs_mask));
12122 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
12124 if (IS_INTERRUPT (func_type))
12125 /* Interrupt handlers will have pushed the
12126 IP onto the stack, so restore it now. */
12127 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
12129 else
12131 /* This branch is executed for ARM mode (non-apcs frames) and
12132 Thumb-2 mode. Frame layout is essentially the same for those
12133 cases, except that in ARM mode the frame pointer points to the
12134 first saved register, while in Thumb-2 mode the frame pointer points
12135 to the last saved register.
12137 It is possible to make the frame pointer point to the last saved
12138 register in both cases, and remove some conditionals below.
12139 That means that fp setup in prologue would be just "mov fp, sp"
12140 and sp restore in epilogue would be just "mov sp, fp", whereas
12141 now we have to use add/sub in those cases. However, the value
12142 of that would be marginal, as both mov and add/sub are 32-bit
12143 in ARM mode, and it would require extra conditionals
12144 in arm_expand_prologue to distinguish the ARM-apcs-frame case
12145 (where the frame pointer is required to point at the first register)
12146 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
12147 until a real need arises. */
12148 unsigned HOST_WIDE_INT amount;
12149 int rfe;
12150 /* Restore stack pointer if necessary. */
12151 if (TARGET_ARM && frame_pointer_needed)
12153 operands[0] = stack_pointer_rtx;
12154 operands[1] = hard_frame_pointer_rtx;
12156 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
12157 output_add_immediate (operands);
12159 else
12161 if (frame_pointer_needed)
12163 /* For Thumb-2 restore sp from the frame pointer.
12164 Operand restrictions mean we have to increment FP, then copy
12165 to SP. */
12166 amount = offsets->locals_base - offsets->saved_regs;
12167 operands[0] = hard_frame_pointer_rtx;
12169 else
12171 unsigned long count;
12172 operands[0] = stack_pointer_rtx;
12173 amount = offsets->outgoing_args - offsets->saved_regs;
12174 /* Pop call-clobbered registers if that avoids a
12175 separate stack adjustment. */
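/* Sketch of the logic below (descriptive only): MASK starts as the
   argument registers r0-r3 that neither hold the return value (the low
   arm_size_return_regs () / 4 registers) nor are already in
   SAVED_REGS_MASK.  Registers are then dropped from MASK, lowest
   numbered first, while popping them all would overshoot AMOUNT; if the
   remainder matches AMOUNT exactly, it is folded into SAVED_REGS_MASK
   and the separate stack adjustment is dropped.  */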
12176 count = offsets->saved_regs - offsets->saved_args;
12177 if (optimize_size
12178 && count != 0
12179 && !crtl->calls_eh_return
12180 && bit_count(saved_regs_mask) * 4 == count
12181 && !IS_INTERRUPT (func_type)
12182 && !crtl->tail_call_emit)
12184 unsigned long mask;
12185 mask = (1 << (arm_size_return_regs() / 4)) - 1;
12186 mask ^= 0xf;
12187 mask &= ~saved_regs_mask;
12188 reg = 0;
12189 while (bit_count (mask) * 4 > amount)
12191 while ((mask & (1 << reg)) == 0)
12192 reg++;
12193 mask &= ~(1 << reg);
12195 if (bit_count (mask) * 4 == amount) {
12196 amount = 0;
12197 saved_regs_mask |= mask;
12202 if (amount)
12204 operands[1] = operands[0];
12205 operands[2] = GEN_INT (amount);
12206 output_add_immediate (operands);
12208 if (frame_pointer_needed)
12209 asm_fprintf (f, "\tmov\t%r, %r\n",
12210 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
12213 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12215 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12216 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12217 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
12218 reg, SP_REGNUM);
12220 else
12222 start_reg = FIRST_FPA_REGNUM;
12224 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12226 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12228 if (reg - start_reg == 3)
12230 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
12231 start_reg, SP_REGNUM);
12232 start_reg = reg + 1;
12235 else
12237 if (reg != start_reg)
12238 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12239 start_reg, reg - start_reg,
12240 SP_REGNUM);
12242 start_reg = reg + 1;
12246 /* Just in case the last register checked also needs unstacking. */
12247 if (reg != start_reg)
12248 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12249 start_reg, reg - start_reg, SP_REGNUM);
12252 if (TARGET_HARD_FLOAT && TARGET_VFP)
12254 start_reg = FIRST_VFP_REGNUM;
12255 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12257 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12258 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12260 if (start_reg != reg)
12261 vfp_output_fldmd (f, SP_REGNUM,
12262 (start_reg - FIRST_VFP_REGNUM) / 2,
12263 (reg - start_reg) / 2);
12264 start_reg = reg + 2;
12267 if (start_reg != reg)
12268 vfp_output_fldmd (f, SP_REGNUM,
12269 (start_reg - FIRST_VFP_REGNUM) / 2,
12270 (reg - start_reg) / 2);
12272 if (TARGET_IWMMXT)
12273 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
12274 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12275 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
12277 /* If we can, restore the LR into the PC. */
12278 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
12279 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
12280 && !IS_STACKALIGN (func_type)
12281 && really_return
12282 && crtl->args.pretend_args_size == 0
12283 && saved_regs_mask & (1 << LR_REGNUM)
12284 && !crtl->calls_eh_return)
12286 saved_regs_mask &= ~ (1 << LR_REGNUM);
12287 saved_regs_mask |= (1 << PC_REGNUM);
12288 rfe = IS_INTERRUPT (func_type);
12290 else
12291 rfe = 0;
12293 /* Load the registers off the stack. If we only have one register
12294 to load, use the LDR instruction - it is faster. For Thumb-2
12295 always use pop and the assembler will pick the best instruction. */
12296 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
12297 && !IS_INTERRUPT(func_type))
12299 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
12301 else if (saved_regs_mask)
12303 if (saved_regs_mask & (1 << SP_REGNUM))
12304 /* Note - write back to the stack register is not enabled
12305 (i.e. "ldmfd sp!..."). We know that the stack pointer is
12306 in the list of registers and if we add writeback the
12307 instruction becomes UNPREDICTABLE. */
12308 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
12309 rfe);
12310 else if (TARGET_ARM)
12311 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
12312 rfe);
12313 else
12314 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
12317 if (crtl->args.pretend_args_size)
12319 /* Unwind the pre-pushed regs. */
12320 operands[0] = operands[1] = stack_pointer_rtx;
12321 operands[2] = GEN_INT (crtl->args.pretend_args_size);
12322 output_add_immediate (operands);
12326 /* We may have already restored PC directly from the stack. */
12327 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
12328 return "";
12330 /* Stack adjustment for exception handler. */
12331 if (crtl->calls_eh_return)
12332 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
12333 ARM_EH_STACKADJ_REGNUM);
12335 /* Generate the return instruction. */
12336 switch ((int) ARM_FUNC_TYPE (func_type))
12338 case ARM_FT_ISR:
12339 case ARM_FT_FIQ:
12340 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
12341 break;
12343 case ARM_FT_EXCEPTION:
12344 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12345 break;
12347 case ARM_FT_INTERWORKED:
12348 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12349 break;
12351 default:
12352 if (IS_STACKALIGN (func_type))
12354 /* See comment in arm_expand_prologue. */
12355 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
12357 if (arm_arch5 || arm_arch4t)
12358 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12359 else
12360 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12361 break;
12364 return "";
12367 static void
12368 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
12369 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
12371 arm_stack_offsets *offsets;
12373 if (TARGET_THUMB1)
12375 int regno;
12377 /* Emit any call-via-reg trampolines that are needed for v4t support
12378 of call_reg and call_value_reg type insns. */
12379 for (regno = 0; regno < LR_REGNUM; regno++)
12381 rtx label = cfun->machine->call_via[regno];
12383 if (label != NULL)
12385 switch_to_section (function_section (current_function_decl));
12386 targetm.asm_out.internal_label (asm_out_file, "L",
12387 CODE_LABEL_NUMBER (label));
12388 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
12392 /* ??? Probably not safe to set this here, since it assumes that a
12393 function will be emitted as assembly immediately after we generate
12394 RTL for it. This does not happen for inline functions. */
12395 cfun->machine->return_used_this_function = 0;
12397 else /* TARGET_32BIT */
12399 /* We need to take into account any stack-frame rounding. */
12400 offsets = arm_get_frame_offsets ();
12402 gcc_assert (!use_return_insn (FALSE, NULL)
12403 || (cfun->machine->return_used_this_function != 0)
12404 || offsets->saved_regs == offsets->outgoing_args
12405 || frame_pointer_needed);
12407 /* Reset the ARM-specific per-function variables. */
12408 after_arm_reorg = 0;
12412 /* Generate and emit an insn that we will recognize as a push_multi.
12413 Unfortunately, since this insn does not reflect very well the actual
12414 semantics of the operation, we need to annotate the insn for the benefit
12415 of DWARF2 frame unwind information. */
12416 static rtx
12417 emit_multi_reg_push (unsigned long mask)
12419 int num_regs = 0;
12420 int num_dwarf_regs;
12421 int i, j;
12422 rtx par;
12423 rtx dwarf;
12424 int dwarf_par_index;
12425 rtx tmp, reg;
12427 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12428 if (mask & (1 << i))
12429 num_regs++;
12431 gcc_assert (num_regs && num_regs <= 16);
12433 /* We don't record the PC in the dwarf frame information. */
12434 num_dwarf_regs = num_regs;
12435 if (mask & (1 << PC_REGNUM))
12436 num_dwarf_regs--;
12438 /* For the body of the insn we are going to generate an UNSPEC in
12439 parallel with several USEs. This allows the insn to be recognized
12440 by the push_multi pattern in the arm.md file. The insn looks
12441 something like this:
12443 (parallel [
12444 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
12445 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
12446 (use (reg:SI 11 fp))
12447 (use (reg:SI 12 ip))
12448 (use (reg:SI 14 lr))
12449 (use (reg:SI 15 pc))
12452 For the frame note however, we try to be more explicit and actually
12453 show each register being stored into the stack frame, plus a (single)
12454 decrement of the stack pointer. We do it this way in order to be
12455 friendly to the stack unwinding code, which only wants to see a single
12456 stack decrement per instruction. The RTL we generate for the note looks
12457 something like this:
12459 (sequence [
12460 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
12461 (set (mem:SI (reg:SI sp)) (reg:SI r4))
12462 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
12463 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
12464 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
12467 This sequence is used both by the code to support stack unwinding for
12468 exception handlers and the code to generate dwarf2 frame debugging. */
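/* For the mask used in the example above ({r4, fp, ip, lr, pc}, five
   registers, hence the -20 stack decrement) the push_multi pattern
   would typically be output as something like
   "stmfd sp!, {r4, fp, ip, lr, pc}".  */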
12470 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
12471 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
12472 dwarf_par_index = 1;
12474 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12476 if (mask & (1 << i))
12478 reg = gen_rtx_REG (SImode, i);
12480 XVECEXP (par, 0, 0)
12481 = gen_rtx_SET (VOIDmode,
12482 gen_frame_mem (BLKmode,
12483 gen_rtx_PRE_DEC (BLKmode,
12484 stack_pointer_rtx)),
12485 gen_rtx_UNSPEC (BLKmode,
12486 gen_rtvec (1, reg),
12487 UNSPEC_PUSH_MULT));
12489 if (i != PC_REGNUM)
12491 tmp = gen_rtx_SET (VOIDmode,
12492 gen_frame_mem (SImode, stack_pointer_rtx),
12493 reg);
12494 RTX_FRAME_RELATED_P (tmp) = 1;
12495 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
12496 dwarf_par_index++;
12499 break;
12503 for (j = 1, i++; j < num_regs; i++)
12505 if (mask & (1 << i))
12507 reg = gen_rtx_REG (SImode, i);
12509 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
12511 if (i != PC_REGNUM)
12514 = gen_rtx_SET (VOIDmode,
12515 gen_frame_mem (SImode,
12516 plus_constant (stack_pointer_rtx,
12517 4 * j)),
12518 reg);
12519 RTX_FRAME_RELATED_P (tmp) = 1;
12520 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
12523 j++;
12527 par = emit_insn (par);
12529 tmp = gen_rtx_SET (VOIDmode,
12530 stack_pointer_rtx,
12531 plus_constant (stack_pointer_rtx, -4 * num_regs));
12532 RTX_FRAME_RELATED_P (tmp) = 1;
12533 XVECEXP (dwarf, 0, 0) = tmp;
12535 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12537 return par;
12540 /* Calculate the size of the return value that is passed in registers. */
12541 static unsigned
12542 arm_size_return_regs (void)
12544 enum machine_mode mode;
12546 if (crtl->return_rtx != 0)
12547 mode = GET_MODE (crtl->return_rtx);
12548 else
12549 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12551 return GET_MODE_SIZE (mode);
12554 static rtx
12555 emit_sfm (int base_reg, int count)
12557 rtx par;
12558 rtx dwarf;
12559 rtx tmp, reg;
12560 int i;
12562 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12563 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12565 reg = gen_rtx_REG (XFmode, base_reg++);
12567 XVECEXP (par, 0, 0)
12568 = gen_rtx_SET (VOIDmode,
12569 gen_frame_mem (BLKmode,
12570 gen_rtx_PRE_DEC (BLKmode,
12571 stack_pointer_rtx)),
12572 gen_rtx_UNSPEC (BLKmode,
12573 gen_rtvec (1, reg),
12574 UNSPEC_PUSH_MULT));
12575 tmp = gen_rtx_SET (VOIDmode,
12576 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
12577 RTX_FRAME_RELATED_P (tmp) = 1;
12578 XVECEXP (dwarf, 0, 1) = tmp;
12580 for (i = 1; i < count; i++)
12582 reg = gen_rtx_REG (XFmode, base_reg++);
12583 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12585 tmp = gen_rtx_SET (VOIDmode,
12586 gen_frame_mem (XFmode,
12587 plus_constant (stack_pointer_rtx,
12588 i * 12)),
12589 reg);
12590 RTX_FRAME_RELATED_P (tmp) = 1;
12591 XVECEXP (dwarf, 0, i + 1) = tmp;
12594 tmp = gen_rtx_SET (VOIDmode,
12595 stack_pointer_rtx,
12596 plus_constant (stack_pointer_rtx, -12 * count));
12598 RTX_FRAME_RELATED_P (tmp) = 1;
12599 XVECEXP (dwarf, 0, 0) = tmp;
12601 par = emit_insn (par);
12602 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12604 return par;
12608 /* Return true if the current function needs to save/restore LR. */
12610 static bool
12611 thumb_force_lr_save (void)
12613 return !cfun->machine->lr_save_eliminated
12614 && (!leaf_function_p ()
12615 || thumb_far_jump_used_p ()
12616 || df_regs_ever_live_p (LR_REGNUM));
12620 /* Compute the distance from register FROM to register TO.
12621 These can be the arg pointer (26), the soft frame pointer (25),
12622 the stack pointer (13) or the hard frame pointer (11).
12623 In thumb mode r7 is used as the soft frame pointer, if needed.
12624 Typical stack layout looks like this:
12626 old stack pointer -> | |
12627 ----
12628 | | \
12629 | | saved arguments for
12630 | | vararg functions
12631 | | /
12633 hard FP & arg pointer -> | | \
12634 | | stack
12635 | | frame
12636 | | /
12638 | | \
12639 | | call saved
12640 | | registers
12641 soft frame pointer -> | | /
12643 | | \
12644 | | local
12645 | | variables
12646 locals base pointer -> | | /
12648 | | \
12649 | | outgoing
12650 | | arguments
12651 current stack pointer -> | | /
12654 For a given function some or all of these stack components
12655 may not be needed, giving rise to the possibility of
12656 eliminating some of the registers.
12658 The values returned by this function must reflect the behavior
12659 of arm_expand_prologue() and arm_compute_save_reg_mask().
12661 The sign of the number returned reflects the direction of stack
12662 growth, so the values are positive for all eliminations except
12663 from the soft frame pointer to the hard frame pointer.
12665 SFP may point just inside the local variables block to ensure correct
12666 alignment. */
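/* Purely illustrative example: with hypothetical offsets saved_args = 0,
   frame = 4, saved_regs = soft_frame = 24, locals_base = 32 and
   outgoing_args = 40, the eliminations computed below give
   arg pointer -> soft frame pointer = 24, arg pointer -> stack
   pointer = 36, soft frame pointer -> stack pointer = 16, and soft
   frame pointer -> hard frame pointer = -20, the one negative case
   mentioned above.  */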
12669 /* Calculate stack offsets. These are used to calculate register elimination
12670 offsets and in prologue/epilogue code. Also calculates which registers
12671 should be saved. */
12673 static arm_stack_offsets *
12674 arm_get_frame_offsets (void)
12676 struct arm_stack_offsets *offsets;
12677 unsigned long func_type;
12678 int leaf;
12679 int saved;
12680 int core_saved;
12681 HOST_WIDE_INT frame_size;
12682 int i;
12684 offsets = &cfun->machine->stack_offsets;
12686 /* We need to know if we are a leaf function. Unfortunately, it
12687 is possible to be called after start_sequence has been called,
12688 which causes get_insns to return the insns for the sequence,
12689 not the function, which will cause leaf_function_p to return
12690 the incorrect result. To work around this we cache the computed
12692 frame offsets; we only need to know about leaf functions once reload
12693 has completed, and the frame size cannot be changed after that time,
12694 so we can safely use the cached value. */
12696 if (reload_completed)
12697 return offsets;
12699 /* Initially this is the size of the local variables. It will be translated
12700 into an offset once we have determined the size of preceding data. */
12701 frame_size = ROUND_UP_WORD (get_frame_size ());
12703 leaf = leaf_function_p ();
12705 /* Space for variadic functions. */
12706 offsets->saved_args = crtl->args.pretend_args_size;
12708 /* In Thumb mode this is incorrect, but never used. */
12709 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
12710 arm_compute_static_chain_stack_bytes();
12712 if (TARGET_32BIT)
12714 unsigned int regno;
12716 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12717 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12718 saved = core_saved;
12720 /* We know that SP will be doubleword aligned on entry, and we must
12721 preserve that condition at any subroutine call. We also require the
12722 soft frame pointer to be doubleword aligned. */
12724 if (TARGET_REALLY_IWMMXT)
12726 /* Check for the call-saved iWMMXt registers. */
12727 for (regno = FIRST_IWMMXT_REGNUM;
12728 regno <= LAST_IWMMXT_REGNUM;
12729 regno++)
12730 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12731 saved += 8;
12734 func_type = arm_current_func_type ();
12735 if (! IS_VOLATILE (func_type))
12737 /* Space for saved FPA registers. */
12738 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12739 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12740 saved += 12;
12742 /* Space for saved VFP registers. */
12743 if (TARGET_HARD_FLOAT && TARGET_VFP)
12744 saved += arm_get_vfp_saved_size ();
12747 else /* TARGET_THUMB1 */
12749 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12750 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12751 saved = core_saved;
12752 if (TARGET_BACKTRACE)
12753 saved += 16;
12756 /* Saved registers include the stack frame. */
12757 offsets->saved_regs = offsets->saved_args + saved +
12758 arm_compute_static_chain_stack_bytes();
12759 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12760 /* A leaf function does not need any stack alignment if it has nothing
12761 on the stack. */
12762 if (leaf && frame_size == 0)
12764 offsets->outgoing_args = offsets->soft_frame;
12765 offsets->locals_base = offsets->soft_frame;
12766 return offsets;
12769 /* Ensure SFP has the correct alignment. */
12770 if (ARM_DOUBLEWORD_ALIGN
12771 && (offsets->soft_frame & 7))
12773 offsets->soft_frame += 4;
12774 /* Try to align stack by pushing an extra reg. Don't bother doing this
12775 when there is a stack frame as the alignment will be rolled into
12776 the normal stack adjustment. */
12777 if (frame_size + crtl->outgoing_args_size == 0)
12779 int reg = -1;
12781 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12783 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12785 reg = i;
12786 break;
12790 if (reg == -1 && arm_size_return_regs () <= 12
12791 && !crtl->tail_call_emit)
12793 /* Push/pop an argument register (r3) if all callee saved
12794 registers are already being pushed. */
12795 reg = 3;
12798 if (reg != -1)
12800 offsets->saved_regs += 4;
12801 offsets->saved_regs_mask |= (1 << reg);
12806 offsets->locals_base = offsets->soft_frame + frame_size;
12807 offsets->outgoing_args = (offsets->locals_base
12808 + crtl->outgoing_args_size);
12810 if (ARM_DOUBLEWORD_ALIGN)
12812 /* Ensure SP remains doubleword aligned. */
12813 if (offsets->outgoing_args & 7)
12814 offsets->outgoing_args += 4;
12815 gcc_assert (!(offsets->outgoing_args & 7));
12818 return offsets;
12822 /* Calculate the relative offsets for the different stack pointers. Positive
12823 offsets are in the direction of stack growth. */
12825 HOST_WIDE_INT
12826 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12828 arm_stack_offsets *offsets;
12830 offsets = arm_get_frame_offsets ();
12832 /* OK, now we have enough information to compute the distances.
12833 There must be an entry in these switch tables for each pair
12834 of registers in ELIMINABLE_REGS, even if some of the entries
12835 seem to be redundant or useless. */
12836 switch (from)
12838 case ARG_POINTER_REGNUM:
12839 switch (to)
12841 case THUMB_HARD_FRAME_POINTER_REGNUM:
12842 return 0;
12844 case FRAME_POINTER_REGNUM:
12845 /* This is the reverse of the soft frame pointer
12846 to hard frame pointer elimination below. */
12847 return offsets->soft_frame - offsets->saved_args;
12849 case ARM_HARD_FRAME_POINTER_REGNUM:
12850 /* This is only non-zero in the case where the static chain register
12851 is stored above the frame. */
12852 return offsets->frame - offsets->saved_args - 4;
12854 case STACK_POINTER_REGNUM:
12855 /* If nothing has been pushed on the stack at all
12856 then this will return -4. This *is* correct! */
12857 return offsets->outgoing_args - (offsets->saved_args + 4);
12859 default:
12860 gcc_unreachable ();
12862 gcc_unreachable ();
12864 case FRAME_POINTER_REGNUM:
12865 switch (to)
12867 case THUMB_HARD_FRAME_POINTER_REGNUM:
12868 return 0;
12870 case ARM_HARD_FRAME_POINTER_REGNUM:
12871 /* The hard frame pointer points to the top entry in the
12872 stack frame. The soft frame pointer to the bottom entry
12873 in the stack frame. If there is no stack frame at all,
12874 then they are identical. */
12876 return offsets->frame - offsets->soft_frame;
12878 case STACK_POINTER_REGNUM:
12879 return offsets->outgoing_args - offsets->soft_frame;
12881 default:
12882 gcc_unreachable ();
12884 gcc_unreachable ();
12886 default:
12887 /* You cannot eliminate from the stack pointer.
12888 In theory you could eliminate from the hard frame
12889 pointer to the stack pointer, but this will never
12890 happen, since if a stack frame is not needed the
12891 hard frame pointer will never be used. */
12892 gcc_unreachable ();
12897 /* Emit RTL to save coprocessor registers on function entry. Returns the
12898 number of bytes pushed. */
12900 static int
12901 arm_save_coproc_regs(void)
12903 int saved_size = 0;
12904 unsigned reg;
12905 unsigned start_reg;
12906 rtx insn;
12908 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12909 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12911 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12912 insn = gen_rtx_MEM (V2SImode, insn);
12913 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12914 RTX_FRAME_RELATED_P (insn) = 1;
12915 saved_size += 8;
12918 /* Save any floating point call-saved registers used by this
12919 function. */
12920 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12922 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12923 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12925 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12926 insn = gen_rtx_MEM (XFmode, insn);
12927 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12928 RTX_FRAME_RELATED_P (insn) = 1;
12929 saved_size += 12;
12932 else
12934 start_reg = LAST_FPA_REGNUM;
12936 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12938 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12940 if (start_reg - reg == 3)
12942 insn = emit_sfm (reg, 4);
12943 RTX_FRAME_RELATED_P (insn) = 1;
12944 saved_size += 48;
12945 start_reg = reg - 1;
12948 else
12950 if (start_reg != reg)
12952 insn = emit_sfm (reg + 1, start_reg - reg);
12953 RTX_FRAME_RELATED_P (insn) = 1;
12954 saved_size += (start_reg - reg) * 12;
12956 start_reg = reg - 1;
12960 if (start_reg != reg)
12962 insn = emit_sfm (reg + 1, start_reg - reg);
12963 saved_size += (start_reg - reg) * 12;
12964 RTX_FRAME_RELATED_P (insn) = 1;
12967 if (TARGET_HARD_FLOAT && TARGET_VFP)
12969 start_reg = FIRST_VFP_REGNUM;
12971 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12973 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12974 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12976 if (start_reg != reg)
12977 saved_size += vfp_emit_fstmd (start_reg,
12978 (reg - start_reg) / 2);
12979 start_reg = reg + 2;
12982 if (start_reg != reg)
12983 saved_size += vfp_emit_fstmd (start_reg,
12984 (reg - start_reg) / 2);
12986 return saved_size;
12990 /* Set the Thumb frame pointer from the stack pointer. */
12992 static void
12993 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12995 HOST_WIDE_INT amount;
12996 rtx insn, dwarf;
12998 amount = offsets->outgoing_args - offsets->locals_base;
12999 if (amount < 1024)
13000 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13001 stack_pointer_rtx, GEN_INT (amount)));
13002 else
13004 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
13005 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
13006 expects the first two operands to be the same. */
13007 if (TARGET_THUMB2)
13009 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13010 stack_pointer_rtx,
13011 hard_frame_pointer_rtx));
13013 else
13015 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13016 hard_frame_pointer_rtx,
13017 stack_pointer_rtx));
13019 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
13020 plus_constant (stack_pointer_rtx, amount));
13021 RTX_FRAME_RELATED_P (dwarf) = 1;
13022 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13025 RTX_FRAME_RELATED_P (insn) = 1;
13028 /* Generate the prologue instructions for entry into an ARM or Thumb-2
13029 function. */
13030 void
13031 arm_expand_prologue (void)
13033 rtx amount;
13034 rtx insn;
13035 rtx ip_rtx;
13036 unsigned long live_regs_mask;
13037 unsigned long func_type;
13038 int fp_offset = 0;
13039 int saved_pretend_args = 0;
13040 int saved_regs = 0;
13041 unsigned HOST_WIDE_INT args_to_push;
13042 arm_stack_offsets *offsets;
13044 func_type = arm_current_func_type ();
13046 /* Naked functions don't have prologues. */
13047 if (IS_NAKED (func_type))
13048 return;
13050 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
13051 args_to_push = crtl->args.pretend_args_size;
13053 /* Compute which registers we will have to save onto the stack. */
13054 offsets = arm_get_frame_offsets ();
13055 live_regs_mask = offsets->saved_regs_mask;
13057 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
13059 if (IS_STACKALIGN (func_type))
13061 rtx dwarf;
13062 rtx r0;
13063 rtx r1;
13064 /* Handle a word-aligned stack pointer. We generate the following:
13066 mov r0, sp
13067 bic r1, r0, #7
13068 mov sp, r1
13069 <save and restore r0 in normal prologue/epilogue>
13070 mov sp, r0
13071 bx lr
13073 The unwinder doesn't need to know about the stack realignment.
13074 Just tell it we saved SP in r0. */
13075 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
13077 r0 = gen_rtx_REG (SImode, 0);
13078 r1 = gen_rtx_REG (SImode, 1);
13079 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
13080 compiler won't choke. */
13081 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
13082 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
13083 insn = gen_movsi (r0, stack_pointer_rtx);
13084 RTX_FRAME_RELATED_P (insn) = 1;
13085 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13086 emit_insn (insn);
13087 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
13088 emit_insn (gen_movsi (stack_pointer_rtx, r1));
13091 /* For APCS frames, if IP register is clobbered
13092 when creating frame, save that register in a special
13093 way. */
13094 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13096 if (IS_INTERRUPT (func_type))
13098 /* Interrupt functions must not corrupt any registers.
13099 Creating a frame pointer however, corrupts the IP
13100 register, so we must push it first. */
13101 insn = emit_multi_reg_push (1 << IP_REGNUM);
13103 /* Do not set RTX_FRAME_RELATED_P on this insn.
13104 The dwarf stack unwinding code only wants to see one
13105 stack decrement per function, and this is not it. If
13106 this instruction is labeled as being part of the frame
13107 creation sequence then dwarf2out_frame_debug_expr will
13108 die when it encounters the assignment of IP to FP
13109 later on, since the use of SP here establishes SP as
13110 the CFA register and not IP.
13112 Anyway this instruction is not really part of the stack
13113 frame creation although it is part of the prologue. */
13115 else if (IS_NESTED (func_type))
13117 /* The Static chain register is the same as the IP register
13118 used as a scratch register during stack frame creation.
13119 To get around this need to find somewhere to store IP
13120 whilst the frame is being created. We try the following
13121 places in order:
13123 1. The last argument register.
13124 2. A slot on the stack above the frame. (This only
13125 works if the function is not a varargs function).
13126 3. Register r3, after pushing the argument registers
13127 onto the stack.
13129 Note - we only need to tell the dwarf2 backend about the SP
13130 adjustment in the second variant; the static chain register
13131 doesn't need to be unwound, as it doesn't contain a value
13132 inherited from the caller. */
13134 if (df_regs_ever_live_p (3) == false)
13135 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13136 else if (args_to_push == 0)
13138 rtx dwarf;
13140 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
13141 saved_regs += 4;
13143 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
13144 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
13145 fp_offset = 4;
13147 /* Just tell the dwarf backend that we adjusted SP. */
13148 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13149 plus_constant (stack_pointer_rtx,
13150 -fp_offset));
13151 RTX_FRAME_RELATED_P (insn) = 1;
13152 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13154 else
13156 /* Store the args on the stack. */
13157 if (cfun->machine->uses_anonymous_args)
13158 insn = emit_multi_reg_push
13159 ((0xf0 >> (args_to_push / 4)) & 0xf);
13160 else
13161 insn = emit_insn
13162 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13163 GEN_INT (- args_to_push)));
13165 RTX_FRAME_RELATED_P (insn) = 1;
13167 saved_pretend_args = 1;
13168 fp_offset = args_to_push;
13169 args_to_push = 0;
13171 /* Now reuse r3 to preserve IP. */
13172 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13176 insn = emit_set_insn (ip_rtx,
13177 plus_constant (stack_pointer_rtx, fp_offset));
13178 RTX_FRAME_RELATED_P (insn) = 1;
13181 if (args_to_push)
13183 /* Push the argument registers, or reserve space for them. */
13184 if (cfun->machine->uses_anonymous_args)
13185 insn = emit_multi_reg_push
13186 ((0xf0 >> (args_to_push / 4)) & 0xf);
13187 else
13188 insn = emit_insn
13189 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13190 GEN_INT (- args_to_push)));
13191 RTX_FRAME_RELATED_P (insn) = 1;
13194 /* If this is an interrupt service routine, and the link register
13195 is going to be pushed, and we're not generating the extra
13196 push of IP (needed when a frame is needed and the frame layout is APCS),
13197 then subtracting four from LR now will mean that the function return
13198 can be done with a single instruction. */
13199 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
13200 && (live_regs_mask & (1 << LR_REGNUM)) != 0
13201 && !(frame_pointer_needed && TARGET_APCS_FRAME)
13202 && TARGET_ARM)
13204 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
13206 emit_set_insn (lr, plus_constant (lr, -4));
13209 if (live_regs_mask)
13211 saved_regs += bit_count (live_regs_mask) * 4;
13212 if (optimize_size && !frame_pointer_needed
13213 && saved_regs == offsets->saved_regs - offsets->saved_args)
13215 /* If no coprocessor registers are being pushed and we don't have
13216 to worry about a frame pointer then push extra registers to
13217 create the stack frame. This is done in a way that does not
13218 alter the frame layout, so is independent of the epilogue. */
13219 int n;
13220 int frame;
13221 n = 0;
13222 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
13223 n++;
13224 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
13225 if (frame && n * 4 >= frame)
13227 n = frame / 4;
13228 live_regs_mask |= (1 << n) - 1;
13229 saved_regs += frame;
13232 insn = emit_multi_reg_push (live_regs_mask);
13233 RTX_FRAME_RELATED_P (insn) = 1;
13236 if (! IS_VOLATILE (func_type))
13237 saved_regs += arm_save_coproc_regs ();
13239 if (frame_pointer_needed && TARGET_ARM)
13241 /* Create the new frame pointer. */
13242 if (TARGET_APCS_FRAME)
13244 insn = GEN_INT (-(4 + args_to_push + fp_offset));
13245 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
13246 RTX_FRAME_RELATED_P (insn) = 1;
13248 if (IS_NESTED (func_type))
13250 /* Recover the static chain register. */
13251 if (!df_regs_ever_live_p (3)
13252 || saved_pretend_args)
13253 insn = gen_rtx_REG (SImode, 3);
13254 else /* if (crtl->args.pretend_args_size == 0) */
13256 insn = plus_constant (hard_frame_pointer_rtx, 4);
13257 insn = gen_frame_mem (SImode, insn);
13259 emit_set_insn (ip_rtx, insn);
13260 /* Add a USE to stop propagate_one_insn() from barfing. */
13261 emit_insn (gen_prologue_use (ip_rtx));
13264 else
13266 insn = GEN_INT (saved_regs - 4);
13267 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13268 stack_pointer_rtx, insn));
13269 RTX_FRAME_RELATED_P (insn) = 1;
13273 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
13275 /* This add can produce multiple insns for a large constant, so we
13276 need to get tricky. */
13277 rtx last = get_last_insn ();
13279 amount = GEN_INT (offsets->saved_args + saved_regs
13280 - offsets->outgoing_args);
13282 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13283 amount));
13286 last = last ? NEXT_INSN (last) : get_insns ();
13287 RTX_FRAME_RELATED_P (last) = 1;
13289 while (last != insn);
13291 /* If the frame pointer is needed, emit a special barrier that
13292 will prevent the scheduler from moving stores to the frame
13293 before the stack adjustment. */
13294 if (frame_pointer_needed)
13295 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
13296 hard_frame_pointer_rtx));
13300 if (frame_pointer_needed && TARGET_THUMB2)
13301 thumb_set_frame_pointer (offsets);
13303 if (flag_pic && arm_pic_register != INVALID_REGNUM)
13305 unsigned long mask;
13307 mask = live_regs_mask;
13308 mask &= THUMB2_WORK_REGS;
13309 if (!IS_NESTED (func_type))
13310 mask |= (1 << IP_REGNUM);
13311 arm_load_pic_register (mask);
13314 /* If we are profiling, make sure no instructions are scheduled before
13315 the call to mcount. Similarly if the user has requested no
13316 scheduling in the prologue. Similarly if we want non-call exceptions
13317 using the EABI unwinder, to prevent faulting instructions from being
13318 swapped with a stack adjustment. */
13319 if (crtl->profile || !TARGET_SCHED_PROLOG
13320 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13321 emit_insn (gen_blockage ());
13323 /* If the link register is being kept alive, with the return address in it,
13324 then make sure that it does not get reused by the ce2 pass. */
13325 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
13326 cfun->machine->lr_save_eliminated = 1;
13329 /* Print condition code to STREAM. Helper function for arm_print_operand. */
13330 static void
13331 arm_print_condition (FILE *stream)
13333 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
13335 /* Branch conversion is not implemented for Thumb-2. */
13336 if (TARGET_THUMB)
13338 output_operand_lossage ("predicated Thumb instruction");
13339 return;
13341 if (current_insn_predicate != NULL)
13343 output_operand_lossage
13344 ("predicated instruction in conditional sequence");
13345 return;
13348 fputs (arm_condition_codes[arm_current_cc], stream);
13350 else if (current_insn_predicate)
13352 enum arm_cond_code code;
13354 if (TARGET_THUMB1)
13356 output_operand_lossage ("predicated Thumb instruction");
13357 return;
13360 code = get_arm_condition_code (current_insn_predicate);
13361 fputs (arm_condition_codes[code], stream);
13366 /* If CODE is 'd', then the X is a condition operand and the instruction
13367 should only be executed if the condition is true.
13368 if CODE is 'D', then the X is a condition operand and the instruction
13369 should only be executed if the condition is false: however, if the mode
13370 of the comparison is CCFPEmode, then always execute the instruction -- we
13371 do this because in these circumstances !GE does not necessarily imply LT;
13372 in these cases the instruction pattern will take care to make sure that
13373 an instruction containing %d will follow, thereby undoing the effects of
13374 doing this instruction unconditionally.
13375 If CODE is 'N' then X is a floating point operand that must be negated
13376 before output.
13377 If CODE is 'B' then output a bitwise inverted value of X (a const int).
13378 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
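/* Two illustrative cases: for (const_int 0), %B prints -1 (the bitwise
   inverse); for a DImode value held in r0, %M prints the register
   range "{r0-r1}".  */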
13379 void
13380 arm_print_operand (FILE *stream, rtx x, int code)
13382 switch (code)
13384 case '@':
13385 fputs (ASM_COMMENT_START, stream);
13386 return;
13388 case '_':
13389 fputs (user_label_prefix, stream);
13390 return;
13392 case '|':
13393 fputs (REGISTER_PREFIX, stream);
13394 return;
13396 case '?':
13397 arm_print_condition (stream);
13398 return;
13400 case '(':
13401 /* Nothing in unified syntax, otherwise the current condition code. */
13402 if (!TARGET_UNIFIED_ASM)
13403 arm_print_condition (stream);
13404 break;
13406 case ')':
13407 /* The current condition code in unified syntax, otherwise nothing. */
13408 if (TARGET_UNIFIED_ASM)
13409 arm_print_condition (stream);
13410 break;
13412 case '.':
13413 /* The current condition code for a condition code setting instruction.
13414 Preceded by 's' in unified syntax, otherwise followed by 's'. */
13415 if (TARGET_UNIFIED_ASM)
13417 fputc('s', stream);
13418 arm_print_condition (stream);
13420 else
13422 arm_print_condition (stream);
13423 fputc('s', stream);
13425 return;
13427 case '!':
13428 /* If the instruction is conditionally executed then print
13429 the current condition code, otherwise print 's'. */
13430 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
13431 if (current_insn_predicate)
13432 arm_print_condition (stream);
13433 else
13434 fputc('s', stream);
13435 break;
13437 /* %# is a "break" sequence. It doesn't output anything, but is used to
13438 separate e.g. operand numbers from following text, if that text consists
13439 of further digits which we don't want to be part of the operand
13440 number. */
13441 case '#':
13442 return;
13444 case 'N':
13446 REAL_VALUE_TYPE r;
13447 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13448 r = REAL_VALUE_NEGATE (r);
13449 fprintf (stream, "%s", fp_const_from_val (&r));
13451 return;
13453 /* An integer or symbol address without a preceding # sign. */
13454 case 'c':
13455 switch (GET_CODE (x))
13457 case CONST_INT:
13458 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13459 break;
13461 case SYMBOL_REF:
13462 output_addr_const (stream, x);
13463 break;
13465 default:
13466 gcc_unreachable ();
13468 return;
13470 case 'B':
13471 if (GET_CODE (x) == CONST_INT)
13473 HOST_WIDE_INT val;
13474 val = ARM_SIGN_EXTEND (~INTVAL (x));
13475 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
13477 else
13479 putc ('~', stream);
13480 output_addr_const (stream, x);
13482 return;
13484 case 'L':
13485 /* The low 16 bits of an immediate constant. */
13486 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
13487 return;
13489 case 'i':
13490 fprintf (stream, "%s", arithmetic_instr (x, 1));
13491 return;
13493 /* Truncate Cirrus shift counts. */
13494 case 's':
13495 if (GET_CODE (x) == CONST_INT)
13497 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
13498 return;
13500 arm_print_operand (stream, x, 0);
13501 return;
13503 case 'I':
13504 fprintf (stream, "%s", arithmetic_instr (x, 0));
13505 return;
13507 case 'S':
13509 HOST_WIDE_INT val;
13510 const char *shift;
13512 if (!shift_operator (x, SImode))
13514 output_operand_lossage ("invalid shift operand");
13515 break;
13518 shift = shift_op (x, &val);
13520 if (shift)
13522 fprintf (stream, ", %s ", shift);
13523 if (val == -1)
13524 arm_print_operand (stream, XEXP (x, 1), 0);
13525 else
13526 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
13529 return;
13531 /* An explanation of the 'Q', 'R' and 'H' register operands:
13533 In a pair of registers containing a DI or DF value the 'Q'
13534 operand returns the register number of the register containing
13535 the least significant part of the value. The 'R' operand returns
13536 the register number of the register containing the most
13537 significant part of the value.
13539 The 'H' operand returns the higher of the two register numbers.
13540 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
13541 same as the 'Q' operand, since the most significant part of the
13542 value is held in the lower number register. The reverse is true
13543 on systems where WORDS_BIG_ENDIAN is false.
13545 The purpose of these operands is to distinguish between cases
13546 where the endian-ness of the values is important (for example
13547 when they are added together), and cases where the endian-ness
13548 is irrelevant, but the order of register operations is important.
13549 For example when loading a value from memory into a register
13550 pair, the endian-ness does not matter. Provided that the value
13551 from the lower memory address is put into the lower numbered
13552 register, and the value from the higher address is put into the
13553 higher numbered register, the load will work regardless of whether
13554 the value being loaded is big-wordian or little-wordian. The
13555 order of the two register loads can matter however, if the address
13556 of the memory location is actually held in one of the registers
13557 being overwritten by the load. */
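/* An illustrative case: for a DImode value held in r4/r5 on a
   little-endian target (WORDS_BIG_ENDIAN false), %Q prints r4 while
   %R and %H both print r5; with WORDS_BIG_ENDIAN true, %Q and %H
   print r5 and %R prints r4.  */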
13558 case 'Q':
13559 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13561 output_operand_lossage ("invalid operand for code '%c'", code);
13562 return;
13565 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13566 return;
13568 case 'R':
13569 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13571 output_operand_lossage ("invalid operand for code '%c'", code);
13572 return;
13575 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13576 return;
13578 case 'H':
13579 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13581 output_operand_lossage ("invalid operand for code '%c'", code);
13582 return;
13585 asm_fprintf (stream, "%r", REGNO (x) + 1);
13586 return;
13588 case 'J':
13589 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13591 output_operand_lossage ("invalid operand for code '%c'", code);
13592 return;
13595 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13596 return;
13598 case 'K':
13599 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13601 output_operand_lossage ("invalid operand for code '%c'", code);
13602 return;
13605 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13606 return;
13608 case 'm':
13609 asm_fprintf (stream, "%r",
13610 GET_CODE (XEXP (x, 0)) == REG
13611 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13612 return;
13614 case 'M':
13615 asm_fprintf (stream, "{%r-%r}",
13616 REGNO (x),
13617 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13618 return;
13620 /* Like 'M', but writing doubleword vector registers, for use by Neon
13621 insns. */
13622 case 'h':
13624 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13625 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13626 if (numregs == 1)
13627 asm_fprintf (stream, "{d%d}", regno);
13628 else
13629 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13631 return;
13633 case 'd':
13634 /* CONST_TRUE_RTX means always -- that's the default. */
13635 if (x == const_true_rtx)
13636 return;
13638 if (!COMPARISON_P (x))
13640 output_operand_lossage ("invalid operand for code '%c'", code);
13641 return;
13644 fputs (arm_condition_codes[get_arm_condition_code (x)],
13645 stream);
13646 return;
13648 case 'D':
13649 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13650 want to do that. */
13651 if (x == const_true_rtx)
13653 output_operand_lossage ("instruction never executed");
13654 return;
13656 if (!COMPARISON_P (x))
13658 output_operand_lossage ("invalid operand for code '%c'", code);
13659 return;
13662 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13663 (get_arm_condition_code (x))],
13664 stream);
13665 return;
13667 /* Cirrus registers can be accessed in a variety of ways:
13668 single floating point (f)
13669 double floating point (d)
13670 32bit integer (fx)
13671 64bit integer (dx). */
13672 case 'W': /* Cirrus register in F mode. */
13673 case 'X': /* Cirrus register in D mode. */
13674 case 'Y': /* Cirrus register in FX mode. */
13675 case 'Z': /* Cirrus register in DX mode. */
13676 gcc_assert (GET_CODE (x) == REG
13677 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13679 fprintf (stream, "mv%s%s",
13680 code == 'W' ? "f"
13681 : code == 'X' ? "d"
13682 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13684 return;
13686 /* Print cirrus register in the mode specified by the register's mode. */
13687 case 'V':
13689 int mode = GET_MODE (x);
13691 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13693 output_operand_lossage ("invalid operand for code '%c'", code);
13694 return;
13697 fprintf (stream, "mv%s%s",
13698 mode == DFmode ? "d"
13699 : mode == SImode ? "fx"
13700 : mode == DImode ? "dx"
13701 : "f", reg_names[REGNO (x)] + 2);
13703 return;
13706 case 'U':
13707 if (GET_CODE (x) != REG
13708 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13709 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13710 /* Bad value for wCG register number. */
13712 output_operand_lossage ("invalid operand for code '%c'", code);
13713 return;
13716 else
13717 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13718 return;
13720 /* Print an iWMMXt control register name. */
13721 case 'w':
13722 if (GET_CODE (x) != CONST_INT
13723 || INTVAL (x) < 0
13724 || INTVAL (x) >= 16)
13725 /* Bad value for wC register number. */
13727 output_operand_lossage ("invalid operand for code '%c'", code);
13728 return;
13731 else
13733 static const char * wc_reg_names [16] =
13735 "wCID", "wCon", "wCSSF", "wCASF",
13736 "wC4", "wC5", "wC6", "wC7",
13737 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13738 "wC12", "wC13", "wC14", "wC15"
13741 fprintf (stream, wc_reg_names [INTVAL (x)]);
13743 return;
13745 /* Print a VFP/Neon double precision or quad precision register name. */
13746 case 'P':
13747 case 'q':
13749 int mode = GET_MODE (x);
13750 int is_quad = (code == 'q');
13751 int regno;
13753 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13755 output_operand_lossage ("invalid operand for code '%c'", code);
13756 return;
13759 if (GET_CODE (x) != REG
13760 || !IS_VFP_REGNUM (REGNO (x)))
13762 output_operand_lossage ("invalid operand for code '%c'", code);
13763 return;
13766 regno = REGNO (x);
13767 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13768 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13770 output_operand_lossage ("invalid operand for code '%c'", code);
13771 return;
13774 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13775 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13777 return;
13779 /* These two codes print the low/high doubleword register of a Neon quad
13780 register, respectively. For pair-structure types, can also print
13781 low/high quadword registers. */
13782 case 'e':
13783 case 'f':
13785 int mode = GET_MODE (x);
13786 int regno;
13788 if ((GET_MODE_SIZE (mode) != 16
13789 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13791 output_operand_lossage ("invalid operand for code '%c'", code);
13792 return;
13795 regno = REGNO (x);
13796 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13798 output_operand_lossage ("invalid operand for code '%c'", code);
13799 return;
13802 if (GET_MODE_SIZE (mode) == 16)
13803 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13804 + (code == 'f' ? 1 : 0));
13805 else
13806 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13807 + (code == 'f' ? 1 : 0));
13809 return;
13811 /* Print a VFPv3 floating-point constant, represented as an integer
13812 index. */
13813 case 'G':
13815 int index = vfp3_const_double_index (x);
13816 gcc_assert (index != -1);
13817 fprintf (stream, "%d", index);
13819 return;
13821 /* Print bits representing opcode features for Neon.
13823 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13824 and polynomials as unsigned.
13826 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13828 Bit 2 is 1 for rounding functions, 0 otherwise. */
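/* For instance (purely illustrative), an operand whose INTVAL is 5
   (binary 101: signed, non-float, rounding) makes %T print 's',
   %F print 'i' and %O print 'r'.  */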
13830 /* Identify the type as 's', 'u', 'p' or 'f'. */
13831 case 'T':
13833 HOST_WIDE_INT bits = INTVAL (x);
13834 fputc ("uspf"[bits & 3], stream);
13836 return;
13838 /* Likewise, but signed and unsigned integers are both 'i'. */
13839 case 'F':
13841 HOST_WIDE_INT bits = INTVAL (x);
13842 fputc ("iipf"[bits & 3], stream);
13844 return;
13846 /* As for 'T', but emit 'u' instead of 'p'. */
13847 case 't':
13849 HOST_WIDE_INT bits = INTVAL (x);
13850 fputc ("usuf"[bits & 3], stream);
13852 return;
13854 /* Bit 2: rounding (vs none). */
13855 case 'O':
13857 HOST_WIDE_INT bits = INTVAL (x);
13858 fputs ((bits & 4) != 0 ? "r" : "", stream);
13860 return;
13862 default:
13863 if (x == 0)
13865 output_operand_lossage ("missing operand");
13866 return;
13869 switch (GET_CODE (x))
13871 case REG:
13872 asm_fprintf (stream, "%r", REGNO (x));
13873 break;
13875 case MEM:
13876 output_memory_reference_mode = GET_MODE (x);
13877 output_address (XEXP (x, 0));
13878 break;
13880 case CONST_DOUBLE:
13881 if (TARGET_NEON)
13883 char fpstr[20];
13884 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13885 sizeof (fpstr), 0, 1);
13886 fprintf (stream, "#%s", fpstr);
13888 else
13889 fprintf (stream, "#%s", fp_immediate_constant (x));
13890 break;
13892 default:
13893 gcc_assert (GET_CODE (x) != NEG);
13894 fputc ('#', stream);
13895 output_addr_const (stream, x);
13896 break;
13901 /* Target hook for assembling integer objects. The ARM version needs to
13902 handle word-sized values specially. */
13903 static bool
13904 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13906 enum machine_mode mode;
13908 if (size == UNITS_PER_WORD && aligned_p)
13910 fputs ("\t.word\t", asm_out_file);
13911 output_addr_const (asm_out_file, x);
13913 /* Mark symbols as position independent. We only do this in the
13914 .text segment, not in the .data segment. */
13915 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13916 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13918 /* See legitimize_pic_address for an explanation of the
13919 TARGET_VXWORKS_RTP check. */
13920 if (TARGET_VXWORKS_RTP
13921 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13922 fputs ("(GOT)", asm_out_file);
13923 else
13924 fputs ("(GOTOFF)", asm_out_file);
13926 fputc ('\n', asm_out_file);
13927 return true;
13930 mode = GET_MODE (x);
13932 if (arm_vector_mode_supported_p (mode))
13934 int i, units;
13936 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13938 units = CONST_VECTOR_NUNITS (x);
13939 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13941 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13942 for (i = 0; i < units; i++)
13944 rtx elt = CONST_VECTOR_ELT (x, i);
13945 assemble_integer
13946 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13948 else
13949 for (i = 0; i < units; i++)
13951 rtx elt = CONST_VECTOR_ELT (x, i);
13952 REAL_VALUE_TYPE rval;
13954 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13956 assemble_real
13957 (rval, GET_MODE_INNER (mode),
13958 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13961 return true;
13964 return default_assemble_integer (x, size, aligned_p);
13967 static void
13968 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13970 section *s;
13972 if (!TARGET_AAPCS_BASED)
13974 (is_ctor ?
13975 default_named_section_asm_out_constructor
13976 : default_named_section_asm_out_destructor) (symbol, priority);
13977 return;
13980 /* Put these in the .init_array section, using a special relocation. */
13981 if (priority != DEFAULT_INIT_PRIORITY)
13983 char buf[18];
13984 sprintf (buf, "%s.%.5u",
13985 is_ctor ? ".init_array" : ".fini_array",
13986 priority);
13987 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13989 else if (is_ctor)
13990 s = ctors_section;
13991 else
13992 s = dtors_section;
13994 switch_to_section (s);
13995 assemble_align (POINTER_SIZE);
13996 fputs ("\t.word\t", asm_out_file);
13997 output_addr_const (asm_out_file, symbol);
13998 fputs ("(target1)\n", asm_out_file);
14001 /* Add a function to the list of static constructors. */
14003 static void
14004 arm_elf_asm_constructor (rtx symbol, int priority)
14006 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
14009 /* Add a function to the list of static destructors. */
14011 static void
14012 arm_elf_asm_destructor (rtx symbol, int priority)
14014 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
14017 /* A finite state machine takes care of noticing whether or not instructions
14018 can be conditionally executed, and thus decrease execution time and code
14019 size by deleting branch instructions. The fsm is controlled by
14020 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
14022 /* The states of the fsm controlling condition codes are:
14023 0: normal, do nothing special
14024 1: make ASM_OUTPUT_OPCODE not output this instruction
14025 2: make ASM_OUTPUT_OPCODE not output this instruction
14026 3: make instructions conditional
14027 4: make instructions conditional
14029 State transitions (state->state by whom under condition):
14030 0 -> 1 final_prescan_insn if the `target' is a label
14031 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
14032 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
14033 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
14034 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
14035 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
14036 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
14037 (the target insn is arm_target_insn).
14039 If the jump clobbers the conditions then we use states 2 and 4.
14041 A similar thing can be done with conditional return insns.
14043 XXX In case the `target' is an unconditional branch, this conditionalising
14044 of the instructions always reduces code size, but not always execution
14045 time. But then, I want to reduce the code size to somewhere near what
14046 /bin/cc produces. */
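/* As an illustrative sketch of the transformation (not from the original
   sources): a short forward branch such as

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   can be rewritten by this machinery as

       cmp     r0, #0
       addne   r1, r1, #1

   replacing the skipped instruction and the branch with a single
   conditionally executed instruction.  */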
14048 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
14049 instructions. When a COND_EXEC instruction is seen the subsequent
14050 instructions are scanned so that multiple conditional instructions can be
14051 combined into a single IT block. arm_condexec_count and arm_condexec_mask
14052 specify the length and true/false mask for the IT block. These will be
14053 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
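/* For example (an illustrative sketch): three consecutive COND_EXEC insns
   predicated on EQ, EQ and NE can be emitted as a single IT block,

       itte    eq
       moveq   r0, #1
       addeq   r1, r1, #2
       movne   r0, #0

   here arm_condexec_masklen would be 3 and arm_condexec_mask 0b011, with a
   bit set for each "then" slot.  */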
14055 /* Returns the index of the ARM condition code string in
14056 `arm_condition_codes'. COMPARISON should be an rtx like
14057 `(eq (...) (...))'. */
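/* For instance (illustrative): (lt (reg:CC_NOOV CC_REGNUM) (const_int 0))
   yields ARM_MI, and (geu (reg:CC CC_REGNUM) (const_int 0)) yields ARM_CS,
   following the mode/comparison table below.  */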
14058 static enum arm_cond_code
14059 get_arm_condition_code (rtx comparison)
14061 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
14062 enum arm_cond_code code;
14063 enum rtx_code comp_code = GET_CODE (comparison);
14065 if (GET_MODE_CLASS (mode) != MODE_CC)
14066 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
14067 XEXP (comparison, 1));
14069 switch (mode)
14071 case CC_DNEmode: code = ARM_NE; goto dominance;
14072 case CC_DEQmode: code = ARM_EQ; goto dominance;
14073 case CC_DGEmode: code = ARM_GE; goto dominance;
14074 case CC_DGTmode: code = ARM_GT; goto dominance;
14075 case CC_DLEmode: code = ARM_LE; goto dominance;
14076 case CC_DLTmode: code = ARM_LT; goto dominance;
14077 case CC_DGEUmode: code = ARM_CS; goto dominance;
14078 case CC_DGTUmode: code = ARM_HI; goto dominance;
14079 case CC_DLEUmode: code = ARM_LS; goto dominance;
14080 case CC_DLTUmode: code = ARM_CC;
14082 dominance:
14083 gcc_assert (comp_code == EQ || comp_code == NE);
14085 if (comp_code == EQ)
14086 return ARM_INVERSE_CONDITION_CODE (code);
14087 return code;
14089 case CC_NOOVmode:
14090 switch (comp_code)
14092 case NE: return ARM_NE;
14093 case EQ: return ARM_EQ;
14094 case GE: return ARM_PL;
14095 case LT: return ARM_MI;
14096 default: gcc_unreachable ();
14099 case CC_Zmode:
14100 switch (comp_code)
14102 case NE: return ARM_NE;
14103 case EQ: return ARM_EQ;
14104 default: gcc_unreachable ();
14107 case CC_Nmode:
14108 switch (comp_code)
14110 case NE: return ARM_MI;
14111 case EQ: return ARM_PL;
14112 default: gcc_unreachable ();
14115 case CCFPEmode:
14116 case CCFPmode:
14117 /* These encodings assume that AC=1 in the FPA system control
14118 byte. This allows us to handle all cases except UNEQ and
14119 LTGT. */
14120 switch (comp_code)
14122 case GE: return ARM_GE;
14123 case GT: return ARM_GT;
14124 case LE: return ARM_LS;
14125 case LT: return ARM_MI;
14126 case NE: return ARM_NE;
14127 case EQ: return ARM_EQ;
14128 case ORDERED: return ARM_VC;
14129 case UNORDERED: return ARM_VS;
14130 case UNLT: return ARM_LT;
14131 case UNLE: return ARM_LE;
14132 case UNGT: return ARM_HI;
14133 case UNGE: return ARM_PL;
14134 /* UNEQ and LTGT do not have a representation. */
14135 case UNEQ: /* Fall through. */
14136 case LTGT: /* Fall through. */
14137 default: gcc_unreachable ();
14140 case CC_SWPmode:
14141 switch (comp_code)
14143 case NE: return ARM_NE;
14144 case EQ: return ARM_EQ;
14145 case GE: return ARM_LE;
14146 case GT: return ARM_LT;
14147 case LE: return ARM_GE;
14148 case LT: return ARM_GT;
14149 case GEU: return ARM_LS;
14150 case GTU: return ARM_CC;
14151 case LEU: return ARM_CS;
14152 case LTU: return ARM_HI;
14153 default: gcc_unreachable ();
14156 case CC_Cmode:
14157 switch (comp_code)
14159 case LTU: return ARM_CS;
14160 case GEU: return ARM_CC;
14161 default: gcc_unreachable ();
14164 case CCmode:
14165 switch (comp_code)
14167 case NE: return ARM_NE;
14168 case EQ: return ARM_EQ;
14169 case GE: return ARM_GE;
14170 case GT: return ARM_GT;
14171 case LE: return ARM_LE;
14172 case LT: return ARM_LT;
14173 case GEU: return ARM_CS;
14174 case GTU: return ARM_HI;
14175 case LEU: return ARM_LS;
14176 case LTU: return ARM_CC;
14177 default: gcc_unreachable ();
14180 default: gcc_unreachable ();
14184 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
14185 instructions. */
14186 void
14187 thumb2_final_prescan_insn (rtx insn)
14189 rtx first_insn = insn;
14190 rtx body = PATTERN (insn);
14191 rtx predicate;
14192 enum arm_cond_code code;
14193 int n;
14194 int mask;
14196 /* Remove the previous insn from the count of insns to be output. */
14197 if (arm_condexec_count)
14198 arm_condexec_count--;
14200 /* Nothing to do if we are already inside a conditional block. */
14201 if (arm_condexec_count)
14202 return;
14204 if (GET_CODE (body) != COND_EXEC)
14205 return;
14207 /* Conditional jumps are implemented directly. */
14208 if (GET_CODE (insn) == JUMP_INSN)
14209 return;
14211 predicate = COND_EXEC_TEST (body);
14212 arm_current_cc = get_arm_condition_code (predicate);
14214 n = get_attr_ce_count (insn);
14215 arm_condexec_count = 1;
14216 arm_condexec_mask = (1 << n) - 1;
14217 arm_condexec_masklen = n;
14218 /* See if subsequent instructions can be combined into the same block. */
14219 for (;;)
14221 insn = next_nonnote_insn (insn);
14223 /* Jumping into the middle of an IT block is illegal, so a label or
14224 barrier terminates the block. */
14225 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
14226 break;
14228 body = PATTERN (insn);
14229 /* USE and CLOBBER aren't really insns, so just skip them. */
14230 if (GET_CODE (body) == USE
14231 || GET_CODE (body) == CLOBBER)
14232 continue;
14234 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
14235 if (GET_CODE (body) != COND_EXEC)
14236 break;
14237 /* Allow up to 4 conditionally executed instructions in a block. */
14238 n = get_attr_ce_count (insn);
14239 if (arm_condexec_masklen + n > 4)
14240 break;
14242 predicate = COND_EXEC_TEST (body);
14243 code = get_arm_condition_code (predicate);
14244 mask = (1 << n) - 1;
14245 if (arm_current_cc == code)
14246 arm_condexec_mask |= (mask << arm_condexec_masklen);
14247 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
14248 break;
14250 arm_condexec_count++;
14251 arm_condexec_masklen += n;
14253 /* A jump must be the last instruction in a conditional block. */
14254 if (GET_CODE(insn) == JUMP_INSN)
14255 break;
14257 /* Restore recog_data (getting the attributes of other insns can
14258 destroy this array, but final.c assumes that it remains intact
14259 across this call). */
14260 extract_constrain_insn_cached (first_insn);
14263 void
14264 arm_final_prescan_insn (rtx insn)
14266 /* BODY will hold the body of INSN. */
14267 rtx body = PATTERN (insn);
14269 /* This will be 1 if trying to repeat the trick, and things need to be
14270 reversed if it appears to fail. */
14271 int reverse = 0;
14273 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when a branch is
14274 taken, even if the rtl suggests otherwise. It also
14275 means that we have to grub around within the jump expression to find
14276 out what the conditions are when the jump isn't taken. */
14277 int jump_clobbers = 0;
14279 /* If we start with a return insn, we only succeed if we find another one. */
14280 int seeking_return = 0;
14282 /* START_INSN will hold the insn from where we start looking. This is the
14283 first insn after the following code_label if REVERSE is true. */
14284 rtx start_insn = insn;
14286 /* If in state 4, check if the target branch is reached, in order to
14287 change back to state 0. */
14288 if (arm_ccfsm_state == 4)
14290 if (insn == arm_target_insn)
14292 arm_target_insn = NULL;
14293 arm_ccfsm_state = 0;
14295 return;
14298 /* If in state 3, it is possible to repeat the trick, if this insn is an
14299 unconditional branch to a label, and immediately following this branch
14300 is the previous target label which is only used once, and the label this
14301 branch jumps to is not too far off. */
14302 if (arm_ccfsm_state == 3)
14304 if (simplejump_p (insn))
14306 start_insn = next_nonnote_insn (start_insn);
14307 if (GET_CODE (start_insn) == BARRIER)
14309 /* XXX Isn't this always a barrier? */
14310 start_insn = next_nonnote_insn (start_insn);
14312 if (GET_CODE (start_insn) == CODE_LABEL
14313 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14314 && LABEL_NUSES (start_insn) == 1)
14315 reverse = TRUE;
14316 else
14317 return;
14319 else if (GET_CODE (body) == RETURN)
14321 start_insn = next_nonnote_insn (start_insn);
14322 if (GET_CODE (start_insn) == BARRIER)
14323 start_insn = next_nonnote_insn (start_insn);
14324 if (GET_CODE (start_insn) == CODE_LABEL
14325 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14326 && LABEL_NUSES (start_insn) == 1)
14328 reverse = TRUE;
14329 seeking_return = 1;
14331 else
14332 return;
14334 else
14335 return;
14338 gcc_assert (!arm_ccfsm_state || reverse);
14339 if (GET_CODE (insn) != JUMP_INSN)
14340 return;
14342 /* This jump might be paralleled with a clobber of the condition codes;
14343 the jump should always come first. */
14344 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
14345 body = XVECEXP (body, 0, 0);
14347 if (reverse
14348 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
14349 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
14351 int insns_skipped;
14352 int fail = FALSE, succeed = FALSE;
14353 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
14354 int then_not_else = TRUE;
14355 rtx this_insn = start_insn, label = 0;
14357 /* If the jump cannot be done with one instruction, we cannot
14358 conditionally execute the instruction in the inverse case. */
14359 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
14361 jump_clobbers = 1;
14362 return;
14365 /* Register the insn jumped to. */
14366 if (reverse)
14368 if (!seeking_return)
14369 label = XEXP (SET_SRC (body), 0);
14371 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
14372 label = XEXP (XEXP (SET_SRC (body), 1), 0);
14373 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
14375 label = XEXP (XEXP (SET_SRC (body), 2), 0);
14376 then_not_else = FALSE;
14378 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
14379 seeking_return = 1;
14380 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
14382 seeking_return = 1;
14383 then_not_else = FALSE;
14385 else
14386 gcc_unreachable ();
14388 /* See how many insns this branch skips, and what kind of insns. If all
14389 insns are okay, and the label or unconditional branch to the same
14390 label is not too far away, succeed. */
14391 for (insns_skipped = 0;
14392 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
14394 rtx scanbody;
14396 this_insn = next_nonnote_insn (this_insn);
14397 if (!this_insn)
14398 break;
14400 switch (GET_CODE (this_insn))
14402 case CODE_LABEL:
14403 /* Succeed if it is the target label, otherwise fail since
14404 control falls in from somewhere else. */
14405 if (this_insn == label)
14407 if (jump_clobbers)
14409 arm_ccfsm_state = 2;
14410 this_insn = next_nonnote_insn (this_insn);
14412 else
14413 arm_ccfsm_state = 1;
14414 succeed = TRUE;
14416 else
14417 fail = TRUE;
14418 break;
14420 case BARRIER:
14421 /* Succeed if the following insn is the target label.
14422 Otherwise fail.
14423 If return insns are used then the last insn in a function
14424 will be a barrier. */
14425 this_insn = next_nonnote_insn (this_insn);
14426 if (this_insn && this_insn == label)
14428 if (jump_clobbers)
14430 arm_ccfsm_state = 2;
14431 this_insn = next_nonnote_insn (this_insn);
14433 else
14434 arm_ccfsm_state = 1;
14435 succeed = TRUE;
14437 else
14438 fail = TRUE;
14439 break;
14441 case CALL_INSN:
14442 /* The AAPCS says that conditional calls should not be
14443 used since they make interworking inefficient (the
14444 linker can't transform BL<cond> into BLX). That's
14445 only a problem if the machine has BLX. */
14446 if (arm_arch5)
14448 fail = TRUE;
14449 break;
14452 /* Succeed if the following insn is the target label, or
14453 if the following two insns are a barrier and the
14454 target label. */
14455 this_insn = next_nonnote_insn (this_insn);
14456 if (this_insn && GET_CODE (this_insn) == BARRIER)
14457 this_insn = next_nonnote_insn (this_insn);
14459 if (this_insn && this_insn == label
14460 && insns_skipped < max_insns_skipped)
14462 if (jump_clobbers)
14464 arm_ccfsm_state = 2;
14465 this_insn = next_nonnote_insn (this_insn);
14467 else
14468 arm_ccfsm_state = 1;
14469 succeed = TRUE;
14471 else
14472 fail = TRUE;
14473 break;
14475 case JUMP_INSN:
14476 /* If this is an unconditional branch to the same label, succeed.
14477 If it is to another label, do nothing. If it is conditional,
14478 fail. */
14479 /* XXX Probably, the tests for SET and the PC are
14480 unnecessary. */
14482 scanbody = PATTERN (this_insn);
14483 if (GET_CODE (scanbody) == SET
14484 && GET_CODE (SET_DEST (scanbody)) == PC)
14486 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14487 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14489 arm_ccfsm_state = 2;
14490 succeed = TRUE;
14492 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14493 fail = TRUE;
14495 /* Fail if a conditional return is undesirable (e.g. on a
14496 StrongARM), but still allow this if optimizing for size. */
14497 else if (GET_CODE (scanbody) == RETURN
14498 && !use_return_insn (TRUE, NULL)
14499 && !optimize_size)
14500 fail = TRUE;
14501 else if (GET_CODE (scanbody) == RETURN
14502 && seeking_return)
14504 arm_ccfsm_state = 2;
14505 succeed = TRUE;
14507 else if (GET_CODE (scanbody) == PARALLEL)
14509 switch (get_attr_conds (this_insn))
14511 case CONDS_NOCOND:
14512 break;
14513 default:
14514 fail = TRUE;
14515 break;
14518 else
14519 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14521 break;
14523 case INSN:
14524 /* Instructions using or affecting the condition codes make it
14525 fail. */
14526 scanbody = PATTERN (this_insn);
14527 if (!(GET_CODE (scanbody) == SET
14528 || GET_CODE (scanbody) == PARALLEL)
14529 || get_attr_conds (this_insn) != CONDS_NOCOND)
14530 fail = TRUE;
14532 /* A conditional Cirrus instruction must be followed by
14533 a non-Cirrus instruction. However, since we
14534 conditionalize instructions in this function, and since
14535 by the time we get here we can't add instructions
14536 (nops) because shorten_branches() has already been
14537 called, we disable conditionalizing Cirrus
14538 instructions to be safe. */
14539 if (GET_CODE (scanbody) != USE
14540 && GET_CODE (scanbody) != CLOBBER
14541 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14542 fail = TRUE;
14543 break;
14545 default:
14546 break;
14549 if (succeed)
14551 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14552 arm_target_label = CODE_LABEL_NUMBER (label);
14553 else
14555 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14557 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14559 this_insn = next_nonnote_insn (this_insn);
14560 gcc_assert (!this_insn
14561 || (GET_CODE (this_insn) != BARRIER
14562 && GET_CODE (this_insn) != CODE_LABEL));
14564 if (!this_insn)
14566 /* Oh dear!  We ran off the end... give up. */
14567 extract_constrain_insn_cached (insn);
14568 arm_ccfsm_state = 0;
14569 arm_target_insn = NULL;
14570 return;
14572 arm_target_insn = this_insn;
14574 if (jump_clobbers)
14576 gcc_assert (!reverse);
14577 arm_current_cc =
14578 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14579 0), 0), 1));
14580 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14581 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14582 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14583 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14585 else
14587 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
14588 what it was. */
14589 if (!reverse)
14590 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
14591 0));
14594 if (reverse || then_not_else)
14595 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14598 /* Restore recog_data (getting the attributes of other insns can
14599 destroy this array, but final.c assumes that it remains intact
14600 across this call). */
14601 extract_constrain_insn_cached (insn);
14605 /* Output IT instructions. */
14606 void
14607 thumb2_asm_output_opcode (FILE * stream)
14609 char buff[5];
14610 int n;
14612 if (arm_condexec_mask)
14614 for (n = 0; n < arm_condexec_masklen; n++)
14615 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14616 buff[n] = 0;
14617 asm_fprintf(stream, "i%s\t%s\n\t", buff,
14618 arm_condition_codes[arm_current_cc]);
14619 arm_condexec_mask = 0;
14623 /* Returns true if REGNO is a valid register
14624 for holding a quantity of type MODE. */
14626 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14628 if (GET_MODE_CLASS (mode) == MODE_CC)
14629 return (regno == CC_REGNUM
14630 || (TARGET_HARD_FLOAT && TARGET_VFP
14631 && regno == VFPCC_REGNUM));
14633 if (TARGET_THUMB1)
14634 /* For the Thumb we only allow values bigger than SImode in
14635 registers 0 - 6, so that there is always a second low
14636 register available to hold the upper part of the value.
14637 We probably ought to ensure that the register is the
14638 start of an even numbered register pair. */
14639 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14641 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14642 && IS_CIRRUS_REGNUM (regno))
14643 /* We have outlawed SI values in Cirrus registers because they
14644 reside in the lower 32 bits, but SF values reside in the
14645 upper 32 bits. This causes gcc all sorts of grief. We can't
14646 even split the registers into pairs because Cirrus SI values
14647 get sign extended to 64 bits -- aldyh. */
14648 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14650 if (TARGET_HARD_FLOAT && TARGET_VFP
14651 && IS_VFP_REGNUM (regno))
14653 if (mode == SFmode || mode == SImode)
14654 return VFP_REGNO_OK_FOR_SINGLE (regno);
14656 if (mode == DFmode)
14657 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14659 if (TARGET_NEON)
14660 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14661 || (VALID_NEON_QREG_MODE (mode)
14662 && NEON_REGNO_OK_FOR_QUAD (regno))
14663 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14664 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14665 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14666 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14667 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14669 return FALSE;
14672 if (TARGET_REALLY_IWMMXT)
14674 if (IS_IWMMXT_GR_REGNUM (regno))
14675 return mode == SImode;
14677 if (IS_IWMMXT_REGNUM (regno))
14678 return VALID_IWMMXT_REG_MODE (mode);
14681 /* We allow any value to be stored in the general registers.
14682 Restrict doubleword quantities to even register pairs so that we can
14683 use ldrd. Do not allow Neon structure opaque modes in general registers;
14684 they would use too many. */
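/* For example (illustrative): when ldrd is available a DImode value may
   live in {r0, r1} or {r2, r3} but not in {r1, r2}, since the pair must
   start on an even-numbered register.  */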
14685 if (regno <= LAST_ARM_REGNUM)
14686 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14687 && !VALID_NEON_STRUCT_MODE (mode);
14689 if (regno == FRAME_POINTER_REGNUM
14690 || regno == ARG_POINTER_REGNUM)
14691 /* We only allow integers in the fake hard registers. */
14692 return GET_MODE_CLASS (mode) == MODE_INT;
14694 /* The only registers left are the FPA registers
14695 which we only allow to hold FP values. */
14696 return (TARGET_HARD_FLOAT && TARGET_FPA
14697 && GET_MODE_CLASS (mode) == MODE_FLOAT
14698 && regno >= FIRST_FPA_REGNUM
14699 && regno <= LAST_FPA_REGNUM);
14702 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14703 not used in arm mode. */
14705 enum reg_class
14706 arm_regno_class (int regno)
14708 if (TARGET_THUMB1)
14710 if (regno == STACK_POINTER_REGNUM)
14711 return STACK_REG;
14712 if (regno == CC_REGNUM)
14713 return CC_REG;
14714 if (regno < 8)
14715 return LO_REGS;
14716 return HI_REGS;
14719 if (TARGET_THUMB2 && regno < 8)
14720 return LO_REGS;
14722 if ( regno <= LAST_ARM_REGNUM
14723 || regno == FRAME_POINTER_REGNUM
14724 || regno == ARG_POINTER_REGNUM)
14725 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14727 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14728 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14730 if (IS_CIRRUS_REGNUM (regno))
14731 return CIRRUS_REGS;
14733 if (IS_VFP_REGNUM (regno))
14735 if (regno <= D7_VFP_REGNUM)
14736 return VFP_D0_D7_REGS;
14737 else if (regno <= LAST_LO_VFP_REGNUM)
14738 return VFP_LO_REGS;
14739 else
14740 return VFP_HI_REGS;
14743 if (IS_IWMMXT_REGNUM (regno))
14744 return IWMMXT_REGS;
14746 if (IS_IWMMXT_GR_REGNUM (regno))
14747 return IWMMXT_GR_REGS;
14749 return FPA_REGS;
14752 /* Handle a special case when computing the offset
14753 of an argument from the frame pointer. */
14755 arm_debugger_arg_offset (int value, rtx addr)
14757 rtx insn;
14759 /* We are only interested if dbxout_parms() failed to compute the offset. */
14760 if (value != 0)
14761 return 0;
14763 /* We can only cope with the case where the address is held in a register. */
14764 if (GET_CODE (addr) != REG)
14765 return 0;
14767 /* If we are using the frame pointer to point at the argument, then
14768 an offset of 0 is correct. */
14769 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14770 return 0;
14772 /* If we are using the stack pointer to point at the
14773 argument, then an offset of 0 is correct. */
14774 /* ??? Check this is consistent with thumb2 frame layout. */
14775 if ((TARGET_THUMB || !frame_pointer_needed)
14776 && REGNO (addr) == SP_REGNUM)
14777 return 0;
14779 /* Oh dear. The argument is pointed to by a register rather
14780 than being held in a register, or being stored at a known
14781 offset from the frame pointer. Since GDB only understands
14782 those two kinds of argument we must translate the address
14783 held in the register into an offset from the frame pointer.
14784 We do this by searching through the insns for the function
14785 looking to see where this register gets its value. If the
14786 register is initialized from the frame pointer plus an offset
14787 then we are in luck and we can continue, otherwise we give up.
14789 This code is exercised by producing debugging information
14790 for a function with arguments like this:
14792 double func (double a, double b, int c, double d) {return d;}
14794 Without this code the stab for parameter 'd' will be set to
14795 an offset of 0 from the frame pointer, rather than 8. */
14797 /* The if() statement says:
14799 If the insn is a normal instruction
14800 and if the insn is setting the value in a register
14801 and if the register being set is the register holding the address of the argument
14802 and if the address is computed by an addition
14803 that involves adding to a register
14804 which is the frame pointer
14805 a constant integer
14807 then... */
14809 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14811 if ( GET_CODE (insn) == INSN
14812 && GET_CODE (PATTERN (insn)) == SET
14813 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14814 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14815 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14816 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14817 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14820 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14822 break;
14826 if (value == 0)
14828 debug_rtx (addr);
14829 warning (0, "unable to compute real location of stacked parameter");
14830 value = 8; /* XXX magic hack */
14833 return value;
14836 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14837 do \
14839 if ((MASK) & insn_flags) \
14840 add_builtin_function ((NAME), (TYPE), (CODE), \
14841 BUILT_IN_MD, NULL, NULL_TREE); \
14843 while (0)
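/* For instance (illustrative): def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero",
   di_ftype_void, ARM_BUILTIN_WZERO) registers the builtin only when
   insn_flags for the selected architecture includes FL_IWMMXT.  */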
14845 struct builtin_description
14847 const unsigned int mask;
14848 const enum insn_code icode;
14849 const char * const name;
14850 const enum arm_builtins code;
14851 const enum rtx_code comparison;
14852 const unsigned int flag;
14855 static const struct builtin_description bdesc_2arg[] =
14857 #define IWMMXT_BUILTIN(code, string, builtin) \
14858 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14859 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
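/* Illustrative expansion of the macro above: the first entry,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   becomes the initializer
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */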
14861 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14862 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14863 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14864 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14865 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14866 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14867 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14868 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14869 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14870 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14871 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14872 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14873 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14874 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14875 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14876 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14877 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14878 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14879 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14880 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14881 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14882 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14883 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14884 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14885 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14886 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14887 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14888 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14889 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14890 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14891 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14892 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14893 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14894 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14895 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14896 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14897 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14898 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14899 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14900 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14901 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14902 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14903 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14904 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14905 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14906 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14907 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14908 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14909 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14910 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14911 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14912 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14913 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14914 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14915 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14916 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14917 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14918 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14920 #define IWMMXT_BUILTIN2(code, builtin) \
14921 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
14923 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14924 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14925 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14926 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14927 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14928 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14929 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14930 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14931 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14932 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14933 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14934 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14935 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14936 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14937 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14938 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14939 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14940 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14941 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14942 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14943 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14944 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14945 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14946 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14947 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14948 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14949 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14950 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14951 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14952 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14953 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14954 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14957 static const struct builtin_description bdesc_1arg[] =
14959 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14960 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14961 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14962 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14963 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14964 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14965 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14966 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14967 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14968 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14969 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14970 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14971 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14972 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14973 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14974 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14975 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14976 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14979 /* Set up all the iWMMXt builtins. This is
14980 not called if TARGET_IWMMXT is zero. */
14982 static void
14983 arm_init_iwmmxt_builtins (void)
14985 const struct builtin_description * d;
14986 size_t i;
14987 tree endlink = void_list_node;
14989 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14990 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14991 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14993 tree int_ftype_int
14994 = build_function_type (integer_type_node,
14995 tree_cons (NULL_TREE, integer_type_node, endlink));
14996 tree v8qi_ftype_v8qi_v8qi_int
14997 = build_function_type (V8QI_type_node,
14998 tree_cons (NULL_TREE, V8QI_type_node,
14999 tree_cons (NULL_TREE, V8QI_type_node,
15000 tree_cons (NULL_TREE,
15001 integer_type_node,
15002 endlink))));
15003 tree v4hi_ftype_v4hi_int
15004 = build_function_type (V4HI_type_node,
15005 tree_cons (NULL_TREE, V4HI_type_node,
15006 tree_cons (NULL_TREE, integer_type_node,
15007 endlink)));
15008 tree v2si_ftype_v2si_int
15009 = build_function_type (V2SI_type_node,
15010 tree_cons (NULL_TREE, V2SI_type_node,
15011 tree_cons (NULL_TREE, integer_type_node,
15012 endlink)));
15013 tree v2si_ftype_di_di
15014 = build_function_type (V2SI_type_node,
15015 tree_cons (NULL_TREE, long_long_integer_type_node,
15016 tree_cons (NULL_TREE, long_long_integer_type_node,
15017 endlink)));
15018 tree di_ftype_di_int
15019 = build_function_type (long_long_integer_type_node,
15020 tree_cons (NULL_TREE, long_long_integer_type_node,
15021 tree_cons (NULL_TREE, integer_type_node,
15022 endlink)));
15023 tree di_ftype_di_int_int
15024 = build_function_type (long_long_integer_type_node,
15025 tree_cons (NULL_TREE, long_long_integer_type_node,
15026 tree_cons (NULL_TREE, integer_type_node,
15027 tree_cons (NULL_TREE,
15028 integer_type_node,
15029 endlink))));
15030 tree int_ftype_v8qi
15031 = build_function_type (integer_type_node,
15032 tree_cons (NULL_TREE, V8QI_type_node,
15033 endlink));
15034 tree int_ftype_v4hi
15035 = build_function_type (integer_type_node,
15036 tree_cons (NULL_TREE, V4HI_type_node,
15037 endlink));
15038 tree int_ftype_v2si
15039 = build_function_type (integer_type_node,
15040 tree_cons (NULL_TREE, V2SI_type_node,
15041 endlink));
15042 tree int_ftype_v8qi_int
15043 = build_function_type (integer_type_node,
15044 tree_cons (NULL_TREE, V8QI_type_node,
15045 tree_cons (NULL_TREE, integer_type_node,
15046 endlink)));
15047 tree int_ftype_v4hi_int
15048 = build_function_type (integer_type_node,
15049 tree_cons (NULL_TREE, V4HI_type_node,
15050 tree_cons (NULL_TREE, integer_type_node,
15051 endlink)));
15052 tree int_ftype_v2si_int
15053 = build_function_type (integer_type_node,
15054 tree_cons (NULL_TREE, V2SI_type_node,
15055 tree_cons (NULL_TREE, integer_type_node,
15056 endlink)));
15057 tree v8qi_ftype_v8qi_int_int
15058 = build_function_type (V8QI_type_node,
15059 tree_cons (NULL_TREE, V8QI_type_node,
15060 tree_cons (NULL_TREE, integer_type_node,
15061 tree_cons (NULL_TREE,
15062 integer_type_node,
15063 endlink))));
15064 tree v4hi_ftype_v4hi_int_int
15065 = build_function_type (V4HI_type_node,
15066 tree_cons (NULL_TREE, V4HI_type_node,
15067 tree_cons (NULL_TREE, integer_type_node,
15068 tree_cons (NULL_TREE,
15069 integer_type_node,
15070 endlink))));
15071 tree v2si_ftype_v2si_int_int
15072 = build_function_type (V2SI_type_node,
15073 tree_cons (NULL_TREE, V2SI_type_node,
15074 tree_cons (NULL_TREE, integer_type_node,
15075 tree_cons (NULL_TREE,
15076 integer_type_node,
15077 endlink))));
15078 /* Miscellaneous. */
15079 tree v8qi_ftype_v4hi_v4hi
15080 = build_function_type (V8QI_type_node,
15081 tree_cons (NULL_TREE, V4HI_type_node,
15082 tree_cons (NULL_TREE, V4HI_type_node,
15083 endlink)));
15084 tree v4hi_ftype_v2si_v2si
15085 = build_function_type (V4HI_type_node,
15086 tree_cons (NULL_TREE, V2SI_type_node,
15087 tree_cons (NULL_TREE, V2SI_type_node,
15088 endlink)));
15089 tree v2si_ftype_v4hi_v4hi
15090 = build_function_type (V2SI_type_node,
15091 tree_cons (NULL_TREE, V4HI_type_node,
15092 tree_cons (NULL_TREE, V4HI_type_node,
15093 endlink)));
15094 tree v2si_ftype_v8qi_v8qi
15095 = build_function_type (V2SI_type_node,
15096 tree_cons (NULL_TREE, V8QI_type_node,
15097 tree_cons (NULL_TREE, V8QI_type_node,
15098 endlink)));
15099 tree v4hi_ftype_v4hi_di
15100 = build_function_type (V4HI_type_node,
15101 tree_cons (NULL_TREE, V4HI_type_node,
15102 tree_cons (NULL_TREE,
15103 long_long_integer_type_node,
15104 endlink)));
15105 tree v2si_ftype_v2si_di
15106 = build_function_type (V2SI_type_node,
15107 tree_cons (NULL_TREE, V2SI_type_node,
15108 tree_cons (NULL_TREE,
15109 long_long_integer_type_node,
15110 endlink)));
15111 tree void_ftype_int_int
15112 = build_function_type (void_type_node,
15113 tree_cons (NULL_TREE, integer_type_node,
15114 tree_cons (NULL_TREE, integer_type_node,
15115 endlink)));
15116 tree di_ftype_void
15117 = build_function_type (long_long_unsigned_type_node, endlink);
15118 tree di_ftype_v8qi
15119 = build_function_type (long_long_integer_type_node,
15120 tree_cons (NULL_TREE, V8QI_type_node,
15121 endlink));
15122 tree di_ftype_v4hi
15123 = build_function_type (long_long_integer_type_node,
15124 tree_cons (NULL_TREE, V4HI_type_node,
15125 endlink));
15126 tree di_ftype_v2si
15127 = build_function_type (long_long_integer_type_node,
15128 tree_cons (NULL_TREE, V2SI_type_node,
15129 endlink));
15130 tree v2si_ftype_v4hi
15131 = build_function_type (V2SI_type_node,
15132 tree_cons (NULL_TREE, V4HI_type_node,
15133 endlink));
15134 tree v4hi_ftype_v8qi
15135 = build_function_type (V4HI_type_node,
15136 tree_cons (NULL_TREE, V8QI_type_node,
15137 endlink));
15139 tree di_ftype_di_v4hi_v4hi
15140 = build_function_type (long_long_unsigned_type_node,
15141 tree_cons (NULL_TREE,
15142 long_long_unsigned_type_node,
15143 tree_cons (NULL_TREE, V4HI_type_node,
15144 tree_cons (NULL_TREE,
15145 V4HI_type_node,
15146 endlink))));
15148 tree di_ftype_v4hi_v4hi
15149 = build_function_type (long_long_unsigned_type_node,
15150 tree_cons (NULL_TREE, V4HI_type_node,
15151 tree_cons (NULL_TREE, V4HI_type_node,
15152 endlink)));
15154 /* Normal vector binops. */
15155 tree v8qi_ftype_v8qi_v8qi
15156 = build_function_type (V8QI_type_node,
15157 tree_cons (NULL_TREE, V8QI_type_node,
15158 tree_cons (NULL_TREE, V8QI_type_node,
15159 endlink)));
15160 tree v4hi_ftype_v4hi_v4hi
15161 = build_function_type (V4HI_type_node,
15162 tree_cons (NULL_TREE, V4HI_type_node,
15163 tree_cons (NULL_TREE, V4HI_type_node,
15164 endlink)));
15165 tree v2si_ftype_v2si_v2si
15166 = build_function_type (V2SI_type_node,
15167 tree_cons (NULL_TREE, V2SI_type_node,
15168 tree_cons (NULL_TREE, V2SI_type_node,
15169 endlink)));
15170 tree di_ftype_di_di
15171 = build_function_type (long_long_unsigned_type_node,
15172 tree_cons (NULL_TREE, long_long_unsigned_type_node,
15173 tree_cons (NULL_TREE,
15174 long_long_unsigned_type_node,
15175 endlink)));
15177 /* Add all builtins that are more or less simple operations on two
15178 operands. */
15179 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15181 /* Use one of the operands; the target can have a different mode for
15182 mask-generating compares. */
15183 enum machine_mode mode;
15184 tree type;
15186 if (d->name == 0)
15187 continue;
15189 mode = insn_data[d->icode].operand[1].mode;
15191 switch (mode)
15193 case V8QImode:
15194 type = v8qi_ftype_v8qi_v8qi;
15195 break;
15196 case V4HImode:
15197 type = v4hi_ftype_v4hi_v4hi;
15198 break;
15199 case V2SImode:
15200 type = v2si_ftype_v2si_v2si;
15201 break;
15202 case DImode:
15203 type = di_ftype_di_di;
15204 break;
15206 default:
15207 gcc_unreachable ();
15210 def_mbuiltin (d->mask, d->name, type, d->code);
15213 /* Add the remaining MMX insns with somewhat more complicated types. */
15214 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
15215 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
15216 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
15218 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
15219 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
15220 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
15221 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
15222 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
15223 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
15225 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
15226 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
15227 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
15228 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
15229 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
15230 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
15232 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
15233 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
15234 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
15235 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
15236 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
15237 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
15239 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
15240 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
15241 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
15242 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
15243 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
15244 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
15246 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
15248 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
15249 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
15250 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
15251 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
15253 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
15254 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
15255 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
15256 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
15257 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
15258 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
15259 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
15260 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
15261 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
15263 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
15264 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
15265 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
15267 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
15268 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
15269 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
15271 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
15272 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
15273 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
15274 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
15275 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
15276 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
15278 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
15279 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
15280 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
15281 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
15282 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
15283 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
15284 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
15285 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
15286 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
15287 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
15288 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
15289 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
15291 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
15292 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
15293 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
15294 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
15296 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
15297 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
15298 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
15299 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
15300 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
15301 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
15302 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
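/* An illustrative usage sketch (hypothetical user code, not part of this
   file): with iWMMXt enabled, the builtins registered above can be called
   directly, much as the wrappers in mmintrin.h do:

       typedef char __v8qi __attribute__ ((vector_size (8)));
       __v8qi add8 (__v8qi a, __v8qi b)
       {
         return __builtin_arm_waddb (a, b);
       }

   the call expands through CODE_FOR_addv8qi3 from bdesc_2arg.  */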
15305 static void
15306 arm_init_tls_builtins (void)
15308 tree ftype, decl;
15310 ftype = build_function_type (ptr_type_node, void_list_node);
15311 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
15312 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
15313 NULL, NULL_TREE);
15314 TREE_NOTHROW (decl) = 1;
15315 TREE_READONLY (decl) = 1;
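/* Illustrative usage (hypothetical user code): the builtin registered above
   exposes the TLS thread pointer directly, e.g.

       void *tp = __builtin_thread_pointer ();

   it is marked nothrow and readonly, so repeated calls can be CSEd.  */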
15318 enum neon_builtin_type_bits {
15319 T_V8QI = 0x0001,
15320 T_V4HI = 0x0002,
15321 T_V2SI = 0x0004,
15322 T_V2SF = 0x0008,
15323 T_DI = 0x0010,
15324 T_V16QI = 0x0020,
15325 T_V8HI = 0x0040,
15326 T_V4SI = 0x0080,
15327 T_V4SF = 0x0100,
15328 T_V2DI = 0x0200,
15329 T_TI = 0x0400,
15330 T_EI = 0x0800,
15331 T_OI = 0x1000
15334 #define v8qi_UP T_V8QI
15335 #define v4hi_UP T_V4HI
15336 #define v2si_UP T_V2SI
15337 #define v2sf_UP T_V2SF
15338 #define di_UP T_DI
15339 #define v16qi_UP T_V16QI
15340 #define v8hi_UP T_V8HI
15341 #define v4si_UP T_V4SI
15342 #define v4sf_UP T_V4SF
15343 #define v2di_UP T_V2DI
15344 #define ti_UP T_TI
15345 #define ei_UP T_EI
15346 #define oi_UP T_OI
15348 #define UP(X) X##_UP
15350 #define T_MAX 13
15352 typedef enum {
15353 NEON_BINOP,
15354 NEON_TERNOP,
15355 NEON_UNOP,
15356 NEON_GETLANE,
15357 NEON_SETLANE,
15358 NEON_CREATE,
15359 NEON_DUP,
15360 NEON_DUPLANE,
15361 NEON_COMBINE,
15362 NEON_SPLIT,
15363 NEON_LANEMUL,
15364 NEON_LANEMULL,
15365 NEON_LANEMULH,
15366 NEON_LANEMAC,
15367 NEON_SCALARMUL,
15368 NEON_SCALARMULL,
15369 NEON_SCALARMULH,
15370 NEON_SCALARMAC,
15371 NEON_CONVERT,
15372 NEON_FIXCONV,
15373 NEON_SELECT,
15374 NEON_RESULTPAIR,
15375 NEON_REINTERP,
15376 NEON_VTBL,
15377 NEON_VTBX,
15378 NEON_LOAD1,
15379 NEON_LOAD1LANE,
15380 NEON_STORE1,
15381 NEON_STORE1LANE,
15382 NEON_LOADSTRUCT,
15383 NEON_LOADSTRUCTLANE,
15384 NEON_STORESTRUCT,
15385 NEON_STORESTRUCTLANE,
15386 NEON_LOGICBINOP,
15387 NEON_SHIFTINSERT,
15388 NEON_SHIFTIMM,
15389 NEON_SHIFTACC
15390 } neon_itype;
15392 typedef struct {
15393 const char *name;
15394 const neon_itype itype;
15395 const int bits;
15396 const enum insn_code codes[T_MAX];
15397 const unsigned int num_vars;
15398 unsigned int base_fcode;
15399 } neon_builtin_datum;
15401 #define CF(N,X) CODE_FOR_neon_##N##X
15403 #define VAR1(T, N, A) \
15404 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
15405 #define VAR2(T, N, A, B) \
15406 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
15407 #define VAR3(T, N, A, B, C) \
15408 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
15409 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
15410 #define VAR4(T, N, A, B, C, D) \
15411 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
15412 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
15413 #define VAR5(T, N, A, B, C, D, E) \
15414 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
15415 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
15416 #define VAR6(T, N, A, B, C, D, E, F) \
15417 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
15418 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
15419 #define VAR7(T, N, A, B, C, D, E, F, G) \
15420 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
15421 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15422 CF (N, G) }, 7, 0
15423 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
15424 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15425 | UP (H), \
15426 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15427 CF (N, G), CF (N, H) }, 8, 0
15428 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
15429 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15430 | UP (H) | UP (I), \
15431 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15432 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
15433 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
15434 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15435 | UP (H) | UP (I) | UP (J), \
15436 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15437 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
15439 /* The mode entries in the following table correspond to the "key" type of the
15440 instruction variant, i.e. equivalent to that which would be specified after
15441 the assembler mnemonic, which usually refers to the last vector operand.
15442 (Signed/unsigned/polynomial types are not differentiated between though, and
15443 are all mapped onto the same mode for a given element size.) The modes
15444 listed per instruction should be the same as those defined for that
15445 instruction's pattern in neon.md.
15446 WARNING: Variants should be listed in the same increasing order as
15447 neon_builtin_type_bits. */
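/* Illustrative expansion (derived from the VAR/CF macros above): the entry
     { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }
   expands to
     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
         CODE_FOR_neon_vaddlv2si }, 3, 0 },
   i.e. one record covering the three "key" modes of the vaddl patterns.  */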
15449 static neon_builtin_datum neon_builtin_data[] =
15451 { VAR10 (BINOP, vadd,
15452 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15453 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
15454 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
15455 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15456 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15457 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
15458 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15459 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15460 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
15461 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15462 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
15463 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
15464 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
15465 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
15466 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
15467 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
15468 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
15469 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
15470 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
15471 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
15472 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
15473 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
15474 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15475 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15476 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15477 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
15478 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
15479 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
15480 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15481 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15482 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15483 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
15484 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15485 { VAR10 (BINOP, vsub,
15486 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15487 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15488 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15489 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15490 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15491 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15492 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15493 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15494 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15495 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15496 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15497 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15498 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15499 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15500 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15501 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15502 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15503 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15504 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15505 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15506 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15507 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15508 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15509 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15510 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15511 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15512 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15513 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15514 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15515 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15516 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15517 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15518 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15519 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15520 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15521 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15522 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15523 /* FIXME: vget_lane supports more variants than this! */
15524 { VAR10 (GETLANE, vget_lane,
15525 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15526 { VAR10 (SETLANE, vset_lane,
15527 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15528 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15529 { VAR10 (DUP, vdup_n,
15530 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15531 { VAR10 (DUPLANE, vdup_lane,
15532 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15533 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15534 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15535 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15536 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15537 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15538 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15539 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15540 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15541 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15542 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15543 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15544 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15545 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15546 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15547 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15548 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15549 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15550 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15551 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15552 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15553 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15554 { VAR10 (BINOP, vext,
15555 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15556 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15557 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15558 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15559 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15560 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15561 { VAR10 (SELECT, vbsl,
15562 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15563 { VAR1 (VTBL, vtbl1, v8qi) },
15564 { VAR1 (VTBL, vtbl2, v8qi) },
15565 { VAR1 (VTBL, vtbl3, v8qi) },
15566 { VAR1 (VTBL, vtbl4, v8qi) },
15567 { VAR1 (VTBX, vtbx1, v8qi) },
15568 { VAR1 (VTBX, vtbx2, v8qi) },
15569 { VAR1 (VTBX, vtbx3, v8qi) },
15570 { VAR1 (VTBX, vtbx4, v8qi) },
15571 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15572 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15573 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15574 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15575 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15576 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15577 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15578 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15579 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15580 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15581 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15582 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15583 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15584 { VAR10 (LOAD1, vld1,
15585 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15586 { VAR10 (LOAD1LANE, vld1_lane,
15587 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15588 { VAR10 (LOAD1, vld1_dup,
15589 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15590 { VAR10 (STORE1, vst1,
15591 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15592 { VAR10 (STORE1LANE, vst1_lane,
15593 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15594 { VAR9 (LOADSTRUCT,
15595 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15596 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15597 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15598 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15599 { VAR9 (STORESTRUCT, vst2,
15600 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15601 { VAR7 (STORESTRUCTLANE, vst2_lane,
15602 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15603 { VAR9 (LOADSTRUCT,
15604 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15605 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15606 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15607 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15608 { VAR9 (STORESTRUCT, vst3,
15609 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15610 { VAR7 (STORESTRUCTLANE, vst3_lane,
15611 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15612 { VAR9 (LOADSTRUCT, vld4,
15613 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15614 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15615 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15616 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15617 { VAR9 (STORESTRUCT, vst4,
15618 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15619 { VAR7 (STORESTRUCTLANE, vst4_lane,
15620 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15621 { VAR10 (LOGICBINOP, vand,
15622 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15623 { VAR10 (LOGICBINOP, vorr,
15624 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15625 { VAR10 (BINOP, veor,
15626 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15627 { VAR10 (LOGICBINOP, vbic,
15628 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15629 { VAR10 (LOGICBINOP, vorn,
15630 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15633 #undef CF
15634 #undef VAR1
15635 #undef VAR2
15636 #undef VAR3
15637 #undef VAR4
15638 #undef VAR5
15639 #undef VAR6
15640 #undef VAR7
15641 #undef VAR8
15642 #undef VAR9
15643 #undef VAR10
15645 static void
15646 arm_init_neon_builtins (void)
15648 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15650 tree neon_intQI_type_node;
15651 tree neon_intHI_type_node;
15652 tree neon_polyQI_type_node;
15653 tree neon_polyHI_type_node;
15654 tree neon_intSI_type_node;
15655 tree neon_intDI_type_node;
15656 tree neon_float_type_node;
15658 tree intQI_pointer_node;
15659 tree intHI_pointer_node;
15660 tree intSI_pointer_node;
15661 tree intDI_pointer_node;
15662 tree float_pointer_node;
15664 tree const_intQI_node;
15665 tree const_intHI_node;
15666 tree const_intSI_node;
15667 tree const_intDI_node;
15668 tree const_float_node;
15670 tree const_intQI_pointer_node;
15671 tree const_intHI_pointer_node;
15672 tree const_intSI_pointer_node;
15673 tree const_intDI_pointer_node;
15674 tree const_float_pointer_node;
15676 tree V8QI_type_node;
15677 tree V4HI_type_node;
15678 tree V2SI_type_node;
15679 tree V2SF_type_node;
15680 tree V16QI_type_node;
15681 tree V8HI_type_node;
15682 tree V4SI_type_node;
15683 tree V4SF_type_node;
15684 tree V2DI_type_node;
15686 tree intUQI_type_node;
15687 tree intUHI_type_node;
15688 tree intUSI_type_node;
15689 tree intUDI_type_node;
15691 tree intEI_type_node;
15692 tree intOI_type_node;
15693 tree intCI_type_node;
15694 tree intXI_type_node;
15696 tree V8QI_pointer_node;
15697 tree V4HI_pointer_node;
15698 tree V2SI_pointer_node;
15699 tree V2SF_pointer_node;
15700 tree V16QI_pointer_node;
15701 tree V8HI_pointer_node;
15702 tree V4SI_pointer_node;
15703 tree V4SF_pointer_node;
15704 tree V2DI_pointer_node;
15706 tree void_ftype_pv8qi_v8qi_v8qi;
15707 tree void_ftype_pv4hi_v4hi_v4hi;
15708 tree void_ftype_pv2si_v2si_v2si;
15709 tree void_ftype_pv2sf_v2sf_v2sf;
15710 tree void_ftype_pdi_di_di;
15711 tree void_ftype_pv16qi_v16qi_v16qi;
15712 tree void_ftype_pv8hi_v8hi_v8hi;
15713 tree void_ftype_pv4si_v4si_v4si;
15714 tree void_ftype_pv4sf_v4sf_v4sf;
15715 tree void_ftype_pv2di_v2di_v2di;
15717 tree reinterp_ftype_dreg[5][5];
15718 tree reinterp_ftype_qreg[5][5];
15719 tree dreg_types[5], qreg_types[5];
15721 /* Create distinguished type nodes for NEON vector element types,
15722 and pointers to values of such types, so we can detect them later. */
15723 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15724 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15725 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15726 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15727 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15728 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15729 neon_float_type_node = make_node (REAL_TYPE);
15730 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15731 layout_type (neon_float_type_node);
15733 /* Define typedefs which exactly correspond to the modes we are basing vector
15734 types on. If you change these names you'll need to change
15735 the table used by arm_mangle_type too. */
15736 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15737 "__builtin_neon_qi");
15738 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15739 "__builtin_neon_hi");
15740 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15741 "__builtin_neon_si");
15742 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15743 "__builtin_neon_sf");
15744 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15745 "__builtin_neon_di");
15746 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15747 "__builtin_neon_poly8");
15748 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15749 "__builtin_neon_poly16");
15751 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15752 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15753 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15754 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15755 float_pointer_node = build_pointer_type (neon_float_type_node);
15757 /* Next create constant-qualified versions of the above types. */
15758 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15759 TYPE_QUAL_CONST);
15760 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15761 TYPE_QUAL_CONST);
15762 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15763 TYPE_QUAL_CONST);
15764 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15765 TYPE_QUAL_CONST);
15766 const_float_node = build_qualified_type (neon_float_type_node,
15767 TYPE_QUAL_CONST);
15769 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15770 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15771 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15772 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15773 const_float_pointer_node = build_pointer_type (const_float_node);
15775 /* Now create vector types based on our NEON element types. */
15776 /* 64-bit vectors. */
15777 V8QI_type_node =
15778 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15779 V4HI_type_node =
15780 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15781 V2SI_type_node =
15782 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15783 V2SF_type_node =
15784 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15785 /* 128-bit vectors. */
15786 V16QI_type_node =
15787 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15788 V8HI_type_node =
15789 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15790 V4SI_type_node =
15791 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15792 V4SF_type_node =
15793 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15794 V2DI_type_node =
15795 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15797 /* Unsigned integer types for various mode sizes. */
15798 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15799 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15800 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15801 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15803 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15804 "__builtin_neon_uqi");
15805 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15806 "__builtin_neon_uhi");
15807 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15808 "__builtin_neon_usi");
15809 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15810 "__builtin_neon_udi");
15812 /* Opaque integer types for structures of vectors. */
15813 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15814 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15815 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15816 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15818 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15819 "__builtin_neon_ti");
15820 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15821 "__builtin_neon_ei");
15822 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15823 "__builtin_neon_oi");
15824 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15825 "__builtin_neon_ci");
15826 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15827 "__builtin_neon_xi");
15829 /* Pointers to vector types. */
15830 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15831 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15832 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15833 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15834 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15835 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15836 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15837 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15838 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15840 /* Operations which return results as pairs. */
15841 void_ftype_pv8qi_v8qi_v8qi =
15842 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15843 V8QI_type_node, NULL);
15844 void_ftype_pv4hi_v4hi_v4hi =
15845 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15846 V4HI_type_node, NULL);
15847 void_ftype_pv2si_v2si_v2si =
15848 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15849 V2SI_type_node, NULL);
15850 void_ftype_pv2sf_v2sf_v2sf =
15851 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15852 V2SF_type_node, NULL);
15853 void_ftype_pdi_di_di =
15854 build_function_type_list (void_type_node, intDI_pointer_node,
15855 neon_intDI_type_node, neon_intDI_type_node, NULL);
15856 void_ftype_pv16qi_v16qi_v16qi =
15857 build_function_type_list (void_type_node, V16QI_pointer_node,
15858 V16QI_type_node, V16QI_type_node, NULL);
15859 void_ftype_pv8hi_v8hi_v8hi =
15860 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15861 V8HI_type_node, NULL);
15862 void_ftype_pv4si_v4si_v4si =
15863 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15864 V4SI_type_node, NULL);
15865 void_ftype_pv4sf_v4sf_v4sf =
15866 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15867 V4SF_type_node, NULL);
15868 void_ftype_pv2di_v2di_v2di =
15869 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15870 V2DI_type_node, NULL);
15872 dreg_types[0] = V8QI_type_node;
15873 dreg_types[1] = V4HI_type_node;
15874 dreg_types[2] = V2SI_type_node;
15875 dreg_types[3] = V2SF_type_node;
15876 dreg_types[4] = neon_intDI_type_node;
15878 qreg_types[0] = V16QI_type_node;
15879 qreg_types[1] = V8HI_type_node;
15880 qreg_types[2] = V4SI_type_node;
15881 qreg_types[3] = V4SF_type_node;
15882 qreg_types[4] = V2DI_type_node;
15884 for (i = 0; i < 5; i++)
15886 int j;
15887 for (j = 0; j < 5; j++)
15889 reinterp_ftype_dreg[i][j]
15890 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15891 reinterp_ftype_qreg[i][j]
15892 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15896 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15898 neon_builtin_datum *d = &neon_builtin_data[i];
15899 unsigned int j, codeidx = 0;
15901 d->base_fcode = fcode;
15903 for (j = 0; j < T_MAX; j++)
15905 const char* const modenames[] = {
15906 "v8qi", "v4hi", "v2si", "v2sf", "di",
15907 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15909 char namebuf[60];
15910 tree ftype = NULL;
15911 enum insn_code icode;
15912 int is_load = 0, is_store = 0;
15914 if ((d->bits & (1 << j)) == 0)
15915 continue;
15917 icode = d->codes[codeidx++];
15919 switch (d->itype)
15921 case NEON_LOAD1:
15922 case NEON_LOAD1LANE:
15923 case NEON_LOADSTRUCT:
15924 case NEON_LOADSTRUCTLANE:
15925 is_load = 1;
15926 /* Fall through. */
15927 case NEON_STORE1:
15928 case NEON_STORE1LANE:
15929 case NEON_STORESTRUCT:
15930 case NEON_STORESTRUCTLANE:
15931 if (!is_load)
15932 is_store = 1;
15933 /* Fall through. */
15934 case NEON_UNOP:
15935 case NEON_BINOP:
15936 case NEON_LOGICBINOP:
15937 case NEON_SHIFTINSERT:
15938 case NEON_TERNOP:
15939 case NEON_GETLANE:
15940 case NEON_SETLANE:
15941 case NEON_CREATE:
15942 case NEON_DUP:
15943 case NEON_DUPLANE:
15944 case NEON_SHIFTIMM:
15945 case NEON_SHIFTACC:
15946 case NEON_COMBINE:
15947 case NEON_SPLIT:
15948 case NEON_CONVERT:
15949 case NEON_FIXCONV:
15950 case NEON_LANEMUL:
15951 case NEON_LANEMULL:
15952 case NEON_LANEMULH:
15953 case NEON_LANEMAC:
15954 case NEON_SCALARMUL:
15955 case NEON_SCALARMULL:
15956 case NEON_SCALARMULH:
15957 case NEON_SCALARMAC:
15958 case NEON_SELECT:
15959 case NEON_VTBL:
15960 case NEON_VTBX:
15962 int k;
15963 tree return_type = void_type_node, args = void_list_node;
15965 /* Build a function type directly from the insn_data for this
15966 builtin. The build_function_type() function takes care of
15967 removing duplicates for us. */
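/* The loop below walks the operands from last to first, so the argument
   list is built up in the right order and, for non-store builtins,
   operand 0 finally supplies the return type; stores keep a void
   return type instead.  */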
15968 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15970 tree eltype;
15972 if (is_load && k == 1)
15974 /* Neon load patterns always have the memory operand
15975 (a SImode pointer) in the operand 1 position. We
15976 want a const pointer to the element type in that
15977 position. */
15978 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15980 switch (1 << j)
15982 case T_V8QI:
15983 case T_V16QI:
15984 eltype = const_intQI_pointer_node;
15985 break;
15987 case T_V4HI:
15988 case T_V8HI:
15989 eltype = const_intHI_pointer_node;
15990 break;
15992 case T_V2SI:
15993 case T_V4SI:
15994 eltype = const_intSI_pointer_node;
15995 break;
15997 case T_V2SF:
15998 case T_V4SF:
15999 eltype = const_float_pointer_node;
16000 break;
16002 case T_DI:
16003 case T_V2DI:
16004 eltype = const_intDI_pointer_node;
16005 break;
16007 default: gcc_unreachable ();
16010 else if (is_store && k == 0)
16012 /* Similarly, Neon store patterns use operand 0 as
16013 the memory location to store to (a SImode pointer).
16014 Use a pointer to the element type of the store in
16015 that position. */
16016 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16018 switch (1 << j)
16020 case T_V8QI:
16021 case T_V16QI:
16022 eltype = intQI_pointer_node;
16023 break;
16025 case T_V4HI:
16026 case T_V8HI:
16027 eltype = intHI_pointer_node;
16028 break;
16030 case T_V2SI:
16031 case T_V4SI:
16032 eltype = intSI_pointer_node;
16033 break;
16035 case T_V2SF:
16036 case T_V4SF:
16037 eltype = float_pointer_node;
16038 break;
16040 case T_DI:
16041 case T_V2DI:
16042 eltype = intDI_pointer_node;
16043 break;
16045 default: gcc_unreachable ();
16048 else
16050 switch (insn_data[icode].operand[k].mode)
16052 case VOIDmode: eltype = void_type_node; break;
16053 /* Scalars. */
16054 case QImode: eltype = neon_intQI_type_node; break;
16055 case HImode: eltype = neon_intHI_type_node; break;
16056 case SImode: eltype = neon_intSI_type_node; break;
16057 case SFmode: eltype = neon_float_type_node; break;
16058 case DImode: eltype = neon_intDI_type_node; break;
16059 case TImode: eltype = intTI_type_node; break;
16060 case EImode: eltype = intEI_type_node; break;
16061 case OImode: eltype = intOI_type_node; break;
16062 case CImode: eltype = intCI_type_node; break;
16063 case XImode: eltype = intXI_type_node; break;
16064 /* 64-bit vectors. */
16065 case V8QImode: eltype = V8QI_type_node; break;
16066 case V4HImode: eltype = V4HI_type_node; break;
16067 case V2SImode: eltype = V2SI_type_node; break;
16068 case V2SFmode: eltype = V2SF_type_node; break;
16069 /* 128-bit vectors. */
16070 case V16QImode: eltype = V16QI_type_node; break;
16071 case V8HImode: eltype = V8HI_type_node; break;
16072 case V4SImode: eltype = V4SI_type_node; break;
16073 case V4SFmode: eltype = V4SF_type_node; break;
16074 case V2DImode: eltype = V2DI_type_node; break;
16075 default: gcc_unreachable ();
16079 if (k == 0 && !is_store)
16080 return_type = eltype;
16081 else
16082 args = tree_cons (NULL_TREE, eltype, args);
16085 ftype = build_function_type (return_type, args);
16087 break;
16089 case NEON_RESULTPAIR:
16091 switch (insn_data[icode].operand[1].mode)
16093 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
16094 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
16095 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
16096 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
16097 case DImode: ftype = void_ftype_pdi_di_di; break;
16098 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
16099 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
16100 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
16101 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
16102 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
16103 default: gcc_unreachable ();
16106 break;
16108 case NEON_REINTERP:
16110 /* We iterate over 5 doubleword types, then 5 quadword
16111 types. */
16112 int rhs = j % 5;
16113 switch (insn_data[icode].operand[0].mode)
16115 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
16116 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
16117 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
16118 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
16119 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
16120 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
16121 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
16122 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
16123 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
16124 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
16125 default: gcc_unreachable ();
16128 break;
16130 default:
16131 gcc_unreachable ();
16134 gcc_assert (ftype != NULL);
16136 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
16138 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
16139 NULL_TREE);
16144 static void
16145 arm_init_builtins (void)
16147 arm_init_tls_builtins ();
16149 if (TARGET_REALLY_IWMMXT)
16150 arm_init_iwmmxt_builtins ();
16152 if (TARGET_NEON)
16153 arm_init_neon_builtins ();
16156 /* Errors in the source file can cause expand_expr to return const0_rtx
16157 where we expect a vector. To avoid crashing, use one of the vector
16158 clear instructions. */
16160 static rtx
16161 safe_vector_operand (rtx x, enum machine_mode mode)
16163 if (x != const0_rtx)
16164 return x;
16165 x = gen_reg_rtx (mode);
16167 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
16168 : gen_rtx_SUBREG (DImode, x, 0)));
16169 return x;
16172 /* Subroutine of arm_expand_builtin to take care of binop insns. */
16174 static rtx
16175 arm_expand_binop_builtin (enum insn_code icode,
16176 tree exp, rtx target)
16178 rtx pat;
16179 tree arg0 = CALL_EXPR_ARG (exp, 0);
16180 tree arg1 = CALL_EXPR_ARG (exp, 1);
16181 rtx op0 = expand_normal (arg0);
16182 rtx op1 = expand_normal (arg1);
16183 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16184 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16185 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16187 if (VECTOR_MODE_P (mode0))
16188 op0 = safe_vector_operand (op0, mode0);
16189 if (VECTOR_MODE_P (mode1))
16190 op1 = safe_vector_operand (op1, mode1);
16192 if (! target
16193 || GET_MODE (target) != tmode
16194 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16195 target = gen_reg_rtx (tmode);
16197 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
16199 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16200 op0 = copy_to_mode_reg (mode0, op0);
16201 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16202 op1 = copy_to_mode_reg (mode1, op1);
16204 pat = GEN_FCN (icode) (target, op0, op1);
16205 if (! pat)
16206 return 0;
16207 emit_insn (pat);
16208 return target;
16211 /* Subroutine of arm_expand_builtin to take care of unop insns. */
16213 static rtx
16214 arm_expand_unop_builtin (enum insn_code icode,
16215 tree exp, rtx target, int do_load)
16217 rtx pat;
16218 tree arg0 = CALL_EXPR_ARG (exp, 0);
16219 rtx op0 = expand_normal (arg0);
16220 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16221 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16223 if (! target
16224 || GET_MODE (target) != tmode
16225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16226 target = gen_reg_rtx (tmode);
16227 if (do_load)
16228 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16229 else
16231 if (VECTOR_MODE_P (mode0))
16232 op0 = safe_vector_operand (op0, mode0);
16234 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16235 op0 = copy_to_mode_reg (mode0, op0);
16238 pat = GEN_FCN (icode) (target, op0);
16239 if (! pat)
16240 return 0;
16241 emit_insn (pat);
16242 return target;
16245 static int
16246 neon_builtin_compare (const void *a, const void *b)
16248 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
16249 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
16250 unsigned int soughtcode = key->base_fcode;
16252 if (soughtcode >= memb->base_fcode
16253 && soughtcode < memb->base_fcode + memb->num_vars)
16254 return 0;
16255 else if (soughtcode < memb->base_fcode)
16256 return -1;
16257 else
16258 return 1;
16261 static enum insn_code
16262 locate_neon_builtin_icode (int fcode, neon_itype *itype)
16264 neon_builtin_datum key, *found;
16265 int idx;
16267 key.base_fcode = fcode;
16268 found = (neon_builtin_datum *)
16269 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
16270 sizeof (neon_builtin_data[0]), neon_builtin_compare);
16271 gcc_assert (found);
16272 idx = fcode - (int) found->base_fcode;
16273 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
16275 if (itype)
16276 *itype = found->itype;
16278 return found->codes[idx];
16281 typedef enum {
16282 NEON_ARG_COPY_TO_REG,
16283 NEON_ARG_CONSTANT,
16284 NEON_ARG_STOP
16285 } builtin_arg;
16287 #define NEON_MAX_BUILTIN_ARGS 5
16289 /* Expand a Neon builtin. */
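/* The trailing arguments after EXP describe how each operand of the
   builtin call is handled (NEON_ARG_COPY_TO_REG or NEON_ARG_CONSTANT)
   and are terminated by NEON_ARG_STOP.  For example, a plain binary
   operation is expanded (as in the NEON_BINOP case below) with:
     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_CONSTANT, NEON_ARG_STOP);  */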
16290 static rtx
16291 arm_expand_neon_args (rtx target, int icode, int have_retval,
16292 tree exp, ...)
16294 va_list ap;
16295 rtx pat;
16296 tree arg[NEON_MAX_BUILTIN_ARGS];
16297 rtx op[NEON_MAX_BUILTIN_ARGS];
16298 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16299 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
16300 int argc = 0;
16302 if (have_retval
16303 && (!target
16304 || GET_MODE (target) != tmode
16305 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
16306 target = gen_reg_rtx (tmode);
16308 va_start (ap, exp);
16310 for (;;)
16312 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
16314 if (thisarg == NEON_ARG_STOP)
16315 break;
16316 else
16318 arg[argc] = CALL_EXPR_ARG (exp, argc);
16319 op[argc] = expand_normal (arg[argc]);
16320 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
16322 switch (thisarg)
16324 case NEON_ARG_COPY_TO_REG:
16325 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
16326 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16327 (op[argc], mode[argc]))
16328 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
16329 break;
16331 case NEON_ARG_CONSTANT:
16332 /* FIXME: This error message is somewhat unhelpful. */
16333 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16334 (op[argc], mode[argc]))
16335 error ("argument must be a constant");
16336 break;
16338 case NEON_ARG_STOP:
16339 gcc_unreachable ();
16342 argc++;
16346 va_end (ap);
16348 if (have_retval)
16349 switch (argc)
16351 case 1:
16352 pat = GEN_FCN (icode) (target, op[0]);
16353 break;
16355 case 2:
16356 pat = GEN_FCN (icode) (target, op[0], op[1]);
16357 break;
16359 case 3:
16360 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
16361 break;
16363 case 4:
16364 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
16365 break;
16367 case 5:
16368 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
16369 break;
16371 default:
16372 gcc_unreachable ();
16374 else
16375 switch (argc)
16377 case 1:
16378 pat = GEN_FCN (icode) (op[0]);
16379 break;
16381 case 2:
16382 pat = GEN_FCN (icode) (op[0], op[1]);
16383 break;
16385 case 3:
16386 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
16387 break;
16389 case 4:
16390 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
16391 break;
16393 case 5:
16394 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
16395 break;
16397 default:
16398 gcc_unreachable ();
16401 if (!pat)
16402 return 0;
16404 emit_insn (pat);
16406 return target;
16409 /* Expand a Neon builtin. These are "special" because they don't have symbolic
16410 constants defined per-instruction or per instruction-variant. Instead, the
16411 required info is looked up in the table neon_builtin_data. */
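/* For example, the single vsub entry in the table above covers ten type
   variants; subtracting the entry's base_fcode from FCODE (as done in
   locate_neon_builtin_icode) selects the insn code for the variant that
   was actually called.  */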
16412 static rtx
16413 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
16415 neon_itype itype;
16416 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
16418 switch (itype)
16420 case NEON_UNOP:
16421 case NEON_CONVERT:
16422 case NEON_DUPLANE:
16423 return arm_expand_neon_args (target, icode, 1, exp,
16424 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16426 case NEON_BINOP:
16427 case NEON_SETLANE:
16428 case NEON_SCALARMUL:
16429 case NEON_SCALARMULL:
16430 case NEON_SCALARMULH:
16431 case NEON_SHIFTINSERT:
16432 case NEON_LOGICBINOP:
16433 return arm_expand_neon_args (target, icode, 1, exp,
16434 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16435 NEON_ARG_STOP);
16437 case NEON_TERNOP:
16438 return arm_expand_neon_args (target, icode, 1, exp,
16439 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16440 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16442 case NEON_GETLANE:
16443 case NEON_FIXCONV:
16444 case NEON_SHIFTIMM:
16445 return arm_expand_neon_args (target, icode, 1, exp,
16446 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
16447 NEON_ARG_STOP);
16449 case NEON_CREATE:
16450 return arm_expand_neon_args (target, icode, 1, exp,
16451 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16453 case NEON_DUP:
16454 case NEON_SPLIT:
16455 case NEON_REINTERP:
16456 return arm_expand_neon_args (target, icode, 1, exp,
16457 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16459 case NEON_COMBINE:
16460 case NEON_VTBL:
16461 return arm_expand_neon_args (target, icode, 1, exp,
16462 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16464 case NEON_RESULTPAIR:
16465 return arm_expand_neon_args (target, icode, 0, exp,
16466 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16467 NEON_ARG_STOP);
16469 case NEON_LANEMUL:
16470 case NEON_LANEMULL:
16471 case NEON_LANEMULH:
16472 return arm_expand_neon_args (target, icode, 1, exp,
16473 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16474 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16476 case NEON_LANEMAC:
16477 return arm_expand_neon_args (target, icode, 1, exp,
16478 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16479 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16481 case NEON_SHIFTACC:
16482 return arm_expand_neon_args (target, icode, 1, exp,
16483 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16484 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16486 case NEON_SCALARMAC:
16487 return arm_expand_neon_args (target, icode, 1, exp,
16488 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16489 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16491 case NEON_SELECT:
16492 case NEON_VTBX:
16493 return arm_expand_neon_args (target, icode, 1, exp,
16494 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16495 NEON_ARG_STOP);
16497 case NEON_LOAD1:
16498 case NEON_LOADSTRUCT:
16499 return arm_expand_neon_args (target, icode, 1, exp,
16500 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16502 case NEON_LOAD1LANE:
16503 case NEON_LOADSTRUCTLANE:
16504 return arm_expand_neon_args (target, icode, 1, exp,
16505 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16506 NEON_ARG_STOP);
16508 case NEON_STORE1:
16509 case NEON_STORESTRUCT:
16510 return arm_expand_neon_args (target, icode, 0, exp,
16511 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16513 case NEON_STORE1LANE:
16514 case NEON_STORESTRUCTLANE:
16515 return arm_expand_neon_args (target, icode, 0, exp,
16516 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16517 NEON_ARG_STOP);
16520 gcc_unreachable ();
16523 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16524 void
16525 neon_reinterpret (rtx dest, rtx src)
16527 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16530 /* Emit code to place a Neon pair result in memory locations (with equal
16531 registers). */
16532 void
16533 neon_emit_pair_result_insn (enum machine_mode mode,
16534 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16535 rtx op1, rtx op2)
16537 rtx mem = gen_rtx_MEM (mode, destaddr);
16538 rtx tmp1 = gen_reg_rtx (mode);
16539 rtx tmp2 = gen_reg_rtx (mode);
16541 emit_insn (intfn (tmp1, op1, tmp2, op2));
16543 emit_move_insn (mem, tmp1);
16544 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16545 emit_move_insn (mem, tmp2);
16548 /* Set up operands for a register copy from src to dest, taking care not to
16549 clobber registers in the process.
16550 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16551 be called with a large N, so that should be OK. */
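/* For example (illustrative registers only), copying (d0, d1) to (d1, d2)
   is emitted as d1 -> d2 followed by d0 -> d1, so that d1 is read before
   it is overwritten.  */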
16553 void
16554 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16556 unsigned int copied = 0, opctr = 0;
16557 unsigned int done = (1 << count) - 1;
16558 unsigned int i, j;
16560 while (copied != done)
16562 for (i = 0; i < count; i++)
16564 int good = 1;
16566 for (j = 0; good && j < count; j++)
16567 if (i != j && (copied & (1 << j)) == 0
16568 && reg_overlap_mentioned_p (src[j], dest[i]))
16569 good = 0;
16571 if (good)
16573 operands[opctr++] = dest[i];
16574 operands[opctr++] = src[i];
16575 copied |= 1 << i;
16580 gcc_assert (opctr == count * 2);
16583 /* Expand an expression EXP that calls a built-in function,
16584 with result going to TARGET if that's convenient
16585 (and in mode MODE if that's convenient).
16586 SUBTARGET may be used as the target for computing one of EXP's operands.
16587 IGNORE is nonzero if the value is to be ignored. */
16589 static rtx
16590 arm_expand_builtin (tree exp,
16591 rtx target,
16592 rtx subtarget ATTRIBUTE_UNUSED,
16593 enum machine_mode mode ATTRIBUTE_UNUSED,
16594 int ignore ATTRIBUTE_UNUSED)
16596 const struct builtin_description * d;
16597 enum insn_code icode;
16598 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16599 tree arg0;
16600 tree arg1;
16601 tree arg2;
16602 rtx op0;
16603 rtx op1;
16604 rtx op2;
16605 rtx pat;
16606 int fcode = DECL_FUNCTION_CODE (fndecl);
16607 size_t i;
16608 enum machine_mode tmode;
16609 enum machine_mode mode0;
16610 enum machine_mode mode1;
16611 enum machine_mode mode2;
16613 if (fcode >= ARM_BUILTIN_NEON_BASE)
16614 return arm_expand_neon_builtin (fcode, exp, target);
16616 switch (fcode)
16618 case ARM_BUILTIN_TEXTRMSB:
16619 case ARM_BUILTIN_TEXTRMUB:
16620 case ARM_BUILTIN_TEXTRMSH:
16621 case ARM_BUILTIN_TEXTRMUH:
16622 case ARM_BUILTIN_TEXTRMSW:
16623 case ARM_BUILTIN_TEXTRMUW:
16624 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16625 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16626 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16627 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16628 : CODE_FOR_iwmmxt_textrmw);
16630 arg0 = CALL_EXPR_ARG (exp, 0);
16631 arg1 = CALL_EXPR_ARG (exp, 1);
16632 op0 = expand_normal (arg0);
16633 op1 = expand_normal (arg1);
16634 tmode = insn_data[icode].operand[0].mode;
16635 mode0 = insn_data[icode].operand[1].mode;
16636 mode1 = insn_data[icode].operand[2].mode;
16638 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16639 op0 = copy_to_mode_reg (mode0, op0);
16640 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16642 /* @@@ better error message */
16643 error ("selector must be an immediate");
16644 return gen_reg_rtx (tmode);
16646 if (target == 0
16647 || GET_MODE (target) != tmode
16648 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16649 target = gen_reg_rtx (tmode);
16650 pat = GEN_FCN (icode) (target, op0, op1);
16651 if (! pat)
16652 return 0;
16653 emit_insn (pat);
16654 return target;
16656 case ARM_BUILTIN_TINSRB:
16657 case ARM_BUILTIN_TINSRH:
16658 case ARM_BUILTIN_TINSRW:
16659 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16660 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16661 : CODE_FOR_iwmmxt_tinsrw);
16662 arg0 = CALL_EXPR_ARG (exp, 0);
16663 arg1 = CALL_EXPR_ARG (exp, 1);
16664 arg2 = CALL_EXPR_ARG (exp, 2);
16665 op0 = expand_normal (arg0);
16666 op1 = expand_normal (arg1);
16667 op2 = expand_normal (arg2);
16668 tmode = insn_data[icode].operand[0].mode;
16669 mode0 = insn_data[icode].operand[1].mode;
16670 mode1 = insn_data[icode].operand[2].mode;
16671 mode2 = insn_data[icode].operand[3].mode;
16673 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16674 op0 = copy_to_mode_reg (mode0, op0);
16675 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16676 op1 = copy_to_mode_reg (mode1, op1);
16677 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16679 /* @@@ better error message */
16680 error ("selector must be an immediate");
16681 return const0_rtx;
16683 if (target == 0
16684 || GET_MODE (target) != tmode
16685 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16686 target = gen_reg_rtx (tmode);
16687 pat = GEN_FCN (icode) (target, op0, op1, op2);
16688 if (! pat)
16689 return 0;
16690 emit_insn (pat);
16691 return target;
16693 case ARM_BUILTIN_SETWCX:
16694 arg0 = CALL_EXPR_ARG (exp, 0);
16695 arg1 = CALL_EXPR_ARG (exp, 1);
16696 op0 = force_reg (SImode, expand_normal (arg0));
16697 op1 = expand_normal (arg1);
16698 emit_insn (gen_iwmmxt_tmcr (op1, op0));
16699 return 0;
16701 case ARM_BUILTIN_GETWCX:
16702 arg0 = CALL_EXPR_ARG (exp, 0);
16703 op0 = expand_normal (arg0);
16704 target = gen_reg_rtx (SImode);
16705 emit_insn (gen_iwmmxt_tmrc (target, op0));
16706 return target;
16708 case ARM_BUILTIN_WSHUFH:
16709 icode = CODE_FOR_iwmmxt_wshufh;
16710 arg0 = CALL_EXPR_ARG (exp, 0);
16711 arg1 = CALL_EXPR_ARG (exp, 1);
16712 op0 = expand_normal (arg0);
16713 op1 = expand_normal (arg1);
16714 tmode = insn_data[icode].operand[0].mode;
16715 mode1 = insn_data[icode].operand[1].mode;
16716 mode2 = insn_data[icode].operand[2].mode;
16718 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16719 op0 = copy_to_mode_reg (mode1, op0);
16720 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16722 /* @@@ better error message */
16723 error ("mask must be an immediate");
16724 return const0_rtx;
16726 if (target == 0
16727 || GET_MODE (target) != tmode
16728 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16729 target = gen_reg_rtx (tmode);
16730 pat = GEN_FCN (icode) (target, op0, op1);
16731 if (! pat)
16732 return 0;
16733 emit_insn (pat);
16734 return target;
16736 case ARM_BUILTIN_WSADB:
16737 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16738 case ARM_BUILTIN_WSADH:
16739 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16740 case ARM_BUILTIN_WSADBZ:
16741 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16742 case ARM_BUILTIN_WSADHZ:
16743 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16745 /* Several three-argument builtins. */
16746 case ARM_BUILTIN_WMACS:
16747 case ARM_BUILTIN_WMACU:
16748 case ARM_BUILTIN_WALIGN:
16749 case ARM_BUILTIN_TMIA:
16750 case ARM_BUILTIN_TMIAPH:
16751 case ARM_BUILTIN_TMIATT:
16752 case ARM_BUILTIN_TMIATB:
16753 case ARM_BUILTIN_TMIABT:
16754 case ARM_BUILTIN_TMIABB:
16755 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16756 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16757 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16758 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16759 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16760 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16761 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16762 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16763 : CODE_FOR_iwmmxt_walign);
16764 arg0 = CALL_EXPR_ARG (exp, 0);
16765 arg1 = CALL_EXPR_ARG (exp, 1);
16766 arg2 = CALL_EXPR_ARG (exp, 2);
16767 op0 = expand_normal (arg0);
16768 op1 = expand_normal (arg1);
16769 op2 = expand_normal (arg2);
16770 tmode = insn_data[icode].operand[0].mode;
16771 mode0 = insn_data[icode].operand[1].mode;
16772 mode1 = insn_data[icode].operand[2].mode;
16773 mode2 = insn_data[icode].operand[3].mode;
16775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16776 op0 = copy_to_mode_reg (mode0, op0);
16777 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16778 op1 = copy_to_mode_reg (mode1, op1);
16779 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16780 op2 = copy_to_mode_reg (mode2, op2);
16781 if (target == 0
16782 || GET_MODE (target) != tmode
16783 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16784 target = gen_reg_rtx (tmode);
16785 pat = GEN_FCN (icode) (target, op0, op1, op2);
16786 if (! pat)
16787 return 0;
16788 emit_insn (pat);
16789 return target;
16791 case ARM_BUILTIN_WZERO:
16792 target = gen_reg_rtx (DImode);
16793 emit_insn (gen_iwmmxt_clrdi (target));
16794 return target;
16796 case ARM_BUILTIN_THREAD_POINTER:
16797 return arm_load_tp (target);
16799 default:
16800 break;
16803 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16804 if (d->code == (const enum arm_builtins) fcode)
16805 return arm_expand_binop_builtin (d->icode, exp, target);
16807 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16808 if (d->code == (const enum arm_builtins) fcode)
16809 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16811 /* @@@ Should really do something sensible here. */
16812 return NULL_RTX;
16815 /* Return the number (counting from 0) of
16816 the least significant set bit in MASK. */
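/* For example, a MASK of 0x18 (bits 3 and 4 set) yields 3.  MASK must be
   nonzero or the loop below will not terminate.  */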
16818 inline static int
16819 number_of_first_bit_set (unsigned mask)
16821 int bit;
16823 for (bit = 0;
16824 (mask & (1 << bit)) == 0;
16825 ++bit)
16826 continue;
16828 return bit;
16831 /* Emit code to push or pop registers to or from the stack. F is the
16832 assembly file. MASK is the registers to push or pop. PUSH is
16833 nonzero if we should push, and zero if we should pop. For debugging
16834 output, if pushing, adjust CFA_OFFSET by the amount of space added
16835 to the stack. REAL_REGS should have the same number of bits set as
16836 MASK, and will be used instead (in the same order) to describe which
16837 registers were saved - this is used to mark the save slots when we
16838 push high registers after moving them to low registers. */
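/* For example, pushing r4-r7 together with LR emits
   "push {r4, r5, r6, r7, lr}" (compare the prologue sequence quoted in
   thumb_unexpanded_epilogue below); a pop whose MASK includes the PC may
   instead be finished off by thumb_exit when interworking, a backtrace
   structure or an EH return is involved.  */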
16839 static void
16840 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16841 unsigned long real_regs)
16843 int regno;
16844 int lo_mask = mask & 0xFF;
16845 int pushed_words = 0;
16847 gcc_assert (mask);
16849 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16851 /* Special case. Do not generate a POP PC statement here; do it in
16852 thumb_exit(). */
16853 thumb_exit (f, -1);
16854 return;
16857 if (ARM_EABI_UNWIND_TABLES && push)
16859 fprintf (f, "\t.save\t{");
16860 for (regno = 0; regno < 15; regno++)
16862 if (real_regs & (1 << regno))
16864 if (real_regs & ((1 << regno) -1))
16865 fprintf (f, ", ");
16866 asm_fprintf (f, "%r", regno);
16869 fprintf (f, "}\n");
16872 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16874 /* Look at the low registers first. */
16875 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16877 if (lo_mask & 1)
16879 asm_fprintf (f, "%r", regno);
16881 if ((lo_mask & ~1) != 0)
16882 fprintf (f, ", ");
16884 pushed_words++;
16888 if (push && (mask & (1 << LR_REGNUM)))
16890 /* Catch pushing the LR. */
16891 if (mask & 0xFF)
16892 fprintf (f, ", ");
16894 asm_fprintf (f, "%r", LR_REGNUM);
16896 pushed_words++;
16898 else if (!push && (mask & (1 << PC_REGNUM)))
16900 /* Catch popping the PC. */
16901 if (TARGET_INTERWORK || TARGET_BACKTRACE
16902 || crtl->calls_eh_return)
16904 /* The PC is never popped directly, instead
16905 it is popped into r3 and then BX is used. */
16906 fprintf (f, "}\n");
16908 thumb_exit (f, -1);
16910 return;
16912 else
16914 if (mask & 0xFF)
16915 fprintf (f, ", ");
16917 asm_fprintf (f, "%r", PC_REGNUM);
16921 fprintf (f, "}\n");
16923 if (push && pushed_words && dwarf2out_do_frame ())
16925 char *l = dwarf2out_cfi_label ();
16926 int pushed_mask = real_regs;
16928 *cfa_offset += pushed_words * 4;
16929 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16931 pushed_words = 0;
16932 pushed_mask = real_regs;
16933 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16935 if (pushed_mask & 1)
16936 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16941 /* Generate code to return from a thumb function.
16942 If 'reg_containing_return_addr' is -1, then the return address is
16943 actually on the stack, at the stack pointer. */
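/* When the return address cannot simply be popped straight into the PC,
   the code below pops the saved values into whichever argument registers
   the return value leaves free, moves them into LR, FP and SP as needed,
   and returns with a BX.  */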
16944 static void
16945 thumb_exit (FILE *f, int reg_containing_return_addr)
16947 unsigned regs_available_for_popping;
16948 unsigned regs_to_pop;
16949 int pops_needed;
16950 unsigned available;
16951 unsigned required;
16952 int mode;
16953 int size;
16954 int restore_a4 = FALSE;
16956 /* Compute the registers we need to pop. */
16957 regs_to_pop = 0;
16958 pops_needed = 0;
16960 if (reg_containing_return_addr == -1)
16962 regs_to_pop |= 1 << LR_REGNUM;
16963 ++pops_needed;
16966 if (TARGET_BACKTRACE)
16968 /* Restore the (ARM) frame pointer and stack pointer. */
16969 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16970 pops_needed += 2;
16973 /* If there is nothing to pop then just emit the BX instruction and
16974 return. */
16975 if (pops_needed == 0)
16977 if (crtl->calls_eh_return)
16978 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16980 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16981 return;
16983 /* Otherwise if we are not supporting interworking and we have not created
16984 a backtrace structure and the function was not entered in ARM mode then
16985 just pop the return address straight into the PC. */
16986 else if (!TARGET_INTERWORK
16987 && !TARGET_BACKTRACE
16988 && !is_called_in_ARM_mode (current_function_decl)
16989 && !crtl->calls_eh_return)
16991 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16992 return;
16995 /* Find out how many of the (return) argument registers we can corrupt. */
16996 regs_available_for_popping = 0;
16998 /* If returning via __builtin_eh_return, the bottom three registers
16999 all contain information needed for the return. */
17000 if (crtl->calls_eh_return)
17001 size = 12;
17002 else
17004 /* We can deduce the registers used from the function's
17005 return value. This is more reliable than examining
17006 df_regs_ever_live_p () because that will be set if the register is
17007 ever used in the function, not just if the register is used
17008 to hold a return value. */
17010 if (crtl->return_rtx != 0)
17011 mode = GET_MODE (crtl->return_rtx);
17012 else
17013 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17015 size = GET_MODE_SIZE (mode);
17017 if (size == 0)
17019 /* In a void function we can use any argument register.
17020 In a function that returns a structure on the stack
17021 we can use the second and third argument registers. */
17022 if (mode == VOIDmode)
17023 regs_available_for_popping =
17024 (1 << ARG_REGISTER (1))
17025 | (1 << ARG_REGISTER (2))
17026 | (1 << ARG_REGISTER (3));
17027 else
17028 regs_available_for_popping =
17029 (1 << ARG_REGISTER (2))
17030 | (1 << ARG_REGISTER (3));
17032 else if (size <= 4)
17033 regs_available_for_popping =
17034 (1 << ARG_REGISTER (2))
17035 | (1 << ARG_REGISTER (3));
17036 else if (size <= 8)
17037 regs_available_for_popping =
17038 (1 << ARG_REGISTER (3));
17041 /* Match registers to be popped with registers into which we pop them. */
17042 for (available = regs_available_for_popping,
17043 required = regs_to_pop;
17044 required != 0 && available != 0;
17045 available &= ~(available & - available),
17046 required &= ~(required & - required))
17047 -- pops_needed;
17049 /* If we have any popping registers left over, remove them. */
17050 if (available > 0)
17051 regs_available_for_popping &= ~available;
17053 /* Otherwise if we need another popping register we can use
17054 the fourth argument register. */
17055 else if (pops_needed)
17057 /* If we have not found any free argument registers and
17058 reg a4 contains the return address, we must move it. */
17059 if (regs_available_for_popping == 0
17060 && reg_containing_return_addr == LAST_ARG_REGNUM)
17062 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17063 reg_containing_return_addr = LR_REGNUM;
17065 else if (size > 12)
17067 /* Register a4 is being used to hold part of the return value,
17068 but we have dire need of a free, low register. */
17069 restore_a4 = TRUE;
17071 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
17074 if (reg_containing_return_addr != LAST_ARG_REGNUM)
17076 /* The fourth argument register is available. */
17077 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
17079 --pops_needed;
17083 /* Pop as many registers as we can. */
17084 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17085 regs_available_for_popping);
17087 /* Process the registers we popped. */
17088 if (reg_containing_return_addr == -1)
17090 /* The return address was popped into the lowest numbered register. */
17091 regs_to_pop &= ~(1 << LR_REGNUM);
17093 reg_containing_return_addr =
17094 number_of_first_bit_set (regs_available_for_popping);
17096 /* Remove this register from the mask of available registers, so that
17097 the return address will not be corrupted by further pops. */
17098 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
17101 /* If we popped other registers then handle them here. */
17102 if (regs_available_for_popping)
17104 int frame_pointer;
17106 /* Work out which register currently contains the frame pointer. */
17107 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
17109 /* Move it into the correct place. */
17110 asm_fprintf (f, "\tmov\t%r, %r\n",
17111 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
17113 /* (Temporarily) remove it from the mask of popped registers. */
17114 regs_available_for_popping &= ~(1 << frame_pointer);
17115 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
17117 if (regs_available_for_popping)
17119 int stack_pointer;
17121 /* We popped the stack pointer as well;
17122 find the register that contains it. */
17123 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
17125 /* Move it into the stack register. */
17126 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
17128 /* At this point we have popped all necessary registers, so
17129 do not worry about restoring regs_available_for_popping
17130 to its correct value:
17132 assert (pops_needed == 0)
17133 assert (regs_available_for_popping == (1 << frame_pointer))
17134 assert (regs_to_pop == (1 << STACK_POINTER)) */
17136 else
17138 /* Since we have just moved the popped value into the frame
17139 pointer, the popping register is available for reuse, and
17140 we know that we still have the stack pointer left to pop. */
17141 regs_available_for_popping |= (1 << frame_pointer);
17145 /* If we still have registers left on the stack, but we no longer have
17146 any registers into which we can pop them, then we must move the return
17147 address into the link register and make available the register that
17148 contained it. */
17149 if (regs_available_for_popping == 0 && pops_needed > 0)
17151 regs_available_for_popping |= 1 << reg_containing_return_addr;
17153 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
17154 reg_containing_return_addr);
17156 reg_containing_return_addr = LR_REGNUM;
17159 /* If we have registers left on the stack then pop some more.
17160 We know that at most we will want to pop FP and SP. */
17161 if (pops_needed > 0)
17163 int popped_into;
17164 int move_to;
17166 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17167 regs_available_for_popping);
17169 /* We have popped either FP or SP.
17170 Move whichever one it is into the correct register. */
17171 popped_into = number_of_first_bit_set (regs_available_for_popping);
17172 move_to = number_of_first_bit_set (regs_to_pop);
17174 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
17176 regs_to_pop &= ~(1 << move_to);
17178 --pops_needed;
17181 /* If we still have not popped everything then we must have only
17182 had one register available to us and we are now popping the SP. */
17183 if (pops_needed > 0)
17185 int popped_into;
17187 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17188 regs_available_for_popping);
17190 popped_into = number_of_first_bit_set (regs_available_for_popping);
17192 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
17194 assert (regs_to_pop == (1 << STACK_POINTER))
17195 assert (pops_needed == 1)
17199 /* If necessary restore the a4 register. */
17200 if (restore_a4)
17202 if (reg_containing_return_addr != LR_REGNUM)
17204 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17205 reg_containing_return_addr = LR_REGNUM;
17208 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
17211 if (crtl->calls_eh_return)
17212 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17214 /* Return to caller. */
17215 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17219 void
17220 thumb1_final_prescan_insn (rtx insn)
17222 if (flag_print_asm_name)
17223 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
17224 INSN_ADDRESSES (INSN_UID (insn)));
17228 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
17230 unsigned HOST_WIDE_INT mask = 0xff;
17231 int i;
17233 if (val == 0) /* XXX */
17234 return 0;
17236 for (i = 0; i < 25; i++)
17237 if ((val & (mask << i)) == val)
17238 return 1;
17240 return 0;
17243 /* Returns nonzero if the current function contains,
17244 or might contain a far jump. */
17245 static int
17246 thumb_far_jump_used_p (void)
17248 rtx insn;
17250 /* This test is only important for leaf functions. */
17251 /* assert (!leaf_function_p ()); */
17253 /* If we have already decided that far jumps may be used,
17254 do not bother checking again, and always return true even if
17255 it turns out that they are not being used. Once we have made
17256 the decision that far jumps are present (and that hence the link
17257 register will be pushed onto the stack) we cannot go back on it. */
17258 if (cfun->machine->far_jump_used)
17259 return 1;
17261 /* If this function is not being called from the prologue/epilogue
17262 generation code then it must be being called from the
17263 INITIAL_ELIMINATION_OFFSET macro. */
17264 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
17266 /* In this case we know that we are being asked about the elimination
17267 of the arg pointer register. If that register is not being used,
17268 then there are no arguments on the stack, and we do not have to
17269 worry that a far jump might force the prologue to push the link
17270 register, changing the stack offsets. In this case we can just
17271 return false, since the presence of far jumps in the function will
17272 not affect stack offsets.
17274 If the arg pointer is live (or if it was live, but has now been
17275 eliminated and so set to dead) then we do have to test to see if
17276 the function might contain a far jump. This test can lead to some
17277 false negatives, since before reload is completed, the length of
17278 branch instructions is not known, so gcc defaults to returning their
17279 longest length, which in turn sets the far jump attribute to true.
17281 A false negative will not result in bad code being generated, but it
17282 will result in a needless push and pop of the link register. We
17283 hope that this does not occur too often.
17285 If we need doubleword stack alignment this could affect the other
17286 elimination offsets so we can't risk getting it wrong. */
17287 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
17288 cfun->machine->arg_pointer_live = 1;
17289 else if (!cfun->machine->arg_pointer_live)
17290 return 0;
17293 /* Check to see if the function contains a branch
17294 insn with the far jump attribute set. */
17295 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17297 if (GET_CODE (insn) == JUMP_INSN
17298 /* Ignore tablejump patterns. */
17299 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17300 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
17301 && get_attr_far_jump (insn) == FAR_JUMP_YES
17304 /* Record the fact that we have decided that
17305 the function does use far jumps. */
17306 cfun->machine->far_jump_used = 1;
17307 return 1;
17311 return 0;
17314 /* Return nonzero if FUNC must be entered in ARM mode. */
17316 is_called_in_ARM_mode (tree func)
17318 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
17320 /* Ignore the problem about functions whose address is taken. */
17321 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
17322 return TRUE;
17324 #ifdef ARM_PE
17325 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
17326 #else
17327 return FALSE;
17328 #endif
17331 /* The bits which aren't usefully expanded as rtl. */
17332 const char *
17333 thumb_unexpanded_epilogue (void)
17335 arm_stack_offsets *offsets;
17336 int regno;
17337 unsigned long live_regs_mask = 0;
17338 int high_regs_pushed = 0;
17339 int had_to_push_lr;
17340 int size;
17342 if (cfun->machine->return_used_this_function != 0)
17343 return "";
17345 if (IS_NAKED (arm_current_func_type ()))
17346 return "";
17348 offsets = arm_get_frame_offsets ();
17349 live_regs_mask = offsets->saved_regs_mask;
17350 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17352 /* We can deduce the registers used from the function's return value.
17353 This is more reliable than examining df_regs_ever_live_p () because that
17354 will be set if the register is ever used in the function, not just if
17355 the register is used to hold a return value. */
17356 size = arm_size_return_regs ();
17358 /* The prolog may have pushed some high registers to use as
17359 work registers. e.g. the testsuite file:
17360 gcc/testsuite/gcc.c-torture/execute/complex-2.c
17361 compiles to produce:
17362 push {r4, r5, r6, r7, lr}
17363 mov r7, r9
17364 mov r6, r8
17365 push {r6, r7}
17366 as part of the prolog. We have to undo that pushing here. */
17368 if (high_regs_pushed)
17370 unsigned long mask = live_regs_mask & 0xff;
17371 int next_hi_reg;
17373 /* The available low registers depend on the size of the value we are
17374 returning. */
17375 if (size <= 12)
17376 mask |= 1 << 3;
17377 if (size <= 8)
17378 mask |= 1 << 2;
17380 if (mask == 0)
17381 /* Oh dear! We have no low registers into which we can pop
17382 high registers! */
17383 internal_error
17384 ("no low registers available for popping high registers");
17386 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
17387 if (live_regs_mask & (1 << next_hi_reg))
17388 break;
17390 while (high_regs_pushed)
17392 /* Find lo register(s) into which the high register(s) can
17393 be popped. */
17394 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17396 if (mask & (1 << regno))
17397 high_regs_pushed--;
17398 if (high_regs_pushed == 0)
17399 break;
17402 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
17404 /* Pop the values into the low register(s). */
17405 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
17407 /* Move the value(s) into the high registers. */
17408 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17410 if (mask & (1 << regno))
17412 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
17413 regno);
17415 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
17416 if (live_regs_mask & (1 << next_hi_reg))
17417 break;
17421 live_regs_mask &= ~0x0f00;
17424 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
17425 live_regs_mask &= 0xff;
17427 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
17429 /* Pop the return address into the PC. */
17430 if (had_to_push_lr)
17431 live_regs_mask |= 1 << PC_REGNUM;
17433 /* Either no argument registers were pushed or a backtrace
17434 structure was created which includes an adjusted stack
17435 pointer, so just pop everything. */
17436 if (live_regs_mask)
17437 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17438 live_regs_mask);
17440 /* We have either just popped the return address into the
17441 PC or it was kept in LR for the entire function. */
17442 if (!had_to_push_lr)
17443 thumb_exit (asm_out_file, LR_REGNUM);
17445 else
17447 /* Pop everything but the return address. */
17448 if (live_regs_mask)
17449 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17450 live_regs_mask);
17452 if (had_to_push_lr)
17454 if (size > 12)
17456 /* We have no free low regs, so save one. */
17457 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
17458 LAST_ARG_REGNUM);
17461 /* Get the return address into a temporary register. */
17462 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
17463 1 << LAST_ARG_REGNUM);
17465 if (size > 12)
17467 /* Move the return address to lr. */
17468 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
17469 LAST_ARG_REGNUM);
17470 /* Restore the low register. */
17471 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
17472 IP_REGNUM);
17473 regno = LR_REGNUM;
17475 else
17476 regno = LAST_ARG_REGNUM;
17478 else
17479 regno = LR_REGNUM;
17481 /* Remove the argument registers that were pushed onto the stack. */
17482 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
17483 SP_REGNUM, SP_REGNUM,
17484 crtl->args.pretend_args_size);
17486 thumb_exit (asm_out_file, regno);
17489 return "";
17492 /* Functions to save and restore machine-specific function data. */
17493 static struct machine_function *
17494 arm_init_machine_status (void)
17496 struct machine_function *machine;
17497 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17499 #if ARM_FT_UNKNOWN != 0
17500 machine->func_type = ARM_FT_UNKNOWN;
17501 #endif
17502 return machine;
17505 /* Return an RTX indicating where the return address to the
17506 calling function can be found. */
17508 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17510 if (count != 0)
17511 return NULL_RTX;
17513 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17516 /* Do anything needed before RTL is emitted for each function. */
17517 void
17518 arm_init_expanders (void)
17520 /* Arrange to initialize and mark the machine per-function status. */
17521 init_machine_status = arm_init_machine_status;
17523 /* This is to stop the combine pass optimizing away the alignment
17524 adjustment of va_arg. */
17525 /* ??? It is claimed that this should not be necessary. */
17526 if (cfun)
17527 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17531 /* Like arm_compute_initial_elimination_offset. Simpler because there
17532 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17533 to point at the base of the local variables after static stack
17534 space for a function has been allocated. */
17536 HOST_WIDE_INT
17537 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17539 arm_stack_offsets *offsets;
17541 offsets = arm_get_frame_offsets ();
17543 switch (from)
17545 case ARG_POINTER_REGNUM:
17546 switch (to)
17548 case STACK_POINTER_REGNUM:
17549 return offsets->outgoing_args - offsets->saved_args;
17551 case FRAME_POINTER_REGNUM:
17552 return offsets->soft_frame - offsets->saved_args;
17554 case ARM_HARD_FRAME_POINTER_REGNUM:
17555 return offsets->saved_regs - offsets->saved_args;
17557 case THUMB_HARD_FRAME_POINTER_REGNUM:
17558 return offsets->locals_base - offsets->saved_args;
17560 default:
17561 gcc_unreachable ();
17563 break;
17565 case FRAME_POINTER_REGNUM:
17566 switch (to)
17568 case STACK_POINTER_REGNUM:
17569 return offsets->outgoing_args - offsets->soft_frame;
17571 case ARM_HARD_FRAME_POINTER_REGNUM:
17572 return offsets->saved_regs - offsets->soft_frame;
17574 case THUMB_HARD_FRAME_POINTER_REGNUM:
17575 return offsets->locals_base - offsets->soft_frame;
17577 default:
17578 gcc_unreachable ();
17580 break;
17582 default:
17583 gcc_unreachable ();
17587 /* Generate the rest of a function's prologue. */
17588 void
17589 thumb1_expand_prologue (void)
17591 rtx insn, dwarf;
17593 HOST_WIDE_INT amount;
17594 arm_stack_offsets *offsets;
17595 unsigned long func_type;
17596 int regno;
17597 unsigned long live_regs_mask;
17599 func_type = arm_current_func_type ();
17601 /* Naked functions don't have prologues. */
17602 if (IS_NAKED (func_type))
17603 return;
17605 if (IS_INTERRUPT (func_type))
17607 error ("interrupt Service Routines cannot be coded in Thumb mode");
17608 return;
17611 offsets = arm_get_frame_offsets ();
17612 live_regs_mask = offsets->saved_regs_mask;
17613 /* Load the pic register before setting the frame pointer,
17614 so we can use r7 as a temporary work register. */
17615 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17616 arm_load_pic_register (live_regs_mask);
17618 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17619 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17620 stack_pointer_rtx);
17622 amount = offsets->outgoing_args - offsets->saved_regs;
17623 if (amount)
17625 if (amount < 512)
17627 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17628 GEN_INT (- amount)));
17629 RTX_FRAME_RELATED_P (insn) = 1;
17631 else
17633 rtx reg;
17635 /* The stack decrement is too big for an immediate value in a single
17636 insn. In theory we could issue multiple subtracts, but after
17637 three of them it becomes more space efficient to place the full
17638 value in the constant pool and load into a register. (Also the
17639 ARM debugger really likes to see only one stack decrement per
17640 function). So instead we look for a scratch register into which
17641 we can load the decrement, and then we subtract this from the
17642 stack pointer. Unfortunately on the thumb the only available
17643 scratch registers are the argument registers, and we cannot use
17644 these as they may hold arguments to the function. Instead we
17645 attempt to locate a call preserved register which is used by this
17646 function. If we can find one, then we know that it will have
17647 been pushed at the start of the prologue and so we can corrupt
17648 it now. */
17649 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17650 if (live_regs_mask & (1 << regno))
17651 break;
17653 gcc_assert (regno <= LAST_LO_REGNUM);
17655 reg = gen_rtx_REG (SImode, regno);
17657 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17659 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17660 stack_pointer_rtx, reg));
17661 RTX_FRAME_RELATED_P (insn) = 1;
17662 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17663 plus_constant (stack_pointer_rtx,
17664 -amount));
17665 RTX_FRAME_RELATED_P (dwarf) = 1;
17666 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17670 if (frame_pointer_needed)
17671 thumb_set_frame_pointer (offsets);
17673 /* If we are profiling, make sure no instructions are scheduled before
17674 the call to mcount. Similarly if the user has requested no
17675 scheduling in the prolog. Similarly if we want non-call exceptions
17676 using the EABI unwinder, to prevent faulting instructions from being
17677 swapped with a stack adjustment. */
17678 if (crtl->profile || !TARGET_SCHED_PROLOG
17679 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17680 emit_insn (gen_blockage ());
17682 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17683 if (live_regs_mask & 0xff)
17684 cfun->machine->lr_save_eliminated = 0;
17688 void
17689 thumb1_expand_epilogue (void)
17691 HOST_WIDE_INT amount;
17692 arm_stack_offsets *offsets;
17693 int regno;
17695 /* Naked functions don't have epilogues. */
17696 if (IS_NAKED (arm_current_func_type ()))
17697 return;
17699 offsets = arm_get_frame_offsets ();
17700 amount = offsets->outgoing_args - offsets->saved_regs;
17702 if (frame_pointer_needed)
17704 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17705 amount = offsets->locals_base - offsets->saved_regs;
17708 gcc_assert (amount >= 0);
17709 if (amount)
17711 if (amount < 512)
17712 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17713 GEN_INT (amount)));
17714 else
17716 /* r3 is always free in the epilogue. */
17717 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17719 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17720 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17724 /* Emit a USE (stack_pointer_rtx), so that
17725 the stack adjustment will not be deleted. */
17726 emit_insn (gen_prologue_use (stack_pointer_rtx));
17728 if (crtl->profile || !TARGET_SCHED_PROLOG)
17729 emit_insn (gen_blockage ());
17731 /* Emit a clobber for each insn that will be restored in the epilogue,
17732 so that flow2 will get register lifetimes correct. */
17733 for (regno = 0; regno < 13; regno++)
17734 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17735 emit_clobber (gen_rtx_REG (SImode, regno));
17737 if (! df_regs_ever_live_p (LR_REGNUM))
17738 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
17741 static void
17742 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17744 arm_stack_offsets *offsets;
17745 unsigned long live_regs_mask = 0;
17746 unsigned long l_mask;
17747 unsigned high_regs_pushed = 0;
17748 int cfa_offset = 0;
17749 int regno;
17751 if (IS_NAKED (arm_current_func_type ()))
17752 return;
17754 if (is_called_in_ARM_mode (current_function_decl))
17756 const char * name;
17758 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17759 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17760 == SYMBOL_REF);
17761 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17763 /* Generate code sequence to switch us into Thumb mode. */
17764 /* The .code 32 directive has already been emitted by
17765 ASM_DECLARE_FUNCTION_NAME. */
17766 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17767 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17769 /* Generate a label, so that the debugger will notice the
17770 change in instruction sets. This label is also used by
17771 the assembler to bypass the ARM code when this function
17772 is called from a Thumb encoded function elsewhere in the
17773 same file. Hence the definition of STUB_NAME here must
17774 agree with the definition in gas/config/tc-arm.c. */
17776 #define STUB_NAME ".real_start_of"
17778 fprintf (f, "\t.code\t16\n");
17779 #ifdef ARM_PE
17780 if (arm_dllexport_name_p (name))
17781 name = arm_strip_name_encoding (name);
17782 #endif
17783 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17784 fprintf (f, "\t.thumb_func\n");
17785 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17788 if (crtl->args.pretend_args_size)
17790 /* Output unwind directive for the stack adjustment. */
17791 if (ARM_EABI_UNWIND_TABLES)
17792 fprintf (f, "\t.pad #%d\n",
17793 crtl->args.pretend_args_size);
17795 if (cfun->machine->uses_anonymous_args)
17797 int num_pushes;
17799 fprintf (f, "\tpush\t{");
17801 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17803 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17804 regno <= LAST_ARG_REGNUM;
17805 regno++)
17806 asm_fprintf (f, "%r%s", regno,
17807 regno == LAST_ARG_REGNUM ? "" : ", ");
17809 fprintf (f, "}\n");
17811 else
17812 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17813 SP_REGNUM, SP_REGNUM,
17814 crtl->args.pretend_args_size);
17816 /* We don't need to record the stores for unwinding (would it
17817 help the debugger any if we did?), but record the change in
17818 the stack pointer. */
17819 if (dwarf2out_do_frame ())
17821 char *l = dwarf2out_cfi_label ();
17823 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17824 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17828 /* Get the registers we are going to push. */
17829 offsets = arm_get_frame_offsets ();
17830 live_regs_mask = offsets->saved_regs_mask;
17831 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17832 l_mask = live_regs_mask & 0x40ff;
17833 /* Then count how many other high registers will need to be pushed. */
17834 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17836 if (TARGET_BACKTRACE)
17838 unsigned offset;
17839 unsigned work_register;
17841 /* We have been asked to create a stack backtrace structure.
17842 The code looks like this:
17844 0 .align 2
17845 0 func:
17846 0 sub SP, #16 Reserve space for 4 registers.
17847 2 push {R7} Push low registers.
17848 4 add R7, SP, #20 Get the stack pointer before the push.
17849 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17850 8 mov R7, PC Get hold of the start of this code plus 12.
17851 10 str R7, [SP, #16] Store it.
17852 12 mov R7, FP Get hold of the current frame pointer.
17853 14 str R7, [SP, #4] Store it.
17854 16 mov R7, LR Get hold of the current return address.
17855 18 str R7, [SP, #12] Store it.
17856 20 add R7, SP, #16 Point at the start of the backtrace structure.
17857 22 mov FP, R7 Put this value into the frame pointer. */
17859 work_register = thumb_find_work_register (live_regs_mask);
17861 if (ARM_EABI_UNWIND_TABLES)
17862 asm_fprintf (f, "\t.pad #16\n");
17864 asm_fprintf
17865 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17866 SP_REGNUM, SP_REGNUM);
17868 if (dwarf2out_do_frame ())
17870 char *l = dwarf2out_cfi_label ();
17872 cfa_offset = cfa_offset + 16;
17873 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17876 if (l_mask)
17878 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17879 offset = bit_count (l_mask) * UNITS_PER_WORD;
17881 else
17882 offset = 0;
17884 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17885 offset + 16 + crtl->args.pretend_args_size);
17887 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17888 offset + 4);
17890 /* Make sure that the instruction fetching the PC is in the right place
17891 to calculate "start of backtrace creation code + 12". */
17892 if (l_mask)
17894 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17895 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17896 offset + 12);
17897 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17898 ARM_HARD_FRAME_POINTER_REGNUM);
17899 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17900 offset);
17902 else
17904 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17905 ARM_HARD_FRAME_POINTER_REGNUM);
17906 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17907 offset);
17908 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17909 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17910 offset + 12);
17913 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17914 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17915 offset + 8);
17916 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17917 offset + 12);
17918 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17919 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17921 /* Optimization: If we are not pushing any low registers but we are going
17922 to push some high registers then delay our first push. This will just
17923 be a push of LR and we can combine it with the push of the first high
17924 register. */
17925 else if ((l_mask & 0xff) != 0
17926 || (high_regs_pushed == 0 && l_mask))
17927 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17929 if (high_regs_pushed)
17931 unsigned pushable_regs;
17932 unsigned next_hi_reg;
17934 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17935 if (live_regs_mask & (1 << next_hi_reg))
17936 break;
17938 pushable_regs = l_mask & 0xff;
17940 if (pushable_regs == 0)
17941 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17943 while (high_regs_pushed > 0)
17945 unsigned long real_regs_mask = 0;
17947 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17949 if (pushable_regs & (1 << regno))
17951 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17953 high_regs_pushed --;
17954 real_regs_mask |= (1 << next_hi_reg);
17956 if (high_regs_pushed)
17958 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17959 next_hi_reg --)
17960 if (live_regs_mask & (1 << next_hi_reg))
17961 break;
17963 else
17965 pushable_regs &= ~((1 << regno) - 1);
17966 break;
17971 /* If we had to find a work register and we have not yet
17972 saved the LR then add it to the list of regs to push. */
17973 if (l_mask == (1 << LR_REGNUM))
17975 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17976 1, &cfa_offset,
17977 real_regs_mask | (1 << LR_REGNUM));
17978 l_mask = 0;
17980 else
17981 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17986 /* Handle the case of a double word load into a low register from
17987 a computed memory address. The computed address may involve a
17988 register which is overwritten by the load. */
17989 const char *
17990 thumb_load_double_from_address (rtx *operands)
17992 rtx addr;
17993 rtx base;
17994 rtx offset;
17995 rtx arg1;
17996 rtx arg2;
17998 gcc_assert (GET_CODE (operands[0]) == REG);
17999 gcc_assert (GET_CODE (operands[1]) == MEM);
18001 /* Get the memory address. */
18002 addr = XEXP (operands[1], 0);
18004 /* Work out how the memory address is computed. */
18005 switch (GET_CODE (addr))
18007 case REG:
18008 operands[2] = adjust_address (operands[1], SImode, 4);
18010 if (REGNO (operands[0]) == REGNO (addr))
18012 output_asm_insn ("ldr\t%H0, %2", operands);
18013 output_asm_insn ("ldr\t%0, %1", operands);
18015 else
18017 output_asm_insn ("ldr\t%0, %1", operands);
18018 output_asm_insn ("ldr\t%H0, %2", operands);
18020 break;
18022 case CONST:
18023 /* Compute <address> + 4 for the high order load. */
18024 operands[2] = adjust_address (operands[1], SImode, 4);
18026 output_asm_insn ("ldr\t%0, %1", operands);
18027 output_asm_insn ("ldr\t%H0, %2", operands);
18028 break;
18030 case PLUS:
18031 arg1 = XEXP (addr, 0);
18032 arg2 = XEXP (addr, 1);
18034 if (CONSTANT_P (arg1))
18035 base = arg2, offset = arg1;
18036 else
18037 base = arg1, offset = arg2;
18039 gcc_assert (GET_CODE (base) == REG);
18041 /* Catch the case of <address> = <reg> + <reg> */
18042 if (GET_CODE (offset) == REG)
18044 int reg_offset = REGNO (offset);
18045 int reg_base = REGNO (base);
18046 int reg_dest = REGNO (operands[0]);
18048 /* Add the base and offset registers together into the
18049 higher destination register. */
18050 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
18051 reg_dest + 1, reg_base, reg_offset);
18053 /* Load the lower destination register from the address in
18054 the higher destination register. */
18055 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
18056 reg_dest, reg_dest + 1);
18058 /* Load the higher destination register from its own address
18059 plus 4. */
18060 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
18061 reg_dest + 1, reg_dest + 1);
18063 else
18065 /* Compute <address> + 4 for the high order load. */
18066 operands[2] = adjust_address (operands[1], SImode, 4);
18068 /* If the computed address is held in the low order register
18069 then load the high order register first, otherwise always
18070 load the low order register first. */
18071 if (REGNO (operands[0]) == REGNO (base))
18073 output_asm_insn ("ldr\t%H0, %2", operands);
18074 output_asm_insn ("ldr\t%0, %1", operands);
18076 else
18078 output_asm_insn ("ldr\t%0, %1", operands);
18079 output_asm_insn ("ldr\t%H0, %2", operands);
18082 break;
18084 case LABEL_REF:
18085 /* With no registers to worry about we can just load the value
18086 directly. */
18087 operands[2] = adjust_address (operands[1], SImode, 4);
18089 output_asm_insn ("ldr\t%H0, %2", operands);
18090 output_asm_insn ("ldr\t%0, %1", operands);
18091 break;
18093 default:
18094 gcc_unreachable ();
18097 return "";
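/* Output an ldmia/stmia pair copying N words between the pointers in
   operands 0 and 1, using operands 4..6 as scratch registers.  The
   scratch operands are swapped into ascending register order first,
   since multiple-register lists are expected in ascending order.  */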
18100 const char *
18101 thumb_output_move_mem_multiple (int n, rtx *operands)
18103 rtx tmp;
18105 switch (n)
18107 case 2:
18108 if (REGNO (operands[4]) > REGNO (operands[5]))
18110 tmp = operands[4];
18111 operands[4] = operands[5];
18112 operands[5] = tmp;
18114 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
18115 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
18116 break;
18118 case 3:
18119 if (REGNO (operands[4]) > REGNO (operands[5]))
18121 tmp = operands[4];
18122 operands[4] = operands[5];
18123 operands[5] = tmp;
18125 if (REGNO (operands[5]) > REGNO (operands[6]))
18127 tmp = operands[5];
18128 operands[5] = operands[6];
18129 operands[6] = tmp;
18131 if (REGNO (operands[4]) > REGNO (operands[5]))
18133 tmp = operands[4];
18134 operands[4] = operands[5];
18135 operands[5] = tmp;
18138 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
18139 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
18140 break;
18142 default:
18143 gcc_unreachable ();
18146 return "";
18149 /* Output a call-via instruction for thumb state. */
18150 const char *
18151 thumb_call_via_reg (rtx reg)
18153 int regno = REGNO (reg);
18154 rtx *labelp;
18156 gcc_assert (regno < LR_REGNUM);
18158 /* If we are in the normal text section we can use a single instance
18159 per compilation unit. If we are doing function sections, then we need
18160 an entry per section, since we can't rely on reachability. */
18161 if (in_section == text_section)
18163 thumb_call_reg_needed = 1;
18165 if (thumb_call_via_label[regno] == NULL)
18166 thumb_call_via_label[regno] = gen_label_rtx ();
18167 labelp = thumb_call_via_label + regno;
18169 else
18171 if (cfun->machine->call_via[regno] == NULL)
18172 cfun->machine->call_via[regno] = gen_label_rtx ();
18173 labelp = cfun->machine->call_via + regno;
18176 output_asm_insn ("bl\t%a0", labelp);
18177 return "";
18180 /* Routines for generating rtl. */
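/* Expand a block copy (movmemqi) of a constant length: copy 12- and
   8-byte chunks with load/store-multiple patterns while possible, then
   finish with a word, a halfword and a byte copy as needed.  */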
18181 void
18182 thumb_expand_movmemqi (rtx *operands)
18184 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
18185 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
18186 HOST_WIDE_INT len = INTVAL (operands[2]);
18187 HOST_WIDE_INT offset = 0;
18189 while (len >= 12)
18191 emit_insn (gen_movmem12b (out, in, out, in));
18192 len -= 12;
18195 if (len >= 8)
18197 emit_insn (gen_movmem8b (out, in, out, in));
18198 len -= 8;
18201 if (len >= 4)
18203 rtx reg = gen_reg_rtx (SImode);
18204 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
18205 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
18206 len -= 4;
18207 offset += 4;
18210 if (len >= 2)
18212 rtx reg = gen_reg_rtx (HImode);
18213 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
18214 plus_constant (in, offset))));
18215 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
18216 reg));
18217 len -= 2;
18218 offset += 2;
18221 if (len)
18223 rtx reg = gen_reg_rtx (QImode);
18224 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
18225 plus_constant (in, offset))));
18226 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
18227 reg));
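/* Handle storing a half-word to memory during reload.  */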
18231 void
18232 thumb_reload_out_hi (rtx *operands)
18234 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
18237 /* Handle reading a half-word from memory during reload. */
18238 void
18239 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
18241 gcc_unreachable ();
18244 /* Return the length of a function name prefix
18245 that starts with the character 'c'. */
18246 static int
18247 arm_get_strip_length (int c)
18249 switch (c)
18251 ARM_NAME_ENCODING_LENGTHS
18252 default: return 0;
18256 /* Return a pointer to a function's name with any
18257 and all prefix encodings stripped from it. */
18258 const char *
18259 arm_strip_name_encoding (const char *name)
18261 int skip;
18263 while ((skip = arm_get_strip_length (* name)))
18264 name += skip;
18266 return name;
18269 /* If there is a '*' anywhere in the name's prefix, then
18270 emit the stripped name verbatim, otherwise prepend an
18271 underscore if leading underscores are being used. */
18272 void
18273 arm_asm_output_labelref (FILE *stream, const char *name)
18275 int skip;
18276 int verbatim = 0;
18278 while ((skip = arm_get_strip_length (* name)))
18280 verbatim |= (*name == '*');
18281 name += skip;
18284 if (verbatim)
18285 fputs (name, stream);
18286 else
18287 asm_fprintf (stream, "%U%s", name);
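/* Implement the TARGET_ASM_FILE_START hook: emit the .syntax, .cpu/.arch
   and .fpu directives, plus the EABI object attributes describing how
   this unit was compiled, before the body of the assembly output.  */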
18290 static void
18291 arm_file_start (void)
18293 int val;
18295 if (TARGET_UNIFIED_ASM)
18296 asm_fprintf (asm_out_file, "\t.syntax unified\n");
18298 if (TARGET_BPABI)
18300 const char *fpu_name;
18301 if (arm_select[0].string)
18302 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
18303 else if (arm_select[1].string)
18304 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
18305 else
18306 asm_fprintf (asm_out_file, "\t.cpu %s\n",
18307 all_cores[arm_default_cpu].name);
18309 if (TARGET_SOFT_FLOAT)
18311 if (TARGET_VFP)
18312 fpu_name = "softvfp";
18313 else
18314 fpu_name = "softfpa";
18316 else
18318 int set_float_abi_attributes = 0;
18319 switch (arm_fpu_arch)
18321 case FPUTYPE_FPA:
18322 fpu_name = "fpa";
18323 break;
18324 case FPUTYPE_FPA_EMU2:
18325 fpu_name = "fpe2";
18326 break;
18327 case FPUTYPE_FPA_EMU3:
18328 fpu_name = "fpe3";
18329 break;
18330 case FPUTYPE_MAVERICK:
18331 fpu_name = "maverick";
18332 break;
18333 case FPUTYPE_VFP:
18334 fpu_name = "vfp";
18335 set_float_abi_attributes = 1;
18336 break;
18337 case FPUTYPE_VFP3D16:
18338 fpu_name = "vfpv3-d16";
18339 set_float_abi_attributes = 1;
18340 break;
18341 case FPUTYPE_VFP3:
18342 fpu_name = "vfpv3";
18343 set_float_abi_attributes = 1;
18344 break;
18345 case FPUTYPE_NEON:
18346 fpu_name = "neon";
18347 set_float_abi_attributes = 1;
18348 break;
18349 default:
18350 abort();
18352 if (set_float_abi_attributes)
18354 if (TARGET_HARD_FLOAT)
18355 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
18356 if (TARGET_HARD_FLOAT_ABI)
18357 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
18360 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
18362 /* Some of these attributes only apply when the corresponding features
18363 are used. However we don't have any easy way of figuring this out.
18364 Conservatively record the setting that would have been used. */
18366 /* Tag_ABI_FP_rounding. */
18367 if (flag_rounding_math)
18368 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
18369 if (!flag_unsafe_math_optimizations)
18371 /* Tag_ABI_FP_denormal. */
18372 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
18373 /* Tag_ABI_FP_exceptions. */
18374 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
18376 /* Tag_ABI_FP_user_exceptions. */
18377 if (flag_signaling_nans)
18378 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
18379 /* Tag_ABI_FP_number_model. */
18380 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
18381 flag_finite_math_only ? 1 : 3);
18383 /* Tag_ABI_align8_needed. */
18384 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
18385 /* Tag_ABI_align8_preserved. */
18386 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
18387 /* Tag_ABI_enum_size. */
18388 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
18389 flag_short_enums ? 1 : 2);
18391 /* Tag_ABI_optimization_goals. */
18392 if (optimize_size)
18393 val = 4;
18394 else if (optimize >= 2)
18395 val = 2;
18396 else if (optimize)
18397 val = 1;
18398 else
18399 val = 6;
18400 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
18402 if (arm_lang_output_object_attributes_hook)
18403 arm_lang_output_object_attributes_hook();
18405 default_file_start();
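/* Implement the TARGET_ASM_FILE_END hook: emit the GNU-stack note if
   needed, and output the per-register "bx rN" call-via stubs that were
   requested for Thumb indirect calls in the text section.  */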
18408 static void
18409 arm_file_end (void)
18411 int regno;
18413 if (NEED_INDICATE_EXEC_STACK)
18414 /* Add .note.GNU-stack. */
18415 file_end_indicate_exec_stack ();
18417 if (! thumb_call_reg_needed)
18418 return;
18420 switch_to_section (text_section);
18421 asm_fprintf (asm_out_file, "\t.code 16\n");
18422 ASM_OUTPUT_ALIGN (asm_out_file, 1);
18424 for (regno = 0; regno < LR_REGNUM; regno++)
18426 rtx label = thumb_call_via_label[regno];
18428 if (label != 0)
18430 targetm.asm_out.internal_label (asm_out_file, "L",
18431 CODE_LABEL_NUMBER (label));
18432 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18437 #ifndef ARM_PE
18438 /* Symbols in the text segment can be accessed without indirecting via the
18439 constant pool; it may take an extra binary operation, but this is still
18440 faster than indirecting via memory. Don't do this when not optimizing,
18441 since we won't be calculating all of the offsets necessary to do this
18442 simplification. */
18444 static void
18445 arm_encode_section_info (tree decl, rtx rtl, int first)
18447 if (optimize > 0 && TREE_CONSTANT (decl))
18448 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
18450 default_encode_section_info (decl, rtl, first);
18452 #endif /* !ARM_PE */
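/* Output an internal label.  If the conditional execution state machine
   was waiting for this label, reset it so that conditionalisation does
   not carry on across the label.  */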
18454 static void
18455 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
18457 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
18458 && !strcmp (prefix, "L"))
18460 arm_ccfsm_state = 0;
18461 arm_target_insn = NULL;
18463 default_internal_label (stream, prefix, labelno);
18466 /* Output code to add DELTA to the first argument, and then jump
18467 to FUNCTION. Used for C++ multiple inheritance. */
18468 static void
18469 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
18470 HOST_WIDE_INT delta,
18471 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
18472 tree function)
18474 static int thunk_label = 0;
18475 char label[256];
18476 char labelpc[256];
18477 int mi_delta = delta;
18478 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
18479 int shift = 0;
18480 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
18481 ? 1 : 0);
18482 if (mi_delta < 0)
18483 mi_delta = - mi_delta;
18485 if (TARGET_THUMB1)
18487 int labelno = thunk_label++;
18488 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18489 /* Thunks are entered in ARM mode when available. */
18490 if (TARGET_THUMB1_ONLY)
18492 /* push r3 so we can use it as a temporary. */
18493 /* TODO: Omit this save if r3 is not used. */
18494 fputs ("\tpush {r3}\n", file);
18495 fputs ("\tldr\tr3, ", file);
18497 else
18499 fputs ("\tldr\tr12, ", file);
18501 assemble_name (file, label);
18502 fputc ('\n', file);
18503 if (flag_pic)
18505 /* If we are generating PIC, the ldr instruction below loads
18506 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18507 the address of the add + 8, so we have:
18509 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
18510 = target + 1.
18512 Note that we have "+ 1" because some versions of GNU ld
18513 don't set the low bit of the result for R_ARM_REL32
18514 relocations against thumb function symbols.
18515 On ARMv6M this is +4, not +8. */
18516 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18517 assemble_name (file, labelpc);
18518 fputs (":\n", file);
18519 if (TARGET_THUMB1_ONLY)
18521 /* This is 2 insns after the start of the thunk, so we know it
18522 is 4-byte aligned. */
18523 fputs ("\tadd\tr3, pc, r3\n", file);
18524 fputs ("\tmov r12, r3\n", file);
18526 else
18527 fputs ("\tadd\tr12, pc, r12\n", file);
18529 else if (TARGET_THUMB1_ONLY)
18530 fputs ("\tmov r12, r3\n", file);
18532 if (TARGET_THUMB1_ONLY)
18534 if (mi_delta > 255)
18536 fputs ("\tldr\tr3, ", file);
18537 assemble_name (file, label);
18538 fputs ("+4\n", file);
18539 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18540 mi_op, this_regno, this_regno);
18542 else if (mi_delta != 0)
18544 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18545 mi_op, this_regno, this_regno,
18546 mi_delta);
18549 else
18551 /* TODO: Use movw/movt for large constants when available. */
18552 while (mi_delta != 0)
18554 if ((mi_delta & (3 << shift)) == 0)
18555 shift += 2;
18556 else
18558 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18559 mi_op, this_regno, this_regno,
18560 mi_delta & (0xff << shift));
18561 mi_delta &= ~(0xff << shift);
18562 shift += 8;
18566 if (TARGET_THUMB1)
18568 if (TARGET_THUMB1_ONLY)
18569 fputs ("\tpop\t{r3}\n", file);
18571 fprintf (file, "\tbx\tr12\n");
18572 ASM_OUTPUT_ALIGN (file, 2);
18573 assemble_name (file, label);
18574 fputs (":\n", file);
18575 if (flag_pic)
18577 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18578 rtx tem = XEXP (DECL_RTL (function), 0);
18579 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18580 tem = gen_rtx_MINUS (GET_MODE (tem),
18581 tem,
18582 gen_rtx_SYMBOL_REF (Pmode,
18583 ggc_strdup (labelpc)));
18584 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18586 else
18587 /* Output ".word .LTHUNKn". */
18588 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18590 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18591 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
18593 else
18595 fputs ("\tb\t", file);
18596 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18597 if (NEED_PLT_RELOC)
18598 fputs ("(PLT)", file);
18599 fputc ('\n', file);
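/* Output the CONST_VECTOR X to FILE as one hexadecimal immediate,
   printing the elements from the highest index down to the lowest.
   Returns 1 to indicate that the constant was handled.  */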
18604 arm_emit_vector_const (FILE *file, rtx x)
18606 int i;
18607 const char * pattern;
18609 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18611 switch (GET_MODE (x))
18613 case V2SImode: pattern = "%08x"; break;
18614 case V4HImode: pattern = "%04x"; break;
18615 case V8QImode: pattern = "%02x"; break;
18616 default: gcc_unreachable ();
18619 fprintf (file, "0x");
18620 for (i = CONST_VECTOR_NUNITS (x); i--;)
18622 rtx element;
18624 element = CONST_VECTOR_ELT (x, i);
18625 fprintf (file, pattern, INTVAL (element));
18628 return 1;
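/* Output the load of an iWMMXt GR register.  When the address is a
   register plus a constant offset outside (-1024, 1024), a fixed-up
   sequence using a stack temporary is emitted; otherwise a plain
   wldrw does the job.  */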
18631 const char *
18632 arm_output_load_gr (rtx *operands)
18634 rtx reg;
18635 rtx offset;
18636 rtx wcgr;
18637 rtx sum;
18639 if (GET_CODE (operands [1]) != MEM
18640 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18641 || GET_CODE (reg = XEXP (sum, 0)) != REG
18642 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18643 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18644 return "wldrw%?\t%0, %1";
18646 /* Fix up an out-of-range load of a GR register. */
18647 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18648 wcgr = operands[0];
18649 operands[0] = reg;
18650 output_asm_insn ("ldr%?\t%0, %1", operands);
18652 operands[0] = wcgr;
18653 operands[1] = reg;
18654 output_asm_insn ("tmcr%?\t%0, %1", operands);
18655 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18657 return "";
18660 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18662 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18663 named arg and all anonymous args onto the stack.
18664 XXX I know the prologue shouldn't be pushing registers, but it is faster
18665 that way. */
18667 static void
18668 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18669 enum machine_mode mode,
18670 tree type,
18671 int *pretend_size,
18672 int second_time ATTRIBUTE_UNUSED)
18674 int nregs = cum->nregs;
18675 if (nregs & 1
18676 && ARM_DOUBLEWORD_ALIGN
18677 && arm_needs_doubleword_align (mode, type))
18678 nregs++;
18680 cfun->machine->uses_anonymous_args = 1;
18681 if (nregs < NUM_ARG_REGS)
18682 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
18685 /* Return nonzero if the CONSUMER instruction (a store) does not need
18686 PRODUCER's value to calculate the address. */
18689 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18691 rtx value = PATTERN (producer);
18692 rtx addr = PATTERN (consumer);
18694 if (GET_CODE (value) == COND_EXEC)
18695 value = COND_EXEC_CODE (value);
18696 if (GET_CODE (value) == PARALLEL)
18697 value = XVECEXP (value, 0, 0);
18698 value = XEXP (value, 0);
18699 if (GET_CODE (addr) == COND_EXEC)
18700 addr = COND_EXEC_CODE (addr);
18701 if (GET_CODE (addr) == PARALLEL)
18702 addr = XVECEXP (addr, 0, 0);
18703 addr = XEXP (addr, 0);
18705 return !reg_overlap_mentioned_p (value, addr);
18708 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18709 have an early register shift value or amount dependency on the
18710 result of PRODUCER. */
18713 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18715 rtx value = PATTERN (producer);
18716 rtx op = PATTERN (consumer);
18717 rtx early_op;
18719 if (GET_CODE (value) == COND_EXEC)
18720 value = COND_EXEC_CODE (value);
18721 if (GET_CODE (value) == PARALLEL)
18722 value = XVECEXP (value, 0, 0);
18723 value = XEXP (value, 0);
18724 if (GET_CODE (op) == COND_EXEC)
18725 op = COND_EXEC_CODE (op);
18726 if (GET_CODE (op) == PARALLEL)
18727 op = XVECEXP (op, 0, 0);
18728 op = XEXP (op, 1);
18730 early_op = XEXP (op, 0);
18731 /* This is either an actual independent shift, or a shift applied to
18732 the first operand of another operation. We want the whole shift
18733 operation. */
18734 if (GET_CODE (early_op) == REG)
18735 early_op = op;
18737 return !reg_overlap_mentioned_p (value, early_op);
18740 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18741 have an early register shift value dependency on the result of
18742 PRODUCER. */
18745 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18747 rtx value = PATTERN (producer);
18748 rtx op = PATTERN (consumer);
18749 rtx early_op;
18751 if (GET_CODE (value) == COND_EXEC)
18752 value = COND_EXEC_CODE (value);
18753 if (GET_CODE (value) == PARALLEL)
18754 value = XVECEXP (value, 0, 0);
18755 value = XEXP (value, 0);
18756 if (GET_CODE (op) == COND_EXEC)
18757 op = COND_EXEC_CODE (op);
18758 if (GET_CODE (op) == PARALLEL)
18759 op = XVECEXP (op, 0, 0);
18760 op = XEXP (op, 1);
18762 early_op = XEXP (op, 0);
18764 /* This is either an actual independent shift, or a shift applied to
18765 the first operand of another operation. We want the value being
18766 shifted, in either case. */
18767 if (GET_CODE (early_op) != REG)
18768 early_op = XEXP (early_op, 0);
18770 return !reg_overlap_mentioned_p (value, early_op);
18773 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18774 have an early register mult dependency on the result of
18775 PRODUCER. */
18778 arm_no_early_mul_dep (rtx producer, rtx consumer)
18780 rtx value = PATTERN (producer);
18781 rtx op = PATTERN (consumer);
18783 if (GET_CODE (value) == COND_EXEC)
18784 value = COND_EXEC_CODE (value);
18785 if (GET_CODE (value) == PARALLEL)
18786 value = XVECEXP (value, 0, 0);
18787 value = XEXP (value, 0);
18788 if (GET_CODE (op) == COND_EXEC)
18789 op = COND_EXEC_CODE (op);
18790 if (GET_CODE (op) == PARALLEL)
18791 op = XVECEXP (op, 0, 0);
18792 op = XEXP (op, 1);
18794 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
18796 if (GET_CODE (XEXP (op, 0)) == MULT)
18797 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
18798 else
18799 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
18802 return 0;
18805 /* We can't rely on the caller doing the proper promotion when
18806 using APCS or ATPCS. */
18808 static bool
18809 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18811 return !TARGET_AAPCS_BASED;
18815 /* AAPCS based ABIs use short enums by default. */
18817 static bool
18818 arm_default_short_enums (void)
18820 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18824 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18826 static bool
18827 arm_align_anon_bitfield (void)
18829 return TARGET_AAPCS_BASED;
18833 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18835 static tree
18836 arm_cxx_guard_type (void)
18838 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18841 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18842 has an accumulator dependency on the result of the producer (a
18843 multiplication instruction) and no other dependency on that result. */
18845 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18847 rtx mul = PATTERN (producer);
18848 rtx mac = PATTERN (consumer);
18849 rtx mul_result;
18850 rtx mac_op0, mac_op1, mac_acc;
18852 if (GET_CODE (mul) == COND_EXEC)
18853 mul = COND_EXEC_CODE (mul);
18854 if (GET_CODE (mac) == COND_EXEC)
18855 mac = COND_EXEC_CODE (mac);
18857 /* Check that mul is of the form (set (...) (mult ...))
18858 and mla is of the form (set (...) (plus (mult ...) (...))). */
18859 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18860 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18861 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18862 return 0;
18864 mul_result = XEXP (mul, 0);
18865 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18866 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18867 mac_acc = XEXP (XEXP (mac, 1), 1);
18869 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18870 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18871 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18875 /* The EABI says test the least significant bit of a guard variable. */
18877 static bool
18878 arm_cxx_guard_mask_bit (void)
18880 return TARGET_AAPCS_BASED;
18884 /* The EABI specifies that all array cookies are 8 bytes long. */
18886 static tree
18887 arm_get_cookie_size (tree type)
18889 tree size;
18891 if (!TARGET_AAPCS_BASED)
18892 return default_cxx_get_cookie_size (type);
18894 size = build_int_cst (sizetype, 8);
18895 return size;
18899 /* The EABI says that array cookies should also contain the element size. */
18901 static bool
18902 arm_cookie_has_size (void)
18904 return TARGET_AAPCS_BASED;
18908 /* The EABI says constructors and destructors should return a pointer to
18909 the object constructed/destroyed. */
18911 static bool
18912 arm_cxx_cdtor_returns_this (void)
18914 return TARGET_AAPCS_BASED;
18917 /* The EABI says that an inline function may never be the key
18918 method. */
18920 static bool
18921 arm_cxx_key_method_may_be_inline (void)
18923 return !TARGET_AAPCS_BASED;
18926 static void
18927 arm_cxx_determine_class_data_visibility (tree decl)
18929 if (!TARGET_AAPCS_BASED
18930 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
18931 return;
18933 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18934 is exported. However, on systems without dynamic vague linkage,
18935 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18936 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18937 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18938 else
18939 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18940 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18943 static bool
18944 arm_cxx_class_data_always_comdat (void)
18946 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18947 vague linkage if the class has no key function. */
18948 return !TARGET_AAPCS_BASED;
18952 /* The EABI says __aeabi_atexit should be used to register static
18953 destructors. */
18955 static bool
18956 arm_cxx_use_aeabi_atexit (void)
18958 return TARGET_AAPCS_BASED;
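/* Set the return address of the current function to SOURCE.  If LR was
   not saved in the prologue, just move SOURCE into LR; otherwise store
   it into LR's save slot in the stack frame, using SCRATCH to form the
   address when the offset does not fit in the store instruction.  */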
18962 void
18963 arm_set_return_address (rtx source, rtx scratch)
18965 arm_stack_offsets *offsets;
18966 HOST_WIDE_INT delta;
18967 rtx addr;
18968 unsigned long saved_regs;
18970 offsets = arm_get_frame_offsets ();
18971 saved_regs = offsets->saved_regs_mask;
18973 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18974 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18975 else
18977 if (frame_pointer_needed)
18978 addr = plus_constant(hard_frame_pointer_rtx, -4);
18979 else
18981 /* LR will be the first saved register. */
18982 delta = offsets->outgoing_args - (offsets->frame + 4);
18985 if (delta >= 4096)
18987 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18988 GEN_INT (delta & ~4095)));
18989 addr = scratch;
18990 delta &= 4095;
18992 else
18993 addr = stack_pointer_rtx;
18995 addr = plus_constant (addr, delta);
18997 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19002 void
19003 thumb_set_return_address (rtx source, rtx scratch)
19005 arm_stack_offsets *offsets;
19006 HOST_WIDE_INT delta;
19007 HOST_WIDE_INT limit;
19008 int reg;
19009 rtx addr;
19010 unsigned long mask;
19012 emit_use (source);
19014 offsets = arm_get_frame_offsets ();
19015 mask = offsets->saved_regs_mask;
19016 if (mask & (1 << LR_REGNUM))
19018 limit = 1024;
19019 /* Find the saved regs. */
19020 if (frame_pointer_needed)
19022 delta = offsets->soft_frame - offsets->saved_args;
19023 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
19024 if (TARGET_THUMB1)
19025 limit = 128;
19027 else
19029 delta = offsets->outgoing_args - offsets->saved_args;
19030 reg = SP_REGNUM;
19032 /* Allow for the stack frame. */
19033 if (TARGET_THUMB1 && TARGET_BACKTRACE)
19034 delta -= 16;
19035 /* The link register is always the first saved register. */
19036 delta -= 4;
19038 /* Construct the address. */
19039 addr = gen_rtx_REG (SImode, reg);
19040 if (delta > limit)
19042 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
19043 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
19044 addr = scratch;
19046 else
19047 addr = plus_constant (addr, delta);
19049 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19051 else
19052 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19055 /* Implements target hook vector_mode_supported_p. */
19056 bool
19057 arm_vector_mode_supported_p (enum machine_mode mode)
19059 /* Neon also supports V2SImode, etc. listed in the clause below. */
19060 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
19061 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
19062 return true;
19064 if ((mode == V2SImode)
19065 || (mode == V4HImode)
19066 || (mode == V8QImode))
19067 return true;
19069 return false;
19072 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
19073 ARM insns and therefore guarantee that the shift count is modulo 256.
19074 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
19075 guarantee no particular behavior for out-of-range counts. */
19077 static unsigned HOST_WIDE_INT
19078 arm_shift_truncation_mask (enum machine_mode mode)
19080 return mode == SImode ? 255 : 0;
19084 /* Map internal gcc register numbers to DWARF2 register numbers. */
19086 unsigned int
19087 arm_dbx_register_number (unsigned int regno)
19089 if (regno < 16)
19090 return regno;
19092 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
19093 compatibility. The EABI defines them as registers 96-103. */
19094 if (IS_FPA_REGNUM (regno))
19095 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
19097 /* FIXME: VFPv3 register numbering. */
19098 if (IS_VFP_REGNUM (regno))
19099 return 64 + regno - FIRST_VFP_REGNUM;
19101 if (IS_IWMMXT_GR_REGNUM (regno))
19102 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
19104 if (IS_IWMMXT_REGNUM (regno))
19105 return 112 + regno - FIRST_IWMMXT_REGNUM;
19107 gcc_unreachable ();
19111 #ifdef TARGET_UNWIND_INFO
19112 /* Emit unwind directives for a store-multiple instruction or stack pointer
19113 push during alignment.
19114 These should only ever be generated by the function prologue code, so
19115 we expect them to have a particular form. */
19117 static void
19118 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
19120 int i;
19121 HOST_WIDE_INT offset;
19122 HOST_WIDE_INT nregs;
19123 int reg_size;
19124 unsigned reg;
19125 unsigned lastreg;
19126 rtx e;
19128 e = XVECEXP (p, 0, 0);
19129 if (GET_CODE (e) != SET)
19130 abort ();
19132 /* First insn will adjust the stack pointer. */
19133 if (GET_CODE (e) != SET
19134 || GET_CODE (XEXP (e, 0)) != REG
19135 || REGNO (XEXP (e, 0)) != SP_REGNUM
19136 || GET_CODE (XEXP (e, 1)) != PLUS)
19137 abort ();
19139 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
19140 nregs = XVECLEN (p, 0) - 1;
19142 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
19143 if (reg < 16)
19145 /* The function prologue may also push pc, but not annotate it as it is
19146 never restored. We turn this into a stack pointer adjustment. */
19147 if (nregs * 4 == offset - 4)
19149 fprintf (asm_out_file, "\t.pad #4\n");
19150 offset -= 4;
19152 reg_size = 4;
19153 fprintf (asm_out_file, "\t.save {");
19155 else if (IS_VFP_REGNUM (reg))
19157 reg_size = 8;
19158 fprintf (asm_out_file, "\t.vsave {");
19160 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
19162 /* FPA registers are done differently. */
19163 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
19164 return;
19166 else
19167 /* Unknown register type. */
19168 abort ();
19170 /* If the stack increment doesn't match the size of the saved registers,
19171 something has gone horribly wrong. */
19172 if (offset != nregs * reg_size)
19173 abort ();
19175 offset = 0;
19176 lastreg = 0;
19177 /* The remaining insns will describe the stores. */
19178 for (i = 1; i <= nregs; i++)
19180 /* Expect (set (mem <addr>) (reg)).
19181 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
19182 e = XVECEXP (p, 0, i);
19183 if (GET_CODE (e) != SET
19184 || GET_CODE (XEXP (e, 0)) != MEM
19185 || GET_CODE (XEXP (e, 1)) != REG)
19186 abort ();
19188 reg = REGNO (XEXP (e, 1));
19189 if (reg < lastreg)
19190 abort ();
19192 if (i != 1)
19193 fprintf (asm_out_file, ", ");
19194 /* We can't use %r for vfp because we need to use the
19195 double precision register names. */
19196 if (IS_VFP_REGNUM (reg))
19197 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
19198 else
19199 asm_fprintf (asm_out_file, "%r", reg);
19201 #ifdef ENABLE_CHECKING
19202 /* Check that the addresses are consecutive. */
19203 e = XEXP (XEXP (e, 0), 0);
19204 if (GET_CODE (e) == PLUS)
19206 offset += reg_size;
19207 if (GET_CODE (XEXP (e, 0)) != REG
19208 || REGNO (XEXP (e, 0)) != SP_REGNUM
19209 || GET_CODE (XEXP (e, 1)) != CONST_INT
19210 || offset != INTVAL (XEXP (e, 1)))
19211 abort ();
19213 else if (i != 1
19214 || GET_CODE (e) != REG
19215 || REGNO (e) != SP_REGNUM)
19216 abort ();
19217 #endif
19219 fprintf (asm_out_file, "}\n");
19222 /* Emit unwind directives for a SET. */
19224 static void
19225 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
19227 rtx e0;
19228 rtx e1;
19229 unsigned reg;
19231 e0 = XEXP (p, 0);
19232 e1 = XEXP (p, 1);
19233 switch (GET_CODE (e0))
19235 case MEM:
19236 /* Pushing a single register. */
19237 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
19238 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
19239 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
19240 abort ();
19242 asm_fprintf (asm_out_file, "\t.save ");
19243 if (IS_VFP_REGNUM (REGNO (e1)))
19244 asm_fprintf(asm_out_file, "{d%d}\n",
19245 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
19246 else
19247 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
19248 break;
19250 case REG:
19251 if (REGNO (e0) == SP_REGNUM)
19253 /* A stack increment. */
19254 if (GET_CODE (e1) != PLUS
19255 || GET_CODE (XEXP (e1, 0)) != REG
19256 || REGNO (XEXP (e1, 0)) != SP_REGNUM
19257 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19258 abort ();
19260 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
19261 -INTVAL (XEXP (e1, 1)));
19263 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
19265 HOST_WIDE_INT offset;
19267 if (GET_CODE (e1) == PLUS)
19269 if (GET_CODE (XEXP (e1, 0)) != REG
19270 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19271 abort ();
19272 reg = REGNO (XEXP (e1, 0));
19273 offset = INTVAL (XEXP (e1, 1));
19274 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
19275 HARD_FRAME_POINTER_REGNUM, reg,
19276 INTVAL (XEXP (e1, 1)));
19278 else if (GET_CODE (e1) == REG)
19280 reg = REGNO (e1);
19281 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
19282 HARD_FRAME_POINTER_REGNUM, reg);
19284 else
19285 abort ();
19287 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
19289 /* Move from sp to reg. */
19290 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
19292 else if (GET_CODE (e1) == PLUS
19293 && GET_CODE (XEXP (e1, 0)) == REG
19294 && REGNO (XEXP (e1, 0)) == SP_REGNUM
19295 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
19297 /* Set reg to offset from sp. */
19298 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
19299 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
19301 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
19303 /* Stack pointer save before alignment. */
19304 reg = REGNO (e0);
19305 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
19306 reg + 0x90, reg);
19308 else
19309 abort ();
19310 break;
19312 default:
19313 abort ();

/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
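
/* Note (not part of the original sources): only insns marked
   RTX_FRAME_RELATED_P reach the switch above, and when a
   REG_FRAME_RELATED_EXPR note is attached it takes precedence over the
   raw insn pattern, so the prologue code can describe its stack effect
   explicitly.  */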

/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
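
/* Illustrative output (the type_info symbol is hypothetical): for a
   handler of some class type A this emits something like
       .word   _ZTI1A(TARGET2)
   whereas a CONST_INT filter value is emitted without the relocation.  */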
#endif /* TARGET_UNWIND_INFO */

/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
                              SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
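
/* Sketch of the resulting bracketing (illustrative only): every function
   body ends up wrapped as

       .fnstart
       ...                 @ .save/.pad/.setfp directives from arm_unwind_emit
       .cantunwind         @ only when the function can never be unwound
       .fnend  */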

static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
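
/* Illustrative output (symbol and label names are hypothetical): a
   global-dynamic reference to "x" prints as
       x(tlsgd) + (. - .LPIC0 - .LPIC1)
   and an initial-exec reference prints as x(gottpoff) with the same
   PC-relative correction appended.  */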

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
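
/* Illustrative output for the UNSPEC_GOTSYM_OFF case (the symbol name is
   hypothetical): with GOT_PCREL set, the expression prints as
       _GLOBAL_OFFSET_TABLE_+.-(foo)  */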

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx *operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT (val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
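
/* Illustrative expansion (operand values hypothetical): under unified syntax
   with SET_FLAGS == 1 and a logical right shift by a constant, the template
   built above has the form "lsr%.\t%0, %1, %2", which might come out as
   something like "lsrs r0, r1, #2".  */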

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
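
/* Illustrative dispatch sequence for a byte-sized table (concrete registers
   and labels are hypothetical):
       cmp     r0, #<ncases>
       bhi     <default_label>
       tbb     [pc, r0]
   Half-word tables use tbh instead, and word-sized tables fall back to the
   adr/ldr sequence above.  */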

/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}

/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
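
/* Illustrative example (assuming the usual Itanium C++ mangling scheme): the
   arm_neon.h type int8x8_t is a V8QImode vector of __builtin_neon_qi
   elements, so it picks up "15__simd64_int8_t" from the table above and
   "void f (int8x8_t)" would mangle roughly as "_Z1f15__simd64_int8_t".  */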

/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
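
/* Note (not part of the original sources): with the table above the
   allocator tries r3-r0 first, then r4-r7, then lr and ip, and only then
   the remaining high registers; when compiling for ARM the order from
   REG_ALLOC_ORDER is left unchanged.  */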

/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}

#include "gt-arm.h"