Fix a bug that broke -freorder-functions
[official-gcc.git] gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
65 void (*arm_lang_output_object_attributes_hook)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets *arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
73 HOST_WIDE_INT, rtx, rtx, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx, int);
76 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
77 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
78 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
79 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
80 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
81 inline static int thumb1_index_register_rtx_p (rtx, int);
82 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
86 static rtx emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 rtx);
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int count_insns_for_constant (HOST_WIDE_INT, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree, tree);
135 static enum machine_mode arm_promote_function_mode (const_tree,
136 enum machine_mode, int *,
137 const_tree, int);
138 static bool arm_return_in_memory (const_tree, const_tree);
139 static rtx arm_function_value (const_tree, const_tree, bool);
140 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 tree);
145 static bool arm_have_conditional_execution (void);
146 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
147 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
148 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
149 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
150 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
153 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
154 static bool arm_rtx_costs (rtx, int, int, int *, bool);
155 static int arm_address_cost (rtx, bool);
156 static bool arm_memory_load_p (rtx);
157 static bool arm_cirrus_insn_p (rtx);
158 static void cirrus_reorg (rtx);
159 static void arm_init_builtins (void);
160 static void arm_init_iwmmxt_builtins (void);
161 static rtx safe_vector_operand (rtx, enum machine_mode);
162 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
163 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
164 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
165 static tree arm_builtin_decl (unsigned, bool);
166 static void emit_constant_insn (rtx cond, rtx pattern);
167 static rtx emit_set_insn (rtx, rtx);
168 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
169 tree, bool);
170 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
171 const_tree, bool);
172 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
173 const_tree, bool);
174 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
175 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
176 const_tree);
177 static int aapcs_select_return_coproc (const_tree, const_tree);
179 #ifdef OBJECT_FORMAT_ELF
180 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
181 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
182 #endif
183 #ifndef ARM_PE
184 static void arm_encode_section_info (tree, rtx, int);
185 #endif
187 static void arm_file_end (void);
188 static void arm_file_start (void);
190 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
191 tree, int *, int);
192 static bool arm_pass_by_reference (cumulative_args_t,
193 enum machine_mode, const_tree, bool);
194 static bool arm_promote_prototypes (const_tree);
195 static bool arm_default_short_enums (void);
196 static bool arm_align_anon_bitfield (void);
197 static bool arm_return_in_msb (const_tree);
198 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
199 static bool arm_return_in_memory (const_tree, const_tree);
200 #if ARM_UNWIND_INFO
201 static void arm_unwind_emit (FILE *, rtx);
202 static bool arm_output_ttype (rtx);
203 static void arm_asm_emit_except_personality (rtx);
204 static void arm_asm_init_sections (void);
205 #endif
206 static rtx arm_dwarf_register_span (rtx);
208 static tree arm_cxx_guard_type (void);
209 static bool arm_cxx_guard_mask_bit (void);
210 static tree arm_get_cookie_size (tree);
211 static bool arm_cookie_has_size (void);
212 static bool arm_cxx_cdtor_returns_this (void);
213 static bool arm_cxx_key_method_may_be_inline (void);
214 static void arm_cxx_determine_class_data_visibility (tree);
215 static bool arm_cxx_class_data_always_comdat (void);
216 static bool arm_cxx_use_aeabi_atexit (void);
217 static void arm_init_libfuncs (void);
218 static tree arm_build_builtin_va_list (void);
219 static void arm_expand_builtin_va_start (tree, rtx);
220 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
221 static void arm_option_override (void);
222 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
223 static bool arm_cannot_copy_insn_p (rtx);
224 static bool arm_tls_symbol_p (rtx x);
225 static int arm_issue_rate (void);
226 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
227 static bool arm_output_addr_const_extra (FILE *, rtx);
228 static bool arm_allocate_stack_slots_for_args (void);
229 static const char *arm_invalid_parameter_type (const_tree t);
230 static const char *arm_invalid_return_type (const_tree t);
231 static tree arm_promoted_type (const_tree t);
232 static tree arm_convert_to_type (tree type, tree expr);
233 static bool arm_scalar_mode_supported_p (enum machine_mode);
234 static bool arm_frame_pointer_required (void);
235 static bool arm_can_eliminate (const int, const int);
236 static void arm_asm_trampoline_template (FILE *);
237 static void arm_trampoline_init (rtx, tree, rtx);
238 static rtx arm_trampoline_adjust_address (rtx);
239 static rtx arm_pic_static_addr (rtx orig, rtx reg);
240 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
241 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static bool arm_array_mode_supported_p (enum machine_mode,
244 unsigned HOST_WIDE_INT);
245 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
246 static bool arm_class_likely_spilled_p (reg_class_t);
247 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
248 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
249 const_tree type,
250 int misalignment,
251 bool is_packed);
252 static void arm_conditional_register_usage (void);
253 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
254 static unsigned int arm_autovectorize_vector_sizes (void);
255 static int arm_default_branch_cost (bool, bool);
256 static int arm_cortex_a5_branch_cost (bool, bool);
259 /* Table of machine attributes. */
260 static const struct attribute_spec arm_attribute_table[] =
262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
263 affects_type_identity } */
264 /* Function calls made to this symbol must be done indirectly, because
265 it may lie outside of the 26 bit addressing range of a normal function
266 call. */
267 { "long_call", 0, 0, false, true, true, NULL, false },
268 /* Whereas these functions are always known to reside within the 26 bit
269 addressing range. */
270 { "short_call", 0, 0, false, true, true, NULL, false },
271 /* Specify the procedure call conventions for a function. */
272 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
273 false },
274 /* Interrupt Service Routines have special prologue and epilogue requirements. */
275 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
276 false },
277 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
278 false },
279 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
280 false },
281 #ifdef ARM_PE
282 /* ARM/PE has three new attributes:
283 interfacearm - ?
284 dllexport - for exporting a function/variable that will live in a dll
285 dllimport - for importing a function/variable from a dll
287 Microsoft allows multiple declspecs in one __declspec, separating
288 them with spaces. We do NOT support this. Instead, use __declspec
289 multiple times.
291 { "dllimport", 0, 0, true, false, false, NULL, false },
292 { "dllexport", 0, 0, true, false, false, NULL, false },
293 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
294 false },
295 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
296 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
297 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
298 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
299 false },
300 #endif
301 { NULL, 0, 0, false, false, false, NULL, false }
304 /* Initialize the GCC target structure. */
305 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
306 #undef TARGET_MERGE_DECL_ATTRIBUTES
307 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
308 #endif
310 #undef TARGET_LEGITIMIZE_ADDRESS
311 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
313 #undef TARGET_ATTRIBUTE_TABLE
314 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
316 #undef TARGET_ASM_FILE_START
317 #define TARGET_ASM_FILE_START arm_file_start
318 #undef TARGET_ASM_FILE_END
319 #define TARGET_ASM_FILE_END arm_file_end
321 #undef TARGET_ASM_ALIGNED_SI_OP
322 #define TARGET_ASM_ALIGNED_SI_OP NULL
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER arm_assemble_integer
326 #undef TARGET_PRINT_OPERAND
327 #define TARGET_PRINT_OPERAND arm_print_operand
328 #undef TARGET_PRINT_OPERAND_ADDRESS
329 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
330 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
331 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
333 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
334 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
336 #undef TARGET_ASM_FUNCTION_PROLOGUE
337 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
339 #undef TARGET_ASM_FUNCTION_EPILOGUE
340 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
342 #undef TARGET_OPTION_OVERRIDE
343 #define TARGET_OPTION_OVERRIDE arm_option_override
345 #undef TARGET_COMP_TYPE_ATTRIBUTES
346 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
348 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
349 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
354 #undef TARGET_ENCODE_SECTION_INFO
355 #ifdef ARM_PE
356 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
357 #else
358 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
359 #endif
361 #undef TARGET_STRIP_NAME_ENCODING
362 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
364 #undef TARGET_ASM_INTERNAL_LABEL
365 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
367 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
368 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
370 #undef TARGET_FUNCTION_VALUE
371 #define TARGET_FUNCTION_VALUE arm_function_value
373 #undef TARGET_LIBCALL_VALUE
374 #define TARGET_LIBCALL_VALUE arm_libcall_value
376 #undef TARGET_ASM_OUTPUT_MI_THUNK
377 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
378 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
379 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
381 #undef TARGET_RTX_COSTS
382 #define TARGET_RTX_COSTS arm_rtx_costs
383 #undef TARGET_ADDRESS_COST
384 #define TARGET_ADDRESS_COST arm_address_cost
386 #undef TARGET_SHIFT_TRUNCATION_MASK
387 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
389 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
390 #undef TARGET_ARRAY_MODE_SUPPORTED_P
391 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
392 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
393 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
394 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
395 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
396 arm_autovectorize_vector_sizes
398 #undef TARGET_MACHINE_DEPENDENT_REORG
399 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
401 #undef TARGET_INIT_BUILTINS
402 #define TARGET_INIT_BUILTINS arm_init_builtins
403 #undef TARGET_EXPAND_BUILTIN
404 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
405 #undef TARGET_BUILTIN_DECL
406 #define TARGET_BUILTIN_DECL arm_builtin_decl
408 #undef TARGET_INIT_LIBFUNCS
409 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
411 #undef TARGET_PROMOTE_FUNCTION_MODE
412 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
413 #undef TARGET_PROMOTE_PROTOTYPES
414 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
415 #undef TARGET_PASS_BY_REFERENCE
416 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
417 #undef TARGET_ARG_PARTIAL_BYTES
418 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
419 #undef TARGET_FUNCTION_ARG
420 #define TARGET_FUNCTION_ARG arm_function_arg
421 #undef TARGET_FUNCTION_ARG_ADVANCE
422 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
423 #undef TARGET_FUNCTION_ARG_BOUNDARY
424 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
429 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
430 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
432 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
433 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
434 #undef TARGET_TRAMPOLINE_INIT
435 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
436 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
437 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
439 #undef TARGET_DEFAULT_SHORT_ENUMS
440 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
442 #undef TARGET_ALIGN_ANON_BITFIELD
443 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
445 #undef TARGET_NARROW_VOLATILE_BITFIELD
446 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
448 #undef TARGET_CXX_GUARD_TYPE
449 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
451 #undef TARGET_CXX_GUARD_MASK_BIT
452 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
454 #undef TARGET_CXX_GET_COOKIE_SIZE
455 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
457 #undef TARGET_CXX_COOKIE_HAS_SIZE
458 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
460 #undef TARGET_CXX_CDTOR_RETURNS_THIS
461 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
463 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
464 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
466 #undef TARGET_CXX_USE_AEABI_ATEXIT
467 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
469 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
470 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
471 arm_cxx_determine_class_data_visibility
473 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
474 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
476 #undef TARGET_RETURN_IN_MSB
477 #define TARGET_RETURN_IN_MSB arm_return_in_msb
479 #undef TARGET_RETURN_IN_MEMORY
480 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
482 #undef TARGET_MUST_PASS_IN_STACK
483 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
485 #if ARM_UNWIND_INFO
486 #undef TARGET_ASM_UNWIND_EMIT
487 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
489 /* EABI unwinding tables use a different format for the typeinfo tables. */
490 #undef TARGET_ASM_TTYPE
491 #define TARGET_ASM_TTYPE arm_output_ttype
493 #undef TARGET_ARM_EABI_UNWINDER
494 #define TARGET_ARM_EABI_UNWINDER true
496 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
497 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
499 #undef TARGET_ASM_INIT_SECTIONS
500 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
501 #endif /* ARM_UNWIND_INFO */
503 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
506 #undef TARGET_CANNOT_COPY_INSN_P
507 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
509 #ifdef HAVE_AS_TLS
510 #undef TARGET_HAVE_TLS
511 #define TARGET_HAVE_TLS true
512 #endif
514 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
515 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
517 #undef TARGET_LEGITIMATE_CONSTANT_P
518 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
520 #undef TARGET_CANNOT_FORCE_CONST_MEM
521 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
523 #undef TARGET_MAX_ANCHOR_OFFSET
524 #define TARGET_MAX_ANCHOR_OFFSET 4095
526 /* The minimum is set such that the total size of the block
527 for a particular anchor is -4088 + 1 + 4095 bytes, which is
528 divisible by eight, ensuring natural spacing of anchors. */
529 #undef TARGET_MIN_ANCHOR_OFFSET
530 #define TARGET_MIN_ANCHOR_OFFSET -4088
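/* That is, each anchor can reach offsets -4088 through +4095 inclusive, a
   range of 4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight.  */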
532 #undef TARGET_SCHED_ISSUE_RATE
533 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
535 #undef TARGET_MANGLE_TYPE
536 #define TARGET_MANGLE_TYPE arm_mangle_type
538 #undef TARGET_BUILD_BUILTIN_VA_LIST
539 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
540 #undef TARGET_EXPAND_BUILTIN_VA_START
541 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
542 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
543 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
545 #ifdef HAVE_AS_TLS
546 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
547 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
548 #endif
550 #undef TARGET_LEGITIMATE_ADDRESS_P
551 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
553 #undef TARGET_INVALID_PARAMETER_TYPE
554 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
556 #undef TARGET_INVALID_RETURN_TYPE
557 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
559 #undef TARGET_PROMOTED_TYPE
560 #define TARGET_PROMOTED_TYPE arm_promoted_type
562 #undef TARGET_CONVERT_TO_TYPE
563 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
565 #undef TARGET_SCALAR_MODE_SUPPORTED_P
566 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
568 #undef TARGET_FRAME_POINTER_REQUIRED
569 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
571 #undef TARGET_CAN_ELIMINATE
572 #define TARGET_CAN_ELIMINATE arm_can_eliminate
574 #undef TARGET_CONDITIONAL_REGISTER_USAGE
575 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
577 #undef TARGET_CLASS_LIKELY_SPILLED_P
578 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
580 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
581 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
582 arm_vector_alignment_reachable
584 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
585 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
586 arm_builtin_support_vector_misalignment
588 #undef TARGET_PREFERRED_RENAME_CLASS
589 #define TARGET_PREFERRED_RENAME_CLASS \
590 arm_preferred_rename_class
592 struct gcc_target targetm = TARGET_INITIALIZER;
594 /* Obstack for minipool constant handling. */
595 static struct obstack minipool_obstack;
596 static char * minipool_startobj;
598 /* The maximum number of insns skipped which
599 will be conditionalised if possible. */
600 static int max_insns_skipped = 5;
602 extern FILE * asm_out_file;
604 /* True if we are currently building a constant table. */
605 int making_const_table;
607 /* The processor for which instructions should be scheduled. */
608 enum processor_type arm_tune = arm_none;
610 /* The current tuning set. */
611 const struct tune_params *current_tune;
613 /* Which floating point hardware to schedule for. */
614 int arm_fpu_attr;
616 /* Which floating point hardware to use. */
617 const struct arm_fpu_desc *arm_fpu_desc;
619 /* Used for Thumb call_via trampolines. */
620 rtx thumb_call_via_label[14];
621 static int thumb_call_reg_needed;
623 /* Bit values used to identify processor capabilities. */
624 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
625 #define FL_ARCH3M (1 << 1) /* Extended multiply */
626 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
627 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
628 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
629 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
630 #define FL_THUMB (1 << 6) /* Thumb aware */
631 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
632 #define FL_STRONG (1 << 8) /* StrongARM */
633 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
634 #define FL_XSCALE (1 << 10) /* XScale */
635 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
636 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
637 media instructions. */
638 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
639 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
640 Note: ARM6 & 7 derivatives only. */
641 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
642 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
643 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
644 profile. */
645 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
646 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
647 #define FL_NEON (1 << 20) /* Neon instructions. */
648 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
649 architecture. */
650 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
651 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
653 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
655 /* Flags that only affect tuning, not available instructions. */
656 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
657 | FL_CO_PROC)
659 #define FL_FOR_ARCH2 FL_NOTM
660 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
661 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
662 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
663 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
664 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
665 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
666 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
667 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
668 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
669 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
670 #define FL_FOR_ARCH6J FL_FOR_ARCH6
671 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
672 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
673 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
674 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
675 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
676 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
677 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
678 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
679 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
680 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
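/* For illustration, expanding the definitions above gives, for example:
     FL_FOR_ARCH5TE == (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                        | FL_ARCH5 | FL_ARCH5E | FL_THUMB)
   i.e. each architecture level inherits the capability bits of the levels
   it is derived from.  */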
682 /* The bits in this mask specify which
683 instructions we are allowed to generate. */
684 static unsigned long insn_flags = 0;
686 /* The bits in this mask specify which instruction scheduling options should
687 be used. */
688 static unsigned long tune_flags = 0;
690 /* The following are used in the arm.md file as equivalents to bits
691 in the above two flag variables. */
693 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
694 int arm_arch3m = 0;
696 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
697 int arm_arch4 = 0;
699 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
700 int arm_arch4t = 0;
702 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
703 int arm_arch5 = 0;
705 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
706 int arm_arch5e = 0;
708 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
709 int arm_arch6 = 0;
711 /* Nonzero if this chip supports the ARM 6K extensions. */
712 int arm_arch6k = 0;
714 /* Nonzero if this chip supports the ARM 7 extensions. */
715 int arm_arch7 = 0;
717 /* Nonzero if instructions not present in the 'M' profile can be used. */
718 int arm_arch_notm = 0;
720 /* Nonzero if instructions present in ARMv7E-M can be used. */
721 int arm_arch7em = 0;
723 /* Nonzero if this chip can benefit from load scheduling. */
724 int arm_ld_sched = 0;
726 /* Nonzero if this chip is a StrongARM. */
727 int arm_tune_strongarm = 0;
729 /* Nonzero if this chip is a Cirrus variant. */
730 int arm_arch_cirrus = 0;
732 /* Nonzero if this chip supports Intel Wireless MMX technology. */
733 int arm_arch_iwmmxt = 0;
735 /* Nonzero if this chip is an XScale. */
736 int arm_arch_xscale = 0;
738 /* Nonzero if tuning for XScale */
739 int arm_tune_xscale = 0;
741 /* Nonzero if we want to tune for stores that access the write-buffer.
742 This typically means an ARM6 or ARM7 with MMU or MPU. */
743 int arm_tune_wbuf = 0;
745 /* Nonzero if tuning for Cortex-A9. */
746 int arm_tune_cortex_a9 = 0;
748 /* Nonzero if generating Thumb instructions. */
749 int thumb_code = 0;
751 /* Nonzero if generating Thumb-1 instructions. */
752 int thumb1_code = 0;
754 /* Nonzero if we should define __THUMB_INTERWORK__ in the
755 preprocessor.
756 XXX This is a bit of a hack, it's intended to help work around
757 problems in GLD which doesn't understand that armv5t code is
758 interworking clean. */
759 int arm_cpp_interwork = 0;
761 /* Nonzero if chip supports Thumb 2. */
762 int arm_arch_thumb2;
764 /* Nonzero if chip supports integer division instruction. */
765 int arm_arch_arm_hwdiv;
766 int arm_arch_thumb_hwdiv;
768 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
769 we must report the mode of the memory reference from
770 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
771 enum machine_mode output_memory_reference_mode;
773 /* The register number to be used for the PIC offset register. */
774 unsigned arm_pic_register = INVALID_REGNUM;
776 /* Set to 1 after arm_reorg has started. Reset to start at the start of
777 the next function. */
778 static int after_arm_reorg = 0;
780 enum arm_pcs arm_pcs_default;
782 /* For an explanation of these variables, see final_prescan_insn below. */
783 int arm_ccfsm_state;
784 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
785 enum arm_cond_code arm_current_cc;
787 rtx arm_target_insn;
788 int arm_target_label;
789 /* The number of conditionally executed insns, including the current insn. */
790 int arm_condexec_count = 0;
791 /* A bitmask specifying the patterns for the IT block.
792 Zero means do not output an IT block before this insn. */
793 int arm_condexec_mask = 0;
794 /* The number of bits used in arm_condexec_mask. */
795 int arm_condexec_masklen = 0;
797 /* The condition codes of the ARM, and the inverse function. */
798 static const char * const arm_condition_codes[] =
800 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
801 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
804 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
805 int arm_regs_in_sequence[] =
807 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
810 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
811 #define streq(string1, string2) (strcmp (string1, string2) == 0)
813 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
814 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
815 | (1 << PIC_OFFSET_TABLE_REGNUM)))
817 /* Initialization code. */
819 struct processors
821 const char *const name;
822 enum processor_type core;
823 const char *arch;
824 const unsigned long flags;
825 const struct tune_params *const tune;
829 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
830 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
831 prefetch_slots, \
832 l1_size, \
833 l1_line_size
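/* For example, ARM_PREFETCH_BENEFICIAL (4, 32, 32) simply expands to the
   three initializers "4, 32, 32" (prefetch slots, L1 size, L1 line size)
   inside a tune_params initializer; see the Cortex-A9 tuning below.  */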
835 const struct tune_params arm_slowmul_tune =
837 arm_slowmul_rtx_costs,
838 NULL,
839 3, /* Constant limit. */
840 5, /* Max cond insns. */
841 ARM_PREFETCH_NOT_BENEFICIAL,
842 true, /* Prefer constant pool. */
843 arm_default_branch_cost
846 const struct tune_params arm_fastmul_tune =
848 arm_fastmul_rtx_costs,
849 NULL,
850 1, /* Constant limit. */
851 5, /* Max cond insns. */
852 ARM_PREFETCH_NOT_BENEFICIAL,
853 true, /* Prefer constant pool. */
854 arm_default_branch_cost
857 /* StrongARM has early execution of branches, so a sequence that is worth
858 skipping is shorter. Set max_insns_skipped to a lower value. */
860 const struct tune_params arm_strongarm_tune =
862 arm_fastmul_rtx_costs,
863 NULL,
864 1, /* Constant limit. */
865 3, /* Max cond insns. */
866 ARM_PREFETCH_NOT_BENEFICIAL,
867 true, /* Prefer constant pool. */
868 arm_default_branch_cost
871 const struct tune_params arm_xscale_tune =
873 arm_xscale_rtx_costs,
874 xscale_sched_adjust_cost,
875 2, /* Constant limit. */
876 3, /* Max cond insns. */
877 ARM_PREFETCH_NOT_BENEFICIAL,
878 true, /* Prefer constant pool. */
879 arm_default_branch_cost
882 const struct tune_params arm_9e_tune =
884 arm_9e_rtx_costs,
885 NULL,
886 1, /* Constant limit. */
887 5, /* Max cond insns. */
888 ARM_PREFETCH_NOT_BENEFICIAL,
889 true, /* Prefer constant pool. */
890 arm_default_branch_cost
893 const struct tune_params arm_v6t2_tune =
895 arm_9e_rtx_costs,
896 NULL,
897 1, /* Constant limit. */
898 5, /* Max cond insns. */
899 ARM_PREFETCH_NOT_BENEFICIAL,
900 false, /* Prefer constant pool. */
901 arm_default_branch_cost
904 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
905 const struct tune_params arm_cortex_tune =
907 arm_9e_rtx_costs,
908 NULL,
909 1, /* Constant limit. */
910 5, /* Max cond insns. */
911 ARM_PREFETCH_NOT_BENEFICIAL,
912 false, /* Prefer constant pool. */
913 arm_default_branch_cost
916 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
917 less appealing. Set max_insns_skipped to a low value. */
919 const struct tune_params arm_cortex_a5_tune =
921 arm_9e_rtx_costs,
922 NULL,
923 1, /* Constant limit. */
924 1, /* Max cond insns. */
925 ARM_PREFETCH_NOT_BENEFICIAL,
926 false, /* Prefer constant pool. */
927 arm_cortex_a5_branch_cost
930 const struct tune_params arm_cortex_a9_tune =
932 arm_9e_rtx_costs,
933 cortex_a9_sched_adjust_cost,
934 1, /* Constant limit. */
935 5, /* Max cond insns. */
936 ARM_PREFETCH_BENEFICIAL(4,32,32),
937 false, /* Prefer constant pool. */
938 arm_default_branch_cost
941 const struct tune_params arm_fa726te_tune =
943 arm_9e_rtx_costs,
944 fa726te_sched_adjust_cost,
945 1, /* Constant limit. */
946 5, /* Max cond insns. */
947 ARM_PREFETCH_NOT_BENEFICIAL,
948 true, /* Prefer constant pool. */
949 arm_default_branch_cost
953 /* Not all of these give usefully different compilation alternatives,
954 but there is no simple way of generalizing them. */
955 static const struct processors all_cores[] =
957 /* ARM Cores */
958 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
959 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
960 #include "arm-cores.def"
961 #undef ARM_CORE
962 {NULL, arm_none, NULL, 0, NULL}
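/* Illustrative expansion of the macro above (the exact flags for any given
   core live in arm-cores.def, not here): an entry of the form
     ARM_CORE("cortex-a9", cortexa9, 7A, FLAGS, cortex_a9)
   becomes
     {"cortex-a9", cortexa9, "7A", FLAGS | FL_FOR_ARCH7A, &arm_cortex_a9_tune},  */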
965 static const struct processors all_architectures[] =
967 /* ARM Architectures */
968 /* We don't specify tuning costs here as it will be figured out
969 from the core. */
971 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
972 {NAME, CORE, #ARCH, FLAGS, NULL},
973 #include "arm-arches.def"
974 #undef ARM_ARCH
975 {NULL, arm_none, NULL, 0 , NULL}
979 /* These are populated as commandline arguments are processed, or NULL
980 if not specified. */
981 static const struct processors *arm_selected_arch;
982 static const struct processors *arm_selected_cpu;
983 static const struct processors *arm_selected_tune;
985 /* The name of the preprocessor macro to define for this architecture. */
987 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
989 /* Available values for -mfpu=. */
991 static const struct arm_fpu_desc all_fpus[] =
993 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
994 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
995 #include "arm-fpus.def"
996 #undef ARM_FPU
1000 /* Supported TLS relocations. */
1002 enum tls_reloc {
1003 TLS_GD32,
1004 TLS_LDM32,
1005 TLS_LDO32,
1006 TLS_IE32,
1007 TLS_LE32,
1008 TLS_DESCSEQ /* GNU scheme */
1011 /* The maximum number of insns to be used when loading a constant. */
1012 inline static int
1013 arm_constant_limit (bool size_p)
1015 return size_p ? 1 : current_tune->constant_limit;
1018 /* Emit an insn that's a simple single-set. Both the operands must be known
1019 to be valid. */
1020 inline static rtx
1021 emit_set_insn (rtx x, rtx y)
1023 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1026 /* Return the number of bits set in VALUE. */
1027 static unsigned
1028 bit_count (unsigned long value)
1030 unsigned long count = 0;
1032 while (value)
1034 count++;
1035 value &= value - 1; /* Clear the least-significant set bit. */
1038 return count;
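/* For example, bit_count (0x29) clears one set bit per iteration
   (0x29 -> 0x28 -> 0x20 -> 0) and returns 3.  */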
1041 typedef struct
1043 enum machine_mode mode;
1044 const char *name;
1045 } arm_fixed_mode_set;
1047 /* A small helper for setting fixed-point library libfuncs. */
1049 static void
1050 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1051 const char *funcname, const char *modename,
1052 int num_suffix)
1054 char buffer[50];
1056 if (num_suffix == 0)
1057 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1058 else
1059 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1061 set_optab_libfunc (optable, mode, buffer);
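/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" for QQmode addition.  */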
1064 static void
1065 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1066 enum machine_mode from, const char *funcname,
1067 const char *toname, const char *fromname)
1069 char buffer[50];
1070 char *maybe_suffix_2 = "";
1072 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1073 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1074 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1075 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1076 maybe_suffix_2 = "2";
1078 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1079 maybe_suffix_2);
1081 set_conv_libfunc (optable, to, from, buffer);
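/* For example, a QQmode to HQmode conversion keeps the same signedness and
   fract/accum class, so it gets the "2" suffix and is registered as
   "__gnu_fractqqhq2"; QQmode to SImode does not, giving "__gnu_fractqqsi".  */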
1084 /* Set up library functions unique to ARM. */
1086 static void
1087 arm_init_libfuncs (void)
1089 /* There are no special library functions unless we are using the
1090 ARM BPABI. */
1091 if (!TARGET_BPABI)
1092 return;
1094 /* The functions below are described in Section 4 of the "Run-Time
1095 ABI for the ARM architecture", Version 1.0. */
1097 /* Double-precision floating-point arithmetic. Table 2. */
1098 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1099 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1100 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1101 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1102 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1104 /* Double-precision comparisons. Table 3. */
1105 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1106 set_optab_libfunc (ne_optab, DFmode, NULL);
1107 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1108 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1109 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1110 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1111 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1113 /* Single-precision floating-point arithmetic. Table 4. */
1114 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1115 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1116 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1117 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1118 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1120 /* Single-precision comparisons. Table 5. */
1121 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1122 set_optab_libfunc (ne_optab, SFmode, NULL);
1123 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1124 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1125 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1126 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1127 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1129 /* Floating-point to integer conversions. Table 6. */
1130 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1131 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1132 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1133 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1134 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1135 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1136 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1137 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1139 /* Conversions between floating types. Table 7. */
1140 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1141 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1143 /* Integer to floating-point conversions. Table 8. */
1144 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1145 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1146 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1147 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1148 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1149 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1150 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1151 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1153 /* Long long. Table 9. */
1154 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1155 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1156 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1157 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1158 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1159 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1160 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1161 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1163 /* Integer (32/32->32) division. \S 4.3.1. */
1164 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1165 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1167 /* The divmod functions are designed so that they can be used for
1168 plain division, even though they return both the quotient and the
1169 remainder. The quotient is returned in the usual location (i.e.,
1170 r0 for SImode, {r0, r1} for DImode), just as would be expected
1171 for an ordinary division routine. Because the AAPCS calling
1172 conventions specify that all of { r0, r1, r2, r3 } are
1173 call-clobbered registers, there is no need to tell the compiler
1174 explicitly that those registers are clobbered by these
1175 routines. */
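/* For example, __aeabi_idivmod returns the quotient in r0 and the remainder
   in r1, and __aeabi_ldivmod returns them in {r0, r1} and {r2, r3}
   respectively, so the quotient is exactly where a plain division routine
   is expected to leave it.  */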
1176 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1177 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1179 /* For SImode division the ABI provides div-without-mod routines,
1180 which are faster. */
1181 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1182 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1184 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1185 divmod libcalls instead. */
1186 set_optab_libfunc (smod_optab, DImode, NULL);
1187 set_optab_libfunc (umod_optab, DImode, NULL);
1188 set_optab_libfunc (smod_optab, SImode, NULL);
1189 set_optab_libfunc (umod_optab, SImode, NULL);
1191 /* Half-precision float operations. The compiler handles all operations
1192 with NULL libfuncs by converting to SFmode. */
1193 switch (arm_fp16_format)
1195 case ARM_FP16_FORMAT_IEEE:
1196 case ARM_FP16_FORMAT_ALTERNATIVE:
1198 /* Conversions. */
1199 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1200 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1201 ? "__gnu_f2h_ieee"
1202 : "__gnu_f2h_alternative"));
1203 set_conv_libfunc (sext_optab, SFmode, HFmode,
1204 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1205 ? "__gnu_h2f_ieee"
1206 : "__gnu_h2f_alternative"));
1208 /* Arithmetic. */
1209 set_optab_libfunc (add_optab, HFmode, NULL);
1210 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1211 set_optab_libfunc (smul_optab, HFmode, NULL);
1212 set_optab_libfunc (neg_optab, HFmode, NULL);
1213 set_optab_libfunc (sub_optab, HFmode, NULL);
1215 /* Comparisons. */
1216 set_optab_libfunc (eq_optab, HFmode, NULL);
1217 set_optab_libfunc (ne_optab, HFmode, NULL);
1218 set_optab_libfunc (lt_optab, HFmode, NULL);
1219 set_optab_libfunc (le_optab, HFmode, NULL);
1220 set_optab_libfunc (ge_optab, HFmode, NULL);
1221 set_optab_libfunc (gt_optab, HFmode, NULL);
1222 set_optab_libfunc (unord_optab, HFmode, NULL);
1223 break;
1225 default:
1226 break;
1229 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1231 const arm_fixed_mode_set fixed_arith_modes[] =
1233 { QQmode, "qq" },
1234 { UQQmode, "uqq" },
1235 { HQmode, "hq" },
1236 { UHQmode, "uhq" },
1237 { SQmode, "sq" },
1238 { USQmode, "usq" },
1239 { DQmode, "dq" },
1240 { UDQmode, "udq" },
1241 { TQmode, "tq" },
1242 { UTQmode, "utq" },
1243 { HAmode, "ha" },
1244 { UHAmode, "uha" },
1245 { SAmode, "sa" },
1246 { USAmode, "usa" },
1247 { DAmode, "da" },
1248 { UDAmode, "uda" },
1249 { TAmode, "ta" },
1250 { UTAmode, "uta" }
1252 const arm_fixed_mode_set fixed_conv_modes[] =
1254 { QQmode, "qq" },
1255 { UQQmode, "uqq" },
1256 { HQmode, "hq" },
1257 { UHQmode, "uhq" },
1258 { SQmode, "sq" },
1259 { USQmode, "usq" },
1260 { DQmode, "dq" },
1261 { UDQmode, "udq" },
1262 { TQmode, "tq" },
1263 { UTQmode, "utq" },
1264 { HAmode, "ha" },
1265 { UHAmode, "uha" },
1266 { SAmode, "sa" },
1267 { USAmode, "usa" },
1268 { DAmode, "da" },
1269 { UDAmode, "uda" },
1270 { TAmode, "ta" },
1271 { UTAmode, "uta" },
1272 { QImode, "qi" },
1273 { HImode, "hi" },
1274 { SImode, "si" },
1275 { DImode, "di" },
1276 { TImode, "ti" },
1277 { SFmode, "sf" },
1278 { DFmode, "df" }
1280 unsigned int i, j;
1282 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1284 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1285 "add", fixed_arith_modes[i].name, 3);
1286 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1287 "ssadd", fixed_arith_modes[i].name, 3);
1288 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1289 "usadd", fixed_arith_modes[i].name, 3);
1290 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1291 "sub", fixed_arith_modes[i].name, 3);
1292 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1293 "sssub", fixed_arith_modes[i].name, 3);
1294 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1295 "ussub", fixed_arith_modes[i].name, 3);
1296 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1297 "mul", fixed_arith_modes[i].name, 3);
1298 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1299 "ssmul", fixed_arith_modes[i].name, 3);
1300 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1301 "usmul", fixed_arith_modes[i].name, 3);
1302 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1303 "div", fixed_arith_modes[i].name, 3);
1304 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1305 "udiv", fixed_arith_modes[i].name, 3);
1306 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1307 "ssdiv", fixed_arith_modes[i].name, 3);
1308 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1309 "usdiv", fixed_arith_modes[i].name, 3);
1310 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1311 "neg", fixed_arith_modes[i].name, 2);
1312 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1313 "ssneg", fixed_arith_modes[i].name, 2);
1314 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1315 "usneg", fixed_arith_modes[i].name, 2);
1316 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1317 "ashl", fixed_arith_modes[i].name, 3);
1318 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1319 "ashr", fixed_arith_modes[i].name, 3);
1320 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1321 "lshr", fixed_arith_modes[i].name, 3);
1322 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1323 "ssashl", fixed_arith_modes[i].name, 3);
1324 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1325 "usashl", fixed_arith_modes[i].name, 3);
1326 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1327 "cmp", fixed_arith_modes[i].name, 2);
1330 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1331 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1333 if (i == j
1334 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1335 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1336 continue;
1338 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1339 fixed_conv_modes[j].mode, "fract",
1340 fixed_conv_modes[i].name,
1341 fixed_conv_modes[j].name);
1342 arm_set_fixed_conv_libfunc (satfract_optab,
1343 fixed_conv_modes[i].mode,
1344 fixed_conv_modes[j].mode, "satfract",
1345 fixed_conv_modes[i].name,
1346 fixed_conv_modes[j].name);
1347 arm_set_fixed_conv_libfunc (fractuns_optab,
1348 fixed_conv_modes[i].mode,
1349 fixed_conv_modes[j].mode, "fractuns",
1350 fixed_conv_modes[i].name,
1351 fixed_conv_modes[j].name);
1352 arm_set_fixed_conv_libfunc (satfractuns_optab,
1353 fixed_conv_modes[i].mode,
1354 fixed_conv_modes[j].mode, "satfractuns",
1355 fixed_conv_modes[i].name,
1356 fixed_conv_modes[j].name);
1360 if (TARGET_AAPCS_BASED)
1361 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1364 /* On AAPCS systems, this is the "struct __va_list". */
1365 static GTY(()) tree va_list_type;
1367 /* Return the type to use as __builtin_va_list. */
1368 static tree
1369 arm_build_builtin_va_list (void)
1371 tree va_list_name;
1372 tree ap_field;
1374 if (!TARGET_AAPCS_BASED)
1375 return std_build_builtin_va_list ();
1377 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1378 defined as:
1380 struct __va_list
1381 {
1382 void *__ap;
1383 };
1385 The C Library ABI further reinforces this definition in \S
1386 4.1.
1388 We must follow this definition exactly. The structure tag
1389 name is visible in C++ mangled names, and thus forms a part
1390 of the ABI. The field name may be used by people who
1391 #include <stdarg.h>. */
1392 /* Create the type. */
1393 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1394 /* Give it the required name. */
1395 va_list_name = build_decl (BUILTINS_LOCATION,
1396 TYPE_DECL,
1397 get_identifier ("__va_list"),
1398 va_list_type);
1399 DECL_ARTIFICIAL (va_list_name) = 1;
1400 TYPE_NAME (va_list_type) = va_list_name;
1401 TYPE_STUB_DECL (va_list_type) = va_list_name;
1402 /* Create the __ap field. */
1403 ap_field = build_decl (BUILTINS_LOCATION,
1404 FIELD_DECL,
1405 get_identifier ("__ap"),
1406 ptr_type_node);
1407 DECL_ARTIFICIAL (ap_field) = 1;
1408 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1409 TYPE_FIELDS (va_list_type) = ap_field;
1410 /* Compute its layout. */
1411 layout_type (va_list_type);
1413 return va_list_type;
1416 /* Return an expression of type "void *" pointing to the next
1417 available argument in a variable-argument list. VALIST is the
1418 user-level va_list object, of type __builtin_va_list. */
1419 static tree
1420 arm_extract_valist_ptr (tree valist)
1422 if (TREE_TYPE (valist) == error_mark_node)
1423 return error_mark_node;
1425 /* On an AAPCS target, the pointer is stored within "struct
1426 va_list". */
1427 if (TARGET_AAPCS_BASED)
1429 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1430 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1431 valist, ap_field, NULL_TREE);
1434 return valist;
1437 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1438 static void
1439 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1441 valist = arm_extract_valist_ptr (valist);
1442 std_expand_builtin_va_start (valist, nextarg);
1445 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1446 static tree
1447 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1448 gimple_seq *post_p)
1450 valist = arm_extract_valist_ptr (valist);
1451 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1454 /* Fix up any incompatible options that the user has specified. */
1455 static void
1456 arm_option_override (void)
1458 if (global_options_set.x_arm_arch_option)
1459 arm_selected_arch = &all_architectures[arm_arch_option];
1461 if (global_options_set.x_arm_cpu_option)
1462 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1464 if (global_options_set.x_arm_tune_option)
1465 arm_selected_tune = &all_cores[(int) arm_tune_option];
1467 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1468 SUBTARGET_OVERRIDE_OPTIONS;
1469 #endif
1471 if (arm_selected_arch)
1473 if (arm_selected_cpu)
1475 /* Check for conflict between mcpu and march. */
1476 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1478 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1479 arm_selected_cpu->name, arm_selected_arch->name);
1480 /* -march wins for code generation.
1481 -mcpu wins for default tuning. */
1482 if (!arm_selected_tune)
1483 arm_selected_tune = arm_selected_cpu;
1485 arm_selected_cpu = arm_selected_arch;
1487 else
1488 /* -mcpu wins. */
1489 arm_selected_arch = NULL;
1491 else
1492 /* Pick a CPU based on the architecture. */
1493 arm_selected_cpu = arm_selected_arch;
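/* For example (assuming both names are accepted option values), with
   "-mcpu=arm926ej-s -march=armv7-a" the capability flags differ, so the
   warning above is issued; code is then generated for armv7-a while
   arm926ej-s provides the default tuning, unless -mtune was also given.  */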
1496 /* If the user did not specify a processor, choose one for them. */
1497 if (!arm_selected_cpu)
1499 const struct processors * sel;
1500 unsigned int sought;
1502 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1503 if (!arm_selected_cpu->name)
1505 #ifdef SUBTARGET_CPU_DEFAULT
1506 /* Use the subtarget default CPU if none was specified by
1507 configure. */
1508 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1509 #endif
1510 /* Default to ARM6. */
1511 if (!arm_selected_cpu->name)
1512 arm_selected_cpu = &all_cores[arm6];
1515 sel = arm_selected_cpu;
1516 insn_flags = sel->flags;
1518 /* Now check to see if the user has specified some command line
1519 switches that require certain abilities from the cpu. */
1520 sought = 0;
1522 if (TARGET_INTERWORK || TARGET_THUMB)
1524 sought |= (FL_THUMB | FL_MODE32);
1526 /* There are no ARM processors that support both APCS-26 and
1527 interworking. Therefore we force FL_MODE26 to be removed
1528 from insn_flags here (if it was set), so that the search
1529 below will always be able to find a compatible processor. */
1530 insn_flags &= ~FL_MODE26;
1533 if (sought != 0 && ((sought & insn_flags) != sought))
1535 /* Try to locate a CPU type that supports all of the abilities
1536 of the default CPU, plus the extra abilities requested by
1537 the user. */
1538 for (sel = all_cores; sel->name != NULL; sel++)
1539 if ((sel->flags & sought) == (sought | insn_flags))
1540 break;
1542 if (sel->name == NULL)
1544 unsigned current_bit_count = 0;
1545 const struct processors * best_fit = NULL;
1547 /* Ideally we would like to issue an error message here
1548 saying that it was not possible to find a CPU compatible
1549 with the default CPU, but which also supports the command
1550 line options specified by the programmer, and so they
1551 ought to use the -mcpu=<name> command line option to
1552 override the default CPU type.
1554 If we cannot find a CPU that has both the
1555 characteristics of the default CPU and the given
1556 command line options, we scan the array again looking
1557 for a best match. */
1558 for (sel = all_cores; sel->name != NULL; sel++)
1559 if ((sel->flags & sought) == sought)
1561 unsigned count;
1563 count = bit_count (sel->flags & insn_flags);
1565 if (count >= current_bit_count)
1567 best_fit = sel;
1568 current_bit_count = count;
1572 gcc_assert (best_fit);
1573 sel = best_fit;
1576 arm_selected_cpu = sel;
1580 gcc_assert (arm_selected_cpu);
1581 /* The selected CPU may be an architecture, so look up tuning by core ID. */
1582 if (!arm_selected_tune)
1583 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1585 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1586 insn_flags = arm_selected_cpu->flags;
1588 arm_tune = arm_selected_tune->core;
1589 tune_flags = arm_selected_tune->flags;
1590 current_tune = arm_selected_tune->tune;
1592 /* Make sure that the processor choice does not conflict with any of the
1593 other command line choices. */
1594 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1595 error ("target CPU does not support ARM mode");
1597 /* BPABI targets use linker tricks to allow interworking on cores
1598 without thumb support. */
1599 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1601 warning (0, "target CPU does not support interworking" );
1602 target_flags &= ~MASK_INTERWORK;
1605 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1607 warning (0, "target CPU does not support THUMB instructions");
1608 target_flags &= ~MASK_THUMB;
1611 if (TARGET_APCS_FRAME && TARGET_THUMB)
1613 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1614 target_flags &= ~MASK_APCS_FRAME;
1617 /* Callee super interworking implies thumb interworking. Adding
1618 this to the flags here simplifies the logic elsewhere. */
1619 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1620 target_flags |= MASK_INTERWORK;
1622 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1623 from here where no function is being compiled currently. */
1624 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1625 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1627 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1628 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1630 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1632 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1633 target_flags |= MASK_APCS_FRAME;
1636 if (TARGET_POKE_FUNCTION_NAME)
1637 target_flags |= MASK_APCS_FRAME;
1639 if (TARGET_APCS_REENT && flag_pic)
1640 error ("-fpic and -mapcs-reent are incompatible");
1642 if (TARGET_APCS_REENT)
1643 warning (0, "APCS reentrant code not supported. Ignored");
1645 /* If this target is normally configured to use APCS frames, warn if they
1646 are turned off and debugging is turned on. */
1647 if (TARGET_ARM
1648 && write_symbols != NO_DEBUG
1649 && !TARGET_APCS_FRAME
1650 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1651 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1653 if (TARGET_APCS_FLOAT)
1654 warning (0, "passing floating point arguments in fp regs not yet supported");
1656 if (TARGET_LITTLE_WORDS)
1657 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1658 "will be removed in a future release");
1660 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1661 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1662 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1663 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1664 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1665 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1666 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1667 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1668 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1669 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1670 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1671 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1672 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1673 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1675 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1676 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1677 thumb_code = TARGET_ARM == 0;
1678 thumb1_code = TARGET_THUMB1 != 0;
1679 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1680 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1681 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1682 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1683 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1684 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1686 /* If we are not using the default (ARM mode) section anchor offset
1687 ranges, then set the correct ranges now. */
1688 if (TARGET_THUMB1)
1690 /* Thumb-1 LDR instructions cannot have negative offsets.
1691 Permissible positive offset ranges are 5-bit (for byte loads),
1692 6-bit (for halfword loads), or 7-bit (for word loads).
1693 Empirical results suggest a 7-bit anchor range gives the best
1694 overall code size. */
1695 targetm.min_anchor_offset = 0;
1696 targetm.max_anchor_offset = 127;
1698 else if (TARGET_THUMB2)
1700 /* The minimum is set such that the total size of the block
1701 for a particular anchor is 248 + 1 + 4095 bytes, which is
1702 divisible by eight, ensuring natural spacing of anchors. */
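/* With these values the block size is 248 + 1 + 4095 = 4344 bytes,
   i.e. 543 doublewords. */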
1703 targetm.min_anchor_offset = -248;
1704 targetm.max_anchor_offset = 4095;
1707 /* V5 code we generate is completely interworking capable, so we turn off
1708 TARGET_INTERWORK here to avoid many tests later on. */
1710 /* XXX However, we must pass the right pre-processor defines to CPP
1711 or GLD can get confused. This is a hack. */
1712 if (TARGET_INTERWORK)
1713 arm_cpp_interwork = 1;
1715 if (arm_arch5)
1716 target_flags &= ~MASK_INTERWORK;
1718 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1719 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1721 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1722 error ("iwmmxt abi requires an iwmmxt capable cpu");
1724 if (!global_options_set.x_arm_fpu_index)
1726 const char *target_fpu_name;
1727 bool ok;
1729 #ifdef FPUTYPE_DEFAULT
1730 target_fpu_name = FPUTYPE_DEFAULT;
1731 #else
1732 if (arm_arch_cirrus)
1733 target_fpu_name = "maverick";
1734 else
1735 target_fpu_name = "fpe2";
1736 #endif
1738 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1739 CL_TARGET);
1740 gcc_assert (ok);
1743 arm_fpu_desc = &all_fpus[arm_fpu_index];
1745 switch (arm_fpu_desc->model)
1747 case ARM_FP_MODEL_FPA:
1748 if (arm_fpu_desc->rev == 2)
1749 arm_fpu_attr = FPU_FPE2;
1750 else if (arm_fpu_desc->rev == 3)
1751 arm_fpu_attr = FPU_FPE3;
1752 else
1753 arm_fpu_attr = FPU_FPA;
1754 break;
1756 case ARM_FP_MODEL_MAVERICK:
1757 arm_fpu_attr = FPU_MAVERICK;
1758 break;
1760 case ARM_FP_MODEL_VFP:
1761 arm_fpu_attr = FPU_VFP;
1762 break;
1764 default:
1765 gcc_unreachable ();
1768 if (TARGET_AAPCS_BASED
1769 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1770 error ("FPA is unsupported in the AAPCS");
1772 if (TARGET_AAPCS_BASED)
1774 if (TARGET_CALLER_INTERWORKING)
1775 error ("AAPCS does not support -mcaller-super-interworking");
1776 else
1777 if (TARGET_CALLEE_INTERWORKING)
1778 error ("AAPCS does not support -mcallee-super-interworking");
1781 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1782 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1783 will ever exist. GCC makes no attempt to support this combination. */
1784 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1785 sorry ("iWMMXt and hardware floating point");
1787 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1788 if (TARGET_THUMB2 && TARGET_IWMMXT)
1789 sorry ("Thumb-2 iWMMXt");
1791 /* __fp16 support currently assumes the core has ldrh. */
1792 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1793 sorry ("__fp16 and no ldrh");
1795 /* If soft-float is specified then don't use FPU. */
1796 if (TARGET_SOFT_FLOAT)
1797 arm_fpu_attr = FPU_NONE;
1799 if (TARGET_AAPCS_BASED)
1801 if (arm_abi == ARM_ABI_IWMMXT)
1802 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1803 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1804 && TARGET_HARD_FLOAT
1805 && TARGET_VFP)
1806 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1807 else
1808 arm_pcs_default = ARM_PCS_AAPCS;
1810 else
1812 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1813 sorry ("-mfloat-abi=hard and VFP");
1815 if (arm_abi == ARM_ABI_APCS)
1816 arm_pcs_default = ARM_PCS_APCS;
1817 else
1818 arm_pcs_default = ARM_PCS_ATPCS;
1821 /* For arm2/3 there is no need to do any scheduling if there is only
1822 a floating point emulator, or we are doing software floating-point. */
1823 if ((TARGET_SOFT_FLOAT
1824 || (TARGET_FPA && arm_fpu_desc->rev))
1825 && (tune_flags & FL_MODE32) == 0)
1826 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1828 /* Use the cp15 method if it is available. */
1829 if (target_thread_pointer == TP_AUTO)
1831 if (arm_arch6k && !TARGET_THUMB1)
1832 target_thread_pointer = TP_CP15;
1833 else
1834 target_thread_pointer = TP_SOFT;
1837 if (TARGET_HARD_TP && TARGET_THUMB1)
1838 error ("can not use -mtp=cp15 with 16-bit Thumb");
1840 /* Override the default structure alignment for AAPCS ABI. */
1841 if (!global_options_set.x_arm_structure_size_boundary)
1843 if (TARGET_AAPCS_BASED)
1844 arm_structure_size_boundary = 8;
1846 else
1848 if (arm_structure_size_boundary != 8
1849 && arm_structure_size_boundary != 32
1850 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1852 if (ARM_DOUBLEWORD_ALIGN)
1853 warning (0,
1854 "structure size boundary can only be set to 8, 32 or 64");
1855 else
1856 warning (0, "structure size boundary can only be set to 8 or 32");
1857 arm_structure_size_boundary
1858 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1862 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1864 error ("RTP PIC is incompatible with Thumb");
1865 flag_pic = 0;
1868 /* If stack checking is disabled, we can use r10 as the PIC register,
1869 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1870 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1872 if (TARGET_VXWORKS_RTP)
1873 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1874 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1877 if (flag_pic && TARGET_VXWORKS_RTP)
1878 arm_pic_register = 9;
1880 if (arm_pic_register_string != NULL)
1882 int pic_register = decode_reg_name (arm_pic_register_string);
1884 if (!flag_pic)
1885 warning (0, "-mpic-register= is useless without -fpic");
1887 /* Prevent the user from choosing an obviously stupid PIC register. */
1888 else if (pic_register < 0 || call_used_regs[pic_register]
1889 || pic_register == HARD_FRAME_POINTER_REGNUM
1890 || pic_register == STACK_POINTER_REGNUM
1891 || pic_register >= PC_REGNUM
1892 || (TARGET_VXWORKS_RTP
1893 && (unsigned int) pic_register != arm_pic_register))
1894 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1895 else
1896 arm_pic_register = pic_register;
1899 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1900 if (fix_cm3_ldrd == 2)
1902 if (arm_selected_cpu->core == cortexm3)
1903 fix_cm3_ldrd = 1;
1904 else
1905 fix_cm3_ldrd = 0;
1908 if (TARGET_THUMB1 && flag_schedule_insns)
1910 /* Don't warn since it's on by default in -O2. */
1911 flag_schedule_insns = 0;
1914 if (optimize_size)
1916 /* If optimizing for size, bump the number of instructions that we
1917 are prepared to conditionally execute (even on a StrongARM). */
1918 max_insns_skipped = 6;
1920 else
1921 max_insns_skipped = current_tune->max_insns_skipped;
1923 /* Hot/Cold partitioning is not currently supported, since we can't
1924 handle literal pool placement in that case. */
1925 if (flag_reorder_blocks_and_partition)
1927 inform (input_location,
1928 "-freorder-blocks-and-partition not supported on this architecture");
1929 flag_reorder_blocks_and_partition = 0;
1930 flag_reorder_blocks = 1;
1933 if (flag_pic)
1934 /* Hoisting PIC address calculations more aggressively provides a small,
1935 but measurable, size reduction for PIC code. Therefore, we decrease
1936 the bar for unrestricted expression hoisting to the cost of PIC address
1937 calculation, which is 2 instructions. */
1938 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1939 global_options.x_param_values,
1940 global_options_set.x_param_values);
1942 /* ARM EABI defaults to strict volatile bitfields. */
1943 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1944 flag_strict_volatile_bitfields = 1;
1946 /* Enable software prefetching at -O3 for CPUs that have prefetch, and for which we
1947 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1948 if (flag_prefetch_loop_arrays < 0
1949 && HAVE_prefetch
1950 && optimize >= 3
1951 && current_tune->num_prefetch_slots > 0)
1952 flag_prefetch_loop_arrays = 1;
1954 /* Set up parameters to be used in the prefetching algorithm. Do not override the
1955 defaults unless we are tuning for a core whose values we have researched. */
1956 if (current_tune->num_prefetch_slots > 0)
1957 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1958 current_tune->num_prefetch_slots,
1959 global_options.x_param_values,
1960 global_options_set.x_param_values);
1961 if (current_tune->l1_cache_line_size >= 0)
1962 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1963 current_tune->l1_cache_line_size,
1964 global_options.x_param_values,
1965 global_options_set.x_param_values);
1966 if (current_tune->l1_cache_size >= 0)
1967 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1968 current_tune->l1_cache_size,
1969 global_options.x_param_values,
1970 global_options_set.x_param_values);
1972 /* Register global variables with the garbage collector. */
1973 arm_add_gc_roots ();
1976 static void
1977 arm_add_gc_roots (void)
1979 gcc_obstack_init(&minipool_obstack);
1980 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1983 /* A table of known ARM exception types.
1984 For use with the interrupt function attribute. */
1986 typedef struct
1988 const char *const arg;
1989 const unsigned long return_value;
1991 isr_attribute_arg;
1993 static const isr_attribute_arg isr_attribute_args [] =
1995 { "IRQ", ARM_FT_ISR },
1996 { "irq", ARM_FT_ISR },
1997 { "FIQ", ARM_FT_FIQ },
1998 { "fiq", ARM_FT_FIQ },
1999 { "ABORT", ARM_FT_ISR },
2000 { "abort", ARM_FT_ISR },
2001 { "ABORT", ARM_FT_ISR },
2002 { "abort", ARM_FT_ISR },
2003 { "UNDEF", ARM_FT_EXCEPTION },
2004 { "undef", ARM_FT_EXCEPTION },
2005 { "SWI", ARM_FT_EXCEPTION },
2006 { "swi", ARM_FT_EXCEPTION },
2007 { NULL, ARM_FT_NORMAL }
2010 /* Returns the (interrupt) function type of the current
2011 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
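/* For example, a (hypothetical) handler declared as
   void my_handler (void) __attribute__ ((interrupt ("IRQ")));
   will be assigned the ARM_FT_ISR type via the table above. */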
2013 static unsigned long
2014 arm_isr_value (tree argument)
2016 const isr_attribute_arg * ptr;
2017 const char * arg;
2019 if (!arm_arch_notm)
2020 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2022 /* No argument - default to IRQ. */
2023 if (argument == NULL_TREE)
2024 return ARM_FT_ISR;
2026 /* Get the value of the argument. */
2027 if (TREE_VALUE (argument) == NULL_TREE
2028 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2029 return ARM_FT_UNKNOWN;
2031 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2033 /* Check it against the list of known arguments. */
2034 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2035 if (streq (arg, ptr->arg))
2036 return ptr->return_value;
2038 /* An unrecognized interrupt type. */
2039 return ARM_FT_UNKNOWN;
2042 /* Computes the type of the current function. */
2044 static unsigned long
2045 arm_compute_func_type (void)
2047 unsigned long type = ARM_FT_UNKNOWN;
2048 tree a;
2049 tree attr;
2051 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2053 /* Decide if the current function is volatile. Such functions
2054 never return, and many memory cycles can be saved by not storing
2055 register values that will never be needed again. This optimization
2056 was added to speed up context switching in a kernel application. */
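/* For example, a nothrow function declared with __attribute__ ((noreturn))
   (which sets TREE_THIS_VOLATILE on its FUNCTION_DECL) is marked
   ARM_FT_VOLATILE here when optimizing. */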
2057 if (optimize > 0
2058 && (TREE_NOTHROW (current_function_decl)
2059 || !(flag_unwind_tables
2060 || (flag_exceptions
2061 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2062 && TREE_THIS_VOLATILE (current_function_decl))
2063 type |= ARM_FT_VOLATILE;
2065 if (cfun->static_chain_decl != NULL)
2066 type |= ARM_FT_NESTED;
2068 attr = DECL_ATTRIBUTES (current_function_decl);
2070 a = lookup_attribute ("naked", attr);
2071 if (a != NULL_TREE)
2072 type |= ARM_FT_NAKED;
2074 a = lookup_attribute ("isr", attr);
2075 if (a == NULL_TREE)
2076 a = lookup_attribute ("interrupt", attr);
2078 if (a == NULL_TREE)
2079 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2080 else
2081 type |= arm_isr_value (TREE_VALUE (a));
2083 return type;
2086 /* Returns the type of the current function. */
2088 unsigned long
2089 arm_current_func_type (void)
2091 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2092 cfun->machine->func_type = arm_compute_func_type ();
2094 return cfun->machine->func_type;
2097 bool
2098 arm_allocate_stack_slots_for_args (void)
2100 /* Naked functions should not allocate stack slots for arguments. */
2101 return !IS_NAKED (arm_current_func_type ());
2105 /* Output assembler code for a block containing the constant parts
2106 of a trampoline, leaving space for the variable parts.
2108 On the ARM, (if r8 is the static chain regnum, and remembering that
2109 referencing pc adds an offset of 8) the trampoline looks like:
2110 ldr r8, [pc, #0]
2111 ldr pc, [pc]
2112 .word static chain value
2113 .word function's address
2114 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2116 static void
2117 arm_asm_trampoline_template (FILE *f)
2119 if (TARGET_ARM)
2121 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2122 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2124 else if (TARGET_THUMB2)
2126 /* The Thumb-2 trampoline is similar to the ARM implementation.
2127 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2128 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2129 STATIC_CHAIN_REGNUM, PC_REGNUM);
2130 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2132 else
2134 ASM_OUTPUT_ALIGN (f, 2);
2135 fprintf (f, "\t.code\t16\n");
2136 fprintf (f, ".Ltrampoline_start:\n");
2137 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2138 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2139 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2140 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2141 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2142 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2144 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2145 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2148 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2150 static void
2151 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2153 rtx fnaddr, mem, a_tramp;
2155 emit_block_move (m_tramp, assemble_trampoline_template (),
2156 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2158 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2159 emit_move_insn (mem, chain_value);
2161 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2162 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2163 emit_move_insn (mem, fnaddr);
2165 a_tramp = XEXP (m_tramp, 0);
2166 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2167 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2168 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2171 /* Thumb trampolines should be entered in thumb mode, so set
2172 the bottom bit of the address. */
2174 static rtx
2175 arm_trampoline_adjust_address (rtx addr)
2177 if (TARGET_THUMB)
2178 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2179 NULL, 0, OPTAB_LIB_WIDEN);
2180 return addr;
2183 /* Return 1 if it is possible to return using a single instruction.
2184 If SIBLING is non-null, this is a test for a return before a sibling
2185 call. SIBLING is the call insn, so we can examine its register usage. */
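/* For instance, an epilogue consisting of a single "ldmfd sp!, {r4-r7, pc}"
   (or a bare "bx lr" for a simple leaf function) qualifies, whereas a
   function that has saved registers but not the LR needs a separate load
   plus a return, and so does not. */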
2188 use_return_insn (int iscond, rtx sibling)
2190 int regno;
2191 unsigned int func_type;
2192 unsigned long saved_int_regs;
2193 unsigned HOST_WIDE_INT stack_adjust;
2194 arm_stack_offsets *offsets;
2196 /* Never use a return instruction before reload has run. */
2197 if (!reload_completed)
2198 return 0;
2200 func_type = arm_current_func_type ();
2202 /* Naked, volatile and stack alignment functions need special
2203 consideration. */
2204 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2205 return 0;
2207 /* So do interrupt functions that use the frame pointer and Thumb
2208 interrupt functions. */
2209 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2210 return 0;
2212 offsets = arm_get_frame_offsets ();
2213 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2215 /* As do variadic functions. */
2216 if (crtl->args.pretend_args_size
2217 || cfun->machine->uses_anonymous_args
2218 /* Or if the function calls __builtin_eh_return () */
2219 || crtl->calls_eh_return
2220 /* Or if the function calls alloca */
2221 || cfun->calls_alloca
2222 /* Or if there is a stack adjustment. However, if the stack pointer
2223 is saved on the stack, we can use a pre-incrementing stack load. */
2224 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2225 && stack_adjust == 4)))
2226 return 0;
2228 saved_int_regs = offsets->saved_regs_mask;
2230 /* Unfortunately, the insn
2232 ldmib sp, {..., sp, ...}
2234 triggers a bug on most SA-110 based devices, such that the stack
2235 pointer won't be correctly restored if the instruction takes a
2236 page fault. We work around this problem by popping r3 along with
2237 the other registers, since that is never slower than executing
2238 another instruction.
2240 We test for !arm_arch5 here, because code for any architecture
2241 less than this could potentially be run on one of the buggy
2242 chips. */
2243 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2245 /* Validate that r3 is a call-clobbered register (always true in
2246 the default abi) ... */
2247 if (!call_used_regs[3])
2248 return 0;
2250 /* ... that it isn't being used for a return value ... */
2251 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2252 return 0;
2254 /* ... or for a tail-call argument ... */
2255 if (sibling)
2257 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2259 if (find_regno_fusage (sibling, USE, 3))
2260 return 0;
2263 /* ... and that there are no call-saved registers in r0-r2
2264 (always true in the default ABI). */
2265 if (saved_int_regs & 0x7)
2266 return 0;
2269 /* Can't be done if interworking with Thumb, and any registers have been
2270 stacked. */
2271 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2272 return 0;
2274 /* On StrongARM, conditional returns are expensive if they aren't
2275 taken and multiple registers have been stacked. */
2276 if (iscond && arm_tune_strongarm)
2278 /* Conditional return when just the LR is stored is a simple
2279 conditional-load instruction, that's not expensive. */
2280 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2281 return 0;
2283 if (flag_pic
2284 && arm_pic_register != INVALID_REGNUM
2285 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2286 return 0;
2289 /* If there are saved registers but the LR isn't saved, then we need
2290 two instructions for the return. */
2291 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2292 return 0;
2294 /* Can't be done if any of the FPA regs are pushed,
2295 since this also requires an insn. */
2296 if (TARGET_HARD_FLOAT && TARGET_FPA)
2297 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2298 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2299 return 0;
2301 /* Likewise VFP regs. */
2302 if (TARGET_HARD_FLOAT && TARGET_VFP)
2303 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2304 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2305 return 0;
2307 if (TARGET_REALLY_IWMMXT)
2308 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2309 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2310 return 0;
2312 return 1;
2315 /* Return TRUE if int I is a valid immediate ARM constant. */
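/* On ARM such a constant is an 8-bit value rotated right by an even number
   of bits; Thumb-2 additionally accepts shifted 8-bit values and the
   replicated byte patterns checked below. For example 0xff, 0xff00 and
   0xc000003f (0xff rotated right by two) are valid ARM immediates, whereas
   0x101 is not. */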
2318 const_ok_for_arm (HOST_WIDE_INT i)
2320 int lowbit;
2322 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2323 be all zero, or all one. */
2324 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2325 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2326 != ((~(unsigned HOST_WIDE_INT) 0)
2327 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2328 return FALSE;
2330 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2332 /* Fast return for 0 and small values. We must do this for zero, since
2333 the code below can't handle that one case. */
2334 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2335 return TRUE;
2337 /* Get the number of trailing zeros. */
2338 lowbit = ffs((int) i) - 1;
2340 /* Only even shifts are allowed in ARM mode, so round down to the
2341 nearest even number. */
2342 if (TARGET_ARM)
2343 lowbit &= ~1;
2345 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2346 return TRUE;
2348 if (TARGET_ARM)
2350 /* Allow rotated constants in ARM mode. */
2351 if (lowbit <= 4
2352 && ((i & ~0xc000003f) == 0
2353 || (i & ~0xf000000f) == 0
2354 || (i & ~0xfc000003) == 0))
2355 return TRUE;
2357 else
2359 HOST_WIDE_INT v;
2361 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2362 v = i & 0xff;
2363 v |= v << 16;
2364 if (i == v || i == (v | (v << 8)))
2365 return TRUE;
2367 /* Allow repeated pattern 0xXY00XY00. */
2368 v = i & 0xff00;
2369 v |= v << 16;
2370 if (i == v)
2371 return TRUE;
2374 return FALSE;
2377 /* Return true if I is a valid constant for the operation CODE. */
2378 static int
2379 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2381 if (const_ok_for_arm (i))
2382 return 1;
2384 switch (code)
2386 case SET:
2387 /* See if we can use movw. */
2388 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2389 return 1;
2390 else
2391 /* Otherwise, try mvn. */
2392 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2394 case PLUS:
2395 case COMPARE:
2396 case EQ:
2397 case NE:
2398 case GT:
2399 case LE:
2400 case LT:
2401 case GE:
2402 case GEU:
2403 case LTU:
2404 case GTU:
2405 case LEU:
2406 case UNORDERED:
2407 case ORDERED:
2408 case UNEQ:
2409 case UNGE:
2410 case UNLT:
2411 case UNGT:
2412 case UNLE:
2413 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2415 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2416 case XOR:
2417 return 0;
2419 case IOR:
2420 if (TARGET_THUMB2)
2421 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2422 return 0;
2424 case AND:
2425 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2427 default:
2428 gcc_unreachable ();
2432 /* Emit a sequence of insns to handle a large constant.
2433 CODE is the code of the operation required, it can be any of SET, PLUS,
2434 IOR, AND, XOR, MINUS;
2435 MODE is the mode in which the operation is being performed;
2436 VAL is the integer to operate on;
2437 SOURCE is the other operand (a register, or a null-pointer for SET);
2438 SUBTARGETS means it is safe to create scratch registers if that will
2439 either produce a simpler sequence, or we will want to cse the values.
2440 Return value is the number of insns emitted. */
2442 /* ??? Tweak this for thumb2. */
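/* For example, in ARM state 0xff00ff00 is not a valid immediate, but it can
   be synthesized in two insns by first loading 0xff000000 and then adding
   0x0000ff00, both of which are valid immediates. */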
2444 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2445 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2447 rtx cond;
2449 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2450 cond = COND_EXEC_TEST (PATTERN (insn));
2451 else
2452 cond = NULL_RTX;
2454 if (subtargets || code == SET
2455 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2456 && REGNO (target) != REGNO (source)))
2458 /* After arm_reorg has been called, we can't fix up expensive
2459 constants by pushing them into memory, so we must synthesize
2460 them in-line, regardless of the cost. This is only likely to
2461 be more costly on chips that have load delay slots when we are
2462 compiling without running the scheduler (so no splitting
2463 occurred before the final instruction emission).
2465 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2467 if (!after_arm_reorg
2468 && !cond
2469 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2470 1, 0)
2471 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2472 + (code != SET))))
2474 if (code == SET)
2476 /* Currently SET is the only monadic value for CODE; all
2477 the rest are dyadic. */
2478 if (TARGET_USE_MOVT)
2479 arm_emit_movpair (target, GEN_INT (val));
2480 else
2481 emit_set_insn (target, GEN_INT (val));
2483 return 1;
2485 else
2487 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2489 if (TARGET_USE_MOVT)
2490 arm_emit_movpair (temp, GEN_INT (val));
2491 else
2492 emit_set_insn (temp, GEN_INT (val));
2494 /* For MINUS, the constant is the value being subtracted from
2495 (VAL - SOURCE), since we never have subtraction of a constant. */
2496 if (code == MINUS)
2497 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2498 else
2499 emit_set_insn (target,
2500 gen_rtx_fmt_ee (code, mode, source, temp));
2501 return 2;
2506 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2510 /* Return the number of instructions required to synthesize the given
2511 constant, if we start emitting them from bit-position I. */
2512 static int
2513 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2515 HOST_WIDE_INT temp1;
2516 int step_size = TARGET_ARM ? 2 : 1;
2517 int num_insns = 0;
2519 gcc_assert (TARGET_ARM || i == 0);
2523 int end;
2525 if (i <= 0)
2526 i += 32;
2527 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2529 end = i - 8;
2530 if (end < 0)
2531 end += 32;
2532 temp1 = remainder & ((0x0ff << end)
2533 | ((i < end) ? (0xff >> (32 - end)) : 0));
2534 remainder &= ~temp1;
2535 num_insns++;
2536 i -= 8 - step_size;
2538 i -= step_size;
2539 } while (remainder);
2540 return num_insns;
2543 static int
2544 find_best_start (unsigned HOST_WIDE_INT remainder)
2546 int best_consecutive_zeros = 0;
2547 int i;
2548 int best_start = 0;
2550 /* If we aren't targeting ARM, the best place to start is always at
2551 the bottom. */
2552 if (! TARGET_ARM)
2553 return 0;
2555 for (i = 0; i < 32; i += 2)
2557 int consecutive_zeros = 0;
2559 if (!(remainder & (3 << i)))
2561 while ((i < 32) && !(remainder & (3 << i)))
2563 consecutive_zeros += 2;
2564 i += 2;
2566 if (consecutive_zeros > best_consecutive_zeros)
2568 best_consecutive_zeros = consecutive_zeros;
2569 best_start = i - consecutive_zeros;
2571 i -= 2;
2575 /* So long as it won't require any more insns to do so, it's
2576 desirable to emit a small constant (in bits 0...9) in the last
2577 insn. This way there is more chance that it can be combined with
2578 a later addressing insn to form a pre-indexed load or store
2579 operation. Consider:
2581 *((volatile int *)0xe0000100) = 1;
2582 *((volatile int *)0xe0000110) = 2;
2584 We want this to wind up as:
2586 mov rA, #0xe0000000
2587 mov rB, #1
2588 str rB, [rA, #0x100]
2589 mov rB, #2
2590 str rB, [rA, #0x110]
2592 rather than having to synthesize both large constants from scratch.
2594 Therefore, we calculate how many insns would be required to emit
2595 the constant starting from `best_start', and also starting from
2596 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2597 yield a shorter sequence, we may as well use zero. */
2598 if (best_start != 0
2599 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2600 && (count_insns_for_constant (remainder, 0) <=
2601 count_insns_for_constant (remainder, best_start)))
2602 best_start = 0;
2604 return best_start;
2607 /* Emit an instruction with the indicated PATTERN. If COND is
2608 non-NULL, conditionalize the execution of the instruction on COND
2609 being true. */
2611 static void
2612 emit_constant_insn (rtx cond, rtx pattern)
2614 if (cond)
2615 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2616 emit_insn (pattern);
2619 /* As above, but extra parameter GENERATE which, if clear, suppresses
2620 RTL generation. */
2621 /* ??? This needs more work for thumb2. */
2623 static int
2624 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2625 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2626 int generate)
2628 int can_invert = 0;
2629 int can_negate = 0;
2630 int final_invert = 0;
2631 int i;
2632 int num_bits_set = 0;
2633 int set_sign_bit_copies = 0;
2634 int clear_sign_bit_copies = 0;
2635 int clear_zero_bit_copies = 0;
2636 int set_zero_bit_copies = 0;
2637 int insns = 0;
2638 unsigned HOST_WIDE_INT temp1, temp2;
2639 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2640 int step_size = TARGET_ARM ? 2 : 1;
2642 /* Find out which operations are safe for a given CODE. Also do a quick
2643 check for degenerate cases; these can occur when DImode operations
2644 are split. */
2645 switch (code)
2647 case SET:
2648 can_invert = 1;
2649 can_negate = 1;
2650 break;
2652 case PLUS:
2653 can_negate = 1;
2654 break;
2656 case IOR:
2657 if (remainder == 0xffffffff)
2659 if (generate)
2660 emit_constant_insn (cond,
2661 gen_rtx_SET (VOIDmode, target,
2662 GEN_INT (ARM_SIGN_EXTEND (val))));
2663 return 1;
2666 if (remainder == 0)
2668 if (reload_completed && rtx_equal_p (target, source))
2669 return 0;
2671 if (generate)
2672 emit_constant_insn (cond,
2673 gen_rtx_SET (VOIDmode, target, source));
2674 return 1;
2676 break;
2678 case AND:
2679 if (remainder == 0)
2681 if (generate)
2682 emit_constant_insn (cond,
2683 gen_rtx_SET (VOIDmode, target, const0_rtx));
2684 return 1;
2686 if (remainder == 0xffffffff)
2688 if (reload_completed && rtx_equal_p (target, source))
2689 return 0;
2690 if (generate)
2691 emit_constant_insn (cond,
2692 gen_rtx_SET (VOIDmode, target, source));
2693 return 1;
2695 can_invert = 1;
2696 break;
2698 case XOR:
2699 if (remainder == 0)
2701 if (reload_completed && rtx_equal_p (target, source))
2702 return 0;
2703 if (generate)
2704 emit_constant_insn (cond,
2705 gen_rtx_SET (VOIDmode, target, source));
2706 return 1;
2709 if (remainder == 0xffffffff)
2711 if (generate)
2712 emit_constant_insn (cond,
2713 gen_rtx_SET (VOIDmode, target,
2714 gen_rtx_NOT (mode, source)));
2715 return 1;
2717 break;
2719 case MINUS:
2720 /* We treat MINUS as (val - source), since (source - val) is always
2721 passed as (source + (-val)). */
2722 if (remainder == 0)
2724 if (generate)
2725 emit_constant_insn (cond,
2726 gen_rtx_SET (VOIDmode, target,
2727 gen_rtx_NEG (mode, source)));
2728 return 1;
2730 if (const_ok_for_arm (val))
2732 if (generate)
2733 emit_constant_insn (cond,
2734 gen_rtx_SET (VOIDmode, target,
2735 gen_rtx_MINUS (mode, GEN_INT (val),
2736 source)));
2737 return 1;
2739 can_negate = 1;
2741 break;
2743 default:
2744 gcc_unreachable ();
2747 /* If we can do it in one insn, get out quickly. */
2748 if (const_ok_for_op (val, code))
2750 if (generate)
2751 emit_constant_insn (cond,
2752 gen_rtx_SET (VOIDmode, target,
2753 (source
2754 ? gen_rtx_fmt_ee (code, mode, source,
2755 GEN_INT (val))
2756 : GEN_INT (val))));
2757 return 1;
2760 /* Calculate a few attributes that may be useful for specific
2761 optimizations. */
2762 /* Count number of leading zeros. */
2763 for (i = 31; i >= 0; i--)
2765 if ((remainder & (1 << i)) == 0)
2766 clear_sign_bit_copies++;
2767 else
2768 break;
2771 /* Count number of leading 1's. */
2772 for (i = 31; i >= 0; i--)
2774 if ((remainder & (1 << i)) != 0)
2775 set_sign_bit_copies++;
2776 else
2777 break;
2780 /* Count number of trailing zeros. */
2781 for (i = 0; i <= 31; i++)
2783 if ((remainder & (1 << i)) == 0)
2784 clear_zero_bit_copies++;
2785 else
2786 break;
2789 /* Count number of trailing 1's. */
2790 for (i = 0; i <= 31; i++)
2792 if ((remainder & (1 << i)) != 0)
2793 set_zero_bit_copies++;
2794 else
2795 break;
2798 switch (code)
2800 case SET:
2801 /* See if we can do this by sign_extending a constant that is known
2802 to be negative. This is a good way of doing it, since the shift
2803 may well merge into a subsequent insn. */
2804 if (set_sign_bit_copies > 1)
2806 if (const_ok_for_arm
2807 (temp1 = ARM_SIGN_EXTEND (remainder
2808 << (set_sign_bit_copies - 1))))
2810 if (generate)
2812 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2813 emit_constant_insn (cond,
2814 gen_rtx_SET (VOIDmode, new_src,
2815 GEN_INT (temp1)));
2816 emit_constant_insn (cond,
2817 gen_ashrsi3 (target, new_src,
2818 GEN_INT (set_sign_bit_copies - 1)));
2820 return 2;
2822 /* For an inverted constant, we will need to set the low bits;
2823 these will be shifted out of harm's way. */
2824 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2825 if (const_ok_for_arm (~temp1))
2827 if (generate)
2829 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2830 emit_constant_insn (cond,
2831 gen_rtx_SET (VOIDmode, new_src,
2832 GEN_INT (temp1)));
2833 emit_constant_insn (cond,
2834 gen_ashrsi3 (target, new_src,
2835 GEN_INT (set_sign_bit_copies - 1)));
2837 return 2;
2841 /* See if we can calculate the value as the difference between two
2842 valid immediates. */
2843 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2845 int topshift = clear_sign_bit_copies & ~1;
2847 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2848 & (0xff000000 >> topshift));
2850 /* If temp1 is zero, then that means the 9 most significant
2851 bits of remainder were 1 and we've caused it to overflow.
2852 When topshift is 0 we don't need to do anything since we
2853 can borrow from 'bit 32'. */
2854 if (temp1 == 0 && topshift != 0)
2855 temp1 = 0x80000000 >> (topshift - 1);
2857 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2859 if (const_ok_for_arm (temp2))
2861 if (generate)
2863 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2864 emit_constant_insn (cond,
2865 gen_rtx_SET (VOIDmode, new_src,
2866 GEN_INT (temp1)));
2867 emit_constant_insn (cond,
2868 gen_addsi3 (target, new_src,
2869 GEN_INT (-temp2)));
2872 return 2;
2876 /* See if we can generate this by setting the bottom (or the top)
2877 16 bits, and then shifting these into the other half of the
2878 word. We only look for the simplest cases; to do more would cost
2879 too much. Be careful, however, not to generate this when the
2880 alternative would take fewer insns. */
2881 if (val & 0xffff0000)
2883 temp1 = remainder & 0xffff0000;
2884 temp2 = remainder & 0x0000ffff;
2886 /* Overlaps outside this range are best done using other methods. */
2887 for (i = 9; i < 24; i++)
2889 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2890 && !const_ok_for_arm (temp2))
2892 rtx new_src = (subtargets
2893 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2894 : target);
2895 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2896 source, subtargets, generate);
2897 source = new_src;
2898 if (generate)
2899 emit_constant_insn
2900 (cond,
2901 gen_rtx_SET
2902 (VOIDmode, target,
2903 gen_rtx_IOR (mode,
2904 gen_rtx_ASHIFT (mode, source,
2905 GEN_INT (i)),
2906 source)));
2907 return insns + 1;
2911 /* Don't duplicate cases already considered. */
2912 for (i = 17; i < 24; i++)
2914 if (((temp1 | (temp1 >> i)) == remainder)
2915 && !const_ok_for_arm (temp1))
2917 rtx new_src = (subtargets
2918 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2919 : target);
2920 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2921 source, subtargets, generate);
2922 source = new_src;
2923 if (generate)
2924 emit_constant_insn
2925 (cond,
2926 gen_rtx_SET (VOIDmode, target,
2927 gen_rtx_IOR
2928 (mode,
2929 gen_rtx_LSHIFTRT (mode, source,
2930 GEN_INT (i)),
2931 source)));
2932 return insns + 1;
2936 break;
2938 case IOR:
2939 case XOR:
2940 /* If we have IOR or XOR, and the constant can be loaded in a
2941 single instruction, and we can find a temporary to put it in,
2942 then this can be done in two instructions instead of 3-4. */
2943 if (subtargets
2944 /* TARGET can't be NULL if SUBTARGETS is 0 */
2945 || (reload_completed && !reg_mentioned_p (target, source)))
2947 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2949 if (generate)
2951 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2953 emit_constant_insn (cond,
2954 gen_rtx_SET (VOIDmode, sub,
2955 GEN_INT (val)));
2956 emit_constant_insn (cond,
2957 gen_rtx_SET (VOIDmode, target,
2958 gen_rtx_fmt_ee (code, mode,
2959 source, sub)));
2961 return 2;
2965 if (code == XOR)
2966 break;
2968 /* Convert.
2969 x = y | constant (which is composed of set_sign_bit_copies leading 1s
2970 followed by 0s, e.g. 0xfff00000)
2971 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2973 This can be done in 2 instructions by using shifts with mov or mvn.
2974 e.g. for
2975 x = x | 0xfff00000;
2976 we generate:
2977 mvn r0, r0, asl #12
2978 mvn r0, r0, lsr #12 */
2979 if (set_sign_bit_copies > 8
2980 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2982 if (generate)
2984 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2985 rtx shift = GEN_INT (set_sign_bit_copies);
2987 emit_constant_insn
2988 (cond,
2989 gen_rtx_SET (VOIDmode, sub,
2990 gen_rtx_NOT (mode,
2991 gen_rtx_ASHIFT (mode,
2992 source,
2993 shift))));
2994 emit_constant_insn
2995 (cond,
2996 gen_rtx_SET (VOIDmode, target,
2997 gen_rtx_NOT (mode,
2998 gen_rtx_LSHIFTRT (mode, sub,
2999 shift))));
3001 return 2;
3004 /* Convert
3005 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3007 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3009 E.g. for r0 = r0 | 0xfff we generate:
3010 mvn r0, r0, lsr #12
3011 mvn r0, r0, asl #12
3014 if (set_zero_bit_copies > 8
3015 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3017 if (generate)
3019 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3020 rtx shift = GEN_INT (set_zero_bit_copies);
3022 emit_constant_insn
3023 (cond,
3024 gen_rtx_SET (VOIDmode, sub,
3025 gen_rtx_NOT (mode,
3026 gen_rtx_LSHIFTRT (mode,
3027 source,
3028 shift))));
3029 emit_constant_insn
3030 (cond,
3031 gen_rtx_SET (VOIDmode, target,
3032 gen_rtx_NOT (mode,
3033 gen_rtx_ASHIFT (mode, sub,
3034 shift))));
3036 return 2;
3039 /* This will never be reached for Thumb2 because orn is a valid
3040 instruction. This is for Thumb-1 and the 32-bit ARM cases.
3042 x = y | constant (such that ~constant is a valid constant)
3043 Transform this to
3044 x = ~(~y & ~constant).
3046 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3048 if (generate)
3050 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3051 emit_constant_insn (cond,
3052 gen_rtx_SET (VOIDmode, sub,
3053 gen_rtx_NOT (mode, source)));
3054 source = sub;
3055 if (subtargets)
3056 sub = gen_reg_rtx (mode);
3057 emit_constant_insn (cond,
3058 gen_rtx_SET (VOIDmode, sub,
3059 gen_rtx_AND (mode, source,
3060 GEN_INT (temp1))));
3061 emit_constant_insn (cond,
3062 gen_rtx_SET (VOIDmode, target,
3063 gen_rtx_NOT (mode, sub)));
3065 return 3;
3067 break;
3069 case AND:
3070 /* See if two shifts will do 2 or more insns' worth of work. */
3071 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3073 HOST_WIDE_INT shift_mask = ((0xffffffff
3074 << (32 - clear_sign_bit_copies))
3075 & 0xffffffff);
3077 if ((remainder | shift_mask) != 0xffffffff)
3079 if (generate)
3081 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3082 insns = arm_gen_constant (AND, mode, cond,
3083 remainder | shift_mask,
3084 new_src, source, subtargets, 1);
3085 source = new_src;
3087 else
3089 rtx targ = subtargets ? NULL_RTX : target;
3090 insns = arm_gen_constant (AND, mode, cond,
3091 remainder | shift_mask,
3092 targ, source, subtargets, 0);
3096 if (generate)
3098 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3099 rtx shift = GEN_INT (clear_sign_bit_copies);
3101 emit_insn (gen_ashlsi3 (new_src, source, shift));
3102 emit_insn (gen_lshrsi3 (target, new_src, shift));
3105 return insns + 2;
3108 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3110 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3112 if ((remainder | shift_mask) != 0xffffffff)
3114 if (generate)
3116 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3118 insns = arm_gen_constant (AND, mode, cond,
3119 remainder | shift_mask,
3120 new_src, source, subtargets, 1);
3121 source = new_src;
3123 else
3125 rtx targ = subtargets ? NULL_RTX : target;
3127 insns = arm_gen_constant (AND, mode, cond,
3128 remainder | shift_mask,
3129 targ, source, subtargets, 0);
3133 if (generate)
3135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3136 rtx shift = GEN_INT (clear_zero_bit_copies);
3138 emit_insn (gen_lshrsi3 (new_src, source, shift));
3139 emit_insn (gen_ashlsi3 (target, new_src, shift));
3142 return insns + 2;
3145 break;
3147 default:
3148 break;
3151 for (i = 0; i < 32; i++)
3152 if (remainder & (1 << i))
3153 num_bits_set++;
3155 if ((code == AND) || (can_invert && num_bits_set > 16))
3156 remainder ^= 0xffffffff;
3157 else if (code == PLUS && num_bits_set > 16)
3158 remainder = (-remainder) & 0xffffffff;
3160 /* For XOR, if more than half the bits are set and there's a sequence
3161 of more than 8 consecutive ones in the pattern then we can XOR by the
3162 inverted constant and then invert the final result; this may save an
3163 instruction and might also lead to the final mvn being merged with
3164 some other operation. */
3165 else if (code == XOR && num_bits_set > 16
3166 && (count_insns_for_constant (remainder ^ 0xffffffff,
3167 find_best_start
3168 (remainder ^ 0xffffffff))
3169 < count_insns_for_constant (remainder,
3170 find_best_start (remainder))))
3172 remainder ^= 0xffffffff;
3173 final_invert = 1;
3175 else
3177 can_invert = 0;
3178 can_negate = 0;
3181 /* Now try and find a way of doing the job in either two or three
3182 instructions.
3183 We start by looking for the largest block of zeros that are aligned on
3184 a 2-bit boundary; we then fill up the temps, wrapping around to the
3185 top of the word when we drop off the bottom.
3186 In the worst case this code should produce no more than four insns.
3187 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3188 best place to start. */
3190 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3191 the same. */
3193 /* Now start emitting the insns. */
3194 i = find_best_start (remainder);
3197 int end;
3199 if (i <= 0)
3200 i += 32;
3201 if (remainder & (3 << (i - 2)))
3203 end = i - 8;
3204 if (end < 0)
3205 end += 32;
3206 temp1 = remainder & ((0x0ff << end)
3207 | ((i < end) ? (0xff >> (32 - end)) : 0));
3208 remainder &= ~temp1;
3210 if (generate)
3212 rtx new_src, temp1_rtx;
3214 if (code == SET || code == MINUS)
3216 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3217 if (can_invert && code != MINUS)
3218 temp1 = ~temp1;
3220 else
3222 if ((final_invert || remainder) && subtargets)
3223 new_src = gen_reg_rtx (mode);
3224 else
3225 new_src = target;
3226 if (can_invert)
3227 temp1 = ~temp1;
3228 else if (can_negate)
3229 temp1 = -temp1;
3232 temp1 = trunc_int_for_mode (temp1, mode);
3233 temp1_rtx = GEN_INT (temp1);
3235 if (code == SET)
3237 else if (code == MINUS)
3238 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3239 else
3240 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3242 emit_constant_insn (cond,
3243 gen_rtx_SET (VOIDmode, new_src,
3244 temp1_rtx));
3245 source = new_src;
3248 if (code == SET)
3250 can_invert = 0;
3251 code = PLUS;
3253 else if (code == MINUS)
3254 code = PLUS;
3256 insns++;
3257 i -= 8 - step_size;
3259 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3260 shifts. */
3261 i -= step_size;
3263 while (remainder);
3266 if (final_invert)
3268 if (generate)
3269 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3270 gen_rtx_NOT (mode, source)));
3271 insns++;
3274 return insns;
3277 /* Canonicalize a comparison so that we are more likely to recognize it.
3278 This can be done for a few constant compares, where we can make the
3279 immediate value easier to load. */
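/* For example, (GT reg (const_int 0xfff)) is rewritten here as
   (GE reg (const_int 0x1000)), since 0x1000 is a valid ARM immediate
   while 0xfff is not. */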
3281 enum rtx_code
3282 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3284 enum machine_mode mode;
3285 unsigned HOST_WIDE_INT i, maxval;
3287 mode = GET_MODE (*op0);
3288 if (mode == VOIDmode)
3289 mode = GET_MODE (*op1);
3291 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3293 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3294 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3295 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3296 for GTU/LEU in Thumb mode. */
3297 if (mode == DImode)
3299 rtx tem;
3301 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3302 available. */
3303 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3304 return code;
3306 if (code == GT || code == LE
3307 || (!TARGET_ARM && (code == GTU || code == LEU)))
3309 /* Missing comparison. First try to use an available
3310 comparison. */
3311 if (GET_CODE (*op1) == CONST_INT)
3313 i = INTVAL (*op1);
3314 switch (code)
3316 case GT:
3317 case LE:
3318 if (i != maxval
3319 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3321 *op1 = GEN_INT (i + 1);
3322 return code == GT ? GE : LT;
3324 break;
3325 case GTU:
3326 case LEU:
3327 if (i != ~((unsigned HOST_WIDE_INT) 0)
3328 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3330 *op1 = GEN_INT (i + 1);
3331 return code == GTU ? GEU : LTU;
3333 break;
3334 default:
3335 gcc_unreachable ();
3339 /* If that did not work, reverse the condition. */
3340 tem = *op0;
3341 *op0 = *op1;
3342 *op1 = tem;
3343 return swap_condition (code);
3346 return code;
3349 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3350 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3351 to facilitate possible combining with a cmp into 'ands'. */
3352 if (mode == SImode
3353 && GET_CODE (*op0) == ZERO_EXTEND
3354 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3355 && GET_MODE (XEXP (*op0, 0)) == QImode
3356 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3357 && subreg_lowpart_p (XEXP (*op0, 0))
3358 && *op1 == const0_rtx)
3359 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3360 GEN_INT (255));
3362 /* Comparisons smaller than DImode. Only adjust comparisons against
3363 an out-of-range constant. */
3364 if (GET_CODE (*op1) != CONST_INT
3365 || const_ok_for_arm (INTVAL (*op1))
3366 || const_ok_for_arm (- INTVAL (*op1)))
3367 return code;
3369 i = INTVAL (*op1);
3371 switch (code)
3373 case EQ:
3374 case NE:
3375 return code;
3377 case GT:
3378 case LE:
3379 if (i != maxval
3380 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3382 *op1 = GEN_INT (i + 1);
3383 return code == GT ? GE : LT;
3385 break;
3387 case GE:
3388 case LT:
3389 if (i != ~maxval
3390 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3392 *op1 = GEN_INT (i - 1);
3393 return code == GE ? GT : LE;
3395 break;
3397 case GTU:
3398 case LEU:
3399 if (i != ~((unsigned HOST_WIDE_INT) 0)
3400 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3402 *op1 = GEN_INT (i + 1);
3403 return code == GTU ? GEU : LTU;
3405 break;
3407 case GEU:
3408 case LTU:
3409 if (i != 0
3410 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3412 *op1 = GEN_INT (i - 1);
3413 return code == GEU ? GTU : LEU;
3415 break;
3417 default:
3418 gcc_unreachable ();
3421 return code;
3425 /* Define how to find the value returned by a function. */
3427 static rtx
3428 arm_function_value(const_tree type, const_tree func,
3429 bool outgoing ATTRIBUTE_UNUSED)
3431 enum machine_mode mode;
3432 int unsignedp ATTRIBUTE_UNUSED;
3433 rtx r ATTRIBUTE_UNUSED;
3435 mode = TYPE_MODE (type);
3437 if (TARGET_AAPCS_BASED)
3438 return aapcs_allocate_return_reg (mode, type, func);
3440 /* Promote integer types. */
3441 if (INTEGRAL_TYPE_P (type))
3442 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3444 /* Promote small structs returned in a register to full-word size
3445 for big-endian AAPCS. */
3446 if (arm_return_in_msb (type))
3448 HOST_WIDE_INT size = int_size_in_bytes (type);
3449 if (size % UNITS_PER_WORD != 0)
3451 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3452 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3456 return LIBCALL_VALUE (mode);
3459 static int
3460 libcall_eq (const void *p1, const void *p2)
3462 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3465 static hashval_t
3466 libcall_hash (const void *p1)
3468 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3471 static void
3472 add_libcall (htab_t htab, rtx libcall)
3474 *htab_find_slot (htab, libcall, INSERT) = libcall;
3477 static bool
3478 arm_libcall_uses_aapcs_base (const_rtx libcall)
3480 static bool init_done = false;
3481 static htab_t libcall_htab;
3483 if (!init_done)
3485 init_done = true;
3487 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3488 NULL);
3489 add_libcall (libcall_htab,
3490 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3491 add_libcall (libcall_htab,
3492 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3493 add_libcall (libcall_htab,
3494 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3495 add_libcall (libcall_htab,
3496 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3498 add_libcall (libcall_htab,
3499 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3500 add_libcall (libcall_htab,
3501 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3502 add_libcall (libcall_htab,
3503 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3504 add_libcall (libcall_htab,
3505 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3507 add_libcall (libcall_htab,
3508 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3509 add_libcall (libcall_htab,
3510 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3511 add_libcall (libcall_htab,
3512 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3513 add_libcall (libcall_htab,
3514 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3515 add_libcall (libcall_htab,
3516 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3517 add_libcall (libcall_htab,
3518 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3520 /* Values from double-precision helper functions are returned in core
3521 registers if the selected core only supports single-precision
3522 arithmetic, even if we are using the hard-float ABI. The same is
3523 true for single-precision helpers, but we will never be using the
3524 hard-float ABI on a CPU which doesn't support single-precision
3525 operations in hardware. */
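/* For example, on a core with only single-precision floating-point
   hardware, the DFmode division helper registered below returns its
   result in core registers (r0/r1) even under the hard-float ABI. */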
3526 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3527 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3528 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3529 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3530 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3531 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3532 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3533 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3534 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3535 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3536 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3537 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3538 SFmode));
3539 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3540 DFmode));
3543 return libcall && htab_find (libcall_htab, libcall) != NULL;
3547 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3549 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3550 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3552 /* The following libcalls return their result in integer registers,
3553 even though they return a floating point value. */
3554 if (arm_libcall_uses_aapcs_base (libcall))
3555 return gen_rtx_REG (mode, ARG_REGISTER(1));
3559 return LIBCALL_VALUE (mode);
3562 /* Determine the amount of memory needed to store the possible return
3563 registers of an untyped call. */
3565 arm_apply_result_size (void)
3567 int size = 16;
3569 if (TARGET_32BIT)
3571 if (TARGET_HARD_FLOAT_ABI)
3573 if (TARGET_VFP)
3574 size += 32;
3575 if (TARGET_FPA)
3576 size += 12;
3577 if (TARGET_MAVERICK)
3578 size += 8;
3580 if (TARGET_IWMMXT_ABI)
3581 size += 8;
3584 return size;
3587 /* Decide whether TYPE should be returned in memory (true)
3588 or in a register (false). FNTYPE is the type of the function making
3589 the call. */
3590 static bool
3591 arm_return_in_memory (const_tree type, const_tree fntype)
3593 HOST_WIDE_INT size;
3595 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3597 if (TARGET_AAPCS_BASED)
3599 /* Simple, non-aggregate types (i.e. not including vectors and
3600 complex) are always returned in a register (or registers).
3601 We don't care about which register here, so we can short-cut
3602 some of the detail. */
3603 if (!AGGREGATE_TYPE_P (type)
3604 && TREE_CODE (type) != VECTOR_TYPE
3605 && TREE_CODE (type) != COMPLEX_TYPE)
3606 return false;
3608 /* Any return value that is no larger than one word can be
3609 returned in r0. */
3610 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3611 return false;
3613 /* Check any available co-processors to see if they accept the
3614 type as a register candidate (VFP, for example, can return
3615 some aggregates in consecutive registers). These aren't
3616 available if the call is variadic. */
3617 if (aapcs_select_return_coproc (type, fntype) >= 0)
3618 return false;
3620 /* Vector values should be returned using ARM registers, not
3621 memory (unless they're over 16 bytes, which will break since
3622 we only have four call-clobbered registers to play with). */
3623 if (TREE_CODE (type) == VECTOR_TYPE)
3624 return (size < 0 || size > (4 * UNITS_PER_WORD));
3626 /* The rest go in memory. */
3627 return true;
3630 if (TREE_CODE (type) == VECTOR_TYPE)
3631 return (size < 0 || size > (4 * UNITS_PER_WORD));
3633 if (!AGGREGATE_TYPE_P (type) &&
3634 (TREE_CODE (type) != VECTOR_TYPE))
3635 /* All simple types are returned in registers. */
3636 return false;
3638 if (arm_abi != ARM_ABI_APCS)
3640 /* ATPCS and later return aggregate types in memory only if they are
3641 larger than a word (or are variable size). */
3642 return (size < 0 || size > UNITS_PER_WORD);
3645 /* For the arm-wince targets we choose to be compatible with Microsoft's
3646 ARM and Thumb compilers, which always return aggregates in memory. */
3647 #ifndef ARM_WINCE
3648 /* All structures/unions bigger than one word are returned in memory.
3649 Also catch the case where int_size_in_bytes returns -1. In this case
3650 the aggregate is either huge or of variable size, and in either case
3651 we will want to return it via memory and not in a register. */
3652 if (size < 0 || size > UNITS_PER_WORD)
3653 return true;
3655 if (TREE_CODE (type) == RECORD_TYPE)
3657 tree field;
3659 /* For a struct the APCS says that we only return in a register
3660 if the type is 'integer like' and every addressable element
3661 has an offset of zero. For practical purposes this means
3662 that the structure can have at most one non bit-field element
3663 and that this element must be the first one in the structure. */
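/* For example, struct { char c; } satisfies this and is returned in
   r0, whereas struct { float f; } is returned in memory because its
   first (and only) field is a float.  */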
3665 /* Find the first field, ignoring non FIELD_DECL things which will
3666 have been created by C++. */
3667 for (field = TYPE_FIELDS (type);
3668 field && TREE_CODE (field) != FIELD_DECL;
3669 field = DECL_CHAIN (field))
3670 continue;
3672 if (field == NULL)
3673 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3675 /* Check that the first field is valid for returning in a register. */
3677 /* ... Floats are not allowed */
3678 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3679 return true;
3681 /* ... Aggregates that are not themselves valid for returning in
3682 a register are not allowed. */
3683 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3684 return true;
3686 /* Now check the remaining fields, if any. Only bitfields are allowed,
3687 since they are not addressable. */
3688 for (field = DECL_CHAIN (field);
3689 field;
3690 field = DECL_CHAIN (field))
3692 if (TREE_CODE (field) != FIELD_DECL)
3693 continue;
3695 if (!DECL_BIT_FIELD_TYPE (field))
3696 return true;
3699 return false;
3702 if (TREE_CODE (type) == UNION_TYPE)
3704 tree field;
3706 /* Unions can be returned in registers if every element is
3707 integral, or can be returned in an integer register. */
3708 for (field = TYPE_FIELDS (type);
3709 field;
3710 field = DECL_CHAIN (field))
3712 if (TREE_CODE (field) != FIELD_DECL)
3713 continue;
3715 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3716 return true;
3718 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3719 return true;
3722 return false;
3724 #endif /* not ARM_WINCE */
3726 /* Return all other types in memory. */
3727 return true;
3730 /* Indicate whether or not words of a double are in big-endian order. */
3733 arm_float_words_big_endian (void)
3735 if (TARGET_MAVERICK)
3736 return 0;
3738 /* For FPA, float words are always big-endian. For VFP, float words
3739 follow the memory system mode. */
3741 if (TARGET_FPA)
3743 return 1;
3746 if (TARGET_VFP)
3747 return (TARGET_BIG_END ? 1 : 0);
3749 return 1;
3752 const struct pcs_attribute_arg
3754 const char *arg;
3755 enum arm_pcs value;
3756 } pcs_attribute_args[] =
3758 {"aapcs", ARM_PCS_AAPCS},
3759 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3760 #if 0
3761 /* We could recognize these, but changes would be needed elsewhere
3762 * to implement them. */
3763 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3764 {"atpcs", ARM_PCS_ATPCS},
3765 {"apcs", ARM_PCS_APCS},
3766 #endif
3767 {NULL, ARM_PCS_UNKNOWN}
3770 static enum arm_pcs
3771 arm_pcs_from_attribute (tree attr)
3773 const struct pcs_attribute_arg *ptr;
3774 const char *arg;
3776 /* Get the value of the argument. */
3777 if (TREE_VALUE (attr) == NULL_TREE
3778 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3779 return ARM_PCS_UNKNOWN;
3781 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3783 /* Check it against the list of known arguments. */
3784 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3785 if (streq (arg, ptr->arg))
3786 return ptr->value;
3788 /* An unrecognized PCS variant. */
3789 return ARM_PCS_UNKNOWN;
3792 /* Get the PCS variant to use for this call. TYPE is the function's type
3793 specification, DECL is the specific declaration. DECL may be null if
3794 the call could be indirect or if this is a library call. */
3795 static enum arm_pcs
3796 arm_get_pcs_model (const_tree type, const_tree decl)
3798 bool user_convention = false;
3799 enum arm_pcs user_pcs = arm_pcs_default;
3800 tree attr;
3802 gcc_assert (type);
3804 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3805 if (attr)
3807 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3808 user_convention = true;
3811 if (TARGET_AAPCS_BASED)
3813 /* Detect varargs functions. These always use the base rules
3814 (no argument is ever a candidate for a co-processor
3815 register). */
3816 bool base_rules = stdarg_p (type);
3818 if (user_convention)
3820 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3821 sorry ("non-AAPCS derived PCS variant");
3822 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3823 error ("variadic functions must use the base AAPCS variant");
3826 if (base_rules)
3827 return ARM_PCS_AAPCS;
3828 else if (user_convention)
3829 return user_pcs;
3830 else if (decl && flag_unit_at_a_time)
3832 /* Local functions never leak outside this compilation unit,
3833 so we are free to use whatever conventions are
3834 appropriate. */
3835 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3836 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3837 if (i && i->local)
3838 return ARM_PCS_AAPCS_LOCAL;
3841 else if (user_convention && user_pcs != arm_pcs_default)
3842 sorry ("PCS variant");
3844 /* For everything else we use the target's default. */
3845 return arm_pcs_default;
3849 static void
3850 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3851 const_tree fntype ATTRIBUTE_UNUSED,
3852 rtx libcall ATTRIBUTE_UNUSED,
3853 const_tree fndecl ATTRIBUTE_UNUSED)
3855 /* Record the unallocated VFP registers. */
3856 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3857 pcum->aapcs_vfp_reg_alloc = 0;
3860 /* Walk down the type tree of TYPE counting consecutive base elements.
3861 If *MODEP is VOIDmode, then set it to the first valid floating point
3862 type. If a non-floating point type is found, or if a floating point
3863 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3864 otherwise return the count in the sub-tree. */
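/* For example, struct { double x, y; } sets *MODEP to DFmode and
   returns 2 (a homogeneous aggregate of two doubles), while
   struct { float f; double d; } returns -1 because the element modes
   differ.  */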
3865 static int
3866 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3868 enum machine_mode mode;
3869 HOST_WIDE_INT size;
3871 switch (TREE_CODE (type))
3873 case REAL_TYPE:
3874 mode = TYPE_MODE (type);
3875 if (mode != DFmode && mode != SFmode)
3876 return -1;
3878 if (*modep == VOIDmode)
3879 *modep = mode;
3881 if (*modep == mode)
3882 return 1;
3884 break;
3886 case COMPLEX_TYPE:
3887 mode = TYPE_MODE (TREE_TYPE (type));
3888 if (mode != DFmode && mode != SFmode)
3889 return -1;
3891 if (*modep == VOIDmode)
3892 *modep = mode;
3894 if (*modep == mode)
3895 return 2;
3897 break;
3899 case VECTOR_TYPE:
3900 /* Use V2SImode and V4SImode as representatives of all 64-bit
3901 and 128-bit vector types, whether or not those modes are
3902 supported with the present options. */
3903 size = int_size_in_bytes (type);
3904 switch (size)
3906 case 8:
3907 mode = V2SImode;
3908 break;
3909 case 16:
3910 mode = V4SImode;
3911 break;
3912 default:
3913 return -1;
3916 if (*modep == VOIDmode)
3917 *modep = mode;
3919 /* Vector modes are considered to be opaque: two vectors are
3920 equivalent for the purposes of being homogeneous aggregates
3921 if they are the same size. */
3922 if (*modep == mode)
3923 return 1;
3925 break;
3927 case ARRAY_TYPE:
3929 int count;
3930 tree index = TYPE_DOMAIN (type);
3932 /* Can't handle incomplete types. */
3933 if (!COMPLETE_TYPE_P(type))
3934 return -1;
3936 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3937 if (count == -1
3938 || !index
3939 || !TYPE_MAX_VALUE (index)
3940 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3941 || !TYPE_MIN_VALUE (index)
3942 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3943 || count < 0)
3944 return -1;
3946 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3947 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3949 /* There must be no padding. */
3950 if (!host_integerp (TYPE_SIZE (type), 1)
3951 || (tree_low_cst (TYPE_SIZE (type), 1)
3952 != count * GET_MODE_BITSIZE (*modep)))
3953 return -1;
3955 return count;
3958 case RECORD_TYPE:
3960 int count = 0;
3961 int sub_count;
3962 tree field;
3964 /* Can't handle incomplete types. */
3965 if (!COMPLETE_TYPE_P(type))
3966 return -1;
3968 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3970 if (TREE_CODE (field) != FIELD_DECL)
3971 continue;
3973 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3974 if (sub_count < 0)
3975 return -1;
3976 count += sub_count;
3979 /* There must be no padding. */
3980 if (!host_integerp (TYPE_SIZE (type), 1)
3981 || (tree_low_cst (TYPE_SIZE (type), 1)
3982 != count * GET_MODE_BITSIZE (*modep)))
3983 return -1;
3985 return count;
3988 case UNION_TYPE:
3989 case QUAL_UNION_TYPE:
3991 /* These aren't very interesting except in a degenerate case. */
3992 int count = 0;
3993 int sub_count;
3994 tree field;
3996 /* Can't handle incomplete types. */
3997 if (!COMPLETE_TYPE_P(type))
3998 return -1;
4000 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4002 if (TREE_CODE (field) != FIELD_DECL)
4003 continue;
4005 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4006 if (sub_count < 0)
4007 return -1;
4008 count = count > sub_count ? count : sub_count;
4011 /* There must be no padding. */
4012 if (!host_integerp (TYPE_SIZE (type), 1)
4013 || (tree_low_cst (TYPE_SIZE (type), 1)
4014 != count * GET_MODE_BITSIZE (*modep)))
4015 return -1;
4017 return count;
4020 default:
4021 break;
4024 return -1;
4027 /* Return true if PCS_VARIANT should use VFP registers. */
4028 static bool
4029 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4031 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4033 static bool seen_thumb1_vfp = false;
4035 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4037 sorry ("Thumb-1 hard-float VFP ABI");
4038 /* sorry() is not immediately fatal, so only display this once. */
4039 seen_thumb1_vfp = true;
4042 return true;
4045 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4046 return false;
4048 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4049 (TARGET_VFP_DOUBLE || !is_double));
4052 static bool
4053 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4054 enum machine_mode mode, const_tree type,
4055 enum machine_mode *base_mode, int *count)
4057 enum machine_mode new_mode = VOIDmode;
4059 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4060 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4061 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4063 *count = 1;
4064 new_mode = mode;
4066 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4068 *count = 2;
4069 new_mode = (mode == DCmode ? DFmode : SFmode);
4071 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4073 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4075 if (ag_count > 0 && ag_count <= 4)
4076 *count = ag_count;
4077 else
4078 return false;
4080 else
4081 return false;
4084 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4085 return false;
4087 *base_mode = new_mode;
4088 return true;
4091 static bool
4092 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4093 enum machine_mode mode, const_tree type)
4095 int count ATTRIBUTE_UNUSED;
4096 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4098 if (!use_vfp_abi (pcs_variant, false))
4099 return false;
4100 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4101 &ag_mode, &count);
4104 static bool
4105 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4106 const_tree type)
4108 if (!use_vfp_abi (pcum->pcs_variant, false))
4109 return false;
4111 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4112 &pcum->aapcs_vfp_rmode,
4113 &pcum->aapcs_vfp_rcount);
4116 static bool
4117 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4118 const_tree type ATTRIBUTE_UNUSED)
4120 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4121 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4122 int regno;
4124 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4125 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4127 pcum->aapcs_vfp_reg_alloc = mask << regno;
4128 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4130 int i;
4131 int rcount = pcum->aapcs_vfp_rcount;
4132 int rshift = shift;
4133 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4134 rtx par;
4135 if (!TARGET_NEON)
4137 /* Avoid using unsupported vector modes. */
4138 if (rmode == V2SImode)
4139 rmode = DImode;
4140 else if (rmode == V4SImode)
4142 rmode = DImode;
4143 rcount *= 2;
4144 rshift /= 2;
4147 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4148 for (i = 0; i < rcount; i++)
4150 rtx tmp = gen_rtx_REG (rmode,
4151 FIRST_VFP_REGNUM + regno + i * rshift);
4152 tmp = gen_rtx_EXPR_LIST
4153 (VOIDmode, tmp,
4154 GEN_INT (i * GET_MODE_SIZE (rmode)));
4155 XVECEXP (par, 0, i) = tmp;
4158 pcum->aapcs_reg = par;
4160 else
4161 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4162 return true;
4164 return false;
4167 static rtx
4168 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4169 enum machine_mode mode,
4170 const_tree type ATTRIBUTE_UNUSED)
4172 if (!use_vfp_abi (pcs_variant, false))
4173 return NULL;
4175 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4177 int count;
4178 enum machine_mode ag_mode;
4179 int i;
4180 rtx par;
4181 int shift;
4183 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4184 &ag_mode, &count);
4186 if (!TARGET_NEON)
4188 if (ag_mode == V2SImode)
4189 ag_mode = DImode;
4190 else if (ag_mode == V4SImode)
4192 ag_mode = DImode;
4193 count *= 2;
4196 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4197 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4198 for (i = 0; i < count; i++)
4200 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4201 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4202 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4203 XVECEXP (par, 0, i) = tmp;
4206 return par;
4209 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4212 static void
4213 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4214 enum machine_mode mode ATTRIBUTE_UNUSED,
4215 const_tree type ATTRIBUTE_UNUSED)
4217 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4218 pcum->aapcs_vfp_reg_alloc = 0;
4219 return;
4222 #define AAPCS_CP(X) \
4224 aapcs_ ## X ## _cum_init, \
4225 aapcs_ ## X ## _is_call_candidate, \
4226 aapcs_ ## X ## _allocate, \
4227 aapcs_ ## X ## _is_return_candidate, \
4228 aapcs_ ## X ## _allocate_return_reg, \
4229 aapcs_ ## X ## _advance \
4232 /* Table of co-processors that can be used to pass arguments in
4233 registers. Ideally no argument should be a candidate for more than
4234 one co-processor table entry, but the table is processed in order
4235 and stops after the first match. If that entry then fails to put
4236 the argument into a co-processor register, the argument will go on
4237 the stack. */
4238 static struct
4240 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4241 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4243 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4244 BLKmode) is a candidate for this co-processor's registers; this
4245 function should ignore any position-dependent state in
4246 CUMULATIVE_ARGS and only use call-type dependent information. */
4247 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4249 /* Return true if the argument does get a co-processor register; it
4250 should set aapcs_reg to an RTX of the register allocated as is
4251 required for a return from FUNCTION_ARG. */
4252 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4254 /* Return true if a result of mode MODE (or type TYPE if MODE is
4255 BLKmode) can be returned in this co-processor's registers. */
4256 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4258 /* Allocate and return an RTX element to hold the return type of a
4259 call. This routine must not fail and will only be called if
4260 is_return_candidate returned true with the same parameters. */
4261 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4263 /* Finish processing this argument and prepare to start processing
4264 the next one. */
4265 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4266 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4268 AAPCS_CP(vfp)
4271 #undef AAPCS_CP
4273 static int
4274 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4275 const_tree type)
4277 int i;
4279 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4280 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4281 return i;
4283 return -1;
4286 static int
4287 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4289 /* We aren't passed a decl, so we can't check that a call is local.
4290 However, it isn't clear that that would be a win anyway, since it
4291 might limit some tail-calling opportunities. */
4292 enum arm_pcs pcs_variant;
4294 if (fntype)
4296 const_tree fndecl = NULL_TREE;
4298 if (TREE_CODE (fntype) == FUNCTION_DECL)
4300 fndecl = fntype;
4301 fntype = TREE_TYPE (fntype);
4304 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4306 else
4307 pcs_variant = arm_pcs_default;
4309 if (pcs_variant != ARM_PCS_AAPCS)
4311 int i;
4313 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4314 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4315 TYPE_MODE (type),
4316 type))
4317 return i;
4319 return -1;
4322 static rtx
4323 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4324 const_tree fntype)
4326 /* We aren't passed a decl, so we can't check that a call is local.
4327 However, it isn't clear that that would be a win anyway, since it
4328 might limit some tail-calling opportunities. */
4329 enum arm_pcs pcs_variant;
4330 int unsignedp ATTRIBUTE_UNUSED;
4332 if (fntype)
4334 const_tree fndecl = NULL_TREE;
4336 if (TREE_CODE (fntype) == FUNCTION_DECL)
4338 fndecl = fntype;
4339 fntype = TREE_TYPE (fntype);
4342 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4344 else
4345 pcs_variant = arm_pcs_default;
4347 /* Promote integer types. */
4348 if (type && INTEGRAL_TYPE_P (type))
4349 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4351 if (pcs_variant != ARM_PCS_AAPCS)
4353 int i;
4355 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4356 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4357 type))
4358 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4359 mode, type);
4362 /* Promote small structs returned in a register to full-word size
4363 for big-endian AAPCS. */
4364 if (type && arm_return_in_msb (type))
4366 HOST_WIDE_INT size = int_size_in_bytes (type);
4367 if (size % UNITS_PER_WORD != 0)
4369 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4370 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4374 return gen_rtx_REG (mode, R0_REGNUM);
4378 aapcs_libcall_value (enum machine_mode mode)
4380 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4381 && GET_MODE_SIZE (mode) <= 4)
4382 mode = SImode;
4384 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4387 /* Lay out a function argument using the AAPCS rules. The rule
4388 numbers referred to here are those in the AAPCS. */
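/* For example, with the base (soft-float) variant, a call to
   f (int a, double b, int c) allocates a to r0, rounds the NCRN up to
   2 for the doubleword-aligned b (rule C.3) so that b occupies r2/r3
   (rule C.4), and then c goes on the stack since no core registers
   remain (rules C.6-C.8).  */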
4389 static void
4390 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4391 const_tree type, bool named)
4393 int nregs, nregs2;
4394 int ncrn;
4396 /* We only need to do this once per argument. */
4397 if (pcum->aapcs_arg_processed)
4398 return;
4400 pcum->aapcs_arg_processed = true;
4402 /* Special case: if named is false then we are handling an incoming
4403 anonymous argument which is on the stack. */
4404 if (!named)
4405 return;
4407 /* Is this a potential co-processor register candidate? */
4408 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4410 int slot = aapcs_select_call_coproc (pcum, mode, type);
4411 pcum->aapcs_cprc_slot = slot;
4413 /* We don't have to apply any of the rules from part B of the
4414 preparation phase, these are handled elsewhere in the
4415 compiler. */
4417 if (slot >= 0)
4419 /* A Co-processor register candidate goes either in its own
4420 class of registers or on the stack. */
4421 if (!pcum->aapcs_cprc_failed[slot])
4423 /* C1.cp - Try to allocate the argument to co-processor
4424 registers. */
4425 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4426 return;
4428 /* C2.cp - Put the argument on the stack and note that we
4429 can't assign any more candidates in this slot. We also
4430 need to note that we have allocated stack space, so that
4431 we won't later try to split a non-cprc candidate between
4432 core registers and the stack. */
4433 pcum->aapcs_cprc_failed[slot] = true;
4434 pcum->can_split = false;
4437 /* We didn't get a register, so this argument goes on the
4438 stack. */
4439 gcc_assert (pcum->can_split == false);
4440 return;
4444 /* C3 - For double-word aligned arguments, round the NCRN up to the
4445 next even number. */
4446 ncrn = pcum->aapcs_ncrn;
4447 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4448 ncrn++;
4450 nregs = ARM_NUM_REGS2(mode, type);
4452 /* Sigh, this test should really assert that nregs > 0, but a GCC
4453 extension allows empty structs and then gives them empty size; it
4454 then allows such a structure to be passed by value. For some of
4455 the code below we have to pretend that such an argument has
4456 non-zero size so that we 'locate' it correctly either in
4457 registers or on the stack. */
4458 gcc_assert (nregs >= 0);
4460 nregs2 = nregs ? nregs : 1;
4462 /* C4 - Argument fits entirely in core registers. */
4463 if (ncrn + nregs2 <= NUM_ARG_REGS)
4465 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4466 pcum->aapcs_next_ncrn = ncrn + nregs;
4467 return;
4470 /* C5 - Some core registers left and there are no arguments already
4471 on the stack: split this argument between the remaining core
4472 registers and the stack. */
4473 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4475 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4476 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4477 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4478 return;
4481 /* C6 - NCRN is set to 4. */
4482 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4484 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4485 return;
4488 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4489 for a call to a function whose data type is FNTYPE.
4490 For a library call, FNTYPE is NULL. */
4491 void
4492 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4493 rtx libname,
4494 tree fndecl ATTRIBUTE_UNUSED)
4496 /* Long call handling. */
4497 if (fntype)
4498 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4499 else
4500 pcum->pcs_variant = arm_pcs_default;
4502 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4504 if (arm_libcall_uses_aapcs_base (libname))
4505 pcum->pcs_variant = ARM_PCS_AAPCS;
4507 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4508 pcum->aapcs_reg = NULL_RTX;
4509 pcum->aapcs_partial = 0;
4510 pcum->aapcs_arg_processed = false;
4511 pcum->aapcs_cprc_slot = -1;
4512 pcum->can_split = true;
4514 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4516 int i;
4518 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4520 pcum->aapcs_cprc_failed[i] = false;
4521 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4524 return;
4527 /* Legacy ABIs */
4529 /* On the ARM, the offset starts at 0. */
4530 pcum->nregs = 0;
4531 pcum->iwmmxt_nregs = 0;
4532 pcum->can_split = true;
4534 /* Varargs vectors are treated the same as long long.
4535 named_count avoids having to change the way arm handles 'named' */
4536 pcum->named_count = 0;
4537 pcum->nargs = 0;
4539 if (TARGET_REALLY_IWMMXT && fntype)
4541 tree fn_arg;
4543 for (fn_arg = TYPE_ARG_TYPES (fntype);
4544 fn_arg;
4545 fn_arg = TREE_CHAIN (fn_arg))
4546 pcum->named_count += 1;
4548 if (! pcum->named_count)
4549 pcum->named_count = INT_MAX;
4554 /* Return true if mode/type need doubleword alignment. */
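/* For AAPCS this holds for DImode and DFmode values (8-byte natural
   alignment) and for any type declared with __attribute__ ((aligned (8))),
   since PARM_BOUNDARY is 32 bits.  */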
4555 static bool
4556 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4558 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4559 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4563 /* Determine where to put an argument to a function.
4564 Value is zero to push the argument on the stack,
4565 or a hard register in which to store the argument.
4567 MODE is the argument's machine mode.
4568 TYPE is the data type of the argument (as a tree).
4569 This is null for libcalls where that information may
4570 not be available.
4571 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4572 the preceding args and about the function being called.
4573 NAMED is nonzero if this argument is a named parameter
4574 (otherwise it is an extra parameter matching an ellipsis).
4576 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4577 other arguments are passed on the stack. If (NAMED == 0) (which happens
4578 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4579 defined), say it is passed on the stack (function_prologue will
4580 indeed make it pass on the stack if necessary). */
4582 static rtx
4583 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4584 const_tree type, bool named)
4586 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4587 int nregs;
4589 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4590 a call insn (op3 of a call_value insn). */
4591 if (mode == VOIDmode)
4592 return const0_rtx;
4594 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4596 aapcs_layout_arg (pcum, mode, type, named);
4597 return pcum->aapcs_reg;
4600 /* Varargs vectors are treated the same as long long.
4601 named_count avoids having to change the way arm handles 'named' */
4602 if (TARGET_IWMMXT_ABI
4603 && arm_vector_mode_supported_p (mode)
4604 && pcum->named_count > pcum->nargs + 1)
4606 if (pcum->iwmmxt_nregs <= 9)
4607 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4608 else
4610 pcum->can_split = false;
4611 return NULL_RTX;
4615 /* Put doubleword aligned quantities in even register pairs. */
4616 if (pcum->nregs & 1
4617 && ARM_DOUBLEWORD_ALIGN
4618 && arm_needs_doubleword_align (mode, type))
4619 pcum->nregs++;
4621 /* Only allow splitting an arg between regs and memory if all preceding
4622 args were allocated to regs. For args passed by reference we only count
4623 the reference pointer. */
4624 if (pcum->can_split)
4625 nregs = 1;
4626 else
4627 nregs = ARM_NUM_REGS2 (mode, type);
4629 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4630 return NULL_RTX;
4632 return gen_rtx_REG (mode, pcum->nregs);
4635 static unsigned int
4636 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4638 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4639 ? DOUBLEWORD_ALIGNMENT
4640 : PARM_BOUNDARY);
4643 static int
4644 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4645 tree type, bool named)
4647 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4648 int nregs = pcum->nregs;
4650 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4652 aapcs_layout_arg (pcum, mode, type, named);
4653 return pcum->aapcs_partial;
4656 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4657 return 0;
4659 if (NUM_ARG_REGS > nregs
4660 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4661 && pcum->can_split)
4662 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4664 return 0;
4667 /* Update the data in PCUM to advance over an argument
4668 of mode MODE and data type TYPE.
4669 (TYPE is null for libcalls where that information may not be available.) */
4671 static void
4672 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4673 const_tree type, bool named)
4675 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4677 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4679 aapcs_layout_arg (pcum, mode, type, named);
4681 if (pcum->aapcs_cprc_slot >= 0)
4683 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4684 type);
4685 pcum->aapcs_cprc_slot = -1;
4688 /* Generic stuff. */
4689 pcum->aapcs_arg_processed = false;
4690 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4691 pcum->aapcs_reg = NULL_RTX;
4692 pcum->aapcs_partial = 0;
4694 else
4696 pcum->nargs += 1;
4697 if (arm_vector_mode_supported_p (mode)
4698 && pcum->named_count > pcum->nargs
4699 && TARGET_IWMMXT_ABI)
4700 pcum->iwmmxt_nregs += 1;
4701 else
4702 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4706 /* Variable sized types are passed by reference. This is a GCC
4707 extension to the ARM ABI. */
4709 static bool
4710 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4711 enum machine_mode mode ATTRIBUTE_UNUSED,
4712 const_tree type, bool named ATTRIBUTE_UNUSED)
4714 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4717 /* Encode the current state of the #pragma [no_]long_calls. */
4718 typedef enum
4720 OFF, /* No #pragma [no_]long_calls is in effect. */
4721 LONG, /* #pragma long_calls is in effect. */
4722 SHORT /* #pragma no_long_calls is in effect. */
4723 } arm_pragma_enum;
4725 static arm_pragma_enum arm_pragma_long_calls = OFF;
4727 void
4728 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4730 arm_pragma_long_calls = LONG;
4733 void
4734 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4736 arm_pragma_long_calls = SHORT;
4739 void
4740 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4742 arm_pragma_long_calls = OFF;
4745 /* Handle an attribute requiring a FUNCTION_DECL;
4746 arguments as in struct attribute_spec.handler. */
4747 static tree
4748 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4749 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4751 if (TREE_CODE (*node) != FUNCTION_DECL)
4753 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4754 name);
4755 *no_add_attrs = true;
4758 return NULL_TREE;
4761 /* Handle an "interrupt" or "isr" attribute;
4762 arguments as in struct attribute_spec.handler. */
4763 static tree
4764 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4765 bool *no_add_attrs)
4767 if (DECL_P (*node))
4769 if (TREE_CODE (*node) != FUNCTION_DECL)
4771 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4772 name);
4773 *no_add_attrs = true;
4775 /* FIXME: the argument if any is checked for type attributes;
4776 should it be checked for decl ones? */
4778 else
4780 if (TREE_CODE (*node) == FUNCTION_TYPE
4781 || TREE_CODE (*node) == METHOD_TYPE)
4783 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4785 warning (OPT_Wattributes, "%qE attribute ignored",
4786 name);
4787 *no_add_attrs = true;
4790 else if (TREE_CODE (*node) == POINTER_TYPE
4791 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4792 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4793 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4795 *node = build_variant_type_copy (*node);
4796 TREE_TYPE (*node) = build_type_attribute_variant
4797 (TREE_TYPE (*node),
4798 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4799 *no_add_attrs = true;
4801 else
4803 /* Possibly pass this attribute on from the type to a decl. */
4804 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4805 | (int) ATTR_FLAG_FUNCTION_NEXT
4806 | (int) ATTR_FLAG_ARRAY_NEXT))
4808 *no_add_attrs = true;
4809 return tree_cons (name, args, NULL_TREE);
4811 else
4813 warning (OPT_Wattributes, "%qE attribute ignored",
4814 name);
4819 return NULL_TREE;
4822 /* Handle a "pcs" attribute; arguments as in struct
4823 attribute_spec.handler. */
4824 static tree
4825 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4828 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4830 warning (OPT_Wattributes, "%qE attribute ignored", name);
4831 *no_add_attrs = true;
4833 return NULL_TREE;
4836 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4837 /* Handle the "notshared" attribute. This attribute is another way of
4838 requesting hidden visibility. ARM's compiler supports
4839 "__declspec(notshared)"; we support the same thing via an
4840 attribute. */
4842 static tree
4843 arm_handle_notshared_attribute (tree *node,
4844 tree name ATTRIBUTE_UNUSED,
4845 tree args ATTRIBUTE_UNUSED,
4846 int flags ATTRIBUTE_UNUSED,
4847 bool *no_add_attrs)
4849 tree decl = TYPE_NAME (*node);
4851 if (decl)
4853 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4854 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4855 *no_add_attrs = false;
4857 return NULL_TREE;
4859 #endif
4861 /* Return 0 if the attributes for two types are incompatible, 1 if they
4862 are compatible, and 2 if they are nearly compatible (which causes a
4863 warning to be generated). */
4864 static int
4865 arm_comp_type_attributes (const_tree type1, const_tree type2)
4867 int l1, l2, s1, s2;
4869 /* Check for mismatch of non-default calling convention. */
4870 if (TREE_CODE (type1) != FUNCTION_TYPE)
4871 return 1;
4873 /* Check for mismatched call attributes. */
4874 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4875 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4876 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4877 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4879 /* Only bother to check if an attribute is defined. */
4880 if (l1 | l2 | s1 | s2)
4882 /* If one type has an attribute, the other must have the same attribute. */
4883 if ((l1 != l2) || (s1 != s2))
4884 return 0;
4886 /* Disallow mixed attributes. */
4887 if ((l1 & s2) || (l2 & s1))
4888 return 0;
4891 /* Check for mismatched ISR attribute. */
4892 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4893 if (! l1)
4894 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4895 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4896 if (! l2)
4897 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4898 if (l1 != l2)
4899 return 0;
4901 return 1;
4904 /* Assigns default attributes to newly defined type. This is used to
4905 set short_call/long_call attributes for function types of
4906 functions defined inside corresponding #pragma scopes. */
4907 static void
4908 arm_set_default_type_attributes (tree type)
4910 /* Add __attribute__ ((long_call)) to all functions when inside
4911 #pragma long_calls, or __attribute__ ((short_call)) when inside
4912 #pragma no_long_calls. */
4913 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4915 tree type_attr_list, attr_name;
4916 type_attr_list = TYPE_ATTRIBUTES (type);
4918 if (arm_pragma_long_calls == LONG)
4919 attr_name = get_identifier ("long_call");
4920 else if (arm_pragma_long_calls == SHORT)
4921 attr_name = get_identifier ("short_call");
4922 else
4923 return;
4925 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4926 TYPE_ATTRIBUTES (type) = type_attr_list;
4930 /* Return true if DECL is known to be linked into section SECTION. */
4932 static bool
4933 arm_function_in_section_p (tree decl, section *section)
4935 /* We can only be certain about functions defined in the same
4936 compilation unit. */
4937 if (!TREE_STATIC (decl))
4938 return false;
4940 /* Make sure that SYMBOL always binds to the definition in this
4941 compilation unit. */
4942 if (!targetm.binds_local_p (decl))
4943 return false;
4945 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4946 if (!DECL_SECTION_NAME (decl))
4948 /* Make sure that we will not create a unique section for DECL. */
4949 if (flag_function_sections || DECL_ONE_ONLY (decl))
4950 return false;
4953 return function_section (decl) == section;
4956 /* Return nonzero if a 32-bit "long_call" should be generated for
4957 a call from the current function to DECL. We generate a long_call
4958 if the function:
4960 a. has an __attribute__ ((long_call))
4961 or b. is within the scope of a #pragma long_calls
4962 or c. the -mlong-calls command line switch has been specified
4964 However we do not generate a long call if the function:
4966 d. has an __attribute__ ((short_call))
4967 or e. is inside the scope of a #pragma no_long_calls
4968 or f. is defined in the same section as the current function. */
4970 bool
4971 arm_is_long_call_p (tree decl)
4973 tree attrs;
4975 if (!decl)
4976 return TARGET_LONG_CALLS;
4978 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4979 if (lookup_attribute ("short_call", attrs))
4980 return false;
4982 /* For "f", be conservative, and only cater for cases in which the
4983 whole of the current function is placed in the same section. */
4984 if (!flag_reorder_blocks_and_partition
4985 && TREE_CODE (decl) == FUNCTION_DECL
4986 && arm_function_in_section_p (decl, current_function_section ()))
4987 return false;
4989 if (lookup_attribute ("long_call", attrs))
4990 return true;
4992 return TARGET_LONG_CALLS;
4995 /* Return nonzero if it is ok to make a tail-call to DECL. */
4996 static bool
4997 arm_function_ok_for_sibcall (tree decl, tree exp)
4999 unsigned long func_type;
5001 if (cfun->machine->sibcall_blocked)
5002 return false;
5004 /* Never tailcall something for which we have no decl, or if we
5005 are generating code for Thumb-1. */
5006 if (decl == NULL || TARGET_THUMB1)
5007 return false;
5009 /* The PIC register is live on entry to VxWorks PLT entries, so we
5010 must make the call before restoring the PIC register. */
5011 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5012 return false;
5014 /* Cannot tail-call to long calls, since these are out of range of
5015 a branch instruction. */
5016 if (arm_is_long_call_p (decl))
5017 return false;
5019 /* If we are interworking and the function is not declared static
5020 then we can't tail-call it unless we know that it exists in this
5021 compilation unit (since it might be a Thumb routine). */
5022 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5023 return false;
5025 func_type = arm_current_func_type ();
5026 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5027 if (IS_INTERRUPT (func_type))
5028 return false;
5030 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5032 /* Check that the return value locations are the same. For
5033 example that we aren't returning a value from the sibling in
5034 a VFP register but then need to transfer it to a core
5035 register. */
5036 rtx a, b;
5038 a = arm_function_value (TREE_TYPE (exp), decl, false);
5039 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5040 cfun->decl, false);
5041 if (!rtx_equal_p (a, b))
5042 return false;
5045 /* Never tailcall if function may be called with a misaligned SP. */
5046 if (IS_STACKALIGN (func_type))
5047 return false;
5049 /* Everything else is ok. */
5050 return true;
5054 /* Addressing mode support functions. */
5056 /* Return nonzero if X is a legitimate immediate operand when compiling
5057 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5059 legitimate_pic_operand_p (rtx x)
5061 if (GET_CODE (x) == SYMBOL_REF
5062 || (GET_CODE (x) == CONST
5063 && GET_CODE (XEXP (x, 0)) == PLUS
5064 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5065 return 0;
5067 return 1;
5070 /* Record that the current function needs a PIC register. Initialize
5071 cfun->machine->pic_reg if we have not already done so. */
5073 static void
5074 require_pic_register (void)
5076 /* A lot of the logic here is made obscure by the fact that this
5077 routine gets called as part of the rtx cost estimation process.
5078 We don't want those calls to affect any assumptions about the real
5079 function; and further, we can't call entry_of_function() until we
5080 start the real expansion process. */
5081 if (!crtl->uses_pic_offset_table)
5083 gcc_assert (can_create_pseudo_p ());
5084 if (arm_pic_register != INVALID_REGNUM)
5086 if (!cfun->machine->pic_reg)
5087 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5089 /* Play games to avoid marking the function as needing pic
5090 if we are being called as part of the cost-estimation
5091 process. */
5092 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5093 crtl->uses_pic_offset_table = 1;
5095 else
5097 rtx seq, insn;
5099 if (!cfun->machine->pic_reg)
5100 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5102 /* Play games to avoid marking the function as needing pic
5103 if we are being called as part of the cost-estimation
5104 process. */
5105 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5107 crtl->uses_pic_offset_table = 1;
5108 start_sequence ();
5110 arm_load_pic_register (0UL);
5112 seq = get_insns ();
5113 end_sequence ();
5115 for (insn = seq; insn; insn = NEXT_INSN (insn))
5116 if (INSN_P (insn))
5117 INSN_LOCATOR (insn) = prologue_locator;
5119 /* We can be called during expansion of PHI nodes, where
5120 we can't yet emit instructions directly in the final
5121 insn stream. Queue the insns on the entry edge, they will
5122 be committed after everything else is expanded. */
5123 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5130 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5132 if (GET_CODE (orig) == SYMBOL_REF
5133 || GET_CODE (orig) == LABEL_REF)
5135 rtx insn;
5137 if (reg == 0)
5139 gcc_assert (can_create_pseudo_p ());
5140 reg = gen_reg_rtx (Pmode);
5143 /* VxWorks does not impose a fixed gap between segments; the run-time
5144 gap can be different from the object-file gap. We therefore can't
5145 use GOTOFF unless we are absolutely sure that the symbol is in the
5146 same segment as the GOT. Unfortunately, the flexibility of linker
5147 scripts means that we can't be sure of that in general, so assume
5148 that GOTOFF is never valid on VxWorks. */
5149 if ((GET_CODE (orig) == LABEL_REF
5150 || (GET_CODE (orig) == SYMBOL_REF &&
5151 SYMBOL_REF_LOCAL_P (orig)))
5152 && NEED_GOT_RELOC
5153 && !TARGET_VXWORKS_RTP)
5154 insn = arm_pic_static_addr (orig, reg);
5155 else
5157 rtx pat;
5158 rtx mem;
5160 /* If this function doesn't have a pic register, create one now. */
5161 require_pic_register ();
5163 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5165 /* Make the MEM as close to a constant as possible. */
5166 mem = SET_SRC (pat);
5167 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5168 MEM_READONLY_P (mem) = 1;
5169 MEM_NOTRAP_P (mem) = 1;
5171 insn = emit_insn (pat);
5174 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5175 by the loop optimizer. */
5176 set_unique_reg_note (insn, REG_EQUAL, orig);
5178 return reg;
5180 else if (GET_CODE (orig) == CONST)
5182 rtx base, offset;
5184 if (GET_CODE (XEXP (orig, 0)) == PLUS
5185 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5186 return orig;
5188 /* Handle the case where we have: const (UNSPEC_TLS). */
5189 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5190 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5191 return orig;
5193 /* Handle the case where we have:
5194 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5195 CONST_INT. */
5196 if (GET_CODE (XEXP (orig, 0)) == PLUS
5197 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5198 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5200 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5201 return orig;
5204 if (reg == 0)
5206 gcc_assert (can_create_pseudo_p ());
5207 reg = gen_reg_rtx (Pmode);
5210 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5212 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5213 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5214 base == reg ? 0 : reg);
5216 if (GET_CODE (offset) == CONST_INT)
5218 /* The base register doesn't really matter; we only want to
5219 test the index for the appropriate mode. */
5220 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5222 gcc_assert (can_create_pseudo_p ());
5223 offset = force_reg (Pmode, offset);
5226 if (GET_CODE (offset) == CONST_INT)
5227 return plus_constant (base, INTVAL (offset));
5230 if (GET_MODE_SIZE (mode) > 4
5231 && (GET_MODE_CLASS (mode) == MODE_INT
5232 || TARGET_SOFT_FLOAT))
5234 emit_insn (gen_addsi3 (reg, base, offset));
5235 return reg;
5238 return gen_rtx_PLUS (Pmode, base, offset);
5241 return orig;
5245 /* Find a spare register to use during the prolog of a function. */
5247 static int
5248 thumb_find_work_register (unsigned long pushed_regs_mask)
5250 int reg;
5252 /* Check the argument registers first as these are call-used. The
5253 register allocation order means that sometimes r3 might be used
5254 but earlier argument registers might not, so check them all. */
5255 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5256 if (!df_regs_ever_live_p (reg))
5257 return reg;
5259 /* Before going on to check the call-saved registers we can try a couple
5260 more ways of deducing that r3 is available. The first is when we are
5261 pushing anonymous arguments onto the stack and we have less than 4
5262 registers worth of fixed arguments(*). In this case r3 will be part of
5263 the variable argument list and so we can be sure that it will be
5264 pushed right at the start of the function. Hence it will be available
5265 for the rest of the prologue.
5266 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5267 if (cfun->machine->uses_anonymous_args
5268 && crtl->args.pretend_args_size > 0)
5269 return LAST_ARG_REGNUM;
5271 /* The other case is when we have fixed arguments but less than 4 registers
5272 worth. In this case r3 might be used in the body of the function, but
5273 it is not being used to convey an argument into the function. In theory
5274 we could just check crtl->args.size to see how many bytes are
5275 being passed in argument registers, but it seems that it is unreliable.
5276 Sometimes it will have the value 0 when in fact arguments are being
5277 passed. (See testcase execute/20021111-1.c for an example). So we also
5278 check the args_info.nregs field as well. The problem with this field is
5279 that it makes no allowances for arguments that are passed to the
5280 function but which are not used. Hence we could miss an opportunity
5281 when a function has an unused argument in r3. But it is better to be
5282 safe than to be sorry. */
5283 if (! cfun->machine->uses_anonymous_args
5284 && crtl->args.size >= 0
5285 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5286 && crtl->args.info.nregs < 4)
5287 return LAST_ARG_REGNUM;
5289 /* Otherwise look for a call-saved register that is going to be pushed. */
5290 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5291 if (pushed_regs_mask & (1 << reg))
5292 return reg;
5294 if (TARGET_THUMB2)
5296 /* Thumb-2 can use high regs. */
5297 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5298 if (pushed_regs_mask & (1 << reg))
5299 return reg;
5301 /* Something went wrong - thumb_compute_save_reg_mask()
5302 should have arranged for a suitable register to be pushed. */
5303 gcc_unreachable ();
5306 static GTY(()) int pic_labelno;
5308 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5309 low register. */
5311 void
5312 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5314 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5316 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5317 return;
5319 gcc_assert (flag_pic);
5321 pic_reg = cfun->machine->pic_reg;
5322 if (TARGET_VXWORKS_RTP)
5324 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5325 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5326 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5328 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5330 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5331 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5333 else
5335 /* We use an UNSPEC rather than a LABEL_REF because this label
5336 never appears in the code stream. */
5338 labelno = GEN_INT (pic_labelno++);
5339 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5340 l1 = gen_rtx_CONST (VOIDmode, l1);
5342 /* On the ARM the PC register contains 'dot + 8' at the time of the
5343 addition, on the Thumb it is 'dot + 4'. */
5344 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5345 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5346 UNSPEC_GOTSYM_OFF);
5347 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5349 if (TARGET_32BIT)
5351 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5352 if (TARGET_ARM)
5353 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5354 else
5355 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5357 else /* TARGET_THUMB1 */
5359 if (arm_pic_register != INVALID_REGNUM
5360 && REGNO (pic_reg) > LAST_LO_REGNUM)
5362 /* We will have pushed the pic register, so we should always be
5363 able to find a work register. */
5364 pic_tmp = gen_rtx_REG (SImode,
5365 thumb_find_work_register (saved_regs));
5366 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5367 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5369 else
5370 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5371 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5375 /* Need to emit this whether or not we obey regdecls,
5376 since setjmp/longjmp can cause life info to screw up. */
5377 emit_use (pic_reg);
5380 /* Generate code to load the address of a static var when flag_pic is set. */
5381 static rtx
5382 arm_pic_static_addr (rtx orig, rtx reg)
5384 rtx l1, labelno, offset_rtx, insn;
5386 gcc_assert (flag_pic);
5388 /* We use an UNSPEC rather than a LABEL_REF because this label
5389 never appears in the code stream. */
5390 labelno = GEN_INT (pic_labelno++);
5391 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5392 l1 = gen_rtx_CONST (VOIDmode, l1);
5394 /* On the ARM the PC register contains 'dot + 8' at the time of the
5395 addition, on the Thumb it is 'dot + 4'. */
5396 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5397 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5398 UNSPEC_SYMBOL_OFFSET);
5399 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5401 if (TARGET_32BIT)
5403 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5404 if (TARGET_ARM)
5405 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5406 else
5407 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5409 else /* TARGET_THUMB1 */
5411 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5412 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5415 return insn;
5418 /* Return nonzero if X is valid as an ARM state addressing register. */
5419 static int
5420 arm_address_register_rtx_p (rtx x, int strict_p)
5422 int regno;
5424 if (GET_CODE (x) != REG)
5425 return 0;
5427 regno = REGNO (x);
5429 if (strict_p)
5430 return ARM_REGNO_OK_FOR_BASE_P (regno);
5432 return (regno <= LAST_ARM_REGNUM
5433 || regno >= FIRST_PSEUDO_REGISTER
5434 || regno == FRAME_POINTER_REGNUM
5435 || regno == ARG_POINTER_REGNUM);
5438 /* Return TRUE if this rtx is the difference of a symbol and a label,
5439 and will reduce to a PC-relative relocation in the object file.
5440 Expressions like this can be left alone when generating PIC, rather
5441 than forced through the GOT. */
5442 static int
5443 pcrel_constant_p (rtx x)
5445 if (GET_CODE (x) == MINUS)
5446 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5448 return FALSE;
5451 /* Return true if X will surely end up in an index register after next
5452 splitting pass. */
5453 static bool
5454 will_be_in_index_register (const_rtx x)
5456 /* arm.md: calculate_pic_address will split this into a register. */
5457 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5460 /* Return nonzero if X is a valid ARM state address operand. */
5462 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5463 int strict_p)
5465 bool use_ldrd;
5466 enum rtx_code code = GET_CODE (x);
5468 if (arm_address_register_rtx_p (x, strict_p))
5469 return 1;
5471 use_ldrd = (TARGET_LDRD
5472 && (mode == DImode
5473 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5475 if (code == POST_INC || code == PRE_DEC
5476 || ((code == PRE_INC || code == POST_DEC)
5477 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5478 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5480 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5481 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5482 && GET_CODE (XEXP (x, 1)) == PLUS
5483 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5485 rtx addend = XEXP (XEXP (x, 1), 1);
5487 /* Don't allow ldrd post increment by register because it's hard
5488 to fixup invalid register choices. */
5489 if (use_ldrd
5490 && GET_CODE (x) == POST_MODIFY
5491 && GET_CODE (addend) == REG)
5492 return 0;
5494 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5495 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5498 /* After reload constants split into minipools will have addresses
5499 from a LABEL_REF. */
5500 else if (reload_completed
5501 && (code == LABEL_REF
5502 || (code == CONST
5503 && GET_CODE (XEXP (x, 0)) == PLUS
5504 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5505 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5506 return 1;
5508 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5509 return 0;
5511 else if (code == PLUS)
5513 rtx xop0 = XEXP (x, 0);
5514 rtx xop1 = XEXP (x, 1);
5516 return ((arm_address_register_rtx_p (xop0, strict_p)
5517 && ((GET_CODE(xop1) == CONST_INT
5518 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5519 || (!strict_p && will_be_in_index_register (xop1))))
5520 || (arm_address_register_rtx_p (xop1, strict_p)
5521 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5524 #if 0
5525 /* Reload currently can't handle MINUS, so disable this for now */
5526 else if (GET_CODE (x) == MINUS)
5528 rtx xop0 = XEXP (x, 0);
5529 rtx xop1 = XEXP (x, 1);
5531 return (arm_address_register_rtx_p (xop0, strict_p)
5532 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5534 #endif
5536 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5537 && code == SYMBOL_REF
5538 && CONSTANT_POOL_ADDRESS_P (x)
5539 && ! (flag_pic
5540 && symbol_mentioned_p (get_pool_constant (x))
5541 && ! pcrel_constant_p (get_pool_constant (x))))
5542 return 1;
5544 return 0;
5547 /* Return nonzero if X is a valid Thumb-2 address operand. */
5548 static int
5549 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5551 bool use_ldrd;
5552 enum rtx_code code = GET_CODE (x);
5554 if (arm_address_register_rtx_p (x, strict_p))
5555 return 1;
5557 use_ldrd = (TARGET_LDRD
5558 && (mode == DImode
5559 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5561 if (code == POST_INC || code == PRE_DEC
5562 || ((code == PRE_INC || code == POST_DEC)
5563 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5564 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5566 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5567 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5568 && GET_CODE (XEXP (x, 1)) == PLUS
5569 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5571 /* Thumb-2 only has autoincrement by constant. */
5572 rtx addend = XEXP (XEXP (x, 1), 1);
5573 HOST_WIDE_INT offset;
5575 if (GET_CODE (addend) != CONST_INT)
5576 return 0;
5578 offset = INTVAL(addend);
5579 if (GET_MODE_SIZE (mode) <= 4)
5580 return (offset > -256 && offset < 256);
5582 return (use_ldrd && offset > -1024 && offset < 1024
5583 && (offset & 3) == 0);
5586 /* After reload constants split into minipools will have addresses
5587 from a LABEL_REF. */
5588 else if (reload_completed
5589 && (code == LABEL_REF
5590 || (code == CONST
5591 && GET_CODE (XEXP (x, 0)) == PLUS
5592 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5593 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5594 return 1;
5596 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5597 return 0;
5599 else if (code == PLUS)
5601 rtx xop0 = XEXP (x, 0);
5602 rtx xop1 = XEXP (x, 1);
5604 return ((arm_address_register_rtx_p (xop0, strict_p)
5605 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5606 || (!strict_p && will_be_in_index_register (xop1))))
5607 || (arm_address_register_rtx_p (xop1, strict_p)
5608 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5611 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5612 && code == SYMBOL_REF
5613 && CONSTANT_POOL_ADDRESS_P (x)
5614 && ! (flag_pic
5615 && symbol_mentioned_p (get_pool_constant (x))
5616 && ! pcrel_constant_p (get_pool_constant (x))))
5617 return 1;
5619 return 0;
5622 /* Return nonzero if INDEX is valid for an address index operand in
5623 ARM state. */
5624 static int
5625 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5626 int strict_p)
5628 HOST_WIDE_INT range;
5629 enum rtx_code code = GET_CODE (index);
5631 /* Standard coprocessor addressing modes. */
5632 if (TARGET_HARD_FLOAT
5633 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5634 && (mode == SFmode || mode == DFmode
5635 || (TARGET_MAVERICK && mode == DImode)))
5636 return (code == CONST_INT && INTVAL (index) < 1024
5637 && INTVAL (index) > -1024
5638 && (INTVAL (index) & 3) == 0);
5640 /* For quad modes, we restrict the constant offset to be slightly less
5641 than what the instruction format permits. We do this because for
5642 quad mode moves, we will actually decompose them into two separate
5643 double-mode reads or writes. INDEX must therefore be a valid
5644 (double-mode) offset and so should INDEX+8. */
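/* For example, the largest quad-mode offset accepted below is 1012, so the
   implied second doubleword access at 1012 + 8 = 1020 still fits the
   +/-1020 doubleword range used further down.  */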
5645 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5646 return (code == CONST_INT
5647 && INTVAL (index) < 1016
5648 && INTVAL (index) > -1024
5649 && (INTVAL (index) & 3) == 0);
5651 /* We have no such constraint on double mode offsets, so we permit the
5652 full range of the instruction format. */
5653 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5654 return (code == CONST_INT
5655 && INTVAL (index) < 1024
5656 && INTVAL (index) > -1024
5657 && (INTVAL (index) & 3) == 0);
5659 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5660 return (code == CONST_INT
5661 && INTVAL (index) < 1024
5662 && INTVAL (index) > -1024
5663 && (INTVAL (index) & 3) == 0);
5665 if (arm_address_register_rtx_p (index, strict_p)
5666 && (GET_MODE_SIZE (mode) <= 4))
5667 return 1;
5669 if (mode == DImode || mode == DFmode)
5671 if (code == CONST_INT)
5673 HOST_WIDE_INT val = INTVAL (index);
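/* ldrd/strd only take an 8-bit offset.  Without ldrd the access becomes two
   SImode loads at VAL and VAL + 4, so VAL + 4 must still fit in the 12-bit
   ldr/str offset range, hence the 4092 limit below.  */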
5675 if (TARGET_LDRD)
5676 return val > -256 && val < 256;
5677 else
5678 return val > -4096 && val < 4092;
5681 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5684 if (GET_MODE_SIZE (mode) <= 4
5685 && ! (arm_arch4
5686 && (mode == HImode
5687 || mode == HFmode
5688 || (mode == QImode && outer == SIGN_EXTEND))))
5690 if (code == MULT)
5692 rtx xiop0 = XEXP (index, 0);
5693 rtx xiop1 = XEXP (index, 1);
5695 return ((arm_address_register_rtx_p (xiop0, strict_p)
5696 && power_of_two_operand (xiop1, SImode))
5697 || (arm_address_register_rtx_p (xiop1, strict_p)
5698 && power_of_two_operand (xiop0, SImode)));
5700 else if (code == LSHIFTRT || code == ASHIFTRT
5701 || code == ASHIFT || code == ROTATERT)
5703 rtx op = XEXP (index, 1);
5705 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5706 && GET_CODE (op) == CONST_INT
5707 && INTVAL (op) > 0
5708 && INTVAL (op) <= 31);
5712 /* For ARM v4 we may be doing a sign-extend operation during the
5713 load. */
5714 if (arm_arch4)
5716 if (mode == HImode
5717 || mode == HFmode
5718 || (outer == SIGN_EXTEND && mode == QImode))
5719 range = 256;
5720 else
5721 range = 4096;
5723 else
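/* The movhi_bytes/storehi fallbacks access VAL and VAL + 1, so only
   [-4094,+4094] of the full ldrb/strb index range is usable, hence 4095.  */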
5724 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5726 return (code == CONST_INT
5727 && INTVAL (index) < range
5728 && INTVAL (index) > -range);
5731 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5732 index operand, i.e. 1, 2, 4 or 8. */
5733 static bool
5734 thumb2_index_mul_operand (rtx op)
5736 HOST_WIDE_INT val;
5738 if (GET_CODE(op) != CONST_INT)
5739 return false;
5741 val = INTVAL(op);
5742 return (val == 1 || val == 2 || val == 4 || val == 8);
5745 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5746 static int
5747 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5749 enum rtx_code code = GET_CODE (index);
5751 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5752 /* Standard coprocessor addressing modes. */
5753 if (TARGET_HARD_FLOAT
5754 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5755 && (mode == SFmode || mode == DFmode
5756 || (TARGET_MAVERICK && mode == DImode)))
5757 return (code == CONST_INT && INTVAL (index) < 1024
5758 /* Thumb-2 allows only a > -256 index range for its core register
5759 load/stores. Since we allow SF/DF in core registers, we have
5760 to use the intersection between -256~4096 (core) and -1024~1024
5761 (coprocessor). */
5762 && INTVAL (index) > -256
5763 && (INTVAL (index) & 3) == 0);
5765 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5767 /* For DImode assume values will usually live in core regs
5768 and only allow LDRD addressing modes. */
5769 if (!TARGET_LDRD || mode != DImode)
5770 return (code == CONST_INT
5771 && INTVAL (index) < 1024
5772 && INTVAL (index) > -1024
5773 && (INTVAL (index) & 3) == 0);
5776 /* For quad modes, we restrict the constant offset to be slightly less
5777 than what the instruction format permits. We do this because for
5778 quad mode moves, we will actually decompose them into two separate
5779 double-mode reads or writes. INDEX must therefore be a valid
5780 (double-mode) offset and so should INDEX+8. */
5781 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5782 return (code == CONST_INT
5783 && INTVAL (index) < 1016
5784 && INTVAL (index) > -1024
5785 && (INTVAL (index) & 3) == 0);
5787 /* We have no such constraint on double mode offsets, so we permit the
5788 full range of the instruction format. */
5789 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5790 return (code == CONST_INT
5791 && INTVAL (index) < 1024
5792 && INTVAL (index) > -1024
5793 && (INTVAL (index) & 3) == 0);
5795 if (arm_address_register_rtx_p (index, strict_p)
5796 && (GET_MODE_SIZE (mode) <= 4))
5797 return 1;
5799 if (mode == DImode || mode == DFmode)
5801 if (code == CONST_INT)
5803 HOST_WIDE_INT val = INTVAL (index);
5804 /* ??? Can we assume ldrd for thumb2? */
5805 /* Thumb-2 ldrd only has reg+const addressing modes. */
5806 /* ldrd supports offsets of +-1020.
5807 However the ldr fallback does not. */
5808 return val > -256 && val < 256 && (val & 3) == 0;
5810 else
5811 return 0;
5814 if (code == MULT)
5816 rtx xiop0 = XEXP (index, 0);
5817 rtx xiop1 = XEXP (index, 1);
5819 return ((arm_address_register_rtx_p (xiop0, strict_p)
5820 && thumb2_index_mul_operand (xiop1))
5821 || (arm_address_register_rtx_p (xiop1, strict_p)
5822 && thumb2_index_mul_operand (xiop0)));
5824 else if (code == ASHIFT)
5826 rtx op = XEXP (index, 1);
5828 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5829 && GET_CODE (op) == CONST_INT
5830 && INTVAL (op) > 0
5831 && INTVAL (op) <= 3);
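/* Plain constant offsets use the Thumb-2 12-bit positive or 8-bit negative
   immediate load/store encodings.  */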
5834 return (code == CONST_INT
5835 && INTVAL (index) < 4096
5836 && INTVAL (index) > -256);
5839 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5840 static int
5841 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5843 int regno;
5845 if (GET_CODE (x) != REG)
5846 return 0;
5848 regno = REGNO (x);
5850 if (strict_p)
5851 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5853 return (regno <= LAST_LO_REGNUM
5854 || regno > LAST_VIRTUAL_REGISTER
5855 || regno == FRAME_POINTER_REGNUM
5856 || (GET_MODE_SIZE (mode) >= 4
5857 && (regno == STACK_POINTER_REGNUM
5858 || regno >= FIRST_PSEUDO_REGISTER
5859 || x == hard_frame_pointer_rtx
5860 || x == arg_pointer_rtx)));
5863 /* Return nonzero if x is a legitimate index register. This is the case
5864 for any base register that can access a QImode object. */
5865 inline static int
5866 thumb1_index_register_rtx_p (rtx x, int strict_p)
5868 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5871 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5873 The AP may be eliminated to either the SP or the FP, so we use the
5874 least common denominator, e.g. SImode, and offsets from 0 to 64.
5876 ??? Verify whether the above is the right approach.
5878 ??? Also, the FP may be eliminated to the SP, so perhaps that
5879 needs special handling also.
5881 ??? Look at how the mips16 port solves this problem. It probably uses
5882 better ways to solve some of these problems.
5884 Although it is not incorrect, we don't accept QImode and HImode
5885 addresses based on the frame pointer or arg pointer until the
5886 reload pass starts. This is so that eliminating such addresses
5887 into stack based ones won't produce impossible code. */
5889 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5891 /* ??? Not clear if this is right. Experiment. */
5892 if (GET_MODE_SIZE (mode) < 4
5893 && !(reload_in_progress || reload_completed)
5894 && (reg_mentioned_p (frame_pointer_rtx, x)
5895 || reg_mentioned_p (arg_pointer_rtx, x)
5896 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5897 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5898 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5899 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5900 return 0;
5902 /* Accept any base register. SP only in SImode or larger. */
5903 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5904 return 1;
5906 /* This is PC relative data before arm_reorg runs. */
5907 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5908 && GET_CODE (x) == SYMBOL_REF
5909 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5910 return 1;
5912 /* This is PC relative data after arm_reorg runs. */
5913 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5914 && reload_completed
5915 && (GET_CODE (x) == LABEL_REF
5916 || (GET_CODE (x) == CONST
5917 && GET_CODE (XEXP (x, 0)) == PLUS
5918 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5919 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5920 return 1;
5922 /* Post-inc indexing only supported for SImode and larger. */
5923 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5924 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5925 return 1;
5927 else if (GET_CODE (x) == PLUS)
5929 /* REG+REG address can be any two index registers. */
5930 /* We disallow FRAME+REG addressing since we know that FRAME
5931 will be replaced with STACK, and SP relative addressing only
5932 permits SP+OFFSET. */
5933 if (GET_MODE_SIZE (mode) <= 4
5934 && XEXP (x, 0) != frame_pointer_rtx
5935 && XEXP (x, 1) != frame_pointer_rtx
5936 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5937 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5938 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5939 return 1;
5941 /* REG+const has a 5-7 bit offset for non-SP registers. */
5942 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5943 || XEXP (x, 0) == arg_pointer_rtx)
5944 && GET_CODE (XEXP (x, 1)) == CONST_INT
5945 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5946 return 1;
5948 /* REG+const has a 10-bit offset for SP, but only SImode and
5949 larger are supported. */
5950 /* ??? Should probably check for DI/DFmode overflow here
5951 just like GO_IF_LEGITIMATE_OFFSET does. */
5952 else if (GET_CODE (XEXP (x, 0)) == REG
5953 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5954 && GET_MODE_SIZE (mode) >= 4
5955 && GET_CODE (XEXP (x, 1)) == CONST_INT
5956 && INTVAL (XEXP (x, 1)) >= 0
5957 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5958 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5959 return 1;
5961 else if (GET_CODE (XEXP (x, 0)) == REG
5962 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5963 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5964 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5965 && REGNO (XEXP (x, 0))
5966 <= LAST_VIRTUAL_POINTER_REGISTER))
5967 && GET_MODE_SIZE (mode) >= 4
5968 && GET_CODE (XEXP (x, 1)) == CONST_INT
5969 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5970 return 1;
5973 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5974 && GET_MODE_SIZE (mode) == 4
5975 && GET_CODE (x) == SYMBOL_REF
5976 && CONSTANT_POOL_ADDRESS_P (x)
5977 && ! (flag_pic
5978 && symbol_mentioned_p (get_pool_constant (x))
5979 && ! pcrel_constant_p (get_pool_constant (x))))
5980 return 1;
5982 return 0;
5985 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5986 instruction of mode MODE. */
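/* The limits below correspond to the 5-bit immediate of the 16-bit
   ldrb/ldrh/ldr encodings, scaled by the access size: 31, 62 and 124 bytes
   respectively; wider modes must also end within that 128-byte window.  */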
5988 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5990 switch (GET_MODE_SIZE (mode))
5992 case 1:
5993 return val >= 0 && val < 32;
5995 case 2:
5996 return val >= 0 && val < 64 && (val & 1) == 0;
5998 default:
5999 return (val >= 0
6000 && (val + GET_MODE_SIZE (mode)) <= 128
6001 && (val & 3) == 0);
6005 bool
6006 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6008 if (TARGET_ARM)
6009 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6010 else if (TARGET_THUMB2)
6011 return thumb2_legitimate_address_p (mode, x, strict_p);
6012 else /* if (TARGET_THUMB1) */
6013 return thumb1_legitimate_address_p (mode, x, strict_p);
6016 /* Build the SYMBOL_REF for __tls_get_addr. */
6018 static GTY(()) rtx tls_get_addr_libfunc;
6020 static rtx
6021 get_tls_get_addr (void)
6023 if (!tls_get_addr_libfunc)
6024 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6025 return tls_get_addr_libfunc;
6028 static rtx
6029 arm_load_tp (rtx target)
6031 if (!target)
6032 target = gen_reg_rtx (SImode);
6034 if (TARGET_HARD_TP)
6036 /* Can return in any reg. */
6037 emit_insn (gen_load_tp_hard (target));
6039 else
6041 /* Always returned in r0. Immediately copy the result into a pseudo,
6042 otherwise other uses of r0 (e.g. setting up function arguments) may
6043 clobber the value. */
6045 rtx tmp;
6047 emit_insn (gen_load_tp_soft ());
6049 tmp = gen_rtx_REG (SImode, 0);
6050 emit_move_insn (target, tmp);
6052 return target;
6055 static rtx
6056 load_tls_operand (rtx x, rtx reg)
6058 rtx tmp;
6060 if (reg == NULL_RTX)
6061 reg = gen_reg_rtx (SImode);
6063 tmp = gen_rtx_CONST (SImode, x);
6065 emit_move_insn (reg, tmp);
6067 return reg;
6070 static rtx
6071 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6073 rtx insns, label, labelno, sum;
6075 gcc_assert (reloc != TLS_DESCSEQ);
6076 start_sequence ();
6078 labelno = GEN_INT (pic_labelno++);
6079 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6080 label = gen_rtx_CONST (VOIDmode, label);
6082 sum = gen_rtx_UNSPEC (Pmode,
6083 gen_rtvec (4, x, GEN_INT (reloc), label,
6084 GEN_INT (TARGET_ARM ? 8 : 4)),
6085 UNSPEC_TLS);
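/* The 8 or 4 above is the pc-read correction: the pc value seen by the
   following add is the address of that insn plus 8 in ARM state and plus 4
   in Thumb state.  */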
6086 reg = load_tls_operand (sum, reg);
6088 if (TARGET_ARM)
6089 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6090 else
6091 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6093 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6094 LCT_PURE, /* LCT_CONST? */
6095 Pmode, 1, reg, Pmode);
6097 insns = get_insns ();
6098 end_sequence ();
6100 return insns;
6103 static rtx
6104 arm_tls_descseq_addr (rtx x, rtx reg)
6106 rtx labelno = GEN_INT (pic_labelno++);
6107 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6108 rtx sum = gen_rtx_UNSPEC (Pmode,
6109 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6110 gen_rtx_CONST (VOIDmode, label),
6111 GEN_INT (!TARGET_ARM)),
6112 UNSPEC_TLS);
6113 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6115 emit_insn (gen_tlscall (x, labelno));
6116 if (!reg)
6117 reg = gen_reg_rtx (SImode);
6118 else
6119 gcc_assert (REGNO (reg) != 0);
6121 emit_move_insn (reg, reg0);
6123 return reg;
6127 legitimize_tls_address (rtx x, rtx reg)
6129 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6130 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6132 switch (model)
6134 case TLS_MODEL_GLOBAL_DYNAMIC:
6135 if (TARGET_GNU2_TLS)
6137 reg = arm_tls_descseq_addr (x, reg);
6139 tp = arm_load_tp (NULL_RTX);
6141 dest = gen_rtx_PLUS (Pmode, tp, reg);
6143 else
6145 /* Original scheme */
6146 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6147 dest = gen_reg_rtx (Pmode);
6148 emit_libcall_block (insns, dest, ret, x);
6150 return dest;
6152 case TLS_MODEL_LOCAL_DYNAMIC:
6153 if (TARGET_GNU2_TLS)
6155 reg = arm_tls_descseq_addr (x, reg);
6157 tp = arm_load_tp (NULL_RTX);
6159 dest = gen_rtx_PLUS (Pmode, tp, reg);
6161 else
6163 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6165 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6166 share the LDM result with other LD model accesses. */
6167 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6168 UNSPEC_TLS);
6169 dest = gen_reg_rtx (Pmode);
6170 emit_libcall_block (insns, dest, ret, eqv);
6172 /* Load the addend. */
6173 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6174 GEN_INT (TLS_LDO32)),
6175 UNSPEC_TLS);
6176 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6177 dest = gen_rtx_PLUS (Pmode, dest, addend);
6179 return dest;
6181 case TLS_MODEL_INITIAL_EXEC:
6182 labelno = GEN_INT (pic_labelno++);
6183 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6184 label = gen_rtx_CONST (VOIDmode, label);
6185 sum = gen_rtx_UNSPEC (Pmode,
6186 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6187 GEN_INT (TARGET_ARM ? 8 : 4)),
6188 UNSPEC_TLS);
6189 reg = load_tls_operand (sum, reg);
6191 if (TARGET_ARM)
6192 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6193 else if (TARGET_THUMB2)
6194 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6195 else
6197 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6198 emit_move_insn (reg, gen_const_mem (SImode, reg));
6201 tp = arm_load_tp (NULL_RTX);
6203 return gen_rtx_PLUS (Pmode, tp, reg);
6205 case TLS_MODEL_LOCAL_EXEC:
6206 tp = arm_load_tp (NULL_RTX);
6208 reg = gen_rtx_UNSPEC (Pmode,
6209 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6210 UNSPEC_TLS);
6211 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6213 return gen_rtx_PLUS (Pmode, tp, reg);
6215 default:
6216 abort ();
6220 /* Try machine-dependent ways of modifying an illegitimate address
6221 to be legitimate. If we find one, return the new, valid address. */
6223 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6225 if (!TARGET_ARM)
6227 /* TODO: legitimize_address for Thumb2. */
6228 if (TARGET_THUMB2)
6229 return x;
6230 return thumb_legitimize_address (x, orig_x, mode);
6233 if (arm_tls_symbol_p (x))
6234 return legitimize_tls_address (x, NULL_RTX);
6236 if (GET_CODE (x) == PLUS)
6238 rtx xop0 = XEXP (x, 0);
6239 rtx xop1 = XEXP (x, 1);
6241 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6242 xop0 = force_reg (SImode, xop0);
6244 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6245 xop1 = force_reg (SImode, xop1);
6247 if (ARM_BASE_REGISTER_RTX_P (xop0)
6248 && GET_CODE (xop1) == CONST_INT)
6250 HOST_WIDE_INT n, low_n;
6251 rtx base_reg, val;
6252 n = INTVAL (xop1);
6254 /* VFP addressing modes actually allow greater offsets, but for
6255 now we just stick with the lowest common denominator. */
6256 if (mode == DImode
6257 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6259 low_n = n & 0x0f;
6260 n &= ~0x0f;
6261 if (low_n > 4)
6263 n += 16;
6264 low_n -= 16;
6267 else
6269 low_n = ((mode) == TImode ? 0
6270 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6271 n -= low_n;
6274 base_reg = gen_reg_rtx (SImode);
6275 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6276 emit_move_insn (base_reg, val);
6277 x = plus_constant (base_reg, low_n);
6279 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6280 x = gen_rtx_PLUS (SImode, xop0, xop1);
6283 /* XXX We don't allow MINUS any more -- see comment in
6284 arm_legitimate_address_outer_p (). */
6285 else if (GET_CODE (x) == MINUS)
6287 rtx xop0 = XEXP (x, 0);
6288 rtx xop1 = XEXP (x, 1);
6290 if (CONSTANT_P (xop0))
6291 xop0 = force_reg (SImode, xop0);
6293 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6294 xop1 = force_reg (SImode, xop1);
6296 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6297 x = gen_rtx_MINUS (SImode, xop0, xop1);
6300 /* Make sure to take full advantage of the pre-indexed addressing mode
6301 with absolute addresses, which often allows the base register to
6302 be factorized for multiple adjacent memory references, and it might
6303 even allow the minipool to be avoided entirely. */
6304 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6306 unsigned int bits;
6307 HOST_WIDE_INT mask, base, index;
6308 rtx base_reg;
6310 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6311 use an 8-bit index. So let's use a 12-bit index for SImode only and
6312 hope that arm_gen_constant will enable ldrb to use more bits. */
6313 bits = (mode == SImode) ? 12 : 8;
6314 mask = (1 << bits) - 1;
6315 base = INTVAL (x) & ~mask;
6316 index = INTVAL (x) & mask;
6317 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6319 /* It'll most probably be more efficient to generate the base
6320 with more bits set and use a negative index instead. */
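/* For example, SImode address 0x00fff004 initially splits into base
   0x00fff000 (12 bits set) and index 4; switching to base 0x00ffffff (a
   single mvn) with index -4091 reaches the same address more cheaply.  */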
6321 base |= mask;
6322 index -= mask;
6324 base_reg = force_reg (SImode, GEN_INT (base));
6325 x = plus_constant (base_reg, index);
6328 if (flag_pic)
6330 /* We need to find and carefully transform any SYMBOL and LABEL
6331 references; so go back to the original address expression. */
6332 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6334 if (new_x != orig_x)
6335 x = new_x;
6338 return x;
6342 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6343 to be legitimate. If we find one, return the new, valid address. */
6345 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6347 if (arm_tls_symbol_p (x))
6348 return legitimize_tls_address (x, NULL_RTX);
6350 if (GET_CODE (x) == PLUS
6351 && GET_CODE (XEXP (x, 1)) == CONST_INT
6352 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6353 || INTVAL (XEXP (x, 1)) < 0))
6355 rtx xop0 = XEXP (x, 0);
6356 rtx xop1 = XEXP (x, 1);
6357 HOST_WIDE_INT offset = INTVAL (xop1);
6359 /* Try and fold the offset into a biasing of the base register and
6360 then offsetting that. Don't do this when optimizing for space
6361 since it can cause too many CSEs. */
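/* For example, with SImode and offset 300, delta becomes
   300 - (256 - 4) = 48: the base register is biased by 252 (a single add)
   and the load then uses the in-range offset 48.  */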
6362 if (optimize_size && offset >= 0
6363 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6365 HOST_WIDE_INT delta;
6367 if (offset >= 256)
6368 delta = offset - (256 - GET_MODE_SIZE (mode));
6369 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6370 delta = 31 * GET_MODE_SIZE (mode);
6371 else
6372 delta = offset & (~31 * GET_MODE_SIZE (mode));
6374 xop0 = force_operand (plus_constant (xop0, offset - delta),
6375 NULL_RTX);
6376 x = plus_constant (xop0, delta);
6378 else if (offset < 0 && offset > -256)
6379 /* Small negative offsets are best done with a subtract before the
6380 dereference, since forcing these into a register normally takes two
6381 instructions. */
6382 x = force_operand (x, NULL_RTX);
6383 else
6385 /* For the remaining cases, force the constant into a register. */
6386 xop1 = force_reg (SImode, xop1);
6387 x = gen_rtx_PLUS (SImode, xop0, xop1);
6390 else if (GET_CODE (x) == PLUS
6391 && s_register_operand (XEXP (x, 1), SImode)
6392 && !s_register_operand (XEXP (x, 0), SImode))
6394 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6396 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6399 if (flag_pic)
6401 /* We need to find and carefully transform any SYMBOL and LABEL
6402 references; so go back to the original address expression. */
6403 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6405 if (new_x != orig_x)
6406 x = new_x;
6409 return x;
6412 bool
6413 arm_legitimize_reload_address (rtx *p,
6414 enum machine_mode mode,
6415 int opnum, int type,
6416 int ind_levels ATTRIBUTE_UNUSED)
6418 if (GET_CODE (*p) == PLUS
6419 && GET_CODE (XEXP (*p, 0)) == REG
6420 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6421 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6423 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6424 HOST_WIDE_INT low, high;
6426 /* Detect coprocessor load/stores. */
6427 bool coproc_p = ((TARGET_HARD_FLOAT
6428 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6429 && (mode == SFmode || mode == DFmode
6430 || (mode == DImode && TARGET_MAVERICK)))
6431 || (TARGET_REALLY_IWMMXT
6432 && VALID_IWMMXT_REG_MODE (mode))
6433 || (TARGET_NEON
6434 && (VALID_NEON_DREG_MODE (mode)
6435 || VALID_NEON_QREG_MODE (mode))));
6437 /* For some conditions, bail out when the low two bits are nonzero, i.e. the offset is unaligned. */
6438 if ((val & 0x3) != 0
6439 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6440 && (coproc_p
6441 /* For DI, and DF under soft-float: */
6442 || ((mode == DImode || mode == DFmode)
6443 /* Without ldrd, we use stm/ldm, which does not
6444 fare well with unaligned bits. */
6445 && (! TARGET_LDRD
6446 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6447 || TARGET_THUMB2))))
6448 return false;
6450 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6451 where the (reg+high) part gets turned into a reload add insn,
6452 we try to decompose the index into high/low values that can often
6453 also lead to better reload CSE.
6454 For example:
6455 ldr r0, [r2, #4100] // Offset too large
6456 ldr r1, [r2, #4104] // Offset too large
6458 is best reloaded as:
6459 add t1, r2, #4096
6460 ldr r0, [t1, #4]
6461 add t2, r2, #4096
6462 ldr r1, [t2, #8]
6464 which post-reload CSE can simplify in most cases to eliminate the
6465 second add instruction:
6466 add t1, r2, #4096
6467 ldr r0, [t1, #4]
6468 ldr r1, [t1, #8]
6470 The idea here is that we want to split out the bits of the constant
6471 as a mask, rather than simply subtracting the maximum offset that the
6472 respective type of load/store can handle.
6474 A negative low part can still be useful even when the overall offset
6475 is positive; sometimes this leads to an immediate that can be
6476 constructed with fewer instructions.
6477 For example:
6478 ldr r0, [r2, #0x3FFFFC]
6480 This is best reloaded as:
6481 add t1, r2, #0x400000
6482 ldr r0, [t1, #-4]
6484 The trick for spotting this for a load insn with N bits of offset
6485 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6486 negative offset that is going to make bit N and all the bits below
6487 it become zero in the remainder part.
6489 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6490 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6491 used in most cases of ARM load/store instructions. */
6493 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6494 (((VAL) & ((1 << (N)) - 1)) \
6495 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6496 : 0)
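/* For the 0x3FFFFC example above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
   sees bit 12 set and sign-extends the low 13 bits (0x1FFC) to -4; the
   remaining high part 0x3FFFFC - (-4) = 0x400000 is a cheap immediate.  */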
6498 if (coproc_p)
6500 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6502 /* NEON quad-word load/stores are made of two double-word accesses,
6503 so the valid index range is reduced by 8. Treat as 9-bit range if
6504 we go over it. */
6505 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6506 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6508 else if (GET_MODE_SIZE (mode) == 8)
6510 if (TARGET_LDRD)
6511 low = (TARGET_THUMB2
6512 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6513 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6514 else
6515 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6516 to access doublewords. The supported load/store offsets are
6517 -8, -4, and 4, which we try to produce here. */
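/* The expression below sign-extends the low four bits of VAL, e.g.
   val = 0x1c gives low = -4 (and hence high = 0x20).  */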
6518 low = ((val & 0xf) ^ 0x8) - 0x8;
6520 else if (GET_MODE_SIZE (mode) < 8)
6522 /* NEON element load/stores do not have an offset. */
6523 if (TARGET_NEON_FP16 && mode == HFmode)
6524 return false;
6526 if (TARGET_THUMB2)
6528 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6529 Try the wider 12-bit range first, and re-try if the result
6530 is out of range. */
6531 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6532 if (low < -255)
6533 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6535 else
6537 if (mode == HImode || mode == HFmode)
6539 if (arm_arch4)
6540 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6541 else
6543 /* The storehi/movhi_bytes fallbacks can use only
6544 [-4094,+4094] of the full ldrb/strb index range. */
6545 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6546 if (low == 4095 || low == -4095)
6547 return false;
6550 else
6551 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6554 else
6555 return false;
6557 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6558 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6559 - (unsigned HOST_WIDE_INT) 0x80000000);
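/* The xor/subtract pair sign-extends (val - low) from 32 bits so that
   HIGH stays canonical on hosts where HOST_WIDE_INT is wider.  */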
6560 /* Check for overflow or zero. */
6561 if (low == 0 || high == 0 || (high + low != val))
6562 return false;
6564 /* Reload the high part into a base reg; leave the low part
6565 in the mem. */
6566 *p = gen_rtx_PLUS (GET_MODE (*p),
6567 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6568 GEN_INT (high)),
6569 GEN_INT (low));
6570 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6571 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6572 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6573 return true;
6576 return false;
6580 thumb_legitimize_reload_address (rtx *x_p,
6581 enum machine_mode mode,
6582 int opnum, int type,
6583 int ind_levels ATTRIBUTE_UNUSED)
6585 rtx x = *x_p;
6587 if (GET_CODE (x) == PLUS
6588 && GET_MODE_SIZE (mode) < 4
6589 && REG_P (XEXP (x, 0))
6590 && XEXP (x, 0) == stack_pointer_rtx
6591 && GET_CODE (XEXP (x, 1)) == CONST_INT
6592 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6594 rtx orig_x = x;
6596 x = copy_rtx (x);
6597 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6598 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6599 return x;
6602 /* If both registers are hi-regs, then it's better to reload the
6603 entire expression rather than each register individually. That
6604 only requires one reload register rather than two. */
6605 if (GET_CODE (x) == PLUS
6606 && REG_P (XEXP (x, 0))
6607 && REG_P (XEXP (x, 1))
6608 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6609 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6611 rtx orig_x = x;
6613 x = copy_rtx (x);
6614 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6615 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6616 return x;
6619 return NULL;
6622 /* Test for various thread-local symbols. */
6624 /* Return TRUE if X is a thread-local symbol. */
6626 static bool
6627 arm_tls_symbol_p (rtx x)
6629 if (! TARGET_HAVE_TLS)
6630 return false;
6632 if (GET_CODE (x) != SYMBOL_REF)
6633 return false;
6635 return SYMBOL_REF_TLS_MODEL (x) != 0;
6638 /* Helper for arm_tls_referenced_p. */
6640 static int
6641 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6643 if (GET_CODE (*x) == SYMBOL_REF)
6644 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6646 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6647 TLS offsets, not real symbol references. */
6648 if (GET_CODE (*x) == UNSPEC
6649 && XINT (*x, 1) == UNSPEC_TLS)
6650 return -1;
6652 return 0;
6655 /* Return TRUE if X contains any TLS symbol references. */
6657 bool
6658 arm_tls_referenced_p (rtx x)
6660 if (! TARGET_HAVE_TLS)
6661 return false;
6663 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6666 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6668 On the ARM, allow any integer (invalid ones are removed later by insn
6669 patterns), nice doubles and symbol_refs which refer to the function's
6670 constant pool XXX.
6672 When generating pic allow anything. */
6674 static bool
6675 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6677 /* At present, we have no support for Neon structure constants, so forbid
6678 them here. It might be possible to handle simple cases like 0 and -1
6679 in future. */
6680 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6681 return false;
6683 return flag_pic || !label_mentioned_p (x);
6686 static bool
6687 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6689 return (GET_CODE (x) == CONST_INT
6690 || GET_CODE (x) == CONST_DOUBLE
6691 || CONSTANT_ADDRESS_P (x)
6692 || flag_pic);
6695 static bool
6696 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6698 return (!arm_cannot_force_const_mem (mode, x)
6699 && (TARGET_32BIT
6700 ? arm_legitimate_constant_p_1 (mode, x)
6701 : thumb_legitimate_constant_p (mode, x)));
6704 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6706 static bool
6707 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6709 rtx base, offset;
6711 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6713 split_const (x, &base, &offset);
6714 if (GET_CODE (base) == SYMBOL_REF
6715 && !offset_within_block_p (base, INTVAL (offset)))
6716 return true;
6718 return arm_tls_referenced_p (x);
6721 #define REG_OR_SUBREG_REG(X) \
6722 (GET_CODE (X) == REG \
6723 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6725 #define REG_OR_SUBREG_RTX(X) \
6726 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6728 static inline int
6729 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6731 enum machine_mode mode = GET_MODE (x);
6732 int total;
6734 switch (code)
6736 case ASHIFT:
6737 case ASHIFTRT:
6738 case LSHIFTRT:
6739 case ROTATERT:
6740 case PLUS:
6741 case MINUS:
6742 case COMPARE:
6743 case NEG:
6744 case NOT:
6745 return COSTS_N_INSNS (1);
6747 case MULT:
6748 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6750 int cycles = 0;
6751 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
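/* Charge roughly one cycle for every two significant bits of the constant
   multiplier, on top of the base multiply cost.  */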
6753 while (i)
6755 i >>= 2;
6756 cycles++;
6758 return COSTS_N_INSNS (2) + cycles;
6760 return COSTS_N_INSNS (1) + 16;
6762 case SET:
6763 return (COSTS_N_INSNS (1)
6764 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6765 + (GET_CODE (SET_DEST (x)) == MEM)));
6767 case CONST_INT:
6768 if (outer == SET)
6770 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6771 return 0;
6772 if (thumb_shiftable_const (INTVAL (x)))
6773 return COSTS_N_INSNS (2);
6774 return COSTS_N_INSNS (3);
6776 else if ((outer == PLUS || outer == COMPARE)
6777 && INTVAL (x) < 256 && INTVAL (x) > -256)
6778 return 0;
6779 else if ((outer == IOR || outer == XOR || outer == AND)
6780 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6781 return COSTS_N_INSNS (1);
6782 else if (outer == AND)
6784 int i;
6785 /* This duplicates the tests in the andsi3 expander. */
6786 for (i = 9; i <= 31; i++)
6787 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6788 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6789 return COSTS_N_INSNS (2);
6791 else if (outer == ASHIFT || outer == ASHIFTRT
6792 || outer == LSHIFTRT)
6793 return 0;
6794 return COSTS_N_INSNS (2);
6796 case CONST:
6797 case CONST_DOUBLE:
6798 case LABEL_REF:
6799 case SYMBOL_REF:
6800 return COSTS_N_INSNS (3);
6802 case UDIV:
6803 case UMOD:
6804 case DIV:
6805 case MOD:
6806 return 100;
6808 case TRUNCATE:
6809 return 99;
6811 case AND:
6812 case XOR:
6813 case IOR:
6814 /* XXX guess. */
6815 return 8;
6817 case MEM:
6818 /* XXX another guess. */
6819 /* Memory costs quite a lot for the first word, but subsequent words
6820 load at the equivalent of a single insn each. */
6821 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6822 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6823 ? 4 : 0));
6825 case IF_THEN_ELSE:
6826 /* XXX a guess. */
6827 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6828 return 14;
6829 return 2;
6831 case SIGN_EXTEND:
6832 case ZERO_EXTEND:
6833 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6834 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6836 if (mode == SImode)
6837 return total;
6839 if (arm_arch6)
6840 return total + COSTS_N_INSNS (1);
6842 /* Assume a two-shift sequence. Increase the cost slightly so
6843 we prefer actual shifts over an extend operation. */
6844 return total + 1 + COSTS_N_INSNS (2);
6846 default:
6847 return 99;
6851 static inline bool
6852 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6854 enum machine_mode mode = GET_MODE (x);
6855 enum rtx_code subcode;
6856 rtx operand;
6857 enum rtx_code code = GET_CODE (x);
6858 *total = 0;
6860 switch (code)
6862 case MEM:
6863 /* Memory costs quite a lot for the first word, but subsequent words
6864 load at the equivalent of a single insn each. */
6865 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6866 return true;
6868 case DIV:
6869 case MOD:
6870 case UDIV:
6871 case UMOD:
6872 if (TARGET_HARD_FLOAT && mode == SFmode)
6873 *total = COSTS_N_INSNS (2);
6874 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6875 *total = COSTS_N_INSNS (4);
6876 else
6877 *total = COSTS_N_INSNS (20);
6878 return false;
6880 case ROTATE:
6881 if (GET_CODE (XEXP (x, 1)) == REG)
6882 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6883 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6884 *total = rtx_cost (XEXP (x, 1), code, speed);
6886 /* Fall through */
6887 case ROTATERT:
6888 if (mode != SImode)
6890 *total += COSTS_N_INSNS (4);
6891 return true;
6894 /* Fall through */
6895 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6896 *total += rtx_cost (XEXP (x, 0), code, speed);
6897 if (mode == DImode)
6899 *total += COSTS_N_INSNS (3);
6900 return true;
6903 *total += COSTS_N_INSNS (1);
6904 /* Increase the cost of complex shifts because they aren't any faster,
6905 and they reduce dual-issue opportunities. */
6906 if (arm_tune_cortex_a9
6907 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6908 ++*total;
6910 return true;
6912 case MINUS:
6913 if (mode == DImode)
6915 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6916 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6917 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6919 *total += rtx_cost (XEXP (x, 1), code, speed);
6920 return true;
6923 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6924 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6926 *total += rtx_cost (XEXP (x, 0), code, speed);
6927 return true;
6930 return false;
6933 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6935 if (TARGET_HARD_FLOAT
6936 && (mode == SFmode
6937 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6939 *total = COSTS_N_INSNS (1);
6940 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6941 && arm_const_double_rtx (XEXP (x, 0)))
6943 *total += rtx_cost (XEXP (x, 1), code, speed);
6944 return true;
6947 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6948 && arm_const_double_rtx (XEXP (x, 1)))
6950 *total += rtx_cost (XEXP (x, 0), code, speed);
6951 return true;
6954 return false;
6956 *total = COSTS_N_INSNS (20);
6957 return false;
6960 *total = COSTS_N_INSNS (1);
6961 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6962 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6964 *total += rtx_cost (XEXP (x, 1), code, speed);
6965 return true;
6968 subcode = GET_CODE (XEXP (x, 1));
6969 if (subcode == ASHIFT || subcode == ASHIFTRT
6970 || subcode == LSHIFTRT
6971 || subcode == ROTATE || subcode == ROTATERT)
6973 *total += rtx_cost (XEXP (x, 0), code, speed);
6974 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6975 return true;
6978 /* A shift as a part of RSB costs no more than RSB itself. */
6979 if (GET_CODE (XEXP (x, 0)) == MULT
6980 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6982 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6983 *total += rtx_cost (XEXP (x, 1), code, speed);
6984 return true;
6987 if (subcode == MULT
6988 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6990 *total += rtx_cost (XEXP (x, 0), code, speed);
6991 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6992 return true;
6995 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6996 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6998 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6999 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7000 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7001 *total += COSTS_N_INSNS (1);
7003 return true;
7006 /* Fall through */
7008 case PLUS:
7009 if (code == PLUS && arm_arch6 && mode == SImode
7010 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7011 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7013 *total = COSTS_N_INSNS (1);
7014 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7015 speed);
7016 *total += rtx_cost (XEXP (x, 1), code, speed);
7017 return true;
7020 /* MLA: All arguments must be registers. We filter out
7021 multiplication by a power of two, so that we fall through to
7022 the code below. */
7023 if (GET_CODE (XEXP (x, 0)) == MULT
7024 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7026 /* The cost comes from the cost of the multiply. */
7027 return false;
7030 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7032 if (TARGET_HARD_FLOAT
7033 && (mode == SFmode
7034 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7036 *total = COSTS_N_INSNS (1);
7037 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7038 && arm_const_double_rtx (XEXP (x, 1)))
7040 *total += rtx_cost (XEXP (x, 0), code, speed);
7041 return true;
7044 return false;
7047 *total = COSTS_N_INSNS (20);
7048 return false;
7051 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7052 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7054 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
7055 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7056 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7057 *total += COSTS_N_INSNS (1);
7058 return true;
7061 /* Fall through */
7063 case AND: case XOR: case IOR:
7065 /* Normally the frame registers will be split into reg+const during
7066 reload, so it is a bad idea to combine them with other instructions,
7067 since then they might not be moved outside of loops. As a compromise
7068 we allow integration with ops that have a constant as their second
7069 operand. */
7070 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7071 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7072 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7073 *total = COSTS_N_INSNS (1);
7075 if (mode == DImode)
7077 *total += COSTS_N_INSNS (2);
7078 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7079 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7081 *total += rtx_cost (XEXP (x, 0), code, speed);
7082 return true;
7085 return false;
7088 *total += COSTS_N_INSNS (1);
7089 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7090 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7092 *total += rtx_cost (XEXP (x, 0), code, speed);
7093 return true;
7095 subcode = GET_CODE (XEXP (x, 0));
7096 if (subcode == ASHIFT || subcode == ASHIFTRT
7097 || subcode == LSHIFTRT
7098 || subcode == ROTATE || subcode == ROTATERT)
7100 *total += rtx_cost (XEXP (x, 1), code, speed);
7101 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7102 return true;
7105 if (subcode == MULT
7106 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7108 *total += rtx_cost (XEXP (x, 1), code, speed);
7109 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7110 return true;
7113 if (subcode == UMIN || subcode == UMAX
7114 || subcode == SMIN || subcode == SMAX)
7116 *total = COSTS_N_INSNS (3);
7117 return true;
7120 return false;
7122 case MULT:
7123 /* This should have been handled by the CPU specific routines. */
7124 gcc_unreachable ();
7126 case TRUNCATE:
7127 if (arm_arch3m && mode == SImode
7128 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7129 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7130 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7131 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7132 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7133 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7135 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
7136 return true;
7138 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7139 return false;
7141 case NEG:
7142 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7144 if (TARGET_HARD_FLOAT
7145 && (mode == SFmode
7146 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7148 *total = COSTS_N_INSNS (1);
7149 return false;
7151 *total = COSTS_N_INSNS (2);
7152 return false;
7155 /* Fall through */
7156 case NOT:
7157 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7158 if (mode == SImode && code == NOT)
7160 subcode = GET_CODE (XEXP (x, 0));
7161 if (subcode == ASHIFT || subcode == ASHIFTRT
7162 || subcode == LSHIFTRT
7163 || subcode == ROTATE || subcode == ROTATERT
7164 || (subcode == MULT
7165 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7167 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7168 /* Register shifts cost an extra cycle. */
7169 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7170 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7171 subcode, speed);
7172 return true;
7176 return false;
7178 case IF_THEN_ELSE:
7179 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7181 *total = COSTS_N_INSNS (4);
7182 return true;
7185 operand = XEXP (x, 0);
7187 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7188 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7189 && GET_CODE (XEXP (operand, 0)) == REG
7190 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7191 *total += COSTS_N_INSNS (1);
7192 *total += (rtx_cost (XEXP (x, 1), code, speed)
7193 + rtx_cost (XEXP (x, 2), code, speed));
7194 return true;
7196 case NE:
7197 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7199 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7200 return true;
7202 goto scc_insn;
7204 case GE:
7205 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7206 && mode == SImode && XEXP (x, 1) == const0_rtx)
7208 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7209 return true;
7211 goto scc_insn;
7213 case LT:
7214 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7215 && mode == SImode && XEXP (x, 1) == const0_rtx)
7217 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7218 return true;
7220 goto scc_insn;
7222 case EQ:
7223 case GT:
7224 case LE:
7225 case GEU:
7226 case LTU:
7227 case GTU:
7228 case LEU:
7229 case UNORDERED:
7230 case ORDERED:
7231 case UNEQ:
7232 case UNGE:
7233 case UNLT:
7234 case UNGT:
7235 case UNLE:
7236 scc_insn:
7237 /* SCC insns. If the comparison has already been performed, they
7238 cost 2 instructions. Otherwise they need an additional comparison
7239 before them. */
7240 *total = COSTS_N_INSNS (2);
7241 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7243 return true;
7246 /* Fall through */
7247 case COMPARE:
7248 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7250 *total = 0;
7251 return true;
7254 *total += COSTS_N_INSNS (1);
7255 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7256 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7258 *total += rtx_cost (XEXP (x, 0), code, speed);
7259 return true;
7262 subcode = GET_CODE (XEXP (x, 0));
7263 if (subcode == ASHIFT || subcode == ASHIFTRT
7264 || subcode == LSHIFTRT
7265 || subcode == ROTATE || subcode == ROTATERT)
7267 *total += rtx_cost (XEXP (x, 1), code, speed);
7268 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7269 return true;
7272 if (subcode == MULT
7273 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7275 *total += rtx_cost (XEXP (x, 1), code, speed);
7276 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7277 return true;
7280 return false;
7282 case UMIN:
7283 case UMAX:
7284 case SMIN:
7285 case SMAX:
7286 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7287 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7288 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7289 *total += rtx_cost (XEXP (x, 1), code, speed);
7290 return true;
7292 case ABS:
7293 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7295 if (TARGET_HARD_FLOAT
7296 && (mode == SFmode
7297 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7299 *total = COSTS_N_INSNS (1);
7300 return false;
7302 *total = COSTS_N_INSNS (20);
7303 return false;
7305 *total = COSTS_N_INSNS (1);
7306 if (mode == DImode)
7307 *total += COSTS_N_INSNS (3);
7308 return false;
7310 case SIGN_EXTEND:
7311 case ZERO_EXTEND:
7312 *total = 0;
7313 if (GET_MODE_CLASS (mode) == MODE_INT)
7315 rtx op = XEXP (x, 0);
7316 enum machine_mode opmode = GET_MODE (op);
7318 if (mode == DImode)
7319 *total += COSTS_N_INSNS (1);
7321 if (opmode != SImode)
7323 if (MEM_P (op))
7325 /* If !arm_arch4, we use one of the extendhisi2_mem
7326 or movhi_bytes patterns for HImode. For a QImode
7327 sign extension, we first zero-extend from memory
7328 and then perform a shift sequence. */
7329 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7330 *total += COSTS_N_INSNS (2);
7332 else if (arm_arch6)
7333 *total += COSTS_N_INSNS (1);
7335 /* We don't have the necessary insn, so we need to perform some
7336 other operation. */
7337 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7338 /* An and with constant 255. */
7339 *total += COSTS_N_INSNS (1);
7340 else
7341 /* A shift sequence. Increase costs slightly to avoid
7342 combining two shifts into an extend operation. */
7343 *total += COSTS_N_INSNS (2) + 1;
7346 return false;
7349 switch (GET_MODE (XEXP (x, 0)))
7351 case V8QImode:
7352 case V4HImode:
7353 case V2SImode:
7354 case V4QImode:
7355 case V2HImode:
7356 *total = COSTS_N_INSNS (1);
7357 return false;
7359 default:
7360 gcc_unreachable ();
7362 gcc_unreachable ();
7364 case ZERO_EXTRACT:
7365 case SIGN_EXTRACT:
7366 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7367 return true;
7369 case CONST_INT:
7370 if (const_ok_for_arm (INTVAL (x))
7371 || const_ok_for_arm (~INTVAL (x)))
7372 *total = COSTS_N_INSNS (1);
7373 else
7374 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7375 INTVAL (x), NULL_RTX,
7376 NULL_RTX, 0, 0));
7377 return true;
7379 case CONST:
7380 case LABEL_REF:
7381 case SYMBOL_REF:
7382 *total = COSTS_N_INSNS (3);
7383 return true;
7385 case HIGH:
7386 *total = COSTS_N_INSNS (1);
7387 return true;
7389 case LO_SUM:
7390 *total = COSTS_N_INSNS (1);
7391 *total += rtx_cost (XEXP (x, 0), code, speed);
7392 return true;
7394 case CONST_DOUBLE:
7395 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7396 && (mode == SFmode || !TARGET_VFP_SINGLE))
7397 *total = COSTS_N_INSNS (1);
7398 else
7399 *total = COSTS_N_INSNS (4);
7400 return true;
7402 default:
7403 *total = COSTS_N_INSNS (4);
7404 return false;
7408 /* Estimate the size cost of Thumb-1 instructions.
7409 For now most of the code is copied from thumb1_rtx_costs. We need
7410 finer-grained tuning when we have more related test cases. */
7411 static inline int
7412 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7414 enum machine_mode mode = GET_MODE (x);
7416 switch (code)
7418 case ASHIFT:
7419 case ASHIFTRT:
7420 case LSHIFTRT:
7421 case ROTATERT:
7422 case PLUS:
7423 case MINUS:
7424 case COMPARE:
7425 case NEG:
7426 case NOT:
7427 return COSTS_N_INSNS (1);
7429 case MULT:
7430 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7432 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
7433 into a register first. */
7434 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7435 return COSTS_N_INSNS (1) + const_size;
7437 return COSTS_N_INSNS (1);
7439 case SET:
7440 return (COSTS_N_INSNS (1)
7441 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7442 + (GET_CODE (SET_DEST (x)) == MEM)));
7444 case CONST_INT:
7445 if (outer == SET)
7447 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7448 return COSTS_N_INSNS (1);
7449 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7450 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7451 return COSTS_N_INSNS (2);
7452 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7453 if (thumb_shiftable_const (INTVAL (x)))
7454 return COSTS_N_INSNS (2);
7455 return COSTS_N_INSNS (3);
7457 else if ((outer == PLUS || outer == COMPARE)
7458 && INTVAL (x) < 256 && INTVAL (x) > -256)
7459 return 0;
7460 else if ((outer == IOR || outer == XOR || outer == AND)
7461 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7462 return COSTS_N_INSNS (1);
7463 else if (outer == AND)
7465 int i;
7466 /* This duplicates the tests in the andsi3 expander. */
7467 for (i = 9; i <= 31; i++)
7468 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7469 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7470 return COSTS_N_INSNS (2);
7472 else if (outer == ASHIFT || outer == ASHIFTRT
7473 || outer == LSHIFTRT)
7474 return 0;
7475 return COSTS_N_INSNS (2);
7477 case CONST:
7478 case CONST_DOUBLE:
7479 case LABEL_REF:
7480 case SYMBOL_REF:
7481 return COSTS_N_INSNS (3);
7483 case UDIV:
7484 case UMOD:
7485 case DIV:
7486 case MOD:
7487 return 100;
7489 case TRUNCATE:
7490 return 99;
7492 case AND:
7493 case XOR:
7494 case IOR:
7495 /* XXX guess. */
7496 return 8;
7498 case MEM:
7499 /* XXX another guess. */
7500 /* Memory costs quite a lot for the first word, but subsequent words
7501 load at the equivalent of a single insn each. */
7502 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7503 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7504 ? 4 : 0));
7506 case IF_THEN_ELSE:
7507 /* XXX a guess. */
7508 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7509 return 14;
7510 return 2;
7512 case ZERO_EXTEND:
7513 /* XXX still guessing. */
7514 switch (GET_MODE (XEXP (x, 0)))
7516 case QImode:
7517 return (1 + (mode == DImode ? 4 : 0)
7518 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7520 case HImode:
7521 return (4 + (mode == DImode ? 4 : 0)
7522 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7524 case SImode:
7525 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7527 default:
7528 return 99;
7531 default:
7532 return 99;
7536 /* RTX costs when optimizing for size. */
7537 static bool
7538 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7539 int *total)
7541 enum machine_mode mode = GET_MODE (x);
7542 if (TARGET_THUMB1)
7544 *total = thumb1_size_rtx_costs (x, code, outer_code);
7545 return true;
7548 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7549 switch (code)
7551 case MEM:
7552 /* A memory access costs 1 insn if the mode is small or the address is
7553 a single register; otherwise it costs one insn per word. */
7554 if (REG_P (XEXP (x, 0)))
7555 *total = COSTS_N_INSNS (1);
7556 else if (flag_pic
7557 && GET_CODE (XEXP (x, 0)) == PLUS
7558 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7559 /* This will be split into two instructions.
7560 See arm.md:calculate_pic_address. */
7561 *total = COSTS_N_INSNS (2);
7562 else
7563 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7564 return true;
7566 case DIV:
7567 case MOD:
7568 case UDIV:
7569 case UMOD:
7570 /* Needs a libcall, so it costs about this. */
7571 *total = COSTS_N_INSNS (2);
7572 return false;
7574 case ROTATE:
7575 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7577 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7578 return true;
7580 /* Fall through */
7581 case ROTATERT:
7582 case ASHIFT:
7583 case LSHIFTRT:
7584 case ASHIFTRT:
7585 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7587 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7588 return true;
7590 else if (mode == SImode)
7592 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7593 /* Slightly disparage register shifts, but not by much. */
7594 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7595 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7596 return true;
7599 /* Needs a libcall. */
7600 *total = COSTS_N_INSNS (2);
7601 return false;
7603 case MINUS:
7604 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7605 && (mode == SFmode || !TARGET_VFP_SINGLE))
7607 *total = COSTS_N_INSNS (1);
7608 return false;
7611 if (mode == SImode)
7613 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7614 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7616 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7617 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7618 || subcode1 == ROTATE || subcode1 == ROTATERT
7619 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7620 || subcode1 == ASHIFTRT)
7622 /* It's just the cost of the two operands. */
7623 *total = 0;
7624 return false;
7627 *total = COSTS_N_INSNS (1);
7628 return false;
7631 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7632 return false;
7634 case PLUS:
7635 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7636 && (mode == SFmode || !TARGET_VFP_SINGLE))
7638 *total = COSTS_N_INSNS (1);
7639 return false;
7642 /* A shift as a part of ADD costs nothing. */
7643 if (GET_CODE (XEXP (x, 0)) == MULT
7644 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7646 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7647 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7648 *total += rtx_cost (XEXP (x, 1), code, false);
7649 return true;
7652 /* Fall through */
7653 case AND: case XOR: case IOR:
7654 if (mode == SImode)
7656 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7658 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7659 || subcode == LSHIFTRT || subcode == ASHIFTRT
7660 || (code == AND && subcode == NOT))
7662 /* It's just the cost of the two operands. */
7663 *total = 0;
7664 return false;
7668 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7669 return false;
7671 case MULT:
7672 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7673 return false;
7675 case NEG:
7676 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7677 && (mode == SFmode || !TARGET_VFP_SINGLE))
7679 *total = COSTS_N_INSNS (1);
7680 return false;
7683 /* Fall through */
7684 case NOT:
7685 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7687 return false;
7689 case IF_THEN_ELSE:
7690 *total = 0;
7691 return false;
7693 case COMPARE:
7694 if (cc_register (XEXP (x, 0), VOIDmode))
7695 *total = 0;
7696 else
7697 *total = COSTS_N_INSNS (1);
7698 return false;
7700 case ABS:
7701 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7702 && (mode == SFmode || !TARGET_VFP_SINGLE))
7703 *total = COSTS_N_INSNS (1);
7704 else
7705 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7706 return false;
7708 case SIGN_EXTEND:
7709 case ZERO_EXTEND:
7710 return arm_rtx_costs_1 (x, outer_code, total, 0);
7712 case CONST_INT:
7713 if (const_ok_for_arm (INTVAL (x)))
7714 /* A multiplication by a constant requires another instruction
7715 to load the constant to a register. */
7716 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7717 ? 1 : 0);
7718 else if (const_ok_for_arm (~INTVAL (x)))
7719 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7720 else if (const_ok_for_arm (-INTVAL (x)))
7722 if (outer_code == COMPARE || outer_code == PLUS
7723 || outer_code == MINUS)
7724 *total = 0;
7725 else
7726 *total = COSTS_N_INSNS (1);
7728 else
7729 *total = COSTS_N_INSNS (2);
7730 return true;
7732 case CONST:
7733 case LABEL_REF:
7734 case SYMBOL_REF:
7735 *total = COSTS_N_INSNS (2);
7736 return true;
7738 case CONST_DOUBLE:
7739 *total = COSTS_N_INSNS (4);
7740 return true;
7742 case HIGH:
7743 case LO_SUM:
7744 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7745 cost of these slightly. */
7746 *total = COSTS_N_INSNS (1) + 1;
7747 return true;
7749 default:
7750 if (mode != VOIDmode)
7751 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7752 else
7753 *total = COSTS_N_INSNS (4); /* Who knows? */
7754 return false;
7758 /* RTX costs. Dispatch to the size costs when optimizing for size, otherwise to the per-core speed costs. */
7759 static bool
7760 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7761 bool speed)
7763 if (!speed)
7764 return arm_size_rtx_costs (x, (enum rtx_code) code,
7765 (enum rtx_code) outer_code, total);
7766 else
7767 return current_tune->rtx_costs (x, (enum rtx_code) code,
7768 (enum rtx_code) outer_code,
7769 total, speed);
7772 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7773 supported on any "slowmul" cores, so it can be ignored. */
7775 static bool
7776 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7777 int *total, bool speed)
7779 enum machine_mode mode = GET_MODE (x);
7781 if (TARGET_THUMB)
7783 *total = thumb1_rtx_costs (x, code, outer_code);
7784 return true;
7787 switch (code)
7789 case MULT:
7790 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7791 || mode == DImode)
7793 *total = COSTS_N_INSNS (20);
7794 return false;
7797 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7799 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7800 & (unsigned HOST_WIDE_INT) 0xffffffff);
7801 int cost, const_ok = const_ok_for_arm (i);
7802 int j, booth_unit_size;
7804 /* Tune as appropriate. */
7805 cost = const_ok ? 4 : 8;
7806 booth_unit_size = 2;
7807 for (j = 0; i && j < 32; j += booth_unit_size)
7809 i >>= booth_unit_size;
7810 cost++;
7813 *total = COSTS_N_INSNS (cost);
7814 *total += rtx_cost (XEXP (x, 0), code, speed);
7815 return true;
7818 *total = COSTS_N_INSNS (20);
7819 return false;
7821 default:
7822 return arm_rtx_costs_1 (x, outer_code, total, speed);
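/* Editor's illustrative sketch (not part of arm.c): the MULT case above
   approximates the number of Booth steps a slow multiplier needs for a
   constant operand by shifting the constant right two bits per step.  This
   standalone program repeats that arithmetic so it can be checked in
   isolation; the helper name and the example constant are made up, while
   the 4/8 base costs and the 2-bit step are copied from the code above.  */

#include <stdio.h>

static int
slowmul_const_mult_cost (unsigned long long i, int const_ok)
{
  int cost = const_ok ? 4 : 8;	/* cheaper if the constant is an ARM immediate */
  int booth_unit_size = 2;	/* bits retired per multiplier step */
  int j;

  i &= 0xffffffffULL;
  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      cost++;
    }
  return cost;
}

int
main (void)
{
  /* 0xF0 is emptied in four 2-bit steps, so an encodable constant costs
     4 + 4 = 8 cost units before COSTS_N_INSNS scaling.  */
  printf ("cost (0xF0) = %d\n", slowmul_const_mult_cost (0xF0, 1));
  return 0;
}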
7827 /* RTX cost for cores with a fast multiply unit (M variants). */
7829 static bool
7830 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7831 int *total, bool speed)
7833 enum machine_mode mode = GET_MODE (x);
7835 if (TARGET_THUMB1)
7837 *total = thumb1_rtx_costs (x, code, outer_code);
7838 return true;
7841 /* ??? should thumb2 use different costs? */
7842 switch (code)
7844 case MULT:
7845 /* There is no point basing this on the tuning, since it is always the
7846 fast variant if it exists at all. */
7847 if (mode == DImode
7848 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7849 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7850 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7852 *total = COSTS_N_INSNS (2);
7853 return false;
7857 if (mode == DImode)
7859 *total = COSTS_N_INSNS (5);
7860 return false;
7863 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7865 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7866 & (unsigned HOST_WIDE_INT) 0xffffffff);
7867 int cost, const_ok = const_ok_for_arm (i);
7868 int j, booth_unit_size;
7870 /* Tune as appropriate. */
7871 cost = const_ok ? 4 : 8;
7872 booth_unit_size = 8;
7873 for (j = 0; i && j < 32; j += booth_unit_size)
7875 i >>= booth_unit_size;
7876 cost++;
7879 *total = COSTS_N_INSNS (cost);
7880 return false;
7883 if (mode == SImode)
7885 *total = COSTS_N_INSNS (4);
7886 return false;
7889 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7891 if (TARGET_HARD_FLOAT
7892 && (mode == SFmode
7893 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7895 *total = COSTS_N_INSNS (1);
7896 return false;
7900 /* Requires a lib call */
7901 *total = COSTS_N_INSNS (20);
7902 return false;
7904 default:
7905 return arm_rtx_costs_1 (x, outer_code, total, speed);
7910 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7911 so it can be ignored. */
7913 static bool
7914 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7915 int *total, bool speed)
7917 enum machine_mode mode = GET_MODE (x);
7919 if (TARGET_THUMB)
7921 *total = thumb1_rtx_costs (x, code, outer_code);
7922 return true;
7925 switch (code)
7927 case COMPARE:
7928 if (GET_CODE (XEXP (x, 0)) != MULT)
7929 return arm_rtx_costs_1 (x, outer_code, total, speed);
7931 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7932 will stall until the multiplication is complete. */
7933 *total = COSTS_N_INSNS (3);
7934 return false;
7936 case MULT:
7937 /* There is no point basing this on the tuning, since it is always the
7938 fast variant if it exists at all. */
7939 if (mode == DImode
7940 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7941 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7942 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7944 *total = COSTS_N_INSNS (2);
7945 return false;
7949 if (mode == DImode)
7951 *total = COSTS_N_INSNS (5);
7952 return false;
7955 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7957 /* If operand 1 is a constant we can more accurately
7958 calculate the cost of the multiply. The multiplier can
7959 retire 15 bits on the first cycle and a further 12 on the
7960 second. We do, of course, have to load the constant into
7961 a register first. */
7962 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7963 /* There's a general overhead of one cycle. */
7964 int cost = 1;
7965 unsigned HOST_WIDE_INT masked_const;
7967 if (i & 0x80000000)
7968 i = ~i;
7970 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7972 masked_const = i & 0xffff8000;
7973 if (masked_const != 0)
7975 cost++;
7976 masked_const = i & 0xf8000000;
7977 if (masked_const != 0)
7978 cost++;
7980 *total = COSTS_N_INSNS (cost);
7981 return false;
7984 if (mode == SImode)
7986 *total = COSTS_N_INSNS (3);
7987 return false;
7990 /* Requires a lib call */
7991 *total = COSTS_N_INSNS (20);
7992 return false;
7994 default:
7995 return arm_rtx_costs_1 (x, outer_code, total, speed);
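/* Editor's illustrative sketch (not part of arm.c): the XScale constant
   multiply cost above models a multiplier that retires 15 bits of the
   (possibly complemented) constant in the first cycle and a further 12 in
   the second, on top of one cycle of overhead.  This standalone helper
   repeats that estimate so it can be checked by hand; the function name and
   the example constants are made up, not hardware documentation.  */

#include <stdio.h>

static int
xscale_mult_cycles (unsigned long long i)
{
  int cost = 1;				/* general overhead */

  if (i & 0x80000000ULL)		/* cost negative-looking constants */
    i = ~i;				/* via their complement */
  i &= 0xffffffffULL;

  if (i & 0xffff8000ULL)		/* bits beyond the first 15 */
    cost++;
  if (i & 0xf8000000ULL)		/* bits beyond the first 27 */
    cost++;
  return cost;
}

int
main (void)
{
  printf ("cycles (100)        = %d\n", xscale_mult_cycles (100));		/* 1 */
  printf ("cycles (0x00FF0000) = %d\n", xscale_mult_cycles (0x00FF0000));	/* 2 */
  printf ("cycles (0x30000000) = %d\n", xscale_mult_cycles (0x30000000));	/* 3 */
  return 0;
}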
8000 /* RTX costs for 9e (and later) cores. */
8002 static bool
8003 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8004 int *total, bool speed)
8006 enum machine_mode mode = GET_MODE (x);
8008 if (TARGET_THUMB1)
8010 switch (code)
8012 case MULT:
8013 *total = COSTS_N_INSNS (3);
8014 return true;
8016 default:
8017 *total = thumb1_rtx_costs (x, code, outer_code);
8018 return true;
8022 switch (code)
8024 case MULT:
8025 /* There is no point basing this on the tuning, since it is always the
8026 fast variant if it exists at all. */
8027 if (mode == DImode
8028 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8029 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8030 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8032 *total = COSTS_N_INSNS (2);
8033 return false;
8037 if (mode == DImode)
8039 *total = COSTS_N_INSNS (5);
8040 return false;
8043 if (mode == SImode)
8045 *total = COSTS_N_INSNS (2);
8046 return false;
8049 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8051 if (TARGET_HARD_FLOAT
8052 && (mode == SFmode
8053 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8055 *total = COSTS_N_INSNS (1);
8056 return false;
8060 *total = COSTS_N_INSNS (20);
8061 return false;
8063 default:
8064 return arm_rtx_costs_1 (x, outer_code, total, speed);
8067 /* All address computations that can be done are free, but rtx cost returns
8068 the same for practically all of them. So we weight the different types
8069 of address here in the order (most pref first):
8070 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8071 static inline int
8072 arm_arm_address_cost (rtx x)
8074 enum rtx_code c = GET_CODE (x);
8076 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8077 return 0;
8078 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8079 return 10;
8081 if (c == PLUS)
8083 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8084 return 2;
8086 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8087 return 3;
8089 return 4;
8092 return 6;
8095 static inline int
8096 arm_thumb_address_cost (rtx x)
8098 enum rtx_code c = GET_CODE (x);
8100 if (c == REG)
8101 return 1;
8102 if (c == PLUS
8103 && GET_CODE (XEXP (x, 0)) == REG
8104 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8105 return 1;
8107 return 2;
8110 static int
8111 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8113 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8116 /* Adjust cost hook for XScale. */
8117 static bool
8118 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8120 /* Some true dependencies can have a higher cost depending
8121 on precisely how certain input operands are used. */
8122 if (REG_NOTE_KIND(link) == 0
8123 && recog_memoized (insn) >= 0
8124 && recog_memoized (dep) >= 0)
8126 int shift_opnum = get_attr_shift (insn);
8127 enum attr_type attr_type = get_attr_type (dep);
8129 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8130 operand for INSN. If we have a shifted input operand and the
8131 instruction we depend on is another ALU instruction, then we may
8132 have to account for an additional stall. */
8133 if (shift_opnum != 0
8134 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8136 rtx shifted_operand;
8137 int opno;
8139 /* Get the shifted operand. */
8140 extract_insn (insn);
8141 shifted_operand = recog_data.operand[shift_opnum];
8143 /* Iterate over all the operands in DEP. If we write an operand
8144 that overlaps with SHIFTED_OPERAND, then we have to increase the
8145 cost of this dependency. */
8146 extract_insn (dep);
8147 preprocess_constraints ();
8148 for (opno = 0; opno < recog_data.n_operands; opno++)
8150 /* We can ignore strict inputs. */
8151 if (recog_data.operand_type[opno] == OP_IN)
8152 continue;
8154 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8155 shifted_operand))
8157 *cost = 2;
8158 return false;
8163 return true;
8166 /* Adjust cost hook for Cortex A9. */
8167 static bool
8168 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8170 switch (REG_NOTE_KIND (link))
8172 case REG_DEP_ANTI:
8173 *cost = 0;
8174 return false;
8176 case REG_DEP_TRUE:
8177 case REG_DEP_OUTPUT:
8178 if (recog_memoized (insn) >= 0
8179 && recog_memoized (dep) >= 0)
8181 if (GET_CODE (PATTERN (insn)) == SET)
8183 if (GET_MODE_CLASS
8184 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8185 || GET_MODE_CLASS
8186 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8188 enum attr_type attr_type_insn = get_attr_type (insn);
8189 enum attr_type attr_type_dep = get_attr_type (dep);
8191 /* By default all dependencies of the form
8192 s0 = s0 <op> s1
8193 s0 = s0 <op> s2
8194 have an extra latency of 1 cycle because
8195 of the input and output dependency in this
8196 case. However this gets modeled as a true
8197 dependency and hence all these checks. */
8198 if (REG_P (SET_DEST (PATTERN (insn)))
8199 && REG_P (SET_DEST (PATTERN (dep)))
8200 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8201 SET_DEST (PATTERN (dep))))
8203 /* FMACS is a special case where the dependent
8204 instruction can be issued 3 cycles before
8205 the normal latency in case of an output
8206 dependency. */
8207 if ((attr_type_insn == TYPE_FMACS
8208 || attr_type_insn == TYPE_FMACD)
8209 && (attr_type_dep == TYPE_FMACS
8210 || attr_type_dep == TYPE_FMACD))
8212 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8213 *cost = insn_default_latency (dep) - 3;
8214 else
8215 *cost = insn_default_latency (dep);
8216 return false;
8218 else
8220 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8221 *cost = insn_default_latency (dep) + 1;
8222 else
8223 *cost = insn_default_latency (dep);
8225 return false;
8230 break;
8232 default:
8233 gcc_unreachable ();
8236 return true;
8239 /* Adjust cost hook for FA726TE. */
8240 static bool
8241 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8243 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8244 followed by a predicated one) has a penalty of 3. */
8245 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8246 && recog_memoized (insn) >= 0
8247 && recog_memoized (dep) >= 0
8248 && get_attr_conds (dep) == CONDS_SET)
8250 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8251 if (get_attr_conds (insn) == CONDS_USE
8252 && get_attr_type (insn) != TYPE_BRANCH)
8254 *cost = 3;
8255 return false;
8258 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8259 || get_attr_conds (insn) == CONDS_USE)
8261 *cost = 0;
8262 return false;
8266 return true;
8269 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8270 It corrects the value of COST based on the relationship between
8271 INSN and DEP through the dependence LINK. It returns the new
8272 value. There is a per-core adjust_cost hook to adjust scheduler costs
8273 and the per-core hook can choose to completely override the generic
8274 adjust_cost function. Only put bits of code into arm_adjust_cost that
8275 are common across all cores. */
8276 static int
8277 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8279 rtx i_pat, d_pat;
8281 /* When generating Thumb-1 code, we want to place flag-setting operations
8282 close to a conditional branch which depends on them, so that we can
8283 omit the comparison. */
8284 if (TARGET_THUMB1
8285 && REG_NOTE_KIND (link) == 0
8286 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8287 && recog_memoized (dep) >= 0
8288 && get_attr_conds (dep) == CONDS_SET)
8289 return 0;
8291 if (current_tune->sched_adjust_cost != NULL)
8293 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8294 return cost;
8297 /* XXX This is not strictly true for the FPA. */
8298 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8299 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8300 return 0;
8302 /* Call insns don't incur a stall, even if they follow a load. */
8303 if (REG_NOTE_KIND (link) == 0
8304 && GET_CODE (insn) == CALL_INSN)
8305 return 1;
8307 if ((i_pat = single_set (insn)) != NULL
8308 && GET_CODE (SET_SRC (i_pat)) == MEM
8309 && (d_pat = single_set (dep)) != NULL
8310 && GET_CODE (SET_DEST (d_pat)) == MEM)
8312 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9313 /* This is a load after a store; there is no conflict if the load reads
8314 from a cached area. Assume that loads from the stack, and from the
8315 constant pool are cached, and that others will miss. This is a
8316 hack. */
8318 if ((GET_CODE (src_mem) == SYMBOL_REF
8319 && CONSTANT_POOL_ADDRESS_P (src_mem))
8320 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8321 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8322 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8323 return 1;
8326 return cost;
8329 static int
8330 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8332 if (TARGET_32BIT)
8333 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8334 else
8335 return (optimize > 0) ? 2 : 0;
8338 static int
8339 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8341 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8344 static int fp_consts_inited = 0;
8346 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8347 static const char * const strings_fp[8] =
8349 "0", "1", "2", "3",
8350 "4", "5", "0.5", "10"
8353 static REAL_VALUE_TYPE values_fp[8];
8355 static void
8356 init_fp_table (void)
8358 int i;
8359 REAL_VALUE_TYPE r;
8361 if (TARGET_VFP)
8362 fp_consts_inited = 1;
8363 else
8364 fp_consts_inited = 8;
8366 for (i = 0; i < fp_consts_inited; i++)
8368 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8369 values_fp[i] = r;
8373 /* Return TRUE if rtx X is a valid immediate FP constant. */
8375 arm_const_double_rtx (rtx x)
8377 REAL_VALUE_TYPE r;
8378 int i;
8380 if (!fp_consts_inited)
8381 init_fp_table ();
8383 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8384 if (REAL_VALUE_MINUS_ZERO (r))
8385 return 0;
8387 for (i = 0; i < fp_consts_inited; i++)
8388 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8389 return 1;
8391 return 0;
8394 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8396 neg_const_double_rtx_ok_for_fpa (rtx x)
8398 REAL_VALUE_TYPE r;
8399 int i;
8401 if (!fp_consts_inited)
8402 init_fp_table ();
8404 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8405 r = real_value_negate (&r);
8406 if (REAL_VALUE_MINUS_ZERO (r))
8407 return 0;
8409 for (i = 0; i < 8; i++)
8410 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8411 return 1;
8413 return 0;
8417 /* VFPv3 has a fairly wide range of representable immediates, formed from
8418 "quarter-precision" floating-point values. These can be evaluated using this
8419 formula (with ^ for exponentiation):
8421 (-1)^s * n * 2^-r
8423 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8424 16 <= n <= 31 and 0 <= r <= 7.
8426 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8428 - A (most-significant) is the sign bit.
8429 - BCD are the exponent (encoded as r XOR 3).
8430 - EFGH are the mantissa (encoded as n - 16).
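/* Editor's illustrative sketch (not part of arm.c): a standalone check of
   the quarter-precision encoding described above.  Given s, n and r with
   16 <= n <= 31 and 0 <= r <= 7, the fconst[sd] immediate byte is
   (s << 7) | ((r ^ 3) << 4) | (n - 16).  The helper name and the example
   values are made up for illustration.  */

#include <stdio.h>

static int
vfp3_imm8 (int s, int n, int r)
{
  if (s < 0 || s > 1 || n < 16 || n > 31 || r < 0 || r > 7)
    return -1;			/* not representable */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}

int
main (void)
{
  /* 1.0 = 16 * 2^-4, so s = 0, n = 16, r = 4 and the byte is 0x70;
     -0.5 = -(16 * 2^-5) encodes as 0xe0.  */
  printf ("imm8 (1.0)  = 0x%02x\n", vfp3_imm8 (0, 16, 4));
  printf ("imm8 (-0.5) = 0x%02x\n", vfp3_imm8 (1, 16, 5));
  return 0;
}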
8433 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8434 fconst[sd] instruction, or -1 if X isn't suitable. */
8435 static int
8436 vfp3_const_double_index (rtx x)
8438 REAL_VALUE_TYPE r, m;
8439 int sign, exponent;
8440 unsigned HOST_WIDE_INT mantissa, mant_hi;
8441 unsigned HOST_WIDE_INT mask;
8442 HOST_WIDE_INT m1, m2;
8443 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8445 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8446 return -1;
8448 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8450 /* We can't represent these things, so detect them first. */
8451 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8452 return -1;
8454 /* Extract sign, exponent and mantissa. */
8455 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8456 r = real_value_abs (&r);
8457 exponent = REAL_EXP (&r);
8458 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8459 highest (sign) bit, with a fixed binary point at bit point_pos.
8460 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8461 bits for the mantissa, this may fail (low bits would be lost). */
8462 real_ldexp (&m, &r, point_pos - exponent);
8463 REAL_VALUE_TO_INT (&m1, &m2, m);
8464 mantissa = m1;
8465 mant_hi = m2;
8467 /* If there are bits set in the low part of the mantissa, we can't
8468 represent this value. */
8469 if (mantissa != 0)
8470 return -1;
8472 /* Now make it so that mantissa contains the most-significant bits, and move
8473 the point_pos to indicate that the least-significant bits have been
8474 discarded. */
8475 point_pos -= HOST_BITS_PER_WIDE_INT;
8476 mantissa = mant_hi;
8478 /* We can permit four significant bits of mantissa only, plus a high bit
8479 which is always 1. */
8480 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8481 if ((mantissa & mask) != 0)
8482 return -1;
8484 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8485 mantissa >>= point_pos - 5;
8487 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8488 floating-point immediate zero with Neon using an integer-zero load, but
8489 that case is handled elsewhere.) */
8490 if (mantissa == 0)
8491 return -1;
8493 gcc_assert (mantissa >= 16 && mantissa <= 31);
8495 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8496 normalized significands are in the range [1, 2). (Our mantissa is shifted
8497 left 4 places at this point relative to normalized IEEE754 values). GCC
8498 internally uses [0.5, 1) (see real.c), so the exponent returned from
8499 REAL_EXP must be altered. */
8500 exponent = 5 - exponent;
8502 if (exponent < 0 || exponent > 7)
8503 return -1;
8505 /* Sign, mantissa and exponent are now in the correct form to plug into the
8506 formula described in the comment above. */
8507 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8510 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8512 vfp3_const_double_rtx (rtx x)
8514 if (!TARGET_VFP3)
8515 return 0;
8517 return vfp3_const_double_index (x) != -1;
8520 /* Recognize immediates which can be used in various Neon instructions. Legal
8521 immediates are described by the following table (for VMVN variants, the
8522 bitwise inverse of the constant shown is recognized. In either case, VMOV
8523 is output and the correct instruction to use for a given constant is chosen
8524 by the assembler). The constant shown is replicated across all elements of
8525 the destination vector.
8527 insn elems variant constant (binary)
8528 ---- ----- ------- -----------------
8529 vmov i32 0 00000000 00000000 00000000 abcdefgh
8530 vmov i32 1 00000000 00000000 abcdefgh 00000000
8531 vmov i32 2 00000000 abcdefgh 00000000 00000000
8532 vmov i32 3 abcdefgh 00000000 00000000 00000000
8533 vmov i16 4 00000000 abcdefgh
8534 vmov i16 5 abcdefgh 00000000
8535 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8536 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8537 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8538 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8539 vmvn i16 10 00000000 abcdefgh
8540 vmvn i16 11 abcdefgh 00000000
8541 vmov i32 12 00000000 00000000 abcdefgh 11111111
8542 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8543 vmov i32 14 00000000 abcdefgh 11111111 11111111
8544 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8545 vmov i8 16 abcdefgh
8546 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8547 eeeeeeee ffffffff gggggggg hhhhhhhh
8548 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8550 For case 18, B = !b. Representable values are exactly those accepted by
8551 vfp3_const_double_index, but are output as floating-point numbers rather
8552 than indices.
8554 Variants 0-5 (inclusive) may also be used as immediates for the second
8555 operand of VORR/VBIC instructions.
8557 The INVERSE argument causes the bitwise inverse of the given operand to be
8558 recognized instead (used for recognizing legal immediates for the VAND/VORN
8559 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8560 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8561 output, rather than the real insns vbic/vorr).
8563 INVERSE makes no difference to the recognition of float vectors.
8565 The return value is the variant of immediate as shown in the above table, or
8566 -1 if the given value doesn't match any of the listed patterns.
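/* Editor's illustrative sketch (not part of arm.c): the table above is
   matched by splatting the vector constant into a byte array and testing
   one byte pattern per variant.  This standalone program shows the idea for
   two of the variants on a 128-bit vector of i32 elements; the example
   constant is made up, the byte tests mirror the CHECK macro used below,
   and the memcpy splat assumes a little-endian host.  */

#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned char bytes[16];
  unsigned int elt = 0x0000004d;	/* 0x4d replicated in each i32 lane */
  int i, variant0 = 1, variant16 = 1;

  for (i = 0; i < 4; i++)
    memcpy (bytes + 4 * i, &elt, 4);	/* little-endian byte splat */

  /* Variant 0: 00000000 00000000 00000000 abcdefgh per i32 element.  */
  for (i = 0; i < 16; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
	  && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      variant0 = 0;

  /* Variant 16: abcdefgh per i8 element (all bytes equal).  */
  for (i = 0; i < 16; i++)
    if (bytes[i] != bytes[0])
      variant16 = 0;

  printf ("variant 0: %d, variant 16: %d\n", variant0, variant16);  /* 1, 0 */
  return 0;
}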
8568 static int
8569 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8570 rtx *modconst, int *elementwidth)
8572 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8573 matches = 1; \
8574 for (i = 0; i < idx; i += (STRIDE)) \
8575 if (!(TEST)) \
8576 matches = 0; \
8577 if (matches) \
8579 immtype = (CLASS); \
8580 elsize = (ELSIZE); \
8581 break; \
8584 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8585 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8586 unsigned char bytes[16];
8587 int immtype = -1, matches;
8588 unsigned int invmask = inverse ? 0xff : 0;
8590 /* Vectors of float constants. */
8591 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8593 rtx el0 = CONST_VECTOR_ELT (op, 0);
8594 REAL_VALUE_TYPE r0;
8596 if (!vfp3_const_double_rtx (el0))
8597 return -1;
8599 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8601 for (i = 1; i < n_elts; i++)
8603 rtx elt = CONST_VECTOR_ELT (op, i);
8604 REAL_VALUE_TYPE re;
8606 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8608 if (!REAL_VALUES_EQUAL (r0, re))
8609 return -1;
8612 if (modconst)
8613 *modconst = CONST_VECTOR_ELT (op, 0);
8615 if (elementwidth)
8616 *elementwidth = 0;
8618 return 18;
8621 /* Splat vector constant out into a byte vector. */
8622 for (i = 0; i < n_elts; i++)
8624 rtx el = CONST_VECTOR_ELT (op, i);
8625 unsigned HOST_WIDE_INT elpart;
8626 unsigned int part, parts;
8628 if (GET_CODE (el) == CONST_INT)
8630 elpart = INTVAL (el);
8631 parts = 1;
8633 else if (GET_CODE (el) == CONST_DOUBLE)
8635 elpart = CONST_DOUBLE_LOW (el);
8636 parts = 2;
8638 else
8639 gcc_unreachable ();
8641 for (part = 0; part < parts; part++)
8643 unsigned int byte;
8644 for (byte = 0; byte < innersize; byte++)
8646 bytes[idx++] = (elpart & 0xff) ^ invmask;
8647 elpart >>= BITS_PER_UNIT;
8649 if (GET_CODE (el) == CONST_DOUBLE)
8650 elpart = CONST_DOUBLE_HIGH (el);
8654 /* Sanity check. */
8655 gcc_assert (idx == GET_MODE_SIZE (mode));
8659 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8660 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8662 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8663 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8665 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8666 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8668 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8669 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8671 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8673 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8675 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8676 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8678 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8679 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8681 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8682 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8684 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8685 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8687 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8689 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8691 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8692 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8694 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8695 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8697 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8698 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8700 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8701 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8703 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8705 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8706 && bytes[i] == bytes[(i + 8) % idx]);
8708 while (0);
8710 if (immtype == -1)
8711 return -1;
8713 if (elementwidth)
8714 *elementwidth = elsize;
8716 if (modconst)
8718 unsigned HOST_WIDE_INT imm = 0;
8720 /* Un-invert bytes of recognized vector, if necessary. */
8721 if (invmask != 0)
8722 for (i = 0; i < idx; i++)
8723 bytes[i] ^= invmask;
8725 if (immtype == 17)
8727 /* FIXME: Broken on 32-bit H_W_I hosts. */
8728 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8730 for (i = 0; i < 8; i++)
8731 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8732 << (i * BITS_PER_UNIT);
8734 *modconst = GEN_INT (imm);
8736 else
8738 unsigned HOST_WIDE_INT imm = 0;
8740 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8741 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8743 *modconst = GEN_INT (imm);
8747 return immtype;
8748 #undef CHECK
8751 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8752 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8753 float elements), and a modified constant (whatever should be output for a
8754 VMOV) in *MODCONST. */
8757 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8758 rtx *modconst, int *elementwidth)
8760 rtx tmpconst;
8761 int tmpwidth;
8762 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8764 if (retval == -1)
8765 return 0;
8767 if (modconst)
8768 *modconst = tmpconst;
8770 if (elementwidth)
8771 *elementwidth = tmpwidth;
8773 return 1;
8776 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8777 the immediate is valid, write a constant suitable for using as an operand
8778 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8779 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8782 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8783 rtx *modconst, int *elementwidth)
8785 rtx tmpconst;
8786 int tmpwidth;
8787 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8789 if (retval < 0 || retval > 5)
8790 return 0;
8792 if (modconst)
8793 *modconst = tmpconst;
8795 if (elementwidth)
8796 *elementwidth = tmpwidth;
8798 return 1;
8801 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
8802 the immediate is valid, write a constant suitable for using as an operand
8803 to VSHR/VSHL to *MODCONST and the corresponding element width to
8804 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
8805 because the two have different valid immediate ranges. */
8808 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
8809 rtx *modconst, int *elementwidth,
8810 bool isleftshift)
8812 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8813 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
8814 unsigned HOST_WIDE_INT last_elt = 0;
8815 unsigned HOST_WIDE_INT maxshift;
8817 /* Split vector constant out into a byte vector. */
8818 for (i = 0; i < n_elts; i++)
8820 rtx el = CONST_VECTOR_ELT (op, i);
8821 unsigned HOST_WIDE_INT elpart;
8823 if (GET_CODE (el) == CONST_INT)
8824 elpart = INTVAL (el);
8825 else if (GET_CODE (el) == CONST_DOUBLE)
8826 return 0;
8827 else
8828 gcc_unreachable ();
8830 if (i != 0 && elpart != last_elt)
8831 return 0;
8833 last_elt = elpart;
8836 /* Shift less than element size. */
8837 maxshift = innersize * 8;
8839 if (isleftshift)
8841 /* Left shift immediate value can be from 0 to <size>-1. */
8842 if (last_elt >= maxshift)
8843 return 0;
8845 else
8847 /* Right shift immediate value can be from 1 to <size>. */
8848 if (last_elt == 0 || last_elt > maxshift)
8849 return 0;
8852 if (elementwidth)
8853 *elementwidth = innersize * 8;
8855 if (modconst)
8856 *modconst = CONST_VECTOR_ELT (op, 0);
8858 return 1;
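/* Editor's illustrative sketch (not part of arm.c): the check above allows
   shift immediates of 0..SIZE-1 for left shifts and 1..SIZE for right
   shifts, where SIZE is the element width in bits.  This standalone helper
   restates that rule; the function name and example values are made up.  */

#include <stdio.h>

static int
neon_shift_imm_ok (unsigned int imm, unsigned int elt_bits, int is_left)
{
  return is_left ? imm < elt_bits : (imm >= 1 && imm <= elt_bits);
}

int
main (void)
{
  printf ("vshl.i16 #16 ok: %d\n", neon_shift_imm_ok (16, 16, 1));	/* 0 */
  printf ("vshr.i16 #16 ok: %d\n", neon_shift_imm_ok (16, 16, 0));	/* 1 */
  return 0;
}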
8861 /* Return a string suitable for output of Neon immediate logic operation
8862 MNEM. */
8864 char *
8865 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8866 int inverse, int quad)
8868 int width, is_valid;
8869 static char templ[40];
8871 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8873 gcc_assert (is_valid != 0);
8875 if (quad)
8876 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8877 else
8878 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8880 return templ;
8883 /* Return a string suitable for output of Neon immediate shift operation
8884 (VSHR or VSHL) MNEM. */
8886 char *
8887 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
8888 enum machine_mode mode, int quad,
8889 bool isleftshift)
8891 int width, is_valid;
8892 static char templ[40];
8894 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
8895 gcc_assert (is_valid != 0);
8897 if (quad)
8898 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
8899 else
8900 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
8902 return templ;
8905 /* Output a sequence of pairwise operations to implement a reduction.
8906 NOTE: We do "too much work" here, because pairwise operations work on two
8907 registers-worth of operands in one go. Unfortunately I don't think we can
8908 exploit those extra calculations to do the full operation in fewer steps.
8909 Although all vector elements of the result but the first are ignored, we
8910 actually calculate the same result in each of the elements. An alternative
8911 such as initially loading a vector with zero to use as each of the second
8912 operands would use up an additional register and take an extra instruction,
8913 for no particular gain. */
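/* Editor's illustrative sketch (not part of arm.c): the halving loop above
   emits one pairwise operation per step, so a vector of N elements needs
   log2(N) instructions and every lane ends up holding the full result.
   This standalone program models a pairwise add on a plain array; the array
   contents are made-up example data.  */

#include <stdio.h>

int
main (void)
{
  int v[4] = { 1, 2, 3, 4 };
  int parts = 4, i, step;

  for (step = parts / 2; step >= 1; step /= 2)
    {
      int next[4];
      /* vpadd-style step with both operands the same register: element i
	 becomes v[2i] + v[2i+1], with the pattern repeated so both halves
	 carry the same partial sums.  */
      for (i = 0; i < parts; i++)
	next[i] = v[(2 * i) % parts] + v[(2 * i + 1) % parts];
      for (i = 0; i < parts; i++)
	v[i] = next[i];
    }

  printf ("reduced value in every lane: %d\n", v[0]);	/* 10 */
  return 0;
}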
8915 void
8916 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8917 rtx (*reduc) (rtx, rtx, rtx))
8919 enum machine_mode inner = GET_MODE_INNER (mode);
8920 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8921 rtx tmpsum = op1;
8923 for (i = parts / 2; i >= 1; i /= 2)
8925 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8926 emit_insn (reduc (dest, tmpsum, tmpsum));
8927 tmpsum = dest;
8931 /* If VALS is a vector constant that can be loaded into a register
8932 using VDUP, generate instructions to do so and return an RTX to
8933 assign to the register. Otherwise return NULL_RTX. */
8935 static rtx
8936 neon_vdup_constant (rtx vals)
8938 enum machine_mode mode = GET_MODE (vals);
8939 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8940 int n_elts = GET_MODE_NUNITS (mode);
8941 bool all_same = true;
8942 rtx x;
8943 int i;
8945 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8946 return NULL_RTX;
8948 for (i = 0; i < n_elts; ++i)
8950 x = XVECEXP (vals, 0, i);
8951 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8952 all_same = false;
8955 if (!all_same)
8956 /* The elements are not all the same. We could handle repeating
8957 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8958 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8959 vdup.i16). */
8960 return NULL_RTX;
8962 /* We can load this constant by using VDUP and a constant in a
8963 single ARM register. This will be cheaper than a vector
8964 load. */
8966 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8967 return gen_rtx_VEC_DUPLICATE (mode, x);
8970 /* Generate code to load VALS, which is a PARALLEL containing only
8971 constants (for vec_init) or CONST_VECTOR, efficiently into a
8972 register. Returns an RTX to copy into the register, or NULL_RTX
8973 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8976 neon_make_constant (rtx vals)
8978 enum machine_mode mode = GET_MODE (vals);
8979 rtx target;
8980 rtx const_vec = NULL_RTX;
8981 int n_elts = GET_MODE_NUNITS (mode);
8982 int n_const = 0;
8983 int i;
8985 if (GET_CODE (vals) == CONST_VECTOR)
8986 const_vec = vals;
8987 else if (GET_CODE (vals) == PARALLEL)
8989 /* A CONST_VECTOR must contain only CONST_INTs and
8990 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8991 Only store valid constants in a CONST_VECTOR. */
8992 for (i = 0; i < n_elts; ++i)
8994 rtx x = XVECEXP (vals, 0, i);
8995 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8996 n_const++;
8998 if (n_const == n_elts)
8999 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9001 else
9002 gcc_unreachable ();
9004 if (const_vec != NULL
9005 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9006 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9007 return const_vec;
9008 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9009 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9010 pipeline cycle; creating the constant takes one or two ARM
9011 pipeline cycles. */
9012 return target;
9013 else if (const_vec != NULL_RTX)
9014 /* Load from constant pool. On Cortex-A8 this takes two cycles
9015 (for either double or quad vectors). We cannot take advantage
9016 of single-cycle VLD1 because we need a PC-relative addressing
9017 mode. */
9018 return const_vec;
9019 else
9020 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9021 We can not construct an initializer. */
9022 return NULL_RTX;
9025 /* Initialize vector TARGET to VALS. */
9027 void
9028 neon_expand_vector_init (rtx target, rtx vals)
9030 enum machine_mode mode = GET_MODE (target);
9031 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9032 int n_elts = GET_MODE_NUNITS (mode);
9033 int n_var = 0, one_var = -1;
9034 bool all_same = true;
9035 rtx x, mem;
9036 int i;
9038 for (i = 0; i < n_elts; ++i)
9040 x = XVECEXP (vals, 0, i);
9041 if (!CONSTANT_P (x))
9042 ++n_var, one_var = i;
9044 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9045 all_same = false;
9048 if (n_var == 0)
9050 rtx constant = neon_make_constant (vals);
9051 if (constant != NULL_RTX)
9053 emit_move_insn (target, constant);
9054 return;
9058 /* Splat a single non-constant element if we can. */
9059 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9061 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9062 emit_insn (gen_rtx_SET (VOIDmode, target,
9063 gen_rtx_VEC_DUPLICATE (mode, x)));
9064 return;
9067 /* One field is non-constant. Load constant then overwrite varying
9068 field. This is more efficient than using the stack. */
9069 if (n_var == 1)
9071 rtx copy = copy_rtx (vals);
9072 rtx index = GEN_INT (one_var);
9074 /* Load constant part of vector, substitute neighboring value for
9075 varying element. */
9076 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9077 neon_expand_vector_init (target, copy);
9079 /* Insert variable. */
9080 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9081 switch (mode)
9083 case V8QImode:
9084 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9085 break;
9086 case V16QImode:
9087 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9088 break;
9089 case V4HImode:
9090 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9091 break;
9092 case V8HImode:
9093 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9094 break;
9095 case V2SImode:
9096 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9097 break;
9098 case V4SImode:
9099 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9100 break;
9101 case V2SFmode:
9102 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9103 break;
9104 case V4SFmode:
9105 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9106 break;
9107 case V2DImode:
9108 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9109 break;
9110 default:
9111 gcc_unreachable ();
9113 return;
9116 /* Construct the vector in memory one field at a time
9117 and load the whole vector. */
9118 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9119 for (i = 0; i < n_elts; i++)
9120 emit_move_insn (adjust_address_nv (mem, inner_mode,
9121 i * GET_MODE_SIZE (inner_mode)),
9122 XVECEXP (vals, 0, i));
9123 emit_move_insn (target, mem);
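/* Editor's illustrative sketch (not part of arm.c): the routine above picks
   one of four strategies in order.  This tiny standalone helper restates
   that decision order so it can be read at a glance; the function name and
   strings are labels for illustration, not GCC API names.  */

#include <stdio.h>

static const char *
vector_init_strategy (int n_var, int all_same, int small_inner_mode)
{
  if (n_var == 0)
    return "move a constant vector (VMOV/VDUP or constant pool)";
  if (all_same && small_inner_mode)
    return "VDUP a single non-constant element";
  if (n_var == 1)
    return "load the constant part, then VSET_LANE the varying element";
  return "build the vector in memory and load it whole";
}

int
main (void)
{
  /* One varying field in a vector of small elements.  */
  printf ("%s\n", vector_init_strategy (1, 0, 1));
  return 0;
}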
9126 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9127 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9128 reported source locations are bogus. */
9130 static void
9131 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9132 const char *err)
9134 HOST_WIDE_INT lane;
9136 gcc_assert (GET_CODE (operand) == CONST_INT);
9138 lane = INTVAL (operand);
9140 if (lane < low || lane >= high)
9141 error (err);
9144 /* Bounds-check lanes. */
9146 void
9147 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9149 bounds_check (operand, low, high, "lane out of range");
9152 /* Bounds-check constants. */
9154 void
9155 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9157 bounds_check (operand, low, high, "constant out of range");
9160 HOST_WIDE_INT
9161 neon_element_bits (enum machine_mode mode)
9163 if (mode == DImode)
9164 return GET_MODE_BITSIZE (mode);
9165 else
9166 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9170 /* Predicates for `match_operand' and `match_operator'. */
9172 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9174 cirrus_memory_offset (rtx op)
9176 /* Reject eliminable registers. */
9177 if (! (reload_in_progress || reload_completed)
9178 && ( reg_mentioned_p (frame_pointer_rtx, op)
9179 || reg_mentioned_p (arg_pointer_rtx, op)
9180 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9181 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9182 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9183 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9184 return 0;
9186 if (GET_CODE (op) == MEM)
9188 rtx ind;
9190 ind = XEXP (op, 0);
9192 /* Match: (mem (reg)). */
9193 if (GET_CODE (ind) == REG)
9194 return 1;
9196 /* Match:
9197 (mem (plus (reg)
9198 (const))). */
9199 if (GET_CODE (ind) == PLUS
9200 && GET_CODE (XEXP (ind, 0)) == REG
9201 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9202 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9203 return 1;
9206 return 0;
9209 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9210 WB is true if full writeback address modes are allowed and is false
9211 if limited writeback address modes (POST_INC and PRE_DEC) are
9212 allowed. */
9215 arm_coproc_mem_operand (rtx op, bool wb)
9217 rtx ind;
9219 /* Reject eliminable registers. */
9220 if (! (reload_in_progress || reload_completed)
9221 && ( reg_mentioned_p (frame_pointer_rtx, op)
9222 || reg_mentioned_p (arg_pointer_rtx, op)
9223 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9224 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9225 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9226 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9227 return FALSE;
9229 /* Constants are converted into offsets from labels. */
9230 if (GET_CODE (op) != MEM)
9231 return FALSE;
9233 ind = XEXP (op, 0);
9235 if (reload_completed
9236 && (GET_CODE (ind) == LABEL_REF
9237 || (GET_CODE (ind) == CONST
9238 && GET_CODE (XEXP (ind, 0)) == PLUS
9239 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9240 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9241 return TRUE;
9243 /* Match: (mem (reg)). */
9244 if (GET_CODE (ind) == REG)
9245 return arm_address_register_rtx_p (ind, 0);
9247 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9248 acceptable in any case (subject to verification by
9249 arm_address_register_rtx_p). We need WB to be true to accept
9250 PRE_INC and POST_DEC. */
9251 if (GET_CODE (ind) == POST_INC
9252 || GET_CODE (ind) == PRE_DEC
9253 || (wb
9254 && (GET_CODE (ind) == PRE_INC
9255 || GET_CODE (ind) == POST_DEC)))
9256 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9258 if (wb
9259 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9260 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9261 && GET_CODE (XEXP (ind, 1)) == PLUS
9262 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9263 ind = XEXP (ind, 1);
9265 /* Match:
9266 (plus (reg)
9267 (const)). */
9268 if (GET_CODE (ind) == PLUS
9269 && GET_CODE (XEXP (ind, 0)) == REG
9270 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9271 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9272 && INTVAL (XEXP (ind, 1)) > -1024
9273 && INTVAL (XEXP (ind, 1)) < 1024
9274 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9275 return TRUE;
9277 return FALSE;
9280 /* Return TRUE if OP is a memory operand which we can load or store a vector
9281 to/from. TYPE is one of the following values:
9282 0 - Vector load/store (vldr)
9283 1 - Core registers (ldm)
9284 2 - Element/structure loads (vld1)
9287 neon_vector_mem_operand (rtx op, int type)
9289 rtx ind;
9291 /* Reject eliminable registers. */
9292 if (! (reload_in_progress || reload_completed)
9293 && ( reg_mentioned_p (frame_pointer_rtx, op)
9294 || reg_mentioned_p (arg_pointer_rtx, op)
9295 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9296 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9297 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9298 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9299 return FALSE;
9301 /* Constants are converted into offsets from labels. */
9302 if (GET_CODE (op) != MEM)
9303 return FALSE;
9305 ind = XEXP (op, 0);
9307 if (reload_completed
9308 && (GET_CODE (ind) == LABEL_REF
9309 || (GET_CODE (ind) == CONST
9310 && GET_CODE (XEXP (ind, 0)) == PLUS
9311 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9312 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9313 return TRUE;
9315 /* Match: (mem (reg)). */
9316 if (GET_CODE (ind) == REG)
9317 return arm_address_register_rtx_p (ind, 0);
9319 /* Allow post-increment with Neon registers. */
9320 if ((type != 1 && GET_CODE (ind) == POST_INC)
9321 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9322 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9324 /* FIXME: vld1 allows register post-modify. */
9326 /* Match:
9327 (plus (reg)
9328 (const)). */
9329 if (type == 0
9330 && GET_CODE (ind) == PLUS
9331 && GET_CODE (XEXP (ind, 0)) == REG
9332 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9333 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9334 && INTVAL (XEXP (ind, 1)) > -1024
9335 && INTVAL (XEXP (ind, 1)) < 1016
9336 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9337 return TRUE;
9339 return FALSE;
9342 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9343 type. */
9345 neon_struct_mem_operand (rtx op)
9347 rtx ind;
9349 /* Reject eliminable registers. */
9350 if (! (reload_in_progress || reload_completed)
9351 && ( reg_mentioned_p (frame_pointer_rtx, op)
9352 || reg_mentioned_p (arg_pointer_rtx, op)
9353 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9354 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9355 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9356 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9357 return FALSE;
9359 /* Constants are converted into offsets from labels. */
9360 if (GET_CODE (op) != MEM)
9361 return FALSE;
9363 ind = XEXP (op, 0);
9365 if (reload_completed
9366 && (GET_CODE (ind) == LABEL_REF
9367 || (GET_CODE (ind) == CONST
9368 && GET_CODE (XEXP (ind, 0)) == PLUS
9369 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9370 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9371 return TRUE;
9373 /* Match: (mem (reg)). */
9374 if (GET_CODE (ind) == REG)
9375 return arm_address_register_rtx_p (ind, 0);
9377 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9378 if (GET_CODE (ind) == POST_INC
9379 || GET_CODE (ind) == PRE_DEC)
9380 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9382 return FALSE;
9385 /* Return true if X is a register that will be eliminated later on. */
9387 arm_eliminable_register (rtx x)
9389 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9390 || REGNO (x) == ARG_POINTER_REGNUM
9391 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9392 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9395 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9396 coprocessor registers. Otherwise return NO_REGS. */
9398 enum reg_class
9399 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9401 if (mode == HFmode)
9403 if (!TARGET_NEON_FP16)
9404 return GENERAL_REGS;
9405 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9406 return NO_REGS;
9407 return GENERAL_REGS;
9410 /* The neon move patterns handle all legitimate vector and struct
9411 addresses. */
9412 if (TARGET_NEON
9413 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9414 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9415 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9416 || VALID_NEON_STRUCT_MODE (mode)))
9417 return NO_REGS;
9419 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9420 return NO_REGS;
9422 return GENERAL_REGS;
9425 /* Values which must be returned in the most-significant end of the return
9426 register. */
9428 static bool
9429 arm_return_in_msb (const_tree valtype)
9431 return (TARGET_AAPCS_BASED
9432 && BYTES_BIG_ENDIAN
9433 && (AGGREGATE_TYPE_P (valtype)
9434 || TREE_CODE (valtype) == COMPLEX_TYPE
9435 || FIXED_POINT_TYPE_P (valtype)));
9438 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9439 Used by the Cirrus Maverick code, which has to work around
9440 a hardware bug triggered by such instructions. */
9441 static bool
9442 arm_memory_load_p (rtx insn)
9444 rtx body, lhs, rhs;
9446 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9447 return false;
9449 body = PATTERN (insn);
9451 if (GET_CODE (body) != SET)
9452 return false;
9454 lhs = XEXP (body, 0);
9455 rhs = XEXP (body, 1);
9457 lhs = REG_OR_SUBREG_RTX (lhs);
9459 /* If the destination is not a general purpose
9460 register we do not have to worry. */
9461 if (GET_CODE (lhs) != REG
9462 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9463 return false;
9465 /* As well as loads from memory we also have to react
9466 to loads of invalid constants which will be turned
9467 into loads from the minipool. */
9468 return (GET_CODE (rhs) == MEM
9469 || GET_CODE (rhs) == SYMBOL_REF
9470 || note_invalid_constants (insn, -1, false));
9473 /* Return TRUE if INSN is a Cirrus instruction. */
9474 static bool
9475 arm_cirrus_insn_p (rtx insn)
9477 enum attr_cirrus attr;
9479 /* get_attr cannot accept USE or CLOBBER. */
9480 if (!insn
9481 || GET_CODE (insn) != INSN
9482 || GET_CODE (PATTERN (insn)) == USE
9483 || GET_CODE (PATTERN (insn)) == CLOBBER)
9484 return 0;
9486 attr = get_attr_cirrus (insn);
9488 return attr != CIRRUS_NOT;
9491 /* Cirrus reorg for invalid instruction combinations. */
9492 static void
9493 cirrus_reorg (rtx first)
9495 enum attr_cirrus attr;
9496 rtx body = PATTERN (first);
9497 rtx t;
9498 int nops;
9500 /* Any branch must be followed by 2 non-Cirrus instructions. */
9501 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9503 nops = 0;
9504 t = next_nonnote_insn (first);
9506 if (arm_cirrus_insn_p (t))
9507 ++ nops;
9509 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9510 ++ nops;
9512 while (nops --)
9513 emit_insn_after (gen_nop (), first);
9515 return;
9518 /* (float (blah)) is in parallel with a clobber. */
9519 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9520 body = XVECEXP (body, 0, 0);
9522 if (GET_CODE (body) == SET)
9524 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9526 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9527 be followed by a non-Cirrus insn. */
9528 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9530 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9531 emit_insn_after (gen_nop (), first);
9533 return;
9535 else if (arm_memory_load_p (first))
9537 unsigned int arm_regno;
9539 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9540 ldr/cfmv64hr combination where the Rd field is the same
9541 in both instructions must be split with a non-Cirrus
9542 insn. Example:
9544 ldr r0, blah
9546 cfmvsr mvf0, r0. */
9548 /* Get Arm register number for ldr insn. */
9549 if (GET_CODE (lhs) == REG)
9550 arm_regno = REGNO (lhs);
9551 else
9553 gcc_assert (GET_CODE (rhs) == REG);
9554 arm_regno = REGNO (rhs);
9557 /* Next insn. */
9558 first = next_nonnote_insn (first);
9560 if (! arm_cirrus_insn_p (first))
9561 return;
9563 body = PATTERN (first);
9565 /* (float (blah)) is in parallel with a clobber. */
9566 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9567 body = XVECEXP (body, 0, 0);
9569 if (GET_CODE (body) == FLOAT)
9570 body = XEXP (body, 0);
9572 if (get_attr_cirrus (first) == CIRRUS_MOVE
9573 && GET_CODE (XEXP (body, 1)) == REG
9574 && arm_regno == REGNO (XEXP (body, 1)))
9575 emit_insn_after (gen_nop (), first);
9577 return;
9581 /* get_attr cannot accept USE or CLOBBER. */
9582 if (!first
9583 || GET_CODE (first) != INSN
9584 || GET_CODE (PATTERN (first)) == USE
9585 || GET_CODE (PATTERN (first)) == CLOBBER)
9586 return;
9588 attr = get_attr_cirrus (first);
9590 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9591 must be followed by a non-coprocessor instruction. */
9592 if (attr == CIRRUS_COMPARE)
9594 nops = 0;
9596 t = next_nonnote_insn (first);
9598 if (arm_cirrus_insn_p (t))
9599 ++ nops;
9601 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9602 ++ nops;
9604 while (nops --)
9605 emit_insn_after (gen_nop (), first);
9607 return;
9611 /* Return TRUE if X references a SYMBOL_REF. */
9613 symbol_mentioned_p (rtx x)
9615 const char * fmt;
9616 int i;
9618 if (GET_CODE (x) == SYMBOL_REF)
9619 return 1;
9621 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9622 are constant offsets, not symbols. */
9623 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9624 return 0;
9626 fmt = GET_RTX_FORMAT (GET_CODE (x));
9628 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9630 if (fmt[i] == 'E')
9632 int j;
9634 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9635 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9636 return 1;
9638 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9639 return 1;
9642 return 0;
9645 /* Return TRUE if X references a LABEL_REF. */
9647 label_mentioned_p (rtx x)
9649 const char * fmt;
9650 int i;
9652 if (GET_CODE (x) == LABEL_REF)
9653 return 1;
9655 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9656 instruction, but they are constant offsets, not symbols. */
9657 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9658 return 0;
9660 fmt = GET_RTX_FORMAT (GET_CODE (x));
9661 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9663 if (fmt[i] == 'E')
9665 int j;
9667 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9668 if (label_mentioned_p (XVECEXP (x, i, j)))
9669 return 1;
9671 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9672 return 1;
9675 return 0;
9679 tls_mentioned_p (rtx x)
9681 switch (GET_CODE (x))
9683 case CONST:
9684 return tls_mentioned_p (XEXP (x, 0));
9686 case UNSPEC:
9687 if (XINT (x, 1) == UNSPEC_TLS)
9688 return 1;
9690 default:
9691 return 0;
9695 /* Must not copy any rtx that uses a pc-relative address. */
9697 static int
9698 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9700 if (GET_CODE (*x) == UNSPEC
9701 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9702 return 1;
9703 return 0;
9706 static bool
9707 arm_cannot_copy_insn_p (rtx insn)
9709 /* The tls call insn cannot be copied, as it is paired with a data
9710 word. */
9711 if (recog_memoized (insn) == CODE_FOR_tlscall)
9712 return true;
9714 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9717 enum rtx_code
9718 minmax_code (rtx x)
9720 enum rtx_code code = GET_CODE (x);
9722 switch (code)
9724 case SMAX:
9725 return GE;
9726 case SMIN:
9727 return LE;
9728 case UMIN:
9729 return LEU;
9730 case UMAX:
9731 return GEU;
9732 default:
9733 gcc_unreachable ();
9737 /* Return 1 if memory locations are adjacent. */
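/* For example (register chosen arbitrarily), [r3] and [r3, #4] are
   adjacent: same base register, offsets differing by exactly 4, neither
   reference volatile.  On cores with load delay slots the stricter test
   below also requires that we are optimizing for size.  */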
9739 adjacent_mem_locations (rtx a, rtx b)
9741 /* We don't guarantee to preserve the order of these memory refs. */
9742 if (volatile_refs_p (a) || volatile_refs_p (b))
9743 return 0;
9745 if ((GET_CODE (XEXP (a, 0)) == REG
9746 || (GET_CODE (XEXP (a, 0)) == PLUS
9747 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9748 && (GET_CODE (XEXP (b, 0)) == REG
9749 || (GET_CODE (XEXP (b, 0)) == PLUS
9750 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9752 HOST_WIDE_INT val0 = 0, val1 = 0;
9753 rtx reg0, reg1;
9754 int val_diff;
9756 if (GET_CODE (XEXP (a, 0)) == PLUS)
9758 reg0 = XEXP (XEXP (a, 0), 0);
9759 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9761 else
9762 reg0 = XEXP (a, 0);
9764 if (GET_CODE (XEXP (b, 0)) == PLUS)
9766 reg1 = XEXP (XEXP (b, 0), 0);
9767 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9769 else
9770 reg1 = XEXP (b, 0);
9772 /* Don't accept any offset that will require multiple
9773 instructions to handle, since this would cause the
9774 arith_adjacentmem pattern to output an overlong sequence. */
9775 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9776 return 0;
9778 /* Don't allow an eliminable register: register elimination can make
9779 the offset too large. */
9780 if (arm_eliminable_register (reg0))
9781 return 0;
9783 val_diff = val1 - val0;
9785 if (arm_ld_sched)
9787 /* If the target has load delay slots, then there's no benefit
9788 to using an ldm instruction unless the offset is zero and
9789 we are optimizing for size. */
9790 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9791 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9792 && (val_diff == 4 || val_diff == -4));
9795 return ((REGNO (reg0) == REGNO (reg1))
9796 && (val_diff == 4 || val_diff == -4));
9799 return 0;
9802 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9803 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9804 instruction. ADD_OFFSET is nonzero if the base address register needs
9805 to be modified with an add instruction before we can use it. */
9807 static bool
9808 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9809 int nops, HOST_WIDE_INT add_offset)
9811 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9812 if the offset isn't small enough. The reason 2 ldrs are faster
9813 is because these ARMs are able to do more than one cache access
9814 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9815 whilst the ARM8 has a double bandwidth cache. This means that
9816 these cores can do both an instruction fetch and a data fetch in
9817 a single cycle, so the trick of calculating the address into a
9818 scratch register (one of the result regs) and then doing a load
9819 multiple actually becomes slower (and no smaller in code size).
9820 That is the transformation
9822 ldr rd1, [rbase + offset]
9823 ldr rd2, [rbase + offset + 4]
9827 add rd1, rbase, offset
9828 ldmia rd1, {rd1, rd2}
9830 produces worse code -- '3 cycles + any stalls on rd2' instead of
9831 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9832 access per cycle, the first sequence could never complete in less
9833 than 6 cycles, whereas the ldm sequence would only take 5 and
9834 would make better use of sequential accesses if not hitting the
9835 cache.
9837 We cheat here and test 'arm_ld_sched' which we currently know to
9838 only be true for the ARM8, ARM9 and StrongARM. If this ever
9839 changes, then the test below needs to be reworked. */
9840 if (nops == 2 && arm_ld_sched && add_offset != 0)
9841 return false;
9843 /* XScale has load-store double instructions, but they have stricter
9844 alignment requirements than load-store multiple, so we cannot
9845 use them.
9847 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9848 the pipeline until completion.
9850 NREGS CYCLES
9856 An ldr instruction takes 1-3 cycles, but does not block the
9857 pipeline.
9859 NREGS CYCLES
9860 1 1-3
9861 2 2-6
9862 3 3-9
9863 4 4-12
9865 Best case ldr will always win. However, the more ldr instructions
9866 we issue, the less likely we are to be able to schedule them well.
9867 Using ldr instructions also increases code size.
9869 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9870 for counts of 3 or 4 regs. */
9871 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9872 return false;
9873 return true;
9876 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9877 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9878 an array ORDER which describes the sequence in which to access the
9879 offsets so that they are visited in ascending order.  In this sequence,
9880 each offset must be larger by exactly 4 than the previous one.  ORDER[0]
9881 must have been filled in by the caller with the index of the lowest offset.
9882 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9883 we use to verify that ORDER produces an ascending order of registers.
9884 Return true if it was possible to construct such an order, false if
9885 not. */
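/* A worked example: with NOPS == 4 and UNSORTED_OFFSETS == {8, 4, 12, 0},
   the caller presets ORDER[0] = 3 (the index of offset 0) and the loop
   below fills ORDER = {3, 1, 0, 2}, i.e. offsets 0, 4, 8, 12.  If at any
   step there is not exactly one offset 4 larger than the previous one,
   the function returns false.  */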
9887 static bool
9888 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9889 int *unsorted_regs)
9891 int i;
9892 for (i = 1; i < nops; i++)
9894 int j;
9896 order[i] = order[i - 1];
9897 for (j = 0; j < nops; j++)
9898 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9900 /* We must find exactly one offset that is higher than the
9901 previous one by 4. */
9902 if (order[i] != order[i - 1])
9903 return false;
9904 order[i] = j;
9906 if (order[i] == order[i - 1])
9907 return false;
9908 /* The register numbers must be ascending. */
9909 if (unsorted_regs != NULL
9910 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9911 return false;
9913 return true;
9916 /* Used to determine in a peephole whether a sequence of load
9917 instructions can be changed into a load-multiple instruction.
9918 NOPS is the number of separate load instructions we are examining. The
9919 first NOPS entries in OPERANDS are the destination registers, the
9920 next NOPS entries are memory operands. If this function is
9921 successful, *BASE is set to the common base register of the memory
9922 accesses; *LOAD_OFFSET is set to the first memory location's offset
9923 from that base register.
9924 REGS is an array filled in with the destination register numbers.
9925 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9926 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9927 the sequence of registers in REGS matches the loads from ascending memory
9928 locations, and the function verifies that the register numbers are
9929 themselves ascending. If CHECK_REGS is false, the register numbers
9930 are stored in the order they are found in the operands. */
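/* Illustration (register numbers chosen arbitrarily): for the pair
   "ldr r4, [r2]" followed by "ldr r5, [r2, #4]", and assuming a
   two-register multiple is considered profitable on the target, this
   returns 1 (the ldmia case) with REGS = {4, 5}, *BASE = 2 and
   *LOAD_OFFSET = 0.  */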
9931 static int
9932 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9933 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9935 int unsorted_regs[MAX_LDM_STM_OPS];
9936 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9937 int order[MAX_LDM_STM_OPS];
9938 rtx base_reg_rtx = NULL;
9939 int base_reg = -1;
9940 int i, ldm_case;
9942 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9943 easily extended if required. */
9944 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9946 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9948 /* Loop over the operands and check that the memory references are
9949 suitable (i.e. immediate offsets from the same base register). At
9950 the same time, extract the target register, and the memory
9951 offsets. */
9952 for (i = 0; i < nops; i++)
9954 rtx reg;
9955 rtx offset;
9957 /* Convert a subreg of a mem into the mem itself. */
9958 if (GET_CODE (operands[nops + i]) == SUBREG)
9959 operands[nops + i] = alter_subreg (operands + (nops + i));
9961 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9963 /* Don't reorder volatile memory references; it doesn't seem worth
9964 looking for the case where the order is ok anyway. */
9965 if (MEM_VOLATILE_P (operands[nops + i]))
9966 return 0;
9968 offset = const0_rtx;
9970 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9971 || (GET_CODE (reg) == SUBREG
9972 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9973 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9974 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9975 == REG)
9976 || (GET_CODE (reg) == SUBREG
9977 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9978 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9979 == CONST_INT)))
9981 if (i == 0)
9983 base_reg = REGNO (reg);
9984 base_reg_rtx = reg;
9985 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9986 return 0;
9988 else if (base_reg != (int) REGNO (reg))
9989 /* Not addressed from the same base register. */
9990 return 0;
9992 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9993 ? REGNO (operands[i])
9994 : REGNO (SUBREG_REG (operands[i])));
9996 /* If it isn't an integer register, or if it overwrites the
9997 base register but isn't the last insn in the list, then
9998 we can't do this. */
9999 if (unsorted_regs[i] < 0
10000 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10001 || unsorted_regs[i] > 14
10002 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10003 return 0;
10005 unsorted_offsets[i] = INTVAL (offset);
10006 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10007 order[0] = i;
10009 else
10010 /* Not a suitable memory address. */
10011 return 0;
10014 /* All the useful information has now been extracted from the
10015 operands into unsorted_regs and unsorted_offsets; additionally,
10016 order[0] has been set to the lowest offset in the list. Sort
10017 the offsets into order, verifying that they are adjacent, and
10018 check that the register numbers are ascending. */
10019 if (!compute_offset_order (nops, unsorted_offsets, order,
10020 check_regs ? unsorted_regs : NULL))
10021 return 0;
10023 if (saved_order)
10024 memcpy (saved_order, order, sizeof order);
10026 if (base)
10028 *base = base_reg;
10030 for (i = 0; i < nops; i++)
10031 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10033 *load_offset = unsorted_offsets[order[0]];
10036 if (TARGET_THUMB1
10037 && !peep2_reg_dead_p (nops, base_reg_rtx))
10038 return 0;
10040 if (unsorted_offsets[order[0]] == 0)
10041 ldm_case = 1; /* ldmia */
10042 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10043 ldm_case = 2; /* ldmib */
10044 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10045 ldm_case = 3; /* ldmda */
10046 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10047 ldm_case = 4; /* ldmdb */
10048 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10049 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10050 ldm_case = 5;
10051 else
10052 return 0;
10054 if (!multiple_operation_profitable_p (false, nops,
10055 ldm_case == 5
10056 ? unsorted_offsets[order[0]] : 0))
10057 return 0;
10059 return ldm_case;
10062 /* Used to determine in a peephole whether a sequence of store instructions can
10063 be changed into a store-multiple instruction.
10064 NOPS is the number of separate store instructions we are examining.
10065 NOPS_TOTAL is the total number of instructions recognized by the peephole
10066 pattern.
10067 The first NOPS entries in OPERANDS are the source registers, the next
10068 NOPS entries are memory operands. If this function is successful, *BASE is
10069 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10070 to the first memory location's offset from that base register. REGS is an
10071 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10072 likewise filled with the corresponding rtx's.
10073 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10074 numbers to an ascending order of stores.
10075 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10076 from ascending memory locations, and the function verifies that the register
10077 numbers are themselves ascending. If CHECK_REGS is false, the register
10078 numbers are stored in the order they are found in the operands. */
10079 static int
10080 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10081 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10082 HOST_WIDE_INT *load_offset, bool check_regs)
10084 int unsorted_regs[MAX_LDM_STM_OPS];
10085 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10086 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10087 int order[MAX_LDM_STM_OPS];
10088 int base_reg = -1;
10089 rtx base_reg_rtx = NULL;
10090 int i, stm_case;
10092 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10093 easily extended if required. */
10094 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10096 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10098 /* Loop over the operands and check that the memory references are
10099 suitable (i.e. immediate offsets from the same base register). At
10100 the same time, extract the target register, and the memory
10101 offsets. */
10102 for (i = 0; i < nops; i++)
10104 rtx reg;
10105 rtx offset;
10107 /* Convert a subreg of a mem into the mem itself. */
10108 if (GET_CODE (operands[nops + i]) == SUBREG)
10109 operands[nops + i] = alter_subreg (operands + (nops + i));
10111 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10113 /* Don't reorder volatile memory references; it doesn't seem worth
10114 looking for the case where the order is ok anyway. */
10115 if (MEM_VOLATILE_P (operands[nops + i]))
10116 return 0;
10118 offset = const0_rtx;
10120 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10121 || (GET_CODE (reg) == SUBREG
10122 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10123 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10124 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10125 == REG)
10126 || (GET_CODE (reg) == SUBREG
10127 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10128 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10129 == CONST_INT)))
10131 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10132 ? operands[i] : SUBREG_REG (operands[i]));
10133 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10135 if (i == 0)
10137 base_reg = REGNO (reg);
10138 base_reg_rtx = reg;
10139 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10140 return 0;
10142 else if (base_reg != (int) REGNO (reg))
10143 /* Not addressed from the same base register. */
10144 return 0;
10146 /* If it isn't an integer register, then we can't do this. */
10147 if (unsorted_regs[i] < 0
10148 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10149 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
10150 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10151 || unsorted_regs[i] > 14)
10152 return 0;
10154 unsorted_offsets[i] = INTVAL (offset);
10155 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10156 order[0] = i;
10158 else
10159 /* Not a suitable memory address. */
10160 return 0;
10163 /* All the useful information has now been extracted from the
10164 operands into unsorted_regs and unsorted_offsets; additionally,
10165 order[0] has been set to the lowest offset in the list. Sort
10166 the offsets into order, verifying that they are adjacent, and
10167 check that the register numbers are ascending. */
10168 if (!compute_offset_order (nops, unsorted_offsets, order,
10169 check_regs ? unsorted_regs : NULL))
10170 return 0;
10172 if (saved_order)
10173 memcpy (saved_order, order, sizeof order);
10175 if (base)
10177 *base = base_reg;
10179 for (i = 0; i < nops; i++)
10181 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10182 if (reg_rtxs)
10183 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10186 *load_offset = unsorted_offsets[order[0]];
10189 if (TARGET_THUMB1
10190 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10191 return 0;
10193 if (unsorted_offsets[order[0]] == 0)
10194 stm_case = 1; /* stmia */
10195 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10196 stm_case = 2; /* stmib */
10197 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10198 stm_case = 3; /* stmda */
10199 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10200 stm_case = 4; /* stmdb */
10201 else
10202 return 0;
10204 if (!multiple_operation_profitable_p (false, nops, 0))
10205 return 0;
10207 return stm_case;
10210 /* Routines for use in generating RTL. */
10212 /* Generate a load-multiple instruction. COUNT is the number of loads in
10213 the instruction; REGS and MEMS are arrays containing the operands.
10214 BASEREG is the base register to be used in addressing the memory operands.
10215 WBACK_OFFSET is nonzero if the instruction should update the base
10216 register. */
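/* For instance, COUNT == 2, REGS == {4, 5}, BASEREG == r1 and
   WBACK_OFFSET == 8 (values chosen for illustration): when the multiple
   operation is deemed profitable, the result is a PARALLEL whose first
   element sets r1 to r1 + 8 and whose remaining elements load r4 and r5
   -- roughly what is later emitted as "ldmia r1!, {r4, r5}".  */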
10218 static rtx
10219 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10220 HOST_WIDE_INT wback_offset)
10222 int i = 0, j;
10223 rtx result;
10225 if (!multiple_operation_profitable_p (false, count, 0))
10227 rtx seq;
10229 start_sequence ();
10231 for (i = 0; i < count; i++)
10232 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10234 if (wback_offset != 0)
10235 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10237 seq = get_insns ();
10238 end_sequence ();
10240 return seq;
10243 result = gen_rtx_PARALLEL (VOIDmode,
10244 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10245 if (wback_offset != 0)
10247 XVECEXP (result, 0, 0)
10248 = gen_rtx_SET (VOIDmode, basereg,
10249 plus_constant (basereg, wback_offset));
10250 i = 1;
10251 count++;
10254 for (j = 0; i < count; i++, j++)
10255 XVECEXP (result, 0, i)
10256 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10258 return result;
10261 /* Generate a store-multiple instruction. COUNT is the number of stores in
10262 the instruction; REGS and MEMS are arrays containing the operands.
10263 BASEREG is the base register to be used in addressing the memory operands.
10264 WBACK_OFFSET is nonzero if the instruction should update the base
10265 register. */
10267 static rtx
10268 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10269 HOST_WIDE_INT wback_offset)
10271 int i = 0, j;
10272 rtx result;
10274 if (GET_CODE (basereg) == PLUS)
10275 basereg = XEXP (basereg, 0);
10277 if (!multiple_operation_profitable_p (false, count, 0))
10279 rtx seq;
10281 start_sequence ();
10283 for (i = 0; i < count; i++)
10284 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10286 if (wback_offset != 0)
10287 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10289 seq = get_insns ();
10290 end_sequence ();
10292 return seq;
10295 result = gen_rtx_PARALLEL (VOIDmode,
10296 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10297 if (wback_offset != 0)
10299 XVECEXP (result, 0, 0)
10300 = gen_rtx_SET (VOIDmode, basereg,
10301 plus_constant (basereg, wback_offset));
10302 i = 1;
10303 count++;
10306 for (j = 0; i < count; i++, j++)
10307 XVECEXP (result, 0, i)
10308 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10310 return result;
10313 /* Generate either a load-multiple or a store-multiple instruction. This
10314 function can be used in situations where we can start with a single MEM
10315 rtx and adjust its address upwards.
10316 COUNT is the number of operations in the instruction, not counting a
10317 possible update of the base register. REGS is an array containing the
10318 register operands.
10319 BASEREG is the base register to be used in addressing the memory operands,
10320 which are constructed from BASEMEM.
10321 WRITE_BACK specifies whether the generated instruction should include an
10322 update of the base register.
10323 OFFSETP is used to pass an offset to and from this function; this offset
10324 is not used when constructing the address (instead BASEMEM should have an
10325 appropriate offset in its address), it is used only for setting
10326 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
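/* Sketch of the effect, assuming WRITE_BACK and COUNT == 2 with REGS ==
   {4, 5} and BASEMEM addressed off r0 (illustrative values): the MEMs
   built below are [r0] and [r0, #4], and the helper also updates the base
   register by 4 * COUNT, i.e. r0 := r0 + 8.  */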
10328 static rtx
10329 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10330 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10332 rtx mems[MAX_LDM_STM_OPS];
10333 HOST_WIDE_INT offset = *offsetp;
10334 int i;
10336 gcc_assert (count <= MAX_LDM_STM_OPS);
10338 if (GET_CODE (basereg) == PLUS)
10339 basereg = XEXP (basereg, 0);
10341 for (i = 0; i < count; i++)
10343 rtx addr = plus_constant (basereg, i * 4);
10344 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10345 offset += 4;
10348 if (write_back)
10349 *offsetp = offset;
10351 if (is_load)
10352 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10353 write_back ? 4 * count : 0);
10354 else
10355 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10356 write_back ? 4 * count : 0);
10360 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10361 rtx basemem, HOST_WIDE_INT *offsetp)
10363 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10364 offsetp);
10368 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10369 rtx basemem, HOST_WIDE_INT *offsetp)
10371 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10372 offsetp);
10375 /* Called from a peephole2 expander to turn a sequence of loads into an
10376 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10377 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10378 is true if we may reorder the registers because their subsequent uses
10379 are commutative.
10380 Returns true iff we could generate a new instruction. */
10382 bool
10383 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10385 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10386 rtx mems[MAX_LDM_STM_OPS];
10387 int i, j, base_reg;
10388 rtx base_reg_rtx;
10389 HOST_WIDE_INT offset;
10390 int write_back = FALSE;
10391 int ldm_case;
10392 rtx addr;
10394 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10395 &base_reg, &offset, !sort_regs);
10397 if (ldm_case == 0)
10398 return false;
10400 if (sort_regs)
10401 for (i = 0; i < nops - 1; i++)
10402 for (j = i + 1; j < nops; j++)
10403 if (regs[i] > regs[j])
10405 int t = regs[i];
10406 regs[i] = regs[j];
10407 regs[j] = t;
10409 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10411 if (TARGET_THUMB1)
10413 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10414 gcc_assert (ldm_case == 1 || ldm_case == 5);
10415 write_back = TRUE;
10418 if (ldm_case == 5)
10420 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10421 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10422 offset = 0;
10423 if (!TARGET_THUMB1)
10425 base_reg = regs[0];
10426 base_reg_rtx = newbase;
10430 for (i = 0; i < nops; i++)
10432 addr = plus_constant (base_reg_rtx, offset + i * 4);
10433 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10434 SImode, addr, 0);
10436 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10437 write_back ? offset + i * 4 : 0));
10438 return true;
10441 /* Called from a peephole2 expander to turn a sequence of stores into an
10442 STM instruction. OPERANDS are the operands found by the peephole matcher;
10443 NOPS indicates how many separate stores we are trying to combine.
10444 Returns true iff we could generate a new instruction. */
10446 bool
10447 gen_stm_seq (rtx *operands, int nops)
10449 int i;
10450 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10451 rtx mems[MAX_LDM_STM_OPS];
10452 int base_reg;
10453 rtx base_reg_rtx;
10454 HOST_WIDE_INT offset;
10455 int write_back = FALSE;
10456 int stm_case;
10457 rtx addr;
10458 bool base_reg_dies;
10460 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10461 mem_order, &base_reg, &offset, true);
10463 if (stm_case == 0)
10464 return false;
10466 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10468 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10469 if (TARGET_THUMB1)
10471 gcc_assert (base_reg_dies);
10472 write_back = TRUE;
10475 if (stm_case == 5)
10477 gcc_assert (base_reg_dies);
10478 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10479 offset = 0;
10482 addr = plus_constant (base_reg_rtx, offset);
10484 for (i = 0; i < nops; i++)
10486 addr = plus_constant (base_reg_rtx, offset + i * 4);
10487 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10488 SImode, addr, 0);
10490 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10491 write_back ? offset + i * 4 : 0));
10492 return true;
10495 /* Called from a peephole2 expander to turn a sequence of stores that are
10496 preceded by constant loads into an STM instruction. OPERANDS are the
10497 operands found by the peephole matcher; NOPS indicates how many
10498 separate stores we are trying to combine; there are 2 * NOPS
10499 instructions in the peephole.
10500 Returns true iff we could generate a new instruction. */
10502 bool
10503 gen_const_stm_seq (rtx *operands, int nops)
10505 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10506 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10507 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10508 rtx mems[MAX_LDM_STM_OPS];
10509 int base_reg;
10510 rtx base_reg_rtx;
10511 HOST_WIDE_INT offset;
10512 int write_back = FALSE;
10513 int stm_case;
10514 rtx addr;
10515 bool base_reg_dies;
10516 int i, j;
10517 HARD_REG_SET allocated;
10519 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10520 mem_order, &base_reg, &offset, false);
10522 if (stm_case == 0)
10523 return false;
10525 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10527 /* If the same register is used more than once, try to find a free
10528 register. */
10529 CLEAR_HARD_REG_SET (allocated);
10530 for (i = 0; i < nops; i++)
10532 for (j = i + 1; j < nops; j++)
10533 if (regs[i] == regs[j])
10535 rtx t = peep2_find_free_register (0, nops * 2,
10536 TARGET_THUMB1 ? "l" : "r",
10537 SImode, &allocated);
10538 if (t == NULL_RTX)
10539 return false;
10540 reg_rtxs[i] = t;
10541 regs[i] = REGNO (t);
10545 /* Compute an ordering that maps the register numbers to an ascending
10546 sequence. */
10547 reg_order[0] = 0;
10548 for (i = 0; i < nops; i++)
10549 if (regs[i] < regs[reg_order[0]])
10550 reg_order[0] = i;
10552 for (i = 1; i < nops; i++)
10554 int this_order = reg_order[i - 1];
10555 for (j = 0; j < nops; j++)
10556 if (regs[j] > regs[reg_order[i - 1]]
10557 && (this_order == reg_order[i - 1]
10558 || regs[j] < regs[this_order]))
10559 this_order = j;
10560 reg_order[i] = this_order;
10563 /* Ensure that registers that must be live after the instruction end
10564 up with the correct value. */
10565 for (i = 0; i < nops; i++)
10567 int this_order = reg_order[i];
10568 if ((this_order != mem_order[i]
10569 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10570 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10571 return false;
10574 /* Load the constants. */
10575 for (i = 0; i < nops; i++)
10577 rtx op = operands[2 * nops + mem_order[i]];
10578 sorted_regs[i] = regs[reg_order[i]];
10579 emit_move_insn (reg_rtxs[reg_order[i]], op);
10582 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10584 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10585 if (TARGET_THUMB1)
10587 gcc_assert (base_reg_dies);
10588 write_back = TRUE;
10591 if (stm_case == 5)
10593 gcc_assert (base_reg_dies);
10594 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10595 offset = 0;
10598 addr = plus_constant (base_reg_rtx, offset);
10600 for (i = 0; i < nops; i++)
10602 addr = plus_constant (base_reg_rtx, offset + i * 4);
10603 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10604 SImode, addr, 0);
10606 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10607 write_back ? offset + i * 4 : 0));
10608 return true;
10612 arm_gen_movmemqi (rtx *operands)
10614 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10615 HOST_WIDE_INT srcoffset, dstoffset;
10616 int i;
10617 rtx src, dst, srcbase, dstbase;
10618 rtx part_bytes_reg = NULL;
10619 rtx mem;
10621 if (GET_CODE (operands[2]) != CONST_INT
10622 || GET_CODE (operands[3]) != CONST_INT
10623 || INTVAL (operands[2]) > 64
10624 || INTVAL (operands[3]) & 3)
10625 return 0;
10627 dstbase = operands[0];
10628 srcbase = operands[1];
10630 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10631 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10633 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10634 out_words_to_go = INTVAL (operands[2]) / 4;
10635 last_bytes = INTVAL (operands[2]) & 3;
10636 dstoffset = srcoffset = 0;
10638 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10639 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10641 for (i = 0; in_words_to_go >= 2; i+=4)
10643 if (in_words_to_go > 4)
10644 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10645 TRUE, srcbase, &srcoffset));
10646 else
10647 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10648 src, FALSE, srcbase,
10649 &srcoffset));
10651 if (out_words_to_go)
10653 if (out_words_to_go > 4)
10654 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10655 TRUE, dstbase, &dstoffset));
10656 else if (out_words_to_go != 1)
10657 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10658 out_words_to_go, dst,
10659 (last_bytes == 0
10660 ? FALSE : TRUE),
10661 dstbase, &dstoffset));
10662 else
10664 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10665 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10666 if (last_bytes != 0)
10668 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10669 dstoffset += 4;
10674 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10675 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10678 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10679 if (out_words_to_go)
10681 rtx sreg;
10683 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10684 sreg = copy_to_reg (mem);
10686 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10687 emit_move_insn (mem, sreg);
10688 in_words_to_go--;
10690 gcc_assert (!in_words_to_go); /* Sanity check */
10693 if (in_words_to_go)
10695 gcc_assert (in_words_to_go > 0);
10697 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10698 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10701 gcc_assert (!last_bytes || part_bytes_reg);
10703 if (BYTES_BIG_ENDIAN && last_bytes)
10705 rtx tmp = gen_reg_rtx (SImode);
10707 /* The bytes we want are in the top end of the word. */
10708 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10709 GEN_INT (8 * (4 - last_bytes))));
10710 part_bytes_reg = tmp;
10712 while (last_bytes)
10714 mem = adjust_automodify_address (dstbase, QImode,
10715 plus_constant (dst, last_bytes - 1),
10716 dstoffset + last_bytes - 1);
10717 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10719 if (--last_bytes)
10721 tmp = gen_reg_rtx (SImode);
10722 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10723 part_bytes_reg = tmp;
10728 else
10730 if (last_bytes > 1)
10732 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10733 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10734 last_bytes -= 2;
10735 if (last_bytes)
10737 rtx tmp = gen_reg_rtx (SImode);
10738 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10739 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10740 part_bytes_reg = tmp;
10741 dstoffset += 2;
10745 if (last_bytes)
10747 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10748 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10752 return 1;
10755 /* Select a dominance comparison mode if possible for a test of the general
10756 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10757 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10758 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10759 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10760 In all cases OP will be either EQ or NE, but we don't need to know which
10761 here. If we are unable to support a dominance comparison we return
10762 CC mode. This will then fail to match for the RTL expressions that
10763 generate this call. */
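/* For instance, with COND_OR == DOM_CC_X_OR_Y, X == (eq r0 r1) and
   Y == (ge r2 r3) (registers chosen arbitrarily), the switch below
   returns CC_DGEmode: whenever EQ holds GE also holds, so a single
   dominance mode can test the combined condition.  */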
10764 enum machine_mode
10765 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10767 enum rtx_code cond1, cond2;
10768 int swapped = 0;
10770 /* Currently we will probably get the wrong result if the individual
10771 comparisons are not simple. This also ensures that it is safe to
10772 reverse a comparison if necessary. */
10773 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10774 != CCmode)
10775 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10776 != CCmode))
10777 return CCmode;
10779 /* The if_then_else variant of this tests the second condition if the
10780 first passes, but is true if the first fails. Reverse the first
10781 condition to get a true "inclusive-or" expression. */
10782 if (cond_or == DOM_CC_NX_OR_Y)
10783 cond1 = reverse_condition (cond1);
10785 /* If the comparisons are not equal, and one doesn't dominate the other,
10786 then we can't do this. */
10787 if (cond1 != cond2
10788 && !comparison_dominates_p (cond1, cond2)
10789 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10790 return CCmode;
10792 if (swapped)
10794 enum rtx_code temp = cond1;
10795 cond1 = cond2;
10796 cond2 = temp;
10799 switch (cond1)
10801 case EQ:
10802 if (cond_or == DOM_CC_X_AND_Y)
10803 return CC_DEQmode;
10805 switch (cond2)
10807 case EQ: return CC_DEQmode;
10808 case LE: return CC_DLEmode;
10809 case LEU: return CC_DLEUmode;
10810 case GE: return CC_DGEmode;
10811 case GEU: return CC_DGEUmode;
10812 default: gcc_unreachable ();
10815 case LT:
10816 if (cond_or == DOM_CC_X_AND_Y)
10817 return CC_DLTmode;
10819 switch (cond2)
10821 case LT:
10822 return CC_DLTmode;
10823 case LE:
10824 return CC_DLEmode;
10825 case NE:
10826 return CC_DNEmode;
10827 default:
10828 gcc_unreachable ();
10831 case GT:
10832 if (cond_or == DOM_CC_X_AND_Y)
10833 return CC_DGTmode;
10835 switch (cond2)
10837 case GT:
10838 return CC_DGTmode;
10839 case GE:
10840 return CC_DGEmode;
10841 case NE:
10842 return CC_DNEmode;
10843 default:
10844 gcc_unreachable ();
10847 case LTU:
10848 if (cond_or == DOM_CC_X_AND_Y)
10849 return CC_DLTUmode;
10851 switch (cond2)
10853 case LTU:
10854 return CC_DLTUmode;
10855 case LEU:
10856 return CC_DLEUmode;
10857 case NE:
10858 return CC_DNEmode;
10859 default:
10860 gcc_unreachable ();
10863 case GTU:
10864 if (cond_or == DOM_CC_X_AND_Y)
10865 return CC_DGTUmode;
10867 switch (cond2)
10869 case GTU:
10870 return CC_DGTUmode;
10871 case GEU:
10872 return CC_DGEUmode;
10873 case NE:
10874 return CC_DNEmode;
10875 default:
10876 gcc_unreachable ();
10879 /* The remaining cases only occur when both comparisons are the
10880 same. */
10881 case NE:
10882 gcc_assert (cond1 == cond2);
10883 return CC_DNEmode;
10885 case LE:
10886 gcc_assert (cond1 == cond2);
10887 return CC_DLEmode;
10889 case GE:
10890 gcc_assert (cond1 == cond2);
10891 return CC_DGEmode;
10893 case LEU:
10894 gcc_assert (cond1 == cond2);
10895 return CC_DLEUmode;
10897 case GEU:
10898 gcc_assert (cond1 == cond2);
10899 return CC_DGEUmode;
10901 default:
10902 gcc_unreachable ();
10906 enum machine_mode
10907 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10909 /* All floating point compares return CCFP if it is an equality
10910 comparison, and CCFPE otherwise. */
10911 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10913 switch (op)
10915 case EQ:
10916 case NE:
10917 case UNORDERED:
10918 case ORDERED:
10919 case UNLT:
10920 case UNLE:
10921 case UNGT:
10922 case UNGE:
10923 case UNEQ:
10924 case LTGT:
10925 return CCFPmode;
10927 case LT:
10928 case LE:
10929 case GT:
10930 case GE:
10931 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10932 return CCFPmode;
10933 return CCFPEmode;
10935 default:
10936 gcc_unreachable ();
10940 /* A compare with a shifted operand. Because of canonicalization, the
10941 comparison will have to be swapped when we emit the assembler. */
10942 if (GET_MODE (y) == SImode
10943 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10944 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10945 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10946 || GET_CODE (x) == ROTATERT))
10947 return CC_SWPmode;
10949 /* This operation is performed swapped, but since we only rely on the Z
10950 flag we don't need an additional mode. */
10951 if (GET_MODE (y) == SImode
10952 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10953 && GET_CODE (x) == NEG
10954 && (op == EQ || op == NE))
10955 return CC_Zmode;
10957 /* This is a special case that is used by combine to allow a
10958 comparison of a shifted byte load to be split into a zero-extend
10959 followed by a comparison of the shifted integer (only valid for
10960 equalities and unsigned inequalities). */
10961 if (GET_MODE (x) == SImode
10962 && GET_CODE (x) == ASHIFT
10963 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10964 && GET_CODE (XEXP (x, 0)) == SUBREG
10965 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10966 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10967 && (op == EQ || op == NE
10968 || op == GEU || op == GTU || op == LTU || op == LEU)
10969 && GET_CODE (y) == CONST_INT)
10970 return CC_Zmode;
10972 /* A construct for a conditional compare, if the false arm contains
10973 0, then both conditions must be true, otherwise either condition
10974 must be true. Not all conditions are possible, so CCmode is
10975 returned if it can't be done. */
10976 if (GET_CODE (x) == IF_THEN_ELSE
10977 && (XEXP (x, 2) == const0_rtx
10978 || XEXP (x, 2) == const1_rtx)
10979 && COMPARISON_P (XEXP (x, 0))
10980 && COMPARISON_P (XEXP (x, 1)))
10981 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10982 INTVAL (XEXP (x, 2)));
10984 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10985 if (GET_CODE (x) == AND
10986 && (op == EQ || op == NE)
10987 && COMPARISON_P (XEXP (x, 0))
10988 && COMPARISON_P (XEXP (x, 1)))
10989 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10990 DOM_CC_X_AND_Y);
10992 if (GET_CODE (x) == IOR
10993 && (op == EQ || op == NE)
10994 && COMPARISON_P (XEXP (x, 0))
10995 && COMPARISON_P (XEXP (x, 1)))
10996 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10997 DOM_CC_X_OR_Y);
10999 /* An operation (on Thumb) where we want to test for a single bit.
11000 This is done by shifting that bit up into the top bit of a
11001 scratch register; we can then branch on the sign bit. */
11002 if (TARGET_THUMB1
11003 && GET_MODE (x) == SImode
11004 && (op == EQ || op == NE)
11005 && GET_CODE (x) == ZERO_EXTRACT
11006 && XEXP (x, 1) == const1_rtx)
11007 return CC_Nmode;
11009 /* An operation that sets the condition codes as a side-effect, the
11010 V flag is not set correctly, so we can only use comparisons where
11011 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11012 instead.) */
11013 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11014 if (GET_MODE (x) == SImode
11015 && y == const0_rtx
11016 && (op == EQ || op == NE || op == LT || op == GE)
11017 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11018 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11019 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11020 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11021 || GET_CODE (x) == LSHIFTRT
11022 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11023 || GET_CODE (x) == ROTATERT
11024 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11025 return CC_NOOVmode;
11027 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11028 return CC_Zmode;
11030 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11031 && GET_CODE (x) == PLUS
11032 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11033 return CC_Cmode;
11035 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11037 /* To keep things simple, always use the Cirrus cfcmp64 if it is
11038 available. */
11039 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11040 return CCmode;
11042 switch (op)
11044 case EQ:
11045 case NE:
11046 /* A DImode comparison against zero can be implemented by
11047 or'ing the two halves together. */
11048 if (y == const0_rtx)
11049 return CC_Zmode;
11051 /* We can do an equality test in three Thumb instructions. */
11052 if (!TARGET_ARM)
11053 return CC_Zmode;
11055 /* FALLTHROUGH */
11057 case LTU:
11058 case LEU:
11059 case GTU:
11060 case GEU:
11061 /* DImode unsigned comparisons can be implemented by cmp +
11062 cmpeq without a scratch register. Not worth doing in
11063 Thumb-2. */
11064 if (TARGET_ARM)
11065 return CC_CZmode;
11067 /* FALLTHROUGH */
11069 case LT:
11070 case LE:
11071 case GT:
11072 case GE:
11073 /* DImode signed and unsigned comparisons can be implemented
11074 by cmp + sbcs with a scratch register, but that does not
11075 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11076 gcc_assert (op != EQ && op != NE);
11077 return CC_NCVmode;
11079 default:
11080 gcc_unreachable ();
11084 return CCmode;
11087 /* X and Y are two things to compare using CODE. Emit the compare insn and
11088 return the rtx for register 0 in the proper mode. FP means this is a
11089 floating point compare: I don't think that it is needed on the arm. */
11091 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
11093 enum machine_mode mode;
11094 rtx cc_reg;
11095 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11097 /* We might have X as a constant, Y as a register because of the predicates
11098 used for cmpdi. If so, force X to a register here. */
11099 if (dimode_comparison && !REG_P (x))
11100 x = force_reg (DImode, x);
11102 mode = SELECT_CC_MODE (code, x, y);
11103 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11105 if (dimode_comparison
11106 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11107 && mode != CC_CZmode)
11109 rtx clobber, set;
11111 /* To compare two non-zero values for equality, XOR them and
11112 then compare against zero. Not used for ARM mode; there
11113 CC_CZmode is cheaper. */
11114 if (mode == CC_Zmode && y != const0_rtx)
11116 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11117 y = const0_rtx;
11119 /* A scratch register is required. */
11120 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
11121 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11122 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11124 else
11125 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11127 return cc_reg;
11130 /* Generate a sequence of insns that will generate the correct return
11131 address mask depending on the physical architecture that the program
11132 is running on. */
11134 arm_gen_return_addr_mask (void)
11136 rtx reg = gen_reg_rtx (Pmode);
11138 emit_insn (gen_return_addr_mask (reg));
11139 return reg;
11142 void
11143 arm_reload_in_hi (rtx *operands)
11145 rtx ref = operands[1];
11146 rtx base, scratch;
11147 HOST_WIDE_INT offset = 0;
11149 if (GET_CODE (ref) == SUBREG)
11151 offset = SUBREG_BYTE (ref);
11152 ref = SUBREG_REG (ref);
11155 if (GET_CODE (ref) == REG)
11157 /* We have a pseudo which has been spilt onto the stack; there
11158 are two cases here: the first where there is a simple
11159 stack-slot replacement and a second where the stack-slot is
11160 out of range, or is used as a subreg. */
11161 if (reg_equiv_mem (REGNO (ref)))
11163 ref = reg_equiv_mem (REGNO (ref));
11164 base = find_replacement (&XEXP (ref, 0));
11166 else
11167 /* The slot is out of range, or was dressed up in a SUBREG. */
11168 base = reg_equiv_address (REGNO (ref));
11170 else
11171 base = find_replacement (&XEXP (ref, 0));
11173 /* Handle the case where the address is too complex to be offset by 1. */
11174 if (GET_CODE (base) == MINUS
11175 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11177 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11179 emit_set_insn (base_plus, base);
11180 base = base_plus;
11182 else if (GET_CODE (base) == PLUS)
11184 /* The addend must be CONST_INT, or we would have dealt with it above. */
11185 HOST_WIDE_INT hi, lo;
11187 offset += INTVAL (XEXP (base, 1));
11188 base = XEXP (base, 0);
11190 /* Rework the address into a legal sequence of insns. */
11191 /* Valid range for lo is -4095 -> 4095 */
11192 lo = (offset >= 0
11193 ? (offset & 0xfff)
11194 : -((-offset) & 0xfff));
11196 /* Corner case: if lo is the maximum offset, then once we add the
11197 additional 1 below we would be out of range, so bump the msb into the
11198 pre-loading insn(s). */
11199 if (lo == 4095)
11200 lo &= 0x7ff;
11202 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11203 ^ (HOST_WIDE_INT) 0x80000000)
11204 - (HOST_WIDE_INT) 0x80000000);
11206 gcc_assert (hi + lo == offset);
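/* For example, offset 0x1234 splits into lo = 0x234, hi = 0x1000, while
   the corner case offset 0xfff splits into lo = 0x7ff, hi = 0x800 so that
   the "offset + 1" byte load below stays in range.  */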
11208 if (hi != 0)
11210 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11212 /* Get the base address; addsi3 knows how to handle constants
11213 that require more than one insn. */
11214 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11215 base = base_plus;
11216 offset = lo;
11220 /* Operands[2] may overlap operands[0] (though it won't overlap
11221 operands[1]); that is why we asked for a DImode reg -- so we can
11222 use the word that does not overlap. */
11223 if (REGNO (operands[2]) == REGNO (operands[0]))
11224 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11225 else
11226 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11228 emit_insn (gen_zero_extendqisi2 (scratch,
11229 gen_rtx_MEM (QImode,
11230 plus_constant (base,
11231 offset))));
11232 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11233 gen_rtx_MEM (QImode,
11234 plus_constant (base,
11235 offset + 1))));
11236 if (!BYTES_BIG_ENDIAN)
11237 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11238 gen_rtx_IOR (SImode,
11239 gen_rtx_ASHIFT
11240 (SImode,
11241 gen_rtx_SUBREG (SImode, operands[0], 0),
11242 GEN_INT (8)),
11243 scratch));
11244 else
11245 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11246 gen_rtx_IOR (SImode,
11247 gen_rtx_ASHIFT (SImode, scratch,
11248 GEN_INT (8)),
11249 gen_rtx_SUBREG (SImode, operands[0], 0)));
11252 /* Handle storing a half-word to memory during reload by synthesizing as two
11253 byte stores. Take care not to clobber the input values until after we
11254 have moved them somewhere safe. This code assumes that if the DImode
11255 scratch in operands[2] overlaps either the input value or output address
11256 in some way, then that value must die in this insn (we absolutely need
11257 two scratch registers for some corner cases). */
11258 void
11259 arm_reload_out_hi (rtx *operands)
11261 rtx ref = operands[0];
11262 rtx outval = operands[1];
11263 rtx base, scratch;
11264 HOST_WIDE_INT offset = 0;
11266 if (GET_CODE (ref) == SUBREG)
11268 offset = SUBREG_BYTE (ref);
11269 ref = SUBREG_REG (ref);
11272 if (GET_CODE (ref) == REG)
11274 /* We have a pseudo which has been spilt onto the stack; there
11275 are two cases here: the first where there is a simple
11276 stack-slot replacement and a second where the stack-slot is
11277 out of range, or is used as a subreg. */
11278 if (reg_equiv_mem (REGNO (ref)))
11280 ref = reg_equiv_mem (REGNO (ref));
11281 base = find_replacement (&XEXP (ref, 0));
11283 else
11284 /* The slot is out of range, or was dressed up in a SUBREG. */
11285 base = reg_equiv_address (REGNO (ref));
11287 else
11288 base = find_replacement (&XEXP (ref, 0));
11290 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11292 /* Handle the case where the address is too complex to be offset by 1. */
11293 if (GET_CODE (base) == MINUS
11294 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11296 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11298 /* Be careful not to destroy OUTVAL. */
11299 if (reg_overlap_mentioned_p (base_plus, outval))
11301 /* Updating base_plus might destroy outval, see if we can
11302 swap the scratch and base_plus. */
11303 if (!reg_overlap_mentioned_p (scratch, outval))
11305 rtx tmp = scratch;
11306 scratch = base_plus;
11307 base_plus = tmp;
11309 else
11311 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11313 /* Be conservative and copy OUTVAL into the scratch now;
11314 this should only be necessary if outval is a subreg
11315 of something larger than a word. */
11316 /* XXX Might this clobber base? I can't see how it can,
11317 since scratch is known to overlap with OUTVAL, and
11318 must be wider than a word. */
11319 emit_insn (gen_movhi (scratch_hi, outval));
11320 outval = scratch_hi;
11324 emit_set_insn (base_plus, base);
11325 base = base_plus;
11327 else if (GET_CODE (base) == PLUS)
11329 /* The addend must be CONST_INT, or we would have dealt with it above. */
11330 HOST_WIDE_INT hi, lo;
11332 offset += INTVAL (XEXP (base, 1));
11333 base = XEXP (base, 0);
11335 /* Rework the address into a legal sequence of insns. */
11336 /* Valid range for lo is -4095 -> 4095 */
11337 lo = (offset >= 0
11338 ? (offset & 0xfff)
11339 : -((-offset) & 0xfff));
11341 /* Corner case: if lo is the maximum offset, then once we add the
11342 additional 1 below we would be out of range, so bump the msb into the
11343 pre-loading insn(s). */
11344 if (lo == 4095)
11345 lo &= 0x7ff;
11347 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11348 ^ (HOST_WIDE_INT) 0x80000000)
11349 - (HOST_WIDE_INT) 0x80000000);
11351 gcc_assert (hi + lo == offset);
11353 if (hi != 0)
11355 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11357 /* Be careful not to destroy OUTVAL. */
11358 if (reg_overlap_mentioned_p (base_plus, outval))
11360 /* Updating base_plus might destroy outval, see if we
11361 can swap the scratch and base_plus. */
11362 if (!reg_overlap_mentioned_p (scratch, outval))
11364 rtx tmp = scratch;
11365 scratch = base_plus;
11366 base_plus = tmp;
11368 else
11370 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11372 /* Be conservative and copy outval into scratch now;
11373 this should only be necessary if outval is a
11374 subreg of something larger than a word. */
11375 /* XXX Might this clobber base? I can't see how it
11376 can, since scratch is known to overlap with
11377 outval. */
11378 emit_insn (gen_movhi (scratch_hi, outval));
11379 outval = scratch_hi;
11383 /* Get the base address; addsi3 knows how to handle constants
11384 that require more than one insn. */
11385 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11386 base = base_plus;
11387 offset = lo;
11391 if (BYTES_BIG_ENDIAN)
11393 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11394 plus_constant (base, offset + 1)),
11395 gen_lowpart (QImode, outval)));
11396 emit_insn (gen_lshrsi3 (scratch,
11397 gen_rtx_SUBREG (SImode, outval, 0),
11398 GEN_INT (8)));
11399 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11400 gen_lowpart (QImode, scratch)));
11402 else
11404 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11405 gen_lowpart (QImode, outval)));
11406 emit_insn (gen_lshrsi3 (scratch,
11407 gen_rtx_SUBREG (SImode, outval, 0),
11408 GEN_INT (8)));
11409 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11410 plus_constant (base, offset + 1)),
11411 gen_lowpart (QImode, scratch)));
11415 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11416 (padded to the size of a word) should be passed in a register. */
11418 static bool
11419 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11421 if (TARGET_AAPCS_BASED)
11422 return must_pass_in_stack_var_size (mode, type);
11423 else
11424 return must_pass_in_stack_var_size_or_pad (mode, type);
11428 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11429 Return true if an argument passed on the stack should be padded upwards,
11430 i.e. if the least-significant byte has useful data.
11431 For legacy APCS ABIs we use the default.  For AAPCS-based ABIs, small
11432 aggregate types are placed at the lowest memory address.
11434 bool
11435 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11437 if (!TARGET_AAPCS_BASED)
11438 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11440 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11441 return false;
11443 /* Half-float values are only passed to libcalls, not regular functions.
11444 They should be passed and returned as "short"s (see RTABI). To achieve
11445 that effect in big-endian mode, pad downwards so the value is passed in
11446 the least-significant end of the register. ??? This needs to be here
11447 rather than in arm_pad_reg_upward due to a peculiarity in the handling of
11448 libcall arguments. */
11449 if (BYTES_BIG_ENDIAN && mode == HFmode)
11450 return false;
11452 return true;
11456 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11457 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11458 byte of the register has useful data, and return the opposite if the
11459 most significant byte does.
11460 For AAPCS, small aggregates and small complex types are always padded
11461 upwards. */
11463 bool
11464 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11465 tree type, int first ATTRIBUTE_UNUSED)
11467 if (TARGET_AAPCS_BASED
11468 && BYTES_BIG_ENDIAN
11469 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE
11470 || FIXED_POINT_TYPE_P (type))
11471 && int_size_in_bytes (type) <= 4)
11472 return true;
11474 /* Otherwise, use default padding. */
11475 return !BYTES_BIG_ENDIAN;
11479 /* Print a symbolic form of X to the debug file, F. */
11480 static void
11481 arm_print_value (FILE *f, rtx x)
11483 switch (GET_CODE (x))
11485 case CONST_INT:
11486 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11487 return;
11489 case CONST_DOUBLE:
11490 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11491 return;
11493 case CONST_VECTOR:
11495 int i;
11497 fprintf (f, "<");
11498 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11500 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11501 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11502 fputc (',', f);
11504 fprintf (f, ">");
11506 return;
11508 case CONST_STRING:
11509 fprintf (f, "\"%s\"", XSTR (x, 0));
11510 return;
11512 case SYMBOL_REF:
11513 fprintf (f, "`%s'", XSTR (x, 0));
11514 return;
11516 case LABEL_REF:
11517 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11518 return;
11520 case CONST:
11521 arm_print_value (f, XEXP (x, 0));
11522 return;
11524 case PLUS:
11525 arm_print_value (f, XEXP (x, 0));
11526 fprintf (f, "+");
11527 arm_print_value (f, XEXP (x, 1));
11528 return;
11530 case PC:
11531 fprintf (f, "pc");
11532 return;
11534 default:
11535 fprintf (f, "????");
11536 return;
11540 /* Routines for manipulation of the constant pool. */
11542 /* Arm instructions cannot load a large constant directly into a
11543 register; they have to come from a pc relative load. The constant
11544 must therefore be placed in the addressable range of the pc
11545 relative load. Depending on the precise pc relative load
11546 instruction the range is somewhere between 256 bytes and 4k. This
11547 means that we often have to dump a constant inside a function, and
11548 generate code to branch around it.
11550 It is important to minimize this, since the branches will slow
11551 things down and make the code larger.
11553 Normally we can hide the table after an existing unconditional
11554 branch so that there is no interruption of the flow, but in the
11555 worst case the code looks like this:
11557 ldr rn, L1
11559 b L2
11560 align
11561 L1: .long value
11565 ldr rn, L3
11567 b L4
11568 align
11569 L3: .long value
11573 We fix this by performing a scan after scheduling, which notices
11574 which instructions need to have their operands fetched from the
11575 constant table and builds the table.
11577 The algorithm starts by building a table of all the constants that
11578 need fixing up and all the natural barriers in the function (places
11579 where a constant table can be dropped without breaking the flow).
11580 For each fixup we note how far the pc-relative replacement will be
11581 able to reach and the offset of the instruction into the function.
11583 Having built the table we then group the fixes together to form
11584 tables that are as large as possible (subject to addressing
11585 constraints) and emit each table of constants after the last
11586 barrier that is within range of all the instructions in the group.
11587 If a group does not contain a barrier, then we forcibly create one
11588 by inserting a jump instruction into the flow. Once the table has
11589 been inserted, the insns are then modified to reference the
11590 relevant entry in the pool.
11592 Possible enhancements to the algorithm (not implemented) are:
11594 1) For some processors and object formats, there may be benefit in
11595 aligning the pools to the start of cache lines; this alignment
11596 would need to be taken into account when calculating addressability
11597 of a pool. */
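/* A rough sketch of the driver loop described above; this is illustrative
   pseudo-code only, the real implementation is arm_reorg and its helpers
   later in this file:

     scan the insns, recording fixups and natural barriers;
     while (fixups remain)
       {
         take the longest run of fixups that add_minipool_forward_ref
           will accept into a single pool;
         if (a natural barrier is within range of the whole run)
           emit the pool after that barrier (dump_minipool);
         else
           use create_fix_barrier to insert a jump around a new pool;
         rewrite each fixed insn to load from its pool entry;
       }  */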
11599 /* These typedefs are located at the start of this file, so that
11600 they can be used in the prototypes there. This comment is to
11601 remind readers of that fact so that the following structures
11602 can be understood more easily.
11604 typedef struct minipool_node Mnode;
11605 typedef struct minipool_fixup Mfix; */
11607 struct minipool_node
11609 /* Doubly linked chain of entries. */
11610 Mnode * next;
11611 Mnode * prev;
11612 /* The maximum offset into the code that this entry can be placed. While
11613 pushing fixes for forward references, all entries are sorted in order
11614 of increasing max_address. */
11615 HOST_WIDE_INT max_address;
11616 /* Similarly for an entry inserted for a backwards ref. */
11617 HOST_WIDE_INT min_address;
11618 /* The number of fixes referencing this entry. This can become zero
11619 if we "unpush" an entry. In this case we ignore the entry when we
11620 come to emit the code. */
11621 int refcount;
11622 /* The offset from the start of the minipool. */
11623 HOST_WIDE_INT offset;
11624 /* The value in the table. */
11625 rtx value;
11626 /* The mode of value. */
11627 enum machine_mode mode;
11628 /* The size of the value. With iWMMXt enabled
11629 sizes > 4 also imply an alignment of 8 bytes. */
11630 int fix_size;
11633 struct minipool_fixup
11635 Mfix * next;
11636 rtx insn;
11637 HOST_WIDE_INT address;
11638 rtx * loc;
11639 enum machine_mode mode;
11640 int fix_size;
11641 rtx value;
11642 Mnode * minipool;
11643 HOST_WIDE_INT forwards;
11644 HOST_WIDE_INT backwards;
11647 /* Fixes less than a word need padding out to a word boundary. */
11648 #define MINIPOOL_FIX_SIZE(mode) \
11649 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
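/* For example (informal, following directly from the definition above):
     MINIPOOL_FIX_SIZE (QImode) == 4   1-byte value padded to a word
     MINIPOOL_FIX_SIZE (HImode) == 4   2-byte value padded to a word
     MINIPOOL_FIX_SIZE (SImode) == 4
     MINIPOOL_FIX_SIZE (DImode) == 8   already >= 4, natural size kept  */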
11651 static Mnode * minipool_vector_head;
11652 static Mnode * minipool_vector_tail;
11653 static rtx minipool_vector_label;
11654 static int minipool_pad;
11656 /* The linked list of all minipool fixes required for this function. */
11657 Mfix * minipool_fix_head;
11658 Mfix * minipool_fix_tail;
11659 /* The fix entry for the current minipool, once it has been placed. */
11660 Mfix * minipool_barrier;
11662 /* Determines if INSN is the start of a jump table. Returns the end
11663 of the TABLE or NULL_RTX. */
11664 static rtx
11665 is_jump_table (rtx insn)
11667 rtx table;
11669 if (jump_to_label_p (insn)
11670 && ((table = next_real_insn (JUMP_LABEL (insn)))
11671 == next_real_insn (insn))
11672 && table != NULL
11673 && GET_CODE (table) == JUMP_INSN
11674 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11675 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11676 return table;
11678 return NULL_RTX;
11681 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11682 #define JUMP_TABLES_IN_TEXT_SECTION 0
11683 #endif
11685 static HOST_WIDE_INT
11686 get_jump_table_size (rtx insn)
11688 /* ADDR_VECs only take room if read-only data goes into the text
11689 section. */
11690 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11692 rtx body = PATTERN (insn);
11693 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11694 HOST_WIDE_INT size;
11695 HOST_WIDE_INT modesize;
11697 modesize = GET_MODE_SIZE (GET_MODE (body));
11698 size = modesize * XVECLEN (body, elt);
11699 switch (modesize)
11701 case 1:
11702 /* Round up size of TBB table to a halfword boundary. */
11703 size = (size + 1) & ~(HOST_WIDE_INT)1;
11704 break;
11705 case 2:
11706 /* No padding necessary for TBH. */
11707 break;
11708 case 4:
11709 /* Add two bytes for alignment on Thumb. */
11710 if (TARGET_THUMB)
11711 size += 2;
11712 break;
11713 default:
11714 gcc_unreachable ();
11716 return size;
11719 return 0;
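/* Worked example (assuming jump tables are emitted into the text
   section): a TBB-style ADDR_DIFF_VEC with modesize 1 and 5 entries is
   5 bytes, rounded up to 6 by the halfword alignment above; a 7-entry
   SImode ADDR_VEC on Thumb is 7 * 4 + 2 = 30 bytes.  */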
11722 /* Move a minipool fix MP from its current location to before MAX_MP.
11723 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11724 constraints may need updating. */
11725 static Mnode *
11726 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11727 HOST_WIDE_INT max_address)
11729 /* The code below assumes these are different. */
11730 gcc_assert (mp != max_mp);
11732 if (max_mp == NULL)
11734 if (max_address < mp->max_address)
11735 mp->max_address = max_address;
11737 else
11739 if (max_address > max_mp->max_address - mp->fix_size)
11740 mp->max_address = max_mp->max_address - mp->fix_size;
11741 else
11742 mp->max_address = max_address;
11744 /* Unlink MP from its current position. Since max_mp is non-null,
11745 mp->prev must be non-null. */
11746 mp->prev->next = mp->next;
11747 if (mp->next != NULL)
11748 mp->next->prev = mp->prev;
11749 else
11750 minipool_vector_tail = mp->prev;
11752 /* Re-insert it before MAX_MP. */
11753 mp->next = max_mp;
11754 mp->prev = max_mp->prev;
11755 max_mp->prev = mp;
11757 if (mp->prev != NULL)
11758 mp->prev->next = mp;
11759 else
11760 minipool_vector_head = mp;
11763 /* Save the new entry. */
11764 max_mp = mp;
11766 /* Scan over the preceding entries and adjust their addresses as
11767 required. */
11768 while (mp->prev != NULL
11769 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11771 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11772 mp = mp->prev;
11775 return max_mp;
11778 /* Add a constant to the minipool for a forward reference. Returns the
11779 node added or NULL if the constant will not fit in this pool. */
11780 static Mnode *
11781 add_minipool_forward_ref (Mfix *fix)
11783 /* If set, max_mp is the first pool_entry that has a lower
11784 constraint than the one we are trying to add. */
11785 Mnode * max_mp = NULL;
11786 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11787 Mnode * mp;
11789 /* If the minipool starts before the end of FIX->INSN then this FIX
11790 cannot be placed into the current pool. Furthermore, adding the
11791 new constant pool entry may cause the pool to start FIX_SIZE bytes
11792 earlier. */
11793 if (minipool_vector_head &&
11794 (fix->address + get_attr_length (fix->insn)
11795 >= minipool_vector_head->max_address - fix->fix_size))
11796 return NULL;
11798 /* Scan the pool to see if a constant with the same value has
11799 already been added. While we are doing this, also note the
11800 location where we must insert the constant if it doesn't already
11801 exist. */
11802 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11804 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11805 && fix->mode == mp->mode
11806 && (GET_CODE (fix->value) != CODE_LABEL
11807 || (CODE_LABEL_NUMBER (fix->value)
11808 == CODE_LABEL_NUMBER (mp->value)))
11809 && rtx_equal_p (fix->value, mp->value))
11811 /* More than one fix references this entry. */
11812 mp->refcount++;
11813 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11816 /* Note the insertion point if necessary. */
11817 if (max_mp == NULL
11818 && mp->max_address > max_address)
11819 max_mp = mp;
11821 /* If we are inserting an 8-byte aligned quantity and
11822 we have not already found an insertion point, then
11823 make sure that all such 8-byte aligned quantities are
11824 placed at the start of the pool. */
11825 if (ARM_DOUBLEWORD_ALIGN
11826 && max_mp == NULL
11827 && fix->fix_size >= 8
11828 && mp->fix_size < 8)
11830 max_mp = mp;
11831 max_address = mp->max_address;
11835 /* The value is not currently in the minipool, so we need to create
11836 a new entry for it. If MAX_MP is NULL, the entry will be put on
11837 the end of the list since the placement is less constrained than
11838 any existing entry. Otherwise, we insert the new fix before
11839 MAX_MP and, if necessary, adjust the constraints on the other
11840 entries. */
11841 mp = XNEW (Mnode);
11842 mp->fix_size = fix->fix_size;
11843 mp->mode = fix->mode;
11844 mp->value = fix->value;
11845 mp->refcount = 1;
11846 /* Not yet required for a backwards ref. */
11847 mp->min_address = -65536;
11849 if (max_mp == NULL)
11851 mp->max_address = max_address;
11852 mp->next = NULL;
11853 mp->prev = minipool_vector_tail;
11855 if (mp->prev == NULL)
11857 minipool_vector_head = mp;
11858 minipool_vector_label = gen_label_rtx ();
11860 else
11861 mp->prev->next = mp;
11863 minipool_vector_tail = mp;
11865 else
11867 if (max_address > max_mp->max_address - mp->fix_size)
11868 mp->max_address = max_mp->max_address - mp->fix_size;
11869 else
11870 mp->max_address = max_address;
11872 mp->next = max_mp;
11873 mp->prev = max_mp->prev;
11874 max_mp->prev = mp;
11875 if (mp->prev != NULL)
11876 mp->prev->next = mp;
11877 else
11878 minipool_vector_head = mp;
11881 /* Save the new entry. */
11882 max_mp = mp;
11884 /* Scan over the preceding entries and adjust their addresses as
11885 required. */
11886 while (mp->prev != NULL
11887 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11889 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11890 mp = mp->prev;
11893 return max_mp;
11896 static Mnode *
11897 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11898 HOST_WIDE_INT min_address)
11900 HOST_WIDE_INT offset;
11902 /* The code below assumes these are different. */
11903 gcc_assert (mp != min_mp);
11905 if (min_mp == NULL)
11907 if (min_address > mp->min_address)
11908 mp->min_address = min_address;
11910 else
11912 /* We will adjust this below if it is too loose. */
11913 mp->min_address = min_address;
11915 /* Unlink MP from its current position. Since min_mp is non-null,
11916 mp->next must be non-null. */
11917 mp->next->prev = mp->prev;
11918 if (mp->prev != NULL)
11919 mp->prev->next = mp->next;
11920 else
11921 minipool_vector_head = mp->next;
11923 /* Reinsert it after MIN_MP. */
11924 mp->prev = min_mp;
11925 mp->next = min_mp->next;
11926 min_mp->next = mp;
11927 if (mp->next != NULL)
11928 mp->next->prev = mp;
11929 else
11930 minipool_vector_tail = mp;
11933 min_mp = mp;
11935 offset = 0;
11936 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11938 mp->offset = offset;
11939 if (mp->refcount > 0)
11940 offset += mp->fix_size;
11942 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11943 mp->next->min_address = mp->min_address + mp->fix_size;
11946 return min_mp;
11949 /* Add a constant to the minipool for a backward reference. Returns the
11950 node added or NULL if the constant will not fit in this pool.
11952 Note that the code for insertion for a backwards reference can be
11953 somewhat confusing because the calculated offsets for each fix do
11954 not take into account the size of the pool (which is still under
11955 construction). */
11956 static Mnode *
11957 add_minipool_backward_ref (Mfix *fix)
11959 /* If set, min_mp is the last pool_entry that has a lower constraint
11960 than the one we are trying to add. */
11961 Mnode *min_mp = NULL;
11962 /* This can be negative, since it is only a constraint. */
11963 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11964 Mnode *mp;
11966 /* If we can't reach the current pool from this insn, or if we can't
11967 insert this entry at the end of the pool without pushing other
11968 fixes out of range, then we don't try. This ensures that we
11969 can't fail later on. */
11970 if (min_address >= minipool_barrier->address
11971 || (minipool_vector_tail->min_address + fix->fix_size
11972 >= minipool_barrier->address))
11973 return NULL;
11975 /* Scan the pool to see if a constant with the same value has
11976 already been added. While we are doing this, also note the
11977 location where we must insert the constant if it doesn't already
11978 exist. */
11979 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11981 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11982 && fix->mode == mp->mode
11983 && (GET_CODE (fix->value) != CODE_LABEL
11984 || (CODE_LABEL_NUMBER (fix->value)
11985 == CODE_LABEL_NUMBER (mp->value)))
11986 && rtx_equal_p (fix->value, mp->value)
11987 /* Check that there is enough slack to move this entry to the
11988 end of the table (this is conservative). */
11989 && (mp->max_address
11990 > (minipool_barrier->address
11991 + minipool_vector_tail->offset
11992 + minipool_vector_tail->fix_size)))
11994 mp->refcount++;
11995 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11998 if (min_mp != NULL)
11999 mp->min_address += fix->fix_size;
12000 else
12002 /* Note the insertion point if necessary. */
12003 if (mp->min_address < min_address)
12005 /* For now, we do not allow the insertion of 8-byte alignment
12006 requiring nodes anywhere but at the start of the pool. */
12007 if (ARM_DOUBLEWORD_ALIGN
12008 && fix->fix_size >= 8 && mp->fix_size < 8)
12009 return NULL;
12010 else
12011 min_mp = mp;
12013 else if (mp->max_address
12014 < minipool_barrier->address + mp->offset + fix->fix_size)
12016 /* Inserting before this entry would push the fix beyond
12017 its maximum address (which can happen if we have
12018 re-located a forwards fix); force the new fix to come
12019 after it. */
12020 if (ARM_DOUBLEWORD_ALIGN
12021 && fix->fix_size >= 8 && mp->fix_size < 8)
12022 return NULL;
12023 else
12025 min_mp = mp;
12026 min_address = mp->min_address + fix->fix_size;
12029 /* Do not insert a non-8-byte aligned quantity before 8-byte
12030 aligned quantities. */
12031 else if (ARM_DOUBLEWORD_ALIGN
12032 && fix->fix_size < 8
12033 && mp->fix_size >= 8)
12035 min_mp = mp;
12036 min_address = mp->min_address + fix->fix_size;
12041 /* We need to create a new entry. */
12042 mp = XNEW (Mnode);
12043 mp->fix_size = fix->fix_size;
12044 mp->mode = fix->mode;
12045 mp->value = fix->value;
12046 mp->refcount = 1;
12047 mp->max_address = minipool_barrier->address + 65536;
12049 mp->min_address = min_address;
12051 if (min_mp == NULL)
12053 mp->prev = NULL;
12054 mp->next = minipool_vector_head;
12056 if (mp->next == NULL)
12058 minipool_vector_tail = mp;
12059 minipool_vector_label = gen_label_rtx ();
12061 else
12062 mp->next->prev = mp;
12064 minipool_vector_head = mp;
12066 else
12068 mp->next = min_mp->next;
12069 mp->prev = min_mp;
12070 min_mp->next = mp;
12072 if (mp->next != NULL)
12073 mp->next->prev = mp;
12074 else
12075 minipool_vector_tail = mp;
12078 /* Save the new entry. */
12079 min_mp = mp;
12081 if (mp->prev)
12082 mp = mp->prev;
12083 else
12084 mp->offset = 0;
12086 /* Scan over the following entries and adjust their offsets. */
12087 while (mp->next != NULL)
12089 if (mp->next->min_address < mp->min_address + mp->fix_size)
12090 mp->next->min_address = mp->min_address + mp->fix_size;
12092 if (mp->refcount)
12093 mp->next->offset = mp->offset + mp->fix_size;
12094 else
12095 mp->next->offset = mp->offset;
12097 mp = mp->next;
12100 return min_mp;
12103 static void
12104 assign_minipool_offsets (Mfix *barrier)
12106 HOST_WIDE_INT offset = 0;
12107 Mnode *mp;
12109 minipool_barrier = barrier;
12111 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12113 mp->offset = offset;
12115 if (mp->refcount > 0)
12116 offset += mp->fix_size;
12120 /* Output the literal table */
12121 static void
12122 dump_minipool (rtx scan)
12124 Mnode * mp;
12125 Mnode * nmp;
12126 int align64 = 0;
12128 if (ARM_DOUBLEWORD_ALIGN)
12129 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12130 if (mp->refcount > 0 && mp->fix_size >= 8)
12132 align64 = 1;
12133 break;
12136 if (dump_file)
12137 fprintf (dump_file,
12138 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12139 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12141 scan = emit_label_after (gen_label_rtx (), scan);
12142 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12143 scan = emit_label_after (minipool_vector_label, scan);
12145 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12147 if (mp->refcount > 0)
12149 if (dump_file)
12151 fprintf (dump_file,
12152 ";; Offset %u, min %ld, max %ld ",
12153 (unsigned) mp->offset, (unsigned long) mp->min_address,
12154 (unsigned long) mp->max_address);
12155 arm_print_value (dump_file, mp->value);
12156 fputc ('\n', dump_file);
12159 switch (mp->fix_size)
12161 #ifdef HAVE_consttable_1
12162 case 1:
12163 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12164 break;
12166 #endif
12167 #ifdef HAVE_consttable_2
12168 case 2:
12169 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12170 break;
12172 #endif
12173 #ifdef HAVE_consttable_4
12174 case 4:
12175 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12176 break;
12178 #endif
12179 #ifdef HAVE_consttable_8
12180 case 8:
12181 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12182 break;
12184 #endif
12185 #ifdef HAVE_consttable_16
12186 case 16:
12187 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12188 break;
12190 #endif
12191 default:
12192 gcc_unreachable ();
12196 nmp = mp->next;
12197 free (mp);
12200 minipool_vector_head = minipool_vector_tail = NULL;
12201 scan = emit_insn_after (gen_consttable_end (), scan);
12202 scan = emit_barrier_after (scan);
12205 /* Return the cost of forcibly inserting a barrier after INSN. */
12206 static int
12207 arm_barrier_cost (rtx insn)
12209 /* Basing the location of the pool on the loop depth is preferable,
12210 but at the moment, the basic block information seems to be
12211 corrupt by this stage of the compilation. */
12212 int base_cost = 50;
12213 rtx next = next_nonnote_insn (insn);
12215 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12216 base_cost -= 20;
12218 switch (GET_CODE (insn))
12220 case CODE_LABEL:
12221 /* It will always be better to place the table before the label, rather
12222 than after it. */
12223 return 50;
12225 case INSN:
12226 case CALL_INSN:
12227 return base_cost;
12229 case JUMP_INSN:
12230 return base_cost - 10;
12232 default:
12233 return base_cost + 10;
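/* Informal example of the heuristic above: an ordinary INSN that is
   immediately followed by a CODE_LABEL costs 50 - 20 = 30, so it is
   preferred (lower cost) over a JUMP_INSN in the middle of
   straight-line code, which costs 50 - 10 = 40.  */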
12237 /* Find the best place in the insn stream in the range
12238 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12239 Create the barrier by inserting a jump and add a new fix entry for
12240 it. */
12241 static Mfix *
12242 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12244 HOST_WIDE_INT count = 0;
12245 rtx barrier;
12246 rtx from = fix->insn;
12247 /* The instruction after which we will insert the jump. */
12248 rtx selected = NULL;
12249 int selected_cost;
12250 /* The address at which the jump instruction will be placed. */
12251 HOST_WIDE_INT selected_address;
12252 Mfix * new_fix;
12253 HOST_WIDE_INT max_count = max_address - fix->address;
12254 rtx label = gen_label_rtx ();
12256 selected_cost = arm_barrier_cost (from);
12257 selected_address = fix->address;
12259 while (from && count < max_count)
12261 rtx tmp;
12262 int new_cost;
12264 /* This code shouldn't have been called if there was a natural barrier
12265 within range. */
12266 gcc_assert (GET_CODE (from) != BARRIER);
12268 /* Count the length of this insn. */
12269 count += get_attr_length (from);
12271 /* If there is a jump table, add its length. */
12272 tmp = is_jump_table (from);
12273 if (tmp != NULL)
12275 count += get_jump_table_size (tmp);
12277 /* Jump tables aren't in a basic block, so base the cost on
12278 the dispatch insn. If we select this location, we will
12279 still put the pool after the table. */
12280 new_cost = arm_barrier_cost (from);
12282 if (count < max_count
12283 && (!selected || new_cost <= selected_cost))
12285 selected = tmp;
12286 selected_cost = new_cost;
12287 selected_address = fix->address + count;
12290 /* Continue after the dispatch table. */
12291 from = NEXT_INSN (tmp);
12292 continue;
12295 new_cost = arm_barrier_cost (from);
12297 if (count < max_count
12298 && (!selected || new_cost <= selected_cost))
12300 selected = from;
12301 selected_cost = new_cost;
12302 selected_address = fix->address + count;
12305 from = NEXT_INSN (from);
12308 /* Make sure that we found a place to insert the jump. */
12309 gcc_assert (selected);
12311 /* Make sure we do not split a call and its corresponding
12312 CALL_ARG_LOCATION note. */
12313 if (CALL_P (selected))
12315 rtx next = NEXT_INSN (selected);
12316 if (next && NOTE_P (next)
12317 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12318 selected = next;
12321 /* Create a new JUMP_INSN that branches around a barrier. */
12322 from = emit_jump_insn_after (gen_jump (label), selected);
12323 JUMP_LABEL (from) = label;
12324 barrier = emit_barrier_after (from);
12325 emit_label_after (label, barrier);
12327 /* Create a minipool barrier entry for the new barrier. */
12328 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12329 new_fix->insn = barrier;
12330 new_fix->address = selected_address;
12331 new_fix->next = fix->next;
12332 fix->next = new_fix;
12334 return new_fix;
12337 /* Record that there is a natural barrier in the insn stream at
12338 ADDRESS. */
12339 static void
12340 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12342 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12344 fix->insn = insn;
12345 fix->address = address;
12347 fix->next = NULL;
12348 if (minipool_fix_head != NULL)
12349 minipool_fix_tail->next = fix;
12350 else
12351 minipool_fix_head = fix;
12353 minipool_fix_tail = fix;
12356 /* Record INSN, which will need fixing up to load a value from the
12357 minipool. ADDRESS is the offset of the insn since the start of the
12358 function; LOC is a pointer to the part of the insn which requires
12359 fixing; VALUE is the constant that must be loaded, which is of type
12360 MODE. */
12361 static void
12362 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12363 enum machine_mode mode, rtx value)
12365 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12367 fix->insn = insn;
12368 fix->address = address;
12369 fix->loc = loc;
12370 fix->mode = mode;
12371 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12372 fix->value = value;
12373 fix->forwards = get_attr_pool_range (insn);
12374 fix->backwards = get_attr_neg_pool_range (insn);
12375 fix->minipool = NULL;
12377 /* If an insn doesn't have a range defined for it, then it isn't
12378 expecting to be reworked by this code. Better to stop now than
12379 to generate duff assembly code. */
12380 gcc_assert (fix->forwards || fix->backwards);
12382 /* If an entry requires 8-byte alignment then assume all constant pools
12383 require 4 bytes of padding. Trying to do this later on a per-pool
12384 basis is awkward because existing pool entries have to be modified. */
12385 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12386 minipool_pad = 4;
12388 if (dump_file)
12390 fprintf (dump_file,
12391 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12392 GET_MODE_NAME (mode),
12393 INSN_UID (insn), (unsigned long) address,
12394 -1 * (long)fix->backwards, (long)fix->forwards);
12395 arm_print_value (dump_file, fix->value);
12396 fprintf (dump_file, "\n");
12399 /* Add it to the chain of fixes. */
12400 fix->next = NULL;
12402 if (minipool_fix_head != NULL)
12403 minipool_fix_tail->next = fix;
12404 else
12405 minipool_fix_head = fix;
12407 minipool_fix_tail = fix;
12410 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12411 Returns the number of insns needed, or 99 if we don't know how to
12412 do it. */
12414 int arm_const_double_inline_cost (rtx val)
12416 rtx lowpart, highpart;
12417 enum machine_mode mode;
12419 mode = GET_MODE (val);
12421 if (mode == VOIDmode)
12422 mode = DImode;
12424 gcc_assert (GET_MODE_SIZE (mode) == 8);
12426 lowpart = gen_lowpart (SImode, val);
12427 highpart = gen_highpart_mode (SImode, mode, val);
12429 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12430 gcc_assert (GET_CODE (highpart) == CONST_INT);
12432 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12433 NULL_RTX, NULL_RTX, 0, 0)
12434 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12435 NULL_RTX, NULL_RTX, 0, 0));
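/* Hypothetical example: for the DImode constant 0x0000000400000003 the
   low part (3) and the high part (4) are both valid ARM immediates, so
   each arm_gen_constant call counts one instruction and the function
   returns 2.  */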
12438 /* Return true if it is worthwhile to split a 64-bit constant into two
12439 32-bit operations. This is the case if optimizing for size, or
12440 if we have load delay slots, or if one 32-bit part can be done with
12441 a single data operation. */
12442 bool
12443 arm_const_double_by_parts (rtx val)
12445 enum machine_mode mode = GET_MODE (val);
12446 rtx part;
12448 if (optimize_size || arm_ld_sched)
12449 return true;
12451 if (mode == VOIDmode)
12452 mode = DImode;
12454 part = gen_highpart_mode (SImode, mode, val);
12456 gcc_assert (GET_CODE (part) == CONST_INT);
12458 if (const_ok_for_arm (INTVAL (part))
12459 || const_ok_for_arm (~INTVAL (part)))
12460 return true;
12462 part = gen_lowpart (SImode, val);
12464 gcc_assert (GET_CODE (part) == CONST_INT);
12466 if (const_ok_for_arm (INTVAL (part))
12467 || const_ok_for_arm (~INTVAL (part)))
12468 return true;
12470 return false;
12473 /* Return true if it is possible to inline both the high and low parts
12474 of a 64-bit constant into 32-bit data processing instructions. */
12475 bool
12476 arm_const_double_by_immediates (rtx val)
12478 enum machine_mode mode = GET_MODE (val);
12479 rtx part;
12481 if (mode == VOIDmode)
12482 mode = DImode;
12484 part = gen_highpart_mode (SImode, mode, val);
12486 gcc_assert (GET_CODE (part) == CONST_INT);
12488 if (!const_ok_for_arm (INTVAL (part)))
12489 return false;
12491 part = gen_lowpart (SImode, val);
12493 gcc_assert (GET_CODE (part) == CONST_INT);
12495 if (!const_ok_for_arm (INTVAL (part)))
12496 return false;
12498 return true;
12501 /* Scan INSN and note any of its operands that need fixing.
12502 If DO_PUSHES is false we do not actually push any of the fixups
12503 needed. The function returns TRUE if any fixups were needed/pushed.
12504 This is used by arm_memory_load_p() which needs to know about loads
12505 of constants that will be converted into minipool loads. */
12506 static bool
12507 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12509 bool result = false;
12510 int opno;
12512 extract_insn (insn);
12514 if (!constrain_operands (1))
12515 fatal_insn_not_found (insn);
12517 if (recog_data.n_alternatives == 0)
12518 return false;
12520 /* Fill in recog_op_alt with information about the constraints of
12521 this insn. */
12522 preprocess_constraints ();
12524 for (opno = 0; opno < recog_data.n_operands; opno++)
12526 /* Things we need to fix can only occur in inputs. */
12527 if (recog_data.operand_type[opno] != OP_IN)
12528 continue;
12530 /* If this alternative is a memory reference, then any mention
12531 of constants in this alternative is really to fool reload
12532 into allowing us to accept one there. We need to fix them up
12533 now so that we output the right code. */
12534 if (recog_op_alt[opno][which_alternative].memory_ok)
12536 rtx op = recog_data.operand[opno];
12538 if (CONSTANT_P (op))
12540 if (do_pushes)
12541 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12542 recog_data.operand_mode[opno], op);
12543 result = true;
12545 else if (GET_CODE (op) == MEM
12546 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12547 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12549 if (do_pushes)
12551 rtx cop = avoid_constant_pool_reference (op);
12553 /* Casting the address of something to a mode narrower
12554 than a word can cause avoid_constant_pool_reference()
12555 to return the pool reference itself. That's no good to
12556 us here. Let's just hope that we can use the
12557 constant pool value directly. */
12558 if (op == cop)
12559 cop = get_pool_constant (XEXP (op, 0));
12561 push_minipool_fix (insn, address,
12562 recog_data.operand_loc[opno],
12563 recog_data.operand_mode[opno], cop);
12566 result = true;
12571 return result;
12574 /* Convert instructions to their cc-clobbering variant if possible, since
12575 that allows us to use smaller encodings. */
12577 static void
12578 thumb2_reorg (void)
12580 basic_block bb;
12581 regset_head live;
12583 INIT_REG_SET (&live);
12585 /* We are freeing block_for_insn in the toplev to keep compatibility
12586 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12587 compute_bb_for_insn ();
12588 df_analyze ();
12590 FOR_EACH_BB (bb)
12592 rtx insn;
12594 COPY_REG_SET (&live, DF_LR_OUT (bb));
12595 df_simulate_initialize_backwards (bb, &live);
12596 FOR_BB_INSNS_REVERSE (bb, insn)
12598 if (NONJUMP_INSN_P (insn)
12599 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12601 rtx pat = PATTERN (insn);
12602 if (GET_CODE (pat) == SET
12603 && low_register_operand (XEXP (pat, 0), SImode)
12604 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12605 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12606 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12608 rtx dst = XEXP (pat, 0);
12609 rtx src = XEXP (pat, 1);
12610 rtx op0 = XEXP (src, 0);
12611 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12612 ? XEXP (src, 1) : NULL);
12614 if (rtx_equal_p (dst, op0)
12615 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12617 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12618 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12619 rtvec vec = gen_rtvec (2, pat, clobber);
12621 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12622 INSN_CODE (insn) = -1;
12624 /* We can also handle a commutative operation where the
12625 second operand matches the destination. */
12626 else if (op1 && rtx_equal_p (dst, op1))
12628 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12629 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12630 rtvec vec;
12632 src = copy_rtx (src);
12633 XEXP (src, 0) = op1;
12634 XEXP (src, 1) = op0;
12635 pat = gen_rtx_SET (VOIDmode, dst, src);
12636 vec = gen_rtvec (2, pat, clobber);
12637 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12638 INSN_CODE (insn) = -1;
12643 if (NONDEBUG_INSN_P (insn))
12644 df_simulate_one_insn_backwards (bb, insn, &live);
12648 CLEAR_REG_SET (&live);
12651 /* Gcc puts the pool in the wrong place for ARM, since we can only
12652 load addresses a limited distance around the pc. We do some
12653 special munging to move the constant pool values to the correct
12654 point in the code. */
12655 static void
12656 arm_reorg (void)
12658 rtx insn;
12659 HOST_WIDE_INT address = 0;
12660 Mfix * fix;
12662 if (TARGET_THUMB2)
12663 thumb2_reorg ();
12665 minipool_fix_head = minipool_fix_tail = NULL;
12667 /* The first insn must always be a note, or the code below won't
12668 scan it properly. */
12669 insn = get_insns ();
12670 gcc_assert (GET_CODE (insn) == NOTE);
12671 minipool_pad = 0;
12673 /* Scan all the insns and record the operands that will need fixing. */
12674 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12676 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12677 && (arm_cirrus_insn_p (insn)
12678 || GET_CODE (insn) == JUMP_INSN
12679 || arm_memory_load_p (insn)))
12680 cirrus_reorg (insn);
12682 if (GET_CODE (insn) == BARRIER)
12683 push_minipool_barrier (insn, address);
12684 else if (INSN_P (insn))
12686 rtx table;
12688 note_invalid_constants (insn, address, true);
12689 address += get_attr_length (insn);
12691 /* If the insn is a vector jump, add the size of the table
12692 and skip the table. */
12693 if ((table = is_jump_table (insn)) != NULL)
12695 address += get_jump_table_size (table);
12696 insn = table;
12701 fix = minipool_fix_head;
12703 /* Now scan the fixups and perform the required changes. */
12704 while (fix)
12706 Mfix * ftmp;
12707 Mfix * fdel;
12708 Mfix * last_added_fix;
12709 Mfix * last_barrier = NULL;
12710 Mfix * this_fix;
12712 /* Skip any further barriers before the next fix. */
12713 while (fix && GET_CODE (fix->insn) == BARRIER)
12714 fix = fix->next;
12716 /* No more fixes. */
12717 if (fix == NULL)
12718 break;
12720 last_added_fix = NULL;
12722 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12724 if (GET_CODE (ftmp->insn) == BARRIER)
12726 if (ftmp->address >= minipool_vector_head->max_address)
12727 break;
12729 last_barrier = ftmp;
12731 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12732 break;
12734 last_added_fix = ftmp; /* Keep track of the last fix added. */
12737 /* If we found a barrier, drop back to that; any fixes that we
12738 could have reached but come after the barrier will now go in
12739 the next mini-pool. */
12740 if (last_barrier != NULL)
12742 /* Reduce the refcount for those fixes that won't go into this
12743 pool after all. */
12744 for (fdel = last_barrier->next;
12745 fdel && fdel != ftmp;
12746 fdel = fdel->next)
12748 fdel->minipool->refcount--;
12749 fdel->minipool = NULL;
12752 ftmp = last_barrier;
12754 else
12756 /* ftmp is the first fix that we can't fit into this pool and
12757 there are no natural barriers that we could use. Insert a
12758 new barrier in the code somewhere between the previous
12759 fix and this one, and arrange to jump around it. */
12760 HOST_WIDE_INT max_address;
12762 /* The last item on the list of fixes must be a barrier, so
12763 we can never run off the end of the list of fixes without
12764 last_barrier being set. */
12765 gcc_assert (ftmp);
12767 max_address = minipool_vector_head->max_address;
12768 /* Check that there isn't another fix that is in range that
12769 we couldn't fit into this pool because the pool was
12770 already too large: we need to put the pool before such an
12771 instruction. The pool itself may come just after the
12772 fix because create_fix_barrier also allows space for a
12773 jump instruction. */
12774 if (ftmp->address < max_address)
12775 max_address = ftmp->address + 1;
12777 last_barrier = create_fix_barrier (last_added_fix, max_address);
12780 assign_minipool_offsets (last_barrier);
12782 while (ftmp)
12784 if (GET_CODE (ftmp->insn) != BARRIER
12785 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12786 == NULL))
12787 break;
12789 ftmp = ftmp->next;
12792 /* Scan over the fixes we have identified for this pool, fixing them
12793 up and adding the constants to the pool itself. */
12794 for (this_fix = fix; this_fix && ftmp != this_fix;
12795 this_fix = this_fix->next)
12796 if (GET_CODE (this_fix->insn) != BARRIER)
12798 rtx addr
12799 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12800 minipool_vector_label),
12801 this_fix->minipool->offset);
12802 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12805 dump_minipool (last_barrier->insn);
12806 fix = ftmp;
12809 /* From now on we must synthesize any constants that we can't handle
12810 directly. This can happen if the RTL gets split during final
12811 instruction generation. */
12812 after_arm_reorg = 1;
12814 /* Free the minipool memory. */
12815 obstack_free (&minipool_obstack, minipool_startobj);
12818 /* Routines to output assembly language. */
12820 /* If the rtx is the correct value then return the string of the number.
12821 In this way we can ensure that valid double constants are generated even
12822 when cross compiling. */
12823 const char *
12824 fp_immediate_constant (rtx x)
12826 REAL_VALUE_TYPE r;
12827 int i;
12829 if (!fp_consts_inited)
12830 init_fp_table ();
12832 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12833 for (i = 0; i < 8; i++)
12834 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12835 return strings_fp[i];
12837 gcc_unreachable ();
12840 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12841 static const char *
12842 fp_const_from_val (REAL_VALUE_TYPE *r)
12844 int i;
12846 if (!fp_consts_inited)
12847 init_fp_table ();
12849 for (i = 0; i < 8; i++)
12850 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12851 return strings_fp[i];
12853 gcc_unreachable ();
12856 /* Output the operands of a LDM/STM instruction to STREAM.
12857 MASK is the ARM register set mask of which only bits 0-15 are important.
12858 REG is the base register, either the frame pointer or the stack pointer,
12859 INSTR is the possibly suffixed load or store instruction.
12860 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12862 static void
12863 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12864 unsigned long mask, int rfe)
12866 unsigned i;
12867 bool not_first = FALSE;
12869 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12870 fputc ('\t', stream);
12871 asm_fprintf (stream, instr, reg);
12872 fputc ('{', stream);
12874 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12875 if (mask & (1 << i))
12877 if (not_first)
12878 fprintf (stream, ", ");
12880 asm_fprintf (stream, "%r", i);
12881 not_first = TRUE;
12884 if (rfe)
12885 fprintf (stream, "}^\n");
12886 else
12887 fprintf (stream, "}\n");
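/* Illustrative use (the exact INSTR strings used by the real callers
   differ slightly): a call such as

     print_multi_reg (stream, "ldmfd\t%r!, ", SP_REGNUM,
		      (1 << 4) | (1 << 5) | (1 << PC_REGNUM), 0);

   emits a line of the form

	ldmfd	sp!, {r4, r5, pc}  */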
12891 /* Output a FLDMD instruction to STREAM.
12892 BASE is the register containing the address.
12893 REG and COUNT specify the register range.
12894 Extra registers may be added to avoid hardware bugs.
12896 We output FLDMD even for ARMv5 VFP implementations. Although
12897 FLDMD is technically not supported until ARMv6, it is believed
12898 that all VFP implementations support its use in this context. */
12900 static void
12901 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12903 int i;
12905 /* Workaround ARM10 VFPr1 bug. */
12906 if (count == 2 && !arm_arch6)
12908 if (reg == 15)
12909 reg--;
12910 count++;
12913 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12914 load into multiple parts if we have to handle more than 16 registers. */
12915 if (count > 16)
12917 vfp_output_fldmd (stream, base, reg, 16);
12918 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12919 return;
12922 fputc ('\t', stream);
12923 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12925 for (i = reg; i < reg + count; i++)
12927 if (i > reg)
12928 fputs (", ", stream);
12929 asm_fprintf (stream, "d%d", i);
12931 fputs ("}\n", stream);
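/* Example (illustrative only): vfp_output_fldmd (stream, SP_REGNUM, 8, 3)
   emits

	fldmfdd	sp!, {d8, d9, d10}

   On a pre-ARMv6 VFP the count == 2 workaround above widens a two
   register transfer to three.  */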
12936 /* Output the assembly for a store multiple. */
12938 const char *
12939 vfp_output_fstmd (rtx * operands)
12941 char pattern[100];
12942 int p;
12943 int base;
12944 int i;
12946 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12947 p = strlen (pattern);
12949 gcc_assert (GET_CODE (operands[1]) == REG);
12951 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12952 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12954 p += sprintf (&pattern[p], ", d%d", base + i);
12956 strcpy (&pattern[p], "}");
12958 output_asm_insn (pattern, operands);
12959 return "";
12963 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12964 number of bytes pushed. */
12966 static int
12967 vfp_emit_fstmd (int base_reg, int count)
12969 rtx par;
12970 rtx dwarf;
12971 rtx tmp, reg;
12972 int i;
12974 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12975 register pairs are stored by a store multiple insn. We avoid this
12976 by pushing an extra pair. */
12977 if (count == 2 && !arm_arch6)
12979 if (base_reg == LAST_VFP_REGNUM - 3)
12980 base_reg -= 2;
12981 count++;
12984 /* FSTMD may not store more than 16 doubleword registers at once. Split
12985 larger stores into multiple parts (up to a maximum of two, in
12986 practice). */
12987 if (count > 16)
12989 int saved;
12990 /* NOTE: base_reg is an internal register number, so each D register
12991 counts as 2. */
12992 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12993 saved += vfp_emit_fstmd (base_reg, 16);
12994 return saved;
12997 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12998 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13000 reg = gen_rtx_REG (DFmode, base_reg);
13001 base_reg += 2;
13003 XVECEXP (par, 0, 0)
13004 = gen_rtx_SET (VOIDmode,
13005 gen_frame_mem
13006 (BLKmode,
13007 gen_rtx_PRE_MODIFY (Pmode,
13008 stack_pointer_rtx,
13009 plus_constant
13010 (stack_pointer_rtx,
13011 - (count * 8)))
13013 gen_rtx_UNSPEC (BLKmode,
13014 gen_rtvec (1, reg),
13015 UNSPEC_PUSH_MULT));
13017 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13018 plus_constant (stack_pointer_rtx, -(count * 8)));
13019 RTX_FRAME_RELATED_P (tmp) = 1;
13020 XVECEXP (dwarf, 0, 0) = tmp;
13022 tmp = gen_rtx_SET (VOIDmode,
13023 gen_frame_mem (DFmode, stack_pointer_rtx),
13024 reg);
13025 RTX_FRAME_RELATED_P (tmp) = 1;
13026 XVECEXP (dwarf, 0, 1) = tmp;
13028 for (i = 1; i < count; i++)
13030 reg = gen_rtx_REG (DFmode, base_reg);
13031 base_reg += 2;
13032 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13034 tmp = gen_rtx_SET (VOIDmode,
13035 gen_frame_mem (DFmode,
13036 plus_constant (stack_pointer_rtx,
13037 i * 8)),
13038 reg);
13039 RTX_FRAME_RELATED_P (tmp) = 1;
13040 XVECEXP (dwarf, 0, i + 1) = tmp;
13043 par = emit_insn (par);
13044 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13045 RTX_FRAME_RELATED_P (par) = 1;
13047 return count * 8;
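/* For example (informal): a call with COUNT == 3 emits a single store
   multiple of three D registers and returns 24, the number of bytes by
   which the stack pointer was decremented.  */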
13050 /* Emit a call instruction with pattern PAT. ADDR is the address of
13051 the call target. */
13053 void
13054 arm_emit_call_insn (rtx pat, rtx addr)
13056 rtx insn;
13058 insn = emit_call_insn (pat);
13060 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13061 If the call might use such an entry, add a use of the PIC register
13062 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13063 if (TARGET_VXWORKS_RTP
13064 && flag_pic
13065 && GET_CODE (addr) == SYMBOL_REF
13066 && (SYMBOL_REF_DECL (addr)
13067 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13068 : !SYMBOL_REF_LOCAL_P (addr)))
13070 require_pic_register ();
13071 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13075 /* Output a 'call' insn. */
13076 const char *
13077 output_call (rtx *operands)
13079 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13081 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13082 if (REGNO (operands[0]) == LR_REGNUM)
13084 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13085 output_asm_insn ("mov%?\t%0, %|lr", operands);
13088 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13090 if (TARGET_INTERWORK || arm_arch4t)
13091 output_asm_insn ("bx%?\t%0", operands);
13092 else
13093 output_asm_insn ("mov%?\t%|pc, %0", operands);
13095 return "";
13098 /* Output a 'call' insn that is a reference in memory. This is
13099 disabled for ARMv5 and we prefer a blx instead because otherwise
13100 there's a significant performance overhead. */
13101 const char *
13102 output_call_mem (rtx *operands)
13104 gcc_assert (!arm_arch5);
13105 if (TARGET_INTERWORK)
13107 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13108 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13109 output_asm_insn ("bx%?\t%|ip", operands);
13111 else if (regno_use_in (LR_REGNUM, operands[0]))
13113 /* LR is used in the memory address. We load the address in the
13114 first instruction. It's safe to use IP as the target of the
13115 load since the call will kill it anyway. */
13116 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13117 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13118 if (arm_arch4t)
13119 output_asm_insn ("bx%?\t%|ip", operands);
13120 else
13121 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13123 else
13125 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13126 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13129 return "";
13133 /* Output a move from arm registers to an fpa register.
13134 OPERANDS[0] is an fpa register.
13135 OPERANDS[1] is the first register of an arm register pair. */
13136 const char *
13137 output_mov_long_double_fpa_from_arm (rtx *operands)
13139 int arm_reg0 = REGNO (operands[1]);
13140 rtx ops[3];
13142 gcc_assert (arm_reg0 != IP_REGNUM);
13144 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13145 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13146 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13148 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13149 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13151 return "";
13154 /* Output a move from an fpa register to arm registers.
13155 OPERANDS[0] is the first register of an arm register pair.
13156 OPERANDS[1] is an fpa register. */
13157 const char *
13158 output_mov_long_double_arm_from_fpa (rtx *operands)
13160 int arm_reg0 = REGNO (operands[0]);
13161 rtx ops[3];
13163 gcc_assert (arm_reg0 != IP_REGNUM);
13165 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13166 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13167 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13169 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13170 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13171 return "";
13174 /* Output a move from arm registers to arm registers of a long double.
13175 OPERANDS[0] is the destination.
13176 OPERANDS[1] is the source. */
13177 const char *
13178 output_mov_long_double_arm_from_arm (rtx *operands)
13180 /* We have to be careful here because the two might overlap. */
13181 int dest_start = REGNO (operands[0]);
13182 int src_start = REGNO (operands[1]);
13183 rtx ops[2];
13184 int i;
13186 if (dest_start < src_start)
13188 for (i = 0; i < 3; i++)
13190 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13191 ops[1] = gen_rtx_REG (SImode, src_start + i);
13192 output_asm_insn ("mov%?\t%0, %1", ops);
13195 else
13197 for (i = 2; i >= 0; i--)
13199 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13200 ops[1] = gen_rtx_REG (SImode, src_start + i);
13201 output_asm_insn ("mov%?\t%0, %1", ops);
13205 return "";
13208 void
13209 arm_emit_movpair (rtx dest, rtx src)
13211 /* If the src is an immediate, simplify it. */
13212 if (CONST_INT_P (src))
13214 HOST_WIDE_INT val = INTVAL (src);
13215 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13216 if ((val >> 16) & 0x0000ffff)
13217 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13218 GEN_INT (16)),
13219 GEN_INT ((val >> 16) & 0x0000ffff));
13220 return;
13222 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13223 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
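/* Illustrative example: arm_emit_movpair (reg, GEN_INT (0x12345678))
   first sets the low half word (0x5678) and then sets the upper half
   word (0x1234) through a ZERO_EXTRACT, corresponding to a movw/movt
   pair on cores that have those instructions.  A constant whose upper
   16 bits are zero, e.g. 0x5678, needs only the first insn.  */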
13226 /* Output a move from arm registers to an fpa register.
13227 OPERANDS[0] is an fpa register.
13228 OPERANDS[1] is the first register of an arm register pair. */
13229 const char *
13230 output_mov_double_fpa_from_arm (rtx *operands)
13232 int arm_reg0 = REGNO (operands[1]);
13233 rtx ops[2];
13235 gcc_assert (arm_reg0 != IP_REGNUM);
13237 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13238 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13239 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13240 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13241 return "";
13244 /* Output a move from an fpa register to arm registers.
13245 OPERANDS[0] is the first register of an arm register pair.
13246 OPERANDS[1] is an fpa register. */
13247 const char *
13248 output_mov_double_arm_from_fpa (rtx *operands)
13250 int arm_reg0 = REGNO (operands[0]);
13251 rtx ops[2];
13253 gcc_assert (arm_reg0 != IP_REGNUM);
13255 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13256 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13257 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13258 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13259 return "";
13262 /* Output a move between double words. It must be REG<-MEM
13263 or MEM<-REG. */
13264 const char *
13265 output_move_double (rtx *operands)
13267 enum rtx_code code0 = GET_CODE (operands[0]);
13268 enum rtx_code code1 = GET_CODE (operands[1]);
13269 rtx otherops[3];
13271 if (code0 == REG)
13273 unsigned int reg0 = REGNO (operands[0]);
13275 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13277 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13279 switch (GET_CODE (XEXP (operands[1], 0)))
13281 case REG:
13282 if (TARGET_LDRD
13283 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13284 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13285 else
13286 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13287 break;
13289 case PRE_INC:
13290 gcc_assert (TARGET_LDRD);
13291 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13292 break;
13294 case PRE_DEC:
13295 if (TARGET_LDRD)
13296 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13297 else
13298 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13299 break;
13301 case POST_INC:
13302 if (TARGET_LDRD)
13303 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13304 else
13305 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13306 break;
13308 case POST_DEC:
13309 gcc_assert (TARGET_LDRD);
13310 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13311 break;
13313 case PRE_MODIFY:
13314 case POST_MODIFY:
13315 /* Autoincrement addressing modes should never have overlapping
13316 base and destination registers, and overlapping index registers
13317 are already prohibited, so this doesn't need to worry about
13318 fix_cm3_ldrd. */
13319 otherops[0] = operands[0];
13320 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13321 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13323 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13325 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13327 /* Registers overlap so split out the increment. */
13328 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13329 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13331 else
13333 /* Use a single insn if we can.
13334 FIXME: IWMMXT allows offsets larger than ldrd can
13335 handle, fix these up with a pair of ldr. */
13336 if (TARGET_THUMB2
13337 || GET_CODE (otherops[2]) != CONST_INT
13338 || (INTVAL (otherops[2]) > -256
13339 && INTVAL (otherops[2]) < 256))
13340 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13341 else
13343 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13344 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13348 else
13350 /* Use a single insn if we can.
13351 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13352 fix these up with a pair of ldr. */
13353 if (TARGET_THUMB2
13354 || GET_CODE (otherops[2]) != CONST_INT
13355 || (INTVAL (otherops[2]) > -256
13356 && INTVAL (otherops[2]) < 256))
13357 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13358 else
13360 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13361 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13364 break;
13366 case LABEL_REF:
13367 case CONST:
13368 /* We might be able to use ldrd %0, %1 here. However the range is
13369 different to ldr/adr, and it is broken on some ARMv7-M
13370 implementations. */
13371 /* Use the second register of the pair to avoid problematic
13372 overlap. */
13373 otherops[1] = operands[1];
13374 output_asm_insn ("adr%?\t%0, %1", otherops);
13375 operands[1] = otherops[0];
13376 if (TARGET_LDRD)
13377 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13378 else
13379 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13380 break;
13382 /* ??? This needs checking for thumb2. */
13383 default:
13384 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13385 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13387 otherops[0] = operands[0];
13388 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13389 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13391 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13393 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13395 switch ((int) INTVAL (otherops[2]))
13397 case -8:
13398 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13399 return "";
13400 case -4:
13401 if (TARGET_THUMB2)
13402 break;
13403 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13404 return "";
13405 case 4:
13406 if (TARGET_THUMB2)
13407 break;
13408 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13409 return "";
13412 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13413 operands[1] = otherops[0];
13414 if (TARGET_LDRD
13415 && (GET_CODE (otherops[2]) == REG
13416 || TARGET_THUMB2
13417 || (GET_CODE (otherops[2]) == CONST_INT
13418 && INTVAL (otherops[2]) > -256
13419 && INTVAL (otherops[2]) < 256)))
13421 if (reg_overlap_mentioned_p (operands[0],
13422 otherops[2]))
13424 rtx tmp;
13425 /* Swap base and index registers over to
13426 avoid a conflict. */
13427 tmp = otherops[1];
13428 otherops[1] = otherops[2];
13429 otherops[2] = tmp;
13431 /* If both registers conflict, it will usually
13432 have been fixed by a splitter. */
13433 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13434 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13436 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13437 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13439 else
13441 otherops[0] = operands[0];
13442 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13444 return "";
13447 if (GET_CODE (otherops[2]) == CONST_INT)
13449 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13450 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13451 else
13452 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13454 else
13455 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13457 else
13458 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13460 if (TARGET_LDRD)
13461 return "ldr%(d%)\t%0, [%1]";
13463 return "ldm%(ia%)\t%1, %M0";
13465 else
13467 otherops[1] = adjust_address (operands[1], SImode, 4);
13468 /* Take care of overlapping base/data reg. */
13469 if (reg_mentioned_p (operands[0], operands[1]))
13471 output_asm_insn ("ldr%?\t%0, %1", otherops);
13472 output_asm_insn ("ldr%?\t%0, %1", operands);
13474 else
13476 output_asm_insn ("ldr%?\t%0, %1", operands);
13477 output_asm_insn ("ldr%?\t%0, %1", otherops);
13482 else
13484 /* Constraints should ensure this. */
13485 gcc_assert (code0 == MEM && code1 == REG);
13486 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13488 switch (GET_CODE (XEXP (operands[0], 0)))
13490 case REG:
13491 if (TARGET_LDRD)
13492 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13493 else
13494 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13495 break;
13497 case PRE_INC:
13498 gcc_assert (TARGET_LDRD);
13499 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13500 break;
13502 case PRE_DEC:
13503 if (TARGET_LDRD)
13504 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13505 else
13506 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13507 break;
13509 case POST_INC:
13510 if (TARGET_LDRD)
13511 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13512 else
13513 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13514 break;
13516 case POST_DEC:
13517 gcc_assert (TARGET_LDRD);
13518 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13519 break;
13521 case PRE_MODIFY:
13522 case POST_MODIFY:
13523 otherops[0] = operands[1];
13524 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13525 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13527 /* IWMMXT allows offsets larger than ldrd can handle,
13528 fix these up with a pair of ldr. */
13529 if (!TARGET_THUMB2
13530 && GET_CODE (otherops[2]) == CONST_INT
13531 && (INTVAL(otherops[2]) <= -256
13532 || INTVAL(otherops[2]) >= 256))
13534 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13536 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13537 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13539 else
13541 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13542 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13545 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13546 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13547 else
13548 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13549 break;
13551 case PLUS:
13552 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13553 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13555 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13557 case -8:
13558 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13559 return "";
13561 case -4:
13562 if (TARGET_THUMB2)
13563 break;
13564 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13565 return "";
13567 case 4:
13568 if (TARGET_THUMB2)
13569 break;
13570 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13571 return "";
13574 if (TARGET_LDRD
13575 && (GET_CODE (otherops[2]) == REG
13576 || TARGET_THUMB2
13577 || (GET_CODE (otherops[2]) == CONST_INT
13578 && INTVAL (otherops[2]) > -256
13579 && INTVAL (otherops[2]) < 256)))
13581 otherops[0] = operands[1];
13582 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13583 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13584 return "";
13586 /* Fall through */
13588 default:
13589 otherops[0] = adjust_address (operands[0], SImode, 4);
13590 otherops[1] = operands[1];
13591 output_asm_insn ("str%?\t%1, %0", operands);
13592 output_asm_insn ("str%?\t%H1, %0", otherops);
13596 return "";
13599 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13600 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13602 const char *
13603 output_move_quad (rtx *operands)
13605 if (REG_P (operands[0]))
13607 /* Load, or reg->reg move. */
13609 if (MEM_P (operands[1]))
13611 switch (GET_CODE (XEXP (operands[1], 0)))
13613 case REG:
13614 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13615 break;
13617 case LABEL_REF:
13618 case CONST:
13619 output_asm_insn ("adr%?\t%0, %1", operands);
13620 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13621 break;
13623 default:
13624 gcc_unreachable ();
13627 else
13629 rtx ops[2];
13630 int dest, src, i;
13632 gcc_assert (REG_P (operands[1]));
13634 dest = REGNO (operands[0]);
13635 src = REGNO (operands[1]);
13637 /* This seems pretty dumb, but hopefully GCC won't try to do it
13638 very often. */
13639 if (dest < src)
13640 for (i = 0; i < 4; i++)
13642 ops[0] = gen_rtx_REG (SImode, dest + i);
13643 ops[1] = gen_rtx_REG (SImode, src + i);
13644 output_asm_insn ("mov%?\t%0, %1", ops);
13646 else
13647 for (i = 3; i >= 0; i--)
13649 ops[0] = gen_rtx_REG (SImode, dest + i);
13650 ops[1] = gen_rtx_REG (SImode, src + i);
13651 output_asm_insn ("mov%?\t%0, %1", ops);
13655 else
13657 gcc_assert (MEM_P (operands[0]));
13658 gcc_assert (REG_P (operands[1]));
13659 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13661 switch (GET_CODE (XEXP (operands[0], 0)))
13663 case REG:
13664 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13665 break;
13667 default:
13668 gcc_unreachable ();
13672 return "";
13675 /* Output a VFP load or store instruction. */
13677 const char *
13678 output_move_vfp (rtx *operands)
13680 rtx reg, mem, addr, ops[2];
13681 int load = REG_P (operands[0]);
13682 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13683 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13684 const char *templ;
13685 char buff[50];
13686 enum machine_mode mode;
13688 reg = operands[!load];
13689 mem = operands[load];
13691 mode = GET_MODE (reg);
13693 gcc_assert (REG_P (reg));
13694 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13695 gcc_assert (mode == SFmode
13696 || mode == DFmode
13697 || mode == SImode
13698 || mode == DImode
13699 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13700 gcc_assert (MEM_P (mem));
13702 addr = XEXP (mem, 0);
13704 switch (GET_CODE (addr))
13706 case PRE_DEC:
13707 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13708 ops[0] = XEXP (addr, 0);
13709 ops[1] = reg;
13710 break;
13712 case POST_INC:
13713 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13714 ops[0] = XEXP (addr, 0);
13715 ops[1] = reg;
13716 break;
13718 default:
13719 templ = "f%s%c%%?\t%%%s0, %%1%s";
13720 ops[0] = reg;
13721 ops[1] = mem;
13722 break;
13725 sprintf (buff, templ,
13726 load ? "ld" : "st",
13727 dp ? 'd' : 's',
13728 dp ? "P" : "",
13729 integer_p ? "\t%@ int" : "");
13730 output_asm_insn (buff, ops);
13732 return "";
13735 /* Output a Neon quad-word load or store, or a load or store for
13736 larger structure modes.
13738 WARNING: The ordering of elements is weird in big-endian mode,
13739 because we use VSTM, as required by the EABI. GCC RTL defines
13740 element ordering based on in-memory order. This can differ
13741 from the architectural ordering of elements within a NEON register.
13742 The intrinsics defined in arm_neon.h use the NEON register element
13743 ordering, not the GCC RTL element ordering.
13745 For example, the in-memory ordering of a big-endian quadword
13746 vector with 16-bit elements when stored from register pair {d0,d1}
13747 will be (lowest address first, d0[N] is NEON register element N):
13749 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13751 When necessary, quadword registers (dN, dN+1) are moved to ARM
13752 registers from rN in the order:
13754 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13756 So that STM/LDM can be used on vectors in ARM registers, and the
13757 same memory layout will result as if VSTM/VLDM were used. */
13759 const char *
13760 output_move_neon (rtx *operands)
13762 rtx reg, mem, addr, ops[2];
13763 int regno, load = REG_P (operands[0]);
13764 const char *templ;
13765 char buff[50];
13766 enum machine_mode mode;
13768 reg = operands[!load];
13769 mem = operands[load];
13771 mode = GET_MODE (reg);
13773 gcc_assert (REG_P (reg));
13774 regno = REGNO (reg);
13775 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13776 || NEON_REGNO_OK_FOR_QUAD (regno));
13777 gcc_assert (VALID_NEON_DREG_MODE (mode)
13778 || VALID_NEON_QREG_MODE (mode)
13779 || VALID_NEON_STRUCT_MODE (mode));
13780 gcc_assert (MEM_P (mem));
13782 addr = XEXP (mem, 0);
13784 /* Strip off const from addresses like (const (plus (...))). */
13785 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13786 addr = XEXP (addr, 0);
13788 switch (GET_CODE (addr))
13790 case POST_INC:
13791 templ = "v%smia%%?\t%%0!, %%h1";
13792 ops[0] = XEXP (addr, 0);
13793 ops[1] = reg;
13794 break;
13796 case PRE_DEC:
13797 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13798 templ = "v%smdb%%?\t%%0!, %%h1";
13799 ops[0] = XEXP (addr, 0);
13800 ops[1] = reg;
13801 break;
13803 case POST_MODIFY:
13804 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13805 gcc_unreachable ();
13807 case LABEL_REF:
13808 case PLUS:
13810 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13811 int i;
13812 int overlap = -1;
13813 for (i = 0; i < nregs; i++)
13815 /* We're only using DImode here because it's a convenient size. */
13816 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13817 ops[1] = adjust_address (mem, DImode, 8 * i);
13818 if (reg_overlap_mentioned_p (ops[0], mem))
13820 gcc_assert (overlap == -1);
13821 overlap = i;
13823 else
13825 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13826 output_asm_insn (buff, ops);
13829 if (overlap != -1)
13831 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13832 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13833 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13834 output_asm_insn (buff, ops);
13837 return "";
13840 default:
13841 templ = "v%smia%%?\t%%m0, %%h1";
13842 ops[0] = mem;
13843 ops[1] = reg;
13846 sprintf (buff, templ, load ? "ld" : "st");
13847 output_asm_insn (buff, ops);
13849 return "";
13852 /* Compute and return the length of neon_mov<mode>, where <mode> is
13853 one of VSTRUCT modes: EI, OI, CI or XI. */
13855 arm_attr_length_move_neon (rtx insn)
13857 rtx reg, mem, addr;
13858 int load;
13859 enum machine_mode mode;
13861 extract_insn_cached (insn);
13863 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13865 mode = GET_MODE (recog_data.operand[0]);
13866 switch (mode)
13868 case EImode:
13869 case OImode:
13870 return 8;
13871 case CImode:
13872 return 12;
13873 case XImode:
13874 return 16;
13875 default:
13876 gcc_unreachable ();
13880 load = REG_P (recog_data.operand[0]);
13881 reg = recog_data.operand[!load];
13882 mem = recog_data.operand[load];
13884 gcc_assert (MEM_P (mem));
13886 mode = GET_MODE (reg);
13887 addr = XEXP (mem, 0);
13889 /* Strip off const from addresses like (const (plus (...))). */
13890 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13891 addr = XEXP (addr, 0);
13893 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13895 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13896 return insns * 4;
13898 else
13899 return 4;
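/* Example lengths implied by the code above, assuming the usual 4-byte units
   used for VFP hard registers: a CImode register-to-register move is reported
   as 12 bytes (three 32-bit moves), while an XImode load from a reg+offset
   address splits into 8 doubleword loads, i.e. 32 bytes.  */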
13902 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13903 return zero. */
13906 arm_address_offset_is_imm (rtx insn)
13908 rtx mem, addr;
13910 extract_insn_cached (insn);
13912 if (REG_P (recog_data.operand[0]))
13913 return 0;
13915 mem = recog_data.operand[0];
13917 gcc_assert (MEM_P (mem));
13919 addr = XEXP (mem, 0);
13921 if (GET_CODE (addr) == REG
13922 || (GET_CODE (addr) == PLUS
13923 && GET_CODE (XEXP (addr, 0)) == REG
13924 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13925 return 1;
13926 else
13927 return 0;
13930 /* Output an ADD r, s, #n where n may be too big for one instruction.
13931 If adding zero to one register, output nothing. */
13932 const char *
13933 output_add_immediate (rtx *operands)
13935 HOST_WIDE_INT n = INTVAL (operands[2]);
13937 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13939 if (n < 0)
13940 output_multi_immediate (operands,
13941 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13942 -n);
13943 else
13944 output_multi_immediate (operands,
13945 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13949 return "";
13952 /* Output a multiple immediate operation.
13953 OPERANDS is the vector of operands referred to in the output patterns.
13954 INSTR1 is the output pattern to use for the first constant.
13955 INSTR2 is the output pattern to use for subsequent constants.
13956 IMMED_OP is the index of the constant slot in OPERANDS.
13957 N is the constant value. */
13958 static const char *
13959 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13960 int immed_op, HOST_WIDE_INT n)
13962 #if HOST_BITS_PER_WIDE_INT > 32
13963 n &= 0xffffffff;
13964 #endif
13966 if (n == 0)
13968 /* Quick and easy output. */
13969 operands[immed_op] = const0_rtx;
13970 output_asm_insn (instr1, operands);
13972 else
13974 int i;
13975 const char * instr = instr1;
13977 /* Note that n is never zero here (which would give no output). */
13978 for (i = 0; i < 32; i += 2)
13980 if (n & (3 << i))
13982 operands[immed_op] = GEN_INT (n & (255 << i));
13983 output_asm_insn (instr, operands);
13984 instr = instr2;
13985 i += 6;
13990 return "";
13993 /* Return the name of a shifter operation. */
13994 static const char *
13995 arm_shift_nmem(enum rtx_code code)
13997 switch (code)
13999 case ASHIFT:
14000 return ARM_LSL_NAME;
14002 case ASHIFTRT:
14003 return "asr";
14005 case LSHIFTRT:
14006 return "lsr";
14008 case ROTATERT:
14009 return "ror";
14011 default:
14012 abort();
14016 /* Return the appropriate ARM instruction for the operation code.
14017 The returned result should not be overwritten. OP is the rtx of the
14018 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14019 was shifted. */
14020 const char *
14021 arithmetic_instr (rtx op, int shift_first_arg)
14023 switch (GET_CODE (op))
14025 case PLUS:
14026 return "add";
14028 case MINUS:
14029 return shift_first_arg ? "rsb" : "sub";
14031 case IOR:
14032 return "orr";
14034 case XOR:
14035 return "eor";
14037 case AND:
14038 return "and";
14040 case ASHIFT:
14041 case ASHIFTRT:
14042 case LSHIFTRT:
14043 case ROTATERT:
14044 return arm_shift_nmem(GET_CODE(op));
14046 default:
14047 gcc_unreachable ();
14051 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14052 for the operation code. The returned result should not be overwritten.
14053 OP is the rtx code of the shift.
14054 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
14055 shift amount if the shift is by a constant. */
14056 static const char *
14057 shift_op (rtx op, HOST_WIDE_INT *amountp)
14059 const char * mnem;
14060 enum rtx_code code = GET_CODE (op);
14062 switch (GET_CODE (XEXP (op, 1)))
14064 case REG:
14065 case SUBREG:
14066 *amountp = -1;
14067 break;
14069 case CONST_INT:
14070 *amountp = INTVAL (XEXP (op, 1));
14071 break;
14073 default:
14074 gcc_unreachable ();
14077 switch (code)
14079 case ROTATE:
14080 gcc_assert (*amountp != -1);
14081 *amountp = 32 - *amountp;
14082 code = ROTATERT;
14084 /* Fall through. */
14086 case ASHIFT:
14087 case ASHIFTRT:
14088 case LSHIFTRT:
14089 case ROTATERT:
14090 mnem = arm_shift_nmem(code);
14091 break;
14093 case MULT:
14094 /* We never have to worry about the amount being other than a
14095 power of 2, since this case can never be reloaded from a reg. */
14096 gcc_assert (*amountp != -1);
14097 *amountp = int_log2 (*amountp);
14098 return ARM_LSL_NAME;
14100 default:
14101 gcc_unreachable ();
14104 if (*amountp != -1)
14106 /* This is not 100% correct, but follows from the desire to merge
14107 multiplication by a power of 2 with the recognizer for a
14108 shift. >=32 is not a valid shift for "lsl", so we must try and
14109 output a shift that produces the correct arithmetical result.
14110 Using lsr #32 is identical except for the fact that the carry bit
14111 is not set correctly if we set the flags; but we never use the
14112 carry bit from such an operation, so we can ignore that. */
14113 if (code == ROTATERT)
14114 /* Rotate is just modulo 32. */
14115 *amountp &= 31;
14116 else if (*amountp != (*amountp & 31))
14118 if (code == ASHIFT)
14119 mnem = "lsr";
14120 *amountp = 32;
14123 /* Shifts of 0 are no-ops. */
14124 if (*amountp == 0)
14125 return NULL;
14128 return mnem;
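/* A few concrete cases of the mapping above, assuming a CONST_INT shift
   amount: (mult x 8) is treated as a shift and yields ARM_LSL_NAME with
   *amountp == 3; (rotate x 8) becomes "ror" with *amountp == 24; and a
   shift by 0 returns NULL so the caller emits no shift at all.  */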
14131 /* Obtain the shift from the POWER of two. */
14133 static HOST_WIDE_INT
14134 int_log2 (HOST_WIDE_INT power)
14136 HOST_WIDE_INT shift = 0;
14138 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14140 gcc_assert (shift <= 31);
14141 shift++;
14144 return shift;
14147 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14148 because /bin/as is horribly restrictive. The judgement about
14149 whether or not each character is 'printable' (and can be output as
14150 is) or not (and must be printed with an octal escape) must be made
14151 with reference to the *host* character set -- the situation is
14152 similar to that discussed in the comments above pp_c_char in
14153 c-pretty-print.c. */
14155 #define MAX_ASCII_LEN 51
14157 void
14158 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14160 int i;
14161 int len_so_far = 0;
14163 fputs ("\t.ascii\t\"", stream);
14165 for (i = 0; i < len; i++)
14167 int c = p[i];
14169 if (len_so_far >= MAX_ASCII_LEN)
14171 fputs ("\"\n\t.ascii\t\"", stream);
14172 len_so_far = 0;
14175 if (ISPRINT (c))
14177 if (c == '\\' || c == '\"')
14179 putc ('\\', stream);
14180 len_so_far++;
14182 putc (c, stream);
14183 len_so_far++;
14185 else
14187 fprintf (stream, "\\%03o", c);
14188 len_so_far += 4;
14192 fputs ("\"\n", stream);
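/* Example of the escaping above: the three host characters 'a', '"' and
   newline are emitted as

     .ascii "a\"\012"

   Printable characters pass through (with '\\' and '"' backslash-escaped),
   anything else becomes a three-digit octal escape, and a fresh .ascii
   directive is started once MAX_ASCII_LEN characters have been written.  */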
14195 /* Compute the register save mask for registers 0 through 12
14196 inclusive. This code is used by arm_compute_save_reg_mask. */
14198 static unsigned long
14199 arm_compute_save_reg0_reg12_mask (void)
14201 unsigned long func_type = arm_current_func_type ();
14202 unsigned long save_reg_mask = 0;
14203 unsigned int reg;
14205 if (IS_INTERRUPT (func_type))
14207 unsigned int max_reg;
14208 /* Interrupt functions must not corrupt any registers,
14209 even call clobbered ones. If this is a leaf function
14210 we can just examine the registers used by the RTL, but
14211 otherwise we have to assume that whatever function is
14212 called might clobber anything, and so we have to save
14213 all the call-clobbered registers as well. */
14214 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14215 /* FIQ handlers have registers r8 - r12 banked, so
14216 we only need to check r0 - r7. Normal ISRs only
14217 bank r14 and r15, so we must check up to r12.
14218 r13 is the stack pointer which is always preserved,
14219 so we do not need to consider it here. */
14220 max_reg = 7;
14221 else
14222 max_reg = 12;
14224 for (reg = 0; reg <= max_reg; reg++)
14225 if (df_regs_ever_live_p (reg)
14226 || (! current_function_is_leaf && call_used_regs[reg]))
14227 save_reg_mask |= (1 << reg);
14229 /* Also save the pic base register if necessary. */
14230 if (flag_pic
14231 && !TARGET_SINGLE_PIC_BASE
14232 && arm_pic_register != INVALID_REGNUM
14233 && crtl->uses_pic_offset_table)
14234 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14236 else if (IS_VOLATILE(func_type))
14238 /* For noreturn functions we historically omitted register saves
14239 altogether. However this really messes up debugging. As a
14240 compromise save just the frame pointers. Combined with the link
14241 register saved elsewhere this should be sufficient to get
14242 a backtrace. */
14243 if (frame_pointer_needed)
14244 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14245 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14246 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14247 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14248 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14250 else
14252 /* In the normal case we only need to save those registers
14253 which are call saved and which are used by this function. */
14254 for (reg = 0; reg <= 11; reg++)
14255 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14256 save_reg_mask |= (1 << reg);
14258 /* Handle the frame pointer as a special case. */
14259 if (frame_pointer_needed)
14260 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14262 /* If we aren't loading the PIC register,
14263 don't stack it even though it may be live. */
14264 if (flag_pic
14265 && !TARGET_SINGLE_PIC_BASE
14266 && arm_pic_register != INVALID_REGNUM
14267 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
14268 || crtl->uses_pic_offset_table))
14269 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14271 /* The prologue will copy SP into R0, so save it. */
14272 if (IS_STACKALIGN (func_type))
14273 save_reg_mask |= 1;
14276 /* Save registers so the exception handler can modify them. */
14277 if (crtl->calls_eh_return)
14279 unsigned int i;
14281 for (i = 0; ; i++)
14283 reg = EH_RETURN_DATA_REGNO (i);
14284 if (reg == INVALID_REGNUM)
14285 break;
14286 save_reg_mask |= 1 << reg;
14290 return save_reg_mask;
14294 /* Compute the number of bytes used to store the static chain register on the
14295 stack, above the stack frame. We need to know this accurately to get the
14296 alignment of the rest of the stack frame correct. */
14298 static int arm_compute_static_chain_stack_bytes (void)
14300 unsigned long func_type = arm_current_func_type ();
14301 int static_chain_stack_bytes = 0;
14303 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
14304 IS_NESTED (func_type) &&
14305 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14306 static_chain_stack_bytes = 4;
14308 return static_chain_stack_bytes;
14312 /* Compute a bit mask of which registers need to be
14313 saved on the stack for the current function.
14314 This is used by arm_get_frame_offsets, which may add extra registers. */
14316 static unsigned long
14317 arm_compute_save_reg_mask (void)
14319 unsigned int save_reg_mask = 0;
14320 unsigned long func_type = arm_current_func_type ();
14321 unsigned int reg;
14323 if (IS_NAKED (func_type))
14324 /* This should never really happen. */
14325 return 0;
14327 /* If we are creating a stack frame, then we must save the frame pointer,
14328 IP (which will hold the old stack pointer), LR and the PC. */
14329 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14330 save_reg_mask |=
14331 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14332 | (1 << IP_REGNUM)
14333 | (1 << LR_REGNUM)
14334 | (1 << PC_REGNUM);
14336 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14338 /* Decide if we need to save the link register.
14339 Interrupt routines have their own banked link register,
14340 so they never need to save it.
14341 Otherwise if we do not use the link register we do not need to save
14342 it. If we are pushing other registers onto the stack however, we
14343 can save an instruction in the epilogue by pushing the link register
14344 now and then popping it back into the PC. This incurs extra memory
14345 accesses though, so we only do it when optimizing for size, and only
14346 if we know that we will not need a fancy return sequence. */
14347 if (df_regs_ever_live_p (LR_REGNUM)
14348 || (save_reg_mask
14349 && optimize_size
14350 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14351 && !crtl->calls_eh_return))
14352 save_reg_mask |= 1 << LR_REGNUM;
14354 if (cfun->machine->lr_save_eliminated)
14355 save_reg_mask &= ~ (1 << LR_REGNUM);
14357 if (TARGET_REALLY_IWMMXT
14358 && ((bit_count (save_reg_mask)
14359 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14360 arm_compute_static_chain_stack_bytes())
14361 ) % 2) != 0)
14363 /* The total number of registers that are going to be pushed
14364 onto the stack is odd. We need to ensure that the stack
14365 is 64-bit aligned before we start to save iWMMXt registers,
14366 and also before we start to create locals. (A local variable
14367 might be a double or long long which we will load/store using
14368 an iWMMXt instruction). Therefore we need to push another
14369 ARM register, so that the stack will be 64-bit aligned. We
14370 try to avoid using the arg registers (r0 - r3) as they might be
14371 used to pass values in a tail call. */
14372 for (reg = 4; reg <= 12; reg++)
14373 if ((save_reg_mask & (1 << reg)) == 0)
14374 break;
14376 if (reg <= 12)
14377 save_reg_mask |= (1 << reg);
14378 else
14380 cfun->machine->sibcall_blocked = 1;
14381 save_reg_mask |= (1 << 3);
14385 /* We may need to push an additional register for use initializing the
14386 PIC base register. */
14387 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14388 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14390 reg = thumb_find_work_register (1 << 4);
14391 if (!call_used_regs[reg])
14392 save_reg_mask |= (1 << reg);
14395 return save_reg_mask;
14399 /* Compute a bit mask of which registers need to be
14400 saved on the stack for the current function. */
14401 static unsigned long
14402 thumb1_compute_save_reg_mask (void)
14404 unsigned long mask;
14405 unsigned reg;
14407 mask = 0;
14408 for (reg = 0; reg < 12; reg ++)
14409 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14410 mask |= 1 << reg;
14412 if (flag_pic
14413 && !TARGET_SINGLE_PIC_BASE
14414 && arm_pic_register != INVALID_REGNUM
14415 && crtl->uses_pic_offset_table)
14416 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14418 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14419 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14420 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14422 /* LR will also be pushed if any lo regs are pushed. */
14423 if (mask & 0xff || thumb_force_lr_save ())
14424 mask |= (1 << LR_REGNUM);
14426 /* Make sure we have a low work register if we need one.
14427 We will need one if we are going to push a high register,
14428 but we are not currently intending to push a low register. */
14429 if ((mask & 0xff) == 0
14430 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14432 /* Use thumb_find_work_register to choose which register
14433 we will use. If the register is live then we will
14434 have to push it. Use LAST_LO_REGNUM as our fallback
14435 choice for the register to select. */
14436 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14437 /* Make sure the register returned by thumb_find_work_register is
14438 not part of the return value. */
14439 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14440 reg = LAST_LO_REGNUM;
14442 if (! call_used_regs[reg])
14443 mask |= 1 << reg;
14446 /* The 504 below is 8 bytes less than 512 because there are two possible
14447 alignment words. We can't tell here if they will be present or not so we
14448 have to play it safe and assume that they are. */
14449 if ((CALLER_INTERWORKING_SLOT_SIZE +
14450 ROUND_UP_WORD (get_frame_size ()) +
14451 crtl->outgoing_args_size) >= 504)
14453 /* This is the same as the code in thumb1_expand_prologue() which
14454 determines which register to use for stack decrement. */
14455 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14456 if (mask & (1 << reg))
14457 break;
14459 if (reg > LAST_LO_REGNUM)
14461 /* Make sure we have a register available for stack decrement. */
14462 mask |= 1 << LAST_LO_REGNUM;
14466 return mask;
14470 /* Return the number of bytes required to save VFP registers. */
14471 static int
14472 arm_get_vfp_saved_size (void)
14474 unsigned int regno;
14475 int count;
14476 int saved;
14478 saved = 0;
14479 /* Space for saved VFP registers. */
14480 if (TARGET_HARD_FLOAT && TARGET_VFP)
14482 count = 0;
14483 for (regno = FIRST_VFP_REGNUM;
14484 regno < LAST_VFP_REGNUM;
14485 regno += 2)
14487 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14488 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14490 if (count > 0)
14492 /* Workaround ARM10 VFPr1 bug. */
14493 if (count == 2 && !arm_arch6)
14494 count++;
14495 saved += count * 8;
14497 count = 0;
14499 else
14500 count++;
14502 if (count > 0)
14504 if (count == 2 && !arm_arch6)
14505 count++;
14506 saved += count * 8;
14509 return saved;
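/* For instance, if d8-d11 are the only call-saved VFP registers live in the
   function, the loop above finds one contiguous run of four registers and
   returns 32 bytes.  A run of exactly two registers on a core without
   arm_arch6 is padded to three (24 bytes) for the ARM10 VFPr1 workaround
   noted above.  Register choice here is purely illustrative.  */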
14513 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14514 everything bar the final return instruction. */
14515 const char *
14516 output_return_instruction (rtx operand, int really_return, int reverse)
14518 char conditional[10];
14519 char instr[100];
14520 unsigned reg;
14521 unsigned long live_regs_mask;
14522 unsigned long func_type;
14523 arm_stack_offsets *offsets;
14525 func_type = arm_current_func_type ();
14527 if (IS_NAKED (func_type))
14528 return "";
14530 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14532 /* If this function was declared non-returning, and we have
14533 found a tail call, then we have to trust that the called
14534 function won't return. */
14535 if (really_return)
14537 rtx ops[2];
14539 /* Otherwise, trap an attempted return by aborting. */
14540 ops[0] = operand;
14541 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14542 : "abort");
14543 assemble_external_libcall (ops[1]);
14544 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14547 return "";
14550 gcc_assert (!cfun->calls_alloca || really_return);
14552 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14554 cfun->machine->return_used_this_function = 1;
14556 offsets = arm_get_frame_offsets ();
14557 live_regs_mask = offsets->saved_regs_mask;
14559 if (live_regs_mask)
14561 const char * return_reg;
14563 /* If we do not have any special requirements for function exit
14564 (e.g. interworking) then we can load the return address
14565 directly into the PC. Otherwise we must load it into LR. */
14566 if (really_return
14567 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14568 return_reg = reg_names[PC_REGNUM];
14569 else
14570 return_reg = reg_names[LR_REGNUM];
14572 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14574 /* There are three possible reasons for the IP register
14575 being saved. 1) a stack frame was created, in which case
14576 IP contains the old stack pointer, or 2) an ISR routine
14577 corrupted it, or 3) it was saved to align the stack on
14578 iWMMXt. In case 1, restore IP into SP, otherwise just
14579 restore IP. */
14580 if (frame_pointer_needed)
14582 live_regs_mask &= ~ (1 << IP_REGNUM);
14583 live_regs_mask |= (1 << SP_REGNUM);
14585 else
14586 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14589 /* On some ARM architectures it is faster to use LDR rather than
14590 LDM to load a single register. On other architectures, the
14591 cost is the same. In 26 bit mode, or for exception handlers,
14592 we have to use LDM to load the PC so that the CPSR is also
14593 restored. */
14594 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14595 if (live_regs_mask == (1U << reg))
14596 break;
14598 if (reg <= LAST_ARM_REGNUM
14599 && (reg != LR_REGNUM
14600 || ! really_return
14601 || ! IS_INTERRUPT (func_type)))
14603 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14604 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14606 else
14608 char *p;
14609 int first = 1;
14611 /* Generate the load multiple instruction to restore the
14612 registers. Note we can get here, even if
14613 frame_pointer_needed is true, but only if sp already
14614 points to the base of the saved core registers. */
14615 if (live_regs_mask & (1 << SP_REGNUM))
14617 unsigned HOST_WIDE_INT stack_adjust;
14619 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14620 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14622 if (stack_adjust && arm_arch5 && TARGET_ARM)
14623 if (TARGET_UNIFIED_ASM)
14624 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14625 else
14626 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14627 else
14629 /* If we can't use ldmib (SA110 bug),
14630 then try to pop r3 instead. */
14631 if (stack_adjust)
14632 live_regs_mask |= 1 << 3;
14634 if (TARGET_UNIFIED_ASM)
14635 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14636 else
14637 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14640 else
14641 if (TARGET_UNIFIED_ASM)
14642 sprintf (instr, "pop%s\t{", conditional);
14643 else
14644 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14646 p = instr + strlen (instr);
14648 for (reg = 0; reg <= SP_REGNUM; reg++)
14649 if (live_regs_mask & (1 << reg))
14651 int l = strlen (reg_names[reg]);
14653 if (first)
14654 first = 0;
14655 else
14657 memcpy (p, ", ", 2);
14658 p += 2;
14661 memcpy (p, "%|", 2);
14662 memcpy (p + 2, reg_names[reg], l);
14663 p += l + 2;
14666 if (live_regs_mask & (1 << LR_REGNUM))
14668 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14669 /* If returning from an interrupt, restore the CPSR. */
14670 if (IS_INTERRUPT (func_type))
14671 strcat (p, "^");
14673 else
14674 strcpy (p, "}");
14677 output_asm_insn (instr, & operand);
14679 /* See if we need to generate an extra instruction to
14680 perform the actual function return. */
14681 if (really_return
14682 && func_type != ARM_FT_INTERWORKED
14683 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14685 /* The return has already been handled
14686 by loading the LR into the PC. */
14687 really_return = 0;
14691 if (really_return)
14693 switch ((int) ARM_FUNC_TYPE (func_type))
14695 case ARM_FT_ISR:
14696 case ARM_FT_FIQ:
14697 /* ??? This is wrong for unified assembly syntax. */
14698 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14699 break;
14701 case ARM_FT_INTERWORKED:
14702 sprintf (instr, "bx%s\t%%|lr", conditional);
14703 break;
14705 case ARM_FT_EXCEPTION:
14706 /* ??? This is wrong for unified assembly syntax. */
14707 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14708 break;
14710 default:
14711 /* Use bx if it's available. */
14712 if (arm_arch5 || arm_arch4t)
14713 sprintf (instr, "bx%s\t%%|lr", conditional);
14714 else
14715 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14716 break;
14719 output_asm_insn (instr, & operand);
14722 return "";
14725 /* Write the function name into the code section, directly preceding
14726 the function prologue.
14728 Code will be output similar to this:
14730 .ascii "arm_poke_function_name", 0
14731 .align
14733 .word 0xff000000 + (t1 - t0)
14734 arm_poke_function_name
14735 mov ip, sp
14736 stmfd sp!, {fp, ip, lr, pc}
14737 sub fp, ip, #4
14739 When performing a stack backtrace, code can inspect the value
14740 of 'pc' stored at 'fp' + 0. If the trace function then looks
14741 at location pc - 12 and the top 8 bits are set, then we know
14742 that there is a function name embedded immediately preceding this
14743 location, and that its length is ((pc[-3]) & 0xff000000).
14745 We assume that pc is declared as a pointer to an unsigned long.
14747 It is of no benefit to output the function name if we are assembling
14748 a leaf function. These function types will not contain a stack
14749 backtrace structure, therefore it is not possible to determine the
14750 function name. */
14751 void
14752 arm_poke_function_name (FILE *stream, const char *name)
14754 unsigned long alignlength;
14755 unsigned long length;
14756 rtx x;
14758 length = strlen (name) + 1;
14759 alignlength = ROUND_UP_WORD (length);
14761 ASM_OUTPUT_ASCII (stream, name, length);
14762 ASM_OUTPUT_ALIGN (stream, 2);
14763 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14764 assemble_aligned_integer (UNITS_PER_WORD, x);
14767 /* Place some comments into the assembler stream
14768 describing the current function. */
14769 static void
14770 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14772 unsigned long func_type;
14774 /* ??? Do we want to print some of the below anyway? */
14775 if (TARGET_THUMB1)
14776 return;
14778 /* Sanity check. */
14779 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14781 func_type = arm_current_func_type ();
14783 switch ((int) ARM_FUNC_TYPE (func_type))
14785 default:
14786 case ARM_FT_NORMAL:
14787 break;
14788 case ARM_FT_INTERWORKED:
14789 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14790 break;
14791 case ARM_FT_ISR:
14792 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14793 break;
14794 case ARM_FT_FIQ:
14795 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14796 break;
14797 case ARM_FT_EXCEPTION:
14798 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14799 break;
14802 if (IS_NAKED (func_type))
14803 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14805 if (IS_VOLATILE (func_type))
14806 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14808 if (IS_NESTED (func_type))
14809 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14810 if (IS_STACKALIGN (func_type))
14811 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14813 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14814 crtl->args.size,
14815 crtl->args.pretend_args_size, frame_size);
14817 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14818 frame_pointer_needed,
14819 cfun->machine->uses_anonymous_args);
14821 if (cfun->machine->lr_save_eliminated)
14822 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14824 if (crtl->calls_eh_return)
14825 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14829 const char *
14830 arm_output_epilogue (rtx sibling)
14832 int reg;
14833 unsigned long saved_regs_mask;
14834 unsigned long func_type;
14835 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14836 frame that is $fp + 4 for a non-variadic function. */
14837 int floats_offset = 0;
14838 rtx operands[3];
14839 FILE * f = asm_out_file;
14840 unsigned int lrm_count = 0;
14841 int really_return = (sibling == NULL);
14842 int start_reg;
14843 arm_stack_offsets *offsets;
14845 /* If we have already generated the return instruction
14846 then it is futile to generate anything else. */
14847 if (use_return_insn (FALSE, sibling) &&
14848 (cfun->machine->return_used_this_function != 0))
14849 return "";
14851 func_type = arm_current_func_type ();
14853 if (IS_NAKED (func_type))
14854 /* Naked functions don't have epilogues. */
14855 return "";
14857 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14859 rtx op;
14861 /* A volatile function should never return. Call abort. */
14862 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14863 assemble_external_libcall (op);
14864 output_asm_insn ("bl\t%a0", &op);
14866 return "";
14869 /* If we are throwing an exception, then we really must be doing a
14870 return, so we can't tail-call. */
14871 gcc_assert (!crtl->calls_eh_return || really_return);
14873 offsets = arm_get_frame_offsets ();
14874 saved_regs_mask = offsets->saved_regs_mask;
14876 if (TARGET_IWMMXT)
14877 lrm_count = bit_count (saved_regs_mask);
14879 floats_offset = offsets->saved_args;
14880 /* Compute how far away the floats will be. */
14881 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14882 if (saved_regs_mask & (1 << reg))
14883 floats_offset += 4;
14885 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14887 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14888 int vfp_offset = offsets->frame;
14890 if (TARGET_FPA_EMU2)
14892 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14893 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14895 floats_offset += 12;
14896 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14897 reg, FP_REGNUM, floats_offset - vfp_offset);
14900 else
14902 start_reg = LAST_FPA_REGNUM;
14904 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14906 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14908 floats_offset += 12;
14910 /* We can't unstack more than four registers at once. */
14911 if (start_reg - reg == 3)
14913 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14914 reg, FP_REGNUM, floats_offset - vfp_offset);
14915 start_reg = reg - 1;
14918 else
14920 if (reg != start_reg)
14921 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14922 reg + 1, start_reg - reg,
14923 FP_REGNUM, floats_offset - vfp_offset);
14924 start_reg = reg - 1;
14928 /* Just in case the last register checked also needs unstacking. */
14929 if (reg != start_reg)
14930 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14931 reg + 1, start_reg - reg,
14932 FP_REGNUM, floats_offset - vfp_offset);
14935 if (TARGET_HARD_FLOAT && TARGET_VFP)
14937 int saved_size;
14939 /* The fldmd insns do not have base+offset addressing
14940 modes, so we use IP to hold the address. */
14941 saved_size = arm_get_vfp_saved_size ();
14943 if (saved_size > 0)
14945 floats_offset += saved_size;
14946 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14947 FP_REGNUM, floats_offset - vfp_offset);
14949 start_reg = FIRST_VFP_REGNUM;
14950 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14952 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14953 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14955 if (start_reg != reg)
14956 vfp_output_fldmd (f, IP_REGNUM,
14957 (start_reg - FIRST_VFP_REGNUM) / 2,
14958 (reg - start_reg) / 2);
14959 start_reg = reg + 2;
14962 if (start_reg != reg)
14963 vfp_output_fldmd (f, IP_REGNUM,
14964 (start_reg - FIRST_VFP_REGNUM) / 2,
14965 (reg - start_reg) / 2);
14968 if (TARGET_IWMMXT)
14970 /* The frame pointer is guaranteed to be non-double-word aligned.
14971 This is because it is set to (old_stack_pointer - 4) and the
14972 old_stack_pointer was double word aligned. Thus the offset to
14973 the iWMMXt registers to be loaded must also be non-double-word
14974 sized, so that the resultant address *is* double-word aligned.
14975 We can ignore floats_offset since that was already included in
14976 the live_regs_mask. */
14977 lrm_count += (lrm_count % 2 ? 2 : 1);
14979 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14980 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14982 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14983 reg, FP_REGNUM, lrm_count * 4);
14984 lrm_count += 2;
14988 /* saved_regs_mask should contain the IP, which at the time of stack
14989 frame generation actually contains the old stack pointer. So a
14990 quick way to unwind the stack is just pop the IP register directly
14991 into the stack pointer. */
14992 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14993 saved_regs_mask &= ~ (1 << IP_REGNUM);
14994 saved_regs_mask |= (1 << SP_REGNUM);
14996 /* There are two registers left in saved_regs_mask - LR and PC. We
14997 only need to restore the LR register (the return address), but to
14998 save time we can load it directly into the PC, unless we need a
14999 special function exit sequence, or we are not really returning. */
15000 if (really_return
15001 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15002 && !crtl->calls_eh_return)
15003 /* Delete the LR from the register mask, so that the LR on
15004 the stack is loaded into the PC in the register mask. */
15005 saved_regs_mask &= ~ (1 << LR_REGNUM);
15006 else
15007 saved_regs_mask &= ~ (1 << PC_REGNUM);
15009 /* We must use SP as the base register, because SP is one of the
15010 registers being restored. If an interrupt or page fault
15011 happens in the ldm instruction, the SP might or might not
15012 have been restored. That would be bad, as then SP will no
15013 longer indicate the safe area of stack, and we can get stack
15014 corruption. Using SP as the base register means that it will
15015 be reset correctly to the original value, should an interrupt
15016 occur. If the stack pointer already points at the right
15017 place, then omit the subtraction. */
15018 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15019 || cfun->calls_alloca)
15020 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15021 4 * bit_count (saved_regs_mask));
15022 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15024 if (IS_INTERRUPT (func_type))
15025 /* Interrupt handlers will have pushed the
15026 IP onto the stack, so restore it now. */
15027 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15029 else
15031 /* This branch is executed for ARM mode (non-apcs frames) and
15032 Thumb-2 mode. Frame layout is essentially the same for those
15033 cases, except that in ARM mode frame pointer points to the
15034 first saved register, while in Thumb-2 mode the frame pointer points
15035 to the last saved register.
15037 It is possible to make frame pointer point to last saved
15038 register in both cases, and remove some conditionals below.
15039 That means that fp setup in prologue would be just "mov fp, sp"
15040 and sp restore in epilogue would be just "mov sp, fp", whereas
15041 now we have to use add/sub in those cases. However, the value
15042 of that would be marginal, as both mov and add/sub are 32-bit
15043 in ARM mode, and it would require extra conditionals
15044 in arm_expand_prologue to distinguish ARM-apcs-frame case
15045 (where frame pointer is required to point at first register)
15046 and ARM-non-apcs-frame. Therefore, such change is postponed
15047 until a real need arises. */
15048 unsigned HOST_WIDE_INT amount;
15049 int rfe;
15050 /* Restore stack pointer if necessary. */
15051 if (TARGET_ARM && frame_pointer_needed)
15053 operands[0] = stack_pointer_rtx;
15054 operands[1] = hard_frame_pointer_rtx;
15056 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15057 output_add_immediate (operands);
15059 else
15061 if (frame_pointer_needed)
15063 /* For Thumb-2 restore sp from the frame pointer.
15064 Operand restrictions mean we have to increment FP, then copy
15065 to SP. */
15066 amount = offsets->locals_base - offsets->saved_regs;
15067 operands[0] = hard_frame_pointer_rtx;
15069 else
15071 unsigned long count;
15072 operands[0] = stack_pointer_rtx;
15073 amount = offsets->outgoing_args - offsets->saved_regs;
15074 /* pop call clobbered registers if it avoids a
15075 separate stack adjustment. */
15076 count = offsets->saved_regs - offsets->saved_args;
15077 if (optimize_size
15078 && count != 0
15079 && !crtl->calls_eh_return
15080 && bit_count(saved_regs_mask) * 4 == count
15081 && !IS_INTERRUPT (func_type)
15082 && !crtl->tail_call_emit)
15084 unsigned long mask;
15085 /* Preserve return values, of any size. */
15086 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15087 mask ^= 0xf;
15088 mask &= ~saved_regs_mask;
15089 reg = 0;
15090 while (bit_count (mask) * 4 > amount)
15092 while ((mask & (1 << reg)) == 0)
15093 reg++;
15094 mask &= ~(1 << reg);
15096 if (bit_count (mask) * 4 == amount) {
15097 amount = 0;
15098 saved_regs_mask |= mask;
15103 if (amount)
15105 operands[1] = operands[0];
15106 operands[2] = GEN_INT (amount);
15107 output_add_immediate (operands);
15109 if (frame_pointer_needed)
15110 asm_fprintf (f, "\tmov\t%r, %r\n",
15111 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15114 if (TARGET_FPA_EMU2)
15116 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15117 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15118 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15119 reg, SP_REGNUM);
15121 else
15123 start_reg = FIRST_FPA_REGNUM;
15125 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15127 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15129 if (reg - start_reg == 3)
15131 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15132 start_reg, SP_REGNUM);
15133 start_reg = reg + 1;
15136 else
15138 if (reg != start_reg)
15139 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15140 start_reg, reg - start_reg,
15141 SP_REGNUM);
15143 start_reg = reg + 1;
15147 /* Just in case the last register checked also needs unstacking. */
15148 if (reg != start_reg)
15149 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15150 start_reg, reg - start_reg, SP_REGNUM);
15153 if (TARGET_HARD_FLOAT && TARGET_VFP)
15155 int end_reg = LAST_VFP_REGNUM + 1;
15157 /* Scan the registers in reverse order. We need to match
15158 any groupings made in the prologue and generate matching
15159 pop operations. */
15160 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15162 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15163 && (!df_regs_ever_live_p (reg + 1)
15164 || call_used_regs[reg + 1]))
15166 if (end_reg > reg + 2)
15167 vfp_output_fldmd (f, SP_REGNUM,
15168 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15169 (end_reg - (reg + 2)) / 2);
15170 end_reg = reg;
15173 if (end_reg > reg + 2)
15174 vfp_output_fldmd (f, SP_REGNUM, 0,
15175 (end_reg - (reg + 2)) / 2);
15178 if (TARGET_IWMMXT)
15179 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15180 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15181 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15183 /* If we can, restore the LR into the PC. */
15184 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15185 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15186 && !IS_STACKALIGN (func_type)
15187 && really_return
15188 && crtl->args.pretend_args_size == 0
15189 && saved_regs_mask & (1 << LR_REGNUM)
15190 && !crtl->calls_eh_return)
15192 saved_regs_mask &= ~ (1 << LR_REGNUM);
15193 saved_regs_mask |= (1 << PC_REGNUM);
15194 rfe = IS_INTERRUPT (func_type);
15196 else
15197 rfe = 0;
15199 /* Load the registers off the stack. If we only have one register
15200 to load, use the LDR instruction - it is faster. For Thumb-2
15201 always use pop and the assembler will pick the best instruction. */
15202 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15203 && !IS_INTERRUPT(func_type))
15205 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15207 else if (saved_regs_mask)
15209 if (saved_regs_mask & (1 << SP_REGNUM))
15210 /* Note - write back to the stack register is not enabled
15211 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15212 in the list of registers and if we add writeback the
15213 instruction becomes UNPREDICTABLE. */
15214 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
15215 rfe);
15216 else if (TARGET_ARM)
15217 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
15218 rfe);
15219 else
15220 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
15223 if (crtl->args.pretend_args_size)
15225 /* Unwind the pre-pushed regs. */
15226 operands[0] = operands[1] = stack_pointer_rtx;
15227 operands[2] = GEN_INT (crtl->args.pretend_args_size);
15228 output_add_immediate (operands);
15232 /* We may have already restored PC directly from the stack. */
15233 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
15234 return "";
15236 /* Stack adjustment for exception handler. */
15237 if (crtl->calls_eh_return)
15238 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
15239 ARM_EH_STACKADJ_REGNUM);
15241 /* Generate the return instruction. */
15242 switch ((int) ARM_FUNC_TYPE (func_type))
15244 case ARM_FT_ISR:
15245 case ARM_FT_FIQ:
15246 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
15247 break;
15249 case ARM_FT_EXCEPTION:
15250 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15251 break;
15253 case ARM_FT_INTERWORKED:
15254 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15255 break;
15257 default:
15258 if (IS_STACKALIGN (func_type))
15260 /* See comment in arm_expand_prologue. */
15261 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
15263 if (arm_arch5 || arm_arch4t)
15264 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15265 else
15266 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15267 break;
15270 return "";
15273 static void
15274 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15275 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15277 arm_stack_offsets *offsets;
15279 if (TARGET_THUMB1)
15281 int regno;
15283 /* Emit any call-via-reg trampolines that are needed for v4t support
15284 of call_reg and call_value_reg type insns. */
15285 for (regno = 0; regno < LR_REGNUM; regno++)
15287 rtx label = cfun->machine->call_via[regno];
15289 if (label != NULL)
15291 switch_to_section (function_section (current_function_decl));
15292 targetm.asm_out.internal_label (asm_out_file, "L",
15293 CODE_LABEL_NUMBER (label));
15294 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15298 /* ??? Probably not safe to set this here, since it assumes that a
15299 function will be emitted as assembly immediately after we generate
15300 RTL for it. This does not happen for inline functions. */
15301 cfun->machine->return_used_this_function = 0;
15303 else /* TARGET_32BIT */
15305 /* We need to take into account any stack-frame rounding. */
15306 offsets = arm_get_frame_offsets ();
15308 gcc_assert (!use_return_insn (FALSE, NULL)
15309 || (cfun->machine->return_used_this_function != 0)
15310 || offsets->saved_regs == offsets->outgoing_args
15311 || frame_pointer_needed);
15313 /* Reset the ARM-specific per-function variables. */
15314 after_arm_reorg = 0;
15318 /* Generate and emit an insn that we will recognize as a push_multi.
15319 Unfortunately, since this insn does not reflect the actual semantics of
15320 the operation very well, we need to annotate the insn for the benefit
15321 of DWARF2 frame unwind information. */
15322 static rtx
15323 emit_multi_reg_push (unsigned long mask)
15325 int num_regs = 0;
15326 int num_dwarf_regs;
15327 int i, j;
15328 rtx par;
15329 rtx dwarf;
15330 int dwarf_par_index;
15331 rtx tmp, reg;
15333 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15334 if (mask & (1 << i))
15335 num_regs++;
15337 gcc_assert (num_regs && num_regs <= 16);
15339 /* We don't record the PC in the dwarf frame information. */
15340 num_dwarf_regs = num_regs;
15341 if (mask & (1 << PC_REGNUM))
15342 num_dwarf_regs--;
15344 /* For the body of the insn we are going to generate an UNSPEC in
15345 parallel with several USEs. This allows the insn to be recognized
15346 by the push_multi pattern in the arm.md file.
15348 The body of the insn looks something like this:
15350 (parallel [
15351 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15352 (const_int:SI <num>)))
15353 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15354 (use (reg:SI XX))
15355 (use (reg:SI YY))
15359 For the frame note however, we try to be more explicit and actually
15360 show each register being stored into the stack frame, plus a (single)
15361 decrement of the stack pointer. We do it this way in order to be
15362 friendly to the stack unwinding code, which only wants to see a single
15363 stack decrement per instruction. The RTL we generate for the note looks
15364 something like this:
15366 (sequence [
15367 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15368 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15369 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15370 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15374 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15375 instead we'd have a parallel expression detailing all
15376 the stores to the various memory addresses so that debug
15377 information is more up-to-date. Remember however while writing
15378 this to take care of the constraints with the push instruction.
15380 Note also that this has to be taken care of for the VFP registers.
15382 For more see PR43399. */
15384 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15385 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15386 dwarf_par_index = 1;
15388 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15390 if (mask & (1 << i))
15392 reg = gen_rtx_REG (SImode, i);
15394 XVECEXP (par, 0, 0)
15395 = gen_rtx_SET (VOIDmode,
15396 gen_frame_mem
15397 (BLKmode,
15398 gen_rtx_PRE_MODIFY (Pmode,
15399 stack_pointer_rtx,
15400 plus_constant
15401 (stack_pointer_rtx,
15402 -4 * num_regs))
15404 gen_rtx_UNSPEC (BLKmode,
15405 gen_rtvec (1, reg),
15406 UNSPEC_PUSH_MULT));
15408 if (i != PC_REGNUM)
15410 tmp = gen_rtx_SET (VOIDmode,
15411 gen_frame_mem (SImode, stack_pointer_rtx),
15412 reg);
15413 RTX_FRAME_RELATED_P (tmp) = 1;
15414 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15415 dwarf_par_index++;
15418 break;
15422 for (j = 1, i++; j < num_regs; i++)
15424 if (mask & (1 << i))
15426 reg = gen_rtx_REG (SImode, i);
15428 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15430 if (i != PC_REGNUM)
15433 = gen_rtx_SET (VOIDmode,
15434 gen_frame_mem
15435 (SImode,
15436 plus_constant (stack_pointer_rtx,
15437 4 * j)),
15438 reg);
15439 RTX_FRAME_RELATED_P (tmp) = 1;
15440 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15443 j++;
15447 par = emit_insn (par);
15449 tmp = gen_rtx_SET (VOIDmode,
15450 stack_pointer_rtx,
15451 plus_constant (stack_pointer_rtx, -4 * num_regs));
15452 RTX_FRAME_RELATED_P (tmp) = 1;
15453 XVECEXP (dwarf, 0, 0) = tmp;
15455 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15457 return par;
15460 /* Calculate the size of the return value that is passed in registers. */
15461 static unsigned
15462 arm_size_return_regs (void)
15464 enum machine_mode mode;
15466 if (crtl->return_rtx != 0)
15467 mode = GET_MODE (crtl->return_rtx);
15468 else
15469 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15471 return GET_MODE_SIZE (mode);
15474 static rtx
15475 emit_sfm (int base_reg, int count)
15477 rtx par;
15478 rtx dwarf;
15479 rtx tmp, reg;
15480 int i;
15482 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15483 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15485 reg = gen_rtx_REG (XFmode, base_reg++);
15487 XVECEXP (par, 0, 0)
15488 = gen_rtx_SET (VOIDmode,
15489 gen_frame_mem
15490 (BLKmode,
15491 gen_rtx_PRE_MODIFY (Pmode,
15492 stack_pointer_rtx,
15493 plus_constant
15494 (stack_pointer_rtx,
15495 -12 * count))
15497 gen_rtx_UNSPEC (BLKmode,
15498 gen_rtvec (1, reg),
15499 UNSPEC_PUSH_MULT));
15500 tmp = gen_rtx_SET (VOIDmode,
15501 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15502 RTX_FRAME_RELATED_P (tmp) = 1;
15503 XVECEXP (dwarf, 0, 1) = tmp;
15505 for (i = 1; i < count; i++)
15507 reg = gen_rtx_REG (XFmode, base_reg++);
15508 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15510 tmp = gen_rtx_SET (VOIDmode,
15511 gen_frame_mem (XFmode,
15512 plus_constant (stack_pointer_rtx,
15513 i * 12)),
15514 reg);
15515 RTX_FRAME_RELATED_P (tmp) = 1;
15516 XVECEXP (dwarf, 0, i + 1) = tmp;
15519 tmp = gen_rtx_SET (VOIDmode,
15520 stack_pointer_rtx,
15521 plus_constant (stack_pointer_rtx, -12 * count));
15523 RTX_FRAME_RELATED_P (tmp) = 1;
15524 XVECEXP (dwarf, 0, 0) = tmp;
15526 par = emit_insn (par);
15527 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15529 return par;
15533 /* Return true if the current function needs to save/restore LR. */
15535 static bool
15536 thumb_force_lr_save (void)
15538 return !cfun->machine->lr_save_eliminated
15539 && (!leaf_function_p ()
15540 || thumb_far_jump_used_p ()
15541 || df_regs_ever_live_p (LR_REGNUM));
15545 /* Return true if r3 is used by any of the tail call insns in the
15546 current function. */
15548 static bool
15549 any_sibcall_uses_r3 (void)
15551 edge_iterator ei;
15552 edge e;
15554 if (!crtl->tail_call_emit)
15555 return false;
15556 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15557 if (e->flags & EDGE_SIBCALL)
15559 rtx call = BB_END (e->src);
15560 if (!CALL_P (call))
15561 call = prev_nonnote_nondebug_insn (call);
15562 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15563 if (find_regno_fusage (call, USE, 3))
15564 return true;
15566 return false;
15570 /* Compute the distance from register FROM to register TO.
15571 These can be the arg pointer (26), the soft frame pointer (25),
15572 the stack pointer (13) or the hard frame pointer (11).
15573 In thumb mode r7 is used as the soft frame pointer, if needed.
15574 Typical stack layout looks like this:
15576 old stack pointer -> | |
15577 ----
15578 | | \
15579 | | saved arguments for
15580 | | vararg functions
15581 | | /
15583 hard FP & arg pointer -> | | \
15584 | | stack
15585 | | frame
15586 | | /
15588 | | \
15589 | | call saved
15590 | | registers
15591 soft frame pointer -> | | /
15593 | | \
15594 | | local
15595 | | variables
15596 locals base pointer -> | | /
15598 | | \
15599 | | outgoing
15600 | | arguments
15601 current stack pointer -> | | /
15604 For a given function some or all of these stack components
15605 may not be needed, giving rise to the possibility of
15606 eliminating some of the registers.
15608 The values returned by this function must reflect the behavior
15609 of arm_expand_prologue() and arm_compute_save_reg_mask().
15611 The sign of the number returned reflects the direction of stack
15612 growth, so the values are positive for all eliminations except
15613 from the soft frame pointer to the hard frame pointer.
15615 SFP may point just inside the local variables block to ensure correct
15616 alignment. */
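/* Editor's note -- a worked example (numbers assumed, illustrative only):
   for a 32-bit function with no pretend args, r4-r7 and LR saved (20 bytes),
   16 bytes of locals, 8 bytes of outgoing arguments, and no caller
   interworking slot or static chain, arm_get_frame_offsets () below gives
     saved_args = 0, saved_regs = 20, soft_frame = 24 (padded for
     doubleword alignment), locals_base = 40, outgoing_args = 48.
   The elimination offsets then follow directly:
     ARG_POINTER -> STACK_POINTER   : 48 - (0 + 4) = 44
     FRAME_POINTER -> STACK_POINTER : 48 - 24      = 24.  */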
15619 /* Calculate stack offsets. These are used to calculate register elimination
15620 offsets and in prologue/epilogue code. Also calculates which registers
15621 should be saved. */
15623 static arm_stack_offsets *
15624 arm_get_frame_offsets (void)
15626 struct arm_stack_offsets *offsets;
15627 unsigned long func_type;
15628 int leaf;
15629 int saved;
15630 int core_saved;
15631 HOST_WIDE_INT frame_size;
15632 int i;
15634 offsets = &cfun->machine->stack_offsets;
15636 /* We need to know if we are a leaf function. Unfortunately, it
15637 is possible to be called after start_sequence has been called,
15638 which causes get_insns to return the insns for the sequence,
15639 not the function, which will cause leaf_function_p to return
15640 the incorrect result.  To work around this we cache the offsets;
15642 we only need to know about leaf functions once reload has completed, and the
15643 frame size cannot be changed after that time, so we can safely
15644 use the cached value. */
15646 if (reload_completed)
15647 return offsets;
15649 /* Initially this is the size of the local variables.  It will be translated
15650 into an offset once we have determined the size of preceding data. */
15651 frame_size = ROUND_UP_WORD (get_frame_size ());
15653 leaf = leaf_function_p ();
15655 /* Space for variadic functions. */
15656 offsets->saved_args = crtl->args.pretend_args_size;
15658 /* In Thumb mode this is incorrect, but never used. */
15659 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15660 arm_compute_static_chain_stack_bytes();
15662 if (TARGET_32BIT)
15664 unsigned int regno;
15666 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15667 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15668 saved = core_saved;
15670 /* We know that SP will be doubleword aligned on entry, and we must
15671 preserve that condition at any subroutine call. We also require the
15672 soft frame pointer to be doubleword aligned. */
15674 if (TARGET_REALLY_IWMMXT)
15676 /* Check for the call-saved iWMMXt registers. */
15677 for (regno = FIRST_IWMMXT_REGNUM;
15678 regno <= LAST_IWMMXT_REGNUM;
15679 regno++)
15680 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15681 saved += 8;
15684 func_type = arm_current_func_type ();
15685 if (! IS_VOLATILE (func_type))
15687 /* Space for saved FPA registers. */
15688 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15689 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15690 saved += 12;
15692 /* Space for saved VFP registers. */
15693 if (TARGET_HARD_FLOAT && TARGET_VFP)
15694 saved += arm_get_vfp_saved_size ();
15697 else /* TARGET_THUMB1 */
15699 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15700 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15701 saved = core_saved;
15702 if (TARGET_BACKTRACE)
15703 saved += 16;
15706 /* Saved registers include the stack frame. */
15707 offsets->saved_regs = offsets->saved_args + saved +
15708 arm_compute_static_chain_stack_bytes();
15709 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15710 /* A leaf function does not need any stack alignment if it has nothing
15711 on the stack. */
15712 if (leaf && frame_size == 0
15713 /* However if it calls alloca(), we have a dynamically allocated
15714 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15715 && ! cfun->calls_alloca)
15717 offsets->outgoing_args = offsets->soft_frame;
15718 offsets->locals_base = offsets->soft_frame;
15719 return offsets;
15722 /* Ensure SFP has the correct alignment. */
15723 if (ARM_DOUBLEWORD_ALIGN
15724 && (offsets->soft_frame & 7))
15726 offsets->soft_frame += 4;
15727 /* Try to align the stack by pushing an extra reg.  Don't bother doing this
15728 when there is a stack frame as the alignment will be rolled into
15729 the normal stack adjustment. */
15730 if (frame_size + crtl->outgoing_args_size == 0)
15732 int reg = -1;
15734 /* If it is safe to use r3, then do so. This sometimes
15735 generates better code on Thumb-2 by avoiding the need to
15736 use 32-bit push/pop instructions. */
15737 if (! any_sibcall_uses_r3 ()
15738 && arm_size_return_regs () <= 12
15739 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15741 reg = 3;
15743 else
15744 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15746 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15748 reg = i;
15749 break;
15753 if (reg != -1)
15755 offsets->saved_regs += 4;
15756 offsets->saved_regs_mask |= (1 << reg);
15761 offsets->locals_base = offsets->soft_frame + frame_size;
15762 offsets->outgoing_args = (offsets->locals_base
15763 + crtl->outgoing_args_size);
15765 if (ARM_DOUBLEWORD_ALIGN)
15767 /* Ensure SP remains doubleword aligned. */
15768 if (offsets->outgoing_args & 7)
15769 offsets->outgoing_args += 4;
15770 gcc_assert (!(offsets->outgoing_args & 7));
15773 return offsets;
15777 /* Calculate the relative offsets for the different stack pointers. Positive
15778 offsets are in the direction of stack growth. */
15780 HOST_WIDE_INT
15781 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15783 arm_stack_offsets *offsets;
15785 offsets = arm_get_frame_offsets ();
15787 /* OK, now we have enough information to compute the distances.
15788 There must be an entry in these switch tables for each pair
15789 of registers in ELIMINABLE_REGS, even if some of the entries
15790 seem to be redundant or useless. */
15791 switch (from)
15793 case ARG_POINTER_REGNUM:
15794 switch (to)
15796 case THUMB_HARD_FRAME_POINTER_REGNUM:
15797 return 0;
15799 case FRAME_POINTER_REGNUM:
15800 /* This is the reverse of the soft frame pointer
15801 to hard frame pointer elimination below. */
15802 return offsets->soft_frame - offsets->saved_args;
15804 case ARM_HARD_FRAME_POINTER_REGNUM:
15805 /* This is only non-zero in the case where the static chain register
15806 is stored above the frame. */
15807 return offsets->frame - offsets->saved_args - 4;
15809 case STACK_POINTER_REGNUM:
15810 /* If nothing has been pushed on the stack at all
15811 then this will return -4. This *is* correct! */
15812 return offsets->outgoing_args - (offsets->saved_args + 4);
15814 default:
15815 gcc_unreachable ();
15817 gcc_unreachable ();
15819 case FRAME_POINTER_REGNUM:
15820 switch (to)
15822 case THUMB_HARD_FRAME_POINTER_REGNUM:
15823 return 0;
15825 case ARM_HARD_FRAME_POINTER_REGNUM:
15826 /* The hard frame pointer points to the top entry in the
15827 stack frame.  The soft frame pointer points to the bottom entry
15828 in the stack frame. If there is no stack frame at all,
15829 then they are identical. */
15831 return offsets->frame - offsets->soft_frame;
15833 case STACK_POINTER_REGNUM:
15834 return offsets->outgoing_args - offsets->soft_frame;
15836 default:
15837 gcc_unreachable ();
15839 gcc_unreachable ();
15841 default:
15842 /* You cannot eliminate from the stack pointer.
15843 In theory you could eliminate from the hard frame
15844 pointer to the stack pointer, but this will never
15845 happen, since if a stack frame is not needed the
15846 hard frame pointer will never be used. */
15847 gcc_unreachable ();
15851 /* Given FROM and TO register numbers, say whether this elimination is
15852 allowed. Frame pointer elimination is automatically handled.
15854 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15855 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15856 pointer, we must eliminate FRAME_POINTER_REGNUM into
15857 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15858 ARG_POINTER_REGNUM. */
15860 bool
15861 arm_can_eliminate (const int from, const int to)
15863 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15864 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15865 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15866 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15867 true);
15870 /* Emit RTL to save coprocessor registers on function entry. Returns the
15871 number of bytes pushed. */
15873 static int
15874 arm_save_coproc_regs(void)
15876 int saved_size = 0;
15877 unsigned reg;
15878 unsigned start_reg;
15879 rtx insn;
15881 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15882 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15884 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15885 insn = gen_rtx_MEM (V2SImode, insn);
15886 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15887 RTX_FRAME_RELATED_P (insn) = 1;
15888 saved_size += 8;
15891 /* Save any floating point call-saved registers used by this
15892 function. */
15893 if (TARGET_FPA_EMU2)
15895 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15896 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15898 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15899 insn = gen_rtx_MEM (XFmode, insn);
15900 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15901 RTX_FRAME_RELATED_P (insn) = 1;
15902 saved_size += 12;
15905 else
15907 start_reg = LAST_FPA_REGNUM;
15909 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15911 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15913 if (start_reg - reg == 3)
15915 insn = emit_sfm (reg, 4);
15916 RTX_FRAME_RELATED_P (insn) = 1;
15917 saved_size += 48;
15918 start_reg = reg - 1;
15921 else
15923 if (start_reg != reg)
15925 insn = emit_sfm (reg + 1, start_reg - reg);
15926 RTX_FRAME_RELATED_P (insn) = 1;
15927 saved_size += (start_reg - reg) * 12;
15929 start_reg = reg - 1;
15933 if (start_reg != reg)
15935 insn = emit_sfm (reg + 1, start_reg - reg);
15936 saved_size += (start_reg - reg) * 12;
15937 RTX_FRAME_RELATED_P (insn) = 1;
15940 if (TARGET_HARD_FLOAT && TARGET_VFP)
15942 start_reg = FIRST_VFP_REGNUM;
15944 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15946 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15947 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15949 if (start_reg != reg)
15950 saved_size += vfp_emit_fstmd (start_reg,
15951 (reg - start_reg) / 2);
15952 start_reg = reg + 2;
15955 if (start_reg != reg)
15956 saved_size += vfp_emit_fstmd (start_reg,
15957 (reg - start_reg) / 2);
15959 return saved_size;
15963 /* Set the Thumb frame pointer from the stack pointer. */
15965 static void
15966 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15968 HOST_WIDE_INT amount;
15969 rtx insn, dwarf;
15971 amount = offsets->outgoing_args - offsets->locals_base;
15972 if (amount < 1024)
15973 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15974 stack_pointer_rtx, GEN_INT (amount)));
15975 else
15977 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15978 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15979 expects the first two operands to be the same. */
15980 if (TARGET_THUMB2)
15982 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15983 stack_pointer_rtx,
15984 hard_frame_pointer_rtx));
15986 else
15988 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15989 hard_frame_pointer_rtx,
15990 stack_pointer_rtx));
15992 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15993 plus_constant (stack_pointer_rtx, amount));
15994 RTX_FRAME_RELATED_P (dwarf) = 1;
15995 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15998 RTX_FRAME_RELATED_P (insn) = 1;
16001 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16002 function. */
16003 void
16004 arm_expand_prologue (void)
16006 rtx amount;
16007 rtx insn;
16008 rtx ip_rtx;
16009 unsigned long live_regs_mask;
16010 unsigned long func_type;
16011 int fp_offset = 0;
16012 int saved_pretend_args = 0;
16013 int saved_regs = 0;
16014 unsigned HOST_WIDE_INT args_to_push;
16015 arm_stack_offsets *offsets;
16017 func_type = arm_current_func_type ();
16019 /* Naked functions don't have prologues. */
16020 if (IS_NAKED (func_type))
16021 return;
16023 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
16024 args_to_push = crtl->args.pretend_args_size;
16026 /* Compute which registers we will have to save onto the stack. */
16027 offsets = arm_get_frame_offsets ();
16028 live_regs_mask = offsets->saved_regs_mask;
16030 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16032 if (IS_STACKALIGN (func_type))
16034 rtx r0, r1;
16036 /* Handle a word-aligned stack pointer. We generate the following:
16038 mov r0, sp
16039 bic r1, r0, #7
16040 mov sp, r1
16041 <save and restore r0 in normal prologue/epilogue>
16042 mov sp, r0
16043 bx lr
16045 The unwinder doesn't need to know about the stack realignment.
16046 Just tell it we saved SP in r0. */
16047 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16049 r0 = gen_rtx_REG (SImode, 0);
16050 r1 = gen_rtx_REG (SImode, 1);
16052 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16053 RTX_FRAME_RELATED_P (insn) = 1;
16054 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16056 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16058 /* ??? The CFA changes here, which may cause GDB to conclude that it
16059 has entered a different function. That said, the unwind info is
16060 correct, individually, before and after this instruction because
16061 we've described the save of SP, which will override the default
16062 handling of SP as restoring from the CFA. */
16063 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16066 /* For APCS frames, if the IP register is clobbered
16067 when creating the frame, save that register in a special
16068 way. */
16069 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16071 if (IS_INTERRUPT (func_type))
16073 /* Interrupt functions must not corrupt any registers.
16074 Creating a frame pointer however, corrupts the IP
16075 register, so we must push it first. */
16076 emit_multi_reg_push (1 << IP_REGNUM);
16078 /* Do not set RTX_FRAME_RELATED_P on this insn.
16079 The dwarf stack unwinding code only wants to see one
16080 stack decrement per function, and this is not it. If
16081 this instruction is labeled as being part of the frame
16082 creation sequence then dwarf2out_frame_debug_expr will
16083 die when it encounters the assignment of IP to FP
16084 later on, since the use of SP here establishes SP as
16085 the CFA register and not IP.
16087 Anyway this instruction is not really part of the stack
16088 frame creation although it is part of the prologue. */
16090 else if (IS_NESTED (func_type))
16092 /* The static chain register is the same as the IP register,
16093 which is used as a scratch register during stack frame creation.
16094 To get around this we need to find somewhere to store IP
16095 whilst the frame is being created.  We try the following
16096 places in order:
16098 1. The last argument register.
16099 2. A slot on the stack above the frame. (This only
16100 works if the function is not a varargs function).
16101 3. Register r3, after pushing the argument registers
16102 onto the stack.
16104 Note - we only need to tell the dwarf2 backend about the SP
16105 adjustment in the second variant; the static chain register
16106 doesn't need to be unwound, as it doesn't contain a value
16107 inherited from the caller. */
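/* Editor's note (illustrative): in the second variant below the generated
   code is roughly "str ip, [sp, #-4]!" here, with the matching reload
   later done from [fp, #4] once the frame pointer has been set up.  */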
16109 if (df_regs_ever_live_p (3) == false)
16110 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16111 else if (args_to_push == 0)
16113 rtx dwarf;
16115 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16116 saved_regs += 4;
16118 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16119 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16120 fp_offset = 4;
16122 /* Just tell the dwarf backend that we adjusted SP. */
16123 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16124 plus_constant (stack_pointer_rtx,
16125 -fp_offset));
16126 RTX_FRAME_RELATED_P (insn) = 1;
16127 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16129 else
16131 /* Store the args on the stack. */
16132 if (cfun->machine->uses_anonymous_args)
16133 insn = emit_multi_reg_push
16134 ((0xf0 >> (args_to_push / 4)) & 0xf);
16135 else
16136 insn = emit_insn
16137 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16138 GEN_INT (- args_to_push)));
16140 RTX_FRAME_RELATED_P (insn) = 1;
16142 saved_pretend_args = 1;
16143 fp_offset = args_to_push;
16144 args_to_push = 0;
16146 /* Now reuse r3 to preserve IP. */
16147 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16151 insn = emit_set_insn (ip_rtx,
16152 plus_constant (stack_pointer_rtx, fp_offset));
16153 RTX_FRAME_RELATED_P (insn) = 1;
16156 if (args_to_push)
16158 /* Push the argument registers, or reserve space for them. */
16159 if (cfun->machine->uses_anonymous_args)
16160 insn = emit_multi_reg_push
16161 ((0xf0 >> (args_to_push / 4)) & 0xf);
16162 else
16163 insn = emit_insn
16164 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16165 GEN_INT (- args_to_push)));
16166 RTX_FRAME_RELATED_P (insn) = 1;
16169 /* If this is an interrupt service routine, and the link register
16170 is going to be pushed, and we're not generating the extra
16171 push of IP (needed when a frame is needed and the frame layout is APCS),
16172 then subtracting four from LR now will mean that the function return
16173 can be done with a single instruction. */
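/* Editor's note: the point of the adjustment below is that an IRQ/FIQ
   return address is LR minus 4; pre-subtracting it lets the epilogue
   restore straight into PC with one load-multiple (something like
   "ldmfd sp!, {..., pc}^", form assumed) instead of needing a separate
   "subs pc, lr, #4".  */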
16174 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16175 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16176 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16177 && TARGET_ARM)
16179 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16181 emit_set_insn (lr, plus_constant (lr, -4));
16184 if (live_regs_mask)
16186 saved_regs += bit_count (live_regs_mask) * 4;
16187 if (optimize_size && !frame_pointer_needed
16188 && saved_regs == offsets->saved_regs - offsets->saved_args)
16190 /* If no coprocessor registers are being pushed and we don't have
16191 to worry about a frame pointer then push extra registers to
16192 create the stack frame.  This is done in a way that does not
16193 alter the frame layout, so is independent of the epilogue. */
16194 int n;
16195 int frame;
16196 n = 0;
16197 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16198 n++;
16199 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16200 if (frame && n * 4 >= frame)
16202 n = frame / 4;
16203 live_regs_mask |= (1 << n) - 1;
16204 saved_regs += frame;
16207 insn = emit_multi_reg_push (live_regs_mask);
16208 RTX_FRAME_RELATED_P (insn) = 1;
16211 if (! IS_VOLATILE (func_type))
16212 saved_regs += arm_save_coproc_regs ();
16214 if (frame_pointer_needed && TARGET_ARM)
16216 /* Create the new frame pointer. */
16217 if (TARGET_APCS_FRAME)
16219 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16220 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16221 RTX_FRAME_RELATED_P (insn) = 1;
16223 if (IS_NESTED (func_type))
16225 /* Recover the static chain register. */
16226 if (!df_regs_ever_live_p (3)
16227 || saved_pretend_args)
16228 insn = gen_rtx_REG (SImode, 3);
16229 else /* if (crtl->args.pretend_args_size == 0) */
16231 insn = plus_constant (hard_frame_pointer_rtx, 4);
16232 insn = gen_frame_mem (SImode, insn);
16234 emit_set_insn (ip_rtx, insn);
16235 /* Add a USE to stop propagate_one_insn() from barfing. */
16236 emit_insn (gen_prologue_use (ip_rtx));
16239 else
16241 insn = GEN_INT (saved_regs - 4);
16242 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16243 stack_pointer_rtx, insn));
16244 RTX_FRAME_RELATED_P (insn) = 1;
16248 if (flag_stack_usage_info)
16249 current_function_static_stack_size
16250 = offsets->outgoing_args - offsets->saved_args;
16252 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16254 /* This add can produce multiple insns for a large constant, so we
16255 need to get tricky. */
16256 rtx last = get_last_insn ();
16258 amount = GEN_INT (offsets->saved_args + saved_regs
16259 - offsets->outgoing_args);
16261 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16262 amount));
16265 last = last ? NEXT_INSN (last) : get_insns ();
16266 RTX_FRAME_RELATED_P (last) = 1;
16268 while (last != insn);
16270 /* If the frame pointer is needed, emit a special barrier that
16271 will prevent the scheduler from moving stores to the frame
16272 before the stack adjustment. */
16273 if (frame_pointer_needed)
16274 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16275 hard_frame_pointer_rtx));
16279 if (frame_pointer_needed && TARGET_THUMB2)
16280 thumb_set_frame_pointer (offsets);
16282 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16284 unsigned long mask;
16286 mask = live_regs_mask;
16287 mask &= THUMB2_WORK_REGS;
16288 if (!IS_NESTED (func_type))
16289 mask |= (1 << IP_REGNUM);
16290 arm_load_pic_register (mask);
16293 /* If we are profiling, make sure no instructions are scheduled before
16294 the call to mcount. Similarly if the user has requested no
16295 scheduling in the prolog. Similarly if we want non-call exceptions
16296 using the EABI unwinder, to prevent faulting instructions from being
16297 swapped with a stack adjustment. */
16298 if (crtl->profile || !TARGET_SCHED_PROLOG
16299 || (arm_except_unwind_info (&global_options) == UI_TARGET
16300 && cfun->can_throw_non_call_exceptions))
16301 emit_insn (gen_blockage ());
16303 /* If the link register is being kept alive, with the return address in it,
16304 then make sure that it does not get reused by the ce2 pass. */
16305 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16306 cfun->machine->lr_save_eliminated = 1;
16309 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16310 static void
16311 arm_print_condition (FILE *stream)
16313 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16315 /* Branch conversion is not implemented for Thumb-2. */
16316 if (TARGET_THUMB)
16318 output_operand_lossage ("predicated Thumb instruction");
16319 return;
16321 if (current_insn_predicate != NULL)
16323 output_operand_lossage
16324 ("predicated instruction in conditional sequence");
16325 return;
16328 fputs (arm_condition_codes[arm_current_cc], stream);
16330 else if (current_insn_predicate)
16332 enum arm_cond_code code;
16334 if (TARGET_THUMB1)
16336 output_operand_lossage ("predicated Thumb instruction");
16337 return;
16340 code = get_arm_condition_code (current_insn_predicate);
16341 fputs (arm_condition_codes[code], stream);
16346 /* If CODE is 'd', then X is a condition operand and the instruction
16347 should only be executed if the condition is true.
16348 If CODE is 'D', then X is a condition operand and the instruction
16349 should only be executed if the condition is false: however, if the mode
16350 of the comparison is CCFPEmode, then always execute the instruction -- we
16351 do this because in these circumstances !GE does not necessarily imply LT;
16352 in these cases the instruction pattern will take care to make sure that
16353 an instruction containing %d will follow, thereby undoing the effects of
16354 doing this instruction unconditionally.
16355 If CODE is 'N' then X is a floating point operand that must be negated
16356 before output.
16357 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16358 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
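/* Editor's note -- a couple of illustrative cases (values assumed):
   with X = (const_int 5), '%B' prints -6 (the bitwise inverse);
   with X a DImode value in r4, '%M' prints "{r4-r5}";
   '%N' simply negates a floating-point constant before printing it.  */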
16359 static void
16360 arm_print_operand (FILE *stream, rtx x, int code)
16362 switch (code)
16364 case '@':
16365 fputs (ASM_COMMENT_START, stream);
16366 return;
16368 case '_':
16369 fputs (user_label_prefix, stream);
16370 return;
16372 case '|':
16373 fputs (REGISTER_PREFIX, stream);
16374 return;
16376 case '?':
16377 arm_print_condition (stream);
16378 return;
16380 case '(':
16381 /* Nothing in unified syntax, otherwise the current condition code. */
16382 if (!TARGET_UNIFIED_ASM)
16383 arm_print_condition (stream);
16384 break;
16386 case ')':
16387 /* The current condition code in unified syntax, otherwise nothing. */
16388 if (TARGET_UNIFIED_ASM)
16389 arm_print_condition (stream);
16390 break;
16392 case '.':
16393 /* The current condition code for a condition code setting instruction.
16394 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16395 if (TARGET_UNIFIED_ASM)
16397 fputc('s', stream);
16398 arm_print_condition (stream);
16400 else
16402 arm_print_condition (stream);
16403 fputc('s', stream);
16405 return;
16407 case '!':
16408 /* If the instruction is conditionally executed then print
16409 the current condition code, otherwise print 's'. */
16410 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16411 if (current_insn_predicate)
16412 arm_print_condition (stream);
16413 else
16414 fputc('s', stream);
16415 break;
16417 /* %# is a "break" sequence. It doesn't output anything, but is used to
16418 separate e.g. operand numbers from following text, if that text consists
16419 of further digits which we don't want to be part of the operand
16420 number. */
16421 case '#':
16422 return;
16424 case 'N':
16426 REAL_VALUE_TYPE r;
16427 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16428 r = real_value_negate (&r);
16429 fprintf (stream, "%s", fp_const_from_val (&r));
16431 return;
16433 /* An integer or symbol address without a preceding # sign. */
16434 case 'c':
16435 switch (GET_CODE (x))
16437 case CONST_INT:
16438 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16439 break;
16441 case SYMBOL_REF:
16442 output_addr_const (stream, x);
16443 break;
16445 case CONST:
16446 if (GET_CODE (XEXP (x, 0)) == PLUS
16447 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16449 output_addr_const (stream, x);
16450 break;
16452 /* Fall through. */
16454 default:
16455 output_operand_lossage ("Unsupported operand for code '%c'", code);
16457 return;
16459 case 'B':
16460 if (GET_CODE (x) == CONST_INT)
16462 HOST_WIDE_INT val;
16463 val = ARM_SIGN_EXTEND (~INTVAL (x));
16464 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16466 else
16468 putc ('~', stream);
16469 output_addr_const (stream, x);
16471 return;
16473 case 'L':
16474 /* The low 16 bits of an immediate constant. */
16475 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16476 return;
16478 case 'i':
16479 fprintf (stream, "%s", arithmetic_instr (x, 1));
16480 return;
16482 /* Truncate Cirrus shift counts. */
16483 case 's':
16484 if (GET_CODE (x) == CONST_INT)
16486 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16487 return;
16489 arm_print_operand (stream, x, 0);
16490 return;
16492 case 'I':
16493 fprintf (stream, "%s", arithmetic_instr (x, 0));
16494 return;
16496 case 'S':
16498 HOST_WIDE_INT val;
16499 const char *shift;
16501 if (!shift_operator (x, SImode))
16503 output_operand_lossage ("invalid shift operand");
16504 break;
16507 shift = shift_op (x, &val);
16509 if (shift)
16511 fprintf (stream, ", %s ", shift);
16512 if (val == -1)
16513 arm_print_operand (stream, XEXP (x, 1), 0);
16514 else
16515 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16518 return;
16520 /* An explanation of the 'Q', 'R' and 'H' register operands:
16522 In a pair of registers containing a DI or DF value the 'Q'
16523 operand returns the register number of the register containing
16524 the least significant part of the value. The 'R' operand returns
16525 the register number of the register containing the most
16526 significant part of the value.
16528 The 'H' operand returns the higher of the two register numbers.
16529 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16530 same as the 'Q' operand, since the most significant part of the
16531 value is held in the lower number register. The reverse is true
16532 on systems where WORDS_BIG_ENDIAN is false.
16534 The purpose of these operands is to distinguish between cases
16535 where the endian-ness of the values is important (for example
16536 when they are added together), and cases where the endian-ness
16537 is irrelevant, but the order of register operations is important.
16538 For example when loading a value from memory into a register
16539 pair, the endian-ness does not matter. Provided that the value
16540 from the lower memory address is put into the lower numbered
16541 register, and the value from the higher address is put into the
16542 higher numbered register, the load will work regardless of whether
16543 the value being loaded is big-wordian or little-wordian. The
16544 order of the two register loads can matter however, if the address
16545 of the memory location is actually held in one of the registers
16546 being overwritten by the load.
16548 The 'Q' and 'R' constraints are also available for 64-bit
16549 constants. */
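/* Editor's note -- example (register numbers assumed): for a DImode value
   held in {r0, r1} on a little-endian word-order target, '%Q' prints r0,
   '%R' prints r1 and '%H' prints r1; with WORDS_BIG_ENDIAN the roles of
   '%Q' and '%R' swap while '%H' still prints r1.  */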
16550 case 'Q':
16551 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16553 rtx part = gen_lowpart (SImode, x);
16554 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16555 return;
16558 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16560 output_operand_lossage ("invalid operand for code '%c'", code);
16561 return;
16564 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16565 return;
16567 case 'R':
16568 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16570 enum machine_mode mode = GET_MODE (x);
16571 rtx part;
16573 if (mode == VOIDmode)
16574 mode = DImode;
16575 part = gen_highpart_mode (SImode, mode, x);
16576 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16577 return;
16580 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16582 output_operand_lossage ("invalid operand for code '%c'", code);
16583 return;
16586 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16587 return;
16589 case 'H':
16590 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16592 output_operand_lossage ("invalid operand for code '%c'", code);
16593 return;
16596 asm_fprintf (stream, "%r", REGNO (x) + 1);
16597 return;
16599 case 'J':
16600 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16602 output_operand_lossage ("invalid operand for code '%c'", code);
16603 return;
16606 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16607 return;
16609 case 'K':
16610 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16612 output_operand_lossage ("invalid operand for code '%c'", code);
16613 return;
16616 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16617 return;
16619 case 'm':
16620 asm_fprintf (stream, "%r",
16621 GET_CODE (XEXP (x, 0)) == REG
16622 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16623 return;
16625 case 'M':
16626 asm_fprintf (stream, "{%r-%r}",
16627 REGNO (x),
16628 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16629 return;
16631 /* Like 'M', but writing doubleword vector registers, for use by Neon
16632 insns. */
16633 case 'h':
16635 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16636 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16637 if (numregs == 1)
16638 asm_fprintf (stream, "{d%d}", regno);
16639 else
16640 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16642 return;
16644 case 'd':
16645 /* CONST_TRUE_RTX means always -- that's the default. */
16646 if (x == const_true_rtx)
16647 return;
16649 if (!COMPARISON_P (x))
16651 output_operand_lossage ("invalid operand for code '%c'", code);
16652 return;
16655 fputs (arm_condition_codes[get_arm_condition_code (x)],
16656 stream);
16657 return;
16659 case 'D':
16660 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16661 want to do that. */
16662 if (x == const_true_rtx)
16664 output_operand_lossage ("instruction never executed");
16665 return;
16667 if (!COMPARISON_P (x))
16669 output_operand_lossage ("invalid operand for code '%c'", code);
16670 return;
16673 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16674 (get_arm_condition_code (x))],
16675 stream);
16676 return;
16678 /* Cirrus registers can be accessed in a variety of ways:
16679 single floating point (f)
16680 double floating point (d)
16681 32bit integer (fx)
16682 64bit integer (dx). */
16683 case 'W': /* Cirrus register in F mode. */
16684 case 'X': /* Cirrus register in D mode. */
16685 case 'Y': /* Cirrus register in FX mode. */
16686 case 'Z': /* Cirrus register in DX mode. */
16687 gcc_assert (GET_CODE (x) == REG
16688 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16690 fprintf (stream, "mv%s%s",
16691 code == 'W' ? "f"
16692 : code == 'X' ? "d"
16693 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16695 return;
16697 /* Print a Cirrus register, using the register's mode to select the format. */
16698 case 'V':
16700 int mode = GET_MODE (x);
16702 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16704 output_operand_lossage ("invalid operand for code '%c'", code);
16705 return;
16708 fprintf (stream, "mv%s%s",
16709 mode == DFmode ? "d"
16710 : mode == SImode ? "fx"
16711 : mode == DImode ? "dx"
16712 : "f", reg_names[REGNO (x)] + 2);
16714 return;
16717 case 'U':
16718 if (GET_CODE (x) != REG
16719 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16720 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16721 /* Bad value for wCG register number. */
16723 output_operand_lossage ("invalid operand for code '%c'", code);
16724 return;
16727 else
16728 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16729 return;
16731 /* Print an iWMMXt control register name. */
16732 case 'w':
16733 if (GET_CODE (x) != CONST_INT
16734 || INTVAL (x) < 0
16735 || INTVAL (x) >= 16)
16736 /* Bad value for wC register number. */
16738 output_operand_lossage ("invalid operand for code '%c'", code);
16739 return;
16742 else
16744 static const char * wc_reg_names [16] =
16746 "wCID", "wCon", "wCSSF", "wCASF",
16747 "wC4", "wC5", "wC6", "wC7",
16748 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16749 "wC12", "wC13", "wC14", "wC15"
16752 fprintf (stream, wc_reg_names [INTVAL (x)]);
16754 return;
16756 /* Print the high single-precision register of a VFP double-precision
16757 register. */
16758 case 'p':
16760 int mode = GET_MODE (x);
16761 int regno;
16763 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16765 output_operand_lossage ("invalid operand for code '%c'", code);
16766 return;
16769 regno = REGNO (x);
16770 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16772 output_operand_lossage ("invalid operand for code '%c'", code);
16773 return;
16776 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16778 return;
16780 /* Print a VFP/Neon double precision or quad precision register name. */
16781 case 'P':
16782 case 'q':
16784 int mode = GET_MODE (x);
16785 int is_quad = (code == 'q');
16786 int regno;
16788 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16790 output_operand_lossage ("invalid operand for code '%c'", code);
16791 return;
16794 if (GET_CODE (x) != REG
16795 || !IS_VFP_REGNUM (REGNO (x)))
16797 output_operand_lossage ("invalid operand for code '%c'", code);
16798 return;
16801 regno = REGNO (x);
16802 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16803 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16805 output_operand_lossage ("invalid operand for code '%c'", code);
16806 return;
16809 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16810 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16812 return;
16814 /* These two codes print the low/high doubleword register of a Neon quad
16815 register, respectively. For pair-structure types, can also print
16816 low/high quadword registers. */
16817 case 'e':
16818 case 'f':
16820 int mode = GET_MODE (x);
16821 int regno;
16823 if ((GET_MODE_SIZE (mode) != 16
16824 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16826 output_operand_lossage ("invalid operand for code '%c'", code);
16827 return;
16830 regno = REGNO (x);
16831 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16833 output_operand_lossage ("invalid operand for code '%c'", code);
16834 return;
16837 if (GET_MODE_SIZE (mode) == 16)
16838 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16839 + (code == 'f' ? 1 : 0));
16840 else
16841 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16842 + (code == 'f' ? 1 : 0));
16844 return;
16846 /* Print a VFPv3 floating-point constant, represented as an integer
16847 index. */
16848 case 'G':
16850 int index = vfp3_const_double_index (x);
16851 gcc_assert (index != -1);
16852 fprintf (stream, "%d", index);
16854 return;
16856 /* Print bits representing opcode features for Neon.
16858 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16859 and polynomials as unsigned.
16861 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16863 Bit 2 is 1 for rounding functions, 0 otherwise. */
16865 /* Identify the type as 's', 'u', 'p' or 'f'. */
16866 case 'T':
16868 HOST_WIDE_INT bits = INTVAL (x);
16869 fputc ("uspf"[bits & 3], stream);
16871 return;
16873 /* Likewise, but signed and unsigned integers are both 'i'. */
16874 case 'F':
16876 HOST_WIDE_INT bits = INTVAL (x);
16877 fputc ("iipf"[bits & 3], stream);
16879 return;
16881 /* As for 'T', but emit 'u' instead of 'p'. */
16882 case 't':
16884 HOST_WIDE_INT bits = INTVAL (x);
16885 fputc ("usuf"[bits & 3], stream);
16887 return;
16889 /* Bit 2: rounding (vs none). */
16890 case 'O':
16892 HOST_WIDE_INT bits = INTVAL (x);
16893 fputs ((bits & 4) != 0 ? "r" : "", stream);
16895 return;
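/* Editor's note -- example (bit values assumed): an operand with the value
   5 (binary 101) describes a signed, rounding integer operation, so '%T'
   and '%t' print 's', '%F' prints 'i' and '%O' prints 'r'; the value 2
   (a polynomial type) makes '%T' print 'p' and '%t' print 'u'.  */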
16897 /* Memory operand for vld1/vst1 instruction. */
16898 case 'A':
16900 rtx addr;
16901 bool postinc = FALSE;
16902 unsigned align, memsize, align_bits;
16904 gcc_assert (GET_CODE (x) == MEM);
16905 addr = XEXP (x, 0);
16906 if (GET_CODE (addr) == POST_INC)
16908 postinc = 1;
16909 addr = XEXP (addr, 0);
16911 asm_fprintf (stream, "[%r", REGNO (addr));
16913 /* We know the alignment of this access, so we can emit a hint in the
16914 instruction (for some alignments) as an aid to the memory subsystem
16915 of the target. */
16916 align = MEM_ALIGN (x) >> 3;
16917 memsize = MEM_SIZE (x);
16919 /* Only certain alignment specifiers are supported by the hardware. */
16920 if (memsize == 16 && (align % 32) == 0)
16921 align_bits = 256;
16922 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
16923 align_bits = 128;
16924 else if ((align % 8) == 0)
16925 align_bits = 64;
16926 else
16927 align_bits = 0;
16929 if (align_bits != 0)
16930 asm_fprintf (stream, ":%d", align_bits);
16932 asm_fprintf (stream, "]");
16934 if (postinc)
16935 fputs("!", stream);
16937 return;
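/* Editor's note -- example (register and alignment assumed): a 16-byte
   access through r0 that is known to be 32-byte aligned, with post-
   increment, prints as "[r0:256]!"; with no usable alignment hint it
   would print just "[r0]!".  */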
16939 case 'C':
16941 rtx addr;
16943 gcc_assert (GET_CODE (x) == MEM);
16944 addr = XEXP (x, 0);
16945 gcc_assert (GET_CODE (addr) == REG);
16946 asm_fprintf (stream, "[%r]", REGNO (addr));
16948 return;
16950 /* Translate an S register number into a D register number and element index. */
16951 case 'y':
16953 int mode = GET_MODE (x);
16954 int regno;
16956 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16958 output_operand_lossage ("invalid operand for code '%c'", code);
16959 return;
16962 regno = REGNO (x);
16963 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16965 output_operand_lossage ("invalid operand for code '%c'", code);
16966 return;
16969 regno = regno - FIRST_VFP_REGNUM;
16970 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16972 return;
16974 /* Register specifier for vld1.16/vst1.16. Translate the S register
16975 number into a D register number and element index. */
16976 case 'z':
16978 int mode = GET_MODE (x);
16979 int regno;
16981 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16983 output_operand_lossage ("invalid operand for code '%c'", code);
16984 return;
16987 regno = REGNO (x);
16988 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16990 output_operand_lossage ("invalid operand for code '%c'", code);
16991 return;
16994 regno = regno - FIRST_VFP_REGNUM;
16995 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16997 return;
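/* Editor's note -- example (register assumed): an SFmode value in s5
   prints as "d2[1]" for '%y', while a 16-bit value in s5 prints as
   "d2[2]" for '%z' (the 16-bit lanes of d2 being numbered 0-3).  */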
16999 default:
17000 if (x == 0)
17002 output_operand_lossage ("missing operand");
17003 return;
17006 switch (GET_CODE (x))
17008 case REG:
17009 asm_fprintf (stream, "%r", REGNO (x));
17010 break;
17012 case MEM:
17013 output_memory_reference_mode = GET_MODE (x);
17014 output_address (XEXP (x, 0));
17015 break;
17017 case CONST_DOUBLE:
17018 if (TARGET_NEON)
17020 char fpstr[20];
17021 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17022 sizeof (fpstr), 0, 1);
17023 fprintf (stream, "#%s", fpstr);
17025 else
17026 fprintf (stream, "#%s", fp_immediate_constant (x));
17027 break;
17029 default:
17030 gcc_assert (GET_CODE (x) != NEG);
17031 fputc ('#', stream);
17032 if (GET_CODE (x) == HIGH)
17034 fputs (":lower16:", stream);
17035 x = XEXP (x, 0);
17038 output_addr_const (stream, x);
17039 break;
17044 /* Target hook for printing a memory address. */
17045 static void
17046 arm_print_operand_address (FILE *stream, rtx x)
17048 if (TARGET_32BIT)
17050 int is_minus = GET_CODE (x) == MINUS;
17052 if (GET_CODE (x) == REG)
17053 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17054 else if (GET_CODE (x) == PLUS || is_minus)
17056 rtx base = XEXP (x, 0);
17057 rtx index = XEXP (x, 1);
17058 HOST_WIDE_INT offset = 0;
17059 if (GET_CODE (base) != REG
17060 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17062 /* Ensure that BASE is a register. */
17063 /* (one of them must be). */
17064 /* Also ensure the SP is not used as an index register. */
17065 rtx temp = base;
17066 base = index;
17067 index = temp;
17069 switch (GET_CODE (index))
17071 case CONST_INT:
17072 offset = INTVAL (index);
17073 if (is_minus)
17074 offset = -offset;
17075 asm_fprintf (stream, "[%r, #%wd]",
17076 REGNO (base), offset);
17077 break;
17079 case REG:
17080 asm_fprintf (stream, "[%r, %s%r]",
17081 REGNO (base), is_minus ? "-" : "",
17082 REGNO (index));
17083 break;
17085 case MULT:
17086 case ASHIFTRT:
17087 case LSHIFTRT:
17088 case ASHIFT:
17089 case ROTATERT:
17091 asm_fprintf (stream, "[%r, %s%r",
17092 REGNO (base), is_minus ? "-" : "",
17093 REGNO (XEXP (index, 0)));
17094 arm_print_operand (stream, index, 'S');
17095 fputs ("]", stream);
17096 break;
17099 default:
17100 gcc_unreachable ();
17103 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17104 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17106 extern enum machine_mode output_memory_reference_mode;
17108 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17110 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17111 asm_fprintf (stream, "[%r, #%s%d]!",
17112 REGNO (XEXP (x, 0)),
17113 GET_CODE (x) == PRE_DEC ? "-" : "",
17114 GET_MODE_SIZE (output_memory_reference_mode));
17115 else
17116 asm_fprintf (stream, "[%r], #%s%d",
17117 REGNO (XEXP (x, 0)),
17118 GET_CODE (x) == POST_DEC ? "-" : "",
17119 GET_MODE_SIZE (output_memory_reference_mode));
17121 else if (GET_CODE (x) == PRE_MODIFY)
17123 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17124 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17125 asm_fprintf (stream, "#%wd]!",
17126 INTVAL (XEXP (XEXP (x, 1), 1)));
17127 else
17128 asm_fprintf (stream, "%r]!",
17129 REGNO (XEXP (XEXP (x, 1), 1)));
17131 else if (GET_CODE (x) == POST_MODIFY)
17133 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17134 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17135 asm_fprintf (stream, "#%wd",
17136 INTVAL (XEXP (XEXP (x, 1), 1)));
17137 else
17138 asm_fprintf (stream, "%r",
17139 REGNO (XEXP (XEXP (x, 1), 1)));
17141 else output_addr_const (stream, x);
17143 else
17145 if (GET_CODE (x) == REG)
17146 asm_fprintf (stream, "[%r]", REGNO (x));
17147 else if (GET_CODE (x) == POST_INC)
17148 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17149 else if (GET_CODE (x) == PLUS)
17151 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17152 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17153 asm_fprintf (stream, "[%r, #%wd]",
17154 REGNO (XEXP (x, 0)),
17155 INTVAL (XEXP (x, 1)));
17156 else
17157 asm_fprintf (stream, "[%r, %r]",
17158 REGNO (XEXP (x, 0)),
17159 REGNO (XEXP (x, 1)));
17161 else
17162 output_addr_const (stream, x);
17166 /* Target hook for indicating whether a punctuation character for
17167 TARGET_PRINT_OPERAND is valid. */
17168 static bool
17169 arm_print_operand_punct_valid_p (unsigned char code)
17171 return (code == '@' || code == '|' || code == '.'
17172 || code == '(' || code == ')' || code == '#'
17173 || (TARGET_32BIT && (code == '?'))
17174 || (TARGET_THUMB2 && (code == '!'))
17175 || (TARGET_THUMB && (code == '_')));
17178 /* Target hook for assembling integer objects. The ARM version needs to
17179 handle word-sized values specially. */
17180 static bool
17181 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17183 enum machine_mode mode;
17185 if (size == UNITS_PER_WORD && aligned_p)
17187 fputs ("\t.word\t", asm_out_file);
17188 output_addr_const (asm_out_file, x);
17190 /* Mark symbols as position independent. We only do this in the
17191 .text segment, not in the .data segment. */
17192 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17193 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17195 /* See legitimize_pic_address for an explanation of the
17196 TARGET_VXWORKS_RTP check. */
17197 if (TARGET_VXWORKS_RTP
17198 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17199 fputs ("(GOT)", asm_out_file);
17200 else
17201 fputs ("(GOTOFF)", asm_out_file);
17203 fputc ('\n', asm_out_file);
17204 return true;
17207 mode = GET_MODE (x);
17209 if (arm_vector_mode_supported_p (mode))
17211 int i, units;
17213 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17215 units = CONST_VECTOR_NUNITS (x);
17216 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17218 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17219 for (i = 0; i < units; i++)
17221 rtx elt = CONST_VECTOR_ELT (x, i);
17222 assemble_integer
17223 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17225 else
17226 for (i = 0; i < units; i++)
17228 rtx elt = CONST_VECTOR_ELT (x, i);
17229 REAL_VALUE_TYPE rval;
17231 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17233 assemble_real
17234 (rval, GET_MODE_INNER (mode),
17235 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17238 return true;
17241 return default_assemble_integer (x, size, aligned_p);
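/* Editor's note -- example (symbol name assumed, non-VxWorks-RTP target):
   when emitting a PIC constant-pool entry, the word branch above produces
   "\t.word\tfoo(GOT)" for a global symbol "foo" and "\t.word\tfoo(GOTOFF)"
   for one known to be local.  */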
17244 static void
17245 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17247 section *s;
17249 if (!TARGET_AAPCS_BASED)
17251 (is_ctor ?
17252 default_named_section_asm_out_constructor
17253 : default_named_section_asm_out_destructor) (symbol, priority);
17254 return;
17257 /* Put these in the .init_array section, using a special relocation. */
17258 if (priority != DEFAULT_INIT_PRIORITY)
17260 char buf[18];
17261 sprintf (buf, "%s.%.5u",
17262 is_ctor ? ".init_array" : ".fini_array",
17263 priority);
17264 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17266 else if (is_ctor)
17267 s = ctors_section;
17268 else
17269 s = dtors_section;
17271 switch_to_section (s);
17272 assemble_align (POINTER_SIZE);
17273 fputs ("\t.word\t", asm_out_file);
17274 output_addr_const (asm_out_file, symbol);
17275 fputs ("(target1)\n", asm_out_file);
17278 /* Add a function to the list of static constructors. */
17280 static void
17281 arm_elf_asm_constructor (rtx symbol, int priority)
17283 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17286 /* Add a function to the list of static destructors. */
17288 static void
17289 arm_elf_asm_destructor (rtx symbol, int priority)
17291 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17294 /* A finite state machine takes care of noticing whether or not instructions
17295 can be conditionally executed, and thus decreases execution time and code
17296 size by deleting branch instructions. The fsm is controlled by
17297 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17299 /* The states of the fsm controlling condition codes are:
17300 0: normal, do nothing special
17301 1: make ASM_OUTPUT_OPCODE not output this instruction
17302 2: make ASM_OUTPUT_OPCODE not output this instruction
17303 3: make instructions conditional
17304 4: make instructions conditional
17306 State transitions (state->state by whom under condition):
17307 0 -> 1 final_prescan_insn if the `target' is a label
17308 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17309 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17310 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17311 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17312 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17313 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17314 (the target insn is arm_target_insn).
17316 If the jump clobbers the conditions then we use states 2 and 4.
17318 A similar thing can be done with conditional return insns.
17320 XXX In case the `target' is an unconditional branch, this conditionalising
17321 of the instructions always reduces code size, but not always execution
17322 time. But then, I want to reduce the code size to somewhere near what
17323 /bin/cc produces. */
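/* Editor's note -- an illustrative walk-through (labels assumed): for
       beq .L1 ; insn A ; insn B ; .L1:
   final_prescan_insn moves from state 0 to 1 at the branch; the branch is
   then suppressed and state 3 entered, so A and B are output with the
   inverse condition ("ne") appended; reaching .L1 returns the machine to
   state 0.  */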
17325 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17326 instructions. When a COND_EXEC instruction is seen the subsequent
17327 instructions are scanned so that multiple conditional instructions can be
17328 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17329 specify the length and true/false mask for the IT block. These will be
17330 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
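/* Editor's note -- example: three consecutive COND_EXEC insns, the first
   two predicated on EQ and the third on NE, give arm_condexec_count = 3,
   arm_condexec_masklen = 3 and arm_condexec_mask = 0b011, which
   arm_asm_output_opcode would render roughly as an "itte eq" block (exact
   IT mnemonic assumed; that code lives elsewhere in the backend).  */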
17332 /* Returns the index of the ARM condition code string in
17333 `arm_condition_codes'. COMPARISON should be an rtx like
17334 `(eq (...) (...))'. */
17335 static enum arm_cond_code
17336 get_arm_condition_code (rtx comparison)
17338 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17339 enum arm_cond_code code;
17340 enum rtx_code comp_code = GET_CODE (comparison);
17342 if (GET_MODE_CLASS (mode) != MODE_CC)
17343 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17344 XEXP (comparison, 1));
17346 switch (mode)
17348 case CC_DNEmode: code = ARM_NE; goto dominance;
17349 case CC_DEQmode: code = ARM_EQ; goto dominance;
17350 case CC_DGEmode: code = ARM_GE; goto dominance;
17351 case CC_DGTmode: code = ARM_GT; goto dominance;
17352 case CC_DLEmode: code = ARM_LE; goto dominance;
17353 case CC_DLTmode: code = ARM_LT; goto dominance;
17354 case CC_DGEUmode: code = ARM_CS; goto dominance;
17355 case CC_DGTUmode: code = ARM_HI; goto dominance;
17356 case CC_DLEUmode: code = ARM_LS; goto dominance;
17357 case CC_DLTUmode: code = ARM_CC;
17359 dominance:
17360 gcc_assert (comp_code == EQ || comp_code == NE);
17362 if (comp_code == EQ)
17363 return ARM_INVERSE_CONDITION_CODE (code);
17364 return code;
17366 case CC_NOOVmode:
17367 switch (comp_code)
17369 case NE: return ARM_NE;
17370 case EQ: return ARM_EQ;
17371 case GE: return ARM_PL;
17372 case LT: return ARM_MI;
17373 default: gcc_unreachable ();
17376 case CC_Zmode:
17377 switch (comp_code)
17379 case NE: return ARM_NE;
17380 case EQ: return ARM_EQ;
17381 default: gcc_unreachable ();
17384 case CC_Nmode:
17385 switch (comp_code)
17387 case NE: return ARM_MI;
17388 case EQ: return ARM_PL;
17389 default: gcc_unreachable ();
17392 case CCFPEmode:
17393 case CCFPmode:
17394 /* These encodings assume that AC=1 in the FPA system control
17395 byte. This allows us to handle all cases except UNEQ and
17396 LTGT. */
17397 switch (comp_code)
17399 case GE: return ARM_GE;
17400 case GT: return ARM_GT;
17401 case LE: return ARM_LS;
17402 case LT: return ARM_MI;
17403 case NE: return ARM_NE;
17404 case EQ: return ARM_EQ;
17405 case ORDERED: return ARM_VC;
17406 case UNORDERED: return ARM_VS;
17407 case UNLT: return ARM_LT;
17408 case UNLE: return ARM_LE;
17409 case UNGT: return ARM_HI;
17410 case UNGE: return ARM_PL;
17411 /* UNEQ and LTGT do not have a representation. */
17412 case UNEQ: /* Fall through. */
17413 case LTGT: /* Fall through. */
17414 default: gcc_unreachable ();
17417 case CC_SWPmode:
17418 switch (comp_code)
17420 case NE: return ARM_NE;
17421 case EQ: return ARM_EQ;
17422 case GE: return ARM_LE;
17423 case GT: return ARM_LT;
17424 case LE: return ARM_GE;
17425 case LT: return ARM_GT;
17426 case GEU: return ARM_LS;
17427 case GTU: return ARM_CC;
17428 case LEU: return ARM_CS;
17429 case LTU: return ARM_HI;
17430 default: gcc_unreachable ();
17433 case CC_Cmode:
17434 switch (comp_code)
17436 case LTU: return ARM_CS;
17437 case GEU: return ARM_CC;
17438 default: gcc_unreachable ();
17441 case CC_CZmode:
17442 switch (comp_code)
17444 case NE: return ARM_NE;
17445 case EQ: return ARM_EQ;
17446 case GEU: return ARM_CS;
17447 case GTU: return ARM_HI;
17448 case LEU: return ARM_LS;
17449 case LTU: return ARM_CC;
17450 default: gcc_unreachable ();
17453 case CC_NCVmode:
17454 switch (comp_code)
17456 case GE: return ARM_GE;
17457 case LT: return ARM_LT;
17458 case GEU: return ARM_CS;
17459 case LTU: return ARM_CC;
17460 default: gcc_unreachable ();
17463 case CCmode:
17464 switch (comp_code)
17466 case NE: return ARM_NE;
17467 case EQ: return ARM_EQ;
17468 case GE: return ARM_GE;
17469 case GT: return ARM_GT;
17470 case LE: return ARM_LE;
17471 case LT: return ARM_LT;
17472 case GEU: return ARM_CS;
17473 case GTU: return ARM_HI;
17474 case LEU: return ARM_LS;
17475 case LTU: return ARM_CC;
17476 default: gcc_unreachable ();
17479 default: gcc_unreachable ();
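/* Editor's note -- example: a comparison such as (ge (reg:CC_NOOV cc)
   (const_int 0)) maps to ARM_PL above, i.e. "N flag clear", because in
   CC_NOOVmode only the N and Z flags are assumed meaningful and GE
   against zero degenerates to a sign test.  */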
17483 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17484 instructions. */
17485 void
17486 thumb2_final_prescan_insn (rtx insn)
17488 rtx first_insn = insn;
17489 rtx body = PATTERN (insn);
17490 rtx predicate;
17491 enum arm_cond_code code;
17492 int n;
17493 int mask;
17495 /* Remove the previous insn from the count of insns to be output. */
17496 if (arm_condexec_count)
17497 arm_condexec_count--;
17499 /* Nothing to do if we are already inside a conditional block. */
17500 if (arm_condexec_count)
17501 return;
17503 if (GET_CODE (body) != COND_EXEC)
17504 return;
17506 /* Conditional jumps are implemented directly. */
17507 if (GET_CODE (insn) == JUMP_INSN)
17508 return;
17510 predicate = COND_EXEC_TEST (body);
17511 arm_current_cc = get_arm_condition_code (predicate);
17513 n = get_attr_ce_count (insn);
17514 arm_condexec_count = 1;
17515 arm_condexec_mask = (1 << n) - 1;
17516 arm_condexec_masklen = n;
17517 /* See if subsequent instructions can be combined into the same block. */
17518 for (;;)
17520 insn = next_nonnote_insn (insn);
17522 /* Jumping into the middle of an IT block is illegal, so a label or
17523 barrier terminates the block. */
17524 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
17525 break;
17527 body = PATTERN (insn);
17528 /* USE and CLOBBER aren't really insns, so just skip them. */
17529 if (GET_CODE (body) == USE
17530 || GET_CODE (body) == CLOBBER)
17531 continue;
17533 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17534 if (GET_CODE (body) != COND_EXEC)
17535 break;
17536 /* Allow up to 4 conditionally executed instructions in a block. */
17537 n = get_attr_ce_count (insn);
17538 if (arm_condexec_masklen + n > 4)
17539 break;
17541 predicate = COND_EXEC_TEST (body);
17542 code = get_arm_condition_code (predicate);
17543 mask = (1 << n) - 1;
17544 if (arm_current_cc == code)
17545 arm_condexec_mask |= (mask << arm_condexec_masklen);
17546 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17547 break;
17549 arm_condexec_count++;
17550 arm_condexec_masklen += n;
17552 /* A jump must be the last instruction in a conditional block. */
17553 if (GET_CODE (insn) == JUMP_INSN)
17554 break;
17556 /* Restore recog_data (getting the attributes of other insns can
17557 destroy this array, but final.c assumes that it remains intact
17558 across this call). */
17559 extract_constrain_insn_cached (first_insn);
17562 void
17563 arm_final_prescan_insn (rtx insn)
17565 /* BODY will hold the body of INSN. */
17566 rtx body = PATTERN (insn);
17568 /* This will be 1 if trying to repeat the trick, and things need to be
17569 reversed if it appears to fail. */
17570 int reverse = 0;
17572 /* If we start with a return insn, we only succeed if we find another one. */
17573 int seeking_return = 0;
17575 /* START_INSN will hold the insn from where we start looking. This is the
17576 first insn after the following code_label if REVERSE is true. */
17577 rtx start_insn = insn;
17579 /* If in state 4, check if the target branch is reached, in order to
17580 change back to state 0. */
17581 if (arm_ccfsm_state == 4)
17583 if (insn == arm_target_insn)
17585 arm_target_insn = NULL;
17586 arm_ccfsm_state = 0;
17588 return;
17591 /* If in state 3, it is possible to repeat the trick, if this insn is an
17592 unconditional branch to a label, and immediately following this branch
17593 is the previous target label which is only used once, and the label this
17594 branch jumps to is not too far off. */
17595 if (arm_ccfsm_state == 3)
17597 if (simplejump_p (insn))
17599 start_insn = next_nonnote_insn (start_insn);
17600 if (GET_CODE (start_insn) == BARRIER)
17602 /* XXX Isn't this always a barrier? */
17603 start_insn = next_nonnote_insn (start_insn);
17605 if (GET_CODE (start_insn) == CODE_LABEL
17606 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17607 && LABEL_NUSES (start_insn) == 1)
17608 reverse = TRUE;
17609 else
17610 return;
17612 else if (GET_CODE (body) == RETURN)
17614 start_insn = next_nonnote_insn (start_insn);
17615 if (GET_CODE (start_insn) == BARRIER)
17616 start_insn = next_nonnote_insn (start_insn);
17617 if (GET_CODE (start_insn) == CODE_LABEL
17618 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17619 && LABEL_NUSES (start_insn) == 1)
17621 reverse = TRUE;
17622 seeking_return = 1;
17624 else
17625 return;
17627 else
17628 return;
17631 gcc_assert (!arm_ccfsm_state || reverse);
17632 if (GET_CODE (insn) != JUMP_INSN)
17633 return;
17635 /* This jump might be paralleled with a clobber of the condition codes;
17636 the jump should always come first.  */
17637 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17638 body = XVECEXP (body, 0, 0);
17640 if (reverse
17641 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17642 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17644 int insns_skipped;
17645 int fail = FALSE, succeed = FALSE;
17646 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17647 int then_not_else = TRUE;
17648 rtx this_insn = start_insn, label = 0;
17650 /* Register the insn jumped to. */
17651 if (reverse)
17653 if (!seeking_return)
17654 label = XEXP (SET_SRC (body), 0);
17656 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17657 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17658 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17660 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17661 then_not_else = FALSE;
17663 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17664 seeking_return = 1;
17665 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17667 seeking_return = 1;
17668 then_not_else = FALSE;
17670 else
17671 gcc_unreachable ();
17673 /* See how many insns this branch skips, and what kind of insns. If all
17674 insns are okay, and the label or unconditional branch to the same
17675 label is not too far away, succeed. */
17676 for (insns_skipped = 0;
17677 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17679 rtx scanbody;
17681 this_insn = next_nonnote_insn (this_insn);
17682 if (!this_insn)
17683 break;
17685 switch (GET_CODE (this_insn))
17687 case CODE_LABEL:
17688 /* Succeed if it is the target label, otherwise fail since
17689 control falls in from somewhere else. */
17690 if (this_insn == label)
17692 arm_ccfsm_state = 1;
17693 succeed = TRUE;
17695 else
17696 fail = TRUE;
17697 break;
17699 case BARRIER:
17700 /* Succeed if the following insn is the target label.
17701 Otherwise fail.
17702 If return insns are used then the last insn in a function
17703 will be a barrier. */
17704 this_insn = next_nonnote_insn (this_insn);
17705 if (this_insn && this_insn == label)
17707 arm_ccfsm_state = 1;
17708 succeed = TRUE;
17710 else
17711 fail = TRUE;
17712 break;
17714 case CALL_INSN:
17715 /* The AAPCS says that conditional calls should not be
17716 used since they make interworking inefficient (the
17717 linker can't transform BL<cond> into BLX). That's
17718 only a problem if the machine has BLX. */
17719 if (arm_arch5)
17721 fail = TRUE;
17722 break;
17725 /* Succeed if the following insn is the target label, or
17726 if the following two insns are a barrier and the
17727 target label. */
17728 this_insn = next_nonnote_insn (this_insn);
17729 if (this_insn && GET_CODE (this_insn) == BARRIER)
17730 this_insn = next_nonnote_insn (this_insn);
17732 if (this_insn && this_insn == label
17733 && insns_skipped < max_insns_skipped)
17735 arm_ccfsm_state = 1;
17736 succeed = TRUE;
17738 else
17739 fail = TRUE;
17740 break;
17742 case JUMP_INSN:
17743 /* If this is an unconditional branch to the same label, succeed.
17744 If it is to another label, do nothing. If it is conditional,
17745 fail. */
17746 /* XXX Probably, the tests for SET and the PC are
17747 unnecessary. */
17749 scanbody = PATTERN (this_insn);
17750 if (GET_CODE (scanbody) == SET
17751 && GET_CODE (SET_DEST (scanbody)) == PC)
17753 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17754 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17756 arm_ccfsm_state = 2;
17757 succeed = TRUE;
17759 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17760 fail = TRUE;
17762 /* Fail if a conditional return is undesirable (e.g. on a
17763 StrongARM), but still allow this if optimizing for size. */
17764 else if (GET_CODE (scanbody) == RETURN
17765 && !use_return_insn (TRUE, NULL)
17766 && !optimize_size)
17767 fail = TRUE;
17768 else if (GET_CODE (scanbody) == RETURN
17769 && seeking_return)
17771 arm_ccfsm_state = 2;
17772 succeed = TRUE;
17774 else if (GET_CODE (scanbody) == PARALLEL)
17776 switch (get_attr_conds (this_insn))
17778 case CONDS_NOCOND:
17779 break;
17780 default:
17781 fail = TRUE;
17782 break;
17785 else
17786 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17788 break;
17790 case INSN:
17791 /* Instructions using or affecting the condition codes make it
17792 fail. */
17793 scanbody = PATTERN (this_insn);
17794 if (!(GET_CODE (scanbody) == SET
17795 || GET_CODE (scanbody) == PARALLEL)
17796 || get_attr_conds (this_insn) != CONDS_NOCOND)
17797 fail = TRUE;
17799 /* A conditional cirrus instruction must be followed by
17800 a non-Cirrus instruction.  However, since we
17801 conditionalize instructions in this function, and since
17802 by the time we get here we can't add instructions
17803 (nops) because shorten_branches() has already been
17804 called, we disable conditionalizing Cirrus
17805 instructions to be safe.  */
17806 if (GET_CODE (scanbody) != USE
17807 && GET_CODE (scanbody) != CLOBBER
17808 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17809 fail = TRUE;
17810 break;
17812 default:
17813 break;
17816 if (succeed)
17818 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17819 arm_target_label = CODE_LABEL_NUMBER (label);
17820 else
17822 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17824 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17826 this_insn = next_nonnote_insn (this_insn);
17827 gcc_assert (!this_insn
17828 || (GET_CODE (this_insn) != BARRIER
17829 && GET_CODE (this_insn) != CODE_LABEL));
17831 if (!this_insn)
17833 /* Oh, dear!  We ran off the end.  Give up.  */
17834 extract_constrain_insn_cached (insn);
17835 arm_ccfsm_state = 0;
17836 arm_target_insn = NULL;
17837 return;
17839 arm_target_insn = this_insn;
17842 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17843 what it was. */
17844 if (!reverse)
17845 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17847 if (reverse || then_not_else)
17848 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17851 /* Restore recog_data (getting the attributes of other insns can
17852 destroy this array, but final.c assumes that it remains intact
17853 across this call).  */
17854 extract_constrain_insn_cached (insn);
17858 /* Output IT instructions. */
17859 void
17860 thumb2_asm_output_opcode (FILE * stream)
17862 char buff[5];
17863 int n;
17865 if (arm_condexec_mask)
17867 for (n = 0; n < arm_condexec_masklen; n++)
17868 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17869 buff[n] = 0;
17870 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17871 arm_condition_codes[arm_current_cc]);
17872 arm_condexec_mask = 0;
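/* For example (illustrative only): with ARM_CONDEXEC_MASKLEN == 3,
   ARM_CONDEXEC_MASK == 0b011 and ARM_CURRENT_CC == ARM_EQ, BUFF becomes
   "tte" and "itte\teq" is emitted ahead of the first predicated opcode,
   i.e. an IT block covering two EQ instructions followed by one NE one.  */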
17876 /* Returns true if REGNO is a valid register
17877 for holding a quantity of type MODE. */
17878 int
17879 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17881 if (GET_MODE_CLASS (mode) == MODE_CC)
17882 return (regno == CC_REGNUM
17883 || (TARGET_HARD_FLOAT && TARGET_VFP
17884 && regno == VFPCC_REGNUM));
17886 if (TARGET_THUMB1)
17887 /* For the Thumb we only allow values bigger than SImode in
17888 registers 0 - 6, so that there is always a second low
17889 register available to hold the upper part of the value.
17890 We probably ought to ensure that the register is the
17891 start of an even numbered register pair. */
17892 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17894 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17895 && IS_CIRRUS_REGNUM (regno))
17896 /* We have outlawed SI values in Cirrus registers because they
17897 reside in the lower 32 bits, but SF values reside in the
17898 upper 32 bits. This causes gcc all sorts of grief. We can't
17899 even split the registers into pairs because Cirrus SI values
17900 get sign extended to 64 bits -- aldyh.  */
17901 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17903 if (TARGET_HARD_FLOAT && TARGET_VFP
17904 && IS_VFP_REGNUM (regno))
17906 if (mode == SFmode || mode == SImode)
17907 return VFP_REGNO_OK_FOR_SINGLE (regno);
17909 if (mode == DFmode)
17910 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17912 /* VFP registers can hold HFmode values, but there is no point in
17913 putting them there unless we have hardware conversion insns. */
17914 if (mode == HFmode)
17915 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17917 if (TARGET_NEON)
17918 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17919 || (VALID_NEON_QREG_MODE (mode)
17920 && NEON_REGNO_OK_FOR_QUAD (regno))
17921 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17922 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17923 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17924 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17925 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17927 return FALSE;
17930 if (TARGET_REALLY_IWMMXT)
17932 if (IS_IWMMXT_GR_REGNUM (regno))
17933 return mode == SImode;
17935 if (IS_IWMMXT_REGNUM (regno))
17936 return VALID_IWMMXT_REG_MODE (mode);
17939 /* We allow almost any value to be stored in the general registers.
17940 Restrict doubleword quantities to even register pairs so that we can
17941 use ldrd. Do not allow very large Neon structure opaque modes in
17942 general registers; they would use too many. */
17943 if (regno <= LAST_ARM_REGNUM)
17944 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17945 && ARM_NUM_REGS (mode) <= 4;
17947 if (regno == FRAME_POINTER_REGNUM
17948 || regno == ARG_POINTER_REGNUM)
17949 /* We only allow integers in the fake hard registers. */
17950 return GET_MODE_CLASS (mode) == MODE_INT;
17952 /* The only registers left are the FPA registers
17953 which we only allow to hold FP values. */
17954 return (TARGET_HARD_FLOAT && TARGET_FPA
17955 && GET_MODE_CLASS (mode) == MODE_FLOAT
17956 && regno >= FIRST_FPA_REGNUM
17957 && regno <= LAST_FPA_REGNUM);
17960 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17961 not used in arm mode. */
17963 enum reg_class
17964 arm_regno_class (int regno)
17966 if (TARGET_THUMB1)
17968 if (regno == STACK_POINTER_REGNUM)
17969 return STACK_REG;
17970 if (regno == CC_REGNUM)
17971 return CC_REG;
17972 if (regno < 8)
17973 return LO_REGS;
17974 return HI_REGS;
17977 if (TARGET_THUMB2 && regno < 8)
17978 return LO_REGS;
17980 if ( regno <= LAST_ARM_REGNUM
17981 || regno == FRAME_POINTER_REGNUM
17982 || regno == ARG_POINTER_REGNUM)
17983 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17985 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17986 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17988 if (IS_CIRRUS_REGNUM (regno))
17989 return CIRRUS_REGS;
17991 if (IS_VFP_REGNUM (regno))
17993 if (regno <= D7_VFP_REGNUM)
17994 return VFP_D0_D7_REGS;
17995 else if (regno <= LAST_LO_VFP_REGNUM)
17996 return VFP_LO_REGS;
17997 else
17998 return VFP_HI_REGS;
18001 if (IS_IWMMXT_REGNUM (regno))
18002 return IWMMXT_REGS;
18004 if (IS_IWMMXT_GR_REGNUM (regno))
18005 return IWMMXT_GR_REGS;
18007 return FPA_REGS;
18010 /* Handle a special case when computing the offset
18011 of an argument from the frame pointer. */
18012 int
18013 arm_debugger_arg_offset (int value, rtx addr)
18015 rtx insn;
18017 /* We are only interested if dbxout_parms() failed to compute the offset. */
18018 if (value != 0)
18019 return 0;
18021 /* We can only cope with the case where the address is held in a register. */
18022 if (GET_CODE (addr) != REG)
18023 return 0;
18025 /* If we are using the frame pointer to point at the argument, then
18026 an offset of 0 is correct. */
18027 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18028 return 0;
18030 /* If we are using the stack pointer to point at the
18031 argument, then an offset of 0 is correct. */
18032 /* ??? Check this is consistent with thumb2 frame layout. */
18033 if ((TARGET_THUMB || !frame_pointer_needed)
18034 && REGNO (addr) == SP_REGNUM)
18035 return 0;
18037 /* Oh dear. The argument is pointed to by a register rather
18038 than being held in a register, or being stored at a known
18039 offset from the frame pointer. Since GDB only understands
18040 those two kinds of argument we must translate the address
18041 held in the register into an offset from the frame pointer.
18042 We do this by searching through the insns for the function
18043 looking to see where this register gets its value. If the
18044 register is initialized from the frame pointer plus an offset
18045 then we are in luck and we can continue, otherwise we give up.
18047 This code is exercised by producing debugging information
18048 for a function with arguments like this:
18050 double func (double a, double b, int c, double d) {return d;}
18052 Without this code the stab for parameter 'd' will be set to
18053 an offset of 0 from the frame pointer, rather than 8. */
18055 /* The if() statement says:
18057 If the insn is a normal instruction
18058 and if the insn is setting the value in a register
18059 and if the register being set is the register holding the address of the argument
18060 and if the address is computed by an addition
18061 that involves adding to a register
18062 which is the frame pointer
18063 a constant integer
18065 then... */
18067 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18069 if ( GET_CODE (insn) == INSN
18070 && GET_CODE (PATTERN (insn)) == SET
18071 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18072 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18073 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18074 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18075 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18078 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18080 break;
18084 if (value == 0)
18086 debug_rtx (addr);
18087 warning (0, "unable to compute real location of stacked parameter");
18088 value = 8; /* XXX magic hack */
18091 return value;
18094 typedef enum {
18095 T_V8QI,
18096 T_V4HI,
18097 T_V2SI,
18098 T_V2SF,
18099 T_DI,
18100 T_V16QI,
18101 T_V8HI,
18102 T_V4SI,
18103 T_V4SF,
18104 T_V2DI,
18105 T_TI,
18106 T_EI,
18107 T_OI,
18108 T_MAX /* Size of enum. Keep last. */
18109 } neon_builtin_type_mode;
18111 #define TYPE_MODE_BIT(X) (1 << (X))
18113 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18114 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18115 | TYPE_MODE_BIT (T_DI))
18116 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18117 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18118 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18120 #define v8qi_UP T_V8QI
18121 #define v4hi_UP T_V4HI
18122 #define v2si_UP T_V2SI
18123 #define v2sf_UP T_V2SF
18124 #define di_UP T_DI
18125 #define v16qi_UP T_V16QI
18126 #define v8hi_UP T_V8HI
18127 #define v4si_UP T_V4SI
18128 #define v4sf_UP T_V4SF
18129 #define v2di_UP T_V2DI
18130 #define ti_UP T_TI
18131 #define ei_UP T_EI
18132 #define oi_UP T_OI
18134 #define UP(X) X##_UP
18136 typedef enum {
18137 NEON_BINOP,
18138 NEON_TERNOP,
18139 NEON_UNOP,
18140 NEON_GETLANE,
18141 NEON_SETLANE,
18142 NEON_CREATE,
18143 NEON_DUP,
18144 NEON_DUPLANE,
18145 NEON_COMBINE,
18146 NEON_SPLIT,
18147 NEON_LANEMUL,
18148 NEON_LANEMULL,
18149 NEON_LANEMULH,
18150 NEON_LANEMAC,
18151 NEON_SCALARMUL,
18152 NEON_SCALARMULL,
18153 NEON_SCALARMULH,
18154 NEON_SCALARMAC,
18155 NEON_CONVERT,
18156 NEON_FIXCONV,
18157 NEON_SELECT,
18158 NEON_RESULTPAIR,
18159 NEON_REINTERP,
18160 NEON_VTBL,
18161 NEON_VTBX,
18162 NEON_LOAD1,
18163 NEON_LOAD1LANE,
18164 NEON_STORE1,
18165 NEON_STORE1LANE,
18166 NEON_LOADSTRUCT,
18167 NEON_LOADSTRUCTLANE,
18168 NEON_STORESTRUCT,
18169 NEON_STORESTRUCTLANE,
18170 NEON_LOGICBINOP,
18171 NEON_SHIFTINSERT,
18172 NEON_SHIFTIMM,
18173 NEON_SHIFTACC
18174 } neon_itype;
18176 typedef struct {
18177 const char *name;
18178 const neon_itype itype;
18179 const neon_builtin_type_mode mode;
18180 const enum insn_code code;
18181 unsigned int fcode;
18182 } neon_builtin_datum;
18184 #define CF(N,X) CODE_FOR_neon_##N##X
18186 #define VAR1(T, N, A) \
18187 {#N, NEON_##T, UP (A), CF (N, A), 0}
18188 #define VAR2(T, N, A, B) \
18189 VAR1 (T, N, A), \
18190 {#N, NEON_##T, UP (B), CF (N, B), 0}
18191 #define VAR3(T, N, A, B, C) \
18192 VAR2 (T, N, A, B), \
18193 {#N, NEON_##T, UP (C), CF (N, C), 0}
18194 #define VAR4(T, N, A, B, C, D) \
18195 VAR3 (T, N, A, B, C), \
18196 {#N, NEON_##T, UP (D), CF (N, D), 0}
18197 #define VAR5(T, N, A, B, C, D, E) \
18198 VAR4 (T, N, A, B, C, D), \
18199 {#N, NEON_##T, UP (E), CF (N, E), 0}
18200 #define VAR6(T, N, A, B, C, D, E, F) \
18201 VAR5 (T, N, A, B, C, D, E), \
18202 {#N, NEON_##T, UP (F), CF (N, F), 0}
18203 #define VAR7(T, N, A, B, C, D, E, F, G) \
18204 VAR6 (T, N, A, B, C, D, E, F), \
18205 {#N, NEON_##T, UP (G), CF (N, G), 0}
18206 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18207 VAR7 (T, N, A, B, C, D, E, F, G), \
18208 {#N, NEON_##T, UP (H), CF (N, H), 0}
18209 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18210 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18211 {#N, NEON_##T, UP (I), CF (N, I), 0}
18212 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18213 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18214 {#N, NEON_##T, UP (J), CF (N, J), 0}
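/* As an illustration of the macros above, a (hypothetical) entry such as
   VAR2 (BINOP, vadd, v8qi, v4hi) expands to the two initializers
   {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0} and
   {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0}; the fcode field
   is filled in later by arm_init_neon_builtins.  */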
18216 /* The mode entries in the following table correspond to the "key" type of the
18217 instruction variant, i.e. equivalent to that which would be specified after
18218 the assembler mnemonic, which usually refers to the last vector operand.
18219 (Signed/unsigned/polynomial types are not differentiated here, though, and
18220 are all mapped onto the same mode for a given element size.) The modes
18221 listed per instruction should be the same as those defined for that
18222 instruction's pattern in neon.md. */
18224 static neon_builtin_datum neon_builtin_data[] =
18226 VAR10 (BINOP, vadd,
18227 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18228 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18229 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18230 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18231 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18232 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18233 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18234 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18235 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18236 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18237 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18238 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18239 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18240 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18241 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18242 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18243 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18244 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18245 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18246 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18247 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18248 VAR2 (BINOP, vqdmull, v4hi, v2si),
18249 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18250 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18251 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18252 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18253 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18254 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18255 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18256 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18257 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18258 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18259 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18260 VAR10 (BINOP, vsub,
18261 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18262 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18263 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18264 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18265 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18266 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18267 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18268 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18269 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18270 VAR2 (BINOP, vcage, v2sf, v4sf),
18271 VAR2 (BINOP, vcagt, v2sf, v4sf),
18272 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18273 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18274 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18275 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18276 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18277 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18278 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18279 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18280 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18281 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18282 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18283 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18284 VAR2 (BINOP, vrecps, v2sf, v4sf),
18285 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18286 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18287 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18288 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18289 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18290 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18291 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18292 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18293 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18294 VAR2 (UNOP, vcnt, v8qi, v16qi),
18295 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18296 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18297 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18298 /* FIXME: vget_lane supports more variants than this! */
18299 VAR10 (GETLANE, vget_lane,
18300 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18301 VAR10 (SETLANE, vset_lane,
18302 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18303 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18304 VAR10 (DUP, vdup_n,
18305 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18306 VAR10 (DUPLANE, vdup_lane,
18307 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18308 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18309 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18310 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18311 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18312 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18313 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18314 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18315 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18316 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18317 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18318 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18319 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18320 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18321 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18322 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18323 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18324 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18325 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18326 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18327 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18328 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18329 VAR10 (BINOP, vext,
18330 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18331 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18332 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18333 VAR2 (UNOP, vrev16, v8qi, v16qi),
18334 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18335 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18336 VAR10 (SELECT, vbsl,
18337 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18338 VAR1 (VTBL, vtbl1, v8qi),
18339 VAR1 (VTBL, vtbl2, v8qi),
18340 VAR1 (VTBL, vtbl3, v8qi),
18341 VAR1 (VTBL, vtbl4, v8qi),
18342 VAR1 (VTBX, vtbx1, v8qi),
18343 VAR1 (VTBX, vtbx2, v8qi),
18344 VAR1 (VTBX, vtbx3, v8qi),
18345 VAR1 (VTBX, vtbx4, v8qi),
18346 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18347 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18348 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18349 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18350 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18351 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18352 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18353 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18354 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18355 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18356 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18357 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18358 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18359 VAR10 (LOAD1, vld1,
18360 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18361 VAR10 (LOAD1LANE, vld1_lane,
18362 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18363 VAR10 (LOAD1, vld1_dup,
18364 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18365 VAR10 (STORE1, vst1,
18366 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18367 VAR10 (STORE1LANE, vst1_lane,
18368 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18369 VAR9 (LOADSTRUCT,
18370 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18371 VAR7 (LOADSTRUCTLANE, vld2_lane,
18372 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18373 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18374 VAR9 (STORESTRUCT, vst2,
18375 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18376 VAR7 (STORESTRUCTLANE, vst2_lane,
18377 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18378 VAR9 (LOADSTRUCT,
18379 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18380 VAR7 (LOADSTRUCTLANE, vld3_lane,
18381 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18382 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18383 VAR9 (STORESTRUCT, vst3,
18384 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18385 VAR7 (STORESTRUCTLANE, vst3_lane,
18386 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18387 VAR9 (LOADSTRUCT, vld4,
18388 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18389 VAR7 (LOADSTRUCTLANE, vld4_lane,
18390 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18391 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18392 VAR9 (STORESTRUCT, vst4,
18393 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18394 VAR7 (STORESTRUCTLANE, vst4_lane,
18395 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18396 VAR10 (LOGICBINOP, vand,
18397 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18398 VAR10 (LOGICBINOP, vorr,
18399 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18400 VAR10 (BINOP, veor,
18401 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18402 VAR10 (LOGICBINOP, vbic,
18403 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18404 VAR10 (LOGICBINOP, vorn,
18405 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18408 #undef CF
18409 #undef VAR1
18410 #undef VAR2
18411 #undef VAR3
18412 #undef VAR4
18413 #undef VAR5
18414 #undef VAR6
18415 #undef VAR7
18416 #undef VAR8
18417 #undef VAR9
18418 #undef VAR10
18420 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't
18421 have symbolic names defined here (which would require too much duplication).
18422 FIXME?  */
18423 enum arm_builtins
18425 ARM_BUILTIN_GETWCX,
18426 ARM_BUILTIN_SETWCX,
18428 ARM_BUILTIN_WZERO,
18430 ARM_BUILTIN_WAVG2BR,
18431 ARM_BUILTIN_WAVG2HR,
18432 ARM_BUILTIN_WAVG2B,
18433 ARM_BUILTIN_WAVG2H,
18435 ARM_BUILTIN_WACCB,
18436 ARM_BUILTIN_WACCH,
18437 ARM_BUILTIN_WACCW,
18439 ARM_BUILTIN_WMACS,
18440 ARM_BUILTIN_WMACSZ,
18441 ARM_BUILTIN_WMACU,
18442 ARM_BUILTIN_WMACUZ,
18444 ARM_BUILTIN_WSADB,
18445 ARM_BUILTIN_WSADBZ,
18446 ARM_BUILTIN_WSADH,
18447 ARM_BUILTIN_WSADHZ,
18449 ARM_BUILTIN_WALIGN,
18451 ARM_BUILTIN_TMIA,
18452 ARM_BUILTIN_TMIAPH,
18453 ARM_BUILTIN_TMIABB,
18454 ARM_BUILTIN_TMIABT,
18455 ARM_BUILTIN_TMIATB,
18456 ARM_BUILTIN_TMIATT,
18458 ARM_BUILTIN_TMOVMSKB,
18459 ARM_BUILTIN_TMOVMSKH,
18460 ARM_BUILTIN_TMOVMSKW,
18462 ARM_BUILTIN_TBCSTB,
18463 ARM_BUILTIN_TBCSTH,
18464 ARM_BUILTIN_TBCSTW,
18466 ARM_BUILTIN_WMADDS,
18467 ARM_BUILTIN_WMADDU,
18469 ARM_BUILTIN_WPACKHSS,
18470 ARM_BUILTIN_WPACKWSS,
18471 ARM_BUILTIN_WPACKDSS,
18472 ARM_BUILTIN_WPACKHUS,
18473 ARM_BUILTIN_WPACKWUS,
18474 ARM_BUILTIN_WPACKDUS,
18476 ARM_BUILTIN_WADDB,
18477 ARM_BUILTIN_WADDH,
18478 ARM_BUILTIN_WADDW,
18479 ARM_BUILTIN_WADDSSB,
18480 ARM_BUILTIN_WADDSSH,
18481 ARM_BUILTIN_WADDSSW,
18482 ARM_BUILTIN_WADDUSB,
18483 ARM_BUILTIN_WADDUSH,
18484 ARM_BUILTIN_WADDUSW,
18485 ARM_BUILTIN_WSUBB,
18486 ARM_BUILTIN_WSUBH,
18487 ARM_BUILTIN_WSUBW,
18488 ARM_BUILTIN_WSUBSSB,
18489 ARM_BUILTIN_WSUBSSH,
18490 ARM_BUILTIN_WSUBSSW,
18491 ARM_BUILTIN_WSUBUSB,
18492 ARM_BUILTIN_WSUBUSH,
18493 ARM_BUILTIN_WSUBUSW,
18495 ARM_BUILTIN_WAND,
18496 ARM_BUILTIN_WANDN,
18497 ARM_BUILTIN_WOR,
18498 ARM_BUILTIN_WXOR,
18500 ARM_BUILTIN_WCMPEQB,
18501 ARM_BUILTIN_WCMPEQH,
18502 ARM_BUILTIN_WCMPEQW,
18503 ARM_BUILTIN_WCMPGTUB,
18504 ARM_BUILTIN_WCMPGTUH,
18505 ARM_BUILTIN_WCMPGTUW,
18506 ARM_BUILTIN_WCMPGTSB,
18507 ARM_BUILTIN_WCMPGTSH,
18508 ARM_BUILTIN_WCMPGTSW,
18510 ARM_BUILTIN_TEXTRMSB,
18511 ARM_BUILTIN_TEXTRMSH,
18512 ARM_BUILTIN_TEXTRMSW,
18513 ARM_BUILTIN_TEXTRMUB,
18514 ARM_BUILTIN_TEXTRMUH,
18515 ARM_BUILTIN_TEXTRMUW,
18516 ARM_BUILTIN_TINSRB,
18517 ARM_BUILTIN_TINSRH,
18518 ARM_BUILTIN_TINSRW,
18520 ARM_BUILTIN_WMAXSW,
18521 ARM_BUILTIN_WMAXSH,
18522 ARM_BUILTIN_WMAXSB,
18523 ARM_BUILTIN_WMAXUW,
18524 ARM_BUILTIN_WMAXUH,
18525 ARM_BUILTIN_WMAXUB,
18526 ARM_BUILTIN_WMINSW,
18527 ARM_BUILTIN_WMINSH,
18528 ARM_BUILTIN_WMINSB,
18529 ARM_BUILTIN_WMINUW,
18530 ARM_BUILTIN_WMINUH,
18531 ARM_BUILTIN_WMINUB,
18533 ARM_BUILTIN_WMULUM,
18534 ARM_BUILTIN_WMULSM,
18535 ARM_BUILTIN_WMULUL,
18537 ARM_BUILTIN_PSADBH,
18538 ARM_BUILTIN_WSHUFH,
18540 ARM_BUILTIN_WSLLH,
18541 ARM_BUILTIN_WSLLW,
18542 ARM_BUILTIN_WSLLD,
18543 ARM_BUILTIN_WSRAH,
18544 ARM_BUILTIN_WSRAW,
18545 ARM_BUILTIN_WSRAD,
18546 ARM_BUILTIN_WSRLH,
18547 ARM_BUILTIN_WSRLW,
18548 ARM_BUILTIN_WSRLD,
18549 ARM_BUILTIN_WRORH,
18550 ARM_BUILTIN_WRORW,
18551 ARM_BUILTIN_WRORD,
18552 ARM_BUILTIN_WSLLHI,
18553 ARM_BUILTIN_WSLLWI,
18554 ARM_BUILTIN_WSLLDI,
18555 ARM_BUILTIN_WSRAHI,
18556 ARM_BUILTIN_WSRAWI,
18557 ARM_BUILTIN_WSRADI,
18558 ARM_BUILTIN_WSRLHI,
18559 ARM_BUILTIN_WSRLWI,
18560 ARM_BUILTIN_WSRLDI,
18561 ARM_BUILTIN_WRORHI,
18562 ARM_BUILTIN_WRORWI,
18563 ARM_BUILTIN_WRORDI,
18565 ARM_BUILTIN_WUNPCKIHB,
18566 ARM_BUILTIN_WUNPCKIHH,
18567 ARM_BUILTIN_WUNPCKIHW,
18568 ARM_BUILTIN_WUNPCKILB,
18569 ARM_BUILTIN_WUNPCKILH,
18570 ARM_BUILTIN_WUNPCKILW,
18572 ARM_BUILTIN_WUNPCKEHSB,
18573 ARM_BUILTIN_WUNPCKEHSH,
18574 ARM_BUILTIN_WUNPCKEHSW,
18575 ARM_BUILTIN_WUNPCKEHUB,
18576 ARM_BUILTIN_WUNPCKEHUH,
18577 ARM_BUILTIN_WUNPCKEHUW,
18578 ARM_BUILTIN_WUNPCKELSB,
18579 ARM_BUILTIN_WUNPCKELSH,
18580 ARM_BUILTIN_WUNPCKELSW,
18581 ARM_BUILTIN_WUNPCKELUB,
18582 ARM_BUILTIN_WUNPCKELUH,
18583 ARM_BUILTIN_WUNPCKELUW,
18585 ARM_BUILTIN_THREAD_POINTER,
18587 ARM_BUILTIN_NEON_BASE,
18589 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
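/* The decl for each builtin function, indexed by function code; entries are
   filled in by the arm_init_*_builtins routines and def_mbuiltin below.  */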
18592 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18594 static void
18595 arm_init_neon_builtins (void)
18597 unsigned int i, fcode;
18598 tree decl;
18600 tree neon_intQI_type_node;
18601 tree neon_intHI_type_node;
18602 tree neon_polyQI_type_node;
18603 tree neon_polyHI_type_node;
18604 tree neon_intSI_type_node;
18605 tree neon_intDI_type_node;
18606 tree neon_float_type_node;
18608 tree intQI_pointer_node;
18609 tree intHI_pointer_node;
18610 tree intSI_pointer_node;
18611 tree intDI_pointer_node;
18612 tree float_pointer_node;
18614 tree const_intQI_node;
18615 tree const_intHI_node;
18616 tree const_intSI_node;
18617 tree const_intDI_node;
18618 tree const_float_node;
18620 tree const_intQI_pointer_node;
18621 tree const_intHI_pointer_node;
18622 tree const_intSI_pointer_node;
18623 tree const_intDI_pointer_node;
18624 tree const_float_pointer_node;
18626 tree V8QI_type_node;
18627 tree V4HI_type_node;
18628 tree V2SI_type_node;
18629 tree V2SF_type_node;
18630 tree V16QI_type_node;
18631 tree V8HI_type_node;
18632 tree V4SI_type_node;
18633 tree V4SF_type_node;
18634 tree V2DI_type_node;
18636 tree intUQI_type_node;
18637 tree intUHI_type_node;
18638 tree intUSI_type_node;
18639 tree intUDI_type_node;
18641 tree intEI_type_node;
18642 tree intOI_type_node;
18643 tree intCI_type_node;
18644 tree intXI_type_node;
18646 tree V8QI_pointer_node;
18647 tree V4HI_pointer_node;
18648 tree V2SI_pointer_node;
18649 tree V2SF_pointer_node;
18650 tree V16QI_pointer_node;
18651 tree V8HI_pointer_node;
18652 tree V4SI_pointer_node;
18653 tree V4SF_pointer_node;
18654 tree V2DI_pointer_node;
18656 tree void_ftype_pv8qi_v8qi_v8qi;
18657 tree void_ftype_pv4hi_v4hi_v4hi;
18658 tree void_ftype_pv2si_v2si_v2si;
18659 tree void_ftype_pv2sf_v2sf_v2sf;
18660 tree void_ftype_pdi_di_di;
18661 tree void_ftype_pv16qi_v16qi_v16qi;
18662 tree void_ftype_pv8hi_v8hi_v8hi;
18663 tree void_ftype_pv4si_v4si_v4si;
18664 tree void_ftype_pv4sf_v4sf_v4sf;
18665 tree void_ftype_pv2di_v2di_v2di;
18667 tree reinterp_ftype_dreg[5][5];
18668 tree reinterp_ftype_qreg[5][5];
18669 tree dreg_types[5], qreg_types[5];
18671 /* Create distinguished type nodes for NEON vector element types,
18672 and pointers to values of such types, so we can detect them later. */
18673 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18674 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18675 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18676 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18677 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18678 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18679 neon_float_type_node = make_node (REAL_TYPE);
18680 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18681 layout_type (neon_float_type_node);
18683 /* Define typedefs which exactly correspond to the modes we are basing vector
18684 types on. If you change these names you'll need to change
18685 the table used by arm_mangle_type too. */
18686 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18687 "__builtin_neon_qi");
18688 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18689 "__builtin_neon_hi");
18690 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18691 "__builtin_neon_si");
18692 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18693 "__builtin_neon_sf");
18694 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18695 "__builtin_neon_di");
18696 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18697 "__builtin_neon_poly8");
18698 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18699 "__builtin_neon_poly16");
18701 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18702 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18703 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18704 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18705 float_pointer_node = build_pointer_type (neon_float_type_node);
18707 /* Next create constant-qualified versions of the above types. */
18708 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18709 TYPE_QUAL_CONST);
18710 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18711 TYPE_QUAL_CONST);
18712 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18713 TYPE_QUAL_CONST);
18714 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18715 TYPE_QUAL_CONST);
18716 const_float_node = build_qualified_type (neon_float_type_node,
18717 TYPE_QUAL_CONST);
18719 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18720 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18721 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18722 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18723 const_float_pointer_node = build_pointer_type (const_float_node);
18725 /* Now create vector types based on our NEON element types. */
18726 /* 64-bit vectors. */
18727 V8QI_type_node =
18728 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18729 V4HI_type_node =
18730 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18731 V2SI_type_node =
18732 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18733 V2SF_type_node =
18734 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18735 /* 128-bit vectors. */
18736 V16QI_type_node =
18737 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18738 V8HI_type_node =
18739 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18740 V4SI_type_node =
18741 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18742 V4SF_type_node =
18743 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18744 V2DI_type_node =
18745 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18747 /* Unsigned integer types for various mode sizes. */
18748 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18749 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18750 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18751 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18753 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18754 "__builtin_neon_uqi");
18755 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18756 "__builtin_neon_uhi");
18757 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18758 "__builtin_neon_usi");
18759 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18760 "__builtin_neon_udi");
18762 /* Opaque integer types for structures of vectors. */
18763 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18764 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18765 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18766 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18768 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18769 "__builtin_neon_ti");
18770 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18771 "__builtin_neon_ei");
18772 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18773 "__builtin_neon_oi");
18774 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18775 "__builtin_neon_ci");
18776 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18777 "__builtin_neon_xi");
18779 /* Pointers to vector types. */
18780 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18781 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18782 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18783 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18784 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18785 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18786 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18787 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18788 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18790 /* Operations which return results as pairs. */
18791 void_ftype_pv8qi_v8qi_v8qi =
18792 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18793 V8QI_type_node, NULL);
18794 void_ftype_pv4hi_v4hi_v4hi =
18795 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18796 V4HI_type_node, NULL);
18797 void_ftype_pv2si_v2si_v2si =
18798 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18799 V2SI_type_node, NULL);
18800 void_ftype_pv2sf_v2sf_v2sf =
18801 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18802 V2SF_type_node, NULL);
18803 void_ftype_pdi_di_di =
18804 build_function_type_list (void_type_node, intDI_pointer_node,
18805 neon_intDI_type_node, neon_intDI_type_node, NULL);
18806 void_ftype_pv16qi_v16qi_v16qi =
18807 build_function_type_list (void_type_node, V16QI_pointer_node,
18808 V16QI_type_node, V16QI_type_node, NULL);
18809 void_ftype_pv8hi_v8hi_v8hi =
18810 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18811 V8HI_type_node, NULL);
18812 void_ftype_pv4si_v4si_v4si =
18813 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18814 V4SI_type_node, NULL);
18815 void_ftype_pv4sf_v4sf_v4sf =
18816 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18817 V4SF_type_node, NULL);
18818 void_ftype_pv2di_v2di_v2di =
18819 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18820 V2DI_type_node, NULL);
18822 dreg_types[0] = V8QI_type_node;
18823 dreg_types[1] = V4HI_type_node;
18824 dreg_types[2] = V2SI_type_node;
18825 dreg_types[3] = V2SF_type_node;
18826 dreg_types[4] = neon_intDI_type_node;
18828 qreg_types[0] = V16QI_type_node;
18829 qreg_types[1] = V8HI_type_node;
18830 qreg_types[2] = V4SI_type_node;
18831 qreg_types[3] = V4SF_type_node;
18832 qreg_types[4] = V2DI_type_node;
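/* Build the function types used for the vreinterpret builtins:
   reinterp_ftype_dreg[i][j] is the type of a function returning
   dreg_types[i] from a single argument of dreg_types[j], and likewise
   reinterp_ftype_qreg for the quadword types.  */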
18834 for (i = 0; i < 5; i++)
18836 int j;
18837 for (j = 0; j < 5; j++)
18839 reinterp_ftype_dreg[i][j]
18840 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18841 reinterp_ftype_qreg[i][j]
18842 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18846 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
18847 i < ARRAY_SIZE (neon_builtin_data);
18848 i++, fcode++)
18850 neon_builtin_datum *d = &neon_builtin_data[i];
18852 const char* const modenames[] = {
18853 "v8qi", "v4hi", "v2si", "v2sf", "di",
18854 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18855 "ti", "ei", "oi"
18857 char namebuf[60];
18858 tree ftype = NULL;
18859 int is_load = 0, is_store = 0;
18861 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
18863 d->fcode = fcode;
18865 switch (d->itype)
18867 case NEON_LOAD1:
18868 case NEON_LOAD1LANE:
18869 case NEON_LOADSTRUCT:
18870 case NEON_LOADSTRUCTLANE:
18871 is_load = 1;
18872 /* Fall through. */
18873 case NEON_STORE1:
18874 case NEON_STORE1LANE:
18875 case NEON_STORESTRUCT:
18876 case NEON_STORESTRUCTLANE:
18877 if (!is_load)
18878 is_store = 1;
18879 /* Fall through. */
18880 case NEON_UNOP:
18881 case NEON_BINOP:
18882 case NEON_LOGICBINOP:
18883 case NEON_SHIFTINSERT:
18884 case NEON_TERNOP:
18885 case NEON_GETLANE:
18886 case NEON_SETLANE:
18887 case NEON_CREATE:
18888 case NEON_DUP:
18889 case NEON_DUPLANE:
18890 case NEON_SHIFTIMM:
18891 case NEON_SHIFTACC:
18892 case NEON_COMBINE:
18893 case NEON_SPLIT:
18894 case NEON_CONVERT:
18895 case NEON_FIXCONV:
18896 case NEON_LANEMUL:
18897 case NEON_LANEMULL:
18898 case NEON_LANEMULH:
18899 case NEON_LANEMAC:
18900 case NEON_SCALARMUL:
18901 case NEON_SCALARMULL:
18902 case NEON_SCALARMULH:
18903 case NEON_SCALARMAC:
18904 case NEON_SELECT:
18905 case NEON_VTBL:
18906 case NEON_VTBX:
18908 int k;
18909 tree return_type = void_type_node, args = void_list_node;
18911 /* Build a function type directly from the insn_data for
18912 this builtin. The build_function_type() function takes
18913 care of removing duplicates for us. */
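/* The loop walks the generator's operands from last to first, so that by
   the end operand 0 provides the return type (except for stores, where
   operand 0 is the memory destination, which becomes the first argument
   while the builtin returns void) and the remaining operands have been
   consed onto ARGS in order.  */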
18914 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
18916 tree eltype;
18918 if (is_load && k == 1)
18920 /* Neon load patterns always have the memory
18921 operand in the operand 1 position. */
18922 gcc_assert (insn_data[d->code].operand[k].predicate
18923 == neon_struct_operand);
18925 switch (d->mode)
18927 case T_V8QI:
18928 case T_V16QI:
18929 eltype = const_intQI_pointer_node;
18930 break;
18932 case T_V4HI:
18933 case T_V8HI:
18934 eltype = const_intHI_pointer_node;
18935 break;
18937 case T_V2SI:
18938 case T_V4SI:
18939 eltype = const_intSI_pointer_node;
18940 break;
18942 case T_V2SF:
18943 case T_V4SF:
18944 eltype = const_float_pointer_node;
18945 break;
18947 case T_DI:
18948 case T_V2DI:
18949 eltype = const_intDI_pointer_node;
18950 break;
18952 default: gcc_unreachable ();
18955 else if (is_store && k == 0)
18957 /* Similarly, Neon store patterns use operand 0 as
18958 the memory location to store to. */
18959 gcc_assert (insn_data[d->code].operand[k].predicate
18960 == neon_struct_operand);
18962 switch (d->mode)
18964 case T_V8QI:
18965 case T_V16QI:
18966 eltype = intQI_pointer_node;
18967 break;
18969 case T_V4HI:
18970 case T_V8HI:
18971 eltype = intHI_pointer_node;
18972 break;
18974 case T_V2SI:
18975 case T_V4SI:
18976 eltype = intSI_pointer_node;
18977 break;
18979 case T_V2SF:
18980 case T_V4SF:
18981 eltype = float_pointer_node;
18982 break;
18984 case T_DI:
18985 case T_V2DI:
18986 eltype = intDI_pointer_node;
18987 break;
18989 default: gcc_unreachable ();
18992 else
18994 switch (insn_data[d->code].operand[k].mode)
18996 case VOIDmode: eltype = void_type_node; break;
18997 /* Scalars. */
18998 case QImode: eltype = neon_intQI_type_node; break;
18999 case HImode: eltype = neon_intHI_type_node; break;
19000 case SImode: eltype = neon_intSI_type_node; break;
19001 case SFmode: eltype = neon_float_type_node; break;
19002 case DImode: eltype = neon_intDI_type_node; break;
19003 case TImode: eltype = intTI_type_node; break;
19004 case EImode: eltype = intEI_type_node; break;
19005 case OImode: eltype = intOI_type_node; break;
19006 case CImode: eltype = intCI_type_node; break;
19007 case XImode: eltype = intXI_type_node; break;
19008 /* 64-bit vectors. */
19009 case V8QImode: eltype = V8QI_type_node; break;
19010 case V4HImode: eltype = V4HI_type_node; break;
19011 case V2SImode: eltype = V2SI_type_node; break;
19012 case V2SFmode: eltype = V2SF_type_node; break;
19013 /* 128-bit vectors. */
19014 case V16QImode: eltype = V16QI_type_node; break;
19015 case V8HImode: eltype = V8HI_type_node; break;
19016 case V4SImode: eltype = V4SI_type_node; break;
19017 case V4SFmode: eltype = V4SF_type_node; break;
19018 case V2DImode: eltype = V2DI_type_node; break;
19019 default: gcc_unreachable ();
19023 if (k == 0 && !is_store)
19024 return_type = eltype;
19025 else
19026 args = tree_cons (NULL_TREE, eltype, args);
19029 ftype = build_function_type (return_type, args);
19031 break;
19033 case NEON_RESULTPAIR:
19035 switch (insn_data[d->code].operand[1].mode)
19037 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19038 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19039 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19040 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19041 case DImode: ftype = void_ftype_pdi_di_di; break;
19042 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19043 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19044 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19045 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19046 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19047 default: gcc_unreachable ();
19050 break;
19052 case NEON_REINTERP:
19054 /* We iterate over 5 doubleword types, then 5 quadword
19055 types. */
19056 int rhs = d->mode % 5;
19057 switch (insn_data[d->code].operand[0].mode)
19059 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19060 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19061 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19062 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19063 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19064 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19065 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19066 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19067 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19068 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19069 default: gcc_unreachable ();
19072 break;
19074 default:
19075 gcc_unreachable ();
19078 gcc_assert (ftype != NULL);
19080 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19082 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19083 NULL_TREE);
19084 arm_builtin_decls[fcode] = decl;
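/* The declarations created above follow the naming scheme
   "__builtin_neon_<name><mode>"; for instance the v8qi variant of vadd
   becomes __builtin_neon_vaddv8qi.  */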
19088 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19089 do \
19091 if ((MASK) & insn_flags) \
19093 tree bdecl; \
19094 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19095 BUILT_IN_MD, NULL, NULL_TREE); \
19096 arm_builtin_decls[CODE] = bdecl; \
19099 while (0)
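/* The macro above declares the builtin NAME with type TYPE and function
   code CODE, but only when the architecture flags in MASK are present in
   insn_flags, and records the resulting decl in arm_builtin_decls.  */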
19101 struct builtin_description
19103 const unsigned int mask;
19104 const enum insn_code icode;
19105 const char * const name;
19106 const enum arm_builtins code;
19107 const enum rtx_code comparison;
19108 const unsigned int flag;
19111 static const struct builtin_description bdesc_2arg[] =
19113 #define IWMMXT_BUILTIN(code, string, builtin) \
19114 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19115 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19117 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19118 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19119 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19120 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19121 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19122 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19123 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19124 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19125 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19126 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19127 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19128 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19129 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19130 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19131 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19132 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19133 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19134 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19135 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19136 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19137 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19138 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19139 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19140 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19141 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19142 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19143 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19144 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19145 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19146 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19147 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19148 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19149 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19150 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19151 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19152 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19153 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19154 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19155 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19156 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19157 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19158 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19159 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19160 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19161 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19162 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19163 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19164 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19165 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19166 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19167 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19168 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19169 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19170 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19171 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19172 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19173 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
19174 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
19176 #define IWMMXT_BUILTIN2(code, builtin) \
19177 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
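/* Entries created with IWMMXT_BUILTIN2 carry a NULL name: they are matched
   by their ARM_BUILTIN_* code when expanding calls and, it is assumed, are
   given their user-visible __builtin_arm_* names separately (e.g. via
   def_mbuiltin in arm_init_iwmmxt_builtins).  */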
19179 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19180 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19181 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19182 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19183 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19184 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19185 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
19186 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
19187 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
19188 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
19189 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
19190 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
19191 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
19192 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
19193 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
19194 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
19195 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
19196 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
19197 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
19198 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
19199 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
19200 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
19201 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
19202 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
19203 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
19204 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
19205 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
19206 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
19207 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
19208 IWMMXT_BUILTIN2 (rordi3, WRORDI)
19209 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19210 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19213 static const struct builtin_description bdesc_1arg[] =
19215 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19216 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19217 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19218 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19219 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19220 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19221 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19222 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19223 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19224 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19225 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19226 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19227 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19228 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19229 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19230 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19231 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19232 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19235 /* Set up all the iWMMXt builtins. This is not called if
19236 TARGET_IWMMXT is zero. */
19238 static void
19239 arm_init_iwmmxt_builtins (void)
19241 const struct builtin_description * d;
19242 size_t i;
19244 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19245 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19246 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19248 tree int_ftype_int
19249 = build_function_type_list (integer_type_node,
19250 integer_type_node, NULL_TREE);
19251 tree v8qi_ftype_v8qi_v8qi_int
19252 = build_function_type_list (V8QI_type_node,
19253 V8QI_type_node, V8QI_type_node,
19254 integer_type_node, NULL_TREE);
19255 tree v4hi_ftype_v4hi_int
19256 = build_function_type_list (V4HI_type_node,
19257 V4HI_type_node, integer_type_node, NULL_TREE);
19258 tree v2si_ftype_v2si_int
19259 = build_function_type_list (V2SI_type_node,
19260 V2SI_type_node, integer_type_node, NULL_TREE);
19261 tree v2si_ftype_di_di
19262 = build_function_type_list (V2SI_type_node,
19263 long_long_integer_type_node,
19264 long_long_integer_type_node,
19265 NULL_TREE);
19266 tree di_ftype_di_int
19267 = build_function_type_list (long_long_integer_type_node,
19268 long_long_integer_type_node,
19269 integer_type_node, NULL_TREE);
19270 tree di_ftype_di_int_int
19271 = build_function_type_list (long_long_integer_type_node,
19272 long_long_integer_type_node,
19273 integer_type_node,
19274 integer_type_node, NULL_TREE);
19275 tree int_ftype_v8qi
19276 = build_function_type_list (integer_type_node,
19277 V8QI_type_node, NULL_TREE);
19278 tree int_ftype_v4hi
19279 = build_function_type_list (integer_type_node,
19280 V4HI_type_node, NULL_TREE);
19281 tree int_ftype_v2si
19282 = build_function_type_list (integer_type_node,
19283 V2SI_type_node, NULL_TREE);
19284 tree int_ftype_v8qi_int
19285 = build_function_type_list (integer_type_node,
19286 V8QI_type_node, integer_type_node, NULL_TREE);
19287 tree int_ftype_v4hi_int
19288 = build_function_type_list (integer_type_node,
19289 V4HI_type_node, integer_type_node, NULL_TREE);
19290 tree int_ftype_v2si_int
19291 = build_function_type_list (integer_type_node,
19292 V2SI_type_node, integer_type_node, NULL_TREE);
19293 tree v8qi_ftype_v8qi_int_int
19294 = build_function_type_list (V8QI_type_node,
19295 V8QI_type_node, integer_type_node,
19296 integer_type_node, NULL_TREE);
19297 tree v4hi_ftype_v4hi_int_int
19298 = build_function_type_list (V4HI_type_node,
19299 V4HI_type_node, integer_type_node,
19300 integer_type_node, NULL_TREE);
19301 tree v2si_ftype_v2si_int_int
19302 = build_function_type_list (V2SI_type_node,
19303 V2SI_type_node, integer_type_node,
19304 integer_type_node, NULL_TREE);
19305 /* Miscellaneous. */
19306 tree v8qi_ftype_v4hi_v4hi
19307 = build_function_type_list (V8QI_type_node,
19308 V4HI_type_node, V4HI_type_node, NULL_TREE);
19309 tree v4hi_ftype_v2si_v2si
19310 = build_function_type_list (V4HI_type_node,
19311 V2SI_type_node, V2SI_type_node, NULL_TREE);
19312 tree v2si_ftype_v4hi_v4hi
19313 = build_function_type_list (V2SI_type_node,
19314 V4HI_type_node, V4HI_type_node, NULL_TREE);
19315 tree v2si_ftype_v8qi_v8qi
19316 = build_function_type_list (V2SI_type_node,
19317 V8QI_type_node, V8QI_type_node, NULL_TREE);
19318 tree v4hi_ftype_v4hi_di
19319 = build_function_type_list (V4HI_type_node,
19320 V4HI_type_node, long_long_integer_type_node,
19321 NULL_TREE);
19322 tree v2si_ftype_v2si_di
19323 = build_function_type_list (V2SI_type_node,
19324 V2SI_type_node, long_long_integer_type_node,
19325 NULL_TREE);
19326 tree void_ftype_int_int
19327 = build_function_type_list (void_type_node,
19328 integer_type_node, integer_type_node,
19329 NULL_TREE);
19330 tree di_ftype_void
19331 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19332 tree di_ftype_v8qi
19333 = build_function_type_list (long_long_integer_type_node,
19334 V8QI_type_node, NULL_TREE);
19335 tree di_ftype_v4hi
19336 = build_function_type_list (long_long_integer_type_node,
19337 V4HI_type_node, NULL_TREE);
19338 tree di_ftype_v2si
19339 = build_function_type_list (long_long_integer_type_node,
19340 V2SI_type_node, NULL_TREE);
19341 tree v2si_ftype_v4hi
19342 = build_function_type_list (V2SI_type_node,
19343 V4HI_type_node, NULL_TREE);
19344 tree v4hi_ftype_v8qi
19345 = build_function_type_list (V4HI_type_node,
19346 V8QI_type_node, NULL_TREE);
19348 tree di_ftype_di_v4hi_v4hi
19349 = build_function_type_list (long_long_unsigned_type_node,
19350 long_long_unsigned_type_node,
19351 V4HI_type_node, V4HI_type_node,
19352 NULL_TREE);
19354 tree di_ftype_v4hi_v4hi
19355 = build_function_type_list (long_long_unsigned_type_node,
19356 V4HI_type_node, V4HI_type_node,
19357 NULL_TREE);
19359 /* Normal vector binops. */
19360 tree v8qi_ftype_v8qi_v8qi
19361 = build_function_type_list (V8QI_type_node,
19362 V8QI_type_node, V8QI_type_node, NULL_TREE);
19363 tree v4hi_ftype_v4hi_v4hi
19364 = build_function_type_list (V4HI_type_node,
19365 V4HI_type_node, V4HI_type_node, NULL_TREE);
19366 tree v2si_ftype_v2si_v2si
19367 = build_function_type_list (V2SI_type_node,
19368 V2SI_type_node, V2SI_type_node, NULL_TREE);
19369 tree di_ftype_di_di
19370 = build_function_type_list (long_long_unsigned_type_node,
19371 long_long_unsigned_type_node,
19372 long_long_unsigned_type_node,
19373 NULL_TREE);
19375 /* Add all builtins that are more or less simple operations on two
19376 operands. */
19377 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19379 /* Use one of the operands; the target can have a different mode for
19380 mask-generating compares. */
19381 enum machine_mode mode;
19382 tree type;
19384 if (d->name == 0)
19385 continue;
19387 mode = insn_data[d->icode].operand[1].mode;
19389 switch (mode)
19391 case V8QImode:
19392 type = v8qi_ftype_v8qi_v8qi;
19393 break;
19394 case V4HImode:
19395 type = v4hi_ftype_v4hi_v4hi;
19396 break;
19397 case V2SImode:
19398 type = v2si_ftype_v2si_v2si;
19399 break;
19400 case DImode:
19401 type = di_ftype_di_di;
19402 break;
19404 default:
19405 gcc_unreachable ();
19408 def_mbuiltin (d->mask, d->name, type, d->code);
19411 /* Add the remaining MMX insns with somewhat more complicated types. */
19412 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19414 ARM_BUILTIN_ ## CODE)
19416 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19417 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19418 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19420 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19421 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19422 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19423 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19424 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19425 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19427 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19428 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19429 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19430 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19431 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19432 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19434 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19435 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19436 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19437 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19438 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19439 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19441 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19442 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19443 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19444 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19445 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19446 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19448 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19450 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19451 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19452 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19453 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19455 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19456 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19457 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19458 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19459 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19460 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19461 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19462 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19463 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19465 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19466 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19467 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19469 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19470 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19471 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19473 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19474 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19475 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19476 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19477 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19478 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19480 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19481 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19482 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19483 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19484 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19485 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19486 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19487 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19488 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19489 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19490 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19491 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19493 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19494 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19495 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19496 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19498 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19499 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19500 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19501 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19502 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19503 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19504 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19506 #undef iwmmx_mbuiltin
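/* A minimal user-level sketch of the C prototypes implied by the type
   trees registered above; illustrative only (it shows the argument and
   return types, not the operand semantics) and not part of the compiler.  */
#if 0
long long
iwmmxt_sketch (int a, int b)
{
  long long acc = __builtin_arm_wzero ();	/* di_ftype_void  */
  acc = __builtin_arm_tmia (acc, a, b);		/* di_ftype_di_int_int  */
  int cr = __builtin_arm_getwcx (0);		/* int_ftype_int  */
  __builtin_arm_setwcx (cr, 0);			/* void_ftype_int_int  */
  return acc;
}
#endif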
19509 static void
19510 arm_init_tls_builtins (void)
19512 tree ftype, decl;
19514 ftype = build_function_type (ptr_type_node, void_list_node);
19515 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19516 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19517 NULL, NULL_TREE);
19518 TREE_NOTHROW (decl) = 1;
19519 TREE_READONLY (decl) = 1;
19520 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
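/* Illustrative use of the builtin registered above: it takes no
   arguments and returns the TLS thread pointer as a void *; the
   NOTHROW/READONLY bits allow repeated calls to be CSEd.  Sketch only.  */
#if 0
void *
get_tp (void)
{
  return __builtin_thread_pointer ();
}
#endif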
19523 static void
19524 arm_init_fp16_builtins (void)
19526 tree fp16_type = make_node (REAL_TYPE);
19527 TYPE_PRECISION (fp16_type) = 16;
19528 layout_type (fp16_type);
19529 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19532 static void
19533 arm_init_builtins (void)
19535 arm_init_tls_builtins ();
19537 if (TARGET_REALLY_IWMMXT)
19538 arm_init_iwmmxt_builtins ();
19540 if (TARGET_NEON)
19541 arm_init_neon_builtins ();
19543 if (arm_fp16_format)
19544 arm_init_fp16_builtins ();
19547 /* Return the ARM builtin for CODE. */
19549 static tree
19550 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19552 if (code >= ARM_BUILTIN_MAX)
19553 return error_mark_node;
19555 return arm_builtin_decls[code];
19558 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19560 static const char *
19561 arm_invalid_parameter_type (const_tree t)
19563 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19564 return N_("function parameters cannot have __fp16 type");
19565 return NULL;
19568 /* Implement TARGET_INVALID_RETURN_TYPE. */
19570 static const char *
19571 arm_invalid_return_type (const_tree t)
19573 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19574 return N_("functions cannot return __fp16 type");
19575 return NULL;
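/* Declarations rejected by the two hooks above (sketch):  */
#if 0
extern void   takes_fp16 (__fp16 x);	/* "function parameters cannot have __fp16 type"  */
extern __fp16 returns_fp16 (void);	/* "functions cannot return __fp16 type"  */
#endif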
19578 /* Implement TARGET_PROMOTED_TYPE. */
19580 static tree
19581 arm_promoted_type (const_tree t)
19583 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19584 return float_type_node;
19585 return NULL_TREE;
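/* Consequence of the promotion above (sketch): arithmetic on __fp16
   operands is carried out in float.  */
#if 0
__fp16 a, b;
float sum_in_float (void) { return a + b; }	/* both operands promoted to float  */
#endif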
19588 /* Implement TARGET_CONVERT_TO_TYPE.
19589 Specifically, this hook implements the peculiarity of the ARM
19590 half-precision floating-point C semantics that requires conversions between
19591 __fp16 and double to go through an intermediate conversion to float. */
19593 static tree
19594 arm_convert_to_type (tree type, tree expr)
19596 tree fromtype = TREE_TYPE (expr);
19597 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19598 return NULL_TREE;
19599 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19600 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19601 return convert (type, convert (float_type_node, expr));
19602 return NULL_TREE;
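/* Sketch of the semantics implemented above: conversions between __fp16
   and double are split into two steps through float.  */
#if 0
__fp16 h;
double widen (void) { return h; }		/* treated as (double) (float) h  */
void   narrow (double d) { h = (__fp16) d; }	/* treated as (__fp16) (float) d  */
#endif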
19605 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19606 This simply adds HFmode as a supported mode; even though we don't
19607 implement arithmetic on this type directly, it's supported by
19608 optabs conversions, much the way the double-word arithmetic is
19609 special-cased in the default hook. */
19611 static bool
19612 arm_scalar_mode_supported_p (enum machine_mode mode)
19614 if (mode == HFmode)
19615 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19616 else if (ALL_FIXED_POINT_MODE_P (mode))
19617 return true;
19618 else
19619 return default_scalar_mode_supported_p (mode);
19622 /* Errors in the source file can cause expand_expr to return const0_rtx
19623 where we expect a vector. To avoid crashing, use one of the vector
19624 clear instructions. */
19626 static rtx
19627 safe_vector_operand (rtx x, enum machine_mode mode)
19629 if (x != const0_rtx)
19630 return x;
19631 x = gen_reg_rtx (mode);
19633 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19634 : gen_rtx_SUBREG (DImode, x, 0)));
19635 return x;
19638 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19640 static rtx
19641 arm_expand_binop_builtin (enum insn_code icode,
19642 tree exp, rtx target)
19644 rtx pat;
19645 tree arg0 = CALL_EXPR_ARG (exp, 0);
19646 tree arg1 = CALL_EXPR_ARG (exp, 1);
19647 rtx op0 = expand_normal (arg0);
19648 rtx op1 = expand_normal (arg1);
19649 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19650 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19651 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19653 if (VECTOR_MODE_P (mode0))
19654 op0 = safe_vector_operand (op0, mode0);
19655 if (VECTOR_MODE_P (mode1))
19656 op1 = safe_vector_operand (op1, mode1);
19658 if (! target
19659 || GET_MODE (target) != tmode
19660 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19661 target = gen_reg_rtx (tmode);
19663 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19665 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19666 op0 = copy_to_mode_reg (mode0, op0);
19667 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19668 op1 = copy_to_mode_reg (mode1, op1);
19670 pat = GEN_FCN (icode) (target, op0, op1);
19671 if (! pat)
19672 return 0;
19673 emit_insn (pat);
19674 return target;
19677 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19679 static rtx
19680 arm_expand_unop_builtin (enum insn_code icode,
19681 tree exp, rtx target, int do_load)
19683 rtx pat;
19684 tree arg0 = CALL_EXPR_ARG (exp, 0);
19685 rtx op0 = expand_normal (arg0);
19686 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19687 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19689 if (! target
19690 || GET_MODE (target) != tmode
19691 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19692 target = gen_reg_rtx (tmode);
19693 if (do_load)
19694 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19695 else
19697 if (VECTOR_MODE_P (mode0))
19698 op0 = safe_vector_operand (op0, mode0);
19700 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19701 op0 = copy_to_mode_reg (mode0, op0);
19704 pat = GEN_FCN (icode) (target, op0);
19705 if (! pat)
19706 return 0;
19707 emit_insn (pat);
19708 return target;
19711 typedef enum {
19712 NEON_ARG_COPY_TO_REG,
19713 NEON_ARG_CONSTANT,
19714 NEON_ARG_MEMORY,
19715 NEON_ARG_STOP
19716 } builtin_arg;
19718 #define NEON_MAX_BUILTIN_ARGS 5
19720 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19721 and return an expression for the accessed memory.
19723 The intrinsic function operates on a block of registers that has
19724 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19725 The function references the memory at EXP in mode MEM_MODE;
19726 this mode may be BLKmode if no more suitable mode is available. */
19728 static tree
19729 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
19730 enum machine_mode reg_mode,
19731 neon_builtin_type_mode type_mode)
19733 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
19734 tree elem_type, upper_bound, array_type;
19736 /* Work out the size of the register block in bytes. */
19737 reg_size = GET_MODE_SIZE (reg_mode);
19739 /* Work out the size of each vector in bytes. */
19740 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
19741 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
19743 /* Work out how many vectors there are. */
19744 gcc_assert (reg_size % vector_size == 0);
19745 nvectors = reg_size / vector_size;
19747 /* Work out how many elements are being loaded or stored.
19748 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19749 and memory elements; anything else implies a lane load or store. */
19750 if (mem_mode == reg_mode)
19751 nelems = vector_size * nvectors;
19752 else
19753 nelems = nvectors;
19755 /* Work out the type of each element. */
19756 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
19757 elem_type = TREE_TYPE (TREE_TYPE (exp));
19759 /* Create a type that describes the full access. */
19760 upper_bound = build_int_cst (size_type_node, nelems - 1);
19761 array_type = build_array_type (elem_type, build_index_type (upper_bound));
19763 /* Dereference EXP using that type. */
19764 exp = convert (build_pointer_type (array_type), exp);
19765 return fold_build2 (MEM_REF, array_type, exp,
19766 build_int_cst (TREE_TYPE (exp), 0));
19769 /* Expand a Neon builtin. */
19770 static rtx
19771 arm_expand_neon_args (rtx target, int icode, int have_retval,
19772 neon_builtin_type_mode type_mode,
19773 tree exp, ...)
19775 va_list ap;
19776 rtx pat;
19777 tree arg[NEON_MAX_BUILTIN_ARGS];
19778 rtx op[NEON_MAX_BUILTIN_ARGS];
19779 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19780 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19781 enum machine_mode other_mode;
19782 int argc = 0;
19783 int opno;
19785 if (have_retval
19786 && (!target
19787 || GET_MODE (target) != tmode
19788 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19789 target = gen_reg_rtx (tmode);
19791 va_start (ap, exp);
19793 for (;;)
19795 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19797 if (thisarg == NEON_ARG_STOP)
19798 break;
19799 else
19801 opno = argc + have_retval;
19802 mode[argc] = insn_data[icode].operand[opno].mode;
19803 arg[argc] = CALL_EXPR_ARG (exp, argc);
19804 if (thisarg == NEON_ARG_MEMORY)
19806 other_mode = insn_data[icode].operand[1 - opno].mode;
19807 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
19808 other_mode, type_mode);
19810 op[argc] = expand_normal (arg[argc]);
19812 switch (thisarg)
19814 case NEON_ARG_COPY_TO_REG:
19815 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19816 if (!(*insn_data[icode].operand[opno].predicate)
19817 (op[argc], mode[argc]))
19818 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19819 break;
19821 case NEON_ARG_CONSTANT:
19822 /* FIXME: This error message is somewhat unhelpful. */
19823 if (!(*insn_data[icode].operand[opno].predicate)
19824 (op[argc], mode[argc]))
19825 error ("argument must be a constant");
19826 break;
19828 case NEON_ARG_MEMORY:
19829 gcc_assert (MEM_P (op[argc]));
19830 PUT_MODE (op[argc], mode[argc]);
19831 /* ??? arm_neon.h uses the same built-in functions for signed
19832 and unsigned accesses, casting where necessary. This isn't
19833 alias safe. */
19834 set_mem_alias_set (op[argc], 0);
19835 if (!(*insn_data[icode].operand[opno].predicate)
19836 (op[argc], mode[argc]))
19837 op[argc] = (replace_equiv_address
19838 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
19839 break;
19841 case NEON_ARG_STOP:
19842 gcc_unreachable ();
19845 argc++;
19849 va_end (ap);
19851 if (have_retval)
19852 switch (argc)
19854 case 1:
19855 pat = GEN_FCN (icode) (target, op[0]);
19856 break;
19858 case 2:
19859 pat = GEN_FCN (icode) (target, op[0], op[1]);
19860 break;
19862 case 3:
19863 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19864 break;
19866 case 4:
19867 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19868 break;
19870 case 5:
19871 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19872 break;
19874 default:
19875 gcc_unreachable ();
19877 else
19878 switch (argc)
19880 case 1:
19881 pat = GEN_FCN (icode) (op[0]);
19882 break;
19884 case 2:
19885 pat = GEN_FCN (icode) (op[0], op[1]);
19886 break;
19888 case 3:
19889 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19890 break;
19892 case 4:
19893 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19894 break;
19896 case 5:
19897 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19898 break;
19900 default:
19901 gcc_unreachable ();
19904 if (!pat)
19905 return 0;
19907 emit_insn (pat);
19909 return target;
19912 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19913 constants defined per-instruction or per instruction-variant. Instead, the
19914 required info is looked up in the table neon_builtin_data. */
19915 static rtx
19916 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19918 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
19919 neon_itype itype = d->itype;
19920 enum insn_code icode = d->code;
19921 neon_builtin_type_mode type_mode = d->mode;
19923 switch (itype)
19925 case NEON_UNOP:
19926 case NEON_CONVERT:
19927 case NEON_DUPLANE:
19928 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19929 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19931 case NEON_BINOP:
19932 case NEON_SETLANE:
19933 case NEON_SCALARMUL:
19934 case NEON_SCALARMULL:
19935 case NEON_SCALARMULH:
19936 case NEON_SHIFTINSERT:
19937 case NEON_LOGICBINOP:
19938 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19939 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19940 NEON_ARG_STOP);
19942 case NEON_TERNOP:
19943 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19944 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19945 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19947 case NEON_GETLANE:
19948 case NEON_FIXCONV:
19949 case NEON_SHIFTIMM:
19950 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19951 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19952 NEON_ARG_STOP);
19954 case NEON_CREATE:
19955 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19956 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19958 case NEON_DUP:
19959 case NEON_SPLIT:
19960 case NEON_REINTERP:
19961 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19962 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19964 case NEON_COMBINE:
19965 case NEON_VTBL:
19966 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19967 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19969 case NEON_RESULTPAIR:
19970 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19971 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19972 NEON_ARG_STOP);
19974 case NEON_LANEMUL:
19975 case NEON_LANEMULL:
19976 case NEON_LANEMULH:
19977 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19978 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19979 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19981 case NEON_LANEMAC:
19982 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19983 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19984 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19986 case NEON_SHIFTACC:
19987 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19988 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19989 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19991 case NEON_SCALARMAC:
19992 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19993 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19994 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19996 case NEON_SELECT:
19997 case NEON_VTBX:
19998 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19999 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20000 NEON_ARG_STOP);
20002 case NEON_LOAD1:
20003 case NEON_LOADSTRUCT:
20004 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20005 NEON_ARG_MEMORY, NEON_ARG_STOP);
20007 case NEON_LOAD1LANE:
20008 case NEON_LOADSTRUCTLANE:
20009 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20010 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20011 NEON_ARG_STOP);
20013 case NEON_STORE1:
20014 case NEON_STORESTRUCT:
20015 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20016 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20018 case NEON_STORE1LANE:
20019 case NEON_STORESTRUCTLANE:
20020 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20021 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20022 NEON_ARG_STOP);
20025 gcc_unreachable ();
20028 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20029 void
20030 neon_reinterpret (rtx dest, rtx src)
20032 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20035 /* Emit code to place a Neon pair result in memory locations (with equal
20036 registers). */
20037 void
20038 neon_emit_pair_result_insn (enum machine_mode mode,
20039 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20040 rtx op1, rtx op2)
20042 rtx mem = gen_rtx_MEM (mode, destaddr);
20043 rtx tmp1 = gen_reg_rtx (mode);
20044 rtx tmp2 = gen_reg_rtx (mode);
20046 emit_insn (intfn (tmp1, op1, op2, tmp2));
20048 emit_move_insn (mem, tmp1);
20049 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20050 emit_move_insn (mem, tmp2);
20053 /* Set up operands for a register copy from src to dest, taking care not to
20054 clobber registers in the process.
20055 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
20056 be called with a large N, so that should be OK. */
20058 void
20059 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20061 unsigned int copied = 0, opctr = 0;
20062 unsigned int done = (1 << count) - 1;
20063 unsigned int i, j;
20065 while (copied != done)
20067 for (i = 0; i < count; i++)
20069 int good = 1;
20071 for (j = 0; good && j < count; j++)
20072 if (i != j && (copied & (1 << j)) == 0
20073 && reg_overlap_mentioned_p (src[j], dest[i]))
20074 good = 0;
20076 if (good)
20078 operands[opctr++] = dest[i];
20079 operands[opctr++] = src[i];
20080 copied |= 1 << i;
20085 gcc_assert (opctr == count * 2);
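/* A self-contained sketch of the ordering scheme above, with small
   integers standing in for registers and equality standing in for
   reg_overlap_mentioned_p.  Already-scheduled entries are skipped
   explicitly here, and cyclic overlaps are assumed not to occur (as the
   real callers are expected to guarantee).  E.g. for dest = {0, 1},
   src = {5, 0} the copies come out as 1 <- 0 first, then 0 <- 5, so no
   source is overwritten before it has been read.  Illustrative only.  */
#if 0
#include <assert.h>

static void
order_copies (const int *dest, const int *src, unsigned int count,
	      int *ops /* room for 2 * count entries */)
{
  unsigned int copied = 0, opctr = 0, i, j;
  unsigned int done = (1u << count) - 1;

  while (copied != done)
    for (i = 0; i < count; i++)
      if ((copied & (1u << i)) == 0)
	{
	  int good = 1;

	  /* Defer dest[i] = src[i] while some still-pending copy reads
	     a source that lives in dest[i].  */
	  for (j = 0; good && j < count; j++)
	    if (i != j && (copied & (1u << j)) == 0 && src[j] == dest[i])
	      good = 0;

	  if (good)
	    {
	      ops[opctr++] = dest[i];
	      ops[opctr++] = src[i];
	      copied |= 1u << i;
	    }
	}

  assert (opctr == count * 2);
}
#endif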
20088 /* Expand an expression EXP that calls a built-in function,
20089 with result going to TARGET if that's convenient
20090 (and in mode MODE if that's convenient).
20091 SUBTARGET may be used as the target for computing one of EXP's operands.
20092 IGNORE is nonzero if the value is to be ignored. */
20094 static rtx
20095 arm_expand_builtin (tree exp,
20096 rtx target,
20097 rtx subtarget ATTRIBUTE_UNUSED,
20098 enum machine_mode mode ATTRIBUTE_UNUSED,
20099 int ignore ATTRIBUTE_UNUSED)
20101 const struct builtin_description * d;
20102 enum insn_code icode;
20103 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20104 tree arg0;
20105 tree arg1;
20106 tree arg2;
20107 rtx op0;
20108 rtx op1;
20109 rtx op2;
20110 rtx pat;
20111 int fcode = DECL_FUNCTION_CODE (fndecl);
20112 size_t i;
20113 enum machine_mode tmode;
20114 enum machine_mode mode0;
20115 enum machine_mode mode1;
20116 enum machine_mode mode2;
20118 if (fcode >= ARM_BUILTIN_NEON_BASE)
20119 return arm_expand_neon_builtin (fcode, exp, target);
20121 switch (fcode)
20123 case ARM_BUILTIN_TEXTRMSB:
20124 case ARM_BUILTIN_TEXTRMUB:
20125 case ARM_BUILTIN_TEXTRMSH:
20126 case ARM_BUILTIN_TEXTRMUH:
20127 case ARM_BUILTIN_TEXTRMSW:
20128 case ARM_BUILTIN_TEXTRMUW:
20129 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20130 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20131 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20132 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20133 : CODE_FOR_iwmmxt_textrmw);
20135 arg0 = CALL_EXPR_ARG (exp, 0);
20136 arg1 = CALL_EXPR_ARG (exp, 1);
20137 op0 = expand_normal (arg0);
20138 op1 = expand_normal (arg1);
20139 tmode = insn_data[icode].operand[0].mode;
20140 mode0 = insn_data[icode].operand[1].mode;
20141 mode1 = insn_data[icode].operand[2].mode;
20143 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20144 op0 = copy_to_mode_reg (mode0, op0);
20145 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20147 /* @@@ better error message */
20148 error ("selector must be an immediate");
20149 return gen_reg_rtx (tmode);
20151 if (target == 0
20152 || GET_MODE (target) != tmode
20153 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20154 target = gen_reg_rtx (tmode);
20155 pat = GEN_FCN (icode) (target, op0, op1);
20156 if (! pat)
20157 return 0;
20158 emit_insn (pat);
20159 return target;
20161 case ARM_BUILTIN_TINSRB:
20162 case ARM_BUILTIN_TINSRH:
20163 case ARM_BUILTIN_TINSRW:
20164 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20165 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20166 : CODE_FOR_iwmmxt_tinsrw);
20167 arg0 = CALL_EXPR_ARG (exp, 0);
20168 arg1 = CALL_EXPR_ARG (exp, 1);
20169 arg2 = CALL_EXPR_ARG (exp, 2);
20170 op0 = expand_normal (arg0);
20171 op1 = expand_normal (arg1);
20172 op2 = expand_normal (arg2);
20173 tmode = insn_data[icode].operand[0].mode;
20174 mode0 = insn_data[icode].operand[1].mode;
20175 mode1 = insn_data[icode].operand[2].mode;
20176 mode2 = insn_data[icode].operand[3].mode;
20178 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20179 op0 = copy_to_mode_reg (mode0, op0);
20180 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20181 op1 = copy_to_mode_reg (mode1, op1);
20182 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20184 /* @@@ better error message */
20185 error ("selector must be an immediate");
20186 return const0_rtx;
20188 if (target == 0
20189 || GET_MODE (target) != tmode
20190 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20191 target = gen_reg_rtx (tmode);
20192 pat = GEN_FCN (icode) (target, op0, op1, op2);
20193 if (! pat)
20194 return 0;
20195 emit_insn (pat);
20196 return target;
20198 case ARM_BUILTIN_SETWCX:
20199 arg0 = CALL_EXPR_ARG (exp, 0);
20200 arg1 = CALL_EXPR_ARG (exp, 1);
20201 op0 = force_reg (SImode, expand_normal (arg0));
20202 op1 = expand_normal (arg1);
20203 emit_insn (gen_iwmmxt_tmcr (op1, op0));
20204 return 0;
20206 case ARM_BUILTIN_GETWCX:
20207 arg0 = CALL_EXPR_ARG (exp, 0);
20208 op0 = expand_normal (arg0);
20209 target = gen_reg_rtx (SImode);
20210 emit_insn (gen_iwmmxt_tmrc (target, op0));
20211 return target;
20213 case ARM_BUILTIN_WSHUFH:
20214 icode = CODE_FOR_iwmmxt_wshufh;
20215 arg0 = CALL_EXPR_ARG (exp, 0);
20216 arg1 = CALL_EXPR_ARG (exp, 1);
20217 op0 = expand_normal (arg0);
20218 op1 = expand_normal (arg1);
20219 tmode = insn_data[icode].operand[0].mode;
20220 mode1 = insn_data[icode].operand[1].mode;
20221 mode2 = insn_data[icode].operand[2].mode;
20223 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20224 op0 = copy_to_mode_reg (mode1, op0);
20225 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20227 /* @@@ better error message */
20228 error ("mask must be an immediate");
20229 return const0_rtx;
20231 if (target == 0
20232 || GET_MODE (target) != tmode
20233 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20234 target = gen_reg_rtx (tmode);
20235 pat = GEN_FCN (icode) (target, op0, op1);
20236 if (! pat)
20237 return 0;
20238 emit_insn (pat);
20239 return target;
20241 case ARM_BUILTIN_WSADB:
20242 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
20243 case ARM_BUILTIN_WSADH:
20244 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
20245 case ARM_BUILTIN_WSADBZ:
20246 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20247 case ARM_BUILTIN_WSADHZ:
20248 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20250 /* Several three-argument builtins. */
20251 case ARM_BUILTIN_WMACS:
20252 case ARM_BUILTIN_WMACU:
20253 case ARM_BUILTIN_WALIGN:
20254 case ARM_BUILTIN_TMIA:
20255 case ARM_BUILTIN_TMIAPH:
20256 case ARM_BUILTIN_TMIATT:
20257 case ARM_BUILTIN_TMIATB:
20258 case ARM_BUILTIN_TMIABT:
20259 case ARM_BUILTIN_TMIABB:
20260 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20261 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20262 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20263 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20264 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20265 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20266 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
20267 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
20268 : CODE_FOR_iwmmxt_walign);
20269 arg0 = CALL_EXPR_ARG (exp, 0);
20270 arg1 = CALL_EXPR_ARG (exp, 1);
20271 arg2 = CALL_EXPR_ARG (exp, 2);
20272 op0 = expand_normal (arg0);
20273 op1 = expand_normal (arg1);
20274 op2 = expand_normal (arg2);
20275 tmode = insn_data[icode].operand[0].mode;
20276 mode0 = insn_data[icode].operand[1].mode;
20277 mode1 = insn_data[icode].operand[2].mode;
20278 mode2 = insn_data[icode].operand[3].mode;
20280 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20281 op0 = copy_to_mode_reg (mode0, op0);
20282 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20283 op1 = copy_to_mode_reg (mode1, op1);
20284 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20285 op2 = copy_to_mode_reg (mode2, op2);
20286 if (target == 0
20287 || GET_MODE (target) != tmode
20288 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20289 target = gen_reg_rtx (tmode);
20290 pat = GEN_FCN (icode) (target, op0, op1, op2);
20291 if (! pat)
20292 return 0;
20293 emit_insn (pat);
20294 return target;
20296 case ARM_BUILTIN_WZERO:
20297 target = gen_reg_rtx (DImode);
20298 emit_insn (gen_iwmmxt_clrdi (target));
20299 return target;
20301 case ARM_BUILTIN_THREAD_POINTER:
20302 return arm_load_tp (target);
20304 default:
20305 break;
20308 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20309 if (d->code == (const enum arm_builtins) fcode)
20310 return arm_expand_binop_builtin (d->icode, exp, target);
20312 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
20313 if (d->code == (const enum arm_builtins) fcode)
20314 return arm_expand_unop_builtin (d->icode, exp, target, 0);
20316 /* @@@ Should really do something sensible here. */
20317 return NULL_RTX;
20320 /* Return the number (counting from 0) of
20321 the least significant set bit in MASK. */
20323 inline static int
20324 number_of_first_bit_set (unsigned mask)
20326 return ctz_hwi (mask);
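/* For example, number_of_first_bit_set (0x28) is 3 (0x28 == binary 101000),
   and number_of_first_bit_set (1 << LR_REGNUM) is LR_REGNUM.  */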
20329 /* Like emit_multi_reg_push, but allowing for a different set of
20330 registers to be described as saved. MASK is the set of registers
20331 to be saved; REAL_REGS is the set of registers to be described as
20332 saved. If REAL_REGS is 0, only describe the stack adjustment. */
20334 static rtx
20335 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
20337 unsigned long regno;
20338 rtx par[10], tmp, reg, insn;
20339 int i, j;
20341 /* Build the parallel of the registers actually being stored. */
20342 for (i = 0; mask; ++i, mask &= mask - 1)
20344 regno = ctz_hwi (mask);
20345 reg = gen_rtx_REG (SImode, regno);
20347 if (i == 0)
20348 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
20349 else
20350 tmp = gen_rtx_USE (VOIDmode, reg);
20352 par[i] = tmp;
20355 tmp = plus_constant (stack_pointer_rtx, -4 * i);
20356 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20357 tmp = gen_frame_mem (BLKmode, tmp);
20358 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
20359 par[0] = tmp;
20361 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
20362 insn = emit_insn (tmp);
20364 /* Always build the stack adjustment note for unwind info. */
20365 tmp = plus_constant (stack_pointer_rtx, -4 * i);
20366 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
20367 par[0] = tmp;
20369 /* Build the parallel of the registers recorded as saved for unwind. */
20370 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
20372 regno = ctz_hwi (real_regs);
20373 reg = gen_rtx_REG (SImode, regno);
20375 tmp = plus_constant (stack_pointer_rtx, j * 4);
20376 tmp = gen_frame_mem (SImode, tmp);
20377 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
20378 RTX_FRAME_RELATED_P (tmp) = 1;
20379 par[j + 1] = tmp;
20382 if (j == 0)
20383 tmp = par[0];
20384 else
20386 RTX_FRAME_RELATED_P (par[0]) = 1;
20387 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
20390 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
20392 return insn;
20395 /* Emit code to pop registers from the stack. F is the
20396 assembly file. MASK is the registers to pop. */
20397 static void
20398 thumb_pop (FILE *f, unsigned long mask)
20400 int regno;
20401 int lo_mask = mask & 0xFF;
20402 int pushed_words = 0;
20404 gcc_assert (mask);
20406 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
20408 /* Special case. Do not generate a POP PC statement here; do it in
20409 thumb_exit (). */
20410 thumb_exit (f, -1);
20411 return;
20414 fprintf (f, "\tpop\t{");
20416 /* Look at the low registers first. */
20417 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
20419 if (lo_mask & 1)
20421 asm_fprintf (f, "%r", regno);
20423 if ((lo_mask & ~1) != 0)
20424 fprintf (f, ", ");
20426 pushed_words++;
20430 if (mask & (1 << PC_REGNUM))
20432 /* Catch popping the PC. */
20433 if (TARGET_INTERWORK || TARGET_BACKTRACE
20434 || crtl->calls_eh_return)
20436 /* The PC is never popped directly; instead
20437 it is popped into r3 and then BX is used. */
20438 fprintf (f, "}\n");
20440 thumb_exit (f, -1);
20442 return;
20444 else
20446 if (mask & 0xFF)
20447 fprintf (f, ", ");
20449 asm_fprintf (f, "%r", PC_REGNUM);
20453 fprintf (f, "}\n");
20456 /* Generate code to return from a thumb function.
20457 If 'reg_containing_return_addr' is -1, then the return address is
20458 actually on the stack, at the stack pointer. */
20459 static void
20460 thumb_exit (FILE *f, int reg_containing_return_addr)
20462 unsigned regs_available_for_popping;
20463 unsigned regs_to_pop;
20464 int pops_needed;
20465 unsigned available;
20466 unsigned required;
20467 int mode;
20468 int size;
20469 int restore_a4 = FALSE;
20471 /* Compute the registers we need to pop. */
20472 regs_to_pop = 0;
20473 pops_needed = 0;
20475 if (reg_containing_return_addr == -1)
20477 regs_to_pop |= 1 << LR_REGNUM;
20478 ++pops_needed;
20481 if (TARGET_BACKTRACE)
20483 /* Restore the (ARM) frame pointer and stack pointer. */
20484 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20485 pops_needed += 2;
20488 /* If there is nothing to pop then just emit the BX instruction and
20489 return. */
20490 if (pops_needed == 0)
20492 if (crtl->calls_eh_return)
20493 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20495 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20496 return;
20498 /* Otherwise if we are not supporting interworking and we have not created
20499 a backtrace structure and the function was not entered in ARM mode then
20500 just pop the return address straight into the PC. */
20501 else if (!TARGET_INTERWORK
20502 && !TARGET_BACKTRACE
20503 && !is_called_in_ARM_mode (current_function_decl)
20504 && !crtl->calls_eh_return)
20506 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20507 return;
20510 /* Find out how many of the (return) argument registers we can corrupt. */
20511 regs_available_for_popping = 0;
20513 /* If returning via __builtin_eh_return, the bottom three registers
20514 all contain information needed for the return. */
20515 if (crtl->calls_eh_return)
20516 size = 12;
20517 else
20519 /* We can deduce the registers used from the function's
20520 return value. This is more reliable than examining
20521 df_regs_ever_live_p () because that will be set if the register is
20522 ever used in the function, not just if the register is used
20523 to hold a return value. */
20525 if (crtl->return_rtx != 0)
20526 mode = GET_MODE (crtl->return_rtx);
20527 else
20528 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20530 size = GET_MODE_SIZE (mode);
20532 if (size == 0)
20534 /* In a void function we can use any argument register.
20535 In a function that returns a structure on the stack
20536 we can use the second and third argument registers. */
20537 if (mode == VOIDmode)
20538 regs_available_for_popping =
20539 (1 << ARG_REGISTER (1))
20540 | (1 << ARG_REGISTER (2))
20541 | (1 << ARG_REGISTER (3));
20542 else
20543 regs_available_for_popping =
20544 (1 << ARG_REGISTER (2))
20545 | (1 << ARG_REGISTER (3));
20547 else if (size <= 4)
20548 regs_available_for_popping =
20549 (1 << ARG_REGISTER (2))
20550 | (1 << ARG_REGISTER (3));
20551 else if (size <= 8)
20552 regs_available_for_popping =
20553 (1 << ARG_REGISTER (3));
20556 /* Match registers to be popped with registers into which we pop them. */
20557 for (available = regs_available_for_popping,
20558 required = regs_to_pop;
20559 required != 0 && available != 0;
20560 available &= ~(available & - available),
20561 required &= ~(required & - required))
20562 -- pops_needed;
20564 /* If we have any popping registers left over, remove them. */
20565 if (available > 0)
20566 regs_available_for_popping &= ~available;
20568 /* Otherwise if we need another popping register we can use
20569 the fourth argument register. */
20570 else if (pops_needed)
20572 /* If we have not found any free argument registers and
20573 reg a4 contains the return address, we must move it. */
20574 if (regs_available_for_popping == 0
20575 && reg_containing_return_addr == LAST_ARG_REGNUM)
20577 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20578 reg_containing_return_addr = LR_REGNUM;
20580 else if (size > 12)
20582 /* Register a4 is being used to hold part of the return value,
20583 but we have dire need of a free, low register. */
20584 restore_a4 = TRUE;
20586 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
20589 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20591 /* The fourth argument register is available. */
20592 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20594 --pops_needed;
20598 /* Pop as many registers as we can. */
20599 thumb_pop (f, regs_available_for_popping);
20601 /* Process the registers we popped. */
20602 if (reg_containing_return_addr == -1)
20604 /* The return address was popped into the lowest numbered register. */
20605 regs_to_pop &= ~(1 << LR_REGNUM);
20607 reg_containing_return_addr =
20608 number_of_first_bit_set (regs_available_for_popping);
20610 /* Remove this register from the mask of available registers, so that
20611 the return address will not be corrupted by further pops. */
20612 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20615 /* If we popped other registers then handle them here. */
20616 if (regs_available_for_popping)
20618 int frame_pointer;
20620 /* Work out which register currently contains the frame pointer. */
20621 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20623 /* Move it into the correct place. */
20624 asm_fprintf (f, "\tmov\t%r, %r\n",
20625 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20627 /* (Temporarily) remove it from the mask of popped registers. */
20628 regs_available_for_popping &= ~(1 << frame_pointer);
20629 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20631 if (regs_available_for_popping)
20633 int stack_pointer;
20635 /* We popped the stack pointer as well,
20636 find the register that contains it. */
20637 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20639 /* Move it into the stack register. */
20640 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20642 /* At this point we have popped all necessary registers, so
20643 do not worry about restoring regs_available_for_popping
20644 to its correct value:
20646 assert (pops_needed == 0)
20647 assert (regs_available_for_popping == (1 << frame_pointer))
20648 assert (regs_to_pop == (1 << STACK_POINTER)) */
20650 else
20652 /* Since we have just moved the popped value into the frame
20653 pointer, the popping register is available for reuse, and
20654 we know that we still have the stack pointer left to pop. */
20655 regs_available_for_popping |= (1 << frame_pointer);
20659 /* If we still have registers left on the stack, but we no longer have
20660 any registers into which we can pop them, then we must move the return
20661 address into the link register and make available the register that
20662 contained it. */
20663 if (regs_available_for_popping == 0 && pops_needed > 0)
20665 regs_available_for_popping |= 1 << reg_containing_return_addr;
20667 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20668 reg_containing_return_addr);
20670 reg_containing_return_addr = LR_REGNUM;
20673 /* If we have registers left on the stack then pop some more.
20674 We know that at most we will want to pop FP and SP. */
20675 if (pops_needed > 0)
20677 int popped_into;
20678 int move_to;
20680 thumb_pop (f, regs_available_for_popping);
20682 /* We have popped either FP or SP.
20683 Move whichever one it is into the correct register. */
20684 popped_into = number_of_first_bit_set (regs_available_for_popping);
20685 move_to = number_of_first_bit_set (regs_to_pop);
20687 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20689 regs_to_pop &= ~(1 << move_to);
20691 --pops_needed;
20694 /* If we still have not popped everything then we must have only
20695 had one register available to us and we are now popping the SP. */
20696 if (pops_needed > 0)
20698 int popped_into;
20700 thumb_pop (f, regs_available_for_popping);
20702 popped_into = number_of_first_bit_set (regs_available_for_popping);
20704 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20706 assert (regs_to_pop == (1 << STACK_POINTER))
20707 assert (pops_needed == 1)
20711 /* If necessary restore the a4 register. */
20712 if (restore_a4)
20714 if (reg_containing_return_addr != LR_REGNUM)
20716 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20717 reg_containing_return_addr = LR_REGNUM;
20720 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20723 if (crtl->calls_eh_return)
20724 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20726 /* Return to caller. */
20727 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20730 /* Scan INSN just before assembler is output for it.
20731 For Thumb-1, we track the status of the condition codes; this
20732 information is used in the cbranchsi4_insn pattern. */
20733 void
20734 thumb1_final_prescan_insn (rtx insn)
20736 if (flag_print_asm_name)
20737 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20738 INSN_ADDRESSES (INSN_UID (insn)));
20739 /* Don't overwrite the previous setter when we get to a cbranch. */
20740 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20742 enum attr_conds conds;
20744 if (cfun->machine->thumb1_cc_insn)
20746 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20747 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20748 CC_STATUS_INIT;
20750 conds = get_attr_conds (insn);
20751 if (conds == CONDS_SET)
20753 rtx set = single_set (insn);
20754 cfun->machine->thumb1_cc_insn = insn;
20755 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20756 cfun->machine->thumb1_cc_op1 = const0_rtx;
20757 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20758 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20760 rtx src1 = XEXP (SET_SRC (set), 1);
20761 if (src1 == const0_rtx)
20762 cfun->machine->thumb1_cc_mode = CCmode;
20765 else if (conds != CONDS_NOCOND)
20766 cfun->machine->thumb1_cc_insn = NULL_RTX;
20771 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20773 unsigned HOST_WIDE_INT mask = 0xff;
20774 int i;
20776 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20777 if (val == 0) /* XXX */
20778 return 0;
20780 for (i = 0; i < 25; i++)
20781 if ((val & (mask << i)) == val)
20782 return 1;
20784 return 0;
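/* Illustrative values:
     thumb_shiftable_const (0x000000ff) == 1   (0xff << 0)
     thumb_shiftable_const (0x0001fe00) == 1   (0xff << 9)
     thumb_shiftable_const (0xff000000) == 1   (0xff << 24)
     thumb_shiftable_const (0x00ff00ff) == 0   (not a single shifted byte)  */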
20787 /* Returns nonzero if the current function contains,
20788 or might contain, a far jump. */
20789 static int
20790 thumb_far_jump_used_p (void)
20792 rtx insn;
20794 /* This test is only important for leaf functions. */
20795 /* assert (!leaf_function_p ()); */
20797 /* If we have already decided that far jumps may be used,
20798 do not bother checking again, and always return true even if
20799 it turns out that they are not being used. Once we have made
20800 the decision that far jumps are present (and that hence the link
20801 register will be pushed onto the stack) we cannot go back on it. */
20802 if (cfun->machine->far_jump_used)
20803 return 1;
20805 /* If this function is not being called from the prologue/epilogue
20806 generation code then it must be being called from the
20807 INITIAL_ELIMINATION_OFFSET macro. */
20808 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20810 /* In this case we know that we are being asked about the elimination
20811 of the arg pointer register. If that register is not being used,
20812 then there are no arguments on the stack, and we do not have to
20813 worry that a far jump might force the prologue to push the link
20814 register, changing the stack offsets. In this case we can just
20815 return false, since the presence of far jumps in the function will
20816 not affect stack offsets.
20818 If the arg pointer is live (or if it was live, but has now been
20819 eliminated and so set to dead) then we do have to test to see if
20820 the function might contain a far jump. This test can lead to some
20821 false negatives, since before reload is completed, the length of
20822 branch instructions is not known, so gcc defaults to returning their
20823 longest length, which in turn sets the far jump attribute to true.
20825 A false negative will not result in bad code being generated, but it
20826 will result in a needless push and pop of the link register. We
20827 hope that this does not occur too often.
20829 If we need doubleword stack alignment this could affect the other
20830 elimination offsets so we can't risk getting it wrong. */
20831 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20832 cfun->machine->arg_pointer_live = 1;
20833 else if (!cfun->machine->arg_pointer_live)
20834 return 0;
20837 /* Check to see if the function contains a branch
20838 insn with the far jump attribute set. */
20839 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20841 if (GET_CODE (insn) == JUMP_INSN
20842 /* Ignore tablejump patterns. */
20843 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20844 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20845 && get_attr_far_jump (insn) == FAR_JUMP_YES
20848 /* Record the fact that we have decided that
20849 the function does use far jumps. */
20850 cfun->machine->far_jump_used = 1;
20851 return 1;
20855 return 0;
20858 /* Return nonzero if FUNC must be entered in ARM mode. */
20860 is_called_in_ARM_mode (tree func)
20862 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20864 /* Ignore the problem about functions whose address is taken. */
20865 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20866 return TRUE;
20868 #ifdef ARM_PE
20869 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20870 #else
20871 return FALSE;
20872 #endif
20875 /* Given the stack offsets and register mask in OFFSETS, decide how
20876 many additional registers to push instead of subtracting a constant
20877 from SP. For epilogues the principle is the same except we use pop.
20878 FOR_PROLOGUE indicates which we're generating. */
20879 static int
20880 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20882 HOST_WIDE_INT amount;
20883 unsigned long live_regs_mask = offsets->saved_regs_mask;
20884 /* Extract a mask of the ones we can give to the Thumb's push/pop
20885 instruction. */
20886 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20887 /* Then count how many other high registers will need to be pushed. */
20888 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20889 int n_free, reg_base;
20891 if (!for_prologue && frame_pointer_needed)
20892 amount = offsets->locals_base - offsets->saved_regs;
20893 else
20894 amount = offsets->outgoing_args - offsets->saved_regs;
20896 /* If the stack frame size is 512 exactly, we can save one load
20897 instruction, which should make this a win even when optimizing
20898 for speed. */
20899 if (!optimize_size && amount != 512)
20900 return 0;
20902 /* Can't do this if there are high registers to push. */
20903 if (high_regs_pushed != 0)
20904 return 0;
20906 /* Shouldn't do it in the prologue if no registers would normally
20907 be pushed at all. In the epilogue, also allow it if we'll have
20908 a pop insn for the PC. */
20909 if (l_mask == 0
20910 && (for_prologue
20911 || TARGET_BACKTRACE
20912 || (live_regs_mask & 1 << LR_REGNUM) == 0
20913 || TARGET_INTERWORK
20914 || crtl->args.pretend_args_size != 0))
20915 return 0;
20917 /* Don't do this if thumb_expand_prologue wants to emit instructions
20918 between the push and the stack frame allocation. */
20919 if (for_prologue
20920 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20921 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20922 return 0;
20924 reg_base = 0;
20925 n_free = 0;
20926 if (!for_prologue)
20928 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20929 live_regs_mask >>= reg_base;
20932 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20933 && (for_prologue || call_used_regs[reg_base + n_free]))
20935 live_regs_mask >>= 1;
20936 n_free++;
20939 if (n_free == 0)
20940 return 0;
20941 gcc_assert (amount / 4 * 4 == amount);
20943 if (amount >= 512 && (amount - n_free * 4) < 512)
20944 return (amount - 508) / 4;
20945 if (amount <= n_free * 4)
20946 return amount / 4;
20947 return 0;
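/* Worked example (sketch): with a 512-byte frame and at least one free
   low register, the Thumb-1 "sub sp, #imm" encoding tops out at 508, so
   allocating the frame directly would need an extra instruction.  Here
   amount == 512 and n_free >= 1, so (amount - n_free * 4) < 512 and the
   function returns (512 - 508) / 4 == 1: the caller pushes one extra
   register and the remaining 508 bytes fit a single "sub sp, #508".  */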
20950 /* The bits which aren't usefully expanded as rtl. */
20951 const char *
20952 thumb_unexpanded_epilogue (void)
20954 arm_stack_offsets *offsets;
20955 int regno;
20956 unsigned long live_regs_mask = 0;
20957 int high_regs_pushed = 0;
20958 int extra_pop;
20959 int had_to_push_lr;
20960 int size;
20962 if (cfun->machine->return_used_this_function != 0)
20963 return "";
20965 if (IS_NAKED (arm_current_func_type ()))
20966 return "";
20968 offsets = arm_get_frame_offsets ();
20969 live_regs_mask = offsets->saved_regs_mask;
20970 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20972 /* We can deduce the registers used from the function's return value.
20973 This is more reliable than examining df_regs_ever_live_p () because that
20974 will be set if the register is ever used in the function, not just if
20975 the register is used to hold a return value. */
20976 size = arm_size_return_regs ();
20978 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20979 if (extra_pop > 0)
20981 unsigned long extra_mask = (1 << extra_pop) - 1;
20982 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20985 /* The prolog may have pushed some high registers to use as
20986 work registers; e.g. the testsuite file:
20987 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20988 compiles to produce:
20989 push {r4, r5, r6, r7, lr}
20990 mov r7, r9
20991 mov r6, r8
20992 push {r6, r7}
20993 as part of the prolog. We have to undo that pushing here. */
20995 if (high_regs_pushed)
20997 unsigned long mask = live_regs_mask & 0xff;
20998 int next_hi_reg;
21000 /* The available low registers depend on the size of the value we are
21001 returning. */
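/* r3 is free if the return value fits in r0-r2 (12 bytes or less); r2 is
   also free if it fits in r0-r1 (8 bytes or less).  */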
21002 if (size <= 12)
21003 mask |= 1 << 3;
21004 if (size <= 8)
21005 mask |= 1 << 2;
21007 if (mask == 0)
21008 /* Oh dear! We have no low registers into which we can pop
21009 high registers! */
21010 internal_error
21011 ("no low registers available for popping high registers");
21013 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21014 if (live_regs_mask & (1 << next_hi_reg))
21015 break;
21017 while (high_regs_pushed)
21019 /* Find lo register(s) into which the high register(s) can
21020 be popped. */
21021 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21023 if (mask & (1 << regno))
21024 high_regs_pushed--;
21025 if (high_regs_pushed == 0)
21026 break;
21029 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21031 /* Pop the values into the low register(s). */
21032 thumb_pop (asm_out_file, mask);
21034 /* Move the value(s) into the high registers. */
21035 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21037 if (mask & (1 << regno))
21039 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21040 regno);
21042 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21043 if (live_regs_mask & (1 << next_hi_reg))
21044 break;
21048 live_regs_mask &= ~0x0f00;
21051 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21052 live_regs_mask &= 0xff;
21054 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21056 /* Pop the return address into the PC. */
21057 if (had_to_push_lr)
21058 live_regs_mask |= 1 << PC_REGNUM;
21060 /* Either no argument registers were pushed or a backtrace
21061 structure was created which includes an adjusted stack
21062 pointer, so just pop everything. */
21063 if (live_regs_mask)
21064 thumb_pop (asm_out_file, live_regs_mask);
21066 /* We have either just popped the return address into the
21067 PC or it was kept in LR for the entire function.
21068 Note that thumb_pop has already called thumb_exit if the
21069 PC was in the list. */
21070 if (!had_to_push_lr)
21071 thumb_exit (asm_out_file, LR_REGNUM);
21073 else
21075 /* Pop everything but the return address. */
21076 if (live_regs_mask)
21077 thumb_pop (asm_out_file, live_regs_mask);
21079 if (had_to_push_lr)
21081 if (size > 12)
21083 /* We have no free low regs, so save one. */
21084 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21085 LAST_ARG_REGNUM);
21088 /* Get the return address into a temporary register. */
21089 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21091 if (size > 12)
21093 /* Move the return address to lr. */
21094 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21095 LAST_ARG_REGNUM);
21096 /* Restore the low register. */
21097 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21098 IP_REGNUM);
21099 regno = LR_REGNUM;
21101 else
21102 regno = LAST_ARG_REGNUM;
21104 else
21105 regno = LR_REGNUM;
21107 /* Remove the argument registers that were pushed onto the stack. */
21108 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21109 SP_REGNUM, SP_REGNUM,
21110 crtl->args.pretend_args_size);
21112 thumb_exit (asm_out_file, regno);
21115 return "";
21118 /* Functions to save and restore machine-specific function data. */
21119 static struct machine_function *
21120 arm_init_machine_status (void)
21122 struct machine_function *machine;
21123 machine = ggc_alloc_cleared_machine_function ();
21125 #if ARM_FT_UNKNOWN != 0
21126 machine->func_type = ARM_FT_UNKNOWN;
21127 #endif
21128 return machine;
21131 /* Return an RTX indicating where the return address to the
21132 calling function can be found. */
21134 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21136 if (count != 0)
21137 return NULL_RTX;
21139 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21142 /* Do anything needed before RTL is emitted for each function. */
21143 void
21144 arm_init_expanders (void)
21146 /* Arrange to initialize and mark the machine per-function status. */
21147 init_machine_status = arm_init_machine_status;
21149 /* This is to stop the combine pass optimizing away the alignment
21150 adjustment of va_arg. */
21151 /* ??? It is claimed that this should not be necessary. */
21152 if (cfun)
21153 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
21157 /* Like arm_compute_initial_elimination_offset. Simpler because there
21158 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21159 to point at the base of the local variables after static stack
21160 space for a function has been allocated. */
21162 HOST_WIDE_INT
21163 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21165 arm_stack_offsets *offsets;
21167 offsets = arm_get_frame_offsets ();
21169 switch (from)
21171 case ARG_POINTER_REGNUM:
21172 switch (to)
21174 case STACK_POINTER_REGNUM:
21175 return offsets->outgoing_args - offsets->saved_args;
21177 case FRAME_POINTER_REGNUM:
21178 return offsets->soft_frame - offsets->saved_args;
21180 case ARM_HARD_FRAME_POINTER_REGNUM:
21181 return offsets->saved_regs - offsets->saved_args;
21183 case THUMB_HARD_FRAME_POINTER_REGNUM:
21184 return offsets->locals_base - offsets->saved_args;
21186 default:
21187 gcc_unreachable ();
21189 break;
21191 case FRAME_POINTER_REGNUM:
21192 switch (to)
21194 case STACK_POINTER_REGNUM:
21195 return offsets->outgoing_args - offsets->soft_frame;
21197 case ARM_HARD_FRAME_POINTER_REGNUM:
21198 return offsets->saved_regs - offsets->soft_frame;
21200 case THUMB_HARD_FRAME_POINTER_REGNUM:
21201 return offsets->locals_base - offsets->soft_frame;
21203 default:
21204 gcc_unreachable ();
21206 break;
21208 default:
21209 gcc_unreachable ();
21213 /* Generate the function's prologue. */
21215 void
21216 thumb1_expand_prologue (void)
21218 rtx insn;
21220 HOST_WIDE_INT amount;
21221 arm_stack_offsets *offsets;
21222 unsigned long func_type;
21223 int regno;
21224 unsigned long live_regs_mask;
21225 unsigned long l_mask;
21226 unsigned high_regs_pushed = 0;
21228 func_type = arm_current_func_type ();
21230 /* Naked functions don't have prologues. */
21231 if (IS_NAKED (func_type))
21232 return;
21234 if (IS_INTERRUPT (func_type))
21236 error ("interrupt Service Routines cannot be coded in Thumb mode");
21237 return;
21240 if (is_called_in_ARM_mode (current_function_decl))
21241 emit_insn (gen_prologue_thumb1_interwork ());
21243 offsets = arm_get_frame_offsets ();
21244 live_regs_mask = offsets->saved_regs_mask;
21246 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21247 l_mask = live_regs_mask & 0x40ff;
21248 /* Then count how many other high registers will need to be pushed. */
21249 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21251 if (crtl->args.pretend_args_size)
21253 rtx x = GEN_INT (-crtl->args.pretend_args_size);
21255 if (cfun->machine->uses_anonymous_args)
21257 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21258 unsigned long mask;
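/* Build a mask of the last NUM_PUSHES argument registers; the anonymous
   arguments live in the highest-numbered argument registers.  */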
21260 mask = 1ul << (LAST_ARG_REGNUM + 1);
21261 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
21263 insn = thumb1_emit_multi_reg_push (mask, 0);
21265 else
21267 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21268 stack_pointer_rtx, x));
21270 RTX_FRAME_RELATED_P (insn) = 1;
21273 if (TARGET_BACKTRACE)
21275 HOST_WIDE_INT offset = 0;
21276 unsigned work_register;
21277 rtx work_reg, x, arm_hfp_rtx;
21279 /* We have been asked to create a stack backtrace structure.
21280 The code looks like this:
21282 0 .align 2
21283 0 func:
21284 0 sub SP, #16 Reserve space for 4 registers.
21285 2 push {R7} Push low registers.
21286 4 add R7, SP, #20 Get the stack pointer before the push.
21287 6 str R7, [SP, #8] Store the stack pointer
21288 (before reserving the space).
21289 8 mov R7, PC Get hold of the start of this code + 12.
21290 10 str R7, [SP, #16] Store it.
21291 12 mov R7, FP Get hold of the current frame pointer.
21292 14 str R7, [SP, #4] Store it.
21293 16 mov R7, LR Get hold of the current return address.
21294 18 str R7, [SP, #12] Store it.
21295 20 add R7, SP, #16 Point at the start of the
21296 backtrace structure.
21297 22 mov FP, R7 Put this value into the frame pointer. */
21299 work_register = thumb_find_work_register (live_regs_mask);
21300 work_reg = gen_rtx_REG (SImode, work_register);
21301 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
21303 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21304 stack_pointer_rtx, GEN_INT (-16)));
21305 RTX_FRAME_RELATED_P (insn) = 1;
21307 if (l_mask)
21309 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
21310 RTX_FRAME_RELATED_P (insn) = 1;
21312 offset = bit_count (l_mask) * UNITS_PER_WORD;
21315 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
21316 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21318 x = plus_constant (stack_pointer_rtx, offset + 4);
21319 x = gen_frame_mem (SImode, x);
21320 emit_move_insn (x, work_reg);
21322 /* Make sure that the instruction fetching the PC is in the right place
21323 to calculate "start of backtrace creation code + 12". */
21324 /* ??? The stores using the common WORK_REG ought to be enough to
21325 prevent the scheduler from doing anything weird. Failing that
21326 we could always move all of the following into an UNSPEC_VOLATILE. */
21327 if (l_mask)
21329 x = gen_rtx_REG (SImode, PC_REGNUM);
21330 emit_move_insn (work_reg, x);
21332 x = plus_constant (stack_pointer_rtx, offset + 12);
21333 x = gen_frame_mem (SImode, x);
21334 emit_move_insn (x, work_reg);
21336 emit_move_insn (work_reg, arm_hfp_rtx);
21338 x = plus_constant (stack_pointer_rtx, offset);
21339 x = gen_frame_mem (SImode, x);
21340 emit_move_insn (x, work_reg);
21342 else
21344 emit_move_insn (work_reg, arm_hfp_rtx);
21346 x = plus_constant (stack_pointer_rtx, offset);
21347 x = gen_frame_mem (SImode, x);
21348 emit_move_insn (x, work_reg);
21350 x = gen_rtx_REG (SImode, PC_REGNUM);
21351 emit_move_insn (work_reg, x);
21353 x = plus_constant (stack_pointer_rtx, offset + 12);
21354 x = gen_frame_mem (SImode, x);
21355 emit_move_insn (x, work_reg);
21358 x = gen_rtx_REG (SImode, LR_REGNUM);
21359 emit_move_insn (work_reg, x);
21361 x = plus_constant (stack_pointer_rtx, offset + 8);
21362 x = gen_frame_mem (SImode, x);
21363 emit_move_insn (x, work_reg);
21365 x = GEN_INT (offset + 12);
21366 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21368 emit_move_insn (arm_hfp_rtx, work_reg);
21370 /* Optimization: If we are not pushing any low registers but we are going
21371 to push some high registers then delay our first push. This will just
21372 be a push of LR and we can combine it with the push of the first high
21373 register. */
21374 else if ((l_mask & 0xff) != 0
21375 || (high_regs_pushed == 0 && l_mask))
21377 unsigned long mask = l_mask;
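/* thumb1_extra_regs_pushed returns a count N; (1 << N) - 1 selects the N
   lowest registers, which are pushed only to perform part of the stack
   adjustment.  */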
21378 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21379 insn = thumb1_emit_multi_reg_push (mask, mask);
21380 RTX_FRAME_RELATED_P (insn) = 1;
21383 if (high_regs_pushed)
21385 unsigned pushable_regs;
21386 unsigned next_hi_reg;
21388 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21389 if (live_regs_mask & (1 << next_hi_reg))
21390 break;
21392 pushable_regs = l_mask & 0xff;
21394 if (pushable_regs == 0)
21395 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
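/* Copy each high register into a free low register and push the low
   registers, recording in real_regs_mask which high registers the pushed
   values really belong to.  */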
21397 while (high_regs_pushed > 0)
21399 unsigned long real_regs_mask = 0;
21401 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21403 if (pushable_regs & (1 << regno))
21405 emit_move_insn (gen_rtx_REG (SImode, regno),
21406 gen_rtx_REG (SImode, next_hi_reg));
21408 high_regs_pushed --;
21409 real_regs_mask |= (1 << next_hi_reg);
21411 if (high_regs_pushed)
21413 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21414 next_hi_reg --)
21415 if (live_regs_mask & (1 << next_hi_reg))
21416 break;
21418 else
21420 pushable_regs &= ~((1 << regno) - 1);
21421 break;
21426 /* If we had to find a work register and we have not yet
21427 saved the LR then add it to the list of regs to push. */
21428 if (l_mask == (1 << LR_REGNUM))
21430 pushable_regs |= l_mask;
21431 real_regs_mask |= l_mask;
21432 l_mask = 0;
21435 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
21436 RTX_FRAME_RELATED_P (insn) = 1;
21440 /* Load the pic register before setting the frame pointer,
21441 so we can use r7 as a temporary work register. */
21442 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21443 arm_load_pic_register (live_regs_mask);
21445 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21446 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
21447 stack_pointer_rtx);
21449 if (flag_stack_usage_info)
21450 current_function_static_stack_size
21451 = offsets->outgoing_args - offsets->saved_args;
21453 amount = offsets->outgoing_args - offsets->saved_regs;
21454 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
21455 if (amount)
21457 if (amount < 512)
21459 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21460 GEN_INT (- amount)));
21461 RTX_FRAME_RELATED_P (insn) = 1;
21463 else
21465 rtx reg, dwarf;
21467 /* The stack decrement is too big for an immediate value in a single
21468 insn. In theory we could issue multiple subtracts, but after
21469 three of them it becomes more space efficient to place the full
21470 value in the constant pool and load into a register. (Also the
21471 ARM debugger really likes to see only one stack decrement per
21472 function). So instead we look for a scratch register into which
21473 we can load the decrement, and then we subtract this from the
21474 stack pointer. Unfortunately on the thumb the only available
21475 scratch registers are the argument registers, and we cannot use
21476 these as they may hold arguments to the function. Instead we
21477 attempt to locate a call preserved register which is used by this
21478 function. If we can find one, then we know that it will have
21479 been pushed at the start of the prologue and so we can corrupt
21480 it now. */
21481 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
21482 if (live_regs_mask & (1 << regno))
21483 break;
21485 gcc_assert(regno <= LAST_LO_REGNUM);
21487 reg = gen_rtx_REG (SImode, regno);
21489 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
21491 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21492 stack_pointer_rtx, reg));
21494 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21495 plus_constant (stack_pointer_rtx,
21496 -amount));
21497 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21498 RTX_FRAME_RELATED_P (insn) = 1;
21502 if (frame_pointer_needed)
21503 thumb_set_frame_pointer (offsets);
21505 /* If we are profiling, make sure no instructions are scheduled before
21506 the call to mcount. Similarly if the user has requested no
21507 scheduling in the prolog. Similarly if we want non-call exceptions
21508 using the EABI unwinder, to prevent faulting instructions from being
21509 swapped with a stack adjustment. */
21510 if (crtl->profile || !TARGET_SCHED_PROLOG
21511 || (arm_except_unwind_info (&global_options) == UI_TARGET
21512 && cfun->can_throw_non_call_exceptions))
21513 emit_insn (gen_blockage ());
21515 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
21516 if (live_regs_mask & 0xff)
21517 cfun->machine->lr_save_eliminated = 0;
21521 void
21522 thumb1_expand_epilogue (void)
21524 HOST_WIDE_INT amount;
21525 arm_stack_offsets *offsets;
21526 int regno;
21528 /* Naked functions don't have epilogues. */
21529 if (IS_NAKED (arm_current_func_type ()))
21530 return;
21532 offsets = arm_get_frame_offsets ();
21533 amount = offsets->outgoing_args - offsets->saved_regs;
21535 if (frame_pointer_needed)
21537 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
21538 amount = offsets->locals_base - offsets->saved_regs;
21540 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
21542 gcc_assert (amount >= 0);
21543 if (amount)
21545 if (amount < 512)
21546 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21547 GEN_INT (amount)));
21548 else
21550 /* r3 is always free in the epilogue. */
21551 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
21553 emit_insn (gen_movsi (reg, GEN_INT (amount)));
21554 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
21558 /* Emit a USE (stack_pointer_rtx), so that
21559 the stack adjustment will not be deleted. */
21560 emit_insn (gen_prologue_use (stack_pointer_rtx));
21562 if (crtl->profile || !TARGET_SCHED_PROLOG)
21563 emit_insn (gen_blockage ());
21565 /* Emit a clobber for each insn that will be restored in the epilogue,
21566 so that flow2 will get register lifetimes correct. */
21567 for (regno = 0; regno < 13; regno++)
21568 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
21569 emit_clobber (gen_rtx_REG (SImode, regno));
21571 if (! df_regs_ever_live_p (LR_REGNUM))
21572 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
21575 /* Implementation of insn prologue_thumb1_interwork. This is the first
21576 "instruction" of a function called in ARM mode. Swap to thumb mode. */
21578 const char *
21579 thumb1_output_interwork (void)
21581 const char * name;
21582 FILE *f = asm_out_file;
21584 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
21585 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
21586 == SYMBOL_REF);
21587 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
21589 /* Generate code sequence to switch us into Thumb mode. */
21590 /* The .code 32 directive has already been emitted by
21591 ASM_DECLARE_FUNCTION_NAME. */
21592 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
21593 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
21595 /* Generate a label, so that the debugger will notice the
21596 change in instruction sets. This label is also used by
21597 the assembler to bypass the ARM code when this function
21598 is called from a Thumb encoded function elsewhere in the
21599 same file. Hence the definition of STUB_NAME here must
21600 agree with the definition in gas/config/tc-arm.c. */
21602 #define STUB_NAME ".real_start_of"
21604 fprintf (f, "\t.code\t16\n");
21605 #ifdef ARM_PE
21606 if (arm_dllexport_name_p (name))
21607 name = arm_strip_name_encoding (name);
21608 #endif
21609 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
21610 fprintf (f, "\t.thumb_func\n");
21611 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
21613 return "";
21616 /* Handle the case of a double word load into a low register from
21617 a computed memory address. The computed address may involve a
21618 register which is overwritten by the load. */
21619 const char *
21620 thumb_load_double_from_address (rtx *operands)
21622 rtx addr;
21623 rtx base;
21624 rtx offset;
21625 rtx arg1;
21626 rtx arg2;
21628 gcc_assert (GET_CODE (operands[0]) == REG);
21629 gcc_assert (GET_CODE (operands[1]) == MEM);
21631 /* Get the memory address. */
21632 addr = XEXP (operands[1], 0);
21634 /* Work out how the memory address is computed. */
21635 switch (GET_CODE (addr))
21637 case REG:
21638 operands[2] = adjust_address (operands[1], SImode, 4);
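/* If the destination overlaps the address register, load the high word
   first so that the address is not clobbered before the second load.  */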
21640 if (REGNO (operands[0]) == REGNO (addr))
21642 output_asm_insn ("ldr\t%H0, %2", operands);
21643 output_asm_insn ("ldr\t%0, %1", operands);
21645 else
21647 output_asm_insn ("ldr\t%0, %1", operands);
21648 output_asm_insn ("ldr\t%H0, %2", operands);
21650 break;
21652 case CONST:
21653 /* Compute <address> + 4 for the high order load. */
21654 operands[2] = adjust_address (operands[1], SImode, 4);
21656 output_asm_insn ("ldr\t%0, %1", operands);
21657 output_asm_insn ("ldr\t%H0, %2", operands);
21658 break;
21660 case PLUS:
21661 arg1 = XEXP (addr, 0);
21662 arg2 = XEXP (addr, 1);
21664 if (CONSTANT_P (arg1))
21665 base = arg2, offset = arg1;
21666 else
21667 base = arg1, offset = arg2;
21669 gcc_assert (GET_CODE (base) == REG);
21671 /* Catch the case of <address> = <reg> + <reg> */
21672 if (GET_CODE (offset) == REG)
21674 int reg_offset = REGNO (offset);
21675 int reg_base = REGNO (base);
21676 int reg_dest = REGNO (operands[0]);
21678 /* Add the base and offset registers together into the
21679 higher destination register. */
21680 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21681 reg_dest + 1, reg_base, reg_offset);
21683 /* Load the lower destination register from the address in
21684 the higher destination register. */
21685 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21686 reg_dest, reg_dest + 1);
21688 /* Load the higher destination register from its own address
21689 plus 4. */
21690 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21691 reg_dest + 1, reg_dest + 1);
21693 else
21695 /* Compute <address> + 4 for the high order load. */
21696 operands[2] = adjust_address (operands[1], SImode, 4);
21698 /* If the computed address is held in the low order register
21699 then load the high order register first, otherwise always
21700 load the low order register first. */
21701 if (REGNO (operands[0]) == REGNO (base))
21703 output_asm_insn ("ldr\t%H0, %2", operands);
21704 output_asm_insn ("ldr\t%0, %1", operands);
21706 else
21708 output_asm_insn ("ldr\t%0, %1", operands);
21709 output_asm_insn ("ldr\t%H0, %2", operands);
21712 break;
21714 case LABEL_REF:
21715 /* With no registers to worry about we can just load the value
21716 directly. */
21717 operands[2] = adjust_address (operands[1], SImode, 4);
21719 output_asm_insn ("ldr\t%H0, %2", operands);
21720 output_asm_insn ("ldr\t%0, %1", operands);
21721 break;
21723 default:
21724 gcc_unreachable ();
21727 return "";
21730 const char *
21731 thumb_output_move_mem_multiple (int n, rtx *operands)
21733 rtx tmp;
21735 switch (n)
21737 case 2:
21738 if (REGNO (operands[4]) > REGNO (operands[5]))
21740 tmp = operands[4];
21741 operands[4] = operands[5];
21742 operands[5] = tmp;
21744 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21745 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21746 break;
21748 case 3:
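/* Sort the three transfer registers into ascending order (a three-element
   bubble sort), so the ldmia/stmia register lists are in canonical order.  */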
21749 if (REGNO (operands[4]) > REGNO (operands[5]))
21751 tmp = operands[4];
21752 operands[4] = operands[5];
21753 operands[5] = tmp;
21755 if (REGNO (operands[5]) > REGNO (operands[6]))
21757 tmp = operands[5];
21758 operands[5] = operands[6];
21759 operands[6] = tmp;
21761 if (REGNO (operands[4]) > REGNO (operands[5]))
21763 tmp = operands[4];
21764 operands[4] = operands[5];
21765 operands[5] = tmp;
21768 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21769 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21770 break;
21772 default:
21773 gcc_unreachable ();
21776 return "";
21779 /* Output a call-via instruction for thumb state. */
21780 const char *
21781 thumb_call_via_reg (rtx reg)
21783 int regno = REGNO (reg);
21784 rtx *labelp;
21786 gcc_assert (regno < LR_REGNUM);
21788 /* If we are in the normal text section we can use a single instance
21789 per compilation unit. If we are doing function sections, then we need
21790 an entry per section, since we can't rely on reachability. */
21791 if (in_section == text_section)
21793 thumb_call_reg_needed = 1;
21795 if (thumb_call_via_label[regno] == NULL)
21796 thumb_call_via_label[regno] = gen_label_rtx ();
21797 labelp = thumb_call_via_label + regno;
21799 else
21801 if (cfun->machine->call_via[regno] == NULL)
21802 cfun->machine->call_via[regno] = gen_label_rtx ();
21803 labelp = cfun->machine->call_via + regno;
21806 output_asm_insn ("bl\t%a0", labelp);
21807 return "";
21810 /* Routines for generating rtl. */
21811 void
21812 thumb_expand_movmemqi (rtx *operands)
21814 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21815 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21816 HOST_WIDE_INT len = INTVAL (operands[2]);
21817 HOST_WIDE_INT offset = 0;
21819 while (len >= 12)
21821 emit_insn (gen_movmem12b (out, in, out, in));
21822 len -= 12;
21825 if (len >= 8)
21827 emit_insn (gen_movmem8b (out, in, out, in));
21828 len -= 8;
21831 if (len >= 4)
21833 rtx reg = gen_reg_rtx (SImode);
21834 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21835 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21836 len -= 4;
21837 offset += 4;
21840 if (len >= 2)
21842 rtx reg = gen_reg_rtx (HImode);
21843 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21844 plus_constant (in, offset))));
21845 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21846 reg));
21847 len -= 2;
21848 offset += 2;
21851 if (len)
21853 rtx reg = gen_reg_rtx (QImode);
21854 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21855 plus_constant (in, offset))));
21856 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21857 reg));
21861 void
21862 thumb_reload_out_hi (rtx *operands)
21864 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21867 /* Handle reading a half-word from memory during reload. */
21868 void
21869 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21871 gcc_unreachable ();
21874 /* Return the length of a function name prefix
21875 that starts with the character 'c'. */
21876 static int
21877 arm_get_strip_length (int c)
21879 switch (c)
21881 ARM_NAME_ENCODING_LENGTHS
21882 default: return 0;
21886 /* Return a pointer to a function's name with any
21887 and all prefix encodings stripped from it. */
21888 const char *
21889 arm_strip_name_encoding (const char *name)
21891 int skip;
21893 while ((skip = arm_get_strip_length (* name)))
21894 name += skip;
21896 return name;
21899 /* If there is a '*' anywhere in the name's prefix, then
21900 emit the stripped name verbatim, otherwise prepend an
21901 underscore if leading underscores are being used. */
21902 void
21903 arm_asm_output_labelref (FILE *stream, const char *name)
21905 int skip;
21906 int verbatim = 0;
21908 while ((skip = arm_get_strip_length (* name)))
21910 verbatim |= (*name == '*');
21911 name += skip;
21914 if (verbatim)
21915 fputs (name, stream);
21916 else
21917 asm_fprintf (stream, "%U%s", name);
21920 static void
21921 arm_file_start (void)
21923 int val;
21925 if (TARGET_UNIFIED_ASM)
21926 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21928 if (TARGET_BPABI)
21930 const char *fpu_name;
21931 if (arm_selected_arch)
21932 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21933 else
21934 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21936 if (TARGET_SOFT_FLOAT)
21938 if (TARGET_VFP)
21939 fpu_name = "softvfp";
21940 else
21941 fpu_name = "softfpa";
21943 else
21945 fpu_name = arm_fpu_desc->name;
21946 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21948 if (TARGET_HARD_FLOAT)
21949 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21950 if (TARGET_HARD_FLOAT_ABI)
21951 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21954 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21956 /* Some of these attributes only apply when the corresponding features
21957 are used. However we don't have any easy way of figuring this out.
21958 Conservatively record the setting that would have been used. */
21960 /* Tag_ABI_FP_rounding. */
21961 if (flag_rounding_math)
21962 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21963 if (!flag_unsafe_math_optimizations)
21965 /* Tag_ABI_FP_denormal. */
21966 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21967 /* Tag_ABI_FP_exceptions. */
21968 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21970 /* Tag_ABI_FP_user_exceptions. */
21971 if (flag_signaling_nans)
21972 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21973 /* Tag_ABI_FP_number_model. */
21974 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21975 flag_finite_math_only ? 1 : 3);
21977 /* Tag_ABI_align8_needed. */
21978 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21979 /* Tag_ABI_align8_preserved. */
21980 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21981 /* Tag_ABI_enum_size. */
21982 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21983 flag_short_enums ? 1 : 2);
21985 /* Tag_ABI_optimization_goals. */
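/* Record whether we optimized for size, for speed, or for debuggability.  */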
21986 if (optimize_size)
21987 val = 4;
21988 else if (optimize >= 2)
21989 val = 2;
21990 else if (optimize)
21991 val = 1;
21992 else
21993 val = 6;
21994 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21996 /* Tag_ABI_FP_16bit_format. */
21997 if (arm_fp16_format)
21998 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21999 (int)arm_fp16_format);
22001 if (arm_lang_output_object_attributes_hook)
22002 arm_lang_output_object_attributes_hook();
22004 default_file_start();
22007 static void
22008 arm_file_end (void)
22010 int regno;
22012 if (NEED_INDICATE_EXEC_STACK)
22013 /* Add .note.GNU-stack. */
22014 file_end_indicate_exec_stack ();
22016 if (! thumb_call_reg_needed)
22017 return;
22019 switch_to_section (text_section);
22020 asm_fprintf (asm_out_file, "\t.code 16\n");
22021 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22023 for (regno = 0; regno < LR_REGNUM; regno++)
22025 rtx label = thumb_call_via_label[regno];
22027 if (label != 0)
22029 targetm.asm_out.internal_label (asm_out_file, "L",
22030 CODE_LABEL_NUMBER (label));
22031 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22036 #ifndef ARM_PE
22037 /* Symbols in the text segment can be accessed without indirecting via the
22038 constant pool; it may take an extra binary operation, but this is still
22039 faster than indirecting via memory. Don't do this when not optimizing,
22040 since we won't be calculating all of the offsets necessary to do this
22041 simplification. */
22043 static void
22044 arm_encode_section_info (tree decl, rtx rtl, int first)
22046 if (optimize > 0 && TREE_CONSTANT (decl))
22047 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22049 default_encode_section_info (decl, rtl, first);
22051 #endif /* !ARM_PE */
22053 static void
22054 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22056 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22057 && !strcmp (prefix, "L"))
22059 arm_ccfsm_state = 0;
22060 arm_target_insn = NULL;
22062 default_internal_label (stream, prefix, labelno);
22065 /* Output code to add DELTA to the first argument, and then jump
22066 to FUNCTION. Used for C++ multiple inheritance. */
22067 static void
22068 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22069 HOST_WIDE_INT delta,
22070 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22071 tree function)
22073 static int thunk_label = 0;
22074 char label[256];
22075 char labelpc[256];
22076 int mi_delta = delta;
22077 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22078 int shift = 0;
22079 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22080 ? 1 : 0);
22081 if (mi_delta < 0)
22082 mi_delta = - mi_delta;
22084 if (TARGET_THUMB1)
22086 int labelno = thunk_label++;
22087 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22088 /* Thunks are entered in ARM mode when available. */
22089 if (TARGET_THUMB1_ONLY)
22091 /* push r3 so we can use it as a temporary. */
22092 /* TODO: Omit this save if r3 is not used. */
22093 fputs ("\tpush {r3}\n", file);
22094 fputs ("\tldr\tr3, ", file);
22096 else
22098 fputs ("\tldr\tr12, ", file);
22100 assemble_name (file, label);
22101 fputc ('\n', file);
22102 if (flag_pic)
22104 /* If we are generating PIC, the ldr instruction below loads
22105 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22106 the address of the add + 8, so we have:
22108 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22109 = target + 1.
22111 Note that we have "+ 1" because some versions of GNU ld
22112 don't set the low bit of the result for R_ARM_REL32
22113 relocations against thumb function symbols.
22114 On ARMv6M this is +4, not +8. */
22115 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22116 assemble_name (file, labelpc);
22117 fputs (":\n", file);
22118 if (TARGET_THUMB1_ONLY)
22120 /* This is 2 insns after the start of the thunk, so we know it
22121 is 4-byte aligned. */
22122 fputs ("\tadd\tr3, pc, r3\n", file);
22123 fputs ("\tmov r12, r3\n", file);
22125 else
22126 fputs ("\tadd\tr12, pc, r12\n", file);
22128 else if (TARGET_THUMB1_ONLY)
22129 fputs ("\tmov r12, r3\n", file);
22131 if (TARGET_THUMB1_ONLY)
22133 if (mi_delta > 255)
22135 fputs ("\tldr\tr3, ", file);
22136 assemble_name (file, label);
22137 fputs ("+4\n", file);
22138 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
22139 mi_op, this_regno, this_regno);
22141 else if (mi_delta != 0)
22143 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22144 mi_op, this_regno, this_regno,
22145 mi_delta);
22148 else
22150 /* TODO: Use movw/movt for large constants when available. */
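/* Emit the delta as a series of add/sub instructions, each carrying an
   8-bit chunk aligned to an even bit position, as required by the ARM
   data-processing immediate encoding.  */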
22151 while (mi_delta != 0)
22153 if ((mi_delta & (3 << shift)) == 0)
22154 shift += 2;
22155 else
22157 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22158 mi_op, this_regno, this_regno,
22159 mi_delta & (0xff << shift));
22160 mi_delta &= ~(0xff << shift);
22161 shift += 8;
22165 if (TARGET_THUMB1)
22167 if (TARGET_THUMB1_ONLY)
22168 fputs ("\tpop\t{r3}\n", file);
22170 fprintf (file, "\tbx\tr12\n");
22171 ASM_OUTPUT_ALIGN (file, 2);
22172 assemble_name (file, label);
22173 fputs (":\n", file);
22174 if (flag_pic)
22176 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22177 rtx tem = XEXP (DECL_RTL (function), 0);
22178 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
22179 tem = gen_rtx_MINUS (GET_MODE (tem),
22180 tem,
22181 gen_rtx_SYMBOL_REF (Pmode,
22182 ggc_strdup (labelpc)));
22183 assemble_integer (tem, 4, BITS_PER_WORD, 1);
22185 else
22186 /* Output ".word .LTHUNKn". */
22187 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
22189 if (TARGET_THUMB1_ONLY && mi_delta > 255)
22190 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
22192 else
22194 fputs ("\tb\t", file);
22195 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
22196 if (NEED_PLT_RELOC)
22197 fputs ("(PLT)", file);
22198 fputc ('\n', file);
22203 arm_emit_vector_const (FILE *file, rtx x)
22205 int i;
22206 const char * pattern;
22208 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22210 switch (GET_MODE (x))
22212 case V2SImode: pattern = "%08x"; break;
22213 case V4HImode: pattern = "%04x"; break;
22214 case V8QImode: pattern = "%02x"; break;
22215 default: gcc_unreachable ();
22218 fprintf (file, "0x");
22219 for (i = CONST_VECTOR_NUNITS (x); i--;)
22221 rtx element;
22223 element = CONST_VECTOR_ELT (x, i);
22224 fprintf (file, pattern, INTVAL (element));
22227 return 1;
22230 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
22231 HFmode constant pool entries are actually loaded with ldr. */
22232 void
22233 arm_emit_fp16_const (rtx c)
22235 REAL_VALUE_TYPE r;
22236 long bits;
22238 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
22239 bits = real_to_target (NULL, &r, HFmode);
22240 if (WORDS_BIG_ENDIAN)
22241 assemble_zeros (2);
22242 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
22243 if (!WORDS_BIG_ENDIAN)
22244 assemble_zeros (2);
22247 const char *
22248 arm_output_load_gr (rtx *operands)
22250 rtx reg;
22251 rtx offset;
22252 rtx wcgr;
22253 rtx sum;
22255 if (GET_CODE (operands [1]) != MEM
22256 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
22257 || GET_CODE (reg = XEXP (sum, 0)) != REG
22258 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
22259 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
22260 return "wldrw%?\t%0, %1";
22262 /* Fix up an out-of-range load of a GR register. */
22263 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
22264 wcgr = operands[0];
22265 operands[0] = reg;
22266 output_asm_insn ("ldr%?\t%0, %1", operands);
22268 operands[0] = wcgr;
22269 operands[1] = reg;
22270 output_asm_insn ("tmcr%?\t%0, %1", operands);
22271 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
22273 return "";
22276 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
22278 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
22279 named arg and all anonymous args onto the stack.
22280 XXX I know the prologue shouldn't be pushing registers, but it is faster
22281 that way. */
22283 static void
22284 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
22285 enum machine_mode mode,
22286 tree type,
22287 int *pretend_size,
22288 int second_time ATTRIBUTE_UNUSED)
22290 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
22291 int nregs;
22293 cfun->machine->uses_anonymous_args = 1;
22294 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
22296 nregs = pcum->aapcs_ncrn;
22297 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
22298 nregs++;
22300 else
22301 nregs = pcum->nregs;
22303 if (nregs < NUM_ARG_REGS)
22304 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
22307 /* Return nonzero if the CONSUMER instruction (a store) does not need
22308 PRODUCER's value to calculate the address. */
22311 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
22313 rtx value = PATTERN (producer);
22314 rtx addr = PATTERN (consumer);
22316 if (GET_CODE (value) == COND_EXEC)
22317 value = COND_EXEC_CODE (value);
22318 if (GET_CODE (value) == PARALLEL)
22319 value = XVECEXP (value, 0, 0);
22320 value = XEXP (value, 0);
22321 if (GET_CODE (addr) == COND_EXEC)
22322 addr = COND_EXEC_CODE (addr);
22323 if (GET_CODE (addr) == PARALLEL)
22324 addr = XVECEXP (addr, 0, 0);
22325 addr = XEXP (addr, 0);
22327 return !reg_overlap_mentioned_p (value, addr);
22330 /* Return nonzero if the CONSUMER instruction (a store) does need
22331 PRODUCER's value to calculate the address. */
22334 arm_early_store_addr_dep (rtx producer, rtx consumer)
22336 return !arm_no_early_store_addr_dep (producer, consumer);
22339 /* Return nonzero if the CONSUMER instruction (a load) does need
22340 PRODUCER's value to calculate the address. */
22343 arm_early_load_addr_dep (rtx producer, rtx consumer)
22345 rtx value = PATTERN (producer);
22346 rtx addr = PATTERN (consumer);
22348 if (GET_CODE (value) == COND_EXEC)
22349 value = COND_EXEC_CODE (value);
22350 if (GET_CODE (value) == PARALLEL)
22351 value = XVECEXP (value, 0, 0);
22352 value = XEXP (value, 0);
22353 if (GET_CODE (addr) == COND_EXEC)
22354 addr = COND_EXEC_CODE (addr);
22355 if (GET_CODE (addr) == PARALLEL)
22356 addr = XVECEXP (addr, 0, 0);
22357 addr = XEXP (addr, 1);
22359 return reg_overlap_mentioned_p (value, addr);
22362 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22363 have an early register shift value or amount dependency on the
22364 result of PRODUCER. */
22367 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
22369 rtx value = PATTERN (producer);
22370 rtx op = PATTERN (consumer);
22371 rtx early_op;
22373 if (GET_CODE (value) == COND_EXEC)
22374 value = COND_EXEC_CODE (value);
22375 if (GET_CODE (value) == PARALLEL)
22376 value = XVECEXP (value, 0, 0);
22377 value = XEXP (value, 0);
22378 if (GET_CODE (op) == COND_EXEC)
22379 op = COND_EXEC_CODE (op);
22380 if (GET_CODE (op) == PARALLEL)
22381 op = XVECEXP (op, 0, 0);
22382 op = XEXP (op, 1);
22384 early_op = XEXP (op, 0);
22385 /* This is either an actual independent shift, or a shift applied to
22386 the first operand of another operation. We want the whole shift
22387 operation. */
22388 if (GET_CODE (early_op) == REG)
22389 early_op = op;
22391 return !reg_overlap_mentioned_p (value, early_op);
22394 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22395 have an early register shift value dependency on the result of
22396 PRODUCER. */
22399 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
22401 rtx value = PATTERN (producer);
22402 rtx op = PATTERN (consumer);
22403 rtx early_op;
22405 if (GET_CODE (value) == COND_EXEC)
22406 value = COND_EXEC_CODE (value);
22407 if (GET_CODE (value) == PARALLEL)
22408 value = XVECEXP (value, 0, 0);
22409 value = XEXP (value, 0);
22410 if (GET_CODE (op) == COND_EXEC)
22411 op = COND_EXEC_CODE (op);
22412 if (GET_CODE (op) == PARALLEL)
22413 op = XVECEXP (op, 0, 0);
22414 op = XEXP (op, 1);
22416 early_op = XEXP (op, 0);
22418 /* This is either an actual independent shift, or a shift applied to
22419 the first operand of another operation. We want the value being
22420 shifted, in either case. */
22421 if (GET_CODE (early_op) != REG)
22422 early_op = XEXP (early_op, 0);
22424 return !reg_overlap_mentioned_p (value, early_op);
22427 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22428 have an early register mult dependency on the result of
22429 PRODUCER. */
22432 arm_no_early_mul_dep (rtx producer, rtx consumer)
22434 rtx value = PATTERN (producer);
22435 rtx op = PATTERN (consumer);
22437 if (GET_CODE (value) == COND_EXEC)
22438 value = COND_EXEC_CODE (value);
22439 if (GET_CODE (value) == PARALLEL)
22440 value = XVECEXP (value, 0, 0);
22441 value = XEXP (value, 0);
22442 if (GET_CODE (op) == COND_EXEC)
22443 op = COND_EXEC_CODE (op);
22444 if (GET_CODE (op) == PARALLEL)
22445 op = XVECEXP (op, 0, 0);
22446 op = XEXP (op, 1);
22448 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22450 if (GET_CODE (XEXP (op, 0)) == MULT)
22451 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22452 else
22453 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22456 return 0;
22459 /* We can't rely on the caller doing the proper promotion when
22460 using APCS or ATPCS. */
22462 static bool
22463 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22465 return !TARGET_AAPCS_BASED;
22468 static enum machine_mode
22469 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22470 enum machine_mode mode,
22471 int *punsignedp ATTRIBUTE_UNUSED,
22472 const_tree fntype ATTRIBUTE_UNUSED,
22473 int for_return ATTRIBUTE_UNUSED)
22475 if (GET_MODE_CLASS (mode) == MODE_INT
22476 && GET_MODE_SIZE (mode) < 4)
22477 return SImode;
22479 return mode;
22482 /* AAPCS based ABIs use short enums by default. */
22484 static bool
22485 arm_default_short_enums (void)
22487 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22491 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22493 static bool
22494 arm_align_anon_bitfield (void)
22496 return TARGET_AAPCS_BASED;
22500 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22502 static tree
22503 arm_cxx_guard_type (void)
22505 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22508 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22509 has an accumulator dependency on the result of the producer (a
22510 multiplication instruction) and no other dependency on that result. */
22512 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22514 rtx mul = PATTERN (producer);
22515 rtx mac = PATTERN (consumer);
22516 rtx mul_result;
22517 rtx mac_op0, mac_op1, mac_acc;
22519 if (GET_CODE (mul) == COND_EXEC)
22520 mul = COND_EXEC_CODE (mul);
22521 if (GET_CODE (mac) == COND_EXEC)
22522 mac = COND_EXEC_CODE (mac);
22524 /* Check that mul is of the form (set (...) (mult ...))
22525 and mla is of the form (set (...) (plus (mult ...) (...))). */
22526 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22527 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22528 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22529 return 0;
22531 mul_result = XEXP (mul, 0);
22532 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22533 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22534 mac_acc = XEXP (XEXP (mac, 1), 1);
22536 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22537 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22538 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22542 /* The EABI says test the least significant bit of a guard variable. */
22544 static bool
22545 arm_cxx_guard_mask_bit (void)
22547 return TARGET_AAPCS_BASED;
22551 /* The EABI specifies that all array cookies are 8 bytes long. */
22553 static tree
22554 arm_get_cookie_size (tree type)
22556 tree size;
22558 if (!TARGET_AAPCS_BASED)
22559 return default_cxx_get_cookie_size (type);
22561 size = build_int_cst (sizetype, 8);
22562 return size;
22566 /* The EABI says that array cookies should also contain the element size. */
22568 static bool
22569 arm_cookie_has_size (void)
22571 return TARGET_AAPCS_BASED;
22575 /* The EABI says constructors and destructors should return a pointer to
22576 the object constructed/destroyed. */
22578 static bool
22579 arm_cxx_cdtor_returns_this (void)
22581 return TARGET_AAPCS_BASED;
22584 /* The EABI says that an inline function may never be the key
22585 method. */
22587 static bool
22588 arm_cxx_key_method_may_be_inline (void)
22590 return !TARGET_AAPCS_BASED;
22593 static void
22594 arm_cxx_determine_class_data_visibility (tree decl)
22596 if (!TARGET_AAPCS_BASED
22597 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22598 return;
22600 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22601 is exported. However, on systems without dynamic vague linkage,
22602 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22603 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22604 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22605 else
22606 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22607 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22610 static bool
22611 arm_cxx_class_data_always_comdat (void)
22613 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22614 vague linkage if the class has no key function. */
22615 return !TARGET_AAPCS_BASED;
22619 /* The EABI says __aeabi_atexit should be used to register static
22620 destructors. */
22622 static bool
22623 arm_cxx_use_aeabi_atexit (void)
22625 return TARGET_AAPCS_BASED;
22629 void
22630 arm_set_return_address (rtx source, rtx scratch)
22632 arm_stack_offsets *offsets;
22633 HOST_WIDE_INT delta;
22634 rtx addr;
22635 unsigned long saved_regs;
22637 offsets = arm_get_frame_offsets ();
22638 saved_regs = offsets->saved_regs_mask;
22640 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22641 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22642 else
22644 if (frame_pointer_needed)
22645 addr = plus_constant(hard_frame_pointer_rtx, -4);
22646 else
22648 /* LR will be the first saved register. */
22649 delta = offsets->outgoing_args - (offsets->frame + 4);
22652 if (delta >= 4096)
22654 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22655 GEN_INT (delta & ~4095)));
22656 addr = scratch;
22657 delta &= 4095;
22659 else
22660 addr = stack_pointer_rtx;
22662 addr = plus_constant (addr, delta);
22664 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22669 void
22670 thumb_set_return_address (rtx source, rtx scratch)
22672 arm_stack_offsets *offsets;
22673 HOST_WIDE_INT delta;
22674 HOST_WIDE_INT limit;
22675 int reg;
22676 rtx addr;
22677 unsigned long mask;
22679 emit_use (source);
22681 offsets = arm_get_frame_offsets ();
22682 mask = offsets->saved_regs_mask;
22683 if (mask & (1 << LR_REGNUM))
22685 limit = 1024;
22686 /* Find the saved regs. */
22687 if (frame_pointer_needed)
22689 delta = offsets->soft_frame - offsets->saved_args;
22690 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22691 if (TARGET_THUMB1)
22692 limit = 128;
22694 else
22696 delta = offsets->outgoing_args - offsets->saved_args;
22697 reg = SP_REGNUM;
22699 /* Allow for the stack frame. */
22700 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22701 delta -= 16;
22702 /* The link register is always the first saved register. */
22703 delta -= 4;
22705 /* Construct the address. */
22706 addr = gen_rtx_REG (SImode, reg);
22707 if (delta > limit)
22709 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22710 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22711 addr = scratch;
22713 else
22714 addr = plus_constant (addr, delta);
22716 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22718 else
22719 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22722 /* Implements target hook vector_mode_supported_p. */
22723 bool
22724 arm_vector_mode_supported_p (enum machine_mode mode)
22726 /* Neon also supports V2SImode, etc. listed in the clause below. */
22727 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22728 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22729 return true;
22731 if ((TARGET_NEON || TARGET_IWMMXT)
22732 && ((mode == V2SImode)
22733 || (mode == V4HImode)
22734 || (mode == V8QImode)))
22735 return true;
22737 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
22738 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
22739 || mode == V2HAmode))
22740 return true;
22742 return false;
22745 /* Implements target hook array_mode_supported_p. */
22747 static bool
22748 arm_array_mode_supported_p (enum machine_mode mode,
22749 unsigned HOST_WIDE_INT nelems)
22751 if (TARGET_NEON
22752 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
22753 && (nelems >= 2 && nelems <= 4))
22754 return true;
22756 return false;
22759 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22760 registers when autovectorizing for Neon, at least until multiple vector
22761 widths are supported properly by the middle-end. */
22763 static enum machine_mode
22764 arm_preferred_simd_mode (enum machine_mode mode)
22766 if (TARGET_NEON)
22767 switch (mode)
22769 case SFmode:
22770 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22771 case SImode:
22772 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22773 case HImode:
22774 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22775 case QImode:
22776 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22777 case DImode:
22778 if (TARGET_NEON_VECTORIZE_QUAD)
22779 return V2DImode;
22780 break;
22782 default:;
22785 if (TARGET_REALLY_IWMMXT)
22786 switch (mode)
22788 case SImode:
22789 return V2SImode;
22790 case HImode:
22791 return V4HImode;
22792 case QImode:
22793 return V8QImode;
22795 default:;
22798 return word_mode;
22801 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22803 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22804 using r0-r4 for function arguments, r7 for the stack frame and don't have
22805 enough left over to do doubleword arithmetic. For Thumb-2 all the
22806 potentially problematic instructions accept high registers so this is not
22807 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22808 that require many low registers. */
22809 static bool
22810 arm_class_likely_spilled_p (reg_class_t rclass)
22812 if ((TARGET_THUMB1 && rclass == LO_REGS)
22813 || rclass == CC_REG)
22814 return true;
22816 return false;
22819 /* Implements target hook small_register_classes_for_mode_p. */
22820 bool
22821 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22823 return TARGET_THUMB1;
22826 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22827 ARM insns and therefore guarantee that the shift count is modulo 256.
22828 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22829 guarantee no particular behavior for out-of-range counts. */
22831 static unsigned HOST_WIDE_INT
22832 arm_shift_truncation_mask (enum machine_mode mode)
22834 return mode == SImode ? 255 : 0;
22838 /* Map internal gcc register numbers to DWARF2 register numbers. */
22840 unsigned int
22841 arm_dbx_register_number (unsigned int regno)
22843 if (regno < 16)
22844 return regno;
22846 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22847 compatibility. The EABI defines them as registers 96-103. */
22848 if (IS_FPA_REGNUM (regno))
22849 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22851 if (IS_VFP_REGNUM (regno))
22853 /* See comment in arm_dwarf_register_span. */
22854 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22855 return 64 + regno - FIRST_VFP_REGNUM;
22856 else
22857 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22860 if (IS_IWMMXT_GR_REGNUM (regno))
22861 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22863 if (IS_IWMMXT_REGNUM (regno))
22864 return 112 + regno - FIRST_IWMMXT_REGNUM;
22866 gcc_unreachable ();
22869 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22870 GCC models them as 64 32-bit registers, so we need to describe this to
22871 the DWARF generation code. Other registers can use the default. */
22872 static rtx
22873 arm_dwarf_register_span (rtx rtl)
22875 unsigned regno;
22876 int nregs;
22877 int i;
22878 rtx p;
22880 regno = REGNO (rtl);
22881 if (!IS_VFP_REGNUM (regno))
22882 return NULL_RTX;
22884 /* XXX FIXME: The EABI defines two VFP register ranges:
22885 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22886 256-287: D0-D31
22887 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22888 corresponding D register. Until GDB supports this, we shall use the
22889 legacy encodings. We also use these encodings for D0-D15 for
22890 compatibility with older debuggers. */
22891 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22892 return NULL_RTX;
22894 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22895 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22896 regno = (regno - FIRST_VFP_REGNUM) / 2;
22897 for (i = 0; i < nregs; i++)
22898 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22900 return p;
22903 #if ARM_UNWIND_INFO
22904 /* Emit unwind directives for a store-multiple instruction or stack pointer
22905 push during alignment.
22906 These should only ever be generated by the function prologue code, so
22907 expect them to have a particular form. */
22909 static void
22910 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22912 int i;
22913 HOST_WIDE_INT offset;
22914 HOST_WIDE_INT nregs;
22915 int reg_size;
22916 unsigned reg;
22917 unsigned lastreg;
22918 rtx e;
22920 e = XVECEXP (p, 0, 0);
22921 if (GET_CODE (e) != SET)
22922 abort ();
22924 /* First insn will adjust the stack pointer. */
22925 if (GET_CODE (e) != SET
22926 || GET_CODE (XEXP (e, 0)) != REG
22927 || REGNO (XEXP (e, 0)) != SP_REGNUM
22928 || GET_CODE (XEXP (e, 1)) != PLUS)
22929 abort ();
22931 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22932 nregs = XVECLEN (p, 0) - 1;
22934 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
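/* The first saved register determines whether this is a core-register push
   (.save), a VFP push (.vsave) or an FPA save.  */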
22935 if (reg < 16)
22937 /* The function prologue may also push pc, but does not annotate it, as it is
22938 never restored. We turn this into a stack pointer adjustment. */
22939 if (nregs * 4 == offset - 4)
22941 fprintf (asm_out_file, "\t.pad #4\n");
22942 offset -= 4;
22944 reg_size = 4;
22945 fprintf (asm_out_file, "\t.save {");
22947 else if (IS_VFP_REGNUM (reg))
22949 reg_size = 8;
22950 fprintf (asm_out_file, "\t.vsave {");
22952 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22954 /* FPA registers are done differently. */
22955 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22956 return;
22958 else
22959 /* Unknown register type. */
22960 abort ();
22962 /* If the stack increment doesn't match the size of the saved registers,
22963 something has gone horribly wrong. */
22964 if (offset != nregs * reg_size)
22965 abort ();
22967 offset = 0;
22968 lastreg = 0;
22969 /* The remaining insns will describe the stores. */
22970 for (i = 1; i <= nregs; i++)
22972 /* Expect (set (mem <addr>) (reg)).
22973 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22974 e = XVECEXP (p, 0, i);
22975 if (GET_CODE (e) != SET
22976 || GET_CODE (XEXP (e, 0)) != MEM
22977 || GET_CODE (XEXP (e, 1)) != REG)
22978 abort ();
22980 reg = REGNO (XEXP (e, 1));
22981 if (reg < lastreg)
22982 abort ();
22984 if (i != 1)
22985 fprintf (asm_out_file, ", ");
22986 /* We can't use %r for vfp because we need to use the
22987 double precision register names. */
22988 if (IS_VFP_REGNUM (reg))
22989 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22990 else
22991 asm_fprintf (asm_out_file, "%r", reg);
22993 #ifdef ENABLE_CHECKING
22994 /* Check that the addresses are consecutive. */
22995 e = XEXP (XEXP (e, 0), 0);
22996 if (GET_CODE (e) == PLUS)
22998 offset += reg_size;
22999 if (GET_CODE (XEXP (e, 0)) != REG
23000 || REGNO (XEXP (e, 0)) != SP_REGNUM
23001 || GET_CODE (XEXP (e, 1)) != CONST_INT
23002 || offset != INTVAL (XEXP (e, 1)))
23003 abort ();
23005 else if (i != 1
23006 || GET_CODE (e) != REG
23007 || REGNO (e) != SP_REGNUM)
23008 abort ();
23009 #endif
23011 fprintf (asm_out_file, "}\n");
23014 /* Emit unwind directives for a SET. */
23016 static void
23017 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23019 rtx e0;
23020 rtx e1;
23021 unsigned reg;
23023 e0 = XEXP (p, 0);
23024 e1 = XEXP (p, 1);
23025 switch (GET_CODE (e0))
23027 case MEM:
23028 /* Pushing a single register. */
23029 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23030 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23031 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23032 abort ();
23034 asm_fprintf (asm_out_file, "\t.save ");
23035 if (IS_VFP_REGNUM (REGNO (e1)))
23036 asm_fprintf(asm_out_file, "{d%d}\n",
23037 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23038 else
23039 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23040 break;
23042 case REG:
23043 if (REGNO (e0) == SP_REGNUM)
23045 /* A stack increment. */
23046 if (GET_CODE (e1) != PLUS
23047 || GET_CODE (XEXP (e1, 0)) != REG
23048 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23049 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23050 abort ();
23052 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23053 -INTVAL (XEXP (e1, 1)));
23055 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23057 HOST_WIDE_INT offset;
23059 if (GET_CODE (e1) == PLUS)
23061 if (GET_CODE (XEXP (e1, 0)) != REG
23062 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23063 abort ();
23064 reg = REGNO (XEXP (e1, 0));
23065 offset = INTVAL (XEXP (e1, 1));
23066 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23067 HARD_FRAME_POINTER_REGNUM, reg,
23068 offset);
23070 else if (GET_CODE (e1) == REG)
23072 reg = REGNO (e1);
23073 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23074 HARD_FRAME_POINTER_REGNUM, reg);
23076 else
23077 abort ();
23079 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23081 /* Move from sp to reg. */
23082 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23084 else if (GET_CODE (e1) == PLUS
23085 && GET_CODE (XEXP (e1, 0)) == REG
23086 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23087 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23089 /* Set reg to offset from sp. */
23090 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23091 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
23093 else
23094 abort ();
23095 break;
23097 default:
23098 abort ();
23103 /* Emit unwind directives for the given insn. */
23105 static void
23106 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23108 rtx note, pat;
23109 bool handled_one = false;
23111 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23112 return;
23114 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23115 && (TREE_NOTHROW (current_function_decl)
23116 || crtl->all_throwers_are_sibcalls))
23117 return;
23119 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
23120 return;
23122 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
23124 pat = XEXP (note, 0);
23125 switch (REG_NOTE_KIND (note))
23127 case REG_FRAME_RELATED_EXPR:
23128 goto found;
23130 case REG_CFA_REGISTER:
23131 if (pat == NULL)
23133 pat = PATTERN (insn);
23134 if (GET_CODE (pat) == PARALLEL)
23135 pat = XVECEXP (pat, 0, 0);
23138 /* Only emitted for IS_STACKALIGN re-alignment. */
23140 rtx dest, src;
23141 unsigned reg;
23143 src = SET_SRC (pat);
23144 dest = SET_DEST (pat);
23146 gcc_assert (src == stack_pointer_rtx);
23147 reg = REGNO (dest);
23148 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23149 reg + 0x90, reg);
23151 handled_one = true;
23152 break;
23154 case REG_CFA_DEF_CFA:
23155 case REG_CFA_EXPRESSION:
23156 case REG_CFA_ADJUST_CFA:
23157 case REG_CFA_OFFSET:
23158 /* ??? Only handling here what we actually emit. */
23159 gcc_unreachable ();
23161 default:
23162 break;
23165 if (handled_one)
23166 return;
23167 pat = PATTERN (insn);
23168 found:
23170 switch (GET_CODE (pat))
23172 case SET:
23173 arm_unwind_emit_set (asm_out_file, pat);
23174 break;
23176 case SEQUENCE:
23177 /* Store multiple. */
23178 arm_unwind_emit_sequence (asm_out_file, pat);
23179 break;
23181 default:
23182 abort();
23187 /* Output a reference from a function exception table to the type_info
23188 object X. The EABI specifies that the symbol should be relocated by
23189 an R_ARM_TARGET2 relocation. */
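/* For example, a reference to the type_info object for "int" would come out
   as (the symbol name here is purely illustrative):
       .word   _ZTIi(TARGET2)
   while a constant entry such as 0 is emitted as a plain ".word 0".  */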
23191 static bool
23192 arm_output_ttype (rtx x)
23194 fputs ("\t.word\t", asm_out_file);
23195 output_addr_const (asm_out_file, x);
23196 /* Use special relocations for symbol references. */
23197 if (GET_CODE (x) != CONST_INT)
23198 fputs ("(TARGET2)", asm_out_file);
23199 fputc ('\n', asm_out_file);
23201 return TRUE;
23204 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
23206 static void
23207 arm_asm_emit_except_personality (rtx personality)
23209 fputs ("\t.personality\t", asm_out_file);
23210 output_addr_const (asm_out_file, personality);
23211 fputc ('\n', asm_out_file);
23214 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
23216 static void
23217 arm_asm_init_sections (void)
23219 exception_section = get_unnamed_section (0, output_section_asm_op,
23220 "\t.handlerdata");
23222 #endif /* ARM_UNWIND_INFO */
23224 /* Output unwind directives for the start/end of a function. */
23226 void
23227 arm_output_fn_unwind (FILE * f, bool prologue)
23229 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23230 return;
23232 if (prologue)
23233 fputs ("\t.fnstart\n", f);
23234 else
23236 /* If this function will never be unwound, then mark it as such.
23237 The same condition is used in arm_unwind_emit to suppress
23238 the frame annotations. */
23239 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23240 && (TREE_NOTHROW (current_function_decl)
23241 || crtl->all_throwers_are_sibcalls))
23242 fputs("\t.cantunwind\n", f);
23244 fputs ("\t.fnend\n", f);
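/* Output the operand of an UNSPEC_TLS reference, decorated with the
   relocation suffix ((tlsgd), (tlsldm), (gottpoff), ...) selected by the
   reloc operand, plus the PC-relative adjustment required by the GD, LDM,
   IE and DESCSEQ forms.  Called from arm_output_addr_const_extra below.  */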
23248 static bool
23249 arm_emit_tls_decoration (FILE *fp, rtx x)
23251 enum tls_reloc reloc;
23252 rtx val;
23254 val = XVECEXP (x, 0, 0);
23255 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
23257 output_addr_const (fp, val);
23259 switch (reloc)
23261 case TLS_GD32:
23262 fputs ("(tlsgd)", fp);
23263 break;
23264 case TLS_LDM32:
23265 fputs ("(tlsldm)", fp);
23266 break;
23267 case TLS_LDO32:
23268 fputs ("(tlsldo)", fp);
23269 break;
23270 case TLS_IE32:
23271 fputs ("(gottpoff)", fp);
23272 break;
23273 case TLS_LE32:
23274 fputs ("(tpoff)", fp);
23275 break;
23276 case TLS_DESCSEQ:
23277 fputs ("(tlsdesc)", fp);
23278 break;
23279 default:
23280 gcc_unreachable ();
23283 switch (reloc)
23285 case TLS_GD32:
23286 case TLS_LDM32:
23287 case TLS_IE32:
23288 case TLS_DESCSEQ:
23289 fputs (" + (. - ", fp);
23290 output_addr_const (fp, XVECEXP (x, 0, 2));
23291 /* For TLS_DESCSEQ the third operand encodes thumbness, and is added rather than subtracted. */
23292 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
23293 output_addr_const (fp, XVECEXP (x, 0, 3));
23294 fputc (')', fp);
23295 break;
23296 default:
23297 break;
23300 return TRUE;
23303 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
23305 static void
23306 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
23308 gcc_assert (size == 4);
23309 fputs ("\t.word\t", file);
23310 output_addr_const (file, x);
23311 fputs ("(tlsldo)", file);
23314 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
23316 static bool
23317 arm_output_addr_const_extra (FILE *fp, rtx x)
23319 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
23320 return arm_emit_tls_decoration (fp, x);
23321 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
23323 char label[256];
23324 int labelno = INTVAL (XVECEXP (x, 0, 0));
23326 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
23327 assemble_name_raw (fp, label);
23329 return TRUE;
23331 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
23333 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
23334 if (GOT_PCREL)
23335 fputs ("+.", fp);
23336 fputs ("-(", fp);
23337 output_addr_const (fp, XVECEXP (x, 0, 0));
23338 fputc (')', fp);
23339 return TRUE;
23341 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
23343 output_addr_const (fp, XVECEXP (x, 0, 0));
23344 if (GOT_PCREL)
23345 fputs ("+.", fp);
23346 fputs ("-(", fp);
23347 output_addr_const (fp, XVECEXP (x, 0, 1));
23348 fputc (')', fp);
23349 return TRUE;
23351 else if (GET_CODE (x) == CONST_VECTOR)
23352 return arm_emit_vector_const (fp, x);
23354 return FALSE;
23357 /* Output assembly for a shift instruction.
23358 SET_FLAGS determines how the instruction modifies the condition codes.
23359 0 - Do not set condition codes.
23360 1 - Set condition codes.
23361 2 - Use smallest instruction. */
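/* For instance, with TARGET_UNIFIED_ASM and a constant shift this builds a
   template such as "lsl%.\t%0, %1, %2" and hands it to output_asm_insn; the
   punctuation character ('?', '.' or '!'), picked from flag_chars by
   SET_FLAGS, encodes the condition-code behaviour described above.  */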
23362 const char *
23363 arm_output_shift(rtx * operands, int set_flags)
23365 char pattern[100];
23366 static const char flag_chars[3] = {'?', '.', '!'};
23367 const char *shift;
23368 HOST_WIDE_INT val;
23369 char c;
23371 c = flag_chars[set_flags];
23372 if (TARGET_UNIFIED_ASM)
23374 shift = shift_op(operands[3], &val);
23375 if (shift)
23377 if (val != -1)
23378 operands[2] = GEN_INT(val);
23379 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
23381 else
23382 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
23384 else
23385 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
23386 output_asm_insn (pattern, operands);
23387 return "";
23390 /* Output a Thumb-1 casesi dispatch sequence. */
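/* The dispatch is done by calling one of the __gnu_thumb1_case_* helper
   routines provided by libgcc, chosen by the mode and signedness of the
   dispatch-table entries; an unsigned byte-offset table, for example,
   results in "bl __gnu_thumb1_case_uqi".  */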
23391 const char *
23392 thumb1_output_casesi (rtx *operands)
23394 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
23396 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23398 switch (GET_MODE(diff_vec))
23400 case QImode:
23401 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23402 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23403 case HImode:
23404 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23405 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23406 case SImode:
23407 return "bl\t%___gnu_thumb1_case_si";
23408 default:
23409 gcc_unreachable ();
23413 /* Output a Thumb-2 casesi instruction. */
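/* An illustrative QImode dispatch (register, bound and label are
   hypothetical):
       cmp     r0, #11
       bhi     .Ldefault
       tbb     [pc, r0]
   For SImode tables a PC-relative load (or an adr/ldr/add/bx sequence when
   generating PIC code) is used instead, as below.  */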
23414 const char *
23415 thumb2_output_casesi (rtx *operands)
23417 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
23419 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23421 output_asm_insn ("cmp\t%0, %1", operands);
23422 output_asm_insn ("bhi\t%l3", operands);
23423 switch (GET_MODE(diff_vec))
23425 case QImode:
23426 return "tbb\t[%|pc, %0]";
23427 case HImode:
23428 return "tbh\t[%|pc, %0, lsl #1]";
23429 case SImode:
23430 if (flag_pic)
23432 output_asm_insn ("adr\t%4, %l2", operands);
23433 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23434 output_asm_insn ("add\t%4, %4, %5", operands);
23435 return "bx\t%4";
23437 else
23439 output_asm_insn ("adr\t%4, %l2", operands);
23440 return "ldr\t%|pc, [%4, %0, lsl #2]";
23442 default:
23443 gcc_unreachable ();
23447 /* Most ARM cores are single issue, but some newer ones can dual issue.
23448 The scheduler descriptions rely on this being correct. */
23449 static int
23450 arm_issue_rate (void)
23452 switch (arm_tune)
23454 case cortexr4:
23455 case cortexr4f:
23456 case cortexr5:
23457 case cortexa5:
23458 case cortexa8:
23459 case cortexa9:
23460 case fa726te:
23461 return 2;
23463 default:
23464 return 1;
23468 /* A table and a function to perform ARM-specific name mangling for
23469 NEON vector types in order to conform to the AAPCS (see "Procedure
23470 Call Standard for the ARM Architecture", Appendix A). To qualify
23471 for emission with the mangled names defined in that document, a
23472 vector type must not only be of the correct mode but also be
23473 composed of NEON vector element types (e.g. __builtin_neon_qi). */
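/* For example, a 64-bit vector of __builtin_neon_qi elements (V8QImode,
   i.e. int8x8_t) is mangled as "15__simd64_int8_t", the length-prefixed
   form required by the C++ ABI for the AAPCS-defined name.  */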
23474 typedef struct
23476 enum machine_mode mode;
23477 const char *element_type_name;
23478 const char *aapcs_name;
23479 } arm_mangle_map_entry;
23481 static arm_mangle_map_entry arm_mangle_map[] = {
23482 /* 64-bit containerized types. */
23483 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23484 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23485 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23486 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23487 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23488 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23489 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23490 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23491 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23492 /* 128-bit containerized types. */
23493 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23494 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23495 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23496 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23497 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23498 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23499 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23500 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23501 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23502 { VOIDmode, NULL, NULL }
23505 const char *
23506 arm_mangle_type (const_tree type)
23508 arm_mangle_map_entry *pos = arm_mangle_map;
23510 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23511 has to be mangled as if it is in the "std" namespace. */
23512 if (TARGET_AAPCS_BASED
23513 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23515 static bool warned;
23516 if (!warned && warn_psabi && !in_system_header)
23518 warned = true;
23519 inform (input_location,
23520 "the mangling of %<va_list%> has changed in GCC 4.4");
23522 return "St9__va_list";
23525 /* Half-precision float. */
23526 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23527 return "Dh";
23529 if (TREE_CODE (type) != VECTOR_TYPE)
23530 return NULL;
23532 /* Check the mode of the vector type, and the name of the vector
23533 element type, against the table. */
23534 while (pos->mode != VOIDmode)
23536 tree elt_type = TREE_TYPE (type);
23538 if (pos->mode == TYPE_MODE (type)
23539 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23540 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23541 pos->element_type_name))
23542 return pos->aapcs_name;
23544 pos++;
23547 /* Use the default mangling for unrecognized (possibly user-defined)
23548 vector types. */
23549 return NULL;
23552 /* Order of allocation of core registers for Thumb: this allocation is
23553 written over the corresponding initial entries of the array
23554 initialized with REG_ALLOC_ORDER. We allocate all low registers
23555 first. Saving and restoring a low register is usually cheaper than
23556 using a call-clobbered high register. */
23558 static const int thumb_core_reg_alloc_order[] =
23560 3, 2, 1, 0, 4, 5, 6, 7,
23561 14, 12, 8, 9, 10, 11, 13, 15
23564 /* Adjust register allocation order when compiling for Thumb. */
23566 void
23567 arm_order_regs_for_local_alloc (void)
23569 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23570 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23571 if (TARGET_THUMB)
23572 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23573 sizeof (thumb_core_reg_alloc_order));
23576 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23578 bool
23579 arm_frame_pointer_required (void)
23581 return (cfun->has_nonlocal_label
23582 || SUBTARGET_FRAME_POINTER_REQUIRED
23583 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23586 /* Only thumb1 can't support conditional execution, so return true if
23587 the target is not thumb1. */
23588 static bool
23589 arm_have_conditional_execution (void)
23591 return !TARGET_THUMB1;
23594 /* Legitimize a memory reference for sync primitive implemented using
23595 ldrex / strex. We currently force the form of the reference to be
23596 indirect without offset. We do not yet support the indirect offset
23597 addressing supported by some ARM targets for these
23598 instructions. */
23599 static rtx
23600 arm_legitimize_sync_memory (rtx memory)
23602 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23603 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23605 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23606 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23607 return legitimate_memory;
23610 /* An instruction emitter. */
23611 typedef void (* emit_f) (int label, const char *, rtx *);
23613 /* An instruction emitter that emits via the conventional
23614 output_asm_insn. */
23615 static void
23616 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23618 output_asm_insn (pattern, operands);
23621 /* Count the number of emitted synchronization instructions. */
23622 static unsigned arm_insn_count;
23624 /* An emitter that counts emitted instructions but does not actually
23625 emit instructions into the instruction stream. */
23626 static void
23627 arm_count (int label,
23628 const char *pattern ATTRIBUTE_UNUSED,
23629 rtx *operands ATTRIBUTE_UNUSED)
23631 if (! label)
23632 ++ arm_insn_count;
23635 /* Construct a pattern using conventional output formatting and feed
23636 it to output_asm_insn. Provides a mechanism to construct the
23637 output pattern on the fly. Note the hard limit on the pattern
23638 buffer size. */
23639 static void ATTRIBUTE_PRINTF_4
23640 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23641 const char *pattern, ...)
23643 va_list ap;
23644 char buffer[256];
23646 va_start (ap, pattern);
23647 vsprintf (buffer, pattern, ap);
23648 va_end (ap);
23649 emit (label, buffer, operands);
23652 /* Emit the memory barrier instruction, if any, provided by this
23653 target to a specified emitter. */
23654 static void
23655 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23657 if (TARGET_HAVE_DMB)
23659 /* Note we issue a system level barrier. We should consider
23660 issuing an inner shareability zone barrier here instead, i.e.
23661 "DMB ISH". */
23662 emit (0, "dmb\tsy", operands);
23663 return;
23666 if (TARGET_HAVE_DMB_MCR)
23668 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23669 return;
23672 gcc_unreachable ();
23675 /* Emit the memory barrier instruction, if any, provided by this
23676 target. */
23677 const char *
23678 arm_output_memory_barrier (rtx *operands)
23680 arm_process_output_memory_barrier (arm_emit, operands);
23681 return "";
23684 /* Helper to figure out the instruction suffix required on ldrex/strex
23685 for operations on an object of the specified mode. */
23686 static const char *
23687 arm_ldrex_suffix (enum machine_mode mode)
23689 switch (mode)
23691 case QImode: return "b";
23692 case HImode: return "h";
23693 case SImode: return "";
23694 case DImode: return "d";
23695 default:
23696 gcc_unreachable ();
23698 return "";
23701 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23702 mode. */
23703 static void
23704 arm_output_ldrex (emit_f emit,
23705 enum machine_mode mode,
23706 rtx target,
23707 rtx memory)
23709 const char *suffix = arm_ldrex_suffix (mode);
23710 rtx operands[2];
23712 operands[0] = target;
23713 operands[1] = memory;
23714 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23717 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23718 mode. */
23719 static void
23720 arm_output_strex (emit_f emit,
23721 enum machine_mode mode,
23722 const char *cc,
23723 rtx result,
23724 rtx value,
23725 rtx memory)
23727 const char *suffix = arm_ldrex_suffix (mode);
23728 rtx operands[3];
23730 operands[0] = result;
23731 operands[1] = value;
23732 operands[2] = memory;
23733 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23734 cc);
23737 /* Helper to emit a two operand instruction. */
23738 static void
23739 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23741 rtx operands[2];
23743 operands[0] = d;
23744 operands[1] = s;
23745 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23748 /* Helper to emit a three operand instruction. */
23749 static void
23750 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23752 rtx operands[3];
23754 operands[0] = d;
23755 operands[1] = a;
23756 operands[2] = b;
23757 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23760 /* Emit a load store exclusive synchronization loop.
23763 old_value = [mem]
23764 if old_value != required_value
23765 break;
23766 t1 = sync_op (old_value, new_value)
23767 [mem] = t1, t2 = [0|1]
23768 while ! t2
23770 Note:
23771 t1 == t2 is not permitted
23772 t1 == old_value is permitted
23774 required_value:
23776 RTX register or const_int representing the required old_value for
23777 the modify to continue; if NULL, no comparison is performed. */
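/* As a sketch (register choices purely illustrative), a SImode SYNC_OP_ADD
   with no required_value and a separate T2 flag register expands to roughly:

       dmb    sy
   .LSYT0:
       ldrex  r0, [r2]
       add    r3, r0, r1
       strex  r4, r3, [r2]
       teq    r4, #0
       bne    .LSYT0
       dmb    sy

   where the loop label is really built from LOCAL_LABEL_PREFIX and "LSYT".  */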
23778 static void
23779 arm_output_sync_loop (emit_f emit,
23780 enum machine_mode mode,
23781 rtx old_value,
23782 rtx memory,
23783 rtx required_value,
23784 rtx new_value,
23785 rtx t1,
23786 rtx t2,
23787 enum attr_sync_op sync_op,
23788 int early_barrier_required)
23790 rtx operands[1];
23792 gcc_assert (t1 != t2);
23794 if (early_barrier_required)
23795 arm_process_output_memory_barrier (emit, NULL);
23797 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23799 arm_output_ldrex (emit, mode, old_value, memory);
23801 if (required_value)
23803 rtx operands[2];
23805 operands[0] = old_value;
23806 operands[1] = required_value;
23807 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23808 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23811 switch (sync_op)
23813 case SYNC_OP_ADD:
23814 arm_output_op3 (emit, "add", t1, old_value, new_value);
23815 break;
23817 case SYNC_OP_SUB:
23818 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23819 break;
23821 case SYNC_OP_IOR:
23822 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23823 break;
23825 case SYNC_OP_XOR:
23826 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23827 break;
23829 case SYNC_OP_AND:
23830 arm_output_op3 (emit,"and", t1, old_value, new_value);
23831 break;
23833 case SYNC_OP_NAND:
23834 arm_output_op3 (emit, "and", t1, old_value, new_value);
23835 arm_output_op2 (emit, "mvn", t1, t1);
23836 break;
23838 case SYNC_OP_NONE:
23839 t1 = new_value;
23840 break;
23843 if (t2)
23845 arm_output_strex (emit, mode, "", t2, t1, memory);
23846 operands[0] = t2;
23847 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23848 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23849 LOCAL_LABEL_PREFIX);
23851 else
23853 /* Use old_value for the return value because for some operations
23854 the old_value can easily be restored. This saves one register. */
23855 arm_output_strex (emit, mode, "", old_value, t1, memory);
23856 operands[0] = old_value;
23857 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23858 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23859 LOCAL_LABEL_PREFIX);
23861 switch (sync_op)
23863 case SYNC_OP_ADD:
23864 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23865 break;
23867 case SYNC_OP_SUB:
23868 arm_output_op3 (emit, "add", old_value, t1, new_value);
23869 break;
23871 case SYNC_OP_XOR:
23872 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23873 break;
23875 case SYNC_OP_NONE:
23876 arm_output_op2 (emit, "mov", old_value, required_value);
23877 break;
23879 default:
23880 gcc_unreachable ();
23884 arm_process_output_memory_barrier (emit, NULL);
23885 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
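/* Fetch a sync-insn operand: an attribute value INDEX of zero means the
   operand is absent and DEFAULT_VALUE should be used; otherwise return
   operands[INDEX - 1].  */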
23888 static rtx
23889 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23891 if (index > 0)
23892 default_value = operands[index - 1];
23894 return default_value;
23897 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23898 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23900 /* Extract the operands for a synchronization instruction from the
23901 instruction's attributes and emit the instruction. */
23902 static void
23903 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23905 rtx result, memory, required_value, new_value, t1, t2;
23906 int early_barrier;
23907 enum machine_mode mode;
23908 enum attr_sync_op sync_op;
23910 result = FETCH_SYNC_OPERAND(result, 0);
23911 memory = FETCH_SYNC_OPERAND(memory, 0);
23912 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23913 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23914 t1 = FETCH_SYNC_OPERAND(t1, 0);
23915 t2 = FETCH_SYNC_OPERAND(t2, 0);
23916 early_barrier =
23917 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23918 sync_op = get_attr_sync_op (insn);
23919 mode = GET_MODE (memory);
23921 arm_output_sync_loop (emit, mode, result, memory, required_value,
23922 new_value, t1, t2, sync_op, early_barrier);
23925 /* Emit a synchronization instruction loop. */
23926 const char *
23927 arm_output_sync_insn (rtx insn, rtx *operands)
23929 arm_process_output_sync_insn (arm_emit, insn, operands);
23930 return "";
23933 /* Count the number of machine instructions that will be emitted for a
23934 synchronization instruction. Note that the emitter used does not
23935 emit instructions, it just counts instructions, being careful not
23936 to count labels. */
23937 unsigned int
23938 arm_sync_loop_insns (rtx insn, rtx *operands)
23940 arm_insn_count = 0;
23941 arm_process_output_sync_insn (arm_count, insn, operands);
23942 return arm_insn_count;
23945 /* Helper to call a target sync instruction generator, dealing with
23946 the variation in operands required by the different generators. */
23947 static rtx
23948 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23949 rtx memory, rtx required_value, rtx new_value)
23951 switch (generator->op)
23953 case arm_sync_generator_omn:
23954 gcc_assert (! required_value);
23955 return generator->u.omn (old_value, memory, new_value);
23957 case arm_sync_generator_omrn:
23958 gcc_assert (required_value);
23959 return generator->u.omrn (old_value, memory, required_value, new_value);
23962 return NULL;
23965 /* Expand a synchronization loop. The synchronization loop is expanded
23966 as an opaque block of instructions in order to ensure that we do
23967 not subsequently get extraneous memory accesses inserted within the
23968 critical region. The exclusive access property of ldrex/strex is
23969 only guaranteed if there are no intervening memory accesses. */
23970 void
23971 arm_expand_sync (enum machine_mode mode,
23972 struct arm_sync_generator *generator,
23973 rtx target, rtx memory, rtx required_value, rtx new_value)
23975 if (target == NULL)
23976 target = gen_reg_rtx (mode);
23978 memory = arm_legitimize_sync_memory (memory);
23979 if (mode != SImode)
23981 rtx load_temp = gen_reg_rtx (SImode);
23983 if (required_value)
23984 required_value = convert_modes (SImode, mode, required_value, true);
23986 new_value = convert_modes (SImode, mode, new_value, true);
23987 emit_insn (arm_call_generator (generator, load_temp, memory,
23988 required_value, new_value));
23989 emit_move_insn (target, gen_lowpart (mode, load_temp));
23991 else
23993 emit_insn (arm_call_generator (generator, target, memory, required_value,
23994 new_value));
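/* Return the vector sizes, in bytes, that the auto-vectorizer should try:
   both 128-bit and 64-bit NEON vectors when quad-word vectorization is
   enabled, otherwise only the default vector size.  */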
23998 static unsigned int
23999 arm_autovectorize_vector_sizes (void)
24001 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
24004 static bool
24005 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24007 /* Vectors which aren't in packed structures will not be less aligned than
24008 the natural alignment of their element type, so this is safe. */
24009 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24010 return !is_packed;
24012 return default_builtin_vector_alignment_reachable (type, is_packed);
24015 static bool
24016 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24017 const_tree type, int misalignment,
24018 bool is_packed)
24020 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24022 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24024 if (is_packed)
24025 return align == 1;
24027 /* If the misalignment is unknown, we should be able to handle the access
24028 so long as it is not to a member of a packed data structure. */
24029 if (misalignment == -1)
24030 return true;
24032 /* Return true if the misalignment is a multiple of the natural alignment
24033 of the vector's element type. This is probably always going to be
24034 true in practice, since we've already established that this isn't a
24035 packed access. */
24036 return ((misalignment % align) == 0);
24039 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24040 is_packed);
24043 static void
24044 arm_conditional_register_usage (void)
24046 int regno;
24048 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24050 for (regno = FIRST_FPA_REGNUM;
24051 regno <= LAST_FPA_REGNUM; ++regno)
24052 fixed_regs[regno] = call_used_regs[regno] = 1;
24055 if (TARGET_THUMB1 && optimize_size)
24057 /* When optimizing for size on Thumb-1, it's better not
24058 to use the HI regs, because of the overhead of
24059 stacking them. */
24060 for (regno = FIRST_HI_REGNUM;
24061 regno <= LAST_HI_REGNUM; ++regno)
24062 fixed_regs[regno] = call_used_regs[regno] = 1;
24065 /* The link register can be clobbered by any branch insn,
24066 but we have no way to track that at present, so mark
24067 it as unavailable. */
24068 if (TARGET_THUMB1)
24069 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24071 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24073 if (TARGET_MAVERICK)
24075 for (regno = FIRST_FPA_REGNUM;
24076 regno <= LAST_FPA_REGNUM; ++ regno)
24077 fixed_regs[regno] = call_used_regs[regno] = 1;
24078 for (regno = FIRST_CIRRUS_FP_REGNUM;
24079 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24081 fixed_regs[regno] = 0;
24082 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24085 if (TARGET_VFP)
24087 /* VFPv3 registers are disabled when earlier VFP
24088 versions are selected due to the definition of
24089 LAST_VFP_REGNUM. */
24090 for (regno = FIRST_VFP_REGNUM;
24091 regno <= LAST_VFP_REGNUM; ++ regno)
24093 fixed_regs[regno] = 0;
24094 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24095 || regno >= FIRST_VFP_REGNUM + 32;
24100 if (TARGET_REALLY_IWMMXT)
24102 regno = FIRST_IWMMXT_GR_REGNUM;
24103 /* The 2002/10/09 revision of the XScale ABI has wCG0
24104 and wCG1 as call-preserved registers. The 2002/11/21
24105 revision changed this so that all wCG registers are
24106 scratch registers. */
24107 for (regno = FIRST_IWMMXT_GR_REGNUM;
24108 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24109 fixed_regs[regno] = 0;
24110 /* The XScale ABI has wR0 - wR9 as scratch registers,
24111 the rest as call-preserved registers. */
24112 for (regno = FIRST_IWMMXT_REGNUM;
24113 regno <= LAST_IWMMXT_REGNUM; ++ regno)
24115 fixed_regs[regno] = 0;
24116 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24120 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24122 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24123 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24125 else if (TARGET_APCS_STACK)
24127 fixed_regs[10] = 1;
24128 call_used_regs[10] = 1;
24130 /* -mcaller-super-interworking reserves r11 for calls to
24131 _interwork_r11_call_via_rN(). Making the register global
24132 is an easy way of ensuring that it remains valid for all
24133 calls. */
24134 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24135 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24137 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24138 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24139 if (TARGET_CALLER_INTERWORKING)
24140 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24142 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24145 static reg_class_t
24146 arm_preferred_rename_class (reg_class_t rclass)
24148 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24149 using GENERAL_REGS. During the register rename pass we therefore prefer
24150 LO_REGS, which can reduce code size. */
24151 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
24152 return LO_REGS;
24153 else
24154 return NO_REGS;
24157 /* Compute the attribute "length" of insn "*push_multi".
24158 So this function MUST be kept in sync with that insn pattern. */
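/* For example (a sketch): in Thumb-2, "push {r4, r5, lr}" only involves low
   registers and LR, so the 16-bit encoding applies and the length is 2; once
   any other high register appears in the list the length becomes 4.  */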
24160 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
24162 int i, regno, hi_reg;
24163 int num_saves = XVECLEN (parallel_op, 0);
24165 /* ARM mode. */
24166 if (TARGET_ARM)
24167 return 4;
24168 /* Thumb1 mode. */
24169 if (TARGET_THUMB1)
24170 return 2;
24172 /* Thumb2 mode. */
24173 regno = REGNO (first_op);
24174 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24175 for (i = 1; i < num_saves && !hi_reg; i++)
24177 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
24178 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24181 if (!hi_reg)
24182 return 2;
24183 return 4;
24186 #include "gt-arm.h"