1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
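66 /* Hook that a language front end can set to emit additional, language-specific object attributes. */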
67 void (*arm_lang_output_object_attributes_hook)(void);
69 struct four_ints
70 {
71   int i[4];
72 };
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long, unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static int arm_issue_rate (void);
239 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
240 static bool arm_output_addr_const_extra (FILE *, rtx);
241 static bool arm_allocate_stack_slots_for_args (void);
242 static bool arm_warn_func_return (tree);
243 static const char *arm_invalid_parameter_type (const_tree t);
244 static const char *arm_invalid_return_type (const_tree t);
245 static tree arm_promoted_type (const_tree t);
246 static tree arm_convert_to_type (tree type, tree expr);
247 static bool arm_scalar_mode_supported_p (enum machine_mode);
248 static bool arm_frame_pointer_required (void);
249 static bool arm_can_eliminate (const int, const int);
250 static void arm_asm_trampoline_template (FILE *);
251 static void arm_trampoline_init (rtx, tree, rtx);
252 static rtx arm_trampoline_adjust_address (rtx);
253 static rtx arm_pic_static_addr (rtx orig, rtx reg);
254 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
255 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool arm_array_mode_supported_p (enum machine_mode,
258 unsigned HOST_WIDE_INT);
259 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
260 static bool arm_class_likely_spilled_p (reg_class_t);
261 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
262 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
263 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
264 const_tree type,
265 int misalignment,
266 bool is_packed);
267 static void arm_conditional_register_usage (void);
268 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
269 static unsigned int arm_autovectorize_vector_sizes (void);
270 static int arm_default_branch_cost (bool, bool);
271 static int arm_cortex_a5_branch_cost (bool, bool);
272 static int arm_cortex_m_branch_cost (bool, bool);
274 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
275 const unsigned char *sel);
277 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
278 tree vectype,
279 int misalign ATTRIBUTE_UNUSED);
280 static unsigned arm_add_stmt_cost (void *data, int count,
281 enum vect_cost_for_stmt kind,
282 struct _stmt_vec_info *stmt_info,
283 int misalign,
284 enum vect_cost_model_location where);
286 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
287 bool op0_preserve_value);
288 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
290 /* Table of machine attributes. */
291 static const struct attribute_spec arm_attribute_table[] =
292 {
293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
294 affects_type_identity } */
295 /* Function calls made to this symbol must be done indirectly, because
296 it may lie outside of the 26 bit addressing range of a normal function
297 call. */
298 { "long_call", 0, 0, false, true, true, NULL, false },
299 /* Whereas these functions are always known to reside within the 26 bit
300 addressing range. */
301 { "short_call", 0, 0, false, true, true, NULL, false },
302 /* Specify the procedure call conventions for a function. */
303 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
304 false },
305 /* Interrupt Service Routines have special prologue and epilogue requirements. */
306 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
307 false },
308 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
309 false },
310 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
311 false },
312 #ifdef ARM_PE
313 /* ARM/PE has three new attributes:
314 interfacearm - ?
315 dllexport - for exporting a function/variable that will live in a dll
316 dllimport - for importing a function/variable from a dll
318 Microsoft allows multiple declspecs in one __declspec, separating
319 them with spaces. We do NOT support this. Instead, use __declspec
320 multiple times.
321 */
322 { "dllimport", 0, 0, true, false, false, NULL, false },
323 { "dllexport", 0, 0, true, false, false, NULL, false },
324 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
327 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
328 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
330 false },
331 #endif
332   { NULL, 0, 0, false, false, false, NULL, false }
333 };
335 /* Initialize the GCC target structure. */
336 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
337 #undef TARGET_MERGE_DECL_ATTRIBUTES
338 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
339 #endif
341 #undef TARGET_LEGITIMIZE_ADDRESS
342 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
344 #undef TARGET_LRA_P
345 #define TARGET_LRA_P arm_lra_p
347 #undef TARGET_ATTRIBUTE_TABLE
348 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
350 #undef TARGET_ASM_FILE_START
351 #define TARGET_ASM_FILE_START arm_file_start
352 #undef TARGET_ASM_FILE_END
353 #define TARGET_ASM_FILE_END arm_file_end
355 #undef TARGET_ASM_ALIGNED_SI_OP
356 #define TARGET_ASM_ALIGNED_SI_OP NULL
357 #undef TARGET_ASM_INTEGER
358 #define TARGET_ASM_INTEGER arm_assemble_integer
360 #undef TARGET_PRINT_OPERAND
361 #define TARGET_PRINT_OPERAND arm_print_operand
362 #undef TARGET_PRINT_OPERAND_ADDRESS
363 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
364 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
365 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
367 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
368 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
370 #undef TARGET_ASM_FUNCTION_PROLOGUE
371 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
373 #undef TARGET_ASM_FUNCTION_EPILOGUE
374 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
376 #undef TARGET_OPTION_OVERRIDE
377 #define TARGET_OPTION_OVERRIDE arm_option_override
379 #undef TARGET_COMP_TYPE_ATTRIBUTES
380 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
382 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
383 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
385 #undef TARGET_SCHED_ADJUST_COST
386 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
388 #undef TARGET_SCHED_REORDER
389 #define TARGET_SCHED_REORDER arm_sched_reorder
391 #undef TARGET_REGISTER_MOVE_COST
392 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
394 #undef TARGET_MEMORY_MOVE_COST
395 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
397 #undef TARGET_ENCODE_SECTION_INFO
398 #ifdef ARM_PE
399 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
400 #else
401 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
402 #endif
404 #undef TARGET_STRIP_NAME_ENCODING
405 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
407 #undef TARGET_ASM_INTERNAL_LABEL
408 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
410 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
411 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
413 #undef TARGET_FUNCTION_VALUE
414 #define TARGET_FUNCTION_VALUE arm_function_value
416 #undef TARGET_LIBCALL_VALUE
417 #define TARGET_LIBCALL_VALUE arm_libcall_value
419 #undef TARGET_FUNCTION_VALUE_REGNO_P
420 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
422 #undef TARGET_ASM_OUTPUT_MI_THUNK
423 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
424 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
425 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
427 #undef TARGET_RTX_COSTS
428 #define TARGET_RTX_COSTS arm_rtx_costs
429 #undef TARGET_ADDRESS_COST
430 #define TARGET_ADDRESS_COST arm_address_cost
432 #undef TARGET_SHIFT_TRUNCATION_MASK
433 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
434 #undef TARGET_VECTOR_MODE_SUPPORTED_P
435 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
436 #undef TARGET_ARRAY_MODE_SUPPORTED_P
437 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
438 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
439 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
440 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
441 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
442 arm_autovectorize_vector_sizes
444 #undef TARGET_MACHINE_DEPENDENT_REORG
445 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
447 #undef TARGET_INIT_BUILTINS
448 #define TARGET_INIT_BUILTINS arm_init_builtins
449 #undef TARGET_EXPAND_BUILTIN
450 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
451 #undef TARGET_BUILTIN_DECL
452 #define TARGET_BUILTIN_DECL arm_builtin_decl
454 #undef TARGET_INIT_LIBFUNCS
455 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
457 #undef TARGET_PROMOTE_FUNCTION_MODE
458 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
459 #undef TARGET_PROMOTE_PROTOTYPES
460 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
461 #undef TARGET_PASS_BY_REFERENCE
462 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
463 #undef TARGET_ARG_PARTIAL_BYTES
464 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
465 #undef TARGET_FUNCTION_ARG
466 #define TARGET_FUNCTION_ARG arm_function_arg
467 #undef TARGET_FUNCTION_ARG_ADVANCE
468 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
469 #undef TARGET_FUNCTION_ARG_BOUNDARY
470 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
472 #undef TARGET_SETUP_INCOMING_VARARGS
473 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
475 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
476 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
478 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
479 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
480 #undef TARGET_TRAMPOLINE_INIT
481 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
482 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
483 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
485 #undef TARGET_WARN_FUNC_RETURN
486 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
488 #undef TARGET_DEFAULT_SHORT_ENUMS
489 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
491 #undef TARGET_ALIGN_ANON_BITFIELD
492 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
494 #undef TARGET_NARROW_VOLATILE_BITFIELD
495 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
497 #undef TARGET_CXX_GUARD_TYPE
498 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
500 #undef TARGET_CXX_GUARD_MASK_BIT
501 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
503 #undef TARGET_CXX_GET_COOKIE_SIZE
504 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
506 #undef TARGET_CXX_COOKIE_HAS_SIZE
507 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
509 #undef TARGET_CXX_CDTOR_RETURNS_THIS
510 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
512 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
513 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
515 #undef TARGET_CXX_USE_AEABI_ATEXIT
516 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
518 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
519 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
520 arm_cxx_determine_class_data_visibility
522 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
523 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
525 #undef TARGET_RETURN_IN_MSB
526 #define TARGET_RETURN_IN_MSB arm_return_in_msb
528 #undef TARGET_RETURN_IN_MEMORY
529 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
531 #undef TARGET_MUST_PASS_IN_STACK
532 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
534 #if ARM_UNWIND_INFO
535 #undef TARGET_ASM_UNWIND_EMIT
536 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
538 /* EABI unwinding tables use a different format for the typeinfo tables. */
539 #undef TARGET_ASM_TTYPE
540 #define TARGET_ASM_TTYPE arm_output_ttype
542 #undef TARGET_ARM_EABI_UNWINDER
543 #define TARGET_ARM_EABI_UNWINDER true
545 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
546 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
548 #undef TARGET_ASM_INIT_SECTIONS
549 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
550 #endif /* ARM_UNWIND_INFO */
552 #undef TARGET_DWARF_REGISTER_SPAN
553 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
555 #undef TARGET_CANNOT_COPY_INSN_P
556 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
558 #ifdef HAVE_AS_TLS
559 #undef TARGET_HAVE_TLS
560 #define TARGET_HAVE_TLS true
561 #endif
563 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
564 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
566 #undef TARGET_LEGITIMATE_CONSTANT_P
567 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
569 #undef TARGET_CANNOT_FORCE_CONST_MEM
570 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
572 #undef TARGET_MAX_ANCHOR_OFFSET
573 #define TARGET_MAX_ANCHOR_OFFSET 4095
575 /* The minimum is set such that the total size of the block
576 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
577 divisible by eight, ensuring natural spacing of anchors. */
578 #undef TARGET_MIN_ANCHOR_OFFSET
579 #define TARGET_MIN_ANCHOR_OFFSET -4088
581 #undef TARGET_SCHED_ISSUE_RATE
582 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
584 #undef TARGET_MANGLE_TYPE
585 #define TARGET_MANGLE_TYPE arm_mangle_type
587 #undef TARGET_BUILD_BUILTIN_VA_LIST
588 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
589 #undef TARGET_EXPAND_BUILTIN_VA_START
590 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
591 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
592 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
594 #ifdef HAVE_AS_TLS
595 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
596 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
597 #endif
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
602 #undef TARGET_PREFERRED_RELOAD_CLASS
603 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
605 #undef TARGET_INVALID_PARAMETER_TYPE
606 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
608 #undef TARGET_INVALID_RETURN_TYPE
609 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
611 #undef TARGET_PROMOTED_TYPE
612 #define TARGET_PROMOTED_TYPE arm_promoted_type
614 #undef TARGET_CONVERT_TO_TYPE
615 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
617 #undef TARGET_SCALAR_MODE_SUPPORTED_P
618 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
620 #undef TARGET_FRAME_POINTER_REQUIRED
621 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
623 #undef TARGET_CAN_ELIMINATE
624 #define TARGET_CAN_ELIMINATE arm_can_eliminate
626 #undef TARGET_CONDITIONAL_REGISTER_USAGE
627 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
629 #undef TARGET_CLASS_LIKELY_SPILLED_P
630 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
632 #undef TARGET_VECTORIZE_BUILTINS
633 #define TARGET_VECTORIZE_BUILTINS
635 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
636 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
637 arm_builtin_vectorized_function
639 #undef TARGET_VECTOR_ALIGNMENT
640 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
642 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
643 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
644 arm_vector_alignment_reachable
646 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
647 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
648 arm_builtin_support_vector_misalignment
650 #undef TARGET_PREFERRED_RENAME_CLASS
651 #define TARGET_PREFERRED_RENAME_CLASS \
652 arm_preferred_rename_class
654 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
655 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
656 arm_vectorize_vec_perm_const_ok
658 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
659 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
660 arm_builtin_vectorization_cost
661 #undef TARGET_VECTORIZE_ADD_STMT_COST
662 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
664 #undef TARGET_CANONICALIZE_COMPARISON
665 #define TARGET_CANONICALIZE_COMPARISON \
666 arm_canonicalize_comparison
668 #undef TARGET_ASAN_SHADOW_OFFSET
669 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
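670 /* Maximum number of instructions that may be placed in an IT block: one when -mrestrict-it is in effect, otherwise four. */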
671 #undef MAX_INSN_PER_IT_BLOCK
672 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
674 #undef TARGET_CAN_USE_DOLOOP_P
675 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
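676 /* The global target hook vector; TARGET_INITIALIZER picks up the TARGET_xxx overrides defined above. */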
677 struct gcc_target targetm = TARGET_INITIALIZER;
679 /* Obstack for minipool constant handling. */
680 static struct obstack minipool_obstack;
681 static char * minipool_startobj;
683 /* The maximum number of insns skipped which
684 will be conditionalised if possible. */
685 static int max_insns_skipped = 5;
687 extern FILE * asm_out_file;
689 /* True if we are currently building a constant table. */
690 int making_const_table;
692 /* The processor for which instructions should be scheduled. */
693 enum processor_type arm_tune = arm_none;
695 /* The current tuning set. */
696 const struct tune_params *current_tune;
698 /* Which floating point hardware to schedule for. */
699 int arm_fpu_attr;
701 /* Which floating point hardware to use. */
702 const struct arm_fpu_desc *arm_fpu_desc;
704 /* Used for Thumb call_via trampolines. */
705 rtx thumb_call_via_label[14];
706 static int thumb_call_reg_needed;
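707 /* Nonzero if at least one of the call_via labels above is needed. */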
708 /* Bit values used to identify processor capabilities. */
709 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
710 #define FL_ARCH3M (1 << 1) /* Extended multiply */
711 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
712 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
713 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
714 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
715 #define FL_THUMB (1 << 6) /* Thumb aware */
716 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
717 #define FL_STRONG (1 << 8) /* StrongARM */
718 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
719 #define FL_XSCALE (1 << 10) /* XScale */
720 /* spare (1 << 11) */
721 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
722 media instructions. */
723 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
724 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
725 Note: ARM6 & 7 derivatives only. */
726 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
727 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
728 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
729 profile. */
730 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
731 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
732 #define FL_NEON (1 << 20) /* Neon instructions. */
733 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
734 architecture. */
735 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
736 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
737 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
738 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
740 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
741 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
743 /* Flags that only affect tuning, not available instructions. */
744 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
745 | FL_CO_PROC)
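746 /* Capability bit sets (FL_*) implied by each architecture revision. */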
747 #define FL_FOR_ARCH2 FL_NOTM
748 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
749 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
750 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
751 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
752 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
753 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
754 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
755 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
756 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
757 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
758 #define FL_FOR_ARCH6J FL_FOR_ARCH6
759 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
760 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
761 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
762 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
763 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
764 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
765 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
766 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
772 /* The bits in this mask specify which
773 instructions we are allowed to generate. */
774 static unsigned long insn_flags = 0;
776 /* The bits in this mask specify which instruction scheduling options should
777 be used. */
778 static unsigned long tune_flags = 0;
780 /* The highest ARM architecture version supported by the
781 target. */
782 enum base_architecture arm_base_arch = BASE_ARCH_0;
784 /* The following are used in the arm.md file as equivalents to bits
785 in the above two flag variables. */
787 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
788 int arm_arch3m = 0;
790 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
791 int arm_arch4 = 0;
793 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
794 int arm_arch4t = 0;
796 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
797 int arm_arch5 = 0;
799 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
800 int arm_arch5e = 0;
802 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
803 int arm_arch6 = 0;
805 /* Nonzero if this chip supports the ARM 6K extensions. */
806 int arm_arch6k = 0;
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
838 /* Nonzero if tuning for XScale. */
839 int arm_tune_xscale = 0;
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
848 /* Nonzero if generating Thumb instructions. */
849 int thumb_code = 0;
851 /* Nonzero if generating Thumb-1 instructions. */
852 int thumb1_code = 0;
854 /* Nonzero if we should define __THUMB_INTERWORK__ in the
855 preprocessor.
856 XXX This is a bit of a hack; it is intended to help work around
857 problems in GLD, which doesn't understand that armv5t code is
858 interworking clean. */
859 int arm_cpp_interwork = 0;
861 /* Nonzero if chip supports Thumb 2. */
862 int arm_arch_thumb2;
864 /* Nonzero if chip supports integer division instruction. */
865 int arm_arch_arm_hwdiv;
866 int arm_arch_thumb_hwdiv;
868 /* Nonzero if we should use Neon to handle 64-bit operations rather
869 than core registers. */
870 int prefer_neon_for_64bits = 0;
872 /* Nonzero if we shouldn't use literal pools. */
873 bool arm_disable_literal_pool = false;
875 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
876 we must report the mode of the memory reference from
877 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
878 enum machine_mode output_memory_reference_mode;
880 /* The register number to be used for the PIC offset register. */
881 unsigned arm_pic_register = INVALID_REGNUM;
883 /* Set to 1 after arm_reorg has started. Reset at the start of
884 the next function. */
885 static int after_arm_reorg = 0;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* The condition codes of the ARM, and the inverse function. */
908 static const char * const arm_condition_codes[] =
909 {
910   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
911   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
912 };
914 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
915 int arm_regs_in_sequence[] =
916 {
917   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
918 };
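919 /* Mnemonic for a left shift: "lsl" in unified assembler syntax, "asl" in the old divided syntax. */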
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
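922 /* Low registers (r0-r7) usable as work registers in Thumb-2, excluding the frame pointer, stack pointer, PC and PIC register. */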
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 /* Initialization code. */
929 struct processors
930 {
931   const char *const name;
932   enum processor_type core;
933   const char *arch;
934   enum base_architecture base_arch;
935   const unsigned long flags;
936   const struct tune_params *const tune;
937 };
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
942 prefetch_slots, \
943 l1_size, \
944 l1_line_size
946 /* arm generic vectorizer costs. */
947 static const
948 struct cpu_vec_costs arm_default_vec_cost = {
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960   1, /* cond_not_taken_branch_cost. */
961 };
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
968 const struct cpu_cost_table cortexa9_extra_costs =
970 /* ALU */
972 0, /* arith. */
973 0, /* logical. */
974 0, /* shift. */
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
978 0, /* log_shift. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
984 0, /* clz. */
985 0, /* non_exec. */
986 true /* non_exec_costs_exec. */
989 /* MULT SImode */
991 COSTS_N_INSNS (3), /* simple. */
992 COSTS_N_INSNS (3), /* flag_setting. */
993 COSTS_N_INSNS (2), /* extend. */
994 COSTS_N_INSNS (3), /* add. */
995 COSTS_N_INSNS (2), /* extend_add. */
996 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
998 /* MULT DImode */
1000 0, /* simple (N/A). */
1001 0, /* flag_setting (N/A). */
1002 COSTS_N_INSNS (4), /* extend. */
1003 0, /* add (N/A). */
1004 COSTS_N_INSNS (4), /* extend_add. */
1005 0 /* idiv (N/A). */
1008 /* LD/ST */
1010 COSTS_N_INSNS (2), /* load. */
1011 COSTS_N_INSNS (2), /* load_sign_extend. */
1012 COSTS_N_INSNS (2), /* ldrd. */
1013 COSTS_N_INSNS (2), /* ldm_1st. */
1014 1, /* ldm_regs_per_insn_1st. */
1015 2, /* ldm_regs_per_insn_subsequent. */
1016 COSTS_N_INSNS (5), /* loadf. */
1017 COSTS_N_INSNS (5), /* loadd. */
1018 COSTS_N_INSNS (1), /* load_unaligned. */
1019 COSTS_N_INSNS (2), /* store. */
1020 COSTS_N_INSNS (2), /* strd. */
1021 COSTS_N_INSNS (2), /* stm_1st. */
1022 1, /* stm_regs_per_insn_1st. */
1023 2, /* stm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (1), /* storef. */
1025 COSTS_N_INSNS (1), /* stored. */
1026 COSTS_N_INSNS (1) /* store_unaligned. */
1029 /* FP SFmode */
1031 COSTS_N_INSNS (14), /* div. */
1032 COSTS_N_INSNS (4), /* mult. */
1033 COSTS_N_INSNS (7), /* mult_addsub. */
1034 COSTS_N_INSNS (30), /* fma. */
1035 COSTS_N_INSNS (3), /* addsub. */
1036 COSTS_N_INSNS (1), /* fpconst. */
1037 COSTS_N_INSNS (1), /* neg. */
1038 COSTS_N_INSNS (3), /* compare. */
1039 COSTS_N_INSNS (3), /* widen. */
1040 COSTS_N_INSNS (3), /* narrow. */
1041 COSTS_N_INSNS (3), /* toint. */
1042 COSTS_N_INSNS (3), /* fromint. */
1043 COSTS_N_INSNS (3) /* roundint. */
1045 /* FP DFmode */
1047 COSTS_N_INSNS (24), /* div. */
1048 COSTS_N_INSNS (5), /* mult. */
1049 COSTS_N_INSNS (8), /* mult_addsub. */
1050 COSTS_N_INSNS (30), /* fma. */
1051 COSTS_N_INSNS (3), /* addsub. */
1052 COSTS_N_INSNS (1), /* fpconst. */
1053 COSTS_N_INSNS (1), /* neg. */
1054 COSTS_N_INSNS (3), /* compare. */
1055 COSTS_N_INSNS (3), /* widen. */
1056 COSTS_N_INSNS (3), /* narrow. */
1057 COSTS_N_INSNS (3), /* toint. */
1058 COSTS_N_INSNS (3), /* fromint. */
1059 COSTS_N_INSNS (3) /* roundint. */
1062 /* Vector */
1064 COSTS_N_INSNS (1) /* alu. */
1069 const struct cpu_cost_table cortexa7_extra_costs =
1071 /* ALU */
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 COSTS_N_INSNS (1), /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 COSTS_N_INSNS (1), /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 COSTS_N_INSNS (1), /* log_shift_reg. */
1081 COSTS_N_INSNS (1), /* extend. */
1082 COSTS_N_INSNS (1), /* extend_arith. */
1083 COSTS_N_INSNS (1), /* bfi. */
1084 COSTS_N_INSNS (1), /* bfx. */
1085 COSTS_N_INSNS (1), /* clz. */
1086 0, /* non_exec. */
1087 true /* non_exec_costs_exec. */
1091 /* MULT SImode */
1093 0, /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (7) /* idiv. */
1100 /* MULT DImode */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (1), /* extend. */
1105 0, /* add. */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1110 /* LD/ST */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (3), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (2), /* loadf. */
1119 COSTS_N_INSNS (2), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (3), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (2), /* storef. */
1127 COSTS_N_INSNS (2), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1131 /* FP SFmode */
1133 COSTS_N_INSNS (15), /* div. */
1134 COSTS_N_INSNS (3), /* mult. */
1135 COSTS_N_INSNS (7), /* mult_addsub. */
1136 COSTS_N_INSNS (7), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1147 /* FP DFmode */
1149 COSTS_N_INSNS (30), /* div. */
1150 COSTS_N_INSNS (6), /* mult. */
1151 COSTS_N_INSNS (10), /* mult_addsub. */
1152 COSTS_N_INSNS (7), /* fma. */
1153 COSTS_N_INSNS (3), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (3), /* compare. */
1157 COSTS_N_INSNS (3), /* widen. */
1158 COSTS_N_INSNS (3), /* narrow. */
1159 COSTS_N_INSNS (3), /* toint. */
1160 COSTS_N_INSNS (3), /* fromint. */
1161 COSTS_N_INSNS (3) /* roundint. */
1164 /* Vector */
1166 COSTS_N_INSNS (1) /* alu. */
1170 const struct cpu_cost_table cortexa12_extra_costs =
1172 /* ALU */
1174 0, /* arith. */
1175 0, /* logical. */
1176 0, /* shift. */
1177 COSTS_N_INSNS (1), /* shift_reg. */
1178 COSTS_N_INSNS (1), /* arith_shift. */
1179 COSTS_N_INSNS (1), /* arith_shift_reg. */
1180 COSTS_N_INSNS (1), /* log_shift. */
1181 COSTS_N_INSNS (1), /* log_shift_reg. */
1182 0, /* extend. */
1183 COSTS_N_INSNS (1), /* extend_arith. */
1184 0, /* bfi. */
1185 COSTS_N_INSNS (1), /* bfx. */
1186 COSTS_N_INSNS (1), /* clz. */
1187 0, /* non_exec. */
1188 true /* non_exec_costs_exec. */
1190 /* MULT SImode */
1193 COSTS_N_INSNS (2), /* simple. */
1194 COSTS_N_INSNS (3), /* flag_setting. */
1195 COSTS_N_INSNS (2), /* extend. */
1196 COSTS_N_INSNS (3), /* add. */
1197 COSTS_N_INSNS (2), /* extend_add. */
1198 COSTS_N_INSNS (18) /* idiv. */
1200 /* MULT DImode */
1202 0, /* simple (N/A). */
1203 0, /* flag_setting (N/A). */
1204 COSTS_N_INSNS (3), /* extend. */
1205 0, /* add (N/A). */
1206 COSTS_N_INSNS (3), /* extend_add. */
1207 0 /* idiv (N/A). */
1210 /* LD/ST */
1212 COSTS_N_INSNS (3), /* load. */
1213 COSTS_N_INSNS (3), /* load_sign_extend. */
1214 COSTS_N_INSNS (3), /* ldrd. */
1215 COSTS_N_INSNS (3), /* ldm_1st. */
1216 1, /* ldm_regs_per_insn_1st. */
1217 2, /* ldm_regs_per_insn_subsequent. */
1218 COSTS_N_INSNS (3), /* loadf. */
1219 COSTS_N_INSNS (3), /* loadd. */
1220 0, /* load_unaligned. */
1221 0, /* store. */
1222 0, /* strd. */
1223 0, /* stm_1st. */
1224 1, /* stm_regs_per_insn_1st. */
1225 2, /* stm_regs_per_insn_subsequent. */
1226 COSTS_N_INSNS (2), /* storef. */
1227 COSTS_N_INSNS (2), /* stored. */
1228 0 /* store_unaligned. */
1231 /* FP SFmode */
1233 COSTS_N_INSNS (17), /* div. */
1234 COSTS_N_INSNS (4), /* mult. */
1235 COSTS_N_INSNS (8), /* mult_addsub. */
1236 COSTS_N_INSNS (8), /* fma. */
1237 COSTS_N_INSNS (4), /* addsub. */
1238 COSTS_N_INSNS (2), /* fpconst. */
1239 COSTS_N_INSNS (2), /* neg. */
1240 COSTS_N_INSNS (2), /* compare. */
1241 COSTS_N_INSNS (4), /* widen. */
1242 COSTS_N_INSNS (4), /* narrow. */
1243 COSTS_N_INSNS (4), /* toint. */
1244 COSTS_N_INSNS (4), /* fromint. */
1245 COSTS_N_INSNS (4) /* roundint. */
1247 /* FP DFmode */
1249 COSTS_N_INSNS (31), /* div. */
1250 COSTS_N_INSNS (4), /* mult. */
1251 COSTS_N_INSNS (8), /* mult_addsub. */
1252 COSTS_N_INSNS (8), /* fma. */
1253 COSTS_N_INSNS (4), /* addsub. */
1254 COSTS_N_INSNS (2), /* fpconst. */
1255 COSTS_N_INSNS (2), /* neg. */
1256 COSTS_N_INSNS (2), /* compare. */
1257 COSTS_N_INSNS (4), /* widen. */
1258 COSTS_N_INSNS (4), /* narrow. */
1259 COSTS_N_INSNS (4), /* toint. */
1260 COSTS_N_INSNS (4), /* fromint. */
1261 COSTS_N_INSNS (4) /* roundint. */
1264 /* Vector */
1266 COSTS_N_INSNS (1) /* alu. */
1270 const struct cpu_cost_table cortexa15_extra_costs =
1272 /* ALU */
1274 0, /* arith. */
1275 0, /* logical. */
1276 0, /* shift. */
1277 0, /* shift_reg. */
1278 COSTS_N_INSNS (1), /* arith_shift. */
1279 COSTS_N_INSNS (1), /* arith_shift_reg. */
1280 COSTS_N_INSNS (1), /* log_shift. */
1281 COSTS_N_INSNS (1), /* log_shift_reg. */
1282 0, /* extend. */
1283 COSTS_N_INSNS (1), /* extend_arith. */
1284 COSTS_N_INSNS (1), /* bfi. */
1285 0, /* bfx. */
1286 0, /* clz. */
1287 0, /* non_exec. */
1288 true /* non_exec_costs_exec. */
1290 /* MULT SImode */
1293 COSTS_N_INSNS (2), /* simple. */
1294 COSTS_N_INSNS (3), /* flag_setting. */
1295 COSTS_N_INSNS (2), /* extend. */
1296 COSTS_N_INSNS (2), /* add. */
1297 COSTS_N_INSNS (2), /* extend_add. */
1298 COSTS_N_INSNS (18) /* idiv. */
1300 /* MULT DImode */
1302 0, /* simple (N/A). */
1303 0, /* flag_setting (N/A). */
1304 COSTS_N_INSNS (3), /* extend. */
1305 0, /* add (N/A). */
1306 COSTS_N_INSNS (3), /* extend_add. */
1307 0 /* idiv (N/A). */
1310 /* LD/ST */
1312 COSTS_N_INSNS (3), /* load. */
1313 COSTS_N_INSNS (3), /* load_sign_extend. */
1314 COSTS_N_INSNS (3), /* ldrd. */
1315 COSTS_N_INSNS (4), /* ldm_1st. */
1316 1, /* ldm_regs_per_insn_1st. */
1317 2, /* ldm_regs_per_insn_subsequent. */
1318 COSTS_N_INSNS (4), /* loadf. */
1319 COSTS_N_INSNS (4), /* loadd. */
1320 0, /* load_unaligned. */
1321 0, /* store. */
1322 0, /* strd. */
1323 COSTS_N_INSNS (1), /* stm_1st. */
1324 1, /* stm_regs_per_insn_1st. */
1325 2, /* stm_regs_per_insn_subsequent. */
1326 0, /* storef. */
1327 0, /* stored. */
1328 0 /* store_unaligned. */
1331 /* FP SFmode */
1333 COSTS_N_INSNS (17), /* div. */
1334 COSTS_N_INSNS (4), /* mult. */
1335 COSTS_N_INSNS (8), /* mult_addsub. */
1336 COSTS_N_INSNS (8), /* fma. */
1337 COSTS_N_INSNS (4), /* addsub. */
1338 COSTS_N_INSNS (2), /* fpconst. */
1339 COSTS_N_INSNS (2), /* neg. */
1340 COSTS_N_INSNS (5), /* compare. */
1341 COSTS_N_INSNS (4), /* widen. */
1342 COSTS_N_INSNS (4), /* narrow. */
1343 COSTS_N_INSNS (4), /* toint. */
1344 COSTS_N_INSNS (4), /* fromint. */
1345 COSTS_N_INSNS (4) /* roundint. */
1347 /* FP DFmode */
1349 COSTS_N_INSNS (31), /* div. */
1350 COSTS_N_INSNS (4), /* mult. */
1351 COSTS_N_INSNS (8), /* mult_addsub. */
1352 COSTS_N_INSNS (8), /* fma. */
1353 COSTS_N_INSNS (4), /* addsub. */
1354 COSTS_N_INSNS (2), /* fpconst. */
1355 COSTS_N_INSNS (2), /* neg. */
1356 COSTS_N_INSNS (2), /* compare. */
1357 COSTS_N_INSNS (4), /* widen. */
1358 COSTS_N_INSNS (4), /* narrow. */
1359 COSTS_N_INSNS (4), /* toint. */
1360 COSTS_N_INSNS (4), /* fromint. */
1361 COSTS_N_INSNS (4) /* roundint. */
1364 /* Vector */
1366 COSTS_N_INSNS (1) /* alu. */
1370 const struct cpu_cost_table v7m_extra_costs =
1372 /* ALU */
1374 0, /* arith. */
1375 0, /* logical. */
1376 0, /* shift. */
1377 0, /* shift_reg. */
1378 0, /* arith_shift. */
1379 COSTS_N_INSNS (1), /* arith_shift_reg. */
1380 0, /* log_shift. */
1381 COSTS_N_INSNS (1), /* log_shift_reg. */
1382 0, /* extend. */
1383 COSTS_N_INSNS (1), /* extend_arith. */
1384 0, /* bfi. */
1385 0, /* bfx. */
1386 0, /* clz. */
1387 COSTS_N_INSNS (1), /* non_exec. */
1388 false /* non_exec_costs_exec. */
1391 /* MULT SImode */
1393 COSTS_N_INSNS (1), /* simple. */
1394 COSTS_N_INSNS (1), /* flag_setting. */
1395 COSTS_N_INSNS (2), /* extend. */
1396 COSTS_N_INSNS (1), /* add. */
1397 COSTS_N_INSNS (3), /* extend_add. */
1398 COSTS_N_INSNS (8) /* idiv. */
1400 /* MULT DImode */
1402 0, /* simple (N/A). */
1403 0, /* flag_setting (N/A). */
1404 COSTS_N_INSNS (2), /* extend. */
1405 0, /* add (N/A). */
1406 COSTS_N_INSNS (3), /* extend_add. */
1407 0 /* idiv (N/A). */
1410 /* LD/ST */
1412 COSTS_N_INSNS (2), /* load. */
1413 0, /* load_sign_extend. */
1414 COSTS_N_INSNS (3), /* ldrd. */
1415 COSTS_N_INSNS (2), /* ldm_1st. */
1416 1, /* ldm_regs_per_insn_1st. */
1417 1, /* ldm_regs_per_insn_subsequent. */
1418 COSTS_N_INSNS (2), /* loadf. */
1419 COSTS_N_INSNS (3), /* loadd. */
1420 COSTS_N_INSNS (1), /* load_unaligned. */
1421 COSTS_N_INSNS (2), /* store. */
1422 COSTS_N_INSNS (3), /* strd. */
1423 COSTS_N_INSNS (2), /* stm_1st. */
1424 1, /* stm_regs_per_insn_1st. */
1425 1, /* stm_regs_per_insn_subsequent. */
1426 COSTS_N_INSNS (2), /* storef. */
1427 COSTS_N_INSNS (3), /* stored. */
1428 COSTS_N_INSNS (1) /* store_unaligned. */
1431 /* FP SFmode */
1433 COSTS_N_INSNS (7), /* div. */
1434 COSTS_N_INSNS (2), /* mult. */
1435 COSTS_N_INSNS (5), /* mult_addsub. */
1436 COSTS_N_INSNS (3), /* fma. */
1437 COSTS_N_INSNS (1), /* addsub. */
1438 0, /* fpconst. */
1439 0, /* neg. */
1440 0, /* compare. */
1441 0, /* widen. */
1442 0, /* narrow. */
1443 0, /* toint. */
1444 0, /* fromint. */
1445 0 /* roundint. */
1447 /* FP DFmode */
1449 COSTS_N_INSNS (15), /* div. */
1450 COSTS_N_INSNS (5), /* mult. */
1451 COSTS_N_INSNS (7), /* mult_addsub. */
1452 COSTS_N_INSNS (7), /* fma. */
1453 COSTS_N_INSNS (3), /* addsub. */
1454 0, /* fpconst. */
1455 0, /* neg. */
1456 0, /* compare. */
1457 0, /* widen. */
1458 0, /* narrow. */
1459 0, /* toint. */
1460 0, /* fromint. */
1461 0 /* roundint. */
1464 /* Vector */
1466 COSTS_N_INSNS (1) /* alu. */
1470 const struct tune_params arm_slowmul_tune =
1472 arm_slowmul_rtx_costs,
1473 NULL,
1474 NULL, /* Sched adj cost. */
1475 3, /* Constant limit. */
1476 5, /* Max cond insns. */
1477 ARM_PREFETCH_NOT_BENEFICIAL,
1478 true, /* Prefer constant pool. */
1479 arm_default_branch_cost,
1480 false, /* Prefer LDRD/STRD. */
1481 {true, true}, /* Prefer non short circuit. */
1482 &arm_default_vec_cost, /* Vectorizer costs. */
1483 false /* Prefer Neon for 64-bits bitops. */
1486 const struct tune_params arm_fastmul_tune =
1488 arm_fastmul_rtx_costs,
1489 NULL,
1490 NULL, /* Sched adj cost. */
1491 1, /* Constant limit. */
1492 5, /* Max cond insns. */
1493 ARM_PREFETCH_NOT_BENEFICIAL,
1494 true, /* Prefer constant pool. */
1495 arm_default_branch_cost,
1496 false, /* Prefer LDRD/STRD. */
1497 {true, true}, /* Prefer non short circuit. */
1498 &arm_default_vec_cost, /* Vectorizer costs. */
1499 false /* Prefer Neon for 64-bits bitops. */
1502 /* StrongARM has early execution of branches, so a sequence that is worth
1503 skipping is shorter. Set max_insns_skipped to a lower value. */
1505 const struct tune_params arm_strongarm_tune =
1507 arm_fastmul_rtx_costs,
1508 NULL,
1509 NULL, /* Sched adj cost. */
1510 1, /* Constant limit. */
1511 3, /* Max cond insns. */
1512 ARM_PREFETCH_NOT_BENEFICIAL,
1513 true, /* Prefer constant pool. */
1514 arm_default_branch_cost,
1515 false, /* Prefer LDRD/STRD. */
1516 {true, true}, /* Prefer non short circuit. */
1517 &arm_default_vec_cost, /* Vectorizer costs. */
1518 false /* Prefer Neon for 64-bits bitops. */
1521 const struct tune_params arm_xscale_tune =
1523 arm_xscale_rtx_costs,
1524 NULL,
1525 xscale_sched_adjust_cost,
1526 2, /* Constant limit. */
1527 3, /* Max cond insns. */
1528 ARM_PREFETCH_NOT_BENEFICIAL,
1529 true, /* Prefer constant pool. */
1530 arm_default_branch_cost,
1531 false, /* Prefer LDRD/STRD. */
1532 {true, true}, /* Prefer non short circuit. */
1533 &arm_default_vec_cost, /* Vectorizer costs. */
1534 false /* Prefer Neon for 64-bits bitops. */
1537 const struct tune_params arm_9e_tune =
1539 arm_9e_rtx_costs,
1540 NULL,
1541 NULL, /* Sched adj cost. */
1542 1, /* Constant limit. */
1543 5, /* Max cond insns. */
1544 ARM_PREFETCH_NOT_BENEFICIAL,
1545 true, /* Prefer constant pool. */
1546 arm_default_branch_cost,
1547 false, /* Prefer LDRD/STRD. */
1548 {true, true}, /* Prefer non short circuit. */
1549 &arm_default_vec_cost, /* Vectorizer costs. */
1550 false /* Prefer Neon for 64-bits bitops. */
1553 const struct tune_params arm_v6t2_tune =
1555 arm_9e_rtx_costs,
1556 NULL,
1557 NULL, /* Sched adj cost. */
1558 1, /* Constant limit. */
1559 5, /* Max cond insns. */
1560 ARM_PREFETCH_NOT_BENEFICIAL,
1561 false, /* Prefer constant pool. */
1562 arm_default_branch_cost,
1563 false, /* Prefer LDRD/STRD. */
1564 {true, true}, /* Prefer non short circuit. */
1565 &arm_default_vec_cost, /* Vectorizer costs. */
1566 false /* Prefer Neon for 64-bits bitops. */
1569 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1570 const struct tune_params arm_cortex_tune =
1572 arm_9e_rtx_costs,
1573 &generic_extra_costs,
1574 NULL, /* Sched adj cost. */
1575 1, /* Constant limit. */
1576 5, /* Max cond insns. */
1577 ARM_PREFETCH_NOT_BENEFICIAL,
1578 false, /* Prefer constant pool. */
1579 arm_default_branch_cost,
1580 false, /* Prefer LDRD/STRD. */
1581 {true, true}, /* Prefer non short circuit. */
1582 &arm_default_vec_cost, /* Vectorizer costs. */
1583 false /* Prefer Neon for 64-bits bitops. */
1586 const struct tune_params arm_cortex_a7_tune =
1588 arm_9e_rtx_costs,
1589 &cortexa7_extra_costs,
1590 NULL,
1591 1, /* Constant limit. */
1592 5, /* Max cond insns. */
1593 ARM_PREFETCH_NOT_BENEFICIAL,
1594 false, /* Prefer constant pool. */
1595 arm_default_branch_cost,
1596 false, /* Prefer LDRD/STRD. */
1597 {true, true}, /* Prefer non short circuit. */
1598 &arm_default_vec_cost, /* Vectorizer costs. */
1599 false /* Prefer Neon for 64-bits bitops. */
1602 const struct tune_params arm_cortex_a15_tune =
1604 arm_9e_rtx_costs,
1605 &cortexa15_extra_costs,
1606 NULL, /* Sched adj cost. */
1607 1, /* Constant limit. */
1608 2, /* Max cond insns. */
1609 ARM_PREFETCH_NOT_BENEFICIAL,
1610 false, /* Prefer constant pool. */
1611 arm_default_branch_cost,
1612 true, /* Prefer LDRD/STRD. */
1613 {true, true}, /* Prefer non short circuit. */
1614 &arm_default_vec_cost, /* Vectorizer costs. */
1615 false /* Prefer Neon for 64-bits bitops. */
1618 const struct tune_params arm_cortex_a53_tune =
1620 arm_9e_rtx_costs,
1621 &cortexa53_extra_costs,
1622 NULL, /* Scheduler cost adjustment. */
1623 1, /* Constant limit. */
1624 5, /* Max cond insns. */
1625 ARM_PREFETCH_NOT_BENEFICIAL,
1626 false, /* Prefer constant pool. */
1627 arm_default_branch_cost,
1628 false, /* Prefer LDRD/STRD. */
1629 {true, true}, /* Prefer non short circuit. */
1630 &arm_default_vec_cost, /* Vectorizer costs. */
1631 false /* Prefer Neon for 64-bits bitops. */
1634 const struct tune_params arm_cortex_a57_tune =
1636 arm_9e_rtx_costs,
1637 &cortexa57_extra_costs,
1638 NULL, /* Scheduler cost adjustment. */
1639 1, /* Constant limit. */
1640 2, /* Max cond insns. */
1641 ARM_PREFETCH_NOT_BENEFICIAL,
1642 false, /* Prefer constant pool. */
1643 arm_default_branch_cost,
1644 true, /* Prefer LDRD/STRD. */
1645 {true, true}, /* Prefer non short circuit. */
1646 &arm_default_vec_cost, /* Vectorizer costs. */
1647 false /* Prefer Neon for 64-bits bitops. */
1650 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1651 less appealing. Set max_insns_skipped to a low value. */
1653 const struct tune_params arm_cortex_a5_tune =
1655 arm_9e_rtx_costs,
1656 NULL,
1657 NULL, /* Sched adj cost. */
1658 1, /* Constant limit. */
1659 1, /* Max cond insns. */
1660 ARM_PREFETCH_NOT_BENEFICIAL,
1661 false, /* Prefer constant pool. */
1662 arm_cortex_a5_branch_cost,
1663 false, /* Prefer LDRD/STRD. */
1664 {false, false}, /* Prefer non short circuit. */
1665 &arm_default_vec_cost, /* Vectorizer costs. */
1666 false /* Prefer Neon for 64-bits bitops. */
1669 const struct tune_params arm_cortex_a9_tune =
1671 arm_9e_rtx_costs,
1672 &cortexa9_extra_costs,
1673 cortex_a9_sched_adjust_cost,
1674 1, /* Constant limit. */
1675 5, /* Max cond insns. */
1676 ARM_PREFETCH_BENEFICIAL(4,32,32),
1677 false, /* Prefer constant pool. */
1678 arm_default_branch_cost,
1679 false, /* Prefer LDRD/STRD. */
1680 {true, true}, /* Prefer non short circuit. */
1681 &arm_default_vec_cost, /* Vectorizer costs. */
1682 false /* Prefer Neon for 64-bits bitops. */
1685 const struct tune_params arm_cortex_a12_tune =
1687 arm_9e_rtx_costs,
1688 &cortexa12_extra_costs,
1689 NULL,
1690 1, /* Constant limit. */
1691 5, /* Max cond insns. */
1692 ARM_PREFETCH_BENEFICIAL(4,32,32),
1693 false, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 true, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false /* Prefer Neon for 64-bits bitops. */
1701 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1702 cycle to execute each. An LDR from the constant pool also takes two cycles
1703 to execute, but mildly increases pipelining opportunity (consecutive
1704 loads/stores can be pipelined together, saving one cycle), and may also
1705 improve icache utilisation. Hence we prefer the constant pool for such
1706 processors. */
1708 const struct tune_params arm_v7m_tune =
1710 arm_9e_rtx_costs,
1711 &v7m_extra_costs,
1712 NULL, /* Sched adj cost. */
1713 1, /* Constant limit. */
1714 2, /* Max cond insns. */
1715 ARM_PREFETCH_NOT_BENEFICIAL,
1716 true, /* Prefer constant pool. */
1717 arm_cortex_m_branch_cost,
1718 false, /* Prefer LDRD/STRD. */
1719 {false, false}, /* Prefer non short circuit. */
1720 &arm_default_vec_cost, /* Vectorizer costs. */
1721 false /* Prefer Neon for 64-bits bitops. */
1724 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1725 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1726 const struct tune_params arm_v6m_tune =
1728 arm_9e_rtx_costs,
1729 NULL,
1730 NULL, /* Sched adj cost. */
1731 1, /* Constant limit. */
1732 5, /* Max cond insns. */
1733 ARM_PREFETCH_NOT_BENEFICIAL,
1734 false, /* Prefer constant pool. */
1735 arm_default_branch_cost,
1736 false, /* Prefer LDRD/STRD. */
1737 {false, false}, /* Prefer non short circuit. */
1738 &arm_default_vec_cost, /* Vectorizer costs. */
1739 false /* Prefer Neon for 64-bits bitops. */
1742 const struct tune_params arm_fa726te_tune =
1744 arm_9e_rtx_costs,
1745 NULL,
1746 fa726te_sched_adjust_cost,
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 true, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false /* Prefer Neon for 64-bits bitops. */
1759 /* Not all of these give usefully different compilation alternatives,
1760 but there is no simple way of generalizing them. */
1761 static const struct processors all_cores[] =
1763 /* ARM Cores */
1764 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1765 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1766 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1767 #include "arm-cores.def"
1768 #undef ARM_CORE
1769 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
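/* For illustration only (a hypothetical arm-cores.def entry, not copied from
   that file): an entry of the form
     ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
   would expand, via the ARM_CORE macro above, to
     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},
   so the final COSTS argument selects one of the tune_params tables defined
   earlier in this file.  */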
1772 static const struct processors all_architectures[] =
1774 /* ARM Architectures */
1775 /* We don't specify tuning costs here as it will be figured out
1776 from the core. */
1778 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1779 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1780 #include "arm-arches.def"
1781 #undef ARM_ARCH
1782 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1786 /* These are populated as commandline arguments are processed, or NULL
1787 if not specified. */
1788 static const struct processors *arm_selected_arch;
1789 static const struct processors *arm_selected_cpu;
1790 static const struct processors *arm_selected_tune;
1792 /* The name of the preprocessor macro to define for this architecture. */
1794 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1796 /* Available values for -mfpu=. */
1798 static const struct arm_fpu_desc all_fpus[] =
1800 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1801 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1802 #include "arm-fpus.def"
1803 #undef ARM_FPU
1807 /* Supported TLS relocations. */
1809 enum tls_reloc {
1810 TLS_GD32,
1811 TLS_LDM32,
1812 TLS_LDO32,
1813 TLS_IE32,
1814 TLS_LE32,
1815 TLS_DESCSEQ /* GNU scheme */
1818 /* The maximum number of insns to be used when loading a constant. */
1819 inline static int
1820 arm_constant_limit (bool size_p)
1822 return size_p ? 1 : current_tune->constant_limit;
1825 /* Emit an insn that's a simple single-set. Both the operands must be known
1826 to be valid. */
1827 inline static rtx
1828 emit_set_insn (rtx x, rtx y)
1830 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1833 /* Return the number of bits set in VALUE. */
1834 static unsigned
1835 bit_count (unsigned long value)
1837 unsigned long count = 0;
1839 while (value)
1841 count++;
1842 value &= value - 1; /* Clear the least-significant set bit. */
1845 return count;
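/* For example, bit_count (0x2C) iterates 0x2C -> 0x28 -> 0x20 -> 0 and
   returns 3; each pass of the loop above clears the lowest set bit.  */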
1848 typedef struct
1850 enum machine_mode mode;
1851 const char *name;
1852 } arm_fixed_mode_set;
1854 /* A small helper for setting fixed-point libfuncs. */
1856 static void
1857 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1858 const char *funcname, const char *modename,
1859 int num_suffix)
1861 char buffer[50];
1863 if (num_suffix == 0)
1864 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1865 else
1866 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1868 set_optab_libfunc (optable, mode, buffer);
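/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   builds the name "__gnu_addqq3" and registers it for QQmode addition,
   matching the __gnu_-prefixed fixed-point routines provided by libgcc.  */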
1871 static void
1872 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1873 enum machine_mode from, const char *funcname,
1874 const char *toname, const char *fromname)
1876 char buffer[50];
1877 const char *maybe_suffix_2 = "";
1879 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1880 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1881 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1882 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1883 maybe_suffix_2 = "2";
1885 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1886 maybe_suffix_2);
1888 set_conv_libfunc (optable, to, from, buffer);
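/* For example, a conversion from SAmode to DAmode through fract_optab builds
   the name "__gnu_fractsada2"; both modes are signed accum modes, so the
   "2" suffix is selected by the test above, mirroring fixed-bit.h.  */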
1891 /* Set up library functions unique to ARM. */
1893 static void
1894 arm_init_libfuncs (void)
1896 /* For Linux, we have access to kernel support for atomic operations. */
1897 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1898 init_sync_libfuncs (2 * UNITS_PER_WORD);
1900 /* There are no special library functions unless we are using the
1901 ARM BPABI. */
1902 if (!TARGET_BPABI)
1903 return;
1905 /* The functions below are described in Section 4 of the "Run-Time
1906 ABI for the ARM architecture", Version 1.0. */
1908 /* Double-precision floating-point arithmetic. Table 2. */
1909 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1910 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1911 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1912 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1913 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1915 /* Double-precision comparisons. Table 3. */
1916 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1917 set_optab_libfunc (ne_optab, DFmode, NULL);
1918 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1919 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1920 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1921 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1922 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1924 /* Single-precision floating-point arithmetic. Table 4. */
1925 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1926 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1927 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1928 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1929 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1931 /* Single-precision comparisons. Table 5. */
1932 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1933 set_optab_libfunc (ne_optab, SFmode, NULL);
1934 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1935 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1936 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1937 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1938 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1940 /* Floating-point to integer conversions. Table 6. */
1941 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1942 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1943 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1944 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1945 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1946 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1947 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1948 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1950 /* Conversions between floating types. Table 7. */
1951 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1952 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1954 /* Integer to floating-point conversions. Table 8. */
1955 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1956 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1957 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1958 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1959 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1960 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1961 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1962 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1964 /* Long long. Table 9. */
1965 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1966 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1967 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1968 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1969 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1970 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1971 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1972 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1974 /* Integer (32/32->32) division. \S 4.3.1. */
1975 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1976 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1978 /* The divmod functions are designed so that they can be used for
1979 plain division, even though they return both the quotient and the
1980 remainder. The quotient is returned in the usual location (i.e.,
1981 r0 for SImode, {r0, r1} for DImode), just as would be expected
1982 for an ordinary division routine. Because the AAPCS calling
1983 conventions specify that all of { r0, r1, r2, r3 } are
1984 call-clobbered registers, there is no need to tell the compiler
1985 explicitly that those registers are clobbered by these
1986 routines. */
1987 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1988 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
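/* Illustrative note, based on the Run-time ABI tables cited above: a plain
   signed 64-bit division therefore becomes a call to __aeabi_ldivmod, with
   the quotient taken from {r0, r1} and the remainder returned in {r2, r3}
   simply ignored.  */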
1990 /* For SImode division the ABI provides div-without-mod routines,
1991 which are faster. */
1992 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1993 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1995 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1996 divmod libcalls instead. */
1997 set_optab_libfunc (smod_optab, DImode, NULL);
1998 set_optab_libfunc (umod_optab, DImode, NULL);
1999 set_optab_libfunc (smod_optab, SImode, NULL);
2000 set_optab_libfunc (umod_optab, SImode, NULL);
2002 /* Half-precision float operations. The compiler handles all operations
2003 with NULL libfuncs by converting to SFmode.  */
2004 switch (arm_fp16_format)
2006 case ARM_FP16_FORMAT_IEEE:
2007 case ARM_FP16_FORMAT_ALTERNATIVE:
2009 /* Conversions. */
2010 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2011 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2012 ? "__gnu_f2h_ieee"
2013 : "__gnu_f2h_alternative"));
2014 set_conv_libfunc (sext_optab, SFmode, HFmode,
2015 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2016 ? "__gnu_h2f_ieee"
2017 : "__gnu_h2f_alternative"));
2019 /* Arithmetic. */
2020 set_optab_libfunc (add_optab, HFmode, NULL);
2021 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2022 set_optab_libfunc (smul_optab, HFmode, NULL);
2023 set_optab_libfunc (neg_optab, HFmode, NULL);
2024 set_optab_libfunc (sub_optab, HFmode, NULL);
2026 /* Comparisons. */
2027 set_optab_libfunc (eq_optab, HFmode, NULL);
2028 set_optab_libfunc (ne_optab, HFmode, NULL);
2029 set_optab_libfunc (lt_optab, HFmode, NULL);
2030 set_optab_libfunc (le_optab, HFmode, NULL);
2031 set_optab_libfunc (ge_optab, HFmode, NULL);
2032 set_optab_libfunc (gt_optab, HFmode, NULL);
2033 set_optab_libfunc (unord_optab, HFmode, NULL);
2034 break;
2036 default:
2037 break;
2040 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2042 const arm_fixed_mode_set fixed_arith_modes[] =
2044 { QQmode, "qq" },
2045 { UQQmode, "uqq" },
2046 { HQmode, "hq" },
2047 { UHQmode, "uhq" },
2048 { SQmode, "sq" },
2049 { USQmode, "usq" },
2050 { DQmode, "dq" },
2051 { UDQmode, "udq" },
2052 { TQmode, "tq" },
2053 { UTQmode, "utq" },
2054 { HAmode, "ha" },
2055 { UHAmode, "uha" },
2056 { SAmode, "sa" },
2057 { USAmode, "usa" },
2058 { DAmode, "da" },
2059 { UDAmode, "uda" },
2060 { TAmode, "ta" },
2061 { UTAmode, "uta" }
2063 const arm_fixed_mode_set fixed_conv_modes[] =
2065 { QQmode, "qq" },
2066 { UQQmode, "uqq" },
2067 { HQmode, "hq" },
2068 { UHQmode, "uhq" },
2069 { SQmode, "sq" },
2070 { USQmode, "usq" },
2071 { DQmode, "dq" },
2072 { UDQmode, "udq" },
2073 { TQmode, "tq" },
2074 { UTQmode, "utq" },
2075 { HAmode, "ha" },
2076 { UHAmode, "uha" },
2077 { SAmode, "sa" },
2078 { USAmode, "usa" },
2079 { DAmode, "da" },
2080 { UDAmode, "uda" },
2081 { TAmode, "ta" },
2082 { UTAmode, "uta" },
2083 { QImode, "qi" },
2084 { HImode, "hi" },
2085 { SImode, "si" },
2086 { DImode, "di" },
2087 { TImode, "ti" },
2088 { SFmode, "sf" },
2089 { DFmode, "df" }
2091 unsigned int i, j;
2093 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2095 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2096 "add", fixed_arith_modes[i].name, 3);
2097 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2098 "ssadd", fixed_arith_modes[i].name, 3);
2099 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2100 "usadd", fixed_arith_modes[i].name, 3);
2101 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2102 "sub", fixed_arith_modes[i].name, 3);
2103 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2104 "sssub", fixed_arith_modes[i].name, 3);
2105 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2106 "ussub", fixed_arith_modes[i].name, 3);
2107 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2108 "mul", fixed_arith_modes[i].name, 3);
2109 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2110 "ssmul", fixed_arith_modes[i].name, 3);
2111 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2112 "usmul", fixed_arith_modes[i].name, 3);
2113 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2114 "div", fixed_arith_modes[i].name, 3);
2115 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2116 "udiv", fixed_arith_modes[i].name, 3);
2117 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2118 "ssdiv", fixed_arith_modes[i].name, 3);
2119 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2120 "usdiv", fixed_arith_modes[i].name, 3);
2121 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2122 "neg", fixed_arith_modes[i].name, 2);
2123 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2124 "ssneg", fixed_arith_modes[i].name, 2);
2125 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2126 "usneg", fixed_arith_modes[i].name, 2);
2127 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2128 "ashl", fixed_arith_modes[i].name, 3);
2129 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2130 "ashr", fixed_arith_modes[i].name, 3);
2131 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2132 "lshr", fixed_arith_modes[i].name, 3);
2133 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2134 "ssashl", fixed_arith_modes[i].name, 3);
2135 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2136 "usashl", fixed_arith_modes[i].name, 3);
2137 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2138 "cmp", fixed_arith_modes[i].name, 2);
2141 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2142 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2144 if (i == j
2145 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2146 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2147 continue;
2149 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2150 fixed_conv_modes[j].mode, "fract",
2151 fixed_conv_modes[i].name,
2152 fixed_conv_modes[j].name);
2153 arm_set_fixed_conv_libfunc (satfract_optab,
2154 fixed_conv_modes[i].mode,
2155 fixed_conv_modes[j].mode, "satfract",
2156 fixed_conv_modes[i].name,
2157 fixed_conv_modes[j].name);
2158 arm_set_fixed_conv_libfunc (fractuns_optab,
2159 fixed_conv_modes[i].mode,
2160 fixed_conv_modes[j].mode, "fractuns",
2161 fixed_conv_modes[i].name,
2162 fixed_conv_modes[j].name);
2163 arm_set_fixed_conv_libfunc (satfractuns_optab,
2164 fixed_conv_modes[i].mode,
2165 fixed_conv_modes[j].mode, "satfractuns",
2166 fixed_conv_modes[i].name,
2167 fixed_conv_modes[j].name);
2171 if (TARGET_AAPCS_BASED)
2172 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2175 /* On AAPCS systems, this is the "struct __va_list". */
2176 static GTY(()) tree va_list_type;
2178 /* Return the type to use as __builtin_va_list. */
2179 static tree
2180 arm_build_builtin_va_list (void)
2182 tree va_list_name;
2183 tree ap_field;
2185 if (!TARGET_AAPCS_BASED)
2186 return std_build_builtin_va_list ();
2188 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2189 defined as:
2191 struct __va_list
2192 {
2193 void *__ap;
2194 };
2196 The C Library ABI further reinforces this definition in \S
2197 4.1.
2199 We must follow this definition exactly. The structure tag
2200 name is visible in C++ mangled names, and thus forms a part
2201 of the ABI. The field name may be used by people who
2202 #include <stdarg.h>. */
2203 /* Create the type. */
2204 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2205 /* Give it the required name. */
2206 va_list_name = build_decl (BUILTINS_LOCATION,
2207 TYPE_DECL,
2208 get_identifier ("__va_list"),
2209 va_list_type);
2210 DECL_ARTIFICIAL (va_list_name) = 1;
2211 TYPE_NAME (va_list_type) = va_list_name;
2212 TYPE_STUB_DECL (va_list_type) = va_list_name;
2213 /* Create the __ap field. */
2214 ap_field = build_decl (BUILTINS_LOCATION,
2215 FIELD_DECL,
2216 get_identifier ("__ap"),
2217 ptr_type_node);
2218 DECL_ARTIFICIAL (ap_field) = 1;
2219 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2220 TYPE_FIELDS (va_list_type) = ap_field;
2221 /* Compute its layout. */
2222 layout_type (va_list_type);
2224 return va_list_type;
2227 /* Return an expression of type "void *" pointing to the next
2228 available argument in a variable-argument list. VALIST is the
2229 user-level va_list object, of type __builtin_va_list. */
2230 static tree
2231 arm_extract_valist_ptr (tree valist)
2233 if (TREE_TYPE (valist) == error_mark_node)
2234 return error_mark_node;
2236 /* On an AAPCS target, the pointer is stored within "struct
2237 va_list". */
2238 if (TARGET_AAPCS_BASED)
2240 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2241 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2242 valist, ap_field, NULL_TREE);
2245 return valist;
2248 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2249 static void
2250 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2252 valist = arm_extract_valist_ptr (valist);
2253 std_expand_builtin_va_start (valist, nextarg);
2256 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2257 static tree
2258 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2259 gimple_seq *post_p)
2261 valist = arm_extract_valist_ptr (valist);
2262 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2265 /* Fix up any incompatible options that the user has specified. */
2266 static void
2267 arm_option_override (void)
2269 if (global_options_set.x_arm_arch_option)
2270 arm_selected_arch = &all_architectures[arm_arch_option];
2272 if (global_options_set.x_arm_cpu_option)
2274 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2275 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2278 if (global_options_set.x_arm_tune_option)
2279 arm_selected_tune = &all_cores[(int) arm_tune_option];
2281 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2282 SUBTARGET_OVERRIDE_OPTIONS;
2283 #endif
2285 if (arm_selected_arch)
2287 if (arm_selected_cpu)
2289 /* Check for conflict between mcpu and march. */
2290 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2292 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2293 arm_selected_cpu->name, arm_selected_arch->name);
2294 /* -march wins for code generation.
2295 -mcpu wins for default tuning. */
2296 if (!arm_selected_tune)
2297 arm_selected_tune = arm_selected_cpu;
2299 arm_selected_cpu = arm_selected_arch;
2301 else
2302 /* -mcpu wins. */
2303 arm_selected_arch = NULL;
2305 else
2306 /* Pick a CPU based on the architecture. */
2307 arm_selected_cpu = arm_selected_arch;
2310 /* If the user did not specify a processor, choose one for them. */
2311 if (!arm_selected_cpu)
2313 const struct processors * sel;
2314 unsigned int sought;
2316 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2317 if (!arm_selected_cpu->name)
2319 #ifdef SUBTARGET_CPU_DEFAULT
2320 /* Use the subtarget default CPU if none was specified by
2321 configure. */
2322 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2323 #endif
2324 /* Default to ARM6. */
2325 if (!arm_selected_cpu->name)
2326 arm_selected_cpu = &all_cores[arm6];
2329 sel = arm_selected_cpu;
2330 insn_flags = sel->flags;
2332 /* Now check to see if the user has specified some command line
2333 switches that require certain abilities from the cpu.  */
2334 sought = 0;
2336 if (TARGET_INTERWORK || TARGET_THUMB)
2338 sought |= (FL_THUMB | FL_MODE32);
2340 /* There are no ARM processors that support both APCS-26 and
2341 interworking. Therefore we force FL_MODE26 to be removed
2342 from insn_flags here (if it was set), so that the search
2343 below will always be able to find a compatible processor. */
2344 insn_flags &= ~FL_MODE26;
2347 if (sought != 0 && ((sought & insn_flags) != sought))
2349 /* Try to locate a CPU type that supports all of the abilities
2350 of the default CPU, plus the extra abilities requested by
2351 the user. */
2352 for (sel = all_cores; sel->name != NULL; sel++)
2353 if ((sel->flags & sought) == (sought | insn_flags))
2354 break;
2356 if (sel->name == NULL)
2358 unsigned current_bit_count = 0;
2359 const struct processors * best_fit = NULL;
2361 /* Ideally we would like to issue an error message here
2362 saying that it was not possible to find a CPU compatible
2363 with the default CPU, but which also supports the command
2364 line options specified by the programmer, and so they
2365 ought to use the -mcpu=<name> command line option to
2366 override the default CPU type.
2368 If we cannot find a cpu that has both the
2369 characteristics of the default cpu and the given
2370 command line options we scan the array again looking
2371 for a best match. */
2372 for (sel = all_cores; sel->name != NULL; sel++)
2373 if ((sel->flags & sought) == sought)
2375 unsigned count;
2377 count = bit_count (sel->flags & insn_flags);
2379 if (count >= current_bit_count)
2381 best_fit = sel;
2382 current_bit_count = count;
2386 gcc_assert (best_fit);
2387 sel = best_fit;
2390 arm_selected_cpu = sel;
2394 gcc_assert (arm_selected_cpu);
2395 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2396 if (!arm_selected_tune)
2397 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2399 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2400 insn_flags = arm_selected_cpu->flags;
2401 arm_base_arch = arm_selected_cpu->base_arch;
2403 arm_tune = arm_selected_tune->core;
2404 tune_flags = arm_selected_tune->flags;
2405 current_tune = arm_selected_tune->tune;
2407 /* Make sure that the processor choice does not conflict with any of the
2408 other command line choices. */
2409 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2410 error ("target CPU does not support ARM mode");
2412 /* BPABI targets use linker tricks to allow interworking on cores
2413 without thumb support. */
2414 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2416 warning (0, "target CPU does not support interworking" );
2417 target_flags &= ~MASK_INTERWORK;
2420 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2422 warning (0, "target CPU does not support THUMB instructions");
2423 target_flags &= ~MASK_THUMB;
2426 if (TARGET_APCS_FRAME && TARGET_THUMB)
2428 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2429 target_flags &= ~MASK_APCS_FRAME;
2432 /* Callee super interworking implies thumb interworking. Adding
2433 this to the flags here simplifies the logic elsewhere. */
2434 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2435 target_flags |= MASK_INTERWORK;
2437 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2438 from here where no function is being compiled currently. */
2439 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2440 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2442 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2443 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2445 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2447 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2448 target_flags |= MASK_APCS_FRAME;
2451 if (TARGET_POKE_FUNCTION_NAME)
2452 target_flags |= MASK_APCS_FRAME;
2454 if (TARGET_APCS_REENT && flag_pic)
2455 error ("-fpic and -mapcs-reent are incompatible");
2457 if (TARGET_APCS_REENT)
2458 warning (0, "APCS reentrant code not supported. Ignored");
2460 /* If this target is normally configured to use APCS frames, warn if they
2461 are turned off and debugging is turned on. */
2462 if (TARGET_ARM
2463 && write_symbols != NO_DEBUG
2464 && !TARGET_APCS_FRAME
2465 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2466 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2468 if (TARGET_APCS_FLOAT)
2469 warning (0, "passing floating point arguments in fp regs not yet supported");
2471 if (TARGET_LITTLE_WORDS)
2472 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2473 "will be removed in a future release");
2475 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2476 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2477 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2478 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2479 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2480 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2481 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2482 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2483 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2484 arm_arch6m = arm_arch6 && !arm_arch_notm;
2485 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2486 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2487 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2488 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2489 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2491 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2492 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2493 thumb_code = TARGET_ARM == 0;
2494 thumb1_code = TARGET_THUMB1 != 0;
2495 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2496 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2497 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2498 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2499 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2500 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2501 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2502 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2503 if (arm_restrict_it == 2)
2504 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2506 if (!TARGET_THUMB2)
2507 arm_restrict_it = 0;
2509 /* If we are not using the default (ARM mode) section anchor offset
2510 ranges, then set the correct ranges now. */
2511 if (TARGET_THUMB1)
2513 /* Thumb-1 LDR instructions cannot have negative offsets.
2514 Permissible positive offset ranges are 5-bit (for byte loads),
2515 6-bit (for halfword loads), or 7-bit (for word loads).
2516 Empirical results suggest a 7-bit anchor range gives the best
2517 overall code size. */
2518 targetm.min_anchor_offset = 0;
2519 targetm.max_anchor_offset = 127;
2521 else if (TARGET_THUMB2)
2523 /* The minimum is set such that the total size of the block
2524 for a particular anchor is 248 + 1 + 4095 bytes, which is
2525 divisible by eight, ensuring natural spacing of anchors. */
2526 targetm.min_anchor_offset = -248;
2527 targetm.max_anchor_offset = 4095;
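/* That is, 248 + 1 + 4095 = 4344 bytes in total, and 4344 = 8 * 543.  */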
2530 /* V5 code we generate is completely interworking capable, so we turn off
2531 TARGET_INTERWORK here to avoid many tests later on. */
2533 /* XXX However, we must pass the right pre-processor defines to CPP
2534 or GLD can get confused. This is a hack. */
2535 if (TARGET_INTERWORK)
2536 arm_cpp_interwork = 1;
2538 if (arm_arch5)
2539 target_flags &= ~MASK_INTERWORK;
2541 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2542 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2544 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2545 error ("iwmmxt abi requires an iwmmxt capable cpu");
2547 if (!global_options_set.x_arm_fpu_index)
2549 const char *target_fpu_name;
2550 bool ok;
2552 #ifdef FPUTYPE_DEFAULT
2553 target_fpu_name = FPUTYPE_DEFAULT;
2554 #else
2555 target_fpu_name = "vfp";
2556 #endif
2558 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2559 CL_TARGET);
2560 gcc_assert (ok);
2563 arm_fpu_desc = &all_fpus[arm_fpu_index];
2565 switch (arm_fpu_desc->model)
2567 case ARM_FP_MODEL_VFP:
2568 arm_fpu_attr = FPU_VFP;
2569 break;
2571 default:
2572 gcc_unreachable();
2575 if (TARGET_AAPCS_BASED)
2577 if (TARGET_CALLER_INTERWORKING)
2578 error ("AAPCS does not support -mcaller-super-interworking");
2579 else
2580 if (TARGET_CALLEE_INTERWORKING)
2581 error ("AAPCS does not support -mcallee-super-interworking");
2584 /* iWMMXt and NEON are incompatible. */
2585 if (TARGET_IWMMXT && TARGET_NEON)
2586 error ("iWMMXt and NEON are incompatible");
2588 /* iWMMXt unsupported under Thumb mode. */
2589 if (TARGET_THUMB && TARGET_IWMMXT)
2590 error ("iWMMXt unsupported under Thumb mode");
2592 /* __fp16 support currently assumes the core has ldrh. */
2593 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2594 sorry ("__fp16 and no ldrh");
2596 /* If soft-float is specified then don't use FPU. */
2597 if (TARGET_SOFT_FLOAT)
2598 arm_fpu_attr = FPU_NONE;
2600 if (TARGET_AAPCS_BASED)
2602 if (arm_abi == ARM_ABI_IWMMXT)
2603 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2604 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2605 && TARGET_HARD_FLOAT
2606 && TARGET_VFP)
2607 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2608 else
2609 arm_pcs_default = ARM_PCS_AAPCS;
2611 else
2613 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2614 sorry ("-mfloat-abi=hard and VFP");
2616 if (arm_abi == ARM_ABI_APCS)
2617 arm_pcs_default = ARM_PCS_APCS;
2618 else
2619 arm_pcs_default = ARM_PCS_ATPCS;
2622 /* For arm2/3 there is no need to do any scheduling if we are doing
2623 software floating-point. */
2624 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2625 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2627 /* Use the cp15 method if it is available. */
2628 if (target_thread_pointer == TP_AUTO)
2630 if (arm_arch6k && !TARGET_THUMB1)
2631 target_thread_pointer = TP_CP15;
2632 else
2633 target_thread_pointer = TP_SOFT;
2636 if (TARGET_HARD_TP && TARGET_THUMB1)
2637 error ("can not use -mtp=cp15 with 16-bit Thumb");
2639 /* Override the default structure alignment for AAPCS ABI. */
2640 if (!global_options_set.x_arm_structure_size_boundary)
2642 if (TARGET_AAPCS_BASED)
2643 arm_structure_size_boundary = 8;
2645 else
2647 if (arm_structure_size_boundary != 8
2648 && arm_structure_size_boundary != 32
2649 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2651 if (ARM_DOUBLEWORD_ALIGN)
2652 warning (0,
2653 "structure size boundary can only be set to 8, 32 or 64");
2654 else
2655 warning (0, "structure size boundary can only be set to 8 or 32");
2656 arm_structure_size_boundary
2657 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2661 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2663 error ("RTP PIC is incompatible with Thumb");
2664 flag_pic = 0;
2667 /* If stack checking is disabled, we can use r10 as the PIC register,
2668 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2669 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2671 if (TARGET_VXWORKS_RTP)
2672 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2673 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2676 if (flag_pic && TARGET_VXWORKS_RTP)
2677 arm_pic_register = 9;
2679 if (arm_pic_register_string != NULL)
2681 int pic_register = decode_reg_name (arm_pic_register_string);
2683 if (!flag_pic)
2684 warning (0, "-mpic-register= is useless without -fpic");
2686 /* Prevent the user from choosing an obviously stupid PIC register. */
2687 else if (pic_register < 0 || call_used_regs[pic_register]
2688 || pic_register == HARD_FRAME_POINTER_REGNUM
2689 || pic_register == STACK_POINTER_REGNUM
2690 || pic_register >= PC_REGNUM
2691 || (TARGET_VXWORKS_RTP
2692 && (unsigned int) pic_register != arm_pic_register))
2693 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2694 else
2695 arm_pic_register = pic_register;
2698 if (TARGET_VXWORKS_RTP
2699 && !global_options_set.x_arm_pic_data_is_text_relative)
2700 arm_pic_data_is_text_relative = 0;
2702 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2703 if (fix_cm3_ldrd == 2)
2705 if (arm_selected_cpu->core == cortexm3)
2706 fix_cm3_ldrd = 1;
2707 else
2708 fix_cm3_ldrd = 0;
2711 /* Enable -munaligned-access by default for
2712 - all ARMv6 architecture-based processors
2713 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2714 - ARMv8 architecture-based processors.
2716 Disable -munaligned-access by default for
2717 - all pre-ARMv6 architecture-based processors
2718 - ARMv6-M architecture-based processors. */
2720 if (unaligned_access == 2)
2722 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2723 unaligned_access = 1;
2724 else
2725 unaligned_access = 0;
2727 else if (unaligned_access == 1
2728 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2730 warning (0, "target CPU does not support unaligned accesses");
2731 unaligned_access = 0;
2734 if (TARGET_THUMB1 && flag_schedule_insns)
2736 /* Don't warn since it's on by default in -O2. */
2737 flag_schedule_insns = 0;
2740 if (optimize_size)
2742 /* If optimizing for size, bump the number of instructions that we
2743 are prepared to conditionally execute (even on a StrongARM). */
2744 max_insns_skipped = 6;
2746 else
2747 max_insns_skipped = current_tune->max_insns_skipped;
2749 /* Hot/Cold partitioning is not currently supported, since we can't
2750 handle literal pool placement in that case. */
2751 if (flag_reorder_blocks_and_partition)
2753 inform (input_location,
2754 "-freorder-blocks-and-partition not supported on this architecture");
2755 flag_reorder_blocks_and_partition = 0;
2756 flag_reorder_blocks = 1;
2759 if (flag_pic)
2760 /* Hoisting PIC address calculations more aggressively provides a small,
2761 but measurable, size reduction for PIC code. Therefore, we decrease
2762 the bar for unrestricted expression hoisting to the cost of PIC address
2763 calculation, which is 2 instructions. */
2764 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2765 global_options.x_param_values,
2766 global_options_set.x_param_values);
2768 /* ARM EABI defaults to strict volatile bitfields. */
2769 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2770 && abi_version_at_least(2))
2771 flag_strict_volatile_bitfields = 1;
2773 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
2774 it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
2775 if (flag_prefetch_loop_arrays < 0
2776 && HAVE_prefetch
2777 && optimize >= 3
2778 && current_tune->num_prefetch_slots > 0)
2779 flag_prefetch_loop_arrays = 1;
2781 /* Set up parameters to be used in prefetching algorithm. Do not override the
2782 defaults unless we are tuning for a core we have researched values for. */
2783 if (current_tune->num_prefetch_slots > 0)
2784 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2785 current_tune->num_prefetch_slots,
2786 global_options.x_param_values,
2787 global_options_set.x_param_values);
2788 if (current_tune->l1_cache_line_size >= 0)
2789 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2790 current_tune->l1_cache_line_size,
2791 global_options.x_param_values,
2792 global_options_set.x_param_values);
2793 if (current_tune->l1_cache_size >= 0)
2794 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2795 current_tune->l1_cache_size,
2796 global_options.x_param_values,
2797 global_options_set.x_param_values);
2799 /* Use Neon to perform 64-bit operations rather than core
2800 registers. */
2801 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2802 if (use_neon_for_64bits == 1)
2803 prefer_neon_for_64bits = true;
2805 /* Use the alternative scheduling-pressure algorithm by default. */
2806 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2807 global_options.x_param_values,
2808 global_options_set.x_param_values);
2810 /* Disable shrink-wrap when optimizing function for size, since it tends to
2811 generate additional returns. */
2812 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2813 flag_shrink_wrap = false;
2814 /* TBD: Dwarf info for apcs frame is not handled yet. */
2815 if (TARGET_APCS_FRAME)
2816 flag_shrink_wrap = false;
2818 /* We only support -mslow-flash-data on armv7-m targets. */
2819 if (target_slow_flash_data
2820 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2821 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2822 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2824 /* Currently, for slow flash data, we just disable literal pools. */
2825 if (target_slow_flash_data)
2826 arm_disable_literal_pool = true;
2828 /* Register global variables with the garbage collector. */
2829 arm_add_gc_roots ();
2832 static void
2833 arm_add_gc_roots (void)
2835 gcc_obstack_init(&minipool_obstack);
2836 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2839 /* A table of known ARM exception types.
2840 For use with the interrupt function attribute. */
2842 typedef struct
2844 const char *const arg;
2845 const unsigned long return_value;
2847 isr_attribute_arg;
2849 static const isr_attribute_arg isr_attribute_args [] =
2851 { "IRQ", ARM_FT_ISR },
2852 { "irq", ARM_FT_ISR },
2853 { "FIQ", ARM_FT_FIQ },
2854 { "fiq", ARM_FT_FIQ },
2855 { "ABORT", ARM_FT_ISR },
2856 { "abort", ARM_FT_ISR },
2857 { "ABORT", ARM_FT_ISR },
2858 { "abort", ARM_FT_ISR },
2859 { "UNDEF", ARM_FT_EXCEPTION },
2860 { "undef", ARM_FT_EXCEPTION },
2861 { "SWI", ARM_FT_EXCEPTION },
2862 { "swi", ARM_FT_EXCEPTION },
2863 { NULL, ARM_FT_NORMAL }
2866 /* Returns the (interrupt) function type of the current
2867 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2869 static unsigned long
2870 arm_isr_value (tree argument)
2872 const isr_attribute_arg * ptr;
2873 const char * arg;
2875 if (!arm_arch_notm)
2876 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2878 /* No argument - default to IRQ. */
2879 if (argument == NULL_TREE)
2880 return ARM_FT_ISR;
2882 /* Get the value of the argument. */
2883 if (TREE_VALUE (argument) == NULL_TREE
2884 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2885 return ARM_FT_UNKNOWN;
2887 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2889 /* Check it against the list of known arguments. */
2890 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2891 if (streq (arg, ptr->arg))
2892 return ptr->return_value;
2894 /* An unrecognized interrupt type. */
2895 return ARM_FT_UNKNOWN;
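/* For example, a handler declared as
     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
   reaches this function with the string "IRQ" and is mapped to ARM_FT_ISR
   by the table above.  */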
2898 /* Computes the type of the current function. */
2900 static unsigned long
2901 arm_compute_func_type (void)
2903 unsigned long type = ARM_FT_UNKNOWN;
2904 tree a;
2905 tree attr;
2907 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2909 /* Decide if the current function is volatile. Such functions
2910 never return, and many memory cycles can be saved by not storing
2911 register values that will never be needed again. This optimization
2912 was added to speed up context switching in a kernel application. */
2913 if (optimize > 0
2914 && (TREE_NOTHROW (current_function_decl)
2915 || !(flag_unwind_tables
2916 || (flag_exceptions
2917 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2918 && TREE_THIS_VOLATILE (current_function_decl))
2919 type |= ARM_FT_VOLATILE;
2921 if (cfun->static_chain_decl != NULL)
2922 type |= ARM_FT_NESTED;
2924 attr = DECL_ATTRIBUTES (current_function_decl);
2926 a = lookup_attribute ("naked", attr);
2927 if (a != NULL_TREE)
2928 type |= ARM_FT_NAKED;
2930 a = lookup_attribute ("isr", attr);
2931 if (a == NULL_TREE)
2932 a = lookup_attribute ("interrupt", attr);
2934 if (a == NULL_TREE)
2935 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2936 else
2937 type |= arm_isr_value (TREE_VALUE (a));
2939 return type;
2942 /* Returns the type of the current function. */
2944 unsigned long
2945 arm_current_func_type (void)
2947 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2948 cfun->machine->func_type = arm_compute_func_type ();
2950 return cfun->machine->func_type;
2953 bool
2954 arm_allocate_stack_slots_for_args (void)
2956 /* Naked functions should not allocate stack slots for arguments. */
2957 return !IS_NAKED (arm_current_func_type ());
2960 static bool
2961 arm_warn_func_return (tree decl)
2963 /* Naked functions are implemented entirely in assembly, including the
2964 return sequence, so suppress warnings about this. */
2965 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2969 /* Output assembler code for a block containing the constant parts
2970 of a trampoline, leaving space for the variable parts.
2972 On the ARM, (if r8 is the static chain regnum, and remembering that
2973 referencing pc adds an offset of 8) the trampoline looks like:
2974 ldr r8, [pc, #0]
2975 ldr pc, [pc]
2976 .word static chain value
2977 .word function's address
2978 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2980 static void
2981 arm_asm_trampoline_template (FILE *f)
2983 if (TARGET_ARM)
2985 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2986 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2988 else if (TARGET_THUMB2)
2990 /* The Thumb-2 trampoline is similar to the arm implementation.
2991 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2992 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2993 STATIC_CHAIN_REGNUM, PC_REGNUM);
2994 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2996 else
2998 ASM_OUTPUT_ALIGN (f, 2);
2999 fprintf (f, "\t.code\t16\n");
3000 fprintf (f, ".Ltrampoline_start:\n");
3001 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3002 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3003 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3004 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3005 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3006 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3008 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3009 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3012 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3014 static void
3015 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3017 rtx fnaddr, mem, a_tramp;
3019 emit_block_move (m_tramp, assemble_trampoline_template (),
3020 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3022 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3023 emit_move_insn (mem, chain_value);
3025 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3026 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3027 emit_move_insn (mem, fnaddr);
3029 a_tramp = XEXP (m_tramp, 0);
3030 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3031 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3032 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3035 /* Thumb trampolines should be entered in thumb mode, so set
3036 the bottom bit of the address. */
3038 static rtx
3039 arm_trampoline_adjust_address (rtx addr)
3041 if (TARGET_THUMB)
3042 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3043 NULL, 0, OPTAB_LIB_WIDEN);
3044 return addr;
3047 /* Return 1 if it is possible to return using a single instruction.
3048 If SIBLING is non-null, this is a test for a return before a sibling
3049 call. SIBLING is the call insn, so we can examine its register usage. */
3052 use_return_insn (int iscond, rtx sibling)
3054 int regno;
3055 unsigned int func_type;
3056 unsigned long saved_int_regs;
3057 unsigned HOST_WIDE_INT stack_adjust;
3058 arm_stack_offsets *offsets;
3060 /* Never use a return instruction before reload has run. */
3061 if (!reload_completed)
3062 return 0;
3064 func_type = arm_current_func_type ();
3066 /* Naked, volatile and stack alignment functions need special
3067 consideration. */
3068 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3069 return 0;
3071 /* So do interrupt functions that use the frame pointer and Thumb
3072 interrupt functions. */
3073 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3074 return 0;
3076 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3077 && !optimize_function_for_size_p (cfun))
3078 return 0;
3080 offsets = arm_get_frame_offsets ();
3081 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3083 /* As do variadic functions. */
3084 if (crtl->args.pretend_args_size
3085 || cfun->machine->uses_anonymous_args
3086 /* Or if the function calls __builtin_eh_return () */
3087 || crtl->calls_eh_return
3088 /* Or if the function calls alloca */
3089 || cfun->calls_alloca
3090 /* Or if there is a stack adjustment. However, if the stack pointer
3091 is saved on the stack, we can use a pre-incrementing stack load. */
3092 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3093 && stack_adjust == 4)))
3094 return 0;
3096 saved_int_regs = offsets->saved_regs_mask;
3098 /* Unfortunately, the insn
3100 ldmib sp, {..., sp, ...}
3102 triggers a bug on most SA-110 based devices, such that the stack
3103 pointer won't be correctly restored if the instruction takes a
3104 page fault. We work around this problem by popping r3 along with
3105 the other registers, since that is never slower than executing
3106 another instruction.
3108 We test for !arm_arch5 here, because code for any architecture
3109 less than this could potentially be run on one of the buggy
3110 chips. */
3111 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3113 /* Validate that r3 is a call-clobbered register (always true in
3114 the default abi) ... */
3115 if (!call_used_regs[3])
3116 return 0;
3118 /* ... that it isn't being used for a return value ... */
3119 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3120 return 0;
3122 /* ... or for a tail-call argument ... */
3123 if (sibling)
3125 gcc_assert (CALL_P (sibling));
3127 if (find_regno_fusage (sibling, USE, 3))
3128 return 0;
3131 /* ... and that there are no call-saved registers in r0-r2
3132 (always true in the default ABI). */
3133 if (saved_int_regs & 0x7)
3134 return 0;
3137 /* Can't be done if interworking with Thumb, and any registers have been
3138 stacked. */
3139 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3140 return 0;
3142 /* On StrongARM, conditional returns are expensive if they aren't
3143 taken and multiple registers have been stacked. */
3144 if (iscond && arm_tune_strongarm)
3146 /* Conditional return when just the LR is stored is a simple
3147 conditional-load instruction, that's not expensive. */
3148 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3149 return 0;
3151 if (flag_pic
3152 && arm_pic_register != INVALID_REGNUM
3153 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3154 return 0;
3157 /* If there are saved registers but the LR isn't saved, then we need
3158 two instructions for the return. */
3159 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3160 return 0;
3162 /* Can't be done if any of the VFP regs are pushed,
3163 since this also requires an insn. */
3164 if (TARGET_HARD_FLOAT && TARGET_VFP)
3165 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3166 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3167 return 0;
3169 if (TARGET_REALLY_IWMMXT)
3170 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3171 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3172 return 0;
3174 return 1;
3177 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3178 shrink-wrapping if possible. This is the case if we need to emit a
3179 prologue, which we can test by looking at the offsets. */
3180 bool
3181 use_simple_return_p (void)
3183 arm_stack_offsets *offsets;
3185 offsets = arm_get_frame_offsets ();
3186 return offsets->outgoing_args != 0;
3189 /* Return TRUE if int I is a valid immediate ARM constant. */
3192 const_ok_for_arm (HOST_WIDE_INT i)
3194 int lowbit;
3196 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3197 be all zero, or all one. */
3198 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3199 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3200 != ((~(unsigned HOST_WIDE_INT) 0)
3201 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3202 return FALSE;
3204 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3206 /* Fast return for 0 and small values. We must do this for zero, since
3207 the code below can't handle that one case. */
3208 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3209 return TRUE;
3211 /* Get the number of trailing zeros. */
3212 lowbit = ffs((int) i) - 1;
3214 /* Only even shifts are allowed in ARM mode so round down to the
3215 nearest even number. */
3216 if (TARGET_ARM)
3217 lowbit &= ~1;
3219 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3220 return TRUE;
3222 if (TARGET_ARM)
3224 /* Allow rotated constants in ARM mode. */
3225 if (lowbit <= 4
3226 && ((i & ~0xc000003f) == 0
3227 || (i & ~0xf000000f) == 0
3228 || (i & ~0xfc000003) == 0))
3229 return TRUE;
3231 else
3233 HOST_WIDE_INT v;
3235 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3236 v = i & 0xff;
3237 v |= v << 16;
3238 if (i == v || i == (v | (v << 8)))
3239 return TRUE;
3241 /* Allow repeated pattern 0xXY00XY00. */
3242 v = i & 0xff00;
3243 v |= v << 16;
3244 if (i == v)
3245 return TRUE;
3248 return FALSE;
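/* Worked examples for the tests above: 0xff0 and 0xff000000 are valid
   immediates (an 8-bit value rotated by an even amount), while 0x101 is not;
   in Thumb-2 mode the replicated patterns 0x00ff00ff and 0x01010101 are
   additionally accepted.  */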
3251 /* Return true if I is a valid constant for the operation CODE. */
3253 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3255 if (const_ok_for_arm (i))
3256 return 1;
3258 switch (code)
3260 case SET:
3261 /* See if we can use movw. */
3262 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3263 return 1;
3264 else
3265 /* Otherwise, try mvn. */
3266 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3268 case PLUS:
3269 /* See if we can use addw or subw. */
3270 if (TARGET_THUMB2
3271 && ((i & 0xfffff000) == 0
3272 || ((-i) & 0xfffff000) == 0))
3273 return 1;
3274 /* else fall through. */
3276 case COMPARE:
3277 case EQ:
3278 case NE:
3279 case GT:
3280 case LE:
3281 case LT:
3282 case GE:
3283 case GEU:
3284 case LTU:
3285 case GTU:
3286 case LEU:
3287 case UNORDERED:
3288 case ORDERED:
3289 case UNEQ:
3290 case UNGE:
3291 case UNLT:
3292 case UNGT:
3293 case UNLE:
3294 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3296 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3297 case XOR:
3298 return 0;
3300 case IOR:
3301 if (TARGET_THUMB2)
3302 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3303 return 0;
3305 case AND:
3306 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3308 default:
3309 gcc_unreachable ();
3313 /* Return true if I is a valid di mode constant for the operation CODE. */
3315 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3317 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3318 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3319 rtx hi = GEN_INT (hi_val);
3320 rtx lo = GEN_INT (lo_val);
3322 if (TARGET_THUMB1)
3323 return 0;
3325 switch (code)
3327 case AND:
3328 case IOR:
3329 case XOR:
3330 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3331 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3332 case PLUS:
3333 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3335 default:
3336 return 0;
3340 /* Emit a sequence of insns to handle a large constant.
3341 CODE is the code of the operation required, it can be any of SET, PLUS,
3342 IOR, AND, XOR, MINUS;
3343 MODE is the mode in which the operation is being performed;
3344 VAL is the integer to operate on;
3345 SOURCE is the other operand (a register, or a null-pointer for SET);
3346 SUBTARGETS means it is safe to create scratch registers if that will
3347 either produce a simpler sequence, or we will want to cse the values.
3348 Return value is the number of insns emitted. */
3350 /* ??? Tweak this for thumb2. */
3352 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3353 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3355 rtx cond;
3357 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3358 cond = COND_EXEC_TEST (PATTERN (insn));
3359 else
3360 cond = NULL_RTX;
3362 if (subtargets || code == SET
3363 || (REG_P (target) && REG_P (source)
3364 && REGNO (target) != REGNO (source)))
3366 /* After arm_reorg has been called, we can't fix up expensive
3367 constants by pushing them into memory so we must synthesize
3368 them in-line, regardless of the cost. This is only likely to
3369 be more costly on chips that have load delay slots and we are
3370 compiling without running the scheduler (so no splitting
3371 occurred before the final instruction emission).
3373 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3375 if (!after_arm_reorg
3376 && !cond
3377 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3378 1, 0)
3379 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3380 + (code != SET))))
3382 if (code == SET)
3384 /* Currently SET is the only monadic value for CODE, all
3385 the rest are dyadic. */
3386 if (TARGET_USE_MOVT)
3387 arm_emit_movpair (target, GEN_INT (val));
3388 else
3389 emit_set_insn (target, GEN_INT (val));
3391 return 1;
3393 else
3395 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3397 if (TARGET_USE_MOVT)
3398 arm_emit_movpair (temp, GEN_INT (val));
3399 else
3400 emit_set_insn (temp, GEN_INT (val));
3402 /* For MINUS, the value is subtracted from, since we never
3403 have subtraction of a constant. */
3404 if (code == MINUS)
3405 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3406 else
3407 emit_set_insn (target,
3408 gen_rtx_fmt_ee (code, mode, source, temp));
3409 return 2;
3414 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
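/* Illustrative behaviour (a sketch, not an exhaustive contract): for
   (SET r0 0x12345678) the in-line synthesis would need four immediates
   (e.g. 0x12000000 + 0x00340000 + 0x00005600 + 0x00000078), so when that
   exceeds arm_constant_limit the constant is emitted as a movw/movt pair
   (or a single constant move) and the function returns 1; otherwise it
   defers to arm_gen_constant below and returns its insn count.  */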
3418 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3419 ARM/THUMB2 immediates, and add up to VAL.
3420 The function return value gives the number of insns required. */
3421 static int
3422 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3423 struct four_ints *return_sequence)
3425 int best_consecutive_zeros = 0;
3426 int i;
3427 int best_start = 0;
3428 int insns1, insns2;
3429 struct four_ints tmp_sequence;
3431 /* If we aren't targeting ARM, the best place to start is always at
3432 the bottom, otherwise look more closely. */
3433 if (TARGET_ARM)
3435 for (i = 0; i < 32; i += 2)
3437 int consecutive_zeros = 0;
3439 if (!(val & (3 << i)))
3441 while ((i < 32) && !(val & (3 << i)))
3443 consecutive_zeros += 2;
3444 i += 2;
3446 if (consecutive_zeros > best_consecutive_zeros)
3448 best_consecutive_zeros = consecutive_zeros;
3449 best_start = i - consecutive_zeros;
3451 i -= 2;
3456 /* So long as it won't require any more insns to do so, it's
3457 desirable to emit a small constant (in bits 0...9) in the last
3458 insn. This way there is more chance that it can be combined with
3459 a later addressing insn to form a pre-indexed load or store
3460 operation. Consider:
3462 *((volatile int *)0xe0000100) = 1;
3463 *((volatile int *)0xe0000110) = 2;
3465 We want this to wind up as:
3467 mov rA, #0xe0000000
3468 mov rB, #1
3469 str rB, [rA, #0x100]
3470 mov rB, #2
3471 str rB, [rA, #0x110]
3473 rather than having to synthesize both large constants from scratch.
3475 Therefore, we calculate how many insns would be required to emit
3476 the constant starting from `best_start', and also starting from
3477 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3478 yield a shorter sequence, we may as well use zero. */
3479 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3480 if (best_start != 0
3481 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3483 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3484 if (insns2 <= insns1)
3486 *return_sequence = tmp_sequence;
3487 insns1 = insns2;
3491 return insns1;
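/* For example, in ARM mode VAL = 0x12340078 cannot be built from fewer than
   three 8-bit rotated immediates; a possible RETURN_SEQUENCE is
   { 0x12000000, 0x00340000, 0x00000078 } and the return value is 3.
   (The exact ordering depends on the best_start heuristic above.)  */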
3494 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3495 static int
3496 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3497 struct four_ints *return_sequence, int i)
3499 int remainder = val & 0xffffffff;
3500 int insns = 0;
3502 /* Try and find a way of doing the job in either two or three
3503 instructions.
3505 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3506 location. We start at position I. This may be the MSB, or
3507 optimal_immediate_sequence may have positioned it at the largest block
3508 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3509 wrapping around to the top of the word when we drop off the bottom.
3510 In the worst case this code should produce no more than four insns.
3512 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3513 constants, shifted to any arbitrary location. We should always start
3514 at the MSB. */
3517 int end;
3518 unsigned int b1, b2, b3, b4;
3519 unsigned HOST_WIDE_INT result;
3520 int loc;
3522 gcc_assert (insns < 4);
3524 if (i <= 0)
3525 i += 32;
3527 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3528 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3530 loc = i;
3531 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3532 /* We can use addw/subw for the last 12 bits. */
3533 result = remainder;
3534 else
3536 /* Use an 8-bit shifted/rotated immediate. */
3537 end = i - 8;
3538 if (end < 0)
3539 end += 32;
3540 result = remainder & ((0x0ff << end)
3541 | ((i < end) ? (0xff >> (32 - end))
3542 : 0));
3543 i -= 8;
3546 else
3548 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3549 arbitrary shifts. */
3550 i -= TARGET_ARM ? 2 : 1;
3551 continue;
3554 /* Next, see if we can do a better job with a thumb2 replicated
3555 constant.
3557 We do it this way around to catch the cases like 0x01F001E0 where
3558 two 8-bit immediates would work, but a replicated constant would
3559 make it worse.
3561 TODO: 16-bit constants that don't clear all the bits, but still win.
3562 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3563 if (TARGET_THUMB2)
3565 b1 = (remainder & 0xff000000) >> 24;
3566 b2 = (remainder & 0x00ff0000) >> 16;
3567 b3 = (remainder & 0x0000ff00) >> 8;
3568 b4 = remainder & 0xff;
3570 if (loc > 24)
3572 /* The 8-bit immediate already found clears b1 (and maybe b2),
3573 but must leave b3 and b4 alone. */
3575 /* First try to find a 32-bit replicated constant that clears
3576 almost everything. We can assume that we can't do it in one,
3577 or else we wouldn't be here. */
3578 unsigned int tmp = b1 & b2 & b3 & b4;
3579 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3580 + (tmp << 24);
3581 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3582 + (tmp == b3) + (tmp == b4);
3583 if (tmp
3584 && (matching_bytes >= 3
3585 || (matching_bytes == 2
3586 && const_ok_for_op (remainder & ~tmp2, code))))
3588 /* At least 3 of the bytes match, and the fourth has at
3589 least as many bits set, or two of the bytes match
3590 and it will only require one more insn to finish. */
3591 result = tmp2;
3592 i = tmp != b1 ? 32
3593 : tmp != b2 ? 24
3594 : tmp != b3 ? 16
3595 : 8;
3598 /* Second, try to find a 16-bit replicated constant that can
3599 leave three of the bytes clear. If b2 or b4 is already
3600 zero, then we can. If the 8-bit from above would not
3601 clear b2 anyway, then we still win. */
3602 else if (b1 == b3 && (!b2 || !b4
3603 || (remainder & 0x00ff0000 & ~result)))
3605 result = remainder & 0xff00ff00;
3606 i = 24;
3609 else if (loc > 16)
3611 /* The 8-bit immediate already found clears b2 (and maybe b3)
3612 and we don't get here unless b1 is already clear, but it will
3613 leave b4 unchanged. */
3615 /* If we can clear b2 and b4 at once, then we win, since the
3616 8-bits couldn't possibly reach that far. */
3617 if (b2 == b4)
3619 result = remainder & 0x00ff00ff;
3620 i = 16;
3625 return_sequence->i[insns++] = result;
3626 remainder &= ~result;
3628 if (code == SET || code == MINUS)
3629 code = PLUS;
3631 while (remainder);
3633 return insns;
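/* Worked example for the Thumb-2 path above: VAL = 0x01F001E0 is best
   handled as the two shifted 8-bit immediates 0x01F00000 and 0x000001E0
   (two insns), which is why a plain 8-bit match is tried before any
   replicated-byte candidate.  */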
3636 /* Emit an instruction with the indicated PATTERN. If COND is
3637 non-NULL, conditionalize the execution of the instruction on COND
3638 being true. */
3640 static void
3641 emit_constant_insn (rtx cond, rtx pattern)
3643 if (cond)
3644 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3645 emit_insn (pattern);
3648 /* As above, but extra parameter GENERATE which, if clear, suppresses
3649 RTL generation. */
3651 static int
3652 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3653 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3654 int generate)
3656 int can_invert = 0;
3657 int can_negate = 0;
3658 int final_invert = 0;
3659 int i;
3660 int set_sign_bit_copies = 0;
3661 int clear_sign_bit_copies = 0;
3662 int clear_zero_bit_copies = 0;
3663 int set_zero_bit_copies = 0;
3664 int insns = 0, neg_insns, inv_insns;
3665 unsigned HOST_WIDE_INT temp1, temp2;
3666 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3667 struct four_ints *immediates;
3668 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3670 /* Find out which operations are safe for a given CODE. Also do a quick
3671 check for degenerate cases; these can occur when DImode operations
3672 are split. */
3673 switch (code)
3675 case SET:
3676 can_invert = 1;
3677 break;
3679 case PLUS:
3680 can_negate = 1;
3681 break;
3683 case IOR:
3684 if (remainder == 0xffffffff)
3686 if (generate)
3687 emit_constant_insn (cond,
3688 gen_rtx_SET (VOIDmode, target,
3689 GEN_INT (ARM_SIGN_EXTEND (val))));
3690 return 1;
3693 if (remainder == 0)
3695 if (reload_completed && rtx_equal_p (target, source))
3696 return 0;
3698 if (generate)
3699 emit_constant_insn (cond,
3700 gen_rtx_SET (VOIDmode, target, source));
3701 return 1;
3703 break;
3705 case AND:
3706 if (remainder == 0)
3708 if (generate)
3709 emit_constant_insn (cond,
3710 gen_rtx_SET (VOIDmode, target, const0_rtx));
3711 return 1;
3713 if (remainder == 0xffffffff)
3715 if (reload_completed && rtx_equal_p (target, source))
3716 return 0;
3717 if (generate)
3718 emit_constant_insn (cond,
3719 gen_rtx_SET (VOIDmode, target, source));
3720 return 1;
3722 can_invert = 1;
3723 break;
3725 case XOR:
3726 if (remainder == 0)
3728 if (reload_completed && rtx_equal_p (target, source))
3729 return 0;
3730 if (generate)
3731 emit_constant_insn (cond,
3732 gen_rtx_SET (VOIDmode, target, source));
3733 return 1;
3736 if (remainder == 0xffffffff)
3738 if (generate)
3739 emit_constant_insn (cond,
3740 gen_rtx_SET (VOIDmode, target,
3741 gen_rtx_NOT (mode, source)));
3742 return 1;
3744 final_invert = 1;
3745 break;
3747 case MINUS:
3748 /* We treat MINUS as (val - source), since (source - val) is always
3749 passed as (source + (-val)). */
3750 if (remainder == 0)
3752 if (generate)
3753 emit_constant_insn (cond,
3754 gen_rtx_SET (VOIDmode, target,
3755 gen_rtx_NEG (mode, source)));
3756 return 1;
3758 if (const_ok_for_arm (val))
3760 if (generate)
3761 emit_constant_insn (cond,
3762 gen_rtx_SET (VOIDmode, target,
3763 gen_rtx_MINUS (mode, GEN_INT (val),
3764 source)));
3765 return 1;
3768 break;
3770 default:
3771 gcc_unreachable ();
3774 /* If we can do it in one insn get out quickly. */
3775 if (const_ok_for_op (val, code))
3777 if (generate)
3778 emit_constant_insn (cond,
3779 gen_rtx_SET (VOIDmode, target,
3780 (source
3781 ? gen_rtx_fmt_ee (code, mode, source,
3782 GEN_INT (val))
3783 : GEN_INT (val))));
3784 return 1;
3787 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3788 insn. */
3789 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3790 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3792 if (generate)
3794 if (mode == SImode && i == 16)
3795 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3796 smaller insn. */
3797 emit_constant_insn (cond,
3798 gen_zero_extendhisi2
3799 (target, gen_lowpart (HImode, source)));
3800 else
3801 /* Extz only supports SImode, but we can coerce the operands
3802 into that mode. */
3803 emit_constant_insn (cond,
3804 gen_extzv_t2 (gen_lowpart (SImode, target),
3805 gen_lowpart (SImode, source),
3806 GEN_INT (i), const0_rtx));
3809 return 1;
3812 /* Calculate a few attributes that may be useful for specific
3813 optimizations. */
3814 /* Count number of leading zeros. */
3815 for (i = 31; i >= 0; i--)
3817 if ((remainder & (1 << i)) == 0)
3818 clear_sign_bit_copies++;
3819 else
3820 break;
3823 /* Count number of leading 1's. */
3824 for (i = 31; i >= 0; i--)
3826 if ((remainder & (1 << i)) != 0)
3827 set_sign_bit_copies++;
3828 else
3829 break;
3832 /* Count number of trailing zero's. */
3833 for (i = 0; i <= 31; i++)
3835 if ((remainder & (1 << i)) == 0)
3836 clear_zero_bit_copies++;
3837 else
3838 break;
3841 /* Count number of trailing 1's. */
3842 for (i = 0; i <= 31; i++)
3844 if ((remainder & (1 << i)) != 0)
3845 set_zero_bit_copies++;
3846 else
3847 break;
3850 switch (code)
3852 case SET:
3853 /* See if we can do this by sign_extending a constant that is known
3854 to be negative. This is a good way of doing it, since the shift
3855 may well merge into a subsequent insn. */
3856 if (set_sign_bit_copies > 1)
3858 if (const_ok_for_arm
3859 (temp1 = ARM_SIGN_EXTEND (remainder
3860 << (set_sign_bit_copies - 1))))
3862 if (generate)
3864 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3865 emit_constant_insn (cond,
3866 gen_rtx_SET (VOIDmode, new_src,
3867 GEN_INT (temp1)));
3868 emit_constant_insn (cond,
3869 gen_ashrsi3 (target, new_src,
3870 GEN_INT (set_sign_bit_copies - 1)));
3872 return 2;
3874 /* For an inverted constant, we will need to set the low bits,
3875 these will be shifted out of harm's way. */
3876 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3877 if (const_ok_for_arm (~temp1))
3879 if (generate)
3881 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3882 emit_constant_insn (cond,
3883 gen_rtx_SET (VOIDmode, new_src,
3884 GEN_INT (temp1)));
3885 emit_constant_insn (cond,
3886 gen_ashrsi3 (target, new_src,
3887 GEN_INT (set_sign_bit_copies - 1)));
3889 return 2;
3893 /* See if we can calculate the value as the difference between two
3894 valid immediates. */
3895 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3897 int topshift = clear_sign_bit_copies & ~1;
3899 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3900 & (0xff000000 >> topshift));
3902 /* If temp1 is zero, then that means the 9 most significant
3903 bits of remainder were 1 and we've caused it to overflow.
3904 When topshift is 0 we don't need to do anything since we
3905 can borrow from 'bit 32'. */
3906 if (temp1 == 0 && topshift != 0)
3907 temp1 = 0x80000000 >> (topshift - 1);
3909 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3911 if (const_ok_for_arm (temp2))
3913 if (generate)
3915 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3916 emit_constant_insn (cond,
3917 gen_rtx_SET (VOIDmode, new_src,
3918 GEN_INT (temp1)));
3919 emit_constant_insn (cond,
3920 gen_addsi3 (target, new_src,
3921 GEN_INT (-temp2)));
3924 return 2;
3928 /* See if we can generate this by setting the bottom (or the top)
3929 16 bits, and then shifting these into the other half of the
3930 word. We only look for the simplest cases, to do more would cost
3931 too much. Be careful, however, not to generate this when the
3932 alternative would take fewer insns. */
3933 if (val & 0xffff0000)
3935 temp1 = remainder & 0xffff0000;
3936 temp2 = remainder & 0x0000ffff;
3938 /* Overlaps outside this range are best done using other methods. */
3939 for (i = 9; i < 24; i++)
3941 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3942 && !const_ok_for_arm (temp2))
3944 rtx new_src = (subtargets
3945 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3946 : target);
3947 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3948 source, subtargets, generate);
3949 source = new_src;
3950 if (generate)
3951 emit_constant_insn
3952 (cond,
3953 gen_rtx_SET
3954 (VOIDmode, target,
3955 gen_rtx_IOR (mode,
3956 gen_rtx_ASHIFT (mode, source,
3957 GEN_INT (i)),
3958 source)));
3959 return insns + 1;
3963 /* Don't duplicate cases already considered. */
3964 for (i = 17; i < 24; i++)
3966 if (((temp1 | (temp1 >> i)) == remainder)
3967 && !const_ok_for_arm (temp1))
3969 rtx new_src = (subtargets
3970 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3971 : target);
3972 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3973 source, subtargets, generate);
3974 source = new_src;
3975 if (generate)
3976 emit_constant_insn
3977 (cond,
3978 gen_rtx_SET (VOIDmode, target,
3979 gen_rtx_IOR
3980 (mode,
3981 gen_rtx_LSHIFTRT (mode, source,
3982 GEN_INT (i)),
3983 source)));
3984 return insns + 1;
3988 break;
3990 case IOR:
3991 case XOR:
3992 /* If we have IOR or XOR, and the constant can be loaded in a
3993 single instruction, and we can find a temporary to put it in,
3994 then this can be done in two instructions instead of 3-4. */
3995 if (subtargets
3996 /* TARGET can't be NULL if SUBTARGETS is 0 */
3997 || (reload_completed && !reg_mentioned_p (target, source)))
3999 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4001 if (generate)
4003 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4005 emit_constant_insn (cond,
4006 gen_rtx_SET (VOIDmode, sub,
4007 GEN_INT (val)));
4008 emit_constant_insn (cond,
4009 gen_rtx_SET (VOIDmode, target,
4010 gen_rtx_fmt_ee (code, mode,
4011 source, sub)));
4013 return 2;
4017 if (code == XOR)
4018 break;
4020 /* Convert.
4021 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4022 and the remainder 0s for e.g. 0xfff00000)
4023 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4025 This can be done in 2 instructions by using shifts with mov or mvn.
4026 e.g. for
4027 x = x | 0xfff00000;
4028 we generate.
4029 mvn r0, r0, asl #12
4030 mvn r0, r0, lsr #12 */
4031 if (set_sign_bit_copies > 8
4032 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4034 if (generate)
4036 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4037 rtx shift = GEN_INT (set_sign_bit_copies);
4039 emit_constant_insn
4040 (cond,
4041 gen_rtx_SET (VOIDmode, sub,
4042 gen_rtx_NOT (mode,
4043 gen_rtx_ASHIFT (mode,
4044 source,
4045 shift))));
4046 emit_constant_insn
4047 (cond,
4048 gen_rtx_SET (VOIDmode, target,
4049 gen_rtx_NOT (mode,
4050 gen_rtx_LSHIFTRT (mode, sub,
4051 shift))));
4053 return 2;
4056 /* Convert
4057 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4059 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4061 E.g. for r0 = r0 | 0xfff
4062 mvn r0, r0, lsr #12
4063 mvn r0, r0, asl #12
4066 if (set_zero_bit_copies > 8
4067 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4069 if (generate)
4071 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4072 rtx shift = GEN_INT (set_zero_bit_copies);
4074 emit_constant_insn
4075 (cond,
4076 gen_rtx_SET (VOIDmode, sub,
4077 gen_rtx_NOT (mode,
4078 gen_rtx_LSHIFTRT (mode,
4079 source,
4080 shift))));
4081 emit_constant_insn
4082 (cond,
4083 gen_rtx_SET (VOIDmode, target,
4084 gen_rtx_NOT (mode,
4085 gen_rtx_ASHIFT (mode, sub,
4086 shift))));
4088 return 2;
4091 /* This will never be reached for Thumb2 because orn is a valid
4092 instruction. This is for Thumb1 and the ARM 32 bit cases.
4094 x = y | constant (such that ~constant is a valid constant)
4095 Transform this to
4096 x = ~(~y & ~constant).
4098 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4100 if (generate)
4102 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4103 emit_constant_insn (cond,
4104 gen_rtx_SET (VOIDmode, sub,
4105 gen_rtx_NOT (mode, source)));
4106 source = sub;
4107 if (subtargets)
4108 sub = gen_reg_rtx (mode);
4109 emit_constant_insn (cond,
4110 gen_rtx_SET (VOIDmode, sub,
4111 gen_rtx_AND (mode, source,
4112 GEN_INT (temp1))));
4113 emit_constant_insn (cond,
4114 gen_rtx_SET (VOIDmode, target,
4115 gen_rtx_NOT (mode, sub)));
4117 return 3;
4119 break;
4121 case AND:
4122 /* See if two shifts will do 2 or more insn's worth of work. */
4123 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4125 HOST_WIDE_INT shift_mask = ((0xffffffff
4126 << (32 - clear_sign_bit_copies))
4127 & 0xffffffff);
4129 if ((remainder | shift_mask) != 0xffffffff)
4131 if (generate)
4133 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4134 insns = arm_gen_constant (AND, mode, cond,
4135 remainder | shift_mask,
4136 new_src, source, subtargets, 1);
4137 source = new_src;
4139 else
4141 rtx targ = subtargets ? NULL_RTX : target;
4142 insns = arm_gen_constant (AND, mode, cond,
4143 remainder | shift_mask,
4144 targ, source, subtargets, 0);
4148 if (generate)
4150 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4151 rtx shift = GEN_INT (clear_sign_bit_copies);
4153 emit_insn (gen_ashlsi3 (new_src, source, shift));
4154 emit_insn (gen_lshrsi3 (target, new_src, shift));
4157 return insns + 2;
4160 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4162 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4164 if ((remainder | shift_mask) != 0xffffffff)
4166 if (generate)
4168 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4170 insns = arm_gen_constant (AND, mode, cond,
4171 remainder | shift_mask,
4172 new_src, source, subtargets, 1);
4173 source = new_src;
4175 else
4177 rtx targ = subtargets ? NULL_RTX : target;
4179 insns = arm_gen_constant (AND, mode, cond,
4180 remainder | shift_mask,
4181 targ, source, subtargets, 0);
4185 if (generate)
4187 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4188 rtx shift = GEN_INT (clear_zero_bit_copies);
4190 emit_insn (gen_lshrsi3 (new_src, source, shift));
4191 emit_insn (gen_ashlsi3 (target, new_src, shift));
4194 return insns + 2;
4197 break;
4199 default:
4200 break;
4203 /* Calculate what the instruction sequences would be if we generated it
4204 normally, negated, or inverted. */
4205 if (code == AND)
4206 /* AND cannot be split into multiple insns, so invert and use BIC. */
4207 insns = 99;
4208 else
4209 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4211 if (can_negate)
4212 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4213 &neg_immediates);
4214 else
4215 neg_insns = 99;
4217 if (can_invert || final_invert)
4218 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4219 &inv_immediates);
4220 else
4221 inv_insns = 99;
4223 immediates = &pos_immediates;
4225 /* Is the negated immediate sequence more efficient? */
4226 if (neg_insns < insns && neg_insns <= inv_insns)
4228 insns = neg_insns;
4229 immediates = &neg_immediates;
4231 else
4232 can_negate = 0;
4234 /* Is the inverted immediate sequence more efficient?
4235 We must allow for an extra NOT instruction for XOR operations, although
4236 there is some chance that the final 'mvn' will get optimized later. */
4237 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4239 insns = inv_insns;
4240 immediates = &inv_immediates;
4242 else
4244 can_invert = 0;
4245 final_invert = 0;
4248 /* Now output the chosen sequence as instructions. */
4249 if (generate)
4251 for (i = 0; i < insns; i++)
4253 rtx new_src, temp1_rtx;
4255 temp1 = immediates->i[i];
4257 if (code == SET || code == MINUS)
4258 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4259 else if ((final_invert || i < (insns - 1)) && subtargets)
4260 new_src = gen_reg_rtx (mode);
4261 else
4262 new_src = target;
4264 if (can_invert)
4265 temp1 = ~temp1;
4266 else if (can_negate)
4267 temp1 = -temp1;
4269 temp1 = trunc_int_for_mode (temp1, mode);
4270 temp1_rtx = GEN_INT (temp1);
4272 if (code == SET)
4274 else if (code == MINUS)
4275 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4276 else
4277 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4279 emit_constant_insn (cond,
4280 gen_rtx_SET (VOIDmode, new_src,
4281 temp1_rtx));
4282 source = new_src;
4284 if (code == SET)
4286 can_negate = can_invert;
4287 can_invert = 0;
4288 code = PLUS;
4290 else if (code == MINUS)
4291 code = PLUS;
4295 if (final_invert)
4297 if (generate)
4298 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4299 gen_rtx_NOT (mode, source)));
4300 insns++;
4303 return insns;
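/* Example of the invert/negate selection above: for (AND r0 r1 0x00ffffff)
   the positive form cannot be split (AND is forced to insns = 99), but the
   inverted constant 0xff000000 is a single valid immediate, so the operation
   ends up as one BIC instruction.  For PLUS, a constant such as -0x1f00 is
   instead handled through the negated sequence and comes out as a SUB.  */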
4306 /* Canonicalize a comparison so that we are more likely to recognize it.
4307 This can be done for a few constant compares, where we can make the
4308 immediate value easier to load. */
4310 static void
4311 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4312 bool op0_preserve_value)
4314 enum machine_mode mode;
4315 unsigned HOST_WIDE_INT i, maxval;
4317 mode = GET_MODE (*op0);
4318 if (mode == VOIDmode)
4319 mode = GET_MODE (*op1);
4321 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4323 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4324 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4325 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4326 for GTU/LEU in Thumb mode. */
4327 if (mode == DImode)
4329 rtx tem;
4331 if (*code == GT || *code == LE
4332 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4334 /* Missing comparison. First try to use an available
4335 comparison. */
4336 if (CONST_INT_P (*op1))
4338 i = INTVAL (*op1);
4339 switch (*code)
4341 case GT:
4342 case LE:
4343 if (i != maxval
4344 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4346 *op1 = GEN_INT (i + 1);
4347 *code = *code == GT ? GE : LT;
4348 return;
4350 break;
4351 case GTU:
4352 case LEU:
4353 if (i != ~((unsigned HOST_WIDE_INT) 0)
4354 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4356 *op1 = GEN_INT (i + 1);
4357 *code = *code == GTU ? GEU : LTU;
4358 return;
4360 break;
4361 default:
4362 gcc_unreachable ();
4366 /* If that did not work, reverse the condition. */
4367 if (!op0_preserve_value)
4369 tem = *op0;
4370 *op0 = *op1;
4371 *op1 = tem;
4372 *code = (int)swap_condition ((enum rtx_code)*code);
4375 return;
4378 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4379 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4380 to facilitate possible combining with a cmp into 'ands'. */
4381 if (mode == SImode
4382 && GET_CODE (*op0) == ZERO_EXTEND
4383 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4384 && GET_MODE (XEXP (*op0, 0)) == QImode
4385 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4386 && subreg_lowpart_p (XEXP (*op0, 0))
4387 && *op1 == const0_rtx)
4388 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4389 GEN_INT (255));
4391 /* Comparisons smaller than DImode. Only adjust comparisons against
4392 an out-of-range constant. */
4393 if (!CONST_INT_P (*op1)
4394 || const_ok_for_arm (INTVAL (*op1))
4395 || const_ok_for_arm (- INTVAL (*op1)))
4396 return;
4398 i = INTVAL (*op1);
4400 switch (*code)
4402 case EQ:
4403 case NE:
4404 return;
4406 case GT:
4407 case LE:
4408 if (i != maxval
4409 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4411 *op1 = GEN_INT (i + 1);
4412 *code = *code == GT ? GE : LT;
4413 return;
4415 break;
4417 case GE:
4418 case LT:
4419 if (i != ~maxval
4420 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4422 *op1 = GEN_INT (i - 1);
4423 *code = *code == GE ? GT : LE;
4424 return;
4426 break;
4428 case GTU:
4429 case LEU:
4430 if (i != ~((unsigned HOST_WIDE_INT) 0)
4431 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4433 *op1 = GEN_INT (i + 1);
4434 *code = *code == GTU ? GEU : LTU;
4435 return;
4437 break;
4439 case GEU:
4440 case LTU:
4441 if (i != 0
4442 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4444 *op1 = GEN_INT (i - 1);
4445 *code = *code == GEU ? GTU : LEU;
4446 return;
4448 break;
4450 default:
4451 gcc_unreachable ();
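/* For instance, (GT reg 0x00ffffff) is canonicalized to (GE reg 0x01000000):
   neither 0x00ffffff nor its negation is a valid immediate, but 0x01000000
   is, and for integers x > c is equivalent to x >= c + 1.  */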
4456 /* Define how to find the value returned by a function. */
4458 static rtx
4459 arm_function_value(const_tree type, const_tree func,
4460 bool outgoing ATTRIBUTE_UNUSED)
4462 enum machine_mode mode;
4463 int unsignedp ATTRIBUTE_UNUSED;
4464 rtx r ATTRIBUTE_UNUSED;
4466 mode = TYPE_MODE (type);
4468 if (TARGET_AAPCS_BASED)
4469 return aapcs_allocate_return_reg (mode, type, func);
4471 /* Promote integer types. */
4472 if (INTEGRAL_TYPE_P (type))
4473 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4475 /* Promotes small structs returned in a register to full-word size
4476 for big-endian AAPCS. */
4477 if (arm_return_in_msb (type))
4479 HOST_WIDE_INT size = int_size_in_bytes (type);
4480 if (size % UNITS_PER_WORD != 0)
4482 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4483 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4487 return arm_libcall_value_1 (mode);
4490 /* libcall hashtable helpers. */
4492 struct libcall_hasher : typed_noop_remove <rtx_def>
4494 typedef rtx_def value_type;
4495 typedef rtx_def compare_type;
4496 static inline hashval_t hash (const value_type *);
4497 static inline bool equal (const value_type *, const compare_type *);
4498 static inline void remove (value_type *);
4501 inline bool
4502 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4504 return rtx_equal_p (p1, p2);
4507 inline hashval_t
4508 libcall_hasher::hash (const value_type *p1)
4510 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4513 typedef hash_table <libcall_hasher> libcall_table_type;
4515 static void
4516 add_libcall (libcall_table_type htab, rtx libcall)
4518 *htab.find_slot (libcall, INSERT) = libcall;
4521 static bool
4522 arm_libcall_uses_aapcs_base (const_rtx libcall)
4524 static bool init_done = false;
4525 static libcall_table_type libcall_htab;
4527 if (!init_done)
4529 init_done = true;
4531 libcall_htab.create (31);
4532 add_libcall (libcall_htab,
4533 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4534 add_libcall (libcall_htab,
4535 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4536 add_libcall (libcall_htab,
4537 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4538 add_libcall (libcall_htab,
4539 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4541 add_libcall (libcall_htab,
4542 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4543 add_libcall (libcall_htab,
4544 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4545 add_libcall (libcall_htab,
4546 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4547 add_libcall (libcall_htab,
4548 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4550 add_libcall (libcall_htab,
4551 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4552 add_libcall (libcall_htab,
4553 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4554 add_libcall (libcall_htab,
4555 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4556 add_libcall (libcall_htab,
4557 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4558 add_libcall (libcall_htab,
4559 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4560 add_libcall (libcall_htab,
4561 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4562 add_libcall (libcall_htab,
4563 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4564 add_libcall (libcall_htab,
4565 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4567 /* Values from double-precision helper functions are returned in core
4568 registers if the selected core only supports single-precision
4569 arithmetic, even if we are using the hard-float ABI. The same is
4570 true for single-precision helpers, but we will never be using the
4571 hard-float ABI on a CPU which doesn't support single-precision
4572 operations in hardware. */
4573 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4574 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4575 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4576 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4577 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4578 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4579 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4580 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4581 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4582 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4583 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4584 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4585 SFmode));
4586 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4587 DFmode));
4590 return libcall && libcall_htab.find (libcall) != NULL;
4593 static rtx
4594 arm_libcall_value_1 (enum machine_mode mode)
4596 if (TARGET_AAPCS_BASED)
4597 return aapcs_libcall_value (mode);
4598 else if (TARGET_IWMMXT_ABI
4599 && arm_vector_mode_supported_p (mode))
4600 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4601 else
4602 return gen_rtx_REG (mode, ARG_REGISTER (1));
4605 /* Define how to find the value returned by a library function
4606 assuming the value has mode MODE. */
4608 static rtx
4609 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4611 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4612 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4614 /* The following libcalls return their result in integer registers,
4615 even though they return a floating point value. */
4616 if (arm_libcall_uses_aapcs_base (libcall))
4617 return gen_rtx_REG (mode, ARG_REGISTER(1));
4621 return arm_libcall_value_1 (mode);
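/* For example, with -mfloat-abi=hard on a core that only supports
   single-precision arithmetic, the double-precision helper calls (e.g. the
   add_optab DFmode libcall, __aeabi_dadd under the AEABI run-time library)
   are in the table above, so their DFmode result is taken from r0/r1 rather
   than from a VFP register.  */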
4624 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4626 static bool
4627 arm_function_value_regno_p (const unsigned int regno)
4629 if (regno == ARG_REGISTER (1)
4630 || (TARGET_32BIT
4631 && TARGET_AAPCS_BASED
4632 && TARGET_VFP
4633 && TARGET_HARD_FLOAT
4634 && regno == FIRST_VFP_REGNUM)
4635 || (TARGET_IWMMXT_ABI
4636 && regno == FIRST_IWMMXT_REGNUM))
4637 return true;
4639 return false;
4642 /* Determine the amount of memory needed to store the possible return
4643 registers of an untyped call. */
4645 arm_apply_result_size (void)
4647 int size = 16;
4649 if (TARGET_32BIT)
4651 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4652 size += 32;
4653 if (TARGET_IWMMXT_ABI)
4654 size += 8;
4657 return size;
4660 /* Decide whether TYPE should be returned in memory (true)
4661 or in a register (false). FNTYPE is the type of the function making
4662 the call. */
4663 static bool
4664 arm_return_in_memory (const_tree type, const_tree fntype)
4666 HOST_WIDE_INT size;
4668 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4670 if (TARGET_AAPCS_BASED)
4672 /* Simple, non-aggregate types (i.e. not including vectors and
4673 complex) are always returned in a register (or registers).
4674 We don't care about which register here, so we can short-cut
4675 some of the detail. */
4676 if (!AGGREGATE_TYPE_P (type)
4677 && TREE_CODE (type) != VECTOR_TYPE
4678 && TREE_CODE (type) != COMPLEX_TYPE)
4679 return false;
4681 /* Any return value that is no larger than one word can be
4682 returned in r0. */
4683 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4684 return false;
4686 /* Check any available co-processors to see if they accept the
4687 type as a register candidate (VFP, for example, can return
4688 some aggregates in consecutive registers). These aren't
4689 available if the call is variadic. */
4690 if (aapcs_select_return_coproc (type, fntype) >= 0)
4691 return false;
4693 /* Vector values should be returned using ARM registers, not
4694 memory (unless they're over 16 bytes, which will break since
4695 we only have four call-clobbered registers to play with). */
4696 if (TREE_CODE (type) == VECTOR_TYPE)
4697 return (size < 0 || size > (4 * UNITS_PER_WORD));
4699 /* The rest go in memory. */
4700 return true;
4703 if (TREE_CODE (type) == VECTOR_TYPE)
4704 return (size < 0 || size > (4 * UNITS_PER_WORD));
4706 if (!AGGREGATE_TYPE_P (type) &&
4707 (TREE_CODE (type) != VECTOR_TYPE))
4708 /* All simple types are returned in registers. */
4709 return false;
4711 if (arm_abi != ARM_ABI_APCS)
4713 /* ATPCS and later return aggregate types in memory only if they are
4714 larger than a word (or are variable size). */
4715 return (size < 0 || size > UNITS_PER_WORD);
4718 /* For the arm-wince targets we choose to be compatible with Microsoft's
4719 ARM and Thumb compilers, which always return aggregates in memory. */
4720 #ifndef ARM_WINCE
4721 /* All structures/unions bigger than one word are returned in memory.
4722 Also catch the case where int_size_in_bytes returns -1. In this case
4723 the aggregate is either huge or of variable size, and in either case
4724 we will want to return it via memory and not in a register. */
4725 if (size < 0 || size > UNITS_PER_WORD)
4726 return true;
4728 if (TREE_CODE (type) == RECORD_TYPE)
4730 tree field;
4732 /* For a struct the APCS says that we only return in a register
4733 if the type is 'integer like' and every addressable element
4734 has an offset of zero. For practical purposes this means
4735 that the structure can have at most one non bit-field element
4736 and that this element must be the first one in the structure. */
4738 /* Find the first field, ignoring non FIELD_DECL things which will
4739 have been created by C++. */
4740 for (field = TYPE_FIELDS (type);
4741 field && TREE_CODE (field) != FIELD_DECL;
4742 field = DECL_CHAIN (field))
4743 continue;
4745 if (field == NULL)
4746 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4748 /* Check that the first field is valid for returning in a register. */
4750 /* ... Floats are not allowed */
4751 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4752 return true;
4754 /* ... Aggregates that are not themselves valid for returning in
4755 a register are not allowed. */
4756 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4757 return true;
4759 /* Now check the remaining fields, if any. Only bitfields are allowed,
4760 since they are not addressable. */
4761 for (field = DECL_CHAIN (field);
4762 field;
4763 field = DECL_CHAIN (field))
4765 if (TREE_CODE (field) != FIELD_DECL)
4766 continue;
4768 if (!DECL_BIT_FIELD_TYPE (field))
4769 return true;
4772 return false;
4775 if (TREE_CODE (type) == UNION_TYPE)
4777 tree field;
4779 /* Unions can be returned in registers if every element is
4780 integral, or can be returned in an integer register. */
4781 for (field = TYPE_FIELDS (type);
4782 field;
4783 field = DECL_CHAIN (field))
4785 if (TREE_CODE (field) != FIELD_DECL)
4786 continue;
4788 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4789 return true;
4791 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4792 return true;
4795 return false;
4797 #endif /* not ARM_WINCE */
4799 /* Return all other types in memory. */
4800 return true;
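/* Two quick examples of the AAPCS rules above: a struct of two ints
   (8 bytes) is larger than a word and is not claimed by any co-processor,
   so it is returned in memory; a struct of two floats compiled for the VFP
   variant of AAPCS is a homogeneous floating-point aggregate, so
   aapcs_select_return_coproc claims it and the function returns false.  */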
4803 const struct pcs_attribute_arg
4805 const char *arg;
4806 enum arm_pcs value;
4807 } pcs_attribute_args[] =
4809 {"aapcs", ARM_PCS_AAPCS},
4810 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4811 #if 0
4812 /* We could recognize these, but changes would be needed elsewhere
4813 * to implement them. */
4814 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4815 {"atpcs", ARM_PCS_ATPCS},
4816 {"apcs", ARM_PCS_APCS},
4817 #endif
4818 {NULL, ARM_PCS_UNKNOWN}
4821 static enum arm_pcs
4822 arm_pcs_from_attribute (tree attr)
4824 const struct pcs_attribute_arg *ptr;
4825 const char *arg;
4827 /* Get the value of the argument. */
4828 if (TREE_VALUE (attr) == NULL_TREE
4829 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4830 return ARM_PCS_UNKNOWN;
4832 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4834 /* Check it against the list of known arguments. */
4835 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4836 if (streq (arg, ptr->arg))
4837 return ptr->value;
4839 /* An unrecognized PCS attribute argument. */
4840 return ARM_PCS_UNKNOWN;
4843 /* Get the PCS variant to use for this call. TYPE is the function's type
4844 specification, DECL is the specific declaration. DECL may be null if
4845 the call could be indirect or if this is a library call. */
4846 static enum arm_pcs
4847 arm_get_pcs_model (const_tree type, const_tree decl)
4849 bool user_convention = false;
4850 enum arm_pcs user_pcs = arm_pcs_default;
4851 tree attr;
4853 gcc_assert (type);
4855 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4856 if (attr)
4858 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4859 user_convention = true;
4862 if (TARGET_AAPCS_BASED)
4864 /* Detect varargs functions. These always use the base rules
4865 (no argument is ever a candidate for a co-processor
4866 register). */
4867 bool base_rules = stdarg_p (type);
4869 if (user_convention)
4871 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4872 sorry ("non-AAPCS derived PCS variant");
4873 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4874 error ("variadic functions must use the base AAPCS variant");
4877 if (base_rules)
4878 return ARM_PCS_AAPCS;
4879 else if (user_convention)
4880 return user_pcs;
4881 else if (decl && flag_unit_at_a_time)
4883 /* Local functions never leak outside this compilation unit,
4884 so we are free to use whatever conventions are
4885 appropriate. */
4886 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4887 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4888 if (i && i->local)
4889 return ARM_PCS_AAPCS_LOCAL;
4892 else if (user_convention && user_pcs != arm_pcs_default)
4893 sorry ("PCS variant");
4895 /* For everything else we use the target's default. */
4896 return arm_pcs_default;
4900 static void
4901 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4902 const_tree fntype ATTRIBUTE_UNUSED,
4903 rtx libcall ATTRIBUTE_UNUSED,
4904 const_tree fndecl ATTRIBUTE_UNUSED)
4906 /* Record the unallocated VFP registers. */
4907 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4908 pcum->aapcs_vfp_reg_alloc = 0;
4911 /* Walk down the type tree of TYPE counting consecutive base elements.
4912 If *MODEP is VOIDmode, then set it to the first valid floating point
4913 type. If a non-floating point type is found, or if a floating point
4914 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4915 otherwise return the count in the sub-tree. */
4916 static int
4917 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4919 enum machine_mode mode;
4920 HOST_WIDE_INT size;
4922 switch (TREE_CODE (type))
4924 case REAL_TYPE:
4925 mode = TYPE_MODE (type);
4926 if (mode != DFmode && mode != SFmode)
4927 return -1;
4929 if (*modep == VOIDmode)
4930 *modep = mode;
4932 if (*modep == mode)
4933 return 1;
4935 break;
4937 case COMPLEX_TYPE:
4938 mode = TYPE_MODE (TREE_TYPE (type));
4939 if (mode != DFmode && mode != SFmode)
4940 return -1;
4942 if (*modep == VOIDmode)
4943 *modep = mode;
4945 if (*modep == mode)
4946 return 2;
4948 break;
4950 case VECTOR_TYPE:
4951 /* Use V2SImode and V4SImode as representatives of all 64-bit
4952 and 128-bit vector types, whether or not those modes are
4953 supported with the present options. */
4954 size = int_size_in_bytes (type);
4955 switch (size)
4957 case 8:
4958 mode = V2SImode;
4959 break;
4960 case 16:
4961 mode = V4SImode;
4962 break;
4963 default:
4964 return -1;
4967 if (*modep == VOIDmode)
4968 *modep = mode;
4970 /* Vector modes are considered to be opaque: two vectors are
4971 equivalent for the purposes of being homogeneous aggregates
4972 if they are the same size. */
4973 if (*modep == mode)
4974 return 1;
4976 break;
4978 case ARRAY_TYPE:
4980 int count;
4981 tree index = TYPE_DOMAIN (type);
4983 /* Can't handle incomplete types. */
4984 if (!COMPLETE_TYPE_P (type))
4985 return -1;
4987 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4988 if (count == -1
4989 || !index
4990 || !TYPE_MAX_VALUE (index)
4991 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4992 || !TYPE_MIN_VALUE (index)
4993 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4994 || count < 0)
4995 return -1;
4997 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4998 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5000 /* There must be no padding. */
5001 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5002 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5003 != count * GET_MODE_BITSIZE (*modep)))
5004 return -1;
5006 return count;
5009 case RECORD_TYPE:
5011 int count = 0;
5012 int sub_count;
5013 tree field;
5015 /* Can't handle incomplete types. */
5016 if (!COMPLETE_TYPE_P (type))
5017 return -1;
5019 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5021 if (TREE_CODE (field) != FIELD_DECL)
5022 continue;
5024 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5025 if (sub_count < 0)
5026 return -1;
5027 count += sub_count;
5030 /* There must be no padding. */
5031 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5032 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5033 != count * GET_MODE_BITSIZE (*modep)))
5034 return -1;
5036 return count;
5039 case UNION_TYPE:
5040 case QUAL_UNION_TYPE:
5042 /* These aren't very interesting except in a degenerate case. */
5043 int count = 0;
5044 int sub_count;
5045 tree field;
5047 /* Can't handle incomplete types. */
5048 if (!COMPLETE_TYPE_P (type))
5049 return -1;
5051 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5053 if (TREE_CODE (field) != FIELD_DECL)
5054 continue;
5056 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5057 if (sub_count < 0)
5058 return -1;
5059 count = count > sub_count ? count : sub_count;
5062 /* There must be no padding. */
5063 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5064 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5065 != count * GET_MODE_BITSIZE (*modep)))
5066 return -1;
5068 return count;
5071 default:
5072 break;
5075 return -1;
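/* Examples: for struct { float x, y, z; } this returns 3 with *MODEP set to
   SFmode (a homogeneous aggregate of three single-precision values); for
   struct { float f; double d; } the element modes conflict and the result is
   -1, so the argument is not a VFP candidate.  */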
5078 /* Return true if PCS_VARIANT should use VFP registers. */
5079 static bool
5080 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5082 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5084 static bool seen_thumb1_vfp = false;
5086 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5088 sorry ("Thumb-1 hard-float VFP ABI");
5089 /* sorry() is not immediately fatal, so only display this once. */
5090 seen_thumb1_vfp = true;
5093 return true;
5096 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5097 return false;
5099 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5100 (TARGET_VFP_DOUBLE || !is_double));
5103 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5104 suitable for passing or returning in VFP registers for the PCS
5105 variant selected. If it is, then *BASE_MODE is updated to contain
5106 a machine mode describing each element of the argument's type and
5107 *COUNT to hold the number of such elements. */
5108 static bool
5109 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5110 enum machine_mode mode, const_tree type,
5111 enum machine_mode *base_mode, int *count)
5113 enum machine_mode new_mode = VOIDmode;
5115 /* If we have the type information, prefer that to working things
5116 out from the mode. */
5117 if (type)
5119 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5121 if (ag_count > 0 && ag_count <= 4)
5122 *count = ag_count;
5123 else
5124 return false;
5126 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5127 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5130 *count = 1;
5131 new_mode = mode;
5133 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5135 *count = 2;
5136 new_mode = (mode == DCmode ? DFmode : SFmode);
5138 else
5139 return false;
5142 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5143 return false;
5145 *base_mode = new_mode;
5146 return true;
5149 static bool
5150 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5151 enum machine_mode mode, const_tree type)
5153 int count ATTRIBUTE_UNUSED;
5154 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5156 if (!use_vfp_abi (pcs_variant, false))
5157 return false;
5158 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5159 &ag_mode, &count);
5162 static bool
5163 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5164 const_tree type)
5166 if (!use_vfp_abi (pcum->pcs_variant, false))
5167 return false;
5169 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5170 &pcum->aapcs_vfp_rmode,
5171 &pcum->aapcs_vfp_rcount);
5174 static bool
5175 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5176 const_tree type ATTRIBUTE_UNUSED)
5178 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5179 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5180 int regno;
5182 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5183 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5185 pcum->aapcs_vfp_reg_alloc = mask << regno;
5186 if (mode == BLKmode
5187 || (mode == TImode && ! TARGET_NEON)
5188 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5190 int i;
5191 int rcount = pcum->aapcs_vfp_rcount;
5192 int rshift = shift;
5193 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5194 rtx par;
5195 if (!TARGET_NEON)
5197 /* Avoid using unsupported vector modes. */
5198 if (rmode == V2SImode)
5199 rmode = DImode;
5200 else if (rmode == V4SImode)
5202 rmode = DImode;
5203 rcount *= 2;
5204 rshift /= 2;
5207 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5208 for (i = 0; i < rcount; i++)
5210 rtx tmp = gen_rtx_REG (rmode,
5211 FIRST_VFP_REGNUM + regno + i * rshift);
5212 tmp = gen_rtx_EXPR_LIST
5213 (VOIDmode, tmp,
5214 GEN_INT (i * GET_MODE_SIZE (rmode)));
5215 XVECEXP (par, 0, i) = tmp;
5218 pcum->aapcs_reg = par;
5220 else
5221 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5222 return true;
5224 return false;
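/* Allocation sketch: for a single DFmode argument SHIFT is 2 (a double
   occupies two S registers) and MASK is 0b11, so the loop looks for the
   first free even-numbered pair s0/s1, s2/s3, ... and hands back d0, d1,
   and so on; for a homogeneous aggregate of three floats MASK is 0b111 and
   three consecutive free S registers are claimed.  */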
5227 static rtx
5228 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5229 enum machine_mode mode,
5230 const_tree type ATTRIBUTE_UNUSED)
5232 if (!use_vfp_abi (pcs_variant, false))
5233 return NULL;
5235 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5237 int count;
5238 enum machine_mode ag_mode;
5239 int i;
5240 rtx par;
5241 int shift;
5243 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5244 &ag_mode, &count);
5246 if (!TARGET_NEON)
5248 if (ag_mode == V2SImode)
5249 ag_mode = DImode;
5250 else if (ag_mode == V4SImode)
5252 ag_mode = DImode;
5253 count *= 2;
5256 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5257 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5258 for (i = 0; i < count; i++)
5260 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5261 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5262 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5263 XVECEXP (par, 0, i) = tmp;
5266 return par;
5269 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5272 static void
5273 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5274 enum machine_mode mode ATTRIBUTE_UNUSED,
5275 const_tree type ATTRIBUTE_UNUSED)
5277 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5278 pcum->aapcs_vfp_reg_alloc = 0;
5279 return;
5282 #define AAPCS_CP(X) \
5284 aapcs_ ## X ## _cum_init, \
5285 aapcs_ ## X ## _is_call_candidate, \
5286 aapcs_ ## X ## _allocate, \
5287 aapcs_ ## X ## _is_return_candidate, \
5288 aapcs_ ## X ## _allocate_return_reg, \
5289 aapcs_ ## X ## _advance \
5292 /* Table of co-processors that can be used to pass arguments in
5293 registers. Ideally no argument should be a candidate for more than
5294 one co-processor table entry, but the table is processed in order
5295 and stops after the first match. If that entry then fails to put
5296 the argument into a co-processor register, the argument will go on
5297 the stack. */
5298 static struct
5300 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5301 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5303 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5304 BLKmode) is a candidate for this co-processor's registers; this
5305 function should ignore any position-dependent state in
5306 CUMULATIVE_ARGS and only use call-type dependent information. */
5307 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5309 /* Return true if the argument does get a co-processor register; it
5310 should set aapcs_reg to an RTX of the register allocated as is
5311 required for a return from FUNCTION_ARG. */
5312 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5314 /* Return true if a result of mode MODE (or type TYPE if MODE is
5315 BLKmode) can be returned in this co-processor's registers. */
5316 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5318 /* Allocate and return an RTX element to hold the return type of a
5319 call, this routine must not fail and will only be called if
5320 is_return_candidate returned true with the same parameters. */
5321 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5323 /* Finish processing this argument and prepare to start processing
5324 the next one. */
5325 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5326 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5328 AAPCS_CP(vfp)
5331 #undef AAPCS_CP
5333 static int
5334 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5335 const_tree type)
5337 int i;
5339 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5340 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5341 return i;
5343 return -1;
5346 static int
5347 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5349 /* We aren't passed a decl, so we can't check that a call is local.
5350 However, it isn't clear that that would be a win anyway, since it
5351 might limit some tail-calling opportunities. */
5352 enum arm_pcs pcs_variant;
5354 if (fntype)
5356 const_tree fndecl = NULL_TREE;
5358 if (TREE_CODE (fntype) == FUNCTION_DECL)
5360 fndecl = fntype;
5361 fntype = TREE_TYPE (fntype);
5364 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5366 else
5367 pcs_variant = arm_pcs_default;
5369 if (pcs_variant != ARM_PCS_AAPCS)
5371 int i;
5373 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5374 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5375 TYPE_MODE (type),
5376 type))
5377 return i;
5379 return -1;
5382 static rtx
5383 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5384 const_tree fntype)
5386 /* We aren't passed a decl, so we can't check that a call is local.
5387 However, it isn't clear that that would be a win anyway, since it
5388 might limit some tail-calling opportunities. */
5389 enum arm_pcs pcs_variant;
5390 int unsignedp ATTRIBUTE_UNUSED;
5392 if (fntype)
5394 const_tree fndecl = NULL_TREE;
5396 if (TREE_CODE (fntype) == FUNCTION_DECL)
5398 fndecl = fntype;
5399 fntype = TREE_TYPE (fntype);
5402 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5404 else
5405 pcs_variant = arm_pcs_default;
5407 /* Promote integer types. */
5408 if (type && INTEGRAL_TYPE_P (type))
5409 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5411 if (pcs_variant != ARM_PCS_AAPCS)
5413 int i;
5415 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5416 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5417 type))
5418 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5419 mode, type);
5422 /* Promotes small structs returned in a register to full-word size
5423 for big-endian AAPCS. */
5424 if (type && arm_return_in_msb (type))
5426 HOST_WIDE_INT size = int_size_in_bytes (type);
5427 if (size % UNITS_PER_WORD != 0)
5429 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5430 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5434 return gen_rtx_REG (mode, R0_REGNUM);
5437 static rtx
5438 aapcs_libcall_value (enum machine_mode mode)
5440 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5441 && GET_MODE_SIZE (mode) <= 4)
5442 mode = SImode;
5444 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5447 /* Lay out a function argument using the AAPCS rules. The rule
5448 numbers referred to here are those in the AAPCS. */
5449 static void
5450 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5451 const_tree type, bool named)
5453 int nregs, nregs2;
5454 int ncrn;
5456 /* We only need to do this once per argument. */
5457 if (pcum->aapcs_arg_processed)
5458 return;
5460 pcum->aapcs_arg_processed = true;
5462 /* Special case: if named is false then we are handling an incoming
5463 anonymous argument which is on the stack. */
5464 if (!named)
5465 return;
5467 /* Is this a potential co-processor register candidate? */
5468 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5470 int slot = aapcs_select_call_coproc (pcum, mode, type);
5471 pcum->aapcs_cprc_slot = slot;
5473 /* We don't have to apply any of the rules from part B of the
5474 preparation phase, these are handled elsewhere in the
5475 compiler. */
5477 if (slot >= 0)
5479 /* A Co-processor register candidate goes either in its own
5480 class of registers or on the stack. */
5481 if (!pcum->aapcs_cprc_failed[slot])
5483 /* C1.cp - Try to allocate the argument to co-processor
5484 registers. */
5485 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5486 return;
5488 /* C2.cp - Put the argument on the stack and note that we
5489 can't assign any more candidates in this slot. We also
5490 need to note that we have allocated stack space, so that
5491 we won't later try to split a non-cprc candidate between
5492 core registers and the stack. */
5493 pcum->aapcs_cprc_failed[slot] = true;
5494 pcum->can_split = false;
5497 /* We didn't get a register, so this argument goes on the
5498 stack. */
5499 gcc_assert (pcum->can_split == false);
5500 return;
5504 /* C3 - For double-word aligned arguments, round the NCRN up to the
5505 next even number. */
5506 ncrn = pcum->aapcs_ncrn;
5507 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5508 ncrn++;
5510 nregs = ARM_NUM_REGS2(mode, type);
5512 /* Sigh, this test should really assert that nregs > 0, but a GCC
5513 extension allows empty structs and then gives them empty size; it
5514 then allows such a structure to be passed by value. For some of
5515 the code below we have to pretend that such an argument has
5516 non-zero size so that we 'locate' it correctly either in
5517 registers or on the stack. */
5518 gcc_assert (nregs >= 0);
5520 nregs2 = nregs ? nregs : 1;
5522 /* C4 - Argument fits entirely in core registers. */
5523 if (ncrn + nregs2 <= NUM_ARG_REGS)
5525 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5526 pcum->aapcs_next_ncrn = ncrn + nregs;
5527 return;
5530 /* C5 - Some core registers left and there are no arguments already
5531 on the stack: split this argument between the remaining core
5532 registers and the stack. */
5533 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5535 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5536 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5537 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5538 return;
5541 /* C6 - NCRN is set to 4. */
5542 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5544 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5545 return;
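/* A minimal stand-alone sketch of how the C3-C5 rules above assign an
   argument to core registers, assuming the AAPCS values NUM_ARG_REGS == 4
   and UNITS_PER_WORD == 4.  The struct and function names are invented
   for illustration and are not part of this file.  */

struct aapcs_core_alloc_example
{
  int first_reg;     /* First core register used, or -1 if none.  */
  int regs_used;     /* Number of core registers consumed.  */
  int stack_bytes;   /* Bytes that spill to the stack.  */
};

static struct aapcs_core_alloc_example
aapcs_core_layout_example (int ncrn, int nregs, int dword_aligned,
                           int can_split)
{
  struct aapcs_core_alloc_example a = { -1, 0, 0 };

  /* C3 - round NCRN up to an even number for doubleword alignment.  */
  if (dword_aligned && (ncrn & 1))
    ncrn++;

  if (ncrn + nregs <= 4)              /* C4 - fits entirely in r0-r3.  */
    {
      a.first_reg = ncrn;
      a.regs_used = nregs;
    }
  else if (ncrn < 4 && can_split)     /* C5 - split between regs and stack.  */
    {
      a.first_reg = ncrn;
      a.regs_used = 4 - ncrn;
      a.stack_bytes = (nregs - a.regs_used) * 4;
    }
  else                                /* C6-C8 - everything on the stack.  */
    a.stack_bytes = nregs * 4;

  return a;
}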
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is NULL. */
5551 void
5552 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5553 rtx libname,
5554 tree fndecl ATTRIBUTE_UNUSED)
5556 /* Long call handling. */
5557 if (fntype)
5558 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5559 else
5560 pcum->pcs_variant = arm_pcs_default;
5562 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5564 if (arm_libcall_uses_aapcs_base (libname))
5565 pcum->pcs_variant = ARM_PCS_AAPCS;
5567 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5568 pcum->aapcs_reg = NULL_RTX;
5569 pcum->aapcs_partial = 0;
5570 pcum->aapcs_arg_processed = false;
5571 pcum->aapcs_cprc_slot = -1;
5572 pcum->can_split = true;
5574 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5576 int i;
5578 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5580 pcum->aapcs_cprc_failed[i] = false;
5581 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5584 return;
5587 /* Legacy ABIs */
5589 /* On the ARM, the offset starts at 0. */
5590 pcum->nregs = 0;
5591 pcum->iwmmxt_nregs = 0;
5592 pcum->can_split = true;
5594 /* Varargs vectors are treated the same as long long.
5595 named_count avoids having to change the way ARM handles 'named'. */
5596 pcum->named_count = 0;
5597 pcum->nargs = 0;
5599 if (TARGET_REALLY_IWMMXT && fntype)
5601 tree fn_arg;
5603 for (fn_arg = TYPE_ARG_TYPES (fntype);
5604 fn_arg;
5605 fn_arg = TREE_CHAIN (fn_arg))
5606 pcum->named_count += 1;
5608 if (! pcum->named_count)
5609 pcum->named_count = INT_MAX;
5613 /* Return true if we use LRA instead of reload pass. */
5614 static bool
5615 arm_lra_p (void)
5617 return arm_lra_flag;
5620 /* Return true if mode/type need doubleword alignment. */
5621 static bool
5622 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5624 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5625 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5629 /* Determine where to put an argument to a function.
5630 Value is zero to push the argument on the stack,
5631 or a hard register in which to store the argument.
5633 MODE is the argument's machine mode.
5634 TYPE is the data type of the argument (as a tree).
5635 This is null for libcalls where that information may
5636 not be available.
5637 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5638 the preceding args and about the function being called.
5639 NAMED is nonzero if this argument is a named parameter
5640 (otherwise it is an extra parameter matching an ellipsis).
5642 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5643 other arguments are passed on the stack. If (NAMED == 0) (which happens
5644 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5645 defined), say it is passed on the stack (function_prologue will
5646 indeed make it pass on the stack if necessary). */
5648 static rtx
5649 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5650 const_tree type, bool named)
5652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5653 int nregs;
5655 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5656 a call insn (op3 of a call_value insn). */
5657 if (mode == VOIDmode)
5658 return const0_rtx;
5660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5662 aapcs_layout_arg (pcum, mode, type, named);
5663 return pcum->aapcs_reg;
5666 /* Varargs vectors are treated the same as long long.
5667 named_count avoids having to change the way ARM handles 'named'. */
5668 if (TARGET_IWMMXT_ABI
5669 && arm_vector_mode_supported_p (mode)
5670 && pcum->named_count > pcum->nargs + 1)
5672 if (pcum->iwmmxt_nregs <= 9)
5673 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5674 else
5676 pcum->can_split = false;
5677 return NULL_RTX;
5681 /* Put doubleword aligned quantities in even register pairs. */
5682 if (pcum->nregs & 1
5683 && ARM_DOUBLEWORD_ALIGN
5684 && arm_needs_doubleword_align (mode, type))
5685 pcum->nregs++;
5687 /* Only allow splitting an arg between regs and memory if all preceding
5688 args were allocated to regs. For args passed by reference we only count
5689 the reference pointer. */
5690 if (pcum->can_split)
5691 nregs = 1;
5692 else
5693 nregs = ARM_NUM_REGS2 (mode, type);
5695 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5696 return NULL_RTX;
5698 return gen_rtx_REG (mode, pcum->nregs);
5701 static unsigned int
5702 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5704 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5705 ? DOUBLEWORD_ALIGNMENT
5706 : PARM_BOUNDARY);
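/* Worked example (illustrative) for the two hooks above under AAPCS.  */
extern void example_callee (int a, long long b, int c);
/* 'a' is allocated to r0.  'b' needs doubleword alignment, so r1 is
   skipped (rule C3) and 'b' takes the even pair r2/r3;
   arm_function_arg_boundary reports 64 bits for it.  'c' then finds no
   core registers left and is passed on the stack.  */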
5709 static int
5710 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5711 tree type, bool named)
5713 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5714 int nregs = pcum->nregs;
5716 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5718 aapcs_layout_arg (pcum, mode, type, named);
5719 return pcum->aapcs_partial;
5722 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5723 return 0;
5725 if (NUM_ARG_REGS > nregs
5726 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5727 && pcum->can_split)
5728 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5730 return 0;
5733 /* Update the data in PCUM to advance over an argument
5734 of mode MODE and data type TYPE.
5735 (TYPE is null for libcalls where that information may not be available.) */
5737 static void
5738 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5739 const_tree type, bool named)
5741 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5743 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5745 aapcs_layout_arg (pcum, mode, type, named);
5747 if (pcum->aapcs_cprc_slot >= 0)
5749 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5750 type);
5751 pcum->aapcs_cprc_slot = -1;
5754 /* Generic stuff. */
5755 pcum->aapcs_arg_processed = false;
5756 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5757 pcum->aapcs_reg = NULL_RTX;
5758 pcum->aapcs_partial = 0;
5760 else
5762 pcum->nargs += 1;
5763 if (arm_vector_mode_supported_p (mode)
5764 && pcum->named_count > pcum->nargs
5765 && TARGET_IWMMXT_ABI)
5766 pcum->iwmmxt_nregs += 1;
5767 else
5768 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5772 /* Variable sized types are passed by reference. This is a GCC
5773 extension to the ARM ABI. */
5775 static bool
5776 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5777 enum machine_mode mode ATTRIBUTE_UNUSED,
5778 const_tree type, bool named ATTRIBUTE_UNUSED)
5780 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5783 /* Encode the current state of the #pragma [no_]long_calls. */
5784 typedef enum
5786 OFF, /* No #pragma [no_]long_calls is in effect. */
5787 LONG, /* #pragma long_calls is in effect. */
5788 SHORT /* #pragma no_long_calls is in effect. */
5789 } arm_pragma_enum;
5791 static arm_pragma_enum arm_pragma_long_calls = OFF;
5793 void
5794 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5796 arm_pragma_long_calls = LONG;
5799 void
5800 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5802 arm_pragma_long_calls = SHORT;
5805 void
5806 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5808 arm_pragma_long_calls = OFF;
5811 /* Handle an attribute requiring a FUNCTION_DECL;
5812 arguments as in struct attribute_spec.handler. */
5813 static tree
5814 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5815 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5817 if (TREE_CODE (*node) != FUNCTION_DECL)
5819 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5820 name);
5821 *no_add_attrs = true;
5824 return NULL_TREE;
5827 /* Handle an "interrupt" or "isr" attribute;
5828 arguments as in struct attribute_spec.handler. */
5829 static tree
5830 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5831 bool *no_add_attrs)
5833 if (DECL_P (*node))
5835 if (TREE_CODE (*node) != FUNCTION_DECL)
5837 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5838 name);
5839 *no_add_attrs = true;
5841 /* FIXME: the argument if any is checked for type attributes;
5842 should it be checked for decl ones? */
5844 else
5846 if (TREE_CODE (*node) == FUNCTION_TYPE
5847 || TREE_CODE (*node) == METHOD_TYPE)
5849 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5851 warning (OPT_Wattributes, "%qE attribute ignored",
5852 name);
5853 *no_add_attrs = true;
5856 else if (TREE_CODE (*node) == POINTER_TYPE
5857 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5858 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5859 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5861 *node = build_variant_type_copy (*node);
5862 TREE_TYPE (*node) = build_type_attribute_variant
5863 (TREE_TYPE (*node),
5864 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5865 *no_add_attrs = true;
5867 else
5869 /* Possibly pass this attribute on from the type to a decl. */
5870 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5871 | (int) ATTR_FLAG_FUNCTION_NEXT
5872 | (int) ATTR_FLAG_ARRAY_NEXT))
5874 *no_add_attrs = true;
5875 return tree_cons (name, args, NULL_TREE);
5877 else
5879 warning (OPT_Wattributes, "%qE attribute ignored",
5880 name);
5885 return NULL_TREE;
5888 /* Handle a "pcs" attribute; arguments as in struct
5889 attribute_spec.handler. */
5890 static tree
5891 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5892 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5894 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5896 warning (OPT_Wattributes, "%qE attribute ignored", name);
5897 *no_add_attrs = true;
5899 return NULL_TREE;
5902 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5903 /* Handle the "notshared" attribute. This attribute is another way of
5904 requesting hidden visibility. ARM's compiler supports
5905 "__declspec(notshared)"; we support the same thing via an
5906 attribute. */
5908 static tree
5909 arm_handle_notshared_attribute (tree *node,
5910 tree name ATTRIBUTE_UNUSED,
5911 tree args ATTRIBUTE_UNUSED,
5912 int flags ATTRIBUTE_UNUSED,
5913 bool *no_add_attrs)
5915 tree decl = TYPE_NAME (*node);
5917 if (decl)
5919 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5920 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5921 *no_add_attrs = false;
5923 return NULL_TREE;
5925 #endif
5927 /* Return 0 if the attributes for two types are incompatible, 1 if they
5928 are compatible, and 2 if they are nearly compatible (which causes a
5929 warning to be generated). */
5930 static int
5931 arm_comp_type_attributes (const_tree type1, const_tree type2)
5933 int l1, l2, s1, s2;
5935 /* Check for mismatch of non-default calling convention. */
5936 if (TREE_CODE (type1) != FUNCTION_TYPE)
5937 return 1;
5939 /* Check for mismatched call attributes. */
5940 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5941 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5942 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5943 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5945 /* Only bother to check if an attribute is defined. */
5946 if (l1 | l2 | s1 | s2)
5948 /* If one type has an attribute, the other must have the same attribute. */
5949 if ((l1 != l2) || (s1 != s2))
5950 return 0;
5952 /* Disallow mixed attributes. */
5953 if ((l1 & s2) || (l2 & s1))
5954 return 0;
5957 /* Check for mismatched ISR attribute. */
5958 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5959 if (! l1)
5960 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5961 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5962 if (! l2)
5963 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5964 if (l1 != l2)
5965 return 0;
5967 return 1;
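/* Illustrative only: the two declarations below carry mismatched ISR
   attributes, so arm_comp_type_attributes returns 0 for their function
   types.  The names are invented for the example.  */
extern void timer_handler (void) __attribute__ ((interrupt ("IRQ")));
extern void plain_helper (void);   /* No isr/interrupt attribute: mismatch.  */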
5970 /* Assigns default attributes to newly defined type. This is used to
5971 set short_call/long_call attributes for function types of
5972 functions defined inside corresponding #pragma scopes. */
5973 static void
5974 arm_set_default_type_attributes (tree type)
5976 /* Add __attribute__ ((long_call)) to all functions, when
5977 inside #pragma long_calls or __attribute__ ((short_call)),
5978 when inside #pragma no_long_calls. */
5979 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5981 tree type_attr_list, attr_name;
5982 type_attr_list = TYPE_ATTRIBUTES (type);
5984 if (arm_pragma_long_calls == LONG)
5985 attr_name = get_identifier ("long_call");
5986 else if (arm_pragma_long_calls == SHORT)
5987 attr_name = get_identifier ("short_call");
5988 else
5989 return;
5991 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5992 TYPE_ATTRIBUTES (type) = type_attr_list;
5996 /* Return true if DECL is known to be linked into section SECTION. */
5998 static bool
5999 arm_function_in_section_p (tree decl, section *section)
6001 /* We can only be certain about functions defined in the same
6002 compilation unit. */
6003 if (!TREE_STATIC (decl))
6004 return false;
6006 /* Make sure that SYMBOL always binds to the definition in this
6007 compilation unit. */
6008 if (!targetm.binds_local_p (decl))
6009 return false;
6011 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6012 if (!DECL_SECTION_NAME (decl))
6014 /* Make sure that we will not create a unique section for DECL. */
6015 if (flag_function_sections || DECL_ONE_ONLY (decl))
6016 return false;
6019 return function_section (decl) == section;
6022 /* Return nonzero if a 32-bit "long_call" should be generated for
6023 a call from the current function to DECL. We generate a long_call
6024 if the function:
6026 a. has an __attribute__ ((long_call))
6027 or b. is within the scope of a #pragma long_calls
6028 or c. the -mlong-calls command line switch has been specified
6030 However we do not generate a long call if the function:
6032 d. has an __attribute__ ((short_call))
6033 or e. is inside the scope of a #pragma no_long_calls
6034 or f. is defined in the same section as the current function. */
6036 bool
6037 arm_is_long_call_p (tree decl)
6039 tree attrs;
6041 if (!decl)
6042 return TARGET_LONG_CALLS;
6044 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6045 if (lookup_attribute ("short_call", attrs))
6046 return false;
6048 /* For "f", be conservative, and only cater for cases in which the
6049 whole of the current function is placed in the same section. */
6050 if (!flag_reorder_blocks_and_partition
6051 && TREE_CODE (decl) == FUNCTION_DECL
6052 && arm_function_in_section_p (decl, current_function_section ()))
6053 return false;
6055 if (lookup_attribute ("long_call", attrs))
6056 return true;
6058 return TARGET_LONG_CALLS;
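/* Source-level usage (illustrative) of the cases listed above; the
   function names are invented.  */
extern void far_handler (void) __attribute__ ((long_call));   /* case a */
extern void near_helper (void) __attribute__ ((short_call));  /* case d */

#pragma long_calls        /* case b: following declarations default to long calls.  */
extern void far_by_pragma (void);
#pragma long_calls_off    /* Back to the command-line default; -mlong-calls is case c.  */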
6061 /* Return nonzero if it is ok to make a tail-call to DECL. */
6062 static bool
6063 arm_function_ok_for_sibcall (tree decl, tree exp)
6065 unsigned long func_type;
6067 if (cfun->machine->sibcall_blocked)
6068 return false;
6070 /* Never tailcall something if we are generating code for Thumb-1. */
6071 if (TARGET_THUMB1)
6072 return false;
6074 /* The PIC register is live on entry to VxWorks PLT entries, so we
6075 must make the call before restoring the PIC register. */
6076 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6077 return false;
6079 /* Cannot tail-call to long calls, since these are out of range of
6080 a branch instruction. */
6081 if (decl && arm_is_long_call_p (decl))
6082 return false;
6084 /* If we are interworking and the function is not declared static
6085 then we can't tail-call it unless we know that it exists in this
6086 compilation unit (since it might be a Thumb routine). */
6087 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6088 && !TREE_ASM_WRITTEN (decl))
6089 return false;
6091 func_type = arm_current_func_type ();
6092 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6093 if (IS_INTERRUPT (func_type))
6094 return false;
6096 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6098 /* Check that the return value locations are the same. For
6099 example that we aren't returning a value from the sibling in
6100 a VFP register but then need to transfer it to a core
6101 register. */
6102 rtx a, b;
6104 a = arm_function_value (TREE_TYPE (exp), decl, false);
6105 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6106 cfun->decl, false);
6107 if (!rtx_equal_p (a, b))
6108 return false;
6111 /* Never tailcall if function may be called with a misaligned SP. */
6112 if (IS_STACKALIGN (func_type))
6113 return false;
6115 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6116 references should become a NOP. Don't convert such calls into
6117 sibling calls. */
6118 if (TARGET_AAPCS_BASED
6119 && arm_abi == ARM_ABI_AAPCS
6120 && decl
6121 && DECL_WEAK (decl))
6122 return false;
6124 /* Everything else is ok. */
6125 return true;
6129 /* Addressing mode support functions. */
6131 /* Return nonzero if X is a legitimate immediate operand when compiling
6132 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6134 legitimate_pic_operand_p (rtx x)
6136 if (GET_CODE (x) == SYMBOL_REF
6137 || (GET_CODE (x) == CONST
6138 && GET_CODE (XEXP (x, 0)) == PLUS
6139 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6140 return 0;
6142 return 1;
6145 /* Record that the current function needs a PIC register. Initialize
6146 cfun->machine->pic_reg if we have not already done so. */
6148 static void
6149 require_pic_register (void)
6151 /* A lot of the logic here is made obscure by the fact that this
6152 routine gets called as part of the rtx cost estimation process.
6153 We don't want those calls to affect any assumptions about the real
6154 function; and further, we can't call entry_of_function() until we
6155 start the real expansion process. */
6156 if (!crtl->uses_pic_offset_table)
6158 gcc_assert (can_create_pseudo_p ());
6159 if (arm_pic_register != INVALID_REGNUM
6160 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6162 if (!cfun->machine->pic_reg)
6163 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6165 /* Play games to avoid marking the function as needing pic
6166 if we are being called as part of the cost-estimation
6167 process. */
6168 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6169 crtl->uses_pic_offset_table = 1;
6171 else
6173 rtx seq, insn;
6175 if (!cfun->machine->pic_reg)
6176 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6178 /* Play games to avoid marking the function as needing pic
6179 if we are being called as part of the cost-estimation
6180 process. */
6181 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6183 crtl->uses_pic_offset_table = 1;
6184 start_sequence ();
6186 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6187 && arm_pic_register > LAST_LO_REGNUM)
6188 emit_move_insn (cfun->machine->pic_reg,
6189 gen_rtx_REG (Pmode, arm_pic_register));
6190 else
6191 arm_load_pic_register (0UL);
6193 seq = get_insns ();
6194 end_sequence ();
6196 for (insn = seq; insn; insn = NEXT_INSN (insn))
6197 if (INSN_P (insn))
6198 INSN_LOCATION (insn) = prologue_location;
6200 /* We can be called during expansion of PHI nodes, where
6201 we can't yet emit instructions directly in the final
6202 insn stream. Queue the insns on the entry edge, they will
6203 be committed after everything else is expanded. */
6204 insert_insn_on_edge (seq,
6205 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6212 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6214 if (GET_CODE (orig) == SYMBOL_REF
6215 || GET_CODE (orig) == LABEL_REF)
6217 rtx insn;
6219 if (reg == 0)
6221 gcc_assert (can_create_pseudo_p ());
6222 reg = gen_reg_rtx (Pmode);
6225 /* VxWorks does not impose a fixed gap between segments; the run-time
6226 gap can be different from the object-file gap. We therefore can't
6227 use GOTOFF unless we are absolutely sure that the symbol is in the
6228 same segment as the GOT. Unfortunately, the flexibility of linker
6229 scripts means that we can't be sure of that in general, so assume
6230 that GOTOFF is never valid on VxWorks. */
6231 if ((GET_CODE (orig) == LABEL_REF
6232 || (GET_CODE (orig) == SYMBOL_REF &&
6233 SYMBOL_REF_LOCAL_P (orig)))
6234 && NEED_GOT_RELOC
6235 && arm_pic_data_is_text_relative)
6236 insn = arm_pic_static_addr (orig, reg);
6237 else
6239 rtx pat;
6240 rtx mem;
6242 /* If this function doesn't have a pic register, create one now. */
6243 require_pic_register ();
6245 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6247 /* Make the MEM as close to a constant as possible. */
6248 mem = SET_SRC (pat);
6249 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6250 MEM_READONLY_P (mem) = 1;
6251 MEM_NOTRAP_P (mem) = 1;
6253 insn = emit_insn (pat);
6256 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6257 by the loop optimizer. */
6258 set_unique_reg_note (insn, REG_EQUAL, orig);
6260 return reg;
6262 else if (GET_CODE (orig) == CONST)
6264 rtx base, offset;
6266 if (GET_CODE (XEXP (orig, 0)) == PLUS
6267 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6268 return orig;
6270 /* Handle the case where we have: const (UNSPEC_TLS). */
6271 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6272 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6273 return orig;
6275 /* Handle the case where we have:
6276 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6277 CONST_INT. */
6278 if (GET_CODE (XEXP (orig, 0)) == PLUS
6279 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6280 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6282 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6283 return orig;
6286 if (reg == 0)
6288 gcc_assert (can_create_pseudo_p ());
6289 reg = gen_reg_rtx (Pmode);
6292 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6294 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6295 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6296 base == reg ? 0 : reg);
6298 if (CONST_INT_P (offset))
6300 /* The base register doesn't really matter, we only want to
6301 test the index for the appropriate mode. */
6302 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6304 gcc_assert (can_create_pseudo_p ());
6305 offset = force_reg (Pmode, offset);
6308 if (CONST_INT_P (offset))
6309 return plus_constant (Pmode, base, INTVAL (offset));
6312 if (GET_MODE_SIZE (mode) > 4
6313 && (GET_MODE_CLASS (mode) == MODE_INT
6314 || TARGET_SOFT_FLOAT))
6316 emit_insn (gen_addsi3 (reg, base, offset));
6317 return reg;
6320 return gen_rtx_PLUS (Pmode, base, offset);
6323 return orig;
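/* Illustrative sketch (not GCC code) of what the GOT-based path above
   amounts to at run time: one extra indirection through the GOT.  Here
   'got_base' stands for cfun->machine->pic_reg and 'got_offset' for the
   sym(GOT) constant; both names are invented for the example.  */
static void *
pic_global_address_example (char *got_base, long got_offset)
{
  /* (mem (plus pic_reg sym(GOT))): load the symbol's address from its
     GOT slot.  */
  void **got_slot = (void **) (got_base + got_offset);
  return *got_slot;
}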
6327 /* Find a spare register to use during the prolog of a function. */
6329 static int
6330 thumb_find_work_register (unsigned long pushed_regs_mask)
6332 int reg;
6334 /* Check the argument registers first as these are call-used. The
6335 register allocation order means that sometimes r3 might be used
6336 but earlier argument registers might not, so check them all. */
6337 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6338 if (!df_regs_ever_live_p (reg))
6339 return reg;
6341 /* Before going on to check the call-saved registers we can try a couple
6342 more ways of deducing that r3 is available. The first is when we are
6343 pushing anonymous arguments onto the stack and we have less than 4
6344 registers worth of fixed arguments(*). In this case r3 will be part of
6345 the variable argument list and so we can be sure that it will be
6346 pushed right at the start of the function. Hence it will be available
6347 for the rest of the prologue.
6348 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6349 if (cfun->machine->uses_anonymous_args
6350 && crtl->args.pretend_args_size > 0)
6351 return LAST_ARG_REGNUM;
6353 /* The other case is when we have fixed arguments but less than 4 registers
6354 worth. In this case r3 might be used in the body of the function, but
6355 it is not being used to convey an argument into the function. In theory
6356 we could just check crtl->args.size to see how many bytes are
6357 being passed in argument registers, but it seems that it is unreliable.
6358 Sometimes it will have the value 0 when in fact arguments are being
6359 passed. (See testcase execute/20021111-1.c for an example). So we also
6360 check the args_info.nregs field as well. The problem with this field is
6361 that it makes no allowances for arguments that are passed to the
6362 function but which are not used. Hence we could miss an opportunity
6363 when a function has an unused argument in r3. But it is better to be
6364 safe than to be sorry. */
6365 if (! cfun->machine->uses_anonymous_args
6366 && crtl->args.size >= 0
6367 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6368 && (TARGET_AAPCS_BASED
6369 ? crtl->args.info.aapcs_ncrn < 4
6370 : crtl->args.info.nregs < 4))
6371 return LAST_ARG_REGNUM;
6373 /* Otherwise look for a call-saved register that is going to be pushed. */
6374 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6375 if (pushed_regs_mask & (1 << reg))
6376 return reg;
6378 if (TARGET_THUMB2)
6380 /* Thumb-2 can use high regs. */
6381 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6382 if (pushed_regs_mask & (1 << reg))
6383 return reg;
6385 /* Something went wrong - thumb_compute_save_reg_mask()
6386 should have arranged for a suitable register to be pushed. */
6387 gcc_unreachable ();
6390 static GTY(()) int pic_labelno;
6392 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6393 low register. */
6395 void
6396 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6398 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6400 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6401 return;
6403 gcc_assert (flag_pic);
6405 pic_reg = cfun->machine->pic_reg;
6406 if (TARGET_VXWORKS_RTP)
6408 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6409 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6410 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6412 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6414 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6415 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6417 else
6419 /* We use an UNSPEC rather than a LABEL_REF because this label
6420 never appears in the code stream. */
6422 labelno = GEN_INT (pic_labelno++);
6423 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6424 l1 = gen_rtx_CONST (VOIDmode, l1);
6426 /* On the ARM the PC register contains 'dot + 8' at the time of the
6427 addition, on the Thumb it is 'dot + 4'. */
6428 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6429 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6430 UNSPEC_GOTSYM_OFF);
6431 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6433 if (TARGET_32BIT)
6435 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6437 else /* TARGET_THUMB1 */
6439 if (arm_pic_register != INVALID_REGNUM
6440 && REGNO (pic_reg) > LAST_LO_REGNUM)
6442 /* We will have pushed the pic register, so we should always be
6443 able to find a work register. */
6444 pic_tmp = gen_rtx_REG (SImode,
6445 thumb_find_work_register (saved_regs));
6446 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6447 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6448 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6450 else if (arm_pic_register != INVALID_REGNUM
6451 && arm_pic_register > LAST_LO_REGNUM
6452 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6454 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6455 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6456 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6458 else
6459 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6463 /* Need to emit this whether or not we obey regdecls,
6464 since setjmp/longjmp can cause life info to screw up. */
6465 emit_use (pic_reg);
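/* Worked example (illustrative) of the 'dot + 8' / 'dot + 4' correction
   used above.  If the literal word loaded by pic_load_addr holds
   GOT_origin - (.LPICn + pc_bias), then adding the PC value read at
   .LPICn (which is .LPICn + pc_bias) reconstructs GOT_origin.  The
   function below just replays that arithmetic; its names are invented.  */
static unsigned long
reconstruct_got_origin_example (unsigned long lpic_addr,
                                unsigned long got_origin, int thumb)
{
  unsigned long pc_bias = thumb ? 4 : 8;                      /* dot + 4 vs dot + 8.  */
  unsigned long stored  = got_origin - (lpic_addr + pc_bias); /* Literal pool word.  */
  unsigned long pc_read = lpic_addr + pc_bias;                /* PC as read at the add.  */
  return pc_read + stored;                                    /* == got_origin.  */
}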
6468 /* Generate code to load the address of a static var when flag_pic is set. */
6469 static rtx
6470 arm_pic_static_addr (rtx orig, rtx reg)
6472 rtx l1, labelno, offset_rtx, insn;
6474 gcc_assert (flag_pic);
6476 /* We use an UNSPEC rather than a LABEL_REF because this label
6477 never appears in the code stream. */
6478 labelno = GEN_INT (pic_labelno++);
6479 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6480 l1 = gen_rtx_CONST (VOIDmode, l1);
6482 /* On the ARM the PC register contains 'dot + 8' at the time of the
6483 addition, on the Thumb it is 'dot + 4'. */
6484 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6485 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6486 UNSPEC_SYMBOL_OFFSET);
6487 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6489 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6490 return insn;
6493 /* Return nonzero if X is valid as an ARM state addressing register. */
6494 static int
6495 arm_address_register_rtx_p (rtx x, int strict_p)
6497 int regno;
6499 if (!REG_P (x))
6500 return 0;
6502 regno = REGNO (x);
6504 if (strict_p)
6505 return ARM_REGNO_OK_FOR_BASE_P (regno);
6507 return (regno <= LAST_ARM_REGNUM
6508 || regno >= FIRST_PSEUDO_REGISTER
6509 || regno == FRAME_POINTER_REGNUM
6510 || regno == ARG_POINTER_REGNUM);
6513 /* Return TRUE if this rtx is the difference of a symbol and a label,
6514 and will reduce to a PC-relative relocation in the object file.
6515 Expressions like this can be left alone when generating PIC, rather
6516 than forced through the GOT. */
6517 static int
6518 pcrel_constant_p (rtx x)
6520 if (GET_CODE (x) == MINUS)
6521 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6523 return FALSE;
6526 /* Return true if X will surely end up in an index register after next
6527 splitting pass. */
6528 static bool
6529 will_be_in_index_register (const_rtx x)
6531 /* arm.md: calculate_pic_address will split this into a register. */
6532 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6535 /* Return nonzero if X is a valid ARM state address operand. */
6537 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6538 int strict_p)
6540 bool use_ldrd;
6541 enum rtx_code code = GET_CODE (x);
6543 if (arm_address_register_rtx_p (x, strict_p))
6544 return 1;
6546 use_ldrd = (TARGET_LDRD
6547 && (mode == DImode
6548 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6550 if (code == POST_INC || code == PRE_DEC
6551 || ((code == PRE_INC || code == POST_DEC)
6552 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6553 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6555 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6556 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6557 && GET_CODE (XEXP (x, 1)) == PLUS
6558 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6560 rtx addend = XEXP (XEXP (x, 1), 1);
6562 /* Don't allow ldrd post increment by register because it's hard
6563 to fixup invalid register choices. */
6564 if (use_ldrd
6565 && GET_CODE (x) == POST_MODIFY
6566 && REG_P (addend))
6567 return 0;
6569 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6570 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6573 /* After reload constants split into minipools will have addresses
6574 from a LABEL_REF. */
6575 else if (reload_completed
6576 && (code == LABEL_REF
6577 || (code == CONST
6578 && GET_CODE (XEXP (x, 0)) == PLUS
6579 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6580 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6581 return 1;
6583 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6584 return 0;
6586 else if (code == PLUS)
6588 rtx xop0 = XEXP (x, 0);
6589 rtx xop1 = XEXP (x, 1);
6591 return ((arm_address_register_rtx_p (xop0, strict_p)
6592 && ((CONST_INT_P (xop1)
6593 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6594 || (!strict_p && will_be_in_index_register (xop1))))
6595 || (arm_address_register_rtx_p (xop1, strict_p)
6596 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6599 #if 0
6600 /* Reload currently can't handle MINUS, so disable this for now */
6601 else if (GET_CODE (x) == MINUS)
6603 rtx xop0 = XEXP (x, 0);
6604 rtx xop1 = XEXP (x, 1);
6606 return (arm_address_register_rtx_p (xop0, strict_p)
6607 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6609 #endif
6611 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6612 && code == SYMBOL_REF
6613 && CONSTANT_POOL_ADDRESS_P (x)
6614 && ! (flag_pic
6615 && symbol_mentioned_p (get_pool_constant (x))
6616 && ! pcrel_constant_p (get_pool_constant (x))))
6617 return 1;
6619 return 0;
6622 /* Return nonzero if X is a valid Thumb-2 address operand. */
6623 static int
6624 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6626 bool use_ldrd;
6627 enum rtx_code code = GET_CODE (x);
6629 if (arm_address_register_rtx_p (x, strict_p))
6630 return 1;
6632 use_ldrd = (TARGET_LDRD
6633 && (mode == DImode
6634 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6636 if (code == POST_INC || code == PRE_DEC
6637 || ((code == PRE_INC || code == POST_DEC)
6638 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6639 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6641 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6642 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6643 && GET_CODE (XEXP (x, 1)) == PLUS
6644 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6646 /* Thumb-2 only has autoincrement by constant. */
6647 rtx addend = XEXP (XEXP (x, 1), 1);
6648 HOST_WIDE_INT offset;
6650 if (!CONST_INT_P (addend))
6651 return 0;
6653 offset = INTVAL (addend);
6654 if (GET_MODE_SIZE (mode) <= 4)
6655 return (offset > -256 && offset < 256);
6657 return (use_ldrd && offset > -1024 && offset < 1024
6658 && (offset & 3) == 0);
6661 /* After reload constants split into minipools will have addresses
6662 from a LABEL_REF. */
6663 else if (reload_completed
6664 && (code == LABEL_REF
6665 || (code == CONST
6666 && GET_CODE (XEXP (x, 0)) == PLUS
6667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6668 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6669 return 1;
6671 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6672 return 0;
6674 else if (code == PLUS)
6676 rtx xop0 = XEXP (x, 0);
6677 rtx xop1 = XEXP (x, 1);
6679 return ((arm_address_register_rtx_p (xop0, strict_p)
6680 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6681 || (!strict_p && will_be_in_index_register (xop1))))
6682 || (arm_address_register_rtx_p (xop1, strict_p)
6683 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6686 /* Normally we can assign constant values to target registers without
6687 the help of the constant pool.  But there are cases where we have to
6688 use the constant pool, for example:
6689 1) assigning a label to a register;
6690 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6692 A constant pool access of the form:
6693 (set (reg r0) (mem (symbol_ref (".LC0"))))
6694 will cause the use of the literal pool (later, in function arm_reorg).
6695 So here we mark such a form as invalid, so that the compiler
6696 will adjust it into:
6697 (set (reg r0) (symbol_ref (".LC0")))
6698 (set (reg r0) (mem (reg r0))).
6699 No extra register is required, and (mem (reg r0)) won't cause the use
6700 of literal pools. */
6701 else if (arm_disable_literal_pool && code == SYMBOL_REF
6702 && CONSTANT_POOL_ADDRESS_P (x))
6703 return 0;
6705 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6706 && code == SYMBOL_REF
6707 && CONSTANT_POOL_ADDRESS_P (x)
6708 && ! (flag_pic
6709 && symbol_mentioned_p (get_pool_constant (x))
6710 && ! pcrel_constant_p (get_pool_constant (x))))
6711 return 1;
6713 return 0;
6716 /* Return nonzero if INDEX is valid for an address index operand in
6717 ARM state. */
6718 static int
6719 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6720 int strict_p)
6722 HOST_WIDE_INT range;
6723 enum rtx_code code = GET_CODE (index);
6725 /* Standard coprocessor addressing modes. */
6726 if (TARGET_HARD_FLOAT
6727 && TARGET_VFP
6728 && (mode == SFmode || mode == DFmode))
6729 return (code == CONST_INT && INTVAL (index) < 1024
6730 && INTVAL (index) > -1024
6731 && (INTVAL (index) & 3) == 0);
6733 /* For quad modes, we restrict the constant offset to be slightly less
6734 than what the instruction format permits. We do this because for
6735 quad mode moves, we will actually decompose them into two separate
6736 double-mode reads or writes. INDEX must therefore be a valid
6737 (double-mode) offset and so should INDEX+8. */
6738 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6739 return (code == CONST_INT
6740 && INTVAL (index) < 1016
6741 && INTVAL (index) > -1024
6742 && (INTVAL (index) & 3) == 0);
6744 /* We have no such constraint on double mode offsets, so we permit the
6745 full range of the instruction format. */
6746 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6747 return (code == CONST_INT
6748 && INTVAL (index) < 1024
6749 && INTVAL (index) > -1024
6750 && (INTVAL (index) & 3) == 0);
6752 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6753 return (code == CONST_INT
6754 && INTVAL (index) < 1024
6755 && INTVAL (index) > -1024
6756 && (INTVAL (index) & 3) == 0);
6758 if (arm_address_register_rtx_p (index, strict_p)
6759 && (GET_MODE_SIZE (mode) <= 4))
6760 return 1;
6762 if (mode == DImode || mode == DFmode)
6764 if (code == CONST_INT)
6766 HOST_WIDE_INT val = INTVAL (index);
6768 if (TARGET_LDRD)
6769 return val > -256 && val < 256;
6770 else
6771 return val > -4096 && val < 4092;
6774 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6777 if (GET_MODE_SIZE (mode) <= 4
6778 && ! (arm_arch4
6779 && (mode == HImode
6780 || mode == HFmode
6781 || (mode == QImode && outer == SIGN_EXTEND))))
6783 if (code == MULT)
6785 rtx xiop0 = XEXP (index, 0);
6786 rtx xiop1 = XEXP (index, 1);
6788 return ((arm_address_register_rtx_p (xiop0, strict_p)
6789 && power_of_two_operand (xiop1, SImode))
6790 || (arm_address_register_rtx_p (xiop1, strict_p)
6791 && power_of_two_operand (xiop0, SImode)));
6793 else if (code == LSHIFTRT || code == ASHIFTRT
6794 || code == ASHIFT || code == ROTATERT)
6796 rtx op = XEXP (index, 1);
6798 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6799 && CONST_INT_P (op)
6800 && INTVAL (op) > 0
6801 && INTVAL (op) <= 31);
6805 /* For ARM v4 we may be doing a sign-extend operation during the
6806 load. */
6807 if (arm_arch4)
6809 if (mode == HImode
6810 || mode == HFmode
6811 || (outer == SIGN_EXTEND && mode == QImode))
6812 range = 256;
6813 else
6814 range = 4096;
6816 else
6817 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6819 return (code == CONST_INT
6820 && INTVAL (index) < range
6821 && INTVAL (index) > -range);
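/* The constant-offset ranges checked above, collapsed into one
   illustrative helper.  The enum values are invented for this sketch and
   deliberately ignore the pre-ARMv4 and register-index cases.  */
enum example_access_kind { EX_VFP_WORD, EX_LDRD, EX_WORD_OR_BYTE, EX_HALFWORD };

static int
const_offset_ok_example (enum example_access_kind kind, long off)
{
  switch (kind)
    {
    case EX_VFP_WORD:     /* VFP SF/DF: multiple of 4 in (-1024, 1024).  */
      return off > -1024 && off < 1024 && (off & 3) == 0;
    case EX_LDRD:         /* ldrd/strd DI/DF: (-256, 256).  */
      return off > -256 && off < 256;
    case EX_WORD_OR_BYTE: /* ldr/ldrb: (-4096, 4096).  */
      return off > -4096 && off < 4096;
    case EX_HALFWORD:     /* ldrh/ldrsb with arm_arch4: (-256, 256).  */
      return off > -256 && off < 256;
    }
  return 0;
}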
6824 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6825 index operand, i.e. 1, 2, 4 or 8. */
6826 static bool
6827 thumb2_index_mul_operand (rtx op)
6829 HOST_WIDE_INT val;
6831 if (!CONST_INT_P (op))
6832 return false;
6834 val = INTVAL (op);
6835 return (val == 1 || val == 2 || val == 4 || val == 8);
6838 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6839 static int
6840 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6842 enum rtx_code code = GET_CODE (index);
6844 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6845 /* Standard coprocessor addressing modes. */
6846 if (TARGET_HARD_FLOAT
6847 && TARGET_VFP
6848 && (mode == SFmode || mode == DFmode))
6849 return (code == CONST_INT && INTVAL (index) < 1024
6850 /* Thumb-2 allows only > -256 index range for its core register
6851 load/stores. Since we allow SF/DF in core registers, we have
6852 to use the intersection between -256~4096 (core) and -1024~1024
6853 (coprocessor). */
6854 && INTVAL (index) > -256
6855 && (INTVAL (index) & 3) == 0);
6857 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6859 /* For DImode assume values will usually live in core regs
6860 and only allow LDRD addressing modes. */
6861 if (!TARGET_LDRD || mode != DImode)
6862 return (code == CONST_INT
6863 && INTVAL (index) < 1024
6864 && INTVAL (index) > -1024
6865 && (INTVAL (index) & 3) == 0);
6868 /* For quad modes, we restrict the constant offset to be slightly less
6869 than what the instruction format permits. We do this because for
6870 quad mode moves, we will actually decompose them into two separate
6871 double-mode reads or writes. INDEX must therefore be a valid
6872 (double-mode) offset and so should INDEX+8. */
6873 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6874 return (code == CONST_INT
6875 && INTVAL (index) < 1016
6876 && INTVAL (index) > -1024
6877 && (INTVAL (index) & 3) == 0);
6879 /* We have no such constraint on double mode offsets, so we permit the
6880 full range of the instruction format. */
6881 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6882 return (code == CONST_INT
6883 && INTVAL (index) < 1024
6884 && INTVAL (index) > -1024
6885 && (INTVAL (index) & 3) == 0);
6887 if (arm_address_register_rtx_p (index, strict_p)
6888 && (GET_MODE_SIZE (mode) <= 4))
6889 return 1;
6891 if (mode == DImode || mode == DFmode)
6893 if (code == CONST_INT)
6895 HOST_WIDE_INT val = INTVAL (index);
6896 /* ??? Can we assume ldrd for thumb2? */
6897 /* Thumb-2 ldrd only has reg+const addressing modes. */
6898 /* ldrd supports offsets of +-1020.
6899 However the ldr fallback does not. */
6900 return val > -256 && val < 256 && (val & 3) == 0;
6902 else
6903 return 0;
6906 if (code == MULT)
6908 rtx xiop0 = XEXP (index, 0);
6909 rtx xiop1 = XEXP (index, 1);
6911 return ((arm_address_register_rtx_p (xiop0, strict_p)
6912 && thumb2_index_mul_operand (xiop1))
6913 || (arm_address_register_rtx_p (xiop1, strict_p)
6914 && thumb2_index_mul_operand (xiop0)));
6916 else if (code == ASHIFT)
6918 rtx op = XEXP (index, 1);
6920 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6921 && CONST_INT_P (op)
6922 && INTVAL (op) > 0
6923 && INTVAL (op) <= 3);
6926 return (code == CONST_INT
6927 && INTVAL (index) < 4096
6928 && INTVAL (index) > -256);
6931 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6932 static int
6933 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6935 int regno;
6937 if (!REG_P (x))
6938 return 0;
6940 regno = REGNO (x);
6942 if (strict_p)
6943 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6945 return (regno <= LAST_LO_REGNUM
6946 || regno > LAST_VIRTUAL_REGISTER
6947 || regno == FRAME_POINTER_REGNUM
6948 || (GET_MODE_SIZE (mode) >= 4
6949 && (regno == STACK_POINTER_REGNUM
6950 || regno >= FIRST_PSEUDO_REGISTER
6951 || x == hard_frame_pointer_rtx
6952 || x == arg_pointer_rtx)));
6955 /* Return nonzero if x is a legitimate index register. This is the case
6956 for any base register that can access a QImode object. */
6957 inline static int
6958 thumb1_index_register_rtx_p (rtx x, int strict_p)
6960 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6963 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6965 The AP may be eliminated to either the SP or the FP, so we use the
6966 least common denominator, e.g. SImode, and offsets from 0 to 64.
6968 ??? Verify whether the above is the right approach.
6970 ??? Also, the FP may be eliminated to the SP, so perhaps that
6971 needs special handling also.
6973 ??? Look at how the mips16 port solves this problem. It probably uses
6974 better ways to solve some of these problems.
6976 Although it is not incorrect, we don't accept QImode and HImode
6977 addresses based on the frame pointer or arg pointer until the
6978 reload pass starts. This is so that eliminating such addresses
6979 into stack based ones won't produce impossible code. */
6981 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6983 /* ??? Not clear if this is right. Experiment. */
6984 if (GET_MODE_SIZE (mode) < 4
6985 && !(reload_in_progress || reload_completed)
6986 && (reg_mentioned_p (frame_pointer_rtx, x)
6987 || reg_mentioned_p (arg_pointer_rtx, x)
6988 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6989 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6990 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6991 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6992 return 0;
6994 /* Accept any base register. SP only in SImode or larger. */
6995 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6996 return 1;
6998 /* This is PC relative data before arm_reorg runs. */
6999 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7000 && GET_CODE (x) == SYMBOL_REF
7001 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7002 return 1;
7004 /* This is PC relative data after arm_reorg runs. */
7005 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7006 && reload_completed
7007 && (GET_CODE (x) == LABEL_REF
7008 || (GET_CODE (x) == CONST
7009 && GET_CODE (XEXP (x, 0)) == PLUS
7010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7011 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7012 return 1;
7014 /* Post-inc indexing only supported for SImode and larger. */
7015 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7016 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7017 return 1;
7019 else if (GET_CODE (x) == PLUS)
7021 /* REG+REG address can be any two index registers. */
7022 /* We disallow FRAME+REG addressing since we know that FRAME
7023 will be replaced with STACK, and SP relative addressing only
7024 permits SP+OFFSET. */
7025 if (GET_MODE_SIZE (mode) <= 4
7026 && XEXP (x, 0) != frame_pointer_rtx
7027 && XEXP (x, 1) != frame_pointer_rtx
7028 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7029 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7030 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7031 return 1;
7033 /* REG+const has 5-7 bit offset for non-SP registers. */
7034 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7035 || XEXP (x, 0) == arg_pointer_rtx)
7036 && CONST_INT_P (XEXP (x, 1))
7037 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7038 return 1;
7040 /* REG+const has 10-bit offset for SP, but only SImode and
7041 larger are supported. */
7042 /* ??? Should probably check for DI/DFmode overflow here
7043 just like GO_IF_LEGITIMATE_OFFSET does. */
7044 else if (REG_P (XEXP (x, 0))
7045 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7046 && GET_MODE_SIZE (mode) >= 4
7047 && CONST_INT_P (XEXP (x, 1))
7048 && INTVAL (XEXP (x, 1)) >= 0
7049 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7050 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7051 return 1;
7053 else if (REG_P (XEXP (x, 0))
7054 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7055 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7056 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7057 && REGNO (XEXP (x, 0))
7058 <= LAST_VIRTUAL_POINTER_REGISTER))
7059 && GET_MODE_SIZE (mode) >= 4
7060 && CONST_INT_P (XEXP (x, 1))
7061 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7062 return 1;
7065 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7066 && GET_MODE_SIZE (mode) == 4
7067 && GET_CODE (x) == SYMBOL_REF
7068 && CONSTANT_POOL_ADDRESS_P (x)
7069 && ! (flag_pic
7070 && symbol_mentioned_p (get_pool_constant (x))
7071 && ! pcrel_constant_p (get_pool_constant (x))))
7072 return 1;
7074 return 0;
7077 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7078 instruction of mode MODE. */
7080 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7082 switch (GET_MODE_SIZE (mode))
7084 case 1:
7085 return val >= 0 && val < 32;
7087 case 2:
7088 return val >= 0 && val < 64 && (val & 1) == 0;
7090 default:
7091 return (val >= 0
7092 && (val + GET_MODE_SIZE (mode)) <= 128
7093 && (val & 3) == 0);
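/* Worked examples (illustrative) of the ranges above: a byte access may
   use offsets 0-31; a halfword access 0, 2, ..., 62; a word access
   0, 4, ..., 124 (offset + 4 <= 128).  So offset 30 is accepted for byte
   and halfword accesses, while 126 is rejected for every mode.  */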
7097 bool
7098 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7100 if (TARGET_ARM)
7101 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7102 else if (TARGET_THUMB2)
7103 return thumb2_legitimate_address_p (mode, x, strict_p);
7104 else /* if (TARGET_THUMB1) */
7105 return thumb1_legitimate_address_p (mode, x, strict_p);
7108 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7110 Given an rtx X being reloaded into a reg required to be
7111 in class CLASS, return the class of reg to actually use.
7112 In general this is just CLASS, but for the Thumb core registers and
7113 immediate constants we prefer a LO_REGS class or a subset. */
7115 static reg_class_t
7116 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7118 if (TARGET_32BIT)
7119 return rclass;
7120 else
7122 if (rclass == GENERAL_REGS)
7123 return LO_REGS;
7124 else
7125 return rclass;
7129 /* Build the SYMBOL_REF for __tls_get_addr. */
7131 static GTY(()) rtx tls_get_addr_libfunc;
7133 static rtx
7134 get_tls_get_addr (void)
7136 if (!tls_get_addr_libfunc)
7137 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7138 return tls_get_addr_libfunc;
7142 arm_load_tp (rtx target)
7144 if (!target)
7145 target = gen_reg_rtx (SImode);
7147 if (TARGET_HARD_TP)
7149 /* Can return in any reg. */
7150 emit_insn (gen_load_tp_hard (target));
7152 else
7154 /* Always returned in r0. Immediately copy the result into a pseudo,
7155 otherwise other uses of r0 (e.g. setting up function arguments) may
7156 clobber the value. */
7158 rtx tmp;
7160 emit_insn (gen_load_tp_soft ());
7162 tmp = gen_rtx_REG (SImode, 0);
7163 emit_move_insn (target, tmp);
7165 return target;
7168 static rtx
7169 load_tls_operand (rtx x, rtx reg)
7171 rtx tmp;
7173 if (reg == NULL_RTX)
7174 reg = gen_reg_rtx (SImode);
7176 tmp = gen_rtx_CONST (SImode, x);
7178 emit_move_insn (reg, tmp);
7180 return reg;
7183 static rtx
7184 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7186 rtx insns, label, labelno, sum;
7188 gcc_assert (reloc != TLS_DESCSEQ);
7189 start_sequence ();
7191 labelno = GEN_INT (pic_labelno++);
7192 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7193 label = gen_rtx_CONST (VOIDmode, label);
7195 sum = gen_rtx_UNSPEC (Pmode,
7196 gen_rtvec (4, x, GEN_INT (reloc), label,
7197 GEN_INT (TARGET_ARM ? 8 : 4)),
7198 UNSPEC_TLS);
7199 reg = load_tls_operand (sum, reg);
7201 if (TARGET_ARM)
7202 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7203 else
7204 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7206 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7207 LCT_PURE, /* LCT_CONST? */
7208 Pmode, 1, reg, Pmode);
7210 insns = get_insns ();
7211 end_sequence ();
7213 return insns;
7216 static rtx
7217 arm_tls_descseq_addr (rtx x, rtx reg)
7219 rtx labelno = GEN_INT (pic_labelno++);
7220 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7221 rtx sum = gen_rtx_UNSPEC (Pmode,
7222 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7223 gen_rtx_CONST (VOIDmode, label),
7224 GEN_INT (!TARGET_ARM)),
7225 UNSPEC_TLS);
7226 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7228 emit_insn (gen_tlscall (x, labelno));
7229 if (!reg)
7230 reg = gen_reg_rtx (SImode);
7231 else
7232 gcc_assert (REGNO (reg) != 0);
7234 emit_move_insn (reg, reg0);
7236 return reg;
7240 legitimize_tls_address (rtx x, rtx reg)
7242 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7243 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7245 switch (model)
7247 case TLS_MODEL_GLOBAL_DYNAMIC:
7248 if (TARGET_GNU2_TLS)
7250 reg = arm_tls_descseq_addr (x, reg);
7252 tp = arm_load_tp (NULL_RTX);
7254 dest = gen_rtx_PLUS (Pmode, tp, reg);
7256 else
7258 /* Original scheme */
7259 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7260 dest = gen_reg_rtx (Pmode);
7261 emit_libcall_block (insns, dest, ret, x);
7263 return dest;
7265 case TLS_MODEL_LOCAL_DYNAMIC:
7266 if (TARGET_GNU2_TLS)
7268 reg = arm_tls_descseq_addr (x, reg);
7270 tp = arm_load_tp (NULL_RTX);
7272 dest = gen_rtx_PLUS (Pmode, tp, reg);
7274 else
7276 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7278 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7279 share the LDM result with other LD model accesses. */
7280 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7281 UNSPEC_TLS);
7282 dest = gen_reg_rtx (Pmode);
7283 emit_libcall_block (insns, dest, ret, eqv);
7285 /* Load the addend. */
7286 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7287 GEN_INT (TLS_LDO32)),
7288 UNSPEC_TLS);
7289 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7290 dest = gen_rtx_PLUS (Pmode, dest, addend);
7292 return dest;
7294 case TLS_MODEL_INITIAL_EXEC:
7295 labelno = GEN_INT (pic_labelno++);
7296 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7297 label = gen_rtx_CONST (VOIDmode, label);
7298 sum = gen_rtx_UNSPEC (Pmode,
7299 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7300 GEN_INT (TARGET_ARM ? 8 : 4)),
7301 UNSPEC_TLS);
7302 reg = load_tls_operand (sum, reg);
7304 if (TARGET_ARM)
7305 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7306 else if (TARGET_THUMB2)
7307 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7308 else
7310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7311 emit_move_insn (reg, gen_const_mem (SImode, reg));
7314 tp = arm_load_tp (NULL_RTX);
7316 return gen_rtx_PLUS (Pmode, tp, reg);
7318 case TLS_MODEL_LOCAL_EXEC:
7319 tp = arm_load_tp (NULL_RTX);
7321 reg = gen_rtx_UNSPEC (Pmode,
7322 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7323 UNSPEC_TLS);
7324 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7326 return gen_rtx_PLUS (Pmode, tp, reg);
7328 default:
7329 abort ();
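/* Illustrative only: at run time the two 'exec' models above reduce to
   pointer arithmetic on the thread pointer returned by arm_load_tp.
   The parameter names are invented; 'le_offset' stands for the TLS_LE32
   constant and 'got_slot' for the GOT entry holding the TLS_IE32 offset.  */
static void *
tls_local_exec_example (char *tp, unsigned long le_offset)
{
  return tp + le_offset;           /* LE: tp + link-time constant.  */
}

static void *
tls_initial_exec_example (char *tp, const unsigned long *got_slot)
{
  return tp + *got_slot;           /* IE: tp + offset loaded from the GOT.  */
}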
7333 /* Try machine-dependent ways of modifying an illegitimate address
7334 to be legitimate. If we find one, return the new, valid address. */
7336 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7338 if (arm_tls_referenced_p (x))
7340 rtx addend = NULL;
7342 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7344 addend = XEXP (XEXP (x, 0), 1);
7345 x = XEXP (XEXP (x, 0), 0);
7348 if (GET_CODE (x) != SYMBOL_REF)
7349 return x;
7351 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7353 x = legitimize_tls_address (x, NULL_RTX);
7355 if (addend)
7357 x = gen_rtx_PLUS (SImode, x, addend);
7358 orig_x = x;
7360 else
7361 return x;
7364 if (!TARGET_ARM)
7366 /* TODO: legitimize_address for Thumb2. */
7367 if (TARGET_THUMB2)
7368 return x;
7369 return thumb_legitimize_address (x, orig_x, mode);
7372 if (GET_CODE (x) == PLUS)
7374 rtx xop0 = XEXP (x, 0);
7375 rtx xop1 = XEXP (x, 1);
7377 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7378 xop0 = force_reg (SImode, xop0);
7380 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7381 && !symbol_mentioned_p (xop1))
7382 xop1 = force_reg (SImode, xop1);
7384 if (ARM_BASE_REGISTER_RTX_P (xop0)
7385 && CONST_INT_P (xop1))
7387 HOST_WIDE_INT n, low_n;
7388 rtx base_reg, val;
7389 n = INTVAL (xop1);
7391 /* VFP addressing modes actually allow greater offsets, but for
7392 now we just stick with the lowest common denominator. */
7393 if (mode == DImode
7394 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7396 low_n = n & 0x0f;
7397 n &= ~0x0f;
7398 if (low_n > 4)
7400 n += 16;
7401 low_n -= 16;
7404 else
7406 low_n = ((mode) == TImode ? 0
7407 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7408 n -= low_n;
7411 base_reg = gen_reg_rtx (SImode);
7412 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7413 emit_move_insn (base_reg, val);
7414 x = plus_constant (Pmode, base_reg, low_n);
7416 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7417 x = gen_rtx_PLUS (SImode, xop0, xop1);
7420 /* XXX We don't allow MINUS any more -- see comment in
7421 arm_legitimate_address_outer_p (). */
7422 else if (GET_CODE (x) == MINUS)
7424 rtx xop0 = XEXP (x, 0);
7425 rtx xop1 = XEXP (x, 1);
7427 if (CONSTANT_P (xop0))
7428 xop0 = force_reg (SImode, xop0);
7430 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7431 xop1 = force_reg (SImode, xop1);
7433 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7434 x = gen_rtx_MINUS (SImode, xop0, xop1);
7437 /* Make sure to take full advantage of the pre-indexed addressing mode
7438 with absolute addresses which often allows for the base register to
7439 be factorized for multiple adjacent memory references, and it might
7440 even allow for the minipool to be avoided entirely. */
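/* Editorial worked example (hypothetical constant, not from the original
   source): for SImode the code below uses bits == 12 and mask == 0xfff,
   so an absolute address of 0x12345 is split into base == 0x12000 and
   index == 0x345; the access becomes [base_reg, #0x345], and base_reg
   can then be shared by neighbouring references to nearby addresses.  */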
7441 else if (CONST_INT_P (x) && optimize > 0)
7443 unsigned int bits;
7444 HOST_WIDE_INT mask, base, index;
7445 rtx base_reg;
7447 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7448 use an 8-bit index. So let's use a 12-bit index for SImode only and
7449 hope that arm_gen_constant will enable ldrb to use more bits. */
7450 bits = (mode == SImode) ? 12 : 8;
7451 mask = (1 << bits) - 1;
7452 base = INTVAL (x) & ~mask;
7453 index = INTVAL (x) & mask;
7454 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7456 /* It'll most probably be more efficient to generate the base
7457 with more bits set and use a negative index instead. */
7458 base |= mask;
7459 index -= mask;
7461 base_reg = force_reg (SImode, GEN_INT (base));
7462 x = plus_constant (Pmode, base_reg, index);
7465 if (flag_pic)
7467 /* We need to find and carefully transform any SYMBOL and LABEL
7468 references; so go back to the original address expression. */
7469 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7471 if (new_x != orig_x)
7472 x = new_x;
7475 return x;
7479 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7480 to be legitimate. If we find one, return the new, valid address. */
7482 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7484 if (GET_CODE (x) == PLUS
7485 && CONST_INT_P (XEXP (x, 1))
7486 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7487 || INTVAL (XEXP (x, 1)) < 0))
7489 rtx xop0 = XEXP (x, 0);
7490 rtx xop1 = XEXP (x, 1);
7491 HOST_WIDE_INT offset = INTVAL (xop1);
7493 /* Try and fold the offset into a biasing of the base register and
7494 then offsetting that. Don't do this when optimizing for space
7495 since it can cause too many CSEs. */
7496 if (optimize_size && offset >= 0
7497 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7499 HOST_WIDE_INT delta;
7501 if (offset >= 256)
7502 delta = offset - (256 - GET_MODE_SIZE (mode));
7503 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7504 delta = 31 * GET_MODE_SIZE (mode);
7505 else
7506 delta = offset & (~31 * GET_MODE_SIZE (mode));
7508 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7509 NULL_RTX);
7510 x = plus_constant (Pmode, xop0, delta);
7512 else if (offset < 0 && offset > -256)
7513 /* Small negative offsets are best done with a subtract before the
7514 dereference; forcing these into a register normally takes two
7515 instructions. */
7516 x = force_operand (x, NULL_RTX);
7517 else
7519 /* For the remaining cases, force the constant into a register. */
7520 xop1 = force_reg (SImode, xop1);
7521 x = gen_rtx_PLUS (SImode, xop0, xop1);
7524 else if (GET_CODE (x) == PLUS
7525 && s_register_operand (XEXP (x, 1), SImode)
7526 && !s_register_operand (XEXP (x, 0), SImode))
7528 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7530 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7533 if (flag_pic)
7535 /* We need to find and carefully transform any SYMBOL and LABEL
7536 references; so go back to the original address expression. */
7537 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7539 if (new_x != orig_x)
7540 x = new_x;
7543 return x;
7546 bool
7547 arm_legitimize_reload_address (rtx *p,
7548 enum machine_mode mode,
7549 int opnum, int type,
7550 int ind_levels ATTRIBUTE_UNUSED)
7552 /* We must recognize output that we have already generated ourselves. */
7553 if (GET_CODE (*p) == PLUS
7554 && GET_CODE (XEXP (*p, 0)) == PLUS
7555 && REG_P (XEXP (XEXP (*p, 0), 0))
7556 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7557 && CONST_INT_P (XEXP (*p, 1)))
7559 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7560 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7561 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7562 return true;
7565 if (GET_CODE (*p) == PLUS
7566 && REG_P (XEXP (*p, 0))
7567 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7568 /* If the base register is equivalent to a constant, let the generic
7569 code handle it. Otherwise we will run into problems if a future
7570 reload pass decides to rematerialize the constant. */
7571 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7572 && CONST_INT_P (XEXP (*p, 1)))
7574 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7575 HOST_WIDE_INT low, high;
7577 /* Detect coprocessor load/stores. */
7578 bool coproc_p = ((TARGET_HARD_FLOAT
7579 && TARGET_VFP
7580 && (mode == SFmode || mode == DFmode))
7581 || (TARGET_REALLY_IWMMXT
7582 && VALID_IWMMXT_REG_MODE (mode))
7583 || (TARGET_NEON
7584 && (VALID_NEON_DREG_MODE (mode)
7585 || VALID_NEON_QREG_MODE (mode))));
7587 /* For some cases, bail out when the lower two bits of the offset are nonzero (unaligned). */
7588 if ((val & 0x3) != 0
7589 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7590 && (coproc_p
7591 /* For DI, and DF under soft-float: */
7592 || ((mode == DImode || mode == DFmode)
7593 /* Without ldrd, we use stm/ldm, which does not
7594 fare well with unaligned bits.
7595 && (! TARGET_LDRD
7596 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7597 || TARGET_THUMB2))))
7598 return false;
7600 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7601 where the (reg+high) part gets turned into a reload add insn,
7602 we try to decompose the index into high/low values that can often
7603 also lead to better reload CSE.
7604 For example:
7605 ldr r0, [r2, #4100] // Offset too large
7606 ldr r1, [r2, #4104] // Offset too large
7608 is best reloaded as:
7609 add t1, r2, #4096
7610 ldr r0, [t1, #4]
7611 add t2, r2, #4096
7612 ldr r1, [t2, #8]
7614 which post-reload CSE can simplify in most cases to eliminate the
7615 second add instruction:
7616 add t1, r2, #4096
7617 ldr r0, [t1, #4]
7618 ldr r1, [t1, #8]
7620 The idea here is that we want to split out the bits of the constant
7621 as a mask, rather than by subtracting the maximum offset that the
7622 respective type of load/store used can handle.
7624 A negative low part can still be used even if
7625 the overall offset is positive; sometimes this may lead to an immediate
7626 that can be constructed with fewer instructions.
7627 For example:
7628 ldr r0, [r2, #0x3FFFFC]
7630 This is best reloaded as:
7631 add t1, r2, #0x400000
7632 ldr r0, [t1, #-4]
7634 The trick for spotting this for a load insn with N bits of offset
7635 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7636 negative offset that is going to make bit N and all the bits below
7637 it become zero in the remainder part.
7639 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7640 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7641 used in most cases of ARM load/store instructions. */
7643 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7644 (((VAL) & ((1 << (N)) - 1)) \
7645 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7646 : 0)
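/* Editorial worked example, using the 0x3FFFFC case from the comment above:
   SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
     = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
     = (0x1FFC ^ 0x1000) - 0x1000
     = 0xFFC - 0x1000
     = -4,
   so low == -4 and high == val - low == 0x400000, which yields the
   "add t1, r2, #0x400000; ldr r0, [t1, #-4]" sequence shown above.  */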
7648 if (coproc_p)
7650 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7652 /* NEON quad-word load/stores are made of two double-word accesses,
7653 so the valid index range is reduced by 8. Treat as 9-bit range if
7654 we go over it. */
7655 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7656 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7658 else if (GET_MODE_SIZE (mode) == 8)
7660 if (TARGET_LDRD)
7661 low = (TARGET_THUMB2
7662 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7663 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7664 else
7665 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7666 to access doublewords. The supported load/store offsets are
7667 -8, -4, and 4, which we try to produce here. */
7668 low = ((val & 0xf) ^ 0x8) - 0x8;
7670 else if (GET_MODE_SIZE (mode) < 8)
7672 /* NEON element load/stores do not have an offset. */
7673 if (TARGET_NEON_FP16 && mode == HFmode)
7674 return false;
7676 if (TARGET_THUMB2)
7678 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7679 Try the wider 12-bit range first, and re-try if the result
7680 is out of range. */
7681 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7682 if (low < -255)
7683 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7685 else
7687 if (mode == HImode || mode == HFmode)
7689 if (arm_arch4)
7690 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7691 else
7693 /* The storehi/movhi_bytes fallbacks can use only
7694 [-4094,+4094] of the full ldrb/strb index range. */
7695 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7696 if (low == 4095 || low == -4095)
7697 return false;
7700 else
7701 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7704 else
7705 return false;
7707 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7708 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7709 - (unsigned HOST_WIDE_INT) 0x80000000);
7710 /* Check for overflow or zero. */
7711 if (low == 0 || high == 0 || (high + low != val))
7712 return false;
7714 /* Reload the high part into a base reg; leave the low part
7715 in the mem.
7716 Note that replacing this gen_rtx_PLUS with plus_constant is
7717 wrong in this case because we rely on the
7718 (plus (plus reg c1) c2) structure being preserved so that
7719 XEXP (*p, 0) in push_reload below uses the correct term. */
7720 *p = gen_rtx_PLUS (GET_MODE (*p),
7721 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7722 GEN_INT (high)),
7723 GEN_INT (low));
7724 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7725 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7726 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7727 return true;
7730 return false;
7734 thumb_legitimize_reload_address (rtx *x_p,
7735 enum machine_mode mode,
7736 int opnum, int type,
7737 int ind_levels ATTRIBUTE_UNUSED)
7739 rtx x = *x_p;
7741 if (GET_CODE (x) == PLUS
7742 && GET_MODE_SIZE (mode) < 4
7743 && REG_P (XEXP (x, 0))
7744 && XEXP (x, 0) == stack_pointer_rtx
7745 && CONST_INT_P (XEXP (x, 1))
7746 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7748 rtx orig_x = x;
7750 x = copy_rtx (x);
7751 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7752 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7753 return x;
7756 /* If both registers are hi-regs, then it's better to reload the
7757 entire expression rather than each register individually. That
7758 only requires one reload register rather than two. */
7759 if (GET_CODE (x) == PLUS
7760 && REG_P (XEXP (x, 0))
7761 && REG_P (XEXP (x, 1))
7762 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7763 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7765 rtx orig_x = x;
7767 x = copy_rtx (x);
7768 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7769 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7770 return x;
7773 return NULL;
7776 /* Test for various thread-local symbols. */
7778 /* Helper for arm_tls_referenced_p. */
7780 static int
7781 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7783 if (GET_CODE (*x) == SYMBOL_REF)
7784 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7786 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7787 TLS offsets, not real symbol references. */
7788 if (GET_CODE (*x) == UNSPEC
7789 && XINT (*x, 1) == UNSPEC_TLS)
7790 return -1;
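/* (Editor's note: a return value of -1 tells for_each_rtx to skip the
   sub-expressions of this rtx but continue the walk elsewhere.)  */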
7792 return 0;
7795 /* Return TRUE if X contains any TLS symbol references. */
7797 bool
7798 arm_tls_referenced_p (rtx x)
7800 if (! TARGET_HAVE_TLS)
7801 return false;
7803 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7806 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7808 On the ARM, allow any integer (invalid ones are removed later by insn
7809 patterns), nice doubles and symbol_refs which refer to the function's
7810 constant pool XXX.
7812 When generating pic allow anything. */
7814 static bool
7815 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7817 /* At present, we have no support for Neon structure constants, so forbid
7818 them here. It might be possible to handle simple cases like 0 and -1
7819 in future. */
7820 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7821 return false;
7823 return flag_pic || !label_mentioned_p (x);
7826 static bool
7827 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7829 return (CONST_INT_P (x)
7830 || CONST_DOUBLE_P (x)
7831 || CONSTANT_ADDRESS_P (x)
7832 || flag_pic);
7835 static bool
7836 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7838 return (!arm_cannot_force_const_mem (mode, x)
7839 && (TARGET_32BIT
7840 ? arm_legitimate_constant_p_1 (mode, x)
7841 : thumb_legitimate_constant_p (mode, x)));
7844 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7846 static bool
7847 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7849 rtx base, offset;
7851 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7853 split_const (x, &base, &offset);
7854 if (GET_CODE (base) == SYMBOL_REF
7855 && !offset_within_block_p (base, INTVAL (offset)))
7856 return true;
7858 return arm_tls_referenced_p (x);
7861 #define REG_OR_SUBREG_REG(X) \
7862 (REG_P (X) \
7863 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7865 #define REG_OR_SUBREG_RTX(X) \
7866 (REG_P (X) ? (X) : SUBREG_REG (X))
7868 static inline int
7869 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7871 enum machine_mode mode = GET_MODE (x);
7872 int total, words;
7874 switch (code)
7876 case ASHIFT:
7877 case ASHIFTRT:
7878 case LSHIFTRT:
7879 case ROTATERT:
7880 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7882 case PLUS:
7883 case MINUS:
7884 case COMPARE:
7885 case NEG:
7886 case NOT:
7887 return COSTS_N_INSNS (1);
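/* Editor's note on the MULT case below: the loop approximates a multiplier
   that retires two bits of the constant per cycle; e.g. a (hypothetical)
   constant of 0x55 needs four iterations of "i >>= 2" to reach zero, so
   the result is COSTS_N_INSNS (2) + 4.  */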
7889 case MULT:
7890 if (CONST_INT_P (XEXP (x, 1)))
7892 int cycles = 0;
7893 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7895 while (i)
7897 i >>= 2;
7898 cycles++;
7900 return COSTS_N_INSNS (2) + cycles;
7902 return COSTS_N_INSNS (1) + 16;
7904 case SET:
7905 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7906 the mode. */
7907 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7908 return (COSTS_N_INSNS (words)
7909 + 4 * ((MEM_P (SET_SRC (x)))
7910 + MEM_P (SET_DEST (x))));
7912 case CONST_INT:
7913 if (outer == SET)
7915 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7916 return 0;
7917 if (thumb_shiftable_const (INTVAL (x)))
7918 return COSTS_N_INSNS (2);
7919 return COSTS_N_INSNS (3);
7921 else if ((outer == PLUS || outer == COMPARE)
7922 && INTVAL (x) < 256 && INTVAL (x) > -256)
7923 return 0;
7924 else if ((outer == IOR || outer == XOR || outer == AND)
7925 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7926 return COSTS_N_INSNS (1);
7927 else if (outer == AND)
7929 int i;
7930 /* This duplicates the tests in the andsi3 expander. */
7931 for (i = 9; i <= 31; i++)
7932 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7933 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7934 return COSTS_N_INSNS (2);
7936 else if (outer == ASHIFT || outer == ASHIFTRT
7937 || outer == LSHIFTRT)
7938 return 0;
7939 return COSTS_N_INSNS (2);
7941 case CONST:
7942 case CONST_DOUBLE:
7943 case LABEL_REF:
7944 case SYMBOL_REF:
7945 return COSTS_N_INSNS (3);
7947 case UDIV:
7948 case UMOD:
7949 case DIV:
7950 case MOD:
7951 return 100;
7953 case TRUNCATE:
7954 return 99;
7956 case AND:
7957 case XOR:
7958 case IOR:
7959 /* XXX guess. */
7960 return 8;
7962 case MEM:
7963 /* XXX another guess. */
7964 /* Memory costs quite a lot for the first word, but subsequent words
7965 load at the equivalent of a single insn each. */
7966 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7967 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7968 ? 4 : 0));
7970 case IF_THEN_ELSE:
7971 /* XXX a guess. */
7972 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7973 return 14;
7974 return 2;
7976 case SIGN_EXTEND:
7977 case ZERO_EXTEND:
7978 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7979 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7981 if (mode == SImode)
7982 return total;
7984 if (arm_arch6)
7985 return total + COSTS_N_INSNS (1);
7987 /* Assume a two-shift sequence. Increase the cost slightly so
7988 we prefer actual shifts over an extend operation. */
7989 return total + 1 + COSTS_N_INSNS (2);
7991 default:
7992 return 99;
7996 static inline bool
7997 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7999 enum machine_mode mode = GET_MODE (x);
8000 enum rtx_code subcode;
8001 rtx operand;
8002 enum rtx_code code = GET_CODE (x);
8003 *total = 0;
8005 switch (code)
8007 case MEM:
8008 /* Memory costs quite a lot for the first word, but subsequent words
8009 load at the equivalent of a single insn each. */
8010 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8011 return true;
8013 case DIV:
8014 case MOD:
8015 case UDIV:
8016 case UMOD:
8017 if (TARGET_HARD_FLOAT && mode == SFmode)
8018 *total = COSTS_N_INSNS (2);
8019 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8020 *total = COSTS_N_INSNS (4);
8021 else
8022 *total = COSTS_N_INSNS (20);
8023 return false;
8025 case ROTATE:
8026 if (REG_P (XEXP (x, 1)))
8027 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8028 else if (!CONST_INT_P (XEXP (x, 1)))
8029 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8031 /* Fall through */
8032 case ROTATERT:
8033 if (mode != SImode)
8035 *total += COSTS_N_INSNS (4);
8036 return true;
8039 /* Fall through */
8040 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8041 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8042 if (mode == DImode)
8044 *total += COSTS_N_INSNS (3);
8045 return true;
8048 *total += COSTS_N_INSNS (1);
8049 /* Increase the cost of complex shifts because they aren't any faster,
8050 and reduce dual issue opportunities. */
8051 if (arm_tune_cortex_a9
8052 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8053 ++*total;
8055 return true;
8057 case MINUS:
8058 if (mode == DImode)
8060 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8061 if (CONST_INT_P (XEXP (x, 0))
8062 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8064 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8065 return true;
8068 if (CONST_INT_P (XEXP (x, 1))
8069 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8071 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8072 return true;
8075 return false;
8078 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8080 if (TARGET_HARD_FLOAT
8081 && (mode == SFmode
8082 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8084 *total = COSTS_N_INSNS (1);
8085 if (CONST_DOUBLE_P (XEXP (x, 0))
8086 && arm_const_double_rtx (XEXP (x, 0)))
8088 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8089 return true;
8092 if (CONST_DOUBLE_P (XEXP (x, 1))
8093 && arm_const_double_rtx (XEXP (x, 1)))
8095 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8096 return true;
8099 return false;
8101 *total = COSTS_N_INSNS (20);
8102 return false;
8105 *total = COSTS_N_INSNS (1);
8106 if (CONST_INT_P (XEXP (x, 0))
8107 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8109 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8110 return true;
8113 subcode = GET_CODE (XEXP (x, 1));
8114 if (subcode == ASHIFT || subcode == ASHIFTRT
8115 || subcode == LSHIFTRT
8116 || subcode == ROTATE || subcode == ROTATERT)
8118 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8119 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8120 return true;
8123 /* A shift as a part of RSB costs no more than RSB itself. */
8124 if (GET_CODE (XEXP (x, 0)) == MULT
8125 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8127 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8128 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8129 return true;
8132 if (subcode == MULT
8133 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8135 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8136 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8137 return true;
8140 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8141 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8143 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8144 if (REG_P (XEXP (XEXP (x, 1), 0))
8145 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8146 *total += COSTS_N_INSNS (1);
8148 return true;
8151 /* Fall through */
8153 case PLUS:
8154 if (code == PLUS && arm_arch6 && mode == SImode
8155 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8156 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8158 *total = COSTS_N_INSNS (1);
8159 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8160 0, speed);
8161 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8162 return true;
8165 /* MLA: All arguments must be registers. We filter out
8166 multiplication by a power of two, so that we fall down into
8167 the code below. */
8168 if (GET_CODE (XEXP (x, 0)) == MULT
8169 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8171 /* The cost comes from the cost of the multiply. */
8172 return false;
8175 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8177 if (TARGET_HARD_FLOAT
8178 && (mode == SFmode
8179 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8181 *total = COSTS_N_INSNS (1);
8182 if (CONST_DOUBLE_P (XEXP (x, 1))
8183 && arm_const_double_rtx (XEXP (x, 1)))
8185 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8186 return true;
8189 return false;
8192 *total = COSTS_N_INSNS (20);
8193 return false;
8196 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8197 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8199 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8200 if (REG_P (XEXP (XEXP (x, 0), 0))
8201 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8202 *total += COSTS_N_INSNS (1);
8203 return true;
8206 /* Fall through */
8208 case AND: case XOR: case IOR:
8210 /* Normally the frame registers will be split into reg+const during
8211 reload, so it is a bad idea to combine them with other instructions,
8212 since then they might not be moved outside of loops. As a compromise
8213 we allow integration with ops that have a constant as their second
8214 operand. */
8215 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8216 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8217 && !CONST_INT_P (XEXP (x, 1)))
8218 *total = COSTS_N_INSNS (1);
8220 if (mode == DImode)
8222 *total += COSTS_N_INSNS (2);
8223 if (CONST_INT_P (XEXP (x, 1))
8224 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8226 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8227 return true;
8230 return false;
8233 *total += COSTS_N_INSNS (1);
8234 if (CONST_INT_P (XEXP (x, 1))
8235 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8237 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8238 return true;
8240 subcode = GET_CODE (XEXP (x, 0));
8241 if (subcode == ASHIFT || subcode == ASHIFTRT
8242 || subcode == LSHIFTRT
8243 || subcode == ROTATE || subcode == ROTATERT)
8245 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8246 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8247 return true;
8250 if (subcode == MULT
8251 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8253 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8254 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8255 return true;
8258 if (subcode == UMIN || subcode == UMAX
8259 || subcode == SMIN || subcode == SMAX)
8261 *total = COSTS_N_INSNS (3);
8262 return true;
8265 return false;
8267 case MULT:
8268 /* This should have been handled by the CPU specific routines. */
8269 gcc_unreachable ();
8271 case TRUNCATE:
8272 if (arm_arch3m && mode == SImode
8273 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8274 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8275 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8276 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8277 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8278 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8280 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8281 return true;
8283 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8284 return false;
8286 case NEG:
8287 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8289 if (TARGET_HARD_FLOAT
8290 && (mode == SFmode
8291 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8293 *total = COSTS_N_INSNS (1);
8294 return false;
8296 *total = COSTS_N_INSNS (2);
8297 return false;
8300 /* Fall through */
8301 case NOT:
8302 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8303 if (mode == SImode && code == NOT)
8305 subcode = GET_CODE (XEXP (x, 0));
8306 if (subcode == ASHIFT || subcode == ASHIFTRT
8307 || subcode == LSHIFTRT
8308 || subcode == ROTATE || subcode == ROTATERT
8309 || (subcode == MULT
8310 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8312 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8313 /* Register shifts cost an extra cycle. */
8314 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8315 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8316 subcode, 1, speed);
8317 return true;
8321 return false;
8323 case IF_THEN_ELSE:
8324 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8326 *total = COSTS_N_INSNS (4);
8327 return true;
8330 operand = XEXP (x, 0);
8332 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8333 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8334 && REG_P (XEXP (operand, 0))
8335 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8336 *total += COSTS_N_INSNS (1);
8337 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8338 + rtx_cost (XEXP (x, 2), code, 2, speed));
8339 return true;
8341 case NE:
8342 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8344 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8345 return true;
8347 goto scc_insn;
8349 case GE:
8350 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8351 && mode == SImode && XEXP (x, 1) == const0_rtx)
8353 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8354 return true;
8356 goto scc_insn;
8358 case LT:
8359 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8360 && mode == SImode && XEXP (x, 1) == const0_rtx)
8362 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8363 return true;
8365 goto scc_insn;
8367 case EQ:
8368 case GT:
8369 case LE:
8370 case GEU:
8371 case LTU:
8372 case GTU:
8373 case LEU:
8374 case UNORDERED:
8375 case ORDERED:
8376 case UNEQ:
8377 case UNGE:
8378 case UNLT:
8379 case UNGT:
8380 case UNLE:
8381 scc_insn:
8382 /* SCC insns. In the case where the comparison has already been
8383 performed, then they cost 2 instructions. Otherwise they need
8384 an additional comparison before them. */
8385 *total = COSTS_N_INSNS (2);
8386 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8388 return true;
8391 /* Fall through */
8392 case COMPARE:
8393 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8395 *total = 0;
8396 return true;
8399 *total += COSTS_N_INSNS (1);
8400 if (CONST_INT_P (XEXP (x, 1))
8401 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8403 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8404 return true;
8407 subcode = GET_CODE (XEXP (x, 0));
8408 if (subcode == ASHIFT || subcode == ASHIFTRT
8409 || subcode == LSHIFTRT
8410 || subcode == ROTATE || subcode == ROTATERT)
8412 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8413 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8414 return true;
8417 if (subcode == MULT
8418 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8420 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8421 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8422 return true;
8425 return false;
8427 case UMIN:
8428 case UMAX:
8429 case SMIN:
8430 case SMAX:
8431 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8432 if (!CONST_INT_P (XEXP (x, 1))
8433 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8434 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8435 return true;
8437 case ABS:
8438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8440 if (TARGET_HARD_FLOAT
8441 && (mode == SFmode
8442 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8444 *total = COSTS_N_INSNS (1);
8445 return false;
8447 *total = COSTS_N_INSNS (20);
8448 return false;
8450 *total = COSTS_N_INSNS (1);
8451 if (mode == DImode)
8452 *total += COSTS_N_INSNS (3);
8453 return false;
8455 case SIGN_EXTEND:
8456 case ZERO_EXTEND:
8457 *total = 0;
8458 if (GET_MODE_CLASS (mode) == MODE_INT)
8460 rtx op = XEXP (x, 0);
8461 enum machine_mode opmode = GET_MODE (op);
8463 if (mode == DImode)
8464 *total += COSTS_N_INSNS (1);
8466 if (opmode != SImode)
8468 if (MEM_P (op))
8470 /* If !arm_arch4, we use one of the extendhisi2_mem
8471 or movhi_bytes patterns for HImode. For a QImode
8472 sign extension, we first zero-extend from memory
8473 and then perform a shift sequence. */
8474 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8475 *total += COSTS_N_INSNS (2);
8477 else if (arm_arch6)
8478 *total += COSTS_N_INSNS (1);
8480 /* We don't have the necessary insn, so we need to perform some
8481 other operation. */
8482 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8483 /* An and with constant 255. */
8484 *total += COSTS_N_INSNS (1);
8485 else
8486 /* A shift sequence. Increase costs slightly to avoid
8487 combining two shifts into an extend operation. */
8488 *total += COSTS_N_INSNS (2) + 1;
8491 return false;
8494 switch (GET_MODE (XEXP (x, 0)))
8496 case V8QImode:
8497 case V4HImode:
8498 case V2SImode:
8499 case V4QImode:
8500 case V2HImode:
8501 *total = COSTS_N_INSNS (1);
8502 return false;
8504 default:
8505 gcc_unreachable ();
8507 gcc_unreachable ();
8509 case ZERO_EXTRACT:
8510 case SIGN_EXTRACT:
8511 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8512 return true;
8514 case CONST_INT:
8515 if (const_ok_for_arm (INTVAL (x))
8516 || const_ok_for_arm (~INTVAL (x)))
8517 *total = COSTS_N_INSNS (1);
8518 else
8519 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8520 INTVAL (x), NULL_RTX,
8521 NULL_RTX, 0, 0));
8522 return true;
8524 case CONST:
8525 case LABEL_REF:
8526 case SYMBOL_REF:
8527 *total = COSTS_N_INSNS (3);
8528 return true;
8530 case HIGH:
8531 *total = COSTS_N_INSNS (1);
8532 return true;
8534 case LO_SUM:
8535 *total = COSTS_N_INSNS (1);
8536 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8537 return true;
8539 case CONST_DOUBLE:
8540 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8541 && (mode == SFmode || !TARGET_VFP_SINGLE))
8542 *total = COSTS_N_INSNS (1);
8543 else
8544 *total = COSTS_N_INSNS (4);
8545 return true;
8547 case SET:
8548 /* The vec_extract patterns accept memory operands that require an
8549 address reload. Account for the cost of that reload to give the
8550 auto-inc-dec pass an incentive to try to replace them. */
8551 if (TARGET_NEON && MEM_P (SET_DEST (x))
8552 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8554 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8555 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8556 *total += COSTS_N_INSNS (1);
8557 return true;
8559 /* Likewise for the vec_set patterns. */
8560 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8561 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8562 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8564 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8565 *total = rtx_cost (mem, code, 0, speed);
8566 if (!neon_vector_mem_operand (mem, 2, true))
8567 *total += COSTS_N_INSNS (1);
8568 return true;
8570 return false;
8572 case UNSPEC:
8573 /* We cost this as high as our memory costs to allow this to
8574 be hoisted from loops. */
8575 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8577 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8579 return true;
8581 case CONST_VECTOR:
8582 if (TARGET_NEON
8583 && TARGET_HARD_FLOAT
8584 && outer == SET
8585 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8586 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8587 *total = COSTS_N_INSNS (1);
8588 else
8589 *total = COSTS_N_INSNS (4);
8590 return true;
8592 default:
8593 *total = COSTS_N_INSNS (4);
8594 return false;
8598 /* Estimates the size cost of thumb1 instructions.
8599 For now most of the code is copied from thumb1_rtx_costs. We need more
8600 fine-grained tuning when we have more related test cases. */
8601 static inline int
8602 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8604 enum machine_mode mode = GET_MODE (x);
8605 int words;
8607 switch (code)
8609 case ASHIFT:
8610 case ASHIFTRT:
8611 case LSHIFTRT:
8612 case ROTATERT:
8613 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8615 case PLUS:
8616 case MINUS:
8617 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8618 defined by RTL expansion, especially for the expansion of
8619 multiplication. */
8620 if ((GET_CODE (XEXP (x, 0)) == MULT
8621 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8622 || (GET_CODE (XEXP (x, 1)) == MULT
8623 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8624 return COSTS_N_INSNS (2);
8625 /* Fall through on purpose for normal RTX. */
8626 case COMPARE:
8627 case NEG:
8628 case NOT:
8629 return COSTS_N_INSNS (1);
8631 case MULT:
8632 if (CONST_INT_P (XEXP (x, 1)))
8634 /* The Thumb1 mul instruction can't operate on a const. We must load it
8635 into a register first. */
8636 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8637 return COSTS_N_INSNS (1) + const_size;
8639 return COSTS_N_INSNS (1);
8641 case SET:
8642 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8643 the mode. */
8644 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8645 return (COSTS_N_INSNS (words)
8646 + 4 * ((MEM_P (SET_SRC (x)))
8647 + MEM_P (SET_DEST (x))));
8649 case CONST_INT:
8650 if (outer == SET)
8652 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8653 return COSTS_N_INSNS (1);
8654 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8655 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8656 return COSTS_N_INSNS (2);
8657 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8658 if (thumb_shiftable_const (INTVAL (x)))
8659 return COSTS_N_INSNS (2);
8660 return COSTS_N_INSNS (3);
8662 else if ((outer == PLUS || outer == COMPARE)
8663 && INTVAL (x) < 256 && INTVAL (x) > -256)
8664 return 0;
8665 else if ((outer == IOR || outer == XOR || outer == AND)
8666 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8667 return COSTS_N_INSNS (1);
8668 else if (outer == AND)
8670 int i;
8671 /* This duplicates the tests in the andsi3 expander. */
8672 for (i = 9; i <= 31; i++)
8673 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8674 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8675 return COSTS_N_INSNS (2);
8677 else if (outer == ASHIFT || outer == ASHIFTRT
8678 || outer == LSHIFTRT)
8679 return 0;
8680 return COSTS_N_INSNS (2);
8682 case CONST:
8683 case CONST_DOUBLE:
8684 case LABEL_REF:
8685 case SYMBOL_REF:
8686 return COSTS_N_INSNS (3);
8688 case UDIV:
8689 case UMOD:
8690 case DIV:
8691 case MOD:
8692 return 100;
8694 case TRUNCATE:
8695 return 99;
8697 case AND:
8698 case XOR:
8699 case IOR:
8700 /* XXX guess. */
8701 return 8;
8703 case MEM:
8704 /* XXX another guess. */
8705 /* Memory costs quite a lot for the first word, but subsequent words
8706 load at the equivalent of a single insn each. */
8707 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8708 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8709 ? 4 : 0));
8711 case IF_THEN_ELSE:
8712 /* XXX a guess. */
8713 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8714 return 14;
8715 return 2;
8717 case ZERO_EXTEND:
8718 /* XXX still guessing. */
8719 switch (GET_MODE (XEXP (x, 0)))
8721 case QImode:
8722 return (1 + (mode == DImode ? 4 : 0)
8723 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8725 case HImode:
8726 return (4 + (mode == DImode ? 4 : 0)
8727 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8729 case SImode:
8730 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8732 default:
8733 return 99;
8736 default:
8737 return 99;
8741 /* RTX costs when optimizing for size. */
8742 static bool
8743 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8744 int *total)
8746 enum machine_mode mode = GET_MODE (x);
8747 if (TARGET_THUMB1)
8749 *total = thumb1_size_rtx_costs (x, code, outer_code);
8750 return true;
8753 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8754 switch (code)
8756 case MEM:
8757 /* A memory access costs 1 insn if the mode is small, or the address is
8758 a single register, otherwise it costs one insn per word. */
8759 if (REG_P (XEXP (x, 0)))
8760 *total = COSTS_N_INSNS (1);
8761 else if (flag_pic
8762 && GET_CODE (XEXP (x, 0)) == PLUS
8763 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8764 /* This will be split into two instructions.
8765 See arm.md:calculate_pic_address. */
8766 *total = COSTS_N_INSNS (2);
8767 else
8768 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8769 return true;
8771 case DIV:
8772 case MOD:
8773 case UDIV:
8774 case UMOD:
8775 /* Needs a libcall, so it costs about this. */
8776 *total = COSTS_N_INSNS (2);
8777 return false;
8779 case ROTATE:
8780 if (mode == SImode && REG_P (XEXP (x, 1)))
8782 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8783 return true;
8785 /* Fall through */
8786 case ROTATERT:
8787 case ASHIFT:
8788 case LSHIFTRT:
8789 case ASHIFTRT:
8790 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8792 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8793 return true;
8795 else if (mode == SImode)
8797 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8798 /* Slightly disparage register shifts, but not by much. */
8799 if (!CONST_INT_P (XEXP (x, 1)))
8800 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8801 return true;
8804 /* Needs a libcall. */
8805 *total = COSTS_N_INSNS (2);
8806 return false;
8808 case MINUS:
8809 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8810 && (mode == SFmode || !TARGET_VFP_SINGLE))
8812 *total = COSTS_N_INSNS (1);
8813 return false;
8816 if (mode == SImode)
8818 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8819 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8821 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8822 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8823 || subcode1 == ROTATE || subcode1 == ROTATERT
8824 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8825 || subcode1 == ASHIFTRT)
8827 /* It's just the cost of the two operands. */
8828 *total = 0;
8829 return false;
8832 *total = COSTS_N_INSNS (1);
8833 return false;
8836 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8837 return false;
8839 case PLUS:
8840 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8841 && (mode == SFmode || !TARGET_VFP_SINGLE))
8843 *total = COSTS_N_INSNS (1);
8844 return false;
8847 /* A shift as a part of ADD costs nothing. */
8848 if (GET_CODE (XEXP (x, 0)) == MULT
8849 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8851 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8852 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8853 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8854 return true;
8857 /* Fall through */
8858 case AND: case XOR: case IOR:
8859 if (mode == SImode)
8861 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8863 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8864 || subcode == LSHIFTRT || subcode == ASHIFTRT
8865 || (code == AND && subcode == NOT))
8867 /* It's just the cost of the two operands. */
8868 *total = 0;
8869 return false;
8873 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8874 return false;
8876 case MULT:
8877 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8878 return false;
8880 case NEG:
8881 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8882 && (mode == SFmode || !TARGET_VFP_SINGLE))
8884 *total = COSTS_N_INSNS (1);
8885 return false;
8888 /* Fall through */
8889 case NOT:
8890 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8892 return false;
8894 case IF_THEN_ELSE:
8895 *total = 0;
8896 return false;
8898 case COMPARE:
8899 if (cc_register (XEXP (x, 0), VOIDmode))
8900 * total = 0;
8901 else
8902 *total = COSTS_N_INSNS (1);
8903 return false;
8905 case ABS:
8906 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8907 && (mode == SFmode || !TARGET_VFP_SINGLE))
8908 *total = COSTS_N_INSNS (1);
8909 else
8910 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8911 return false;
8913 case SIGN_EXTEND:
8914 case ZERO_EXTEND:
8915 return arm_rtx_costs_1 (x, outer_code, total, 0);
8917 case CONST_INT:
8918 if (const_ok_for_arm (INTVAL (x)))
8919 /* A multiplication by a constant requires another instruction
8920 to load the constant to a register. */
8921 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8922 ? 1 : 0);
8923 else if (const_ok_for_arm (~INTVAL (x)))
8924 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8925 else if (const_ok_for_arm (-INTVAL (x)))
8927 if (outer_code == COMPARE || outer_code == PLUS
8928 || outer_code == MINUS)
8929 *total = 0;
8930 else
8931 *total = COSTS_N_INSNS (1);
8933 else
8934 *total = COSTS_N_INSNS (2);
8935 return true;
8937 case CONST:
8938 case LABEL_REF:
8939 case SYMBOL_REF:
8940 *total = COSTS_N_INSNS (2);
8941 return true;
8943 case CONST_DOUBLE:
8944 *total = COSTS_N_INSNS (4);
8945 return true;
8947 case CONST_VECTOR:
8948 if (TARGET_NEON
8949 && TARGET_HARD_FLOAT
8950 && outer_code == SET
8951 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8952 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8953 *total = COSTS_N_INSNS (1);
8954 else
8955 *total = COSTS_N_INSNS (4);
8956 return true;
8958 case HIGH:
8959 case LO_SUM:
8960 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8961 cost of these slightly. */
8962 *total = COSTS_N_INSNS (1) + 1;
8963 return true;
8965 case SET:
8966 return false;
8968 default:
8969 if (mode != VOIDmode)
8970 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8971 else
8972 *total = COSTS_N_INSNS (4); /* Who knows? */
8973 return false;
8977 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8978 operand, then return the operand that is being shifted. If the shift
8979 is not by a constant, then set SHIFT_REG to point to the operand.
8980 Return NULL if OP is not a shifter operand. */
8981 static rtx
8982 shifter_op_p (rtx op, rtx *shift_reg)
8984 enum rtx_code code = GET_CODE (op);
8986 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8987 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8988 return XEXP (op, 0);
8989 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8990 return XEXP (op, 0);
8991 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8992 || code == ASHIFTRT)
8994 if (!CONST_INT_P (XEXP (op, 1)))
8995 *shift_reg = XEXP (op, 1);
8996 return XEXP (op, 0);
8999 return NULL;
9002 static bool
9003 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9005 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9006 gcc_assert (GET_CODE (x) == UNSPEC);
9008 switch (XINT (x, 1))
9010 case UNSPEC_UNALIGNED_LOAD:
9011 /* We can only do unaligned loads into the integer unit, and we can't
9012 use LDM or LDRD. */
9013 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9014 if (speed_p)
9015 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9016 + extra_cost->ldst.load_unaligned);
9018 #ifdef NOT_YET
9019 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9020 ADDR_SPACE_GENERIC, speed_p);
9021 #endif
9022 return true;
9024 case UNSPEC_UNALIGNED_STORE:
9025 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9026 if (speed_p)
9027 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9028 + extra_cost->ldst.store_unaligned);
9030 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9031 #ifdef NOT_YET
9032 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9033 ADDR_SPACE_GENERIC, speed_p);
9034 #endif
9035 return true;
9037 case UNSPEC_VRINTZ:
9038 case UNSPEC_VRINTP:
9039 case UNSPEC_VRINTM:
9040 case UNSPEC_VRINTR:
9041 case UNSPEC_VRINTX:
9042 case UNSPEC_VRINTA:
9043 *cost = COSTS_N_INSNS (1);
9044 if (speed_p)
9045 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9047 return true;
9048 default:
9049 *cost = COSTS_N_INSNS (2);
9050 break;
9052 return false;
9055 /* Cost of a libcall. We assume one insn per argument, an amount for the
9056 call (one insn for -Os) and then one for processing the result. */
9057 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
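/* Editorial example of the macro above: LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed (two argument insns plus
   the assumed 18 for the call and result) and to COSTS_N_INSNS (4) when
   optimizing for size.  */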
9059 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9060 do \
9062 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9063 if (shift_op != NULL \
9064 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9066 if (shift_reg) \
9068 if (speed_p) \
9069 *cost += extra_cost->alu.arith_shift_reg; \
9070 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9072 else if (speed_p) \
9073 *cost += extra_cost->alu.arith_shift; \
9075 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9076 + rtx_cost (XEXP (x, 1 - IDX), \
9077 OP, 1, speed_p)); \
9078 return true; \
9081 while (0);
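/* Editor's note: each HANDLE_NARROW_SHIFT_ARITH (OP, IDX) use below expands
   in place.  It checks whether operand IDX of the enclosing rtx X is a
   left shift (via shifter_op_p and arm_rtx_shift_left_p); if so it adds
   the arith-shift cost plus the operand costs to *cost and returns true
   from the containing function, otherwise it falls through.  */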
9083 /* RTX costs. Make an estimate of the cost of executing the operation
9084 X, which is contained with an operation with code OUTER_CODE.
9085 SPEED_P indicates whether the cost desired is the performance cost,
9086 or the size cost. The estimate is stored in COST and the return
9087 value is TRUE if the cost calculation is final, or FALSE if the
9088 caller should recurse through the operands of X to add additional
9089 costs.
9091 We currently make no attempt to model the size savings of Thumb-2
9092 16-bit instructions. At the normal points in compilation where
9093 this code is called we have no measure of whether the condition
9094 flags are live or not, and thus no realistic way to determine what
9095 the size will eventually be. */
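/* Editorial sketch of how the return value described above is meant to be
   consumed; the wrapper shown here is illustrative, not the actual hook:

     if (!arm_new_rtx_costs (x, code, outer_code, cost_table, &cost, speed_p))
       cost += <sum of rtx_cost () over the operands of X>;
*/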
9096 static bool
9097 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9098 const struct cpu_cost_table *extra_cost,
9099 int *cost, bool speed_p)
9101 enum machine_mode mode = GET_MODE (x);
9103 if (TARGET_THUMB1)
9105 if (speed_p)
9106 *cost = thumb1_rtx_costs (x, code, outer_code);
9107 else
9108 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9109 return true;
9112 switch (code)
9114 case SET:
9115 *cost = 0;
9116 /* SET RTXs don't have a mode so we get it from the destination. */
9117 mode = GET_MODE (SET_DEST (x));
9119 if (REG_P (SET_SRC (x))
9120 && REG_P (SET_DEST (x)))
9122 /* Assume that most copies can be done with a single insn,
9123 unless we don't have HW FP, in which case everything
9124 larger than word mode will require two insns. */
9125 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9126 && GET_MODE_SIZE (mode) > 4)
9127 || mode == DImode)
9128 ? 2 : 1);
9129 /* Conditional register moves can be encoded
9130 in 16 bits in Thumb mode. */
9131 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9132 *cost >>= 1;
9134 return true;
9137 if (CONST_INT_P (SET_SRC (x)))
9139 /* Handle CONST_INT here, since the value doesn't have a mode
9140 and we would otherwise be unable to work out the true cost. */
9141 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9142 outer_code = SET;
9143 /* Slightly lower the cost of setting a core reg to a constant.
9144 This helps break up chains and allows for better scheduling. */
9145 if (REG_P (SET_DEST (x))
9146 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9147 *cost -= 1;
9148 x = SET_SRC (x);
9149 /* Immediate moves with an immediate in the range [0, 255] can be
9150 encoded in 16 bits in Thumb mode. */
9151 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9152 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9153 *cost >>= 1;
9154 goto const_int_cost;
9157 return false;
9159 case MEM:
9160 /* A memory access costs 1 insn if the mode is small, or the address is
9161 a single register, otherwise it costs one insn per word. */
9162 if (REG_P (XEXP (x, 0)))
9163 *cost = COSTS_N_INSNS (1);
9164 else if (flag_pic
9165 && GET_CODE (XEXP (x, 0)) == PLUS
9166 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9167 /* This will be split into two instructions.
9168 See arm.md:calculate_pic_address. */
9169 *cost = COSTS_N_INSNS (2);
9170 else
9171 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9173 /* For speed optimizations, add the costs of the address and
9174 accessing memory. */
9175 if (speed_p)
9176 #ifdef NOT_YET
9177 *cost += (extra_cost->ldst.load
9178 + arm_address_cost (XEXP (x, 0), mode,
9179 ADDR_SPACE_GENERIC, speed_p));
9180 #else
9181 *cost += extra_cost->ldst.load;
9182 #endif
9183 return true;
9185 case PARALLEL:
9187 /* Calculations of LDM costs are complex. We assume an initial cost
9188 (ldm_1st) which will load the number of registers mentioned in
9189 ldm_regs_per_insn_1st registers; then each additional
9190 ldm_regs_per_insn_subsequent registers cost one more insn. The
9191 formula for N regs is thus:
9193 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9194 + ldm_regs_per_insn_subsequent - 1)
9195 / ldm_regs_per_insn_subsequent).
9197 Additional costs may also be added for addressing. A similar
9198 formula is used for STM. */
9200 bool is_ldm = load_multiple_operation (x, SImode);
9201 bool is_stm = store_multiple_operation (x, SImode);
9203 *cost = COSTS_N_INSNS (1);
9205 if (is_ldm || is_stm)
9207 if (speed_p)
9209 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9210 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9211 ? extra_cost->ldst.ldm_regs_per_insn_1st
9212 : extra_cost->ldst.stm_regs_per_insn_1st;
9213 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9214 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9215 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9217 *cost += regs_per_insn_1st
9218 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9219 + regs_per_insn_sub - 1)
9220 / regs_per_insn_sub);
9221 return true;
9225 return false;
9227 case DIV:
9228 case UDIV:
9229 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9230 && (mode == SFmode || !TARGET_VFP_SINGLE))
9231 *cost = COSTS_N_INSNS (speed_p
9232 ? extra_cost->fp[mode != SFmode].div : 1);
9233 else if (mode == SImode && TARGET_IDIV)
9234 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9235 else
9236 *cost = LIBCALL_COST (2);
9237 return false; /* All arguments must be in registers. */
9239 case MOD:
9240 case UMOD:
9241 *cost = LIBCALL_COST (2);
9242 return false; /* All arguments must be in registers. */
9244 case ROTATE:
9245 if (mode == SImode && REG_P (XEXP (x, 1)))
9247 *cost = (COSTS_N_INSNS (2)
9248 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9249 if (speed_p)
9250 *cost += extra_cost->alu.shift_reg;
9251 return true;
9253 /* Fall through */
9254 case ROTATERT:
9255 case ASHIFT:
9256 case LSHIFTRT:
9257 case ASHIFTRT:
9258 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9260 *cost = (COSTS_N_INSNS (3)
9261 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9262 if (speed_p)
9263 *cost += 2 * extra_cost->alu.shift;
9264 return true;
9266 else if (mode == SImode)
9268 *cost = (COSTS_N_INSNS (1)
9269 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9270 /* Slightly disparage register shifts at -Os, but not by much. */
9271 if (!CONST_INT_P (XEXP (x, 1)))
9272 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9273 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9274 return true;
9276 else if (GET_MODE_CLASS (mode) == MODE_INT
9277 && GET_MODE_SIZE (mode) < 4)
9279 if (code == ASHIFT)
9281 *cost = (COSTS_N_INSNS (1)
9282 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9283 /* Slightly disparage register shifts at -Os, but not by
9284 much. */
9285 if (!CONST_INT_P (XEXP (x, 1)))
9286 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9287 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9289 else if (code == LSHIFTRT || code == ASHIFTRT)
9291 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9293 /* Can use SBFX/UBFX. */
9294 *cost = COSTS_N_INSNS (1);
9295 if (speed_p)
9296 *cost += extra_cost->alu.bfx;
9297 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9299 else
9301 *cost = COSTS_N_INSNS (2);
9302 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9303 if (speed_p)
9305 if (CONST_INT_P (XEXP (x, 1)))
9306 *cost += 2 * extra_cost->alu.shift;
9307 else
9308 *cost += (extra_cost->alu.shift
9309 + extra_cost->alu.shift_reg);
9311 else
9312 /* Slightly disparage register shifts. */
9313 *cost += !CONST_INT_P (XEXP (x, 1));
9316 else /* Rotates. */
9318 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9319 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9320 if (speed_p)
9322 if (CONST_INT_P (XEXP (x, 1)))
9323 *cost += (2 * extra_cost->alu.shift
9324 + extra_cost->alu.log_shift);
9325 else
9326 *cost += (extra_cost->alu.shift
9327 + extra_cost->alu.shift_reg
9328 + extra_cost->alu.log_shift_reg);
9331 return true;
9334 *cost = LIBCALL_COST (2);
9335 return false;
9337 case MINUS:
9338 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9339 && (mode == SFmode || !TARGET_VFP_SINGLE))
9341 *cost = COSTS_N_INSNS (1);
9342 if (GET_CODE (XEXP (x, 0)) == MULT
9343 || GET_CODE (XEXP (x, 1)) == MULT)
9345 rtx mul_op0, mul_op1, sub_op;
9347 if (speed_p)
9348 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9350 if (GET_CODE (XEXP (x, 0)) == MULT)
9352 mul_op0 = XEXP (XEXP (x, 0), 0);
9353 mul_op1 = XEXP (XEXP (x, 0), 1);
9354 sub_op = XEXP (x, 1);
9356 else
9358 mul_op0 = XEXP (XEXP (x, 1), 0);
9359 mul_op1 = XEXP (XEXP (x, 1), 1);
9360 sub_op = XEXP (x, 0);
9363 /* The first operand of the multiply may be optionally
9364 negated. */
9365 if (GET_CODE (mul_op0) == NEG)
9366 mul_op0 = XEXP (mul_op0, 0);
9368 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9369 + rtx_cost (mul_op1, code, 0, speed_p)
9370 + rtx_cost (sub_op, code, 0, speed_p));
9372 return true;
9375 if (speed_p)
9376 *cost += extra_cost->fp[mode != SFmode].addsub;
9377 return false;
9380 if (mode == SImode)
9382 rtx shift_by_reg = NULL;
9383 rtx shift_op;
9384 rtx non_shift_op;
9386 *cost = COSTS_N_INSNS (1);
9388 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9389 if (shift_op == NULL)
9391 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9392 non_shift_op = XEXP (x, 0);
9394 else
9395 non_shift_op = XEXP (x, 1);
9397 if (shift_op != NULL)
9399 if (shift_by_reg != NULL)
9401 if (speed_p)
9402 *cost += extra_cost->alu.arith_shift_reg;
9403 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9405 else if (speed_p)
9406 *cost += extra_cost->alu.arith_shift;
9408 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9409 + rtx_cost (non_shift_op, code, 0, speed_p));
9410 return true;
9413 if (arm_arch_thumb2
9414 && GET_CODE (XEXP (x, 1)) == MULT)
9416 /* MLS. */
9417 if (speed_p)
9418 *cost += extra_cost->mult[0].add;
9419 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9420 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9421 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9422 return true;
9425 if (CONST_INT_P (XEXP (x, 0)))
9427 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9428 INTVAL (XEXP (x, 0)), NULL_RTX,
9429 NULL_RTX, 1, 0);
9430 *cost = COSTS_N_INSNS (insns);
9431 if (speed_p)
9432 *cost += insns * extra_cost->alu.arith;
9433 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9434 return true;
9437 return false;
9440 if (GET_MODE_CLASS (mode) == MODE_INT
9441 && GET_MODE_SIZE (mode) < 4)
9443 rtx shift_op, shift_reg;
9444 shift_reg = NULL;
9446 /* We check both sides of the MINUS for shifter operands since,
9447 unlike PLUS, it's not commutative. */
9449 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9450 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9452 /* Slightly disparage, as we might need to widen the result. */
9453 *cost = 1 + COSTS_N_INSNS (1);
9454 if (speed_p)
9455 *cost += extra_cost->alu.arith;
9457 if (CONST_INT_P (XEXP (x, 0)))
9459 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9460 return true;
9463 return false;
9466 if (mode == DImode)
9468 *cost = COSTS_N_INSNS (2);
9470 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9472 rtx op1 = XEXP (x, 1);
9474 if (speed_p)
9475 *cost += 2 * extra_cost->alu.arith;
9477 if (GET_CODE (op1) == ZERO_EXTEND)
9478 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9479 else
9480 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9481 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9482 0, speed_p);
9483 return true;
9485 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9487 if (speed_p)
9488 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9489 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9490 0, speed_p)
9491 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9492 return true;
9494 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9495 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9497 if (speed_p)
9498 *cost += (extra_cost->alu.arith
9499 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9500 ? extra_cost->alu.arith
9501 : extra_cost->alu.arith_shift));
9502 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9503 + rtx_cost (XEXP (XEXP (x, 1), 0),
9504 GET_CODE (XEXP (x, 1)), 0, speed_p));
9505 return true;
9508 if (speed_p)
9509 *cost += 2 * extra_cost->alu.arith;
9510 return false;
9513 /* Vector mode? */
9515 *cost = LIBCALL_COST (2);
9516 return false;
9518 case PLUS:
9519 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9520 && (mode == SFmode || !TARGET_VFP_SINGLE))
9522 *cost = COSTS_N_INSNS (1);
9523 if (GET_CODE (XEXP (x, 0)) == MULT)
9525 rtx mul_op0, mul_op1, add_op;
9527 if (speed_p)
9528 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9530 mul_op0 = XEXP (XEXP (x, 0), 0);
9531 mul_op1 = XEXP (XEXP (x, 0), 1);
9532 add_op = XEXP (x, 1);
9534 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9535 + rtx_cost (mul_op1, code, 0, speed_p)
9536 + rtx_cost (add_op, code, 0, speed_p));
9538 return true;
9541 if (speed_p)
9542 *cost += extra_cost->fp[mode != SFmode].addsub;
9543 return false;
9545 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9547 *cost = LIBCALL_COST (2);
9548 return false;
9551 /* Narrow modes can be synthesized in SImode, but the range
9552 of useful sub-operations is limited. Check for shift operations
9553 on one of the operands. Only left shifts can be used in the
9554 narrow modes. */
9555 if (GET_MODE_CLASS (mode) == MODE_INT
9556 && GET_MODE_SIZE (mode) < 4)
9558 rtx shift_op, shift_reg;
9559 shift_reg = NULL;
9561 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9563 if (CONST_INT_P (XEXP (x, 1)))
9565 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9566 INTVAL (XEXP (x, 1)), NULL_RTX,
9567 NULL_RTX, 1, 0);
9568 *cost = COSTS_N_INSNS (insns);
9569 if (speed_p)
9570 *cost += insns * extra_cost->alu.arith;
9571 /* Slightly penalize a narrow operation as the result may
9572 need widening. */
9573 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9574 return true;
9577 /* Slightly penalize a narrow operation as the result may
9578 need widening. */
9579 *cost = 1 + COSTS_N_INSNS (1);
9580 if (speed_p)
9581 *cost += extra_cost->alu.arith;
9583 return false;
9586 if (mode == SImode)
9588 rtx shift_op, shift_reg;
9590 *cost = COSTS_N_INSNS (1);
9591 if (TARGET_INT_SIMD
9592 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9593 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9595 /* UXTA[BH] or SXTA[BH]. */
9596 if (speed_p)
9597 *cost += extra_cost->alu.extend_arith;
9598 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9599 speed_p)
9600 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9601 return true;
9604 shift_reg = NULL;
9605 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9606 if (shift_op != NULL)
9608 if (shift_reg)
9610 if (speed_p)
9611 *cost += extra_cost->alu.arith_shift_reg;
9612 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9614 else if (speed_p)
9615 *cost += extra_cost->alu.arith_shift;
9617 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9618 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9619 return true;
9621 if (GET_CODE (XEXP (x, 0)) == MULT)
9623 rtx mul_op = XEXP (x, 0);
9625 *cost = COSTS_N_INSNS (1);
9627 if (TARGET_DSP_MULTIPLY
9628 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9629 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9630 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9631 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9632 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9633 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9634 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9635 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9636 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9637 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9638 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9639 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9640 == 16))))))
9642 /* SMLA[BT][BT]. */
9643 if (speed_p)
9644 *cost += extra_cost->mult[0].extend_add;
9645 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9646 SIGN_EXTEND, 0, speed_p)
9647 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9648 SIGN_EXTEND, 0, speed_p)
9649 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9650 return true;
9653 if (speed_p)
9654 *cost += extra_cost->mult[0].add;
9655 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9656 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9657 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9658 return true;
9660 if (CONST_INT_P (XEXP (x, 1)))
9662 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9663 INTVAL (XEXP (x, 1)), NULL_RTX,
9664 NULL_RTX, 1, 0);
9665 *cost = COSTS_N_INSNS (insns);
9666 if (speed_p)
9667 *cost += insns * extra_cost->alu.arith;
9668 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9669 return true;
9671 return false;
9674 if (mode == DImode)
9676 if (arm_arch3m
9677 && GET_CODE (XEXP (x, 0)) == MULT
9678 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9679 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9680 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9681 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9683 *cost = COSTS_N_INSNS (1);
9684 if (speed_p)
9685 *cost += extra_cost->mult[1].extend_add;
9686 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9687 ZERO_EXTEND, 0, speed_p)
9688 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9689 ZERO_EXTEND, 0, speed_p)
9690 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9691 return true;
9694 *cost = COSTS_N_INSNS (2);
9696 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9697 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9699 if (speed_p)
9700 *cost += (extra_cost->alu.arith
9701 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9702 ? extra_cost->alu.arith
9703 : extra_cost->alu.arith_shift));
9705 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9706 speed_p)
9707 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9708 return true;
9711 if (speed_p)
9712 *cost += 2 * extra_cost->alu.arith;
9713 return false;
9716 /* Vector mode? */
9717 *cost = LIBCALL_COST (2);
9718 return false;
9720 case AND: case XOR: case IOR:
9721 if (mode == SImode)
9723 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9724 rtx op0 = XEXP (x, 0);
9725 rtx shift_op, shift_reg;
9727 *cost = COSTS_N_INSNS (1);
9729 if (subcode == NOT
9730 && (code == AND
9731 || (code == IOR && TARGET_THUMB2)))
9732 op0 = XEXP (op0, 0);
9734 shift_reg = NULL;
9735 shift_op = shifter_op_p (op0, &shift_reg);
9736 if (shift_op != NULL)
9738 if (shift_reg)
9740 if (speed_p)
9741 *cost += extra_cost->alu.log_shift_reg;
9742 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9744 else if (speed_p)
9745 *cost += extra_cost->alu.log_shift;
9747 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9748 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9749 return true;
9752 if (CONST_INT_P (XEXP (x, 1)))
9754 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9755 INTVAL (XEXP (x, 1)), NULL_RTX,
9756 NULL_RTX, 1, 0);
9758 *cost = COSTS_N_INSNS (insns);
9759 if (speed_p)
9760 *cost += insns * extra_cost->alu.logical;
9761 *cost += rtx_cost (op0, code, 0, speed_p);
9762 return true;
9765 if (speed_p)
9766 *cost += extra_cost->alu.logical;
9767 *cost += (rtx_cost (op0, code, 0, speed_p)
9768 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9769 return true;
9772 if (mode == DImode)
9774 rtx op0 = XEXP (x, 0);
9775 enum rtx_code subcode = GET_CODE (op0);
9777 *cost = COSTS_N_INSNS (2);
9779 if (subcode == NOT
9780 && (code == AND
9781 || (code == IOR && TARGET_THUMB2)))
9782 op0 = XEXP (op0, 0);
9784 if (GET_CODE (op0) == ZERO_EXTEND)
9786 if (speed_p)
9787 *cost += 2 * extra_cost->alu.logical;
9789 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9790 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9791 return true;
9793 else if (GET_CODE (op0) == SIGN_EXTEND)
9795 if (speed_p)
9796 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9798 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9799 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9800 return true;
9803 if (speed_p)
9804 *cost += 2 * extra_cost->alu.logical;
9806 return true;
9808 /* Vector mode? */
9810 *cost = LIBCALL_COST (2);
9811 return false;
9813 case MULT:
9814 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9815 && (mode == SFmode || !TARGET_VFP_SINGLE))
9817 rtx op0 = XEXP (x, 0);
9819 *cost = COSTS_N_INSNS (1);
9821 if (GET_CODE (op0) == NEG)
9822 op0 = XEXP (op0, 0);
9824 if (speed_p)
9825 *cost += extra_cost->fp[mode != SFmode].mult;
9827 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9828 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9829 return true;
9831 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9833 *cost = LIBCALL_COST (2);
9834 return false;
9837 if (mode == SImode)
9839 *cost = COSTS_N_INSNS (1);
9840 if (TARGET_DSP_MULTIPLY
9841 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9842 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9843 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9844 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9845 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9846 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9847 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9848 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9849 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9850 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9851 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9852 && (INTVAL (XEXP (XEXP (x, 1), 1))
9853 == 16))))))
9855 /* SMUL[TB][TB]. */
9856 if (speed_p)
9857 *cost += extra_cost->mult[0].extend;
9858 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9859 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9860 return true;
9862 if (speed_p)
9863 *cost += extra_cost->mult[0].simple;
9864 return false;
9867 if (mode == DImode)
9869 if (arm_arch3m
9870 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9871 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9872 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9873 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9875 *cost = COSTS_N_INSNS (1);
9876 if (speed_p)
9877 *cost += extra_cost->mult[1].extend;
9878 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9879 ZERO_EXTEND, 0, speed_p)
9880 + rtx_cost (XEXP (XEXP (x, 1), 0),
9881 ZERO_EXTEND, 0, speed_p));
9882 return true;
9885 *cost = LIBCALL_COST (2);
9886 return false;
9889 /* Vector mode? */
9890 *cost = LIBCALL_COST (2);
9891 return false;
9893 case NEG:
9894 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9895 && (mode == SFmode || !TARGET_VFP_SINGLE))
9897 *cost = COSTS_N_INSNS (1);
9898 if (speed_p)
9899 *cost += extra_cost->fp[mode != SFmode].neg;
9901 return false;
9903 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9905 *cost = LIBCALL_COST (1);
9906 return false;
9909 if (mode == SImode)
9911 if (GET_CODE (XEXP (x, 0)) == ABS)
9913 *cost = COSTS_N_INSNS (2);
9914 /* Assume the non-flag-changing variant. */
9915 if (speed_p)
9916 *cost += (extra_cost->alu.log_shift
9917 + extra_cost->alu.arith_shift);
9918 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9919 return true;
9922 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9923 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9925 *cost = COSTS_N_INSNS (2);
9926 /* No extra cost for MOV imm and MVN imm. */
9927 /* If the comparison op is using the flags, there's no further
9928 cost; otherwise we need to add the cost of the comparison. */
9929 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9930 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9931 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9933 *cost += (COSTS_N_INSNS (1)
9934 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9935 speed_p)
9936 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9937 speed_p));
9938 if (speed_p)
9939 *cost += extra_cost->alu.arith;
9941 return true;
9943 *cost = COSTS_N_INSNS (1);
9944 if (speed_p)
9945 *cost += extra_cost->alu.arith;
9946 return false;
9949 if (GET_MODE_CLASS (mode) == MODE_INT
9950 && GET_MODE_SIZE (mode) < 4)
9952 /* Slightly disparage, as we might need an extend operation. */
9953 *cost = 1 + COSTS_N_INSNS (1);
9954 if (speed_p)
9955 *cost += extra_cost->alu.arith;
9956 return false;
9959 if (mode == DImode)
9961 *cost = COSTS_N_INSNS (2);
9962 if (speed_p)
9963 *cost += 2 * extra_cost->alu.arith;
9964 return false;
9967 /* Vector mode? */
9968 *cost = LIBCALL_COST (1);
9969 return false;
9971 case NOT:
9972 if (mode == SImode)
9974 rtx shift_op;
9975 rtx shift_reg = NULL;
9977 *cost = COSTS_N_INSNS (1);
9978 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9980 if (shift_op)
9982 if (shift_reg != NULL)
9984 if (speed_p)
9985 *cost += extra_cost->alu.log_shift_reg;
9986 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9988 else if (speed_p)
9989 *cost += extra_cost->alu.log_shift;
9990 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9991 return true;
9994 if (speed_p)
9995 *cost += extra_cost->alu.logical;
9996 return false;
9998 if (mode == DImode)
10000 *cost = COSTS_N_INSNS (2);
10001 return false;
10004 /* Vector mode? */
10006 *cost += LIBCALL_COST (1);
10007 return false;
10009 case IF_THEN_ELSE:
10011 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10013 *cost = COSTS_N_INSNS (4);
10014 return true;
10016 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10017 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10019 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10020 /* Assume that if one arm of the if_then_else is a register,
10021 it will be tied with the result, eliminating the
10022 conditional insn. */
10023 if (REG_P (XEXP (x, 1)))
10024 *cost += op2cost;
10025 else if (REG_P (XEXP (x, 2)))
10026 *cost += op1cost;
10027 else
10029 if (speed_p)
10031 if (extra_cost->alu.non_exec_costs_exec)
10032 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10033 else
10034 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10036 else
10037 *cost += op1cost + op2cost;
10040 return true;
10042 case COMPARE:
10043 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10044 *cost = 0;
10045 else
10047 enum machine_mode op0mode;
10048 /* We'll mostly assume that the cost of a compare is the cost of the
10049 LHS. However, there are some notable exceptions. */
10051 /* Floating point compares are never done as side-effects. */
10052 op0mode = GET_MODE (XEXP (x, 0));
10053 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10054 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10056 *cost = COSTS_N_INSNS (1);
10057 if (speed_p)
10058 *cost += extra_cost->fp[op0mode != SFmode].compare;
10060 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10062 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10063 return true;
10066 return false;
10068 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10070 *cost = LIBCALL_COST (2);
10071 return false;
10074 /* DImode compares normally take two insns. */
10075 if (op0mode == DImode)
10077 *cost = COSTS_N_INSNS (2);
10078 if (speed_p)
10079 *cost += 2 * extra_cost->alu.arith;
10080 return false;
10083 if (op0mode == SImode)
10085 rtx shift_op;
10086 rtx shift_reg;
10088 if (XEXP (x, 1) == const0_rtx
10089 && !(REG_P (XEXP (x, 0))
10090 || (GET_CODE (XEXP (x, 0)) == SUBREG
10091 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10093 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10095 /* Multiply operations that set the flags are often
10096 significantly more expensive. */
10097 if (speed_p
10098 && GET_CODE (XEXP (x, 0)) == MULT
10099 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10100 *cost += extra_cost->mult[0].flag_setting;
10102 if (speed_p
10103 && GET_CODE (XEXP (x, 0)) == PLUS
10104 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10105 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10106 0), 1), mode))
10107 *cost += extra_cost->mult[0].flag_setting;
10108 return true;
10111 shift_reg = NULL;
10112 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10113 if (shift_op != NULL)
10115 *cost = COSTS_N_INSNS (1);
10116 if (shift_reg != NULL)
10118 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10119 if (speed_p)
10120 *cost += extra_cost->alu.arith_shift_reg;
10122 else if (speed_p)
10123 *cost += extra_cost->alu.arith_shift;
10124 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10125 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10126 return true;
10129 *cost = COSTS_N_INSNS (1);
10130 if (speed_p)
10131 *cost += extra_cost->alu.arith;
10132 if (CONST_INT_P (XEXP (x, 1))
10133 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10135 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10136 return true;
10138 return false;
10141 /* Vector mode? */
10143 *cost = LIBCALL_COST (2);
10144 return false;
10146 return true;
10148 case EQ:
10149 case NE:
10150 case LT:
10151 case LE:
10152 case GT:
10153 case GE:
10154 case LTU:
10155 case LEU:
10156 case GEU:
10157 case GTU:
10158 case ORDERED:
10159 case UNORDERED:
10160 case UNEQ:
10161 case UNLE:
10162 case UNLT:
10163 case UNGE:
10164 case UNGT:
10165 case LTGT:
10166 if (outer_code == SET)
10168 /* Is it a store-flag operation? */
10169 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10170 && XEXP (x, 1) == const0_rtx)
10172 /* Thumb also needs an IT insn. */
10173 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10174 return true;
10176 if (XEXP (x, 1) == const0_rtx)
10178 switch (code)
10180 case LT:
10181 /* LSR Rd, Rn, #31. */
10182 *cost = COSTS_N_INSNS (1);
10183 if (speed_p)
10184 *cost += extra_cost->alu.shift;
10185 break;
10187 case EQ:
10188 /* RSBS T1, Rn, #0
10189 ADC Rd, Rn, T1. */
10191 case NE:
10192 /* SUBS T1, Rn, #1
10193 SBC Rd, Rn, T1. */
10194 *cost = COSTS_N_INSNS (2);
10195 break;
10197 case LE:
10198 /* RSBS T1, Rn, Rn, LSR #31
10199 ADC Rd, Rn, T1. */
10200 *cost = COSTS_N_INSNS (2);
10201 if (speed_p)
10202 *cost += extra_cost->alu.arith_shift;
10203 break;
10205 case GT:
10206 /* RSB Rd, Rn, Rn, ASR #1
10207 LSR Rd, Rd, #31. */
10208 *cost = COSTS_N_INSNS (2);
10209 if (speed_p)
10210 *cost += (extra_cost->alu.arith_shift
10211 + extra_cost->alu.shift);
10212 break;
10214 case GE:
10215 /* ASR Rd, Rn, #31
10216 ADD Rd, Rd, #1. */
10217 *cost = COSTS_N_INSNS (2);
10218 if (speed_p)
10219 *cost += extra_cost->alu.shift;
10220 break;
10222 default:
10223 /* Remaining cases are either meaningless or would take
10224 three insns anyway. */
10225 *cost = COSTS_N_INSNS (3);
10226 break;
10228 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10229 return true;
10231 else
10233 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10234 if (CONST_INT_P (XEXP (x, 1))
10235 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10237 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10238 return true;
10241 return false;
10244 /* Not directly inside a set. If it involves the condition code
10245 register it must be the condition for a branch, cond_exec or
10246 I_T_E operation. Since the comparison is performed elsewhere
10247 this is just the control part which has no additional
10248 cost. */
10249 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10250 && XEXP (x, 1) == const0_rtx)
10252 *cost = 0;
10253 return true;
10255 return false;
10257 case ABS:
10258 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10259 && (mode == SFmode || !TARGET_VFP_SINGLE))
10261 *cost = COSTS_N_INSNS (1);
10262 if (speed_p)
10263 *cost += extra_cost->fp[mode != SFmode].neg;
10265 return false;
10267 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10269 *cost = LIBCALL_COST (1);
10270 return false;
10273 if (mode == SImode)
10275 *cost = COSTS_N_INSNS (1);
10276 if (speed_p)
10277 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10278 return false;
10280 /* Vector mode? */
10281 *cost = LIBCALL_COST (1);
10282 return false;
10284 case SIGN_EXTEND:
10285 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10286 && MEM_P (XEXP (x, 0)))
10288 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10290 if (mode == DImode)
10291 *cost += COSTS_N_INSNS (1);
10293 if (!speed_p)
10294 return true;
10296 if (GET_MODE (XEXP (x, 0)) == SImode)
10297 *cost += extra_cost->ldst.load;
10298 else
10299 *cost += extra_cost->ldst.load_sign_extend;
10301 if (mode == DImode)
10302 *cost += extra_cost->alu.shift;
10304 return true;
10307 /* Widening from less than 32-bits requires an extend operation. */
10308 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10310 /* We have SXTB/SXTH. */
10311 *cost = COSTS_N_INSNS (1);
10312 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10313 if (speed_p)
10314 *cost += extra_cost->alu.extend;
10316 else if (GET_MODE (XEXP (x, 0)) != SImode)
10318 /* Needs two shifts. */
10319 *cost = COSTS_N_INSNS (2);
10320 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10321 if (speed_p)
10322 *cost += 2 * extra_cost->alu.shift;
10325 /* Widening beyond 32-bits requires one more insn. */
10326 if (mode == DImode)
10328 *cost += COSTS_N_INSNS (1);
10329 if (speed_p)
10330 *cost += extra_cost->alu.shift;
10333 return true;
10335 case ZERO_EXTEND:
10336 if ((arm_arch4
10337 || GET_MODE (XEXP (x, 0)) == SImode
10338 || GET_MODE (XEXP (x, 0)) == QImode)
10339 && MEM_P (XEXP (x, 0)))
10341 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10343 if (mode == DImode)
10344 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10346 return true;
10349 /* Widening from less than 32-bits requires an extend operation. */
10350 if (GET_MODE (XEXP (x, 0)) == QImode)
10352 /* UXTB can be a shorter instruction in Thumb2, but it might
10353 be slower than the AND Rd, Rn, #255 alternative. When
10354 optimizing for speed it should never be slower to use
10355 AND, and we don't really model 16-bit vs 32-bit insns
10356 here. */
10357 *cost = COSTS_N_INSNS (1);
10358 if (speed_p)
10359 *cost += extra_cost->alu.logical;
10361 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10363 /* We have UXTB/UXTH. */
10364 *cost = COSTS_N_INSNS (1);
10365 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10366 if (speed_p)
10367 *cost += extra_cost->alu.extend;
10369 else if (GET_MODE (XEXP (x, 0)) != SImode)
10371 /* Needs two shifts. It's marginally preferable to use
10372 shifts rather than two BIC instructions as the second
10373 shift may merge with a subsequent insn as a shifter
10374 op. */
10375 *cost = COSTS_N_INSNS (2);
10376 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10377 if (speed_p)
10378 *cost += 2 * extra_cost->alu.shift;
10380 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10381 *cost = COSTS_N_INSNS (1);
10383 /* Widening beyond 32-bits requires one more insn. */
10384 if (mode == DImode)
10386 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10389 return true;
10391 case CONST_INT:
10392 *cost = 0;
10393 /* CONST_INT has no mode, so we cannot tell for sure how many
10394 insns are really going to be needed. The best we can do is
10395 look at the value passed. If it fits in SImode, then assume
10396 that's the mode it will be used for. Otherwise assume it
10397 will be used in DImode. */
10398 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10399 mode = SImode;
10400 else
10401 mode = DImode;
10403 /* Avoid blowing up in arm_gen_constant (). */
10404 if (!(outer_code == PLUS
10405 || outer_code == AND
10406 || outer_code == IOR
10407 || outer_code == XOR
10408 || outer_code == MINUS))
10409 outer_code = SET;
10411 const_int_cost:
10412 if (mode == SImode)
10414 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10415 INTVAL (x), NULL, NULL,
10416 0, 0));
10417 /* Extra costs? */
10419 else
10421 *cost += COSTS_N_INSNS (arm_gen_constant
10422 (outer_code, SImode, NULL,
10423 trunc_int_for_mode (INTVAL (x), SImode),
10424 NULL, NULL, 0, 0)
10425 + arm_gen_constant (outer_code, SImode, NULL,
10426 INTVAL (x) >> 32, NULL,
10427 NULL, 0, 0));
10428 /* Extra costs? */
10431 return true;
10433 case CONST:
10434 case LABEL_REF:
10435 case SYMBOL_REF:
10436 if (speed_p)
10438 if (arm_arch_thumb2 && !flag_pic)
10439 *cost = COSTS_N_INSNS (2);
10440 else
10441 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10443 else
10444 *cost = COSTS_N_INSNS (2);
10446 if (flag_pic)
10448 *cost += COSTS_N_INSNS (1);
10449 if (speed_p)
10450 *cost += extra_cost->alu.arith;
10453 return true;
10455 case CONST_FIXED:
10456 *cost = COSTS_N_INSNS (4);
10457 /* Fixme. */
10458 return true;
10460 case CONST_DOUBLE:
10461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10462 && (mode == SFmode || !TARGET_VFP_SINGLE))
10464 if (vfp3_const_double_rtx (x))
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->fp[mode == DFmode].fpconst;
10469 return true;
10472 if (speed_p)
10474 *cost = COSTS_N_INSNS (1);
10475 if (mode == DFmode)
10476 *cost += extra_cost->ldst.loadd;
10477 else
10478 *cost += extra_cost->ldst.loadf;
10480 else
10481 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10483 return true;
10485 *cost = COSTS_N_INSNS (4);
10486 return true;
10488 case CONST_VECTOR:
10489 /* Fixme. */
10490 if (TARGET_NEON
10491 && TARGET_HARD_FLOAT
10492 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10493 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10494 *cost = COSTS_N_INSNS (1);
10495 else
10496 *cost = COSTS_N_INSNS (4);
10497 return true;
10499 case HIGH:
10500 case LO_SUM:
10501 *cost = COSTS_N_INSNS (1);
10502 /* When optimizing for size, we prefer constant pool entries to
10503 MOVW/MOVT pairs, so bump the cost of these slightly. */
10504 if (!speed_p)
10505 *cost += 1;
10506 return true;
10508 case CLZ:
10509 *cost = COSTS_N_INSNS (1);
10510 if (speed_p)
10511 *cost += extra_cost->alu.clz;
10512 return false;
10514 case SMIN:
10515 if (XEXP (x, 1) == const0_rtx)
10517 *cost = COSTS_N_INSNS (1);
10518 if (speed_p)
10519 *cost += extra_cost->alu.log_shift;
10520 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10521 return true;
10523 /* Fall through. */
10524 case SMAX:
10525 case UMIN:
10526 case UMAX:
10527 *cost = COSTS_N_INSNS (2);
10528 return false;
10530 case TRUNCATE:
10531 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10532 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10533 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10534 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10535 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10536 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10537 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10538 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10539 == ZERO_EXTEND))))
10541 *cost = COSTS_N_INSNS (1);
10542 if (speed_p)
10543 *cost += extra_cost->mult[1].extend;
10544 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10545 speed_p)
10546 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10547 0, speed_p));
10548 return true;
10550 *cost = LIBCALL_COST (1);
10551 return false;
10553 case UNSPEC:
10554 return arm_unspec_cost (x, outer_code, speed_p, cost);
10556 case PC:
10557 /* Reading the PC is like reading any other register. Writing it
10558 is more expensive, but we take that into account elsewhere. */
10559 *cost = 0;
10560 return true;
10562 case ZERO_EXTRACT:
10563 /* TODO: Simple zero_extract of bottom bits using AND. */
10564 /* Fall through. */
10565 case SIGN_EXTRACT:
10566 if (arm_arch6
10567 && mode == SImode
10568 && CONST_INT_P (XEXP (x, 1))
10569 && CONST_INT_P (XEXP (x, 2)))
10571 *cost = COSTS_N_INSNS (1);
10572 if (speed_p)
10573 *cost += extra_cost->alu.bfx;
10574 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10575 return true;
10577 /* Without UBFX/SBFX, need to resort to shift operations. */
10578 *cost = COSTS_N_INSNS (2);
10579 if (speed_p)
10580 *cost += 2 * extra_cost->alu.shift;
10581 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10582 return true;
10584 case FLOAT_EXTEND:
10585 if (TARGET_HARD_FLOAT)
10587 *cost = COSTS_N_INSNS (1);
10588 if (speed_p)
10589 *cost += extra_cost->fp[mode == DFmode].widen;
10590 if (!TARGET_FPU_ARMV8
10591 && GET_MODE (XEXP (x, 0)) == HFmode)
10593 /* Pre v8, widening HF->DF is a two-step process, first
10594 widening to SFmode. */
10595 *cost += COSTS_N_INSNS (1);
10596 if (speed_p)
10597 *cost += extra_cost->fp[0].widen;
10599 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10600 return true;
10603 *cost = LIBCALL_COST (1);
10604 return false;
10606 case FLOAT_TRUNCATE:
10607 if (TARGET_HARD_FLOAT)
10609 *cost = COSTS_N_INSNS (1);
10610 if (speed_p)
10611 *cost += extra_cost->fp[mode == DFmode].narrow;
10612 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10613 return true;
10614 /* Vector modes? */
10616 *cost = LIBCALL_COST (1);
10617 return false;
10619 case FIX:
10620 case UNSIGNED_FIX:
10621 if (TARGET_HARD_FLOAT)
10623 if (GET_MODE_CLASS (mode) == MODE_INT)
10625 *cost = COSTS_N_INSNS (1);
10626 if (speed_p)
10627 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10628 /* Strip off the 'cost' of rounding towards zero. */
10629 if (GET_CODE (XEXP (x, 0)) == FIX)
10630 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10631 else
10632 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10633 /* ??? Increase the cost to deal with transferring from
10634 FP -> CORE registers? */
10635 return true;
10637 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10638 && TARGET_FPU_ARMV8)
10640 *cost = COSTS_N_INSNS (1);
10641 if (speed_p)
10642 *cost += extra_cost->fp[mode == DFmode].roundint;
10643 return false;
10645 /* Vector costs? */
10647 *cost = LIBCALL_COST (1);
10648 return false;
10650 case FLOAT:
10651 case UNSIGNED_FLOAT:
10652 if (TARGET_HARD_FLOAT)
10654 /* ??? Increase the cost to deal with transferring from CORE
10655 -> FP registers? */
10656 *cost = COSTS_N_INSNS (1);
10657 if (speed_p)
10658 *cost += extra_cost->fp[mode == DFmode].fromint;
10659 return false;
10661 *cost = LIBCALL_COST (1);
10662 return false;
10664 case CALL:
10665 *cost = COSTS_N_INSNS (1);
10666 return true;
10668 case ASM_OPERANDS:
10669 /* Just a guess. Cost one insn per input. */
10670 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10671 return true;
10673 default:
10674 if (mode != VOIDmode)
10675 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10676 else
10677 *cost = COSTS_N_INSNS (4); /* Who knows? */
10678 return false;
10682 #undef HANDLE_NARROW_SHIFT_ARITH
10684 /* RTX costs. Dispatch to the appropriate cost routine, depending on the tuning in use and whether we are optimizing for speed or size. */
10685 static bool
10686 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10687 int *total, bool speed)
10689 bool result;
10691 if (TARGET_OLD_RTX_COSTS
10692 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10694 /* Old way. (Deprecated.) */
10695 if (!speed)
10696 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10697 (enum rtx_code) outer_code, total);
10698 else
10699 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10700 (enum rtx_code) outer_code, total,
10701 speed);
10703 else
10705 /* New way. */
10706 if (current_tune->insn_extra_cost)
10707 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10708 (enum rtx_code) outer_code,
10709 current_tune->insn_extra_cost,
10710 total, speed);
10711 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10712 && current_tune->insn_extra_cost == NULL */
10713 else
10714 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10715 (enum rtx_code) outer_code,
10716 &generic_extra_costs, total, speed);
10719 if (dump_file && (dump_flags & TDF_DETAILS))
10721 print_rtl_single (dump_file, x);
10722 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10723 *total, result ? "final" : "partial");
10725 return result;
10728 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10729 supported on any "slowmul" cores, so it can be ignored. */
10731 static bool
10732 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10733 int *total, bool speed)
10735 enum machine_mode mode = GET_MODE (x);
10737 if (TARGET_THUMB)
10739 *total = thumb1_rtx_costs (x, code, outer_code);
10740 return true;
10743 switch (code)
10745 case MULT:
10746 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10747 || mode == DImode)
10749 *total = COSTS_N_INSNS (20);
10750 return false;
10753 if (CONST_INT_P (XEXP (x, 1)))
10755 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10756 & (unsigned HOST_WIDE_INT) 0xffffffff);
10757 int cost, const_ok = const_ok_for_arm (i);
10758 int j, booth_unit_size;
10760 /* Tune as appropriate. */
10761 cost = const_ok ? 4 : 8;
10762 booth_unit_size = 2;
10763 for (j = 0; i && j < 32; j += booth_unit_size)
10765 i >>= booth_unit_size;
10766 cost++;
10769 *total = COSTS_N_INSNS (cost);
10770 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10771 return true;
10774 *total = COSTS_N_INSNS (20);
10775 return false;
10777 default:
10778 return arm_rtx_costs_1 (x, outer_code, total, speed);
10783 /* RTX cost for cores with a fast multiply unit (M variants). */
10785 static bool
10786 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10787 int *total, bool speed)
10789 enum machine_mode mode = GET_MODE (x);
10791 if (TARGET_THUMB1)
10793 *total = thumb1_rtx_costs (x, code, outer_code);
10794 return true;
10797 /* ??? should thumb2 use different costs? */
10798 switch (code)
10800 case MULT:
10801 /* There is no point basing this on the tuning, since it is always the
10802 fast variant if it exists at all. */
10803 if (mode == DImode
10804 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10805 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10806 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10808 *total = COSTS_N_INSNS(2);
10809 return false;
10813 if (mode == DImode)
10815 *total = COSTS_N_INSNS (5);
10816 return false;
10819 if (CONST_INT_P (XEXP (x, 1)))
10821 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10822 & (unsigned HOST_WIDE_INT) 0xffffffff);
10823 int cost, const_ok = const_ok_for_arm (i);
10824 int j, booth_unit_size;
10826 /* Tune as appropriate. */
10827 cost = const_ok ? 4 : 8;
10828 booth_unit_size = 8;
10829 for (j = 0; i && j < 32; j += booth_unit_size)
10831 i >>= booth_unit_size;
10832 cost++;
10835 *total = COSTS_N_INSNS(cost);
10836 return false;
10839 if (mode == SImode)
10841 *total = COSTS_N_INSNS (4);
10842 return false;
10845 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10847 if (TARGET_HARD_FLOAT
10848 && (mode == SFmode
10849 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10851 *total = COSTS_N_INSNS (1);
10852 return false;
10856 /* Requires a lib call */
10857 *total = COSTS_N_INSNS (20);
10858 return false;
10860 default:
10861 return arm_rtx_costs_1 (x, outer_code, total, speed);
10866 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10867 so it can be ignored. */
10869 static bool
10870 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10871 int *total, bool speed)
10873 enum machine_mode mode = GET_MODE (x);
10875 if (TARGET_THUMB)
10877 *total = thumb1_rtx_costs (x, code, outer_code);
10878 return true;
10881 switch (code)
10883 case COMPARE:
10884 if (GET_CODE (XEXP (x, 0)) != MULT)
10885 return arm_rtx_costs_1 (x, outer_code, total, speed);
10887 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10888 will stall until the multiplication is complete. */
10889 *total = COSTS_N_INSNS (3);
10890 return false;
10892 case MULT:
10893 /* There is no point basing this on the tuning, since it is always the
10894 fast variant if it exists at all. */
10895 if (mode == DImode
10896 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10897 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10898 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10900 *total = COSTS_N_INSNS (2);
10901 return false;
10905 if (mode == DImode)
10907 *total = COSTS_N_INSNS (5);
10908 return false;
10911 if (CONST_INT_P (XEXP (x, 1)))
10913 /* If operand 1 is a constant we can more accurately
10914 calculate the cost of the multiply. The multiplier can
10915 retire 15 bits on the first cycle and a further 12 on the
10916 second. We do, of course, have to load the constant into
10917 a register first. */
10918 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10919 /* There's a general overhead of one cycle. */
10920 int cost = 1;
10921 unsigned HOST_WIDE_INT masked_const;
10923 if (i & 0x80000000)
10924 i = ~i;
10926 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
10928 masked_const = i & 0xffff8000;
10929 if (masked_const != 0)
10931 cost++;
10932 masked_const = i & 0xf8000000;
10933 if (masked_const != 0)
10934 cost++;
10936 *total = COSTS_N_INSNS (cost);
10937 return false;
10940 if (mode == SImode)
10942 *total = COSTS_N_INSNS (3);
10943 return false;
10946 /* Requires a lib call */
10947 *total = COSTS_N_INSNS (20);
10948 return false;
10950 default:
10951 return arm_rtx_costs_1 (x, outer_code, total, speed);
10956 /* RTX costs for 9e (and later) cores. */
10958 static bool
10959 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10960 int *total, bool speed)
10962 enum machine_mode mode = GET_MODE (x);
10964 if (TARGET_THUMB1)
10966 switch (code)
10968 case MULT:
10969 *total = COSTS_N_INSNS (3);
10970 return true;
10972 default:
10973 *total = thumb1_rtx_costs (x, code, outer_code);
10974 return true;
10978 switch (code)
10980 case MULT:
10981 /* There is no point basing this on the tuning, since it is always the
10982 fast variant if it exists at all. */
10983 if (mode == DImode
10984 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10985 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10986 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10988 *total = COSTS_N_INSNS (2);
10989 return false;
10993 if (mode == DImode)
10995 *total = COSTS_N_INSNS (5);
10996 return false;
10999 if (mode == SImode)
11001 *total = COSTS_N_INSNS (2);
11002 return false;
11005 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11007 if (TARGET_HARD_FLOAT
11008 && (mode == SFmode
11009 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11011 *total = COSTS_N_INSNS (1);
11012 return false;
11016 *total = COSTS_N_INSNS (20);
11017 return false;
11019 default:
11020 return arm_rtx_costs_1 (x, outer_code, total, speed);
11023 /* All address computations that can be done are free, but rtx cost returns
11024 the same for practically all of them. So we weight the different types
11025 of address here in the order (most pref first):
11026 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11027 static inline int
11028 arm_arm_address_cost (rtx x)
11030 enum rtx_code c = GET_CODE (x);
11032 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11033 return 0;
11034 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11035 return 10;
11037 if (c == PLUS)
11039 if (CONST_INT_P (XEXP (x, 1)))
11040 return 2;
11042 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11043 return 3;
11045 return 4;
11048 return 6;
11051 static inline int
11052 arm_thumb_address_cost (rtx x)
11054 enum rtx_code c = GET_CODE (x);
11056 if (c == REG)
11057 return 1;
11058 if (c == PLUS
11059 && REG_P (XEXP (x, 0))
11060 && CONST_INT_P (XEXP (x, 1)))
11061 return 1;
11063 return 2;
11066 static int
11067 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11068 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11070 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11073 /* Adjust cost hook for XScale. */
11074 static bool
11075 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11077 /* Some true dependencies can have a higher cost depending
11078 on precisely how certain input operands are used. */
11079 if (REG_NOTE_KIND(link) == 0
11080 && recog_memoized (insn) >= 0
11081 && recog_memoized (dep) >= 0)
11083 int shift_opnum = get_attr_shift (insn);
11084 enum attr_type attr_type = get_attr_type (dep);
11086 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11087 operand for INSN. If we have a shifted input operand and the
11088 instruction we depend on is another ALU instruction, then we may
11089 have to account for an additional stall. */
11090 if (shift_opnum != 0
11091 && (attr_type == TYPE_ALU_SHIFT_IMM
11092 || attr_type == TYPE_ALUS_SHIFT_IMM
11093 || attr_type == TYPE_LOGIC_SHIFT_IMM
11094 || attr_type == TYPE_LOGICS_SHIFT_IMM
11095 || attr_type == TYPE_ALU_SHIFT_REG
11096 || attr_type == TYPE_ALUS_SHIFT_REG
11097 || attr_type == TYPE_LOGIC_SHIFT_REG
11098 || attr_type == TYPE_LOGICS_SHIFT_REG
11099 || attr_type == TYPE_MOV_SHIFT
11100 || attr_type == TYPE_MVN_SHIFT
11101 || attr_type == TYPE_MOV_SHIFT_REG
11102 || attr_type == TYPE_MVN_SHIFT_REG))
11104 rtx shifted_operand;
11105 int opno;
11107 /* Get the shifted operand. */
11108 extract_insn (insn);
11109 shifted_operand = recog_data.operand[shift_opnum];
11111 /* Iterate over all the operands in DEP. If we write an operand
11112 that overlaps with SHIFTED_OPERAND, then we have to increase the
11113 cost of this dependency. */
11114 extract_insn (dep);
11115 preprocess_constraints ();
11116 for (opno = 0; opno < recog_data.n_operands; opno++)
11118 /* We can ignore strict inputs. */
11119 if (recog_data.operand_type[opno] == OP_IN)
11120 continue;
11122 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11123 shifted_operand))
11125 *cost = 2;
11126 return false;
11131 return true;
11134 /* Adjust cost hook for Cortex A9. */
11135 static bool
11136 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11138 switch (REG_NOTE_KIND (link))
11140 case REG_DEP_ANTI:
11141 *cost = 0;
11142 return false;
11144 case REG_DEP_TRUE:
11145 case REG_DEP_OUTPUT:
11146 if (recog_memoized (insn) >= 0
11147 && recog_memoized (dep) >= 0)
11149 if (GET_CODE (PATTERN (insn)) == SET)
11151 if (GET_MODE_CLASS
11152 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11153 || GET_MODE_CLASS
11154 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11156 enum attr_type attr_type_insn = get_attr_type (insn);
11157 enum attr_type attr_type_dep = get_attr_type (dep);
11159 /* By default all dependencies of the form
11160 s0 = s0 <op> s1
11161 s0 = s0 <op> s2
11162 have an extra latency of 1 cycle because
11163 of the input and output dependency in this
11164 case. However, this gets modeled as a true
11165 dependency and hence all these checks. */
11166 if (REG_P (SET_DEST (PATTERN (insn)))
11167 && REG_P (SET_DEST (PATTERN (dep)))
11168 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11169 SET_DEST (PATTERN (dep))))
11171 /* FMACS is a special case where the dependent
11172 instruction can be issued 3 cycles before
11173 the normal latency in case of an output
11174 dependency. */
11175 if ((attr_type_insn == TYPE_FMACS
11176 || attr_type_insn == TYPE_FMACD)
11177 && (attr_type_dep == TYPE_FMACS
11178 || attr_type_dep == TYPE_FMACD))
11180 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11181 *cost = insn_default_latency (dep) - 3;
11182 else
11183 *cost = insn_default_latency (dep);
11184 return false;
11186 else
11188 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11189 *cost = insn_default_latency (dep) + 1;
11190 else
11191 *cost = insn_default_latency (dep);
11193 return false;
11198 break;
11200 default:
11201 gcc_unreachable ();
11204 return true;
11207 /* Adjust cost hook for FA726TE. */
11208 static bool
11209 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11211 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11212 has a penalty of 3. */
11213 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11214 && recog_memoized (insn) >= 0
11215 && recog_memoized (dep) >= 0
11216 && get_attr_conds (dep) == CONDS_SET)
11218 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11219 if (get_attr_conds (insn) == CONDS_USE
11220 && get_attr_type (insn) != TYPE_BRANCH)
11222 *cost = 3;
11223 return false;
11226 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11227 || get_attr_conds (insn) == CONDS_USE)
11229 *cost = 0;
11230 return false;
11234 return true;
11237 /* Implement TARGET_REGISTER_MOVE_COST.
11239 Moves between VFP_REGS and GENERAL_REGS take a single insn, but
11240 are typically more expensive than a single memory access. We set
11241 the cost to less than two memory accesses so that floating
11242 point to integer conversion does not go through memory. */
11245 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11246 reg_class_t from, reg_class_t to)
11248 if (TARGET_32BIT)
11250 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11251 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11252 return 15;
11253 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11254 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11255 return 4;
11256 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11257 return 20;
11258 else
11259 return 2;
11261 else
11263 if (from == HI_REGS || to == HI_REGS)
11264 return 4;
11265 else
11266 return 2;
11270 /* Implement TARGET_MEMORY_MOVE_COST. */
11273 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11274 bool in ATTRIBUTE_UNUSED)
11276 if (TARGET_32BIT)
11277 return 10;
11278 else
11280 if (GET_MODE_SIZE (mode) < 4)
11281 return 8;
11282 else
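/* Scale with the access size; e.g. a DImode (8-byte) value costs
   2 * 8 = 16 when it can live in LO_REGS and twice that otherwise.  */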
11283 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11287 /* Vectorizer cost model implementation. */
11289 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11290 static int
11291 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11292 tree vectype,
11293 int misalign ATTRIBUTE_UNUSED)
11295 unsigned elements;
11297 switch (type_of_cost)
11299 case scalar_stmt:
11300 return current_tune->vec_costs->scalar_stmt_cost;
11302 case scalar_load:
11303 return current_tune->vec_costs->scalar_load_cost;
11305 case scalar_store:
11306 return current_tune->vec_costs->scalar_store_cost;
11308 case vector_stmt:
11309 return current_tune->vec_costs->vec_stmt_cost;
11311 case vector_load:
11312 return current_tune->vec_costs->vec_align_load_cost;
11314 case vector_store:
11315 return current_tune->vec_costs->vec_store_cost;
11317 case vec_to_scalar:
11318 return current_tune->vec_costs->vec_to_scalar_cost;
11320 case scalar_to_vec:
11321 return current_tune->vec_costs->scalar_to_vec_cost;
11323 case unaligned_load:
11324 return current_tune->vec_costs->vec_unalign_load_cost;
11326 case unaligned_store:
11327 return current_tune->vec_costs->vec_unalign_store_cost;
11329 case cond_branch_taken:
11330 return current_tune->vec_costs->cond_taken_branch_cost;
11332 case cond_branch_not_taken:
11333 return current_tune->vec_costs->cond_not_taken_branch_cost;
11335 case vec_perm:
11336 case vec_promote_demote:
11337 return current_tune->vec_costs->vec_stmt_cost;
11339 case vec_construct:
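/* The cost grows with the element count; e.g. constructing a 4-element
   vector is costed at 4/2 + 1 = 3 under this model.  */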
11340 elements = TYPE_VECTOR_SUBPARTS (vectype);
11341 return elements / 2 + 1;
11343 default:
11344 gcc_unreachable ();
11348 /* Implement targetm.vectorize.add_stmt_cost. */
11350 static unsigned
11351 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11352 struct _stmt_vec_info *stmt_info, int misalign,
11353 enum vect_cost_model_location where)
11355 unsigned *cost = (unsigned *) data;
11356 unsigned retval = 0;
11358 if (flag_vect_cost_model)
11360 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11361 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11363 /* Statements in an inner loop relative to the loop being
11364 vectorized are weighted more heavily. The value here is
11365 arbitrary and could potentially be improved with analysis. */
11366 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11367 count *= 50; /* FIXME. */
11369 retval = (unsigned) (count * stmt_cost);
11370 cost[where] += retval;
11373 return retval;
11376 /* Return true if and only if this insn can dual-issue only as older. */
11377 static bool
11378 cortexa7_older_only (rtx insn)
11380 if (recog_memoized (insn) < 0)
11381 return false;
11383 switch (get_attr_type (insn))
11385 case TYPE_ALU_REG:
11386 case TYPE_ALUS_REG:
11387 case TYPE_LOGIC_REG:
11388 case TYPE_LOGICS_REG:
11389 case TYPE_ADC_REG:
11390 case TYPE_ADCS_REG:
11391 case TYPE_ADR:
11392 case TYPE_BFM:
11393 case TYPE_REV:
11394 case TYPE_MVN_REG:
11395 case TYPE_SHIFT_IMM:
11396 case TYPE_SHIFT_REG:
11397 case TYPE_LOAD_BYTE:
11398 case TYPE_LOAD1:
11399 case TYPE_STORE1:
11400 case TYPE_FFARITHS:
11401 case TYPE_FADDS:
11402 case TYPE_FFARITHD:
11403 case TYPE_FADDD:
11404 case TYPE_FMOV:
11405 case TYPE_F_CVT:
11406 case TYPE_FCMPS:
11407 case TYPE_FCMPD:
11408 case TYPE_FCONSTS:
11409 case TYPE_FCONSTD:
11410 case TYPE_FMULS:
11411 case TYPE_FMACS:
11412 case TYPE_FMULD:
11413 case TYPE_FMACD:
11414 case TYPE_FDIVS:
11415 case TYPE_FDIVD:
11416 case TYPE_F_MRC:
11417 case TYPE_F_MRRC:
11418 case TYPE_F_FLAG:
11419 case TYPE_F_LOADS:
11420 case TYPE_F_STORES:
11421 return true;
11422 default:
11423 return false;
11427 /* Return true if and only if this insn can dual-issue as younger. */
11428 static bool
11429 cortexa7_younger (FILE *file, int verbose, rtx insn)
11431 if (recog_memoized (insn) < 0)
11433 if (verbose > 5)
11434 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11435 return false;
11438 switch (get_attr_type (insn))
11440 case TYPE_ALU_IMM:
11441 case TYPE_ALUS_IMM:
11442 case TYPE_LOGIC_IMM:
11443 case TYPE_LOGICS_IMM:
11444 case TYPE_EXTEND:
11445 case TYPE_MVN_IMM:
11446 case TYPE_MOV_IMM:
11447 case TYPE_MOV_REG:
11448 case TYPE_MOV_SHIFT:
11449 case TYPE_MOV_SHIFT_REG:
11450 case TYPE_BRANCH:
11451 case TYPE_CALL:
11452 return true;
11453 default:
11454 return false;
11459 /* Look for an instruction that can dual issue only as an older
11460 instruction, and move it in front of any instructions that can
11461 dual-issue as younger, while preserving the relative order of all
11462 other instructions in the ready list. This is a heuristic to help
11463 dual-issue in later cycles, by postponing issue of more flexible
11464 instructions. This heuristic may affect dual issue opportunities
11465 in the current cycle. */
11466 static void
11467 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11468 int clock)
11470 int i;
11471 int first_older_only = -1, first_younger = -1;
11473 if (verbose > 5)
11474 fprintf (file,
11475 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11476 clock,
11477 *n_readyp);
11479 /* Traverse the ready list from the head (the instruction to issue
11480 first), looking for the first instruction that can issue as
11481 younger and the first instruction that can dual-issue only as
11482 older. */
11483 for (i = *n_readyp - 1; i >= 0; i--)
11485 rtx insn = ready[i];
11486 if (cortexa7_older_only (insn))
11488 first_older_only = i;
11489 if (verbose > 5)
11490 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11491 break;
11493 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11494 first_younger = i;
11497 /* Nothing to reorder because either no younger insn found or insn
11498 that can dual-issue only as older appears before any insn that
11499 can dual-issue as younger. */
11500 if (first_younger == -1)
11502 if (verbose > 5)
11503 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11504 return;
11507 /* Nothing to reorder because no older-only insn in the ready list. */
11508 if (first_older_only == -1)
11510 if (verbose > 5)
11511 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11512 return;
11515 /* Move first_older_only insn before first_younger. */
11516 if (verbose > 5)
11517 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11518 INSN_UID(ready [first_older_only]),
11519 INSN_UID(ready [first_younger]));
11520 rtx first_older_only_insn = ready [first_older_only];
11521 for (i = first_older_only; i < first_younger; i++)
11523 ready[i] = ready[i+1];
11526 ready[i] = first_older_only_insn;
11527 return;
11530 /* Implement TARGET_SCHED_REORDER. */
11531 static int
11532 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11533 int clock)
11535 switch (arm_tune)
11537 case cortexa7:
11538 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11539 break;
11540 default:
11541 /* Do nothing for other cores. */
11542 break;
11545 return arm_issue_rate ();
11548 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11549 It corrects the value of COST based on the relationship between
11550 INSN and DEP through the dependence LINK. It returns the new
11551 value. There is a per-core adjust_cost hook to adjust scheduler costs
11552 and the per-core hook can choose to completely override the generic
11553 adjust_cost function. Only put bits of code into arm_adjust_cost that
11554 are common across all cores. */
11555 static int
11556 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11558 rtx i_pat, d_pat;
11560 /* When generating Thumb-1 code, we want to place flag-setting operations
11561 close to a conditional branch which depends on them, so that we can
11562 omit the comparison. */
11563 if (TARGET_THUMB1
11564 && REG_NOTE_KIND (link) == 0
11565 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11566 && recog_memoized (dep) >= 0
11567 && get_attr_conds (dep) == CONDS_SET)
11568 return 0;
11570 if (current_tune->sched_adjust_cost != NULL)
11572 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11573 return cost;
11576 /* XXX Is this strictly true? */
11577 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11578 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11579 return 0;
11581 /* Call insns don't incur a stall, even if they follow a load. */
11582 if (REG_NOTE_KIND (link) == 0
11583 && CALL_P (insn))
11584 return 1;
11586 if ((i_pat = single_set (insn)) != NULL
11587 && MEM_P (SET_SRC (i_pat))
11588 && (d_pat = single_set (dep)) != NULL
11589 && MEM_P (SET_DEST (d_pat)))
11591 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11592 /* This is a load after a store; there is no conflict if the load reads
11593 from a cached area. Assume that loads from the stack, and from the
11594 constant pool are cached, and that others will miss. This is a
11595 hack. */
11597 if ((GET_CODE (src_mem) == SYMBOL_REF
11598 && CONSTANT_POOL_ADDRESS_P (src_mem))
11599 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11600 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11601 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11602 return 1;
11605 return cost;
11609 arm_max_conditional_execute (void)
11611 return max_insns_skipped;
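/* Default branch cost: branches are cheap for size-optimized Thumb-2 code;
   for Thumb-1 the cost depends only on whether we are optimizing at all.  */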
11614 static int
11615 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11617 if (TARGET_32BIT)
11618 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11619 else
11620 return (optimize > 0) ? 2 : 0;
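/* On Cortex-A5, branches are treated as free when optimizing for speed;
   otherwise fall back to the default branch costing.  */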
11623 static int
11624 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11626 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11629 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11630 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11631 sequences of non-executed instructions in IT blocks probably take the same
11632 amount of time as executed instructions (and the IT instruction itself takes
11633 space in icache). This function was experimentally determined to give good
11634 results on a popular embedded benchmark. */
11636 static int
11637 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11639 return (TARGET_32BIT && speed_p) ? 1
11640 : arm_default_branch_cost (speed_p, predictable_p);
11643 static bool fp_consts_inited = false;
11645 static REAL_VALUE_TYPE value_fp0;
11647 static void
11648 init_fp_table (void)
11650 REAL_VALUE_TYPE r;
11652 r = REAL_VALUE_ATOF ("0", DFmode);
11653 value_fp0 = r;
11654 fp_consts_inited = true;
11657 /* Return TRUE if rtx X is a valid immediate FP constant. */
11659 arm_const_double_rtx (rtx x)
11661 REAL_VALUE_TYPE r;
11663 if (!fp_consts_inited)
11664 init_fp_table ();
11666 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11667 if (REAL_VALUE_MINUS_ZERO (r))
11668 return 0;
11670 if (REAL_VALUES_EQUAL (r, value_fp0))
11671 return 1;
11673 return 0;
11676 /* VFPv3 has a fairly wide range of representable immediates, formed from
11677 "quarter-precision" floating-point values. These can be evaluated using this
11678 formula (with ^ for exponentiation):
11680 -1^s * n * 2^-r
11682 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11683 16 <= n <= 31 and 0 <= r <= 7.
11685 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11687 - A (most-significant) is the sign bit.
11688 - BCD are the exponent (encoded as r XOR 3).
11689 - EFGH are the mantissa (encoded as n - 16).
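   For example, 1.0 = 16 * 2^-4 (s = 0, n = 16, r = 4), so it encodes as
   A = 0, BCD = 4 XOR 3 = 7 (binary 111) and EFGH = 16 - 16 = 0, giving
   the 8-bit index 0111 0000 (0x70).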
11692 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11693 fconst[sd] instruction, or -1 if X isn't suitable. */
11694 static int
11695 vfp3_const_double_index (rtx x)
11697 REAL_VALUE_TYPE r, m;
11698 int sign, exponent;
11699 unsigned HOST_WIDE_INT mantissa, mant_hi;
11700 unsigned HOST_WIDE_INT mask;
11701 HOST_WIDE_INT m1, m2;
11702 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11704 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11705 return -1;
11707 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11709 /* We can't represent these things, so detect them first. */
11710 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11711 return -1;
11713 /* Extract sign, exponent and mantissa. */
11714 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11715 r = real_value_abs (&r);
11716 exponent = REAL_EXP (&r);
11717 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11718 highest (sign) bit, with a fixed binary point at bit point_pos.
11719 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11720 bits for the mantissa, this may fail (low bits would be lost). */
11721 real_ldexp (&m, &r, point_pos - exponent);
11722 REAL_VALUE_TO_INT (&m1, &m2, m);
11723 mantissa = m1;
11724 mant_hi = m2;
11726 /* If there are bits set in the low part of the mantissa, we can't
11727 represent this value. */
11728 if (mantissa != 0)
11729 return -1;
11731 /* Now make it so that mantissa contains the most-significant bits, and move
11732 the point_pos to indicate that the least-significant bits have been
11733 discarded. */
11734 point_pos -= HOST_BITS_PER_WIDE_INT;
11735 mantissa = mant_hi;
11737 /* We can permit four significant bits of mantissa only, plus a high bit
11738 which is always 1. */
11739 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11740 if ((mantissa & mask) != 0)
11741 return -1;
11743 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11744 mantissa >>= point_pos - 5;
11746 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11747 floating-point immediate zero with Neon using an integer-zero load, but
11748 that case is handled elsewhere.) */
11749 if (mantissa == 0)
11750 return -1;
11752 gcc_assert (mantissa >= 16 && mantissa <= 31);
11754 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11755 normalized significands are in the range [1, 2). (Our mantissa is shifted
11756 left 4 places at this point relative to normalized IEEE754 values). GCC
11757 internally uses [0.5, 1) (see real.c), so the exponent returned from
11758 REAL_EXP must be altered. */
11759 exponent = 5 - exponent;
11761 if (exponent < 0 || exponent > 7)
11762 return -1;
11764 /* Sign, mantissa and exponent are now in the correct form to plug into the
11765 formula described in the comment above. */
11766 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
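/* Illustrative sketch, not part of the original file: given a value already
   known to be of the quarter-precision form (-1)^s * n * 2^-r described
   above (16 <= n <= 31, 0 <= r <= 7), the 8-bit index is built exactly as in
   the return statement above.  For example 1.0 = 16 * 2^-4, so s = 0, n = 16,
   r = 4 and the index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16), i.e. 0x70.
   The helper name below is hypothetical.  */
#if 0
static int
vfp3_index_from_s_n_r (int s, int n, int r)
{
  if (n < 16 || n > 31 || r < 0 || r > 7)
    return -1;                  /* Not representable by fconst[sd].  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif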
11769 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11771 vfp3_const_double_rtx (rtx x)
11773 if (!TARGET_VFP3)
11774 return 0;
11776 return vfp3_const_double_index (x) != -1;
11779 /* Recognize immediates which can be used in various Neon instructions. Legal
11780 immediates are described by the following table (for VMVN variants, the
11781 bitwise inverse of the constant shown is recognized. In either case, VMOV
11782 is output and the correct instruction to use for a given constant is chosen
11783 by the assembler). The constant shown is replicated across all elements of
11784 the destination vector.
11786 insn elems variant constant (binary)
11787 ---- ----- ------- -----------------
11788 vmov i32 0 00000000 00000000 00000000 abcdefgh
11789 vmov i32 1 00000000 00000000 abcdefgh 00000000
11790 vmov i32 2 00000000 abcdefgh 00000000 00000000
11791 vmov i32 3 abcdefgh 00000000 00000000 00000000
11792 vmov i16 4 00000000 abcdefgh
11793 vmov i16 5 abcdefgh 00000000
11794 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11795 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11796 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11797 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11798 vmvn i16 10 00000000 abcdefgh
11799 vmvn i16 11 abcdefgh 00000000
11800 vmov i32 12 00000000 00000000 abcdefgh 11111111
11801 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11802 vmov i32 14 00000000 abcdefgh 11111111 11111111
11803 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11804 vmov i8 16 abcdefgh
11805 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11806 eeeeeeee ffffffff gggggggg hhhhhhhh
11807 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11808 vmov f32 19 00000000 00000000 00000000 00000000
11810 For case 18, B = !b. Representable values are exactly those accepted by
11811 vfp3_const_double_index, but are output as floating-point numbers rather
11812 than indices.
11814 For case 19, we will change it to vmov.i32 when assembling.
11816 Variants 0-5 (inclusive) may also be used as immediates for the second
11817 operand of VORR/VBIC instructions.
11819 The INVERSE argument causes the bitwise inverse of the given operand to be
11820 recognized instead (used for recognizing legal immediates for the VAND/VORN
11821 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11822 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11823 output, rather than the real insns vbic/vorr).
11825 INVERSE makes no difference to the recognition of float vectors.
11827 The return value is the variant of immediate as shown in the above table, or
11828 -1 if the given value doesn't match any of the listed patterns.
11830 static int
11831 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11832 rtx *modconst, int *elementwidth)
11834 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11835 matches = 1; \
11836 for (i = 0; i < idx; i += (STRIDE)) \
11837 if (!(TEST)) \
11838 matches = 0; \
11839 if (matches) \
11841 immtype = (CLASS); \
11842 elsize = (ELSIZE); \
11843 break; \
11846 unsigned int i, elsize = 0, idx = 0, n_elts;
11847 unsigned int innersize;
11848 unsigned char bytes[16];
11849 int immtype = -1, matches;
11850 unsigned int invmask = inverse ? 0xff : 0;
11851 bool vector = GET_CODE (op) == CONST_VECTOR;
11853 if (vector)
11855 n_elts = CONST_VECTOR_NUNITS (op);
11856 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11858 else
11860 n_elts = 1;
11861 if (mode == VOIDmode)
11862 mode = DImode;
11863 innersize = GET_MODE_SIZE (mode);
11866 /* Vectors of float constants. */
11867 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11869 rtx el0 = CONST_VECTOR_ELT (op, 0);
11870 REAL_VALUE_TYPE r0;
11872 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11873 return -1;
11875 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11877 for (i = 1; i < n_elts; i++)
11879 rtx elt = CONST_VECTOR_ELT (op, i);
11880 REAL_VALUE_TYPE re;
11882 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11884 if (!REAL_VALUES_EQUAL (r0, re))
11885 return -1;
11888 if (modconst)
11889 *modconst = CONST_VECTOR_ELT (op, 0);
11891 if (elementwidth)
11892 *elementwidth = 0;
11894 if (el0 == CONST0_RTX (GET_MODE (el0)))
11895 return 19;
11896 else
11897 return 18;
11900 /* Splat vector constant out into a byte vector. */
11901 for (i = 0; i < n_elts; i++)
11903 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11904 unsigned HOST_WIDE_INT elpart;
11905 unsigned int part, parts;
11907 if (CONST_INT_P (el))
11909 elpart = INTVAL (el);
11910 parts = 1;
11912 else if (CONST_DOUBLE_P (el))
11914 elpart = CONST_DOUBLE_LOW (el);
11915 parts = 2;
11917 else
11918 gcc_unreachable ();
11920 for (part = 0; part < parts; part++)
11922 unsigned int byte;
11923 for (byte = 0; byte < innersize; byte++)
11925 bytes[idx++] = (elpart & 0xff) ^ invmask;
11926 elpart >>= BITS_PER_UNIT;
11928 if (CONST_DOUBLE_P (el))
11929 elpart = CONST_DOUBLE_HIGH (el);
11933 /* Sanity check. */
11934 gcc_assert (idx == GET_MODE_SIZE (mode));
11938 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11939 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11941 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11942 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11944 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11945 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11947 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11948 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11950 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11952 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11954 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11955 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11957 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11958 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11960 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11961 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11963 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11964 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11966 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11968 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11970 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11971 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11973 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11974 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11976 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11977 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11979 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11980 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11982 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11984 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11985 && bytes[i] == bytes[(i + 8) % idx]);
11987 while (0);
11989 if (immtype == -1)
11990 return -1;
11992 if (elementwidth)
11993 *elementwidth = elsize;
11995 if (modconst)
11997 unsigned HOST_WIDE_INT imm = 0;
11999 /* Un-invert bytes of recognized vector, if necessary. */
12000 if (invmask != 0)
12001 for (i = 0; i < idx; i++)
12002 bytes[i] ^= invmask;
12004 if (immtype == 17)
12006 /* FIXME: Broken on 32-bit H_W_I hosts. */
12007 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12009 for (i = 0; i < 8; i++)
12010 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12011 << (i * BITS_PER_UNIT);
12013 *modconst = GEN_INT (imm);
12015 else
12017 unsigned HOST_WIDE_INT imm = 0;
12019 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12020 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12022 *modconst = GEN_INT (imm);
12026 return immtype;
12027 #undef CHECK
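/* Illustrative sketch, not part of the original file: variants 0-3 in the
   table above are the 32-bit constants with exactly one non-zero byte.  The
   splat loop above stores each element low byte first, so a V4SI vector whose
   elements are all 0x0000ab00 splats to bytes {00, ab, 00, 00, ...} and is
   recognized as variant 1 with element width 32.  The helper name below is
   hypothetical.  */
#if 0
static int
vmov_i32_single_byte_variant (unsigned int val)
{
  int i;
  for (i = 0; i < 4; i++)
    if ((val & ~(0xffU << (8 * i))) == 0)
      return i;                 /* abcdefgh occupies byte I.  */
  return -1;                    /* Needs one of the other variants.  */
}
#endif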
12030 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12031 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12032 float elements), and a modified constant (whatever should be output for a
12033 VMOV) in *MODCONST. */
12036 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12037 rtx *modconst, int *elementwidth)
12039 rtx tmpconst;
12040 int tmpwidth;
12041 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12043 if (retval == -1)
12044 return 0;
12046 if (modconst)
12047 *modconst = tmpconst;
12049 if (elementwidth)
12050 *elementwidth = tmpwidth;
12052 return 1;
12055 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12056 the immediate is valid, write a constant suitable for using as an operand
12057 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12058 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12061 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12062 rtx *modconst, int *elementwidth)
12064 rtx tmpconst;
12065 int tmpwidth;
12066 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12068 if (retval < 0 || retval > 5)
12069 return 0;
12071 if (modconst)
12072 *modconst = tmpconst;
12074 if (elementwidth)
12075 *elementwidth = tmpwidth;
12077 return 1;
12080 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12081 the immediate is valid, write a constant suitable for using as an operand
12082 to VSHR/VSHL to *MODCONST and the corresponding element width to
12083 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12084 because the two have different immediate limitations.
12087 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12088 rtx *modconst, int *elementwidth,
12089 bool isleftshift)
12091 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12092 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12093 unsigned HOST_WIDE_INT last_elt = 0;
12094 unsigned HOST_WIDE_INT maxshift;
12096 /* Split vector constant out into a byte vector. */
12097 for (i = 0; i < n_elts; i++)
12099 rtx el = CONST_VECTOR_ELT (op, i);
12100 unsigned HOST_WIDE_INT elpart;
12102 if (CONST_INT_P (el))
12103 elpart = INTVAL (el);
12104 else if (CONST_DOUBLE_P (el))
12105 return 0;
12106 else
12107 gcc_unreachable ();
12109 if (i != 0 && elpart != last_elt)
12110 return 0;
12112 last_elt = elpart;
12115 /* Shift less than element size. */
12116 maxshift = innersize * 8;
12118 if (isleftshift)
12120 /* Left shift immediate value can be from 0 to <size>-1. */
12121 if (last_elt >= maxshift)
12122 return 0;
12124 else
12126 /* Right shift immediate value can be from 1 to <size>. */
12127 if (last_elt == 0 || last_elt > maxshift)
12128 return 0;
12131 if (elementwidth)
12132 *elementwidth = innersize * 8;
12134 if (modconst)
12135 *modconst = CONST_VECTOR_ELT (op, 0);
12137 return 1;
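/* Illustrative sketch, not part of the original file: the ranges enforced
   above, per element width in bits - VSHL immediates run from 0 to width-1,
   VSHR immediates from 1 to width.  So for 16-bit elements a left shift by 16
   is rejected while a right shift by 16 is accepted.  The helper name below
   is hypothetical.  */
#if 0
static int
neon_shift_imm_in_range_p (unsigned int shift, unsigned int width,
                           bool isleftshift)
{
  return isleftshift ? shift < width : (shift >= 1 && shift <= width);
}
#endif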
12140 /* Return a string suitable for output of Neon immediate logic operation
12141 MNEM. */
12143 char *
12144 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12145 int inverse, int quad)
12147 int width, is_valid;
12148 static char templ[40];
12150 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12152 gcc_assert (is_valid != 0);
12154 if (quad)
12155 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12156 else
12157 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12159 return templ;
12162 /* Return a string suitable for output of Neon immediate shift operation
12163 (VSHR or VSHL) MNEM. */
12165 char *
12166 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12167 enum machine_mode mode, int quad,
12168 bool isleftshift)
12170 int width, is_valid;
12171 static char templ[40];
12173 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12174 gcc_assert (is_valid != 0);
12176 if (quad)
12177 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12178 else
12179 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12181 return templ;
12184 /* Output a sequence of pairwise operations to implement a reduction.
12185 NOTE: We do "too much work" here, because pairwise operations work on two
12186 registers-worth of operands in one go. Unfortunately we do not think those
12187 extra calculations can be exploited to do the full operation in fewer steps.
12188 Although all vector elements of the result but the first are ignored, we
12189 actually calculate the same result in each of the elements. An alternative
12190 such as initially loading a vector with zero to use as each of the second
12191 operands would use up an additional register and take an extra instruction,
12192 for no particular gain. */
12194 void
12195 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12196 rtx (*reduc) (rtx, rtx, rtx))
12198 enum machine_mode inner = GET_MODE_INNER (mode);
12199 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12200 rtx tmpsum = op1;
12202 for (i = parts / 2; i >= 1; i /= 2)
12204 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12205 emit_insn (reduc (dest, tmpsum, tmpsum));
12206 tmpsum = dest;
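/* Illustrative sketch, not part of the original file: a scalar model of the
   loop above.  Each pass pairwise-adds adjacent elements, so PARTS elements
   need log2(PARTS) passes (three vpadd steps for an eight-element vector),
   after which element 0 holds the full reduction.  The helper name below is
   hypothetical.  */
#if 0
static int
pairwise_reduce_model (int *v, int parts)
{
  int i, j;
  for (i = parts / 2; i >= 1; i /= 2)
    for (j = 0; j < i; j++)
      v[j] = v[2 * j] + v[2 * j + 1];
  return v[0];
}
#endif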
12210 /* If VALS is a vector constant that can be loaded into a register
12211 using VDUP, generate instructions to do so and return an RTX to
12212 assign to the register. Otherwise return NULL_RTX. */
12214 static rtx
12215 neon_vdup_constant (rtx vals)
12217 enum machine_mode mode = GET_MODE (vals);
12218 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12219 int n_elts = GET_MODE_NUNITS (mode);
12220 bool all_same = true;
12221 rtx x;
12222 int i;
12224 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12225 return NULL_RTX;
12227 for (i = 0; i < n_elts; ++i)
12229 x = XVECEXP (vals, 0, i);
12230 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12231 all_same = false;
12234 if (!all_same)
12235 /* The elements are not all the same. We could handle repeating
12236 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12237 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12238 vdup.i16). */
12239 return NULL_RTX;
12241 /* We can load this constant by using VDUP and a constant in a
12242 single ARM register. This will be cheaper than a vector
12243 load. */
12245 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12246 return gen_rtx_VEC_DUPLICATE (mode, x);
12249 /* Generate code to load VALS, which is a PARALLEL containing only
12250 constants (for vec_init) or CONST_VECTOR, efficiently into a
12251 register. Returns an RTX to copy into the register, or NULL_RTX
12252 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12255 neon_make_constant (rtx vals)
12257 enum machine_mode mode = GET_MODE (vals);
12258 rtx target;
12259 rtx const_vec = NULL_RTX;
12260 int n_elts = GET_MODE_NUNITS (mode);
12261 int n_const = 0;
12262 int i;
12264 if (GET_CODE (vals) == CONST_VECTOR)
12265 const_vec = vals;
12266 else if (GET_CODE (vals) == PARALLEL)
12268 /* A CONST_VECTOR must contain only CONST_INTs and
12269 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12270 Only store valid constants in a CONST_VECTOR. */
12271 for (i = 0; i < n_elts; ++i)
12273 rtx x = XVECEXP (vals, 0, i);
12274 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12275 n_const++;
12277 if (n_const == n_elts)
12278 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12280 else
12281 gcc_unreachable ();
12283 if (const_vec != NULL
12284 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12285 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12286 return const_vec;
12287 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12288 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12289 pipeline cycle; creating the constant takes one or two ARM
12290 pipeline cycles. */
12291 return target;
12292 else if (const_vec != NULL_RTX)
12293 /* Load from constant pool. On Cortex-A8 this takes two cycles
12294 (for either double or quad vectors). We can not take advantage
12295 of single-cycle VLD1 because we need a PC-relative addressing
12296 mode. */
12297 return const_vec;
12298 else
12299 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12300 We can not construct an initializer. */
12301 return NULL_RTX;
12304 /* Initialize vector TARGET to VALS. */
12306 void
12307 neon_expand_vector_init (rtx target, rtx vals)
12309 enum machine_mode mode = GET_MODE (target);
12310 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12311 int n_elts = GET_MODE_NUNITS (mode);
12312 int n_var = 0, one_var = -1;
12313 bool all_same = true;
12314 rtx x, mem;
12315 int i;
12317 for (i = 0; i < n_elts; ++i)
12319 x = XVECEXP (vals, 0, i);
12320 if (!CONSTANT_P (x))
12321 ++n_var, one_var = i;
12323 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12324 all_same = false;
12327 if (n_var == 0)
12329 rtx constant = neon_make_constant (vals);
12330 if (constant != NULL_RTX)
12332 emit_move_insn (target, constant);
12333 return;
12337 /* Splat a single non-constant element if we can. */
12338 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12340 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12341 emit_insn (gen_rtx_SET (VOIDmode, target,
12342 gen_rtx_VEC_DUPLICATE (mode, x)));
12343 return;
12346 /* One field is non-constant. Load constant then overwrite varying
12347 field. This is more efficient than using the stack. */
12348 if (n_var == 1)
12350 rtx copy = copy_rtx (vals);
12351 rtx index = GEN_INT (one_var);
12353 /* Load constant part of vector, substitute neighboring value for
12354 varying element. */
12355 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12356 neon_expand_vector_init (target, copy);
12358 /* Insert variable. */
12359 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12360 switch (mode)
12362 case V8QImode:
12363 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12364 break;
12365 case V16QImode:
12366 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12367 break;
12368 case V4HImode:
12369 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12370 break;
12371 case V8HImode:
12372 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12373 break;
12374 case V2SImode:
12375 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12376 break;
12377 case V4SImode:
12378 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12379 break;
12380 case V2SFmode:
12381 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12382 break;
12383 case V4SFmode:
12384 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12385 break;
12386 case V2DImode:
12387 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12388 break;
12389 default:
12390 gcc_unreachable ();
12392 return;
12395 /* Construct the vector in memory one field at a time
12396 and load the whole vector. */
12397 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12398 for (i = 0; i < n_elts; i++)
12399 emit_move_insn (adjust_address_nv (mem, inner_mode,
12400 i * GET_MODE_SIZE (inner_mode)),
12401 XVECEXP (vals, 0, i));
12402 emit_move_insn (target, mem);
12405 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12406 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12407 reported source locations are bogus. */
12409 static void
12410 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12411 const char *err)
12413 HOST_WIDE_INT lane;
12415 gcc_assert (CONST_INT_P (operand));
12417 lane = INTVAL (operand);
12419 if (lane < low || lane >= high)
12420 error (err);
12423 /* Bounds-check lanes. */
12425 void
12426 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12428 bounds_check (operand, low, high, "lane out of range");
12431 /* Bounds-check constants. */
12433 void
12434 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12436 bounds_check (operand, low, high, "constant out of range");
12439 HOST_WIDE_INT
12440 neon_element_bits (enum machine_mode mode)
12442 if (mode == DImode)
12443 return GET_MODE_BITSIZE (mode);
12444 else
12445 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12449 /* Predicates for `match_operand' and `match_operator'. */
12451 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12452 WB is true if full writeback address modes are allowed and is false
12453 if limited writeback address modes (POST_INC and PRE_DEC) are
12454 allowed. */
12457 arm_coproc_mem_operand (rtx op, bool wb)
12459 rtx ind;
12461 /* Reject eliminable registers. */
12462 if (! (reload_in_progress || reload_completed || lra_in_progress)
12463 && ( reg_mentioned_p (frame_pointer_rtx, op)
12464 || reg_mentioned_p (arg_pointer_rtx, op)
12465 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12466 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12467 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12468 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12469 return FALSE;
12471 /* Constants are converted into offsets from labels. */
12472 if (!MEM_P (op))
12473 return FALSE;
12475 ind = XEXP (op, 0);
12477 if (reload_completed
12478 && (GET_CODE (ind) == LABEL_REF
12479 || (GET_CODE (ind) == CONST
12480 && GET_CODE (XEXP (ind, 0)) == PLUS
12481 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12482 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12483 return TRUE;
12485 /* Match: (mem (reg)). */
12486 if (REG_P (ind))
12487 return arm_address_register_rtx_p (ind, 0);
12489 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
12490 acceptable in any case (subject to verification by
12491 arm_address_register_rtx_p). We need WB to be true to accept
12492 PRE_INC and POST_DEC. */
12493 if (GET_CODE (ind) == POST_INC
12494 || GET_CODE (ind) == PRE_DEC
12495 || (wb
12496 && (GET_CODE (ind) == PRE_INC
12497 || GET_CODE (ind) == POST_DEC)))
12498 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12500 if (wb
12501 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12502 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12503 && GET_CODE (XEXP (ind, 1)) == PLUS
12504 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12505 ind = XEXP (ind, 1);
12507 /* Match:
12508 (plus (reg)
12509 (const)). */
12510 if (GET_CODE (ind) == PLUS
12511 && REG_P (XEXP (ind, 0))
12512 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12513 && CONST_INT_P (XEXP (ind, 1))
12514 && INTVAL (XEXP (ind, 1)) > -1024
12515 && INTVAL (XEXP (ind, 1)) < 1024
12516 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12517 return TRUE;
12519 return FALSE;
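/* Illustrative sketch, not part of the original file: the immediate-offset
   test above accepts a multiple of four strictly between -1024 and 1024,
   i.e. -1020, -1016, ..., 1016, 1020, which is the offset range of a vldr or
   vstr.  The helper name below is hypothetical.  */
#if 0
static bool
coproc_offset_ok_p (HOST_WIDE_INT offset)
{
  return offset > -1024 && offset < 1024 && (offset & 3) == 0;
}
#endif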
12522 /* Return TRUE if OP is a memory operand which we can load or store a vector
12523 to/from. TYPE is one of the following values:
12524 0 - Vector load/store (vldr)
12525 1 - Core registers (ldm)
12526 2 - Element/structure loads (vld1)
12529 neon_vector_mem_operand (rtx op, int type, bool strict)
12531 rtx ind;
12533 /* Reject eliminable registers. */
12534 if (! (reload_in_progress || reload_completed)
12535 && ( reg_mentioned_p (frame_pointer_rtx, op)
12536 || reg_mentioned_p (arg_pointer_rtx, op)
12537 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12538 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12539 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12540 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12541 return !strict;
12543 /* Constants are converted into offsets from labels. */
12544 if (!MEM_P (op))
12545 return FALSE;
12547 ind = XEXP (op, 0);
12549 if (reload_completed
12550 && (GET_CODE (ind) == LABEL_REF
12551 || (GET_CODE (ind) == CONST
12552 && GET_CODE (XEXP (ind, 0)) == PLUS
12553 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12554 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12555 return TRUE;
12557 /* Match: (mem (reg)). */
12558 if (REG_P (ind))
12559 return arm_address_register_rtx_p (ind, 0);
12561 /* Allow post-increment with Neon registers. */
12562 if ((type != 1 && GET_CODE (ind) == POST_INC)
12563 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12564 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12566 /* FIXME: vld1 allows register post-modify. */
12568 /* Match:
12569 (plus (reg)
12570 (const)). */
12571 if (type == 0
12572 && GET_CODE (ind) == PLUS
12573 && REG_P (XEXP (ind, 0))
12574 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12575 && CONST_INT_P (XEXP (ind, 1))
12576 && INTVAL (XEXP (ind, 1)) > -1024
12577 /* For quad modes, we restrict the constant offset to be slightly less
12578 than what the instruction format permits. We have no such constraint
12579 on double mode offsets. (This must match arm_legitimate_index_p.) */
12580 && (INTVAL (XEXP (ind, 1))
12581 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12582 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12583 return TRUE;
12585 return FALSE;
12588 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12589 type. */
12591 neon_struct_mem_operand (rtx op)
12593 rtx ind;
12595 /* Reject eliminable registers. */
12596 if (! (reload_in_progress || reload_completed)
12597 && ( reg_mentioned_p (frame_pointer_rtx, op)
12598 || reg_mentioned_p (arg_pointer_rtx, op)
12599 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12600 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12601 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12602 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12603 return FALSE;
12605 /* Constants are converted into offsets from labels. */
12606 if (!MEM_P (op))
12607 return FALSE;
12609 ind = XEXP (op, 0);
12611 if (reload_completed
12612 && (GET_CODE (ind) == LABEL_REF
12613 || (GET_CODE (ind) == CONST
12614 && GET_CODE (XEXP (ind, 0)) == PLUS
12615 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12616 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12617 return TRUE;
12619 /* Match: (mem (reg)). */
12620 if (REG_P (ind))
12621 return arm_address_register_rtx_p (ind, 0);
12623 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12624 if (GET_CODE (ind) == POST_INC
12625 || GET_CODE (ind) == PRE_DEC)
12626 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12628 return FALSE;
12631 /* Return true if X is a register that will be eliminated later on. */
12633 arm_eliminable_register (rtx x)
12635 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12636 || REGNO (x) == ARG_POINTER_REGNUM
12637 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12638 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12641 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12642 coprocessor registers. Otherwise return NO_REGS. */
12644 enum reg_class
12645 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12647 if (mode == HFmode)
12649 if (!TARGET_NEON_FP16)
12650 return GENERAL_REGS;
12651 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12652 return NO_REGS;
12653 return GENERAL_REGS;
12656 /* The neon move patterns handle all legitimate vector and struct
12657 addresses. */
12658 if (TARGET_NEON
12659 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12660 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12661 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12662 || VALID_NEON_STRUCT_MODE (mode)))
12663 return NO_REGS;
12665 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12666 return NO_REGS;
12668 return GENERAL_REGS;
12671 /* Values which must be returned in the most-significant end of the return
12672 register. */
12674 static bool
12675 arm_return_in_msb (const_tree valtype)
12677 return (TARGET_AAPCS_BASED
12678 && BYTES_BIG_ENDIAN
12679 && (AGGREGATE_TYPE_P (valtype)
12680 || TREE_CODE (valtype) == COMPLEX_TYPE
12681 || FIXED_POINT_TYPE_P (valtype)));
12684 /* Return TRUE if X references a SYMBOL_REF. */
12686 symbol_mentioned_p (rtx x)
12688 const char * fmt;
12689 int i;
12691 if (GET_CODE (x) == SYMBOL_REF)
12692 return 1;
12694 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12695 are constant offsets, not symbols. */
12696 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12697 return 0;
12699 fmt = GET_RTX_FORMAT (GET_CODE (x));
12701 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12703 if (fmt[i] == 'E')
12705 int j;
12707 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12708 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12709 return 1;
12711 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12712 return 1;
12715 return 0;
12718 /* Return TRUE if X references a LABEL_REF. */
12720 label_mentioned_p (rtx x)
12722 const char * fmt;
12723 int i;
12725 if (GET_CODE (x) == LABEL_REF)
12726 return 1;
12728 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12729 instruction, but they are constant offsets, not symbols. */
12730 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12731 return 0;
12733 fmt = GET_RTX_FORMAT (GET_CODE (x));
12734 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12736 if (fmt[i] == 'E')
12738 int j;
12740 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12741 if (label_mentioned_p (XVECEXP (x, i, j)))
12742 return 1;
12744 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12745 return 1;
12748 return 0;
12752 tls_mentioned_p (rtx x)
12754 switch (GET_CODE (x))
12756 case CONST:
12757 return tls_mentioned_p (XEXP (x, 0));
12759 case UNSPEC:
12760 if (XINT (x, 1) == UNSPEC_TLS)
12761 return 1;
12763 default:
12764 return 0;
12768 /* Must not copy any rtx that uses a pc-relative address. */
12770 static int
12771 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12773 if (GET_CODE (*x) == UNSPEC
12774 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12775 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12776 return 1;
12777 return 0;
12780 static bool
12781 arm_cannot_copy_insn_p (rtx insn)
12783 /* The tls call insn cannot be copied, as it is paired with a data
12784 word. */
12785 if (recog_memoized (insn) == CODE_FOR_tlscall)
12786 return true;
12788 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12791 enum rtx_code
12792 minmax_code (rtx x)
12794 enum rtx_code code = GET_CODE (x);
12796 switch (code)
12798 case SMAX:
12799 return GE;
12800 case SMIN:
12801 return LE;
12802 case UMIN:
12803 return LEU;
12804 case UMAX:
12805 return GEU;
12806 default:
12807 gcc_unreachable ();
12811 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12813 bool
12814 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12815 int *mask, bool *signed_sat)
12817 /* The high bound must be a power of two minus one. */
12818 int log = exact_log2 (INTVAL (hi_bound) + 1);
12819 if (log == -1)
12820 return false;
12822 /* The low bound is either zero (for usat) or one less than the
12823 negation of the high bound (for ssat). */
12824 if (INTVAL (lo_bound) == 0)
12826 if (mask)
12827 *mask = log;
12828 if (signed_sat)
12829 *signed_sat = false;
12831 return true;
12834 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12836 if (mask)
12837 *mask = log + 1;
12838 if (signed_sat)
12839 *signed_sat = true;
12841 return true;
12844 return false;
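/* Illustrative sketch, not part of the original file: the bounds accepted
   above.  HI_BOUND must be 2^k - 1; LO_BOUND == 0 then selects usat with a
   field width of k, and LO_BOUND == -HI_BOUND - 1 selects ssat with a field
   width of k + 1.  Clamping to [0, 255] therefore maps to usat #8 and
   clamping to [-128, 127] maps to ssat #8.  The helper name below is
   hypothetical.  */
#if 0
static int
sat_width_from_bounds (HOST_WIDE_INT lo, HOST_WIDE_INT hi, bool *is_signed)
{
  int k = exact_log2 (hi + 1);
  if (k == -1)
    return -1;
  if (lo == 0)
    {
      *is_signed = false;
      return k;
    }
  if (lo == -hi - 1)
    {
      *is_signed = true;
      return k + 1;
    }
  return -1;
}
#endif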
12847 /* Return 1 if memory locations are adjacent. */
12849 adjacent_mem_locations (rtx a, rtx b)
12851 /* We don't guarantee to preserve the order of these memory refs. */
12852 if (volatile_refs_p (a) || volatile_refs_p (b))
12853 return 0;
12855 if ((REG_P (XEXP (a, 0))
12856 || (GET_CODE (XEXP (a, 0)) == PLUS
12857 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12858 && (REG_P (XEXP (b, 0))
12859 || (GET_CODE (XEXP (b, 0)) == PLUS
12860 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12862 HOST_WIDE_INT val0 = 0, val1 = 0;
12863 rtx reg0, reg1;
12864 int val_diff;
12866 if (GET_CODE (XEXP (a, 0)) == PLUS)
12868 reg0 = XEXP (XEXP (a, 0), 0);
12869 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12871 else
12872 reg0 = XEXP (a, 0);
12874 if (GET_CODE (XEXP (b, 0)) == PLUS)
12876 reg1 = XEXP (XEXP (b, 0), 0);
12877 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12879 else
12880 reg1 = XEXP (b, 0);
12882 /* Don't accept any offset that will require multiple
12883 instructions to handle, since this would cause the
12884 arith_adjacentmem pattern to output an overlong sequence. */
12885 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12886 return 0;
12888 /* Don't allow an eliminable register: register elimination can make
12889 the offset too large. */
12890 if (arm_eliminable_register (reg0))
12891 return 0;
12893 val_diff = val1 - val0;
12895 if (arm_ld_sched)
12897 /* If the target has load delay slots, then there's no benefit
12898 to using an ldm instruction unless the offset is zero and
12899 we are optimizing for size. */
12900 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12901 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12902 && (val_diff == 4 || val_diff == -4));
12905 return ((REGNO (reg0) == REGNO (reg1))
12906 && (val_diff == 4 || val_diff == -4));
12909 return 0;
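/* Illustrative sketch, not part of the original file: the offset test above
   for two accesses off the same base register.  [r3, #8] and [r3, #12] are
   adjacent (a difference of 4 in either direction); on arm_ld_sched cores
   the pair is only considered worth merging when optimizing for size and one
   offset is 0 or 4.  The helper name below is hypothetical.  */
#if 0
static bool
offsets_adjacent_p (HOST_WIDE_INT val0, HOST_WIDE_INT val1)
{
  HOST_WIDE_INT diff = val1 - val0;
  return diff == 4 || diff == -4;
}
#endif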
12912 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12913 for load operations, false for store operations. CONSECUTIVE is true
12914 if the register numbers in the operation must be consecutive in the register
12915 bank. RETURN_PC is true if value is to be loaded in PC.
12916 The pattern we are trying to match for load is:
12917 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12918 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12921 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12923 where
12924 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12925 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12926 3. If consecutive is TRUE, then for kth register being loaded,
12927 REGNO (R_dk) = REGNO (R_d0) + k.
12928 The pattern for store is similar. */
12929 bool
12930 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12931 bool consecutive, bool return_pc)
12933 HOST_WIDE_INT count = XVECLEN (op, 0);
12934 rtx reg, mem, addr;
12935 unsigned regno;
12936 unsigned first_regno;
12937 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12938 rtx elt;
12939 bool addr_reg_in_reglist = false;
12940 bool update = false;
12941 int reg_increment;
12942 int offset_adj;
12943 int regs_per_val;
12945 /* If not in SImode, then registers must be consecutive
12946 (e.g., VLDM instructions for DFmode). */
12947 gcc_assert ((mode == SImode) || consecutive);
12948 /* Setting return_pc for stores is illegal. */
12949 gcc_assert (!return_pc || load);
12951 /* Set up the increments and the regs per val based on the mode. */
12952 reg_increment = GET_MODE_SIZE (mode);
12953 regs_per_val = reg_increment / 4;
12954 offset_adj = return_pc ? 1 : 0;
12956 if (count <= 1
12957 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12958 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12959 return false;
12961 /* Check if this is a write-back. */
12962 elt = XVECEXP (op, 0, offset_adj);
12963 if (GET_CODE (SET_SRC (elt)) == PLUS)
12965 i++;
12966 base = 1;
12967 update = true;
12969 /* The offset adjustment must be the number of registers being
12970 popped times the size of a single register. */
12971 if (!REG_P (SET_DEST (elt))
12972 || !REG_P (XEXP (SET_SRC (elt), 0))
12973 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12974 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12975 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12976 ((count - 1 - offset_adj) * reg_increment))
12977 return false;
12980 i = i + offset_adj;
12981 base = base + offset_adj;
12982 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12983 success depends on the type: VLDM can do just one reg,
12984 LDM must do at least two. */
12985 if ((count <= i) && (mode == SImode))
12986 return false;
12988 elt = XVECEXP (op, 0, i - 1);
12989 if (GET_CODE (elt) != SET)
12990 return false;
12992 if (load)
12994 reg = SET_DEST (elt);
12995 mem = SET_SRC (elt);
12997 else
12999 reg = SET_SRC (elt);
13000 mem = SET_DEST (elt);
13003 if (!REG_P (reg) || !MEM_P (mem))
13004 return false;
13006 regno = REGNO (reg);
13007 first_regno = regno;
13008 addr = XEXP (mem, 0);
13009 if (GET_CODE (addr) == PLUS)
13011 if (!CONST_INT_P (XEXP (addr, 1)))
13012 return false;
13014 offset = INTVAL (XEXP (addr, 1));
13015 addr = XEXP (addr, 0);
13018 if (!REG_P (addr))
13019 return false;
13021 /* Don't allow SP to be loaded unless it is also the base register. It
13022 guarantees that SP is reset correctly when an LDM instruction
13023 is interrupted. Otherwise, we might end up with a corrupt stack. */
13024 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13025 return false;
13027 for (; i < count; i++)
13029 elt = XVECEXP (op, 0, i);
13030 if (GET_CODE (elt) != SET)
13031 return false;
13033 if (load)
13035 reg = SET_DEST (elt);
13036 mem = SET_SRC (elt);
13038 else
13040 reg = SET_SRC (elt);
13041 mem = SET_DEST (elt);
13044 if (!REG_P (reg)
13045 || GET_MODE (reg) != mode
13046 || REGNO (reg) <= regno
13047 || (consecutive
13048 && (REGNO (reg) !=
13049 (unsigned int) (first_regno + regs_per_val * (i - base))))
13050 /* Don't allow SP to be loaded unless it is also the base register. It
13051 guarantees that SP is reset correctly when an LDM instruction
13052 is interrupted. Otherwise, we might end up with a corrupt stack. */
13053 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13054 || !MEM_P (mem)
13055 || GET_MODE (mem) != mode
13056 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13057 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13058 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13059 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13060 offset + (i - base) * reg_increment))
13061 && (!REG_P (XEXP (mem, 0))
13062 || offset + (i - base) * reg_increment != 0)))
13063 return false;
13065 regno = REGNO (reg);
13066 if (regno == REGNO (addr))
13067 addr_reg_in_reglist = true;
13070 if (load)
13072 if (update && addr_reg_in_reglist)
13073 return false;
13075 /* For Thumb-1, the address register is always modified - either by write-back
13076 or by an explicit load. If the pattern does not describe an update,
13077 then the address register must be in the list of loaded registers. */
13078 if (TARGET_THUMB1)
13079 return update || addr_reg_in_reglist;
13082 return true;
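/* Illustrative sketch, not part of the original file: the shape of a
   two-register LDM (no write-back) that the predicate above accepts -
   ascending register numbers with offsets increasing in steps of 4.  The
   register numbers and the helper name are arbitrary examples.  */
#if 0
static rtx
example_two_reg_ldm_pattern (void)
{
  rtx base = gen_rtx_REG (SImode, 4);
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 0),
                   gen_rtx_MEM (SImode, base));
  XVECEXP (par, 0, 1)
    = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 1),
                   gen_rtx_MEM (SImode, plus_constant (Pmode, base, 4)));
  return par;
}
#endif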
13085 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13086 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13087 instruction. ADD_OFFSET is nonzero if the base address register needs
13088 to be modified with an add instruction before we can use it. */
13090 static bool
13091 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13092 int nops, HOST_WIDE_INT add_offset)
13094 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13095 if the offset isn't small enough. The reason 2 ldrs are faster
13096 is because these ARMs are able to do more than one cache access
13097 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13098 whilst the ARM8 has a double bandwidth cache. This means that
13099 these cores can do both an instruction fetch and a data fetch in
13100 a single cycle, so the trick of calculating the address into a
13101 scratch register (one of the result regs) and then doing a load
13102 multiple actually becomes slower (and no smaller in code size).
13103 That is the transformation
13105 ldr rd1, [rbase + offset]
13106 ldr rd2, [rbase + offset + 4]
13110 add rd1, rbase, offset
13111 ldmia rd1, {rd1, rd2}
13113 produces worse code -- '3 cycles + any stalls on rd2' instead of
13114 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13115 access per cycle, the first sequence could never complete in less
13116 than 6 cycles, whereas the ldm sequence would only take 5 and
13117 would make better use of sequential accesses if not hitting the
13118 cache.
13120 We cheat here and test 'arm_ld_sched' which we currently know to
13121 only be true for the ARM8, ARM9 and StrongARM. If this ever
13122 changes, then the test below needs to be reworked. */
13123 if (nops == 2 && arm_ld_sched && add_offset != 0)
13124 return false;
13126 /* XScale has load-store double instructions, but they have stricter
13127 alignment requirements than load-store multiple, so we cannot
13128 use them.
13130 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13131 the pipeline until completion.
13133 NREGS CYCLES
13134 1 3
13135 2 4
13136 3 5
13137 4 6
13139 An ldr instruction takes 1-3 cycles, but does not block the
13140 pipeline.
13142 NREGS CYCLES
13143 1 1-3
13144 2 2-6
13145 3 3-9
13146 4 4-12
13148 Best case ldr will always win. However, the more ldr instructions
13149 we issue, the less likely we are to be able to schedule them well.
13150 Using ldr instructions also increases code size.
13152 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13153 for counts of 3 or 4 regs. */
13154 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13155 return false;
13156 return true;
13159 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13160 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13161 an array ORDER which describes the sequence to use when accessing the
13162 offsets that produces an ascending order. In this sequence, each
13163 offset must be larger by exactly 4 than the previous one. ORDER[0]
13164 must have been filled in with the lowest offset by the caller.
13165 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13166 we use to verify that ORDER produces an ascending order of registers.
13167 Return true if it was possible to construct such an order, false if
13168 not. */
13170 static bool
13171 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13172 int *unsorted_regs)
13174 int i;
13175 for (i = 1; i < nops; i++)
13177 int j;
13179 order[i] = order[i - 1];
13180 for (j = 0; j < nops; j++)
13181 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13183 /* We must find exactly one offset that is higher than the
13184 previous one by 4. */
13185 if (order[i] != order[i - 1])
13186 return false;
13187 order[i] = j;
13189 if (order[i] == order[i - 1])
13190 return false;
13191 /* The register numbers must be ascending. */
13192 if (unsorted_regs != NULL
13193 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13194 return false;
13196 return true;
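/* Illustrative sketch, not part of the original file: with offsets
   {8, 0, 12, 4} and ORDER[0] preset to 1 (the index of the lowest offset),
   the routine above fills ORDER with {1, 3, 0, 2}, i.e. the accesses in the
   order 0, 4, 8, 12.  The helper name below is hypothetical.  */
#if 0
static void
example_offset_order (void)
{
  HOST_WIDE_INT offsets[4] = { 8, 0, 12, 4 };
  int order[4] = { 1, 0, 0, 0 };        /* order[0] = index of lowest.  */

  if (compute_offset_order (4, offsets, order, NULL))
    {
      /* order[] is now {1, 3, 0, 2}.  */
    }
}
#endif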
13199 /* Used to determine in a peephole whether a sequence of load
13200 instructions can be changed into a load-multiple instruction.
13201 NOPS is the number of separate load instructions we are examining. The
13202 first NOPS entries in OPERANDS are the destination registers, the
13203 next NOPS entries are memory operands. If this function is
13204 successful, *BASE is set to the common base register of the memory
13205 accesses; *LOAD_OFFSET is set to the first memory location's offset
13206 from that base register.
13207 REGS is an array filled in with the destination register numbers.
13208 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13209 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13210 the sequence of registers in REGS matches the loads from ascending memory
13211 locations, and the function verifies that the register numbers are
13212 themselves ascending. If CHECK_REGS is false, the register numbers
13213 are stored in the order they are found in the operands. */
13214 static int
13215 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13216 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13218 int unsorted_regs[MAX_LDM_STM_OPS];
13219 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13220 int order[MAX_LDM_STM_OPS];
13221 rtx base_reg_rtx = NULL;
13222 int base_reg = -1;
13223 int i, ldm_case;
13225 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13226 easily extended if required. */
13227 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13229 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13231 /* Loop over the operands and check that the memory references are
13232 suitable (i.e. immediate offsets from the same base register). At
13233 the same time, extract the target register, and the memory
13234 offsets. */
13235 for (i = 0; i < nops; i++)
13237 rtx reg;
13238 rtx offset;
13240 /* Convert a subreg of a mem into the mem itself. */
13241 if (GET_CODE (operands[nops + i]) == SUBREG)
13242 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13244 gcc_assert (MEM_P (operands[nops + i]));
13246 /* Don't reorder volatile memory references; it doesn't seem worth
13247 looking for the case where the order is ok anyway. */
13248 if (MEM_VOLATILE_P (operands[nops + i]))
13249 return 0;
13251 offset = const0_rtx;
13253 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13254 || (GET_CODE (reg) == SUBREG
13255 && REG_P (reg = SUBREG_REG (reg))))
13256 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13257 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13258 || (GET_CODE (reg) == SUBREG
13259 && REG_P (reg = SUBREG_REG (reg))))
13260 && (CONST_INT_P (offset
13261 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13263 if (i == 0)
13265 base_reg = REGNO (reg);
13266 base_reg_rtx = reg;
13267 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13268 return 0;
13270 else if (base_reg != (int) REGNO (reg))
13271 /* Not addressed from the same base register. */
13272 return 0;
13274 unsorted_regs[i] = (REG_P (operands[i])
13275 ? REGNO (operands[i])
13276 : REGNO (SUBREG_REG (operands[i])));
13278 /* If it isn't an integer register, or if it overwrites the
13279 base register but isn't the last insn in the list, then
13280 we can't do this. */
13281 if (unsorted_regs[i] < 0
13282 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13283 || unsorted_regs[i] > 14
13284 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13285 return 0;
13287 /* Don't allow SP to be loaded unless it is also the base
13288 register. It guarantees that SP is reset correctly when
13289 an LDM instruction is interrupted. Otherwise, we might
13290 end up with a corrupt stack. */
13291 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13292 return 0;
13294 unsorted_offsets[i] = INTVAL (offset);
13295 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13296 order[0] = i;
13298 else
13299 /* Not a suitable memory address. */
13300 return 0;
13303 /* All the useful information has now been extracted from the
13304 operands into unsorted_regs and unsorted_offsets; additionally,
13305 order[0] has been set to the lowest offset in the list. Sort
13306 the offsets into order, verifying that they are adjacent, and
13307 check that the register numbers are ascending. */
13308 if (!compute_offset_order (nops, unsorted_offsets, order,
13309 check_regs ? unsorted_regs : NULL))
13310 return 0;
13312 if (saved_order)
13313 memcpy (saved_order, order, sizeof order);
13315 if (base)
13317 *base = base_reg;
13319 for (i = 0; i < nops; i++)
13320 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13322 *load_offset = unsorted_offsets[order[0]];
13325 if (TARGET_THUMB1
13326 && !peep2_reg_dead_p (nops, base_reg_rtx))
13327 return 0;
13329 if (unsorted_offsets[order[0]] == 0)
13330 ldm_case = 1; /* ldmia */
13331 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13332 ldm_case = 2; /* ldmib */
13333 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13334 ldm_case = 3; /* ldmda */
13335 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13336 ldm_case = 4; /* ldmdb */
13337 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13338 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13339 ldm_case = 5;
13340 else
13341 return 0;
13343 if (!multiple_operation_profitable_p (false, nops,
13344 ldm_case == 5
13345 ? unsorted_offsets[order[0]] : 0))
13346 return 0;
13348 return ldm_case;
13351 /* Used to determine in a peephole whether a sequence of store instructions can
13352 be changed into a store-multiple instruction.
13353 NOPS is the number of separate store instructions we are examining.
13354 NOPS_TOTAL is the total number of instructions recognized by the peephole
13355 pattern.
13356 The first NOPS entries in OPERANDS are the source registers, the next
13357 NOPS entries are memory operands. If this function is successful, *BASE is
13358 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13359 to the first memory location's offset from that base register. REGS is an
13360 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13361 likewise filled with the corresponding rtx's.
13362 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13363 numbers to an ascending order of stores.
13364 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13365 from ascending memory locations, and the function verifies that the register
13366 numbers are themselves ascending. If CHECK_REGS is false, the register
13367 numbers are stored in the order they are found in the operands. */
13368 static int
13369 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13370 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13371 HOST_WIDE_INT *load_offset, bool check_regs)
13373 int unsorted_regs[MAX_LDM_STM_OPS];
13374 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13375 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13376 int order[MAX_LDM_STM_OPS];
13377 int base_reg = -1;
13378 rtx base_reg_rtx = NULL;
13379 int i, stm_case;
13381 /* Write back of base register is currently only supported for Thumb 1. */
13382 int base_writeback = TARGET_THUMB1;
13384 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13385 easily extended if required. */
13386 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13388 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13390 /* Loop over the operands and check that the memory references are
13391 suitable (i.e. immediate offsets from the same base register). At
13392 the same time, extract the target register, and the memory
13393 offsets. */
13394 for (i = 0; i < nops; i++)
13396 rtx reg;
13397 rtx offset;
13399 /* Convert a subreg of a mem into the mem itself. */
13400 if (GET_CODE (operands[nops + i]) == SUBREG)
13401 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13403 gcc_assert (MEM_P (operands[nops + i]));
13405 /* Don't reorder volatile memory references; it doesn't seem worth
13406 looking for the case where the order is ok anyway. */
13407 if (MEM_VOLATILE_P (operands[nops + i]))
13408 return 0;
13410 offset = const0_rtx;
13412 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13413 || (GET_CODE (reg) == SUBREG
13414 && REG_P (reg = SUBREG_REG (reg))))
13415 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13416 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13417 || (GET_CODE (reg) == SUBREG
13418 && REG_P (reg = SUBREG_REG (reg))))
13419 && (CONST_INT_P (offset
13420 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13422 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13423 ? operands[i] : SUBREG_REG (operands[i]));
13424 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13426 if (i == 0)
13428 base_reg = REGNO (reg);
13429 base_reg_rtx = reg;
13430 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13431 return 0;
13433 else if (base_reg != (int) REGNO (reg))
13434 /* Not addressed from the same base register. */
13435 return 0;
13437 /* If it isn't an integer register, then we can't do this. */
13438 if (unsorted_regs[i] < 0
13439 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13440 /* The effects are unpredictable if the base register is
13441 both updated and stored. */
13442 || (base_writeback && unsorted_regs[i] == base_reg)
13443 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13444 || unsorted_regs[i] > 14)
13445 return 0;
13447 unsorted_offsets[i] = INTVAL (offset);
13448 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13449 order[0] = i;
13451 else
13452 /* Not a suitable memory address. */
13453 return 0;
13456 /* All the useful information has now been extracted from the
13457 operands into unsorted_regs and unsorted_offsets; additionally,
13458 order[0] has been set to the lowest offset in the list. Sort
13459 the offsets into order, verifying that they are adjacent, and
13460 check that the register numbers are ascending. */
13461 if (!compute_offset_order (nops, unsorted_offsets, order,
13462 check_regs ? unsorted_regs : NULL))
13463 return 0;
13465 if (saved_order)
13466 memcpy (saved_order, order, sizeof order);
13468 if (base)
13470 *base = base_reg;
13472 for (i = 0; i < nops; i++)
13474 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13475 if (reg_rtxs)
13476 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13479 *load_offset = unsorted_offsets[order[0]];
13482 if (TARGET_THUMB1
13483 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13484 return 0;
13486 if (unsorted_offsets[order[0]] == 0)
13487 stm_case = 1; /* stmia */
13488 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13489 stm_case = 2; /* stmib */
13490 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13491 stm_case = 3; /* stmda */
13492 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13493 stm_case = 4; /* stmdb */
13494 else
13495 return 0;
13497 if (!multiple_operation_profitable_p (false, nops, 0))
13498 return 0;
13500 return stm_case;
13503 /* Routines for use in generating RTL. */
13505 /* Generate a load-multiple instruction. COUNT is the number of loads in
13506 the instruction; REGS and MEMS are arrays containing the operands.
13507 BASEREG is the base register to be used in addressing the memory operands.
13508 WBACK_OFFSET is nonzero if the instruction should update the base
13509 register. */
13511 static rtx
13512 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13513 HOST_WIDE_INT wback_offset)
13515 int i = 0, j;
13516 rtx result;
13518 if (!multiple_operation_profitable_p (false, count, 0))
13520 rtx seq;
13522 start_sequence ();
13524 for (i = 0; i < count; i++)
13525 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13527 if (wback_offset != 0)
13528 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13530 seq = get_insns ();
13531 end_sequence ();
13533 return seq;
13536 result = gen_rtx_PARALLEL (VOIDmode,
13537 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13538 if (wback_offset != 0)
13540 XVECEXP (result, 0, 0)
13541 = gen_rtx_SET (VOIDmode, basereg,
13542 plus_constant (Pmode, basereg, wback_offset));
13543 i = 1;
13544 count++;
13547 for (j = 0; i < count; i++, j++)
13548 XVECEXP (result, 0, i)
13549 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13551 return result;
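/* A rough sketch of the RTL built above (illustrative only): with COUNT == 2,
   REGS == {4, 5}, MEMS addressing [BASEREG] and [BASEREG, #4] and
   WBACK_OFFSET == 8, the PARALLEL looks roughly like

     (parallel
       [(set (reg:SI basereg) (plus:SI (reg:SI basereg) (const_int 8)))
        (set (reg:SI 4) (mem:SI (reg:SI basereg)))
        (set (reg:SI 5) (mem:SI (plus:SI (reg:SI basereg) (const_int 4))))])

   which the load-multiple patterns in the machine description can match;
   when the operation is not profitable, a plain sequence of SImode moves
   is emitted instead.  */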
13554 /* Generate a store-multiple instruction. COUNT is the number of stores in
13555 the instruction; REGS and MEMS are arrays containing the operands.
13556 BASEREG is the base register to be used in addressing the memory operands.
13557 WBACK_OFFSET is nonzero if the instruction should update the base
13558 register. */
13560 static rtx
13561 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13562 HOST_WIDE_INT wback_offset)
13564 int i = 0, j;
13565 rtx result;
13567 if (GET_CODE (basereg) == PLUS)
13568 basereg = XEXP (basereg, 0);
13570 if (!multiple_operation_profitable_p (false, count, 0))
13572 rtx seq;
13574 start_sequence ();
13576 for (i = 0; i < count; i++)
13577 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13579 if (wback_offset != 0)
13580 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13582 seq = get_insns ();
13583 end_sequence ();
13585 return seq;
13588 result = gen_rtx_PARALLEL (VOIDmode,
13589 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13590 if (wback_offset != 0)
13592 XVECEXP (result, 0, 0)
13593 = gen_rtx_SET (VOIDmode, basereg,
13594 plus_constant (Pmode, basereg, wback_offset));
13595 i = 1;
13596 count++;
13599 for (j = 0; i < count; i++, j++)
13600 XVECEXP (result, 0, i)
13601 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13603 return result;
13606 /* Generate either a load-multiple or a store-multiple instruction. This
13607 function can be used in situations where we can start with a single MEM
13608 rtx and adjust its address upwards.
13609 COUNT is the number of operations in the instruction, not counting a
13610 possible update of the base register. REGS is an array containing the
13611 register operands.
13612 BASEREG is the base register to be used in addressing the memory operands,
13613 which are constructed from BASEMEM.
13614 WRITE_BACK specifies whether the generated instruction should include an
13615 update of the base register.
13616 OFFSETP is used to pass an offset to and from this function; this offset
13617 is not used when constructing the address (instead BASEMEM should have an
13618 appropriate offset in its address), it is used only for setting
13619 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
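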
13621 static rtx
13622 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13623 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13625 rtx mems[MAX_LDM_STM_OPS];
13626 HOST_WIDE_INT offset = *offsetp;
13627 int i;
13629 gcc_assert (count <= MAX_LDM_STM_OPS);
13631 if (GET_CODE (basereg) == PLUS)
13632 basereg = XEXP (basereg, 0);
13634 for (i = 0; i < count; i++)
13636 rtx addr = plus_constant (Pmode, basereg, i * 4);
13637 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13638 offset += 4;
13641 if (write_back)
13642 *offsetp = offset;
13644 if (is_load)
13645 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13646 write_back ? 4 * count : 0);
13647 else
13648 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13649 write_back ? 4 * count : 0);
13653 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13654 rtx basemem, HOST_WIDE_INT *offsetp)
13656 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13657 offsetp);
13661 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13662 rtx basemem, HOST_WIDE_INT *offsetp)
13664 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13665 offsetp);
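/* Usage sketch (hypothetical register list, for illustration): a caller such
   as arm_gen_movmemqi below does roughly

     int regnos[4] = { 0, 1, 2, 3 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase, &off));

   BASEMEM only supplies the MEM attributes; the addresses actually used are
   BASEREG, BASEREG + 4, ... and, because WRITE_BACK is true, OFF is advanced
   by 16 so that the next call describes the following block.  */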
13668 /* Called from a peephole2 expander to turn a sequence of loads into an
13669 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13670 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13671 is true if we can reorder the registers because they are used commutatively
13672 subsequently.
13673 Returns true iff we could generate a new instruction. */
13675 bool
13676 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13678 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13679 rtx mems[MAX_LDM_STM_OPS];
13680 int i, j, base_reg;
13681 rtx base_reg_rtx;
13682 HOST_WIDE_INT offset;
13683 int write_back = FALSE;
13684 int ldm_case;
13685 rtx addr;
13687 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13688 &base_reg, &offset, !sort_regs);
13690 if (ldm_case == 0)
13691 return false;
13693 if (sort_regs)
13694 for (i = 0; i < nops - 1; i++)
13695 for (j = i + 1; j < nops; j++)
13696 if (regs[i] > regs[j])
13698 int t = regs[i];
13699 regs[i] = regs[j];
13700 regs[j] = t;
13702 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13704 if (TARGET_THUMB1)
13706 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13707 gcc_assert (ldm_case == 1 || ldm_case == 5);
13708 write_back = TRUE;
13711 if (ldm_case == 5)
13713 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13714 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13715 offset = 0;
13716 if (!TARGET_THUMB1)
13718 base_reg = regs[0];
13719 base_reg_rtx = newbase;
13723 for (i = 0; i < nops; i++)
13725 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13726 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13727 SImode, addr, 0);
13729 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13730 write_back ? offset + i * 4 : 0));
13731 return true;
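/* For illustration (hypothetical peephole input): a matched pair such as

     ldr r1, [r3, #4]
     ldr r0, [r3]

   gives ldm_case 1; with SORT_REGS true the registers are sorted to
   {r0, r1} and the call above emits a single

     ldm r3, {r0, r1}

   For ldm_case 5 the residual offset is first added into a new base
   register, as handled just above.  */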
13734 /* Called from a peephole2 expander to turn a sequence of stores into an
13735 STM instruction. OPERANDS are the operands found by the peephole matcher;
13736 NOPS indicates how many separate stores we are trying to combine.
13737 Returns true iff we could generate a new instruction. */
13739 bool
13740 gen_stm_seq (rtx *operands, int nops)
13742 int i;
13743 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13744 rtx mems[MAX_LDM_STM_OPS];
13745 int base_reg;
13746 rtx base_reg_rtx;
13747 HOST_WIDE_INT offset;
13748 int write_back = FALSE;
13749 int stm_case;
13750 rtx addr;
13751 bool base_reg_dies;
13753 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13754 mem_order, &base_reg, &offset, true);
13756 if (stm_case == 0)
13757 return false;
13759 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13761 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13762 if (TARGET_THUMB1)
13764 gcc_assert (base_reg_dies);
13765 write_back = TRUE;
13768 if (stm_case == 5)
13770 gcc_assert (base_reg_dies);
13771 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13772 offset = 0;
13775 addr = plus_constant (Pmode, base_reg_rtx, offset);
13777 for (i = 0; i < nops; i++)
13779 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13780 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13781 SImode, addr, 0);
13783 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13784 write_back ? offset + i * 4 : 0));
13785 return true;
13788 /* Called from a peephole2 expander to turn a sequence of stores that are
13789 preceded by constant loads into an STM instruction. OPERANDS are the
13790 operands found by the peephole matcher; NOPS indicates how many
13791 separate stores we are trying to combine; there are 2 * NOPS
13792 instructions in the peephole.
13793 Returns true iff we could generate a new instruction. */
13795 bool
13796 gen_const_stm_seq (rtx *operands, int nops)
13798 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13799 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13800 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13801 rtx mems[MAX_LDM_STM_OPS];
13802 int base_reg;
13803 rtx base_reg_rtx;
13804 HOST_WIDE_INT offset;
13805 int write_back = FALSE;
13806 int stm_case;
13807 rtx addr;
13808 bool base_reg_dies;
13809 int i, j;
13810 HARD_REG_SET allocated;
13812 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13813 mem_order, &base_reg, &offset, false);
13815 if (stm_case == 0)
13816 return false;
13818 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13820 /* If the same register is used more than once, try to find a free
13821 register. */
13822 CLEAR_HARD_REG_SET (allocated);
13823 for (i = 0; i < nops; i++)
13825 for (j = i + 1; j < nops; j++)
13826 if (regs[i] == regs[j])
13828 rtx t = peep2_find_free_register (0, nops * 2,
13829 TARGET_THUMB1 ? "l" : "r",
13830 SImode, &allocated);
13831 if (t == NULL_RTX)
13832 return false;
13833 reg_rtxs[i] = t;
13834 regs[i] = REGNO (t);
13838 /* Compute an ordering that maps the register numbers to an ascending
13839 sequence. */
13840 reg_order[0] = 0;
13841 for (i = 0; i < nops; i++)
13842 if (regs[i] < regs[reg_order[0]])
13843 reg_order[0] = i;
13845 for (i = 1; i < nops; i++)
13847 int this_order = reg_order[i - 1];
13848 for (j = 0; j < nops; j++)
13849 if (regs[j] > regs[reg_order[i - 1]]
13850 && (this_order == reg_order[i - 1]
13851 || regs[j] < regs[this_order]))
13852 this_order = j;
13853 reg_order[i] = this_order;
13856 /* Ensure that registers that must be live after the instruction end
13857 up with the correct value. */
13858 for (i = 0; i < nops; i++)
13860 int this_order = reg_order[i];
13861 if ((this_order != mem_order[i]
13862 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13863 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13864 return false;
13867 /* Load the constants. */
13868 for (i = 0; i < nops; i++)
13870 rtx op = operands[2 * nops + mem_order[i]];
13871 sorted_regs[i] = regs[reg_order[i]];
13872 emit_move_insn (reg_rtxs[reg_order[i]], op);
13875 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13877 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13878 if (TARGET_THUMB1)
13880 gcc_assert (base_reg_dies);
13881 write_back = TRUE;
13884 if (stm_case == 5)
13886 gcc_assert (base_reg_dies);
13887 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13888 offset = 0;
13891 addr = plus_constant (Pmode, base_reg_rtx, offset);
13893 for (i = 0; i < nops; i++)
13895 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13896 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13897 SImode, addr, 0);
13899 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13900 write_back ? offset + i * 4 : 0));
13901 return true;
13904 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13905 unaligned copies on processors which support unaligned semantics for those
13906 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13907 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13908 An interleave factor of 1 (the minimum) will perform no interleaving.
13909 Load/store multiple are used for aligned addresses where possible. */
13911 static void
13912 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13913 HOST_WIDE_INT length,
13914 unsigned int interleave_factor)
13916 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13917 int *regnos = XALLOCAVEC (int, interleave_factor);
13918 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13919 HOST_WIDE_INT i, j;
13920 HOST_WIDE_INT remaining = length, words;
13921 rtx halfword_tmp = NULL, byte_tmp = NULL;
13922 rtx dst, src;
13923 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13924 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13925 HOST_WIDE_INT srcoffset, dstoffset;
13926 HOST_WIDE_INT src_autoinc, dst_autoinc;
13927 rtx mem, addr;
13929 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13931 /* Use hard registers if we have aligned source or destination so we can use
13932 load/store multiple with contiguous registers. */
13933 if (dst_aligned || src_aligned)
13934 for (i = 0; i < interleave_factor; i++)
13935 regs[i] = gen_rtx_REG (SImode, i);
13936 else
13937 for (i = 0; i < interleave_factor; i++)
13938 regs[i] = gen_reg_rtx (SImode);
13940 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13941 src = copy_addr_to_reg (XEXP (srcbase, 0));
13943 srcoffset = dstoffset = 0;
13945 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13946 For copying the last bytes we want to subtract this offset again. */
13947 src_autoinc = dst_autoinc = 0;
13949 for (i = 0; i < interleave_factor; i++)
13950 regnos[i] = i;
13952 /* Copy BLOCK_SIZE_BYTES chunks. */
13954 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13956 /* Load words. */
13957 if (src_aligned && interleave_factor > 1)
13959 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13960 TRUE, srcbase, &srcoffset));
13961 src_autoinc += UNITS_PER_WORD * interleave_factor;
13963 else
13965 for (j = 0; j < interleave_factor; j++)
13967 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13968 - src_autoinc));
13969 mem = adjust_automodify_address (srcbase, SImode, addr,
13970 srcoffset + j * UNITS_PER_WORD);
13971 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13973 srcoffset += block_size_bytes;
13976 /* Store words. */
13977 if (dst_aligned && interleave_factor > 1)
13979 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13980 TRUE, dstbase, &dstoffset));
13981 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13983 else
13985 for (j = 0; j < interleave_factor; j++)
13987 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13988 - dst_autoinc));
13989 mem = adjust_automodify_address (dstbase, SImode, addr,
13990 dstoffset + j * UNITS_PER_WORD);
13991 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13993 dstoffset += block_size_bytes;
13996 remaining -= block_size_bytes;
13999 /* Copy any whole words left (note these aren't interleaved with any
14000 subsequent halfword/byte load/stores in the interests of simplicity). */
14002 words = remaining / UNITS_PER_WORD;
14004 gcc_assert (words < interleave_factor);
14006 if (src_aligned && words > 1)
14008 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14009 &srcoffset));
14010 src_autoinc += UNITS_PER_WORD * words;
14012 else
14014 for (j = 0; j < words; j++)
14016 addr = plus_constant (Pmode, src,
14017 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14018 mem = adjust_automodify_address (srcbase, SImode, addr,
14019 srcoffset + j * UNITS_PER_WORD);
14020 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14022 srcoffset += words * UNITS_PER_WORD;
14025 if (dst_aligned && words > 1)
14027 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14028 &dstoffset));
14029 dst_autoinc += words * UNITS_PER_WORD;
14031 else
14033 for (j = 0; j < words; j++)
14035 addr = plus_constant (Pmode, dst,
14036 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14037 mem = adjust_automodify_address (dstbase, SImode, addr,
14038 dstoffset + j * UNITS_PER_WORD);
14039 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14041 dstoffset += words * UNITS_PER_WORD;
14044 remaining -= words * UNITS_PER_WORD;
14046 gcc_assert (remaining < 4);
14048 /* Copy a halfword if necessary. */
14050 if (remaining >= 2)
14052 halfword_tmp = gen_reg_rtx (SImode);
14054 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14055 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14056 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14058 /* Either write out immediately, or delay until we've loaded the last
14059 byte, depending on interleave factor. */
14060 if (interleave_factor == 1)
14062 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14063 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14064 emit_insn (gen_unaligned_storehi (mem,
14065 gen_lowpart (HImode, halfword_tmp)));
14066 halfword_tmp = NULL;
14067 dstoffset += 2;
14070 remaining -= 2;
14071 srcoffset += 2;
14074 gcc_assert (remaining < 2);
14076 /* Copy last byte. */
14078 if ((remaining & 1) != 0)
14080 byte_tmp = gen_reg_rtx (SImode);
14082 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14083 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14084 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14086 if (interleave_factor == 1)
14088 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14089 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14090 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14091 byte_tmp = NULL;
14092 dstoffset++;
14095 remaining--;
14096 srcoffset++;
14099 /* Store last halfword if we haven't done so already. */
14101 if (halfword_tmp)
14103 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14104 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14105 emit_insn (gen_unaligned_storehi (mem,
14106 gen_lowpart (HImode, halfword_tmp)));
14107 dstoffset += 2;
14110 /* Likewise for last byte. */
14112 if (byte_tmp)
14114 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14115 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14116 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14117 dstoffset++;
14120 gcc_assert (remaining == 0 && srcoffset == dstoffset);
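/* Worked example (hypothetical values): for LENGTH == 15 with
   INTERLEAVE_FACTOR == 2 and both buffers unaligned, the code above emits

     2 x ldr + 2 x str    one 8-byte block, unaligned word accesses
     1 x ldr + 1 x str    the remaining whole word
     1 x ldrh             halfword load, its store deferred (factor > 1)
     1 x ldrb             final byte load, its store deferred too
     1 x strh + 1 x strb  the deferred halfword and byte stores

   leaving REMAINING == 0 and SRCOFFSET == DSTOFFSET == 15 for the final
   assertion.  */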
14123 /* From mips_adjust_block_mem:
14125 Helper function for doing a loop-based block operation on memory
14126 reference MEM. Each iteration of the loop will operate on LENGTH
14127 bytes of MEM.
14129 Create a new base register for use within the loop and point it to
14130 the start of MEM. Create a new memory reference that uses this
14131 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14133 static void
14134 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14135 rtx *loop_mem)
14137 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14139 /* Although the new mem does not refer to a known location,
14140 it does keep up to LENGTH bytes of alignment. */
14141 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14142 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14145 /* From mips_block_move_loop:
14147 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14148 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14149 the memory regions do not overlap. */
14151 static void
14152 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14153 unsigned int interleave_factor,
14154 HOST_WIDE_INT bytes_per_iter)
14156 rtx label, src_reg, dest_reg, final_src, test;
14157 HOST_WIDE_INT leftover;
14159 leftover = length % bytes_per_iter;
14160 length -= leftover;
14162 /* Create registers and memory references for use within the loop. */
14163 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14164 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14166 /* Calculate the value that SRC_REG should have after the last iteration of
14167 the loop. */
14168 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14169 0, 0, OPTAB_WIDEN);
14171 /* Emit the start of the loop. */
14172 label = gen_label_rtx ();
14173 emit_label (label);
14175 /* Emit the loop body. */
14176 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14177 interleave_factor);
14179 /* Move on to the next block. */
14180 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14181 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14183 /* Emit the loop condition. */
14184 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14185 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14187 /* Mop up any left-over bytes. */
14188 if (leftover)
14189 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
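/* A rough sketch of the emitted structure (hypothetical values): for
   LENGTH == 100 and BYTES_PER_ITER == 16 the code above produces

     final_src = src_reg + 96
   loop:
     <straight copy of 16 bytes>
     src_reg += 16;  dest_reg += 16
     if (src_reg != final_src) goto loop
     <straight copy of the 4 left-over bytes>

   i.e. a six-iteration loop followed by a tail copy of
   LENGTH % BYTES_PER_ITER bytes.  */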
14192 /* Emit a block move when either the source or destination is unaligned (not
14193 aligned to a four-byte boundary). This may need further tuning depending on
14194 core type, optimize_size setting, etc. */
14196 static int
14197 arm_movmemqi_unaligned (rtx *operands)
14199 HOST_WIDE_INT length = INTVAL (operands[2]);
14201 if (optimize_size)
14203 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14204 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14205 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14206 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14207 or dst_aligned though: allow more interleaving in those cases since the
14208 resulting code can be smaller. */
14209 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14210 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14212 if (length > 12)
14213 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14214 interleave_factor, bytes_per_iter);
14215 else
14216 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14217 interleave_factor);
14219 else
14221 /* Note that the loop created by arm_block_move_unaligned_loop may be
14222 subject to loop unrolling, which makes tuning this condition a little
14223 redundant. */
14224 if (length > 32)
14225 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14226 else
14227 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14230 return 1;
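/* Summary of the heuristics above (an illustrative restatement):

     optimize_size,  length <= 12:  straight copy; factor 2 if either buffer
                                    is word aligned, otherwise factor 1
     optimize_size,  length >  12:  loop; 8 bytes/iteration if aligned,
                                    otherwise 4 bytes/iteration
     !optimize_size, length <= 32:  straight copy, factor 4
     !optimize_size, length >  32:  loop, factor 4, 16 bytes/iteration  */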
14234 arm_gen_movmemqi (rtx *operands)
14236 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14237 HOST_WIDE_INT srcoffset, dstoffset;
14238 int i;
14239 rtx src, dst, srcbase, dstbase;
14240 rtx part_bytes_reg = NULL;
14241 rtx mem;
14243 if (!CONST_INT_P (operands[2])
14244 || !CONST_INT_P (operands[3])
14245 || INTVAL (operands[2]) > 64)
14246 return 0;
14248 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14249 return arm_movmemqi_unaligned (operands);
14251 if (INTVAL (operands[3]) & 3)
14252 return 0;
14254 dstbase = operands[0];
14255 srcbase = operands[1];
14257 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14258 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14260 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14261 out_words_to_go = INTVAL (operands[2]) / 4;
14262 last_bytes = INTVAL (operands[2]) & 3;
14263 dstoffset = srcoffset = 0;
14265 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14266 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14268 for (i = 0; in_words_to_go >= 2; i+=4)
14270 if (in_words_to_go > 4)
14271 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14272 TRUE, srcbase, &srcoffset));
14273 else
14274 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14275 src, FALSE, srcbase,
14276 &srcoffset));
14278 if (out_words_to_go)
14280 if (out_words_to_go > 4)
14281 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14282 TRUE, dstbase, &dstoffset));
14283 else if (out_words_to_go != 1)
14284 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14285 out_words_to_go, dst,
14286 (last_bytes == 0
14287 ? FALSE : TRUE),
14288 dstbase, &dstoffset));
14289 else
14291 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14292 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14293 if (last_bytes != 0)
14295 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14296 dstoffset += 4;
14301 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14302 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14305 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14306 if (out_words_to_go)
14308 rtx sreg;
14310 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14311 sreg = copy_to_reg (mem);
14313 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14314 emit_move_insn (mem, sreg);
14315 in_words_to_go--;
14317 gcc_assert (!in_words_to_go); /* Sanity check */
14320 if (in_words_to_go)
14322 gcc_assert (in_words_to_go > 0);
14324 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14325 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14328 gcc_assert (!last_bytes || part_bytes_reg);
14330 if (BYTES_BIG_ENDIAN && last_bytes)
14332 rtx tmp = gen_reg_rtx (SImode);
14334 /* The bytes we want are in the top end of the word. */
14335 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14336 GEN_INT (8 * (4 - last_bytes))));
14337 part_bytes_reg = tmp;
14339 while (last_bytes)
14341 mem = adjust_automodify_address (dstbase, QImode,
14342 plus_constant (Pmode, dst,
14343 last_bytes - 1),
14344 dstoffset + last_bytes - 1);
14345 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14347 if (--last_bytes)
14349 tmp = gen_reg_rtx (SImode);
14350 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14351 part_bytes_reg = tmp;
14356 else
14358 if (last_bytes > 1)
14360 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14361 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14362 last_bytes -= 2;
14363 if (last_bytes)
14365 rtx tmp = gen_reg_rtx (SImode);
14366 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14367 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14368 part_bytes_reg = tmp;
14369 dstoffset += 2;
14373 if (last_bytes)
14375 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14376 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14380 return 1;
14383 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14384 by mode size. */
14385 inline static rtx
14386 next_consecutive_mem (rtx mem)
14388 enum machine_mode mode = GET_MODE (mem);
14389 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14390 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14392 return adjust_automodify_address (mem, mode, addr, offset);
14395 /* Copy using LDRD/STRD instructions whenever possible.
14396 Returns true upon success. */
14397 bool
14398 gen_movmem_ldrd_strd (rtx *operands)
14400 unsigned HOST_WIDE_INT len;
14401 HOST_WIDE_INT align;
14402 rtx src, dst, base;
14403 rtx reg0;
14404 bool src_aligned, dst_aligned;
14405 bool src_volatile, dst_volatile;
14407 gcc_assert (CONST_INT_P (operands[2]));
14408 gcc_assert (CONST_INT_P (operands[3]));
14410 len = UINTVAL (operands[2]);
14411 if (len > 64)
14412 return false;
14414 /* Maximum alignment we can assume for both src and dst buffers. */
14415 align = INTVAL (operands[3]);
14417 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14418 return false;
14420 /* Place src and dst addresses in registers
14421 and update the corresponding mem rtx. */
14422 dst = operands[0];
14423 dst_volatile = MEM_VOLATILE_P (dst);
14424 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14425 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14426 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14428 src = operands[1];
14429 src_volatile = MEM_VOLATILE_P (src);
14430 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14431 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14432 src = adjust_automodify_address (src, VOIDmode, base, 0);
14434 if (!unaligned_access && !(src_aligned && dst_aligned))
14435 return false;
14437 if (src_volatile || dst_volatile)
14438 return false;
14440 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14441 if (!(dst_aligned || src_aligned))
14442 return arm_gen_movmemqi (operands);
14444 src = adjust_address (src, DImode, 0);
14445 dst = adjust_address (dst, DImode, 0);
14446 while (len >= 8)
14448 len -= 8;
14449 reg0 = gen_reg_rtx (DImode);
14450 if (src_aligned)
14451 emit_move_insn (reg0, src);
14452 else
14453 emit_insn (gen_unaligned_loaddi (reg0, src));
14455 if (dst_aligned)
14456 emit_move_insn (dst, reg0);
14457 else
14458 emit_insn (gen_unaligned_storedi (dst, reg0));
14460 src = next_consecutive_mem (src);
14461 dst = next_consecutive_mem (dst);
14464 gcc_assert (len < 8);
14465 if (len >= 4)
14467 /* More than a word but less than a double-word to copy. Copy a word. */
14468 reg0 = gen_reg_rtx (SImode);
14469 src = adjust_address (src, SImode, 0);
14470 dst = adjust_address (dst, SImode, 0);
14471 if (src_aligned)
14472 emit_move_insn (reg0, src);
14473 else
14474 emit_insn (gen_unaligned_loadsi (reg0, src));
14476 if (dst_aligned)
14477 emit_move_insn (dst, reg0);
14478 else
14479 emit_insn (gen_unaligned_storesi (dst, reg0));
14481 src = next_consecutive_mem (src);
14482 dst = next_consecutive_mem (dst);
14483 len -= 4;
14486 if (len == 0)
14487 return true;
14489 /* Copy the remaining bytes. */
14490 if (len >= 2)
14492 dst = adjust_address (dst, HImode, 0);
14493 src = adjust_address (src, HImode, 0);
14494 reg0 = gen_reg_rtx (SImode);
14495 if (src_aligned)
14496 emit_insn (gen_zero_extendhisi2 (reg0, src));
14497 else
14498 emit_insn (gen_unaligned_loadhiu (reg0, src));
14500 if (dst_aligned)
14501 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
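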
14502 else
14503 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14505 src = next_consecutive_mem (src);
14506 dst = next_consecutive_mem (dst);
14507 if (len == 2)
14508 return true;
14511 dst = adjust_address (dst, QImode, 0);
14512 src = adjust_address (src, QImode, 0);
14513 reg0 = gen_reg_rtx (QImode);
14514 emit_move_insn (reg0, src);
14515 emit_move_insn (dst, reg0);
14516 return true;
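/* Worked example (hypothetical values): copying LEN == 15 bytes with both
   buffers word aligned, the loop and tail code above emit

     one DImode move        8 bytes, typically ldrd/strd
     one SImode move        4 bytes
     one halfword move      2 bytes, zero-extended through an SImode temp
     one byte move          the final byte

   When neither buffer is word aligned (and unaligned access is enabled),
   the function instead falls back to arm_gen_movmemqi before reaching this
   point.  */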
14519 /* Select a dominance comparison mode if possible for a test of the general
14520 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14521 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14522 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14523 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14524 In all cases OP will be either EQ or NE, but we don't need to know which
14525 here. If we are unable to support a dominance comparison we return
14526 CC mode. This will then fail to match for the RTL expressions that
14527 generate this call. */
14528 enum machine_mode
14529 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14531 enum rtx_code cond1, cond2;
14532 int swapped = 0;
14534 /* Currently we will probably get the wrong result if the individual
14535 comparisons are not simple. This also ensures that it is safe to
14536 reverse a comparison if necessary. */
14537 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14538 != CCmode)
14539 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14540 != CCmode))
14541 return CCmode;
14543 /* The if_then_else variant of this tests the second condition if the
14544 first passes, but is true if the first fails. Reverse the first
14545 condition to get a true "inclusive-or" expression. */
14546 if (cond_or == DOM_CC_NX_OR_Y)
14547 cond1 = reverse_condition (cond1);
14549 /* If the comparisons are not equal, and one doesn't dominate the other,
14550 then we can't do this. */
14551 if (cond1 != cond2
14552 && !comparison_dominates_p (cond1, cond2)
14553 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14554 return CCmode;
14556 if (swapped)
14558 enum rtx_code temp = cond1;
14559 cond1 = cond2;
14560 cond2 = temp;
14563 switch (cond1)
14565 case EQ:
14566 if (cond_or == DOM_CC_X_AND_Y)
14567 return CC_DEQmode;
14569 switch (cond2)
14571 case EQ: return CC_DEQmode;
14572 case LE: return CC_DLEmode;
14573 case LEU: return CC_DLEUmode;
14574 case GE: return CC_DGEmode;
14575 case GEU: return CC_DGEUmode;
14576 default: gcc_unreachable ();
14579 case LT:
14580 if (cond_or == DOM_CC_X_AND_Y)
14581 return CC_DLTmode;
14583 switch (cond2)
14585 case LT:
14586 return CC_DLTmode;
14587 case LE:
14588 return CC_DLEmode;
14589 case NE:
14590 return CC_DNEmode;
14591 default:
14592 gcc_unreachable ();
14595 case GT:
14596 if (cond_or == DOM_CC_X_AND_Y)
14597 return CC_DGTmode;
14599 switch (cond2)
14601 case GT:
14602 return CC_DGTmode;
14603 case GE:
14604 return CC_DGEmode;
14605 case NE:
14606 return CC_DNEmode;
14607 default:
14608 gcc_unreachable ();
14611 case LTU:
14612 if (cond_or == DOM_CC_X_AND_Y)
14613 return CC_DLTUmode;
14615 switch (cond2)
14617 case LTU:
14618 return CC_DLTUmode;
14619 case LEU:
14620 return CC_DLEUmode;
14621 case NE:
14622 return CC_DNEmode;
14623 default:
14624 gcc_unreachable ();
14627 case GTU:
14628 if (cond_or == DOM_CC_X_AND_Y)
14629 return CC_DGTUmode;
14631 switch (cond2)
14633 case GTU:
14634 return CC_DGTUmode;
14635 case GEU:
14636 return CC_DGEUmode;
14637 case NE:
14638 return CC_DNEmode;
14639 default:
14640 gcc_unreachable ();
14643 /* The remaining cases only occur when both comparisons are the
14644 same. */
14645 case NE:
14646 gcc_assert (cond1 == cond2);
14647 return CC_DNEmode;
14649 case LE:
14650 gcc_assert (cond1 == cond2);
14651 return CC_DLEmode;
14653 case GE:
14654 gcc_assert (cond1 == cond2);
14655 return CC_DGEmode;
14657 case LEU:
14658 gcc_assert (cond1 == cond2);
14659 return CC_DLEUmode;
14661 case GEU:
14662 gcc_assert (cond1 == cond2);
14663 return CC_DGEUmode;
14665 default:
14666 gcc_unreachable ();
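/* For illustration (hypothetical sub-comparisons): given (lt a b) and
   (le c d) with COND_OR == DOM_CC_X_OR_Y, LT dominates LE, so no swap is
   needed and the LT/LE pair above yields CC_DLEmode.  A pair such as LT
   and GEU has no dominance relation in either direction, so CCmode is
   returned and the corresponding RTL pattern simply fails to match.  */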
14670 enum machine_mode
14671 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14673 /* All floating point compares return CCFP if it is an equality
14674 comparison, and CCFPE otherwise. */
14675 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14677 switch (op)
14679 case EQ:
14680 case NE:
14681 case UNORDERED:
14682 case ORDERED:
14683 case UNLT:
14684 case UNLE:
14685 case UNGT:
14686 case UNGE:
14687 case UNEQ:
14688 case LTGT:
14689 return CCFPmode;
14691 case LT:
14692 case LE:
14693 case GT:
14694 case GE:
14695 return CCFPEmode;
14697 default:
14698 gcc_unreachable ();
14702 /* A compare with a shifted operand. Because of canonicalization, the
14703 comparison will have to be swapped when we emit the assembler. */
14704 if (GET_MODE (y) == SImode
14705 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14706 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14707 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14708 || GET_CODE (x) == ROTATERT))
14709 return CC_SWPmode;
14711 /* This operation is performed swapped, but since we only rely on the Z
14712 flag we don't need an additional mode. */
14713 if (GET_MODE (y) == SImode
14714 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14715 && GET_CODE (x) == NEG
14716 && (op == EQ || op == NE))
14717 return CC_Zmode;
14719 /* This is a special case that is used by combine to allow a
14720 comparison of a shifted byte load to be split into a zero-extend
14721 followed by a comparison of the shifted integer (only valid for
14722 equalities and unsigned inequalities). */
14723 if (GET_MODE (x) == SImode
14724 && GET_CODE (x) == ASHIFT
14725 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14726 && GET_CODE (XEXP (x, 0)) == SUBREG
14727 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14728 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14729 && (op == EQ || op == NE
14730 || op == GEU || op == GTU || op == LTU || op == LEU)
14731 && CONST_INT_P (y))
14732 return CC_Zmode;
14734 /* A construct for a conditional compare, if the false arm contains
14735 0, then both conditions must be true, otherwise either condition
14736 must be true. Not all conditions are possible, so CCmode is
14737 returned if it can't be done. */
14738 if (GET_CODE (x) == IF_THEN_ELSE
14739 && (XEXP (x, 2) == const0_rtx
14740 || XEXP (x, 2) == const1_rtx)
14741 && COMPARISON_P (XEXP (x, 0))
14742 && COMPARISON_P (XEXP (x, 1)))
14743 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14744 INTVAL (XEXP (x, 2)));
14746 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14747 if (GET_CODE (x) == AND
14748 && (op == EQ || op == NE)
14749 && COMPARISON_P (XEXP (x, 0))
14750 && COMPARISON_P (XEXP (x, 1)))
14751 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14752 DOM_CC_X_AND_Y);
14754 if (GET_CODE (x) == IOR
14755 && (op == EQ || op == NE)
14756 && COMPARISON_P (XEXP (x, 0))
14757 && COMPARISON_P (XEXP (x, 1)))
14758 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14759 DOM_CC_X_OR_Y);
14761 /* An operation (on Thumb) where we want to test for a single bit.
14762 This is done by shifting that bit up into the top bit of a
14763 scratch register; we can then branch on the sign bit. */
14764 if (TARGET_THUMB1
14765 && GET_MODE (x) == SImode
14766 && (op == EQ || op == NE)
14767 && GET_CODE (x) == ZERO_EXTRACT
14768 && XEXP (x, 1) == const1_rtx)
14769 return CC_Nmode;
14771 /* For an operation that sets the condition codes as a side-effect, the
14772 V flag is not set correctly, so we can only use comparisons where
14773 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14774 instead.) */
14775 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14776 if (GET_MODE (x) == SImode
14777 && y == const0_rtx
14778 && (op == EQ || op == NE || op == LT || op == GE)
14779 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14780 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14781 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14782 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14783 || GET_CODE (x) == LSHIFTRT
14784 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14785 || GET_CODE (x) == ROTATERT
14786 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14787 return CC_NOOVmode;
14789 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14790 return CC_Zmode;
14792 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14793 && GET_CODE (x) == PLUS
14794 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14795 return CC_Cmode;
14797 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14799 switch (op)
14801 case EQ:
14802 case NE:
14803 /* A DImode comparison against zero can be implemented by
14804 or'ing the two halves together. */
14805 if (y == const0_rtx)
14806 return CC_Zmode;
14808 /* We can do an equality test in three Thumb instructions. */
14809 if (!TARGET_32BIT)
14810 return CC_Zmode;
14812 /* FALLTHROUGH */
14814 case LTU:
14815 case LEU:
14816 case GTU:
14817 case GEU:
14818 /* DImode unsigned comparisons can be implemented by cmp +
14819 cmpeq without a scratch register. Not worth doing in
14820 Thumb-2. */
14821 if (TARGET_32BIT)
14822 return CC_CZmode;
14824 /* FALLTHROUGH */
14826 case LT:
14827 case LE:
14828 case GT:
14829 case GE:
14830 /* DImode signed and unsigned comparisons can be implemented
14831 by cmp + sbcs with a scratch register, but that does not
14832 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14833 gcc_assert (op != EQ && op != NE);
14834 return CC_NCVmode;
14836 default:
14837 gcc_unreachable ();
14841 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14842 return GET_MODE (x);
14844 return CCmode;
14847 /* X and Y are two things to compare using CODE. Emit the compare insn and
14848 return the rtx for register 0 in the proper mode. FP means this is a
14849 floating point compare: I don't think that it is needed on the arm. */
14851 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14853 enum machine_mode mode;
14854 rtx cc_reg;
14855 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14857 /* We might have X as a constant, Y as a register because of the predicates
14858 used for cmpdi. If so, force X to a register here. */
14859 if (dimode_comparison && !REG_P (x))
14860 x = force_reg (DImode, x);
14862 mode = SELECT_CC_MODE (code, x, y);
14863 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14865 if (dimode_comparison
14866 && mode != CC_CZmode)
14868 rtx clobber, set;
14870 /* To compare two non-zero values for equality, XOR them and
14871 then compare against zero. Not used for ARM mode; there
14872 CC_CZmode is cheaper. */
14873 if (mode == CC_Zmode && y != const0_rtx)
14875 gcc_assert (!reload_completed);
14876 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14877 y = const0_rtx;
14880 /* A scratch register is required. */
14881 if (reload_completed)
14882 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14883 else
14884 scratch = gen_rtx_SCRATCH (SImode);
14886 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14887 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14888 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14890 else
14891 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14893 return cc_reg;
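/* Usage sketch (hypothetical caller, for illustration): an expander for a
   conditional branch or conditional store might do roughly

     rtx cc_reg = arm_gen_compare_reg (code, x, y, scratch);
     ... then test (code cc_reg const0_rtx) ...

   For a DImode equality against a nonzero value before reload the operands
   are XORed and the result compared against zero in CC_Zmode; unsigned
   DImode comparisons on 32-bit targets use CC_CZmode and clobber no
   scratch register.  */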
14896 /* Generate a sequence of insns that will generate the correct return
14897 address mask depending on the physical architecture that the program
14898 is running on. */
14900 arm_gen_return_addr_mask (void)
14902 rtx reg = gen_reg_rtx (Pmode);
14904 emit_insn (gen_return_addr_mask (reg));
14905 return reg;
14908 void
14909 arm_reload_in_hi (rtx *operands)
14911 rtx ref = operands[1];
14912 rtx base, scratch;
14913 HOST_WIDE_INT offset = 0;
14915 if (GET_CODE (ref) == SUBREG)
14917 offset = SUBREG_BYTE (ref);
14918 ref = SUBREG_REG (ref);
14921 if (REG_P (ref))
14923 /* We have a pseudo which has been spilt onto the stack; there
14924 are two cases here: the first where there is a simple
14925 stack-slot replacement and a second where the stack-slot is
14926 out of range, or is used as a subreg. */
14927 if (reg_equiv_mem (REGNO (ref)))
14929 ref = reg_equiv_mem (REGNO (ref));
14930 base = find_replacement (&XEXP (ref, 0));
14932 else
14933 /* The slot is out of range, or was dressed up in a SUBREG. */
14934 base = reg_equiv_address (REGNO (ref));
14936 else
14937 base = find_replacement (&XEXP (ref, 0));
14939 /* Handle the case where the address is too complex to be offset by 1. */
14940 if (GET_CODE (base) == MINUS
14941 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14943 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14945 emit_set_insn (base_plus, base);
14946 base = base_plus;
14948 else if (GET_CODE (base) == PLUS)
14950 /* The addend must be CONST_INT, or we would have dealt with it above. */
14951 HOST_WIDE_INT hi, lo;
14953 offset += INTVAL (XEXP (base, 1));
14954 base = XEXP (base, 0);
14956 /* Rework the address into a legal sequence of insns. */
14957 /* Valid range for lo is -4095 -> 4095 */
14958 lo = (offset >= 0
14959 ? (offset & 0xfff)
14960 : -((-offset) & 0xfff));
14962 /* Corner case, if lo is the max offset then we would be out of range
14963 once we have added the additional 1 below, so bump the msb into the
14964 pre-loading insn(s). */
14965 if (lo == 4095)
14966 lo &= 0x7ff;
14968 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14969 ^ (HOST_WIDE_INT) 0x80000000)
14970 - (HOST_WIDE_INT) 0x80000000);
14972 gcc_assert (hi + lo == offset);
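/* Worked example (hypothetical offsets): OFFSET == 4200 gives lo == 104 and
   hi == 4096, so BASE_PLUS receives BASE + 4096 and the byte loads below
   use offsets 104 and 105.  For the corner case OFFSET == 4095, lo is
   clipped to 2047 and hi becomes 2048, keeping lo + 1 within the +/-4095
   range.  */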
14974 if (hi != 0)
14976 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14978 /* Get the base address; addsi3 knows how to handle constants
14979 that require more than one insn. */
14980 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14981 base = base_plus;
14982 offset = lo;
14986 /* Operands[2] may overlap operands[0] (though it won't overlap
14987 operands[1]), that's why we asked for a DImode reg -- so we can
14988 use the bit that does not overlap. */
14989 if (REGNO (operands[2]) == REGNO (operands[0]))
14990 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14991 else
14992 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14994 emit_insn (gen_zero_extendqisi2 (scratch,
14995 gen_rtx_MEM (QImode,
14996 plus_constant (Pmode, base,
14997 offset))));
14998 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14999 gen_rtx_MEM (QImode,
15000 plus_constant (Pmode, base,
15001 offset + 1))));
15002 if (!BYTES_BIG_ENDIAN)
15003 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15004 gen_rtx_IOR (SImode,
15005 gen_rtx_ASHIFT
15006 (SImode,
15007 gen_rtx_SUBREG (SImode, operands[0], 0),
15008 GEN_INT (8)),
15009 scratch));
15010 else
15011 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15012 gen_rtx_IOR (SImode,
15013 gen_rtx_ASHIFT (SImode, scratch,
15014 GEN_INT (8)),
15015 gen_rtx_SUBREG (SImode, operands[0], 0)));
15018 /* Handle storing a half-word to memory during reload by synthesizing as two
15019 byte stores. Take care not to clobber the input values until after we
15020 have moved them somewhere safe. This code assumes that if the DImode
15021 scratch in operands[2] overlaps either the input value or output address
15022 in some way, then that value must die in this insn (we absolutely need
15023 two scratch registers for some corner cases). */
15024 void
15025 arm_reload_out_hi (rtx *operands)
15027 rtx ref = operands[0];
15028 rtx outval = operands[1];
15029 rtx base, scratch;
15030 HOST_WIDE_INT offset = 0;
15032 if (GET_CODE (ref) == SUBREG)
15034 offset = SUBREG_BYTE (ref);
15035 ref = SUBREG_REG (ref);
15038 if (REG_P (ref))
15040 /* We have a pseudo which has been spilt onto the stack; there
15041 are two cases here: the first where there is a simple
15042 stack-slot replacement and a second where the stack-slot is
15043 out of range, or is used as a subreg. */
15044 if (reg_equiv_mem (REGNO (ref)))
15046 ref = reg_equiv_mem (REGNO (ref));
15047 base = find_replacement (&XEXP (ref, 0));
15049 else
15050 /* The slot is out of range, or was dressed up in a SUBREG. */
15051 base = reg_equiv_address (REGNO (ref));
15053 else
15054 base = find_replacement (&XEXP (ref, 0));
15056 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15058 /* Handle the case where the address is too complex to be offset by 1. */
15059 if (GET_CODE (base) == MINUS
15060 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15062 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15064 /* Be careful not to destroy OUTVAL. */
15065 if (reg_overlap_mentioned_p (base_plus, outval))
15067 /* Updating base_plus might destroy outval, see if we can
15068 swap the scratch and base_plus. */
15069 if (!reg_overlap_mentioned_p (scratch, outval))
15071 rtx tmp = scratch;
15072 scratch = base_plus;
15073 base_plus = tmp;
15075 else
15077 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15079 /* Be conservative and copy OUTVAL into the scratch now,
15080 this should only be necessary if outval is a subreg
15081 of something larger than a word. */
15082 /* XXX Might this clobber base? I can't see how it can,
15083 since scratch is known to overlap with OUTVAL, and
15084 must be wider than a word. */
15085 emit_insn (gen_movhi (scratch_hi, outval));
15086 outval = scratch_hi;
15090 emit_set_insn (base_plus, base);
15091 base = base_plus;
15093 else if (GET_CODE (base) == PLUS)
15095 /* The addend must be CONST_INT, or we would have dealt with it above. */
15096 HOST_WIDE_INT hi, lo;
15098 offset += INTVAL (XEXP (base, 1));
15099 base = XEXP (base, 0);
15101 /* Rework the address into a legal sequence of insns. */
15102 /* Valid range for lo is -4095 -> 4095 */
15103 lo = (offset >= 0
15104 ? (offset & 0xfff)
15105 : -((-offset) & 0xfff));
15107 /* Corner case, if lo is the max offset then we would be out of range
15108 once we have added the additional 1 below, so bump the msb into the
15109 pre-loading insn(s). */
15110 if (lo == 4095)
15111 lo &= 0x7ff;
15113 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15114 ^ (HOST_WIDE_INT) 0x80000000)
15115 - (HOST_WIDE_INT) 0x80000000);
15117 gcc_assert (hi + lo == offset);
15119 if (hi != 0)
15121 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15123 /* Be careful not to destroy OUTVAL. */
15124 if (reg_overlap_mentioned_p (base_plus, outval))
15126 /* Updating base_plus might destroy outval, see if we
15127 can swap the scratch and base_plus. */
15128 if (!reg_overlap_mentioned_p (scratch, outval))
15130 rtx tmp = scratch;
15131 scratch = base_plus;
15132 base_plus = tmp;
15134 else
15136 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15138 /* Be conservative and copy outval into scratch now,
15139 this should only be necessary if outval is a
15140 subreg of something larger than a word. */
15141 /* XXX Might this clobber base? I can't see how it
15142 can, since scratch is known to overlap with
15143 outval. */
15144 emit_insn (gen_movhi (scratch_hi, outval));
15145 outval = scratch_hi;
15149 /* Get the base address; addsi3 knows how to handle constants
15150 that require more than one insn. */
15151 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15152 base = base_plus;
15153 offset = lo;
15157 if (BYTES_BIG_ENDIAN)
15159 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15160 plus_constant (Pmode, base,
15161 offset + 1)),
15162 gen_lowpart (QImode, outval)));
15163 emit_insn (gen_lshrsi3 (scratch,
15164 gen_rtx_SUBREG (SImode, outval, 0),
15165 GEN_INT (8)));
15166 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15167 offset)),
15168 gen_lowpart (QImode, scratch)));
15170 else
15172 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15173 offset)),
15174 gen_lowpart (QImode, outval)));
15175 emit_insn (gen_lshrsi3 (scratch,
15176 gen_rtx_SUBREG (SImode, outval, 0),
15177 GEN_INT (8)));
15178 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15179 plus_constant (Pmode, base,
15180 offset + 1)),
15181 gen_lowpart (QImode, scratch)));
15185 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15186 (padded to the size of a word) should be passed in a register. */
15188 static bool
15189 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15191 if (TARGET_AAPCS_BASED)
15192 return must_pass_in_stack_var_size (mode, type);
15193 else
15194 return must_pass_in_stack_var_size_or_pad (mode, type);
15198 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15199 Return true if an argument passed on the stack should be padded upwards,
15200 i.e. if the least-significant byte has useful data.
15201 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15202 aggregate types are placed in the lowest memory address. */
15204 bool
15205 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15207 if (!TARGET_AAPCS_BASED)
15208 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15210 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15211 return false;
15213 return true;
15217 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15218 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15219 register has useful data, and return the opposite if the most
15220 significant byte does. */
15222 bool
15223 arm_pad_reg_upward (enum machine_mode mode,
15224 tree type, int first ATTRIBUTE_UNUSED)
15226 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15228 /* For AAPCS, small aggregates, small fixed-point types,
15229 and small complex types are always padded upwards. */
15230 if (type)
15232 if ((AGGREGATE_TYPE_P (type)
15233 || TREE_CODE (type) == COMPLEX_TYPE
15234 || FIXED_POINT_TYPE_P (type))
15235 && int_size_in_bytes (type) <= 4)
15236 return true;
15238 else
15240 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15241 && GET_MODE_SIZE (mode) <= 4)
15242 return true;
15246 /* Otherwise, use default padding. */
15247 return !BYTES_BIG_ENDIAN;
15250 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15251 assuming that the address in the base register is word aligned. */
15252 bool
15253 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15255 HOST_WIDE_INT max_offset;
15257 /* Offset must be a multiple of 4 in Thumb mode. */
15258 if (TARGET_THUMB2 && ((offset & 3) != 0))
15259 return false;
15261 if (TARGET_THUMB2)
15262 max_offset = 1020;
15263 else if (TARGET_ARM)
15264 max_offset = 255;
15265 else
15266 return false;
15268 return ((offset <= max_offset) && (offset >= -max_offset));
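/* Examples (hypothetical offsets): in Thumb-2 state, -1020, 0 and 1020 are
   accepted while 1024 (too large) and 6 (not a multiple of 4) are rejected;
   in ARM state the limit is +/-255 with no multiple-of-4 requirement, so 6
   is fine but 260 is not.  */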
15271 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15272 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15273 Assumes that the address in the base register RN is word aligned. Pattern
15274 guarantees that both memory accesses use the same base register,
15275 the offsets are constants within the range, and the gap between the offsets is 4.
15276 If reload is complete, check that the registers are legal. WBACK indicates whether
15277 address is updated. LOAD indicates whether memory access is load or store. */
15278 bool
15279 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15280 bool wback, bool load)
15282 unsigned int t, t2, n;
15284 if (!reload_completed)
15285 return true;
15287 if (!offset_ok_for_ldrd_strd (offset))
15288 return false;
15290 t = REGNO (rt);
15291 t2 = REGNO (rt2);
15292 n = REGNO (rn);
15294 if ((TARGET_THUMB2)
15295 && ((wback && (n == t || n == t2))
15296 || (t == SP_REGNUM)
15297 || (t == PC_REGNUM)
15298 || (t2 == SP_REGNUM)
15299 || (t2 == PC_REGNUM)
15300 || (!load && (n == PC_REGNUM))
15301 || (load && (t == t2))
15302 /* Triggers Cortex-M3 LDRD errata. */
15303 || (!wback && load && fix_cm3_ldrd && (n == t))))
15304 return false;
15306 if ((TARGET_ARM)
15307 && ((wback && (n == t || n == t2))
15308 || (t2 == PC_REGNUM)
15309 || (t % 2 != 0) /* First destination register is not even. */
15310 || (t2 != t + 1)
15311 /* PC can be used as base register (for offset addressing only),
15312 but it is deprecated. */
15313 || (n == PC_REGNUM)))
15314 return false;
15316 return true;
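/* Examples (hypothetical registers): after reload, an ARM-state LDRD needs
   an even first register and RT2 == RT + 1, so r0/r1 or r2/r3 pass while
   r1/r2 and r0/r2 fail.  A Thumb-2 LDRD with RT == RT2, or with writeback
   where RN equals one of the destination registers, is likewise
   rejected.  */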
15319 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15320 operand MEM's address contains an immediate offset from the base
15321 register and has no side effects, in which case it sets BASE and
15322 OFFSET accordingly. */
15323 static bool
15324 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15326 rtx addr;
15328 gcc_assert (base != NULL && offset != NULL);
15330 /* TODO: Handle more general memory operand patterns, such as
15331 PRE_DEC and PRE_INC. */
15333 if (side_effects_p (mem))
15334 return false;
15336 /* Can't deal with subregs. */
15337 if (GET_CODE (mem) == SUBREG)
15338 return false;
15340 gcc_assert (MEM_P (mem));
15342 *offset = const0_rtx;
15344 addr = XEXP (mem, 0);
15346 /* If addr isn't valid for DImode, then we can't handle it. */
15347 if (!arm_legitimate_address_p (DImode, addr,
15348 reload_in_progress || reload_completed))
15349 return false;
15351 if (REG_P (addr))
15353 *base = addr;
15354 return true;
15356 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15358 *base = XEXP (addr, 0);
15359 *offset = XEXP (addr, 1);
15360 return (REG_P (*base) && CONST_INT_P (*offset));
15363 return false;
15366 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15368 /* Called from a peephole2 to replace two word-size accesses with a
15369 single LDRD/STRD instruction. Returns true iff we can generate a
15370 new instruction sequence. That is, both accesses use the same base
15371 register and the gap between constant offsets is 4. This function
15372 may reorder its operands to match ldrd/strd RTL templates.
15373 OPERANDS are the operands found by the peephole matcher;
15374 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15375 corresponding memory operands. LOAD indicates whether the access
15376 is load or store. CONST_STORE indicates a store of constant
15377 integer values held in OPERANDS[4,5] and assumes that the pattern
15378 is four insns long, for the purpose of checking dead registers.
15379 COMMUTE indicates that register operands may be reordered. */
15380 bool
15381 gen_operands_ldrd_strd (rtx *operands, bool load,
15382 bool const_store, bool commute)
15384 int nops = 2;
15385 HOST_WIDE_INT offsets[2], offset;
15386 rtx base = NULL_RTX;
15387 rtx cur_base, cur_offset, tmp;
15388 int i, gap;
15389 HARD_REG_SET regset;
15391 gcc_assert (!const_store || !load);
15392 /* Check that the memory references are immediate offsets from the
15393 same base register. Extract the base register, the destination
15394 registers, and the corresponding memory offsets. */
15395 for (i = 0; i < nops; i++)
15397 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15398 return false;
15400 if (i == 0)
15401 base = cur_base;
15402 else if (REGNO (base) != REGNO (cur_base))
15403 return false;
15405 offsets[i] = INTVAL (cur_offset);
15406 if (GET_CODE (operands[i]) == SUBREG)
15408 tmp = SUBREG_REG (operands[i]);
15409 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15410 operands[i] = tmp;
15414 /* Make sure there is no dependency between the individual loads. */
15415 if (load && REGNO (operands[0]) == REGNO (base))
15416 return false; /* RAW */
15418 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15419 return false; /* WAW */
15421 /* If the same input register is used in both stores
15422 when storing different constants, try to find a free register.
15423 For example, the code
15424 mov r0, 0
15425 str r0, [r2]
15426 mov r0, 1
15427 str r0, [r2, #4]
15428 can be transformed into
15429 mov r1, 0
15430 strd r1, r0, [r2]
15431 in Thumb mode assuming that r1 is free. */
15432 if (const_store
15433 && REGNO (operands[0]) == REGNO (operands[1])
15434 && INTVAL (operands[4]) != INTVAL (operands[5]))
15436 if (TARGET_THUMB2)
15438 CLEAR_HARD_REG_SET (regset);
15439 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15440 if (tmp == NULL_RTX)
15441 return false;
15443 /* Use the new register in the first load to ensure that
15444 if the original input register is not dead after peephole,
15445 then it will have the correct constant value. */
15446 operands[0] = tmp;
15448 else if (TARGET_ARM)
15450 return false;
15451 int regno = REGNO (operands[0]);
15452 if (!peep2_reg_dead_p (4, operands[0]))
15454 /* When the input register is even and is not dead after the
15455 pattern, it has to hold the second constant but we cannot
15456 form a legal STRD in ARM mode with this register as the second
15457 register. */
15458 if (regno % 2 == 0)
15459 return false;
15461 /* Is regno-1 free? */
15462 SET_HARD_REG_SET (regset);
15463 CLEAR_HARD_REG_BIT(regset, regno - 1);
15464 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15465 if (tmp == NULL_RTX)
15466 return false;
15468 operands[0] = tmp;
15470 else
15472 /* Find a DImode register. */
15473 CLEAR_HARD_REG_SET (regset);
15474 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15475 if (tmp != NULL_RTX)
15477 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15478 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15480 else
15482 /* Can we use the input register to form a DI register? */
15483 SET_HARD_REG_SET (regset);
15484 CLEAR_HARD_REG_BIT(regset,
15485 regno % 2 == 0 ? regno + 1 : regno - 1);
15486 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15487 if (tmp == NULL_RTX)
15488 return false;
15489 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15493 gcc_assert (operands[0] != NULL_RTX);
15494 gcc_assert (operands[1] != NULL_RTX);
15495 gcc_assert (REGNO (operands[0]) % 2 == 0);
15496 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15500 /* Make sure the instructions are ordered with lower memory access first. */
15501 if (offsets[0] > offsets[1])
15503 gap = offsets[0] - offsets[1];
15504 offset = offsets[1];
15506 /* Swap the instructions such that lower memory is accessed first. */
15507 SWAP_RTX (operands[0], operands[1]);
15508 SWAP_RTX (operands[2], operands[3]);
15509 if (const_store)
15510 SWAP_RTX (operands[4], operands[5]);
15512 else
15514 gap = offsets[1] - offsets[0];
15515 offset = offsets[0];
15518 /* Make sure accesses are to consecutive memory locations. */
15519 if (gap != 4)
15520 return false;
15522 /* Make sure we generate legal instructions. */
15523 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15524 false, load))
15525 return true;
15527 /* In Thumb state, where registers are almost unconstrained, there
15528 is little hope of fixing it. */
15529 if (TARGET_THUMB2)
15530 return false;
15532 if (load && commute)
15534 /* Try reordering registers. */
15535 SWAP_RTX (operands[0], operands[1]);
15536 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15537 false, load))
15538 return true;
15541 if (const_store)
15543 /* If input registers are dead after this pattern, they can be
15544 reordered or replaced by other registers that are free in the
15545 current pattern. */
15546 if (!peep2_reg_dead_p (4, operands[0])
15547 || !peep2_reg_dead_p (4, operands[1]))
15548 return false;
15550 /* Try to reorder the input registers. */
15551 /* For example, the code
15552 mov r0, 0
15553 mov r1, 1
15554 str r1, [r2]
15555 str r0, [r2, #4]
15556 can be transformed into
15557 mov r1, 0
15558 mov r0, 1
15559 strd r0, [r2]
15561 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15562 false, false))
15564 SWAP_RTX (operands[0], operands[1]);
15565 return true;
15568 /* Try to find a free DI register. */
15569 CLEAR_HARD_REG_SET (regset);
15570 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15571 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15572 while (true)
15574 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15575 if (tmp == NULL_RTX)
15576 return false;
15578 /* DREG must be an even-numbered register in DImode.
15579 Split it into SI registers. */
15580 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15581 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15582 gcc_assert (operands[0] != NULL_RTX);
15583 gcc_assert (operands[1] != NULL_RTX);
15584 gcc_assert (REGNO (operands[0]) % 2 == 0);
15585 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15587 return (operands_ok_ldrd_strd (operands[0], operands[1],
15588 base, offset,
15589 false, load));
15593 return false;
15595 #undef SWAP_RTX
15600 /* Print a symbolic form of X to the debug file, F. */
15601 static void
15602 arm_print_value (FILE *f, rtx x)
15604 switch (GET_CODE (x))
15606 case CONST_INT:
15607 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15608 return;
15610 case CONST_DOUBLE:
15611 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15612 return;
15614 case CONST_VECTOR:
15616 int i;
15618 fprintf (f, "<");
15619 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15621 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15622 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15623 fputc (',', f);
15625 fprintf (f, ">");
15627 return;
15629 case CONST_STRING:
15630 fprintf (f, "\"%s\"", XSTR (x, 0));
15631 return;
15633 case SYMBOL_REF:
15634 fprintf (f, "`%s'", XSTR (x, 0));
15635 return;
15637 case LABEL_REF:
15638 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15639 return;
15641 case CONST:
15642 arm_print_value (f, XEXP (x, 0));
15643 return;
15645 case PLUS:
15646 arm_print_value (f, XEXP (x, 0));
15647 fprintf (f, "+");
15648 arm_print_value (f, XEXP (x, 1));
15649 return;
15651 case PC:
15652 fprintf (f, "pc");
15653 return;
15655 default:
15656 fprintf (f, "????");
15657 return;
15661 /* Routines for manipulation of the constant pool. */
15663 /* Arm instructions cannot load a large constant directly into a
15664 register; they have to come from a pc relative load. The constant
15665 must therefore be placed in the addressable range of the pc
15666 relative load. Depending on the precise pc relative load
15667 instruction the range is somewhere between 256 bytes and 4k. This
15668 means that we often have to dump a constant inside a function, and
15669 generate code to branch around it.
15671 It is important to minimize this, since the branches will slow
15672 things down and make the code larger.
15674 Normally we can hide the table after an existing unconditional
15675 branch so that there is no interruption of the flow, but in the
15676 worst case the code looks like this:
15678 ldr rn, L1
15680 b L2
15681 align
15682 L1: .long value
15686 ldr rn, L3
15688 b L4
15689 align
15690 L3: .long value
15694 We fix this by performing a scan after scheduling, which notices
15695 which instructions need to have their operands fetched from the
15696 constant table and builds the table.
15698 The algorithm starts by building a table of all the constants that
15699 need fixing up and all the natural barriers in the function (places
15700 where a constant table can be dropped without breaking the flow).
15701 For each fixup we note how far the pc-relative replacement will be
15702 able to reach and the offset of the instruction into the function.
15704 Having built the table we then group the fixes together to form
15705 tables that are as large as possible (subject to addressing
15706 constraints) and emit each table of constants after the last
15707 barrier that is within range of all the instructions in the group.
15708 If a group does not contain a barrier, then we forcibly create one
15709 by inserting a jump instruction into the flow. Once the table has
15710 been inserted, the insns are then modified to reference the
15711 relevant entry in the pool.
15713 Possible enhancements to the algorithm (not implemented) are:
15715 1) For some processors and object formats, there may be benefit in
15716 aligning the pools to the start of cache lines; this alignment
15717 would need to be taken into account when calculating addressability
15718 of a pool. */
15720 /* These typedefs are located at the start of this file, so that
15721 they can be used in the prototypes there. This comment is to
15722 remind readers of that fact so that the following structures
15723 can be understood more easily.
15725 typedef struct minipool_node Mnode;
15726 typedef struct minipool_fixup Mfix; */
15728 struct minipool_node
15730 /* Doubly linked chain of entries. */
15731 Mnode * next;
15732 Mnode * prev;
15733 /* The maximum offset into the code that this entry can be placed. While
15734 pushing fixes for forward references, all entries are sorted in order
15735 of increasing max_address. */
15736 HOST_WIDE_INT max_address;
15737 /* Similarly for an entry inserted for a backwards ref. */
15738 HOST_WIDE_INT min_address;
15739 /* The number of fixes referencing this entry. This can become zero
15740 if we "unpush" an entry. In this case we ignore the entry when we
15741 come to emit the code. */
15742 int refcount;
15743 /* The offset from the start of the minipool. */
15744 HOST_WIDE_INT offset;
15745 /* The value in the table. */
15746 rtx value;
15747 /* The mode of value. */
15748 enum machine_mode mode;
15749 /* The size of the value. With iWMMXt enabled
15750 sizes > 4 also imply an alignment of 8 bytes. */
15751 int fix_size;
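/* A fix records one instruction whose operand must be loaded from the
   minipool (see push_minipool_fix below): NEXT chains all fixes of the
   current function, INSN and ADDRESS identify the instruction and its
   offset from the start of the function, LOC points at the part of the
   insn to be rewritten, MODE/FIX_SIZE/VALUE describe the constant,
   MINIPOOL is the pool entry eventually allocated for it, and
   FORWARDS/BACKWARDS give the pc-relative reach of the instruction in
   each direction.  */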
15754 struct minipool_fixup
15756 Mfix * next;
15757 rtx insn;
15758 HOST_WIDE_INT address;
15759 rtx * loc;
15760 enum machine_mode mode;
15761 int fix_size;
15762 rtx value;
15763 Mnode * minipool;
15764 HOST_WIDE_INT forwards;
15765 HOST_WIDE_INT backwards;
15768 /* Fixes less than a word need padding out to a word boundary. */
15769 #define MINIPOOL_FIX_SIZE(mode) \
15770 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
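/* The minipool currently under construction: the doubly linked list of
   entries, the label that will mark the start of the emitted table, and
   the worst-case padding (in bytes) needed if any entry requires
   8-byte alignment (see push_minipool_fix).  */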
15772 static Mnode * minipool_vector_head;
15773 static Mnode * minipool_vector_tail;
15774 static rtx minipool_vector_label;
15775 static int minipool_pad;
15777 /* The linked list of all minipool fixes required for this function. */
15778 Mfix * minipool_fix_head;
15779 Mfix * minipool_fix_tail;
15780 /* The fix entry for the current minipool, once it has been placed. */
15781 Mfix * minipool_barrier;
15783 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15784 #define JUMP_TABLES_IN_TEXT_SECTION 0
15785 #endif
15787 static HOST_WIDE_INT
15788 get_jump_table_size (rtx insn)
15790 /* ADDR_VECs only take room if read-only data goes into the text
15791 section. */
15792 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15794 rtx body = PATTERN (insn);
15795 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15796 HOST_WIDE_INT size;
15797 HOST_WIDE_INT modesize;
15799 modesize = GET_MODE_SIZE (GET_MODE (body));
15800 size = modesize * XVECLEN (body, elt);
15801 switch (modesize)
15803 case 1:
15804 /* Round up size of TBB table to a halfword boundary. */
15805 size = (size + 1) & ~(HOST_WIDE_INT)1;
15806 break;
15807 case 2:
15808 /* No padding necessary for TBH. */
15809 break;
15810 case 4:
15811 /* Add two bytes for alignment on Thumb. */
15812 if (TARGET_THUMB)
15813 size += 2;
15814 break;
15815 default:
15816 gcc_unreachable ();
15818 return size;
15821 return 0;
15824 /* Return the maximum amount of padding that will be inserted before
15825 label LABEL. */
15827 static HOST_WIDE_INT
15828 get_label_padding (rtx label)
15830 HOST_WIDE_INT align, min_insn_size;
15832 align = 1 << label_to_alignment (label);
15833 min_insn_size = TARGET_THUMB ? 2 : 4;
15834 return align > min_insn_size ? align - min_insn_size : 0;
15837 /* Move a minipool fix MP from its current location to before MAX_MP.
15838 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15839 constraints may need updating. */
15840 static Mnode *
15841 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15842 HOST_WIDE_INT max_address)
15844 /* The code below assumes these are different. */
15845 gcc_assert (mp != max_mp);
15847 if (max_mp == NULL)
15849 if (max_address < mp->max_address)
15850 mp->max_address = max_address;
15852 else
15854 if (max_address > max_mp->max_address - mp->fix_size)
15855 mp->max_address = max_mp->max_address - mp->fix_size;
15856 else
15857 mp->max_address = max_address;
15859 /* Unlink MP from its current position. Since max_mp is non-null,
15860 mp->prev must be non-null. */
15861 mp->prev->next = mp->next;
15862 if (mp->next != NULL)
15863 mp->next->prev = mp->prev;
15864 else
15865 minipool_vector_tail = mp->prev;
15867 /* Re-insert it before MAX_MP. */
15868 mp->next = max_mp;
15869 mp->prev = max_mp->prev;
15870 max_mp->prev = mp;
15872 if (mp->prev != NULL)
15873 mp->prev->next = mp;
15874 else
15875 minipool_vector_head = mp;
15878 /* Save the new entry. */
15879 max_mp = mp;
15881 /* Scan over the preceding entries and adjust their addresses as
15882 required. */
15883 while (mp->prev != NULL
15884 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15886 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15887 mp = mp->prev;
15890 return max_mp;
15893 /* Add a constant to the minipool for a forward reference. Returns the
15894 node added or NULL if the constant will not fit in this pool. */
15895 static Mnode *
15896 add_minipool_forward_ref (Mfix *fix)
15898 /* If set, max_mp is the first pool_entry that has a lower
15899 constraint than the one we are trying to add. */
15900 Mnode * max_mp = NULL;
15901 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15902 Mnode * mp;
15904 /* If the minipool starts before the end of FIX->INSN then this FIX
15905 cannot be placed into the current pool. Furthermore, adding the
15906 new constant pool entry may cause the pool to start FIX_SIZE bytes
15907 earlier. */
15908 if (minipool_vector_head &&
15909 (fix->address + get_attr_length (fix->insn)
15910 >= minipool_vector_head->max_address - fix->fix_size))
15911 return NULL;
15913 /* Scan the pool to see if a constant with the same value has
15914 already been added. While we are doing this, also note the
15915 location where we must insert the constant if it doesn't already
15916 exist. */
15917 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15919 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15920 && fix->mode == mp->mode
15921 && (!LABEL_P (fix->value)
15922 || (CODE_LABEL_NUMBER (fix->value)
15923 == CODE_LABEL_NUMBER (mp->value)))
15924 && rtx_equal_p (fix->value, mp->value))
15926 /* More than one fix references this entry. */
15927 mp->refcount++;
15928 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15931 /* Note the insertion point if necessary. */
15932 if (max_mp == NULL
15933 && mp->max_address > max_address)
15934 max_mp = mp;
15936 /* If we are inserting an 8-byte aligned quantity and
15937 we have not already found an insertion point, then
15938 make sure that all such 8-byte aligned quantities are
15939 placed at the start of the pool. */
15940 if (ARM_DOUBLEWORD_ALIGN
15941 && max_mp == NULL
15942 && fix->fix_size >= 8
15943 && mp->fix_size < 8)
15945 max_mp = mp;
15946 max_address = mp->max_address;
15950 /* The value is not currently in the minipool, so we need to create
15951 a new entry for it. If MAX_MP is NULL, the entry will be put on
15952 the end of the list since the placement is less constrained than
15953 any existing entry. Otherwise, we insert the new fix before
15954 MAX_MP and, if necessary, adjust the constraints on the other
15955 entries. */
15956 mp = XNEW (Mnode);
15957 mp->fix_size = fix->fix_size;
15958 mp->mode = fix->mode;
15959 mp->value = fix->value;
15960 mp->refcount = 1;
15961 /* Not yet required for a backwards ref. */
15962 mp->min_address = -65536;
15964 if (max_mp == NULL)
15966 mp->max_address = max_address;
15967 mp->next = NULL;
15968 mp->prev = minipool_vector_tail;
15970 if (mp->prev == NULL)
15972 minipool_vector_head = mp;
15973 minipool_vector_label = gen_label_rtx ();
15975 else
15976 mp->prev->next = mp;
15978 minipool_vector_tail = mp;
15980 else
15982 if (max_address > max_mp->max_address - mp->fix_size)
15983 mp->max_address = max_mp->max_address - mp->fix_size;
15984 else
15985 mp->max_address = max_address;
15987 mp->next = max_mp;
15988 mp->prev = max_mp->prev;
15989 max_mp->prev = mp;
15990 if (mp->prev != NULL)
15991 mp->prev->next = mp;
15992 else
15993 minipool_vector_head = mp;
15996 /* Save the new entry. */
15997 max_mp = mp;
15999 /* Scan over the preceding entries and adjust their addresses as
16000 required. */
16001 while (mp->prev != NULL
16002 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16004 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16005 mp = mp->prev;
16008 return max_mp;
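/* Like move_minipool_fix_forward_ref, but for a backwards reference:
   move MP to just after MIN_MP (or merely tighten its minimum-address
   constraint if MIN_MP is NULL) and then recompute the offsets of all
   entries in the pool.  */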
16011 static Mnode *
16012 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16013 HOST_WIDE_INT min_address)
16015 HOST_WIDE_INT offset;
16017 /* The code below assumes these are different. */
16018 gcc_assert (mp != min_mp);
16020 if (min_mp == NULL)
16022 if (min_address > mp->min_address)
16023 mp->min_address = min_address;
16025 else
16027 /* We will adjust this below if it is too loose. */
16028 mp->min_address = min_address;
16030 /* Unlink MP from its current position. Since min_mp is non-null,
16031 mp->next must be non-null. */
16032 mp->next->prev = mp->prev;
16033 if (mp->prev != NULL)
16034 mp->prev->next = mp->next;
16035 else
16036 minipool_vector_head = mp->next;
16038 /* Reinsert it after MIN_MP. */
16039 mp->prev = min_mp;
16040 mp->next = min_mp->next;
16041 min_mp->next = mp;
16042 if (mp->next != NULL)
16043 mp->next->prev = mp;
16044 else
16045 minipool_vector_tail = mp;
16048 min_mp = mp;
16050 offset = 0;
16051 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16053 mp->offset = offset;
16054 if (mp->refcount > 0)
16055 offset += mp->fix_size;
16057 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16058 mp->next->min_address = mp->min_address + mp->fix_size;
16061 return min_mp;
16064 /* Add a constant to the minipool for a backward reference. Returns the
16065 node added or NULL if the constant will not fit in this pool.
16067 Note that the code for insertion for a backwards reference can be
16068 somewhat confusing because the calculated offsets for each fix do
16069 not take into account the size of the pool (which is still under
16070 construction). */
16071 static Mnode *
16072 add_minipool_backward_ref (Mfix *fix)
16074 /* If set, min_mp is the last pool_entry that has a lower constraint
16075 than the one we are trying to add. */
16076 Mnode *min_mp = NULL;
16077 /* This can be negative, since it is only a constraint. */
16078 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16079 Mnode *mp;
16081 /* If we can't reach the current pool from this insn, or if we can't
16082 insert this entry at the end of the pool without pushing other
16083 fixes out of range, then we don't try. This ensures that we
16084 can't fail later on. */
16085 if (min_address >= minipool_barrier->address
16086 || (minipool_vector_tail->min_address + fix->fix_size
16087 >= minipool_barrier->address))
16088 return NULL;
16090 /* Scan the pool to see if a constant with the same value has
16091 already been added. While we are doing this, also note the
16092 location where we must insert the constant if it doesn't already
16093 exist. */
16094 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16096 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16097 && fix->mode == mp->mode
16098 && (!LABEL_P (fix->value)
16099 || (CODE_LABEL_NUMBER (fix->value)
16100 == CODE_LABEL_NUMBER (mp->value)))
16101 && rtx_equal_p (fix->value, mp->value)
16102 /* Check that there is enough slack to move this entry to the
16103 end of the table (this is conservative). */
16104 && (mp->max_address
16105 > (minipool_barrier->address
16106 + minipool_vector_tail->offset
16107 + minipool_vector_tail->fix_size)))
16109 mp->refcount++;
16110 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16113 if (min_mp != NULL)
16114 mp->min_address += fix->fix_size;
16115 else
16117 /* Note the insertion point if necessary. */
16118 if (mp->min_address < min_address)
16120 /* For now, we do not allow the insertion of 8-byte alignment
16121 requiring nodes anywhere but at the start of the pool. */
16122 if (ARM_DOUBLEWORD_ALIGN
16123 && fix->fix_size >= 8 && mp->fix_size < 8)
16124 return NULL;
16125 else
16126 min_mp = mp;
16128 else if (mp->max_address
16129 < minipool_barrier->address + mp->offset + fix->fix_size)
16131 /* Inserting before this entry would push the fix beyond
16132 its maximum address (which can happen if we have
16133 re-located a forwards fix); force the new fix to come
16134 after it. */
16135 if (ARM_DOUBLEWORD_ALIGN
16136 && fix->fix_size >= 8 && mp->fix_size < 8)
16137 return NULL;
16138 else
16140 min_mp = mp;
16141 min_address = mp->min_address + fix->fix_size;
16144 /* Do not insert a non-8-byte aligned quantity before 8-byte
16145 aligned quantities. */
16146 else if (ARM_DOUBLEWORD_ALIGN
16147 && fix->fix_size < 8
16148 && mp->fix_size >= 8)
16150 min_mp = mp;
16151 min_address = mp->min_address + fix->fix_size;
16156 /* We need to create a new entry. */
16157 mp = XNEW (Mnode);
16158 mp->fix_size = fix->fix_size;
16159 mp->mode = fix->mode;
16160 mp->value = fix->value;
16161 mp->refcount = 1;
16162 mp->max_address = minipool_barrier->address + 65536;
16164 mp->min_address = min_address;
16166 if (min_mp == NULL)
16168 mp->prev = NULL;
16169 mp->next = minipool_vector_head;
16171 if (mp->next == NULL)
16173 minipool_vector_tail = mp;
16174 minipool_vector_label = gen_label_rtx ();
16176 else
16177 mp->next->prev = mp;
16179 minipool_vector_head = mp;
16181 else
16183 mp->next = min_mp->next;
16184 mp->prev = min_mp;
16185 min_mp->next = mp;
16187 if (mp->next != NULL)
16188 mp->next->prev = mp;
16189 else
16190 minipool_vector_tail = mp;
16193 /* Save the new entry. */
16194 min_mp = mp;
16196 if (mp->prev)
16197 mp = mp->prev;
16198 else
16199 mp->offset = 0;
16201 /* Scan over the following entries and adjust their offsets. */
16202 while (mp->next != NULL)
16204 if (mp->next->min_address < mp->min_address + mp->fix_size)
16205 mp->next->min_address = mp->min_address + mp->fix_size;
16207 if (mp->refcount)
16208 mp->next->offset = mp->offset + mp->fix_size;
16209 else
16210 mp->next->offset = mp->offset;
16212 mp = mp->next;
16215 return min_mp;
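/* Record BARRIER as the barrier after which the current minipool will
   be emitted and assign each referenced entry its byte offset from the
   start of the pool.  */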
16218 static void
16219 assign_minipool_offsets (Mfix *barrier)
16221 HOST_WIDE_INT offset = 0;
16222 Mnode *mp;
16224 minipool_barrier = barrier;
16226 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16228 mp->offset = offset;
16230 if (mp->refcount > 0)
16231 offset += mp->fix_size;
16235 /* Output the literal table */
16236 static void
16237 dump_minipool (rtx scan)
16239 Mnode * mp;
16240 Mnode * nmp;
16241 int align64 = 0;
16243 if (ARM_DOUBLEWORD_ALIGN)
16244 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16245 if (mp->refcount > 0 && mp->fix_size >= 8)
16247 align64 = 1;
16248 break;
16251 if (dump_file)
16252 fprintf (dump_file,
16253 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16254 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16256 scan = emit_label_after (gen_label_rtx (), scan);
16257 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16258 scan = emit_label_after (minipool_vector_label, scan);
16260 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16262 if (mp->refcount > 0)
16264 if (dump_file)
16266 fprintf (dump_file,
16267 ";; Offset %u, min %ld, max %ld ",
16268 (unsigned) mp->offset, (unsigned long) mp->min_address,
16269 (unsigned long) mp->max_address);
16270 arm_print_value (dump_file, mp->value);
16271 fputc ('\n', dump_file);
16274 switch (mp->fix_size)
16276 #ifdef HAVE_consttable_1
16277 case 1:
16278 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16279 break;
16281 #endif
16282 #ifdef HAVE_consttable_2
16283 case 2:
16284 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16285 break;
16287 #endif
16288 #ifdef HAVE_consttable_4
16289 case 4:
16290 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16291 break;
16293 #endif
16294 #ifdef HAVE_consttable_8
16295 case 8:
16296 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16297 break;
16299 #endif
16300 #ifdef HAVE_consttable_16
16301 case 16:
16302 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16303 break;
16305 #endif
16306 default:
16307 gcc_unreachable ();
16311 nmp = mp->next;
16312 free (mp);
16315 minipool_vector_head = minipool_vector_tail = NULL;
16316 scan = emit_insn_after (gen_consttable_end (), scan);
16317 scan = emit_barrier_after (scan);
16320 /* Return the cost of forcibly inserting a barrier after INSN. */
16321 static int
16322 arm_barrier_cost (rtx insn)
16324 /* Basing the location of the pool on the loop depth is preferable,
16325 but at the moment, the basic block information seems to be
16326 corrupt by this stage of the compilation. */
16327 int base_cost = 50;
16328 rtx next = next_nonnote_insn (insn);
16330 if (next != NULL && LABEL_P (next))
16331 base_cost -= 20;
16333 switch (GET_CODE (insn))
16335 case CODE_LABEL:
16336 /* It will always be better to place the table before the label, rather
16337 than after it. */
16338 return 50;
16340 case INSN:
16341 case CALL_INSN:
16342 return base_cost;
16344 case JUMP_INSN:
16345 return base_cost - 10;
16347 default:
16348 return base_cost + 10;
16352 /* Find the best place in the insn stream in the range
16353 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16354 Create the barrier by inserting a jump and add a new fix entry for
16355 it. */
16356 static Mfix *
16357 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16359 HOST_WIDE_INT count = 0;
16360 rtx barrier;
16361 rtx from = fix->insn;
16362 /* The instruction after which we will insert the jump. */
16363 rtx selected = NULL;
16364 int selected_cost;
16365 /* The address at which the jump instruction will be placed. */
16366 HOST_WIDE_INT selected_address;
16367 Mfix * new_fix;
16368 HOST_WIDE_INT max_count = max_address - fix->address;
16369 rtx label = gen_label_rtx ();
16371 selected_cost = arm_barrier_cost (from);
16372 selected_address = fix->address;
16374 while (from && count < max_count)
16376 rtx tmp;
16377 int new_cost;
16379 /* This code shouldn't have been called if there was a natural barrier
16380 within range. */
16381 gcc_assert (!BARRIER_P (from));
16383 /* Count the length of this insn. This must stay in sync with the
16384 code that pushes minipool fixes. */
16385 if (LABEL_P (from))
16386 count += get_label_padding (from);
16387 else
16388 count += get_attr_length (from);
16390 /* If there is a jump table, add its length. */
16391 if (tablejump_p (from, NULL, &tmp))
16393 count += get_jump_table_size (tmp);
16395 /* Jump tables aren't in a basic block, so base the cost on
16396 the dispatch insn. If we select this location, we will
16397 still put the pool after the table. */
16398 new_cost = arm_barrier_cost (from);
16400 if (count < max_count
16401 && (!selected || new_cost <= selected_cost))
16403 selected = tmp;
16404 selected_cost = new_cost;
16405 selected_address = fix->address + count;
16408 /* Continue after the dispatch table. */
16409 from = NEXT_INSN (tmp);
16410 continue;
16413 new_cost = arm_barrier_cost (from);
16415 if (count < max_count
16416 && (!selected || new_cost <= selected_cost))
16418 selected = from;
16419 selected_cost = new_cost;
16420 selected_address = fix->address + count;
16423 from = NEXT_INSN (from);
16426 /* Make sure that we found a place to insert the jump. */
16427 gcc_assert (selected);
16429 /* Make sure we do not split a call and its corresponding
16430 CALL_ARG_LOCATION note. */
16431 if (CALL_P (selected))
16433 rtx next = NEXT_INSN (selected);
16434 if (next && NOTE_P (next)
16435 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16436 selected = next;
16439 /* Create a new JUMP_INSN that branches around a barrier. */
16440 from = emit_jump_insn_after (gen_jump (label), selected);
16441 JUMP_LABEL (from) = label;
16442 barrier = emit_barrier_after (from);
16443 emit_label_after (label, barrier);
16445 /* Create a minipool barrier entry for the new barrier. */
16446 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16447 new_fix->insn = barrier;
16448 new_fix->address = selected_address;
16449 new_fix->next = fix->next;
16450 fix->next = new_fix;
16452 return new_fix;
16455 /* Record that there is a natural barrier in the insn stream at
16456 ADDRESS. */
16457 static void
16458 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16460 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16462 fix->insn = insn;
16463 fix->address = address;
16465 fix->next = NULL;
16466 if (minipool_fix_head != NULL)
16467 minipool_fix_tail->next = fix;
16468 else
16469 minipool_fix_head = fix;
16471 minipool_fix_tail = fix;
16474 /* Record INSN, which will need fixing up to load a value from the
16475 minipool. ADDRESS is the offset of the insn since the start of the
16476 function; LOC is a pointer to the part of the insn which requires
16477 fixing; VALUE is the constant that must be loaded, which is of type
16478 MODE. */
16479 static void
16480 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16481 enum machine_mode mode, rtx value)
16483 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16485 fix->insn = insn;
16486 fix->address = address;
16487 fix->loc = loc;
16488 fix->mode = mode;
16489 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16490 fix->value = value;
16491 fix->forwards = get_attr_pool_range (insn);
16492 fix->backwards = get_attr_neg_pool_range (insn);
16493 fix->minipool = NULL;
16495 /* If an insn doesn't have a range defined for it, then it isn't
16496 expecting to be reworked by this code. Better to stop now than
16497 to generate duff assembly code. */
16498 gcc_assert (fix->forwards || fix->backwards);
16500 /* If an entry requires 8-byte alignment then assume all constant pools
16501 require 4 bytes of padding. Trying to do this later on a per-pool
16502 basis is awkward because existing pool entries have to be modified. */
16503 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16504 minipool_pad = 4;
16506 if (dump_file)
16508 fprintf (dump_file,
16509 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16510 GET_MODE_NAME (mode),
16511 INSN_UID (insn), (unsigned long) address,
16512 -1 * (long)fix->backwards, (long)fix->forwards);
16513 arm_print_value (dump_file, fix->value);
16514 fprintf (dump_file, "\n");
16517 /* Add it to the chain of fixes. */
16518 fix->next = NULL;
16520 if (minipool_fix_head != NULL)
16521 minipool_fix_tail->next = fix;
16522 else
16523 minipool_fix_head = fix;
16525 minipool_fix_tail = fix;
16528 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16529 Returns the number of insns needed, or 99 if we always want to synthesize
16530 the value. */
16532 arm_max_const_double_inline_cost ()
16534 /* Let the value get synthesized to avoid the use of literal pools. */
16535 if (arm_disable_literal_pool)
16536 return 99;
16538 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16541 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16542 Returns the number of insns needed, or 99 if we don't know how to
16543 do it. */
16545 arm_const_double_inline_cost (rtx val)
16547 rtx lowpart, highpart;
16548 enum machine_mode mode;
16550 mode = GET_MODE (val);
16552 if (mode == VOIDmode)
16553 mode = DImode;
16555 gcc_assert (GET_MODE_SIZE (mode) == 8);
16557 lowpart = gen_lowpart (SImode, val);
16558 highpart = gen_highpart_mode (SImode, mode, val);
16560 gcc_assert (CONST_INT_P (lowpart));
16561 gcc_assert (CONST_INT_P (highpart));
16563 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16564 NULL_RTX, NULL_RTX, 0, 0)
16565 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16566 NULL_RTX, NULL_RTX, 0, 0));
16569 /* Return true if it is worthwhile to split a 64-bit constant into two
16570 32-bit operations. This is the case if optimizing for size, or
16571 if we have load delay slots, or if one 32-bit part can be done with
16572 a single data operation. */
16573 bool
16574 arm_const_double_by_parts (rtx val)
16576 enum machine_mode mode = GET_MODE (val);
16577 rtx part;
16579 if (optimize_size || arm_ld_sched)
16580 return true;
16582 if (mode == VOIDmode)
16583 mode = DImode;
16585 part = gen_highpart_mode (SImode, mode, val);
16587 gcc_assert (CONST_INT_P (part));
16589 if (const_ok_for_arm (INTVAL (part))
16590 || const_ok_for_arm (~INTVAL (part)))
16591 return true;
16593 part = gen_lowpart (SImode, val);
16595 gcc_assert (CONST_INT_P (part));
16597 if (const_ok_for_arm (INTVAL (part))
16598 || const_ok_for_arm (~INTVAL (part)))
16599 return true;
16601 return false;
16604 /* Return true if it is possible to inline both the high and low parts
16605 of a 64-bit constant into 32-bit data processing instructions. */
16606 bool
16607 arm_const_double_by_immediates (rtx val)
16609 enum machine_mode mode = GET_MODE (val);
16610 rtx part;
16612 if (mode == VOIDmode)
16613 mode = DImode;
16615 part = gen_highpart_mode (SImode, mode, val);
16617 gcc_assert (CONST_INT_P (part));
16619 if (!const_ok_for_arm (INTVAL (part)))
16620 return false;
16622 part = gen_lowpart (SImode, val);
16624 gcc_assert (CONST_INT_P (part));
16626 if (!const_ok_for_arm (INTVAL (part)))
16627 return false;
16629 return true;
16632 /* Scan INSN and note any of its operands that need fixing.
16633 If DO_PUSHES is false we do not actually push any of the fixups
16634 needed. */
16635 static void
16636 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16638 int opno;
16640 extract_insn (insn);
16642 if (!constrain_operands (1))
16643 fatal_insn_not_found (insn);
16645 if (recog_data.n_alternatives == 0)
16646 return;
16648 /* Fill in recog_op_alt with information about the constraints of
16649 this insn. */
16650 preprocess_constraints ();
16652 for (opno = 0; opno < recog_data.n_operands; opno++)
16654 /* Things we need to fix can only occur in inputs. */
16655 if (recog_data.operand_type[opno] != OP_IN)
16656 continue;
16658 /* If this alternative is a memory reference, then any mention
16659 of constants in this alternative is really to fool reload
16660 into allowing us to accept one there. We need to fix them up
16661 now so that we output the right code. */
16662 if (recog_op_alt[opno][which_alternative].memory_ok)
16664 rtx op = recog_data.operand[opno];
16666 if (CONSTANT_P (op))
16668 if (do_pushes)
16669 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16670 recog_data.operand_mode[opno], op);
16672 else if (MEM_P (op)
16673 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16674 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16676 if (do_pushes)
16678 rtx cop = avoid_constant_pool_reference (op);
16680 /* Casting the address of something to a mode narrower
16681 than a word can cause avoid_constant_pool_reference()
16682 to return the pool reference itself. That's no good to
16683 us here. Let's just hope that we can use the
16684 constant pool value directly. */
16685 if (op == cop)
16686 cop = get_pool_constant (XEXP (op, 0));
16688 push_minipool_fix (insn, address,
16689 recog_data.operand_loc[opno],
16690 recog_data.operand_mode[opno], cop);
16697 return;
16700 /* Rewrite move insn into subtract of 0 if the condition codes will
16701 be useful in next conditional jump insn. */
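/* A rough sketch of the rewrite performed below: a flag-neutral copy

     (set (reg:SI rd) (reg:SI rs))

   feeding the final cbranchsi4_insn of the block is turned into

     (set (reg:SI rd) (minus:SI (reg:SI rs) (const_int 0)))

   and the register tested by the branch is changed to rd, so that the
   flag-setting SUBS form of the move can later make a separate compare
   against zero redundant.  */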
16703 static void
16704 thumb1_reorg (void)
16706 basic_block bb;
16708 FOR_EACH_BB_FN (bb, cfun)
16710 rtx dest, src;
16711 rtx pat, op0, set = NULL;
16712 rtx prev, insn = BB_END (bb);
16713 bool insn_clobbered = false;
16715 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16716 insn = PREV_INSN (insn);
16718 /* Find the last cbranchsi4_insn in basic block BB. */
16719 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16720 continue;
16722 /* Get the register with which we are comparing. */
16723 pat = PATTERN (insn);
16724 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16726 /* Find the first flag setting insn before INSN in basic block BB. */
16727 gcc_assert (insn != BB_HEAD (bb));
16728 for (prev = PREV_INSN (insn);
16729 (!insn_clobbered
16730 && prev != BB_HEAD (bb)
16731 && (NOTE_P (prev)
16732 || DEBUG_INSN_P (prev)
16733 || ((set = single_set (prev)) != NULL
16734 && get_attr_conds (prev) == CONDS_NOCOND)));
16735 prev = PREV_INSN (prev))
16737 if (reg_set_p (op0, prev))
16738 insn_clobbered = true;
16741 /* Skip if op0 is clobbered by insn other than prev. */
16742 if (insn_clobbered)
16743 continue;
16745 if (!set)
16746 continue;
16748 dest = SET_DEST (set);
16749 src = SET_SRC (set);
16750 if (!low_register_operand (dest, SImode)
16751 || !low_register_operand (src, SImode))
16752 continue;
16754 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16755 in INSN. Both src and dest of the move insn are checked. */
16756 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16758 dest = copy_rtx (dest);
16759 src = copy_rtx (src);
16760 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16761 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16762 INSN_CODE (prev) = -1;
16763 /* Set test register in INSN to dest. */
16764 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16765 INSN_CODE (insn) = -1;
16770 /* Convert instructions to their cc-clobbering variant if possible, since
16771 that allows us to use smaller encodings. */
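/* For example, when the conditions checked below hold, a plain

     (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))

   is rewritten as

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))
                (clobber (reg:CC CC_REGNUM))])

   (register numbers illustrative), which matches the flag-setting ADDS
   pattern and is therefore eligible for a 16-bit encoding.  */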
16773 static void
16774 thumb2_reorg (void)
16776 basic_block bb;
16777 regset_head live;
16779 INIT_REG_SET (&live);
16781 /* We are freeing block_for_insn in the toplev to keep compatibility
16782 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16783 compute_bb_for_insn ();
16784 df_analyze ();
16786 FOR_EACH_BB_FN (bb, cfun)
16788 rtx insn;
16790 COPY_REG_SET (&live, DF_LR_OUT (bb));
16791 df_simulate_initialize_backwards (bb, &live);
16792 FOR_BB_INSNS_REVERSE (bb, insn)
16794 if (NONJUMP_INSN_P (insn)
16795 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16796 && GET_CODE (PATTERN (insn)) == SET)
16798 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16799 rtx pat = PATTERN (insn);
16800 rtx dst = XEXP (pat, 0);
16801 rtx src = XEXP (pat, 1);
16802 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16804 if (!OBJECT_P (src))
16805 op0 = XEXP (src, 0);
16807 if (BINARY_P (src))
16808 op1 = XEXP (src, 1);
16810 if (low_register_operand (dst, SImode))
16812 switch (GET_CODE (src))
16814 case PLUS:
16815 /* Adding two registers and storing the result
16816 in the first source is already a 16-bit
16817 operation. */
16818 if (rtx_equal_p (dst, op0)
16819 && register_operand (op1, SImode))
16820 break;
16822 if (low_register_operand (op0, SImode))
16824 /* ADDS <Rd>,<Rn>,<Rm> */
16825 if (low_register_operand (op1, SImode))
16826 action = CONV;
16827 /* ADDS <Rdn>,#<imm8> */
16828 /* SUBS <Rdn>,#<imm8> */
16829 else if (rtx_equal_p (dst, op0)
16830 && CONST_INT_P (op1)
16831 && IN_RANGE (INTVAL (op1), -255, 255))
16832 action = CONV;
16833 /* ADDS <Rd>,<Rn>,#<imm3> */
16834 /* SUBS <Rd>,<Rn>,#<imm3> */
16835 else if (CONST_INT_P (op1)
16836 && IN_RANGE (INTVAL (op1), -7, 7))
16837 action = CONV;
16839 /* ADCS <Rd>, <Rn> */
16840 else if (GET_CODE (XEXP (src, 0)) == PLUS
16841 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16842 && low_register_operand (XEXP (XEXP (src, 0), 1),
16843 SImode)
16844 && COMPARISON_P (op1)
16845 && cc_register (XEXP (op1, 0), VOIDmode)
16846 && maybe_get_arm_condition_code (op1) == ARM_CS
16847 && XEXP (op1, 1) == const0_rtx)
16848 action = CONV;
16849 break;
16851 case MINUS:
16852 /* RSBS <Rd>,<Rn>,#0
16853 Not handled here: see NEG below. */
16854 /* SUBS <Rd>,<Rn>,#<imm3>
16855 SUBS <Rdn>,#<imm8>
16856 Not handled here: see PLUS above. */
16857 /* SUBS <Rd>,<Rn>,<Rm> */
16858 if (low_register_operand (op0, SImode)
16859 && low_register_operand (op1, SImode))
16860 action = CONV;
16861 break;
16863 case MULT:
16864 /* MULS <Rdm>,<Rn>,<Rdm>
16865 As an exception to the rule, this is only used
16866 when optimizing for size since MULS is slow on all
16867 known implementations. We do not even want to use
16868 MULS in cold code, if optimizing for speed, so we
16869 test the global flag here. */
16870 if (!optimize_size)
16871 break;
16872 /* else fall through. */
16873 case AND:
16874 case IOR:
16875 case XOR:
16876 /* ANDS <Rdn>,<Rm> */
16877 if (rtx_equal_p (dst, op0)
16878 && low_register_operand (op1, SImode))
16879 action = CONV;
16880 else if (rtx_equal_p (dst, op1)
16881 && low_register_operand (op0, SImode))
16882 action = SWAP_CONV;
16883 break;
16885 case ASHIFTRT:
16886 case ASHIFT:
16887 case LSHIFTRT:
16888 /* ASRS <Rdn>,<Rm> */
16889 /* LSRS <Rdn>,<Rm> */
16890 /* LSLS <Rdn>,<Rm> */
16891 if (rtx_equal_p (dst, op0)
16892 && low_register_operand (op1, SImode))
16893 action = CONV;
16894 /* ASRS <Rd>,<Rm>,#<imm5> */
16895 /* LSRS <Rd>,<Rm>,#<imm5> */
16896 /* LSLS <Rd>,<Rm>,#<imm5> */
16897 else if (low_register_operand (op0, SImode)
16898 && CONST_INT_P (op1)
16899 && IN_RANGE (INTVAL (op1), 0, 31))
16900 action = CONV;
16901 break;
16903 case ROTATERT:
16904 /* RORS <Rdn>,<Rm> */
16905 if (rtx_equal_p (dst, op0)
16906 && low_register_operand (op1, SImode))
16907 action = CONV;
16908 break;
16910 case NOT:
16911 case NEG:
16912 /* MVNS <Rd>,<Rm> */
16913 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16914 if (low_register_operand (op0, SImode))
16915 action = CONV;
16916 break;
16918 case CONST_INT:
16919 /* MOVS <Rd>,#<imm8> */
16920 if (CONST_INT_P (src)
16921 && IN_RANGE (INTVAL (src), 0, 255))
16922 action = CONV;
16923 break;
16925 case REG:
16926 /* MOVS and MOV<c> with registers have different
16927 encodings, so are not relevant here. */
16928 break;
16930 default:
16931 break;
16935 if (action != SKIP)
16937 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16938 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16939 rtvec vec;
16941 if (action == SWAP_CONV)
16943 src = copy_rtx (src);
16944 XEXP (src, 0) = op1;
16945 XEXP (src, 1) = op0;
16946 pat = gen_rtx_SET (VOIDmode, dst, src);
16947 vec = gen_rtvec (2, pat, clobber);
16949 else /* action == CONV */
16950 vec = gen_rtvec (2, pat, clobber);
16952 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16953 INSN_CODE (insn) = -1;
16957 if (NONDEBUG_INSN_P (insn))
16958 df_simulate_one_insn_backwards (bb, insn, &live);
16962 CLEAR_REG_SET (&live);
16965 /* GCC puts the pool in the wrong place for ARM, since we can only
16966 load addresses a limited distance around the pc. We do some
16967 special munging to move the constant pool values to the correct
16968 point in the code. */
16969 static void
16970 arm_reorg (void)
16972 rtx insn;
16973 HOST_WIDE_INT address = 0;
16974 Mfix * fix;
16976 if (TARGET_THUMB1)
16977 thumb1_reorg ();
16978 else if (TARGET_THUMB2)
16979 thumb2_reorg ();
16981 /* Ensure all insns that must be split have been split at this point.
16982 Otherwise, the pool placement code below may compute incorrect
16983 insn lengths. Note that when optimizing, all insns have already
16984 been split at this point. */
16985 if (!optimize)
16986 split_all_insns_noflow ();
16988 minipool_fix_head = minipool_fix_tail = NULL;
16990 /* The first insn must always be a note, or the code below won't
16991 scan it properly. */
16992 insn = get_insns ();
16993 gcc_assert (NOTE_P (insn));
16994 minipool_pad = 0;
16996 /* Scan all the insns and record the operands that will need fixing. */
16997 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16999 if (BARRIER_P (insn))
17000 push_minipool_barrier (insn, address);
17001 else if (INSN_P (insn))
17003 rtx table;
17005 note_invalid_constants (insn, address, true);
17006 address += get_attr_length (insn);
17008 /* If the insn is a vector jump, add the size of the table
17009 and skip the table. */
17010 if (tablejump_p (insn, NULL, &table))
17012 address += get_jump_table_size (table);
17013 insn = table;
17016 else if (LABEL_P (insn))
17017 /* Add the worst-case padding due to alignment. We don't add
17018 the _current_ padding because the minipool insertions
17019 themselves might change it. */
17020 address += get_label_padding (insn);
17023 fix = minipool_fix_head;
17025 /* Now scan the fixups and perform the required changes. */
17026 while (fix)
17028 Mfix * ftmp;
17029 Mfix * fdel;
17030 Mfix * last_added_fix;
17031 Mfix * last_barrier = NULL;
17032 Mfix * this_fix;
17034 /* Skip any further barriers before the next fix. */
17035 while (fix && BARRIER_P (fix->insn))
17036 fix = fix->next;
17038 /* No more fixes. */
17039 if (fix == NULL)
17040 break;
17042 last_added_fix = NULL;
17044 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17046 if (BARRIER_P (ftmp->insn))
17048 if (ftmp->address >= minipool_vector_head->max_address)
17049 break;
17051 last_barrier = ftmp;
17053 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17054 break;
17056 last_added_fix = ftmp; /* Keep track of the last fix added. */
17059 /* If we found a barrier, drop back to that; any fixes that we
17060 could have reached but come after the barrier will now go in
17061 the next mini-pool. */
17062 if (last_barrier != NULL)
17064 /* Reduce the refcount for those fixes that won't go into this
17065 pool after all. */
17066 for (fdel = last_barrier->next;
17067 fdel && fdel != ftmp;
17068 fdel = fdel->next)
17070 fdel->minipool->refcount--;
17071 fdel->minipool = NULL;
17074 ftmp = last_barrier;
17076 else
17078 /* ftmp is the first fix that we can't fit into this pool and
17079 there are no natural barriers that we could use. Insert a
17080 new barrier in the code somewhere between the previous
17081 fix and this one, and arrange to jump around it. */
17082 HOST_WIDE_INT max_address;
17084 /* The last item on the list of fixes must be a barrier, so
17085 we can never run off the end of the list of fixes without
17086 last_barrier being set. */
17087 gcc_assert (ftmp);
17089 max_address = minipool_vector_head->max_address;
17090 /* Check that there isn't another fix that is in range that
17091 we couldn't fit into this pool because the pool was
17092 already too large: we need to put the pool before such an
17093 instruction. The pool itself may come just after the
17094 fix because create_fix_barrier also allows space for a
17095 jump instruction. */
17096 if (ftmp->address < max_address)
17097 max_address = ftmp->address + 1;
17099 last_barrier = create_fix_barrier (last_added_fix, max_address);
17102 assign_minipool_offsets (last_barrier);
17104 while (ftmp)
17106 if (!BARRIER_P (ftmp->insn)
17107 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17108 == NULL))
17109 break;
17111 ftmp = ftmp->next;
17114 /* Scan over the fixes we have identified for this pool, fixing them
17115 up and adding the constants to the pool itself. */
17116 for (this_fix = fix; this_fix && ftmp != this_fix;
17117 this_fix = this_fix->next)
17118 if (!BARRIER_P (this_fix->insn))
17120 rtx addr
17121 = plus_constant (Pmode,
17122 gen_rtx_LABEL_REF (VOIDmode,
17123 minipool_vector_label),
17124 this_fix->minipool->offset);
17125 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17128 dump_minipool (last_barrier->insn);
17129 fix = ftmp;
17132 /* From now on we must synthesize any constants that we can't handle
17133 directly. This can happen if the RTL gets split during final
17134 instruction generation. */
17135 after_arm_reorg = 1;
17137 /* Free the minipool memory. */
17138 obstack_free (&minipool_obstack, minipool_startobj);
17141 /* Routines to output assembly language. */
17143 /* If the rtx is the correct value then return the string of the number.
17144 In this way we can ensure that valid double constants are generated even
17145 when cross compiling. */
17146 const char *
17147 fp_immediate_constant (rtx x)
17149 REAL_VALUE_TYPE r;
17151 if (!fp_consts_inited)
17152 init_fp_table ();
17154 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17156 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17157 return "0";
17160 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17161 static const char *
17162 fp_const_from_val (REAL_VALUE_TYPE *r)
17164 if (!fp_consts_inited)
17165 init_fp_table ();
17167 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17168 return "0";
17171 /* OPERANDS[0] is the entire list of insns that constitute pop,
17172 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17173 is in the list, UPDATE is true iff the list contains explicit
17174 update of base register. */
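/* For instance, a pop of r4, r5 and the PC with SP as the base and
   writeback is emitted as "pop {r4, r5, pc}" under unified syntax,
   while a non-SP base produces an LDM form such as
   "ldmia r7, {r4, r5}" (register choice illustrative).  */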
17175 void
17176 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17177 bool update)
17179 int i;
17180 char pattern[100];
17181 int offset;
17182 const char *conditional;
17183 int num_saves = XVECLEN (operands[0], 0);
17184 unsigned int regno;
17185 unsigned int regno_base = REGNO (operands[1]);
17187 offset = 0;
17188 offset += update ? 1 : 0;
17189 offset += return_pc ? 1 : 0;
17191 /* Is the base register in the list? */
17192 for (i = offset; i < num_saves; i++)
17194 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17195 /* If SP is in the list, then the base register must be SP. */
17196 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17197 /* If base register is in the list, there must be no explicit update. */
17198 if (regno == regno_base)
17199 gcc_assert (!update);
17202 conditional = reverse ? "%?%D0" : "%?%d0";
17203 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17205 /* Output pop (not ldmfd) because it has a shorter encoding. */
17206 gcc_assert (update);
17207 sprintf (pattern, "pop%s\t{", conditional);
17209 else
17211 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17212 It's just a convention; their semantics are identical. */
17213 if (regno_base == SP_REGNUM)
17214 sprintf (pattern, "ldm%sfd\t", conditional);
17215 else if (TARGET_UNIFIED_ASM)
17216 sprintf (pattern, "ldmia%s\t", conditional);
17217 else
17218 sprintf (pattern, "ldm%sia\t", conditional);
17220 strcat (pattern, reg_names[regno_base]);
17221 if (update)
17222 strcat (pattern, "!, {");
17223 else
17224 strcat (pattern, ", {");
17227 /* Output the first destination register. */
17228 strcat (pattern,
17229 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17231 /* Output the rest of the destination registers. */
17232 for (i = offset + 1; i < num_saves; i++)
17234 strcat (pattern, ", ");
17235 strcat (pattern,
17236 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17239 strcat (pattern, "}");
17241 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17242 strcat (pattern, "^");
17244 output_asm_insn (pattern, &cond);
17248 /* Output the assembly for a store multiple. */
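/* The emitted form is along the lines of "fstmfdd sp!, {d8, d9, d10}",
   with one D register per element of the PARALLEL in operand 2
   (registers illustrative).  */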
17250 const char *
17251 vfp_output_fstmd (rtx * operands)
17253 char pattern[100];
17254 int p;
17255 int base;
17256 int i;
17258 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17259 p = strlen (pattern);
17261 gcc_assert (REG_P (operands[1]));
17263 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17264 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17266 p += sprintf (&pattern[p], ", d%d", base + i);
17268 strcpy (&pattern[p], "}");
17270 output_asm_insn (pattern, operands);
17271 return "";
17275 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17276 number of bytes pushed. */
17278 static int
17279 vfp_emit_fstmd (int base_reg, int count)
17281 rtx par;
17282 rtx dwarf;
17283 rtx tmp, reg;
17284 int i;
17286 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17287 register pairs are stored by a store multiple insn. We avoid this
17288 by pushing an extra pair. */
17289 if (count == 2 && !arm_arch6)
17291 if (base_reg == LAST_VFP_REGNUM - 3)
17292 base_reg -= 2;
17293 count++;
17296 /* FSTMD may not store more than 16 doubleword registers at once. Split
17297 larger stores into multiple parts (up to a maximum of two, in
17298 practice). */
17299 if (count > 16)
17301 int saved;
17302 /* NOTE: base_reg is an internal register number, so each D register
17303 counts as 2. */
17304 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17305 saved += vfp_emit_fstmd (base_reg, 16);
17306 return saved;
17309 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17310 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17312 reg = gen_rtx_REG (DFmode, base_reg);
17313 base_reg += 2;
17315 XVECEXP (par, 0, 0)
17316 = gen_rtx_SET (VOIDmode,
17317 gen_frame_mem
17318 (BLKmode,
17319 gen_rtx_PRE_MODIFY (Pmode,
17320 stack_pointer_rtx,
17321 plus_constant
17322 (Pmode, stack_pointer_rtx,
17323 - (count * 8)))
17325 gen_rtx_UNSPEC (BLKmode,
17326 gen_rtvec (1, reg),
17327 UNSPEC_PUSH_MULT));
17329 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17330 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17331 RTX_FRAME_RELATED_P (tmp) = 1;
17332 XVECEXP (dwarf, 0, 0) = tmp;
17334 tmp = gen_rtx_SET (VOIDmode,
17335 gen_frame_mem (DFmode, stack_pointer_rtx),
17336 reg);
17337 RTX_FRAME_RELATED_P (tmp) = 1;
17338 XVECEXP (dwarf, 0, 1) = tmp;
17340 for (i = 1; i < count; i++)
17342 reg = gen_rtx_REG (DFmode, base_reg);
17343 base_reg += 2;
17344 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17346 tmp = gen_rtx_SET (VOIDmode,
17347 gen_frame_mem (DFmode,
17348 plus_constant (Pmode,
17349 stack_pointer_rtx,
17350 i * 8)),
17351 reg);
17352 RTX_FRAME_RELATED_P (tmp) = 1;
17353 XVECEXP (dwarf, 0, i + 1) = tmp;
17356 par = emit_insn (par);
17357 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17358 RTX_FRAME_RELATED_P (par) = 1;
17360 return count * 8;
17363 /* Emit a call instruction with pattern PAT. ADDR is the address of
17364 the call target. */
17366 void
17367 arm_emit_call_insn (rtx pat, rtx addr)
17369 rtx insn;
17371 insn = emit_call_insn (pat);
17373 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17374 If the call might use such an entry, add a use of the PIC register
17375 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17376 if (TARGET_VXWORKS_RTP
17377 && flag_pic
17378 && GET_CODE (addr) == SYMBOL_REF
17379 && (SYMBOL_REF_DECL (addr)
17380 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17381 : !SYMBOL_REF_LOCAL_P (addr)))
17383 require_pic_register ();
17384 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17388 /* Output a 'call' insn. */
17389 const char *
17390 output_call (rtx *operands)
17392 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17394 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17395 if (REGNO (operands[0]) == LR_REGNUM)
17397 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17398 output_asm_insn ("mov%?\t%0, %|lr", operands);
17401 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17403 if (TARGET_INTERWORK || arm_arch4t)
17404 output_asm_insn ("bx%?\t%0", operands);
17405 else
17406 output_asm_insn ("mov%?\t%|pc, %0", operands);
17408 return "";
17411 /* Output a 'call' insn that is a reference in memory. This is
17412 disabled for ARMv5 and we prefer a blx instead because otherwise
17413 there's a significant performance overhead. */
17414 const char *
17415 output_call_mem (rtx *operands)
17417 gcc_assert (!arm_arch5);
17418 if (TARGET_INTERWORK)
17420 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17421 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17422 output_asm_insn ("bx%?\t%|ip", operands);
17424 else if (regno_use_in (LR_REGNUM, operands[0]))
17426 /* LR is used in the memory address. We load the address in the
17427 first instruction. It's safe to use IP as the target of the
17428 load since the call will kill it anyway. */
17429 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17430 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17431 if (arm_arch4t)
17432 output_asm_insn ("bx%?\t%|ip", operands);
17433 else
17434 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17436 else
17438 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17439 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17442 return "";
17446 /* Output a move from arm registers to arm registers of a long double
17447 OPERANDS[0] is the destination.
17448 OPERANDS[1] is the source. */
17449 const char *
17450 output_mov_long_double_arm_from_arm (rtx *operands)
17452 /* We have to be careful here because the two might overlap. */
17453 int dest_start = REGNO (operands[0]);
17454 int src_start = REGNO (operands[1]);
17455 rtx ops[2];
17456 int i;
17458 if (dest_start < src_start)
17460 for (i = 0; i < 3; i++)
17462 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17463 ops[1] = gen_rtx_REG (SImode, src_start + i);
17464 output_asm_insn ("mov%?\t%0, %1", ops);
17467 else
17469 for (i = 2; i >= 0; i--)
17471 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17472 ops[1] = gen_rtx_REG (SImode, src_start + i);
17473 output_asm_insn ("mov%?\t%0, %1", ops);
17477 return "";
17480 void
17481 arm_emit_movpair (rtx dest, rtx src)
17483 /* If the src is an immediate, simplify it. */
17484 if (CONST_INT_P (src))
17486 HOST_WIDE_INT val = INTVAL (src);
17487 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17488 if ((val >> 16) & 0x0000ffff)
17489 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17490 GEN_INT (16)),
17491 GEN_INT ((val >> 16) & 0x0000ffff));
17492 return;
17494 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17495 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
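/* A minimal illustration, assuming the usual movw/movt patterns in
   arm.md: for src = (const_int 0x12345678) the two SETs emitted above
   typically assemble to

	movw	rd, #0x5678
	movt	rd, #0x1234

   and for a symbolic src the HIGH/LO_SUM pair becomes
   "movw rd, #:lower16:sym" followed by "movt rd, #:upper16:sym".
   The destination register rd is arbitrary here.  */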
17498 /* Output a move between double words. It must be REG<-MEM
17499 or MEM<-REG. */
17500 const char *
17501 output_move_double (rtx *operands, bool emit, int *count)
17503 enum rtx_code code0 = GET_CODE (operands[0]);
17504 enum rtx_code code1 = GET_CODE (operands[1]);
17505 rtx otherops[3];
17506 if (count)
17507 *count = 1;
17509 /* The only case when this might happen is when
17510 you are looking at the length of a DImode instruction
17511 that has an invalid constant in it. */
17512 if (code0 == REG && code1 != MEM)
17514 gcc_assert (!emit);
17515 *count = 2;
17516 return "";
17519 if (code0 == REG)
17521 unsigned int reg0 = REGNO (operands[0]);
17523 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17525 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17527 switch (GET_CODE (XEXP (operands[1], 0)))
17529 case REG:
17531 if (emit)
17533 if (TARGET_LDRD
17534 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17535 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17536 else
17537 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17539 break;
17541 case PRE_INC:
17542 gcc_assert (TARGET_LDRD);
17543 if (emit)
17544 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17545 break;
17547 case PRE_DEC:
17548 if (emit)
17550 if (TARGET_LDRD)
17551 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17552 else
17553 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17555 break;
17557 case POST_INC:
17558 if (emit)
17560 if (TARGET_LDRD)
17561 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17562 else
17563 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17565 break;
17567 case POST_DEC:
17568 gcc_assert (TARGET_LDRD);
17569 if (emit)
17570 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17571 break;
17573 case PRE_MODIFY:
17574 case POST_MODIFY:
17575 /* Autoincrement addressing modes should never have overlapping
17576 base and destination registers, and overlapping index registers
17577 are already prohibited, so this doesn't need to worry about
17578 fix_cm3_ldrd. */
17579 otherops[0] = operands[0];
17580 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17581 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17583 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17585 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17587 /* Registers overlap so split out the increment. */
17588 if (emit)
17590 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17591 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17593 if (count)
17594 *count = 2;
17596 else
17598 /* Use a single insn if we can.
17599 FIXME: IWMMXT allows offsets larger than ldrd can
17600 handle, fix these up with a pair of ldr. */
17601 if (TARGET_THUMB2
17602 || !CONST_INT_P (otherops[2])
17603 || (INTVAL (otherops[2]) > -256
17604 && INTVAL (otherops[2]) < 256))
17606 if (emit)
17607 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17609 else
17611 if (emit)
17613 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17614 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17616 if (count)
17617 *count = 2;
17622 else
17624 /* Use a single insn if we can.
17625 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17626 fix these up with a pair of ldr. */
17627 if (TARGET_THUMB2
17628 || !CONST_INT_P (otherops[2])
17629 || (INTVAL (otherops[2]) > -256
17630 && INTVAL (otherops[2]) < 256))
17632 if (emit)
17633 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17635 else
17637 if (emit)
17639 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17640 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17642 if (count)
17643 *count = 2;
17646 break;
17648 case LABEL_REF:
17649 case CONST:
17650 /* We might be able to use ldrd %0, %1 here. However the range is
17651 different to ldr/adr, and it is broken on some ARMv7-M
17652 implementations. */
17653 /* Use the second register of the pair to avoid problematic
17654 overlap. */
17655 otherops[1] = operands[1];
17656 if (emit)
17657 output_asm_insn ("adr%?\t%0, %1", otherops);
17658 operands[1] = otherops[0];
17659 if (emit)
17661 if (TARGET_LDRD)
17662 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17663 else
17664 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17667 if (count)
17668 *count = 2;
17669 break;
17671 /* ??? This needs checking for thumb2. */
17672 default:
17673 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17674 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17676 otherops[0] = operands[0];
17677 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17678 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17680 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17682 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17684 switch ((int) INTVAL (otherops[2]))
17686 case -8:
17687 if (emit)
17688 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17689 return "";
17690 case -4:
17691 if (TARGET_THUMB2)
17692 break;
17693 if (emit)
17694 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17695 return "";
17696 case 4:
17697 if (TARGET_THUMB2)
17698 break;
17699 if (emit)
17700 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17701 return "";
17704 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17705 operands[1] = otherops[0];
17706 if (TARGET_LDRD
17707 && (REG_P (otherops[2])
17708 || TARGET_THUMB2
17709 || (CONST_INT_P (otherops[2])
17710 && INTVAL (otherops[2]) > -256
17711 && INTVAL (otherops[2]) < 256)))
17713 if (reg_overlap_mentioned_p (operands[0],
17714 otherops[2]))
17716 rtx tmp;
17717 /* Swap base and index registers over to
17718 avoid a conflict. */
17719 tmp = otherops[1];
17720 otherops[1] = otherops[2];
17721 otherops[2] = tmp;
17723 /* If both registers conflict, it will usually
17724 have been fixed by a splitter. */
17725 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17726 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17728 if (emit)
17730 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17731 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17733 if (count)
17734 *count = 2;
17736 else
17738 otherops[0] = operands[0];
17739 if (emit)
17740 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17742 return "";
17745 if (CONST_INT_P (otherops[2]))
17747 if (emit)
17749 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17750 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17751 else
17752 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17755 else
17757 if (emit)
17758 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17761 else
17763 if (emit)
17764 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17767 if (count)
17768 *count = 2;
17770 if (TARGET_LDRD)
17771 return "ldr%(d%)\t%0, [%1]";
17773 return "ldm%(ia%)\t%1, %M0";
17775 else
17777 otherops[1] = adjust_address (operands[1], SImode, 4);
17778 /* Take care of overlapping base/data reg. */
17779 if (reg_mentioned_p (operands[0], operands[1]))
17781 if (emit)
17783 output_asm_insn ("ldr%?\t%0, %1", otherops);
17784 output_asm_insn ("ldr%?\t%0, %1", operands);
17786 if (count)
17787 *count = 2;
17790 else
17792 if (emit)
17794 output_asm_insn ("ldr%?\t%0, %1", operands);
17795 output_asm_insn ("ldr%?\t%0, %1", otherops);
17797 if (count)
17798 *count = 2;
17803 else
17805 /* Constraints should ensure this. */
17806 gcc_assert (code0 == MEM && code1 == REG);
17807 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17808 || (TARGET_ARM && TARGET_LDRD));
17810 switch (GET_CODE (XEXP (operands[0], 0)))
17812 case REG:
17813 if (emit)
17815 if (TARGET_LDRD)
17816 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17817 else
17818 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17820 break;
17822 case PRE_INC:
17823 gcc_assert (TARGET_LDRD);
17824 if (emit)
17825 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17826 break;
17828 case PRE_DEC:
17829 if (emit)
17831 if (TARGET_LDRD)
17832 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17833 else
17834 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17836 break;
17838 case POST_INC:
17839 if (emit)
17841 if (TARGET_LDRD)
17842 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17843 else
17844 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17846 break;
17848 case POST_DEC:
17849 gcc_assert (TARGET_LDRD);
17850 if (emit)
17851 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17852 break;
17854 case PRE_MODIFY:
17855 case POST_MODIFY:
17856 otherops[0] = operands[1];
17857 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17858 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17860 /* IWMMXT allows offsets larger than ldrd can handle,
17861 fix these up with a pair of ldr. */
17862 if (!TARGET_THUMB2
17863 && CONST_INT_P (otherops[2])
17864 && (INTVAL(otherops[2]) <= -256
17865 || INTVAL(otherops[2]) >= 256))
17867 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17869 if (emit)
17871 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17872 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17874 if (count)
17875 *count = 2;
17877 else
17879 if (emit)
17881 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17882 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17884 if (count)
17885 *count = 2;
17888 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17890 if (emit)
17891 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17893 else
17895 if (emit)
17896 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17898 break;
17900 case PLUS:
17901 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17902 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17904 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17906 case -8:
17907 if (emit)
17908 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17909 return "";
17911 case -4:
17912 if (TARGET_THUMB2)
17913 break;
17914 if (emit)
17915 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17916 return "";
17918 case 4:
17919 if (TARGET_THUMB2)
17920 break;
17921 if (emit)
17922 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17923 return "";
17926 if (TARGET_LDRD
17927 && (REG_P (otherops[2])
17928 || TARGET_THUMB2
17929 || (CONST_INT_P (otherops[2])
17930 && INTVAL (otherops[2]) > -256
17931 && INTVAL (otherops[2]) < 256)))
17933 otherops[0] = operands[1];
17934 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17935 if (emit)
17936 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17937 return "";
17939 /* Fall through */
17941 default:
17942 otherops[0] = adjust_address (operands[0], SImode, 4);
17943 otherops[1] = operands[1];
17944 if (emit)
17946 output_asm_insn ("str%?\t%1, %0", operands);
17947 output_asm_insn ("str%?\t%H1, %0", otherops);
17949 if (count)
17950 *count = 2;
17954 return "";
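/* Purely as an example (register numbers arbitrary): a DImode load with
   operands (reg:DI r4) and (mem:DI (reg:SI r2)) is output above as
   "ldrd r4, [r2]" when TARGET_LDRD and as "ldmia r2, {r4, r5}"
   otherwise; the matching stores use strd / stmia.  */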
17957 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17958 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17960 const char *
17961 output_move_quad (rtx *operands)
17963 if (REG_P (operands[0]))
17965 /* Load, or reg->reg move. */
17967 if (MEM_P (operands[1]))
17969 switch (GET_CODE (XEXP (operands[1], 0)))
17971 case REG:
17972 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17973 break;
17975 case LABEL_REF:
17976 case CONST:
17977 output_asm_insn ("adr%?\t%0, %1", operands);
17978 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17979 break;
17981 default:
17982 gcc_unreachable ();
17985 else
17987 rtx ops[2];
17988 int dest, src, i;
17990 gcc_assert (REG_P (operands[1]));
17992 dest = REGNO (operands[0]);
17993 src = REGNO (operands[1]);
17995 /* This seems pretty dumb, but hopefully GCC won't try to do it
17996 very often. */
17997 if (dest < src)
17998 for (i = 0; i < 4; i++)
18000 ops[0] = gen_rtx_REG (SImode, dest + i);
18001 ops[1] = gen_rtx_REG (SImode, src + i);
18002 output_asm_insn ("mov%?\t%0, %1", ops);
18004 else
18005 for (i = 3; i >= 0; i--)
18007 ops[0] = gen_rtx_REG (SImode, dest + i);
18008 ops[1] = gen_rtx_REG (SImode, src + i);
18009 output_asm_insn ("mov%?\t%0, %1", ops);
18013 else
18015 gcc_assert (MEM_P (operands[0]));
18016 gcc_assert (REG_P (operands[1]));
18017 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18019 switch (GET_CODE (XEXP (operands[0], 0)))
18021 case REG:
18022 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18023 break;
18025 default:
18026 gcc_unreachable ();
18030 return "";
18033 /* Output a VFP load or store instruction. */
18035 const char *
18036 output_move_vfp (rtx *operands)
18038 rtx reg, mem, addr, ops[2];
18039 int load = REG_P (operands[0]);
18040 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18041 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18042 const char *templ;
18043 char buff[50];
18044 enum machine_mode mode;
18046 reg = operands[!load];
18047 mem = operands[load];
18049 mode = GET_MODE (reg);
18051 gcc_assert (REG_P (reg));
18052 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18053 gcc_assert (mode == SFmode
18054 || mode == DFmode
18055 || mode == SImode
18056 || mode == DImode
18057 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18058 gcc_assert (MEM_P (mem));
18060 addr = XEXP (mem, 0);
18062 switch (GET_CODE (addr))
18064 case PRE_DEC:
18065 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18066 ops[0] = XEXP (addr, 0);
18067 ops[1] = reg;
18068 break;
18070 case POST_INC:
18071 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18072 ops[0] = XEXP (addr, 0);
18073 ops[1] = reg;
18074 break;
18076 default:
18077 templ = "f%s%c%%?\t%%%s0, %%1%s";
18078 ops[0] = reg;
18079 ops[1] = mem;
18080 break;
18083 sprintf (buff, templ,
18084 load ? "ld" : "st",
18085 dp ? 'd' : 's',
18086 dp ? "P" : "",
18087 integer_p ? "\t%@ int" : "");
18088 output_asm_insn (buff, ops);
18090 return "";
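/* For illustration only (register and offset arbitrary): with the
   default address form the template above expands to e.g.

	fldd	d5, [sp, #8]	@ DFmode load
	fsts	s0, [r3]	@ SFmode store

   using the pre-UAL VFP mnemonics; the PRE_DEC and POST_INC cases use
   the fstmdb / fldmia style forms with base-register writeback.  */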
18093 /* Output a Neon double-word or quad-word load or store, or a load
18094 or store for larger structure modes.
18096 WARNING: The ordering of elements is weird in big-endian mode,
18097 because the EABI requires that vectors stored in memory appear
18098 as though they were stored by a VSTM instruction.
18099 GCC RTL defines element ordering based on in-memory order.
18100 This can be different from the architectural ordering of elements
18101 within a NEON register. The intrinsics defined in arm_neon.h use the
18102 NEON register element ordering, not the GCC RTL element ordering.
18104 For example, the in-memory ordering of a big-endian quadword
18105 vector with 16-bit elements when stored from register pair {d0,d1}
18106 will be (lowest address first, d0[N] is NEON register element N):
18108 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18110 When necessary, quadword registers (dN, dN+1) are moved to ARM
18111 registers starting at rN, in the order:
18113 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18115 This ensures that STM/LDM can be used on vectors in ARM registers, and the
18116 same memory layout will result as if VSTM/VLDM were used.
18118 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18119 possible, which allows use of appropriate alignment tags.
18120 Note that the choice of "64" is independent of the actual vector
18121 element size; this size simply ensures that the behavior is
18122 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18124 Due to limitations of those instructions, use of VST1.64/VLD1.64
18125 is not possible if:
18126 - the address contains PRE_DEC, or
18127 - the mode refers to more than 4 double-word registers
18129 In those cases, it would be possible to replace VSTM/VLDM by a
18130 sequence of instructions; this is not currently implemented since
18131 this is not certain to actually improve performance. */
18133 const char *
18134 output_move_neon (rtx *operands)
18136 rtx reg, mem, addr, ops[2];
18137 int regno, nregs, load = REG_P (operands[0]);
18138 const char *templ;
18139 char buff[50];
18140 enum machine_mode mode;
18142 reg = operands[!load];
18143 mem = operands[load];
18145 mode = GET_MODE (reg);
18147 gcc_assert (REG_P (reg));
18148 regno = REGNO (reg);
18149 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18150 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18151 || NEON_REGNO_OK_FOR_QUAD (regno));
18152 gcc_assert (VALID_NEON_DREG_MODE (mode)
18153 || VALID_NEON_QREG_MODE (mode)
18154 || VALID_NEON_STRUCT_MODE (mode));
18155 gcc_assert (MEM_P (mem));
18157 addr = XEXP (mem, 0);
18159 /* Strip off const from addresses like (const (plus (...))). */
18160 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18161 addr = XEXP (addr, 0);
18163 switch (GET_CODE (addr))
18165 case POST_INC:
18166 /* We have to use vldm / vstm for too-large modes. */
18167 if (nregs > 4)
18169 templ = "v%smia%%?\t%%0!, %%h1";
18170 ops[0] = XEXP (addr, 0);
18172 else
18174 templ = "v%s1.64\t%%h1, %%A0";
18175 ops[0] = mem;
18177 ops[1] = reg;
18178 break;
18180 case PRE_DEC:
18181 /* We have to use vldm / vstm in this case, since there is no
18182 pre-decrement form of the vld1 / vst1 instructions. */
18183 templ = "v%smdb%%?\t%%0!, %%h1";
18184 ops[0] = XEXP (addr, 0);
18185 ops[1] = reg;
18186 break;
18188 case POST_MODIFY:
18189 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18190 gcc_unreachable ();
18192 case LABEL_REF:
18193 case PLUS:
18195 int i;
18196 int overlap = -1;
18197 for (i = 0; i < nregs; i++)
18199 /* We're only using DImode here because it's a convenient size. */
18200 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18201 ops[1] = adjust_address (mem, DImode, 8 * i);
18202 if (reg_overlap_mentioned_p (ops[0], mem))
18204 gcc_assert (overlap == -1);
18205 overlap = i;
18207 else
18209 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18210 output_asm_insn (buff, ops);
18213 if (overlap != -1)
18215 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18216 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18217 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18218 output_asm_insn (buff, ops);
18221 return "";
18224 default:
18225 /* We have to use vldm / vstm for too-large modes. */
18226 if (nregs > 4)
18227 templ = "v%smia%%?\t%%m0, %%h1";
18228 else
18229 templ = "v%s1.64\t%%h1, %%A0";
18231 ops[0] = mem;
18232 ops[1] = reg;
18235 sprintf (buff, templ, load ? "ld" : "st");
18236 output_asm_insn (buff, ops);
18238 return "";
18241 /* Compute and return the length of neon_mov<mode>, where <mode> is
18242 one of VSTRUCT modes: EI, OI, CI or XI. */
18244 arm_attr_length_move_neon (rtx insn)
18246 rtx reg, mem, addr;
18247 int load;
18248 enum machine_mode mode;
18250 extract_insn_cached (insn);
18252 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18254 mode = GET_MODE (recog_data.operand[0]);
18255 switch (mode)
18257 case EImode:
18258 case OImode:
18259 return 8;
18260 case CImode:
18261 return 12;
18262 case XImode:
18263 return 16;
18264 default:
18265 gcc_unreachable ();
18269 load = REG_P (recog_data.operand[0]);
18270 reg = recog_data.operand[!load];
18271 mem = recog_data.operand[load];
18273 gcc_assert (MEM_P (mem));
18275 mode = GET_MODE (reg);
18276 addr = XEXP (mem, 0);
18278 /* Strip off const from addresses like (const (plus (...))). */
18279 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18280 addr = XEXP (addr, 0);
18282 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18284 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18285 return insns * 4;
18287 else
18288 return 4;
18291 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18292 return zero. */
18295 arm_address_offset_is_imm (rtx insn)
18297 rtx mem, addr;
18299 extract_insn_cached (insn);
18301 if (REG_P (recog_data.operand[0]))
18302 return 0;
18304 mem = recog_data.operand[0];
18306 gcc_assert (MEM_P (mem));
18308 addr = XEXP (mem, 0);
18310 if (REG_P (addr)
18311 || (GET_CODE (addr) == PLUS
18312 && REG_P (XEXP (addr, 0))
18313 && CONST_INT_P (XEXP (addr, 1))))
18314 return 1;
18315 else
18316 return 0;
18319 /* Output an ADD r, s, #n where n may be too big for one instruction.
18320 If adding zero to one register, output nothing. */
18321 const char *
18322 output_add_immediate (rtx *operands)
18324 HOST_WIDE_INT n = INTVAL (operands[2]);
18326 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18328 if (n < 0)
18329 output_multi_immediate (operands,
18330 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18331 -n);
18332 else
18333 output_multi_immediate (operands,
18334 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18338 return "";
18341 /* Output a multiple immediate operation.
18342 OPERANDS is the vector of operands referred to in the output patterns.
18343 INSTR1 is the output pattern to use for the first constant.
18344 INSTR2 is the output pattern to use for subsequent constants.
18345 IMMED_OP is the index of the constant slot in OPERANDS.
18346 N is the constant value. */
18347 static const char *
18348 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18349 int immed_op, HOST_WIDE_INT n)
18351 #if HOST_BITS_PER_WIDE_INT > 32
18352 n &= 0xffffffff;
18353 #endif
18355 if (n == 0)
18357 /* Quick and easy output. */
18358 operands[immed_op] = const0_rtx;
18359 output_asm_insn (instr1, operands);
18361 else
18363 int i;
18364 const char * instr = instr1;
18366 /* Note that n is never zero here (which would give no output). */
18367 for (i = 0; i < 32; i += 2)
18369 if (n & (3 << i))
18371 operands[immed_op] = GEN_INT (n & (255 << i));
18372 output_asm_insn (instr, operands);
18373 instr = instr2;
18374 i += 6;
18379 return "";
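/* A worked example (illustrative only): for n = 0x12345 the loop above
   finds set bits at even positions 0, 8 and 16 and emits three chunks,
   each a valid rotated 8-bit ARM immediate, so output_add_immediate
   would produce

	add	r0, r1, #0x45
	add	r0, r0, #0x2300
	add	r0, r0, #0x10000

   with arbitrary register choices.  */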
18382 /* Return the name of a shifter operation. */
18383 static const char *
18384 arm_shift_nmem(enum rtx_code code)
18386 switch (code)
18388 case ASHIFT:
18389 return ARM_LSL_NAME;
18391 case ASHIFTRT:
18392 return "asr";
18394 case LSHIFTRT:
18395 return "lsr";
18397 case ROTATERT:
18398 return "ror";
18400 default:
18401 abort();
18405 /* Return the appropriate ARM instruction for the operation code.
18406 The returned result should not be overwritten. OP is the rtx of the
18407 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18408 was shifted. */
18409 const char *
18410 arithmetic_instr (rtx op, int shift_first_arg)
18412 switch (GET_CODE (op))
18414 case PLUS:
18415 return "add";
18417 case MINUS:
18418 return shift_first_arg ? "rsb" : "sub";
18420 case IOR:
18421 return "orr";
18423 case XOR:
18424 return "eor";
18426 case AND:
18427 return "and";
18429 case ASHIFT:
18430 case ASHIFTRT:
18431 case LSHIFTRT:
18432 case ROTATERT:
18433 return arm_shift_nmem(GET_CODE(op));
18435 default:
18436 gcc_unreachable ();
18440 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18441 for the operation code. The returned result should not be overwritten.
18442 OP is the rtx code of the shift.
18443 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18444 constant shift amount otherwise. */
18445 static const char *
18446 shift_op (rtx op, HOST_WIDE_INT *amountp)
18448 const char * mnem;
18449 enum rtx_code code = GET_CODE (op);
18451 switch (code)
18453 case ROTATE:
18454 if (!CONST_INT_P (XEXP (op, 1)))
18456 output_operand_lossage ("invalid shift operand");
18457 return NULL;
18460 code = ROTATERT;
18461 *amountp = 32 - INTVAL (XEXP (op, 1));
18462 mnem = "ror";
18463 break;
18465 case ASHIFT:
18466 case ASHIFTRT:
18467 case LSHIFTRT:
18468 case ROTATERT:
18469 mnem = arm_shift_nmem(code);
18470 if (CONST_INT_P (XEXP (op, 1)))
18472 *amountp = INTVAL (XEXP (op, 1));
18474 else if (REG_P (XEXP (op, 1)))
18476 *amountp = -1;
18477 return mnem;
18479 else
18481 output_operand_lossage ("invalid shift operand");
18482 return NULL;
18484 break;
18486 case MULT:
18487 /* We never have to worry about the amount being other than a
18488 power of 2, since this case can never be reloaded from a reg. */
18489 if (!CONST_INT_P (XEXP (op, 1)))
18491 output_operand_lossage ("invalid shift operand");
18492 return NULL;
18495 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18497 /* Amount must be a power of two. */
18498 if (*amountp & (*amountp - 1))
18500 output_operand_lossage ("invalid shift operand");
18501 return NULL;
18504 *amountp = int_log2 (*amountp);
18505 return ARM_LSL_NAME;
18507 default:
18508 output_operand_lossage ("invalid shift operand");
18509 return NULL;
18512 /* This is not 100% correct, but follows from the desire to merge
18513 multiplication by a power of 2 with the recognizer for a
18514 shift. >=32 is not a valid shift for "lsl", so we must try to
18515 output a shift that produces the correct arithmetic result.
18516 Using lsr #32 is identical except for the fact that the carry bit
18517 is not set correctly if we set the flags; but we never use the
18518 carry bit from such an operation, so we can ignore that. */
18519 if (code == ROTATERT)
18520 /* Rotate is just modulo 32. */
18521 *amountp &= 31;
18522 else if (*amountp != (*amountp & 31))
18524 if (code == ASHIFT)
18525 mnem = "lsr";
18526 *amountp = 32;
18529 /* Shifts of 0 are no-ops. */
18530 if (*amountp == 0)
18531 return NULL;
18533 return mnem;
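/* For illustration: a (mult (reg) (const_int 8)) operand is emitted
   with the "lsl" mnemonic and *AMOUNTP set to 3, while a
   (rotate (reg) (const_int 8)) is rewritten as the equivalent
   rotate-right, i.e. "ror" with *AMOUNTP set to 24.  */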
18536 /* Obtain the shift count from the POWER of two. */
18538 static HOST_WIDE_INT
18539 int_log2 (HOST_WIDE_INT power)
18541 HOST_WIDE_INT shift = 0;
18543 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18545 gcc_assert (shift <= 31);
18546 shift++;
18549 return shift;
18552 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18553 because /bin/as is horribly restrictive. The judgement about
18554 whether or not each character is 'printable' (and can be output as
18555 is) or not (and must be printed with an octal escape) must be made
18556 with reference to the *host* character set -- the situation is
18557 similar to that discussed in the comments above pp_c_char in
18558 c-pretty-print.c. */
18560 #define MAX_ASCII_LEN 51
18562 void
18563 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18565 int i;
18566 int len_so_far = 0;
18568 fputs ("\t.ascii\t\"", stream);
18570 for (i = 0; i < len; i++)
18572 int c = p[i];
18574 if (len_so_far >= MAX_ASCII_LEN)
18576 fputs ("\"\n\t.ascii\t\"", stream);
18577 len_so_far = 0;
18580 if (ISPRINT (c))
18582 if (c == '\\' || c == '\"')
18584 putc ('\\', stream);
18585 len_so_far++;
18587 putc (c, stream);
18588 len_so_far++;
18590 else
18592 fprintf (stream, "\\%03o", c);
18593 len_so_far += 4;
18597 fputs ("\"\n", stream);
18600 /* Compute the register save mask for registers 0 through 12
18601 inclusive. This code is used by arm_compute_save_reg_mask. */
18603 static unsigned long
18604 arm_compute_save_reg0_reg12_mask (void)
18606 unsigned long func_type = arm_current_func_type ();
18607 unsigned long save_reg_mask = 0;
18608 unsigned int reg;
18610 if (IS_INTERRUPT (func_type))
18612 unsigned int max_reg;
18613 /* Interrupt functions must not corrupt any registers,
18614 even call clobbered ones. If this is a leaf function
18615 we can just examine the registers used by the RTL, but
18616 otherwise we have to assume that whatever function is
18617 called might clobber anything, and so we have to save
18618 all the call-clobbered registers as well. */
18619 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18620 /* FIQ handlers have registers r8 - r12 banked, so
18621 we only need to check r0 - r7.  Normal ISRs only
18622 bank r14 and r15, so we must check up to r12.
18623 r13 is the stack pointer which is always preserved,
18624 so we do not need to consider it here. */
18625 max_reg = 7;
18626 else
18627 max_reg = 12;
18629 for (reg = 0; reg <= max_reg; reg++)
18630 if (df_regs_ever_live_p (reg)
18631 || (! crtl->is_leaf && call_used_regs[reg]))
18632 save_reg_mask |= (1 << reg);
18634 /* Also save the pic base register if necessary. */
18635 if (flag_pic
18636 && !TARGET_SINGLE_PIC_BASE
18637 && arm_pic_register != INVALID_REGNUM
18638 && crtl->uses_pic_offset_table)
18639 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18641 else if (IS_VOLATILE(func_type))
18643 /* For noreturn functions we historically omitted register saves
18644 altogether. However this really messes up debugging. As a
18645 compromise, save just the frame pointers.  Combined with the link
18646 register saved elsewhere this should be sufficient to get
18647 a backtrace. */
18648 if (frame_pointer_needed)
18649 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18650 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18651 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18652 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18653 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18655 else
18657 /* In the normal case we only need to save those registers
18658 which are call saved and which are used by this function. */
18659 for (reg = 0; reg <= 11; reg++)
18660 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18661 save_reg_mask |= (1 << reg);
18663 /* Handle the frame pointer as a special case. */
18664 if (frame_pointer_needed)
18665 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18667 /* If we aren't loading the PIC register,
18668 don't stack it even though it may be live. */
18669 if (flag_pic
18670 && !TARGET_SINGLE_PIC_BASE
18671 && arm_pic_register != INVALID_REGNUM
18672 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18673 || crtl->uses_pic_offset_table))
18674 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18676 /* The prologue will copy SP into R0, so save it. */
18677 if (IS_STACKALIGN (func_type))
18678 save_reg_mask |= 1;
18681 /* Save registers so the exception handler can modify them. */
18682 if (crtl->calls_eh_return)
18684 unsigned int i;
18686 for (i = 0; ; i++)
18688 reg = EH_RETURN_DATA_REGNO (i);
18689 if (reg == INVALID_REGNUM)
18690 break;
18691 save_reg_mask |= 1 << reg;
18695 return save_reg_mask;
18698 /* Return true if r3 is live at the start of the function. */
18700 static bool
18701 arm_r3_live_at_start_p (void)
18703 /* Just look at cfg info, which is still close enough to correct at this
18704 point. This gives false positives for broken functions that might use
18705 uninitialized data that happens to be allocated in r3, but who cares? */
18706 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18709 /* Compute the number of bytes used to store the static chain register on the
18710 stack, above the stack frame. We need to know this accurately to get the
18711 alignment of the rest of the stack frame correct. */
18713 static int
18714 arm_compute_static_chain_stack_bytes (void)
18716 /* See the defining assertion in arm_expand_prologue. */
18717 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18718 && IS_NESTED (arm_current_func_type ())
18719 && arm_r3_live_at_start_p ()
18720 && crtl->args.pretend_args_size == 0)
18721 return 4;
18723 return 0;
18726 /* Compute a bit mask of which registers need to be
18727 saved on the stack for the current function.
18728 This is used by arm_get_frame_offsets, which may add extra registers. */
18730 static unsigned long
18731 arm_compute_save_reg_mask (void)
18733 unsigned int save_reg_mask = 0;
18734 unsigned long func_type = arm_current_func_type ();
18735 unsigned int reg;
18737 if (IS_NAKED (func_type))
18738 /* This should never really happen. */
18739 return 0;
18741 /* If we are creating a stack frame, then we must save the frame pointer,
18742 IP (which will hold the old stack pointer), LR and the PC. */
18743 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18744 save_reg_mask |=
18745 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18746 | (1 << IP_REGNUM)
18747 | (1 << LR_REGNUM)
18748 | (1 << PC_REGNUM);
18750 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18752 /* Decide if we need to save the link register.
18753 Interrupt routines have their own banked link register,
18754 so they never need to save it.
18755 Otherwise if we do not use the link register we do not need to save
18756 it. If we are pushing other registers onto the stack however, we
18757 can save an instruction in the epilogue by pushing the link register
18758 now and then popping it back into the PC. This incurs extra memory
18759 accesses though, so we only do it when optimizing for size, and only
18760 if we know that we will not need a fancy return sequence. */
18761 if (df_regs_ever_live_p (LR_REGNUM)
18762 || (save_reg_mask
18763 && optimize_size
18764 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18765 && !crtl->calls_eh_return))
18766 save_reg_mask |= 1 << LR_REGNUM;
18768 if (cfun->machine->lr_save_eliminated)
18769 save_reg_mask &= ~ (1 << LR_REGNUM);
18771 if (TARGET_REALLY_IWMMXT
18772 && ((bit_count (save_reg_mask)
18773 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18774 arm_compute_static_chain_stack_bytes())
18775 ) % 2) != 0)
18777 /* The total number of registers that are going to be pushed
18778 onto the stack is odd. We need to ensure that the stack
18779 is 64-bit aligned before we start to save iWMMXt registers,
18780 and also before we start to create locals. (A local variable
18781 might be a double or long long which we will load/store using
18782 an iWMMXt instruction). Therefore we need to push another
18783 ARM register, so that the stack will be 64-bit aligned. We
18784 try to avoid using the arg registers (r0 - r3) as they might be
18785 used to pass values in a tail call. */
18786 for (reg = 4; reg <= 12; reg++)
18787 if ((save_reg_mask & (1 << reg)) == 0)
18788 break;
18790 if (reg <= 12)
18791 save_reg_mask |= (1 << reg);
18792 else
18794 cfun->machine->sibcall_blocked = 1;
18795 save_reg_mask |= (1 << 3);
18799 /* We may need to push an additional register for use in initializing the
18800 PIC base register. */
18801 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18802 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18804 reg = thumb_find_work_register (1 << 4);
18805 if (!call_used_regs[reg])
18806 save_reg_mask |= (1 << reg);
18809 return save_reg_mask;
18813 /* Compute a bit mask of which registers need to be
18814 saved on the stack for the current function. */
18815 static unsigned long
18816 thumb1_compute_save_reg_mask (void)
18818 unsigned long mask;
18819 unsigned reg;
18821 mask = 0;
18822 for (reg = 0; reg < 12; reg ++)
18823 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18824 mask |= 1 << reg;
18826 if (flag_pic
18827 && !TARGET_SINGLE_PIC_BASE
18828 && arm_pic_register != INVALID_REGNUM
18829 && crtl->uses_pic_offset_table)
18830 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18832 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18833 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18834 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18836 /* LR will also be pushed if any lo regs are pushed. */
18837 if (mask & 0xff || thumb_force_lr_save ())
18838 mask |= (1 << LR_REGNUM);
18840 /* Make sure we have a low work register if we need one.
18841 We will need one if we are going to push a high register,
18842 but we are not currently intending to push a low register. */
18843 if ((mask & 0xff) == 0
18844 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18846 /* Use thumb_find_work_register to choose which register
18847 we will use. If the register is live then we will
18848 have to push it. Use LAST_LO_REGNUM as our fallback
18849 choice for the register to select. */
18850 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18851 /* Make sure the register returned by thumb_find_work_register is
18852 not part of the return value. */
18853 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18854 reg = LAST_LO_REGNUM;
18856 if (! call_used_regs[reg])
18857 mask |= 1 << reg;
18860 /* The 504 below is 8 bytes less than 512 because there are two possible
18861 alignment words. We can't tell here if they will be present or not so we
18862 have to play it safe and assume that they are. */
18863 if ((CALLER_INTERWORKING_SLOT_SIZE +
18864 ROUND_UP_WORD (get_frame_size ()) +
18865 crtl->outgoing_args_size) >= 504)
18867 /* This is the same as the code in thumb1_expand_prologue() which
18868 determines which register to use for stack decrement. */
18869 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18870 if (mask & (1 << reg))
18871 break;
18873 if (reg > LAST_LO_REGNUM)
18875 /* Make sure we have a register available for stack decrement. */
18876 mask |= 1 << LAST_LO_REGNUM;
18880 return mask;
18884 /* Return the number of bytes required to save VFP registers. */
18885 static int
18886 arm_get_vfp_saved_size (void)
18888 unsigned int regno;
18889 int count;
18890 int saved;
18892 saved = 0;
18893 /* Space for saved VFP registers. */
18894 if (TARGET_HARD_FLOAT && TARGET_VFP)
18896 count = 0;
18897 for (regno = FIRST_VFP_REGNUM;
18898 regno < LAST_VFP_REGNUM;
18899 regno += 2)
18901 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18902 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18904 if (count > 0)
18906 /* Workaround ARM10 VFPr1 bug. */
18907 if (count == 2 && !arm_arch6)
18908 count++;
18909 saved += count * 8;
18911 count = 0;
18913 else
18914 count++;
18916 if (count > 0)
18918 if (count == 2 && !arm_arch6)
18919 count++;
18920 saved += count * 8;
18923 return saved;
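/* For example (illustrative only): if d8, d9 and d10 are the only
   call-saved VFP registers that are live, the run above counts three
   D registers and reserves 24 bytes.  If only d8 and d9 were live on
   a core without arm_arch6, the VFPr1 workaround bumps the count from
   2 to 3, reserving 24 bytes instead of 16.  */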
18927 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18928 everything bar the final return instruction. If simple_return is true,
18929 then do not output the epilogue, because it has already been emitted in RTL. */
18930 const char *
18931 output_return_instruction (rtx operand, bool really_return, bool reverse,
18932 bool simple_return)
18934 char conditional[10];
18935 char instr[100];
18936 unsigned reg;
18937 unsigned long live_regs_mask;
18938 unsigned long func_type;
18939 arm_stack_offsets *offsets;
18941 func_type = arm_current_func_type ();
18943 if (IS_NAKED (func_type))
18944 return "";
18946 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18948 /* If this function was declared non-returning, and we have
18949 found a tail call, then we have to trust that the called
18950 function won't return. */
18951 if (really_return)
18953 rtx ops[2];
18955 /* Otherwise, trap an attempted return by aborting. */
18956 ops[0] = operand;
18957 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18958 : "abort");
18959 assemble_external_libcall (ops[1]);
18960 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18963 return "";
18966 gcc_assert (!cfun->calls_alloca || really_return);
18968 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18970 cfun->machine->return_used_this_function = 1;
18972 offsets = arm_get_frame_offsets ();
18973 live_regs_mask = offsets->saved_regs_mask;
18975 if (!simple_return && live_regs_mask)
18977 const char * return_reg;
18979 /* If we do not have any special requirements for function exit
18980 (e.g. interworking) then we can load the return address
18981 directly into the PC. Otherwise we must load it into LR. */
18982 if (really_return
18983 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18984 return_reg = reg_names[PC_REGNUM];
18985 else
18986 return_reg = reg_names[LR_REGNUM];
18988 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18990 /* There are three possible reasons for the IP register
18991 being saved: 1) a stack frame was created, in which case
18992 IP contains the old stack pointer, or 2) an ISR routine
18993 corrupted it, or 3) it was saved to align the stack on
18994 iWMMXt. In case 1, restore IP into SP, otherwise just
18995 restore IP. */
18996 if (frame_pointer_needed)
18998 live_regs_mask &= ~ (1 << IP_REGNUM);
18999 live_regs_mask |= (1 << SP_REGNUM);
19001 else
19002 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19005 /* On some ARM architectures it is faster to use LDR rather than
19006 LDM to load a single register. On other architectures, the
19007 cost is the same. In 26 bit mode, or for exception handlers,
19008 we have to use LDM to load the PC so that the CPSR is also
19009 restored. */
19010 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19011 if (live_regs_mask == (1U << reg))
19012 break;
19014 if (reg <= LAST_ARM_REGNUM
19015 && (reg != LR_REGNUM
19016 || ! really_return
19017 || ! IS_INTERRUPT (func_type)))
19019 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19020 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19022 else
19024 char *p;
19025 int first = 1;
19027 /* Generate the load multiple instruction to restore the
19028 registers. Note we can get here, even if
19029 frame_pointer_needed is true, but only if sp already
19030 points to the base of the saved core registers. */
19031 if (live_regs_mask & (1 << SP_REGNUM))
19033 unsigned HOST_WIDE_INT stack_adjust;
19035 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19036 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19038 if (stack_adjust && arm_arch5 && TARGET_ARM)
19039 if (TARGET_UNIFIED_ASM)
19040 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19041 else
19042 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19043 else
19045 /* If we can't use ldmib (SA110 bug),
19046 then try to pop r3 instead. */
19047 if (stack_adjust)
19048 live_regs_mask |= 1 << 3;
19050 if (TARGET_UNIFIED_ASM)
19051 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19052 else
19053 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19056 else
19057 if (TARGET_UNIFIED_ASM)
19058 sprintf (instr, "pop%s\t{", conditional);
19059 else
19060 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19062 p = instr + strlen (instr);
19064 for (reg = 0; reg <= SP_REGNUM; reg++)
19065 if (live_regs_mask & (1 << reg))
19067 int l = strlen (reg_names[reg]);
19069 if (first)
19070 first = 0;
19071 else
19073 memcpy (p, ", ", 2);
19074 p += 2;
19077 memcpy (p, "%|", 2);
19078 memcpy (p + 2, reg_names[reg], l);
19079 p += l + 2;
19082 if (live_regs_mask & (1 << LR_REGNUM))
19084 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19085 /* If returning from an interrupt, restore the CPSR. */
19086 if (IS_INTERRUPT (func_type))
19087 strcat (p, "^");
19089 else
19090 strcpy (p, "}");
19093 output_asm_insn (instr, & operand);
19095 /* See if we need to generate an extra instruction to
19096 perform the actual function return. */
19097 if (really_return
19098 && func_type != ARM_FT_INTERWORKED
19099 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19101 /* The return has already been handled
19102 by loading the LR into the PC. */
19103 return "";
19107 if (really_return)
19109 switch ((int) ARM_FUNC_TYPE (func_type))
19111 case ARM_FT_ISR:
19112 case ARM_FT_FIQ:
19113 /* ??? This is wrong for unified assembly syntax. */
19114 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19115 break;
19117 case ARM_FT_INTERWORKED:
19118 sprintf (instr, "bx%s\t%%|lr", conditional);
19119 break;
19121 case ARM_FT_EXCEPTION:
19122 /* ??? This is wrong for unified assembly syntax. */
19123 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19124 break;
19126 default:
19127 /* Use bx if it's available. */
19128 if (arm_arch5 || arm_arch4t)
19129 sprintf (instr, "bx%s\t%%|lr", conditional);
19130 else
19131 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19132 break;
19135 output_asm_insn (instr, & operand);
19138 return "";
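/* An illustrative example (register choice arbitrary): for a normal
   ARM function whose prologue pushed {r4, r5, lr}, a full return is
   emitted above as the single instruction

	ldmfd	sp!, {r4, r5, pc}

   ("pop {r4, r5, pc}" with unified syntax), whereas an interworked
   return pops into lr and finishes with "bx lr".  */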
19141 /* Write the function name into the code section, directly preceding
19142 the function prologue.
19144 Code will be output similar to this:
19146 .ascii "arm_poke_function_name", 0
19147 .align
19149 .word 0xff000000 + (t1 - t0)
19150 arm_poke_function_name
19151 mov ip, sp
19152 stmfd sp!, {fp, ip, lr, pc}
19153 sub fp, ip, #4
19155 When performing a stack backtrace, code can inspect the value
19156 of 'pc' stored at 'fp' + 0. If the trace function then looks
19157 at location pc - 12 and the top 8 bits are set, then we know
19158 that there is a function name embedded immediately preceding this
19159 location, whose length is given by ((pc[-3]) & ~0xff000000).
19161 We assume that pc is declared as a pointer to an unsigned long.
19163 It is of no benefit to output the function name if we are assembling
19164 a leaf function. These function types will not contain a stack
19165 backtrace structure, therefore it is not possible to determine the
19166 function name. */
19167 void
19168 arm_poke_function_name (FILE *stream, const char *name)
19170 unsigned long alignlength;
19171 unsigned long length;
19172 rtx x;
19174 length = strlen (name) + 1;
19175 alignlength = ROUND_UP_WORD (length);
19177 ASM_OUTPUT_ASCII (stream, name, length);
19178 ASM_OUTPUT_ALIGN (stream, 2);
19179 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19180 assemble_aligned_integer (UNITS_PER_WORD, x);
19183 /* Place some comments into the assembler stream
19184 describing the current function. */
19185 static void
19186 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19188 unsigned long func_type;
19190 /* ??? Do we want to print some of the below anyway? */
19191 if (TARGET_THUMB1)
19192 return;
19194 /* Sanity check. */
19195 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19197 func_type = arm_current_func_type ();
19199 switch ((int) ARM_FUNC_TYPE (func_type))
19201 default:
19202 case ARM_FT_NORMAL:
19203 break;
19204 case ARM_FT_INTERWORKED:
19205 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19206 break;
19207 case ARM_FT_ISR:
19208 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19209 break;
19210 case ARM_FT_FIQ:
19211 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19212 break;
19213 case ARM_FT_EXCEPTION:
19214 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19215 break;
19218 if (IS_NAKED (func_type))
19219 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19221 if (IS_VOLATILE (func_type))
19222 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19224 if (IS_NESTED (func_type))
19225 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19226 if (IS_STACKALIGN (func_type))
19227 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19229 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19230 crtl->args.size,
19231 crtl->args.pretend_args_size, frame_size);
19233 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19234 frame_pointer_needed,
19235 cfun->machine->uses_anonymous_args);
19237 if (cfun->machine->lr_save_eliminated)
19238 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19240 if (crtl->calls_eh_return)
19241 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19245 static void
19246 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19247 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19249 arm_stack_offsets *offsets;
19251 if (TARGET_THUMB1)
19253 int regno;
19255 /* Emit any call-via-reg trampolines that are needed for v4t support
19256 of call_reg and call_value_reg type insns. */
19257 for (regno = 0; regno < LR_REGNUM; regno++)
19259 rtx label = cfun->machine->call_via[regno];
19261 if (label != NULL)
19263 switch_to_section (function_section (current_function_decl));
19264 targetm.asm_out.internal_label (asm_out_file, "L",
19265 CODE_LABEL_NUMBER (label));
19266 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19270 /* ??? Probably not safe to set this here, since it assumes that a
19271 function will be emitted as assembly immediately after we generate
19272 RTL for it. This does not happen for inline functions. */
19273 cfun->machine->return_used_this_function = 0;
19275 else /* TARGET_32BIT */
19277 /* We need to take into account any stack-frame rounding. */
19278 offsets = arm_get_frame_offsets ();
19280 gcc_assert (!use_return_insn (FALSE, NULL)
19281 || (cfun->machine->return_used_this_function != 0)
19282 || offsets->saved_regs == offsets->outgoing_args
19283 || frame_pointer_needed);
19285 /* Reset the ARM-specific per-function variables. */
19286 after_arm_reorg = 0;
19290 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19291 STR and STRD. If an even number of registers are being pushed, one
19292 or more STRD patterns are created for each register pair. If an
19293 odd number of registers are pushed, emit an initial STR followed by
19294 as many STRD instructions as are needed. This works best when the
19295 stack is initially 64-bit aligned (the normal case), since it
19296 ensures that each STRD is also 64-bit aligned. */
19297 static void
19298 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19300 int num_regs = 0;
19301 int i;
19302 int regno;
19303 rtx par = NULL_RTX;
19304 rtx dwarf = NULL_RTX;
19305 rtx tmp;
19306 bool first = true;
19308 num_regs = bit_count (saved_regs_mask);
19310 /* Must be at least one register to save, and can't save SP or PC. */
19311 gcc_assert (num_regs > 0 && num_regs <= 14);
19312 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19313 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19315 /* Create sequence for DWARF info. All the frame-related data for
19316 debugging is held in this wrapper. */
19317 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19319 /* Describe the stack adjustment. */
19320 tmp = gen_rtx_SET (VOIDmode,
19321 stack_pointer_rtx,
19322 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19323 RTX_FRAME_RELATED_P (tmp) = 1;
19324 XVECEXP (dwarf, 0, 0) = tmp;
19326 /* Find the first register. */
19327 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19330 i = 0;
19332 /* If there's an odd number of registers to push, start off by
19333 pushing a single register. This ensures that subsequent strd
19334 operations are dword aligned (assuming that SP was originally
19335 64-bit aligned). */
19336 if ((num_regs & 1) != 0)
19338 rtx reg, mem, insn;
19340 reg = gen_rtx_REG (SImode, regno);
19341 if (num_regs == 1)
19342 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19343 stack_pointer_rtx));
19344 else
19345 mem = gen_frame_mem (Pmode,
19346 gen_rtx_PRE_MODIFY
19347 (Pmode, stack_pointer_rtx,
19348 plus_constant (Pmode, stack_pointer_rtx,
19349 -4 * num_regs)));
19351 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19352 RTX_FRAME_RELATED_P (tmp) = 1;
19353 insn = emit_insn (tmp);
19354 RTX_FRAME_RELATED_P (insn) = 1;
19355 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19356 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19357 reg);
19358 RTX_FRAME_RELATED_P (tmp) = 1;
19359 i++;
19360 regno++;
19361 XVECEXP (dwarf, 0, i) = tmp;
19362 first = false;
19365 while (i < num_regs)
19366 if (saved_regs_mask & (1 << regno))
19368 rtx reg1, reg2, mem1, mem2;
19369 rtx tmp0, tmp1, tmp2;
19370 int regno2;
19372 /* Find the register to pair with this one. */
19373 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19374 regno2++)
19377 reg1 = gen_rtx_REG (SImode, regno);
19378 reg2 = gen_rtx_REG (SImode, regno2);
19380 if (first)
19382 rtx insn;
19384 first = false;
19385 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19386 stack_pointer_rtx,
19387 -4 * num_regs));
19388 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19389 stack_pointer_rtx,
19390 -4 * (num_regs - 1)));
19391 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19392 plus_constant (Pmode, stack_pointer_rtx,
19393 -4 * (num_regs)));
19394 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19395 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19396 RTX_FRAME_RELATED_P (tmp0) = 1;
19397 RTX_FRAME_RELATED_P (tmp1) = 1;
19398 RTX_FRAME_RELATED_P (tmp2) = 1;
19399 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19400 XVECEXP (par, 0, 0) = tmp0;
19401 XVECEXP (par, 0, 1) = tmp1;
19402 XVECEXP (par, 0, 2) = tmp2;
19403 insn = emit_insn (par);
19404 RTX_FRAME_RELATED_P (insn) = 1;
19405 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19407 else
19409 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19410 stack_pointer_rtx,
19411 4 * i));
19412 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19413 stack_pointer_rtx,
19414 4 * (i + 1)));
19415 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19416 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19417 RTX_FRAME_RELATED_P (tmp1) = 1;
19418 RTX_FRAME_RELATED_P (tmp2) = 1;
19419 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19420 XVECEXP (par, 0, 0) = tmp1;
19421 XVECEXP (par, 0, 1) = tmp2;
19422 emit_insn (par);
19425 /* Create unwind information. This is an approximation. */
19426 tmp1 = gen_rtx_SET (VOIDmode,
19427 gen_frame_mem (Pmode,
19428 plus_constant (Pmode,
19429 stack_pointer_rtx,
19430 4 * i)),
19431 reg1);
19432 tmp2 = gen_rtx_SET (VOIDmode,
19433 gen_frame_mem (Pmode,
19434 plus_constant (Pmode,
19435 stack_pointer_rtx,
19436 4 * (i + 1))),
19437 reg2);
19439 RTX_FRAME_RELATED_P (tmp1) = 1;
19440 RTX_FRAME_RELATED_P (tmp2) = 1;
19441 XVECEXP (dwarf, 0, i + 1) = tmp1;
19442 XVECEXP (dwarf, 0, i + 2) = tmp2;
19443 i += 2;
19444 regno = regno2 + 1;
19446 else
19447 regno++;
19449 return;
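/* For illustration (register choice arbitrary): pushing {r4, r5, r6, r7}
   with the code above produces roughly

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   while an odd set such as {r4, r5, r6} starts with a single
   "str r4, [sp, #-12]!" so that the following "strd r5, r6, [sp, #4]"
   stays doubleword aligned.  */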
19452 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19453 whenever possible, otherwise it emits single-word stores. The first store
19454 also allocates stack space for all saved registers, using writeback with
19455 post-addressing mode. All other stores use offset addressing. If no STRD
19456 can be emitted, this function emits a sequence of single-word stores,
19457 and not an STM as before, because single-word stores provide more
19458 scheduling freedom and can be turned into an STM by peephole optimizations.
19459 static void
19460 arm_emit_strd_push (unsigned long saved_regs_mask)
19462 int num_regs = 0;
19463 int i, j, dwarf_index = 0;
19464 int offset = 0;
19465 rtx dwarf = NULL_RTX;
19466 rtx insn = NULL_RTX;
19467 rtx tmp, mem;
19469 /* TODO: More efficient code can be emitted by changing the
19470 layout, e.g., first push all pairs that can use STRD to keep the
19471 stack aligned, and then push all other registers. */
19472 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19473 if (saved_regs_mask & (1 << i))
19474 num_regs++;
19476 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19477 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19478 gcc_assert (num_regs > 0);
19480 /* Create sequence for DWARF info. */
19481 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19483 /* For dwarf info, we generate an explicit stack update. */
19484 tmp = gen_rtx_SET (VOIDmode,
19485 stack_pointer_rtx,
19486 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19487 RTX_FRAME_RELATED_P (tmp) = 1;
19488 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19490 /* Save registers. */
19491 offset = - 4 * num_regs;
19492 j = 0;
19493 while (j <= LAST_ARM_REGNUM)
19494 if (saved_regs_mask & (1 << j))
19496 if ((j % 2 == 0)
19497 && (saved_regs_mask & (1 << (j + 1))))
19499 /* The current register and the next register form a register pair
19500 for which STRD can be generated. */
19501 if (offset < 0)
19503 /* Allocate stack space for all saved registers. */
19504 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19505 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19506 mem = gen_frame_mem (DImode, tmp);
19507 offset = 0;
19509 else if (offset > 0)
19510 mem = gen_frame_mem (DImode,
19511 plus_constant (Pmode,
19512 stack_pointer_rtx,
19513 offset));
19514 else
19515 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19517 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19518 RTX_FRAME_RELATED_P (tmp) = 1;
19519 tmp = emit_insn (tmp);
19521 /* Record the first store insn. */
19522 if (dwarf_index == 1)
19523 insn = tmp;
19525 /* Generate dwarf info. */
19526 mem = gen_frame_mem (SImode,
19527 plus_constant (Pmode,
19528 stack_pointer_rtx,
19529 offset));
19530 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19531 RTX_FRAME_RELATED_P (tmp) = 1;
19532 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19534 mem = gen_frame_mem (SImode,
19535 plus_constant (Pmode,
19536 stack_pointer_rtx,
19537 offset + 4));
19538 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19539 RTX_FRAME_RELATED_P (tmp) = 1;
19540 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19542 offset += 8;
19543 j += 2;
19545 else
19547 /* Emit a single word store. */
19548 if (offset < 0)
19550 /* Allocate stack space for all saved registers. */
19551 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19552 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19553 mem = gen_frame_mem (SImode, tmp);
19554 offset = 0;
19556 else if (offset > 0)
19557 mem = gen_frame_mem (SImode,
19558 plus_constant (Pmode,
19559 stack_pointer_rtx,
19560 offset));
19561 else
19562 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19564 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19565 RTX_FRAME_RELATED_P (tmp) = 1;
19566 tmp = emit_insn (tmp);
19568 /* Record the first store insn. */
19569 if (dwarf_index == 1)
19570 insn = tmp;
19572 /* Generate dwarf info. */
19573 mem = gen_frame_mem (SImode,
19574 plus_constant (Pmode,
19575 stack_pointer_rtx,
19576 offset));
19577 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19578 RTX_FRAME_RELATED_P (tmp) = 1;
19579 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19581 offset += 4;
19582 j += 1;
19585 else
19586 j++;
19588 /* Attach dwarf info to the first insn we generate. */
19589 gcc_assert (insn != NULL_RTX);
19590 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19591 RTX_FRAME_RELATED_P (insn) = 1;
19594 /* Generate and emit an insn that we will recognize as a push_multi.
19595 Unfortunately, since this insn does not reflect very well the actual
19596 semantics of the operation, we need to annotate the insn for the benefit
19597 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19598 MASK for registers that should be annotated for DWARF2 frame unwind
19599 information. */
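/* At the assembly level the insn built here is just a store-multiple; for
   example (hypothetical register set) pushing r4-r7 and lr assembles to a
   single "push {r4, r5, r6, r7, lr}".  The work below is only about
   describing that store accurately to the DWARF unwinder.  */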
19600 static rtx
19601 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19603 int num_regs = 0;
19604 int num_dwarf_regs = 0;
19605 int i, j;
19606 rtx par;
19607 rtx dwarf;
19608 int dwarf_par_index;
19609 rtx tmp, reg;
19611 /* We don't record the PC in the dwarf frame information. */
19612 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19614 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19616 if (mask & (1 << i))
19617 num_regs++;
19618 if (dwarf_regs_mask & (1 << i))
19619 num_dwarf_regs++;
19622 gcc_assert (num_regs && num_regs <= 16);
19623 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19625 /* For the body of the insn we are going to generate an UNSPEC in
19626 parallel with several USEs. This allows the insn to be recognized
19627 by the push_multi pattern in the arm.md file.
19629 The body of the insn looks something like this:
19631 (parallel [
19632 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19633 (const_int:SI <num>)))
19634 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19635 (use (reg:SI XX))
19636 (use (reg:SI YY))
19640 For the frame note however, we try to be more explicit and actually
19641 show each register being stored into the stack frame, plus a (single)
19642 decrement of the stack pointer. We do it this way in order to be
19643 friendly to the stack unwinding code, which only wants to see a single
19644 stack decrement per instruction. The RTL we generate for the note looks
19645 something like this:
19647 (sequence [
19648 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19649 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19650 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19651 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19655 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19656 instead we'd have a parallel expression detailing all
19657 the stores to the various memory addresses so that debug
19658 information is more up-to-date. Remember however while writing
19659 this to take care of the constraints with the push instruction.
19661 Note also that this has to be taken care of for the VFP registers.
19663 For more see PR43399. */
19665 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19666 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19667 dwarf_par_index = 1;
19669 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19671 if (mask & (1 << i))
19673 reg = gen_rtx_REG (SImode, i);
19675 XVECEXP (par, 0, 0)
19676 = gen_rtx_SET (VOIDmode,
19677 gen_frame_mem
19678 (BLKmode,
19679 gen_rtx_PRE_MODIFY (Pmode,
19680 stack_pointer_rtx,
19681 plus_constant
19682 (Pmode, stack_pointer_rtx,
19683 -4 * num_regs))
19685 gen_rtx_UNSPEC (BLKmode,
19686 gen_rtvec (1, reg),
19687 UNSPEC_PUSH_MULT));
19689 if (dwarf_regs_mask & (1 << i))
19691 tmp = gen_rtx_SET (VOIDmode,
19692 gen_frame_mem (SImode, stack_pointer_rtx),
19693 reg);
19694 RTX_FRAME_RELATED_P (tmp) = 1;
19695 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19698 break;
19702 for (j = 1, i++; j < num_regs; i++)
19704 if (mask & (1 << i))
19706 reg = gen_rtx_REG (SImode, i);
19708 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19710 if (dwarf_regs_mask & (1 << i))
19713 = gen_rtx_SET (VOIDmode,
19714 gen_frame_mem
19715 (SImode,
19716 plus_constant (Pmode, stack_pointer_rtx,
19717 4 * j)),
19718 reg);
19719 RTX_FRAME_RELATED_P (tmp) = 1;
19720 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19723 j++;
19727 par = emit_insn (par);
19729 tmp = gen_rtx_SET (VOIDmode,
19730 stack_pointer_rtx,
19731 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19732 RTX_FRAME_RELATED_P (tmp) = 1;
19733 XVECEXP (dwarf, 0, 0) = tmp;
19735 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19737 return par;
19740 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19741 SIZE is the offset to be adjusted.
19742 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
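/* For illustration (values are hypothetical): after an epilogue insn that
   pops three words, the caller passes SIZE == 12 with DEST == SRC == sp,
   and the note attached is (set (reg sp) (plus (reg sp) (const_int 12))).  */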
19743 static void
19744 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19746 rtx dwarf;
19748 RTX_FRAME_RELATED_P (insn) = 1;
19749 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19750 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19753 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19754 SAVED_REGS_MASK shows which registers need to be restored.
19756 Unfortunately, since this insn does not reflect very well the actual
19757 semantics of the operation, we need to annotate the insn for the benefit
19758 of DWARF2 frame unwind information. */
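/* Sketch with hypothetical registers: restoring { r4, r5, pc } builds a
   parallel of roughly this shape:

     (parallel [(return)
                (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
                (set (reg:SI r4) (mem:SI (reg:SI sp)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
                (set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   which is recognized as a pop-with-return and assembles to a single
   "pop {r4, r5, pc}".  */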
19759 static void
19760 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19762 int num_regs = 0;
19763 int i, j;
19764 rtx par;
19765 rtx dwarf = NULL_RTX;
19766 rtx tmp, reg;
19767 bool return_in_pc;
19768 int offset_adj;
19769 int emit_update;
19771 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19772 offset_adj = return_in_pc ? 1 : 0;
19773 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19774 if (saved_regs_mask & (1 << i))
19775 num_regs++;
19777 gcc_assert (num_regs && num_regs <= 16);
19779 /* If SP is in reglist, then we don't emit SP update insn. */
19780 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19782 /* The parallel needs to hold num_regs SETs
19783 and one SET for the stack update. */
19784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19786 if (return_in_pc)
19788 tmp = ret_rtx;
19789 XVECEXP (par, 0, 0) = tmp;
19792 if (emit_update)
19794 /* Increment the stack pointer, based on there being
19795 num_regs 4-byte registers to restore. */
19796 tmp = gen_rtx_SET (VOIDmode,
19797 stack_pointer_rtx,
19798 plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 4 * num_regs));
19801 RTX_FRAME_RELATED_P (tmp) = 1;
19802 XVECEXP (par, 0, offset_adj) = tmp;
19805 /* Now restore every reg, which may include PC. */
19806 for (j = 0, i = 0; j < num_regs; i++)
19807 if (saved_regs_mask & (1 << i))
19809 reg = gen_rtx_REG (SImode, i);
19810 if ((num_regs == 1) && emit_update && !return_in_pc)
19812 /* Emit single load with writeback. */
19813 tmp = gen_frame_mem (SImode,
19814 gen_rtx_POST_INC (Pmode,
19815 stack_pointer_rtx));
19816 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19817 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19818 return;
19821 tmp = gen_rtx_SET (VOIDmode,
19822 reg,
19823 gen_frame_mem
19824 (SImode,
19825 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19826 RTX_FRAME_RELATED_P (tmp) = 1;
19827 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19829 /* We need to maintain a sequence for DWARF info too. As dwarf info
19830 should not have PC, skip PC. */
19831 if (i != PC_REGNUM)
19832 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19834 j++;
19837 if (return_in_pc)
19838 par = emit_jump_insn (par);
19839 else
19840 par = emit_insn (par);
19842 REG_NOTES (par) = dwarf;
19843 if (!return_in_pc)
19844 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19845 stack_pointer_rtx, stack_pointer_rtx);
19848 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19849 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19851 Unfortunately, since this insn does not reflect very well the actual
19852 semantics of the operation, we need to annotate the insn for the benefit
19853 of DWARF2 frame unwind information. */
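/* Sketch only: restoring two D registers, say d8 and d9, with BASE_REG == sp
   (and assuming the ARM10 VFPr1 workaround below does not trigger) builds a
   parallel meaning "sp = sp + 16; d8 = [sp]; d9 = [sp + 8]", which assembles
   to a single "vldmia sp!, {d8-d9}".  */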
19854 static void
19855 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19857 int i, j;
19858 rtx par;
19859 rtx dwarf = NULL_RTX;
19860 rtx tmp, reg;
19862 gcc_assert (num_regs && num_regs <= 32);
19864 /* Workaround ARM10 VFPr1 bug. */
19865 if (num_regs == 2 && !arm_arch6)
19867 if (first_reg == 15)
19868 first_reg--;
19870 num_regs++;
19873 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19874 there could be up to 32 D-registers to restore.
19875 If there are more than 16 D-registers, make two recursive calls,
19876 each of which emits one pop_multi instruction. */
19877 if (num_regs > 16)
19879 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19880 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19881 return;
19884 /* The parallel needs to hold num_regs SETs
19885 and one SET for the stack update. */
19886 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19888 /* Increment the stack pointer, based on there being
19889 num_regs 8-byte registers to restore. */
19890 tmp = gen_rtx_SET (VOIDmode,
19891 base_reg,
19892 plus_constant (Pmode, base_reg, 8 * num_regs));
19893 RTX_FRAME_RELATED_P (tmp) = 1;
19894 XVECEXP (par, 0, 0) = tmp;
19896 /* Now show every reg that will be restored, using a SET for each. */
19897 for (j = 0, i = first_reg; j < num_regs; i += 2)
19899 reg = gen_rtx_REG (DFmode, i);
19901 tmp = gen_rtx_SET (VOIDmode,
19902 reg,
19903 gen_frame_mem
19904 (DFmode,
19905 plus_constant (Pmode, base_reg, 8 * j)));
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 XVECEXP (par, 0, j + 1) = tmp;
19909 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19911 j++;
19914 par = emit_insn (par);
19915 REG_NOTES (par) = dwarf;
19917 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
19918 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
19920 RTX_FRAME_RELATED_P (par) = 1;
19921 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
19923 else
19924 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19925 base_reg, base_reg);
19928 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19929 number of registers are being popped, multiple LDRD patterns are created for
19930 all register pairs. If odd number of registers are popped, last register is
19931 loaded by using LDR pattern. */
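/* Sketch with hypothetical registers: popping { r4, r6, r7 } can become

       ldrd  r4, r6, [sp]      @ paired by stack position, not register number
       add   sp, sp, #8
       ldr   r7, [sp], #4

   since Thumb-2 LDRD does not require consecutive target registers.  */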
19932 static void
19933 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19935 int num_regs = 0;
19936 int i, j;
19937 rtx par = NULL_RTX;
19938 rtx dwarf = NULL_RTX;
19939 rtx tmp, reg, tmp1;
19940 bool return_in_pc;
19942 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19943 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19944 if (saved_regs_mask & (1 << i))
19945 num_regs++;
19947 gcc_assert (num_regs && num_regs <= 16);
19949 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19950 to be popped. So, if num_regs is even, now it will become odd,
19951 and we can generate pop with PC. If num_regs is odd, it will be
19952 even now, and ldr with return can be generated for PC. */
19953 if (return_in_pc)
19954 num_regs--;
19956 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19958 /* Var j iterates over all the registers to gather all the registers in
19959 saved_regs_mask. Var i gives index of saved registers in stack frame.
19960 A PARALLEL RTX of register-pair is created here, so that pattern for
19961 LDRD can be matched. As PC is always last register to be popped, and
19962 we have already decremented num_regs if PC, we don't have to worry
19963 about PC in this loop. */
19964 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19965 if (saved_regs_mask & (1 << j))
19967 /* Create RTX for memory load. */
19968 reg = gen_rtx_REG (SImode, j);
19969 tmp = gen_rtx_SET (SImode,
19970 reg,
19971 gen_frame_mem (SImode,
19972 plus_constant (Pmode,
19973 stack_pointer_rtx, 4 * i)));
19974 RTX_FRAME_RELATED_P (tmp) = 1;
19976 if (i % 2 == 0)
19978 /* When saved-register index (i) is even, the RTX to be emitted is
19979 yet to be created. Hence create it first. The LDRD pattern we
19980 are generating is :
19981 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19982 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19983 where target registers need not be consecutive. */
19984 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19985 dwarf = NULL_RTX;
19988 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19989 added as 0th element and if i is odd, reg_i is added as 1st element
19990 of LDRD pattern shown above. */
19991 XVECEXP (par, 0, (i % 2)) = tmp;
19992 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19994 if ((i % 2) == 1)
19996 /* When saved-register index (i) is odd, RTXs for both the registers
19997 to be loaded are generated in above given LDRD pattern, and the
19998 pattern can be emitted now. */
19999 par = emit_insn (par);
20000 REG_NOTES (par) = dwarf;
20001 RTX_FRAME_RELATED_P (par) = 1;
20004 i++;
20007 /* If the number of registers popped is odd and return_in_pc is false, or
20008 the number of registers is even and return_in_pc is true, the last register
20009 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20010 and then emit LDR with post-increment. */
20012 /* Increment the stack pointer, based on there being
20013 num_regs 4-byte registers to restore. */
20014 tmp = gen_rtx_SET (VOIDmode,
20015 stack_pointer_rtx,
20016 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20017 RTX_FRAME_RELATED_P (tmp) = 1;
20018 tmp = emit_insn (tmp);
20019 if (!return_in_pc)
20021 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20022 stack_pointer_rtx, stack_pointer_rtx);
20025 dwarf = NULL_RTX;
20027 if (((num_regs % 2) == 1 && !return_in_pc)
20028 || ((num_regs % 2) == 0 && return_in_pc))
20030 /* Scan for the single register to be popped. Skip until the saved
20031 register is found. */
20032 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20034 /* Gen LDR with post increment here. */
20035 tmp1 = gen_rtx_MEM (SImode,
20036 gen_rtx_POST_INC (SImode,
20037 stack_pointer_rtx));
20038 set_mem_alias_set (tmp1, get_frame_alias_set ());
20040 reg = gen_rtx_REG (SImode, j);
20041 tmp = gen_rtx_SET (SImode, reg, tmp1);
20042 RTX_FRAME_RELATED_P (tmp) = 1;
20043 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20045 if (return_in_pc)
20047 /* If return_in_pc, j must be PC_REGNUM. */
20048 gcc_assert (j == PC_REGNUM);
20049 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20050 XVECEXP (par, 0, 0) = ret_rtx;
20051 XVECEXP (par, 0, 1) = tmp;
20052 par = emit_jump_insn (par);
20054 else
20056 par = emit_insn (tmp);
20057 REG_NOTES (par) = dwarf;
20058 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20059 stack_pointer_rtx, stack_pointer_rtx);
20063 else if ((num_regs % 2) == 1 && return_in_pc)
20065 /* There are 2 registers to be popped. So, generate the pattern
20066 pop_multiple_with_stack_update_and_return to pop in PC. */
20067 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20070 return;
20073 /* LDRD in ARM mode needs consecutive registers as operands. This function
20074 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20075 offset addressing and then generates one separate stack update. This provides
20076 more scheduling freedom, compared to writeback on every load. However,
20077 if the function returns using load into PC directly
20078 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20079 before the last load. TODO: Add a peephole optimization to recognize
20080 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20081 peephole optimization to merge the load at stack-offset zero
20082 with the stack update instruction using load with writeback
20083 in post-index addressing mode. */
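/* Sketch with hypothetical registers: popping { r4, r5, r7 } can become

       ldrd  r4, r5, [sp]
       ldr   r7, [sp, #8]
       add   sp, sp, #12

   i.e. offset-addressed loads followed by a single stack adjustment.  */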
20084 static void
20085 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20087 int j = 0;
20088 int offset = 0;
20089 rtx par = NULL_RTX;
20090 rtx dwarf = NULL_RTX;
20091 rtx tmp, mem;
20093 /* Restore saved registers. */
20094 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20095 j = 0;
20096 while (j <= LAST_ARM_REGNUM)
20097 if (saved_regs_mask & (1 << j))
20099 if ((j % 2) == 0
20100 && (saved_regs_mask & (1 << (j + 1)))
20101 && (j + 1) != PC_REGNUM)
20103 /* Current register and next register form register pair for which
20104 LDRD can be generated. PC is always the last register popped, and
20105 we handle it separately. */
20106 if (offset > 0)
20107 mem = gen_frame_mem (DImode,
20108 plus_constant (Pmode,
20109 stack_pointer_rtx,
20110 offset));
20111 else
20112 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20114 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20115 tmp = emit_insn (tmp);
20116 RTX_FRAME_RELATED_P (tmp) = 1;
20118 /* Generate dwarf info. */
20120 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20121 gen_rtx_REG (SImode, j),
20122 NULL_RTX);
20123 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20124 gen_rtx_REG (SImode, j + 1),
20125 dwarf);
20127 REG_NOTES (tmp) = dwarf;
20129 offset += 8;
20130 j += 2;
20132 else if (j != PC_REGNUM)
20134 /* Emit a single word load. */
20135 if (offset > 0)
20136 mem = gen_frame_mem (SImode,
20137 plus_constant (Pmode,
20138 stack_pointer_rtx,
20139 offset));
20140 else
20141 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20143 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20144 tmp = emit_insn (tmp);
20145 RTX_FRAME_RELATED_P (tmp) = 1;
20147 /* Generate dwarf info. */
20148 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20149 gen_rtx_REG (SImode, j),
20150 NULL_RTX);
20152 offset += 4;
20153 j += 1;
20155 else /* j == PC_REGNUM */
20156 j++;
20158 else
20159 j++;
20161 /* Update the stack. */
20162 if (offset > 0)
20164 tmp = gen_rtx_SET (Pmode,
20165 stack_pointer_rtx,
20166 plus_constant (Pmode,
20167 stack_pointer_rtx,
20168 offset));
20169 tmp = emit_insn (tmp);
20170 arm_add_cfa_adjust_cfa_note (tmp, offset,
20171 stack_pointer_rtx, stack_pointer_rtx);
20172 offset = 0;
20175 if (saved_regs_mask & (1 << PC_REGNUM))
20177 /* Only PC is to be popped. */
20178 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20179 XVECEXP (par, 0, 0) = ret_rtx;
20180 tmp = gen_rtx_SET (SImode,
20181 gen_rtx_REG (SImode, PC_REGNUM),
20182 gen_frame_mem (SImode,
20183 gen_rtx_POST_INC (SImode,
20184 stack_pointer_rtx)));
20185 RTX_FRAME_RELATED_P (tmp) = 1;
20186 XVECEXP (par, 0, 1) = tmp;
20187 par = emit_jump_insn (par);
20189 /* Generate dwarf info. */
20190 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20191 gen_rtx_REG (SImode, PC_REGNUM),
20192 NULL_RTX);
20193 REG_NOTES (par) = dwarf;
20194 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20195 stack_pointer_rtx, stack_pointer_rtx);
20199 /* Calculate the size of the return value that is passed in registers. */
20200 static unsigned
20201 arm_size_return_regs (void)
20203 enum machine_mode mode;
20205 if (crtl->return_rtx != 0)
20206 mode = GET_MODE (crtl->return_rtx);
20207 else
20208 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20210 return GET_MODE_SIZE (mode);
20213 /* Return true if the current function needs to save/restore LR. */
20214 static bool
20215 thumb_force_lr_save (void)
20217 return !cfun->machine->lr_save_eliminated
20218 && (!leaf_function_p ()
20219 || thumb_far_jump_used_p ()
20220 || df_regs_ever_live_p (LR_REGNUM));
20223 /* We do not know whether r3 will be available, because
20224 there is an indirect tailcall happening in this
20225 particular case (the call address may live in r3). */
20226 static bool
20227 is_indirect_tailcall_p (rtx call)
20229 rtx pat = PATTERN (call);
20231 /* Indirect tail call. */
20232 pat = XVECEXP (pat, 0, 0);
20233 if (GET_CODE (pat) == SET)
20234 pat = SET_SRC (pat);
20236 pat = XEXP (XEXP (pat, 0), 0);
20237 return REG_P (pat);
20240 /* Return true if r3 is used by any of the tail call insns in the
20241 current function. */
20242 static bool
20243 any_sibcall_could_use_r3 (void)
20245 edge_iterator ei;
20246 edge e;
20248 if (!crtl->tail_call_emit)
20249 return false;
20250 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20251 if (e->flags & EDGE_SIBCALL)
20253 rtx call = BB_END (e->src);
20254 if (!CALL_P (call))
20255 call = prev_nonnote_nondebug_insn (call);
20256 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20257 if (find_regno_fusage (call, USE, 3)
20258 || is_indirect_tailcall_p (call))
20259 return true;
20261 return false;
20265 /* Compute the distance from register FROM to register TO.
20266 These can be the arg pointer (26), the soft frame pointer (25),
20267 the stack pointer (13) or the hard frame pointer (11).
20268 In thumb mode r7 is used as the soft frame pointer, if needed.
20269 Typical stack layout looks like this:
20271 old stack pointer -> | |
20272 ----
20273 | | \
20274 | | saved arguments for
20275 | | vararg functions
20276 | | /
20278 hard FP & arg pointer -> | | \
20279 | | stack
20280 | | frame
20281 | | /
20283 | | \
20284 | | call saved
20285 | | registers
20286 soft frame pointer -> | | /
20288 | | \
20289 | | local
20290 | | variables
20291 locals base pointer -> | | /
20293 | | \
20294 | | outgoing
20295 | | arguments
20296 current stack pointer -> | | /
20299 For a given function some or all of these stack components
20300 may not be needed, giving rise to the possibility of
20301 eliminating some of the registers.
20303 The values returned by this function must reflect the behavior
20304 of arm_expand_prologue() and arm_compute_save_reg_mask().
20306 The sign of the number returned reflects the direction of stack
20307 growth, so the values are positive for all eliminations except
20308 from the soft frame pointer to the hard frame pointer.
20310 SFP may point just inside the local variables block to ensure correct
20311 alignment. */
20314 /* Calculate stack offsets. These are used to calculate register elimination
20315 offsets and in prologue/epilogue code. Also calculates which registers
20316 should be saved. */
20318 static arm_stack_offsets *
20319 arm_get_frame_offsets (void)
20321 struct arm_stack_offsets *offsets;
20322 unsigned long func_type;
20323 int leaf;
20324 int saved;
20325 int core_saved;
20326 HOST_WIDE_INT frame_size;
20327 int i;
20329 offsets = &cfun->machine->stack_offsets;
20331 /* We need to know if we are a leaf function. Unfortunately, it
20332 is possible to be called after start_sequence has been called,
20333 which causes get_insns to return the insns for the sequence,
20334 not the function, which will cause leaf_function_p to return
20335 the incorrect result.
20337 We only need to know about leaf functions once reload has completed, and the
20338 frame size cannot be changed after that time, so we can safely
20339 use the cached value. */
20341 if (reload_completed)
20342 return offsets;
20344 /* Initially this is the size of the local variables. It will be translated
20345 into an offset once we have determined the size of preceding data. */
20346 frame_size = ROUND_UP_WORD (get_frame_size ());
20348 leaf = leaf_function_p ();
20350 /* Space for variadic functions. */
20351 offsets->saved_args = crtl->args.pretend_args_size;
20353 /* In Thumb mode this is incorrect, but never used. */
20354 offsets->frame
20355 = (offsets->saved_args
20356 + arm_compute_static_chain_stack_bytes ()
20357 + (frame_pointer_needed ? 4 : 0));
20359 if (TARGET_32BIT)
20361 unsigned int regno;
20363 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20364 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20365 saved = core_saved;
20367 /* We know that SP will be doubleword aligned on entry, and we must
20368 preserve that condition at any subroutine call. We also require the
20369 soft frame pointer to be doubleword aligned. */
20371 if (TARGET_REALLY_IWMMXT)
20373 /* Check for the call-saved iWMMXt registers. */
20374 for (regno = FIRST_IWMMXT_REGNUM;
20375 regno <= LAST_IWMMXT_REGNUM;
20376 regno++)
20377 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20378 saved += 8;
20381 func_type = arm_current_func_type ();
20382 /* Space for saved VFP registers. */
20383 if (! IS_VOLATILE (func_type)
20384 && TARGET_HARD_FLOAT && TARGET_VFP)
20385 saved += arm_get_vfp_saved_size ();
20387 else /* TARGET_THUMB1 */
20389 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20390 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20391 saved = core_saved;
20392 if (TARGET_BACKTRACE)
20393 saved += 16;
20396 /* Saved registers include the stack frame. */
20397 offsets->saved_regs
20398 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20399 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20401 /* A leaf function does not need any stack alignment if it has nothing
20402 on the stack. */
20403 if (leaf && frame_size == 0
20404 /* However if it calls alloca(), we have a dynamically allocated
20405 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20406 && ! cfun->calls_alloca)
20408 offsets->outgoing_args = offsets->soft_frame;
20409 offsets->locals_base = offsets->soft_frame;
20410 return offsets;
20413 /* Ensure SFP has the correct alignment. */
20414 if (ARM_DOUBLEWORD_ALIGN
20415 && (offsets->soft_frame & 7))
20417 offsets->soft_frame += 4;
20418 /* Try to align stack by pushing an extra reg. Don't bother doing this
20419 when there is a stack frame as the alignment will be rolled into
20420 the normal stack adjustment. */
20421 if (frame_size + crtl->outgoing_args_size == 0)
20423 int reg = -1;
20425 /* If it is safe to use r3, then do so. This sometimes
20426 generates better code on Thumb-2 by avoiding the need to
20427 use 32-bit push/pop instructions. */
20428 if (! any_sibcall_could_use_r3 ()
20429 && arm_size_return_regs () <= 12
20430 && (offsets->saved_regs_mask & (1 << 3)) == 0
20431 && (TARGET_THUMB2
20432 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20434 reg = 3;
20436 else
20437 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20439 /* Avoid fixed registers; they may be changed at
20440 arbitrary times so it's unsafe to restore them
20441 during the epilogue. */
20442 if (!fixed_regs[i]
20443 && (offsets->saved_regs_mask & (1 << i)) == 0)
20445 reg = i;
20446 break;
20450 if (reg != -1)
20452 offsets->saved_regs += 4;
20453 offsets->saved_regs_mask |= (1 << reg);
20458 offsets->locals_base = offsets->soft_frame + frame_size;
20459 offsets->outgoing_args = (offsets->locals_base
20460 + crtl->outgoing_args_size);
20462 if (ARM_DOUBLEWORD_ALIGN)
20464 /* Ensure SP remains doubleword aligned. */
20465 if (offsets->outgoing_args & 7)
20466 offsets->outgoing_args += 4;
20467 gcc_assert (!(offsets->outgoing_args & 7));
20470 return offsets;
20474 /* Calculate the relative offsets for the different stack pointers. Positive
20475 offsets are in the direction of stack growth. */
20477 HOST_WIDE_INT
20478 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20480 arm_stack_offsets *offsets;
20482 offsets = arm_get_frame_offsets ();
20484 /* OK, now we have enough information to compute the distances.
20485 There must be an entry in these switch tables for each pair
20486 of registers in ELIMINABLE_REGS, even if some of the entries
20487 seem to be redundant or useless. */
20488 switch (from)
20490 case ARG_POINTER_REGNUM:
20491 switch (to)
20493 case THUMB_HARD_FRAME_POINTER_REGNUM:
20494 return 0;
20496 case FRAME_POINTER_REGNUM:
20497 /* This is the reverse of the soft frame pointer
20498 to hard frame pointer elimination below. */
20499 return offsets->soft_frame - offsets->saved_args;
20501 case ARM_HARD_FRAME_POINTER_REGNUM:
20502 /* This is only non-zero in the case where the static chain register
20503 is stored above the frame. */
20504 return offsets->frame - offsets->saved_args - 4;
20506 case STACK_POINTER_REGNUM:
20507 /* If nothing has been pushed on the stack at all
20508 then this will return -4. This *is* correct! */
20509 return offsets->outgoing_args - (offsets->saved_args + 4);
20511 default:
20512 gcc_unreachable ();
20514 gcc_unreachable ();
20516 case FRAME_POINTER_REGNUM:
20517 switch (to)
20519 case THUMB_HARD_FRAME_POINTER_REGNUM:
20520 return 0;
20522 case ARM_HARD_FRAME_POINTER_REGNUM:
20523 /* The hard frame pointer points to the top entry in the
20524 stack frame. The soft frame pointer points to the bottom entry
20525 in the stack frame. If there is no stack frame at all,
20526 then they are identical. */
20528 return offsets->frame - offsets->soft_frame;
20530 case STACK_POINTER_REGNUM:
20531 return offsets->outgoing_args - offsets->soft_frame;
20533 default:
20534 gcc_unreachable ();
20536 gcc_unreachable ();
20538 default:
20539 /* You cannot eliminate from the stack pointer.
20540 In theory you could eliminate from the hard frame
20541 pointer to the stack pointer, but this will never
20542 happen, since if a stack frame is not needed the
20543 hard frame pointer will never be used. */
20544 gcc_unreachable ();
20548 /* Given FROM and TO register numbers, say whether this elimination is
20549 allowed. Frame pointer elimination is automatically handled.
20551 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20552 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20553 pointer, we must eliminate FRAME_POINTER_REGNUM into
20554 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20555 ARG_POINTER_REGNUM. */
20557 bool
20558 arm_can_eliminate (const int from, const int to)
20560 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20561 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20562 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20563 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20564 true);
20567 /* Emit RTL to save coprocessor registers on function entry. Returns the
20568 number of bytes pushed. */
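/* For instance (an illustrative sketch, not guaranteed output), a hard-float
   function with d8-d11 live would emit one multi-register store of those four
   D registers -- effectively "vpush {d8-d11}" -- and report 32 bytes.  */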
20570 static int
20571 arm_save_coproc_regs(void)
20573 int saved_size = 0;
20574 unsigned reg;
20575 unsigned start_reg;
20576 rtx insn;
20578 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20579 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20581 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20582 insn = gen_rtx_MEM (V2SImode, insn);
20583 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20584 RTX_FRAME_RELATED_P (insn) = 1;
20585 saved_size += 8;
20588 if (TARGET_HARD_FLOAT && TARGET_VFP)
20590 start_reg = FIRST_VFP_REGNUM;
20592 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20594 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20595 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20597 if (start_reg != reg)
20598 saved_size += vfp_emit_fstmd (start_reg,
20599 (reg - start_reg) / 2);
20600 start_reg = reg + 2;
20603 if (start_reg != reg)
20604 saved_size += vfp_emit_fstmd (start_reg,
20605 (reg - start_reg) / 2);
20607 return saved_size;
20611 /* Set the Thumb frame pointer from the stack pointer. */
20613 static void
20614 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20616 HOST_WIDE_INT amount;
20617 rtx insn, dwarf;
20619 amount = offsets->outgoing_args - offsets->locals_base;
20620 if (amount < 1024)
20621 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20622 stack_pointer_rtx, GEN_INT (amount)));
20623 else
20625 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20626 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20627 expects the first two operands to be the same. */
20628 if (TARGET_THUMB2)
20630 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20631 stack_pointer_rtx,
20632 hard_frame_pointer_rtx));
20634 else
20636 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20637 hard_frame_pointer_rtx,
20638 stack_pointer_rtx));
20640 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20641 plus_constant (Pmode, stack_pointer_rtx, amount));
20642 RTX_FRAME_RELATED_P (dwarf) = 1;
20643 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20646 RTX_FRAME_RELATED_P (insn) = 1;
20649 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20650 function. */
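/* Typical shape, for illustration only: a non-nested function without an
   APCS frame that saves r4 and lr and needs 16 bytes of outgoing argument
   space gets roughly

       push  {r4, lr}
       sub   sp, sp, #16

   The many special cases below (APCS frames, interrupt handlers, stack
   realignment, nested functions) add to or reorder this basic shape.  */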
20651 void
20652 arm_expand_prologue (void)
20654 rtx amount;
20655 rtx insn;
20656 rtx ip_rtx;
20657 unsigned long live_regs_mask;
20658 unsigned long func_type;
20659 int fp_offset = 0;
20660 int saved_pretend_args = 0;
20661 int saved_regs = 0;
20662 unsigned HOST_WIDE_INT args_to_push;
20663 arm_stack_offsets *offsets;
20665 func_type = arm_current_func_type ();
20667 /* Naked functions don't have prologues. */
20668 if (IS_NAKED (func_type))
20669 return;
20671 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20672 args_to_push = crtl->args.pretend_args_size;
20674 /* Compute which register we will have to save onto the stack. */
20675 offsets = arm_get_frame_offsets ();
20676 live_regs_mask = offsets->saved_regs_mask;
20678 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20680 if (IS_STACKALIGN (func_type))
20682 rtx r0, r1;
20684 /* Handle a word-aligned stack pointer. We generate the following:
20686 mov r0, sp
20687 bic r1, r0, #7
20688 mov sp, r1
20689 <save and restore r0 in normal prologue/epilogue>
20690 mov sp, r0
20691 bx lr
20693 The unwinder doesn't need to know about the stack realignment.
20694 Just tell it we saved SP in r0. */
20695 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20697 r0 = gen_rtx_REG (SImode, 0);
20698 r1 = gen_rtx_REG (SImode, 1);
20700 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20701 RTX_FRAME_RELATED_P (insn) = 1;
20702 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20704 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20706 /* ??? The CFA changes here, which may cause GDB to conclude that it
20707 has entered a different function. That said, the unwind info is
20708 correct, individually, before and after this instruction because
20709 we've described the save of SP, which will override the default
20710 handling of SP as restoring from the CFA. */
20711 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20714 /* For APCS frames, if the IP register is clobbered
20715 when creating the frame, save that register in a special
20716 way. */
20717 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20719 if (IS_INTERRUPT (func_type))
20721 /* Interrupt functions must not corrupt any registers.
20722 Creating a frame pointer however, corrupts the IP
20723 register, so we must push it first. */
20724 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20726 /* Do not set RTX_FRAME_RELATED_P on this insn.
20727 The dwarf stack unwinding code only wants to see one
20728 stack decrement per function, and this is not it. If
20729 this instruction is labeled as being part of the frame
20730 creation sequence then dwarf2out_frame_debug_expr will
20731 die when it encounters the assignment of IP to FP
20732 later on, since the use of SP here establishes SP as
20733 the CFA register and not IP.
20735 Anyway this instruction is not really part of the stack
20736 frame creation although it is part of the prologue. */
20738 else if (IS_NESTED (func_type))
20740 /* The static chain register is the same as the IP register
20741 used as a scratch register during stack frame creation.
20742 To get around this need to find somewhere to store IP
20743 whilst the frame is being created. We try the following
20744 places in order:
20746 1. The last argument register r3 if it is available.
20747 2. A slot on the stack above the frame if there are no
20748 arguments to push onto the stack.
20749 3. Register r3 again, after pushing the argument registers
20750 onto the stack, if this is a varargs function.
20751 4. The last slot on the stack created for the arguments to
20752 push, if this isn't a varargs function.
20754 Note - we only need to tell the dwarf2 backend about the SP
20755 adjustment in the second variant; the static chain register
20756 doesn't need to be unwound, as it doesn't contain a value
20757 inherited from the caller. */
20759 if (!arm_r3_live_at_start_p ())
20760 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20761 else if (args_to_push == 0)
20763 rtx addr, dwarf;
20765 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20766 saved_regs += 4;
20768 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20769 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20770 fp_offset = 4;
20772 /* Just tell the dwarf backend that we adjusted SP. */
20773 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20774 plus_constant (Pmode, stack_pointer_rtx,
20775 -fp_offset));
20776 RTX_FRAME_RELATED_P (insn) = 1;
20777 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20779 else
20781 /* Store the args on the stack. */
20782 if (cfun->machine->uses_anonymous_args)
20784 insn
20785 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
20786 (0xf0 >> (args_to_push / 4)) & 0xf);
20787 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20788 saved_pretend_args = 1;
20790 else
20792 rtx addr, dwarf;
20794 if (args_to_push == 4)
20795 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20796 else
20797 addr
20798 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20799 plus_constant (Pmode,
20800 stack_pointer_rtx,
20801 -args_to_push));
20803 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20805 /* Just tell the dwarf backend that we adjusted SP. */
20806 dwarf
20807 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20808 plus_constant (Pmode, stack_pointer_rtx,
20809 -args_to_push));
20810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20813 RTX_FRAME_RELATED_P (insn) = 1;
20814 fp_offset = args_to_push;
20815 args_to_push = 0;
20819 insn = emit_set_insn (ip_rtx,
20820 plus_constant (Pmode, stack_pointer_rtx,
20821 fp_offset));
20822 RTX_FRAME_RELATED_P (insn) = 1;
20825 if (args_to_push)
20827 /* Push the argument registers, or reserve space for them. */
20828 if (cfun->machine->uses_anonymous_args)
20829 insn = emit_multi_reg_push
20830 ((0xf0 >> (args_to_push / 4)) & 0xf,
20831 (0xf0 >> (args_to_push / 4)) & 0xf);
20832 else
20833 insn = emit_insn
20834 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20835 GEN_INT (- args_to_push)));
20836 RTX_FRAME_RELATED_P (insn) = 1;
20839 /* If this is an interrupt service routine, and the link register
20840 is going to be pushed, and we're not generating the extra
20841 push of IP (needed when a frame is needed and the frame layout is APCS),
20842 subtracting four from LR now will mean that the function return
20843 can be done with a single instruction. */
20844 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20845 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20846 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20847 && TARGET_ARM)
20849 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20851 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20854 if (live_regs_mask)
20856 unsigned long dwarf_regs_mask = live_regs_mask;
20858 saved_regs += bit_count (live_regs_mask) * 4;
20859 if (optimize_size && !frame_pointer_needed
20860 && saved_regs == offsets->saved_regs - offsets->saved_args)
20862 /* If no coprocessor registers are being pushed and we don't have
20863 to worry about a frame pointer then push extra registers to
20864 create the stack frame. This is done in a way that does not
20865 alter the frame layout, so is independent of the epilogue. */
20866 int n;
20867 int frame;
20868 n = 0;
20869 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20870 n++;
20871 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20872 if (frame && n * 4 >= frame)
20874 n = frame / 4;
20875 live_regs_mask |= (1 << n) - 1;
20876 saved_regs += frame;
20880 if (TARGET_LDRD
20881 && current_tune->prefer_ldrd_strd
20882 && !optimize_function_for_size_p (cfun))
20884 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
20885 if (TARGET_THUMB2)
20886 thumb2_emit_strd_push (live_regs_mask);
20887 else if (TARGET_ARM
20888 && !TARGET_APCS_FRAME
20889 && !IS_INTERRUPT (func_type))
20890 arm_emit_strd_push (live_regs_mask);
20891 else
20893 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
20894 RTX_FRAME_RELATED_P (insn) = 1;
20897 else
20899 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
20900 RTX_FRAME_RELATED_P (insn) = 1;
20904 if (! IS_VOLATILE (func_type))
20905 saved_regs += arm_save_coproc_regs ();
20907 if (frame_pointer_needed && TARGET_ARM)
20909 /* Create the new frame pointer. */
20910 if (TARGET_APCS_FRAME)
20912 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20913 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20914 RTX_FRAME_RELATED_P (insn) = 1;
20916 if (IS_NESTED (func_type))
20918 /* Recover the static chain register. */
20919 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20920 insn = gen_rtx_REG (SImode, 3);
20921 else
20923 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20924 insn = gen_frame_mem (SImode, insn);
20926 emit_set_insn (ip_rtx, insn);
20927 /* Add a USE to stop propagate_one_insn() from barfing. */
20928 emit_insn (gen_force_register_use (ip_rtx));
20931 else
20933 insn = GEN_INT (saved_regs - 4);
20934 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20935 stack_pointer_rtx, insn));
20936 RTX_FRAME_RELATED_P (insn) = 1;
20940 if (flag_stack_usage_info)
20941 current_function_static_stack_size
20942 = offsets->outgoing_args - offsets->saved_args;
20944 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20946 /* This add can produce multiple insns for a large constant, so we
20947 need to get tricky. */
20948 rtx last = get_last_insn ();
20950 amount = GEN_INT (offsets->saved_args + saved_regs
20951 - offsets->outgoing_args);
20953 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20954 amount));
20957 last = last ? NEXT_INSN (last) : get_insns ();
20958 RTX_FRAME_RELATED_P (last) = 1;
20960 while (last != insn);
20962 /* If the frame pointer is needed, emit a special barrier that
20963 will prevent the scheduler from moving stores to the frame
20964 before the stack adjustment. */
20965 if (frame_pointer_needed)
20966 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20967 hard_frame_pointer_rtx));
20971 if (frame_pointer_needed && TARGET_THUMB2)
20972 thumb_set_frame_pointer (offsets);
20974 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20976 unsigned long mask;
20978 mask = live_regs_mask;
20979 mask &= THUMB2_WORK_REGS;
20980 if (!IS_NESTED (func_type))
20981 mask |= (1 << IP_REGNUM);
20982 arm_load_pic_register (mask);
20985 /* If we are profiling, make sure no instructions are scheduled before
20986 the call to mcount. Similarly if the user has requested no
20987 scheduling in the prolog. Similarly if we want non-call exceptions
20988 using the EABI unwinder, to prevent faulting instructions from being
20989 swapped with a stack adjustment. */
20990 if (crtl->profile || !TARGET_SCHED_PROLOG
20991 || (arm_except_unwind_info (&global_options) == UI_TARGET
20992 && cfun->can_throw_non_call_exceptions))
20993 emit_insn (gen_blockage ());
20995 /* If the link register is being kept alive, with the return address in it,
20996 then make sure that it does not get reused by the ce2 pass. */
20997 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20998 cfun->machine->lr_save_eliminated = 1;
21001 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21002 static void
21003 arm_print_condition (FILE *stream)
21005 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21007 /* Branch conversion is not implemented for Thumb-2. */
21008 if (TARGET_THUMB)
21010 output_operand_lossage ("predicated Thumb instruction");
21011 return;
21013 if (current_insn_predicate != NULL)
21015 output_operand_lossage
21016 ("predicated instruction in conditional sequence");
21017 return;
21020 fputs (arm_condition_codes[arm_current_cc], stream);
21022 else if (current_insn_predicate)
21024 enum arm_cond_code code;
21026 if (TARGET_THUMB1)
21028 output_operand_lossage ("predicated Thumb instruction");
21029 return;
21032 code = get_arm_condition_code (current_insn_predicate);
21033 fputs (arm_condition_codes[code], stream);
21038 /* If CODE is 'd', then the X is a condition operand and the instruction
21039 should only be executed if the condition is true.
21040 if CODE is 'D', then the X is a condition operand and the instruction
21041 should only be executed if the condition is false: however, if the mode
21042 of the comparison is CCFPEmode, then always execute the instruction -- we
21043 do this because in these circumstances !GE does not necessarily imply LT;
21044 in these cases the instruction pattern will take care to make sure that
21045 an instruction containing %d will follow, thereby undoing the effects of
21046 doing this instruction unconditionally.
21047 If CODE is 'N' then X is a floating point operand that must be negated
21048 before output.
21049 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21050 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21051 static void
21052 arm_print_operand (FILE *stream, rtx x, int code)
21054 switch (code)
21056 case '@':
21057 fputs (ASM_COMMENT_START, stream);
21058 return;
21060 case '_':
21061 fputs (user_label_prefix, stream);
21062 return;
21064 case '|':
21065 fputs (REGISTER_PREFIX, stream);
21066 return;
21068 case '?':
21069 arm_print_condition (stream);
21070 return;
21072 case '(':
21073 /* Nothing in unified syntax, otherwise the current condition code. */
21074 if (!TARGET_UNIFIED_ASM)
21075 arm_print_condition (stream);
21076 break;
21078 case ')':
21079 /* The current condition code in unified syntax, otherwise nothing. */
21080 if (TARGET_UNIFIED_ASM)
21081 arm_print_condition (stream);
21082 break;
21084 case '.':
21085 /* The current condition code for a condition code setting instruction.
21086 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21087 if (TARGET_UNIFIED_ASM)
21089 fputc('s', stream);
21090 arm_print_condition (stream);
21092 else
21094 arm_print_condition (stream);
21095 fputc('s', stream);
21097 return;
21099 case '!':
21100 /* If the instruction is conditionally executed then print
21101 the current condition code, otherwise print 's'. */
21102 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21103 if (current_insn_predicate)
21104 arm_print_condition (stream);
21105 else
21106 fputc('s', stream);
21107 break;
21109 /* %# is a "break" sequence. It doesn't output anything, but is used to
21110 separate e.g. operand numbers from following text, if that text consists
21111 of further digits which we don't want to be part of the operand
21112 number. */
21113 case '#':
21114 return;
21116 case 'N':
21118 REAL_VALUE_TYPE r;
21119 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21120 r = real_value_negate (&r);
21121 fprintf (stream, "%s", fp_const_from_val (&r));
21123 return;
21125 /* An integer or symbol address without a preceding # sign. */
21126 case 'c':
21127 switch (GET_CODE (x))
21129 case CONST_INT:
21130 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21131 break;
21133 case SYMBOL_REF:
21134 output_addr_const (stream, x);
21135 break;
21137 case CONST:
21138 if (GET_CODE (XEXP (x, 0)) == PLUS
21139 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21141 output_addr_const (stream, x);
21142 break;
21144 /* Fall through. */
21146 default:
21147 output_operand_lossage ("Unsupported operand for code '%c'", code);
21149 return;
21151 /* An integer that we want to print in HEX. */
21152 case 'x':
21153 switch (GET_CODE (x))
21155 case CONST_INT:
21156 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21157 break;
21159 default:
21160 output_operand_lossage ("Unsupported operand for code '%c'", code);
21162 return;
21164 case 'B':
21165 if (CONST_INT_P (x))
21167 HOST_WIDE_INT val;
21168 val = ARM_SIGN_EXTEND (~INTVAL (x));
21169 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21171 else
21173 putc ('~', stream);
21174 output_addr_const (stream, x);
21176 return;
21178 case 'L':
21179 /* The low 16 bits of an immediate constant. */
21180 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21181 return;
21183 case 'i':
21184 fprintf (stream, "%s", arithmetic_instr (x, 1));
21185 return;
21187 case 'I':
21188 fprintf (stream, "%s", arithmetic_instr (x, 0));
21189 return;
21191 case 'S':
21193 HOST_WIDE_INT val;
21194 const char *shift;
21196 shift = shift_op (x, &val);
21198 if (shift)
21200 fprintf (stream, ", %s ", shift);
21201 if (val == -1)
21202 arm_print_operand (stream, XEXP (x, 1), 0);
21203 else
21204 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21207 return;
21209 /* An explanation of the 'Q', 'R' and 'H' register operands:
21211 In a pair of registers containing a DI or DF value the 'Q'
21212 operand returns the register number of the register containing
21213 the least significant part of the value. The 'R' operand returns
21214 the register number of the register containing the most
21215 significant part of the value.
21217 The 'H' operand returns the higher of the two register numbers.
21218 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21219 same as the 'Q' operand, since the most significant part of the
21220 value is held in the lower number register. The reverse is true
21221 on systems where WORDS_BIG_ENDIAN is false.
21223 The purpose of these operands is to distinguish between cases
21224 where the endian-ness of the values is important (for example
21225 when they are added together), and cases where the endian-ness
21226 is irrelevant, but the order of register operations is important.
21227 For example when loading a value from memory into a register
21228 pair, the endian-ness does not matter. Provided that the value
21229 from the lower memory address is put into the lower numbered
21230 register, and the value from the higher address is put into the
21231 higher numbered register, the load will work regardless of whether
21232 the value being loaded is big-wordian or little-wordian. The
21233 order of the two register loads can matter however, if the address
21234 of the memory location is actually held in one of the registers
21235 being overwritten by the load.
21237 The 'Q' and 'R' constraints are also available for 64-bit
21238 constants. */
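/* Example (little-endian, WORDS_BIG_ENDIAN false): for a DImode value held
   in r0/r1, '%Q' prints r0 (least significant half), '%R' prints r1 (most
   significant half) and '%H' also prints r1 (the higher register number).  */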
21239 case 'Q':
21240 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21242 rtx part = gen_lowpart (SImode, x);
21243 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21244 return;
21247 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21249 output_operand_lossage ("invalid operand for code '%c'", code);
21250 return;
21253 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21254 return;
21256 case 'R':
21257 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21259 enum machine_mode mode = GET_MODE (x);
21260 rtx part;
21262 if (mode == VOIDmode)
21263 mode = DImode;
21264 part = gen_highpart_mode (SImode, mode, x);
21265 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21266 return;
21269 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21271 output_operand_lossage ("invalid operand for code '%c'", code);
21272 return;
21275 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21276 return;
21278 case 'H':
21279 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21281 output_operand_lossage ("invalid operand for code '%c'", code);
21282 return;
21285 asm_fprintf (stream, "%r", REGNO (x) + 1);
21286 return;
21288 case 'J':
21289 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21291 output_operand_lossage ("invalid operand for code '%c'", code);
21292 return;
21295 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21296 return;
21298 case 'K':
21299 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21301 output_operand_lossage ("invalid operand for code '%c'", code);
21302 return;
21305 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21306 return;
21308 case 'm':
21309 asm_fprintf (stream, "%r",
21310 REG_P (XEXP (x, 0))
21311 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21312 return;
21314 case 'M':
21315 asm_fprintf (stream, "{%r-%r}",
21316 REGNO (x),
21317 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21318 return;
21320 /* Like 'M', but writing doubleword vector registers, for use by Neon
21321 insns. */
21322 case 'h':
21324 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21325 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21326 if (numregs == 1)
21327 asm_fprintf (stream, "{d%d}", regno);
21328 else
21329 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21331 return;
21333 case 'd':
21334 /* CONST_TRUE_RTX means always -- that's the default. */
21335 if (x == const_true_rtx)
21336 return;
21338 if (!COMPARISON_P (x))
21340 output_operand_lossage ("invalid operand for code '%c'", code);
21341 return;
21344 fputs (arm_condition_codes[get_arm_condition_code (x)],
21345 stream);
21346 return;
21348 case 'D':
21349 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21350 want to do that. */
21351 if (x == const_true_rtx)
21353 output_operand_lossage ("instruction never executed");
21354 return;
21356 if (!COMPARISON_P (x))
21358 output_operand_lossage ("invalid operand for code '%c'", code);
21359 return;
21362 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21363 (get_arm_condition_code (x))],
21364 stream);
21365 return;
21367 case 's':
21368 case 'V':
21369 case 'W':
21370 case 'X':
21371 case 'Y':
21372 case 'Z':
21373 /* Former Maverick support, removed after GCC-4.7. */
21374 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21375 return;
21377 case 'U':
21378 if (!REG_P (x)
21379 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21380 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21381 /* Bad value for wCG register number. */
21383 output_operand_lossage ("invalid operand for code '%c'", code);
21384 return;
21387 else
21388 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21389 return;
21391 /* Print an iWMMXt control register name. */
21392 case 'w':
21393 if (!CONST_INT_P (x)
21394 || INTVAL (x) < 0
21395 || INTVAL (x) >= 16)
21396 /* Bad value for wC register number. */
21398 output_operand_lossage ("invalid operand for code '%c'", code);
21399 return;
21402 else
21404 static const char * wc_reg_names [16] =
21406 "wCID", "wCon", "wCSSF", "wCASF",
21407 "wC4", "wC5", "wC6", "wC7",
21408 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21409 "wC12", "wC13", "wC14", "wC15"
21412 fputs (wc_reg_names [INTVAL (x)], stream);
21414 return;
21416 /* Print the high single-precision register of a VFP double-precision
21417 register. */
21418 case 'p':
21420 int mode = GET_MODE (x);
21421 int regno;
21423 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21425 output_operand_lossage ("invalid operand for code '%c'", code);
21426 return;
21429 regno = REGNO (x);
21430 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21432 output_operand_lossage ("invalid operand for code '%c'", code);
21433 return;
21436 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21438 return;
21440 /* Print a VFP/Neon double precision or quad precision register name. */
21441 case 'P':
21442 case 'q':
21444 int mode = GET_MODE (x);
21445 int is_quad = (code == 'q');
21446 int regno;
21448 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21450 output_operand_lossage ("invalid operand for code '%c'", code);
21451 return;
21454 if (!REG_P (x)
21455 || !IS_VFP_REGNUM (REGNO (x)))
21457 output_operand_lossage ("invalid operand for code '%c'", code);
21458 return;
21461 regno = REGNO (x);
21462 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21463 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21465 output_operand_lossage ("invalid operand for code '%c'", code);
21466 return;
21469 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21470 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21472 return;
21474 /* These two codes print the low/high doubleword register of a Neon quad
21475 register, respectively. For pair-structure types, they can also print
21476 low/high quadword registers. */
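/* As a worked example (register numbers chosen purely for illustration):
   for a 16-byte operand held in q1, 'e' prints d2 and 'f' prints d3.  */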
21477 case 'e':
21478 case 'f':
21480 int mode = GET_MODE (x);
21481 int regno;
21483 if ((GET_MODE_SIZE (mode) != 16
21484 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21486 output_operand_lossage ("invalid operand for code '%c'", code);
21487 return;
21490 regno = REGNO (x);
21491 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21493 output_operand_lossage ("invalid operand for code '%c'", code);
21494 return;
21497 if (GET_MODE_SIZE (mode) == 16)
21498 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21499 + (code == 'f' ? 1 : 0));
21500 else
21501 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21502 + (code == 'f' ? 1 : 0));
21504 return;
21506 /* Print a VFPv3 floating-point constant, represented as an integer
21507 index. */
21508 case 'G':
21510 int index = vfp3_const_double_index (x);
21511 gcc_assert (index != -1);
21512 fprintf (stream, "%d", index);
21514 return;
21516 /* Print bits representing opcode features for Neon.
21518 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21519 and polynomials as unsigned.
21521 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21523 Bit 2 is 1 for rounding functions, 0 otherwise. */
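/* For instance, an operand with INTVAL 5 (signed, integer, rounding)
   makes 'T' print 's', 'F' print 'i', and 'O' append an "r".  */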
21525 /* Identify the type as 's', 'u', 'p' or 'f'. */
21526 case 'T':
21528 HOST_WIDE_INT bits = INTVAL (x);
21529 fputc ("uspf"[bits & 3], stream);
21531 return;
21533 /* Likewise, but signed and unsigned integers are both 'i'. */
21534 case 'F':
21536 HOST_WIDE_INT bits = INTVAL (x);
21537 fputc ("iipf"[bits & 3], stream);
21539 return;
21541 /* As for 'T', but emit 'u' instead of 'p'. */
21542 case 't':
21544 HOST_WIDE_INT bits = INTVAL (x);
21545 fputc ("usuf"[bits & 3], stream);
21547 return;
21549 /* Bit 2: rounding (vs none). */
21550 case 'O':
21552 HOST_WIDE_INT bits = INTVAL (x);
21553 fputs ((bits & 4) != 0 ? "r" : "", stream);
21555 return;
21557 /* Memory operand for vld1/vst1 instruction. */
21558 case 'A':
21560 rtx addr;
21561 bool postinc = FALSE;
21562 unsigned align, memsize, align_bits;
21564 gcc_assert (MEM_P (x));
21565 addr = XEXP (x, 0);
21566 if (GET_CODE (addr) == POST_INC)
21568 postinc = 1;
21569 addr = XEXP (addr, 0);
21571 asm_fprintf (stream, "[%r", REGNO (addr));
21573 /* We know the alignment of this access, so we can emit a hint in the
21574 instruction (for some alignments) as an aid to the memory subsystem
21575 of the target. */
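/* For example (r0 is used purely as an illustration), a 16-byte access
   known to be 16-byte aligned is printed as "[r0:128]", with a trailing
   "!" added below when the address post-increments.  */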
21576 align = MEM_ALIGN (x) >> 3;
21577 memsize = MEM_SIZE (x);
21579 /* Only certain alignment specifiers are supported by the hardware. */
21580 if (memsize == 32 && (align % 32) == 0)
21581 align_bits = 256;
21582 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21583 align_bits = 128;
21584 else if (memsize >= 8 && (align % 8) == 0)
21585 align_bits = 64;
21586 else
21587 align_bits = 0;
21589 if (align_bits != 0)
21590 asm_fprintf (stream, ":%d", align_bits);
21592 asm_fprintf (stream, "]");
21594 if (postinc)
21595 fputs("!", stream);
21597 return;
21599 case 'C':
21601 rtx addr;
21603 gcc_assert (MEM_P (x));
21604 addr = XEXP (x, 0);
21605 gcc_assert (REG_P (addr));
21606 asm_fprintf (stream, "[%r]", REGNO (addr));
21608 return;
21610 /* Translate an S register number into a D register number and element index. */
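/* E.g. (assuming the operand really is a single-precision register)
   s5 is printed as d2[1], since d2 overlaps s4 and s5.  */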
21611 case 'y':
21613 int mode = GET_MODE (x);
21614 int regno;
21616 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21618 output_operand_lossage ("invalid operand for code '%c'", code);
21619 return;
21622 regno = REGNO (x);
21623 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21625 output_operand_lossage ("invalid operand for code '%c'", code);
21626 return;
21629 regno = regno - FIRST_VFP_REGNUM;
21630 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21632 return;
21634 case 'v':
21635 gcc_assert (CONST_DOUBLE_P (x));
21636 int result;
21637 result = vfp3_const_double_for_fract_bits (x);
21638 if (result == 0)
21639 result = vfp3_const_double_for_bits (x);
21640 fprintf (stream, "#%d", result);
21641 return;
21643 /* Register specifier for vld1.16/vst1.16. Translate the S register
21644 number into a D register number and element index. */
21645 case 'z':
21647 int mode = GET_MODE (x);
21648 int regno;
21650 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21652 output_operand_lossage ("invalid operand for code '%c'", code);
21653 return;
21656 regno = REGNO (x);
21657 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21659 output_operand_lossage ("invalid operand for code '%c'", code);
21660 return;
21663 regno = regno - FIRST_VFP_REGNUM;
21664 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21666 return;
21668 default:
21669 if (x == 0)
21671 output_operand_lossage ("missing operand");
21672 return;
21675 switch (GET_CODE (x))
21677 case REG:
21678 asm_fprintf (stream, "%r", REGNO (x));
21679 break;
21681 case MEM:
21682 output_memory_reference_mode = GET_MODE (x);
21683 output_address (XEXP (x, 0));
21684 break;
21686 case CONST_DOUBLE:
21687 if (TARGET_NEON)
21689 char fpstr[20];
21690 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21691 sizeof (fpstr), 0, 1);
21692 fprintf (stream, "#%s", fpstr);
21694 else
21695 fprintf (stream, "#%s", fp_immediate_constant (x));
21696 break;
21698 default:
21699 gcc_assert (GET_CODE (x) != NEG);
21700 fputc ('#', stream);
21701 if (GET_CODE (x) == HIGH)
21703 fputs (":lower16:", stream);
21704 x = XEXP (x, 0);
21707 output_addr_const (stream, x);
21708 break;
21713 /* Target hook for printing a memory address. */
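/* A few illustrative outputs (register names are arbitrary examples):
     (reg r0)                       -> [r0]
     (plus (reg r0) (const_int 4))  -> [r0, #4]
     (pre_dec (reg r0)), SImode     -> [r0, #-4]!
     (post_inc (reg r0)), SImode    -> [r0], #4  */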
21714 static void
21715 arm_print_operand_address (FILE *stream, rtx x)
21717 if (TARGET_32BIT)
21719 int is_minus = GET_CODE (x) == MINUS;
21721 if (REG_P (x))
21722 asm_fprintf (stream, "[%r]", REGNO (x));
21723 else if (GET_CODE (x) == PLUS || is_minus)
21725 rtx base = XEXP (x, 0);
21726 rtx index = XEXP (x, 1);
21727 HOST_WIDE_INT offset = 0;
21728 if (!REG_P (base)
21729 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21731 /* Ensure that BASE is a register. */
21732 /* (one of them must be). */
21733 /* Also ensure that SP is not used as an index register. */
21734 rtx temp = base;
21735 base = index;
21736 index = temp;
21738 switch (GET_CODE (index))
21740 case CONST_INT:
21741 offset = INTVAL (index);
21742 if (is_minus)
21743 offset = -offset;
21744 asm_fprintf (stream, "[%r, #%wd]",
21745 REGNO (base), offset);
21746 break;
21748 case REG:
21749 asm_fprintf (stream, "[%r, %s%r]",
21750 REGNO (base), is_minus ? "-" : "",
21751 REGNO (index));
21752 break;
21754 case MULT:
21755 case ASHIFTRT:
21756 case LSHIFTRT:
21757 case ASHIFT:
21758 case ROTATERT:
21760 asm_fprintf (stream, "[%r, %s%r",
21761 REGNO (base), is_minus ? "-" : "",
21762 REGNO (XEXP (index, 0)));
21763 arm_print_operand (stream, index, 'S');
21764 fputs ("]", stream);
21765 break;
21768 default:
21769 gcc_unreachable ();
21772 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21773 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21775 extern enum machine_mode output_memory_reference_mode;
21777 gcc_assert (REG_P (XEXP (x, 0)));
21779 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21780 asm_fprintf (stream, "[%r, #%s%d]!",
21781 REGNO (XEXP (x, 0)),
21782 GET_CODE (x) == PRE_DEC ? "-" : "",
21783 GET_MODE_SIZE (output_memory_reference_mode));
21784 else
21785 asm_fprintf (stream, "[%r], #%s%d",
21786 REGNO (XEXP (x, 0)),
21787 GET_CODE (x) == POST_DEC ? "-" : "",
21788 GET_MODE_SIZE (output_memory_reference_mode));
21790 else if (GET_CODE (x) == PRE_MODIFY)
21792 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21793 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21794 asm_fprintf (stream, "#%wd]!",
21795 INTVAL (XEXP (XEXP (x, 1), 1)));
21796 else
21797 asm_fprintf (stream, "%r]!",
21798 REGNO (XEXP (XEXP (x, 1), 1)));
21800 else if (GET_CODE (x) == POST_MODIFY)
21802 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21803 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21804 asm_fprintf (stream, "#%wd",
21805 INTVAL (XEXP (XEXP (x, 1), 1)));
21806 else
21807 asm_fprintf (stream, "%r",
21808 REGNO (XEXP (XEXP (x, 1), 1)));
21810 else output_addr_const (stream, x);
21812 else
21814 if (REG_P (x))
21815 asm_fprintf (stream, "[%r]", REGNO (x));
21816 else if (GET_CODE (x) == POST_INC)
21817 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21818 else if (GET_CODE (x) == PLUS)
21820 gcc_assert (REG_P (XEXP (x, 0)));
21821 if (CONST_INT_P (XEXP (x, 1)))
21822 asm_fprintf (stream, "[%r, #%wd]",
21823 REGNO (XEXP (x, 0)),
21824 INTVAL (XEXP (x, 1)));
21825 else
21826 asm_fprintf (stream, "[%r, %r]",
21827 REGNO (XEXP (x, 0)),
21828 REGNO (XEXP (x, 1)));
21830 else
21831 output_addr_const (stream, x);
21835 /* Target hook for indicating whether a punctuation character for
21836 TARGET_PRINT_OPERAND is valid. */
21837 static bool
21838 arm_print_operand_punct_valid_p (unsigned char code)
21840 return (code == '@' || code == '|' || code == '.'
21841 || code == '(' || code == ')' || code == '#'
21842 || (TARGET_32BIT && (code == '?'))
21843 || (TARGET_THUMB2 && (code == '!'))
21844 || (TARGET_THUMB && (code == '_')));
21847 /* Target hook for assembling integer objects. The ARM version needs to
21848 handle word-sized values specially. */
21849 static bool
21850 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21852 enum machine_mode mode;
21854 if (size == UNITS_PER_WORD && aligned_p)
21856 fputs ("\t.word\t", asm_out_file);
21857 output_addr_const (asm_out_file, x);
21859 /* Mark symbols as position independent. We only do this in the
21860 .text segment, not in the .data segment. */
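21860	 The directive emitted below then looks like, e.g.,
21860	 ".word symbol(GOTOFF)" or ".word symbol(GOT)".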
21861 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21862 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21864 /* See legitimize_pic_address for an explanation of the
21865 TARGET_VXWORKS_RTP check. */
21866 if (!arm_pic_data_is_text_relative
21867 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21868 fputs ("(GOT)", asm_out_file);
21869 else
21870 fputs ("(GOTOFF)", asm_out_file);
21872 fputc ('\n', asm_out_file);
21873 return true;
21876 mode = GET_MODE (x);
21878 if (arm_vector_mode_supported_p (mode))
21880 int i, units;
21882 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21884 units = CONST_VECTOR_NUNITS (x);
21885 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21887 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21888 for (i = 0; i < units; i++)
21890 rtx elt = CONST_VECTOR_ELT (x, i);
21891 assemble_integer
21892 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21894 else
21895 for (i = 0; i < units; i++)
21897 rtx elt = CONST_VECTOR_ELT (x, i);
21898 REAL_VALUE_TYPE rval;
21900 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21902 assemble_real
21903 (rval, GET_MODE_INNER (mode),
21904 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21907 return true;
21910 return default_assemble_integer (x, size, aligned_p);
21913 static void
21914 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21916 section *s;
21918 if (!TARGET_AAPCS_BASED)
21920 (is_ctor ?
21921 default_named_section_asm_out_constructor
21922 : default_named_section_asm_out_destructor) (symbol, priority);
21923 return;
21926 /* Put these in the .init_array section, using a special relocation. */
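/* E.g. a constructor with priority 123 is placed in a section named
   ".init_array.00123".  */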
21927 if (priority != DEFAULT_INIT_PRIORITY)
21929 char buf[18];
21930 sprintf (buf, "%s.%.5u",
21931 is_ctor ? ".init_array" : ".fini_array",
21932 priority);
21933 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21935 else if (is_ctor)
21936 s = ctors_section;
21937 else
21938 s = dtors_section;
21940 switch_to_section (s);
21941 assemble_align (POINTER_SIZE);
21942 fputs ("\t.word\t", asm_out_file);
21943 output_addr_const (asm_out_file, symbol);
21944 fputs ("(target1)\n", asm_out_file);
21947 /* Add a function to the list of static constructors. */
21949 static void
21950 arm_elf_asm_constructor (rtx symbol, int priority)
21952 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21955 /* Add a function to the list of static destructors. */
21957 static void
21958 arm_elf_asm_destructor (rtx symbol, int priority)
21960 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21963 /* A finite state machine takes care of noticing whether or not instructions
21964 can be conditionally executed, and thus decrease execution time and code
21965 size by deleting branch instructions. The fsm is controlled by
21966 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21968 /* The states of the fsm controlling condition codes are:
21969 0: normal, do nothing special
21970 1: make ASM_OUTPUT_OPCODE not output this instruction
21971 2: make ASM_OUTPUT_OPCODE not output this instruction
21972 3: make instructions conditional
21973 4: make instructions conditional
21975 State transitions (state->state by whom under condition):
21976 0 -> 1 final_prescan_insn if the `target' is a label
21977 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21978 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21979 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21980 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21981 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21982 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21983 (the target insn is arm_target_insn).
21985 If the jump clobbers the conditions then we use states 2 and 4.
21987 A similar thing can be done with conditional return insns.
21989 XXX In case the `target' is an unconditional branch, this conditionalising
21990 of the instructions always reduces code size, but not always execution
21991 time. But then, I want to reduce the code size to somewhere near what
21992 /bin/cc produces. */
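/* As a sketch of the effect (register numbers and label are purely
   illustrative), a sequence such as

	cmp	r3, #0
	beq	.L1
	add	r3, r3, #1
   .L1:

   can instead be emitted as

	cmp	r3, #0
	addne	r3, r3, #1

   provided every skipped insn can be conditionalised.  */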
21994 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21995 instructions. When a COND_EXEC instruction is seen the subsequent
21996 instructions are scanned so that multiple conditional instructions can be
21997 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21998 specify the length and true/false mask for the IT block. These will be
21999 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
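/* For example, three conditionalised insns where the first two use the
   block condition and the third uses its inverse give arm_condexec_mask
   0b011 and arm_condexec_masklen 3, which thumb2_asm_output_opcode
   prints as "itte <cond>".  */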
22001 /* Returns the index of the ARM condition code string in
22002 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22003 COMPARISON should be an rtx like `(eq (...) (...))'. */
22005 enum arm_cond_code
22006 maybe_get_arm_condition_code (rtx comparison)
22008 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22009 enum arm_cond_code code;
22010 enum rtx_code comp_code = GET_CODE (comparison);
22012 if (GET_MODE_CLASS (mode) != MODE_CC)
22013 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22014 XEXP (comparison, 1));
22016 switch (mode)
22018 case CC_DNEmode: code = ARM_NE; goto dominance;
22019 case CC_DEQmode: code = ARM_EQ; goto dominance;
22020 case CC_DGEmode: code = ARM_GE; goto dominance;
22021 case CC_DGTmode: code = ARM_GT; goto dominance;
22022 case CC_DLEmode: code = ARM_LE; goto dominance;
22023 case CC_DLTmode: code = ARM_LT; goto dominance;
22024 case CC_DGEUmode: code = ARM_CS; goto dominance;
22025 case CC_DGTUmode: code = ARM_HI; goto dominance;
22026 case CC_DLEUmode: code = ARM_LS; goto dominance;
22027 case CC_DLTUmode: code = ARM_CC;
22029 dominance:
22030 if (comp_code == EQ)
22031 return ARM_INVERSE_CONDITION_CODE (code);
22032 if (comp_code == NE)
22033 return code;
22034 return ARM_NV;
22036 case CC_NOOVmode:
22037 switch (comp_code)
22039 case NE: return ARM_NE;
22040 case EQ: return ARM_EQ;
22041 case GE: return ARM_PL;
22042 case LT: return ARM_MI;
22043 default: return ARM_NV;
22046 case CC_Zmode:
22047 switch (comp_code)
22049 case NE: return ARM_NE;
22050 case EQ: return ARM_EQ;
22051 default: return ARM_NV;
22054 case CC_Nmode:
22055 switch (comp_code)
22057 case NE: return ARM_MI;
22058 case EQ: return ARM_PL;
22059 default: return ARM_NV;
22062 case CCFPEmode:
22063 case CCFPmode:
22064 /* We can handle all cases except UNEQ and LTGT. */
22065 switch (comp_code)
22067 case GE: return ARM_GE;
22068 case GT: return ARM_GT;
22069 case LE: return ARM_LS;
22070 case LT: return ARM_MI;
22071 case NE: return ARM_NE;
22072 case EQ: return ARM_EQ;
22073 case ORDERED: return ARM_VC;
22074 case UNORDERED: return ARM_VS;
22075 case UNLT: return ARM_LT;
22076 case UNLE: return ARM_LE;
22077 case UNGT: return ARM_HI;
22078 case UNGE: return ARM_PL;
22079 /* UNEQ and LTGT do not have a representation. */
22080 case UNEQ: /* Fall through. */
22081 case LTGT: /* Fall through. */
22082 default: return ARM_NV;
22085 case CC_SWPmode:
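/* The operands of the original comparison were swapped when the flags
   were set, so each condition below is mirrored (e.g. GT becomes LT).  */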
22086 switch (comp_code)
22088 case NE: return ARM_NE;
22089 case EQ: return ARM_EQ;
22090 case GE: return ARM_LE;
22091 case GT: return ARM_LT;
22092 case LE: return ARM_GE;
22093 case LT: return ARM_GT;
22094 case GEU: return ARM_LS;
22095 case GTU: return ARM_CC;
22096 case LEU: return ARM_CS;
22097 case LTU: return ARM_HI;
22098 default: return ARM_NV;
22101 case CC_Cmode:
22102 switch (comp_code)
22104 case LTU: return ARM_CS;
22105 case GEU: return ARM_CC;
22106 default: return ARM_NV;
22109 case CC_CZmode:
22110 switch (comp_code)
22112 case NE: return ARM_NE;
22113 case EQ: return ARM_EQ;
22114 case GEU: return ARM_CS;
22115 case GTU: return ARM_HI;
22116 case LEU: return ARM_LS;
22117 case LTU: return ARM_CC;
22118 default: return ARM_NV;
22121 case CC_NCVmode:
22122 switch (comp_code)
22124 case GE: return ARM_GE;
22125 case LT: return ARM_LT;
22126 case GEU: return ARM_CS;
22127 case LTU: return ARM_CC;
22128 default: return ARM_NV;
22131 case CCmode:
22132 switch (comp_code)
22134 case NE: return ARM_NE;
22135 case EQ: return ARM_EQ;
22136 case GE: return ARM_GE;
22137 case GT: return ARM_GT;
22138 case LE: return ARM_LE;
22139 case LT: return ARM_LT;
22140 case GEU: return ARM_CS;
22141 case GTU: return ARM_HI;
22142 case LEU: return ARM_LS;
22143 case LTU: return ARM_CC;
22144 default: return ARM_NV;
22147 default: gcc_unreachable ();
22151 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22152 static enum arm_cond_code
22153 get_arm_condition_code (rtx comparison)
22155 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22156 gcc_assert (code != ARM_NV);
22157 return code;
22160 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22161 instructions. */
22162 void
22163 thumb2_final_prescan_insn (rtx insn)
22165 rtx first_insn = insn;
22166 rtx body = PATTERN (insn);
22167 rtx predicate;
22168 enum arm_cond_code code;
22169 int n;
22170 int mask;
22171 int max;
22173 /* max_insns_skipped in the tune was already taken into account in the
22174 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22175 just emit the IT blocks as best we can. It does not make sense to split
22176 the IT blocks. */
22177 max = MAX_INSN_PER_IT_BLOCK;
22179 /* Remove the previous insn from the count of insns to be output. */
22180 if (arm_condexec_count)
22181 arm_condexec_count--;
22183 /* Nothing to do if we are already inside a conditional block. */
22184 if (arm_condexec_count)
22185 return;
22187 if (GET_CODE (body) != COND_EXEC)
22188 return;
22190 /* Conditional jumps are implemented directly. */
22191 if (JUMP_P (insn))
22192 return;
22194 predicate = COND_EXEC_TEST (body);
22195 arm_current_cc = get_arm_condition_code (predicate);
22197 n = get_attr_ce_count (insn);
22198 arm_condexec_count = 1;
22199 arm_condexec_mask = (1 << n) - 1;
22200 arm_condexec_masklen = n;
22201 /* See if subsequent instructions can be combined into the same block. */
22202 for (;;)
22204 insn = next_nonnote_insn (insn);
22206 /* Jumping into the middle of an IT block is illegal, so a label or
22207 barrier terminates the block. */
22208 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22209 break;
22211 body = PATTERN (insn);
22212 /* USE and CLOBBER aren't really insns, so just skip them. */
22213 if (GET_CODE (body) == USE
22214 || GET_CODE (body) == CLOBBER)
22215 continue;
22217 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22218 if (GET_CODE (body) != COND_EXEC)
22219 break;
22220 /* Maximum number of conditionally executed instructions in a block. */
22221 n = get_attr_ce_count (insn);
22222 if (arm_condexec_masklen + n > max)
22223 break;
22225 predicate = COND_EXEC_TEST (body);
22226 code = get_arm_condition_code (predicate);
22227 mask = (1 << n) - 1;
22228 if (arm_current_cc == code)
22229 arm_condexec_mask |= (mask << arm_condexec_masklen);
22230 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22231 break;
22233 arm_condexec_count++;
22234 arm_condexec_masklen += n;
22236 /* A jump must be the last instruction in a conditional block. */
22237 if (JUMP_P (insn))
22238 break;
22240 /* Restore recog_data (getting the attributes of other insns can
22241 destroy this array, but final.c assumes that it remains intact
22242 across this call). */
22243 extract_constrain_insn_cached (first_insn);
22246 void
22247 arm_final_prescan_insn (rtx insn)
22249 /* BODY will hold the body of INSN. */
22250 rtx body = PATTERN (insn);
22252 /* This will be 1 if trying to repeat the trick, and things need to be
22253 reversed if it appears to fail. */
22254 int reverse = 0;
22256 /* If we start with a return insn, we only succeed if we find another one. */
22257 int seeking_return = 0;
22258 enum rtx_code return_code = UNKNOWN;
22260 /* START_INSN will hold the insn from where we start looking. This is the
22261 first insn after the following code_label if REVERSE is true. */
22262 rtx start_insn = insn;
22264 /* If in state 4, check if the target branch is reached, in order to
22265 change back to state 0. */
22266 if (arm_ccfsm_state == 4)
22268 if (insn == arm_target_insn)
22270 arm_target_insn = NULL;
22271 arm_ccfsm_state = 0;
22273 return;
22276 /* If in state 3, it is possible to repeat the trick, if this insn is an
22277 unconditional branch to a label, and immediately following this branch
22278 is the previous target label which is only used once, and the label this
22279 branch jumps to is not too far off. */
22280 if (arm_ccfsm_state == 3)
22282 if (simplejump_p (insn))
22284 start_insn = next_nonnote_insn (start_insn);
22285 if (BARRIER_P (start_insn))
22287 /* XXX Isn't this always a barrier? */
22288 start_insn = next_nonnote_insn (start_insn);
22290 if (LABEL_P (start_insn)
22291 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22292 && LABEL_NUSES (start_insn) == 1)
22293 reverse = TRUE;
22294 else
22295 return;
22297 else if (ANY_RETURN_P (body))
22299 start_insn = next_nonnote_insn (start_insn);
22300 if (BARRIER_P (start_insn))
22301 start_insn = next_nonnote_insn (start_insn);
22302 if (LABEL_P (start_insn)
22303 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22304 && LABEL_NUSES (start_insn) == 1)
22306 reverse = TRUE;
22307 seeking_return = 1;
22308 return_code = GET_CODE (body);
22310 else
22311 return;
22313 else
22314 return;
22317 gcc_assert (!arm_ccfsm_state || reverse);
22318 if (!JUMP_P (insn))
22319 return;
22321 /* This jump might be paralleled with a clobber of the condition codes;
22322 the jump should always come first. */
22323 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22324 body = XVECEXP (body, 0, 0);
22326 if (reverse
22327 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22328 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22330 int insns_skipped;
22331 int fail = FALSE, succeed = FALSE;
22332 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22333 int then_not_else = TRUE;
22334 rtx this_insn = start_insn, label = 0;
22336 /* Register the insn jumped to. */
22337 if (reverse)
22339 if (!seeking_return)
22340 label = XEXP (SET_SRC (body), 0);
22342 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22343 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22344 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22346 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22347 then_not_else = FALSE;
22349 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22351 seeking_return = 1;
22352 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22354 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22356 seeking_return = 1;
22357 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22358 then_not_else = FALSE;
22360 else
22361 gcc_unreachable ();
22363 /* See how many insns this branch skips, and what kind of insns. If all
22364 insns are okay, and the label or unconditional branch to the same
22365 label is not too far away, succeed. */
22366 for (insns_skipped = 0;
22367 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22369 rtx scanbody;
22371 this_insn = next_nonnote_insn (this_insn);
22372 if (!this_insn)
22373 break;
22375 switch (GET_CODE (this_insn))
22377 case CODE_LABEL:
22378 /* Succeed if it is the target label, otherwise fail since
22379 control falls in from somewhere else. */
22380 if (this_insn == label)
22382 arm_ccfsm_state = 1;
22383 succeed = TRUE;
22385 else
22386 fail = TRUE;
22387 break;
22389 case BARRIER:
22390 /* Succeed if the following insn is the target label.
22391 Otherwise fail.
22392 If return insns are used then the last insn in a function
22393 will be a barrier. */
22394 this_insn = next_nonnote_insn (this_insn);
22395 if (this_insn && this_insn == label)
22397 arm_ccfsm_state = 1;
22398 succeed = TRUE;
22400 else
22401 fail = TRUE;
22402 break;
22404 case CALL_INSN:
22405 /* The AAPCS says that conditional calls should not be
22406 used since they make interworking inefficient (the
22407 linker can't transform BL<cond> into BLX). That's
22408 only a problem if the machine has BLX. */
22409 if (arm_arch5)
22411 fail = TRUE;
22412 break;
22415 /* Succeed if the following insn is the target label, or
22416 if the following two insns are a barrier and the
22417 target label. */
22418 this_insn = next_nonnote_insn (this_insn);
22419 if (this_insn && BARRIER_P (this_insn))
22420 this_insn = next_nonnote_insn (this_insn);
22422 if (this_insn && this_insn == label
22423 && insns_skipped < max_insns_skipped)
22425 arm_ccfsm_state = 1;
22426 succeed = TRUE;
22428 else
22429 fail = TRUE;
22430 break;
22432 case JUMP_INSN:
22433 /* If this is an unconditional branch to the same label, succeed.
22434 If it is to another label, do nothing. If it is conditional,
22435 fail. */
22436 /* XXX Probably, the tests for SET and the PC are
22437 unnecessary. */
22439 scanbody = PATTERN (this_insn);
22440 if (GET_CODE (scanbody) == SET
22441 && GET_CODE (SET_DEST (scanbody)) == PC)
22443 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22444 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22446 arm_ccfsm_state = 2;
22447 succeed = TRUE;
22449 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22450 fail = TRUE;
22452 /* Fail if a conditional return is undesirable (e.g. on a
22453 StrongARM), but still allow this if optimizing for size. */
22454 else if (GET_CODE (scanbody) == return_code
22455 && !use_return_insn (TRUE, NULL)
22456 && !optimize_size)
22457 fail = TRUE;
22458 else if (GET_CODE (scanbody) == return_code)
22460 arm_ccfsm_state = 2;
22461 succeed = TRUE;
22463 else if (GET_CODE (scanbody) == PARALLEL)
22465 switch (get_attr_conds (this_insn))
22467 case CONDS_NOCOND:
22468 break;
22469 default:
22470 fail = TRUE;
22471 break;
22474 else
22475 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22477 break;
22479 case INSN:
22480 /* Instructions using or affecting the condition codes make it
22481 fail. */
22482 scanbody = PATTERN (this_insn);
22483 if (!(GET_CODE (scanbody) == SET
22484 || GET_CODE (scanbody) == PARALLEL)
22485 || get_attr_conds (this_insn) != CONDS_NOCOND)
22486 fail = TRUE;
22487 break;
22489 default:
22490 break;
22493 if (succeed)
22495 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22496 arm_target_label = CODE_LABEL_NUMBER (label);
22497 else
22499 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22501 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22503 this_insn = next_nonnote_insn (this_insn);
22504 gcc_assert (!this_insn
22505 || (!BARRIER_P (this_insn)
22506 && !LABEL_P (this_insn)));
22508 if (!this_insn)
22510 /* Oh, dear!  We ran off the end; give up. */
22511 extract_constrain_insn_cached (insn);
22512 arm_ccfsm_state = 0;
22513 arm_target_insn = NULL;
22514 return;
22516 arm_target_insn = this_insn;
22519 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22520 what it was. */
22521 if (!reverse)
22522 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22524 if (reverse || then_not_else)
22525 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22528 /* Restore recog_data (getting the attributes of other insns can
22529 destroy this array, but final.c assumes that it remains intact
22530 across this call). */
22531 extract_constrain_insn_cached (insn);
22535 /* Output IT instructions. */
22536 void
22537 thumb2_asm_output_opcode (FILE * stream)
22539 char buff[5];
22540 int n;
22542 if (arm_condexec_mask)
22544 for (n = 0; n < arm_condexec_masklen; n++)
22545 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22546 buff[n] = 0;
22547 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22548 arm_condition_codes[arm_current_cc]);
22549 arm_condexec_mask = 0;
22553 /* Returns true if REGNO is a valid register
22554 for holding a quantity of type MODE. */
22556 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22558 if (GET_MODE_CLASS (mode) == MODE_CC)
22559 return (regno == CC_REGNUM
22560 || (TARGET_HARD_FLOAT && TARGET_VFP
22561 && regno == VFPCC_REGNUM));
22563 if (TARGET_THUMB1)
22564 /* For the Thumb we only allow values bigger than SImode in
22565 registers 0 - 6, so that there is always a second low
22566 register available to hold the upper part of the value.
22567 We probably ought to ensure that the register is the
22568 start of an even numbered register pair. */
22569 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22571 if (TARGET_HARD_FLOAT && TARGET_VFP
22572 && IS_VFP_REGNUM (regno))
22574 if (mode == SFmode || mode == SImode)
22575 return VFP_REGNO_OK_FOR_SINGLE (regno);
22577 if (mode == DFmode)
22578 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22580 /* VFP registers can hold HFmode values, but there is no point in
22581 putting them there unless we have hardware conversion insns. */
22582 if (mode == HFmode)
22583 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22585 if (TARGET_NEON)
22586 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22587 || (VALID_NEON_QREG_MODE (mode)
22588 && NEON_REGNO_OK_FOR_QUAD (regno))
22589 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22590 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22591 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22592 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22593 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22595 return FALSE;
22598 if (TARGET_REALLY_IWMMXT)
22600 if (IS_IWMMXT_GR_REGNUM (regno))
22601 return mode == SImode;
22603 if (IS_IWMMXT_REGNUM (regno))
22604 return VALID_IWMMXT_REG_MODE (mode);
22607 /* We allow almost any value to be stored in the general registers.
22608 Restrict doubleword quantities to even register pairs so that we can
22609 use ldrd. Do not allow very large Neon structure opaque modes in
22610 general registers; they would use too many. */
22611 if (regno <= LAST_ARM_REGNUM)
22612 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22613 && ARM_NUM_REGS (mode) <= 4;
22615 if (regno == FRAME_POINTER_REGNUM
22616 || regno == ARG_POINTER_REGNUM)
22617 /* We only allow integers in the fake hard registers. */
22618 return GET_MODE_CLASS (mode) == MODE_INT;
22620 return FALSE;
22623 /* Implement MODES_TIEABLE_P. */
22625 bool
22626 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22628 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22629 return true;
22631 /* We specifically want to allow elements of "structure" modes to
22632 be tieable to the structure. This more general condition allows
22633 other rarer situations too. */
22634 if (TARGET_NEON
22635 && (VALID_NEON_DREG_MODE (mode1)
22636 || VALID_NEON_QREG_MODE (mode1)
22637 || VALID_NEON_STRUCT_MODE (mode1))
22638 && (VALID_NEON_DREG_MODE (mode2)
22639 || VALID_NEON_QREG_MODE (mode2)
22640 || VALID_NEON_STRUCT_MODE (mode2)))
22641 return true;
22643 return false;
22646 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22647 not used in arm mode. */
22649 enum reg_class
22650 arm_regno_class (int regno)
22652 if (TARGET_THUMB1)
22654 if (regno == STACK_POINTER_REGNUM)
22655 return STACK_REG;
22656 if (regno == CC_REGNUM)
22657 return CC_REG;
22658 if (regno < 8)
22659 return LO_REGS;
22660 return HI_REGS;
22663 if (TARGET_THUMB2 && regno < 8)
22664 return LO_REGS;
22666 if ( regno <= LAST_ARM_REGNUM
22667 || regno == FRAME_POINTER_REGNUM
22668 || regno == ARG_POINTER_REGNUM)
22669 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22671 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22672 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22674 if (IS_VFP_REGNUM (regno))
22676 if (regno <= D7_VFP_REGNUM)
22677 return VFP_D0_D7_REGS;
22678 else if (regno <= LAST_LO_VFP_REGNUM)
22679 return VFP_LO_REGS;
22680 else
22681 return VFP_HI_REGS;
22684 if (IS_IWMMXT_REGNUM (regno))
22685 return IWMMXT_REGS;
22687 if (IS_IWMMXT_GR_REGNUM (regno))
22688 return IWMMXT_GR_REGS;
22690 return NO_REGS;
22693 /* Handle a special case when computing the offset
22694 of an argument from the frame pointer. */
22696 arm_debugger_arg_offset (int value, rtx addr)
22698 rtx insn;
22700 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
22701 if (value != 0)
22702 return 0;
22704 /* We can only cope with the case where the address is held in a register. */
22705 if (!REG_P (addr))
22706 return 0;
22708 /* If we are using the frame pointer to point at the argument, then
22709 an offset of 0 is correct. */
22710 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22711 return 0;
22713 /* If we are using the stack pointer to point at the
22714 argument, then an offset of 0 is correct. */
22715 /* ??? Check this is consistent with thumb2 frame layout. */
22716 if ((TARGET_THUMB || !frame_pointer_needed)
22717 && REGNO (addr) == SP_REGNUM)
22718 return 0;
22720 /* Oh dear. The argument is pointed to by a register rather
22721 than being held in a register, or being stored at a known
22722 offset from the frame pointer. Since GDB only understands
22723 those two kinds of argument we must translate the address
22724 held in the register into an offset from the frame pointer.
22725 We do this by searching through the insns for the function
22726 looking to see where this register gets its value. If the
22727 register is initialized from the frame pointer plus an offset
22728 then we are in luck and we can continue, otherwise we give up.
22730 This code is exercised by producing debugging information
22731 for a function with arguments like this:
22733 double func (double a, double b, int c, double d) {return d;}
22735 Without this code the stab for parameter 'd' will be set to
22736 an offset of 0 from the frame pointer, rather than 8. */
22738 /* The if() statement says:
22740 If the insn is a normal instruction
22741 and if the insn is setting the value in a register
22742 and if the register being set is the register holding the address of the argument
22743 and if the address is computed by an addition
22744 that involves adding to a register
22745 which is the frame pointer
22746 a constant integer
22748 then... */
22750 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22752 if ( NONJUMP_INSN_P (insn)
22753 && GET_CODE (PATTERN (insn)) == SET
22754 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22755 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22756 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22757 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22758 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22761 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22763 break;
22767 if (value == 0)
22769 debug_rtx (addr);
22770 warning (0, "unable to compute real location of stacked parameter");
22771 value = 8; /* XXX magic hack */
22774 return value;
22777 typedef enum {
22778 T_V8QI,
22779 T_V4HI,
22780 T_V4HF,
22781 T_V2SI,
22782 T_V2SF,
22783 T_DI,
22784 T_V16QI,
22785 T_V8HI,
22786 T_V4SI,
22787 T_V4SF,
22788 T_V2DI,
22789 T_TI,
22790 T_EI,
22791 T_OI,
22792 T_MAX /* Size of enum. Keep last. */
22793 } neon_builtin_type_mode;
22795 #define TYPE_MODE_BIT(X) (1 << (X))
22797 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22798 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22799 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22800 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22801 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22802 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22804 #define v8qi_UP T_V8QI
22805 #define v4hi_UP T_V4HI
22806 #define v4hf_UP T_V4HF
22807 #define v2si_UP T_V2SI
22808 #define v2sf_UP T_V2SF
22809 #define di_UP T_DI
22810 #define v16qi_UP T_V16QI
22811 #define v8hi_UP T_V8HI
22812 #define v4si_UP T_V4SI
22813 #define v4sf_UP T_V4SF
22814 #define v2di_UP T_V2DI
22815 #define ti_UP T_TI
22816 #define ei_UP T_EI
22817 #define oi_UP T_OI
22819 #define UP(X) X##_UP
22821 typedef enum {
22822 NEON_BINOP,
22823 NEON_TERNOP,
22824 NEON_UNOP,
22825 NEON_GETLANE,
22826 NEON_SETLANE,
22827 NEON_CREATE,
22828 NEON_RINT,
22829 NEON_DUP,
22830 NEON_DUPLANE,
22831 NEON_COMBINE,
22832 NEON_SPLIT,
22833 NEON_LANEMUL,
22834 NEON_LANEMULL,
22835 NEON_LANEMULH,
22836 NEON_LANEMAC,
22837 NEON_SCALARMUL,
22838 NEON_SCALARMULL,
22839 NEON_SCALARMULH,
22840 NEON_SCALARMAC,
22841 NEON_CONVERT,
22842 NEON_FLOAT_WIDEN,
22843 NEON_FLOAT_NARROW,
22844 NEON_FIXCONV,
22845 NEON_SELECT,
22846 NEON_RESULTPAIR,
22847 NEON_REINTERP,
22848 NEON_VTBL,
22849 NEON_VTBX,
22850 NEON_LOAD1,
22851 NEON_LOAD1LANE,
22852 NEON_STORE1,
22853 NEON_STORE1LANE,
22854 NEON_LOADSTRUCT,
22855 NEON_LOADSTRUCTLANE,
22856 NEON_STORESTRUCT,
22857 NEON_STORESTRUCTLANE,
22858 NEON_LOGICBINOP,
22859 NEON_SHIFTINSERT,
22860 NEON_SHIFTIMM,
22861 NEON_SHIFTACC
22862 } neon_itype;
22864 typedef struct {
22865 const char *name;
22866 const neon_itype itype;
22867 const neon_builtin_type_mode mode;
22868 const enum insn_code code;
22869 unsigned int fcode;
22870 } neon_builtin_datum;
22872 #define CF(N,X) CODE_FOR_neon_##N##X
22874 #define VAR1(T, N, A) \
22875 {#N, NEON_##T, UP (A), CF (N, A), 0}
22876 #define VAR2(T, N, A, B) \
22877 VAR1 (T, N, A), \
22878 {#N, NEON_##T, UP (B), CF (N, B), 0}
22879 #define VAR3(T, N, A, B, C) \
22880 VAR2 (T, N, A, B), \
22881 {#N, NEON_##T, UP (C), CF (N, C), 0}
22882 #define VAR4(T, N, A, B, C, D) \
22883 VAR3 (T, N, A, B, C), \
22884 {#N, NEON_##T, UP (D), CF (N, D), 0}
22885 #define VAR5(T, N, A, B, C, D, E) \
22886 VAR4 (T, N, A, B, C, D), \
22887 {#N, NEON_##T, UP (E), CF (N, E), 0}
22888 #define VAR6(T, N, A, B, C, D, E, F) \
22889 VAR5 (T, N, A, B, C, D, E), \
22890 {#N, NEON_##T, UP (F), CF (N, F), 0}
22891 #define VAR7(T, N, A, B, C, D, E, F, G) \
22892 VAR6 (T, N, A, B, C, D, E, F), \
22893 {#N, NEON_##T, UP (G), CF (N, G), 0}
22894 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22895 VAR7 (T, N, A, B, C, D, E, F, G), \
22896 {#N, NEON_##T, UP (H), CF (N, H), 0}
22897 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22898 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22899 {#N, NEON_##T, UP (I), CF (N, I), 0}
22900 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22901 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22902 {#N, NEON_##T, UP (J), CF (N, J), 0}
22904 /* The NEON builtin data can be found in arm_neon_builtins.def.
22905 The mode entries in the following table correspond to the "key" type of the
22906 instruction variant, i.e. equivalent to that which would be specified after
22907 the assembler mnemonic, which usually refers to the last vector operand.
22908 (Signed/unsigned/polynomial types are not differentiated, though, and
22909 are all mapped onto the same mode for a given element size.) The modes
22910 listed per instruction should be the same as those defined for that
22911 instruction's pattern in neon.md. */
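/* To illustrate how the VARn macros expand (the entry below is a made-up
   example, not necessarily present in arm_neon_builtins.def):

     VAR2 (BINOP, vadd, v2sf, v4sf)

   becomes the two initialisers

     {"vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0},
     {"vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0}  */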
22913 static neon_builtin_datum neon_builtin_data[] =
22915 #include "arm_neon_builtins.def"
22918 #undef CF
22919 #undef VAR1
22920 #undef VAR2
22921 #undef VAR3
22922 #undef VAR4
22923 #undef VAR5
22924 #undef VAR6
22925 #undef VAR7
22926 #undef VAR8
22927 #undef VAR9
22928 #undef VAR10
22930 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22931 #define VAR1(T, N, A) \
22932 CF (N, A)
22933 #define VAR2(T, N, A, B) \
22934 VAR1 (T, N, A), \
22935 CF (N, B)
22936 #define VAR3(T, N, A, B, C) \
22937 VAR2 (T, N, A, B), \
22938 CF (N, C)
22939 #define VAR4(T, N, A, B, C, D) \
22940 VAR3 (T, N, A, B, C), \
22941 CF (N, D)
22942 #define VAR5(T, N, A, B, C, D, E) \
22943 VAR4 (T, N, A, B, C, D), \
22944 CF (N, E)
22945 #define VAR6(T, N, A, B, C, D, E, F) \
22946 VAR5 (T, N, A, B, C, D, E), \
22947 CF (N, F)
22948 #define VAR7(T, N, A, B, C, D, E, F, G) \
22949 VAR6 (T, N, A, B, C, D, E, F), \
22950 CF (N, G)
22951 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22952 VAR7 (T, N, A, B, C, D, E, F, G), \
22953 CF (N, H)
22954 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22955 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22956 CF (N, I)
22957 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22958 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22959 CF (N, J)
22960 enum arm_builtins
22962 ARM_BUILTIN_GETWCGR0,
22963 ARM_BUILTIN_GETWCGR1,
22964 ARM_BUILTIN_GETWCGR2,
22965 ARM_BUILTIN_GETWCGR3,
22967 ARM_BUILTIN_SETWCGR0,
22968 ARM_BUILTIN_SETWCGR1,
22969 ARM_BUILTIN_SETWCGR2,
22970 ARM_BUILTIN_SETWCGR3,
22972 ARM_BUILTIN_WZERO,
22974 ARM_BUILTIN_WAVG2BR,
22975 ARM_BUILTIN_WAVG2HR,
22976 ARM_BUILTIN_WAVG2B,
22977 ARM_BUILTIN_WAVG2H,
22979 ARM_BUILTIN_WACCB,
22980 ARM_BUILTIN_WACCH,
22981 ARM_BUILTIN_WACCW,
22983 ARM_BUILTIN_WMACS,
22984 ARM_BUILTIN_WMACSZ,
22985 ARM_BUILTIN_WMACU,
22986 ARM_BUILTIN_WMACUZ,
22988 ARM_BUILTIN_WSADB,
22989 ARM_BUILTIN_WSADBZ,
22990 ARM_BUILTIN_WSADH,
22991 ARM_BUILTIN_WSADHZ,
22993 ARM_BUILTIN_WALIGNI,
22994 ARM_BUILTIN_WALIGNR0,
22995 ARM_BUILTIN_WALIGNR1,
22996 ARM_BUILTIN_WALIGNR2,
22997 ARM_BUILTIN_WALIGNR3,
22999 ARM_BUILTIN_TMIA,
23000 ARM_BUILTIN_TMIAPH,
23001 ARM_BUILTIN_TMIABB,
23002 ARM_BUILTIN_TMIABT,
23003 ARM_BUILTIN_TMIATB,
23004 ARM_BUILTIN_TMIATT,
23006 ARM_BUILTIN_TMOVMSKB,
23007 ARM_BUILTIN_TMOVMSKH,
23008 ARM_BUILTIN_TMOVMSKW,
23010 ARM_BUILTIN_TBCSTB,
23011 ARM_BUILTIN_TBCSTH,
23012 ARM_BUILTIN_TBCSTW,
23014 ARM_BUILTIN_WMADDS,
23015 ARM_BUILTIN_WMADDU,
23017 ARM_BUILTIN_WPACKHSS,
23018 ARM_BUILTIN_WPACKWSS,
23019 ARM_BUILTIN_WPACKDSS,
23020 ARM_BUILTIN_WPACKHUS,
23021 ARM_BUILTIN_WPACKWUS,
23022 ARM_BUILTIN_WPACKDUS,
23024 ARM_BUILTIN_WADDB,
23025 ARM_BUILTIN_WADDH,
23026 ARM_BUILTIN_WADDW,
23027 ARM_BUILTIN_WADDSSB,
23028 ARM_BUILTIN_WADDSSH,
23029 ARM_BUILTIN_WADDSSW,
23030 ARM_BUILTIN_WADDUSB,
23031 ARM_BUILTIN_WADDUSH,
23032 ARM_BUILTIN_WADDUSW,
23033 ARM_BUILTIN_WSUBB,
23034 ARM_BUILTIN_WSUBH,
23035 ARM_BUILTIN_WSUBW,
23036 ARM_BUILTIN_WSUBSSB,
23037 ARM_BUILTIN_WSUBSSH,
23038 ARM_BUILTIN_WSUBSSW,
23039 ARM_BUILTIN_WSUBUSB,
23040 ARM_BUILTIN_WSUBUSH,
23041 ARM_BUILTIN_WSUBUSW,
23043 ARM_BUILTIN_WAND,
23044 ARM_BUILTIN_WANDN,
23045 ARM_BUILTIN_WOR,
23046 ARM_BUILTIN_WXOR,
23048 ARM_BUILTIN_WCMPEQB,
23049 ARM_BUILTIN_WCMPEQH,
23050 ARM_BUILTIN_WCMPEQW,
23051 ARM_BUILTIN_WCMPGTUB,
23052 ARM_BUILTIN_WCMPGTUH,
23053 ARM_BUILTIN_WCMPGTUW,
23054 ARM_BUILTIN_WCMPGTSB,
23055 ARM_BUILTIN_WCMPGTSH,
23056 ARM_BUILTIN_WCMPGTSW,
23058 ARM_BUILTIN_TEXTRMSB,
23059 ARM_BUILTIN_TEXTRMSH,
23060 ARM_BUILTIN_TEXTRMSW,
23061 ARM_BUILTIN_TEXTRMUB,
23062 ARM_BUILTIN_TEXTRMUH,
23063 ARM_BUILTIN_TEXTRMUW,
23064 ARM_BUILTIN_TINSRB,
23065 ARM_BUILTIN_TINSRH,
23066 ARM_BUILTIN_TINSRW,
23068 ARM_BUILTIN_WMAXSW,
23069 ARM_BUILTIN_WMAXSH,
23070 ARM_BUILTIN_WMAXSB,
23071 ARM_BUILTIN_WMAXUW,
23072 ARM_BUILTIN_WMAXUH,
23073 ARM_BUILTIN_WMAXUB,
23074 ARM_BUILTIN_WMINSW,
23075 ARM_BUILTIN_WMINSH,
23076 ARM_BUILTIN_WMINSB,
23077 ARM_BUILTIN_WMINUW,
23078 ARM_BUILTIN_WMINUH,
23079 ARM_BUILTIN_WMINUB,
23081 ARM_BUILTIN_WMULUM,
23082 ARM_BUILTIN_WMULSM,
23083 ARM_BUILTIN_WMULUL,
23085 ARM_BUILTIN_PSADBH,
23086 ARM_BUILTIN_WSHUFH,
23088 ARM_BUILTIN_WSLLH,
23089 ARM_BUILTIN_WSLLW,
23090 ARM_BUILTIN_WSLLD,
23091 ARM_BUILTIN_WSRAH,
23092 ARM_BUILTIN_WSRAW,
23093 ARM_BUILTIN_WSRAD,
23094 ARM_BUILTIN_WSRLH,
23095 ARM_BUILTIN_WSRLW,
23096 ARM_BUILTIN_WSRLD,
23097 ARM_BUILTIN_WRORH,
23098 ARM_BUILTIN_WRORW,
23099 ARM_BUILTIN_WRORD,
23100 ARM_BUILTIN_WSLLHI,
23101 ARM_BUILTIN_WSLLWI,
23102 ARM_BUILTIN_WSLLDI,
23103 ARM_BUILTIN_WSRAHI,
23104 ARM_BUILTIN_WSRAWI,
23105 ARM_BUILTIN_WSRADI,
23106 ARM_BUILTIN_WSRLHI,
23107 ARM_BUILTIN_WSRLWI,
23108 ARM_BUILTIN_WSRLDI,
23109 ARM_BUILTIN_WRORHI,
23110 ARM_BUILTIN_WRORWI,
23111 ARM_BUILTIN_WRORDI,
23113 ARM_BUILTIN_WUNPCKIHB,
23114 ARM_BUILTIN_WUNPCKIHH,
23115 ARM_BUILTIN_WUNPCKIHW,
23116 ARM_BUILTIN_WUNPCKILB,
23117 ARM_BUILTIN_WUNPCKILH,
23118 ARM_BUILTIN_WUNPCKILW,
23120 ARM_BUILTIN_WUNPCKEHSB,
23121 ARM_BUILTIN_WUNPCKEHSH,
23122 ARM_BUILTIN_WUNPCKEHSW,
23123 ARM_BUILTIN_WUNPCKEHUB,
23124 ARM_BUILTIN_WUNPCKEHUH,
23125 ARM_BUILTIN_WUNPCKEHUW,
23126 ARM_BUILTIN_WUNPCKELSB,
23127 ARM_BUILTIN_WUNPCKELSH,
23128 ARM_BUILTIN_WUNPCKELSW,
23129 ARM_BUILTIN_WUNPCKELUB,
23130 ARM_BUILTIN_WUNPCKELUH,
23131 ARM_BUILTIN_WUNPCKELUW,
23133 ARM_BUILTIN_WABSB,
23134 ARM_BUILTIN_WABSH,
23135 ARM_BUILTIN_WABSW,
23137 ARM_BUILTIN_WADDSUBHX,
23138 ARM_BUILTIN_WSUBADDHX,
23140 ARM_BUILTIN_WABSDIFFB,
23141 ARM_BUILTIN_WABSDIFFH,
23142 ARM_BUILTIN_WABSDIFFW,
23144 ARM_BUILTIN_WADDCH,
23145 ARM_BUILTIN_WADDCW,
23147 ARM_BUILTIN_WAVG4,
23148 ARM_BUILTIN_WAVG4R,
23150 ARM_BUILTIN_WMADDSX,
23151 ARM_BUILTIN_WMADDUX,
23153 ARM_BUILTIN_WMADDSN,
23154 ARM_BUILTIN_WMADDUN,
23156 ARM_BUILTIN_WMULWSM,
23157 ARM_BUILTIN_WMULWUM,
23159 ARM_BUILTIN_WMULWSMR,
23160 ARM_BUILTIN_WMULWUMR,
23162 ARM_BUILTIN_WMULWL,
23164 ARM_BUILTIN_WMULSMR,
23165 ARM_BUILTIN_WMULUMR,
23167 ARM_BUILTIN_WQMULM,
23168 ARM_BUILTIN_WQMULMR,
23170 ARM_BUILTIN_WQMULWM,
23171 ARM_BUILTIN_WQMULWMR,
23173 ARM_BUILTIN_WADDBHUSM,
23174 ARM_BUILTIN_WADDBHUSL,
23176 ARM_BUILTIN_WQMIABB,
23177 ARM_BUILTIN_WQMIABT,
23178 ARM_BUILTIN_WQMIATB,
23179 ARM_BUILTIN_WQMIATT,
23181 ARM_BUILTIN_WQMIABBN,
23182 ARM_BUILTIN_WQMIABTN,
23183 ARM_BUILTIN_WQMIATBN,
23184 ARM_BUILTIN_WQMIATTN,
23186 ARM_BUILTIN_WMIABB,
23187 ARM_BUILTIN_WMIABT,
23188 ARM_BUILTIN_WMIATB,
23189 ARM_BUILTIN_WMIATT,
23191 ARM_BUILTIN_WMIABBN,
23192 ARM_BUILTIN_WMIABTN,
23193 ARM_BUILTIN_WMIATBN,
23194 ARM_BUILTIN_WMIATTN,
23196 ARM_BUILTIN_WMIAWBB,
23197 ARM_BUILTIN_WMIAWBT,
23198 ARM_BUILTIN_WMIAWTB,
23199 ARM_BUILTIN_WMIAWTT,
23201 ARM_BUILTIN_WMIAWBBN,
23202 ARM_BUILTIN_WMIAWBTN,
23203 ARM_BUILTIN_WMIAWTBN,
23204 ARM_BUILTIN_WMIAWTTN,
23206 ARM_BUILTIN_WMERGE,
23208 ARM_BUILTIN_CRC32B,
23209 ARM_BUILTIN_CRC32H,
23210 ARM_BUILTIN_CRC32W,
23211 ARM_BUILTIN_CRC32CB,
23212 ARM_BUILTIN_CRC32CH,
23213 ARM_BUILTIN_CRC32CW,
23215 #undef CRYPTO1
23216 #undef CRYPTO2
23217 #undef CRYPTO3
23219 #define CRYPTO1(L, U, M1, M2) \
23220 ARM_BUILTIN_CRYPTO_##U,
23221 #define CRYPTO2(L, U, M1, M2, M3) \
23222 ARM_BUILTIN_CRYPTO_##U,
23223 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23224 ARM_BUILTIN_CRYPTO_##U,
23226 #include "crypto.def"
23228 #undef CRYPTO1
23229 #undef CRYPTO2
23230 #undef CRYPTO3
23232 #include "arm_neon_builtins.def"
23234 ,ARM_BUILTIN_MAX
23237 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23239 #undef CF
23240 #undef VAR1
23241 #undef VAR2
23242 #undef VAR3
23243 #undef VAR4
23244 #undef VAR5
23245 #undef VAR6
23246 #undef VAR7
23247 #undef VAR8
23248 #undef VAR9
23249 #undef VAR10
23251 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23253 #define NUM_DREG_TYPES 5
23254 #define NUM_QREG_TYPES 6
23256 static void
23257 arm_init_neon_builtins (void)
23259 unsigned int i, fcode;
23260 tree decl;
23262 tree neon_intQI_type_node;
23263 tree neon_intHI_type_node;
23264 tree neon_floatHF_type_node;
23265 tree neon_polyQI_type_node;
23266 tree neon_polyHI_type_node;
23267 tree neon_intSI_type_node;
23268 tree neon_intDI_type_node;
23269 tree neon_intUTI_type_node;
23270 tree neon_float_type_node;
23272 tree intQI_pointer_node;
23273 tree intHI_pointer_node;
23274 tree intSI_pointer_node;
23275 tree intDI_pointer_node;
23276 tree float_pointer_node;
23278 tree const_intQI_node;
23279 tree const_intHI_node;
23280 tree const_intSI_node;
23281 tree const_intDI_node;
23282 tree const_float_node;
23284 tree const_intQI_pointer_node;
23285 tree const_intHI_pointer_node;
23286 tree const_intSI_pointer_node;
23287 tree const_intDI_pointer_node;
23288 tree const_float_pointer_node;
23290 tree V8QI_type_node;
23291 tree V4HI_type_node;
23292 tree V4HF_type_node;
23293 tree V2SI_type_node;
23294 tree V2SF_type_node;
23295 tree V16QI_type_node;
23296 tree V8HI_type_node;
23297 tree V4SI_type_node;
23298 tree V4SF_type_node;
23299 tree V2DI_type_node;
23301 tree intUQI_type_node;
23302 tree intUHI_type_node;
23303 tree intUSI_type_node;
23304 tree intUDI_type_node;
23306 tree intEI_type_node;
23307 tree intOI_type_node;
23308 tree intCI_type_node;
23309 tree intXI_type_node;
23311 tree V8QI_pointer_node;
23312 tree V4HI_pointer_node;
23313 tree V2SI_pointer_node;
23314 tree V2SF_pointer_node;
23315 tree V16QI_pointer_node;
23316 tree V8HI_pointer_node;
23317 tree V4SI_pointer_node;
23318 tree V4SF_pointer_node;
23319 tree V2DI_pointer_node;
23321 tree void_ftype_pv8qi_v8qi_v8qi;
23322 tree void_ftype_pv4hi_v4hi_v4hi;
23323 tree void_ftype_pv2si_v2si_v2si;
23324 tree void_ftype_pv2sf_v2sf_v2sf;
23325 tree void_ftype_pdi_di_di;
23326 tree void_ftype_pv16qi_v16qi_v16qi;
23327 tree void_ftype_pv8hi_v8hi_v8hi;
23328 tree void_ftype_pv4si_v4si_v4si;
23329 tree void_ftype_pv4sf_v4sf_v4sf;
23330 tree void_ftype_pv2di_v2di_v2di;
23332 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23333 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23334 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23336 /* Create distinguished type nodes for NEON vector element types,
23337 and pointers to values of such types, so we can detect them later. */
23338 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23339 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23340 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23341 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23342 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23343 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23344 neon_float_type_node = make_node (REAL_TYPE);
23345 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23346 layout_type (neon_float_type_node);
23347 neon_floatHF_type_node = make_node (REAL_TYPE);
23348 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23349 layout_type (neon_floatHF_type_node);
23351 /* Define typedefs which exactly correspond to the modes we are basing vector
23352 types on. If you change these names you'll need to change
23353 the table used by arm_mangle_type too. */
23354 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23355 "__builtin_neon_qi");
23356 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23357 "__builtin_neon_hi");
23358 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23359 "__builtin_neon_hf");
23360 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23361 "__builtin_neon_si");
23362 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23363 "__builtin_neon_sf");
23364 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23365 "__builtin_neon_di");
23366 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23367 "__builtin_neon_poly8");
23368 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23369 "__builtin_neon_poly16");
23371 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23372 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23373 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23374 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23375 float_pointer_node = build_pointer_type (neon_float_type_node);
23377 /* Next create constant-qualified versions of the above types. */
23378 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23379 TYPE_QUAL_CONST);
23380 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23381 TYPE_QUAL_CONST);
23382 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23383 TYPE_QUAL_CONST);
23384 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23385 TYPE_QUAL_CONST);
23386 const_float_node = build_qualified_type (neon_float_type_node,
23387 TYPE_QUAL_CONST);
23389 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23390 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23391 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23392 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23393 const_float_pointer_node = build_pointer_type (const_float_node);
23395 /* Now create vector types based on our NEON element types. */
23396 /* 64-bit vectors. */
23397 V8QI_type_node =
23398 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23399 V4HI_type_node =
23400 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23401 V4HF_type_node =
23402 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23403 V2SI_type_node =
23404 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23405 V2SF_type_node =
23406 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23407 /* 128-bit vectors. */
23408 V16QI_type_node =
23409 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23410 V8HI_type_node =
23411 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23412 V4SI_type_node =
23413 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23414 V4SF_type_node =
23415 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23416 V2DI_type_node =
23417 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23419 /* Unsigned integer types for various mode sizes. */
23420 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23421 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23422 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23423 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23424 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23427 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23428 "__builtin_neon_uqi");
23429 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23430 "__builtin_neon_uhi");
23431 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23432 "__builtin_neon_usi");
23433 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23434 "__builtin_neon_udi");
23435 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23436 "__builtin_neon_poly64");
23437 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23438 "__builtin_neon_poly128");
23440 /* Opaque integer types for structures of vectors. */
23441 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23442 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23443 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23444 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23446 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23447 "__builtin_neon_ti");
23448 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23449 "__builtin_neon_ei");
23450 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23451 "__builtin_neon_oi");
23452 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23453 "__builtin_neon_ci");
23454 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23455 "__builtin_neon_xi");
23457 /* Pointers to vector types. */
23458 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23459 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23460 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23461 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23462 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23463 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23464 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23465 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23466 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23468 /* Operations which return results as pairs. */
23469 void_ftype_pv8qi_v8qi_v8qi =
23470 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23471 V8QI_type_node, NULL);
23472 void_ftype_pv4hi_v4hi_v4hi =
23473 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23474 V4HI_type_node, NULL);
23475 void_ftype_pv2si_v2si_v2si =
23476 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23477 V2SI_type_node, NULL);
23478 void_ftype_pv2sf_v2sf_v2sf =
23479 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23480 V2SF_type_node, NULL);
23481 void_ftype_pdi_di_di =
23482 build_function_type_list (void_type_node, intDI_pointer_node,
23483 neon_intDI_type_node, neon_intDI_type_node, NULL);
23484 void_ftype_pv16qi_v16qi_v16qi =
23485 build_function_type_list (void_type_node, V16QI_pointer_node,
23486 V16QI_type_node, V16QI_type_node, NULL);
23487 void_ftype_pv8hi_v8hi_v8hi =
23488 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23489 V8HI_type_node, NULL);
23490 void_ftype_pv4si_v4si_v4si =
23491 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23492 V4SI_type_node, NULL);
23493 void_ftype_pv4sf_v4sf_v4sf =
23494 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23495 V4SF_type_node, NULL);
23496 void_ftype_pv2di_v2di_v2di =
23497 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23498 V2DI_type_node, NULL);
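/* These "pair" prototypes take a pointer as their first argument: the
   NEON_RESULTPAIR expanders below store both result vectors through that
   pointer (see neon_emit_pair_result_insn) instead of returning a value.  */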
23500 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23502 tree V4USI_type_node =
23503 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23505 tree V16UQI_type_node =
23506 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23508 tree v16uqi_ftype_v16uqi
23509 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23511 tree v16uqi_ftype_v16uqi_v16uqi
23512 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23513 V16UQI_type_node, NULL_TREE);
23515 tree v4usi_ftype_v4usi
23516 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23518 tree v4usi_ftype_v4usi_v4usi
23519 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23520 V4USI_type_node, NULL_TREE);
23522 tree v4usi_ftype_v4usi_v4usi_v4usi
23523 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23524 V4USI_type_node, V4USI_type_node, NULL_TREE);
23526 tree uti_ftype_udi_udi
23527 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23528 intUDI_type_node, NULL_TREE);
23530 #undef CRYPTO1
23531 #undef CRYPTO2
23532 #undef CRYPTO3
23533 #undef C
23534 #undef N
23535 #undef CF
23536 #undef FT1
23537 #undef FT2
23538 #undef FT3
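/* The macros below turn each entry of crypto.def into a builtin
   registration.  C builds the ARM_BUILTIN_CRYPTO_* enumerator, N the
   "__builtin_arm_crypto_*" name, and FT1/FT2/FT3 paste together the
   function-type variable declared above.  For example (illustrative
   entry), a line such as
     CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi)
   would expand to an add_builtin_function call registering
   "__builtin_arm_crypto_aese" with type v16uqi_ftype_v16uqi_v16uqi.  */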
23540 #define C(U) \
23541 ARM_BUILTIN_CRYPTO_##U
23542 #define N(L) \
23543 "__builtin_arm_crypto_"#L
23544 #define FT1(R, A) \
23545 R##_ftype_##A
23546 #define FT2(R, A1, A2) \
23547 R##_ftype_##A1##_##A2
23548 #define FT3(R, A1, A2, A3) \
23549 R##_ftype_##A1##_##A2##_##A3
23550 #define CRYPTO1(L, U, R, A) \
23551 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23552 C (U), BUILT_IN_MD, \
23553 NULL, NULL_TREE);
23554 #define CRYPTO2(L, U, R, A1, A2) \
23555 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23556 C (U), BUILT_IN_MD, \
23557 NULL, NULL_TREE);
23559 #define CRYPTO3(L, U, R, A1, A2, A3) \
23560 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23561 C (U), BUILT_IN_MD, \
23562 NULL, NULL_TREE);
23563 #include "crypto.def"
23565 #undef CRYPTO1
23566 #undef CRYPTO2
23567 #undef CRYPTO3
23568 #undef C
23569 #undef N
23570 #undef FT1
23571 #undef FT2
23572 #undef FT3
23574 dreg_types[0] = V8QI_type_node;
23575 dreg_types[1] = V4HI_type_node;
23576 dreg_types[2] = V2SI_type_node;
23577 dreg_types[3] = V2SF_type_node;
23578 dreg_types[4] = neon_intDI_type_node;
23580 qreg_types[0] = V16QI_type_node;
23581 qreg_types[1] = V8HI_type_node;
23582 qreg_types[2] = V4SI_type_node;
23583 qreg_types[3] = V4SF_type_node;
23584 qreg_types[4] = V2DI_type_node;
23585 qreg_types[5] = neon_intUTI_type_node;
23587 for (i = 0; i < NUM_QREG_TYPES; i++)
23589 int j;
23590 for (j = 0; j < NUM_QREG_TYPES; j++)
23592 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23593 reinterp_ftype_dreg[i][j]
23594 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23596 reinterp_ftype_qreg[i][j]
23597 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23601 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23602 i < ARRAY_SIZE (neon_builtin_data);
23603 i++, fcode++)
23605 neon_builtin_datum *d = &neon_builtin_data[i];
23607 const char* const modenames[] = {
23608 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23609 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23610 "ti", "ei", "oi"
23612 char namebuf[60];
23613 tree ftype = NULL;
23614 int is_load = 0, is_store = 0;
23616 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23618 d->fcode = fcode;
23620 switch (d->itype)
23622 case NEON_LOAD1:
23623 case NEON_LOAD1LANE:
23624 case NEON_LOADSTRUCT:
23625 case NEON_LOADSTRUCTLANE:
23626 is_load = 1;
23627 /* Fall through. */
23628 case NEON_STORE1:
23629 case NEON_STORE1LANE:
23630 case NEON_STORESTRUCT:
23631 case NEON_STORESTRUCTLANE:
23632 if (!is_load)
23633 is_store = 1;
23634 /* Fall through. */
23635 case NEON_UNOP:
23636 case NEON_RINT:
23637 case NEON_BINOP:
23638 case NEON_LOGICBINOP:
23639 case NEON_SHIFTINSERT:
23640 case NEON_TERNOP:
23641 case NEON_GETLANE:
23642 case NEON_SETLANE:
23643 case NEON_CREATE:
23644 case NEON_DUP:
23645 case NEON_DUPLANE:
23646 case NEON_SHIFTIMM:
23647 case NEON_SHIFTACC:
23648 case NEON_COMBINE:
23649 case NEON_SPLIT:
23650 case NEON_CONVERT:
23651 case NEON_FIXCONV:
23652 case NEON_LANEMUL:
23653 case NEON_LANEMULL:
23654 case NEON_LANEMULH:
23655 case NEON_LANEMAC:
23656 case NEON_SCALARMUL:
23657 case NEON_SCALARMULL:
23658 case NEON_SCALARMULH:
23659 case NEON_SCALARMAC:
23660 case NEON_SELECT:
23661 case NEON_VTBL:
23662 case NEON_VTBX:
23664 int k;
23665 tree return_type = void_type_node, args = void_list_node;
23667 /* Build a function type directly from the insn_data for
23668 this builtin. The build_function_type() function takes
23669 care of removing duplicates for us. */
23670 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23672 tree eltype;
23674 if (is_load && k == 1)
23676 /* Neon load patterns always have the memory
23677 operand in the operand 1 position. */
23678 gcc_assert (insn_data[d->code].operand[k].predicate
23679 == neon_struct_operand);
23681 switch (d->mode)
23683 case T_V8QI:
23684 case T_V16QI:
23685 eltype = const_intQI_pointer_node;
23686 break;
23688 case T_V4HI:
23689 case T_V8HI:
23690 eltype = const_intHI_pointer_node;
23691 break;
23693 case T_V2SI:
23694 case T_V4SI:
23695 eltype = const_intSI_pointer_node;
23696 break;
23698 case T_V2SF:
23699 case T_V4SF:
23700 eltype = const_float_pointer_node;
23701 break;
23703 case T_DI:
23704 case T_V2DI:
23705 eltype = const_intDI_pointer_node;
23706 break;
23708 default: gcc_unreachable ();
23711 else if (is_store && k == 0)
23713 /* Similarly, Neon store patterns use operand 0 as
23714 the memory location to store to. */
23715 gcc_assert (insn_data[d->code].operand[k].predicate
23716 == neon_struct_operand);
23718 switch (d->mode)
23720 case T_V8QI:
23721 case T_V16QI:
23722 eltype = intQI_pointer_node;
23723 break;
23725 case T_V4HI:
23726 case T_V8HI:
23727 eltype = intHI_pointer_node;
23728 break;
23730 case T_V2SI:
23731 case T_V4SI:
23732 eltype = intSI_pointer_node;
23733 break;
23735 case T_V2SF:
23736 case T_V4SF:
23737 eltype = float_pointer_node;
23738 break;
23740 case T_DI:
23741 case T_V2DI:
23742 eltype = intDI_pointer_node;
23743 break;
23745 default: gcc_unreachable ();
23748 else
23750 switch (insn_data[d->code].operand[k].mode)
23752 case VOIDmode: eltype = void_type_node; break;
23753 /* Scalars. */
23754 case QImode: eltype = neon_intQI_type_node; break;
23755 case HImode: eltype = neon_intHI_type_node; break;
23756 case SImode: eltype = neon_intSI_type_node; break;
23757 case SFmode: eltype = neon_float_type_node; break;
23758 case DImode: eltype = neon_intDI_type_node; break;
23759 case TImode: eltype = intTI_type_node; break;
23760 case EImode: eltype = intEI_type_node; break;
23761 case OImode: eltype = intOI_type_node; break;
23762 case CImode: eltype = intCI_type_node; break;
23763 case XImode: eltype = intXI_type_node; break;
23764 /* 64-bit vectors. */
23765 case V8QImode: eltype = V8QI_type_node; break;
23766 case V4HImode: eltype = V4HI_type_node; break;
23767 case V2SImode: eltype = V2SI_type_node; break;
23768 case V2SFmode: eltype = V2SF_type_node; break;
23769 /* 128-bit vectors. */
23770 case V16QImode: eltype = V16QI_type_node; break;
23771 case V8HImode: eltype = V8HI_type_node; break;
23772 case V4SImode: eltype = V4SI_type_node; break;
23773 case V4SFmode: eltype = V4SF_type_node; break;
23774 case V2DImode: eltype = V2DI_type_node; break;
23775 default: gcc_unreachable ();
23779 if (k == 0 && !is_store)
23780 return_type = eltype;
23781 else
23782 args = tree_cons (NULL_TREE, eltype, args);
23785 ftype = build_function_type (return_type, args);
23787 break;
23789 case NEON_RESULTPAIR:
23791 switch (insn_data[d->code].operand[1].mode)
23793 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23794 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23795 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23796 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23797 case DImode: ftype = void_ftype_pdi_di_di; break;
23798 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23799 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23800 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23801 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23802 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23803 default: gcc_unreachable ();
23806 break;
23808 case NEON_REINTERP:
23810 /* We iterate over NUM_DREG_TYPES doubleword types,
23811 then NUM_QREG_TYPES quadword types.
23812 V4HF is not a type used in reinterpret, so we translate
23813 d->mode to the correct index in reinterp_ftype_dreg. */
23814 bool qreg_p
23815 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23816 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23817 % NUM_QREG_TYPES;
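/* Worked example, assuming the T_* enumerators follow the order of
   modenames[] above: a doubleword variant with d->mode == T_V2SI skips
   the unused T_V4HF slot and yields index 2, i.e. V2SI_type_node in
   dreg_types[]; quadword variants (T_V16QI onwards) wrap modulo
   NUM_QREG_TYPES onto qreg_types[0..5].  */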
23818 switch (insn_data[d->code].operand[0].mode)
23820 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23821 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23822 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23823 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23824 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23825 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23826 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23827 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23828 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23829 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23830 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23831 default: gcc_unreachable ();
23834 break;
23835 case NEON_FLOAT_WIDEN:
23837 tree eltype = NULL_TREE;
23838 tree return_type = NULL_TREE;
23840 switch (insn_data[d->code].operand[1].mode)
23842 case V4HFmode:
23843 eltype = V4HF_type_node;
23844 return_type = V4SF_type_node;
23845 break;
23846 default: gcc_unreachable ();
23848 ftype = build_function_type_list (return_type, eltype, NULL);
23849 break;
23851 case NEON_FLOAT_NARROW:
23853 tree eltype = NULL_TREE;
23854 tree return_type = NULL_TREE;
23856 switch (insn_data[d->code].operand[1].mode)
23858 case V4SFmode:
23859 eltype = V4SF_type_node;
23860 return_type = V4HF_type_node;
23861 break;
23862 default: gcc_unreachable ();
23864 ftype = build_function_type_list (return_type, eltype, NULL);
23865 break;
23867 default:
23868 gcc_unreachable ();
23871 gcc_assert (ftype != NULL);
23873 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23875 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23876 NULL_TREE);
23877 arm_builtin_decls[fcode] = decl;
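/* The registered name is the entry name plus its mode suffix; an entry
   named "vadd" in its V8QI variant (illustrative) would thus become
   __builtin_neon_vaddv8qi.  */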
23881 #undef NUM_DREG_TYPES
23882 #undef NUM_QREG_TYPES
23884 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23885 do \
23887 if ((MASK) & insn_flags) \
23889 tree bdecl; \
23890 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23891 BUILT_IN_MD, NULL, NULL_TREE); \
23892 arm_builtin_decls[CODE] = bdecl; \
23895 while (0)
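/* def_mbuiltin only creates the builtin when the requested FL_* capability
   bit is present in insn_flags, so e.g. FL_IWMMXT2-only entries are simply
   skipped on plain iWMMXT targets.  */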
23897 struct builtin_description
23899 const unsigned int mask;
23900 const enum insn_code icode;
23901 const char * const name;
23902 const enum arm_builtins code;
23903 const enum rtx_code comparison;
23904 const unsigned int flag;
23907 static const struct builtin_description bdesc_2arg[] =
23909 #define IWMMXT_BUILTIN(code, string, builtin) \
23910 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23911 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23913 #define IWMMXT2_BUILTIN(code, string, builtin) \
23914 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23915 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23917 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23918 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23919 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23920 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23921 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23922 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23923 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23924 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23925 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23926 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23927 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23928 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23929 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23930 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23931 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23932 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23933 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23934 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23935 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23936 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23937 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23938 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23939 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23940 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23941 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23942 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23943 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23944 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23945 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23946 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23947 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23948 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23949 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23950 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23951 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23952 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23953 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23954 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23955 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23956 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23957 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23958 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23959 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23960 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23961 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23962 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23963 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23964 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23965 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23966 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23967 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23968 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23969 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23970 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23971 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23972 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23973 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23974 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23975 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23976 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23977 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23978 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23979 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23980 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23981 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23982 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23983 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23984 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23985 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23986 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23987 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23988 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23989 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23990 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23991 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23992 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23993 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23994 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23996 #define IWMMXT_BUILTIN2(code, builtin) \
23997 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23999 #define IWMMXT2_BUILTIN2(code, builtin) \
24000 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24002 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24003 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24004 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24005 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24006 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24007 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24008 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24009 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24010 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24011 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24013 #define CRC32_BUILTIN(L, U) \
24014 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24015 UNKNOWN, 0},
24016 CRC32_BUILTIN (crc32b, CRC32B)
24017 CRC32_BUILTIN (crc32h, CRC32H)
24018 CRC32_BUILTIN (crc32w, CRC32W)
24019 CRC32_BUILTIN (crc32cb, CRC32CB)
24020 CRC32_BUILTIN (crc32ch, CRC32CH)
24021 CRC32_BUILTIN (crc32cw, CRC32CW)
24022 #undef CRC32_BUILTIN
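/* The CRC32 rows deliberately use a mask of 0: the registration loop in
   arm_init_iwmmxt_builtins below only accepts FL_IWMMXT/FL_IWMMXT2 entries,
   so the __builtin_arm_crc32* decls themselves are created separately in
   arm_init_crc32_builtins when TARGET_CRC32 is enabled.  */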
24025 #define CRYPTO_BUILTIN(L, U) \
24026 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24027 UNKNOWN, 0},
24028 #undef CRYPTO1
24029 #undef CRYPTO2
24030 #undef CRYPTO3
24031 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24032 #define CRYPTO1(L, U, R, A)
24033 #define CRYPTO3(L, U, R, A1, A2, A3)
24034 #include "crypto.def"
24035 #undef CRYPTO1
24036 #undef CRYPTO2
24037 #undef CRYPTO3
24041 static const struct builtin_description bdesc_1arg[] =
24043 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24044 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24045 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24046 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24047 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24048 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24049 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24050 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24051 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24052 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24053 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24054 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24055 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24056 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24057 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24058 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24059 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24060 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24061 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24062 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24063 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24064 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24065 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24066 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24068 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24069 #define CRYPTO2(L, U, R, A1, A2)
24070 #define CRYPTO3(L, U, R, A1, A2, A3)
24071 #include "crypto.def"
24072 #undef CRYPTO1
24073 #undef CRYPTO2
24074 #undef CRYPTO3
24077 static const struct builtin_description bdesc_3arg[] =
24079 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24080 #define CRYPTO1(L, U, R, A)
24081 #define CRYPTO2(L, U, R, A1, A2)
24082 #include "crypto.def"
24083 #undef CRYPTO1
24084 #undef CRYPTO2
24085 #undef CRYPTO3
24087 #undef CRYPTO_BUILTIN
24089 /* Set up all the iWMMXt builtins. This is not called if
24090 TARGET_IWMMXT is zero. */
24092 static void
24093 arm_init_iwmmxt_builtins (void)
24095 const struct builtin_description * d;
24096 size_t i;
24098 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24099 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24100 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24102 tree v8qi_ftype_v8qi_v8qi_int
24103 = build_function_type_list (V8QI_type_node,
24104 V8QI_type_node, V8QI_type_node,
24105 integer_type_node, NULL_TREE);
24106 tree v4hi_ftype_v4hi_int
24107 = build_function_type_list (V4HI_type_node,
24108 V4HI_type_node, integer_type_node, NULL_TREE);
24109 tree v2si_ftype_v2si_int
24110 = build_function_type_list (V2SI_type_node,
24111 V2SI_type_node, integer_type_node, NULL_TREE);
24112 tree v2si_ftype_di_di
24113 = build_function_type_list (V2SI_type_node,
24114 long_long_integer_type_node,
24115 long_long_integer_type_node,
24116 NULL_TREE);
24117 tree di_ftype_di_int
24118 = build_function_type_list (long_long_integer_type_node,
24119 long_long_integer_type_node,
24120 integer_type_node, NULL_TREE);
24121 tree di_ftype_di_int_int
24122 = build_function_type_list (long_long_integer_type_node,
24123 long_long_integer_type_node,
24124 integer_type_node,
24125 integer_type_node, NULL_TREE);
24126 tree int_ftype_v8qi
24127 = build_function_type_list (integer_type_node,
24128 V8QI_type_node, NULL_TREE);
24129 tree int_ftype_v4hi
24130 = build_function_type_list (integer_type_node,
24131 V4HI_type_node, NULL_TREE);
24132 tree int_ftype_v2si
24133 = build_function_type_list (integer_type_node,
24134 V2SI_type_node, NULL_TREE);
24135 tree int_ftype_v8qi_int
24136 = build_function_type_list (integer_type_node,
24137 V8QI_type_node, integer_type_node, NULL_TREE);
24138 tree int_ftype_v4hi_int
24139 = build_function_type_list (integer_type_node,
24140 V4HI_type_node, integer_type_node, NULL_TREE);
24141 tree int_ftype_v2si_int
24142 = build_function_type_list (integer_type_node,
24143 V2SI_type_node, integer_type_node, NULL_TREE);
24144 tree v8qi_ftype_v8qi_int_int
24145 = build_function_type_list (V8QI_type_node,
24146 V8QI_type_node, integer_type_node,
24147 integer_type_node, NULL_TREE);
24148 tree v4hi_ftype_v4hi_int_int
24149 = build_function_type_list (V4HI_type_node,
24150 V4HI_type_node, integer_type_node,
24151 integer_type_node, NULL_TREE);
24152 tree v2si_ftype_v2si_int_int
24153 = build_function_type_list (V2SI_type_node,
24154 V2SI_type_node, integer_type_node,
24155 integer_type_node, NULL_TREE);
24156 /* Miscellaneous. */
24157 tree v8qi_ftype_v4hi_v4hi
24158 = build_function_type_list (V8QI_type_node,
24159 V4HI_type_node, V4HI_type_node, NULL_TREE);
24160 tree v4hi_ftype_v2si_v2si
24161 = build_function_type_list (V4HI_type_node,
24162 V2SI_type_node, V2SI_type_node, NULL_TREE);
24163 tree v8qi_ftype_v4hi_v8qi
24164 = build_function_type_list (V8QI_type_node,
24165 V4HI_type_node, V8QI_type_node, NULL_TREE);
24166 tree v2si_ftype_v4hi_v4hi
24167 = build_function_type_list (V2SI_type_node,
24168 V4HI_type_node, V4HI_type_node, NULL_TREE);
24169 tree v2si_ftype_v8qi_v8qi
24170 = build_function_type_list (V2SI_type_node,
24171 V8QI_type_node, V8QI_type_node, NULL_TREE);
24172 tree v4hi_ftype_v4hi_di
24173 = build_function_type_list (V4HI_type_node,
24174 V4HI_type_node, long_long_integer_type_node,
24175 NULL_TREE);
24176 tree v2si_ftype_v2si_di
24177 = build_function_type_list (V2SI_type_node,
24178 V2SI_type_node, long_long_integer_type_node,
24179 NULL_TREE);
24180 tree di_ftype_void
24181 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24182 tree int_ftype_void
24183 = build_function_type_list (integer_type_node, NULL_TREE);
24184 tree di_ftype_v8qi
24185 = build_function_type_list (long_long_integer_type_node,
24186 V8QI_type_node, NULL_TREE);
24187 tree di_ftype_v4hi
24188 = build_function_type_list (long_long_integer_type_node,
24189 V4HI_type_node, NULL_TREE);
24190 tree di_ftype_v2si
24191 = build_function_type_list (long_long_integer_type_node,
24192 V2SI_type_node, NULL_TREE);
24193 tree v2si_ftype_v4hi
24194 = build_function_type_list (V2SI_type_node,
24195 V4HI_type_node, NULL_TREE);
24196 tree v4hi_ftype_v8qi
24197 = build_function_type_list (V4HI_type_node,
24198 V8QI_type_node, NULL_TREE);
24199 tree v8qi_ftype_v8qi
24200 = build_function_type_list (V8QI_type_node,
24201 V8QI_type_node, NULL_TREE);
24202 tree v4hi_ftype_v4hi
24203 = build_function_type_list (V4HI_type_node,
24204 V4HI_type_node, NULL_TREE);
24205 tree v2si_ftype_v2si
24206 = build_function_type_list (V2SI_type_node,
24207 V2SI_type_node, NULL_TREE);
24209 tree di_ftype_di_v4hi_v4hi
24210 = build_function_type_list (long_long_unsigned_type_node,
24211 long_long_unsigned_type_node,
24212 V4HI_type_node, V4HI_type_node,
24213 NULL_TREE);
24215 tree di_ftype_v4hi_v4hi
24216 = build_function_type_list (long_long_unsigned_type_node,
24217 V4HI_type_node, V4HI_type_node,
24218 NULL_TREE);
24220 tree v2si_ftype_v2si_v4hi_v4hi
24221 = build_function_type_list (V2SI_type_node,
24222 V2SI_type_node, V4HI_type_node,
24223 V4HI_type_node, NULL_TREE);
24225 tree v2si_ftype_v2si_v8qi_v8qi
24226 = build_function_type_list (V2SI_type_node,
24227 V2SI_type_node, V8QI_type_node,
24228 V8QI_type_node, NULL_TREE);
24230 tree di_ftype_di_v2si_v2si
24231 = build_function_type_list (long_long_unsigned_type_node,
24232 long_long_unsigned_type_node,
24233 V2SI_type_node, V2SI_type_node,
24234 NULL_TREE);
24236 tree di_ftype_di_di_int
24237 = build_function_type_list (long_long_unsigned_type_node,
24238 long_long_unsigned_type_node,
24239 long_long_unsigned_type_node,
24240 integer_type_node, NULL_TREE);
24242 tree void_ftype_int
24243 = build_function_type_list (void_type_node,
24244 integer_type_node, NULL_TREE);
24246 tree v8qi_ftype_char
24247 = build_function_type_list (V8QI_type_node,
24248 signed_char_type_node, NULL_TREE);
24250 tree v4hi_ftype_short
24251 = build_function_type_list (V4HI_type_node,
24252 short_integer_type_node, NULL_TREE);
24254 tree v2si_ftype_int
24255 = build_function_type_list (V2SI_type_node,
24256 integer_type_node, NULL_TREE);
24258 /* Normal vector binops. */
24259 tree v8qi_ftype_v8qi_v8qi
24260 = build_function_type_list (V8QI_type_node,
24261 V8QI_type_node, V8QI_type_node, NULL_TREE);
24262 tree v4hi_ftype_v4hi_v4hi
24263 = build_function_type_list (V4HI_type_node,
24264 V4HI_type_node, V4HI_type_node, NULL_TREE);
24265 tree v2si_ftype_v2si_v2si
24266 = build_function_type_list (V2SI_type_node,
24267 V2SI_type_node, V2SI_type_node, NULL_TREE);
24268 tree di_ftype_di_di
24269 = build_function_type_list (long_long_unsigned_type_node,
24270 long_long_unsigned_type_node,
24271 long_long_unsigned_type_node,
24272 NULL_TREE);
24274 /* Add all builtins that are more or less simple operations on two
24275 operands. */
24276 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24278 /* Use one of the operands; the target can have a different mode for
24279 mask-generating compares. */
24280 enum machine_mode mode;
24281 tree type;
24283 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24284 continue;
24286 mode = insn_data[d->icode].operand[1].mode;
24288 switch (mode)
24290 case V8QImode:
24291 type = v8qi_ftype_v8qi_v8qi;
24292 break;
24293 case V4HImode:
24294 type = v4hi_ftype_v4hi_v4hi;
24295 break;
24296 case V2SImode:
24297 type = v2si_ftype_v2si_v2si;
24298 break;
24299 case DImode:
24300 type = di_ftype_di_di;
24301 break;
24303 default:
24304 gcc_unreachable ();
24307 def_mbuiltin (d->mask, d->name, type, d->code);
24310 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24311 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24312 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24313 ARM_BUILTIN_ ## CODE)
24315 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24316 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24317 ARM_BUILTIN_ ## CODE)
24319 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24320 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24321 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24322 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24323 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24324 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24325 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24326 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24327 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24329 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24330 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24331 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24332 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24333 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24334 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24336 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24337 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24338 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24339 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24340 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24341 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24343 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24344 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24345 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24346 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24347 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24348 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24350 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24351 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24352 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24353 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24354 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24355 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24357 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24359 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24360 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24361 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24362 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24363 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24364 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24365 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24366 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24367 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24368 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24370 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24371 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24372 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24373 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24374 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24375 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24376 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24377 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24378 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24380 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24381 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24382 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24384 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24385 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24386 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24388 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24389 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24391 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24392 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24393 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24394 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24395 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24396 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24398 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24399 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24400 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24401 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24402 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24403 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24404 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24405 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24406 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24407 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24408 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24409 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24411 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24412 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24413 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24414 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24416 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24417 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24418 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24419 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24420 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24421 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24422 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24424 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24425 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24426 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24428 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24429 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24430 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24431 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24433 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24434 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24435 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24436 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24438 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24439 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24440 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24441 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24443 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24444 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24445 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24446 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24448 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24449 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24450 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24451 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24453 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24454 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24455 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24456 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24458 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24460 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24461 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24462 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24464 #undef iwmmx_mbuiltin
24465 #undef iwmmx2_mbuiltin
24468 static void
24469 arm_init_fp16_builtins (void)
24471 tree fp16_type = make_node (REAL_TYPE);
24472 TYPE_PRECISION (fp16_type) = 16;
24473 layout_type (fp16_type);
24474 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
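/* __fp16 is registered as a bare 16-bit REAL_TYPE; no arithmetic is done
   directly in HFmode.  arm_promoted_type below promotes it to float, so an
   expression like (illustrative)
     __fp16 a, b;  float c = a + b;
   is evaluated in single precision.  */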
24477 static void
24478 arm_init_crc32_builtins ()
24480 tree si_ftype_si_qi
24481 = build_function_type_list (unsigned_intSI_type_node,
24482 unsigned_intSI_type_node,
24483 unsigned_intQI_type_node, NULL_TREE);
24484 tree si_ftype_si_hi
24485 = build_function_type_list (unsigned_intSI_type_node,
24486 unsigned_intSI_type_node,
24487 unsigned_intHI_type_node, NULL_TREE);
24488 tree si_ftype_si_si
24489 = build_function_type_list (unsigned_intSI_type_node,
24490 unsigned_intSI_type_node,
24491 unsigned_intSI_type_node, NULL_TREE);
24493 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24494 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24495 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24496 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24497 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24498 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24499 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24500 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24501 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24502 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24503 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24504 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24505 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24506 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24507 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24508 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24509 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24510 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
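/* A typical (illustrative) use of the builtins registered above, matching
   the (uint32_t, uint8_t/uint16_t/uint32_t) -> uint32_t prototypes:
     crc = __builtin_arm_crc32b (crc, byte);
     crc = __builtin_arm_crc32w (crc, word);  */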
24513 static void
24514 arm_init_builtins (void)
24516 if (TARGET_REALLY_IWMMXT)
24517 arm_init_iwmmxt_builtins ();
24519 if (TARGET_NEON)
24520 arm_init_neon_builtins ();
24522 if (arm_fp16_format)
24523 arm_init_fp16_builtins ();
24525 if (TARGET_CRC32)
24526 arm_init_crc32_builtins ();
24529 /* Return the ARM builtin for CODE. */
24531 static tree
24532 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24534 if (code >= ARM_BUILTIN_MAX)
24535 return error_mark_node;
24537 return arm_builtin_decls[code];
24540 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24542 static const char *
24543 arm_invalid_parameter_type (const_tree t)
24545 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24546 return N_("function parameters cannot have __fp16 type");
24547 return NULL;
24550 /* Implement TARGET_INVALID_RETURN_TYPE. */
24552 static const char *
24553 arm_invalid_return_type (const_tree t)
24555 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24556 return N_("functions cannot return __fp16 type");
24557 return NULL;
24560 /* Implement TARGET_PROMOTED_TYPE. */
24562 static tree
24563 arm_promoted_type (const_tree t)
24565 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24566 return float_type_node;
24567 return NULL_TREE;
24570 /* Implement TARGET_CONVERT_TO_TYPE.
24571 Specifically, this hook implements the peculiarity of the ARM
24572 half-precision floating-point C semantics that requires conversions
24573 between __fp16 and double to go through an intermediate conversion to float. */
24575 static tree
24576 arm_convert_to_type (tree type, tree expr)
24578 tree fromtype = TREE_TYPE (expr);
24579 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24580 return NULL_TREE;
24581 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24582 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24583 return convert (type, convert (float_type_node, expr));
24584 return NULL_TREE;
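/* Hence a conversion such as (double) h, with h of type __fp16, is expanded
   as (double)(float) h, and likewise in the other direction; conversions
   between __fp16 and float themselves take the normal path.  */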
24587 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24588 This simply adds HFmode as a supported mode; even though we don't
24589 implement arithmetic on this type directly, it's supported by
24590 optabs conversions, much the way the double-word arithmetic is
24591 special-cased in the default hook. */
24593 static bool
24594 arm_scalar_mode_supported_p (enum machine_mode mode)
24596 if (mode == HFmode)
24597 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24598 else if (ALL_FIXED_POINT_MODE_P (mode))
24599 return true;
24600 else
24601 return default_scalar_mode_supported_p (mode);
24604 /* Errors in the source file can cause expand_expr to return const0_rtx
24605 where we expect a vector. To avoid crashing, use one of the vector
24606 clear instructions. */
24608 static rtx
24609 safe_vector_operand (rtx x, enum machine_mode mode)
24611 if (x != const0_rtx)
24612 return x;
24613 x = gen_reg_rtx (mode);
24615 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24616 : gen_rtx_SUBREG (DImode, x, 0)));
24617 return x;
24620 /* Function to expand ternary builtins. */
24621 static rtx
24622 arm_expand_ternop_builtin (enum insn_code icode,
24623 tree exp, rtx target)
24625 rtx pat;
24626 tree arg0 = CALL_EXPR_ARG (exp, 0);
24627 tree arg1 = CALL_EXPR_ARG (exp, 1);
24628 tree arg2 = CALL_EXPR_ARG (exp, 2);
24630 rtx op0 = expand_normal (arg0);
24631 rtx op1 = expand_normal (arg1);
24632 rtx op2 = expand_normal (arg2);
24633 rtx op3 = NULL_RTX;
24635 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24636 lane operand depending on endianness. */
24637 bool builtin_sha1cpm_p = false;
24639 if (insn_data[icode].n_operands == 5)
24641 gcc_assert (icode == CODE_FOR_crypto_sha1c
24642 || icode == CODE_FOR_crypto_sha1p
24643 || icode == CODE_FOR_crypto_sha1m);
24644 builtin_sha1cpm_p = true;
24646 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24647 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24648 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24649 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24652 if (VECTOR_MODE_P (mode0))
24653 op0 = safe_vector_operand (op0, mode0);
24654 if (VECTOR_MODE_P (mode1))
24655 op1 = safe_vector_operand (op1, mode1);
24656 if (VECTOR_MODE_P (mode2))
24657 op2 = safe_vector_operand (op2, mode2);
24659 if (! target
24660 || GET_MODE (target) != tmode
24661 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24662 target = gen_reg_rtx (tmode);
24664 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24665 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24666 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24668 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24669 op0 = copy_to_mode_reg (mode0, op0);
24670 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24671 op1 = copy_to_mode_reg (mode1, op1);
24672 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24673 op2 = copy_to_mode_reg (mode2, op2);
24674 if (builtin_sha1cpm_p)
24675 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24677 if (builtin_sha1cpm_p)
24678 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24679 else
24680 pat = GEN_FCN (icode) (target, op0, op1, op2);
24681 if (! pat)
24682 return 0;
24683 emit_insn (pat);
24684 return target;
24687 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24689 static rtx
24690 arm_expand_binop_builtin (enum insn_code icode,
24691 tree exp, rtx target)
24693 rtx pat;
24694 tree arg0 = CALL_EXPR_ARG (exp, 0);
24695 tree arg1 = CALL_EXPR_ARG (exp, 1);
24696 rtx op0 = expand_normal (arg0);
24697 rtx op1 = expand_normal (arg1);
24698 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24699 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24700 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24702 if (VECTOR_MODE_P (mode0))
24703 op0 = safe_vector_operand (op0, mode0);
24704 if (VECTOR_MODE_P (mode1))
24705 op1 = safe_vector_operand (op1, mode1);
24707 if (! target
24708 || GET_MODE (target) != tmode
24709 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24710 target = gen_reg_rtx (tmode);
24712 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24713 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24715 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24716 op0 = copy_to_mode_reg (mode0, op0);
24717 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24718 op1 = copy_to_mode_reg (mode1, op1);
24720 pat = GEN_FCN (icode) (target, op0, op1);
24721 if (! pat)
24722 return 0;
24723 emit_insn (pat);
24724 return target;
24727 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24729 static rtx
24730 arm_expand_unop_builtin (enum insn_code icode,
24731 tree exp, rtx target, int do_load)
24733 rtx pat;
24734 tree arg0 = CALL_EXPR_ARG (exp, 0);
24735 rtx op0 = expand_normal (arg0);
24736 rtx op1 = NULL_RTX;
24737 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24738 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24739 bool builtin_sha1h_p = false;
24741 if (insn_data[icode].n_operands == 3)
24743 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24744 builtin_sha1h_p = true;
24747 if (! target
24748 || GET_MODE (target) != tmode
24749 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24750 target = gen_reg_rtx (tmode);
24751 if (do_load)
24752 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24753 else
24755 if (VECTOR_MODE_P (mode0))
24756 op0 = safe_vector_operand (op0, mode0);
24758 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24759 op0 = copy_to_mode_reg (mode0, op0);
24761 if (builtin_sha1h_p)
24762 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24764 if (builtin_sha1h_p)
24765 pat = GEN_FCN (icode) (target, op0, op1);
24766 else
24767 pat = GEN_FCN (icode) (target, op0);
24768 if (! pat)
24769 return 0;
24770 emit_insn (pat);
24771 return target;
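/* Classification of the arguments passed (as a NEON_ARG_STOP-terminated
   vararg list) to arm_expand_neon_args below:
   NEON_ARG_COPY_TO_REG  operand is forced into a register if the insn
                         predicate rejects it;
   NEON_ARG_CONSTANT     operand must already satisfy a constant predicate;
   NEON_ARG_MEMORY       operand is a pointer, rewritten into a memory
                         reference via neon_dereference_pointer;
   NEON_ARG_STOP         terminates the argument list.  */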
24774 typedef enum {
24775 NEON_ARG_COPY_TO_REG,
24776 NEON_ARG_CONSTANT,
24777 NEON_ARG_MEMORY,
24778 NEON_ARG_STOP
24779 } builtin_arg;
24781 #define NEON_MAX_BUILTIN_ARGS 5
24783 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24784 and return an expression for the accessed memory.
24786 The intrinsic function operates on a block of registers that has
24787 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24788 function references the memory at EXP of type TYPE and in mode
24789 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24790 available. */
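/* Illustrative sizes: a full structure load of two quadword vectors of
   unsigned chars has reg_size == 32 and vector_size == 16, so nvectors == 2;
   with MEM_MODE == REG_MODE the access then covers 32 elements, whereas a
   lane access (MEM_MODE != REG_MODE) touches only nvectors == 2 elements.  */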
24792 static tree
24793 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24794 enum machine_mode reg_mode,
24795 neon_builtin_type_mode type_mode)
24797 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24798 tree elem_type, upper_bound, array_type;
24800 /* Work out the size of the register block in bytes. */
24801 reg_size = GET_MODE_SIZE (reg_mode);
24803 /* Work out the size of each vector in bytes. */
24804 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24805 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24807 /* Work out how many vectors there are. */
24808 gcc_assert (reg_size % vector_size == 0);
24809 nvectors = reg_size / vector_size;
24811 /* Work out the type of each element. */
24812 gcc_assert (POINTER_TYPE_P (type));
24813 elem_type = TREE_TYPE (type);
24815 /* Work out how many elements are being loaded or stored.
24816 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24817 and memory elements; anything else implies a lane load or store. */
24818 if (mem_mode == reg_mode)
24819 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24820 else
24821 nelems = nvectors;
24823 /* Create a type that describes the full access. */
24824 upper_bound = build_int_cst (size_type_node, nelems - 1);
24825 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24827 /* Dereference EXP using that type. */
24828 return fold_build2 (MEM_REF, array_type, exp,
24829 build_int_cst (build_pointer_type (array_type), 0));
24832 /* Expand a Neon builtin. */
24833 static rtx
24834 arm_expand_neon_args (rtx target, int icode, int have_retval,
24835 neon_builtin_type_mode type_mode,
24836 tree exp, int fcode, ...)
24838 va_list ap;
24839 rtx pat;
24840 tree arg[NEON_MAX_BUILTIN_ARGS];
24841 rtx op[NEON_MAX_BUILTIN_ARGS];
24842 tree arg_type;
24843 tree formals;
24844 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24845 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24846 enum machine_mode other_mode;
24847 int argc = 0;
24848 int opno;
24850 if (have_retval
24851 && (!target
24852 || GET_MODE (target) != tmode
24853 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24854 target = gen_reg_rtx (tmode);
24856 va_start (ap, fcode);
24858 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24860 for (;;)
24862 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24864 if (thisarg == NEON_ARG_STOP)
24865 break;
24866 else
24868 opno = argc + have_retval;
24869 mode[argc] = insn_data[icode].operand[opno].mode;
24870 arg[argc] = CALL_EXPR_ARG (exp, argc);
24871 arg_type = TREE_VALUE (formals);
24872 if (thisarg == NEON_ARG_MEMORY)
24874 other_mode = insn_data[icode].operand[1 - opno].mode;
24875 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24876 mode[argc], other_mode,
24877 type_mode);
24880 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
24881 is returned. */
24882 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
24883 (thisarg == NEON_ARG_MEMORY
24884 ? EXPAND_MEMORY : EXPAND_NORMAL));
24886 switch (thisarg)
24888 case NEON_ARG_COPY_TO_REG:
24889 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24890 if (!(*insn_data[icode].operand[opno].predicate)
24891 (op[argc], mode[argc]))
24892 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24893 break;
24895 case NEON_ARG_CONSTANT:
24896 /* FIXME: This error message is somewhat unhelpful. */
24897 if (!(*insn_data[icode].operand[opno].predicate)
24898 (op[argc], mode[argc]))
24899 error ("argument must be a constant");
24900 break;
24902 case NEON_ARG_MEMORY:
24903 /* Check if expand failed. */
24904 if (op[argc] == const0_rtx)
24905 return 0;
24906 gcc_assert (MEM_P (op[argc]));
24907 PUT_MODE (op[argc], mode[argc]);
24908 /* ??? arm_neon.h uses the same built-in functions for signed
24909 and unsigned accesses, casting where necessary. This isn't
24910 alias safe. */
24911 set_mem_alias_set (op[argc], 0);
24912 if (!(*insn_data[icode].operand[opno].predicate)
24913 (op[argc], mode[argc]))
24914 op[argc] = (replace_equiv_address
24915 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24916 break;
24918 case NEON_ARG_STOP:
24919 gcc_unreachable ();
24922 argc++;
24923 formals = TREE_CHAIN (formals);
24927 va_end (ap);
24929 if (have_retval)
24930 switch (argc)
24932 case 1:
24933 pat = GEN_FCN (icode) (target, op[0]);
24934 break;
24936 case 2:
24937 pat = GEN_FCN (icode) (target, op[0], op[1]);
24938 break;
24940 case 3:
24941 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24942 break;
24944 case 4:
24945 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24946 break;
24948 case 5:
24949 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24950 break;
24952 default:
24953 gcc_unreachable ();
24955 else
24956 switch (argc)
24958 case 1:
24959 pat = GEN_FCN (icode) (op[0]);
24960 break;
24962 case 2:
24963 pat = GEN_FCN (icode) (op[0], op[1]);
24964 break;
24966 case 3:
24967 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24968 break;
24970 case 4:
24971 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24972 break;
24974 case 5:
24975 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24976 break;
24978 default:
24979 gcc_unreachable ();
24982 if (!pat)
24983 return 0;
24985 emit_insn (pat);
24987 return target;
24990 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24991 constants defined per-instruction or per instruction-variant. Instead, the
24992 required info is looked up in the table neon_builtin_data. */
24993 static rtx
24994 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24996 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24997 neon_itype itype = d->itype;
24998 enum insn_code icode = d->code;
24999 neon_builtin_type_mode type_mode = d->mode;
25001 switch (itype)
25003 case NEON_UNOP:
25004 case NEON_CONVERT:
25005 case NEON_DUPLANE:
25006 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25007 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25009 case NEON_BINOP:
25010 case NEON_SETLANE:
25011 case NEON_SCALARMUL:
25012 case NEON_SCALARMULL:
25013 case NEON_SCALARMULH:
25014 case NEON_SHIFTINSERT:
25015 case NEON_LOGICBINOP:
25016 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25017 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25018 NEON_ARG_STOP);
25020 case NEON_TERNOP:
25021 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25022 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25023 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25025 case NEON_GETLANE:
25026 case NEON_FIXCONV:
25027 case NEON_SHIFTIMM:
25028 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25029 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25030 NEON_ARG_STOP);
25032 case NEON_CREATE:
25033 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25034 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25036 case NEON_DUP:
25037 case NEON_RINT:
25038 case NEON_SPLIT:
25039 case NEON_FLOAT_WIDEN:
25040 case NEON_FLOAT_NARROW:
25041 case NEON_REINTERP:
25042 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25043 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25045 case NEON_COMBINE:
25046 case NEON_VTBL:
25047 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25048 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25050 case NEON_RESULTPAIR:
25051 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25052 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25053 NEON_ARG_STOP);
25055 case NEON_LANEMUL:
25056 case NEON_LANEMULL:
25057 case NEON_LANEMULH:
25058 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25059 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25060 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25062 case NEON_LANEMAC:
25063 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25064 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25065 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25067 case NEON_SHIFTACC:
25068 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25069 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25070 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25072 case NEON_SCALARMAC:
25073 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25074 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25075 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25077 case NEON_SELECT:
25078 case NEON_VTBX:
25079 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25080 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25081 NEON_ARG_STOP);
25083 case NEON_LOAD1:
25084 case NEON_LOADSTRUCT:
25085 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25086 NEON_ARG_MEMORY, NEON_ARG_STOP);
25088 case NEON_LOAD1LANE:
25089 case NEON_LOADSTRUCTLANE:
25090 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25091 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25092 NEON_ARG_STOP);
25094 case NEON_STORE1:
25095 case NEON_STORESTRUCT:
25096 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25097 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25099 case NEON_STORE1LANE:
25100 case NEON_STORESTRUCTLANE:
25101 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25102 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25103 NEON_ARG_STOP);
25106 gcc_unreachable ();
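/* Sketch of the expansion scheme (informal): each case above simply
   describes the builtin's operand shape to arm_expand_neon_args as a
   list of NEON_ARG_* codes terminated by NEON_ARG_STOP.  A NEON_BINOP,
   for instance, takes two register operands plus a trailing constant;
   the store cases pass 0 rather than 1 as the third argument, which
   presumably means no value is returned in TARGET.  */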
25109 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25110 void
25111 neon_reinterpret (rtx dest, rtx src)
25113 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
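/* Note that gen_lowpart here gives a pure bit-for-bit reinterpretation:
   viewing, say, a V8QI value as V4HI leaves the underlying 64 bits
   untouched and emits no element conversion.  */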
25116 /* Emit code to place a Neon pair result in memory locations (with equal
25117 registers). */
25118 void
25119 neon_emit_pair_result_insn (enum machine_mode mode,
25120 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25121 rtx op1, rtx op2)
25123 rtx mem = gen_rtx_MEM (mode, destaddr);
25124 rtx tmp1 = gen_reg_rtx (mode);
25125 rtx tmp2 = gen_reg_rtx (mode);
25127 emit_insn (intfn (tmp1, op1, op2, tmp2));
25129 emit_move_insn (mem, tmp1);
25130 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25131 emit_move_insn (mem, tmp2);
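/* The two halves therefore end up back to back: the first at DESTADDR and
   the second at DESTADDR + GET_MODE_SIZE (mode), via the adjust_address
   call above.  */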
25134 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25135 not to early-clobber SRC registers in the process.
25137 We assume that the operands described by SRC and DEST represent a
25138 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25139 number of components into which the copy has been decomposed. */
25140 void
25141 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25143 unsigned int i;
25145 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25146 || REGNO (operands[0]) < REGNO (operands[1]))
25148 for (i = 0; i < count; i++)
25150 operands[2 * i] = dest[i];
25151 operands[2 * i + 1] = src[i];
25154 else
25156 for (i = 0; i < count; i++)
25158 operands[2 * i] = dest[count - i - 1];
25159 operands[2 * i + 1] = src[count - i - 1];
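/* Informal example: copying d1/d2 from d2/d3 (destination below source)
   keeps the natural low-to-high order, while copying d2/d3 from d1/d2
   (overlapping, destination above source) is reversed, so no source
   register is overwritten before it has been read.  */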
25164 /* Split operands into moves from op[1] + op[2] into op[0]. */
25166 void
25167 neon_split_vcombine (rtx operands[3])
25169 unsigned int dest = REGNO (operands[0]);
25170 unsigned int src1 = REGNO (operands[1]);
25171 unsigned int src2 = REGNO (operands[2]);
25172 enum machine_mode halfmode = GET_MODE (operands[1]);
25173 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25174 rtx destlo, desthi;
25176 if (src1 == dest && src2 == dest + halfregs)
25178 /* No-op move. Can't split to nothing; emit something. */
25179 emit_note (NOTE_INSN_DELETED);
25180 return;
25183 /* Preserve register attributes for variable tracking. */
25184 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25185 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25186 GET_MODE_SIZE (halfmode));
25188 /* Special case of reversed high/low parts. Use VSWP. */
25189 if (src2 == dest && src1 == dest + halfregs)
25191 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25192 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25193 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25194 return;
25197 if (!reg_overlap_mentioned_p (operands[2], destlo))
25199 /* Try to avoid unnecessary moves if part of the result
25200 is in the right place already. */
25201 if (src1 != dest)
25202 emit_move_insn (destlo, operands[1]);
25203 if (src2 != dest + halfregs)
25204 emit_move_insn (desthi, operands[2]);
25206 else
25208 if (src2 != dest + halfregs)
25209 emit_move_insn (desthi, operands[2]);
25210 if (src1 != dest)
25211 emit_move_insn (destlo, operands[1]);
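/* Summary (informal): a vcombine of two D registers into a Q register
   splits into nothing but a deleted-insn note when the sources already
   form the destination, a single two-set PARALLEL (matching VSWP) when
   the halves are exactly swapped, or at most two ordinary moves emitted
   in whichever order avoids clobbering a source that is still needed.  */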
25215 /* Expand an expression EXP that calls a built-in function,
25216 with result going to TARGET if that's convenient
25217 (and in mode MODE if that's convenient).
25218 SUBTARGET may be used as the target for computing one of EXP's operands.
25219 IGNORE is nonzero if the value is to be ignored. */
25221 static rtx
25222 arm_expand_builtin (tree exp,
25223 rtx target,
25224 rtx subtarget ATTRIBUTE_UNUSED,
25225 enum machine_mode mode ATTRIBUTE_UNUSED,
25226 int ignore ATTRIBUTE_UNUSED)
25228 const struct builtin_description * d;
25229 enum insn_code icode;
25230 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25231 tree arg0;
25232 tree arg1;
25233 tree arg2;
25234 rtx op0;
25235 rtx op1;
25236 rtx op2;
25237 rtx pat;
25238 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25239 size_t i;
25240 enum machine_mode tmode;
25241 enum machine_mode mode0;
25242 enum machine_mode mode1;
25243 enum machine_mode mode2;
25244 int opint;
25245 int selector;
25246 int mask;
25247 int imm;
25249 if (fcode >= ARM_BUILTIN_NEON_BASE)
25250 return arm_expand_neon_builtin (fcode, exp, target);
25252 switch (fcode)
25254 case ARM_BUILTIN_TEXTRMSB:
25255 case ARM_BUILTIN_TEXTRMUB:
25256 case ARM_BUILTIN_TEXTRMSH:
25257 case ARM_BUILTIN_TEXTRMUH:
25258 case ARM_BUILTIN_TEXTRMSW:
25259 case ARM_BUILTIN_TEXTRMUW:
25260 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25261 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25262 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25263 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25264 : CODE_FOR_iwmmxt_textrmw);
25266 arg0 = CALL_EXPR_ARG (exp, 0);
25267 arg1 = CALL_EXPR_ARG (exp, 1);
25268 op0 = expand_normal (arg0);
25269 op1 = expand_normal (arg1);
25270 tmode = insn_data[icode].operand[0].mode;
25271 mode0 = insn_data[icode].operand[1].mode;
25272 mode1 = insn_data[icode].operand[2].mode;
25274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25275 op0 = copy_to_mode_reg (mode0, op0);
25276 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25278 /* @@@ better error message */
25279 error ("selector must be an immediate");
25280 return gen_reg_rtx (tmode);
25283 opint = INTVAL (op1);
25284 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25286 if (opint > 7 || opint < 0)
25287 error ("the range of selector should be in 0 to 7");
25289 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25291 if (opint > 3 || opint < 0)
25292 error ("the range of selector should be in 0 to 3");
25294 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25296 if (opint > 1 || opint < 0)
25297 error ("the range of selector should be in 0 to 1");
25300 if (target == 0
25301 || GET_MODE (target) != tmode
25302 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25303 target = gen_reg_rtx (tmode);
25304 pat = GEN_FCN (icode) (target, op0, op1);
25305 if (! pat)
25306 return 0;
25307 emit_insn (pat);
25308 return target;
25310 case ARM_BUILTIN_WALIGNI:
25311 /* If op2 is immediate, call waligni, else call walignr. */
25312 arg0 = CALL_EXPR_ARG (exp, 0);
25313 arg1 = CALL_EXPR_ARG (exp, 1);
25314 arg2 = CALL_EXPR_ARG (exp, 2);
25315 op0 = expand_normal (arg0);
25316 op1 = expand_normal (arg1);
25317 op2 = expand_normal (arg2);
25318 if (CONST_INT_P (op2))
25320 icode = CODE_FOR_iwmmxt_waligni;
25321 tmode = insn_data[icode].operand[0].mode;
25322 mode0 = insn_data[icode].operand[1].mode;
25323 mode1 = insn_data[icode].operand[2].mode;
25324 mode2 = insn_data[icode].operand[3].mode;
25325 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25326 op0 = copy_to_mode_reg (mode0, op0);
25327 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25328 op1 = copy_to_mode_reg (mode1, op1);
25329 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25330 selector = INTVAL (op2);
25331 if (selector > 7 || selector < 0)
25332 error ("the range of selector should be in 0 to 7");
25334 else
25336 icode = CODE_FOR_iwmmxt_walignr;
25337 tmode = insn_data[icode].operand[0].mode;
25338 mode0 = insn_data[icode].operand[1].mode;
25339 mode1 = insn_data[icode].operand[2].mode;
25340 mode2 = insn_data[icode].operand[3].mode;
25341 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25342 op0 = copy_to_mode_reg (mode0, op0);
25343 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25344 op1 = copy_to_mode_reg (mode1, op1);
25345 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25346 op2 = copy_to_mode_reg (mode2, op2);
25348 if (target == 0
25349 || GET_MODE (target) != tmode
25350 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25351 target = gen_reg_rtx (tmode);
25352 pat = GEN_FCN (icode) (target, op0, op1, op2);
25353 if (!pat)
25354 return 0;
25355 emit_insn (pat);
25356 return target;
25358 case ARM_BUILTIN_TINSRB:
25359 case ARM_BUILTIN_TINSRH:
25360 case ARM_BUILTIN_TINSRW:
25361 case ARM_BUILTIN_WMERGE:
25362 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25363 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25364 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25365 : CODE_FOR_iwmmxt_tinsrw);
25366 arg0 = CALL_EXPR_ARG (exp, 0);
25367 arg1 = CALL_EXPR_ARG (exp, 1);
25368 arg2 = CALL_EXPR_ARG (exp, 2);
25369 op0 = expand_normal (arg0);
25370 op1 = expand_normal (arg1);
25371 op2 = expand_normal (arg2);
25372 tmode = insn_data[icode].operand[0].mode;
25373 mode0 = insn_data[icode].operand[1].mode;
25374 mode1 = insn_data[icode].operand[2].mode;
25375 mode2 = insn_data[icode].operand[3].mode;
25377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25378 op0 = copy_to_mode_reg (mode0, op0);
25379 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25380 op1 = copy_to_mode_reg (mode1, op1);
25381 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25383 error ("selector must be an immediate");
25384 return const0_rtx;
25386 if (icode == CODE_FOR_iwmmxt_wmerge)
25388 selector = INTVAL (op2);
25389 if (selector > 7 || selector < 0)
25390 error ("the range of selector should be in 0 to 7");
25392 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25393 || (icode == CODE_FOR_iwmmxt_tinsrh)
25394 || (icode == CODE_FOR_iwmmxt_tinsrw))
25396 mask = 0x01;
25397 selector= INTVAL (op2);
25398 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25399 error ("the range of selector should be in 0 to 7");
25400 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25401 error ("the range of selector should be in 0 to 3");
25402 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25403 error ("the range of selector should be in 0 to 1");
25404 mask <<= selector;
25405 op2 = GEN_INT (mask);
25407 if (target == 0
25408 || GET_MODE (target) != tmode
25409 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25410 target = gen_reg_rtx (tmode);
25411 pat = GEN_FCN (icode) (target, op0, op1, op2);
25412 if (! pat)
25413 return 0;
25414 emit_insn (pat);
25415 return target;
25417 case ARM_BUILTIN_SETWCGR0:
25418 case ARM_BUILTIN_SETWCGR1:
25419 case ARM_BUILTIN_SETWCGR2:
25420 case ARM_BUILTIN_SETWCGR3:
25421 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25422 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25423 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25424 : CODE_FOR_iwmmxt_setwcgr3);
25425 arg0 = CALL_EXPR_ARG (exp, 0);
25426 op0 = expand_normal (arg0);
25427 mode0 = insn_data[icode].operand[0].mode;
25428 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25429 op0 = copy_to_mode_reg (mode0, op0);
25430 pat = GEN_FCN (icode) (op0);
25431 if (!pat)
25432 return 0;
25433 emit_insn (pat);
25434 return 0;
25436 case ARM_BUILTIN_GETWCGR0:
25437 case ARM_BUILTIN_GETWCGR1:
25438 case ARM_BUILTIN_GETWCGR2:
25439 case ARM_BUILTIN_GETWCGR3:
25440 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25441 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25442 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25443 : CODE_FOR_iwmmxt_getwcgr3);
25444 tmode = insn_data[icode].operand[0].mode;
25445 if (target == 0
25446 || GET_MODE (target) != tmode
25447 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25448 target = gen_reg_rtx (tmode);
25449 pat = GEN_FCN (icode) (target);
25450 if (!pat)
25451 return 0;
25452 emit_insn (pat);
25453 return target;
25455 case ARM_BUILTIN_WSHUFH:
25456 icode = CODE_FOR_iwmmxt_wshufh;
25457 arg0 = CALL_EXPR_ARG (exp, 0);
25458 arg1 = CALL_EXPR_ARG (exp, 1);
25459 op0 = expand_normal (arg0);
25460 op1 = expand_normal (arg1);
25461 tmode = insn_data[icode].operand[0].mode;
25462 mode1 = insn_data[icode].operand[1].mode;
25463 mode2 = insn_data[icode].operand[2].mode;
25465 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25466 op0 = copy_to_mode_reg (mode1, op0);
25467 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25469 error ("mask must be an immediate");
25470 return const0_rtx;
25472 selector = INTVAL (op1);
25473 if (selector < 0 || selector > 255)
25474 error ("the range of mask should be in 0 to 255");
25475 if (target == 0
25476 || GET_MODE (target) != tmode
25477 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25478 target = gen_reg_rtx (tmode);
25479 pat = GEN_FCN (icode) (target, op0, op1);
25480 if (! pat)
25481 return 0;
25482 emit_insn (pat);
25483 return target;
25485 case ARM_BUILTIN_WMADDS:
25486 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25487 case ARM_BUILTIN_WMADDSX:
25488 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25489 case ARM_BUILTIN_WMADDSN:
25490 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25491 case ARM_BUILTIN_WMADDU:
25492 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25493 case ARM_BUILTIN_WMADDUX:
25494 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25495 case ARM_BUILTIN_WMADDUN:
25496 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25497 case ARM_BUILTIN_WSADBZ:
25498 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25499 case ARM_BUILTIN_WSADHZ:
25500 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25502 /* Several three-argument builtins. */
25503 case ARM_BUILTIN_WMACS:
25504 case ARM_BUILTIN_WMACU:
25505 case ARM_BUILTIN_TMIA:
25506 case ARM_BUILTIN_TMIAPH:
25507 case ARM_BUILTIN_TMIATT:
25508 case ARM_BUILTIN_TMIATB:
25509 case ARM_BUILTIN_TMIABT:
25510 case ARM_BUILTIN_TMIABB:
25511 case ARM_BUILTIN_WQMIABB:
25512 case ARM_BUILTIN_WQMIABT:
25513 case ARM_BUILTIN_WQMIATB:
25514 case ARM_BUILTIN_WQMIATT:
25515 case ARM_BUILTIN_WQMIABBN:
25516 case ARM_BUILTIN_WQMIABTN:
25517 case ARM_BUILTIN_WQMIATBN:
25518 case ARM_BUILTIN_WQMIATTN:
25519 case ARM_BUILTIN_WMIABB:
25520 case ARM_BUILTIN_WMIABT:
25521 case ARM_BUILTIN_WMIATB:
25522 case ARM_BUILTIN_WMIATT:
25523 case ARM_BUILTIN_WMIABBN:
25524 case ARM_BUILTIN_WMIABTN:
25525 case ARM_BUILTIN_WMIATBN:
25526 case ARM_BUILTIN_WMIATTN:
25527 case ARM_BUILTIN_WMIAWBB:
25528 case ARM_BUILTIN_WMIAWBT:
25529 case ARM_BUILTIN_WMIAWTB:
25530 case ARM_BUILTIN_WMIAWTT:
25531 case ARM_BUILTIN_WMIAWBBN:
25532 case ARM_BUILTIN_WMIAWBTN:
25533 case ARM_BUILTIN_WMIAWTBN:
25534 case ARM_BUILTIN_WMIAWTTN:
25535 case ARM_BUILTIN_WSADB:
25536 case ARM_BUILTIN_WSADH:
25537 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25538 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25539 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25540 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25541 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25542 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25543 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25544 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25545 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25546 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25547 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25548 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25549 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25550 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25551 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25552 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25553 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25554 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25555 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25556 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25557 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25558 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25559 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25560 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25561 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25562 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25563 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25564 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25565 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25566 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25567 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25568 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25569 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25570 : CODE_FOR_iwmmxt_wsadh);
25571 arg0 = CALL_EXPR_ARG (exp, 0);
25572 arg1 = CALL_EXPR_ARG (exp, 1);
25573 arg2 = CALL_EXPR_ARG (exp, 2);
25574 op0 = expand_normal (arg0);
25575 op1 = expand_normal (arg1);
25576 op2 = expand_normal (arg2);
25577 tmode = insn_data[icode].operand[0].mode;
25578 mode0 = insn_data[icode].operand[1].mode;
25579 mode1 = insn_data[icode].operand[2].mode;
25580 mode2 = insn_data[icode].operand[3].mode;
25582 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25583 op0 = copy_to_mode_reg (mode0, op0);
25584 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25585 op1 = copy_to_mode_reg (mode1, op1);
25586 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25587 op2 = copy_to_mode_reg (mode2, op2);
25588 if (target == 0
25589 || GET_MODE (target) != tmode
25590 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25591 target = gen_reg_rtx (tmode);
25592 pat = GEN_FCN (icode) (target, op0, op1, op2);
25593 if (! pat)
25594 return 0;
25595 emit_insn (pat);
25596 return target;
25598 case ARM_BUILTIN_WZERO:
25599 target = gen_reg_rtx (DImode);
25600 emit_insn (gen_iwmmxt_clrdi (target));
25601 return target;
25603 case ARM_BUILTIN_WSRLHI:
25604 case ARM_BUILTIN_WSRLWI:
25605 case ARM_BUILTIN_WSRLDI:
25606 case ARM_BUILTIN_WSLLHI:
25607 case ARM_BUILTIN_WSLLWI:
25608 case ARM_BUILTIN_WSLLDI:
25609 case ARM_BUILTIN_WSRAHI:
25610 case ARM_BUILTIN_WSRAWI:
25611 case ARM_BUILTIN_WSRADI:
25612 case ARM_BUILTIN_WRORHI:
25613 case ARM_BUILTIN_WRORWI:
25614 case ARM_BUILTIN_WRORDI:
25615 case ARM_BUILTIN_WSRLH:
25616 case ARM_BUILTIN_WSRLW:
25617 case ARM_BUILTIN_WSRLD:
25618 case ARM_BUILTIN_WSLLH:
25619 case ARM_BUILTIN_WSLLW:
25620 case ARM_BUILTIN_WSLLD:
25621 case ARM_BUILTIN_WSRAH:
25622 case ARM_BUILTIN_WSRAW:
25623 case ARM_BUILTIN_WSRAD:
25624 case ARM_BUILTIN_WRORH:
25625 case ARM_BUILTIN_WRORW:
25626 case ARM_BUILTIN_WRORD:
25627 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25628 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25629 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25630 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25631 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25632 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25633 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25634 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25635 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25636 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25637 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25638 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25639 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25640 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25641 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25642 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25643 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25644 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25645 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25646 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25647 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25648 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25649 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25650 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25651 : CODE_FOR_nothing);
25652 arg1 = CALL_EXPR_ARG (exp, 1);
25653 op1 = expand_normal (arg1);
25654 if (GET_MODE (op1) == VOIDmode)
25656 imm = INTVAL (op1);
25657 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25658 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25659 && (imm < 0 || imm > 32))
25661 if (fcode == ARM_BUILTIN_WRORHI)
25662 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25663 else if (fcode == ARM_BUILTIN_WRORWI)
25664 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25665 else if (fcode == ARM_BUILTIN_WRORH)
25666 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25667 else
25668 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25670 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25671 && (imm < 0 || imm > 64))
25673 if (fcode == ARM_BUILTIN_WRORDI)
25674 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25675 else
25676 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25678 else if (imm < 0)
25680 if (fcode == ARM_BUILTIN_WSRLHI)
25681 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25682 else if (fcode == ARM_BUILTIN_WSRLWI)
25683 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25684 else if (fcode == ARM_BUILTIN_WSRLDI)
25685 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25686 else if (fcode == ARM_BUILTIN_WSLLHI)
25687 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25688 else if (fcode == ARM_BUILTIN_WSLLWI)
25689 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25690 else if (fcode == ARM_BUILTIN_WSLLDI)
25691 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25692 else if (fcode == ARM_BUILTIN_WSRAHI)
25693 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25694 else if (fcode == ARM_BUILTIN_WSRAWI)
25695 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25696 else if (fcode == ARM_BUILTIN_WSRADI)
25697 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25698 else if (fcode == ARM_BUILTIN_WSRLH)
25699 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25700 else if (fcode == ARM_BUILTIN_WSRLW)
25701 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25702 else if (fcode == ARM_BUILTIN_WSRLD)
25703 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25704 else if (fcode == ARM_BUILTIN_WSLLH)
25705 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25706 else if (fcode == ARM_BUILTIN_WSLLW)
25707 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25708 else if (fcode == ARM_BUILTIN_WSLLD)
25709 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25710 else if (fcode == ARM_BUILTIN_WSRAH)
25711 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25712 else if (fcode == ARM_BUILTIN_WSRAW)
25713 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25714 else
25715 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25718 return arm_expand_binop_builtin (icode, exp, target);
25720 default:
25721 break;
25724 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25725 if (d->code == (const enum arm_builtins) fcode)
25726 return arm_expand_binop_builtin (d->icode, exp, target);
25728 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25729 if (d->code == (const enum arm_builtins) fcode)
25730 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25732 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25733 if (d->code == (const enum arm_builtins) fcode)
25734 return arm_expand_ternop_builtin (d->icode, exp, target);
25736 /* @@@ Should really do something sensible here. */
25737 return NULL_RTX;
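/* Dispatch order above, for reference: Neon builtins first (codes at or
   above ARM_BUILTIN_NEON_BASE), then the explicitly listed iWMMXt cases,
   then a scan of the generic bdesc_2arg / bdesc_1arg / bdesc_3arg tables;
   an unrecognised code simply falls through to NULL_RTX.  */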
25740 /* Return the number (counting from 0) of
25741 the least significant set bit in MASK. */
25743 inline static int
25744 number_of_first_bit_set (unsigned mask)
25746 return ctz_hwi (mask);
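/* For instance, number_of_first_bit_set (0x28) is 3, since the least
   significant set bit of binary 101000 is bit 3.  */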
25749 /* Like emit_multi_reg_push, but allowing for a different set of
25750 registers to be described as saved. MASK is the set of registers
25751 to be saved; REAL_REGS is the set of registers to be described as
25752 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25754 static rtx
25755 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25757 unsigned long regno;
25758 rtx par[10], tmp, reg, insn;
25759 int i, j;
25761 /* Build the parallel of the registers actually being stored. */
25762 for (i = 0; mask; ++i, mask &= mask - 1)
25764 regno = ctz_hwi (mask);
25765 reg = gen_rtx_REG (SImode, regno);
25767 if (i == 0)
25768 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25769 else
25770 tmp = gen_rtx_USE (VOIDmode, reg);
25772 par[i] = tmp;
25775 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25776 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25777 tmp = gen_frame_mem (BLKmode, tmp);
25778 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25779 par[0] = tmp;
25781 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25782 insn = emit_insn (tmp);
25784 /* Always build the stack adjustment note for unwind info. */
25785 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25786 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25787 par[0] = tmp;
25789 /* Build the parallel of the registers recorded as saved for unwind. */
25790 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25792 regno = ctz_hwi (real_regs);
25793 reg = gen_rtx_REG (SImode, regno);
25795 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25796 tmp = gen_frame_mem (SImode, tmp);
25797 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25798 RTX_FRAME_RELATED_P (tmp) = 1;
25799 par[j + 1] = tmp;
25802 if (j == 0)
25803 tmp = par[0];
25804 else
25806 RTX_FRAME_RELATED_P (par[0]) = 1;
25807 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25812 return insn;
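/* Rough shape of the emitted RTL (illustrative): for a three-register MASK
   the insn is a PARALLEL whose first element stores a BLKmode group at
   (pre_modify sp (plus sp -12)) and whose remaining elements are USEs of
   the pushed registers; the REG_FRAME_RELATED_EXPR note attached above
   then records the SP adjustment plus the REAL_REGS saves for the
   unwinder.  */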
25815 /* Emit code to push or pop registers to or from the stack. F is the
25816 assembly file. MASK is the registers to pop. */
25817 static void
25818 thumb_pop (FILE *f, unsigned long mask)
25820 int regno;
25821 int lo_mask = mask & 0xFF;
25822 int pushed_words = 0;
25824 gcc_assert (mask);
25826 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25828 /* Special case. Do not generate a POP PC statement here, do it in
25829 thumb_exit() */
25830 thumb_exit (f, -1);
25831 return;
25834 fprintf (f, "\tpop\t{");
25836 /* Look at the low registers first. */
25837 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25839 if (lo_mask & 1)
25841 asm_fprintf (f, "%r", regno);
25843 if ((lo_mask & ~1) != 0)
25844 fprintf (f, ", ");
25846 pushed_words++;
25850 if (mask & (1 << PC_REGNUM))
25852 /* Catch popping the PC. */
25853 if (TARGET_INTERWORK || TARGET_BACKTRACE
25854 || crtl->calls_eh_return)
25856 /* The PC is never popped directly; instead
25857 it is popped into r3 and then BX is used. */
25858 fprintf (f, "}\n");
25860 thumb_exit (f, -1);
25862 return;
25864 else
25866 if (mask & 0xFF)
25867 fprintf (f, ", ");
25869 asm_fprintf (f, "%r", PC_REGNUM);
25873 fprintf (f, "}\n");
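/* Example output (informal): a MASK of r4, r5 and r7 produces
   "pop {r4, r5, r7}"; when PC is in the mask and interworking, a backtrace
   structure or an EH return is involved, the pop of PC is instead handled
   by thumb_exit.  */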
25876 /* Generate code to return from a thumb function.
25877 If 'reg_containing_return_addr' is -1, then the return address is
25878 actually on the stack, at the stack pointer. */
25879 static void
25880 thumb_exit (FILE *f, int reg_containing_return_addr)
25882 unsigned regs_available_for_popping;
25883 unsigned regs_to_pop;
25884 int pops_needed;
25885 unsigned available;
25886 unsigned required;
25887 int mode;
25888 int size;
25889 int restore_a4 = FALSE;
25891 /* Compute the registers we need to pop. */
25892 regs_to_pop = 0;
25893 pops_needed = 0;
25895 if (reg_containing_return_addr == -1)
25897 regs_to_pop |= 1 << LR_REGNUM;
25898 ++pops_needed;
25901 if (TARGET_BACKTRACE)
25903 /* Restore the (ARM) frame pointer and stack pointer. */
25904 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25905 pops_needed += 2;
25908 /* If there is nothing to pop then just emit the BX instruction and
25909 return. */
25910 if (pops_needed == 0)
25912 if (crtl->calls_eh_return)
25913 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25915 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25916 return;
25918 /* Otherwise if we are not supporting interworking and we have not created
25919 a backtrace structure and the function was not entered in ARM mode then
25920 just pop the return address straight into the PC. */
25921 else if (!TARGET_INTERWORK
25922 && !TARGET_BACKTRACE
25923 && !is_called_in_ARM_mode (current_function_decl)
25924 && !crtl->calls_eh_return)
25926 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25927 return;
25930 /* Find out how many of the (return) argument registers we can corrupt. */
25931 regs_available_for_popping = 0;
25933 /* If returning via __builtin_eh_return, the bottom three registers
25934 all contain information needed for the return. */
25935 if (crtl->calls_eh_return)
25936 size = 12;
25937 else
25939 /* Deduce the registers used from the function's return value.
25940 This is more reliable than examining df_regs_ever_live_p ()
25941 because that will be set if the register is ever used in the
25942 function, not just if the register is used to hold a return
25943 value. */
25945 if (crtl->return_rtx != 0)
25946 mode = GET_MODE (crtl->return_rtx);
25947 else
25948 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25950 size = GET_MODE_SIZE (mode);
25952 if (size == 0)
25954 /* In a void function we can use any argument register.
25955 In a function that returns a structure on the stack
25956 we can use the second and third argument registers. */
25957 if (mode == VOIDmode)
25958 regs_available_for_popping =
25959 (1 << ARG_REGISTER (1))
25960 | (1 << ARG_REGISTER (2))
25961 | (1 << ARG_REGISTER (3));
25962 else
25963 regs_available_for_popping =
25964 (1 << ARG_REGISTER (2))
25965 | (1 << ARG_REGISTER (3));
25967 else if (size <= 4)
25968 regs_available_for_popping =
25969 (1 << ARG_REGISTER (2))
25970 | (1 << ARG_REGISTER (3));
25971 else if (size <= 8)
25972 regs_available_for_popping =
25973 (1 << ARG_REGISTER (3));
25976 /* Match registers to be popped with registers into which we pop them. */
25977 for (available = regs_available_for_popping,
25978 required = regs_to_pop;
25979 required != 0 && available != 0;
25980 available &= ~(available & - available),
25981 required &= ~(required & - required))
25982 -- pops_needed;
25984 /* If we have any popping registers left over, remove them. */
25985 if (available > 0)
25986 regs_available_for_popping &= ~available;
25988 /* Otherwise if we need another popping register we can use
25989 the fourth argument register. */
25990 else if (pops_needed)
25992 /* If we have not found any free argument registers and
25993 reg a4 contains the return address, we must move it. */
25994 if (regs_available_for_popping == 0
25995 && reg_containing_return_addr == LAST_ARG_REGNUM)
25997 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25998 reg_containing_return_addr = LR_REGNUM;
26000 else if (size > 12)
26002 /* Register a4 is being used to hold part of the return value,
26003 but we have dire need of a free, low register. */
26004 restore_a4 = TRUE;
26006 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26009 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26011 /* The fourth argument register is available. */
26012 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26014 --pops_needed;
26018 /* Pop as many registers as we can. */
26019 thumb_pop (f, regs_available_for_popping);
26021 /* Process the registers we popped. */
26022 if (reg_containing_return_addr == -1)
26024 /* The return address was popped into the lowest numbered register. */
26025 regs_to_pop &= ~(1 << LR_REGNUM);
26027 reg_containing_return_addr =
26028 number_of_first_bit_set (regs_available_for_popping);
26030 /* Remove this register from the mask of available registers, so that
26031 the return address will not be corrupted by further pops. */
26032 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26035 /* If we popped other registers then handle them here. */
26036 if (regs_available_for_popping)
26038 int frame_pointer;
26040 /* Work out which register currently contains the frame pointer. */
26041 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26043 /* Move it into the correct place. */
26044 asm_fprintf (f, "\tmov\t%r, %r\n",
26045 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26047 /* (Temporarily) remove it from the mask of popped registers. */
26048 regs_available_for_popping &= ~(1 << frame_pointer);
26049 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26051 if (regs_available_for_popping)
26053 int stack_pointer;
26055 /* We popped the stack pointer as well,
26056 find the register that contains it. */
26057 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26059 /* Move it into the stack register. */
26060 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26062 /* At this point we have popped all necessary registers, so
26063 do not worry about restoring regs_available_for_popping
26064 to its correct value:
26066 assert (pops_needed == 0)
26067 assert (regs_available_for_popping == (1 << frame_pointer))
26068 assert (regs_to_pop == (1 << STACK_POINTER)) */
26070 else
26072 /* Since we have just moved the popped value into the frame
26073 pointer, the popping register is available for reuse, and
26074 we know that we still have the stack pointer left to pop. */
26075 regs_available_for_popping |= (1 << frame_pointer);
26079 /* If we still have registers left on the stack, but we no longer have
26080 any registers into which we can pop them, then we must move the return
26081 address into the link register and make available the register that
26082 contained it. */
26083 if (regs_available_for_popping == 0 && pops_needed > 0)
26085 regs_available_for_popping |= 1 << reg_containing_return_addr;
26087 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26088 reg_containing_return_addr);
26090 reg_containing_return_addr = LR_REGNUM;
26093 /* If we have registers left on the stack then pop some more.
26094 We know that at most we will want to pop FP and SP. */
26095 if (pops_needed > 0)
26097 int popped_into;
26098 int move_to;
26100 thumb_pop (f, regs_available_for_popping);
26102 /* We have popped either FP or SP.
26103 Move whichever one it is into the correct register. */
26104 popped_into = number_of_first_bit_set (regs_available_for_popping);
26105 move_to = number_of_first_bit_set (regs_to_pop);
26107 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26109 regs_to_pop &= ~(1 << move_to);
26111 --pops_needed;
26114 /* If we still have not popped everything then we must have only
26115 had one register available to us and we are now popping the SP. */
26116 if (pops_needed > 0)
26118 int popped_into;
26120 thumb_pop (f, regs_available_for_popping);
26122 popped_into = number_of_first_bit_set (regs_available_for_popping);
26124 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26126 assert (regs_to_pop == (1 << STACK_POINTER))
26127 assert (pops_needed == 1)
26131 /* If necessary restore the a4 register. */
26132 if (restore_a4)
26134 if (reg_containing_return_addr != LR_REGNUM)
26136 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26137 reg_containing_return_addr = LR_REGNUM;
26140 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26143 if (crtl->calls_eh_return)
26144 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26146 /* Return to caller. */
26147 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
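/* When the return address is on the stack and none of interworking,
   backtrace, ARM-mode entry or EH return applies, all of the above reduces
   to a single "pop {pc}"; the long tail only handles the awkward cases
   where the return address and the saved FP/SP have to be shuffled through
   the argument registers before the final "bx".  */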
26150 /* Scan INSN just before assembler is output for it.
26151 For Thumb-1, we track the status of the condition codes; this
26152 information is used in the cbranchsi4_insn pattern. */
26153 void
26154 thumb1_final_prescan_insn (rtx insn)
26156 if (flag_print_asm_name)
26157 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26158 INSN_ADDRESSES (INSN_UID (insn)));
26159 /* Don't overwrite the previous setter when we get to a cbranch. */
26160 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26162 enum attr_conds conds;
26164 if (cfun->machine->thumb1_cc_insn)
26166 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26167 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26168 CC_STATUS_INIT;
26170 conds = get_attr_conds (insn);
26171 if (conds == CONDS_SET)
26173 rtx set = single_set (insn);
26174 cfun->machine->thumb1_cc_insn = insn;
26175 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26176 cfun->machine->thumb1_cc_op1 = const0_rtx;
26177 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26178 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26180 rtx src1 = XEXP (SET_SRC (set), 1);
26181 if (src1 == const0_rtx)
26182 cfun->machine->thumb1_cc_mode = CCmode;
26184 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26186 /* Record the src register operand instead of dest because
26187 cprop_hardreg pass propagates src. */
26188 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26191 else if (conds != CONDS_NOCOND)
26192 cfun->machine->thumb1_cc_insn = NULL_RTX;
26195 /* Check if unexpected far jump is used. */
26196 if (cfun->machine->lr_save_eliminated
26197 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26198 internal_error("Unexpected thumb1 far jump");
26202 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26204 unsigned HOST_WIDE_INT mask = 0xff;
26205 int i;
26207 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26208 if (val == 0) /* XXX */
26209 return 0;
26211 for (i = 0; i < 25; i++)
26212 if ((val & (mask << i)) == val)
26213 return 1;
26215 return 0;
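/* Examples: 0xff000000 (0xff << 24) is accepted, while 0x00ff00ff is not,
   since it cannot be formed from a single 8-bit value shifted into
   place.  */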
26218 /* Returns nonzero if the current function contains,
26219 or might contain a far jump. */
26220 static int
26221 thumb_far_jump_used_p (void)
26223 rtx insn;
26224 bool far_jump = false;
26225 unsigned int func_size = 0;
26227 /* This test is only important for leaf functions. */
26228 /* assert (!leaf_function_p ()); */
26230 /* If we have already decided that far jumps may be used,
26231 do not bother checking again, and always return true even if
26232 it turns out that they are not being used. Once we have made
26233 the decision that far jumps are present (and that hence the link
26234 register will be pushed onto the stack) we cannot go back on it. */
26235 if (cfun->machine->far_jump_used)
26236 return 1;
26238 /* If this function is not being called from the prologue/epilogue
26239 generation code then it must be being called from the
26240 INITIAL_ELIMINATION_OFFSET macro. */
26241 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26243 /* In this case we know that we are being asked about the elimination
26244 of the arg pointer register. If that register is not being used,
26245 then there are no arguments on the stack, and we do not have to
26246 worry that a far jump might force the prologue to push the link
26247 register, changing the stack offsets. In this case we can just
26248 return false, since the presence of far jumps in the function will
26249 not affect stack offsets.
26251 If the arg pointer is live (or if it was live, but has now been
26252 eliminated and so set to dead) then we do have to test to see if
26253 the function might contain a far jump. This test can lead to some
26254 false negatives, since before reload is completed the length of
26255 branch instructions is not known, so gcc defaults to returning their
26256 longest length, which in turn sets the far jump attribute to true.
26258 A false negative will not result in bad code being generated, but it
26259 will result in a needless push and pop of the link register. We
26260 hope that this does not occur too often.
26262 If we need doubleword stack alignment this could affect the other
26263 elimination offsets so we can't risk getting it wrong. */
26264 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26265 cfun->machine->arg_pointer_live = 1;
26266 else if (!cfun->machine->arg_pointer_live)
26267 return 0;
26270 /* We should not change far_jump_used during or after reload, as there is
26271 no chance to change stack frame layout. */
26272 if (reload_in_progress || reload_completed)
26273 return 0;
26275 /* Check to see if the function contains a branch
26276 insn with the far jump attribute set. */
26277 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26279 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26281 far_jump = true;
26283 func_size += get_attr_length (insn);
26286 /* The far_jump attribute will always be true for Thumb-1 before the
26287 shorten_branch pass, so checking the far_jump attribute before that
26288 pass is not very useful.
26290 The following heuristic tries to estimate more accurately whether a far
26291 jump will actually be needed. It is very conservative, as there is no
26292 chance to roll back a decision not to use far jumps.
26294 The Thumb-1 long branch offset range is -2048 to 2046. The worst case
26295 is each 2-byte insn being paired with a 4-byte constant pool entry.
26296 Using function size 2048/3 as the threshold is conservative enough. */
26297 if (far_jump)
26299 if ((func_size * 3) >= 2048)
26301 /* Record the fact that we have decided that
26302 the function does use far jumps. */
26303 cfun->machine->far_jump_used = 1;
26304 return 1;
26308 return 0;
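/* On the threshold above: with func_size counted in bytes,
   (func_size * 3) >= 2048 first triggers at roughly 683 bytes, about a
   third of the +/-2KB Thumb-1 branch range, matching the worst case
   sketched in the comment.  */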
26311 /* Return nonzero if FUNC must be entered in ARM mode. */
26313 is_called_in_ARM_mode (tree func)
26315 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26317 /* Ignore the problem about functions whose address is taken. */
26318 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26319 return TRUE;
26321 #ifdef ARM_PE
26322 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26323 #else
26324 return FALSE;
26325 #endif
26328 /* Given the stack offsets and register mask in OFFSETS, decide how
26329 many additional registers to push instead of subtracting a constant
26330 from SP. For epilogues the principle is the same except we use pop.
26331 FOR_PROLOGUE indicates which we're generating. */
26332 static int
26333 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26335 HOST_WIDE_INT amount;
26336 unsigned long live_regs_mask = offsets->saved_regs_mask;
26337 /* Extract a mask of the ones we can give to the Thumb's push/pop
26338 instruction. */
26339 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26340 /* Then count how many other high registers will need to be pushed. */
26341 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26342 int n_free, reg_base, size;
26344 if (!for_prologue && frame_pointer_needed)
26345 amount = offsets->locals_base - offsets->saved_regs;
26346 else
26347 amount = offsets->outgoing_args - offsets->saved_regs;
26349 /* If the stack frame size is 512 exactly, we can save one load
26350 instruction, which should make this a win even when optimizing
26351 for speed. */
26352 if (!optimize_size && amount != 512)
26353 return 0;
26355 /* Can't do this if there are high registers to push. */
26356 if (high_regs_pushed != 0)
26357 return 0;
26359 /* Shouldn't do it in the prologue if no registers would normally
26360 be pushed at all. In the epilogue, also allow it if we'll have
26361 a pop insn for the PC. */
26362 if (l_mask == 0
26363 && (for_prologue
26364 || TARGET_BACKTRACE
26365 || (live_regs_mask & 1 << LR_REGNUM) == 0
26366 || TARGET_INTERWORK
26367 || crtl->args.pretend_args_size != 0))
26368 return 0;
26370 /* Don't do this if thumb_expand_prologue wants to emit instructions
26371 between the push and the stack frame allocation. */
26372 if (for_prologue
26373 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26374 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26375 return 0;
26377 reg_base = 0;
26378 n_free = 0;
26379 if (!for_prologue)
26381 size = arm_size_return_regs ();
26382 reg_base = ARM_NUM_INTS (size);
26383 live_regs_mask >>= reg_base;
26386 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26387 && (for_prologue || call_used_regs[reg_base + n_free]))
26389 live_regs_mask >>= 1;
26390 n_free++;
26393 if (n_free == 0)
26394 return 0;
26395 gcc_assert (amount / 4 * 4 == amount);
26397 if (amount >= 512 && (amount - n_free * 4) < 512)
26398 return (amount - 508) / 4;
26399 if (amount <= n_free * 4)
26400 return amount / 4;
26401 return 0;
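/* Informal example: when optimizing for size, a frame needing
   "sub sp, #8" with two free call-clobbered low registers makes this
   return 2, so the push simply grows by two registers and the separate SP
   adjustment disappears.  When optimizing for speed only the
   amount == 512 case is allowed, where pushing extra registers avoids
   having to materialise the out-of-range constant 512.  */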
26404 /* The bits which aren't usefully expanded as rtl. */
26405 const char *
26406 thumb1_unexpanded_epilogue (void)
26408 arm_stack_offsets *offsets;
26409 int regno;
26410 unsigned long live_regs_mask = 0;
26411 int high_regs_pushed = 0;
26412 int extra_pop;
26413 int had_to_push_lr;
26414 int size;
26416 if (cfun->machine->return_used_this_function != 0)
26417 return "";
26419 if (IS_NAKED (arm_current_func_type ()))
26420 return "";
26422 offsets = arm_get_frame_offsets ();
26423 live_regs_mask = offsets->saved_regs_mask;
26424 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26426 /* Deduce the registers used from the function's return value. This is
26427 more reliable than examining df_regs_ever_live_p () because that will
26428 be set if the register is ever used in the function, not just if the
26429 register is used to hold a return value. */
26430 size = arm_size_return_regs ();
26432 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26433 if (extra_pop > 0)
26435 unsigned long extra_mask = (1 << extra_pop) - 1;
26436 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26439 /* The prologue may have pushed some high registers to use as
26440 work registers. e.g. the testsuite file:
26441 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26442 compiles to produce:
26443 push {r4, r5, r6, r7, lr}
26444 mov r7, r9
26445 mov r6, r8
26446 push {r6, r7}
26447 as part of the prologue. We have to undo that pushing here. */
26449 if (high_regs_pushed)
26451 unsigned long mask = live_regs_mask & 0xff;
26452 int next_hi_reg;
26454 /* The available low registers depend on the size of the value we are
26455 returning. */
26456 if (size <= 12)
26457 mask |= 1 << 3;
26458 if (size <= 8)
26459 mask |= 1 << 2;
26461 if (mask == 0)
26462 /* Oh dear! We have no low registers into which we can pop
26463 high registers! */
26464 internal_error
26465 ("no low registers available for popping high registers");
26467 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26468 if (live_regs_mask & (1 << next_hi_reg))
26469 break;
26471 while (high_regs_pushed)
26473 /* Find lo register(s) into which the high register(s) can
26474 be popped. */
26475 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26477 if (mask & (1 << regno))
26478 high_regs_pushed--;
26479 if (high_regs_pushed == 0)
26480 break;
26483 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26485 /* Pop the values into the low register(s). */
26486 thumb_pop (asm_out_file, mask);
26488 /* Move the value(s) into the high registers. */
26489 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26491 if (mask & (1 << regno))
26493 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26494 regno);
26496 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26497 if (live_regs_mask & (1 << next_hi_reg))
26498 break;
26502 live_regs_mask &= ~0x0f00;
26505 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26506 live_regs_mask &= 0xff;
26508 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26510 /* Pop the return address into the PC. */
26511 if (had_to_push_lr)
26512 live_regs_mask |= 1 << PC_REGNUM;
26514 /* Either no argument registers were pushed or a backtrace
26515 structure was created which includes an adjusted stack
26516 pointer, so just pop everything. */
26517 if (live_regs_mask)
26518 thumb_pop (asm_out_file, live_regs_mask);
26520 /* We have either just popped the return address into the
26521 PC or it was kept in LR for the entire function.
26522 Note that thumb_pop has already called thumb_exit if the
26523 PC was in the list. */
26524 if (!had_to_push_lr)
26525 thumb_exit (asm_out_file, LR_REGNUM);
26527 else
26529 /* Pop everything but the return address. */
26530 if (live_regs_mask)
26531 thumb_pop (asm_out_file, live_regs_mask);
26533 if (had_to_push_lr)
26535 if (size > 12)
26537 /* We have no free low regs, so save one. */
26538 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26539 LAST_ARG_REGNUM);
26542 /* Get the return address into a temporary register. */
26543 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26545 if (size > 12)
26547 /* Move the return address to lr. */
26548 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26549 LAST_ARG_REGNUM);
26550 /* Restore the low register. */
26551 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26552 IP_REGNUM);
26553 regno = LR_REGNUM;
26555 else
26556 regno = LAST_ARG_REGNUM;
26558 else
26559 regno = LR_REGNUM;
26561 /* Remove the argument registers that were pushed onto the stack. */
26562 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26563 SP_REGNUM, SP_REGNUM,
26564 crtl->args.pretend_args_size);
26566 thumb_exit (asm_out_file, regno);
26569 return "";
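/* Roughly, for the complex-2.c style prologue quoted above, this epilogue
   pops the saved copies of r8/r9 into whatever low registers are free,
   moves them back up with "mov r8, rN" / "mov r9, rN", and only then pops
   the remaining low registers and the return address.  */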
26572 /* Functions to save and restore machine-specific function data. */
26573 static struct machine_function *
26574 arm_init_machine_status (void)
26576 struct machine_function *machine;
26577 machine = ggc_alloc_cleared_machine_function ();
26579 #if ARM_FT_UNKNOWN != 0
26580 machine->func_type = ARM_FT_UNKNOWN;
26581 #endif
26582 return machine;
26585 /* Return an RTX indicating where the return address to the
26586 calling function can be found. */
26588 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26590 if (count != 0)
26591 return NULL_RTX;
26593 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26596 /* Do anything needed before RTL is emitted for each function. */
26597 void
26598 arm_init_expanders (void)
26600 /* Arrange to initialize and mark the machine per-function status. */
26601 init_machine_status = arm_init_machine_status;
26603 /* This is to stop the combine pass optimizing away the alignment
26604 adjustment of va_arg. */
26605 /* ??? It is claimed that this should not be necessary. */
26606 if (cfun)
26607 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26611 /* Like arm_compute_initial_elimination offset. Simpler because there
26612 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26613 to point at the base of the local variables after static stack
26614 space for a function has been allocated. */
26616 HOST_WIDE_INT
26617 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26619 arm_stack_offsets *offsets;
26621 offsets = arm_get_frame_offsets ();
26623 switch (from)
26625 case ARG_POINTER_REGNUM:
26626 switch (to)
26628 case STACK_POINTER_REGNUM:
26629 return offsets->outgoing_args - offsets->saved_args;
26631 case FRAME_POINTER_REGNUM:
26632 return offsets->soft_frame - offsets->saved_args;
26634 case ARM_HARD_FRAME_POINTER_REGNUM:
26635 return offsets->saved_regs - offsets->saved_args;
26637 case THUMB_HARD_FRAME_POINTER_REGNUM:
26638 return offsets->locals_base - offsets->saved_args;
26640 default:
26641 gcc_unreachable ();
26643 break;
26645 case FRAME_POINTER_REGNUM:
26646 switch (to)
26648 case STACK_POINTER_REGNUM:
26649 return offsets->outgoing_args - offsets->soft_frame;
26651 case ARM_HARD_FRAME_POINTER_REGNUM:
26652 return offsets->saved_regs - offsets->soft_frame;
26654 case THUMB_HARD_FRAME_POINTER_REGNUM:
26655 return offsets->locals_base - offsets->soft_frame;
26657 default:
26658 gcc_unreachable ();
26660 break;
26662 default:
26663 gcc_unreachable ();
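/* Example: eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   offsets->outgoing_args - offsets->saved_args, i.e. the distance from the
   incoming-argument base down to the final SP -- the full frame size.  */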
26667 /* Generate the function's prologue. */
26669 void
26670 thumb1_expand_prologue (void)
26672 rtx insn;
26674 HOST_WIDE_INT amount;
26675 arm_stack_offsets *offsets;
26676 unsigned long func_type;
26677 int regno;
26678 unsigned long live_regs_mask;
26679 unsigned long l_mask;
26680 unsigned high_regs_pushed = 0;
26682 func_type = arm_current_func_type ();
26684 /* Naked functions don't have prologues. */
26685 if (IS_NAKED (func_type))
26686 return;
26688 if (IS_INTERRUPT (func_type))
26690 error ("interrupt service routines cannot be coded in Thumb mode");
26691 return;
26694 if (is_called_in_ARM_mode (current_function_decl))
26695 emit_insn (gen_prologue_thumb1_interwork ());
26697 offsets = arm_get_frame_offsets ();
26698 live_regs_mask = offsets->saved_regs_mask;
26700 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26701 l_mask = live_regs_mask & 0x40ff;
26702 /* Then count how many other high registers will need to be pushed. */
26703 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26705 if (crtl->args.pretend_args_size)
26707 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26709 if (cfun->machine->uses_anonymous_args)
26711 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26712 unsigned long mask;
26714 mask = 1ul << (LAST_ARG_REGNUM + 1);
26715 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26717 insn = thumb1_emit_multi_reg_push (mask, 0);
26719 else
26721 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26722 stack_pointer_rtx, x));
26724 RTX_FRAME_RELATED_P (insn) = 1;
26727 if (TARGET_BACKTRACE)
26729 HOST_WIDE_INT offset = 0;
26730 unsigned work_register;
26731 rtx work_reg, x, arm_hfp_rtx;
26733 /* We have been asked to create a stack backtrace structure.
26734 The code looks like this:
26736 0 .align 2
26737 0 func:
26738 0 sub SP, #16 Reserve space for 4 registers.
26739 2 push {R7} Push low registers.
26740 4 add R7, SP, #20 Get the stack pointer before the push.
26741 6 str R7, [SP, #8] Store the stack pointer
26742 (before reserving the space).
26743 8 mov R7, PC Get hold of the start of this code + 12.
26744 10 str R7, [SP, #16] Store it.
26745 12 mov R7, FP Get hold of the current frame pointer.
26746 14 str R7, [SP, #4] Store it.
26747 16 mov R7, LR Get hold of the current return address.
26748 18 str R7, [SP, #12] Store it.
26749 20 add R7, SP, #16 Point at the start of the
26750 backtrace structure.
26751 22 mov FP, R7 Put this value into the frame pointer. */
26753 work_register = thumb_find_work_register (live_regs_mask);
26754 work_reg = gen_rtx_REG (SImode, work_register);
26755 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26757 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26758 stack_pointer_rtx, GEN_INT (-16)));
26759 RTX_FRAME_RELATED_P (insn) = 1;
26761 if (l_mask)
26763 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26764 RTX_FRAME_RELATED_P (insn) = 1;
26766 offset = bit_count (l_mask) * UNITS_PER_WORD;
26769 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26770 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26772 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26773 x = gen_frame_mem (SImode, x);
26774 emit_move_insn (x, work_reg);
26776 /* Make sure that the instruction fetching the PC is in the right place
26777 to calculate "start of backtrace creation code + 12". */
26778 /* ??? The stores using the common WORK_REG ought to be enough to
26779 prevent the scheduler from doing anything weird. Failing that
26780 we could always move all of the following into an UNSPEC_VOLATILE. */
26781 if (l_mask)
26783 x = gen_rtx_REG (SImode, PC_REGNUM);
26784 emit_move_insn (work_reg, x);
26786 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26787 x = gen_frame_mem (SImode, x);
26788 emit_move_insn (x, work_reg);
26790 emit_move_insn (work_reg, arm_hfp_rtx);
26792 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26793 x = gen_frame_mem (SImode, x);
26794 emit_move_insn (x, work_reg);
26796 else
26798 emit_move_insn (work_reg, arm_hfp_rtx);
26800 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26801 x = gen_frame_mem (SImode, x);
26802 emit_move_insn (x, work_reg);
26804 x = gen_rtx_REG (SImode, PC_REGNUM);
26805 emit_move_insn (work_reg, x);
26807 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26808 x = gen_frame_mem (SImode, x);
26809 emit_move_insn (x, work_reg);
26812 x = gen_rtx_REG (SImode, LR_REGNUM);
26813 emit_move_insn (work_reg, x);
26815 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26816 x = gen_frame_mem (SImode, x);
26817 emit_move_insn (x, work_reg);
26819 x = GEN_INT (offset + 12);
26820 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26822 emit_move_insn (arm_hfp_rtx, work_reg);
26824 /* Optimization: If we are not pushing any low registers but we are going
26825 to push some high registers then delay our first push. This will just
26826 be a push of LR and we can combine it with the push of the first high
26827 register. */
26828 else if ((l_mask & 0xff) != 0
26829 || (high_regs_pushed == 0 && l_mask))
26831 unsigned long mask = l_mask;
26832 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26833 insn = thumb1_emit_multi_reg_push (mask, mask);
26834 RTX_FRAME_RELATED_P (insn) = 1;
26837 if (high_regs_pushed)
26839 unsigned pushable_regs;
26840 unsigned next_hi_reg;
26841 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26842 : crtl->args.info.nregs;
26843 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26845 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26846 if (live_regs_mask & (1 << next_hi_reg))
26847 break;
26849 /* Here we need to mask out registers used for passing arguments, even
26850 if they could otherwise be pushed. Using them to stash the high
26851 registers could clobber arguments that are still live. */
26852 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26854 if (pushable_regs == 0)
26855 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26857 while (high_regs_pushed > 0)
26859 unsigned long real_regs_mask = 0;
26861 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26863 if (pushable_regs & (1 << regno))
26865 emit_move_insn (gen_rtx_REG (SImode, regno),
26866 gen_rtx_REG (SImode, next_hi_reg));
26868 high_regs_pushed --;
26869 real_regs_mask |= (1 << next_hi_reg);
26871 if (high_regs_pushed)
26873 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26874 next_hi_reg --)
26875 if (live_regs_mask & (1 << next_hi_reg))
26876 break;
26878 else
26880 pushable_regs &= ~((1 << regno) - 1);
26881 break;
26886 /* If we had to find a work register and we have not yet
26887 saved the LR then add it to the list of regs to push. */
26888 if (l_mask == (1 << LR_REGNUM))
26890 pushable_regs |= l_mask;
26891 real_regs_mask |= l_mask;
26892 l_mask = 0;
26895 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26896 RTX_FRAME_RELATED_P (insn) = 1;
26900 /* Load the pic register before setting the frame pointer,
26901 so we can use r7 as a temporary work register. */
26902 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26903 arm_load_pic_register (live_regs_mask);
26905 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26906 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26907 stack_pointer_rtx);
26909 if (flag_stack_usage_info)
26910 current_function_static_stack_size
26911 = offsets->outgoing_args - offsets->saved_args;
26913 amount = offsets->outgoing_args - offsets->saved_regs;
26914 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26915 if (amount)
26917 if (amount < 512)
26919 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26920 GEN_INT (- amount)));
26921 RTX_FRAME_RELATED_P (insn) = 1;
26923 else
26925 rtx reg, dwarf;
26927 /* The stack decrement is too big for an immediate value in a single
26928 insn. In theory we could issue multiple subtracts, but after
26929 three of them it becomes more space efficient to place the full
26930 value in the constant pool and load into a register. (Also the
26931 ARM debugger really likes to see only one stack decrement per
26932 function). So instead we look for a scratch register into which
26933 we can load the decrement, and then we subtract this from the
26934 stack pointer. Unfortunately on the thumb the only available
26935 scratch registers are the argument registers, and we cannot use
26936 these as they may hold arguments to the function. Instead we
26937 attempt to locate a call preserved register which is used by this
26938 function. If we can find one, then we know that it will have
26939 been pushed at the start of the prologue and so we can corrupt
26940 it now. */
26941 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26942 if (live_regs_mask & (1 << regno))
26943 break;
26945 gcc_assert(regno <= LAST_LO_REGNUM);
26947 reg = gen_rtx_REG (SImode, regno);
26949 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26951 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26952 stack_pointer_rtx, reg));
26954 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26955 plus_constant (Pmode, stack_pointer_rtx,
26956 -amount));
26957 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26958 RTX_FRAME_RELATED_P (insn) = 1;
26962 if (frame_pointer_needed)
26963 thumb_set_frame_pointer (offsets);
26965 /* If we are profiling, make sure no instructions are scheduled before
26966 the call to mcount. Similarly if the user has requested no
26967 scheduling in the prolog. Similarly if we want non-call exceptions
26968 using the EABI unwinder, to prevent faulting instructions from being
26969 swapped with a stack adjustment. */
26970 if (crtl->profile || !TARGET_SCHED_PROLOG
26971 || (arm_except_unwind_info (&global_options) == UI_TARGET
26972 && cfun->can_throw_non_call_exceptions))
26973 emit_insn (gen_blockage ());
26975 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26976 if (live_regs_mask & 0xff)
26977 cfun->machine->lr_save_eliminated = 0;
26980 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26981 single POP instruction can be generated. LR is replaced by PC in the
26982 register list. All the checks required are already done by
26983 USE_RETURN_INSN (), so all we really need to decide here is whether a
26984 single register or multiple registers are to be popped. */
26985 void
26986 thumb2_expand_return (bool simple_return)
26988 int i, num_regs;
26989 unsigned long saved_regs_mask;
26990 arm_stack_offsets *offsets;
26992 offsets = arm_get_frame_offsets ();
26993 saved_regs_mask = offsets->saved_regs_mask;
26995 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26996 if (saved_regs_mask & (1 << i))
26997 num_regs++;
26999 if (!simple_return && saved_regs_mask)
27001 if (num_regs == 1)
27003 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27004 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27005 rtx addr = gen_rtx_MEM (SImode,
27006 gen_rtx_POST_INC (SImode,
27007 stack_pointer_rtx));
27008 set_mem_alias_set (addr, get_frame_alias_set ());
27009 XVECEXP (par, 0, 0) = ret_rtx;
27010 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27011 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27012 emit_jump_insn (par);
27014 else
27016 saved_regs_mask &= ~ (1 << LR_REGNUM);
27017 saved_regs_mask |= (1 << PC_REGNUM);
27018 arm_emit_multi_reg_pop (saved_regs_mask);
27021 else
27023 emit_jump_insn (simple_return_rtx);
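/* Generate RTL for the epilogue of a Thumb-1 function: restore the stack
   pointer from the frame pointer where one is used, deallocate the local
   frame and outgoing argument area, and emit the uses and clobbers that
   keep the stack adjustment and register restores from being deleted.  */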
27027 void
27028 thumb1_expand_epilogue (void)
27030 HOST_WIDE_INT amount;
27031 arm_stack_offsets *offsets;
27032 int regno;
27034 /* Naked functions don't have epilogues. */
27035 if (IS_NAKED (arm_current_func_type ()))
27036 return;
27038 offsets = arm_get_frame_offsets ();
27039 amount = offsets->outgoing_args - offsets->saved_regs;
27041 if (frame_pointer_needed)
27043 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27044 amount = offsets->locals_base - offsets->saved_regs;
27046 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27048 gcc_assert (amount >= 0);
27049 if (amount)
27051 emit_insn (gen_blockage ());
27053 if (amount < 512)
27054 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27055 GEN_INT (amount)));
27056 else
27058 /* r3 is always free in the epilogue. */
27059 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27061 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27062 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27066 /* Emit a USE (stack_pointer_rtx), so that
27067 the stack adjustment will not be deleted. */
27068 emit_insn (gen_force_register_use (stack_pointer_rtx));
27070 if (crtl->profile || !TARGET_SCHED_PROLOG)
27071 emit_insn (gen_blockage ());
27073 /* Emit a clobber for each insn that will be restored in the epilogue,
27074 so that flow2 will get register lifetimes correct. */
27075 for (regno = 0; regno < 13; regno++)
27076 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27077 emit_clobber (gen_rtx_REG (SImode, regno));
27079 if (! df_regs_ever_live_p (LR_REGNUM))
27080 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27083 /* Epilogue code for APCS frame. */
27084 static void
27085 arm_expand_epilogue_apcs_frame (bool really_return)
27087 unsigned long func_type;
27088 unsigned long saved_regs_mask;
27089 int num_regs = 0;
27090 int i;
27091 int floats_from_frame = 0;
27092 arm_stack_offsets *offsets;
27094 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27095 func_type = arm_current_func_type ();
27097 /* Get frame offsets for ARM. */
27098 offsets = arm_get_frame_offsets ();
27099 saved_regs_mask = offsets->saved_regs_mask;
27101 /* Find the offset of the floating-point save area in the frame. */
27102 floats_from_frame
27103 = (offsets->saved_args
27104 + arm_compute_static_chain_stack_bytes ()
27105 - offsets->frame);
27107 /* Compute how many core registers are saved and how far away the floats are. */
27108 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27109 if (saved_regs_mask & (1 << i))
27111 num_regs++;
27112 floats_from_frame += 4;
27115 if (TARGET_HARD_FLOAT && TARGET_VFP)
27117 int start_reg;
27118 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27120 /* The offset is from IP_REGNUM. */
27121 int saved_size = arm_get_vfp_saved_size ();
27122 if (saved_size > 0)
27124 rtx insn;
27125 floats_from_frame += saved_size;
27126 insn = emit_insn (gen_addsi3 (ip_rtx,
27127 hard_frame_pointer_rtx,
27128 GEN_INT (-floats_from_frame)));
27129 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27130 ip_rtx, hard_frame_pointer_rtx);
27133 /* Generate VFP register multi-pop. */
27134 start_reg = FIRST_VFP_REGNUM;
27136 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27137 /* Look for a case where a reg does not need restoring. */
27138 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27139 && (!df_regs_ever_live_p (i + 1)
27140 || call_used_regs[i + 1]))
27142 if (start_reg != i)
27143 arm_emit_vfp_multi_reg_pop (start_reg,
27144 (i - start_reg) / 2,
27145 gen_rtx_REG (SImode,
27146 IP_REGNUM));
27147 start_reg = i + 2;
27150 /* Restore the remaining regs that we have discovered (or possibly
27151 even all of them, if the conditional in the for loop never
27152 fired). */
27153 if (start_reg != i)
27154 arm_emit_vfp_multi_reg_pop (start_reg,
27155 (i - start_reg) / 2,
27156 gen_rtx_REG (SImode, IP_REGNUM));
27159 if (TARGET_IWMMXT)
27161 /* The frame pointer is guaranteed to be non-double-word aligned, as
27162 it is set to double-word-aligned old_stack_pointer - 4. */
27163 rtx insn;
27164 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27166 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27167 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27169 rtx addr = gen_frame_mem (V2SImode,
27170 plus_constant (Pmode, hard_frame_pointer_rtx,
27171 - lrm_count * 4));
27172 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27173 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27174 gen_rtx_REG (V2SImode, i),
27175 NULL_RTX);
27176 lrm_count += 2;
27180 /* saved_regs_mask should contain IP, which holds the old stack pointer
27181 from when the activation record was created. Since SP and IP are adjacent registers,
27182 we can restore the value directly into SP. */
27183 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27184 saved_regs_mask &= ~(1 << IP_REGNUM);
27185 saved_regs_mask |= (1 << SP_REGNUM);
27187 /* There are two registers left in saved_regs_mask - LR and PC. We
27188 only need to restore LR (the return address), but to
27189 save time we can load it directly into PC, unless we need a
27190 special function exit sequence, or we are not really returning. */
27191 if (really_return
27192 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27193 && !crtl->calls_eh_return)
27194 /* Delete LR from the register mask, so that LR on
27195 the stack is loaded into the PC in the register mask. */
27196 saved_regs_mask &= ~(1 << LR_REGNUM);
27197 else
27198 saved_regs_mask &= ~(1 << PC_REGNUM);
27200 num_regs = bit_count (saved_regs_mask);
27201 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27203 rtx insn;
27204 emit_insn (gen_blockage ());
27205 /* Unwind the stack to just below the saved registers. */
27206 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27207 hard_frame_pointer_rtx,
27208 GEN_INT (- 4 * num_regs)));
27210 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27211 stack_pointer_rtx, hard_frame_pointer_rtx);
27214 arm_emit_multi_reg_pop (saved_regs_mask);
27216 if (IS_INTERRUPT (func_type))
27218 /* Interrupt handlers will have pushed the
27219 IP onto the stack, so restore it now. */
27220 rtx insn;
27221 rtx addr = gen_rtx_MEM (SImode,
27222 gen_rtx_POST_INC (SImode,
27223 stack_pointer_rtx));
27224 set_mem_alias_set (addr, get_frame_alias_set ());
27225 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27226 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27227 gen_rtx_REG (SImode, IP_REGNUM),
27228 NULL_RTX);
27231 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27232 return;
27234 if (crtl->calls_eh_return)
27235 emit_insn (gen_addsi3 (stack_pointer_rtx,
27236 stack_pointer_rtx,
27237 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27239 if (IS_STACKALIGN (func_type))
27240 /* Restore the original stack pointer. Before prologue, the stack was
27241 realigned and the original stack pointer saved in r0. For details,
27242 see comment in arm_expand_prologue. */
27243 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27245 emit_jump_insn (simple_return_rtx);
27248 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27249 function is not a sibcall. */
27250 void
27251 arm_expand_epilogue (bool really_return)
27253 unsigned long func_type;
27254 unsigned long saved_regs_mask;
27255 int num_regs = 0;
27256 int i;
27257 int amount;
27258 arm_stack_offsets *offsets;
27260 func_type = arm_current_func_type ();
27262 /* Naked functions don't have an epilogue. Hence, generate the return pattern and
27263 let output_return_instruction take care of any instruction emission. */
27264 if (IS_NAKED (func_type)
27265 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27267 if (really_return)
27268 emit_jump_insn (simple_return_rtx);
27269 return;
27272 /* If we are throwing an exception, then we really must be doing a
27273 return, so we can't tail-call. */
27274 gcc_assert (!crtl->calls_eh_return || really_return);
27276 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27278 arm_expand_epilogue_apcs_frame (really_return);
27279 return;
27282 /* Get frame offsets for ARM. */
27283 offsets = arm_get_frame_offsets ();
27284 saved_regs_mask = offsets->saved_regs_mask;
27285 num_regs = bit_count (saved_regs_mask);
27287 if (frame_pointer_needed)
27289 rtx insn;
27290 /* Restore stack pointer if necessary. */
27291 if (TARGET_ARM)
27293 /* In ARM mode, frame pointer points to first saved register.
27294 Restore stack pointer to last saved register. */
27295 amount = offsets->frame - offsets->saved_regs;
27297 /* Force out any pending memory operations that reference stacked data
27298 before stack de-allocation occurs. */
27299 emit_insn (gen_blockage ());
27300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27301 hard_frame_pointer_rtx,
27302 GEN_INT (amount)));
27303 arm_add_cfa_adjust_cfa_note (insn, amount,
27304 stack_pointer_rtx,
27305 hard_frame_pointer_rtx);
27307 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27308 deleted. */
27309 emit_insn (gen_force_register_use (stack_pointer_rtx));
27311 else
27313 /* In Thumb-2 mode, the frame pointer points to the last saved
27314 register. */
27315 amount = offsets->locals_base - offsets->saved_regs;
27316 if (amount)
27318 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27319 hard_frame_pointer_rtx,
27320 GEN_INT (amount)));
27321 arm_add_cfa_adjust_cfa_note (insn, amount,
27322 hard_frame_pointer_rtx,
27323 hard_frame_pointer_rtx);
27326 /* Force out any pending memory operations that reference stacked data
27327 before stack de-allocation occurs. */
27328 emit_insn (gen_blockage ());
27329 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27330 hard_frame_pointer_rtx));
27331 arm_add_cfa_adjust_cfa_note (insn, 0,
27332 stack_pointer_rtx,
27333 hard_frame_pointer_rtx);
27334 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27335 deleted. */
27336 emit_insn (gen_force_register_use (stack_pointer_rtx));
27339 else
27341 /* Pop off outgoing args and local frame to adjust stack pointer to
27342 last saved register. */
27343 amount = offsets->outgoing_args - offsets->saved_regs;
27344 if (amount)
27346 rtx tmp;
27347 /* Force out any pending memory operations that reference stacked data
27348 before stack de-allocation occurs. */
27349 emit_insn (gen_blockage ());
27350 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27351 stack_pointer_rtx,
27352 GEN_INT (amount)));
27353 arm_add_cfa_adjust_cfa_note (tmp, amount,
27354 stack_pointer_rtx, stack_pointer_rtx);
27355 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27356 not deleted. */
27357 emit_insn (gen_force_register_use (stack_pointer_rtx));
27361 if (TARGET_HARD_FLOAT && TARGET_VFP)
27363 /* Generate VFP register multi-pop. */
27364 int end_reg = LAST_VFP_REGNUM + 1;
27366 /* Scan the registers in reverse order. We need to match
27367 any groupings made in the prologue and generate matching
27368 vldm operations. Matching the groups is necessary because,
27369 unlike pop, vldm can only handle consecutive registers. */
27370 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27371 /* Look for a case where a reg does not need restoring. */
27372 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27373 && (!df_regs_ever_live_p (i + 1)
27374 || call_used_regs[i + 1]))
27376 /* Restore the regs discovered so far (from reg+2 to
27377 end_reg). */
27378 if (end_reg > i + 2)
27379 arm_emit_vfp_multi_reg_pop (i + 2,
27380 (end_reg - (i + 2)) / 2,
27381 stack_pointer_rtx);
27382 end_reg = i;
27385 /* Restore the remaining regs that we have discovered (or possibly
27386 even all of them, if the conditional in the for loop never
27387 fired). */
27388 if (end_reg > i + 2)
27389 arm_emit_vfp_multi_reg_pop (i + 2,
27390 (end_reg - (i + 2)) / 2,
27391 stack_pointer_rtx);
27394 if (TARGET_IWMMXT)
27395 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27396 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27398 rtx insn;
27399 rtx addr = gen_rtx_MEM (V2SImode,
27400 gen_rtx_POST_INC (SImode,
27401 stack_pointer_rtx));
27402 set_mem_alias_set (addr, get_frame_alias_set ());
27403 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27404 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27405 gen_rtx_REG (V2SImode, i),
27406 NULL_RTX);
27407 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27408 stack_pointer_rtx, stack_pointer_rtx);
27411 if (saved_regs_mask)
27413 rtx insn;
27414 bool return_in_pc = false;
27416 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27417 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27418 && !IS_STACKALIGN (func_type)
27419 && really_return
27420 && crtl->args.pretend_args_size == 0
27421 && saved_regs_mask & (1 << LR_REGNUM)
27422 && !crtl->calls_eh_return)
27424 saved_regs_mask &= ~(1 << LR_REGNUM);
27425 saved_regs_mask |= (1 << PC_REGNUM);
27426 return_in_pc = true;
27429 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27431 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27432 if (saved_regs_mask & (1 << i))
27434 rtx addr = gen_rtx_MEM (SImode,
27435 gen_rtx_POST_INC (SImode,
27436 stack_pointer_rtx));
27437 set_mem_alias_set (addr, get_frame_alias_set ());
27439 if (i == PC_REGNUM)
27441 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27442 XVECEXP (insn, 0, 0) = ret_rtx;
27443 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27444 gen_rtx_REG (SImode, i),
27445 addr);
27446 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27447 insn = emit_jump_insn (insn);
27449 else
27451 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27452 addr));
27453 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27454 gen_rtx_REG (SImode, i),
27455 NULL_RTX);
27456 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27457 stack_pointer_rtx,
27458 stack_pointer_rtx);
27462 else
27464 if (TARGET_LDRD
27465 && current_tune->prefer_ldrd_strd
27466 && !optimize_function_for_size_p (cfun))
27468 if (TARGET_THUMB2)
27469 thumb2_emit_ldrd_pop (saved_regs_mask);
27470 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27471 arm_emit_ldrd_pop (saved_regs_mask);
27472 else
27473 arm_emit_multi_reg_pop (saved_regs_mask);
27475 else
27476 arm_emit_multi_reg_pop (saved_regs_mask);
27479 if (return_in_pc == true)
27480 return;
27483 if (crtl->args.pretend_args_size)
27485 int i, j;
27486 rtx dwarf = NULL_RTX;
27487 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27488 stack_pointer_rtx,
27489 GEN_INT (crtl->args.pretend_args_size)));
27491 RTX_FRAME_RELATED_P (tmp) = 1;
27493 if (cfun->machine->uses_anonymous_args)
27495 /* Restore pretend args. See arm_expand_prologue for how the pretend
27496 args are saved on the stack. */
27497 int num_regs = crtl->args.pretend_args_size / 4;
27498 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27499 for (j = 0, i = 0; j < num_regs; i++)
27500 if (saved_regs_mask & (1 << i))
27502 rtx reg = gen_rtx_REG (SImode, i);
27503 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27504 j++;
27506 REG_NOTES (tmp) = dwarf;
27508 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27509 stack_pointer_rtx, stack_pointer_rtx);
27512 if (!really_return)
27513 return;
27515 if (crtl->calls_eh_return)
27516 emit_insn (gen_addsi3 (stack_pointer_rtx,
27517 stack_pointer_rtx,
27518 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27520 if (IS_STACKALIGN (func_type))
27521 /* Restore the original stack pointer. Before prologue, the stack was
27522 realigned and the original stack pointer saved in r0. For details,
27523 see comment in arm_expand_prologue. */
27524 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27526 emit_jump_insn (simple_return_rtx);
27529 /* Implementation of insn prologue_thumb1_interwork. This is the first
27530 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27532 const char *
27533 thumb1_output_interwork (void)
27535 const char * name;
27536 FILE *f = asm_out_file;
27538 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27539 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27540 == SYMBOL_REF);
27541 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27543 /* Generate code sequence to switch us into Thumb mode. */
27544 /* The .code 32 directive has already been emitted by
27545 ASM_DECLARE_FUNCTION_NAME. */
27546 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27547 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27549 /* Generate a label, so that the debugger will notice the
27550 change in instruction sets. This label is also used by
27551 the assembler to bypass the ARM code when this function
27552 is called from a Thumb encoded function elsewhere in the
27553 same file. Hence the definition of STUB_NAME here must
27554 agree with the definition in gas/config/tc-arm.c. */
27556 #define STUB_NAME ".real_start_of"
27558 fprintf (f, "\t.code\t16\n");
27559 #ifdef ARM_PE
27560 if (arm_dllexport_name_p (name))
27561 name = arm_strip_name_encoding (name);
27562 #endif
27563 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27564 fprintf (f, "\t.thumb_func\n");
27565 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27567 return "";
27570 /* Handle the case of a double word load into a low register from
27571 a computed memory address. The computed address may involve a
27572 register which is overwritten by the load. */
27573 const char *
27574 thumb_load_double_from_address (rtx *operands)
27576 rtx addr;
27577 rtx base;
27578 rtx offset;
27579 rtx arg1;
27580 rtx arg2;
27582 gcc_assert (REG_P (operands[0]));
27583 gcc_assert (MEM_P (operands[1]));
27585 /* Get the memory address. */
27586 addr = XEXP (operands[1], 0);
27588 /* Work out how the memory address is computed. */
27589 switch (GET_CODE (addr))
27591 case REG:
27592 operands[2] = adjust_address (operands[1], SImode, 4);
27594 if (REGNO (operands[0]) == REGNO (addr))
27596 output_asm_insn ("ldr\t%H0, %2", operands);
27597 output_asm_insn ("ldr\t%0, %1", operands);
27599 else
27601 output_asm_insn ("ldr\t%0, %1", operands);
27602 output_asm_insn ("ldr\t%H0, %2", operands);
27604 break;
27606 case CONST:
27607 /* Compute <address> + 4 for the high order load. */
27608 operands[2] = adjust_address (operands[1], SImode, 4);
27610 output_asm_insn ("ldr\t%0, %1", operands);
27611 output_asm_insn ("ldr\t%H0, %2", operands);
27612 break;
27614 case PLUS:
27615 arg1 = XEXP (addr, 0);
27616 arg2 = XEXP (addr, 1);
27618 if (CONSTANT_P (arg1))
27619 base = arg2, offset = arg1;
27620 else
27621 base = arg1, offset = arg2;
27623 gcc_assert (REG_P (base));
27625 /* Catch the case of <address> = <reg> + <reg> */
27626 if (REG_P (offset))
27628 int reg_offset = REGNO (offset);
27629 int reg_base = REGNO (base);
27630 int reg_dest = REGNO (operands[0]);
27632 /* Add the base and offset registers together into the
27633 higher destination register. */
27634 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27635 reg_dest + 1, reg_base, reg_offset);
27637 /* Load the lower destination register from the address in
27638 the higher destination register. */
27639 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27640 reg_dest, reg_dest + 1);
27642 /* Load the higher destination register from its own address
27643 plus 4. */
27644 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27645 reg_dest + 1, reg_dest + 1);
27647 else
27649 /* Compute <address> + 4 for the high order load. */
27650 operands[2] = adjust_address (operands[1], SImode, 4);
27652 /* If the computed address is held in the low order register
27653 then load the high order register first, otherwise always
27654 load the low order register first. */
27655 if (REGNO (operands[0]) == REGNO (base))
27657 output_asm_insn ("ldr\t%H0, %2", operands);
27658 output_asm_insn ("ldr\t%0, %1", operands);
27660 else
27662 output_asm_insn ("ldr\t%0, %1", operands);
27663 output_asm_insn ("ldr\t%H0, %2", operands);
27666 break;
27668 case LABEL_REF:
27669 /* With no registers to worry about we can just load the value
27670 directly. */
27671 operands[2] = adjust_address (operands[1], SImode, 4);
27673 output_asm_insn ("ldr\t%H0, %2", operands);
27674 output_asm_insn ("ldr\t%0, %1", operands);
27675 break;
27677 default:
27678 gcc_unreachable ();
27681 return "";
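/* Output a load-multiple/store-multiple pair that copies N (2 or 3) words
   for a Thumb block move, first sorting the scratch register operands into
   ascending order as required for the register lists.  */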
27684 const char *
27685 thumb_output_move_mem_multiple (int n, rtx *operands)
27687 rtx tmp;
27689 switch (n)
27691 case 2:
27692 if (REGNO (operands[4]) > REGNO (operands[5]))
27694 tmp = operands[4];
27695 operands[4] = operands[5];
27696 operands[5] = tmp;
27698 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27699 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27700 break;
27702 case 3:
27703 if (REGNO (operands[4]) > REGNO (operands[5]))
27705 tmp = operands[4];
27706 operands[4] = operands[5];
27707 operands[5] = tmp;
27709 if (REGNO (operands[5]) > REGNO (operands[6]))
27711 tmp = operands[5];
27712 operands[5] = operands[6];
27713 operands[6] = tmp;
27715 if (REGNO (operands[4]) > REGNO (operands[5]))
27717 tmp = operands[4];
27718 operands[4] = operands[5];
27719 operands[5] = tmp;
27722 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27723 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27724 break;
27726 default:
27727 gcc_unreachable ();
27730 return "";
27733 /* Output a call-via instruction for thumb state. */
27734 const char *
27735 thumb_call_via_reg (rtx reg)
27737 int regno = REGNO (reg);
27738 rtx *labelp;
27740 gcc_assert (regno < LR_REGNUM);
27742 /* If we are in the normal text section we can use a single instance
27743 per compilation unit. If we are doing function sections, then we need
27744 an entry per section, since we can't rely on reachability. */
27745 if (in_section == text_section)
27747 thumb_call_reg_needed = 1;
27749 if (thumb_call_via_label[regno] == NULL)
27750 thumb_call_via_label[regno] = gen_label_rtx ();
27751 labelp = thumb_call_via_label + regno;
27753 else
27755 if (cfun->machine->call_via[regno] == NULL)
27756 cfun->machine->call_via[regno] = gen_label_rtx ();
27757 labelp = cfun->machine->call_via + regno;
27760 output_asm_insn ("bl\t%a0", labelp);
27761 return "";
27764 /* Routines for generating rtl. */
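/* Expand a Thumb-1 block copy (movmemqi): copy LEN bytes from the source
   address in operands[1] to the destination address in operands[0], using
   12, 8, 4, 2 and finally 1 byte chunks.  */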
27765 void
27766 thumb_expand_movmemqi (rtx *operands)
27768 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27769 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27770 HOST_WIDE_INT len = INTVAL (operands[2]);
27771 HOST_WIDE_INT offset = 0;
27773 while (len >= 12)
27775 emit_insn (gen_movmem12b (out, in, out, in));
27776 len -= 12;
27779 if (len >= 8)
27781 emit_insn (gen_movmem8b (out, in, out, in));
27782 len -= 8;
27785 if (len >= 4)
27787 rtx reg = gen_reg_rtx (SImode);
27788 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27789 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27790 len -= 4;
27791 offset += 4;
27794 if (len >= 2)
27796 rtx reg = gen_reg_rtx (HImode);
27797 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27798 plus_constant (Pmode, in,
27799 offset))));
27800 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27801 offset)),
27802 reg));
27803 len -= 2;
27804 offset += 2;
27807 if (len)
27809 rtx reg = gen_reg_rtx (QImode);
27810 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27811 plus_constant (Pmode, in,
27812 offset))));
27813 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27814 offset)),
27815 reg));
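/* Handle storing a half-word to memory during reload.  */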
27819 void
27820 thumb_reload_out_hi (rtx *operands)
27822 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27825 /* Handle reading a half-word from memory during reload. */
27826 void
27827 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27829 gcc_unreachable ();
27832 /* Return the length of a function name prefix
27833 that starts with the character 'c'. */
27834 static int
27835 arm_get_strip_length (int c)
27837 switch (c)
27839 ARM_NAME_ENCODING_LENGTHS
27840 default: return 0;
27844 /* Return a pointer to a function's name with any
27845 and all prefix encodings stripped from it. */
27846 const char *
27847 arm_strip_name_encoding (const char *name)
27849 int skip;
27851 while ((skip = arm_get_strip_length (* name)))
27852 name += skip;
27854 return name;
27857 /* If there is a '*' anywhere in the name's prefix, then
27858 emit the stripped name verbatim, otherwise prepend an
27859 underscore if leading underscores are being used. */
27860 void
27861 arm_asm_output_labelref (FILE *stream, const char *name)
27863 int skip;
27864 int verbatim = 0;
27866 while ((skip = arm_get_strip_length (* name)))
27868 verbatim |= (*name == '*');
27869 name += skip;
27872 if (verbatim)
27873 fputs (name, stream);
27874 else
27875 asm_fprintf (stream, "%U%s", name);
27878 /* This function is used to emit an EABI tag and its associated value.
27879 We emit the numerical value of the tag in case the assembler does not
27880 support textual tags (e.g. gas prior to 2.20). If requested we include
27881 the tag name in a comment so that anyone reading the assembler output
27882 will know which tag is being set.
27884 This function is not static because arm-c.c needs it too. */
27886 void
27887 arm_emit_eabi_attribute (const char *name, int num, int val)
27889 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27890 if (flag_verbose_asm || flag_debug_asm)
27891 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27892 asm_fprintf (asm_out_file, "\n");
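/* Implement TARGET_ASM_FILE_START: emit the .syntax/.arch/.cpu/.fpu
   directives and the EABI object attributes describing how this unit was
   compiled.  */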
27895 static void
27896 arm_file_start (void)
27898 int val;
27900 if (TARGET_UNIFIED_ASM)
27901 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27903 if (TARGET_BPABI)
27905 const char *fpu_name;
27906 if (arm_selected_arch)
27908 /* armv7ve doesn't support any extensions. */
27909 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
27911 /* Keep backward compatibility for assemblers
27912 which don't support armv7ve. */
27913 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
27914 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
27915 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
27916 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
27917 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
27919 else
27921 const char* pos = strchr (arm_selected_arch->name, '+');
27922 if (pos)
27924 char buf[15];
27925 gcc_assert (strlen (arm_selected_arch->name)
27926 <= sizeof (buf) / sizeof (*pos));
27927 strncpy (buf, arm_selected_arch->name,
27928 (pos - arm_selected_arch->name) * sizeof (*pos));
27929 buf[pos - arm_selected_arch->name] = '\0';
27930 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
27931 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
27933 else
27934 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27937 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27938 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27939 else
27941 const char* truncated_name
27942 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27943 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27946 if (TARGET_SOFT_FLOAT)
27948 fpu_name = "softvfp";
27950 else
27952 fpu_name = arm_fpu_desc->name;
27953 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27955 if (TARGET_HARD_FLOAT)
27956 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27957 if (TARGET_HARD_FLOAT_ABI)
27958 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27961 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27963 /* Some of these attributes only apply when the corresponding features
27964 are used. However we don't have any easy way of figuring this out.
27965 Conservatively record the setting that would have been used. */
27967 if (flag_rounding_math)
27968 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27970 if (!flag_unsafe_math_optimizations)
27972 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27973 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27975 if (flag_signaling_nans)
27976 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27978 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27979 flag_finite_math_only ? 1 : 3);
27981 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27982 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27983 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27984 flag_short_enums ? 1 : 2);
27986 /* Tag_ABI_optimization_goals. */
27987 if (optimize_size)
27988 val = 4;
27989 else if (optimize >= 2)
27990 val = 2;
27991 else if (optimize)
27992 val = 1;
27993 else
27994 val = 6;
27995 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27997 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27998 unaligned_access);
28000 if (arm_fp16_format)
28001 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28002 (int) arm_fp16_format);
28004 if (arm_lang_output_object_attributes_hook)
28005 arm_lang_output_object_attributes_hook();
28008 default_file_start ();
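/* Implement TARGET_ASM_FILE_END: add a .note.GNU-stack section if required
   and emit the call-via-register stubs recorded in thumb_call_via_label.  */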
28011 static void
28012 arm_file_end (void)
28014 int regno;
28016 if (NEED_INDICATE_EXEC_STACK)
28017 /* Add .note.GNU-stack. */
28018 file_end_indicate_exec_stack ();
28020 if (! thumb_call_reg_needed)
28021 return;
28023 switch_to_section (text_section);
28024 asm_fprintf (asm_out_file, "\t.code 16\n");
28025 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28027 for (regno = 0; regno < LR_REGNUM; regno++)
28029 rtx label = thumb_call_via_label[regno];
28031 if (label != 0)
28033 targetm.asm_out.internal_label (asm_out_file, "L",
28034 CODE_LABEL_NUMBER (label));
28035 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28040 #ifndef ARM_PE
28041 /* Symbols in the text segment can be accessed without indirecting via the
28042 constant pool; it may take an extra binary operation, but this is still
28043 faster than indirecting via memory. Don't do this when not optimizing,
28044 since we won't be calculating al of the offsets necessary to do this
28045 simplification. */
28047 static void
28048 arm_encode_section_info (tree decl, rtx rtl, int first)
28050 if (optimize > 0 && TREE_CONSTANT (decl))
28051 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28053 default_encode_section_info (decl, rtl, first);
28055 #endif /* !ARM_PE */
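/* Output an internal label, resetting the conditional execution (ccfsm)
   tracking state if this is the label it was waiting for.  */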
28057 static void
28058 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28060 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28061 && !strcmp (prefix, "L"))
28063 arm_ccfsm_state = 0;
28064 arm_target_insn = NULL;
28066 default_internal_label (stream, prefix, labelno);
28069 /* Output code to add DELTA to the first argument, and then jump
28070 to FUNCTION. Used for C++ multiple inheritance. */
28071 static void
28072 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28073 HOST_WIDE_INT delta,
28074 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28075 tree function)
28077 static int thunk_label = 0;
28078 char label[256];
28079 char labelpc[256];
28080 int mi_delta = delta;
28081 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28082 int shift = 0;
28083 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28084 ? 1 : 0);
28085 if (mi_delta < 0)
28086 mi_delta = - mi_delta;
28088 final_start_function (emit_barrier (), file, 1);
28090 if (TARGET_THUMB1)
28092 int labelno = thunk_label++;
28093 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28094 /* Thunks are entered in ARM mode when available. */
28095 if (TARGET_THUMB1_ONLY)
28097 /* push r3 so we can use it as a temporary. */
28098 /* TODO: Omit this save if r3 is not used. */
28099 fputs ("\tpush {r3}\n", file);
28100 fputs ("\tldr\tr3, ", file);
28102 else
28104 fputs ("\tldr\tr12, ", file);
28106 assemble_name (file, label);
28107 fputc ('\n', file);
28108 if (flag_pic)
28110 /* If we are generating PIC, the ldr instruction below loads
28111 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28112 the address of the add + 8, so we have:
28114 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28115 = target + 1.
28117 Note that we have "+ 1" because some versions of GNU ld
28118 don't set the low bit of the result for R_ARM_REL32
28119 relocations against thumb function symbols.
28120 On ARMv6M this is +4, not +8. */
28121 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28122 assemble_name (file, labelpc);
28123 fputs (":\n", file);
28124 if (TARGET_THUMB1_ONLY)
28126 /* This is 2 insns after the start of the thunk, so we know it
28127 is 4-byte aligned. */
28128 fputs ("\tadd\tr3, pc, r3\n", file);
28129 fputs ("\tmov r12, r3\n", file);
28131 else
28132 fputs ("\tadd\tr12, pc, r12\n", file);
28134 else if (TARGET_THUMB1_ONLY)
28135 fputs ("\tmov r12, r3\n", file);
28137 if (TARGET_THUMB1_ONLY)
28139 if (mi_delta > 255)
28141 fputs ("\tldr\tr3, ", file);
28142 assemble_name (file, label);
28143 fputs ("+4\n", file);
28144 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28145 mi_op, this_regno, this_regno);
28147 else if (mi_delta != 0)
28149 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28150 mi_op, this_regno, this_regno,
28151 mi_delta);
28154 else
28156 /* TODO: Use movw/movt for large constants when available. */
28157 while (mi_delta != 0)
28159 if ((mi_delta & (3 << shift)) == 0)
28160 shift += 2;
28161 else
28163 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28164 mi_op, this_regno, this_regno,
28165 mi_delta & (0xff << shift));
28166 mi_delta &= ~(0xff << shift);
28167 shift += 8;
28171 if (TARGET_THUMB1)
28173 if (TARGET_THUMB1_ONLY)
28174 fputs ("\tpop\t{r3}\n", file);
28176 fprintf (file, "\tbx\tr12\n");
28177 ASM_OUTPUT_ALIGN (file, 2);
28178 assemble_name (file, label);
28179 fputs (":\n", file);
28180 if (flag_pic)
28182 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28183 rtx tem = XEXP (DECL_RTL (function), 0);
28184 tem = plus_constant (GET_MODE (tem), tem, -7);
28185 tem = gen_rtx_MINUS (GET_MODE (tem),
28186 tem,
28187 gen_rtx_SYMBOL_REF (Pmode,
28188 ggc_strdup (labelpc)));
28189 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28191 else
28192 /* Output ".word .LTHUNKn". */
28193 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28195 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28196 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28198 else
28200 fputs ("\tb\t", file);
28201 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28202 if (NEED_PLT_RELOC)
28203 fputs ("(PLT)", file);
28204 fputc ('\n', file);
28207 final_end_function ();
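/* Output the elements of the constant vector X to FILE as a single
   hexadecimal literal.  Returns 1.  */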
28211 arm_emit_vector_const (FILE *file, rtx x)
28213 int i;
28214 const char * pattern;
28216 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28218 switch (GET_MODE (x))
28220 case V2SImode: pattern = "%08x"; break;
28221 case V4HImode: pattern = "%04x"; break;
28222 case V8QImode: pattern = "%02x"; break;
28223 default: gcc_unreachable ();
28226 fprintf (file, "0x");
28227 for (i = CONST_VECTOR_NUNITS (x); i--;)
28229 rtx element;
28231 element = CONST_VECTOR_ELT (x, i);
28232 fprintf (file, pattern, INTVAL (element));
28235 return 1;
28238 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28239 HFmode constant pool entries are actually loaded with ldr. */
28240 void
28241 arm_emit_fp16_const (rtx c)
28243 REAL_VALUE_TYPE r;
28244 long bits;
28246 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28247 bits = real_to_target (NULL, &r, HFmode);
28248 if (WORDS_BIG_ENDIAN)
28249 assemble_zeros (2);
28250 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28251 if (!WORDS_BIG_ENDIAN)
28252 assemble_zeros (2);
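/* Output the assembly for loading iWMMXt GR register operand 0 from memory
   operand 1.  If the offset is out of range for wldrw, spill a core
   register, load through it and transfer the value with tmcr.  */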
28255 const char *
28256 arm_output_load_gr (rtx *operands)
28258 rtx reg;
28259 rtx offset;
28260 rtx wcgr;
28261 rtx sum;
28263 if (!MEM_P (operands [1])
28264 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28265 || !REG_P (reg = XEXP (sum, 0))
28266 || !CONST_INT_P (offset = XEXP (sum, 1))
28267 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28268 return "wldrw%?\t%0, %1";
28270 /* Fix up an out-of-range load of a GR register. */
28271 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28272 wcgr = operands[0];
28273 operands[0] = reg;
28274 output_asm_insn ("ldr%?\t%0, %1", operands);
28276 operands[0] = wcgr;
28277 operands[1] = reg;
28278 output_asm_insn ("tmcr%?\t%0, %1", operands);
28279 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28281 return "";
28284 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28286 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28287 named arg and all anonymous args onto the stack.
28288 XXX I know the prologue shouldn't be pushing registers, but it is faster
28289 that way. */
28291 static void
28292 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28293 enum machine_mode mode,
28294 tree type,
28295 int *pretend_size,
28296 int second_time ATTRIBUTE_UNUSED)
28298 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28299 int nregs;
28301 cfun->machine->uses_anonymous_args = 1;
28302 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28304 nregs = pcum->aapcs_ncrn;
28305 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28306 nregs++;
28308 else
28309 nregs = pcum->nregs;
28311 if (nregs < NUM_ARG_REGS)
28312 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28315 /* We can't rely on the caller doing the proper promotion when
28316 using APCS or ATPCS. */
28318 static bool
28319 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28321 return !TARGET_AAPCS_BASED;
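/* Implement TARGET_PROMOTE_FUNCTION_MODE: integer arguments and return
   values narrower than a word are promoted to SImode.  */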
28324 static enum machine_mode
28325 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28326 enum machine_mode mode,
28327 int *punsignedp ATTRIBUTE_UNUSED,
28328 const_tree fntype ATTRIBUTE_UNUSED,
28329 int for_return ATTRIBUTE_UNUSED)
28331 if (GET_MODE_CLASS (mode) == MODE_INT
28332 && GET_MODE_SIZE (mode) < 4)
28333 return SImode;
28335 return mode;
28338 /* AAPCS based ABIs use short enums by default. */
28340 static bool
28341 arm_default_short_enums (void)
28343 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28347 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28349 static bool
28350 arm_align_anon_bitfield (void)
28352 return TARGET_AAPCS_BASED;
28356 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28358 static tree
28359 arm_cxx_guard_type (void)
28361 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28365 /* The EABI says test the least significant bit of a guard variable. */
28367 static bool
28368 arm_cxx_guard_mask_bit (void)
28370 return TARGET_AAPCS_BASED;
28374 /* The EABI specifies that all array cookies are 8 bytes long. */
28376 static tree
28377 arm_get_cookie_size (tree type)
28379 tree size;
28381 if (!TARGET_AAPCS_BASED)
28382 return default_cxx_get_cookie_size (type);
28384 size = build_int_cst (sizetype, 8);
28385 return size;
28389 /* The EABI says that array cookies should also contain the element size. */
28391 static bool
28392 arm_cookie_has_size (void)
28394 return TARGET_AAPCS_BASED;
28398 /* The EABI says constructors and destructors should return a pointer to
28399 the object constructed/destroyed. */
28401 static bool
28402 arm_cxx_cdtor_returns_this (void)
28404 return TARGET_AAPCS_BASED;
28407 /* The EABI says that an inline function may never be the key
28408 method. */
28410 static bool
28411 arm_cxx_key_method_may_be_inline (void)
28413 return !TARGET_AAPCS_BASED;
28416 static void
28417 arm_cxx_determine_class_data_visibility (tree decl)
28419 if (!TARGET_AAPCS_BASED
28420 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28421 return;
28423 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28424 is exported. However, on systems without dynamic vague linkage,
28425 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28426 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28427 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28428 else
28429 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28430 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28433 static bool
28434 arm_cxx_class_data_always_comdat (void)
28436 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28437 vague linkage if the class has no key function. */
28438 return !TARGET_AAPCS_BASED;
28442 /* The EABI says __aeabi_atexit should be used to register static
28443 destructors. */
28445 static bool
28446 arm_cxx_use_aeabi_atexit (void)
28448 return TARGET_AAPCS_BASED;
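/* Store SOURCE into the return address slot of the current function's
   frame, or directly into LR if LR was not saved.  SCRATCH may be used to
   build the address when the offset is out of immediate range.  */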
28452 void
28453 arm_set_return_address (rtx source, rtx scratch)
28455 arm_stack_offsets *offsets;
28456 HOST_WIDE_INT delta;
28457 rtx addr;
28458 unsigned long saved_regs;
28460 offsets = arm_get_frame_offsets ();
28461 saved_regs = offsets->saved_regs_mask;
28463 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28464 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28465 else
28467 if (frame_pointer_needed)
28468 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28469 else
28471 /* LR will be the first saved register. */
28472 delta = offsets->outgoing_args - (offsets->frame + 4);
28475 if (delta >= 4096)
28477 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28478 GEN_INT (delta & ~4095)));
28479 addr = scratch;
28480 delta &= 4095;
28482 else
28483 addr = stack_pointer_rtx;
28485 addr = plus_constant (Pmode, addr, delta);
28487 emit_move_insn (gen_frame_mem (Pmode, addr), source);
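/* Thumb counterpart of arm_set_return_address: store SOURCE into the saved
   LR slot, using SCRATCH when the offset exceeds the addressing limit.  */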
28492 void
28493 thumb_set_return_address (rtx source, rtx scratch)
28495 arm_stack_offsets *offsets;
28496 HOST_WIDE_INT delta;
28497 HOST_WIDE_INT limit;
28498 int reg;
28499 rtx addr;
28500 unsigned long mask;
28502 emit_use (source);
28504 offsets = arm_get_frame_offsets ();
28505 mask = offsets->saved_regs_mask;
28506 if (mask & (1 << LR_REGNUM))
28508 limit = 1024;
28509 /* Find the saved regs. */
28510 if (frame_pointer_needed)
28512 delta = offsets->soft_frame - offsets->saved_args;
28513 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28514 if (TARGET_THUMB1)
28515 limit = 128;
28517 else
28519 delta = offsets->outgoing_args - offsets->saved_args;
28520 reg = SP_REGNUM;
28522 /* Allow for the stack frame. */
28523 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28524 delta -= 16;
28525 /* The link register is always the first saved register. */
28526 delta -= 4;
28528 /* Construct the address. */
28529 addr = gen_rtx_REG (SImode, reg);
28530 if (delta > limit)
28532 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28533 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28534 addr = scratch;
28536 else
28537 addr = plus_constant (Pmode, addr, delta);
28539 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28541 else
28542 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28545 /* Implements target hook vector_mode_supported_p. */
28546 bool
28547 arm_vector_mode_supported_p (enum machine_mode mode)
28549 /* Neon also supports V2SImode, etc. listed in the clause below. */
28550 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28551 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28552 return true;
28554 if ((TARGET_NEON || TARGET_IWMMXT)
28555 && ((mode == V2SImode)
28556 || (mode == V4HImode)
28557 || (mode == V8QImode)))
28558 return true;
28560 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28561 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28562 || mode == V2HAmode))
28563 return true;
28565 return false;
28568 /* Implements target hook array_mode_supported_p. */
28570 static bool
28571 arm_array_mode_supported_p (enum machine_mode mode,
28572 unsigned HOST_WIDE_INT nelems)
28574 if (TARGET_NEON
28575 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28576 && (nelems >= 2 && nelems <= 4))
28577 return true;
28579 return false;
28582 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28583 registers when autovectorizing for Neon, at least until multiple vector
28584 widths are supported properly by the middle-end. */
28586 static enum machine_mode
28587 arm_preferred_simd_mode (enum machine_mode mode)
28589 if (TARGET_NEON)
28590 switch (mode)
28592 case SFmode:
28593 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28594 case SImode:
28595 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28596 case HImode:
28597 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28598 case QImode:
28599 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28600 case DImode:
28601 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28602 return V2DImode;
28603 break;
28605 default:;
28608 if (TARGET_REALLY_IWMMXT)
28609 switch (mode)
28611 case SImode:
28612 return V2SImode;
28613 case HImode:
28614 return V4HImode;
28615 case QImode:
28616 return V8QImode;
28618 default:;
28621 return word_mode;
28624 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28626 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28627 using r0-r4 for function arguments, r7 for the stack frame, and not have
28628 enough left over to do doubleword arithmetic. For Thumb-2 all the
28629 potentially problematic instructions accept high registers so this is not
28630 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28631 that require many low registers. */
28632 static bool
28633 arm_class_likely_spilled_p (reg_class_t rclass)
28635 if ((TARGET_THUMB1 && rclass == LO_REGS)
28636 || rclass == CC_REG)
28637 return true;
28639 return false;
28642 /* Implements target hook small_register_classes_for_mode_p. */
28643 bool
28644 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28646 return TARGET_THUMB1;
28649 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28650 ARM insns and therefore guarantee that the shift count is modulo 256.
28651 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28652 guarantee no particular behavior for out-of-range counts. */
28654 static unsigned HOST_WIDE_INT
28655 arm_shift_truncation_mask (enum machine_mode mode)
28657 return mode == SImode ? 255 : 0;
28661 /* Map internal gcc register numbers to DWARF2 register numbers. */
28663 unsigned int
28664 arm_dbx_register_number (unsigned int regno)
28666 if (regno < 16)
28667 return regno;
28669 if (IS_VFP_REGNUM (regno))
28671 /* See comment in arm_dwarf_register_span. */
28672 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28673 return 64 + regno - FIRST_VFP_REGNUM;
28674 else
28675 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28678 if (IS_IWMMXT_GR_REGNUM (regno))
28679 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28681 if (IS_IWMMXT_REGNUM (regno))
28682 return 112 + regno - FIRST_IWMMXT_REGNUM;
28684 gcc_unreachable ();
28687 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28688 GCC models them as 64 32-bit registers, so we need to describe this to
28689 the DWARF generation code. Other registers can use the default. */
28690 static rtx
28691 arm_dwarf_register_span (rtx rtl)
28693 enum machine_mode mode;
28694 unsigned regno;
28695 rtx parts[16];
28696 int nregs;
28697 int i;
28699 regno = REGNO (rtl);
28700 if (!IS_VFP_REGNUM (regno))
28701 return NULL_RTX;
28703 /* XXX FIXME: The EABI defines two VFP register ranges:
28704 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28705 256-287: D0-D31
28706 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28707 corresponding D register. Until GDB supports this, we shall use the
28708 legacy encodings. We also use these encodings for D0-D15 for
28709 compatibility with older debuggers. */
28710 mode = GET_MODE (rtl);
28711 if (GET_MODE_SIZE (mode) < 8)
28712 return NULL_RTX;
28714 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28716 nregs = GET_MODE_SIZE (mode) / 4;
28717 for (i = 0; i < nregs; i += 2)
28718 if (TARGET_BIG_END)
28720 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28721 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28723 else
28725 parts[i] = gen_rtx_REG (SImode, regno + i);
28726 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28729 else
28731 nregs = GET_MODE_SIZE (mode) / 8;
28732 for (i = 0; i < nregs; i++)
28733 parts[i] = gen_rtx_REG (DImode, regno + i);
28736 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28739 #if ARM_UNWIND_INFO
28740 /* Emit unwind directives for a store-multiple instruction or stack pointer
28741 push during alignment.
28742 These should only ever be generated by the function prologue code, so
28743 expect them to have a particular form.
28744 The store-multiple instruction sometimes pushes pc as the last register,
28745 although it should not be tracked in the unwind information, or for -Os
28746 sometimes pushes some dummy registers before the first register that needs
28747 to be tracked in the unwind information; such dummy registers are there just
28748 to avoid separate stack adjustment, and will not be restored in the
28749 epilogue. */
28751 static void
28752 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28754 int i;
28755 HOST_WIDE_INT offset;
28756 HOST_WIDE_INT nregs;
28757 int reg_size;
28758 unsigned reg;
28759 unsigned lastreg;
28760 unsigned padfirst = 0, padlast = 0;
28761 rtx e;
28763 e = XVECEXP (p, 0, 0);
28764 gcc_assert (GET_CODE (e) == SET);
28766 /* First insn will adjust the stack pointer. */
28767 gcc_assert (GET_CODE (e) == SET
28768 && REG_P (SET_DEST (e))
28769 && REGNO (SET_DEST (e)) == SP_REGNUM
28770 && GET_CODE (SET_SRC (e)) == PLUS);
28772 offset = -INTVAL (XEXP (SET_SRC (e), 1));
28773 nregs = XVECLEN (p, 0) - 1;
28774 gcc_assert (nregs);
28776 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
28777 if (reg < 16)
28779 /* For -Os dummy registers can be pushed at the beginning to
28780 avoid separate stack pointer adjustment. */
28781 e = XVECEXP (p, 0, 1);
28782 e = XEXP (SET_DEST (e), 0);
28783 if (GET_CODE (e) == PLUS)
28784 padfirst = INTVAL (XEXP (e, 1));
28785 gcc_assert (padfirst == 0 || optimize_size);
28786 /* The function prologue may also push pc, but not annotate it as it is
28787 never restored. We turn this into a stack pointer adjustment. */
28788 e = XVECEXP (p, 0, nregs);
28789 e = XEXP (SET_DEST (e), 0);
28790 if (GET_CODE (e) == PLUS)
28791 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
28792 else
28793 padlast = offset - 4;
28794 gcc_assert (padlast == 0 || padlast == 4);
28795 if (padlast == 4)
28796 fprintf (asm_out_file, "\t.pad #4\n");
28797 reg_size = 4;
28798 fprintf (asm_out_file, "\t.save {");
28800 else if (IS_VFP_REGNUM (reg))
28802 reg_size = 8;
28803 fprintf (asm_out_file, "\t.vsave {");
28805 else
28806 /* Unknown register type. */
28807 gcc_unreachable ();
28809 /* If the stack increment doesn't match the size of the saved registers,
28810 something has gone horribly wrong. */
28811 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
28813 offset = padfirst;
28814 lastreg = 0;
28815 /* The remaining insns will describe the stores. */
28816 for (i = 1; i <= nregs; i++)
28818 /* Expect (set (mem <addr>) (reg)).
28819 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28820 e = XVECEXP (p, 0, i);
28821 gcc_assert (GET_CODE (e) == SET
28822 && MEM_P (SET_DEST (e))
28823 && REG_P (SET_SRC (e)));
28825 reg = REGNO (SET_SRC (e));
28826 gcc_assert (reg >= lastreg);
28828 if (i != 1)
28829 fprintf (asm_out_file, ", ");
28830 /* We can't use %r for vfp because we need to use the
28831 double precision register names. */
28832 if (IS_VFP_REGNUM (reg))
28833 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28834 else
28835 asm_fprintf (asm_out_file, "%r", reg);
28837 #ifdef ENABLE_CHECKING
28838 /* Check that the addresses are consecutive. */
28839 e = XEXP (SET_DEST (e), 0);
28840 if (GET_CODE (e) == PLUS)
28841 gcc_assert (REG_P (XEXP (e, 0))
28842 && REGNO (XEXP (e, 0)) == SP_REGNUM
28843 && CONST_INT_P (XEXP (e, 1))
28844 && offset == INTVAL (XEXP (e, 1)));
28845 else
28846 gcc_assert (i == 1
28847 && REG_P (e)
28848 && REGNO (e) == SP_REGNUM);
28849 offset += reg_size;
28850 #endif
28852 fprintf (asm_out_file, "}\n");
28853 if (padfirst)
28854 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
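/* Editor's sketch of typical output (register choices illustrative):
   for a prologue "push {r4, r5, r6, lr}" this emits
       .save {r4, r5, r6, lr}
   for a VFP store-multiple of d8-d9 it emits
       .vsave {d8, d9}
   and an extra ".pad #4" precedes the list when pc was pushed purely
   as a stack adjustment.  */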
28857 /* Emit unwind directives for a SET. */
28859 static void
28860 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28862 rtx e0;
28863 rtx e1;
28864 unsigned reg;
28866 e0 = XEXP (p, 0);
28867 e1 = XEXP (p, 1);
28868 switch (GET_CODE (e0))
28870 case MEM:
28871 /* Pushing a single register. */
28872 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28873 || !REG_P (XEXP (XEXP (e0, 0), 0))
28874 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28875 abort ();
28877 asm_fprintf (asm_out_file, "\t.save ");
28878 if (IS_VFP_REGNUM (REGNO (e1)))
28879 asm_fprintf(asm_out_file, "{d%d}\n",
28880 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28881 else
28882 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28883 break;
28885 case REG:
28886 if (REGNO (e0) == SP_REGNUM)
28888 /* A stack increment. */
28889 if (GET_CODE (e1) != PLUS
28890 || !REG_P (XEXP (e1, 0))
28891 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28892 || !CONST_INT_P (XEXP (e1, 1)))
28893 abort ();
28895 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28896 -INTVAL (XEXP (e1, 1)));
28898 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28900 HOST_WIDE_INT offset;
28902 if (GET_CODE (e1) == PLUS)
28904 if (!REG_P (XEXP (e1, 0))
28905 || !CONST_INT_P (XEXP (e1, 1)))
28906 abort ();
28907 reg = REGNO (XEXP (e1, 0));
28908 offset = INTVAL (XEXP (e1, 1));
28909 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28910 HARD_FRAME_POINTER_REGNUM, reg,
28911 offset);
28913 else if (REG_P (e1))
28915 reg = REGNO (e1);
28916 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28917 HARD_FRAME_POINTER_REGNUM, reg);
28919 else
28920 abort ();
28922 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28924 /* Move from sp to reg. */
28925 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28927 else if (GET_CODE (e1) == PLUS
28928 && REG_P (XEXP (e1, 0))
28929 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28930 && CONST_INT_P (XEXP (e1, 1)))
28932 /* Set reg to offset from sp. */
28933 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28934 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28936 else
28937 abort ();
28938 break;
28940 default:
28941 abort ();
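/* Illustrative examples of the mapping above (editor's sketch; the RTL
   is simplified and register names are examples only):
     (set (mem (pre_dec sp)) (reg r4))    ->  .save {r4}
     (set sp (plus sp (const_int -16)))   ->  .pad #16
     (set fp (plus sp (const_int 8)))     ->  .setfp fp, sp, #8
     (set r7 (reg sp))                    ->  .movsp r7  */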
28946 /* Emit unwind directives for the given insn. */
28948 static void
28949 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28951 rtx note, pat;
28952 bool handled_one = false;
28954 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28955 return;
28957 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28958 && (TREE_NOTHROW (current_function_decl)
28959 || crtl->all_throwers_are_sibcalls))
28960 return;
28962 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28963 return;
28965 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28967 switch (REG_NOTE_KIND (note))
28969 case REG_FRAME_RELATED_EXPR:
28970 pat = XEXP (note, 0);
28971 goto found;
28973 case REG_CFA_REGISTER:
28974 pat = XEXP (note, 0);
28975 if (pat == NULL)
28977 pat = PATTERN (insn);
28978 if (GET_CODE (pat) == PARALLEL)
28979 pat = XVECEXP (pat, 0, 0);
28982 /* Only emitted for IS_STACKALIGN re-alignment. */
28984 rtx dest, src;
28985 unsigned reg;
28987 src = SET_SRC (pat);
28988 dest = SET_DEST (pat);
28990 gcc_assert (src == stack_pointer_rtx);
28991 reg = REGNO (dest);
28992 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28993 reg + 0x90, reg);
28995 handled_one = true;
28996 break;
28998 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28999 to get correct dwarf information for shrink-wrapping. We should not
29000 emit unwind information for it because these notes are used either for
29001 pretend arguments or to adjust sp and restore registers from the
29002 stack. */
29003 case REG_CFA_DEF_CFA:
29004 case REG_CFA_ADJUST_CFA:
29005 case REG_CFA_RESTORE:
29006 return;
29008 case REG_CFA_EXPRESSION:
29009 case REG_CFA_OFFSET:
29010 /* ??? Only handling here what we actually emit. */
29011 gcc_unreachable ();
29013 default:
29014 break;
29017 if (handled_one)
29018 return;
29019 pat = PATTERN (insn);
29020 found:
29022 switch (GET_CODE (pat))
29024 case SET:
29025 arm_unwind_emit_set (asm_out_file, pat);
29026 break;
29028 case SEQUENCE:
29029 /* Store multiple. */
29030 arm_unwind_emit_sequence (asm_out_file, pat);
29031 break;
29033 default:
29034 abort();
29039 /* Output a reference from a function exception table to the type_info
29040 object X. The EABI specifies that the symbol should be relocated by
29041 an R_ARM_TARGET2 relocation. */
29043 static bool
29044 arm_output_ttype (rtx x)
29046 fputs ("\t.word\t", asm_out_file);
29047 output_addr_const (asm_out_file, x);
29048 /* Use special relocations for symbol references. */
29049 if (!CONST_INT_P (x))
29050 fputs ("(TARGET2)", asm_out_file);
29051 fputc ('\n', asm_out_file);
29053 return TRUE;
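/* For example (editor's sketch; the symbol name is illustrative): a
   catch clause for a C++ "int" would produce
       .word   _ZTIi(TARGET2)
   whereas an integer constant entry is emitted without the TARGET2
   suffix.  */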
29056 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29058 static void
29059 arm_asm_emit_except_personality (rtx personality)
29061 fputs ("\t.personality\t", asm_out_file);
29062 output_addr_const (asm_out_file, personality);
29063 fputc ('\n', asm_out_file);
29066 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29068 static void
29069 arm_asm_init_sections (void)
29071 exception_section = get_unnamed_section (0, output_section_asm_op,
29072 "\t.handlerdata");
29074 #endif /* ARM_UNWIND_INFO */
29076 /* Output unwind directives for the start/end of a function. */
29078 void
29079 arm_output_fn_unwind (FILE * f, bool prologue)
29081 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29082 return;
29084 if (prologue)
29085 fputs ("\t.fnstart\n", f);
29086 else
29088 /* If this function will never be unwound, then mark it as such.
29089 The same condition is used in arm_unwind_emit to suppress
29090 the frame annotations. */
29091 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29092 && (TREE_NOTHROW (current_function_decl)
29093 || crtl->all_throwers_are_sibcalls))
29094 fputs("\t.cantunwind\n", f);
29096 fputs ("\t.fnend\n", f);
29100 static bool
29101 arm_emit_tls_decoration (FILE *fp, rtx x)
29103 enum tls_reloc reloc;
29104 rtx val;
29106 val = XVECEXP (x, 0, 0);
29107 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29109 output_addr_const (fp, val);
29111 switch (reloc)
29113 case TLS_GD32:
29114 fputs ("(tlsgd)", fp);
29115 break;
29116 case TLS_LDM32:
29117 fputs ("(tlsldm)", fp);
29118 break;
29119 case TLS_LDO32:
29120 fputs ("(tlsldo)", fp);
29121 break;
29122 case TLS_IE32:
29123 fputs ("(gottpoff)", fp);
29124 break;
29125 case TLS_LE32:
29126 fputs ("(tpoff)", fp);
29127 break;
29128 case TLS_DESCSEQ:
29129 fputs ("(tlsdesc)", fp);
29130 break;
29131 default:
29132 gcc_unreachable ();
29135 switch (reloc)
29137 case TLS_GD32:
29138 case TLS_LDM32:
29139 case TLS_IE32:
29140 case TLS_DESCSEQ:
29141 fputs (" + (. - ", fp);
29142 output_addr_const (fp, XVECEXP (x, 0, 2));
29143 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29144 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29145 output_addr_const (fp, XVECEXP (x, 0, 3));
29146 fputc (')', fp);
29147 break;
29148 default:
29149 break;
29152 return TRUE;
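/* Sketch of the operand text printed above (the label and pc-offset are
   shown as placeholders, since their values come from the UNSPEC
   operands): a global-dynamic reference to "x" prints roughly as
       x(tlsgd) + (. - <pic label> - <pc offset>)
   while a local-exec reference prints simply "x(tpoff)".  */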
29155 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29157 static void
29158 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29160 gcc_assert (size == 4);
29161 fputs ("\t.word\t", file);
29162 output_addr_const (file, x);
29163 fputs ("(tlsldo)", file);
29166 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29168 static bool
29169 arm_output_addr_const_extra (FILE *fp, rtx x)
29171 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29172 return arm_emit_tls_decoration (fp, x);
29173 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29175 char label[256];
29176 int labelno = INTVAL (XVECEXP (x, 0, 0));
29178 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29179 assemble_name_raw (fp, label);
29181 return TRUE;
29183 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29185 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29186 if (GOT_PCREL)
29187 fputs ("+.", fp);
29188 fputs ("-(", fp);
29189 output_addr_const (fp, XVECEXP (x, 0, 0));
29190 fputc (')', fp);
29191 return TRUE;
29193 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29195 output_addr_const (fp, XVECEXP (x, 0, 0));
29196 if (GOT_PCREL)
29197 fputs ("+.", fp);
29198 fputs ("-(", fp);
29199 output_addr_const (fp, XVECEXP (x, 0, 1));
29200 fputc (')', fp);
29201 return TRUE;
29203 else if (GET_CODE (x) == CONST_VECTOR)
29204 return arm_emit_vector_const (fp, x);
29206 return FALSE;
29209 /* Output assembly for a shift instruction.
29210 SET_FLAGS determines how the instruction modifies the condition codes.
29211 0 - Do not set condition codes.
29212 1 - Set condition codes.
29213 2 - Use smallest instruction. */
29214 const char *
29215 arm_output_shift(rtx * operands, int set_flags)
29217 char pattern[100];
29218 static const char flag_chars[3] = {'?', '.', '!'};
29219 const char *shift;
29220 HOST_WIDE_INT val;
29221 char c;
29223 c = flag_chars[set_flags];
29224 if (TARGET_UNIFIED_ASM)
29226 shift = shift_op(operands[3], &val);
29227 if (shift)
29229 if (val != -1)
29230 operands[2] = GEN_INT(val);
29231 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29233 else
29234 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29236 else
29237 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29238 output_asm_insn (pattern, operands);
29239 return "";
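/* Editor's sketch of the resulting assembly (register numbers are
   illustrative): with unified syntax an SImode left shift by 2 comes
   out roughly as
       lsls    r0, r1, #2      @ SET_FLAGS == 1
       lsl     r0, r1, #2      @ SET_FLAGS == 0
   and the non-unified path falls back to the "mov %0, %1, lsl #2"
   style built from the %S3 template above.  */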
29242 /* Output assembly for a WMMX immediate shift instruction. */
29243 const char *
29244 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29246 int shift = INTVAL (operands[2]);
29247 char templ[50];
29248 enum machine_mode opmode = GET_MODE (operands[0]);
29250 gcc_assert (shift >= 0);
29252 /* Handle out-of-range shift values: greater than 63 (for the D qualifier),
29253 31 (for the W qualifier) or 15 (for the H qualifier). */
29254 if (((opmode == V4HImode) && (shift > 15))
29255 || ((opmode == V2SImode) && (shift > 31))
29256 || ((opmode == DImode) && (shift > 63)))
29258 if (wror_or_wsra)
29260 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29261 output_asm_insn (templ, operands);
29262 if (opmode == DImode)
29264 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29265 output_asm_insn (templ, operands);
29268 else
29270 /* The destination register will contain all zeros. */
29271 sprintf (templ, "wzero\t%%0");
29272 output_asm_insn (templ, operands);
29274 return "";
29277 if ((opmode == DImode) && (shift > 32))
29279 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29280 output_asm_insn (templ, operands);
29281 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29282 output_asm_insn (templ, operands);
29284 else
29286 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29287 output_asm_insn (templ, operands);
29289 return "";
29292 /* Output assembly for a WMMX tinsr instruction. */
29293 const char *
29294 arm_output_iwmmxt_tinsr (rtx *operands)
29296 int mask = INTVAL (operands[3]);
29297 int i;
29298 char templ[50];
29299 int units = mode_nunits[GET_MODE (operands[0])];
29300 gcc_assert ((mask & (mask - 1)) == 0);
29301 for (i = 0; i < units; ++i)
29303 if ((mask & 0x01) == 1)
29305 break;
29307 mask >>= 1;
29309 gcc_assert (i < units);
29311 switch (GET_MODE (operands[0]))
29313 case V8QImode:
29314 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29315 break;
29316 case V4HImode:
29317 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29318 break;
29319 case V2SImode:
29320 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29321 break;
29322 default:
29323 gcc_unreachable ();
29324 break;
29326 output_asm_insn (templ, operands);
29328 return "";
29331 /* Output a Thumb-1 casesi dispatch sequence. */
29332 const char *
29333 thumb1_output_casesi (rtx *operands)
29335 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29337 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29339 switch (GET_MODE(diff_vec))
29341 case QImode:
29342 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29343 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29344 case HImode:
29345 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29346 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29347 case SImode:
29348 return "bl\t%___gnu_thumb1_case_si";
29349 default:
29350 gcc_unreachable ();
29354 /* Output a Thumb-2 casesi instruction. */
29355 const char *
29356 thumb2_output_casesi (rtx *operands)
29358 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29360 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29362 output_asm_insn ("cmp\t%0, %1", operands);
29363 output_asm_insn ("bhi\t%l3", operands);
29364 switch (GET_MODE(diff_vec))
29366 case QImode:
29367 return "tbb\t[%|pc, %0]";
29368 case HImode:
29369 return "tbh\t[%|pc, %0, lsl #1]";
29370 case SImode:
29371 if (flag_pic)
29373 output_asm_insn ("adr\t%4, %l2", operands);
29374 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29375 output_asm_insn ("add\t%4, %4, %5", operands);
29376 return "bx\t%4";
29378 else
29380 output_asm_insn ("adr\t%4, %l2", operands);
29381 return "ldr\t%|pc, [%4, %0, lsl #2]";
29383 default:
29384 gcc_unreachable ();
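/* Sketch of the non-PIC dispatch code this emits for a HImode table
   (operand registers are illustrative):
       cmp     r0, r1              @ index against table size
       bhi     .Ldefault
       tbh     [pc, r0, lsl #1]
   For an SImode table with -fpic, the entry is loaded with ldr, added
   to the table base obtained with adr, and then branched to with bx.  */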
29388 /* Most ARM cores are single issue, but some newer ones can issue two or
29389 three instructions per cycle. The scheduler descriptions rely on this being correct. */
29390 static int
29391 arm_issue_rate (void)
29393 switch (arm_tune)
29395 case cortexa15:
29396 case cortexa57:
29397 return 3;
29399 case cortexr4:
29400 case cortexr4f:
29401 case cortexr5:
29402 case genericv7a:
29403 case cortexa5:
29404 case cortexa7:
29405 case cortexa8:
29406 case cortexa9:
29407 case cortexa12:
29408 case cortexa53:
29409 case fa726te:
29410 case marvell_pj4:
29411 return 2;
29413 default:
29414 return 1;
29418 /* A table and a function to perform ARM-specific name mangling for
29419 NEON vector types in order to conform to the AAPCS (see "Procedure
29420 Call Standard for the ARM Architecture", Appendix A). To qualify
29421 for emission with the mangled names defined in that document, a
29422 vector type must not only be of the correct mode but also be
29423 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29424 typedef struct
29426 enum machine_mode mode;
29427 const char *element_type_name;
29428 const char *aapcs_name;
29429 } arm_mangle_map_entry;
29431 static arm_mangle_map_entry arm_mangle_map[] = {
29432 /* 64-bit containerized types. */
29433 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29434 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29435 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29436 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29437 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29438 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29439 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29440 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29441 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29442 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29444 /* 128-bit containerized types. */
29445 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29446 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29447 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29448 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29449 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29450 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29451 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29452 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29453 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29454 { VOIDmode, NULL, NULL }
29457 const char *
29458 arm_mangle_type (const_tree type)
29460 arm_mangle_map_entry *pos = arm_mangle_map;
29462 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29463 has to be mangled as if it is in the "std" namespace. */
29464 if (TARGET_AAPCS_BASED
29465 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29466 return "St9__va_list";
29468 /* Half-precision float. */
29469 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29470 return "Dh";
29472 if (TREE_CODE (type) != VECTOR_TYPE)
29473 return NULL;
29475 /* Check the mode of the vector type, and the name of the vector
29476 element type, against the table. */
29477 while (pos->mode != VOIDmode)
29479 tree elt_type = TREE_TYPE (type);
29481 if (pos->mode == TYPE_MODE (type)
29482 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29483 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29484 pos->element_type_name))
29485 return pos->aapcs_name;
29487 pos++;
29490 /* Use the default mangling for unrecognized (possibly user-defined)
29491 vector types. */
29492 return NULL;
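/* Worked examples of the mapping above (editor's sketch): a 64-bit NEON
   vector of __builtin_neon_qi elements (V8QImode, the arm_neon.h
   int8x8_t type) mangles as "15__simd64_int8_t"; the half-precision
   __fp16 scalar mangles as "Dh"; and on AAPCS targets __builtin_va_list
   mangles as "St9__va_list".  */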
29495 /* Order of allocation of core registers for Thumb: this allocation is
29496 written over the corresponding initial entries of the array
29497 initialized with REG_ALLOC_ORDER. We allocate all low registers
29498 first. Saving and restoring a low register is usually cheaper than
29499 using a call-clobbered high register. */
29501 static const int thumb_core_reg_alloc_order[] =
29503 3, 2, 1, 0, 4, 5, 6, 7,
29504 14, 12, 8, 9, 10, 11
29507 /* Adjust register allocation order when compiling for Thumb. */
29509 void
29510 arm_order_regs_for_local_alloc (void)
29512 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29513 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29514 if (TARGET_THUMB)
29515 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29516 sizeof (thumb_core_reg_alloc_order));
29519 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29521 bool
29522 arm_frame_pointer_required (void)
29524 return (cfun->has_nonlocal_label
29525 || SUBTARGET_FRAME_POINTER_REQUIRED
29526 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29529 /* Thumb-1 is the only target without conditional execution support, so
29530 return true if the target is not Thumb-1. */
29531 static bool
29532 arm_have_conditional_execution (void)
29534 return !TARGET_THUMB1;
29537 tree
29538 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29540 enum machine_mode in_mode, out_mode;
29541 int in_n, out_n;
29543 if (TREE_CODE (type_out) != VECTOR_TYPE
29544 || TREE_CODE (type_in) != VECTOR_TYPE
29545 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29546 return NULL_TREE;
29548 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29549 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29550 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29551 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29553 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29554 decl of the vectorized builtin for the appropriate vector mode.
29555 NULL_TREE is returned if no such builtin is available. */
29556 #undef ARM_CHECK_BUILTIN_MODE
29557 #define ARM_CHECK_BUILTIN_MODE(C) \
29558 (out_mode == SFmode && out_n == C \
29559 && in_mode == SFmode && in_n == C)
29561 #undef ARM_FIND_VRINT_VARIANT
29562 #define ARM_FIND_VRINT_VARIANT(N) \
29563 (ARM_CHECK_BUILTIN_MODE (2) \
29564 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29565 : (ARM_CHECK_BUILTIN_MODE (4) \
29566 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29567 : NULL_TREE))
29569 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29571 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29572 switch (fn)
29574 case BUILT_IN_FLOORF:
29575 return ARM_FIND_VRINT_VARIANT (vrintm);
29576 case BUILT_IN_CEILF:
29577 return ARM_FIND_VRINT_VARIANT (vrintp);
29578 case BUILT_IN_TRUNCF:
29579 return ARM_FIND_VRINT_VARIANT (vrintz);
29580 case BUILT_IN_ROUNDF:
29581 return ARM_FIND_VRINT_VARIANT (vrinta);
29582 default:
29583 return NULL_TREE;
29586 return NULL_TREE;
29588 #undef ARM_CHECK_BUILTIN_MODE
29589 #undef ARM_FIND_VRINT_VARIANT
29591 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29592 static HOST_WIDE_INT
29593 arm_vector_alignment (const_tree type)
29595 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29597 if (TARGET_AAPCS_BASED)
29598 align = MIN (align, 64);
29600 return align;
29603 static unsigned int
29604 arm_autovectorize_vector_sizes (void)
29606 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29609 static bool
29610 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29612 /* Vectors which aren't in packed structures will not be less aligned than
29613 the natural alignment of their element type, so this is safe. */
29614 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29615 return !is_packed;
29617 return default_builtin_vector_alignment_reachable (type, is_packed);
29620 static bool
29621 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29622 const_tree type, int misalignment,
29623 bool is_packed)
29625 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29627 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29629 if (is_packed)
29630 return align == 1;
29632 /* If the misalignment is unknown, we should be able to handle the access
29633 so long as it is not to a member of a packed data structure. */
29634 if (misalignment == -1)
29635 return true;
29637 /* Return true if the misalignment is a multiple of the natural alignment
29638 of the vector's element type. This is probably always going to be
29639 true in practice, since we've already established that this isn't a
29640 packed access. */
29641 return ((misalignment % align) == 0);
29644 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29645 is_packed);
29648 static void
29649 arm_conditional_register_usage (void)
29651 int regno;
29653 if (TARGET_THUMB1 && optimize_size)
29655 /* When optimizing for size on Thumb-1, it's better not
29656 to use the HI regs, because of the overhead of
29657 stacking them. */
29658 for (regno = FIRST_HI_REGNUM;
29659 regno <= LAST_HI_REGNUM; ++regno)
29660 fixed_regs[regno] = call_used_regs[regno] = 1;
29663 /* The link register can be clobbered by any branch insn,
29664 but we have no way to track that at present, so mark
29665 it as unavailable. */
29666 if (TARGET_THUMB1)
29667 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29669 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29671 /* VFPv3 registers are disabled when earlier VFP
29672 versions are selected due to the definition of
29673 LAST_VFP_REGNUM. */
29674 for (regno = FIRST_VFP_REGNUM;
29675 regno <= LAST_VFP_REGNUM; ++ regno)
29677 fixed_regs[regno] = 0;
29678 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29679 || regno >= FIRST_VFP_REGNUM + 32;
29683 if (TARGET_REALLY_IWMMXT)
29685 regno = FIRST_IWMMXT_GR_REGNUM;
29686 /* The 2002/10/09 revision of the XScale ABI has wCG0
29687 and wCG1 as call-preserved registers. The 2002/11/21
29688 revision changed this so that all wCG registers are
29689 scratch registers. */
29690 for (regno = FIRST_IWMMXT_GR_REGNUM;
29691 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29692 fixed_regs[regno] = 0;
29693 /* The XScale ABI has wR0 - wR9 as scratch registers,
29694 the rest as call-preserved registers. */
29695 for (regno = FIRST_IWMMXT_REGNUM;
29696 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29698 fixed_regs[regno] = 0;
29699 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29703 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29705 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29706 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29708 else if (TARGET_APCS_STACK)
29710 fixed_regs[10] = 1;
29711 call_used_regs[10] = 1;
29713 /* -mcaller-super-interworking reserves r11 for calls to
29714 _interwork_r11_call_via_rN(). Making the register global
29715 is an easy way of ensuring that it remains valid for all
29716 calls. */
29717 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29718 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29720 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29721 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29722 if (TARGET_CALLER_INTERWORKING)
29723 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29725 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29728 static reg_class_t
29729 arm_preferred_rename_class (reg_class_t rclass)
29731 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29732 using GENERAL_REGS. Preferring LO_REGS during the register rename pass
29733 can therefore reduce code size. */
29734 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29735 return LO_REGS;
29736 else
29737 return NO_REGS;
29740 /* Compute the attribute "length" of insn "*push_multi".
29741 So this function MUST be kept in sync with that insn pattern. */
29743 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29745 int i, regno, hi_reg;
29746 int num_saves = XVECLEN (parallel_op, 0);
29748 /* ARM mode. */
29749 if (TARGET_ARM)
29750 return 4;
29751 /* Thumb1 mode. */
29752 if (TARGET_THUMB1)
29753 return 2;
29755 /* Thumb2 mode. */
29756 regno = REGNO (first_op);
29757 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29758 for (i = 1; i < num_saves && !hi_reg; i++)
29760 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29761 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29764 if (!hi_reg)
29765 return 2;
29766 return 4;
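/* Example (editor's sketch): in Thumb-2, "push {r4, r5, r6, lr}" uses
   only low registers plus lr and fits the 16-bit encoding (length 2),
   while "push {r4, r8}" involves a high register and needs the 32-bit
   encoding (length 4).  ARM mode is always 4 bytes and Thumb-1 always
   2.  */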
29769 /* Compute the number of instructions emitted by output_move_double. */
29771 arm_count_output_move_double_insns (rtx *operands)
29773 int count;
29774 rtx ops[2];
29775 /* output_move_double may modify the operands array, so call it
29776 here on a copy of the array. */
29777 ops[0] = operands[0];
29778 ops[1] = operands[1];
29779 output_move_double (ops, false, &count);
29780 return count;
29784 vfp3_const_double_for_fract_bits (rtx operand)
29786 REAL_VALUE_TYPE r0;
29788 if (!CONST_DOUBLE_P (operand))
29789 return 0;
29791 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29792 if (exact_real_inverse (DFmode, &r0))
29794 if (exact_real_truncate (DFmode, &r0))
29796 HOST_WIDE_INT value = real_to_integer (&r0);
29797 value = value & 0xffffffff;
29798 if ((value != 0) && ( (value & (value - 1)) == 0))
29799 return int_log2 (value);
29802 return 0;
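/* Example (editor's sketch): for the constant 0.25 the exact reciprocal
   is 4.0, which truncates exactly to the integer 4 = 2^2, so 2 is
   returned (the number of fractional bits for a fixed-point vcvt).  A
   value such as 0.3, whose reciprocal is not a power of two, yields 0.  */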
29806 vfp3_const_double_for_bits (rtx operand)
29808 REAL_VALUE_TYPE r0;
29810 if (!CONST_DOUBLE_P (operand))
29811 return 0;
29813 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29814 if (exact_real_truncate (DFmode, &r0))
29816 HOST_WIDE_INT value = real_to_integer (&r0);
29817 value = value & 0xffffffff;
29818 if ((value != 0) && ( (value & (value - 1)) == 0))
29819 return int_log2 (value);
29822 return 0;
29825 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29827 static void
29828 arm_pre_atomic_barrier (enum memmodel model)
29830 if (need_atomic_barrier_p (model, true))
29831 emit_insn (gen_memory_barrier ());
29834 static void
29835 arm_post_atomic_barrier (enum memmodel model)
29837 if (need_atomic_barrier_p (model, false))
29838 emit_insn (gen_memory_barrier ());
29841 /* Emit the load-exclusive and store-exclusive instructions.
29842 Use acquire and release versions if necessary. */
29844 static void
29845 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29847 rtx (*gen) (rtx, rtx);
29849 if (acq)
29851 switch (mode)
29853 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29854 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29855 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29856 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29857 default:
29858 gcc_unreachable ();
29861 else
29863 switch (mode)
29865 case QImode: gen = gen_arm_load_exclusiveqi; break;
29866 case HImode: gen = gen_arm_load_exclusivehi; break;
29867 case SImode: gen = gen_arm_load_exclusivesi; break;
29868 case DImode: gen = gen_arm_load_exclusivedi; break;
29869 default:
29870 gcc_unreachable ();
29874 emit_insn (gen (rval, mem));
29877 static void
29878 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29879 rtx mem, bool rel)
29881 rtx (*gen) (rtx, rtx, rtx);
29883 if (rel)
29885 switch (mode)
29887 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29888 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29889 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29890 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29891 default:
29892 gcc_unreachable ();
29895 else
29897 switch (mode)
29899 case QImode: gen = gen_arm_store_exclusiveqi; break;
29900 case HImode: gen = gen_arm_store_exclusivehi; break;
29901 case SImode: gen = gen_arm_store_exclusivesi; break;
29902 case DImode: gen = gen_arm_store_exclusivedi; break;
29903 default:
29904 gcc_unreachable ();
29908 emit_insn (gen (bval, rval, mem));
29911 /* Mark the previous jump instruction as unlikely. */
29913 static void
29914 emit_unlikely_jump (rtx insn)
29916 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29918 insn = emit_jump_insn (insn);
29919 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29922 /* Expand a compare and swap pattern. */
29924 void
29925 arm_expand_compare_and_swap (rtx operands[])
29927 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29928 enum machine_mode mode;
29929 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29931 bval = operands[0];
29932 rval = operands[1];
29933 mem = operands[2];
29934 oldval = operands[3];
29935 newval = operands[4];
29936 is_weak = operands[5];
29937 mod_s = operands[6];
29938 mod_f = operands[7];
29939 mode = GET_MODE (mem);
29941 /* Normally the succ memory model must be stronger than fail, but in the
29942 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29943 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29945 if (TARGET_HAVE_LDACQ
29946 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29947 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29948 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29950 switch (mode)
29952 case QImode:
29953 case HImode:
29954 /* For narrow modes, we're going to perform the comparison in SImode,
29955 so do the zero-extension now. */
29956 rval = gen_reg_rtx (SImode);
29957 oldval = convert_modes (SImode, mode, oldval, true);
29958 /* FALLTHRU */
29960 case SImode:
29961 /* Force the value into a register if needed. We waited until after
29962 the zero-extension above to do this properly. */
29963 if (!arm_add_operand (oldval, SImode))
29964 oldval = force_reg (SImode, oldval);
29965 break;
29967 case DImode:
29968 if (!cmpdi_operand (oldval, mode))
29969 oldval = force_reg (mode, oldval);
29970 break;
29972 default:
29973 gcc_unreachable ();
29976 switch (mode)
29978 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29979 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29980 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29981 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29982 default:
29983 gcc_unreachable ();
29986 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29988 if (mode == QImode || mode == HImode)
29989 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29991 /* In all cases, we arrange for success to be signaled by Z set.
29992 This arrangement allows for the boolean result to be used directly
29993 in a subsequent branch, post optimization. */
29994 x = gen_rtx_REG (CCmode, CC_REGNUM);
29995 x = gen_rtx_EQ (SImode, x, const0_rtx);
29996 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29999 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30000 another memory store between the load-exclusive and store-exclusive can
30001 reset the monitor from Exclusive to Open state. This means we must wait
30002 until after reload to split the pattern, lest we get a register spill in
30003 the middle of the atomic sequence. */
30005 void
30006 arm_split_compare_and_swap (rtx operands[])
30008 rtx rval, mem, oldval, newval, scratch;
30009 enum machine_mode mode;
30010 enum memmodel mod_s, mod_f;
30011 bool is_weak;
30012 rtx label1, label2, x, cond;
30014 rval = operands[0];
30015 mem = operands[1];
30016 oldval = operands[2];
30017 newval = operands[3];
30018 is_weak = (operands[4] != const0_rtx);
30019 mod_s = (enum memmodel) INTVAL (operands[5]);
30020 mod_f = (enum memmodel) INTVAL (operands[6]);
30021 scratch = operands[7];
30022 mode = GET_MODE (mem);
30024 bool use_acquire = TARGET_HAVE_LDACQ
30025 && !(mod_s == MEMMODEL_RELAXED
30026 || mod_s == MEMMODEL_CONSUME
30027 || mod_s == MEMMODEL_RELEASE);
30029 bool use_release = TARGET_HAVE_LDACQ
30030 && !(mod_s == MEMMODEL_RELAXED
30031 || mod_s == MEMMODEL_CONSUME
30032 || mod_s == MEMMODEL_ACQUIRE);
30034 /* Checks whether a barrier is needed and emits one accordingly. */
30035 if (!(use_acquire || use_release))
30036 arm_pre_atomic_barrier (mod_s);
30038 label1 = NULL_RTX;
30039 if (!is_weak)
30041 label1 = gen_label_rtx ();
30042 emit_label (label1);
30044 label2 = gen_label_rtx ();
30046 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30048 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30049 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30050 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30051 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30052 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30054 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30056 /* Weak or strong, we want EQ to be true for success, so that we
30057 match the flags that we got from the compare above. */
30058 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30059 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30060 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30062 if (!is_weak)
30064 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30065 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30066 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30067 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30070 if (mod_f != MEMMODEL_RELAXED)
30071 emit_label (label2);
30073 /* Checks whether a barrier is needed and emits one accordingly. */
30074 if (!(use_acquire || use_release))
30075 arm_post_atomic_barrier (mod_s);
30077 if (mod_f == MEMMODEL_RELAXED)
30078 emit_label (label2);
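/* Editor's sketch of the code the strong (non-weak) split produces,
   with barriers omitted and illustrative register names:
       .Lretry:
           ldrex   rval, [mem]
           cmp     rval, oldval
           bne     .Ldone
           strex   scratch, newval, [mem]
           cmp     scratch, #0
           bne     .Lretry
       .Ldone:
   Success is signalled through the Z flag, as arranged by
   arm_expand_compare_and_swap above.  */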
30081 void
30082 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30083 rtx value, rtx model_rtx, rtx cond)
30085 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30086 enum machine_mode mode = GET_MODE (mem);
30087 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30088 rtx label, x;
30090 bool use_acquire = TARGET_HAVE_LDACQ
30091 && !(model == MEMMODEL_RELAXED
30092 || model == MEMMODEL_CONSUME
30093 || model == MEMMODEL_RELEASE);
30095 bool use_release = TARGET_HAVE_LDACQ
30096 && !(model == MEMMODEL_RELAXED
30097 || model == MEMMODEL_CONSUME
30098 || model == MEMMODEL_ACQUIRE);
30100 /* Checks whether a barrier is needed and emits one accordingly. */
30101 if (!(use_acquire || use_release))
30102 arm_pre_atomic_barrier (model);
30104 label = gen_label_rtx ();
30105 emit_label (label);
30107 if (new_out)
30108 new_out = gen_lowpart (wmode, new_out);
30109 if (old_out)
30110 old_out = gen_lowpart (wmode, old_out);
30111 else
30112 old_out = new_out;
30113 value = simplify_gen_subreg (wmode, value, mode, 0);
30115 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30117 switch (code)
30119 case SET:
30120 new_out = value;
30121 break;
30123 case NOT:
30124 x = gen_rtx_AND (wmode, old_out, value);
30125 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30126 x = gen_rtx_NOT (wmode, new_out);
30127 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30128 break;
30130 case MINUS:
30131 if (CONST_INT_P (value))
30133 value = GEN_INT (-INTVAL (value));
30134 code = PLUS;
30136 /* FALLTHRU */
30138 case PLUS:
30139 if (mode == DImode)
30141 /* DImode plus/minus need to clobber flags. */
30142 /* The adddi3 and subdi3 patterns are incorrectly written so that
30143 they require matching operands, even when we could easily support
30144 three operands. Thankfully, this can be fixed up post-splitting,
30145 as the individual add+adc patterns do accept three operands and
30146 post-reload cprop can make these moves go away. */
30147 emit_move_insn (new_out, old_out);
30148 if (code == PLUS)
30149 x = gen_adddi3 (new_out, new_out, value);
30150 else
30151 x = gen_subdi3 (new_out, new_out, value);
30152 emit_insn (x);
30153 break;
30155 /* FALLTHRU */
30157 default:
30158 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30159 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30160 break;
30163 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30164 use_release);
30166 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30167 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30169 /* Checks whether a barrier is needed and emits one accordingly. */
30170 if (!(use_acquire || use_release))
30171 arm_post_atomic_barrier (model);
30174 #define MAX_VECT_LEN 16
30176 struct expand_vec_perm_d
30178 rtx target, op0, op1;
30179 unsigned char perm[MAX_VECT_LEN];
30180 enum machine_mode vmode;
30181 unsigned char nelt;
30182 bool one_vector_p;
30183 bool testing_p;
30186 /* Generate a variable permutation. */
30188 static void
30189 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30191 enum machine_mode vmode = GET_MODE (target);
30192 bool one_vector_p = rtx_equal_p (op0, op1);
30194 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30195 gcc_checking_assert (GET_MODE (op0) == vmode);
30196 gcc_checking_assert (GET_MODE (op1) == vmode);
30197 gcc_checking_assert (GET_MODE (sel) == vmode);
30198 gcc_checking_assert (TARGET_NEON);
30200 if (one_vector_p)
30202 if (vmode == V8QImode)
30203 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30204 else
30205 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30207 else
30209 rtx pair;
30211 if (vmode == V8QImode)
30213 pair = gen_reg_rtx (V16QImode);
30214 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30215 pair = gen_lowpart (TImode, pair);
30216 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30218 else
30220 pair = gen_reg_rtx (OImode);
30221 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30222 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30227 void
30228 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30230 enum machine_mode vmode = GET_MODE (target);
30231 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30232 bool one_vector_p = rtx_equal_p (op0, op1);
30233 rtx rmask[MAX_VECT_LEN], mask;
30235 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30236 numbering of elements for big-endian, we must reverse the order. */
30237 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30239 /* The VTBL instruction does not use a modulo index, so we must take care
30240 of that ourselves. */
30241 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30242 for (i = 0; i < nelt; ++i)
30243 rmask[i] = mask;
30244 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30245 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30247 arm_expand_vec_perm_1 (target, op0, op1, sel);
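/* Example (editor's sketch): for a two-operand V8QImode permutation,
   nelt is 8, so the selector is ANDed with a splat of 15
   (2 * nelt - 1); an out-of-range index such as 17 therefore behaves
   like 1, giving the modulo semantics VEC_PERM_EXPR requires on top of
   raw VTBL.  */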
30250 /* Generate or test for an insn that supports a constant permutation. */
30252 /* Recognize patterns for the VUZP insns. */
30254 static bool
30255 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30257 unsigned int i, odd, mask, nelt = d->nelt;
30258 rtx out0, out1, in0, in1, x;
30259 rtx (*gen)(rtx, rtx, rtx, rtx);
30261 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30262 return false;
30264 /* Note that these are little-endian tests. Adjust for big-endian later. */
30265 if (d->perm[0] == 0)
30266 odd = 0;
30267 else if (d->perm[0] == 1)
30268 odd = 1;
30269 else
30270 return false;
30271 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30273 for (i = 0; i < nelt; i++)
30275 unsigned elt = (i * 2 + odd) & mask;
30276 if (d->perm[i] != elt)
30277 return false;
30280 /* Success! */
30281 if (d->testing_p)
30282 return true;
30284 switch (d->vmode)
30286 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30287 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30288 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30289 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30290 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30291 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30292 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30293 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30294 default:
30295 gcc_unreachable ();
30298 in0 = d->op0;
30299 in1 = d->op1;
30300 if (BYTES_BIG_ENDIAN)
30302 x = in0, in0 = in1, in1 = x;
30303 odd = !odd;
30306 out0 = d->target;
30307 out1 = gen_reg_rtx (d->vmode);
30308 if (odd)
30309 x = out0, out0 = out1, out1 = x;
30311 emit_insn (gen (out0, in0, in1, out1));
30312 return true;
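/* Example (editor's sketch): for a two-operand V8QImode permutation the
   even-element selector { 0, 2, 4, 6, 8, 10, 12, 14 } matches with
   odd == 0 and is handled by a single VUZP, whose first output
   register receives the even-numbered elements of the concatenated
   inputs.  */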
30315 /* Recognize patterns for the VZIP insns. */
30317 static bool
30318 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30320 unsigned int i, high, mask, nelt = d->nelt;
30321 rtx out0, out1, in0, in1, x;
30322 rtx (*gen)(rtx, rtx, rtx, rtx);
30324 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30325 return false;
30327 /* Note that these are little-endian tests. Adjust for big-endian later. */
30328 high = nelt / 2;
30329 if (d->perm[0] == high)
30331 else if (d->perm[0] == 0)
30332 high = 0;
30333 else
30334 return false;
30335 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30337 for (i = 0; i < nelt / 2; i++)
30339 unsigned elt = (i + high) & mask;
30340 if (d->perm[i * 2] != elt)
30341 return false;
30342 elt = (elt + nelt) & mask;
30343 if (d->perm[i * 2 + 1] != elt)
30344 return false;
30347 /* Success! */
30348 if (d->testing_p)
30349 return true;
30351 switch (d->vmode)
30353 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30354 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30355 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30356 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30357 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30358 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30359 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30360 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30361 default:
30362 gcc_unreachable ();
30365 in0 = d->op0;
30366 in1 = d->op1;
30367 if (BYTES_BIG_ENDIAN)
30369 x = in0, in0 = in1, in1 = x;
30370 high = !high;
30373 out0 = d->target;
30374 out1 = gen_reg_rtx (d->vmode);
30375 if (high)
30376 x = out0, out0 = out1, out1 = x;
30378 emit_insn (gen (out0, in0, in1, out1));
30379 return true;
30382 /* Recognize patterns for the VREV insns. */
30384 static bool
30385 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30387 unsigned int i, j, diff, nelt = d->nelt;
30388 rtx (*gen)(rtx, rtx, rtx);
30390 if (!d->one_vector_p)
30391 return false;
30393 diff = d->perm[0];
30394 switch (diff)
30396 case 7:
30397 switch (d->vmode)
30399 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30400 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30401 default:
30402 return false;
30404 break;
30405 case 3:
30406 switch (d->vmode)
30408 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30409 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30410 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30411 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30412 default:
30413 return false;
30415 break;
30416 case 1:
30417 switch (d->vmode)
30419 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30420 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30421 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30422 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30423 case V4SImode: gen = gen_neon_vrev64v4si; break;
30424 case V2SImode: gen = gen_neon_vrev64v2si; break;
30425 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30426 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30427 default:
30428 return false;
30430 break;
30431 default:
30432 return false;
30435 for (i = 0; i < nelt ; i += diff + 1)
30436 for (j = 0; j <= diff; j += 1)
30438 /* This is guaranteed to be true, as diff is 7, 3 or 1
30439 and we should have enough elements in the queue to
30440 generate this. Getting a vector mask with a value of
30441 diff other than these values implies that something
30442 is wrong by the time we get here. */
30443 gcc_assert (i + j < nelt);
30444 if (d->perm[i + j] != i + diff - j)
30445 return false;
30448 /* Success! */
30449 if (d->testing_p)
30450 return true;
30452 /* ??? The third operand is an artifact of the builtin infrastructure
30453 and is ignored by the actual instruction. */
30454 emit_insn (gen (d->target, d->op0, const0_rtx));
30455 return true;
30458 /* Recognize patterns for the VTRN insns. */
30460 static bool
30461 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30463 unsigned int i, odd, mask, nelt = d->nelt;
30464 rtx out0, out1, in0, in1, x;
30465 rtx (*gen)(rtx, rtx, rtx, rtx);
30467 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30468 return false;
30470 /* Note that these are little-endian tests. Adjust for big-endian later. */
30471 if (d->perm[0] == 0)
30472 odd = 0;
30473 else if (d->perm[0] == 1)
30474 odd = 1;
30475 else
30476 return false;
30477 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30479 for (i = 0; i < nelt; i += 2)
30481 if (d->perm[i] != i + odd)
30482 return false;
30483 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30484 return false;
30487 /* Success! */
30488 if (d->testing_p)
30489 return true;
30491 switch (d->vmode)
30493 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30494 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30495 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30496 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30497 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30498 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30499 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30500 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30501 default:
30502 gcc_unreachable ();
30505 in0 = d->op0;
30506 in1 = d->op1;
30507 if (BYTES_BIG_ENDIAN)
30509 x = in0, in0 = in1, in1 = x;
30510 odd = !odd;
30513 out0 = d->target;
30514 out1 = gen_reg_rtx (d->vmode);
30515 if (odd)
30516 x = out0, out0 = out1, out1 = x;
30518 emit_insn (gen (out0, in0, in1, out1));
30519 return true;
30522 /* Recognize patterns for the VEXT insns. */
30524 static bool
30525 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30527 unsigned int i, nelt = d->nelt;
30528 rtx (*gen) (rtx, rtx, rtx, rtx);
30529 rtx offset;
30531 unsigned int location;
30533 unsigned int next = d->perm[0] + 1;
30535 /* TODO: Handle GCC's numbering of elements for big-endian. */
30536 if (BYTES_BIG_ENDIAN)
30537 return false;
30539 /* Check if the extracted indexes are increasing by one. */
30540 for (i = 1; i < nelt; next++, i++)
30542 /* If we hit the most significant element of the 2nd vector in
30543 the previous iteration, no need to test further. */
30544 if (next == 2 * nelt)
30545 return false;
30547 /* If we are operating on only one vector: it could be a
30548 rotation. If there are only two elements of size < 64, let
30549 arm_evpc_neon_vrev catch it. */
30550 if (d->one_vector_p && (next == nelt))
30552 if ((nelt == 2) && (d->vmode != V2DImode))
30553 return false;
30554 else
30555 next = 0;
30558 if (d->perm[i] != next)
30559 return false;
30562 location = d->perm[0];
30564 switch (d->vmode)
30566 case V16QImode: gen = gen_neon_vextv16qi; break;
30567 case V8QImode: gen = gen_neon_vextv8qi; break;
30568 case V4HImode: gen = gen_neon_vextv4hi; break;
30569 case V8HImode: gen = gen_neon_vextv8hi; break;
30570 case V2SImode: gen = gen_neon_vextv2si; break;
30571 case V4SImode: gen = gen_neon_vextv4si; break;
30572 case V2SFmode: gen = gen_neon_vextv2sf; break;
30573 case V4SFmode: gen = gen_neon_vextv4sf; break;
30574 case V2DImode: gen = gen_neon_vextv2di; break;
30575 default:
30576 return false;
30579 /* Success! */
30580 if (d->testing_p)
30581 return true;
30583 offset = GEN_INT (location);
30584 emit_insn (gen (d->target, d->op0, d->op1, offset));
30585 return true;
30588 /* The NEON VTBL instruction is a fully variable permutation that's even
30589 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30590 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30591 can do slightly better by expanding this as a constant where we don't
30592 have to apply a mask. */
30594 static bool
30595 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30597 rtx rperm[MAX_VECT_LEN], sel;
30598 enum machine_mode vmode = d->vmode;
30599 unsigned int i, nelt = d->nelt;
30601 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30602 numbering of elements for big-endian, we must reverse the order. */
30603 if (BYTES_BIG_ENDIAN)
30604 return false;
30606 if (d->testing_p)
30607 return true;
30609 /* Generic code will try constant permutation twice. Once with the
30610 original mode and again with the elements lowered to QImode.
30611 So wait and don't do the selector expansion ourselves. */
30612 if (vmode != V8QImode && vmode != V16QImode)
30613 return false;
30615 for (i = 0; i < nelt; ++i)
30616 rperm[i] = GEN_INT (d->perm[i]);
30617 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30618 sel = force_reg (vmode, sel);
30620 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30621 return true;
30624 static bool
30625 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30627 /* Check if the input mask matches vext before reordering the
30628 operands. */
30629 if (TARGET_NEON)
30630 if (arm_evpc_neon_vext (d))
30631 return true;
30633 /* The pattern matching functions above are written to look for a small
30634 number to begin the sequence (0, 1, N/2). If we begin with an index
30635 from the second operand, we can swap the operands. */
30636 if (d->perm[0] >= d->nelt)
30638 unsigned i, nelt = d->nelt;
30639 rtx x;
30641 for (i = 0; i < nelt; ++i)
30642 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30644 x = d->op0;
30645 d->op0 = d->op1;
30646 d->op1 = x;
30649 if (TARGET_NEON)
30651 if (arm_evpc_neon_vuzp (d))
30652 return true;
30653 if (arm_evpc_neon_vzip (d))
30654 return true;
30655 if (arm_evpc_neon_vrev (d))
30656 return true;
30657 if (arm_evpc_neon_vtrn (d))
30658 return true;
30659 return arm_evpc_neon_vtbl (d);
30661 return false;
30664 /* Expand a vec_perm_const pattern. */
30666 bool
30667 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30669 struct expand_vec_perm_d d;
30670 int i, nelt, which;
30672 d.target = target;
30673 d.op0 = op0;
30674 d.op1 = op1;
30676 d.vmode = GET_MODE (target);
30677 gcc_assert (VECTOR_MODE_P (d.vmode));
30678 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30679 d.testing_p = false;
30681 for (i = which = 0; i < nelt; ++i)
30683 rtx e = XVECEXP (sel, 0, i);
30684 int ei = INTVAL (e) & (2 * nelt - 1);
30685 which |= (ei < nelt ? 1 : 2);
30686 d.perm[i] = ei;
30689 switch (which)
30691 default:
30692 gcc_unreachable();
30694 case 3:
30695 d.one_vector_p = false;
30696 if (!rtx_equal_p (op0, op1))
30697 break;
30699 /* The elements of PERM do not suggest that only the first operand
30700 is used, but both operands are identical. Allow easier matching
30701 of the permutation by folding the permutation into the single
30702 input vector. */
30703 /* FALLTHRU */
30704 case 2:
30705 for (i = 0; i < nelt; ++i)
30706 d.perm[i] &= nelt - 1;
30707 d.op0 = op1;
30708 d.one_vector_p = true;
30709 break;
30711 case 1:
30712 d.op1 = op0;
30713 d.one_vector_p = true;
30714 break;
30717 return arm_expand_vec_perm_const_1 (&d);
30720 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30722 static bool
30723 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30724 const unsigned char *sel)
30726 struct expand_vec_perm_d d;
30727 unsigned int i, nelt, which;
30728 bool ret;
30730 d.vmode = vmode;
30731 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30732 d.testing_p = true;
30733 memcpy (d.perm, sel, nelt);
30735 /* Categorize the set of elements in the selector. */
30736 for (i = which = 0; i < nelt; ++i)
30738 unsigned char e = d.perm[i];
30739 gcc_assert (e < 2 * nelt);
30740 which |= (e < nelt ? 1 : 2);
30743 /* For all elements from second vector, fold the elements to first. */
30744 if (which == 2)
30745 for (i = 0; i < nelt; ++i)
30746 d.perm[i] -= nelt;
30748 /* Check whether the mask can be applied to the vector type. */
30749 d.one_vector_p = (which != 3);
30751 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30752 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30753 if (!d.one_vector_p)
30754 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30756 start_sequence ();
30757 ret = arm_expand_vec_perm_const_1 (&d);
30758 end_sequence ();
30760 return ret;
30763 bool
30764 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30766 /* If we are soft float and either have ldrd or the mode fits in a
30767 single word, then all auto increment forms are ok. */
30768 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30769 return true;
30771 switch (code)
30773 /* Post increment and Pre Decrement are supported for all
30774 instruction forms except for vector forms. */
30775 case ARM_POST_INC:
30776 case ARM_PRE_DEC:
30777 if (VECTOR_MODE_P (mode))
30779 if (code != ARM_PRE_DEC)
30780 return true;
30781 else
30782 return false;
30785 return true;
30787 case ARM_POST_DEC:
30788 case ARM_PRE_INC:
30789 /* Without LDRD and mode size greater than
30790 word size, there is no point in auto-incrementing
30791 because ldm and stm will not have these forms. */
30792 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30793 return false;
30795 /* Vector and floating point modes do not support
30796 these auto increment forms. */
30797 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30798 return false;
30800 return true;
30802 default:
30803 return false;
30807 return false;
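/* For illustration (assuming -msoft-float on a core without LDRD): a DImode
   ARM_POST_INC access falls through the initial test (the mode is wider than
   a word), reaches the ARM_POST_INC case and is accepted; a DImode
   ARM_PRE_INC is rejected by the !TARGET_LDRD check; and a V4SImode
   ARM_PRE_DEC is rejected because pre-decrement is not allowed for vector
   modes.  */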
30810 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
30811 on ARM, since we know that shifts by negative amounts are no-ops.
30812 Additionally, the default expansion code is not available or suitable
30813 for post-reload insn splits (this can occur when the register allocator
30814 chooses not to do a shift in NEON).
30816 This function is used in both initial expand and post-reload splits, and
30817 handles all kinds of 64-bit shifts.
30819 Input requirements:
30820 - It is safe for the input and output to be the same register, but
30821 early-clobber rules apply for the shift amount and scratch registers.
30822 - Shift by register requires both scratch registers. In all other cases
30823 the scratch registers may be NULL.
30824 - Ashiftrt by a register also clobbers the CC register. */
30825 void
30826 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30827 rtx amount, rtx scratch1, rtx scratch2)
30829 rtx out_high = gen_highpart (SImode, out);
30830 rtx out_low = gen_lowpart (SImode, out);
30831 rtx in_high = gen_highpart (SImode, in);
30832 rtx in_low = gen_lowpart (SImode, in);
30834 /* Terminology:
30835 in = the register pair containing the input value.
30836 out = the destination register pair.
30837 up = the high- or low-part of each pair.
30838 down = the opposite part to "up".
30839 In a shift, we can consider bits to shift from "up"-stream to
30840 "down"-stream, so in a left-shift "up" is the low-part and "down"
30841 is the high-part of each register pair. */
30843 rtx out_up = code == ASHIFT ? out_low : out_high;
30844 rtx out_down = code == ASHIFT ? out_high : out_low;
30845 rtx in_up = code == ASHIFT ? in_low : in_high;
30846 rtx in_down = code == ASHIFT ? in_high : in_low;
30848 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30849 gcc_assert (out
30850 && (REG_P (out) || GET_CODE (out) == SUBREG)
30851 && GET_MODE (out) == DImode);
30852 gcc_assert (in
30853 && (REG_P (in) || GET_CODE (in) == SUBREG)
30854 && GET_MODE (in) == DImode);
30855 gcc_assert (amount
30856 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30857 && GET_MODE (amount) == SImode)
30858 || CONST_INT_P (amount)));
30859 gcc_assert (scratch1 == NULL
30860 || (GET_CODE (scratch1) == SCRATCH)
30861 || (GET_MODE (scratch1) == SImode
30862 && REG_P (scratch1)));
30863 gcc_assert (scratch2 == NULL
30864 || (GET_CODE (scratch2) == SCRATCH)
30865 || (GET_MODE (scratch2) == SImode
30866 && REG_P (scratch2)));
30867 gcc_assert (!REG_P (out) || !REG_P (amount)
30868 || !HARD_REGISTER_P (out)
30869 || (REGNO (out) != REGNO (amount)
30870 && REGNO (out) + 1 != REGNO (amount)));
30872 /* Macros to make the following code more readable. */
30873 #define SUB_32(DEST,SRC) \
30874 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30875 #define RSB_32(DEST,SRC) \
30876 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30877 #define SUB_S_32(DEST,SRC) \
30878 gen_addsi3_compare0 ((DEST), (SRC), \
30879 GEN_INT (-32))
30880 #define SET(DEST,SRC) \
30881 gen_rtx_SET (SImode, (DEST), (SRC))
30882 #define SHIFT(CODE,SRC,AMOUNT) \
30883 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30884 #define LSHIFT(CODE,SRC,AMOUNT) \
30885 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30886 SImode, (SRC), (AMOUNT))
30887 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30888 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30889 SImode, (SRC), (AMOUNT))
30890 #define ORR(A,B) \
30891 gen_rtx_IOR (SImode, (A), (B))
30892 #define BRANCH(COND,LABEL) \
30893 gen_arm_cond_branch ((LABEL), \
30894 gen_rtx_ ## COND (CCmode, cc_reg, \
30895 const0_rtx), \
30896 cc_reg)
30898 /* Shifts by register and shifts by constant are handled separately. */
30899 if (CONST_INT_P (amount))
30901 /* We have a shift-by-constant. */
30903 /* First, handle out-of-range shift amounts.
30904 In both cases we try to match the result an ARM instruction in a
30905 shift-by-register would give. This helps reduce execution
30906 differences between optimization levels, but it won't stop other
30907 parts of the compiler doing different things. This is "undefined
30908 behaviour", in any case. */
30909 if (INTVAL (amount) <= 0)
30910 emit_insn (gen_movdi (out, in));
30911 else if (INTVAL (amount) >= 64)
30913 if (code == ASHIFTRT)
30915 rtx const31_rtx = GEN_INT (31);
30916 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30917 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30919 else
30920 emit_insn (gen_movdi (out, const0_rtx));
30923 /* Now handle valid shifts. */
30924 else if (INTVAL (amount) < 32)
30926 /* Shifts by a constant less than 32. */
30927 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30929 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30930 emit_insn (SET (out_down,
30931 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30932 out_down)));
30933 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30935 else
30937 /* Shifts by a constant greater than 31. */
30938 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30940 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30941 if (code == ASHIFTRT)
30942 emit_insn (gen_ashrsi3 (out_up, in_up,
30943 GEN_INT (31)));
30944 else
30945 emit_insn (SET (out_up, const0_rtx));
30948 else
30950 /* We have a shift-by-register. */
30951 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30953 /* This alternative requires the scratch registers. */
30954 gcc_assert (scratch1 && REG_P (scratch1));
30955 gcc_assert (scratch2 && REG_P (scratch2));
30957 /* We will need the values "amount-32" and "32-amount" later.
30958 Swapping them around now allows the later code to be more general. */
30959 switch (code)
30961 case ASHIFT:
30962 emit_insn (SUB_32 (scratch1, amount));
30963 emit_insn (RSB_32 (scratch2, amount));
30964 break;
30965 case ASHIFTRT:
30966 emit_insn (RSB_32 (scratch1, amount));
30967 /* Also set CC from amount - 32; the LT branch below tests amount < 32. */
30968 emit_insn (SUB_S_32 (scratch2, amount));
30969 break;
30970 case LSHIFTRT:
30971 emit_insn (RSB_32 (scratch1, amount));
30972 emit_insn (SUB_32 (scratch2, amount));
30973 break;
30974 default:
30975 gcc_unreachable ();
30978 /* Emit code like this:
30980 arithmetic-left:
30981 out_down = in_down << amount;
30982 out_down = (in_up << (amount - 32)) | out_down;
30983 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30984 out_up = in_up << amount;
30986 arithmetic-right:
30987 out_down = in_down >> amount;
30988 out_down = (in_up << (32 - amount)) | out_down;
30989 if (amount >= 32)
30990 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30991 out_up = in_up >> amount;
30993 logical-right:
30994 out_down = in_down >> amount;
30995 out_down = (in_up << (32 - amount)) | out_down;
30996 if (amount >= 32)
30997 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30998 out_up = in_up >> amount;
31000 The ARM and Thumb2 variants are the same but implemented slightly
31001 differently. If this were only called during expand we could just
31002 use the Thumb2 case and let combine do the right thing, but this
31003 can also be called from post-reload splitters. */
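/* Note: the final term above needs its "amount >= 32" guard only for the
   arithmetic right shift.  A register-specified ASR of 32 or more yields a
   word full of sign bits, so ORing that term in when AMOUNT < 32 would
   corrupt OUT_DOWN whenever IN_UP is negative, whereas LSL and LSR of 32 or
   more simply yield zero and are harmless.  This is why only the ASHIFTRT
   paths below emit a conditional branch.  */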
31005 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31007 if (!TARGET_THUMB2)
31009 /* Emit code for ARM mode. */
31010 emit_insn (SET (out_down,
31011 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31012 if (code == ASHIFTRT)
31014 rtx done_label = gen_label_rtx ();
31015 emit_jump_insn (BRANCH (LT, done_label));
31016 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31017 out_down)));
31018 emit_label (done_label);
31020 else
31021 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31022 out_down)));
31024 else
31026 /* Emit code for Thumb2 mode.
31027 Thumb2 can't do shift and or in one insn. */
31028 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31029 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31031 if (code == ASHIFTRT)
31033 rtx done_label = gen_label_rtx ();
31034 emit_jump_insn (BRANCH (LT, done_label));
31035 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31036 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31037 emit_label (done_label);
31039 else
31041 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31042 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31046 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31049 #undef SUB_32
31050 #undef RSB_32
31051 #undef SUB_S_32
31052 #undef SET
31053 #undef SHIFT
31054 #undef LSHIFT
31055 #undef REV_LSHIFT
31056 #undef ORR
31057 #undef BRANCH
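/* Worked example: a constant DImode left shift by 40 takes the "greater
   than 31" branch above and emits, in effect:

       out_high = in_low << 8;         (SHIFT (code, in_up, 40 - 32))
       out_low  = 0;

   while an arithmetic right shift by 40 emits:

       out_low  = in_high >> 8;        (arithmetic shift)
       out_high = in_high >> 31;       (sign fill)

   matching what ARM shift-by-register instructions would give for the same
   amount.  */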
31061 /* Returns true if *COMPARISON is a valid comparison operation, and puts
31062 the operands into a form that is valid for it; returns false otherwise. */
31063 bool
31064 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31066 enum rtx_code code = GET_CODE (*comparison);
31067 int code_int;
31068 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31069 ? GET_MODE (*op2) : GET_MODE (*op1);
31071 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31073 if (code == UNEQ || code == LTGT)
31074 return false;
31076 code_int = (int)code;
31077 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31078 PUT_CODE (*comparison, (enum rtx_code)code_int);
31080 switch (mode)
31082 case SImode:
31083 if (!arm_add_operand (*op1, mode))
31084 *op1 = force_reg (mode, *op1);
31085 if (!arm_add_operand (*op2, mode))
31086 *op2 = force_reg (mode, *op2);
31087 return true;
31089 case DImode:
31090 if (!cmpdi_operand (*op1, mode))
31091 *op1 = force_reg (mode, *op1);
31092 if (!cmpdi_operand (*op2, mode))
31093 *op2 = force_reg (mode, *op2);
31094 return true;
31096 case SFmode:
31097 case DFmode:
31098 if (!arm_float_compare_operand (*op1, mode))
31099 *op1 = force_reg (mode, *op1);
31100 if (!arm_float_compare_operand (*op2, mode))
31101 *op2 = force_reg (mode, *op2);
31102 return true;
31103 default:
31104 break;
31107 return false;
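/* A minimal usage sketch (the surrounding expander is assumed for
   illustration, not taken from this file): a conditional-move or
   compare-and-branch expander in arm.md would typically do

       if (!arm_validize_comparison (&operands[1],
                                     &XEXP (operands[1], 0),
                                     &XEXP (operands[1], 1)))
         FAIL;

   so that the comparison code is canonicalized and the operands are forced
   into registers wherever the mode-specific predicates above require it.  */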
31111 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31113 static unsigned HOST_WIDE_INT
31114 arm_asan_shadow_offset (void)
31116 return (unsigned HOST_WIDE_INT) 1 << 29;
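/* Note: 1 << 29 is 0x20000000, so with the usual 1/8th shadow scaling
   AddressSanitizer on 32-bit ARM maps an application address to
   (address >> 3) + 0x20000000.  */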
31119 #include "gt-arm.h"