1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "gimple-expr.h"
64 #include "builtins.h"
66 /* Forward definitions of types. */
67 typedef struct minipool_node Mnode;
68 typedef struct minipool_fixup Mfix;
70 void (*arm_lang_output_object_attributes_hook)(void);
72 struct four_ints
74 int i[4];
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets *arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
85 HOST_WIDE_INT, rtx, rtx, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx, int);
88 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
89 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
90 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
91 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
92 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
93 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
94 inline static int thumb1_index_register_rtx_p (rtx, int);
95 static int thumb_far_jump_used_p (void);
96 static bool thumb_force_lr_save (void);
97 static unsigned arm_size_return_regs (void);
98 static bool arm_assemble_integer (rtx, unsigned int, int);
99 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
100 static void arm_print_operand (FILE *, rtx, int);
101 static void arm_print_operand_address (FILE *, rtx);
102 static bool arm_print_operand_punct_valid_p (unsigned char code);
103 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
104 static arm_cc get_arm_condition_code (rtx);
105 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
106 static const char *output_multi_immediate (rtx *, const char *, const char *,
107 int, HOST_WIDE_INT);
108 static const char *shift_op (rtx, HOST_WIDE_INT *);
109 static struct machine_function *arm_init_machine_status (void);
110 static void thumb_exit (FILE *, int);
111 static HOST_WIDE_INT get_jump_table_size (rtx);
112 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
113 static Mnode *add_minipool_forward_ref (Mfix *);
114 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_backward_ref (Mfix *);
116 static void assign_minipool_offsets (Mfix *);
117 static void arm_print_value (FILE *, rtx);
118 static void dump_minipool (rtx);
119 static int arm_barrier_cost (rtx);
120 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
121 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
122 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
123 rtx);
124 static void arm_reorg (void);
125 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
126 static unsigned long arm_compute_save_reg0_reg12_mask (void);
127 static unsigned long arm_compute_save_reg_mask (void);
128 static unsigned long arm_isr_value (tree);
129 static unsigned long arm_compute_func_type (void);
130 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
131 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
133 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
134 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
135 #endif
136 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
137 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
138 static int arm_comp_type_attributes (const_tree, const_tree);
139 static void arm_set_default_type_attributes (tree);
140 static int arm_adjust_cost (rtx, rtx, rtx, int);
141 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
142 static int optimal_immediate_sequence (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence);
145 static int optimal_immediate_sequence_1 (enum rtx_code code,
146 unsigned HOST_WIDE_INT val,
147 struct four_ints *return_sequence,
148 int i);
149 static int arm_get_strip_length (int);
150 static bool arm_function_ok_for_sibcall (tree, tree);
151 static enum machine_mode arm_promote_function_mode (const_tree,
152 enum machine_mode, int *,
153 const_tree, int);
154 static bool arm_return_in_memory (const_tree, const_tree);
155 static rtx arm_function_value (const_tree, const_tree, bool);
156 static rtx arm_libcall_value_1 (enum machine_mode);
157 static rtx arm_libcall_value (enum machine_mode, const_rtx);
158 static bool arm_function_value_regno_p (const unsigned int);
159 static void arm_internal_label (FILE *, const char *, unsigned long);
160 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
161 tree);
162 static bool arm_have_conditional_execution (void);
163 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
164 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
165 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
166 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
167 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
172 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
175 static void arm_init_builtins (void);
176 static void arm_init_iwmmxt_builtins (void);
177 static rtx safe_vector_operand (rtx, enum machine_mode);
178 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
179 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
180 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
181 static tree arm_builtin_decl (unsigned, bool);
182 static void emit_constant_insn (rtx cond, rtx pattern);
183 static rtx emit_set_insn (rtx, rtx);
184 static rtx emit_multi_reg_push (unsigned long, unsigned long);
185 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
186 tree, bool);
187 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
188 const_tree, bool);
189 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
190 const_tree, bool);
191 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
192 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
193 const_tree);
194 static rtx aapcs_libcall_value (enum machine_mode);
195 static int aapcs_select_return_coproc (const_tree, const_tree);
197 #ifdef OBJECT_FORMAT_ELF
198 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
199 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
200 #endif
201 #ifndef ARM_PE
202 static void arm_encode_section_info (tree, rtx, int);
203 #endif
205 static void arm_file_end (void);
206 static void arm_file_start (void);
208 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 enum machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 static void arm_asm_init_sections (void);
223 #endif
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
241 static bool arm_cannot_copy_insn_p (rtx);
242 static int arm_issue_rate (void);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
244 static bool arm_output_addr_const_extra (FILE *, rtx);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree);
247 static const char *arm_invalid_parameter_type (const_tree t);
248 static const char *arm_invalid_return_type (const_tree t);
249 static tree arm_promoted_type (const_tree t);
250 static tree arm_convert_to_type (tree type, tree expr);
251 static bool arm_scalar_mode_supported_p (enum machine_mode);
252 static bool arm_frame_pointer_required (void);
253 static bool arm_can_eliminate (const int, const int);
254 static void arm_asm_trampoline_template (FILE *);
255 static void arm_trampoline_init (rtx, tree, rtx);
256 static rtx arm_trampoline_adjust_address (rtx);
257 static rtx arm_pic_static_addr (rtx orig, rtx reg);
258 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
259 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
260 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
261 static bool arm_array_mode_supported_p (enum machine_mode,
262 unsigned HOST_WIDE_INT);
263 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
264 static bool arm_class_likely_spilled_p (reg_class_t);
265 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
266 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
267 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
268 const_tree type,
269 int misalignment,
270 bool is_packed);
271 static void arm_conditional_register_usage (void);
272 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
273 static unsigned int arm_autovectorize_vector_sizes (void);
274 static int arm_default_branch_cost (bool, bool);
275 static int arm_cortex_a5_branch_cost (bool, bool);
276 static int arm_cortex_m_branch_cost (bool, bool);
278 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
279 const unsigned char *sel);
281 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
282 tree vectype,
283 int misalign ATTRIBUTE_UNUSED);
284 static unsigned arm_add_stmt_cost (void *data, int count,
285 enum vect_cost_for_stmt kind,
286 struct _stmt_vec_info *stmt_info,
287 int misalign,
288 enum vect_cost_model_location where);
290 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
291 bool op0_preserve_value);
292 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
294 /* Table of machine attributes. */
295 static const struct attribute_spec arm_attribute_table[] =
297 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
298 affects_type_identity } */
299 /* Function calls made to this symbol must be done indirectly, because
300 it may lie outside of the 26 bit addressing range of a normal function
301 call. */
302 { "long_call", 0, 0, false, true, true, NULL, false },
303 /* Whereas these functions are always known to reside within the 26 bit
304 addressing range. */
305 { "short_call", 0, 0, false, true, true, NULL, false },
306 /* Specify the procedure call conventions for a function. */
307 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
308 false },
309 /* Interrupt Service Routines have special prologue and epilogue requirements. */
310 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
311 false },
312 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
313 false },
314 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
315 false },
316 #ifdef ARM_PE
317 /* ARM/PE has three new attributes:
318 interfacearm - ?
319 dllexport - for exporting a function/variable that will live in a dll
320 dllimport - for importing a function/variable from a dll
322 Microsoft allows multiple declspecs in one __declspec, separating
323 them with spaces. We do NOT support this. Instead, use __declspec
324 multiple times.
326 { "dllimport", 0, 0, true, false, false, NULL, false },
327 { "dllexport", 0, 0, true, false, false, NULL, false },
328 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
329 false },
330 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
331 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
332 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
333 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
334 false },
335 #endif
336 { NULL, 0, 0, false, false, false, NULL, false }
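/* As a rough illustration, these entries are what user-level code such as

     void handler (void) __attribute__ ((interrupt ("IRQ")));
     extern int far_func (int) __attribute__ ((long_call));

   maps onto: the handler functions named in the table are responsible for
   validating the arguments and attaching the attribute to the declaration
   or type.  (Illustrative declarations only.)  */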
339 /* Initialize the GCC target structure. */
340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 #undef TARGET_MERGE_DECL_ATTRIBUTES
342 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
343 #endif
345 #undef TARGET_LEGITIMIZE_ADDRESS
346 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
348 #undef TARGET_LRA_P
349 #define TARGET_LRA_P arm_lra_p
351 #undef TARGET_ATTRIBUTE_TABLE
352 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
354 #undef TARGET_ASM_FILE_START
355 #define TARGET_ASM_FILE_START arm_file_start
356 #undef TARGET_ASM_FILE_END
357 #define TARGET_ASM_FILE_END arm_file_end
359 #undef TARGET_ASM_ALIGNED_SI_OP
360 #define TARGET_ASM_ALIGNED_SI_OP NULL
361 #undef TARGET_ASM_INTEGER
362 #define TARGET_ASM_INTEGER arm_assemble_integer
364 #undef TARGET_PRINT_OPERAND
365 #define TARGET_PRINT_OPERAND arm_print_operand
366 #undef TARGET_PRINT_OPERAND_ADDRESS
367 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
368 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
369 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
371 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
372 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
374 #undef TARGET_ASM_FUNCTION_PROLOGUE
375 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
377 #undef TARGET_ASM_FUNCTION_EPILOGUE
378 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
380 #undef TARGET_OPTION_OVERRIDE
381 #define TARGET_OPTION_OVERRIDE arm_option_override
383 #undef TARGET_COMP_TYPE_ATTRIBUTES
384 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
386 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
387 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
389 #undef TARGET_SCHED_ADJUST_COST
390 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER arm_sched_reorder
395 #undef TARGET_REGISTER_MOVE_COST
396 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
398 #undef TARGET_MEMORY_MOVE_COST
399 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
401 #undef TARGET_ENCODE_SECTION_INFO
402 #ifdef ARM_PE
403 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
404 #else
405 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
406 #endif
408 #undef TARGET_STRIP_NAME_ENCODING
409 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
411 #undef TARGET_ASM_INTERNAL_LABEL
412 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
414 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
415 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
417 #undef TARGET_FUNCTION_VALUE
418 #define TARGET_FUNCTION_VALUE arm_function_value
420 #undef TARGET_LIBCALL_VALUE
421 #define TARGET_LIBCALL_VALUE arm_libcall_value
423 #undef TARGET_FUNCTION_VALUE_REGNO_P
424 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
426 #undef TARGET_ASM_OUTPUT_MI_THUNK
427 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
428 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
429 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
431 #undef TARGET_RTX_COSTS
432 #define TARGET_RTX_COSTS arm_rtx_costs
433 #undef TARGET_ADDRESS_COST
434 #define TARGET_ADDRESS_COST arm_address_cost
436 #undef TARGET_SHIFT_TRUNCATION_MASK
437 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
438 #undef TARGET_VECTOR_MODE_SUPPORTED_P
439 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
440 #undef TARGET_ARRAY_MODE_SUPPORTED_P
441 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
444 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
445 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
446 arm_autovectorize_vector_sizes
448 #undef TARGET_MACHINE_DEPENDENT_REORG
449 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
451 #undef TARGET_INIT_BUILTINS
452 #define TARGET_INIT_BUILTINS arm_init_builtins
453 #undef TARGET_EXPAND_BUILTIN
454 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
455 #undef TARGET_BUILTIN_DECL
456 #define TARGET_BUILTIN_DECL arm_builtin_decl
458 #undef TARGET_INIT_LIBFUNCS
459 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
461 #undef TARGET_PROMOTE_FUNCTION_MODE
462 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
463 #undef TARGET_PROMOTE_PROTOTYPES
464 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
465 #undef TARGET_PASS_BY_REFERENCE
466 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
469 #undef TARGET_FUNCTION_ARG
470 #define TARGET_FUNCTION_ARG arm_function_arg
471 #undef TARGET_FUNCTION_ARG_ADVANCE
472 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
473 #undef TARGET_FUNCTION_ARG_BOUNDARY
474 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
476 #undef TARGET_SETUP_INCOMING_VARARGS
477 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
479 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
480 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
482 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
483 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
484 #undef TARGET_TRAMPOLINE_INIT
485 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
486 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
487 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
489 #undef TARGET_WARN_FUNC_RETURN
490 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
492 #undef TARGET_DEFAULT_SHORT_ENUMS
493 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
495 #undef TARGET_ALIGN_ANON_BITFIELD
496 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
498 #undef TARGET_NARROW_VOLATILE_BITFIELD
499 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
501 #undef TARGET_CXX_GUARD_TYPE
502 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
504 #undef TARGET_CXX_GUARD_MASK_BIT
505 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
507 #undef TARGET_CXX_GET_COOKIE_SIZE
508 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
510 #undef TARGET_CXX_COOKIE_HAS_SIZE
511 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
513 #undef TARGET_CXX_CDTOR_RETURNS_THIS
514 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
516 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
517 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
519 #undef TARGET_CXX_USE_AEABI_ATEXIT
520 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
522 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
523 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
524 arm_cxx_determine_class_data_visibility
526 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
527 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
529 #undef TARGET_RETURN_IN_MSB
530 #define TARGET_RETURN_IN_MSB arm_return_in_msb
532 #undef TARGET_RETURN_IN_MEMORY
533 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
538 #if ARM_UNWIND_INFO
539 #undef TARGET_ASM_UNWIND_EMIT
540 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
542 /* EABI unwinding tables use a different format for the typeinfo tables. */
543 #undef TARGET_ASM_TTYPE
544 #define TARGET_ASM_TTYPE arm_output_ttype
546 #undef TARGET_ARM_EABI_UNWINDER
547 #define TARGET_ARM_EABI_UNWINDER true
549 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
550 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
552 #undef TARGET_ASM_INIT_SECTIONS
553 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
554 #endif /* ARM_UNWIND_INFO */
556 #undef TARGET_DWARF_REGISTER_SPAN
557 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
559 #undef TARGET_CANNOT_COPY_INSN_P
560 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
562 #ifdef HAVE_AS_TLS
563 #undef TARGET_HAVE_TLS
564 #define TARGET_HAVE_TLS true
565 #endif
567 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
568 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
570 #undef TARGET_LEGITIMATE_CONSTANT_P
571 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
573 #undef TARGET_CANNOT_FORCE_CONST_MEM
574 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
576 #undef TARGET_MAX_ANCHOR_OFFSET
577 #define TARGET_MAX_ANCHOR_OFFSET 4095
579 /* The minimum is set such that the total size of the block
580 for a particular anchor is -4088 + 1 + 4095 bytes, which is
581 divisible by eight, ensuring natural spacing of anchors. */
582 #undef TARGET_MIN_ANCHOR_OFFSET
583 #define TARGET_MIN_ANCHOR_OFFSET -4088
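/* That is, anchored offsets span [-4088, +4095]; counting the anchor byte
   itself gives 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, so the
   block size is indeed a multiple of eight.  */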
585 #undef TARGET_SCHED_ISSUE_RATE
586 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
588 #undef TARGET_MANGLE_TYPE
589 #define TARGET_MANGLE_TYPE arm_mangle_type
591 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
592 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
594 #undef TARGET_BUILD_BUILTIN_VA_LIST
595 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
596 #undef TARGET_EXPAND_BUILTIN_VA_START
597 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
598 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
599 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
601 #ifdef HAVE_AS_TLS
602 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
603 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
604 #endif
606 #undef TARGET_LEGITIMATE_ADDRESS_P
607 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
609 #undef TARGET_PREFERRED_RELOAD_CLASS
610 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
612 #undef TARGET_INVALID_PARAMETER_TYPE
613 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
615 #undef TARGET_INVALID_RETURN_TYPE
616 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
618 #undef TARGET_PROMOTED_TYPE
619 #define TARGET_PROMOTED_TYPE arm_promoted_type
621 #undef TARGET_CONVERT_TO_TYPE
622 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
624 #undef TARGET_SCALAR_MODE_SUPPORTED_P
625 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
627 #undef TARGET_FRAME_POINTER_REQUIRED
628 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
630 #undef TARGET_CAN_ELIMINATE
631 #define TARGET_CAN_ELIMINATE arm_can_eliminate
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
636 #undef TARGET_CLASS_LIKELY_SPILLED_P
637 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
639 #undef TARGET_VECTORIZE_BUILTINS
640 #define TARGET_VECTORIZE_BUILTINS
642 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
643 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
644 arm_builtin_vectorized_function
646 #undef TARGET_VECTOR_ALIGNMENT
647 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
649 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
650 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
651 arm_vector_alignment_reachable
653 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
654 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
655 arm_builtin_support_vector_misalignment
657 #undef TARGET_PREFERRED_RENAME_CLASS
658 #define TARGET_PREFERRED_RENAME_CLASS \
659 arm_preferred_rename_class
661 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
662 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
663 arm_vectorize_vec_perm_const_ok
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
667 arm_builtin_vectorization_cost
668 #undef TARGET_VECTORIZE_ADD_STMT_COST
669 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
671 #undef TARGET_CANONICALIZE_COMPARISON
672 #define TARGET_CANONICALIZE_COMPARISON \
673 arm_canonicalize_comparison
675 #undef TARGET_ASAN_SHADOW_OFFSET
676 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
678 #undef MAX_INSN_PER_IT_BLOCK
679 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
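/* -mrestrict-it follows the ARMv8 rules, under which IT blocks covering
   more than a single 16-bit instruction are deprecated, hence the limit of
   one insn; plain Thumb-2 allows up to four.  */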
681 #undef TARGET_CAN_USE_DOLOOP_P
682 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
684 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
685 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
687 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
688 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
690 struct gcc_target targetm = TARGET_INITIALIZER;
692 /* Obstack for minipool constant handling. */
693 static struct obstack minipool_obstack;
694 static char * minipool_startobj;
696 /* The maximum number of insns skipped which
697 will be conditionalised if possible. */
698 static int max_insns_skipped = 5;
700 extern FILE * asm_out_file;
702 /* True if we are currently building a constant table. */
703 int making_const_table;
705 /* The processor for which instructions should be scheduled. */
706 enum processor_type arm_tune = arm_none;
708 /* The current tuning set. */
709 const struct tune_params *current_tune;
711 /* Which floating point hardware to schedule for. */
712 int arm_fpu_attr;
714 /* Which floating point hardware to use.  */
715 const struct arm_fpu_desc *arm_fpu_desc;
717 /* Used for Thumb call_via trampolines. */
718 rtx thumb_call_via_label[14];
719 static int thumb_call_reg_needed;
721 /* Bit values used to identify processor capabilities. */
722 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
723 #define FL_ARCH3M (1 << 1) /* Extended multiply */
724 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
725 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
726 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
727 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
728 #define FL_THUMB (1 << 6) /* Thumb aware */
729 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
730 #define FL_STRONG (1 << 8) /* StrongARM */
731 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
732 #define FL_XSCALE (1 << 10) /* XScale */
733 /* spare (1 << 11) */
734 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
735 media instructions. */
736 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
737 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
738 Note: ARM6 & 7 derivatives only. */
739 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
740 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
741 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
742 profile. */
743 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
744 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
745 #define FL_NEON (1 << 20) /* Neon instructions. */
746 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
747 architecture. */
748 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
749 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
750 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
751 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
753 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
754 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
756 /* Flags that only affect tuning, not available instructions.  */
757 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
758 | FL_CO_PROC)
760 #define FL_FOR_ARCH2 FL_NOTM
761 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
762 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
763 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
764 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
765 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
766 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
767 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
768 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
769 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
770 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
771 #define FL_FOR_ARCH6J FL_FOR_ARCH6
772 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
773 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
774 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
775 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
776 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
777 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
778 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
779 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
780 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
781 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
782 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
783 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
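/* These composite masks are what the CPU/architecture tables feed into
   insn_flags once a target is selected; the arm_arch* variables defined
   further down are then derived by testing individual bits, roughly along
   the lines of

     arm_arch5 = (insn_flags & FL_ARCH5) != 0;
     arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;

   (illustrative fragment; the actual assignments are made in
   arm_option_override).  */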
785 /* The bits in this mask specify which
786 instructions we are allowed to generate. */
787 static unsigned long insn_flags = 0;
789 /* The bits in this mask specify which instruction scheduling options should
790 be used. */
791 static unsigned long tune_flags = 0;
793 /* The highest ARM architecture version supported by the
794 target. */
795 enum base_architecture arm_base_arch = BASE_ARCH_0;
797 /* The following are used in the arm.md file as equivalents to bits
798 in the above two flag variables. */
800 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
801 int arm_arch3m = 0;
803 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
804 int arm_arch4 = 0;
806 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
807 int arm_arch4t = 0;
809 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
810 int arm_arch5 = 0;
812 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
813 int arm_arch5e = 0;
815 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
816 int arm_arch6 = 0;
818 /* Nonzero if this chip supports the ARM 6K extensions. */
819 int arm_arch6k = 0;
821 /* Nonzero if instructions present in ARMv6-M can be used. */
822 int arm_arch6m = 0;
824 /* Nonzero if this chip supports the ARM 7 extensions. */
825 int arm_arch7 = 0;
827 /* Nonzero if instructions not present in the 'M' profile can be used. */
828 int arm_arch_notm = 0;
830 /* Nonzero if instructions present in ARMv7E-M can be used. */
831 int arm_arch7em = 0;
833 /* Nonzero if instructions present in ARMv8 can be used. */
834 int arm_arch8 = 0;
836 /* Nonzero if this chip can benefit from load scheduling. */
837 int arm_ld_sched = 0;
839 /* Nonzero if this chip is a StrongARM. */
840 int arm_tune_strongarm = 0;
842 /* Nonzero if this chip supports Intel Wireless MMX technology. */
843 int arm_arch_iwmmxt = 0;
845 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
846 int arm_arch_iwmmxt2 = 0;
848 /* Nonzero if this chip is an XScale. */
849 int arm_arch_xscale = 0;
851 /* Nonzero if tuning for XScale */
852 int arm_tune_xscale = 0;
854 /* Nonzero if we want to tune for stores that access the write-buffer.
855 This typically means an ARM6 or ARM7 with MMU or MPU. */
856 int arm_tune_wbuf = 0;
858 /* Nonzero if tuning for Cortex-A9. */
859 int arm_tune_cortex_a9 = 0;
861 /* Nonzero if generating Thumb instructions. */
862 int thumb_code = 0;
864 /* Nonzero if generating Thumb-1 instructions. */
865 int thumb1_code = 0;
867 /* Nonzero if we should define __THUMB_INTERWORK__ in the
868 preprocessor.
869 XXX This is a bit of a hack, it's intended to help work around
870 problems in GLD which doesn't understand that armv5t code is
871 interworking clean. */
872 int arm_cpp_interwork = 0;
874 /* Nonzero if chip supports Thumb 2. */
875 int arm_arch_thumb2;
877 /* Nonzero if chip supports integer division instruction. */
878 int arm_arch_arm_hwdiv;
879 int arm_arch_thumb_hwdiv;
881 /* Nonzero if we should use Neon to handle 64-bit operations rather
882 than core registers. */
883 int prefer_neon_for_64bits = 0;
885 /* Nonzero if we shouldn't use literal pools. */
886 bool arm_disable_literal_pool = false;
888 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
889 we must report the mode of the memory reference from
890 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
891 enum machine_mode output_memory_reference_mode;
893 /* The register number to be used for the PIC offset register. */
894 unsigned arm_pic_register = INVALID_REGNUM;
896 enum arm_pcs arm_pcs_default;
898 /* For an explanation of these variables, see final_prescan_insn below. */
899 int arm_ccfsm_state;
900 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
901 enum arm_cond_code arm_current_cc;
903 rtx arm_target_insn;
904 int arm_target_label;
905 /* The number of conditionally executed insns, including the current insn. */
906 int arm_condexec_count = 0;
907 /* A bitmask specifying the patterns for the IT block.
908 Zero means do not output an IT block before this insn. */
909 int arm_condexec_mask = 0;
910 /* The number of bits used in arm_condexec_mask. */
911 int arm_condexec_masklen = 0;
913 /* Nonzero if chip supports the ARMv8 CRC instructions. */
914 int arm_arch_crc = 0;
916 /* The condition codes of the ARM, and the inverse function. */
917 static const char * const arm_condition_codes[] =
919 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
920 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
923 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
924 int arm_regs_in_sequence[] =
926 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
929 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
930 #define streq(string1, string2) (strcmp (string1, string2) == 0)
932 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
933 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
934 | (1 << PIC_OFFSET_TABLE_REGNUM)))
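/* I.e. the low registers r0-r7 with the Thumb hard frame pointer, stack
   pointer, PC and PIC register (where these fall in that range) masked
   out, leaving registers usable as scratch work registers.  */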
936 /* Initialization code. */
938 struct processors
940 const char *const name;
941 enum processor_type core;
942 const char *arch;
943 enum base_architecture base_arch;
944 const unsigned long flags;
945 const struct tune_params *const tune;
949 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
950 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
951 prefetch_slots, \
952 l1_size, \
953 l1_line_size
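/* Used in the tune_params tables below: ARM_PREFETCH_NOT_BENEFICIAL
   expands to "0, -1, -1", while a core that benefits would use something
   like ARM_PREFETCH_BENEFICIAL (4, 32768, 32) -- illustrative numbers
   only -- to fill in the prefetch slots, L1 cache size and L1 cache line
   size fields.  */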
955 /* arm generic vectorizer costs. */
956 static const
957 struct cpu_vec_costs arm_default_vec_cost = {
958 1, /* scalar_stmt_cost. */
959 1, /* scalar load_cost. */
960 1, /* scalar_store_cost. */
961 1, /* vec_stmt_cost. */
962 1, /* vec_to_scalar_cost. */
963 1, /* scalar_to_vec_cost. */
964 1, /* vec_align_load_cost. */
965 1, /* vec_unalign_load_cost. */
966 1, /* vec_unalign_store_cost. */
967 1, /* vec_store_cost. */
968 3, /* cond_taken_branch_cost. */
969 1, /* cond_not_taken_branch_cost. */
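/* These unit costs feed the vectorizer hooks declared above
   (arm_builtin_vectorization_cost and arm_add_stmt_cost), which scale
   them by statement counts when estimating whether vectorizing a loop is
   profitable.  */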
972 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
973 #include "aarch-cost-tables.h"
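/* In the *_extra_costs tables below, COSTS_N_INSNS (N) expresses an extra
   cost of roughly N instructions on top of the baseline cost of the insn
   itself, so a 0 entry means the operation is no more expensive than a
   simple ALU instruction.  */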
977 const struct cpu_cost_table cortexa9_extra_costs =
979 /* ALU */
981 0, /* arith. */
982 0, /* logical. */
983 0, /* shift. */
984 COSTS_N_INSNS (1), /* shift_reg. */
985 COSTS_N_INSNS (1), /* arith_shift. */
986 COSTS_N_INSNS (2), /* arith_shift_reg. */
987 0, /* log_shift. */
988 COSTS_N_INSNS (1), /* log_shift_reg. */
989 COSTS_N_INSNS (1), /* extend. */
990 COSTS_N_INSNS (2), /* extend_arith. */
991 COSTS_N_INSNS (1), /* bfi. */
992 COSTS_N_INSNS (1), /* bfx. */
993 0, /* clz. */
994 0, /* rev. */
995 0, /* non_exec. */
996 true /* non_exec_costs_exec. */
999 /* MULT SImode */
1001 COSTS_N_INSNS (3), /* simple. */
1002 COSTS_N_INSNS (3), /* flag_setting. */
1003 COSTS_N_INSNS (2), /* extend. */
1004 COSTS_N_INSNS (3), /* add. */
1005 COSTS_N_INSNS (2), /* extend_add. */
1006 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1008 /* MULT DImode */
1010 0, /* simple (N/A). */
1011 0, /* flag_setting (N/A). */
1012 COSTS_N_INSNS (4), /* extend. */
1013 0, /* add (N/A). */
1014 COSTS_N_INSNS (4), /* extend_add. */
1015 0 /* idiv (N/A). */
1018 /* LD/ST */
1020 COSTS_N_INSNS (2), /* load. */
1021 COSTS_N_INSNS (2), /* load_sign_extend. */
1022 COSTS_N_INSNS (2), /* ldrd. */
1023 COSTS_N_INSNS (2), /* ldm_1st. */
1024 1, /* ldm_regs_per_insn_1st. */
1025 2, /* ldm_regs_per_insn_subsequent. */
1026 COSTS_N_INSNS (5), /* loadf. */
1027 COSTS_N_INSNS (5), /* loadd. */
1028 COSTS_N_INSNS (1), /* load_unaligned. */
1029 COSTS_N_INSNS (2), /* store. */
1030 COSTS_N_INSNS (2), /* strd. */
1031 COSTS_N_INSNS (2), /* stm_1st. */
1032 1, /* stm_regs_per_insn_1st. */
1033 2, /* stm_regs_per_insn_subsequent. */
1034 COSTS_N_INSNS (1), /* storef. */
1035 COSTS_N_INSNS (1), /* stored. */
1036 COSTS_N_INSNS (1) /* store_unaligned. */
1039 /* FP SFmode */
1041 COSTS_N_INSNS (14), /* div. */
1042 COSTS_N_INSNS (4), /* mult. */
1043 COSTS_N_INSNS (7), /* mult_addsub. */
1044 COSTS_N_INSNS (30), /* fma. */
1045 COSTS_N_INSNS (3), /* addsub. */
1046 COSTS_N_INSNS (1), /* fpconst. */
1047 COSTS_N_INSNS (1), /* neg. */
1048 COSTS_N_INSNS (3), /* compare. */
1049 COSTS_N_INSNS (3), /* widen. */
1050 COSTS_N_INSNS (3), /* narrow. */
1051 COSTS_N_INSNS (3), /* toint. */
1052 COSTS_N_INSNS (3), /* fromint. */
1053 COSTS_N_INSNS (3) /* roundint. */
1055 /* FP DFmode */
1057 COSTS_N_INSNS (24), /* div. */
1058 COSTS_N_INSNS (5), /* mult. */
1059 COSTS_N_INSNS (8), /* mult_addsub. */
1060 COSTS_N_INSNS (30), /* fma. */
1061 COSTS_N_INSNS (3), /* addsub. */
1062 COSTS_N_INSNS (1), /* fpconst. */
1063 COSTS_N_INSNS (1), /* neg. */
1064 COSTS_N_INSNS (3), /* compare. */
1065 COSTS_N_INSNS (3), /* widen. */
1066 COSTS_N_INSNS (3), /* narrow. */
1067 COSTS_N_INSNS (3), /* toint. */
1068 COSTS_N_INSNS (3), /* fromint. */
1069 COSTS_N_INSNS (3) /* roundint. */
1072 /* Vector */
1074 COSTS_N_INSNS (1) /* alu. */
1078 const struct cpu_cost_table cortexa8_extra_costs =
1080 /* ALU */
1082 0, /* arith. */
1083 0, /* logical. */
1084 COSTS_N_INSNS (1), /* shift. */
1085 0, /* shift_reg. */
1086 COSTS_N_INSNS (1), /* arith_shift. */
1087 0, /* arith_shift_reg. */
1088 COSTS_N_INSNS (1), /* log_shift. */
1089 0, /* log_shift_reg. */
1090 0, /* extend. */
1091 0, /* extend_arith. */
1092 0, /* bfi. */
1093 0, /* bfx. */
1094 0, /* clz. */
1095 0, /* rev. */
1096 0, /* non_exec. */
1097 true /* non_exec_costs_exec. */
1100 /* MULT SImode */
1102 COSTS_N_INSNS (1), /* simple. */
1103 COSTS_N_INSNS (1), /* flag_setting. */
1104 COSTS_N_INSNS (1), /* extend. */
1105 COSTS_N_INSNS (1), /* add. */
1106 COSTS_N_INSNS (1), /* extend_add. */
1107 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1109 /* MULT DImode */
1111 0, /* simple (N/A). */
1112 0, /* flag_setting (N/A). */
1113 COSTS_N_INSNS (2), /* extend. */
1114 0, /* add (N/A). */
1115 COSTS_N_INSNS (2), /* extend_add. */
1116 0 /* idiv (N/A). */
1119 /* LD/ST */
1121 COSTS_N_INSNS (1), /* load. */
1122 COSTS_N_INSNS (1), /* load_sign_extend. */
1123 COSTS_N_INSNS (1), /* ldrd. */
1124 COSTS_N_INSNS (1), /* ldm_1st. */
1125 1, /* ldm_regs_per_insn_1st. */
1126 2, /* ldm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* loadf. */
1128 COSTS_N_INSNS (1), /* loadd. */
1129 COSTS_N_INSNS (1), /* load_unaligned. */
1130 COSTS_N_INSNS (1), /* store. */
1131 COSTS_N_INSNS (1), /* strd. */
1132 COSTS_N_INSNS (1), /* stm_1st. */
1133 1, /* stm_regs_per_insn_1st. */
1134 2, /* stm_regs_per_insn_subsequent. */
1135 COSTS_N_INSNS (1), /* storef. */
1136 COSTS_N_INSNS (1), /* stored. */
1137 COSTS_N_INSNS (1) /* store_unaligned. */
1140 /* FP SFmode */
1142 COSTS_N_INSNS (36), /* div. */
1143 COSTS_N_INSNS (11), /* mult. */
1144 COSTS_N_INSNS (20), /* mult_addsub. */
1145 COSTS_N_INSNS (30), /* fma. */
1146 COSTS_N_INSNS (9), /* addsub. */
1147 COSTS_N_INSNS (3), /* fpconst. */
1148 COSTS_N_INSNS (3), /* neg. */
1149 COSTS_N_INSNS (6), /* compare. */
1150 COSTS_N_INSNS (4), /* widen. */
1151 COSTS_N_INSNS (4), /* narrow. */
1152 COSTS_N_INSNS (8), /* toint. */
1153 COSTS_N_INSNS (8), /* fromint. */
1154 COSTS_N_INSNS (8) /* roundint. */
1156 /* FP DFmode */
1158 COSTS_N_INSNS (64), /* div. */
1159 COSTS_N_INSNS (16), /* mult. */
1160 COSTS_N_INSNS (25), /* mult_addsub. */
1161 COSTS_N_INSNS (30), /* fma. */
1162 COSTS_N_INSNS (9), /* addsub. */
1163 COSTS_N_INSNS (3), /* fpconst. */
1164 COSTS_N_INSNS (3), /* neg. */
1165 COSTS_N_INSNS (6), /* compare. */
1166 COSTS_N_INSNS (6), /* widen. */
1167 COSTS_N_INSNS (6), /* narrow. */
1168 COSTS_N_INSNS (8), /* toint. */
1169 COSTS_N_INSNS (8), /* fromint. */
1170 COSTS_N_INSNS (8) /* roundint. */
1173 /* Vector */
1175 COSTS_N_INSNS (1) /* alu. */
1179 const struct cpu_cost_table cortexa5_extra_costs =
1181 /* ALU */
1183 0, /* arith. */
1184 0, /* logical. */
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1197 0, /* non_exec. */
1198 true /* non_exec_costs_exec. */
1202 /* MULT SImode */
1204 0, /* simple. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1211 /* MULT DImode */
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1216 0, /* add. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1218 0 /* idiv (N/A). */
1221 /* LD/ST */
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (6), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (4), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1242 /* FP SFmode */
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1258 /* FP DFmode */
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1275 /* Vector */
1277 COSTS_N_INSNS (1) /* alu. */
1282 const struct cpu_cost_table cortexa7_extra_costs =
1284 /* ALU */
1286 0, /* arith. */
1287 0, /* logical. */
1288 COSTS_N_INSNS (1), /* shift. */
1289 COSTS_N_INSNS (1), /* shift_reg. */
1290 COSTS_N_INSNS (1), /* arith_shift. */
1291 COSTS_N_INSNS (1), /* arith_shift_reg. */
1292 COSTS_N_INSNS (1), /* log_shift. */
1293 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfi. */
1297 COSTS_N_INSNS (1), /* bfx. */
1298 COSTS_N_INSNS (1), /* clz. */
1299 COSTS_N_INSNS (1), /* rev. */
1300 0, /* non_exec. */
1301 true /* non_exec_costs_exec. */
1305 /* MULT SImode */
1307 0, /* simple. */
1308 COSTS_N_INSNS (1), /* flag_setting. */
1309 COSTS_N_INSNS (1), /* extend. */
1310 COSTS_N_INSNS (1), /* add. */
1311 COSTS_N_INSNS (1), /* extend_add. */
1312 COSTS_N_INSNS (7) /* idiv. */
1314 /* MULT DImode */
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (1), /* extend. */
1319 0, /* add. */
1320 COSTS_N_INSNS (2), /* extend_add. */
1321 0 /* idiv (N/A). */
1324 /* LD/ST */
1326 COSTS_N_INSNS (1), /* load. */
1327 COSTS_N_INSNS (1), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (1), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* loadf. */
1333 COSTS_N_INSNS (2), /* loadd. */
1334 COSTS_N_INSNS (1), /* load_unaligned. */
1335 COSTS_N_INSNS (1), /* store. */
1336 COSTS_N_INSNS (3), /* strd. */
1337 COSTS_N_INSNS (1), /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 COSTS_N_INSNS (1) /* store_unaligned. */
1345 /* FP SFmode */
1347 COSTS_N_INSNS (15), /* div. */
1348 COSTS_N_INSNS (3), /* mult. */
1349 COSTS_N_INSNS (7), /* mult_addsub. */
1350 COSTS_N_INSNS (7), /* fma. */
1351 COSTS_N_INSNS (3), /* addsub. */
1352 COSTS_N_INSNS (3), /* fpconst. */
1353 COSTS_N_INSNS (3), /* neg. */
1354 COSTS_N_INSNS (3), /* compare. */
1355 COSTS_N_INSNS (3), /* widen. */
1356 COSTS_N_INSNS (3), /* narrow. */
1357 COSTS_N_INSNS (3), /* toint. */
1358 COSTS_N_INSNS (3), /* fromint. */
1359 COSTS_N_INSNS (3) /* roundint. */
1361 /* FP DFmode */
1363 COSTS_N_INSNS (30), /* div. */
1364 COSTS_N_INSNS (6), /* mult. */
1365 COSTS_N_INSNS (10), /* mult_addsub. */
1366 COSTS_N_INSNS (7), /* fma. */
1367 COSTS_N_INSNS (3), /* addsub. */
1368 COSTS_N_INSNS (3), /* fpconst. */
1369 COSTS_N_INSNS (3), /* neg. */
1370 COSTS_N_INSNS (3), /* compare. */
1371 COSTS_N_INSNS (3), /* widen. */
1372 COSTS_N_INSNS (3), /* narrow. */
1373 COSTS_N_INSNS (3), /* toint. */
1374 COSTS_N_INSNS (3), /* fromint. */
1375 COSTS_N_INSNS (3) /* roundint. */
1378 /* Vector */
1380 COSTS_N_INSNS (1) /* alu. */
1384 const struct cpu_cost_table cortexa12_extra_costs =
1386 /* ALU */
1388 0, /* arith. */
1389 0, /* logical. */
1390 0, /* shift. */
1391 COSTS_N_INSNS (1), /* shift_reg. */
1392 COSTS_N_INSNS (1), /* arith_shift. */
1393 COSTS_N_INSNS (1), /* arith_shift_reg. */
1394 COSTS_N_INSNS (1), /* log_shift. */
1395 COSTS_N_INSNS (1), /* log_shift_reg. */
1396 0, /* extend. */
1397 COSTS_N_INSNS (1), /* extend_arith. */
1398 0, /* bfi. */
1399 COSTS_N_INSNS (1), /* bfx. */
1400 COSTS_N_INSNS (1), /* clz. */
1401 COSTS_N_INSNS (1), /* rev. */
1402 0, /* non_exec. */
1403 true /* non_exec_costs_exec. */
1405 /* MULT SImode */
1408 COSTS_N_INSNS (2), /* simple. */
1409 COSTS_N_INSNS (3), /* flag_setting. */
1410 COSTS_N_INSNS (2), /* extend. */
1411 COSTS_N_INSNS (3), /* add. */
1412 COSTS_N_INSNS (2), /* extend_add. */
1413 COSTS_N_INSNS (18) /* idiv. */
1415 /* MULT DImode */
1417 0, /* simple (N/A). */
1418 0, /* flag_setting (N/A). */
1419 COSTS_N_INSNS (3), /* extend. */
1420 0, /* add (N/A). */
1421 COSTS_N_INSNS (3), /* extend_add. */
1422 0 /* idiv (N/A). */
1425 /* LD/ST */
1427 COSTS_N_INSNS (3), /* load. */
1428 COSTS_N_INSNS (3), /* load_sign_extend. */
1429 COSTS_N_INSNS (3), /* ldrd. */
1430 COSTS_N_INSNS (3), /* ldm_1st. */
1431 1, /* ldm_regs_per_insn_1st. */
1432 2, /* ldm_regs_per_insn_subsequent. */
1433 COSTS_N_INSNS (3), /* loadf. */
1434 COSTS_N_INSNS (3), /* loadd. */
1435 0, /* load_unaligned. */
1436 0, /* store. */
1437 0, /* strd. */
1438 0, /* stm_1st. */
1439 1, /* stm_regs_per_insn_1st. */
1440 2, /* stm_regs_per_insn_subsequent. */
1441 COSTS_N_INSNS (2), /* storef. */
1442 COSTS_N_INSNS (2), /* stored. */
1443 0 /* store_unaligned. */
1446 /* FP SFmode */
1448 COSTS_N_INSNS (17), /* div. */
1449 COSTS_N_INSNS (4), /* mult. */
1450 COSTS_N_INSNS (8), /* mult_addsub. */
1451 COSTS_N_INSNS (8), /* fma. */
1452 COSTS_N_INSNS (4), /* addsub. */
1453 COSTS_N_INSNS (2), /* fpconst. */
1454 COSTS_N_INSNS (2), /* neg. */
1455 COSTS_N_INSNS (2), /* compare. */
1456 COSTS_N_INSNS (4), /* widen. */
1457 COSTS_N_INSNS (4), /* narrow. */
1458 COSTS_N_INSNS (4), /* toint. */
1459 COSTS_N_INSNS (4), /* fromint. */
1460 COSTS_N_INSNS (4) /* roundint. */
1462 /* FP DFmode */
1464 COSTS_N_INSNS (31), /* div. */
1465 COSTS_N_INSNS (4), /* mult. */
1466 COSTS_N_INSNS (8), /* mult_addsub. */
1467 COSTS_N_INSNS (8), /* fma. */
1468 COSTS_N_INSNS (4), /* addsub. */
1469 COSTS_N_INSNS (2), /* fpconst. */
1470 COSTS_N_INSNS (2), /* neg. */
1471 COSTS_N_INSNS (2), /* compare. */
1472 COSTS_N_INSNS (4), /* widen. */
1473 COSTS_N_INSNS (4), /* narrow. */
1474 COSTS_N_INSNS (4), /* toint. */
1475 COSTS_N_INSNS (4), /* fromint. */
1476 COSTS_N_INSNS (4) /* roundint. */
1479 /* Vector */
1481 COSTS_N_INSNS (1) /* alu. */
1485 const struct cpu_cost_table cortexa15_extra_costs =
1487 /* ALU */
1489 0, /* arith. */
1490 0, /* logical. */
1491 0, /* shift. */
1492 0, /* shift_reg. */
1493 COSTS_N_INSNS (1), /* arith_shift. */
1494 COSTS_N_INSNS (1), /* arith_shift_reg. */
1495 COSTS_N_INSNS (1), /* log_shift. */
1496 COSTS_N_INSNS (1), /* log_shift_reg. */
1497 0, /* extend. */
1498 COSTS_N_INSNS (1), /* extend_arith. */
1499 COSTS_N_INSNS (1), /* bfi. */
1500 0, /* bfx. */
1501 0, /* clz. */
1502 0, /* rev. */
1503 0, /* non_exec. */
1504 true /* non_exec_costs_exec. */
1506 /* MULT SImode */
1509 COSTS_N_INSNS (2), /* simple. */
1510 COSTS_N_INSNS (3), /* flag_setting. */
1511 COSTS_N_INSNS (2), /* extend. */
1512 COSTS_N_INSNS (2), /* add. */
1513 COSTS_N_INSNS (2), /* extend_add. */
1514 COSTS_N_INSNS (18) /* idiv. */
1516 /* MULT DImode */
1518 0, /* simple (N/A). */
1519 0, /* flag_setting (N/A). */
1520 COSTS_N_INSNS (3), /* extend. */
1521 0, /* add (N/A). */
1522 COSTS_N_INSNS (3), /* extend_add. */
1523 0 /* idiv (N/A). */
1526 /* LD/ST */
1528 COSTS_N_INSNS (3), /* load. */
1529 COSTS_N_INSNS (3), /* load_sign_extend. */
1530 COSTS_N_INSNS (3), /* ldrd. */
1531 COSTS_N_INSNS (4), /* ldm_1st. */
1532 1, /* ldm_regs_per_insn_1st. */
1533 2, /* ldm_regs_per_insn_subsequent. */
1534 COSTS_N_INSNS (4), /* loadf. */
1535 COSTS_N_INSNS (4), /* loadd. */
1536 0, /* load_unaligned. */
1537 0, /* store. */
1538 0, /* strd. */
1539 COSTS_N_INSNS (1), /* stm_1st. */
1540 1, /* stm_regs_per_insn_1st. */
1541 2, /* stm_regs_per_insn_subsequent. */
1542 0, /* storef. */
1543 0, /* stored. */
1544 0 /* store_unaligned. */
1547 /* FP SFmode */
1549 COSTS_N_INSNS (17), /* div. */
1550 COSTS_N_INSNS (4), /* mult. */
1551 COSTS_N_INSNS (8), /* mult_addsub. */
1552 COSTS_N_INSNS (8), /* fma. */
1553 COSTS_N_INSNS (4), /* addsub. */
1554 COSTS_N_INSNS (2), /* fpconst. */
1555 COSTS_N_INSNS (2), /* neg. */
1556 COSTS_N_INSNS (5), /* compare. */
1557 COSTS_N_INSNS (4), /* widen. */
1558 COSTS_N_INSNS (4), /* narrow. */
1559 COSTS_N_INSNS (4), /* toint. */
1560 COSTS_N_INSNS (4), /* fromint. */
1561 COSTS_N_INSNS (4) /* roundint. */
1563 /* FP DFmode */
1565 COSTS_N_INSNS (31), /* div. */
1566 COSTS_N_INSNS (4), /* mult. */
1567 COSTS_N_INSNS (8), /* mult_addsub. */
1568 COSTS_N_INSNS (8), /* fma. */
1569 COSTS_N_INSNS (4), /* addsub. */
1570 COSTS_N_INSNS (2), /* fpconst. */
1571 COSTS_N_INSNS (2), /* neg. */
1572 COSTS_N_INSNS (2), /* compare. */
1573 COSTS_N_INSNS (4), /* widen. */
1574 COSTS_N_INSNS (4), /* narrow. */
1575 COSTS_N_INSNS (4), /* toint. */
1576 COSTS_N_INSNS (4), /* fromint. */
1577 COSTS_N_INSNS (4) /* roundint. */
1580 /* Vector */
1582 COSTS_N_INSNS (1) /* alu. */
1586 const struct cpu_cost_table v7m_extra_costs =
1588 /* ALU */
1590 0, /* arith. */
1591 0, /* logical. */
1592 0, /* shift. */
1593 0, /* shift_reg. */
1594 0, /* arith_shift. */
1595 COSTS_N_INSNS (1), /* arith_shift_reg. */
1596 0, /* log_shift. */
1597 COSTS_N_INSNS (1), /* log_shift_reg. */
1598 0, /* extend. */
1599 COSTS_N_INSNS (1), /* extend_arith. */
1600 0, /* bfi. */
1601 0, /* bfx. */
1602 0, /* clz. */
1603 0, /* rev. */
1604 COSTS_N_INSNS (1), /* non_exec. */
1605 false /* non_exec_costs_exec. */
1608 /* MULT SImode */
1610 COSTS_N_INSNS (1), /* simple. */
1611 COSTS_N_INSNS (1), /* flag_setting. */
1612 COSTS_N_INSNS (2), /* extend. */
1613 COSTS_N_INSNS (1), /* add. */
1614 COSTS_N_INSNS (3), /* extend_add. */
1615 COSTS_N_INSNS (8) /* idiv. */
1617 /* MULT DImode */
1619 0, /* simple (N/A). */
1620 0, /* flag_setting (N/A). */
1621 COSTS_N_INSNS (2), /* extend. */
1622 0, /* add (N/A). */
1623 COSTS_N_INSNS (3), /* extend_add. */
1624 0 /* idiv (N/A). */
1627 /* LD/ST */
1629 COSTS_N_INSNS (2), /* load. */
1630 0, /* load_sign_extend. */
1631 COSTS_N_INSNS (3), /* ldrd. */
1632 COSTS_N_INSNS (2), /* ldm_1st. */
1633 1, /* ldm_regs_per_insn_1st. */
1634 1, /* ldm_regs_per_insn_subsequent. */
1635 COSTS_N_INSNS (2), /* loadf. */
1636 COSTS_N_INSNS (3), /* loadd. */
1637 COSTS_N_INSNS (1), /* load_unaligned. */
1638 COSTS_N_INSNS (2), /* store. */
1639 COSTS_N_INSNS (3), /* strd. */
1640 COSTS_N_INSNS (2), /* stm_1st. */
1641 1, /* stm_regs_per_insn_1st. */
1642 1, /* stm_regs_per_insn_subsequent. */
1643 COSTS_N_INSNS (2), /* storef. */
1644 COSTS_N_INSNS (3), /* stored. */
1645 COSTS_N_INSNS (1) /* store_unaligned. */
1648 /* FP SFmode */
1650 COSTS_N_INSNS (7), /* div. */
1651 COSTS_N_INSNS (2), /* mult. */
1652 COSTS_N_INSNS (5), /* mult_addsub. */
1653 COSTS_N_INSNS (3), /* fma. */
1654 COSTS_N_INSNS (1), /* addsub. */
1655 0, /* fpconst. */
1656 0, /* neg. */
1657 0, /* compare. */
1658 0, /* widen. */
1659 0, /* narrow. */
1660 0, /* toint. */
1661 0, /* fromint. */
1662 0 /* roundint. */
1664 /* FP DFmode */
1666 COSTS_N_INSNS (15), /* div. */
1667 COSTS_N_INSNS (5), /* mult. */
1668 COSTS_N_INSNS (7), /* mult_addsub. */
1669 COSTS_N_INSNS (7), /* fma. */
1670 COSTS_N_INSNS (3), /* addsub. */
1671 0, /* fpconst. */
1672 0, /* neg. */
1673 0, /* compare. */
1674 0, /* widen. */
1675 0, /* narrow. */
1676 0, /* toint. */
1677 0, /* fromint. */
1678 0 /* roundint. */
1681 /* Vector */
1683 COSTS_N_INSNS (1) /* alu. */
1687 const struct tune_params arm_slowmul_tune =
1689 arm_slowmul_rtx_costs,
1690 NULL,
1691 NULL, /* Sched adj cost. */
1692 3, /* Constant limit. */
1693 5, /* Max cond insns. */
1694 ARM_PREFETCH_NOT_BENEFICIAL,
1695 true, /* Prefer constant pool. */
1696 arm_default_branch_cost,
1697 false, /* Prefer LDRD/STRD. */
1698 {true, true}, /* Prefer non short circuit. */
1699 &arm_default_vec_cost, /* Vectorizer costs. */
1700 false, /* Prefer Neon for 64-bits bitops. */
1701 false, false /* Prefer 32-bit encodings. */
1704 const struct tune_params arm_fastmul_tune =
1706 arm_fastmul_rtx_costs,
1707 NULL,
1708 NULL, /* Sched adj cost. */
1709 1, /* Constant limit. */
1710 5, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false /* Prefer 32-bit encodings. */
1721 /* StrongARM has early execution of branches, so a sequence that is worth
1722 skipping is shorter. Set max_insns_skipped to a lower value. */
1724 const struct tune_params arm_strongarm_tune =
1726 arm_fastmul_rtx_costs,
1727 NULL,
1728 NULL, /* Sched adj cost. */
1729 1, /* Constant limit. */
1730 3, /* Max cond insns. */
1731 ARM_PREFETCH_NOT_BENEFICIAL,
1732 true, /* Prefer constant pool. */
1733 arm_default_branch_cost,
1734 false, /* Prefer LDRD/STRD. */
1735 {true, true}, /* Prefer non short circuit. */
1736 &arm_default_vec_cost, /* Vectorizer costs. */
1737 false, /* Prefer Neon for 64-bits bitops. */
1738 false, false /* Prefer 32-bit encodings. */
1741 const struct tune_params arm_xscale_tune =
1743 arm_xscale_rtx_costs,
1744 NULL,
1745 xscale_sched_adjust_cost,
1746 2, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 ARM_PREFETCH_NOT_BENEFICIAL,
1749 true, /* Prefer constant pool. */
1750 arm_default_branch_cost,
1751 false, /* Prefer LDRD/STRD. */
1752 {true, true}, /* Prefer non short circuit. */
1753 &arm_default_vec_cost, /* Vectorizer costs. */
1754 false, /* Prefer Neon for 64-bits bitops. */
1755 false, false /* Prefer 32-bit encodings. */
1758 const struct tune_params arm_9e_tune =
1760 arm_9e_rtx_costs,
1761 NULL,
1762 NULL, /* Sched adj cost. */
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 ARM_PREFETCH_NOT_BENEFICIAL,
1766 true, /* Prefer constant pool. */
1767 arm_default_branch_cost,
1768 false, /* Prefer LDRD/STRD. */
1769 {true, true}, /* Prefer non short circuit. */
1770 &arm_default_vec_cost, /* Vectorizer costs. */
1771 false, /* Prefer Neon for 64-bits bitops. */
1772 false, false /* Prefer 32-bit encodings. */
1775 const struct tune_params arm_v6t2_tune =
1777 arm_9e_rtx_costs,
1778 NULL,
1779 NULL, /* Sched adj cost. */
1780 1, /* Constant limit. */
1781 5, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 false, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 false, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 false, false /* Prefer 32-bit encodings. */
1792 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1793 const struct tune_params arm_cortex_tune =
1795 arm_9e_rtx_costs,
1796 &generic_extra_costs,
1797 NULL, /* Sched adj cost. */
1798 1, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 ARM_PREFETCH_NOT_BENEFICIAL,
1801 false, /* Prefer constant pool. */
1802 arm_default_branch_cost,
1803 false, /* Prefer LDRD/STRD. */
1804 {true, true}, /* Prefer non short circuit. */
1805 &arm_default_vec_cost, /* Vectorizer costs. */
1806 false, /* Prefer Neon for 64-bits bitops. */
1807 false, false /* Prefer 32-bit encodings. */
1810 const struct tune_params arm_cortex_a8_tune =
1812 arm_9e_rtx_costs,
1813 &cortexa8_extra_costs,
1814 NULL, /* Sched adj cost. */
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 ARM_PREFETCH_NOT_BENEFICIAL,
1818 false, /* Prefer constant pool. */
1819 arm_default_branch_cost,
1820 false, /* Prefer LDRD/STRD. */
1821 {true, true}, /* Prefer non short circuit. */
1822 &arm_default_vec_cost, /* Vectorizer costs. */
1823 false, /* Prefer Neon for 64-bits bitops. */
1824 false, false /* Prefer 32-bit encodings. */
1827 const struct tune_params arm_cortex_a7_tune =
1829 arm_9e_rtx_costs,
1830 &cortexa7_extra_costs,
1831 NULL,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 ARM_PREFETCH_NOT_BENEFICIAL,
1835 false, /* Prefer constant pool. */
1836 arm_default_branch_cost,
1837 false, /* Prefer LDRD/STRD. */
1838 {true, true}, /* Prefer non short circuit. */
1839 &arm_default_vec_cost, /* Vectorizer costs. */
1840 false, /* Prefer Neon for 64-bits bitops. */
1841 false, false /* Prefer 32-bit encodings. */
1844 const struct tune_params arm_cortex_a15_tune =
1846 arm_9e_rtx_costs,
1847 &cortexa15_extra_costs,
1848 NULL, /* Sched adj cost. */
1849 1, /* Constant limit. */
1850 2, /* Max cond insns. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 false, /* Prefer constant pool. */
1853 arm_default_branch_cost,
1854 true, /* Prefer LDRD/STRD. */
1855 {true, true}, /* Prefer non short circuit. */
1856 &arm_default_vec_cost, /* Vectorizer costs. */
1857 false, /* Prefer Neon for 64-bits bitops. */
1858 true, true /* Prefer 32-bit encodings. */
1861 const struct tune_params arm_cortex_a53_tune =
1863 arm_9e_rtx_costs,
1864 &cortexa53_extra_costs,
1865 NULL, /* Scheduler cost adjustment. */
1866 1, /* Constant limit. */
1867 5, /* Max cond insns. */
1868 ARM_PREFETCH_NOT_BENEFICIAL,
1869 false, /* Prefer constant pool. */
1870 arm_default_branch_cost,
1871 false, /* Prefer LDRD/STRD. */
1872 {true, true}, /* Prefer non short circuit. */
1873 &arm_default_vec_cost, /* Vectorizer costs. */
1874 false, /* Prefer Neon for 64-bits bitops. */
1875 false, false /* Prefer 32-bit encodings. */
1878 const struct tune_params arm_cortex_a57_tune =
1880 arm_9e_rtx_costs,
1881 &cortexa57_extra_costs,
1882 NULL, /* Scheduler cost adjustment. */
1883 1, /* Constant limit. */
1884 2, /* Max cond insns. */
1885 ARM_PREFETCH_NOT_BENEFICIAL,
1886 false, /* Prefer constant pool. */
1887 arm_default_branch_cost,
1888 true, /* Prefer LDRD/STRD. */
1889 {true, true}, /* Prefer non short circuit. */
1890 &arm_default_vec_cost, /* Vectorizer costs. */
1891 false, /* Prefer Neon for 64-bits bitops. */
1892 true, true /* Prefer 32-bit encodings. */
1895 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1896 less appealing. Set max_insns_skipped to a low value. */
1898 const struct tune_params arm_cortex_a5_tune =
1900 arm_9e_rtx_costs,
1901 &cortexa5_extra_costs,
1902 NULL, /* Sched adj cost. */
1903 1, /* Constant limit. */
1904 1, /* Max cond insns. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 false, /* Prefer constant pool. */
1907 arm_cortex_a5_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {false, false}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false /* Prefer 32-bit encodings. */
1915 const struct tune_params arm_cortex_a9_tune =
1917 arm_9e_rtx_costs,
1918 &cortexa9_extra_costs,
1919 cortex_a9_sched_adjust_cost,
1920 1, /* Constant limit. */
1921 5, /* Max cond insns. */
1922 ARM_PREFETCH_BENEFICIAL(4,32,32),
1923 false, /* Prefer constant pool. */
1924 arm_default_branch_cost,
1925 false, /* Prefer LDRD/STRD. */
1926 {true, true}, /* Prefer non short circuit. */
1927 &arm_default_vec_cost, /* Vectorizer costs. */
1928 false, /* Prefer Neon for 64-bits bitops. */
1929 false, false /* Prefer 32-bit encodings. */
1932 const struct tune_params arm_cortex_a12_tune =
1934 arm_9e_rtx_costs,
1935 &cortexa12_extra_costs,
1936 NULL,
1937 1, /* Constant limit. */
1938 5, /* Max cond insns. */
1939 ARM_PREFETCH_BENEFICIAL(4,32,32),
1940 false, /* Prefer constant pool. */
1941 arm_default_branch_cost,
1942 true, /* Prefer LDRD/STRD. */
1943 {true, true}, /* Prefer non short circuit. */
1944 &arm_default_vec_cost, /* Vectorizer costs. */
1945 false, /* Prefer Neon for 64-bits bitops. */
1946 false, false /* Prefer 32-bit encodings. */
1949 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
1950 single cycle, so a MOVW/MOVT pair costs two cycles.  An LDR from the constant
1951 pool likewise takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
1952 loads/stores can be pipelined together, saving one cycle), and may also
1953 improve icache utilisation. Hence we prefer the constant pool for such
1954 processors. */
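/* Illustrative only: the two ways of materialising a 32-bit constant that the
   comment above weighs against each other (exact encodings depend on the
   assembler and the constant):

       movw    r0, #0x5678          @ 1 cycle
       movt    r0, #0x1234          @ 1 cycle, r0 = 0x12345678

   versus a literal-pool load:

       ldr     r0, .LC0             @ 2 cycles, but may pipeline with
                                    @ neighbouring loads/stores
       ...
     .LC0:
       .word   0x12345678
*/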
1956 const struct tune_params arm_v7m_tune =
1958 arm_9e_rtx_costs,
1959 &v7m_extra_costs,
1960 NULL, /* Sched adj cost. */
1961 1, /* Constant limit. */
1962 2, /* Max cond insns. */
1963 ARM_PREFETCH_NOT_BENEFICIAL,
1964 true, /* Prefer constant pool. */
1965 arm_cortex_m_branch_cost,
1966 false, /* Prefer LDRD/STRD. */
1967 {false, false}, /* Prefer non short circuit. */
1968 &arm_default_vec_cost, /* Vectorizer costs. */
1969 false, /* Prefer Neon for 64-bits bitops. */
1970 false, false /* Prefer 32-bit encodings. */
1973 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1974 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1975 const struct tune_params arm_v6m_tune =
1977 arm_9e_rtx_costs,
1978 NULL,
1979 NULL, /* Sched adj cost. */
1980 1, /* Constant limit. */
1981 5, /* Max cond insns. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 false, /* Prefer constant pool. */
1984 arm_default_branch_cost,
1985 false, /* Prefer LDRD/STRD. */
1986 {false, false}, /* Prefer non short circuit. */
1987 &arm_default_vec_cost, /* Vectorizer costs. */
1988 false, /* Prefer Neon for 64-bits bitops. */
1989 false, false /* Prefer 32-bit encodings. */
1992 const struct tune_params arm_fa726te_tune =
1994 arm_9e_rtx_costs,
1995 NULL,
1996 fa726te_sched_adjust_cost,
1997 1, /* Constant limit. */
1998 5, /* Max cond insns. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 true, /* Prefer constant pool. */
2001 arm_default_branch_cost,
2002 false, /* Prefer LDRD/STRD. */
2003 {true, true}, /* Prefer non short circuit. */
2004 &arm_default_vec_cost, /* Vectorizer costs. */
2005 false, /* Prefer Neon for 64-bits bitops. */
2006 false, false /* Prefer 32-bit encodings. */
2010 /* Not all of these give usefully different compilation alternatives,
2011 but there is no simple way of generalizing them. */
2012 static const struct processors all_cores[] =
2014 /* ARM Cores */
2015 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2016 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2017 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2018 #include "arm-cores.def"
2019 #undef ARM_CORE
2020 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2023 static const struct processors all_architectures[] =
2025 /* ARM Architectures */
2026 /* We don't specify tuning costs here as it will be figured out
2027 from the core. */
2029 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2030 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2031 #include "arm-arches.def"
2032 #undef ARM_ARCH
2033 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2037 /* These are populated as command-line arguments are processed, or remain NULL
2038 if not specified. */
2039 static const struct processors *arm_selected_arch;
2040 static const struct processors *arm_selected_cpu;
2041 static const struct processors *arm_selected_tune;
2043 /* The name of the preprocessor macro to define for this architecture. */
2045 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2047 /* Available values for -mfpu=. */
2049 static const struct arm_fpu_desc all_fpus[] =
2051 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2052 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2053 #include "arm-fpus.def"
2054 #undef ARM_FPU
2058 /* Supported TLS relocations. */
2060 enum tls_reloc {
2061 TLS_GD32,
2062 TLS_LDM32,
2063 TLS_LDO32,
2064 TLS_IE32,
2065 TLS_LE32,
2066 TLS_DESCSEQ /* GNU scheme */
2069 /* The maximum number of insns to be used when loading a constant. */
2070 inline static int
2071 arm_constant_limit (bool size_p)
2073 return size_p ? 1 : current_tune->constant_limit;
2076 /* Emit an insn that's a simple single-set. Both the operands must be known
2077 to be valid. */
2078 inline static rtx
2079 emit_set_insn (rtx x, rtx y)
2081 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2084 /* Return the number of bits set in VALUE. */
2085 static unsigned
2086 bit_count (unsigned long value)
2088 unsigned long count = 0;
2090 while (value)
2092 count++;
2093 value &= value - 1; /* Clear the least-significant set bit. */
2096 return count;
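/* The loop above uses the classic "clear the lowest set bit" trick:
   value & (value - 1) removes exactly one set bit per iteration, so the
   loop runs once per set bit rather than once per bit position.  For
   example, value = 0x2c (binary 101100) takes three iterations:
   101100 -> 101000 -> 100000 -> 0, giving a count of 3.  */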
2099 typedef struct
2101 enum machine_mode mode;
2102 const char *name;
2103 } arm_fixed_mode_set;
2105 /* A small helper for setting fixed-point libfuncs. */
2107 static void
2108 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2109 const char *funcname, const char *modename,
2110 int num_suffix)
2112 char buffer[50];
2114 if (num_suffix == 0)
2115 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2116 else
2117 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2119 set_optab_libfunc (optable, mode, buffer);
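/* For example, the sprintf above produces names such as "__gnu_addsq3"
   (add_optab, SQmode, suffix 3) and "__gnu_negha2" (neg_optab, HAmode,
   suffix 2), which are intended to match the __gnu_-prefixed fixed-point
   helpers built into libgcc for this target.  */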
2122 static void
2123 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2124 enum machine_mode from, const char *funcname,
2125 const char *toname, const char *fromname)
2127 char buffer[50];
2128 const char *maybe_suffix_2 = "";
2130 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2131 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2132 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2133 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2134 maybe_suffix_2 = "2";
2136 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2137 maybe_suffix_2);
2139 set_conv_libfunc (optable, to, from, buffer);
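/* For example, a conversion from SQmode to DFmode yields "__gnu_fractsqdf"
   (no "2" suffix, since DFmode is not a fixed-point mode), while SQmode to
   DQmode yields "__gnu_fractsqdq2" (both fixed-point, same signedness and
   same fract class, so the "2" suffix is appended).  */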
2142 /* Set up library functions unique to ARM. */
2144 static void
2145 arm_init_libfuncs (void)
2147 /* For Linux, we have access to kernel support for atomic operations. */
2148 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2149 init_sync_libfuncs (2 * UNITS_PER_WORD);
2151 /* There are no special library functions unless we are using the
2152 ARM BPABI. */
2153 if (!TARGET_BPABI)
2154 return;
2156 /* The functions below are described in Section 4 of the "Run-Time
2157 ABI for the ARM architecture", Version 1.0. */
2159 /* Double-precision floating-point arithmetic. Table 2. */
2160 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2161 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2162 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2163 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2164 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2166 /* Double-precision comparisons. Table 3. */
2167 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2168 set_optab_libfunc (ne_optab, DFmode, NULL);
2169 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2170 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2171 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2172 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2173 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
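/* Illustrative: with software floating point, a C comparison such as
   "a < b" on doubles becomes a call to __aeabi_dcmplt, which (per the
   Run-Time ABI) returns nonzero when the relation holds and zero
   otherwise; the ne_optab entry is left NULL so GCC falls back to
   inverting the result of the "==" helper instead.  */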
2175 /* Single-precision floating-point arithmetic. Table 4. */
2176 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2177 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2178 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2179 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2180 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2182 /* Single-precision comparisons. Table 5. */
2183 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2184 set_optab_libfunc (ne_optab, SFmode, NULL);
2185 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2186 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2187 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2188 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2189 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2191 /* Floating-point to integer conversions. Table 6. */
2192 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2193 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2194 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2195 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2196 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2197 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2198 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2199 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2201 /* Conversions between floating types. Table 7. */
2202 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2203 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2205 /* Integer to floating-point conversions. Table 8. */
2206 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2207 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2208 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2209 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2210 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2211 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2212 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2213 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2215 /* Long long. Table 9. */
2216 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2217 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2218 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2219 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2220 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2221 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2222 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2223 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2225 /* Integer (32/32->32) division. \S 4.3.1. */
2226 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2227 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2229 /* The divmod functions are designed so that they can be used for
2230 plain division, even though they return both the quotient and the
2231 remainder. The quotient is returned in the usual location (i.e.,
2232 r0 for SImode, {r0, r1} for DImode), just as would be expected
2233 for an ordinary division routine. Because the AAPCS calling
2234 conventions specify that all of { r0, r1, r2, r3 } are
2235 caller-saved (call-clobbered) registers, there is no need to tell the compiler
2236 explicitly that those registers are clobbered by these
2237 routines. */
2238 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2239 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2241 /* For SImode division the ABI provides div-without-mod routines,
2242 which are faster. */
2243 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2244 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
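/* Illustrative: on an EABI target without hardware divide, "q = a / b"
   for signed ints becomes "bl __aeabi_idiv" with the quotient returned
   in r0, while "a % b" uses __aeabi_idivmod, which returns the quotient
   in r0 and the remainder in r1 (see the divmod comment above).  */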
2246 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2247 divmod libcalls instead. */
2248 set_optab_libfunc (smod_optab, DImode, NULL);
2249 set_optab_libfunc (umod_optab, DImode, NULL);
2250 set_optab_libfunc (smod_optab, SImode, NULL);
2251 set_optab_libfunc (umod_optab, SImode, NULL);
2253 /* Half-precision float operations. The compiler handles all operations
2254 with NULL libfuncs by converting to SFmode. */
2255 switch (arm_fp16_format)
2257 case ARM_FP16_FORMAT_IEEE:
2258 case ARM_FP16_FORMAT_ALTERNATIVE:
2260 /* Conversions. */
2261 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2262 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2263 ? "__gnu_f2h_ieee"
2264 : "__gnu_f2h_alternative"));
2265 set_conv_libfunc (sext_optab, SFmode, HFmode,
2266 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2267 ? "__gnu_h2f_ieee"
2268 : "__gnu_h2f_alternative"));
2270 /* Arithmetic. */
2271 set_optab_libfunc (add_optab, HFmode, NULL);
2272 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2273 set_optab_libfunc (smul_optab, HFmode, NULL);
2274 set_optab_libfunc (neg_optab, HFmode, NULL);
2275 set_optab_libfunc (sub_optab, HFmode, NULL);
2277 /* Comparisons. */
2278 set_optab_libfunc (eq_optab, HFmode, NULL);
2279 set_optab_libfunc (ne_optab, HFmode, NULL);
2280 set_optab_libfunc (lt_optab, HFmode, NULL);
2281 set_optab_libfunc (le_optab, HFmode, NULL);
2282 set_optab_libfunc (ge_optab, HFmode, NULL);
2283 set_optab_libfunc (gt_optab, HFmode, NULL);
2284 set_optab_libfunc (unord_optab, HFmode, NULL);
2285 break;
2287 default:
2288 break;
2291 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2293 const arm_fixed_mode_set fixed_arith_modes[] =
2295 { QQmode, "qq" },
2296 { UQQmode, "uqq" },
2297 { HQmode, "hq" },
2298 { UHQmode, "uhq" },
2299 { SQmode, "sq" },
2300 { USQmode, "usq" },
2301 { DQmode, "dq" },
2302 { UDQmode, "udq" },
2303 { TQmode, "tq" },
2304 { UTQmode, "utq" },
2305 { HAmode, "ha" },
2306 { UHAmode, "uha" },
2307 { SAmode, "sa" },
2308 { USAmode, "usa" },
2309 { DAmode, "da" },
2310 { UDAmode, "uda" },
2311 { TAmode, "ta" },
2312 { UTAmode, "uta" }
2314 const arm_fixed_mode_set fixed_conv_modes[] =
2316 { QQmode, "qq" },
2317 { UQQmode, "uqq" },
2318 { HQmode, "hq" },
2319 { UHQmode, "uhq" },
2320 { SQmode, "sq" },
2321 { USQmode, "usq" },
2322 { DQmode, "dq" },
2323 { UDQmode, "udq" },
2324 { TQmode, "tq" },
2325 { UTQmode, "utq" },
2326 { HAmode, "ha" },
2327 { UHAmode, "uha" },
2328 { SAmode, "sa" },
2329 { USAmode, "usa" },
2330 { DAmode, "da" },
2331 { UDAmode, "uda" },
2332 { TAmode, "ta" },
2333 { UTAmode, "uta" },
2334 { QImode, "qi" },
2335 { HImode, "hi" },
2336 { SImode, "si" },
2337 { DImode, "di" },
2338 { TImode, "ti" },
2339 { SFmode, "sf" },
2340 { DFmode, "df" }
2342 unsigned int i, j;
2344 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2346 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2347 "add", fixed_arith_modes[i].name, 3);
2348 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2349 "ssadd", fixed_arith_modes[i].name, 3);
2350 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2351 "usadd", fixed_arith_modes[i].name, 3);
2352 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2353 "sub", fixed_arith_modes[i].name, 3);
2354 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2355 "sssub", fixed_arith_modes[i].name, 3);
2356 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2357 "ussub", fixed_arith_modes[i].name, 3);
2358 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2359 "mul", fixed_arith_modes[i].name, 3);
2360 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2361 "ssmul", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2363 "usmul", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2365 "div", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2367 "udiv", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2369 "ssdiv", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2371 "usdiv", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2373 "neg", fixed_arith_modes[i].name, 2);
2374 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2375 "ssneg", fixed_arith_modes[i].name, 2);
2376 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2377 "usneg", fixed_arith_modes[i].name, 2);
2378 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2379 "ashl", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2381 "ashr", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2383 "lshr", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2385 "ssashl", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2387 "usashl", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2389 "cmp", fixed_arith_modes[i].name, 2);
2392 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2393 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2395 if (i == j
2396 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2397 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2398 continue;
2400 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2401 fixed_conv_modes[j].mode, "fract",
2402 fixed_conv_modes[i].name,
2403 fixed_conv_modes[j].name);
2404 arm_set_fixed_conv_libfunc (satfract_optab,
2405 fixed_conv_modes[i].mode,
2406 fixed_conv_modes[j].mode, "satfract",
2407 fixed_conv_modes[i].name,
2408 fixed_conv_modes[j].name);
2409 arm_set_fixed_conv_libfunc (fractuns_optab,
2410 fixed_conv_modes[i].mode,
2411 fixed_conv_modes[j].mode, "fractuns",
2412 fixed_conv_modes[i].name,
2413 fixed_conv_modes[j].name);
2414 arm_set_fixed_conv_libfunc (satfractuns_optab,
2415 fixed_conv_modes[i].mode,
2416 fixed_conv_modes[j].mode, "satfractuns",
2417 fixed_conv_modes[i].name,
2418 fixed_conv_modes[j].name);
2422 if (TARGET_AAPCS_BASED)
2423 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2426 /* On AAPCS systems, this is the "struct __va_list". */
2427 static GTY(()) tree va_list_type;
2429 /* Return the type to use as __builtin_va_list. */
2430 static tree
2431 arm_build_builtin_va_list (void)
2433 tree va_list_name;
2434 tree ap_field;
2436 if (!TARGET_AAPCS_BASED)
2437 return std_build_builtin_va_list ();
2439 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2440 defined as:
2442 struct __va_list
2443 {
2444 void *__ap;
2445 };
2447 The C Library ABI further reinforces this definition in \S
2448 4.1.
2450 We must follow this definition exactly. The structure tag
2451 name is visible in C++ mangled names, and thus forms a part
2452 of the ABI. The field name may be used by people who
2453 #include <stdarg.h>. */
2454 /* Create the type. */
2455 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2456 /* Give it the required name. */
2457 va_list_name = build_decl (BUILTINS_LOCATION,
2458 TYPE_DECL,
2459 get_identifier ("__va_list"),
2460 va_list_type);
2461 DECL_ARTIFICIAL (va_list_name) = 1;
2462 TYPE_NAME (va_list_type) = va_list_name;
2463 TYPE_STUB_DECL (va_list_type) = va_list_name;
2464 /* Create the __ap field. */
2465 ap_field = build_decl (BUILTINS_LOCATION,
2466 FIELD_DECL,
2467 get_identifier ("__ap"),
2468 ptr_type_node);
2469 DECL_ARTIFICIAL (ap_field) = 1;
2470 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2471 TYPE_FIELDS (va_list_type) = ap_field;
2472 /* Compute its layout. */
2473 layout_type (va_list_type);
2475 return va_list_type;
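/* The net effect is that, on AAPCS targets, __builtin_va_list behaves as
   if declared as:

       typedef struct __va_list { void *__ap; } __builtin_va_list;

   which is exactly the layout the AAPCS requires (see the comment above).  */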
2478 /* Return an expression of type "void *" pointing to the next
2479 available argument in a variable-argument list. VALIST is the
2480 user-level va_list object, of type __builtin_va_list. */
2481 static tree
2482 arm_extract_valist_ptr (tree valist)
2484 if (TREE_TYPE (valist) == error_mark_node)
2485 return error_mark_node;
2487 /* On an AAPCS target, the pointer is stored within "struct
2488 va_list". */
2489 if (TARGET_AAPCS_BASED)
2491 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2492 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2493 valist, ap_field, NULL_TREE);
2496 return valist;
2499 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2500 static void
2501 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2503 valist = arm_extract_valist_ptr (valist);
2504 std_expand_builtin_va_start (valist, nextarg);
2507 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2508 static tree
2509 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2510 gimple_seq *post_p)
2512 valist = arm_extract_valist_ptr (valist);
2513 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2516 /* Fix up any incompatible options that the user has specified. */
2517 static void
2518 arm_option_override (void)
2520 if (global_options_set.x_arm_arch_option)
2521 arm_selected_arch = &all_architectures[arm_arch_option];
2523 if (global_options_set.x_arm_cpu_option)
2525 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2526 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2529 if (global_options_set.x_arm_tune_option)
2530 arm_selected_tune = &all_cores[(int) arm_tune_option];
2532 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2533 SUBTARGET_OVERRIDE_OPTIONS;
2534 #endif
2536 if (arm_selected_arch)
2538 if (arm_selected_cpu)
2540 /* Check for conflict between mcpu and march. */
2541 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2543 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2544 arm_selected_cpu->name, arm_selected_arch->name);
2545 /* -march wins for code generation.
2546 -mcpu wins for default tuning. */
2547 if (!arm_selected_tune)
2548 arm_selected_tune = arm_selected_cpu;
2550 arm_selected_cpu = arm_selected_arch;
2552 else
2553 /* -mcpu wins. */
2554 arm_selected_arch = NULL;
2556 else
2557 /* Pick a CPU based on the architecture. */
2558 arm_selected_cpu = arm_selected_arch;
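/* Illustrative example of the precedence above: with
   "-mcpu=cortex-a8 -march=armv5t" the capability flags differ, so a
   warning is issued, code is generated for the armv5t architecture
   (-march wins) and instruction tuning follows cortex-a8 (-mcpu wins).  */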
2561 /* If the user did not specify a processor, choose one for them. */
2562 if (!arm_selected_cpu)
2564 const struct processors * sel;
2565 unsigned int sought;
2567 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2568 if (!arm_selected_cpu->name)
2570 #ifdef SUBTARGET_CPU_DEFAULT
2571 /* Use the subtarget default CPU if none was specified by
2572 configure. */
2573 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2574 #endif
2575 /* Default to ARM6. */
2576 if (!arm_selected_cpu->name)
2577 arm_selected_cpu = &all_cores[arm6];
2580 sel = arm_selected_cpu;
2581 insn_flags = sel->flags;
2583 /* Now check to see if the user has specified some command line
2584 switches that require certain abilities from the CPU. */
2585 sought = 0;
2587 if (TARGET_INTERWORK || TARGET_THUMB)
2589 sought |= (FL_THUMB | FL_MODE32);
2591 /* There are no ARM processors that support both APCS-26 and
2592 interworking. Therefore we force FL_MODE26 to be removed
2593 from insn_flags here (if it was set), so that the search
2594 below will always be able to find a compatible processor. */
2595 insn_flags &= ~FL_MODE26;
2598 if (sought != 0 && ((sought & insn_flags) != sought))
2600 /* Try to locate a CPU type that supports all of the abilities
2601 of the default CPU, plus the extra abilities requested by
2602 the user. */
2603 for (sel = all_cores; sel->name != NULL; sel++)
2604 if ((sel->flags & sought) == (sought | insn_flags))
2605 break;
2607 if (sel->name == NULL)
2609 unsigned current_bit_count = 0;
2610 const struct processors * best_fit = NULL;
2612 /* Ideally we would issue an error message here, saying that it
2613 was not possible to find a CPU compatible with the default CPU
2614 that also supports the command-line options specified by the
2615 programmer, and that they should use the -mcpu=<name> option
2616 to override the default CPU type.
2619 Since we cannot find a CPU that has both the
2620 characteristics of the default CPU and the given
2621 command-line options, we scan the array again looking
2622 for a best match. */
2623 for (sel = all_cores; sel->name != NULL; sel++)
2624 if ((sel->flags & sought) == sought)
2626 unsigned count;
2628 count = bit_count (sel->flags & insn_flags);
2630 if (count >= current_bit_count)
2632 best_fit = sel;
2633 current_bit_count = count;
2637 gcc_assert (best_fit);
2638 sel = best_fit;
2641 arm_selected_cpu = sel;
2645 gcc_assert (arm_selected_cpu);
2646 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2647 if (!arm_selected_tune)
2648 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2650 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2651 insn_flags = arm_selected_cpu->flags;
2652 arm_base_arch = arm_selected_cpu->base_arch;
2654 arm_tune = arm_selected_tune->core;
2655 tune_flags = arm_selected_tune->flags;
2656 current_tune = arm_selected_tune->tune;
2658 /* Make sure that the processor choice does not conflict with any of the
2659 other command line choices. */
2660 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2661 error ("target CPU does not support ARM mode");
2663 /* BPABI targets use linker tricks to allow interworking on cores
2664 without thumb support. */
2665 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2667 warning (0, "target CPU does not support interworking" );
2668 target_flags &= ~MASK_INTERWORK;
2671 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2673 warning (0, "target CPU does not support THUMB instructions");
2674 target_flags &= ~MASK_THUMB;
2677 if (TARGET_APCS_FRAME && TARGET_THUMB)
2679 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2680 target_flags &= ~MASK_APCS_FRAME;
2683 /* Callee super interworking implies thumb interworking. Adding
2684 this to the flags here simplifies the logic elsewhere. */
2685 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2686 target_flags |= MASK_INTERWORK;
2688 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2689 from here where no function is being compiled currently. */
2690 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2691 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2693 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2694 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2696 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2698 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2699 target_flags |= MASK_APCS_FRAME;
2702 if (TARGET_POKE_FUNCTION_NAME)
2703 target_flags |= MASK_APCS_FRAME;
2705 if (TARGET_APCS_REENT && flag_pic)
2706 error ("-fpic and -mapcs-reent are incompatible");
2708 if (TARGET_APCS_REENT)
2709 warning (0, "APCS reentrant code not supported. Ignored");
2711 /* If this target is normally configured to use APCS frames, warn if they
2712 are turned off and debugging is turned on. */
2713 if (TARGET_ARM
2714 && write_symbols != NO_DEBUG
2715 && !TARGET_APCS_FRAME
2716 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2717 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2719 if (TARGET_APCS_FLOAT)
2720 warning (0, "passing floating point arguments in fp regs not yet supported");
2722 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2723 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2724 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2725 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2726 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2727 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2728 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2729 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2730 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2731 arm_arch6m = arm_arch6 && !arm_arch_notm;
2732 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2733 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2734 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2735 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2736 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2738 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2739 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2740 thumb_code = TARGET_ARM == 0;
2741 thumb1_code = TARGET_THUMB1 != 0;
2742 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2743 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2744 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2745 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2746 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2747 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2748 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2749 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2750 if (arm_restrict_it == 2)
2751 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2753 if (!TARGET_THUMB2)
2754 arm_restrict_it = 0;
2756 /* If we are not using the default (ARM mode) section anchor offset
2757 ranges, then set the correct ranges now. */
2758 if (TARGET_THUMB1)
2760 /* Thumb-1 LDR instructions cannot have negative offsets.
2761 Permissible positive offset ranges are 5-bit (for byte loads),
2762 6-bit (for halfword loads), or 7-bit (for word loads).
2763 Empirical results suggest a 7-bit anchor range gives the best
2764 overall code size. */
2765 targetm.min_anchor_offset = 0;
2766 targetm.max_anchor_offset = 127;
2768 else if (TARGET_THUMB2)
2770 /* The minimum is set such that the total size of the block
2771 for a particular anchor is 248 + 1 + 4095 bytes, which is
2772 divisible by eight, ensuring natural spacing of anchors. */
2773 targetm.min_anchor_offset = -248;
2774 targetm.max_anchor_offset = 4095;
2777 /* V5 code we generate is completely interworking capable, so we turn off
2778 TARGET_INTERWORK here to avoid many tests later on. */
2780 /* XXX However, we must pass the right pre-processor defines to CPP
2781 or GLD can get confused. This is a hack. */
2782 if (TARGET_INTERWORK)
2783 arm_cpp_interwork = 1;
2785 if (arm_arch5)
2786 target_flags &= ~MASK_INTERWORK;
2788 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2789 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2791 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2792 error ("iwmmxt abi requires an iwmmxt capable cpu");
2794 if (!global_options_set.x_arm_fpu_index)
2796 const char *target_fpu_name;
2797 bool ok;
2799 #ifdef FPUTYPE_DEFAULT
2800 target_fpu_name = FPUTYPE_DEFAULT;
2801 #else
2802 target_fpu_name = "vfp";
2803 #endif
2805 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2806 CL_TARGET);
2807 gcc_assert (ok);
2810 arm_fpu_desc = &all_fpus[arm_fpu_index];
2812 switch (arm_fpu_desc->model)
2814 case ARM_FP_MODEL_VFP:
2815 arm_fpu_attr = FPU_VFP;
2816 break;
2818 default:
2819 gcc_unreachable();
2822 if (TARGET_AAPCS_BASED)
2824 if (TARGET_CALLER_INTERWORKING)
2825 error ("AAPCS does not support -mcaller-super-interworking");
2826 else
2827 if (TARGET_CALLEE_INTERWORKING)
2828 error ("AAPCS does not support -mcallee-super-interworking");
2831 /* iWMMXt and NEON are incompatible. */
2832 if (TARGET_IWMMXT && TARGET_NEON)
2833 error ("iWMMXt and NEON are incompatible");
2835 /* iWMMXt unsupported under Thumb mode. */
2836 if (TARGET_THUMB && TARGET_IWMMXT)
2837 error ("iWMMXt unsupported under Thumb mode");
2839 /* __fp16 support currently assumes the core has ldrh. */
2840 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2841 sorry ("__fp16 and no ldrh");
2843 /* If soft-float is specified then don't use FPU. */
2844 if (TARGET_SOFT_FLOAT)
2845 arm_fpu_attr = FPU_NONE;
2847 if (TARGET_AAPCS_BASED)
2849 if (arm_abi == ARM_ABI_IWMMXT)
2850 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2851 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2852 && TARGET_HARD_FLOAT
2853 && TARGET_VFP)
2854 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2855 else
2856 arm_pcs_default = ARM_PCS_AAPCS;
2858 else
2860 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2861 sorry ("-mfloat-abi=hard and VFP");
2863 if (arm_abi == ARM_ABI_APCS)
2864 arm_pcs_default = ARM_PCS_APCS;
2865 else
2866 arm_pcs_default = ARM_PCS_ATPCS;
2869 /* For arm2/3 there is no need to do any scheduling if we are doing
2870 software floating-point. */
2871 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2872 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2874 /* Use the cp15 method if it is available. */
2875 if (target_thread_pointer == TP_AUTO)
2877 if (arm_arch6k && !TARGET_THUMB1)
2878 target_thread_pointer = TP_CP15;
2879 else
2880 target_thread_pointer = TP_SOFT;
2883 if (TARGET_HARD_TP && TARGET_THUMB1)
2884 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886 /* Override the default structure alignment for AAPCS ABI. */
2887 if (!global_options_set.x_arm_structure_size_boundary)
2889 if (TARGET_AAPCS_BASED)
2890 arm_structure_size_boundary = 8;
2892 else
2894 if (arm_structure_size_boundary != 8
2895 && arm_structure_size_boundary != 32
2896 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2898 if (ARM_DOUBLEWORD_ALIGN)
2899 warning (0,
2900 "structure size boundary can only be set to 8, 32 or 64");
2901 else
2902 warning (0, "structure size boundary can only be set to 8 or 32");
2903 arm_structure_size_boundary
2904 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2908 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2910 error ("RTP PIC is incompatible with Thumb");
2911 flag_pic = 0;
2914 /* If stack checking is disabled, we can use r10 as the PIC register,
2915 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2916 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2918 if (TARGET_VXWORKS_RTP)
2919 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2920 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2923 if (flag_pic && TARGET_VXWORKS_RTP)
2924 arm_pic_register = 9;
2926 if (arm_pic_register_string != NULL)
2928 int pic_register = decode_reg_name (arm_pic_register_string);
2930 if (!flag_pic)
2931 warning (0, "-mpic-register= is useless without -fpic");
2933 /* Prevent the user from choosing an obviously stupid PIC register. */
2934 else if (pic_register < 0 || call_used_regs[pic_register]
2935 || pic_register == HARD_FRAME_POINTER_REGNUM
2936 || pic_register == STACK_POINTER_REGNUM
2937 || pic_register >= PC_REGNUM
2938 || (TARGET_VXWORKS_RTP
2939 && (unsigned int) pic_register != arm_pic_register))
2940 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2941 else
2942 arm_pic_register = pic_register;
2945 if (TARGET_VXWORKS_RTP
2946 && !global_options_set.x_arm_pic_data_is_text_relative)
2947 arm_pic_data_is_text_relative = 0;
2949 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2950 if (fix_cm3_ldrd == 2)
2952 if (arm_selected_cpu->core == cortexm3)
2953 fix_cm3_ldrd = 1;
2954 else
2955 fix_cm3_ldrd = 0;
2958 /* Enable -munaligned-access by default for
2959 - all ARMv6 architecture-based processors
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-based processors.
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors. */
2967 if (unaligned_access == 2)
2969 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2970 unaligned_access = 1;
2971 else
2972 unaligned_access = 0;
2974 else if (unaligned_access == 1
2975 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2977 warning (0, "target CPU does not support unaligned accesses");
2978 unaligned_access = 0;
2981 if (TARGET_THUMB1 && flag_schedule_insns)
2983 /* Don't warn since it's on by default in -O2. */
2984 flag_schedule_insns = 0;
2987 if (optimize_size)
2989 /* If optimizing for size, bump the number of instructions that we
2990 are prepared to conditionally execute (even on a StrongARM). */
2991 max_insns_skipped = 6;
2993 else
2994 max_insns_skipped = current_tune->max_insns_skipped;
2996 /* Hot/Cold partitioning is not currently supported, since we can't
2997 handle literal pool placement in that case. */
2998 if (flag_reorder_blocks_and_partition)
3000 inform (input_location,
3001 "-freorder-blocks-and-partition not supported on this architecture");
3002 flag_reorder_blocks_and_partition = 0;
3003 flag_reorder_blocks = 1;
3006 if (flag_pic)
3007 /* Hoisting PIC address calculations more aggressively provides a small,
3008 but measurable, size reduction for PIC code. Therefore, we decrease
3009 the bar for unrestricted expression hoisting to the cost of PIC address
3010 calculation, which is 2 instructions. */
3011 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3012 global_options.x_param_values,
3013 global_options_set.x_param_values);
3015 /* ARM EABI defaults to strict volatile bitfields. */
3016 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3017 && abi_version_at_least(2))
3018 flag_strict_volatile_bitfields = 1;
3020 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3021 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3022 if (flag_prefetch_loop_arrays < 0
3023 && HAVE_prefetch
3024 && optimize >= 3
3025 && current_tune->num_prefetch_slots > 0)
3026 flag_prefetch_loop_arrays = 1;
3028 /* Set up the parameters to be used in the prefetching algorithm. Do not override
3029 the defaults unless we are tuning for a core we have researched values for. */
3030 if (current_tune->num_prefetch_slots > 0)
3031 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3032 current_tune->num_prefetch_slots,
3033 global_options.x_param_values,
3034 global_options_set.x_param_values);
3035 if (current_tune->l1_cache_line_size >= 0)
3036 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3037 current_tune->l1_cache_line_size,
3038 global_options.x_param_values,
3039 global_options_set.x_param_values);
3040 if (current_tune->l1_cache_size >= 0)
3041 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3042 current_tune->l1_cache_size,
3043 global_options.x_param_values,
3044 global_options_set.x_param_values);
3046 /* Use Neon rather than core registers to perform 64-bit
3047 operations. */
3048 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3049 if (use_neon_for_64bits == 1)
3050 prefer_neon_for_64bits = true;
3052 /* Use the alternative scheduling-pressure algorithm by default. */
3053 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3054 global_options.x_param_values,
3055 global_options_set.x_param_values);
3057 /* Disable shrink-wrapping when optimizing a function for size, since it tends to
3058 generate additional returns. */
3059 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3060 flag_shrink_wrap = false;
3061 /* TBD: Dwarf info for apcs frame is not handled yet. */
3062 if (TARGET_APCS_FRAME)
3063 flag_shrink_wrap = false;
3065 /* We only support -mslow-flash-data on armv7-m targets. */
3066 if (target_slow_flash_data
3067 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3068 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3069 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3071 /* Currently, for slow flash data, we just disable literal pools. */
3072 if (target_slow_flash_data)
3073 arm_disable_literal_pool = true;
3075 /* Register global variables with the garbage collector. */
3076 arm_add_gc_roots ();
3079 static void
3080 arm_add_gc_roots (void)
3082 gcc_obstack_init(&minipool_obstack);
3083 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3086 /* A table of known ARM exception types.
3087 For use with the interrupt function attribute. */
3089 typedef struct
3091 const char *const arg;
3092 const unsigned long return_value;
3094 isr_attribute_arg;
3096 static const isr_attribute_arg isr_attribute_args [] =
3098 { "IRQ", ARM_FT_ISR },
3099 { "irq", ARM_FT_ISR },
3100 { "FIQ", ARM_FT_FIQ },
3101 { "fiq", ARM_FT_FIQ },
3102 { "ABORT", ARM_FT_ISR },
3103 { "abort", ARM_FT_ISR },
3104 { "ABORT", ARM_FT_ISR },
3105 { "abort", ARM_FT_ISR },
3106 { "UNDEF", ARM_FT_EXCEPTION },
3107 { "undef", ARM_FT_EXCEPTION },
3108 { "SWI", ARM_FT_EXCEPTION },
3109 { "swi", ARM_FT_EXCEPTION },
3110 { NULL, ARM_FT_NORMAL }
3113 /* Returns the (interrupt) function type of the current
3114 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3116 static unsigned long
3117 arm_isr_value (tree argument)
3119 const isr_attribute_arg * ptr;
3120 const char * arg;
3122 if (!arm_arch_notm)
3123 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3125 /* No argument - default to IRQ. */
3126 if (argument == NULL_TREE)
3127 return ARM_FT_ISR;
3129 /* Get the value of the argument. */
3130 if (TREE_VALUE (argument) == NULL_TREE
3131 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3132 return ARM_FT_UNKNOWN;
3134 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3136 /* Check it against the list of known arguments. */
3137 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3138 if (streq (arg, ptr->arg))
3139 return ptr->return_value;
3141 /* An unrecognized interrupt type. */
3142 return ARM_FT_UNKNOWN;
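/* Illustrative use of the attribute handled above:

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is matched exactly against isr_attribute_args
   (both spellings appear in the table); with no argument the function
   defaults to ARM_FT_ISR.  */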
3145 /* Computes the type of the current function. */
3147 static unsigned long
3148 arm_compute_func_type (void)
3150 unsigned long type = ARM_FT_UNKNOWN;
3151 tree a;
3152 tree attr;
3154 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3156 /* Decide if the current function is volatile. Such functions
3157 never return, and many memory cycles can be saved by not storing
3158 register values that will never be needed again. This optimization
3159 was added to speed up context switching in a kernel application. */
3160 if (optimize > 0
3161 && (TREE_NOTHROW (current_function_decl)
3162 || !(flag_unwind_tables
3163 || (flag_exceptions
3164 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3165 && TREE_THIS_VOLATILE (current_function_decl))
3166 type |= ARM_FT_VOLATILE;
3168 if (cfun->static_chain_decl != NULL)
3169 type |= ARM_FT_NESTED;
3171 attr = DECL_ATTRIBUTES (current_function_decl);
3173 a = lookup_attribute ("naked", attr);
3174 if (a != NULL_TREE)
3175 type |= ARM_FT_NAKED;
3177 a = lookup_attribute ("isr", attr);
3178 if (a == NULL_TREE)
3179 a = lookup_attribute ("interrupt", attr);
3181 if (a == NULL_TREE)
3182 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3183 else
3184 type |= arm_isr_value (TREE_VALUE (a));
3186 return type;
3189 /* Returns the type of the current function. */
3191 unsigned long
3192 arm_current_func_type (void)
3194 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3195 cfun->machine->func_type = arm_compute_func_type ();
3197 return cfun->machine->func_type;
3200 bool
3201 arm_allocate_stack_slots_for_args (void)
3203 /* Naked functions should not allocate stack slots for arguments. */
3204 return !IS_NAKED (arm_current_func_type ());
3207 static bool
3208 arm_warn_func_return (tree decl)
3210 /* Naked functions are implemented entirely in assembly, including the
3211 return sequence, so suppress warnings about this. */
3212 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
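/* Illustrative: a function such as

       int foo (int a) __attribute__ ((naked));

   provides its own return sequence in inline assembly, so the hook above
   keeps GCC from warning that control reaches the end of a non-void
   function.  */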
3216 /* Output assembler code for a block containing the constant parts
3217 of a trampoline, leaving space for the variable parts.
3219 On the ARM, (if r8 is the static chain regnum, and remembering that
3220 referencing pc adds an offset of 8) the trampoline looks like:
3221 ldr r8, [pc, #0]
3222 ldr pc, [pc]
3223 .word static chain value
3224 .word function's address
3225 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3227 static void
3228 arm_asm_trampoline_template (FILE *f)
3230 if (TARGET_ARM)
3232 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3233 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3235 else if (TARGET_THUMB2)
3237 /* The Thumb-2 trampoline is similar to the arm implementation.
3238 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3239 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3240 STATIC_CHAIN_REGNUM, PC_REGNUM);
3241 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3243 else
3245 ASM_OUTPUT_ALIGN (f, 2);
3246 fprintf (f, "\t.code\t16\n");
3247 fprintf (f, ".Ltrampoline_start:\n");
3248 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3249 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3250 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3251 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3252 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3253 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3255 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3256 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3259 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3261 static void
3262 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3264 rtx fnaddr, mem, a_tramp;
3266 emit_block_move (m_tramp, assemble_trampoline_template (),
3267 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3269 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3270 emit_move_insn (mem, chain_value);
3272 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3273 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3274 emit_move_insn (mem, fnaddr);
3276 a_tramp = XEXP (m_tramp, 0);
3277 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3278 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3279 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
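/* For 32-bit targets this patches the two .word slots of the template
   emitted above: the static chain value goes at offset 8 and the target
   function's address at offset 12, and __clear_cache is then invoked over
   the whole trampoline so that instruction fetch sees the new words.  */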
3282 /* Thumb trampolines should be entered in thumb mode, so set
3283 the bottom bit of the address. */
3285 static rtx
3286 arm_trampoline_adjust_address (rtx addr)
3288 if (TARGET_THUMB)
3289 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3290 NULL, 0, OPTAB_LIB_WIDEN);
3291 return addr;
3294 /* Return 1 if it is possible to return using a single instruction.
3295 If SIBLING is non-null, this is a test for a return before a sibling
3296 call. SIBLING is the call insn, so we can examine its register usage. */
3299 use_return_insn (int iscond, rtx sibling)
3301 int regno;
3302 unsigned int func_type;
3303 unsigned long saved_int_regs;
3304 unsigned HOST_WIDE_INT stack_adjust;
3305 arm_stack_offsets *offsets;
3307 /* Never use a return instruction before reload has run. */
3308 if (!reload_completed)
3309 return 0;
3311 func_type = arm_current_func_type ();
3313 /* Naked, volatile and stack alignment functions need special
3314 consideration. */
3315 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3316 return 0;
3318 /* So do interrupt functions that use the frame pointer and Thumb
3319 interrupt functions. */
3320 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3321 return 0;
3323 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3324 && !optimize_function_for_size_p (cfun))
3325 return 0;
3327 offsets = arm_get_frame_offsets ();
3328 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3330 /* As do variadic functions. */
3331 if (crtl->args.pretend_args_size
3332 || cfun->machine->uses_anonymous_args
3333 /* Or if the function calls __builtin_eh_return () */
3334 || crtl->calls_eh_return
3335 /* Or if the function calls alloca */
3336 || cfun->calls_alloca
3337 /* Or if there is a stack adjustment. However, if the stack pointer
3338 is saved on the stack, we can use a pre-incrementing stack load. */
3339 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3340 && stack_adjust == 4)))
3341 return 0;
3343 saved_int_regs = offsets->saved_regs_mask;
3345 /* Unfortunately, the insn
3347 ldmib sp, {..., sp, ...}
3349 triggers a bug on most SA-110 based devices, such that the stack
3350 pointer won't be correctly restored if the instruction takes a
3351 page fault. We work around this problem by popping r3 along with
3352 the other registers, since that is never slower than executing
3353 another instruction.
3355 We test for !arm_arch5 here, because code for any architecture
3356 less than this could potentially be run on one of the buggy
3357 chips. */
3358 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3360 /* Validate that r3 is a call-clobbered register (always true in
3361 the default abi) ... */
3362 if (!call_used_regs[3])
3363 return 0;
3365 /* ... that it isn't being used for a return value ... */
3366 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3367 return 0;
3369 /* ... or for a tail-call argument ... */
3370 if (sibling)
3372 gcc_assert (CALL_P (sibling));
3374 if (find_regno_fusage (sibling, USE, 3))
3375 return 0;
3378 /* ... and that there are no call-saved registers in r0-r2
3379 (always true in the default ABI). */
3380 if (saved_int_regs & 0x7)
3381 return 0;
3384 /* Can't be done if interworking with Thumb, and any registers have been
3385 stacked. */
3386 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3387 return 0;
3389 /* On StrongARM, conditional returns are expensive if they aren't
3390 taken and multiple registers have been stacked. */
3391 if (iscond && arm_tune_strongarm)
3393 /* Conditional return when just the LR is stored is a simple
3394 conditional-load instruction, that's not expensive. */
3395 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3396 return 0;
3398 if (flag_pic
3399 && arm_pic_register != INVALID_REGNUM
3400 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3401 return 0;
3404 /* If there are saved registers but the LR isn't saved, then we need
3405 two instructions for the return. */
3406 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3407 return 0;
3409 /* Can't be done if any of the VFP regs are pushed,
3410 since this also requires an insn. */
3411 if (TARGET_HARD_FLOAT && TARGET_VFP)
3412 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3413 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3414 return 0;
3416 if (TARGET_REALLY_IWMMXT)
3417 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3418 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3419 return 0;
3421 return 1;
3424 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3425 shrink-wrapping if possible. This is the case if we need to emit a
3426 prologue, which we can test by looking at the offsets. */
3427 bool
3428 use_simple_return_p (void)
3430 arm_stack_offsets *offsets;
3432 offsets = arm_get_frame_offsets ();
3433 return offsets->outgoing_args != 0;
3436 /* Return TRUE if int I is a valid immediate ARM constant. */
3439 const_ok_for_arm (HOST_WIDE_INT i)
3441 int lowbit;
3443 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3444 be all zero, or all one. */
3445 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3446 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3447 != ((~(unsigned HOST_WIDE_INT) 0)
3448 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3449 return FALSE;
3451 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3453 /* Fast return for 0 and small values. We must do this for zero, since
3454 the code below can't handle that one case. */
3455 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3456 return TRUE;
3458 /* Get the number of trailing zeros. */
3459 lowbit = ffs((int) i) - 1;
3461 /* Only even shifts are allowed in ARM mode so round down to the
3462 nearest even number. */
3463 if (TARGET_ARM)
3464 lowbit &= ~1;
3466 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3467 return TRUE;
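  /* As illustration (values chosen for exposition only): 0x000000ff and
     0x00000ff0 pass the test above in both ARM and Thumb-2 modes, while
     0x00001fe0 (0xff << 5) only passes for Thumb-2, because ARM
     immediates are an 8-bit value rotated by an even amount.  */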
3469 if (TARGET_ARM)
3471 /* Allow rotated constants in ARM mode. */
3472 if (lowbit <= 4
3473 && ((i & ~0xc000003f) == 0
3474 || (i & ~0xf000000f) == 0
3475 || (i & ~0xfc000003) == 0))
3476 return TRUE;
3478 else
3480 HOST_WIDE_INT v;
3482 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3483 v = i & 0xff;
3484 v |= v << 16;
3485 if (i == v || i == (v | (v << 8)))
3486 return TRUE;
3488 /* Allow repeated pattern 0xXY00XY00. */
3489 v = i & 0xff00;
3490 v |= v << 16;
3491 if (i == v)
3492 return TRUE;
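      /* Illustrative Thumb-2 examples: 0x00120012, 0x56005600 and
         0x34343434 are all accepted by the replicated-pattern tests
         above, even though none of them is a shifted 8-bit value.  */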
3495 return FALSE;
3498 /* Return true if I is a valid constant for the operation CODE. */
3500 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3502 if (const_ok_for_arm (i))
3503 return 1;
3505 switch (code)
3507 case SET:
3508 /* See if we can use movw. */
3509 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3510 return 1;
3511 else
3512 /* Otherwise, try mvn. */
3513 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3515 case PLUS:
3516 /* See if we can use addw or subw. */
3517 if (TARGET_THUMB2
3518 && ((i & 0xfffff000) == 0
3519 || ((-i) & 0xfffff000) == 0))
3520 return 1;
3521 /* else fall through. */
3523 case COMPARE:
3524 case EQ:
3525 case NE:
3526 case GT:
3527 case LE:
3528 case LT:
3529 case GE:
3530 case GEU:
3531 case LTU:
3532 case GTU:
3533 case LEU:
3534 case UNORDERED:
3535 case ORDERED:
3536 case UNEQ:
3537 case UNGE:
3538 case UNLT:
3539 case UNGT:
3540 case UNLE:
3541 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3543 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3544 case XOR:
3545 return 0;
3547 case IOR:
3548 if (TARGET_THUMB2)
3549 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3550 return 0;
3552 case AND:
3553 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3555 default:
3556 gcc_unreachable ();
3560 /* Return true if I is a valid di mode constant for the operation CODE. */
3562 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3564 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3565 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3566 rtx hi = GEN_INT (hi_val);
3567 rtx lo = GEN_INT (lo_val);
3569 if (TARGET_THUMB1)
3570 return 0;
3572 switch (code)
3574 case AND:
3575 case IOR:
3576 case XOR:
3577 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3578 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3579 case PLUS:
3580 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3582 default:
3583 return 0;
3587 /* Emit a sequence of insns to handle a large constant.
3588 CODE is the code of the operation required, it can be any of SET, PLUS,
3589 IOR, AND, XOR, MINUS;
3590 MODE is the mode in which the operation is being performed;
3591 VAL is the integer to operate on;
3592 SOURCE is the other operand (a register, or a null-pointer for SET);
3593 SUBTARGETS means it is safe to create scratch registers if that will
3594 either produce a simpler sequence, or we will want to cse the values.
3595 Return value is the number of insns emitted. */
3597 /* ??? Tweak this for thumb2. */
3599 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3600 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3602 rtx cond;
3604 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3605 cond = COND_EXEC_TEST (PATTERN (insn));
3606 else
3607 cond = NULL_RTX;
3609 if (subtargets || code == SET
3610 || (REG_P (target) && REG_P (source)
3611 && REGNO (target) != REGNO (source)))
3613 /* After arm_reorg has been called, we can't fix up expensive
3614 constants by pushing them into memory so we must synthesize
3615 them in-line, regardless of the cost. This is only likely to
3616 be more costly on chips that have load delay slots and we are
3617 compiling without running the scheduler (so no splitting
3618 occurred before the final instruction emission).
3620 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3622 if (!cfun->machine->after_arm_reorg
3623 && !cond
3624 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3625 1, 0)
3626 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3627 + (code != SET))))
3629 if (code == SET)
3631 /* Currently SET is the only monadic value for CODE; all
3632 the rest are dyadic. */
3633 if (TARGET_USE_MOVT)
3634 arm_emit_movpair (target, GEN_INT (val));
3635 else
3636 emit_set_insn (target, GEN_INT (val));
3638 return 1;
3640 else
3642 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3644 if (TARGET_USE_MOVT)
3645 arm_emit_movpair (temp, GEN_INT (val));
3646 else
3647 emit_set_insn (temp, GEN_INT (val));
3649 /* For MINUS, the constant is the value we subtract from (i.e.
3650 target = constant - source), since we never have subtraction of a constant. */
3651 if (code == MINUS)
3652 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3653 else
3654 emit_set_insn (target,
3655 gen_rtx_fmt_ee (code, mode, source, temp));
3656 return 2;
3661 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3665 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3666 ARM/THUMB2 immediates and add up to VAL.
3667 The function's return value gives the number of insns required. */
3668 static int
3669 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3670 struct four_ints *return_sequence)
3672 int best_consecutive_zeros = 0;
3673 int i;
3674 int best_start = 0;
3675 int insns1, insns2;
3676 struct four_ints tmp_sequence;
3678 /* If we aren't targeting ARM, the best place to start is always at
3679 the bottom, otherwise look more closely. */
3680 if (TARGET_ARM)
3682 for (i = 0; i < 32; i += 2)
3684 int consecutive_zeros = 0;
3686 if (!(val & (3 << i)))
3688 while ((i < 32) && !(val & (3 << i)))
3690 consecutive_zeros += 2;
3691 i += 2;
3693 if (consecutive_zeros > best_consecutive_zeros)
3695 best_consecutive_zeros = consecutive_zeros;
3696 best_start = i - consecutive_zeros;
3698 i -= 2;
3703 /* So long as it won't require any more insns to do so, it's
3704 desirable to emit a small constant (in bits 0...9) in the last
3705 insn. This way there is more chance that it can be combined with
3706 a later addressing insn to form a pre-indexed load or store
3707 operation. Consider:
3709 *((volatile int *)0xe0000100) = 1;
3710 *((volatile int *)0xe0000110) = 2;
3712 We want this to wind up as:
3714 mov rA, #0xe0000000
3715 mov rB, #1
3716 str rB, [rA, #0x100]
3717 mov rB, #2
3718 str rB, [rA, #0x110]
3720 rather than having to synthesize both large constants from scratch.
3722 Therefore, we calculate how many insns would be required to emit
3723 the constant starting from `best_start', and also starting from
3724 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3725 yield a shorter sequence, we may as well use zero. */
3726 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3727 if (best_start != 0
3728 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3730 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3731 if (insns2 <= insns1)
3733 *return_sequence = tmp_sequence;
3734 insns1 = insns2;
3738 return insns1;
3741 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3742 static int
3743 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3744 struct four_ints *return_sequence, int i)
3746 int remainder = val & 0xffffffff;
3747 int insns = 0;
3749 /* Try and find a way of doing the job in either two or three
3750 instructions.
3752 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3753 location. We start at position I. This may be the MSB, or
3754 optimal_immediate_sequence may have positioned it at the largest block
3755 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3756 wrapping around to the top of the word when we drop off the bottom.
3757 In the worst case this code should produce no more than four insns.
3759 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3760 constants, shifted to any arbitrary location. We should always start
3761 at the MSB. */
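  /* As a worked example (values chosen for illustration): for code == SET
     and val == 0x00ff00ff in ARM mode, the loop below returns the two
     immediates 0x000000ff and 0x00ff0000, i.e. a MOV of one byte followed
     by one more data-processing insn for the other.  */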
3764 int end;
3765 unsigned int b1, b2, b3, b4;
3766 unsigned HOST_WIDE_INT result;
3767 int loc;
3769 gcc_assert (insns < 4);
3771 if (i <= 0)
3772 i += 32;
3774 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3775 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3777 loc = i;
3778 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3779 /* We can use addw/subw for the last 12 bits. */
3780 result = remainder;
3781 else
3783 /* Use an 8-bit shifted/rotated immediate. */
3784 end = i - 8;
3785 if (end < 0)
3786 end += 32;
3787 result = remainder & ((0x0ff << end)
3788 | ((i < end) ? (0xff >> (32 - end))
3789 : 0));
3790 i -= 8;
3793 else
3795 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3796 arbitrary shifts. */
3797 i -= TARGET_ARM ? 2 : 1;
3798 continue;
3801 /* Next, see if we can do a better job with a thumb2 replicated
3802 constant.
3804 We do it this way around to catch the cases like 0x01F001E0 where
3805 two 8-bit immediates would work, but a replicated constant would
3806 make it worse.
3808 TODO: 16-bit constants that don't clear all the bits, but still win.
3809 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3810 if (TARGET_THUMB2)
3812 b1 = (remainder & 0xff000000) >> 24;
3813 b2 = (remainder & 0x00ff0000) >> 16;
3814 b3 = (remainder & 0x0000ff00) >> 8;
3815 b4 = remainder & 0xff;
3817 if (loc > 24)
3819 /* The 8-bit immediate already found clears b1 (and maybe b2),
3820 but must leave b3 and b4 alone. */
3822 /* First try to find a 32-bit replicated constant that clears
3823 almost everything. We can assume that we can't do it in one,
3824 or else we wouldn't be here. */
3825 unsigned int tmp = b1 & b2 & b3 & b4;
3826 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3827 + (tmp << 24);
3828 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3829 + (tmp == b3) + (tmp == b4);
3830 if (tmp
3831 && (matching_bytes >= 3
3832 || (matching_bytes == 2
3833 && const_ok_for_op (remainder & ~tmp2, code))))
3835 /* At least 3 of the bytes match, and the fourth has at
3836 least as many bits set, or two of the bytes match
3837 and it will only require one more insn to finish. */
3838 result = tmp2;
3839 i = tmp != b1 ? 32
3840 : tmp != b2 ? 24
3841 : tmp != b3 ? 16
3842 : 8;
3845 /* Second, try to find a 16-bit replicated constant that can
3846 leave three of the bytes clear. If b2 or b4 is already
3847 zero, then we can. If the 8-bit from above would not
3848 clear b2 anyway, then we still win. */
3849 else if (b1 == b3 && (!b2 || !b4
3850 || (remainder & 0x00ff0000 & ~result)))
3852 result = remainder & 0xff00ff00;
3853 i = 24;
3856 else if (loc > 16)
3858 /* The 8-bit immediate already found clears b2 (and maybe b3)
3859 and we don't get here unless b1 is already clear, but it will
3860 leave b4 unchanged. */
3862 /* If we can clear b2 and b4 at once, then we win, since the
3863 8-bits couldn't possibly reach that far. */
3864 if (b2 == b4)
3866 result = remainder & 0x00ff00ff;
3867 i = 16;
3872 return_sequence->i[insns++] = result;
3873 remainder &= ~result;
3875 if (code == SET || code == MINUS)
3876 code = PLUS;
3878 while (remainder);
3880 return insns;
3883 /* Emit an instruction with the indicated PATTERN. If COND is
3884 non-NULL, conditionalize the execution of the instruction on COND
3885 being true. */
3887 static void
3888 emit_constant_insn (rtx cond, rtx pattern)
3890 if (cond)
3891 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3892 emit_insn (pattern);
3895 /* As above, but extra parameter GENERATE which, if clear, suppresses
3896 RTL generation. */
3898 static int
3899 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3900 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3901 int generate)
3903 int can_invert = 0;
3904 int can_negate = 0;
3905 int final_invert = 0;
3906 int i;
3907 int set_sign_bit_copies = 0;
3908 int clear_sign_bit_copies = 0;
3909 int clear_zero_bit_copies = 0;
3910 int set_zero_bit_copies = 0;
3911 int insns = 0, neg_insns, inv_insns;
3912 unsigned HOST_WIDE_INT temp1, temp2;
3913 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3914 struct four_ints *immediates;
3915 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3917 /* Find out which operations are safe for a given CODE. Also do a quick
3918 check for degenerate cases; these can occur when DImode operations
3919 are split. */
3920 switch (code)
3922 case SET:
3923 can_invert = 1;
3924 break;
3926 case PLUS:
3927 can_negate = 1;
3928 break;
3930 case IOR:
3931 if (remainder == 0xffffffff)
3933 if (generate)
3934 emit_constant_insn (cond,
3935 gen_rtx_SET (VOIDmode, target,
3936 GEN_INT (ARM_SIGN_EXTEND (val))));
3937 return 1;
3940 if (remainder == 0)
3942 if (reload_completed && rtx_equal_p (target, source))
3943 return 0;
3945 if (generate)
3946 emit_constant_insn (cond,
3947 gen_rtx_SET (VOIDmode, target, source));
3948 return 1;
3950 break;
3952 case AND:
3953 if (remainder == 0)
3955 if (generate)
3956 emit_constant_insn (cond,
3957 gen_rtx_SET (VOIDmode, target, const0_rtx));
3958 return 1;
3960 if (remainder == 0xffffffff)
3962 if (reload_completed && rtx_equal_p (target, source))
3963 return 0;
3964 if (generate)
3965 emit_constant_insn (cond,
3966 gen_rtx_SET (VOIDmode, target, source));
3967 return 1;
3969 can_invert = 1;
3970 break;
3972 case XOR:
3973 if (remainder == 0)
3975 if (reload_completed && rtx_equal_p (target, source))
3976 return 0;
3977 if (generate)
3978 emit_constant_insn (cond,
3979 gen_rtx_SET (VOIDmode, target, source));
3980 return 1;
3983 if (remainder == 0xffffffff)
3985 if (generate)
3986 emit_constant_insn (cond,
3987 gen_rtx_SET (VOIDmode, target,
3988 gen_rtx_NOT (mode, source)));
3989 return 1;
3991 final_invert = 1;
3992 break;
3994 case MINUS:
3995 /* We treat MINUS as (val - source), since (source - val) is always
3996 passed as (source + (-val)). */
3997 if (remainder == 0)
3999 if (generate)
4000 emit_constant_insn (cond,
4001 gen_rtx_SET (VOIDmode, target,
4002 gen_rtx_NEG (mode, source)));
4003 return 1;
4005 if (const_ok_for_arm (val))
4007 if (generate)
4008 emit_constant_insn (cond,
4009 gen_rtx_SET (VOIDmode, target,
4010 gen_rtx_MINUS (mode, GEN_INT (val),
4011 source)));
4012 return 1;
4015 break;
4017 default:
4018 gcc_unreachable ();
4021 /* If we can do it in one insn get out quickly. */
4022 if (const_ok_for_op (val, code))
4024 if (generate)
4025 emit_constant_insn (cond,
4026 gen_rtx_SET (VOIDmode, target,
4027 (source
4028 ? gen_rtx_fmt_ee (code, mode, source,
4029 GEN_INT (val))
4030 : GEN_INT (val))));
4031 return 1;
4034 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4035 insn. */
4036 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4037 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4039 if (generate)
4041 if (mode == SImode && i == 16)
4042 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4043 smaller insn. */
4044 emit_constant_insn (cond,
4045 gen_zero_extendhisi2
4046 (target, gen_lowpart (HImode, source)));
4047 else
4048 /* Extz only supports SImode, but we can coerce the operands
4049 into that mode. */
4050 emit_constant_insn (cond,
4051 gen_extzv_t2 (gen_lowpart (SImode, target),
4052 gen_lowpart (SImode, source),
4053 GEN_INT (i), const0_rtx));
4056 return 1;
4059 /* Calculate a few attributes that may be useful for specific
4060 optimizations. */
4061 /* Count number of leading zeros. */
4062 for (i = 31; i >= 0; i--)
4064 if ((remainder & (1 << i)) == 0)
4065 clear_sign_bit_copies++;
4066 else
4067 break;
4070 /* Count number of leading 1's. */
4071 for (i = 31; i >= 0; i--)
4073 if ((remainder & (1 << i)) != 0)
4074 set_sign_bit_copies++;
4075 else
4076 break;
4079 /* Count number of trailing zero's. */
4080 for (i = 0; i <= 31; i++)
4082 if ((remainder & (1 << i)) == 0)
4083 clear_zero_bit_copies++;
4084 else
4085 break;
4088 /* Count number of trailing 1's. */
4089 for (i = 0; i <= 31; i++)
4091 if ((remainder & (1 << i)) != 0)
4092 set_zero_bit_copies++;
4093 else
4094 break;
4097 switch (code)
4099 case SET:
4100 /* See if we can do this by sign_extending a constant that is known
4101 to be negative. This is a good way of doing it, since the shift
4102 may well merge into a subsequent insn. */
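      /* For instance (illustrative only): remainder == 0xfffffe00 has 23
         leading ones; shifted left by 22 it becomes 0x80000000, which is a
         valid immediate, so the value can be built as MOV #0x80000000
         followed by ASR #22.  */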
4103 if (set_sign_bit_copies > 1)
4105 if (const_ok_for_arm
4106 (temp1 = ARM_SIGN_EXTEND (remainder
4107 << (set_sign_bit_copies - 1))))
4109 if (generate)
4111 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4112 emit_constant_insn (cond,
4113 gen_rtx_SET (VOIDmode, new_src,
4114 GEN_INT (temp1)));
4115 emit_constant_insn (cond,
4116 gen_ashrsi3 (target, new_src,
4117 GEN_INT (set_sign_bit_copies - 1)));
4119 return 2;
4121 /* For an inverted constant, we will need to set the low bits,
4122 these will be shifted out of harm's way. */
4123 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4124 if (const_ok_for_arm (~temp1))
4126 if (generate)
4128 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4129 emit_constant_insn (cond,
4130 gen_rtx_SET (VOIDmode, new_src,
4131 GEN_INT (temp1)));
4132 emit_constant_insn (cond,
4133 gen_ashrsi3 (target, new_src,
4134 GEN_INT (set_sign_bit_copies - 1)));
4136 return 2;
4140 /* See if we can calculate the value as the difference between two
4141 valid immediates. */
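      /* E.g. (illustrative, for cores without MOVW): 0x0001fffe is not a
         valid immediate, but it equals 0x20000 - 2 and both of those are,
         so it can be built as a MOV followed by a SUB.  */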
4142 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4144 int topshift = clear_sign_bit_copies & ~1;
4146 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4147 & (0xff000000 >> topshift));
4149 /* If temp1 is zero, then that means the 9 most significant
4150 bits of remainder were 1 and we've caused it to overflow.
4151 When topshift is 0 we don't need to do anything since we
4152 can borrow from 'bit 32'. */
4153 if (temp1 == 0 && topshift != 0)
4154 temp1 = 0x80000000 >> (topshift - 1);
4156 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4158 if (const_ok_for_arm (temp2))
4160 if (generate)
4162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4163 emit_constant_insn (cond,
4164 gen_rtx_SET (VOIDmode, new_src,
4165 GEN_INT (temp1)));
4166 emit_constant_insn (cond,
4167 gen_addsi3 (target, new_src,
4168 GEN_INT (-temp2)));
4171 return 2;
4175 /* See if we can generate this by setting the bottom (or the top)
4176 16 bits, and then shifting these into the other half of the
4177 word. We only look for the simplest cases; to do more would cost
4178 too much. Be careful, however, not to generate this when the
4179 alternative would take fewer insns. */
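      /* E.g. (illustrative): 0x01230123 is 0x123 | (0x123 << 16), so we can
         synthesize 0x123 and then ORR it with itself shifted left by 16,
         rather than building the full 32-bit value from scratch.  */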
4180 if (val & 0xffff0000)
4182 temp1 = remainder & 0xffff0000;
4183 temp2 = remainder & 0x0000ffff;
4185 /* Overlaps outside this range are best done using other methods. */
4186 for (i = 9; i < 24; i++)
4188 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4189 && !const_ok_for_arm (temp2))
4191 rtx new_src = (subtargets
4192 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4193 : target);
4194 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4195 source, subtargets, generate);
4196 source = new_src;
4197 if (generate)
4198 emit_constant_insn
4199 (cond,
4200 gen_rtx_SET
4201 (VOIDmode, target,
4202 gen_rtx_IOR (mode,
4203 gen_rtx_ASHIFT (mode, source,
4204 GEN_INT (i)),
4205 source)));
4206 return insns + 1;
4210 /* Don't duplicate cases already considered. */
4211 for (i = 17; i < 24; i++)
4213 if (((temp1 | (temp1 >> i)) == remainder)
4214 && !const_ok_for_arm (temp1))
4216 rtx new_src = (subtargets
4217 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4218 : target);
4219 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4220 source, subtargets, generate);
4221 source = new_src;
4222 if (generate)
4223 emit_constant_insn
4224 (cond,
4225 gen_rtx_SET (VOIDmode, target,
4226 gen_rtx_IOR
4227 (mode,
4228 gen_rtx_LSHIFTRT (mode, source,
4229 GEN_INT (i)),
4230 source)));
4231 return insns + 1;
4235 break;
4237 case IOR:
4238 case XOR:
4239 /* If we have IOR or XOR, and the constant can be loaded in a
4240 single instruction, and we can find a temporary to put it in,
4241 then this can be done in two instructions instead of 3-4. */
4242 if (subtargets
4243 /* TARGET can't be NULL if SUBTARGETS is 0 */
4244 || (reload_completed && !reg_mentioned_p (target, source)))
4246 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4248 if (generate)
4250 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4252 emit_constant_insn (cond,
4253 gen_rtx_SET (VOIDmode, sub,
4254 GEN_INT (val)));
4255 emit_constant_insn (cond,
4256 gen_rtx_SET (VOIDmode, target,
4257 gen_rtx_fmt_ee (code, mode,
4258 source, sub)));
4260 return 2;
4264 if (code == XOR)
4265 break;
4267 /* Convert.
4268 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4269 followed by 0s, e.g. 0xfff00000)
4270 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4272 This can be done in 2 instructions by using shifts with mov or mvn.
4273 e.g. for
4274 x = x | 0xfff00000;
4275 we generate.
4276 mvn r0, r0, asl #12
4277 mvn r0, r0, lsr #12 */
4278 if (set_sign_bit_copies > 8
4279 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4281 if (generate)
4283 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4284 rtx shift = GEN_INT (set_sign_bit_copies);
4286 emit_constant_insn
4287 (cond,
4288 gen_rtx_SET (VOIDmode, sub,
4289 gen_rtx_NOT (mode,
4290 gen_rtx_ASHIFT (mode,
4291 source,
4292 shift))));
4293 emit_constant_insn
4294 (cond,
4295 gen_rtx_SET (VOIDmode, target,
4296 gen_rtx_NOT (mode,
4297 gen_rtx_LSHIFTRT (mode, sub,
4298 shift))));
4300 return 2;
4303 /* Convert
4304 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4306 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4308 E.g. for r0 = r0 | 0xfff
4309 mvn r0, r0, lsr #12
4310 mvn r0, r0, asl #12
4313 if (set_zero_bit_copies > 8
4314 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4316 if (generate)
4318 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4319 rtx shift = GEN_INT (set_zero_bit_copies);
4321 emit_constant_insn
4322 (cond,
4323 gen_rtx_SET (VOIDmode, sub,
4324 gen_rtx_NOT (mode,
4325 gen_rtx_LSHIFTRT (mode,
4326 source,
4327 shift))));
4328 emit_constant_insn
4329 (cond,
4330 gen_rtx_SET (VOIDmode, target,
4331 gen_rtx_NOT (mode,
4332 gen_rtx_ASHIFT (mode, sub,
4333 shift))));
4335 return 2;
4338 /* This will never be reached for Thumb2 because orn is a valid
4339 instruction. This is for Thumb1 and the ARM 32 bit cases.
4341 x = y | constant (such that ~constant is a valid constant)
4342 Transform this to
4343 x = ~(~y & ~constant).
4345 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4347 if (generate)
4349 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4350 emit_constant_insn (cond,
4351 gen_rtx_SET (VOIDmode, sub,
4352 gen_rtx_NOT (mode, source)));
4353 source = sub;
4354 if (subtargets)
4355 sub = gen_reg_rtx (mode);
4356 emit_constant_insn (cond,
4357 gen_rtx_SET (VOIDmode, sub,
4358 gen_rtx_AND (mode, source,
4359 GEN_INT (temp1))));
4360 emit_constant_insn (cond,
4361 gen_rtx_SET (VOIDmode, target,
4362 gen_rtx_NOT (mode, sub)));
4364 return 3;
4366 break;
4368 case AND:
4369 /* See if two shifts will do 2 or more insn's worth of work. */
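      /* E.g. (illustrative, where the single-insn paths above do not
         apply): x & 0x7fff can be done as a left shift by 17 followed by
         a logical right shift by 17.  */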
4370 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4372 HOST_WIDE_INT shift_mask = ((0xffffffff
4373 << (32 - clear_sign_bit_copies))
4374 & 0xffffffff);
4376 if ((remainder | shift_mask) != 0xffffffff)
4378 if (generate)
4380 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4381 insns = arm_gen_constant (AND, mode, cond,
4382 remainder | shift_mask,
4383 new_src, source, subtargets, 1);
4384 source = new_src;
4386 else
4388 rtx targ = subtargets ? NULL_RTX : target;
4389 insns = arm_gen_constant (AND, mode, cond,
4390 remainder | shift_mask,
4391 targ, source, subtargets, 0);
4395 if (generate)
4397 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4398 rtx shift = GEN_INT (clear_sign_bit_copies);
4400 emit_insn (gen_ashlsi3 (new_src, source, shift));
4401 emit_insn (gen_lshrsi3 (target, new_src, shift));
4404 return insns + 2;
4407 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4409 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4411 if ((remainder | shift_mask) != 0xffffffff)
4413 if (generate)
4415 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4417 insns = arm_gen_constant (AND, mode, cond,
4418 remainder | shift_mask,
4419 new_src, source, subtargets, 1);
4420 source = new_src;
4422 else
4424 rtx targ = subtargets ? NULL_RTX : target;
4426 insns = arm_gen_constant (AND, mode, cond,
4427 remainder | shift_mask,
4428 targ, source, subtargets, 0);
4432 if (generate)
4434 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4435 rtx shift = GEN_INT (clear_zero_bit_copies);
4437 emit_insn (gen_lshrsi3 (new_src, source, shift));
4438 emit_insn (gen_ashlsi3 (target, new_src, shift));
4441 return insns + 2;
4444 break;
4446 default:
4447 break;
4450 /* Calculate what the instruction sequences would be if we generated it
4451 normally, negated, or inverted. */
4452 if (code == AND)
4453 /* AND cannot be split into multiple insns, so invert and use BIC. */
4454 insns = 99;
4455 else
4456 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4458 if (can_negate)
4459 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4460 &neg_immediates);
4461 else
4462 neg_insns = 99;
4464 if (can_invert || final_invert)
4465 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4466 &inv_immediates);
4467 else
4468 inv_insns = 99;
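  /* For example (illustrative): an AND with 0x00ffff00 is cheapest via the
     inverted sequence, which ends up as two BIC instructions masking off
     0xff000000 and 0x000000ff.  */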
4470 immediates = &pos_immediates;
4472 /* Is the negated immediate sequence more efficient? */
4473 if (neg_insns < insns && neg_insns <= inv_insns)
4475 insns = neg_insns;
4476 immediates = &neg_immediates;
4478 else
4479 can_negate = 0;
4481 /* Is the inverted immediate sequence more efficient?
4482 We must allow for an extra NOT instruction for XOR operations, although
4483 there is some chance that the final 'mvn' will get optimized later. */
4484 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4486 insns = inv_insns;
4487 immediates = &inv_immediates;
4489 else
4491 can_invert = 0;
4492 final_invert = 0;
4495 /* Now output the chosen sequence as instructions. */
4496 if (generate)
4498 for (i = 0; i < insns; i++)
4500 rtx new_src, temp1_rtx;
4502 temp1 = immediates->i[i];
4504 if (code == SET || code == MINUS)
4505 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4506 else if ((final_invert || i < (insns - 1)) && subtargets)
4507 new_src = gen_reg_rtx (mode);
4508 else
4509 new_src = target;
4511 if (can_invert)
4512 temp1 = ~temp1;
4513 else if (can_negate)
4514 temp1 = -temp1;
4516 temp1 = trunc_int_for_mode (temp1, mode);
4517 temp1_rtx = GEN_INT (temp1);
4519 if (code == SET)
4521 else if (code == MINUS)
4522 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4523 else
4524 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4526 emit_constant_insn (cond,
4527 gen_rtx_SET (VOIDmode, new_src,
4528 temp1_rtx));
4529 source = new_src;
4531 if (code == SET)
4533 can_negate = can_invert;
4534 can_invert = 0;
4535 code = PLUS;
4537 else if (code == MINUS)
4538 code = PLUS;
4542 if (final_invert)
4544 if (generate)
4545 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4546 gen_rtx_NOT (mode, source)));
4547 insns++;
4550 return insns;
4553 /* Canonicalize a comparison so that we are more likely to recognize it.
4554 This can be done for a few constant compares, where we can make the
4555 immediate value easier to load. */
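/* E.g. (illustrative, SImode): (GT x 0xffffff) uses a constant that is not
   a valid immediate, but it can be rewritten as (GE x 0x1000000), whose
   constant is.  */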
4557 static void
4558 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4559 bool op0_preserve_value)
4561 enum machine_mode mode;
4562 unsigned HOST_WIDE_INT i, maxval;
4564 mode = GET_MODE (*op0);
4565 if (mode == VOIDmode)
4566 mode = GET_MODE (*op1);
4568 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4570 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4571 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4572 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4573 for GTU/LEU in Thumb mode. */
4574 if (mode == DImode)
4576 rtx tem;
4578 if (*code == GT || *code == LE
4579 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4581 /* Missing comparison. First try to use an available
4582 comparison. */
4583 if (CONST_INT_P (*op1))
4585 i = INTVAL (*op1);
4586 switch (*code)
4588 case GT:
4589 case LE:
4590 if (i != maxval
4591 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4593 *op1 = GEN_INT (i + 1);
4594 *code = *code == GT ? GE : LT;
4595 return;
4597 break;
4598 case GTU:
4599 case LEU:
4600 if (i != ~((unsigned HOST_WIDE_INT) 0)
4601 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4603 *op1 = GEN_INT (i + 1);
4604 *code = *code == GTU ? GEU : LTU;
4605 return;
4607 break;
4608 default:
4609 gcc_unreachable ();
4613 /* If that did not work, reverse the condition. */
4614 if (!op0_preserve_value)
4616 tem = *op0;
4617 *op0 = *op1;
4618 *op1 = tem;
4619 *code = (int)swap_condition ((enum rtx_code)*code);
4622 return;
4625 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4626 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4627 to facilitate possible combining with a cmp into 'ands'. */
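  /* E.g. (illustrative): a test such as ((unsigned char) x) == 0 can then
     be matched as a single ANDS instead of a separate extend and compare.  */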
4628 if (mode == SImode
4629 && GET_CODE (*op0) == ZERO_EXTEND
4630 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4631 && GET_MODE (XEXP (*op0, 0)) == QImode
4632 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4633 && subreg_lowpart_p (XEXP (*op0, 0))
4634 && *op1 == const0_rtx)
4635 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4636 GEN_INT (255));
4638 /* Comparisons smaller than DImode. Only adjust comparisons against
4639 an out-of-range constant. */
4640 if (!CONST_INT_P (*op1)
4641 || const_ok_for_arm (INTVAL (*op1))
4642 || const_ok_for_arm (- INTVAL (*op1)))
4643 return;
4645 i = INTVAL (*op1);
4647 switch (*code)
4649 case EQ:
4650 case NE:
4651 return;
4653 case GT:
4654 case LE:
4655 if (i != maxval
4656 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4658 *op1 = GEN_INT (i + 1);
4659 *code = *code == GT ? GE : LT;
4660 return;
4662 break;
4664 case GE:
4665 case LT:
4666 if (i != ~maxval
4667 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4669 *op1 = GEN_INT (i - 1);
4670 *code = *code == GE ? GT : LE;
4671 return;
4673 break;
4675 case GTU:
4676 case LEU:
4677 if (i != ~((unsigned HOST_WIDE_INT) 0)
4678 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4680 *op1 = GEN_INT (i + 1);
4681 *code = *code == GTU ? GEU : LTU;
4682 return;
4684 break;
4686 case GEU:
4687 case LTU:
4688 if (i != 0
4689 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4691 *op1 = GEN_INT (i - 1);
4692 *code = *code == GEU ? GTU : LEU;
4693 return;
4695 break;
4697 default:
4698 gcc_unreachable ();
4703 /* Define how to find the value returned by a function. */
4705 static rtx
4706 arm_function_value(const_tree type, const_tree func,
4707 bool outgoing ATTRIBUTE_UNUSED)
4709 enum machine_mode mode;
4710 int unsignedp ATTRIBUTE_UNUSED;
4711 rtx r ATTRIBUTE_UNUSED;
4713 mode = TYPE_MODE (type);
4715 if (TARGET_AAPCS_BASED)
4716 return aapcs_allocate_return_reg (mode, type, func);
4718 /* Promote integer types. */
4719 if (INTEGRAL_TYPE_P (type))
4720 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4722 /* Promote small structs returned in a register to full-word size
4723 for big-endian AAPCS. */
4724 if (arm_return_in_msb (type))
4726 HOST_WIDE_INT size = int_size_in_bytes (type);
4727 if (size % UNITS_PER_WORD != 0)
4729 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4730 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4734 return arm_libcall_value_1 (mode);
4737 /* libcall hashtable helpers. */
4739 struct libcall_hasher : typed_noop_remove <rtx_def>
4741 typedef rtx_def value_type;
4742 typedef rtx_def compare_type;
4743 static inline hashval_t hash (const value_type *);
4744 static inline bool equal (const value_type *, const compare_type *);
4745 static inline void remove (value_type *);
4748 inline bool
4749 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4751 return rtx_equal_p (p1, p2);
4754 inline hashval_t
4755 libcall_hasher::hash (const value_type *p1)
4757 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4760 typedef hash_table<libcall_hasher> libcall_table_type;
4762 static void
4763 add_libcall (libcall_table_type *htab, rtx libcall)
4765 *htab->find_slot (libcall, INSERT) = libcall;
4768 static bool
4769 arm_libcall_uses_aapcs_base (const_rtx libcall)
4771 static bool init_done = false;
4772 static libcall_table_type *libcall_htab = NULL;
4774 if (!init_done)
4776 init_done = true;
4778 libcall_htab = new libcall_table_type (31);
4779 add_libcall (libcall_htab,
4780 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4781 add_libcall (libcall_htab,
4782 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4783 add_libcall (libcall_htab,
4784 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4785 add_libcall (libcall_htab,
4786 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4788 add_libcall (libcall_htab,
4789 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4790 add_libcall (libcall_htab,
4791 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4792 add_libcall (libcall_htab,
4793 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4794 add_libcall (libcall_htab,
4795 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4797 add_libcall (libcall_htab,
4798 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4799 add_libcall (libcall_htab,
4800 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4801 add_libcall (libcall_htab,
4802 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4803 add_libcall (libcall_htab,
4804 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4805 add_libcall (libcall_htab,
4806 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4807 add_libcall (libcall_htab,
4808 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4809 add_libcall (libcall_htab,
4810 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4811 add_libcall (libcall_htab,
4812 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4814 /* Values from double-precision helper functions are returned in core
4815 registers if the selected core only supports single-precision
4816 arithmetic, even if we are using the hard-float ABI. The same is
4817 true for single-precision helpers, but we will never be using the
4818 hard-float ABI on a CPU which doesn't support single-precision
4819 operations in hardware. */
4820 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4821 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4822 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4823 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4824 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4825 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4826 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4827 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4828 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4829 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4830 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4831 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4832 SFmode));
4833 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4834 DFmode));
4837 return libcall && libcall_htab->find (libcall) != NULL;
4840 static rtx
4841 arm_libcall_value_1 (enum machine_mode mode)
4843 if (TARGET_AAPCS_BASED)
4844 return aapcs_libcall_value (mode);
4845 else if (TARGET_IWMMXT_ABI
4846 && arm_vector_mode_supported_p (mode))
4847 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4848 else
4849 return gen_rtx_REG (mode, ARG_REGISTER (1));
4852 /* Define how to find the value returned by a library function
4853 assuming the value has mode MODE. */
4855 static rtx
4856 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4858 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4859 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4861 /* The following libcalls return their result in integer registers,
4862 even though they return a floating point value. */
4863 if (arm_libcall_uses_aapcs_base (libcall))
4864 return gen_rtx_REG (mode, ARG_REGISTER(1));
4868 return arm_libcall_value_1 (mode);
4871 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4873 static bool
4874 arm_function_value_regno_p (const unsigned int regno)
4876 if (regno == ARG_REGISTER (1)
4877 || (TARGET_32BIT
4878 && TARGET_AAPCS_BASED
4879 && TARGET_VFP
4880 && TARGET_HARD_FLOAT
4881 && regno == FIRST_VFP_REGNUM)
4882 || (TARGET_IWMMXT_ABI
4883 && regno == FIRST_IWMMXT_REGNUM))
4884 return true;
4886 return false;
4889 /* Determine the amount of memory needed to store the possible return
4890 registers of an untyped call. */
4892 arm_apply_result_size (void)
4894 int size = 16;
4896 if (TARGET_32BIT)
4898 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4899 size += 32;
4900 if (TARGET_IWMMXT_ABI)
4901 size += 8;
4904 return size;
4907 /* Decide whether TYPE should be returned in memory (true)
4908 or in a register (false). FNTYPE is the type of the function making
4909 the call. */
4910 static bool
4911 arm_return_in_memory (const_tree type, const_tree fntype)
4913 HOST_WIDE_INT size;
4915 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4917 if (TARGET_AAPCS_BASED)
4919 /* Simple, non-aggregate types (i.e. not including vectors and
4920 complex) are always returned in a register (or registers).
4921 We don't care about which register here, so we can short-cut
4922 some of the detail. */
4923 if (!AGGREGATE_TYPE_P (type)
4924 && TREE_CODE (type) != VECTOR_TYPE
4925 && TREE_CODE (type) != COMPLEX_TYPE)
4926 return false;
4928 /* Any return value that is no larger than one word can be
4929 returned in r0. */
4930 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4931 return false;
4933 /* Check any available co-processors to see if they accept the
4934 type as a register candidate (VFP, for example, can return
4935 some aggregates in consecutive registers). These aren't
4936 available if the call is variadic. */
4937 if (aapcs_select_return_coproc (type, fntype) >= 0)
4938 return false;
4940 /* Vector values should be returned using ARM registers, not
4941 memory (unless they're over 16 bytes, which will break since
4942 we only have four call-clobbered registers to play with). */
4943 if (TREE_CODE (type) == VECTOR_TYPE)
4944 return (size < 0 || size > (4 * UNITS_PER_WORD));
4946 /* The rest go in memory. */
4947 return true;
4950 if (TREE_CODE (type) == VECTOR_TYPE)
4951 return (size < 0 || size > (4 * UNITS_PER_WORD));
4953 if (!AGGREGATE_TYPE_P (type) &&
4954 (TREE_CODE (type) != VECTOR_TYPE))
4955 /* All simple types are returned in registers. */
4956 return false;
4958 if (arm_abi != ARM_ABI_APCS)
4960 /* ATPCS and later return aggregate types in memory only if they are
4961 larger than a word (or are variable size). */
4962 return (size < 0 || size > UNITS_PER_WORD);
4965 /* For the arm-wince targets we choose to be compatible with Microsoft's
4966 ARM and Thumb compilers, which always return aggregates in memory. */
4967 #ifndef ARM_WINCE
4968 /* All structures/unions bigger than one word are returned in memory.
4969 Also catch the case where int_size_in_bytes returns -1. In this case
4970 the aggregate is either huge or of variable size, and in either case
4971 we will want to return it via memory and not in a register. */
4972 if (size < 0 || size > UNITS_PER_WORD)
4973 return true;
4975 if (TREE_CODE (type) == RECORD_TYPE)
4977 tree field;
4979 /* For a struct the APCS says that we only return in a register
4980 if the type is 'integer like' and every addressable element
4981 has an offset of zero. For practical purposes this means
4982 that the structure can have at most one non bit-field element
4983 and that this element must be the first one in the structure. */
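      /* E.g. (illustrative): struct { short a; unsigned b : 16; } passes
         these tests and can be returned in r0, whereas struct { float f; }
         is forced into memory by the float check below.  */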
4985 /* Find the first field, ignoring non FIELD_DECL things which will
4986 have been created by C++. */
4987 for (field = TYPE_FIELDS (type);
4988 field && TREE_CODE (field) != FIELD_DECL;
4989 field = DECL_CHAIN (field))
4990 continue;
4992 if (field == NULL)
4993 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4995 /* Check that the first field is valid for returning in a register. */
4997 /* ... Floats are not allowed */
4998 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4999 return true;
5001 /* ... Aggregates that are not themselves valid for returning in
5002 a register are not allowed. */
5003 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5004 return true;
5006 /* Now check the remaining fields, if any. Only bitfields are allowed,
5007 since they are not addressable. */
5008 for (field = DECL_CHAIN (field);
5009 field;
5010 field = DECL_CHAIN (field))
5012 if (TREE_CODE (field) != FIELD_DECL)
5013 continue;
5015 if (!DECL_BIT_FIELD_TYPE (field))
5016 return true;
5019 return false;
5022 if (TREE_CODE (type) == UNION_TYPE)
5024 tree field;
5026 /* Unions can be returned in registers if every element is
5027 integral, or can be returned in an integer register. */
5028 for (field = TYPE_FIELDS (type);
5029 field;
5030 field = DECL_CHAIN (field))
5032 if (TREE_CODE (field) != FIELD_DECL)
5033 continue;
5035 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5036 return true;
5038 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5039 return true;
5042 return false;
5044 #endif /* not ARM_WINCE */
5046 /* Return all other types in memory. */
5047 return true;
5050 const struct pcs_attribute_arg
5052 const char *arg;
5053 enum arm_pcs value;
5054 } pcs_attribute_args[] =
5056 {"aapcs", ARM_PCS_AAPCS},
5057 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5058 #if 0
5059 /* We could recognize these, but changes would be needed elsewhere
5060 * to implement them. */
5061 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5062 {"atpcs", ARM_PCS_ATPCS},
5063 {"apcs", ARM_PCS_APCS},
5064 #endif
5065 {NULL, ARM_PCS_UNKNOWN}
5068 static enum arm_pcs
5069 arm_pcs_from_attribute (tree attr)
5071 const struct pcs_attribute_arg *ptr;
5072 const char *arg;
5074 /* Get the value of the argument. */
5075 if (TREE_VALUE (attr) == NULL_TREE
5076 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5077 return ARM_PCS_UNKNOWN;
5079 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5081 /* Check it against the list of known arguments. */
5082 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5083 if (streq (arg, ptr->arg))
5084 return ptr->value;
5086 /* An unrecognized PCS attribute argument. */
5087 return ARM_PCS_UNKNOWN;
5090 /* Get the PCS variant to use for this call. TYPE is the function's type
5091 specification, DECL is the specific declaration. DECL may be null if
5092 the call could be indirect or if this is a library call. */
5093 static enum arm_pcs
5094 arm_get_pcs_model (const_tree type, const_tree decl)
5096 bool user_convention = false;
5097 enum arm_pcs user_pcs = arm_pcs_default;
5098 tree attr;
5100 gcc_assert (type);
5102 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5103 if (attr)
5105 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5106 user_convention = true;
5109 if (TARGET_AAPCS_BASED)
5111 /* Detect varargs functions. These always use the base rules
5112 (no argument is ever a candidate for a co-processor
5113 register). */
5114 bool base_rules = stdarg_p (type);
5116 if (user_convention)
5118 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5119 sorry ("non-AAPCS derived PCS variant");
5120 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5121 error ("variadic functions must use the base AAPCS variant");
5124 if (base_rules)
5125 return ARM_PCS_AAPCS;
5126 else if (user_convention)
5127 return user_pcs;
5128 else if (decl && flag_unit_at_a_time)
5130 /* Local functions never leak outside this compilation unit,
5131 so we are free to use whatever conventions are
5132 appropriate. */
5133 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5134 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5135 if (i && i->local)
5136 return ARM_PCS_AAPCS_LOCAL;
5139 else if (user_convention && user_pcs != arm_pcs_default)
5140 sorry ("PCS variant");
5142 /* For everything else we use the target's default. */
5143 return arm_pcs_default;
5147 static void
5148 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5149 const_tree fntype ATTRIBUTE_UNUSED,
5150 rtx libcall ATTRIBUTE_UNUSED,
5151 const_tree fndecl ATTRIBUTE_UNUSED)
5153 /* Record the unallocated VFP registers. */
5154 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5155 pcum->aapcs_vfp_reg_alloc = 0;
5158 /* Walk down the type tree of TYPE counting consecutive base elements.
5159 If *MODEP is VOIDmode, then set it to the first valid floating point
5160 type. If a non-floating point type is found, or if a floating point
5161 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5162 otherwise return the count in the sub-tree. */
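/* E.g. (illustrative): struct { double x, y; } yields a count of 2 with
   *MODEP == DFmode, and float v[4] yields 4 with SFmode, while
   struct { float f; double d; } returns -1 because the element types
   differ.  */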
5163 static int
5164 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5166 enum machine_mode mode;
5167 HOST_WIDE_INT size;
5169 switch (TREE_CODE (type))
5171 case REAL_TYPE:
5172 mode = TYPE_MODE (type);
5173 if (mode != DFmode && mode != SFmode)
5174 return -1;
5176 if (*modep == VOIDmode)
5177 *modep = mode;
5179 if (*modep == mode)
5180 return 1;
5182 break;
5184 case COMPLEX_TYPE:
5185 mode = TYPE_MODE (TREE_TYPE (type));
5186 if (mode != DFmode && mode != SFmode)
5187 return -1;
5189 if (*modep == VOIDmode)
5190 *modep = mode;
5192 if (*modep == mode)
5193 return 2;
5195 break;
5197 case VECTOR_TYPE:
5198 /* Use V2SImode and V4SImode as representatives of all 64-bit
5199 and 128-bit vector types, whether or not those modes are
5200 supported with the present options. */
5201 size = int_size_in_bytes (type);
5202 switch (size)
5204 case 8:
5205 mode = V2SImode;
5206 break;
5207 case 16:
5208 mode = V4SImode;
5209 break;
5210 default:
5211 return -1;
5214 if (*modep == VOIDmode)
5215 *modep = mode;
5217 /* Vector modes are considered to be opaque: two vectors are
5218 equivalent for the purposes of being homogeneous aggregates
5219 if they are the same size. */
5220 if (*modep == mode)
5221 return 1;
5223 break;
5225 case ARRAY_TYPE:
5227 int count;
5228 tree index = TYPE_DOMAIN (type);
5230 /* Can't handle incomplete types nor sizes that are not
5231 fixed. */
5232 if (!COMPLETE_TYPE_P (type)
5233 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5234 return -1;
5236 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5237 if (count == -1
5238 || !index
5239 || !TYPE_MAX_VALUE (index)
5240 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5241 || !TYPE_MIN_VALUE (index)
5242 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5243 || count < 0)
5244 return -1;
5246 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5247 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5249 /* There must be no padding. */
5250 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5251 return -1;
5253 return count;
5256 case RECORD_TYPE:
5258 int count = 0;
5259 int sub_count;
5260 tree field;
5262 /* Can't handle incomplete types nor sizes that are not
5263 fixed. */
5264 if (!COMPLETE_TYPE_P (type)
5265 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5266 return -1;
5268 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5270 if (TREE_CODE (field) != FIELD_DECL)
5271 continue;
5273 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5274 if (sub_count < 0)
5275 return -1;
5276 count += sub_count;
5279 /* There must be no padding. */
5280 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5281 return -1;
5283 return count;
5286 case UNION_TYPE:
5287 case QUAL_UNION_TYPE:
5289 /* These aren't very interesting except in a degenerate case. */
5290 int count = 0;
5291 int sub_count;
5292 tree field;
5294 /* Can't handle incomplete types nor sizes that are not
5295 fixed. */
5296 if (!COMPLETE_TYPE_P (type)
5297 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5298 return -1;
5300 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5302 if (TREE_CODE (field) != FIELD_DECL)
5303 continue;
5305 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5306 if (sub_count < 0)
5307 return -1;
5308 count = count > sub_count ? count : sub_count;
5311 /* There must be no padding. */
5312 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5313 return -1;
5315 return count;
5318 default:
5319 break;
5322 return -1;
5325 /* Return true if PCS_VARIANT should use VFP registers. */
5326 static bool
5327 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5329 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5331 static bool seen_thumb1_vfp = false;
5333 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5335 sorry ("Thumb-1 hard-float VFP ABI");
5336 /* sorry() is not immediately fatal, so only display this once. */
5337 seen_thumb1_vfp = true;
5340 return true;
5343 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5344 return false;
5346 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5347 (TARGET_VFP_DOUBLE || !is_double));
5350 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5351 suitable for passing or returning in VFP registers for the PCS
5352 variant selected. If it is, then *BASE_MODE is updated to contain
5353 a machine mode describing each element of the argument's type and
5354 *COUNT to hold the number of such elements. */
5355 static bool
5356 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5357 enum machine_mode mode, const_tree type,
5358 enum machine_mode *base_mode, int *count)
5360 enum machine_mode new_mode = VOIDmode;
5362 /* If we have the type information, prefer that to working things
5363 out from the mode. */
5364 if (type)
5366 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5368 if (ag_count > 0 && ag_count <= 4)
5369 *count = ag_count;
5370 else
5371 return false;
5373 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5374 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5375 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5377 *count = 1;
5378 new_mode = mode;
5380 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5382 *count = 2;
5383 new_mode = (mode == DCmode ? DFmode : SFmode);
5385 else
5386 return false;
5389 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5390 return false;
5392 *base_mode = new_mode;
5393 return true;
5396 static bool
5397 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5398 enum machine_mode mode, const_tree type)
5400 int count ATTRIBUTE_UNUSED;
5401 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5403 if (!use_vfp_abi (pcs_variant, false))
5404 return false;
5405 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5406 &ag_mode, &count);
5409 static bool
5410 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5411 const_tree type)
5413 if (!use_vfp_abi (pcum->pcs_variant, false))
5414 return false;
5416 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5417 &pcum->aapcs_vfp_rmode,
5418 &pcum->aapcs_vfp_rcount);
5421 static bool
5422 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5423 const_tree type ATTRIBUTE_UNUSED)
5425 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5426 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5427 int regno;
5429 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5430 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5432 pcum->aapcs_vfp_reg_alloc = mask << regno;
5433 if (mode == BLKmode
5434 || (mode == TImode && ! TARGET_NEON)
5435 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5437 int i;
5438 int rcount = pcum->aapcs_vfp_rcount;
5439 int rshift = shift;
5440 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5441 rtx par;
5442 if (!TARGET_NEON)
5444 /* Avoid using unsupported vector modes. */
5445 if (rmode == V2SImode)
5446 rmode = DImode;
5447 else if (rmode == V4SImode)
5449 rmode = DImode;
5450 rcount *= 2;
5451 rshift /= 2;
5454 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5455 for (i = 0; i < rcount; i++)
5457 rtx tmp = gen_rtx_REG (rmode,
5458 FIRST_VFP_REGNUM + regno + i * rshift);
5459 tmp = gen_rtx_EXPR_LIST
5460 (VOIDmode, tmp,
5461 GEN_INT (i * GET_MODE_SIZE (rmode)));
5462 XVECEXP (par, 0, i) = tmp;
5465 pcum->aapcs_reg = par;
5467 else
5468 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5469 return true;
5471 return false;
5474 static rtx
5475 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5476 enum machine_mode mode,
5477 const_tree type ATTRIBUTE_UNUSED)
5479 if (!use_vfp_abi (pcs_variant, false))
5480 return NULL;
5482 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5484 int count;
5485 enum machine_mode ag_mode;
5486 int i;
5487 rtx par;
5488 int shift;
5490 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5491 &ag_mode, &count);
5493 if (!TARGET_NEON)
5495 if (ag_mode == V2SImode)
5496 ag_mode = DImode;
5497 else if (ag_mode == V4SImode)
5499 ag_mode = DImode;
5500 count *= 2;
5503 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5504 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5505 for (i = 0; i < count; i++)
5507 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5508 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5509 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5510 XVECEXP (par, 0, i) = tmp;
5513 return par;
5516 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5519 static void
5520 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5521 enum machine_mode mode ATTRIBUTE_UNUSED,
5522 const_tree type ATTRIBUTE_UNUSED)
5524 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5525 pcum->aapcs_vfp_reg_alloc = 0;
5526 return;
5529 #define AAPCS_CP(X) \
5531 aapcs_ ## X ## _cum_init, \
5532 aapcs_ ## X ## _is_call_candidate, \
5533 aapcs_ ## X ## _allocate, \
5534 aapcs_ ## X ## _is_return_candidate, \
5535 aapcs_ ## X ## _allocate_return_reg, \
5536 aapcs_ ## X ## _advance \
5539 /* Table of co-processors that can be used to pass arguments in
5540 registers. Ideally no argument should be a candidate for more than
5541 one co-processor table entry, but the table is processed in order
5542 and stops after the first match. If that entry then fails to put
5543 the argument into a co-processor register, the argument will go on
5544 the stack. */
5545 static struct
5547 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5548 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5550 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5551 BLKmode) is a candidate for this co-processor's registers; this
5552 function should ignore any position-dependent state in
5553 CUMULATIVE_ARGS and only use call-type dependent information. */
5554 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5556 /* Return true if the argument does get a co-processor register; it
5557 should set aapcs_reg to an RTX of the register allocated as is
5558 required for a return from FUNCTION_ARG. */
5559 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5561 /* Return true if a result of mode MODE (or type TYPE if MODE is
5562 BLKmode) can be returned in this co-processor's registers. */
5563 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5565 /* Allocate and return an RTX element to hold the return value of a
5566 call. This routine must not fail and will only be called if
5567 is_return_candidate returned true with the same parameters. */
5568 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5570 /* Finish processing this argument and prepare to start processing
5571 the next one. */
5572 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5573 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5575 AAPCS_CP(vfp)
5578 #undef AAPCS_CP
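/* For illustration: the single AAPCS_CP(vfp) entry above expands to the six
   hooks { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
   aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
   aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }, matching the field
   order of the struct.  Supporting another co-processor would mean
   providing the same six hooks, adding another AAPCS_CP(name) line, and
   raising ARM_NUM_COPROC_SLOTS accordingly.  */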
5580 static int
5581 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5582 const_tree type)
5584 int i;
5586 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5587 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5588 return i;
5590 return -1;
5593 static int
5594 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5596 /* We aren't passed a decl, so we can't check that a call is local.
5597 However, it isn't clear that that would be a win anyway, since it
5598 might limit some tail-calling opportunities. */
5599 enum arm_pcs pcs_variant;
5601 if (fntype)
5603 const_tree fndecl = NULL_TREE;
5605 if (TREE_CODE (fntype) == FUNCTION_DECL)
5607 fndecl = fntype;
5608 fntype = TREE_TYPE (fntype);
5611 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5613 else
5614 pcs_variant = arm_pcs_default;
5616 if (pcs_variant != ARM_PCS_AAPCS)
5618 int i;
5620 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5621 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5622 TYPE_MODE (type),
5623 type))
5624 return i;
5626 return -1;
5629 static rtx
5630 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5631 const_tree fntype)
5633 /* We aren't passed a decl, so we can't check that a call is local.
5634 However, it isn't clear that that would be a win anyway, since it
5635 might limit some tail-calling opportunities. */
5636 enum arm_pcs pcs_variant;
5637 int unsignedp ATTRIBUTE_UNUSED;
5639 if (fntype)
5641 const_tree fndecl = NULL_TREE;
5643 if (TREE_CODE (fntype) == FUNCTION_DECL)
5645 fndecl = fntype;
5646 fntype = TREE_TYPE (fntype);
5649 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5651 else
5652 pcs_variant = arm_pcs_default;
5654 /* Promote integer types. */
5655 if (type && INTEGRAL_TYPE_P (type))
5656 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5658 if (pcs_variant != ARM_PCS_AAPCS)
5660 int i;
5662 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5663 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5664 type))
5665 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5666 mode, type);
5669 /* Promotes small structs returned in a register to full-word size
5670 for big-endian AAPCS. */
5671 if (type && arm_return_in_msb (type))
5673 HOST_WIDE_INT size = int_size_in_bytes (type);
5674 if (size % UNITS_PER_WORD != 0)
5676 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5677 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5681 return gen_rtx_REG (mode, R0_REGNUM);
5684 static rtx
5685 aapcs_libcall_value (enum machine_mode mode)
5687 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5688 && GET_MODE_SIZE (mode) <= 4)
5689 mode = SImode;
5691 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5694 /* Lay out a function argument using the AAPCS rules. The rule
5695 numbers referred to here are those in the AAPCS. */
5696 static void
5697 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5698 const_tree type, bool named)
5700 int nregs, nregs2;
5701 int ncrn;
5703 /* We only need to do this once per argument. */
5704 if (pcum->aapcs_arg_processed)
5705 return;
5707 pcum->aapcs_arg_processed = true;
5709 /* Special case: if named is false then we are handling an incoming
5710 anonymous argument which is on the stack. */
5711 if (!named)
5712 return;
5714 /* Is this a potential co-processor register candidate? */
5715 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5717 int slot = aapcs_select_call_coproc (pcum, mode, type);
5718 pcum->aapcs_cprc_slot = slot;
5720 /* We don't have to apply any of the rules from part B of the
5721 preparation phase, these are handled elsewhere in the
5722 compiler. */
5724 if (slot >= 0)
5726 /* A Co-processor register candidate goes either in its own
5727 class of registers or on the stack. */
5728 if (!pcum->aapcs_cprc_failed[slot])
5730 /* C1.cp - Try to allocate the argument to co-processor
5731 registers. */
5732 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5733 return;
5735 /* C2.cp - Put the argument on the stack and note that we
5736 can't assign any more candidates in this slot. We also
5737 need to note that we have allocated stack space, so that
5738 we won't later try to split a non-cprc candidate between
5739 core registers and the stack. */
5740 pcum->aapcs_cprc_failed[slot] = true;
5741 pcum->can_split = false;
5744 /* We didn't get a register, so this argument goes on the
5745 stack. */
5746 gcc_assert (pcum->can_split == false);
5747 return;
5751 /* C3 - For double-word aligned arguments, round the NCRN up to the
5752 next even number. */
5753 ncrn = pcum->aapcs_ncrn;
5754 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5755 ncrn++;
5757 nregs = ARM_NUM_REGS2(mode, type);
5759 /* Sigh, this test should really assert that nregs > 0, but a GCC
5760 extension allows empty structs and then gives them empty size; it
5761 then allows such a structure to be passed by value. For some of
5762 the code below we have to pretend that such an argument has
5763 non-zero size so that we 'locate' it correctly either in
5764 registers or on the stack. */
5765 gcc_assert (nregs >= 0);
5767 nregs2 = nregs ? nregs : 1;
5769 /* C4 - Argument fits entirely in core registers. */
5770 if (ncrn + nregs2 <= NUM_ARG_REGS)
5772 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5773 pcum->aapcs_next_ncrn = ncrn + nregs;
5774 return;
5777 /* C5 - Some core registers left and there are no arguments already
5778 on the stack: split this argument between the remaining core
5779 registers and the stack. */
5780 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5782 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5783 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5784 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5785 return;
5788 /* C6 - NCRN is set to 4. */
5789 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5791 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5792 return;
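/* Worked example of the rules above (purely illustrative): for a
   hypothetical 'void f (int a, long long b)' compiled for the base AAPCS,
   'a' takes r0 under C4 and aapcs_next_ncrn becomes 1; 'b' needs
   doubleword alignment, so C3 rounds ncrn up to 2 and C4 places it in
   r2/r3.  A further doubleword argument would then find no core registers
   left, so C6 sets aapcs_next_ncrn to 4 and C7/C8 leave it on the
   stack.  */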
5795 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5796 for a call to a function whose data type is FNTYPE.
5797 For a library call, FNTYPE is NULL. */
5798 void
5799 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5800 rtx libname,
5801 tree fndecl ATTRIBUTE_UNUSED)
5803 /* Long call handling. */
5804 if (fntype)
5805 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5806 else
5807 pcum->pcs_variant = arm_pcs_default;
5809 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5811 if (arm_libcall_uses_aapcs_base (libname))
5812 pcum->pcs_variant = ARM_PCS_AAPCS;
5814 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5815 pcum->aapcs_reg = NULL_RTX;
5816 pcum->aapcs_partial = 0;
5817 pcum->aapcs_arg_processed = false;
5818 pcum->aapcs_cprc_slot = -1;
5819 pcum->can_split = true;
5821 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5823 int i;
5825 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5827 pcum->aapcs_cprc_failed[i] = false;
5828 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5831 return;
5834 /* Legacy ABIs */
5836 /* On the ARM, the offset starts at 0. */
5837 pcum->nregs = 0;
5838 pcum->iwmmxt_nregs = 0;
5839 pcum->can_split = true;
5841 /* Varargs vectors are treated the same as long long.
5842 named_count avoids having to change the way arm handles 'named'. */
5843 pcum->named_count = 0;
5844 pcum->nargs = 0;
5846 if (TARGET_REALLY_IWMMXT && fntype)
5848 tree fn_arg;
5850 for (fn_arg = TYPE_ARG_TYPES (fntype);
5851 fn_arg;
5852 fn_arg = TREE_CHAIN (fn_arg))
5853 pcum->named_count += 1;
5855 if (! pcum->named_count)
5856 pcum->named_count = INT_MAX;
5860 /* Return true if we use LRA instead of reload pass. */
5861 static bool
5862 arm_lra_p (void)
5864 return arm_lra_flag;
5867 /* Return true if mode/type need doubleword alignment. */
5868 static bool
5869 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5871 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5872 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5876 /* Determine where to put an argument to a function.
5877 Value is zero to push the argument on the stack,
5878 or a hard register in which to store the argument.
5880 MODE is the argument's machine mode.
5881 TYPE is the data type of the argument (as a tree).
5882 This is null for libcalls where that information may
5883 not be available.
5884 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5885 the preceding args and about the function being called.
5886 NAMED is nonzero if this argument is a named parameter
5887 (otherwise it is an extra parameter matching an ellipsis).
5889 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5890 other arguments are passed on the stack. If (NAMED == 0) (which happens
5891 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5892 defined), say it is passed on the stack (function_prologue will
5893 indeed make it be passed on the stack if necessary). */
5895 static rtx
5896 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5897 const_tree type, bool named)
5899 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5900 int nregs;
5902 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5903 a call insn (op3 of a call_value insn). */
5904 if (mode == VOIDmode)
5905 return const0_rtx;
5907 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5909 aapcs_layout_arg (pcum, mode, type, named);
5910 return pcum->aapcs_reg;
5913 /* Varargs vectors are treated the same as long long.
5914 named_count avoids having to change the way arm handles 'named'. */
5915 if (TARGET_IWMMXT_ABI
5916 && arm_vector_mode_supported_p (mode)
5917 && pcum->named_count > pcum->nargs + 1)
5919 if (pcum->iwmmxt_nregs <= 9)
5920 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5921 else
5923 pcum->can_split = false;
5924 return NULL_RTX;
5928 /* Put doubleword aligned quantities in even register pairs. */
5929 if (pcum->nregs & 1
5930 && ARM_DOUBLEWORD_ALIGN
5931 && arm_needs_doubleword_align (mode, type))
5932 pcum->nregs++;
5934 /* Only allow splitting an arg between regs and memory if all preceding
5935 args were allocated to regs. For args passed by reference we only count
5936 the reference pointer. */
5937 if (pcum->can_split)
5938 nregs = 1;
5939 else
5940 nregs = ARM_NUM_REGS2 (mode, type);
5942 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5943 return NULL_RTX;
5945 return gen_rtx_REG (mode, pcum->nregs);
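/* For illustration: on the non-AAPCS path above, the first integer
   argument of a function comes back as (reg:SI r0); arm_function_arg_advance
   below then updates pcum->nregs, and once an argument is unnamed or would
   need more registers than remain in r0-r3, NULL_RTX is returned and the
   argument is passed on the stack.  */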
5948 static unsigned int
5949 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5951 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5952 ? DOUBLEWORD_ALIGNMENT
5953 : PARM_BOUNDARY);
5956 static int
5957 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5958 tree type, bool named)
5960 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5961 int nregs = pcum->nregs;
5963 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5965 aapcs_layout_arg (pcum, mode, type, named);
5966 return pcum->aapcs_partial;
5969 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5970 return 0;
5972 if (NUM_ARG_REGS > nregs
5973 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5974 && pcum->can_split)
5975 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5977 return 0;
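/* For illustration: if two 'int' arguments already occupy r0 and r1 and the
   next argument is a 16-byte structure, the expression above reports
   (4 - 2) * 4 = 8 partial bytes; the first half of the structure travels in
   r2/r3 and the remainder on the stack, provided pcum->can_split is still
   true.  */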
5980 /* Update the data in PCUM to advance over an argument
5981 of mode MODE and data type TYPE.
5982 (TYPE is null for libcalls where that information may not be available.) */
5984 static void
5985 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5986 const_tree type, bool named)
5988 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5990 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5992 aapcs_layout_arg (pcum, mode, type, named);
5994 if (pcum->aapcs_cprc_slot >= 0)
5996 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5997 type);
5998 pcum->aapcs_cprc_slot = -1;
6001 /* Generic stuff. */
6002 pcum->aapcs_arg_processed = false;
6003 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6004 pcum->aapcs_reg = NULL_RTX;
6005 pcum->aapcs_partial = 0;
6007 else
6009 pcum->nargs += 1;
6010 if (arm_vector_mode_supported_p (mode)
6011 && pcum->named_count > pcum->nargs
6012 && TARGET_IWMMXT_ABI)
6013 pcum->iwmmxt_nregs += 1;
6014 else
6015 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6019 /* Variable sized types are passed by reference. This is a GCC
6020 extension to the ARM ABI. */
6022 static bool
6023 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6024 enum machine_mode mode ATTRIBUTE_UNUSED,
6025 const_tree type, bool named ATTRIBUTE_UNUSED)
6027 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
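/* For illustration: TYPE_SIZE (type) is an INTEGER_CST for ordinary
   fixed-size types, so the test above only fires for objects whose size is
   not a compile-time constant (variably sized types); such arguments are
   passed by invisible reference rather than copied, as the comment above
   notes.  */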
6030 /* Encode the current state of the #pragma [no_]long_calls. */
6031 typedef enum
6033 OFF, /* No #pragma [no_]long_calls is in effect. */
6034 LONG, /* #pragma long_calls is in effect. */
6035 SHORT /* #pragma no_long_calls is in effect. */
6036 } arm_pragma_enum;
6038 static arm_pragma_enum arm_pragma_long_calls = OFF;
6040 void
6041 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6043 arm_pragma_long_calls = LONG;
6046 void
6047 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6049 arm_pragma_long_calls = SHORT;
6052 void
6053 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6055 arm_pragma_long_calls = OFF;
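/* For illustration, hypothetical user code drives the three handlers above
   like this:

     #pragma long_calls
     void far_away (void);    (declared with the long_call attribute)
     #pragma no_long_calls
     void nearby (void);      (declared with the short_call attribute)
     #pragma long_calls_off
     void normal (void);      (back to the command-line default)

   arm_set_default_type_attributes below is what actually attaches the
   attribute to function types declared inside these regions.  */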
6058 /* Handle an attribute requiring a FUNCTION_DECL;
6059 arguments as in struct attribute_spec.handler. */
6060 static tree
6061 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6062 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6064 if (TREE_CODE (*node) != FUNCTION_DECL)
6066 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6067 name);
6068 *no_add_attrs = true;
6071 return NULL_TREE;
6074 /* Handle an "interrupt" or "isr" attribute;
6075 arguments as in struct attribute_spec.handler. */
6076 static tree
6077 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6078 bool *no_add_attrs)
6080 if (DECL_P (*node))
6082 if (TREE_CODE (*node) != FUNCTION_DECL)
6084 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6085 name);
6086 *no_add_attrs = true;
6088 /* FIXME: the argument, if any, is checked for type attributes;
6089 should it be checked for decl ones? */
6091 else
6093 if (TREE_CODE (*node) == FUNCTION_TYPE
6094 || TREE_CODE (*node) == METHOD_TYPE)
6096 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6098 warning (OPT_Wattributes, "%qE attribute ignored",
6099 name);
6100 *no_add_attrs = true;
6103 else if (TREE_CODE (*node) == POINTER_TYPE
6104 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6105 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6106 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6108 *node = build_variant_type_copy (*node);
6109 TREE_TYPE (*node) = build_type_attribute_variant
6110 (TREE_TYPE (*node),
6111 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6112 *no_add_attrs = true;
6114 else
6116 /* Possibly pass this attribute on from the type to a decl. */
6117 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6118 | (int) ATTR_FLAG_FUNCTION_NEXT
6119 | (int) ATTR_FLAG_ARRAY_NEXT))
6121 *no_add_attrs = true;
6122 return tree_cons (name, args, NULL_TREE);
6124 else
6126 warning (OPT_Wattributes, "%qE attribute ignored",
6127 name);
6132 return NULL_TREE;
6135 /* Handle a "pcs" attribute; arguments as in struct
6136 attribute_spec.handler. */
6137 static tree
6138 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6139 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6141 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6143 warning (OPT_Wattributes, "%qE attribute ignored", name);
6144 *no_add_attrs = true;
6146 return NULL_TREE;
6149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6150 /* Handle the "notshared" attribute. This attribute is another way of
6151 requesting hidden visibility. ARM's compiler supports
6152 "__declspec(notshared)"; we support the same thing via an
6153 attribute. */
6155 static tree
6156 arm_handle_notshared_attribute (tree *node,
6157 tree name ATTRIBUTE_UNUSED,
6158 tree args ATTRIBUTE_UNUSED,
6159 int flags ATTRIBUTE_UNUSED,
6160 bool *no_add_attrs)
6162 tree decl = TYPE_NAME (*node);
6164 if (decl)
6166 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6167 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6168 *no_add_attrs = false;
6170 return NULL_TREE;
6172 #endif
6174 /* Return 0 if the attributes for two types are incompatible, 1 if they
6175 are compatible, and 2 if they are nearly compatible (which causes a
6176 warning to be generated). */
6177 static int
6178 arm_comp_type_attributes (const_tree type1, const_tree type2)
6180 int l1, l2, s1, s2;
6182 /* Check for mismatch of non-default calling convention. */
6183 if (TREE_CODE (type1) != FUNCTION_TYPE)
6184 return 1;
6186 /* Check for mismatched call attributes. */
6187 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6188 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6189 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6190 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6192 /* Only bother to check if an attribute is defined. */
6193 if (l1 | l2 | s1 | s2)
6195 /* If one type has an attribute, the other must have the same attribute. */
6196 if ((l1 != l2) || (s1 != s2))
6197 return 0;
6199 /* Disallow mixed attributes. */
6200 if ((l1 & s2) || (l2 & s1))
6201 return 0;
6204 /* Check for mismatched ISR attribute. */
6205 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6206 if (! l1)
6207 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6208 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6209 if (! l2)
6210 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6211 if (l1 != l2)
6212 return 0;
6214 return 1;
6217 /* Assigns default attributes to a newly defined type. This is used to
6218 set short_call/long_call attributes for function types of
6219 functions defined inside corresponding #pragma scopes. */
6220 static void
6221 arm_set_default_type_attributes (tree type)
6223 /* Add __attribute__ ((long_call)) to all functions when inside
6224 #pragma long_calls, or __attribute__ ((short_call)) when inside
6225 #pragma no_long_calls. */
6226 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6228 tree type_attr_list, attr_name;
6229 type_attr_list = TYPE_ATTRIBUTES (type);
6231 if (arm_pragma_long_calls == LONG)
6232 attr_name = get_identifier ("long_call");
6233 else if (arm_pragma_long_calls == SHORT)
6234 attr_name = get_identifier ("short_call");
6235 else
6236 return;
6238 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6239 TYPE_ATTRIBUTES (type) = type_attr_list;
6243 /* Return true if DECL is known to be linked into section SECTION. */
6245 static bool
6246 arm_function_in_section_p (tree decl, section *section)
6248 /* We can only be certain about functions defined in the same
6249 compilation unit. */
6250 if (!TREE_STATIC (decl))
6251 return false;
6253 /* Make sure that SYMBOL always binds to the definition in this
6254 compilation unit. */
6255 if (!targetm.binds_local_p (decl))
6256 return false;
6258 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6259 if (!DECL_SECTION_NAME (decl))
6261 /* Make sure that we will not create a unique section for DECL. */
6262 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6263 return false;
6266 return function_section (decl) == section;
6269 /* Return nonzero if a 32-bit "long_call" should be generated for
6270 a call from the current function to DECL. We generate a long_call
6271 if the function:
6273 a. has an __attribute__ ((long_call))
6274 or b. is within the scope of a #pragma long_calls
6275 or c. the -mlong-calls command line switch has been specified
6277 However we do not generate a long call if the function:
6279 d. has an __attribute__ ((short_call))
6280 or e. is inside the scope of a #pragma no_long_calls
6281 or f. is defined in the same section as the current function. */
6283 bool
6284 arm_is_long_call_p (tree decl)
6286 tree attrs;
6288 if (!decl)
6289 return TARGET_LONG_CALLS;
6291 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6292 if (lookup_attribute ("short_call", attrs))
6293 return false;
6295 /* For "f", be conservative, and only cater for cases in which the
6296 whole of the current function is placed in the same section. */
6297 if (!flag_reorder_blocks_and_partition
6298 && TREE_CODE (decl) == FUNCTION_DECL
6299 && arm_function_in_section_p (decl, current_function_section ()))
6300 return false;
6302 if (lookup_attribute ("long_call", attrs))
6303 return true;
6305 return TARGET_LONG_CALLS;
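/* For illustration: a hypothetical declaration such as

     extern void spi_flush (void) __attribute__ ((long_call));

   makes arm_is_long_call_p return true for calls to it regardless of
   -mlong-calls, while __attribute__ ((short_call)) (or a callee known to
   end up in the same section as the caller) makes it return false even
   when -mlong-calls is given.  */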
6308 /* Return nonzero if it is ok to make a tail-call to DECL. */
6309 static bool
6310 arm_function_ok_for_sibcall (tree decl, tree exp)
6312 unsigned long func_type;
6314 if (cfun->machine->sibcall_blocked)
6315 return false;
6317 /* Never tailcall something if we are generating code for Thumb-1. */
6318 if (TARGET_THUMB1)
6319 return false;
6321 /* The PIC register is live on entry to VxWorks PLT entries, so we
6322 must make the call before restoring the PIC register. */
6323 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6324 return false;
6326 /* If we are interworking and the function is not declared static
6327 then we can't tail-call it unless we know that it exists in this
6328 compilation unit (since it might be a Thumb routine). */
6329 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6330 && !TREE_ASM_WRITTEN (decl))
6331 return false;
6333 func_type = arm_current_func_type ();
6334 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6335 if (IS_INTERRUPT (func_type))
6336 return false;
6338 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6340 /* Check that the return value locations are the same. For
6341 example that we aren't returning a value from the sibling in
6342 a VFP register but then need to transfer it to a core
6343 register. */
6344 rtx a, b;
6346 a = arm_function_value (TREE_TYPE (exp), decl, false);
6347 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6348 cfun->decl, false);
6349 if (!rtx_equal_p (a, b))
6350 return false;
6353 /* Never tailcall if function may be called with a misaligned SP. */
6354 if (IS_STACKALIGN (func_type))
6355 return false;
6357 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6358 references should become a NOP. Don't convert such calls into
6359 sibling calls. */
6360 if (TARGET_AAPCS_BASED
6361 && arm_abi == ARM_ABI_AAPCS
6362 && decl
6363 && DECL_WEAK (decl))
6364 return false;
6366 /* Everything else is ok. */
6367 return true;
6371 /* Addressing mode support functions. */
6373 /* Return nonzero if X is a legitimate immediate operand when compiling
6374 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6376 legitimate_pic_operand_p (rtx x)
6378 if (GET_CODE (x) == SYMBOL_REF
6379 || (GET_CODE (x) == CONST
6380 && GET_CODE (XEXP (x, 0)) == PLUS
6381 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6382 return 0;
6384 return 1;
6387 /* Record that the current function needs a PIC register. Initialize
6388 cfun->machine->pic_reg if we have not already done so. */
6390 static void
6391 require_pic_register (void)
6393 /* A lot of the logic here is made obscure by the fact that this
6394 routine gets called as part of the rtx cost estimation process.
6395 We don't want those calls to affect any assumptions about the real
6396 function; and further, we can't call entry_of_function() until we
6397 start the real expansion process. */
6398 if (!crtl->uses_pic_offset_table)
6400 gcc_assert (can_create_pseudo_p ());
6401 if (arm_pic_register != INVALID_REGNUM
6402 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6404 if (!cfun->machine->pic_reg)
6405 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6407 /* Play games to avoid marking the function as needing pic
6408 if we are being called as part of the cost-estimation
6409 process. */
6410 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6411 crtl->uses_pic_offset_table = 1;
6413 else
6415 rtx seq, insn;
6417 if (!cfun->machine->pic_reg)
6418 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6420 /* Play games to avoid marking the function as needing pic
6421 if we are being called as part of the cost-estimation
6422 process. */
6423 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6425 crtl->uses_pic_offset_table = 1;
6426 start_sequence ();
6428 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6429 && arm_pic_register > LAST_LO_REGNUM)
6430 emit_move_insn (cfun->machine->pic_reg,
6431 gen_rtx_REG (Pmode, arm_pic_register));
6432 else
6433 arm_load_pic_register (0UL);
6435 seq = get_insns ();
6436 end_sequence ();
6438 for (insn = seq; insn; insn = NEXT_INSN (insn))
6439 if (INSN_P (insn))
6440 INSN_LOCATION (insn) = prologue_location;
6442 /* We can be called during expansion of PHI nodes, where
6443 we can't yet emit instructions directly in the final
6444 insn stream. Queue the insns on the entry edge, they will
6445 be committed after everything else is expanded. */
6446 insert_insn_on_edge (seq,
6447 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6454 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6456 if (GET_CODE (orig) == SYMBOL_REF
6457 || GET_CODE (orig) == LABEL_REF)
6459 rtx insn;
6461 if (reg == 0)
6463 gcc_assert (can_create_pseudo_p ());
6464 reg = gen_reg_rtx (Pmode);
6467 /* VxWorks does not impose a fixed gap between segments; the run-time
6468 gap can be different from the object-file gap. We therefore can't
6469 use GOTOFF unless we are absolutely sure that the symbol is in the
6470 same segment as the GOT. Unfortunately, the flexibility of linker
6471 scripts means that we can't be sure of that in general, so assume
6472 that GOTOFF is never valid on VxWorks. */
6473 if ((GET_CODE (orig) == LABEL_REF
6474 || (GET_CODE (orig) == SYMBOL_REF &&
6475 SYMBOL_REF_LOCAL_P (orig)))
6476 && NEED_GOT_RELOC
6477 && arm_pic_data_is_text_relative)
6478 insn = arm_pic_static_addr (orig, reg);
6479 else
6481 rtx pat;
6482 rtx mem;
6484 /* If this function doesn't have a pic register, create one now. */
6485 require_pic_register ();
6487 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6489 /* Make the MEM as close to a constant as possible. */
6490 mem = SET_SRC (pat);
6491 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6492 MEM_READONLY_P (mem) = 1;
6493 MEM_NOTRAP_P (mem) = 1;
6495 insn = emit_insn (pat);
6498 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6499 by loop. */
6500 set_unique_reg_note (insn, REG_EQUAL, orig);
6502 return reg;
6504 else if (GET_CODE (orig) == CONST)
6506 rtx base, offset;
6508 if (GET_CODE (XEXP (orig, 0)) == PLUS
6509 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6510 return orig;
6512 /* Handle the case where we have: const (UNSPEC_TLS). */
6513 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6514 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6515 return orig;
6517 /* Handle the case where we have:
6518 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6519 CONST_INT. */
6520 if (GET_CODE (XEXP (orig, 0)) == PLUS
6521 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6522 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6524 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6525 return orig;
6528 if (reg == 0)
6530 gcc_assert (can_create_pseudo_p ());
6531 reg = gen_reg_rtx (Pmode);
6534 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6536 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6537 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6538 base == reg ? 0 : reg);
6540 if (CONST_INT_P (offset))
6542 /* The base register doesn't really matter; we only want to
6543 test the index for the appropriate mode. */
6544 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6546 gcc_assert (can_create_pseudo_p ());
6547 offset = force_reg (Pmode, offset);
6550 if (CONST_INT_P (offset))
6551 return plus_constant (Pmode, base, INTVAL (offset));
6554 if (GET_MODE_SIZE (mode) > 4
6555 && (GET_MODE_CLASS (mode) == MODE_INT
6556 || TARGET_SOFT_FLOAT))
6558 emit_insn (gen_addsi3 (reg, base, offset));
6559 return reg;
6562 return gen_rtx_PLUS (Pmode, base, offset);
6565 return orig;
6569 /* Find a spare register to use during the prolog of a function. */
6571 static int
6572 thumb_find_work_register (unsigned long pushed_regs_mask)
6574 int reg;
6576 /* Check the argument registers first as these are call-used. The
6577 register allocation order means that sometimes r3 might be used
6578 but earlier argument registers might not, so check them all. */
6579 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6580 if (!df_regs_ever_live_p (reg))
6581 return reg;
6583 /* Before going on to check the call-saved registers we can try a couple
6584 more ways of deducing that r3 is available. The first is when we are
6585 pushing anonymous arguments onto the stack and we have fewer than 4
6586 registers' worth of fixed arguments (*). In this case r3 will be part of
6587 the variable argument list and so we can be sure that it will be
6588 pushed right at the start of the function. Hence it will be available
6589 for the rest of the prologue.
6590 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6591 if (cfun->machine->uses_anonymous_args
6592 && crtl->args.pretend_args_size > 0)
6593 return LAST_ARG_REGNUM;
6595 /* The other case is when we have fixed arguments but fewer than 4 registers'
6596 worth. In this case r3 might be used in the body of the function, but
6597 it is not being used to convey an argument into the function. In theory
6598 we could just check crtl->args.size to see how many bytes are
6599 being passed in argument registers, but it seems that it is unreliable.
6600 Sometimes it will have the value 0 when in fact arguments are being
6601 passed. (See testcase execute/20021111-1.c for an example). So we also
6602 check the args_info.nregs field as well. The problem with this field is
6603 that it makes no allowances for arguments that are passed to the
6604 function but which are not used. Hence we could miss an opportunity
6605 when a function has an unused argument in r3. But it is better to be
6606 safe than to be sorry. */
6607 if (! cfun->machine->uses_anonymous_args
6608 && crtl->args.size >= 0
6609 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6610 && (TARGET_AAPCS_BASED
6611 ? crtl->args.info.aapcs_ncrn < 4
6612 : crtl->args.info.nregs < 4))
6613 return LAST_ARG_REGNUM;
6615 /* Otherwise look for a call-saved register that is going to be pushed. */
6616 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6617 if (pushed_regs_mask & (1 << reg))
6618 return reg;
6620 if (TARGET_THUMB2)
6622 /* Thumb-2 can use high regs. */
6623 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6624 if (pushed_regs_mask & (1 << reg))
6625 return reg;
6627 /* Something went wrong - thumb_compute_save_reg_mask()
6628 should have arranged for a suitable register to be pushed. */
6629 gcc_unreachable ();
6632 static GTY(()) int pic_labelno;
6634 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6635 low register. */
6637 void
6638 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6640 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6642 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6643 return;
6645 gcc_assert (flag_pic);
6647 pic_reg = cfun->machine->pic_reg;
6648 if (TARGET_VXWORKS_RTP)
6650 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6651 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6652 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6654 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6656 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6657 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6659 else
6661 /* We use an UNSPEC rather than a LABEL_REF because this label
6662 never appears in the code stream. */
6664 labelno = GEN_INT (pic_labelno++);
6665 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6666 l1 = gen_rtx_CONST (VOIDmode, l1);
6668 /* On the ARM the PC register contains 'dot + 8' at the time of the
6669 addition, on the Thumb it is 'dot + 4'. */
6670 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6671 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6672 UNSPEC_GOTSYM_OFF);
6673 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6675 if (TARGET_32BIT)
6677 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6679 else /* TARGET_THUMB1 */
6681 if (arm_pic_register != INVALID_REGNUM
6682 && REGNO (pic_reg) > LAST_LO_REGNUM)
6684 /* We will have pushed the pic register, so we should always be
6685 able to find a work register. */
6686 pic_tmp = gen_rtx_REG (SImode,
6687 thumb_find_work_register (saved_regs));
6688 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6689 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6690 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6692 else if (arm_pic_register != INVALID_REGNUM
6693 && arm_pic_register > LAST_LO_REGNUM
6694 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6696 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6697 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6698 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6700 else
6701 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6705 /* Need to emit this whether or not we obey regdecls,
6706 since setjmp/longjmp can cause life info to screw up. */
6707 emit_use (pic_reg);
6710 /* Generate code to load the address of a static var when flag_pic is set. */
6711 static rtx
6712 arm_pic_static_addr (rtx orig, rtx reg)
6714 rtx l1, labelno, offset_rtx, insn;
6716 gcc_assert (flag_pic);
6718 /* We use an UNSPEC rather than a LABEL_REF because this label
6719 never appears in the code stream. */
6720 labelno = GEN_INT (pic_labelno++);
6721 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6722 l1 = gen_rtx_CONST (VOIDmode, l1);
6724 /* On the ARM the PC register contains 'dot + 8' at the time of the
6725 addition, on the Thumb it is 'dot + 4'. */
6726 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6727 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6728 UNSPEC_SYMBOL_OFFSET);
6729 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6731 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6732 return insn;
6735 /* Return nonzero if X is valid as an ARM state addressing register. */
6736 static int
6737 arm_address_register_rtx_p (rtx x, int strict_p)
6739 int regno;
6741 if (!REG_P (x))
6742 return 0;
6744 regno = REGNO (x);
6746 if (strict_p)
6747 return ARM_REGNO_OK_FOR_BASE_P (regno);
6749 return (regno <= LAST_ARM_REGNUM
6750 || regno >= FIRST_PSEUDO_REGISTER
6751 || regno == FRAME_POINTER_REGNUM
6752 || regno == ARG_POINTER_REGNUM);
6755 /* Return TRUE if this rtx is the difference of a symbol and a label,
6756 and will reduce to a PC-relative relocation in the object file.
6757 Expressions like this can be left alone when generating PIC, rather
6758 than forced through the GOT. */
6759 static int
6760 pcrel_constant_p (rtx x)
6762 if (GET_CODE (x) == MINUS)
6763 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6765 return FALSE;
6768 /* Return true if X will surely end up in an index register after next
6769 splitting pass. */
6770 static bool
6771 will_be_in_index_register (const_rtx x)
6773 /* arm.md: calculate_pic_address will split this into a register. */
6774 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6777 /* Return nonzero if X is a valid ARM state address operand. */
6779 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6780 int strict_p)
6782 bool use_ldrd;
6783 enum rtx_code code = GET_CODE (x);
6785 if (arm_address_register_rtx_p (x, strict_p))
6786 return 1;
6788 use_ldrd = (TARGET_LDRD
6789 && (mode == DImode
6790 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6792 if (code == POST_INC || code == PRE_DEC
6793 || ((code == PRE_INC || code == POST_DEC)
6794 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6795 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6797 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6798 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6799 && GET_CODE (XEXP (x, 1)) == PLUS
6800 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6802 rtx addend = XEXP (XEXP (x, 1), 1);
6804 /* Don't allow ldrd post increment by register because it's hard
6805 to fix up invalid register choices. */
6806 if (use_ldrd
6807 && GET_CODE (x) == POST_MODIFY
6808 && REG_P (addend))
6809 return 0;
6811 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6812 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6815 /* After reload constants split into minipools will have addresses
6816 from a LABEL_REF. */
6817 else if (reload_completed
6818 && (code == LABEL_REF
6819 || (code == CONST
6820 && GET_CODE (XEXP (x, 0)) == PLUS
6821 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6822 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6823 return 1;
6825 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6826 return 0;
6828 else if (code == PLUS)
6830 rtx xop0 = XEXP (x, 0);
6831 rtx xop1 = XEXP (x, 1);
6833 return ((arm_address_register_rtx_p (xop0, strict_p)
6834 && ((CONST_INT_P (xop1)
6835 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6836 || (!strict_p && will_be_in_index_register (xop1))))
6837 || (arm_address_register_rtx_p (xop1, strict_p)
6838 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6841 #if 0
6842 /* Reload currently can't handle MINUS, so disable this for now */
6843 else if (GET_CODE (x) == MINUS)
6845 rtx xop0 = XEXP (x, 0);
6846 rtx xop1 = XEXP (x, 1);
6848 return (arm_address_register_rtx_p (xop0, strict_p)
6849 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6851 #endif
6853 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6854 && code == SYMBOL_REF
6855 && CONSTANT_POOL_ADDRESS_P (x)
6856 && ! (flag_pic
6857 && symbol_mentioned_p (get_pool_constant (x))
6858 && ! pcrel_constant_p (get_pool_constant (x))))
6859 return 1;
6861 return 0;
6864 /* Return nonzero if X is a valid Thumb-2 address operand. */
6865 static int
6866 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6868 bool use_ldrd;
6869 enum rtx_code code = GET_CODE (x);
6871 if (arm_address_register_rtx_p (x, strict_p))
6872 return 1;
6874 use_ldrd = (TARGET_LDRD
6875 && (mode == DImode
6876 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6878 if (code == POST_INC || code == PRE_DEC
6879 || ((code == PRE_INC || code == POST_DEC)
6880 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6881 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6883 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6884 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6885 && GET_CODE (XEXP (x, 1)) == PLUS
6886 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6888 /* Thumb-2 only has autoincrement by constant. */
6889 rtx addend = XEXP (XEXP (x, 1), 1);
6890 HOST_WIDE_INT offset;
6892 if (!CONST_INT_P (addend))
6893 return 0;
6895 offset = INTVAL(addend);
6896 if (GET_MODE_SIZE (mode) <= 4)
6897 return (offset > -256 && offset < 256);
6899 return (use_ldrd && offset > -1024 && offset < 1024
6900 && (offset & 3) == 0);
6903 /* After reload constants split into minipools will have addresses
6904 from a LABEL_REF. */
6905 else if (reload_completed
6906 && (code == LABEL_REF
6907 || (code == CONST
6908 && GET_CODE (XEXP (x, 0)) == PLUS
6909 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6910 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6911 return 1;
6913 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6914 return 0;
6916 else if (code == PLUS)
6918 rtx xop0 = XEXP (x, 0);
6919 rtx xop1 = XEXP (x, 1);
6921 return ((arm_address_register_rtx_p (xop0, strict_p)
6922 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6923 || (!strict_p && will_be_in_index_register (xop1))))
6924 || (arm_address_register_rtx_p (xop1, strict_p)
6925 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6928 /* Normally we can assign constant values to target registers without
6929 the help of a constant pool. But there are cases where we have to use a
6930 constant pool, for example:
6931 1) assign a label to a register.
6932 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
6934 A constant pool access of the form:
6935 (set (reg r0) (mem (symbol_ref (".LC0"))))
6936 will cause the use of a literal pool (later, in function arm_reorg).
6937 So here we mark such a form as invalid; the compiler will then
6938 adjust it into:
6939 (set (reg r0) (symbol_ref (".LC0")))
6940 (set (reg r0) (mem (reg r0))).
6941 No extra register is required, and (mem (reg r0)) won't cause the use
6942 of literal pools. */
6943 else if (arm_disable_literal_pool && code == SYMBOL_REF
6944 && CONSTANT_POOL_ADDRESS_P (x))
6945 return 0;
6947 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6948 && code == SYMBOL_REF
6949 && CONSTANT_POOL_ADDRESS_P (x)
6950 && ! (flag_pic
6951 && symbol_mentioned_p (get_pool_constant (x))
6952 && ! pcrel_constant_p (get_pool_constant (x))))
6953 return 1;
6955 return 0;
6958 /* Return nonzero if INDEX is valid for an address index operand in
6959 ARM state. */
6960 static int
6961 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6962 int strict_p)
6964 HOST_WIDE_INT range;
6965 enum rtx_code code = GET_CODE (index);
6967 /* Standard coprocessor addressing modes. */
6968 if (TARGET_HARD_FLOAT
6969 && TARGET_VFP
6970 && (mode == SFmode || mode == DFmode))
6971 return (code == CONST_INT && INTVAL (index) < 1024
6972 && INTVAL (index) > -1024
6973 && (INTVAL (index) & 3) == 0);
6975 /* For quad modes, we restrict the constant offset to be slightly less
6976 than what the instruction format permits. We do this because for
6977 quad mode moves, we will actually decompose them into two separate
6978 double-mode reads or writes. INDEX must therefore be a valid
6979 (double-mode) offset and so should INDEX+8. */
6980 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6981 return (code == CONST_INT
6982 && INTVAL (index) < 1016
6983 && INTVAL (index) > -1024
6984 && (INTVAL (index) & 3) == 0);
6986 /* We have no such constraint on double mode offsets, so we permit the
6987 full range of the instruction format. */
6988 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6989 return (code == CONST_INT
6990 && INTVAL (index) < 1024
6991 && INTVAL (index) > -1024
6992 && (INTVAL (index) & 3) == 0);
6994 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6995 return (code == CONST_INT
6996 && INTVAL (index) < 1024
6997 && INTVAL (index) > -1024
6998 && (INTVAL (index) & 3) == 0);
7000 if (arm_address_register_rtx_p (index, strict_p)
7001 && (GET_MODE_SIZE (mode) <= 4))
7002 return 1;
7004 if (mode == DImode || mode == DFmode)
7006 if (code == CONST_INT)
7008 HOST_WIDE_INT val = INTVAL (index);
7010 if (TARGET_LDRD)
7011 return val > -256 && val < 256;
7012 else
7013 return val > -4096 && val < 4092;
7016 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7019 if (GET_MODE_SIZE (mode) <= 4
7020 && ! (arm_arch4
7021 && (mode == HImode
7022 || mode == HFmode
7023 || (mode == QImode && outer == SIGN_EXTEND))))
7025 if (code == MULT)
7027 rtx xiop0 = XEXP (index, 0);
7028 rtx xiop1 = XEXP (index, 1);
7030 return ((arm_address_register_rtx_p (xiop0, strict_p)
7031 && power_of_two_operand (xiop1, SImode))
7032 || (arm_address_register_rtx_p (xiop1, strict_p)
7033 && power_of_two_operand (xiop0, SImode)));
7035 else if (code == LSHIFTRT || code == ASHIFTRT
7036 || code == ASHIFT || code == ROTATERT)
7038 rtx op = XEXP (index, 1);
7040 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7041 && CONST_INT_P (op)
7042 && INTVAL (op) > 0
7043 && INTVAL (op) <= 31);
7047 /* For ARM v4 we may be doing a sign-extend operation during the
7048 load. */
7049 if (arm_arch4)
7051 if (mode == HImode
7052 || mode == HFmode
7053 || (outer == SIGN_EXTEND && mode == QImode))
7054 range = 256;
7055 else
7056 range = 4096;
7058 else
7059 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7061 return (code == CONST_INT
7062 && INTVAL (index) < range
7063 && INTVAL (index) > -range);
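/* Rough summary of the ARM-state immediate ranges tested above (the
   authoritative limits are the tests themselves): VFP SF/DF, iWMMXt and
   Neon double-reg accesses take word-aligned offsets strictly inside
   -1024..1024; Neon quad-reg modes are capped at 1016 so that offset+8 is
   still valid for the second half of the move; DImode/DFmode with LDRD is
   limited to -255..255, and without LDRD to -4095..4091 so that the second
   word is still addressable; other accesses allow up to +/-4095, dropping
   to +/-255 for ARMv4+ halfword (ldrh) and sign-extended byte (ldrsb)
   forms.  */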
7066 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7067 index operand. i.e. 1, 2, 4 or 8. */
7068 static bool
7069 thumb2_index_mul_operand (rtx op)
7071 HOST_WIDE_INT val;
7073 if (!CONST_INT_P (op))
7074 return false;
7076 val = INTVAL(op);
7077 return (val == 1 || val == 2 || val == 4 || val == 8);
7080 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7081 static int
7082 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
7084 enum rtx_code code = GET_CODE (index);
7086 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7087 /* Standard coprocessor addressing modes. */
7088 if (TARGET_HARD_FLOAT
7089 && TARGET_VFP
7090 && (mode == SFmode || mode == DFmode))
7091 return (code == CONST_INT && INTVAL (index) < 1024
7092 /* Thumb-2 allows only a > -256 index range for its core register
7093 load/stores. Since we allow SF/DF in core registers, we have
7094 to use the intersection of the -256..4096 (core) and -1024..1024
7095 (coprocessor) ranges. */
7096 && INTVAL (index) > -256
7097 && (INTVAL (index) & 3) == 0);
7099 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7101 /* For DImode assume values will usually live in core regs
7102 and only allow LDRD addressing modes. */
7103 if (!TARGET_LDRD || mode != DImode)
7104 return (code == CONST_INT
7105 && INTVAL (index) < 1024
7106 && INTVAL (index) > -1024
7107 && (INTVAL (index) & 3) == 0);
7110 /* For quad modes, we restrict the constant offset to be slightly less
7111 than what the instruction format permits. We do this because for
7112 quad mode moves, we will actually decompose them into two separate
7113 double-mode reads or writes. INDEX must therefore be a valid
7114 (double-mode) offset and so should INDEX+8. */
7115 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7116 return (code == CONST_INT
7117 && INTVAL (index) < 1016
7118 && INTVAL (index) > -1024
7119 && (INTVAL (index) & 3) == 0);
7121 /* We have no such constraint on double mode offsets, so we permit the
7122 full range of the instruction format. */
7123 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7124 return (code == CONST_INT
7125 && INTVAL (index) < 1024
7126 && INTVAL (index) > -1024
7127 && (INTVAL (index) & 3) == 0);
7129 if (arm_address_register_rtx_p (index, strict_p)
7130 && (GET_MODE_SIZE (mode) <= 4))
7131 return 1;
7133 if (mode == DImode || mode == DFmode)
7135 if (code == CONST_INT)
7137 HOST_WIDE_INT val = INTVAL (index);
7138 /* ??? Can we assume ldrd for thumb2? */
7139 /* Thumb-2 ldrd only has reg+const addressing modes. */
7140 /* ldrd supports offsets of +-1020.
7141 However the ldr fallback does not. */
7142 return val > -256 && val < 256 && (val & 3) == 0;
7144 else
7145 return 0;
7148 if (code == MULT)
7150 rtx xiop0 = XEXP (index, 0);
7151 rtx xiop1 = XEXP (index, 1);
7153 return ((arm_address_register_rtx_p (xiop0, strict_p)
7154 && thumb2_index_mul_operand (xiop1))
7155 || (arm_address_register_rtx_p (xiop1, strict_p)
7156 && thumb2_index_mul_operand (xiop0)));
7158 else if (code == ASHIFT)
7160 rtx op = XEXP (index, 1);
7162 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7163 && CONST_INT_P (op)
7164 && INTVAL (op) > 0
7165 && INTVAL (op) <= 3);
7168 return (code == CONST_INT
7169 && INTVAL (index) < 4096
7170 && INTVAL (index) > -256);
7173 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7174 static int
7175 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7177 int regno;
7179 if (!REG_P (x))
7180 return 0;
7182 regno = REGNO (x);
7184 if (strict_p)
7185 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7187 return (regno <= LAST_LO_REGNUM
7188 || regno > LAST_VIRTUAL_REGISTER
7189 || regno == FRAME_POINTER_REGNUM
7190 || (GET_MODE_SIZE (mode) >= 4
7191 && (regno == STACK_POINTER_REGNUM
7192 || regno >= FIRST_PSEUDO_REGISTER
7193 || x == hard_frame_pointer_rtx
7194 || x == arg_pointer_rtx)));
7197 /* Return nonzero if x is a legitimate index register. This is the case
7198 for any base register that can access a QImode object. */
7199 inline static int
7200 thumb1_index_register_rtx_p (rtx x, int strict_p)
7202 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7205 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7207 The AP may be eliminated to either the SP or the FP, so we use the
7208 least common denominator, e.g. SImode, and offsets from 0 to 64.
7210 ??? Verify whether the above is the right approach.
7212 ??? Also, the FP may be eliminated to the SP, so perhaps that
7213 needs special handling also.
7215 ??? Look at how the mips16 port solves this problem. It probably uses
7216 better ways to solve some of these problems.
7218 Although it is not incorrect, we don't accept QImode and HImode
7219 addresses based on the frame pointer or arg pointer until the
7220 reload pass starts. This is so that eliminating such addresses
7221 into stack based ones won't produce impossible code. */
7223 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7225 /* ??? Not clear if this is right. Experiment. */
7226 if (GET_MODE_SIZE (mode) < 4
7227 && !(reload_in_progress || reload_completed)
7228 && (reg_mentioned_p (frame_pointer_rtx, x)
7229 || reg_mentioned_p (arg_pointer_rtx, x)
7230 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7231 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7232 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7233 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7234 return 0;
7236 /* Accept any base register. SP only in SImode or larger. */
7237 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7238 return 1;
7240 /* This is PC relative data before arm_reorg runs. */
7241 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7242 && GET_CODE (x) == SYMBOL_REF
7243 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7244 return 1;
7246 /* This is PC relative data after arm_reorg runs. */
7247 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7248 && reload_completed
7249 && (GET_CODE (x) == LABEL_REF
7250 || (GET_CODE (x) == CONST
7251 && GET_CODE (XEXP (x, 0)) == PLUS
7252 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7253 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7254 return 1;
7256 /* Post-inc indexing only supported for SImode and larger. */
7257 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7258 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7259 return 1;
7261 else if (GET_CODE (x) == PLUS)
7263 /* REG+REG address can be any two index registers. */
7264 /* We disallow FRAME+REG addressing since we know that FRAME
7265 will be replaced with STACK, and SP relative addressing only
7266 permits SP+OFFSET. */
7267 if (GET_MODE_SIZE (mode) <= 4
7268 && XEXP (x, 0) != frame_pointer_rtx
7269 && XEXP (x, 1) != frame_pointer_rtx
7270 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7271 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7272 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7273 return 1;
7275 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7276 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7277 || XEXP (x, 0) == arg_pointer_rtx)
7278 && CONST_INT_P (XEXP (x, 1))
7279 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7280 return 1;
7282 /* REG+const has a 10-bit offset for SP, but only SImode and
7283 larger are supported. */
7284 /* ??? Should probably check for DI/DFmode overflow here
7285 just like GO_IF_LEGITIMATE_OFFSET does. */
7286 else if (REG_P (XEXP (x, 0))
7287 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7288 && GET_MODE_SIZE (mode) >= 4
7289 && CONST_INT_P (XEXP (x, 1))
7290 && INTVAL (XEXP (x, 1)) >= 0
7291 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7292 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7293 return 1;
7295 else if (REG_P (XEXP (x, 0))
7296 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7297 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7298 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7299 && REGNO (XEXP (x, 0))
7300 <= LAST_VIRTUAL_POINTER_REGISTER))
7301 && GET_MODE_SIZE (mode) >= 4
7302 && CONST_INT_P (XEXP (x, 1))
7303 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7304 return 1;
7307 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7308 && GET_MODE_SIZE (mode) == 4
7309 && GET_CODE (x) == SYMBOL_REF
7310 && CONSTANT_POOL_ADDRESS_P (x)
7311 && ! (flag_pic
7312 && symbol_mentioned_p (get_pool_constant (x))
7313 && ! pcrel_constant_p (get_pool_constant (x))))
7314 return 1;
7316 return 0;
7319 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7320 instruction of mode MODE. */
7322 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7324 switch (GET_MODE_SIZE (mode))
7326 case 1:
7327 return val >= 0 && val < 32;
7329 case 2:
7330 return val >= 0 && val < 64 && (val & 1) == 0;
7332 default:
7333 return (val >= 0
7334 && (val + GET_MODE_SIZE (mode)) <= 128
7335 && (val & 3) == 0);
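/* For illustration, the Thumb-1 limits above match the 16-bit load/store
   encodings: byte accesses take offsets 0..31, halfword accesses even
   offsets 0..62, and word (or larger) accesses 4-byte-aligned offsets such
   that the whole access stays within 128 bytes of the base register.  */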
7339 bool
7340 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7342 if (TARGET_ARM)
7343 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7344 else if (TARGET_THUMB2)
7345 return thumb2_legitimate_address_p (mode, x, strict_p);
7346 else /* if (TARGET_THUMB1) */
7347 return thumb1_legitimate_address_p (mode, x, strict_p);
7350 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7352 Given an rtx X being reloaded into a reg required to be
7353 in class CLASS, return the class of reg to actually use.
7354 In general this is just CLASS, but for the Thumb core registers and
7355 immediate constants we prefer a LO_REGS class or a subset. */
7357 static reg_class_t
7358 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7360 if (TARGET_32BIT)
7361 return rclass;
7362 else
7364 if (rclass == GENERAL_REGS)
7365 return LO_REGS;
7366 else
7367 return rclass;
7371 /* Build the SYMBOL_REF for __tls_get_addr. */
7373 static GTY(()) rtx tls_get_addr_libfunc;
7375 static rtx
7376 get_tls_get_addr (void)
7378 if (!tls_get_addr_libfunc)
7379 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7380 return tls_get_addr_libfunc;
7384 arm_load_tp (rtx target)
7386 if (!target)
7387 target = gen_reg_rtx (SImode);
7389 if (TARGET_HARD_TP)
7391 /* Can return in any reg. */
7392 emit_insn (gen_load_tp_hard (target));
7394 else
7396 /* Always returned in r0. Immediately copy the result into a pseudo,
7397 otherwise other uses of r0 (e.g. setting up function arguments) may
7398 clobber the value. */
7400 rtx tmp;
7402 emit_insn (gen_load_tp_soft ());
7404 tmp = gen_rtx_REG (SImode, 0);
7405 emit_move_insn (target, tmp);
7407 return target;
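/* Illustrative note (assumed expansion, not part of the original source):
   with a hardware thread pointer the load_tp_hard pattern typically
   expands to a single CP15 read such as

       mrc p15, 0, rD, c13, c0, 3    @ read the user-mode TP register

   and can target any register, whereas the soft variant is a helper call
   (conventionally __aeabi_read_tp) whose result is fixed in r0, which is
   why it is copied into a pseudo straight away.  */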
7410 static rtx
7411 load_tls_operand (rtx x, rtx reg)
7413 rtx tmp;
7415 if (reg == NULL_RTX)
7416 reg = gen_reg_rtx (SImode);
7418 tmp = gen_rtx_CONST (SImode, x);
7420 emit_move_insn (reg, tmp);
7422 return reg;
7425 static rtx
7426 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7428 rtx insns, label, labelno, sum;
7430 gcc_assert (reloc != TLS_DESCSEQ);
7431 start_sequence ();
7433 labelno = GEN_INT (pic_labelno++);
7434 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7435 label = gen_rtx_CONST (VOIDmode, label);
7437 sum = gen_rtx_UNSPEC (Pmode,
7438 gen_rtvec (4, x, GEN_INT (reloc), label,
7439 GEN_INT (TARGET_ARM ? 8 : 4)),
7440 UNSPEC_TLS);
7441 reg = load_tls_operand (sum, reg);
7443 if (TARGET_ARM)
7444 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7445 else
7446 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7448 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7449 LCT_PURE, /* LCT_CONST? */
7450 Pmode, 1, reg, Pmode);
7452 insns = get_insns ();
7453 end_sequence ();
7455 return insns;
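/* Illustrative sketch of the emitted sequence (assumed shape, not taken
   from the original source):

       ldr   rT, <pc-relative TLS GOT offset for x>
   .Lpic:
       add   rT, pc, rT              @ dot-plus-eight (ARM) or -four (Thumb)
       bl    __tls_get_addr          @ result comes back as the libcall value

   The 8 vs. 4 constant in the UNSPEC above matches the pipeline PC bias
   used by the pic_add_dot_plus_{eight,four} patterns.  */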
7458 static rtx
7459 arm_tls_descseq_addr (rtx x, rtx reg)
7461 rtx labelno = GEN_INT (pic_labelno++);
7462 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7463 rtx sum = gen_rtx_UNSPEC (Pmode,
7464 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7465 gen_rtx_CONST (VOIDmode, label),
7466 GEN_INT (!TARGET_ARM)),
7467 UNSPEC_TLS);
7468 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7470 emit_insn (gen_tlscall (x, labelno));
7471 if (!reg)
7472 reg = gen_reg_rtx (SImode);
7473 else
7474 gcc_assert (REGNO (reg) != 0);
7476 emit_move_insn (reg, reg0);
7478 return reg;
7482 legitimize_tls_address (rtx x, rtx reg)
7484 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7485 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7487 switch (model)
7489 case TLS_MODEL_GLOBAL_DYNAMIC:
7490 if (TARGET_GNU2_TLS)
7492 reg = arm_tls_descseq_addr (x, reg);
7494 tp = arm_load_tp (NULL_RTX);
7496 dest = gen_rtx_PLUS (Pmode, tp, reg);
7498 else
7500 /* Original scheme */
7501 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7502 dest = gen_reg_rtx (Pmode);
7503 emit_libcall_block (insns, dest, ret, x);
7505 return dest;
7507 case TLS_MODEL_LOCAL_DYNAMIC:
7508 if (TARGET_GNU2_TLS)
7510 reg = arm_tls_descseq_addr (x, reg);
7512 tp = arm_load_tp (NULL_RTX);
7514 dest = gen_rtx_PLUS (Pmode, tp, reg);
7516 else
7518 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7520 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7521 share the LDM result with other LD model accesses. */
7522 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7523 UNSPEC_TLS);
7524 dest = gen_reg_rtx (Pmode);
7525 emit_libcall_block (insns, dest, ret, eqv);
7527 /* Load the addend. */
7528 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7529 GEN_INT (TLS_LDO32)),
7530 UNSPEC_TLS);
7531 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7532 dest = gen_rtx_PLUS (Pmode, dest, addend);
7534 return dest;
7536 case TLS_MODEL_INITIAL_EXEC:
7537 labelno = GEN_INT (pic_labelno++);
7538 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7539 label = gen_rtx_CONST (VOIDmode, label);
7540 sum = gen_rtx_UNSPEC (Pmode,
7541 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7542 GEN_INT (TARGET_ARM ? 8 : 4)),
7543 UNSPEC_TLS);
7544 reg = load_tls_operand (sum, reg);
7546 if (TARGET_ARM)
7547 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7548 else if (TARGET_THUMB2)
7549 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7550 else
7552 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7553 emit_move_insn (reg, gen_const_mem (SImode, reg));
7556 tp = arm_load_tp (NULL_RTX);
7558 return gen_rtx_PLUS (Pmode, tp, reg);
7560 case TLS_MODEL_LOCAL_EXEC:
7561 tp = arm_load_tp (NULL_RTX);
7563 reg = gen_rtx_UNSPEC (Pmode,
7564 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7565 UNSPEC_TLS);
7566 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7568 return gen_rtx_PLUS (Pmode, tp, reg);
7570 default:
7571 abort ();
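/* Summary of the cases above (illustrative, not part of the original
   source), writing tp for the value returned by arm_load_tp:

     global-dynamic:  dest = __tls_get_addr (GD32 entry for x)
                      (or tp + descriptor-sequence result for TARGET_GNU2_TLS)
     local-dynamic:   dest = __tls_get_addr (LDM32 entry) + LDO32 offset of x
                      (likewise tp + descriptor-sequence result for GNU2 TLS)
     initial-exec:    dest = tp + offset loaded from the GOT (TLS_IE32)
     local-exec:      dest = tp + link-time constant offset  (TLS_LE32)  */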
7575 /* Try machine-dependent ways of modifying an illegitimate address
7576 to be legitimate. If we find one, return the new, valid address. */
7578 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7580 if (arm_tls_referenced_p (x))
7582 rtx addend = NULL;
7584 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7586 addend = XEXP (XEXP (x, 0), 1);
7587 x = XEXP (XEXP (x, 0), 0);
7590 if (GET_CODE (x) != SYMBOL_REF)
7591 return x;
7593 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7595 x = legitimize_tls_address (x, NULL_RTX);
7597 if (addend)
7599 x = gen_rtx_PLUS (SImode, x, addend);
7600 orig_x = x;
7602 else
7603 return x;
7606 if (!TARGET_ARM)
7608 /* TODO: legitimize_address for Thumb2. */
7609 if (TARGET_THUMB2)
7610 return x;
7611 return thumb_legitimize_address (x, orig_x, mode);
7614 if (GET_CODE (x) == PLUS)
7616 rtx xop0 = XEXP (x, 0);
7617 rtx xop1 = XEXP (x, 1);
7619 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7620 xop0 = force_reg (SImode, xop0);
7622 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7623 && !symbol_mentioned_p (xop1))
7624 xop1 = force_reg (SImode, xop1);
7626 if (ARM_BASE_REGISTER_RTX_P (xop0)
7627 && CONST_INT_P (xop1))
7629 HOST_WIDE_INT n, low_n;
7630 rtx base_reg, val;
7631 n = INTVAL (xop1);
7633 /* VFP addressing modes actually allow greater offsets, but for
7634 now we just stick with the lowest common denominator. */
7635 if (mode == DImode
7636 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7638 low_n = n & 0x0f;
7639 n &= ~0x0f;
7640 if (low_n > 4)
7642 n += 16;
7643 low_n -= 16;
7646 else
7648 low_n = ((mode) == TImode ? 0
7649 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7650 n -= low_n;
7653 base_reg = gen_reg_rtx (SImode);
7654 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7655 emit_move_insn (base_reg, val);
7656 x = plus_constant (Pmode, base_reg, low_n);
7658 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7659 x = gen_rtx_PLUS (SImode, xop0, xop1);
7662 /* XXX We don't allow MINUS any more -- see comment in
7663 arm_legitimate_address_outer_p (). */
7664 else if (GET_CODE (x) == MINUS)
7666 rtx xop0 = XEXP (x, 0);
7667 rtx xop1 = XEXP (x, 1);
7669 if (CONSTANT_P (xop0))
7670 xop0 = force_reg (SImode, xop0);
7672 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7673 xop1 = force_reg (SImode, xop1);
7675 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7676 x = gen_rtx_MINUS (SImode, xop0, xop1);
7679 /* Make sure to take full advantage of the pre-indexed addressing mode
7680 with absolute addresses, which often allows the base register to be
7681 factored out across multiple adjacent memory references, and might
7682 even allow the mini pool to be avoided entirely. */
7683 else if (CONST_INT_P (x) && optimize > 0)
7685 unsigned int bits;
7686 HOST_WIDE_INT mask, base, index;
7687 rtx base_reg;
7689 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7690 use an 8-bit index. So let's use a 12-bit index for SImode only and
7691 hope that arm_gen_constant will enable ldrb to use more bits. */
7692 bits = (mode == SImode) ? 12 : 8;
7693 mask = (1 << bits) - 1;
7694 base = INTVAL (x) & ~mask;
7695 index = INTVAL (x) & mask;
7696 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7698 /* It'll most probably be more efficient to generate the base
7699 with more bits set and use a negative index instead. */
7700 base |= mask;
7701 index -= mask;
7703 base_reg = force_reg (SImode, GEN_INT (base));
7704 x = plus_constant (Pmode, base_reg, index);
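/* Worked example (illustrative): for SImode and x = 0x8004, bits = 12,
   so mask = 0xfff, base = 0x8000 and index = 0x4.  bit_count (0x8000) = 1,
   which is not more than (32 - 12) / 2 = 10, so no negative-index
   adjustment is made; the result is (plus (reg holding 0x8000) 4).  */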
7707 if (flag_pic)
7709 /* We need to find and carefully transform any SYMBOL and LABEL
7710 references; so go back to the original address expression. */
7711 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7713 if (new_x != orig_x)
7714 x = new_x;
7717 return x;
7721 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7722 to be legitimate. If we find one, return the new, valid address. */
7724 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7726 if (GET_CODE (x) == PLUS
7727 && CONST_INT_P (XEXP (x, 1))
7728 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7729 || INTVAL (XEXP (x, 1)) < 0))
7731 rtx xop0 = XEXP (x, 0);
7732 rtx xop1 = XEXP (x, 1);
7733 HOST_WIDE_INT offset = INTVAL (xop1);
7735 /* Try and fold the offset into a biasing of the base register and
7736 then offsetting that. Don't do this when optimizing for space
7737 since it can cause too many CSEs. */
7738 if (optimize_size && offset >= 0
7739 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7741 HOST_WIDE_INT delta;
7743 if (offset >= 256)
7744 delta = offset - (256 - GET_MODE_SIZE (mode));
7745 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7746 delta = 31 * GET_MODE_SIZE (mode);
7747 else
7748 delta = offset & (~31 * GET_MODE_SIZE (mode));
7750 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7751 NULL_RTX);
7752 x = plus_constant (Pmode, xop0, delta);
7754 else if (offset < 0 && offset > -256)
7755 /* Small negative offsets are best done with a subtract before the
7756 dereference, since forcing these into a register normally takes two
7757 instructions. */
7758 x = force_operand (x, NULL_RTX);
7759 else
7761 /* For the remaining cases, force the constant into a register. */
7762 xop1 = force_reg (SImode, xop1);
7763 x = gen_rtx_PLUS (SImode, xop0, xop1);
7766 else if (GET_CODE (x) == PLUS
7767 && s_register_operand (XEXP (x, 1), SImode)
7768 && !s_register_operand (XEXP (x, 0), SImode))
7770 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7772 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7775 if (flag_pic)
7777 /* We need to find and carefully transform any SYMBOL and LABEL
7778 references; so go back to the original address expression. */
7779 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7781 if (new_x != orig_x)
7782 x = new_x;
7785 return x;
7788 bool
7789 arm_legitimize_reload_address (rtx *p,
7790 enum machine_mode mode,
7791 int opnum, int type,
7792 int ind_levels ATTRIBUTE_UNUSED)
7794 /* We must recognize output that we have already generated ourselves. */
7795 if (GET_CODE (*p) == PLUS
7796 && GET_CODE (XEXP (*p, 0)) == PLUS
7797 && REG_P (XEXP (XEXP (*p, 0), 0))
7798 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7799 && CONST_INT_P (XEXP (*p, 1)))
7801 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7802 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7803 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7804 return true;
7807 if (GET_CODE (*p) == PLUS
7808 && REG_P (XEXP (*p, 0))
7809 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7810 /* If the base register is equivalent to a constant, let the generic
7811 code handle it. Otherwise we will run into problems if a future
7812 reload pass decides to rematerialize the constant. */
7813 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7814 && CONST_INT_P (XEXP (*p, 1)))
7816 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7817 HOST_WIDE_INT low, high;
7819 /* Detect coprocessor load/stores. */
7820 bool coproc_p = ((TARGET_HARD_FLOAT
7821 && TARGET_VFP
7822 && (mode == SFmode || mode == DFmode))
7823 || (TARGET_REALLY_IWMMXT
7824 && VALID_IWMMXT_REG_MODE (mode))
7825 || (TARGET_NEON
7826 && (VALID_NEON_DREG_MODE (mode)
7827 || VALID_NEON_QREG_MODE (mode))));
7829 /* For some cases, bail out when the lower two bits of the offset are nonzero. */
7830 if ((val & 0x3) != 0
7831 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7832 && (coproc_p
7833 /* For DI, and DF under soft-float: */
7834 || ((mode == DImode || mode == DFmode)
7835 /* Without ldrd, we use stm/ldm, which does not
7836 fare well with unaligned bits. */
7837 && (! TARGET_LDRD
7838 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7839 || TARGET_THUMB2))))
7840 return false;
7842 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7843 where the (reg+high) part gets turned into a reload add insn,
7844 we try to decompose the index into high/low values that can often
7845 also lead to better reload CSE.
7846 For example:
7847 ldr r0, [r2, #4100] // Offset too large
7848 ldr r1, [r2, #4104] // Offset too large
7850 is best reloaded as:
7851 add t1, r2, #4096
7852 ldr r0, [t1, #4]
7853 add t2, r2, #4096
7854 ldr r1, [t2, #8]
7856 which post-reload CSE can simplify in most cases to eliminate the
7857 second add instruction:
7858 add t1, r2, #4096
7859 ldr r0, [t1, #4]
7860 ldr r1, [t1, #8]
7862 The idea here is that we want to split out the bits of the constant
7863 as a mask, rather than as subtracting the maximum offset that the
7864 respective type of load/store used can handle.
7866 When encountering negative offsets, we can still use a negative low
7867 part even if the overall offset is positive; sometimes this leads to an
7868 immediate that can be constructed with fewer instructions.
7869 For example:
7870 ldr r0, [r2, #0x3FFFFC]
7872 This is best reloaded as:
7873 add t1, r2, #0x400000
7874 ldr r0, [t1, #-4]
7876 The trick for spotting this for a load insn with N bits of offset
7877 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7878 negative offset that is going to make bit N and all the bits below
7879 it become zero in the remainder part.
7881 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7882 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7883 used in most cases of ARM load/store instructions. */
7885 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7886 (((VAL) & ((1 << (N)) - 1)) \
7887 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7888 : 0)
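/* Worked examples for the macro above (illustrative):
     SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12): the low 13 bits are 0x1FFC and
       bit 12 is set, so the result is 0x1FFC - 0x2000 = -4; the remaining
       (high) part is 0x400000, matching the example in the comment above.
     SIGN_MAG_LOW_ADDR_BITS (4100, 12): the low 13 bits are 0x1004 and bit
       12 is set, so the result is -4092 with a high part of 8192; both
       parts are encodable, and nearby offsets such as 4104 share the same
       high part, which is what enables the post-reload CSE.  */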
7890 if (coproc_p)
7892 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7894 /* NEON quad-word load/stores are made of two double-word accesses,
7895 so the valid index range is reduced by 8. Treat as 9-bit range if
7896 we go over it. */
7897 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7898 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7900 else if (GET_MODE_SIZE (mode) == 8)
7902 if (TARGET_LDRD)
7903 low = (TARGET_THUMB2
7904 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7905 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7906 else
7907 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7908 to access doublewords. The supported load/store offsets are
7909 -8, -4, and 4, which we try to produce here. */
7910 low = ((val & 0xf) ^ 0x8) - 0x8;
7912 else if (GET_MODE_SIZE (mode) < 8)
7914 /* NEON element load/stores do not have an offset. */
7915 if (TARGET_NEON_FP16 && mode == HFmode)
7916 return false;
7918 if (TARGET_THUMB2)
7920 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7921 Try the wider 12-bit range first, and re-try if the result
7922 is out of range. */
7923 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7924 if (low < -255)
7925 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7927 else
7929 if (mode == HImode || mode == HFmode)
7931 if (arm_arch4)
7932 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7933 else
7935 /* The storehi/movhi_bytes fallbacks can use only
7936 [-4094,+4094] of the full ldrb/strb index range. */
7937 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7938 if (low == 4095 || low == -4095)
7939 return false;
7942 else
7943 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7946 else
7947 return false;
7949 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7950 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7951 - (unsigned HOST_WIDE_INT) 0x80000000);
7952 /* Check for overflow or zero */
7953 if (low == 0 || high == 0 || (high + low != val))
7954 return false;
7956 /* Reload the high part into a base reg; leave the low part
7957 in the mem.
7958 Note that replacing this gen_rtx_PLUS with plus_constant is
7959 wrong in this case because we rely on the
7960 (plus (plus reg c1) c2) structure being preserved so that
7961 XEXP (*p, 0) in push_reload below uses the correct term. */
7962 *p = gen_rtx_PLUS (GET_MODE (*p),
7963 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7964 GEN_INT (high)),
7965 GEN_INT (low));
7966 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7967 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7968 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7969 return true;
7972 return false;
7976 thumb_legitimize_reload_address (rtx *x_p,
7977 enum machine_mode mode,
7978 int opnum, int type,
7979 int ind_levels ATTRIBUTE_UNUSED)
7981 rtx x = *x_p;
7983 if (GET_CODE (x) == PLUS
7984 && GET_MODE_SIZE (mode) < 4
7985 && REG_P (XEXP (x, 0))
7986 && XEXP (x, 0) == stack_pointer_rtx
7987 && CONST_INT_P (XEXP (x, 1))
7988 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7990 rtx orig_x = x;
7992 x = copy_rtx (x);
7993 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7994 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7995 return x;
7998 /* If both registers are hi-regs, then it's better to reload the
7999 entire expression rather than each register individually. That
8000 only requires one reload register rather than two. */
8001 if (GET_CODE (x) == PLUS
8002 && REG_P (XEXP (x, 0))
8003 && REG_P (XEXP (x, 1))
8004 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8005 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8007 rtx orig_x = x;
8009 x = copy_rtx (x);
8010 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8011 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8012 return x;
8015 return NULL;
8018 /* Test for various thread-local symbols. */
8020 /* Helper for arm_tls_referenced_p. */
8022 static int
8023 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
8025 if (GET_CODE (*x) == SYMBOL_REF)
8026 return SYMBOL_REF_TLS_MODEL (*x) != 0;
8028 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8029 TLS offsets, not real symbol references. */
8030 if (GET_CODE (*x) == UNSPEC
8031 && XINT (*x, 1) == UNSPEC_TLS)
8032 return -1;
8034 return 0;
8037 /* Return TRUE if X contains any TLS symbol references. */
8039 bool
8040 arm_tls_referenced_p (rtx x)
8042 if (! TARGET_HAVE_TLS)
8043 return false;
8045 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
8048 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8050 On the ARM, allow any integer (invalid ones are removed later by insn
8051 patterns), nice doubles and symbol_refs which refer to the function's
8052 constant pool XXX.
8054 When generating PIC code, allow anything. */
8056 static bool
8057 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
8059 /* At present, we have no support for Neon structure constants, so forbid
8060 them here. It might be possible to handle simple cases like 0 and -1
8061 in future. */
8062 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8063 return false;
8065 return flag_pic || !label_mentioned_p (x);
8068 static bool
8069 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8071 return (CONST_INT_P (x)
8072 || CONST_DOUBLE_P (x)
8073 || CONSTANT_ADDRESS_P (x)
8074 || flag_pic);
8077 static bool
8078 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
8080 return (!arm_cannot_force_const_mem (mode, x)
8081 && (TARGET_32BIT
8082 ? arm_legitimate_constant_p_1 (mode, x)
8083 : thumb_legitimate_constant_p (mode, x)));
8086 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8088 static bool
8089 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8091 rtx base, offset;
8093 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8095 split_const (x, &base, &offset);
8096 if (GET_CODE (base) == SYMBOL_REF
8097 && !offset_within_block_p (base, INTVAL (offset)))
8098 return true;
8100 return arm_tls_referenced_p (x);
8103 #define REG_OR_SUBREG_REG(X) \
8104 (REG_P (X) \
8105 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8107 #define REG_OR_SUBREG_RTX(X) \
8108 (REG_P (X) ? (X) : SUBREG_REG (X))
8110 static inline int
8111 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8113 enum machine_mode mode = GET_MODE (x);
8114 int total, words;
8116 switch (code)
8118 case ASHIFT:
8119 case ASHIFTRT:
8120 case LSHIFTRT:
8121 case ROTATERT:
8122 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8124 case PLUS:
8125 case MINUS:
8126 case COMPARE:
8127 case NEG:
8128 case NOT:
8129 return COSTS_N_INSNS (1);
8131 case MULT:
8132 if (CONST_INT_P (XEXP (x, 1)))
8134 int cycles = 0;
8135 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8137 while (i)
8139 i >>= 2;
8140 cycles++;
8142 return COSTS_N_INSNS (2) + cycles;
8144 return COSTS_N_INSNS (1) + 16;
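/* Worked example (illustrative): for a multiply by the constant 100 the
   loop above shifts 100 -> 25 -> 6 -> 1 -> 0, i.e. four iterations, so
   the estimate is COSTS_N_INSNS (2) + 4.  */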
8146 case SET:
8147 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8148 the mode. */
8149 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8150 return (COSTS_N_INSNS (words)
8151 + 4 * ((MEM_P (SET_SRC (x)))
8152 + MEM_P (SET_DEST (x))));
8154 case CONST_INT:
8155 if (outer == SET)
8157 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8158 return 0;
8159 if (thumb_shiftable_const (INTVAL (x)))
8160 return COSTS_N_INSNS (2);
8161 return COSTS_N_INSNS (3);
8163 else if ((outer == PLUS || outer == COMPARE)
8164 && INTVAL (x) < 256 && INTVAL (x) > -256)
8165 return 0;
8166 else if ((outer == IOR || outer == XOR || outer == AND)
8167 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8168 return COSTS_N_INSNS (1);
8169 else if (outer == AND)
8171 int i;
8172 /* This duplicates the tests in the andsi3 expander. */
8173 for (i = 9; i <= 31; i++)
8174 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8175 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8176 return COSTS_N_INSNS (2);
8178 else if (outer == ASHIFT || outer == ASHIFTRT
8179 || outer == LSHIFTRT)
8180 return 0;
8181 return COSTS_N_INSNS (2);
8183 case CONST:
8184 case CONST_DOUBLE:
8185 case LABEL_REF:
8186 case SYMBOL_REF:
8187 return COSTS_N_INSNS (3);
8189 case UDIV:
8190 case UMOD:
8191 case DIV:
8192 case MOD:
8193 return 100;
8195 case TRUNCATE:
8196 return 99;
8198 case AND:
8199 case XOR:
8200 case IOR:
8201 /* XXX guess. */
8202 return 8;
8204 case MEM:
8205 /* XXX another guess. */
8206 /* Memory costs quite a lot for the first word, but subsequent words
8207 load at the equivalent of a single insn each. */
8208 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8209 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8210 ? 4 : 0));
8212 case IF_THEN_ELSE:
8213 /* XXX a guess. */
8214 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8215 return 14;
8216 return 2;
8218 case SIGN_EXTEND:
8219 case ZERO_EXTEND:
8220 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8221 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8223 if (mode == SImode)
8224 return total;
8226 if (arm_arch6)
8227 return total + COSTS_N_INSNS (1);
8229 /* Assume a two-shift sequence. Increase the cost slightly so
8230 we prefer actual shifts over an extend operation. */
8231 return total + 1 + COSTS_N_INSNS (2);
8233 default:
8234 return 99;
8238 static inline bool
8239 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8241 enum machine_mode mode = GET_MODE (x);
8242 enum rtx_code subcode;
8243 rtx operand;
8244 enum rtx_code code = GET_CODE (x);
8245 *total = 0;
8247 switch (code)
8249 case MEM:
8250 /* Memory costs quite a lot for the first word, but subsequent words
8251 load at the equivalent of a single insn each. */
8252 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8253 return true;
8255 case DIV:
8256 case MOD:
8257 case UDIV:
8258 case UMOD:
8259 if (TARGET_HARD_FLOAT && mode == SFmode)
8260 *total = COSTS_N_INSNS (2);
8261 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8262 *total = COSTS_N_INSNS (4);
8263 else
8264 *total = COSTS_N_INSNS (20);
8265 return false;
8267 case ROTATE:
8268 if (REG_P (XEXP (x, 1)))
8269 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8270 else if (!CONST_INT_P (XEXP (x, 1)))
8271 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8273 /* Fall through */
8274 case ROTATERT:
8275 if (mode != SImode)
8277 *total += COSTS_N_INSNS (4);
8278 return true;
8281 /* Fall through */
8282 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8283 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8284 if (mode == DImode)
8286 *total += COSTS_N_INSNS (3);
8287 return true;
8290 *total += COSTS_N_INSNS (1);
8291 /* Increase the cost of complex shifts because they aren't any faster,
8292 and reduce dual issue opportunities. */
8293 if (arm_tune_cortex_a9
8294 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8295 ++*total;
8297 return true;
8299 case MINUS:
8300 if (mode == DImode)
8302 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8303 if (CONST_INT_P (XEXP (x, 0))
8304 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8306 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8307 return true;
8310 if (CONST_INT_P (XEXP (x, 1))
8311 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8313 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8314 return true;
8317 return false;
8320 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8322 if (TARGET_HARD_FLOAT
8323 && (mode == SFmode
8324 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8326 *total = COSTS_N_INSNS (1);
8327 if (CONST_DOUBLE_P (XEXP (x, 0))
8328 && arm_const_double_rtx (XEXP (x, 0)))
8330 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8331 return true;
8334 if (CONST_DOUBLE_P (XEXP (x, 1))
8335 && arm_const_double_rtx (XEXP (x, 1)))
8337 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8338 return true;
8341 return false;
8343 *total = COSTS_N_INSNS (20);
8344 return false;
8347 *total = COSTS_N_INSNS (1);
8348 if (CONST_INT_P (XEXP (x, 0))
8349 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8351 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8352 return true;
8355 subcode = GET_CODE (XEXP (x, 1));
8356 if (subcode == ASHIFT || subcode == ASHIFTRT
8357 || subcode == LSHIFTRT
8358 || subcode == ROTATE || subcode == ROTATERT)
8360 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8361 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8362 return true;
8365 /* A shift as a part of RSB costs no more than RSB itself. */
8366 if (GET_CODE (XEXP (x, 0)) == MULT
8367 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8369 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8370 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8371 return true;
8374 if (subcode == MULT
8375 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8377 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8378 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8379 return true;
8382 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8383 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8385 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8386 if (REG_P (XEXP (XEXP (x, 1), 0))
8387 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8388 *total += COSTS_N_INSNS (1);
8390 return true;
8393 /* Fall through */
8395 case PLUS:
8396 if (code == PLUS && arm_arch6 && mode == SImode
8397 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8398 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8400 *total = COSTS_N_INSNS (1);
8401 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8402 0, speed);
8403 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8404 return true;
8407 /* MLA: All arguments must be registers. We filter out
8408 multiplication by a power of two, so that we fall through into
8409 the code below. */
8410 if (GET_CODE (XEXP (x, 0)) == MULT
8411 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8413 /* The cost comes from the cost of the multiply. */
8414 return false;
8417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8419 if (TARGET_HARD_FLOAT
8420 && (mode == SFmode
8421 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8423 *total = COSTS_N_INSNS (1);
8424 if (CONST_DOUBLE_P (XEXP (x, 1))
8425 && arm_const_double_rtx (XEXP (x, 1)))
8427 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8428 return true;
8431 return false;
8434 *total = COSTS_N_INSNS (20);
8435 return false;
8438 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8439 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8441 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8442 if (REG_P (XEXP (XEXP (x, 0), 0))
8443 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8444 *total += COSTS_N_INSNS (1);
8445 return true;
8448 /* Fall through */
8450 case AND: case XOR: case IOR:
8452 /* Normally the frame registers will be spilt into reg+const during
8453 reload, so it is a bad idea to combine them with other instructions,
8454 since then they might not be moved outside of loops. As a compromise
8455 we allow integration with ops that have a constant as their second
8456 operand. */
8457 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8458 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8459 && !CONST_INT_P (XEXP (x, 1)))
8460 *total = COSTS_N_INSNS (1);
8462 if (mode == DImode)
8464 *total += COSTS_N_INSNS (2);
8465 if (CONST_INT_P (XEXP (x, 1))
8466 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8468 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8469 return true;
8472 return false;
8475 *total += COSTS_N_INSNS (1);
8476 if (CONST_INT_P (XEXP (x, 1))
8477 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8479 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8480 return true;
8482 subcode = GET_CODE (XEXP (x, 0));
8483 if (subcode == ASHIFT || subcode == ASHIFTRT
8484 || subcode == LSHIFTRT
8485 || subcode == ROTATE || subcode == ROTATERT)
8487 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8488 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8489 return true;
8492 if (subcode == MULT
8493 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8495 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8496 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8497 return true;
8500 if (subcode == UMIN || subcode == UMAX
8501 || subcode == SMIN || subcode == SMAX)
8503 *total = COSTS_N_INSNS (3);
8504 return true;
8507 return false;
8509 case MULT:
8510 /* This should have been handled by the CPU specific routines. */
8511 gcc_unreachable ();
8513 case TRUNCATE:
8514 if (arm_arch3m && mode == SImode
8515 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8516 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8517 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8518 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8519 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8520 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8522 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8523 return true;
8525 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8526 return false;
8528 case NEG:
8529 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8531 if (TARGET_HARD_FLOAT
8532 && (mode == SFmode
8533 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8535 *total = COSTS_N_INSNS (1);
8536 return false;
8538 *total = COSTS_N_INSNS (2);
8539 return false;
8542 /* Fall through */
8543 case NOT:
8544 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8545 if (mode == SImode && code == NOT)
8547 subcode = GET_CODE (XEXP (x, 0));
8548 if (subcode == ASHIFT || subcode == ASHIFTRT
8549 || subcode == LSHIFTRT
8550 || subcode == ROTATE || subcode == ROTATERT
8551 || (subcode == MULT
8552 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8555 /* Register shifts cost an extra cycle. */
8556 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8557 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8558 subcode, 1, speed);
8559 return true;
8563 return false;
8565 case IF_THEN_ELSE:
8566 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8568 *total = COSTS_N_INSNS (4);
8569 return true;
8572 operand = XEXP (x, 0);
8574 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8575 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8576 && REG_P (XEXP (operand, 0))
8577 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8578 *total += COSTS_N_INSNS (1);
8579 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8580 + rtx_cost (XEXP (x, 2), code, 2, speed));
8581 return true;
8583 case NE:
8584 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8586 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8587 return true;
8589 goto scc_insn;
8591 case GE:
8592 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8593 && mode == SImode && XEXP (x, 1) == const0_rtx)
8595 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8596 return true;
8598 goto scc_insn;
8600 case LT:
8601 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8602 && mode == SImode && XEXP (x, 1) == const0_rtx)
8604 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8605 return true;
8607 goto scc_insn;
8609 case EQ:
8610 case GT:
8611 case LE:
8612 case GEU:
8613 case LTU:
8614 case GTU:
8615 case LEU:
8616 case UNORDERED:
8617 case ORDERED:
8618 case UNEQ:
8619 case UNGE:
8620 case UNLT:
8621 case UNGT:
8622 case UNLE:
8623 scc_insn:
8624 /* SCC insns. In the case where the comparison has already been
8625 performed, they cost 2 instructions. Otherwise they need
8626 an additional comparison before them. */
8627 *total = COSTS_N_INSNS (2);
8628 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8630 return true;
8633 /* Fall through */
8634 case COMPARE:
8635 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8637 *total = 0;
8638 return true;
8641 *total += COSTS_N_INSNS (1);
8642 if (CONST_INT_P (XEXP (x, 1))
8643 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8645 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8646 return true;
8649 subcode = GET_CODE (XEXP (x, 0));
8650 if (subcode == ASHIFT || subcode == ASHIFTRT
8651 || subcode == LSHIFTRT
8652 || subcode == ROTATE || subcode == ROTATERT)
8654 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8655 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8656 return true;
8659 if (subcode == MULT
8660 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8662 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8663 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8664 return true;
8667 return false;
8669 case UMIN:
8670 case UMAX:
8671 case SMIN:
8672 case SMAX:
8673 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8674 if (!CONST_INT_P (XEXP (x, 1))
8675 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8676 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8677 return true;
8679 case ABS:
8680 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8682 if (TARGET_HARD_FLOAT
8683 && (mode == SFmode
8684 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8686 *total = COSTS_N_INSNS (1);
8687 return false;
8689 *total = COSTS_N_INSNS (20);
8690 return false;
8692 *total = COSTS_N_INSNS (1);
8693 if (mode == DImode)
8694 *total += COSTS_N_INSNS (3);
8695 return false;
8697 case SIGN_EXTEND:
8698 case ZERO_EXTEND:
8699 *total = 0;
8700 if (GET_MODE_CLASS (mode) == MODE_INT)
8702 rtx op = XEXP (x, 0);
8703 enum machine_mode opmode = GET_MODE (op);
8705 if (mode == DImode)
8706 *total += COSTS_N_INSNS (1);
8708 if (opmode != SImode)
8710 if (MEM_P (op))
8712 /* If !arm_arch4, we use one of the extendhisi2_mem
8713 or movhi_bytes patterns for HImode. For a QImode
8714 sign extension, we first zero-extend from memory
8715 and then perform a shift sequence. */
8716 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8717 *total += COSTS_N_INSNS (2);
8719 else if (arm_arch6)
8720 *total += COSTS_N_INSNS (1);
8722 /* We don't have the necessary insn, so we need to perform some
8723 other operation. */
8724 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8725 /* An and with constant 255. */
8726 *total += COSTS_N_INSNS (1);
8727 else
8728 /* A shift sequence. Increase costs slightly to avoid
8729 combining two shifts into an extend operation. */
8730 *total += COSTS_N_INSNS (2) + 1;
8733 return false;
8736 switch (GET_MODE (XEXP (x, 0)))
8738 case V8QImode:
8739 case V4HImode:
8740 case V2SImode:
8741 case V4QImode:
8742 case V2HImode:
8743 *total = COSTS_N_INSNS (1);
8744 return false;
8746 default:
8747 gcc_unreachable ();
8749 gcc_unreachable ();
8751 case ZERO_EXTRACT:
8752 case SIGN_EXTRACT:
8753 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8754 return true;
8756 case CONST_INT:
8757 if (const_ok_for_arm (INTVAL (x))
8758 || const_ok_for_arm (~INTVAL (x)))
8759 *total = COSTS_N_INSNS (1);
8760 else
8761 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8762 INTVAL (x), NULL_RTX,
8763 NULL_RTX, 0, 0));
8764 return true;
8766 case CONST:
8767 case LABEL_REF:
8768 case SYMBOL_REF:
8769 *total = COSTS_N_INSNS (3);
8770 return true;
8772 case HIGH:
8773 *total = COSTS_N_INSNS (1);
8774 return true;
8776 case LO_SUM:
8777 *total = COSTS_N_INSNS (1);
8778 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8779 return true;
8781 case CONST_DOUBLE:
8782 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8783 && (mode == SFmode || !TARGET_VFP_SINGLE))
8784 *total = COSTS_N_INSNS (1);
8785 else
8786 *total = COSTS_N_INSNS (4);
8787 return true;
8789 case SET:
8790 /* The vec_extract patterns accept memory operands that require an
8791 address reload. Account for the cost of that reload to give the
8792 auto-inc-dec pass an incentive to try to replace them. */
8793 if (TARGET_NEON && MEM_P (SET_DEST (x))
8794 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8796 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8797 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8798 *total += COSTS_N_INSNS (1);
8799 return true;
8801 /* Likewise for the vec_set patterns. */
8802 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8803 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8804 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8806 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8807 *total = rtx_cost (mem, code, 0, speed);
8808 if (!neon_vector_mem_operand (mem, 2, true))
8809 *total += COSTS_N_INSNS (1);
8810 return true;
8812 return false;
8814 case UNSPEC:
8815 /* We cost this as highly as a memory access, so that it can be
8816 hoisted out of loops. */
8817 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8819 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8821 return true;
8823 case CONST_VECTOR:
8824 if (TARGET_NEON
8825 && TARGET_HARD_FLOAT
8826 && outer == SET
8827 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8828 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8829 *total = COSTS_N_INSNS (1);
8830 else
8831 *total = COSTS_N_INSNS (4);
8832 return true;
8834 default:
8835 *total = COSTS_N_INSNS (4);
8836 return false;
8840 /* Estimate the size cost of Thumb-1 instructions.
8841 For now most of the code is copied from thumb1_rtx_costs. We need more
8842 fine-grained tuning when we have more related test cases. */
8843 static inline int
8844 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8846 enum machine_mode mode = GET_MODE (x);
8847 int words;
8849 switch (code)
8851 case ASHIFT:
8852 case ASHIFTRT:
8853 case LSHIFTRT:
8854 case ROTATERT:
8855 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8857 case PLUS:
8858 case MINUS:
8859 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8860 patterns generated by RTL expansion, particularly when expanding a
8861 multiplication. */
8862 if ((GET_CODE (XEXP (x, 0)) == MULT
8863 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8864 || (GET_CODE (XEXP (x, 1)) == MULT
8865 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8866 return COSTS_N_INSNS (2);
8867 /* Deliberately fall through for the normal RTX cases. */
8868 case COMPARE:
8869 case NEG:
8870 case NOT:
8871 return COSTS_N_INSNS (1);
8873 case MULT:
8874 if (CONST_INT_P (XEXP (x, 1)))
8876 /* The Thumb-1 mul instruction can't operate on a constant; we must
8877 load it into a register first. */
8878 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8879 return COSTS_N_INSNS (1) + const_size;
8881 return COSTS_N_INSNS (1);
8883 case SET:
8884 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8885 the mode. */
8886 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8887 return (COSTS_N_INSNS (words)
8888 + 4 * ((MEM_P (SET_SRC (x)))
8889 + MEM_P (SET_DEST (x))));
8891 case CONST_INT:
8892 if (outer == SET)
8894 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8895 return COSTS_N_INSNS (1);
8896 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8897 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8898 return COSTS_N_INSNS (2);
8899 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8900 if (thumb_shiftable_const (INTVAL (x)))
8901 return COSTS_N_INSNS (2);
8902 return COSTS_N_INSNS (3);
8904 else if ((outer == PLUS || outer == COMPARE)
8905 && INTVAL (x) < 256 && INTVAL (x) > -256)
8906 return 0;
8907 else if ((outer == IOR || outer == XOR || outer == AND)
8908 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8909 return COSTS_N_INSNS (1);
8910 else if (outer == AND)
8912 int i;
8913 /* This duplicates the tests in the andsi3 expander. */
8914 for (i = 9; i <= 31; i++)
8915 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8916 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8917 return COSTS_N_INSNS (2);
8919 else if (outer == ASHIFT || outer == ASHIFTRT
8920 || outer == LSHIFTRT)
8921 return 0;
8922 return COSTS_N_INSNS (2);
8924 case CONST:
8925 case CONST_DOUBLE:
8926 case LABEL_REF:
8927 case SYMBOL_REF:
8928 return COSTS_N_INSNS (3);
8930 case UDIV:
8931 case UMOD:
8932 case DIV:
8933 case MOD:
8934 return 100;
8936 case TRUNCATE:
8937 return 99;
8939 case AND:
8940 case XOR:
8941 case IOR:
8942 /* XXX guess. */
8943 return 8;
8945 case MEM:
8946 /* XXX another guess. */
8947 /* Memory costs quite a lot for the first word, but subsequent words
8948 load at the equivalent of a single insn each. */
8949 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8950 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8951 ? 4 : 0));
8953 case IF_THEN_ELSE:
8954 /* XXX a guess. */
8955 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8956 return 14;
8957 return 2;
8959 case ZERO_EXTEND:
8960 /* XXX still guessing. */
8961 switch (GET_MODE (XEXP (x, 0)))
8963 case QImode:
8964 return (1 + (mode == DImode ? 4 : 0)
8965 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8967 case HImode:
8968 return (4 + (mode == DImode ? 4 : 0)
8969 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8971 case SImode:
8972 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8974 default:
8975 return 99;
8978 default:
8979 return 99;
8983 /* RTX costs when optimizing for size. */
8984 static bool
8985 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8986 int *total)
8988 enum machine_mode mode = GET_MODE (x);
8989 if (TARGET_THUMB1)
8991 *total = thumb1_size_rtx_costs (x, code, outer_code);
8992 return true;
8995 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8996 switch (code)
8998 case MEM:
8999 /* A memory access costs 1 insn if the mode is small or the address is
9000 a single register; otherwise it costs one insn per word. */
9001 if (REG_P (XEXP (x, 0)))
9002 *total = COSTS_N_INSNS (1);
9003 else if (flag_pic
9004 && GET_CODE (XEXP (x, 0)) == PLUS
9005 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9006 /* This will be split into two instructions.
9007 See arm.md:calculate_pic_address. */
9008 *total = COSTS_N_INSNS (2);
9009 else
9010 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9011 return true;
9013 case DIV:
9014 case MOD:
9015 case UDIV:
9016 case UMOD:
9017 /* Needs a libcall, so it costs about this. */
9018 *total = COSTS_N_INSNS (2);
9019 return false;
9021 case ROTATE:
9022 if (mode == SImode && REG_P (XEXP (x, 1)))
9024 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9025 return true;
9027 /* Fall through */
9028 case ROTATERT:
9029 case ASHIFT:
9030 case LSHIFTRT:
9031 case ASHIFTRT:
9032 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9034 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9035 return true;
9037 else if (mode == SImode)
9039 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9040 /* Slightly disparage register shifts, but not by much. */
9041 if (!CONST_INT_P (XEXP (x, 1)))
9042 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9043 return true;
9046 /* Needs a libcall. */
9047 *total = COSTS_N_INSNS (2);
9048 return false;
9050 case MINUS:
9051 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9052 && (mode == SFmode || !TARGET_VFP_SINGLE))
9054 *total = COSTS_N_INSNS (1);
9055 return false;
9058 if (mode == SImode)
9060 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9061 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9063 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9064 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9065 || subcode1 == ROTATE || subcode1 == ROTATERT
9066 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9067 || subcode1 == ASHIFTRT)
9069 /* It's just the cost of the two operands. */
9070 *total = 0;
9071 return false;
9074 *total = COSTS_N_INSNS (1);
9075 return false;
9078 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9079 return false;
9081 case PLUS:
9082 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9083 && (mode == SFmode || !TARGET_VFP_SINGLE))
9085 *total = COSTS_N_INSNS (1);
9086 return false;
9089 /* A shift as a part of ADD costs nothing. */
9090 if (GET_CODE (XEXP (x, 0)) == MULT
9091 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9093 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9094 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9095 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9096 return true;
9099 /* Fall through */
9100 case AND: case XOR: case IOR:
9101 if (mode == SImode)
9103 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9105 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9106 || subcode == LSHIFTRT || subcode == ASHIFTRT
9107 || (code == AND && subcode == NOT))
9109 /* It's just the cost of the two operands. */
9110 *total = 0;
9111 return false;
9115 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9116 return false;
9118 case MULT:
9119 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9120 return false;
9122 case NEG:
9123 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9124 && (mode == SFmode || !TARGET_VFP_SINGLE))
9126 *total = COSTS_N_INSNS (1);
9127 return false;
9130 /* Fall through */
9131 case NOT:
9132 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9134 return false;
9136 case IF_THEN_ELSE:
9137 *total = 0;
9138 return false;
9140 case COMPARE:
9141 if (cc_register (XEXP (x, 0), VOIDmode))
9142 *total = 0;
9143 else
9144 *total = COSTS_N_INSNS (1);
9145 return false;
9147 case ABS:
9148 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9149 && (mode == SFmode || !TARGET_VFP_SINGLE))
9150 *total = COSTS_N_INSNS (1);
9151 else
9152 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9153 return false;
9155 case SIGN_EXTEND:
9156 case ZERO_EXTEND:
9157 return arm_rtx_costs_1 (x, outer_code, total, 0);
9159 case CONST_INT:
9160 if (const_ok_for_arm (INTVAL (x)))
9161 /* A multiplication by a constant requires another instruction
9162 to load the constant to a register. */
9163 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9164 ? 1 : 0);
9165 else if (const_ok_for_arm (~INTVAL (x)))
9166 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9167 else if (const_ok_for_arm (-INTVAL (x)))
9169 if (outer_code == COMPARE || outer_code == PLUS
9170 || outer_code == MINUS)
9171 *total = 0;
9172 else
9173 *total = COSTS_N_INSNS (1);
9175 else
9176 *total = COSTS_N_INSNS (2);
9177 return true;
9179 case CONST:
9180 case LABEL_REF:
9181 case SYMBOL_REF:
9182 *total = COSTS_N_INSNS (2);
9183 return true;
9185 case CONST_DOUBLE:
9186 *total = COSTS_N_INSNS (4);
9187 return true;
9189 case CONST_VECTOR:
9190 if (TARGET_NEON
9191 && TARGET_HARD_FLOAT
9192 && outer_code == SET
9193 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9194 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9195 *total = COSTS_N_INSNS (1);
9196 else
9197 *total = COSTS_N_INSNS (4);
9198 return true;
9200 case HIGH:
9201 case LO_SUM:
9202 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9203 cost of these slightly. */
9204 *total = COSTS_N_INSNS (1) + 1;
9205 return true;
9207 case SET:
9208 return false;
9210 default:
9211 if (mode != VOIDmode)
9212 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9213 else
9214 *total = COSTS_N_INSNS (4); /* Who knows? */
9215 return false;
9219 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9220 operand, then return the operand that is being shifted. If the shift
9221 is not by a constant, then set SHIFT_REG to point to the shift amount.
9222 Return NULL if OP is not a shifter operand. */
9223 static rtx
9224 shifter_op_p (rtx op, rtx *shift_reg)
9226 enum rtx_code code = GET_CODE (op);
9228 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9229 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9230 return XEXP (op, 0);
9231 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9232 return XEXP (op, 0);
9233 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9234 || code == ASHIFTRT)
9236 if (!CONST_INT_P (XEXP (op, 1)))
9237 *shift_reg = XEXP (op, 1);
9238 return XEXP (op, 0);
9241 return NULL;
9244 static bool
9245 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9247 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9248 gcc_assert (GET_CODE (x) == UNSPEC);
9250 switch (XINT (x, 1))
9252 case UNSPEC_UNALIGNED_LOAD:
9253 /* We can only do unaligned loads into the integer unit, and we can't
9254 use LDM or LDRD. */
9255 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9256 if (speed_p)
9257 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9258 + extra_cost->ldst.load_unaligned);
9260 #ifdef NOT_YET
9261 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9262 ADDR_SPACE_GENERIC, speed_p);
9263 #endif
9264 return true;
9266 case UNSPEC_UNALIGNED_STORE:
9267 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9268 if (speed_p)
9269 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9270 + extra_cost->ldst.store_unaligned);
9272 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9273 #ifdef NOT_YET
9274 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9275 ADDR_SPACE_GENERIC, speed_p);
9276 #endif
9277 return true;
9279 case UNSPEC_VRINTZ:
9280 case UNSPEC_VRINTP:
9281 case UNSPEC_VRINTM:
9282 case UNSPEC_VRINTR:
9283 case UNSPEC_VRINTX:
9284 case UNSPEC_VRINTA:
9285 *cost = COSTS_N_INSNS (1);
9286 if (speed_p)
9287 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9289 return true;
9290 default:
9291 *cost = COSTS_N_INSNS (2);
9292 break;
9294 return false;
9297 /* Cost of a libcall. We assume one insn per argument, an amount for the
9298 call (one insn for -Os) and then one for processing the result. */
9299 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
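/* Worked example (illustrative): LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when speed_p is true and COSTS_N_INSNS (4) when
   optimizing for size, i.e. two argument insns plus the call overhead.  */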
9301 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9302 do \
9304 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9305 if (shift_op != NULL \
9306 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9308 if (shift_reg) \
9310 if (speed_p) \
9311 *cost += extra_cost->alu.arith_shift_reg; \
9312 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9314 else if (speed_p) \
9315 *cost += extra_cost->alu.arith_shift; \
9317 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9318 + rtx_cost (XEXP (x, 1 - IDX), \
9319 OP, 1, speed_p)); \
9320 return true; \
9323 while (0);
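/* Illustrative use of the macro above (assumed operands, not from the
   original source): for an HImode (plus (ashift (reg A) (const_int 2))
   (reg B)), HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) finds the shift on
   operand 0 via shifter_op_p; assuming arm_rtx_shift_left_p accepts it,
   the speed cost gains extra_cost->alu.arith_shift (arith_shift_reg plus
   the shift-amount cost if the amount had been a register), the costs of
   A and B are accumulated, and the containing case returns true.  */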
9325 /* RTX costs. Make an estimate of the cost of executing the operation
9326 X, which is contained within an operation with code OUTER_CODE.
9327 SPEED_P indicates whether the cost desired is the performance cost,
9328 or the size cost. The estimate is stored in COST and the return
9329 value is TRUE if the cost calculation is final, or FALSE if the
9330 caller should recurse through the operands of X to add additional
9331 costs.
9333 We currently make no attempt to model the size savings of Thumb-2
9334 16-bit instructions. At the normal points in compilation where
9335 this code is called we have no measure of whether the condition
9336 flags are live or not, and thus no realistic way to determine what
9337 the size will eventually be. */
9338 static bool
9339 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9340 const struct cpu_cost_table *extra_cost,
9341 int *cost, bool speed_p)
9343 enum machine_mode mode = GET_MODE (x);
9345 if (TARGET_THUMB1)
9347 if (speed_p)
9348 *cost = thumb1_rtx_costs (x, code, outer_code);
9349 else
9350 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9351 return true;
9354 switch (code)
9356 case SET:
9357 *cost = 0;
9358 /* SET RTXs don't have a mode so we get it from the destination. */
9359 mode = GET_MODE (SET_DEST (x));
9361 if (REG_P (SET_SRC (x))
9362 && REG_P (SET_DEST (x)))
9364 /* Assume that most copies can be done with a single insn,
9365 unless we don't have HW FP, in which case everything
9366 larger than word mode will require two insns. */
9367 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9368 && GET_MODE_SIZE (mode) > 4)
9369 || mode == DImode)
9370 ? 2 : 1);
9371 /* Conditional register moves can be encoded
9372 in 16 bits in Thumb mode. */
9373 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9374 *cost >>= 1;
9376 return true;
9379 if (CONST_INT_P (SET_SRC (x)))
9381 /* Handle CONST_INT here, since the value doesn't have a mode
9382 and we would otherwise be unable to work out the true cost. */
9383 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9384 outer_code = SET;
9385 /* Slightly lower the cost of setting a core reg to a constant.
9386 This helps break up chains and allows for better scheduling. */
9387 if (REG_P (SET_DEST (x))
9388 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9389 *cost -= 1;
9390 x = SET_SRC (x);
9391 /* Immediate moves with an immediate in the range [0, 255] can be
9392 encoded in 16 bits in Thumb mode. */
9393 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9394 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9395 *cost >>= 1;
9396 goto const_int_cost;
9399 return false;
9401 case MEM:
9402 /* A memory access costs 1 insn if the mode is small or the address is
9403 a single register; otherwise it costs one insn per word. */
9404 if (REG_P (XEXP (x, 0)))
9405 *cost = COSTS_N_INSNS (1);
9406 else if (flag_pic
9407 && GET_CODE (XEXP (x, 0)) == PLUS
9408 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9409 /* This will be split into two instructions.
9410 See arm.md:calculate_pic_address. */
9411 *cost = COSTS_N_INSNS (2);
9412 else
9413 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9415 /* For speed optimizations, add the costs of the address and
9416 accessing memory. */
9417 if (speed_p)
9418 #ifdef NOT_YET
9419 *cost += (extra_cost->ldst.load
9420 + arm_address_cost (XEXP (x, 0), mode,
9421 ADDR_SPACE_GENERIC, speed_p));
9422 #else
9423 *cost += extra_cost->ldst.load;
9424 #endif
9425 return true;
9427 case PARALLEL:
9429 /* Calculations of LDM costs are complex. We assume an initial cost
9430 (ldm_1st) which will load the number of registers mentioned in
9431 ldm_regs_per_insn_1st registers; then each additional
9432 ldm_regs_per_insn_subsequent registers cost one more insn. The
9433 formula for N regs is thus:
9435 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9436 + ldm_regs_per_insn_subsequent - 1)
9437 / ldm_regs_per_insn_subsequent).
9439 Additional costs may also be added for addressing. A similar
9440 formula is used for STM. */
9442 bool is_ldm = load_multiple_operation (x, SImode);
9443 bool is_stm = store_multiple_operation (x, SImode);
9445 *cost = COSTS_N_INSNS (1);
9447 if (is_ldm || is_stm)
9449 if (speed_p)
9451 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9452 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9453 ? extra_cost->ldst.ldm_regs_per_insn_1st
9454 : extra_cost->ldst.stm_regs_per_insn_1st;
9455 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9456 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9457 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9459 *cost += regs_per_insn_1st
9460 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9461 + regs_per_insn_sub - 1)
9462 / regs_per_insn_sub);
9463 return true;
9467 return false;
9469 case DIV:
9470 case UDIV:
9471 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9472 && (mode == SFmode || !TARGET_VFP_SINGLE))
9473 *cost = COSTS_N_INSNS (speed_p
9474 ? extra_cost->fp[mode != SFmode].div : 1);
9475 else if (mode == SImode && TARGET_IDIV)
9476 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9477 else
9478 *cost = LIBCALL_COST (2);
9479 return false; /* All arguments must be in registers. */
9481 case MOD:
9482 case UMOD:
9483 *cost = LIBCALL_COST (2);
9484 return false; /* All arguments must be in registers. */
9486 case ROTATE:
9487 if (mode == SImode && REG_P (XEXP (x, 1)))
9489 *cost = (COSTS_N_INSNS (2)
9490 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9491 if (speed_p)
9492 *cost += extra_cost->alu.shift_reg;
9493 return true;
9495 /* Fall through */
9496 case ROTATERT:
9497 case ASHIFT:
9498 case LSHIFTRT:
9499 case ASHIFTRT:
9500 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9502 *cost = (COSTS_N_INSNS (3)
9503 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9504 if (speed_p)
9505 *cost += 2 * extra_cost->alu.shift;
9506 return true;
9508 else if (mode == SImode)
9510 *cost = (COSTS_N_INSNS (1)
9511 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9512 /* Slightly disparage register shifts at -Os, but not by much. */
9513 if (!CONST_INT_P (XEXP (x, 1)))
9514 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9515 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9516 return true;
9518 else if (GET_MODE_CLASS (mode) == MODE_INT
9519 && GET_MODE_SIZE (mode) < 4)
9521 if (code == ASHIFT)
9523 *cost = (COSTS_N_INSNS (1)
9524 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9525 /* Slightly disparage register shifts at -Os, but not by
9526 much. */
9527 if (!CONST_INT_P (XEXP (x, 1)))
9528 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9529 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9531 else if (code == LSHIFTRT || code == ASHIFTRT)
9533 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9535 /* Can use SBFX/UBFX. */
9536 *cost = COSTS_N_INSNS (1);
9537 if (speed_p)
9538 *cost += extra_cost->alu.bfx;
9539 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9541 else
9543 *cost = COSTS_N_INSNS (2);
9544 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9545 if (speed_p)
9547 if (CONST_INT_P (XEXP (x, 1)))
9548 *cost += 2 * extra_cost->alu.shift;
9549 else
9550 *cost += (extra_cost->alu.shift
9551 + extra_cost->alu.shift_reg);
9553 else
9554 /* Slightly disparage register shifts. */
9555 *cost += !CONST_INT_P (XEXP (x, 1));
9558 else /* Rotates. */
9560 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9561 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9562 if (speed_p)
9564 if (CONST_INT_P (XEXP (x, 1)))
9565 *cost += (2 * extra_cost->alu.shift
9566 + extra_cost->alu.log_shift);
9567 else
9568 *cost += (extra_cost->alu.shift
9569 + extra_cost->alu.shift_reg
9570 + extra_cost->alu.log_shift_reg);
9573 return true;
9576 *cost = LIBCALL_COST (2);
9577 return false;
9579 case BSWAP:
9580 if (arm_arch6)
9582 if (mode == SImode)
9584 *cost = COSTS_N_INSNS (1);
9585 if (speed_p)
9586 *cost += extra_cost->alu.rev;
9588 return false;
9591 else
9593 /* No rev instruction available. Look at arm_legacy_rev
9594 and thumb_legacy_rev for the form of RTL used then. */
9595 if (TARGET_THUMB)
9597 *cost = COSTS_N_INSNS (10);
9599 if (speed_p)
9601 *cost += 6 * extra_cost->alu.shift;
9602 *cost += 3 * extra_cost->alu.logical;
9605 else
9607 *cost = COSTS_N_INSNS (5);
9609 if (speed_p)
9611 *cost += 2 * extra_cost->alu.shift;
9612 *cost += extra_cost->alu.arith_shift;
9613 *cost += 2 * extra_cost->alu.logical;
9616 return true;
9618 return false;
9620 case MINUS:
9621 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9622 && (mode == SFmode || !TARGET_VFP_SINGLE))
9624 *cost = COSTS_N_INSNS (1);
9625 if (GET_CODE (XEXP (x, 0)) == MULT
9626 || GET_CODE (XEXP (x, 1)) == MULT)
9628 rtx mul_op0, mul_op1, sub_op;
9630 if (speed_p)
9631 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9633 if (GET_CODE (XEXP (x, 0)) == MULT)
9635 mul_op0 = XEXP (XEXP (x, 0), 0);
9636 mul_op1 = XEXP (XEXP (x, 0), 1);
9637 sub_op = XEXP (x, 1);
9639 else
9641 mul_op0 = XEXP (XEXP (x, 1), 0);
9642 mul_op1 = XEXP (XEXP (x, 1), 1);
9643 sub_op = XEXP (x, 0);
9646 /* The first operand of the multiply may be optionally
9647 negated. */
9648 if (GET_CODE (mul_op0) == NEG)
9649 mul_op0 = XEXP (mul_op0, 0);
9651 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9652 + rtx_cost (mul_op1, code, 0, speed_p)
9653 + rtx_cost (sub_op, code, 0, speed_p));
9655 return true;
9658 if (speed_p)
9659 *cost += extra_cost->fp[mode != SFmode].addsub;
9660 return false;
9663 if (mode == SImode)
9665 rtx shift_by_reg = NULL;
9666 rtx shift_op;
9667 rtx non_shift_op;
9669 *cost = COSTS_N_INSNS (1);
9671 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9672 if (shift_op == NULL)
9674 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9675 non_shift_op = XEXP (x, 0);
9677 else
9678 non_shift_op = XEXP (x, 1);
9680 if (shift_op != NULL)
9682 if (shift_by_reg != NULL)
9684 if (speed_p)
9685 *cost += extra_cost->alu.arith_shift_reg;
9686 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9688 else if (speed_p)
9689 *cost += extra_cost->alu.arith_shift;
9691 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9692 + rtx_cost (non_shift_op, code, 0, speed_p));
9693 return true;
9696 if (arm_arch_thumb2
9697 && GET_CODE (XEXP (x, 1)) == MULT)
9699 /* MLS. */
9700 if (speed_p)
9701 *cost += extra_cost->mult[0].add;
9702 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9703 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9704 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9705 return true;
9708 if (CONST_INT_P (XEXP (x, 0)))
9710 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9711 INTVAL (XEXP (x, 0)), NULL_RTX,
9712 NULL_RTX, 1, 0);
9713 *cost = COSTS_N_INSNS (insns);
9714 if (speed_p)
9715 *cost += insns * extra_cost->alu.arith;
9716 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9717 return true;
9720 return false;
9723 if (GET_MODE_CLASS (mode) == MODE_INT
9724 && GET_MODE_SIZE (mode) < 4)
9726 rtx shift_op, shift_reg;
9727 shift_reg = NULL;
9729 /* We check both sides of the MINUS for shifter operands since,
9730 unlike PLUS, it's not commutative. */
9732 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9733 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9735 /* Slightly disparage, as we might need to widen the result. */
9736 *cost = 1 + COSTS_N_INSNS (1);
9737 if (speed_p)
9738 *cost += extra_cost->alu.arith;
9740 if (CONST_INT_P (XEXP (x, 0)))
9742 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9743 return true;
9746 return false;
9749 if (mode == DImode)
9751 *cost = COSTS_N_INSNS (2);
9753 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9755 rtx op1 = XEXP (x, 1);
9757 if (speed_p)
9758 *cost += 2 * extra_cost->alu.arith;
9760 if (GET_CODE (op1) == ZERO_EXTEND)
9761 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9762 else
9763 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9764 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9765 0, speed_p);
9766 return true;
9768 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9770 if (speed_p)
9771 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9772 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9773 0, speed_p)
9774 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9775 return true;
9777 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9778 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9780 if (speed_p)
9781 *cost += (extra_cost->alu.arith
9782 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9783 ? extra_cost->alu.arith
9784 : extra_cost->alu.arith_shift));
9785 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9786 + rtx_cost (XEXP (XEXP (x, 1), 0),
9787 GET_CODE (XEXP (x, 1)), 0, speed_p));
9788 return true;
9791 if (speed_p)
9792 *cost += 2 * extra_cost->alu.arith;
9793 return false;
9796 /* Vector mode? */
9798 *cost = LIBCALL_COST (2);
9799 return false;
9801 case PLUS:
9802 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9803 && (mode == SFmode || !TARGET_VFP_SINGLE))
9805 *cost = COSTS_N_INSNS (1);
9806 if (GET_CODE (XEXP (x, 0)) == MULT)
9808 rtx mul_op0, mul_op1, add_op;
9810 if (speed_p)
9811 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9813 mul_op0 = XEXP (XEXP (x, 0), 0);
9814 mul_op1 = XEXP (XEXP (x, 0), 1);
9815 add_op = XEXP (x, 1);
9817 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9818 + rtx_cost (mul_op1, code, 0, speed_p)
9819 + rtx_cost (add_op, code, 0, speed_p));
9821 return true;
9824 if (speed_p)
9825 *cost += extra_cost->fp[mode != SFmode].addsub;
9826 return false;
9828 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9830 *cost = LIBCALL_COST (2);
9831 return false;
9834 /* Narrow modes can be synthesized in SImode, but the range
9835 of useful sub-operations is limited. Check for shift operations
9836 on one of the operands. Only left shifts can be used in the
9837 narrow modes. */
9838 if (GET_MODE_CLASS (mode) == MODE_INT
9839 && GET_MODE_SIZE (mode) < 4)
9841 rtx shift_op, shift_reg;
9842 shift_reg = NULL;
9844 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9846 if (CONST_INT_P (XEXP (x, 1)))
9848 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9849 INTVAL (XEXP (x, 1)), NULL_RTX,
9850 NULL_RTX, 1, 0);
9851 *cost = COSTS_N_INSNS (insns);
9852 if (speed_p)
9853 *cost += insns * extra_cost->alu.arith;
9854 /* Slightly penalize a narrow operation as the result may
9855 need widening. */
9856 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9857 return true;
9860 /* Slightly penalize a narrow operation as the result may
9861 need widening. */
9862 *cost = 1 + COSTS_N_INSNS (1);
9863 if (speed_p)
9864 *cost += extra_cost->alu.arith;
9866 return false;
9869 if (mode == SImode)
9871 rtx shift_op, shift_reg;
9873 *cost = COSTS_N_INSNS (1);
9874 if (TARGET_INT_SIMD
9875 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9876 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9878 /* UXTA[BH] or SXTA[BH]. */
9879 if (speed_p)
9880 *cost += extra_cost->alu.extend_arith;
9881 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9882 speed_p)
9883 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9884 return true;
9887 shift_reg = NULL;
9888 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9889 if (shift_op != NULL)
9891 if (shift_reg)
9893 if (speed_p)
9894 *cost += extra_cost->alu.arith_shift_reg;
9895 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9897 else if (speed_p)
9898 *cost += extra_cost->alu.arith_shift;
9900 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9901 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9902 return true;
9904 if (GET_CODE (XEXP (x, 0)) == MULT)
9906 rtx mul_op = XEXP (x, 0);
9908 *cost = COSTS_N_INSNS (1);
9910 if (TARGET_DSP_MULTIPLY
9911 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9912 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9913 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9914 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9915 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9916 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9917 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9918 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9919 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9920 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9921 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9922 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9923 == 16))))))
9925 /* SMLA[BT][BT]. */
9926 if (speed_p)
9927 *cost += extra_cost->mult[0].extend_add;
9928 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9929 SIGN_EXTEND, 0, speed_p)
9930 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9931 SIGN_EXTEND, 0, speed_p)
9932 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9933 return true;
9936 if (speed_p)
9937 *cost += extra_cost->mult[0].add;
9938 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9939 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9940 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9941 return true;
9943 if (CONST_INT_P (XEXP (x, 1)))
9945 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9946 INTVAL (XEXP (x, 1)), NULL_RTX,
9947 NULL_RTX, 1, 0);
9948 *cost = COSTS_N_INSNS (insns);
9949 if (speed_p)
9950 *cost += insns * extra_cost->alu.arith;
9951 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9952 return true;
9954 return false;
9957 if (mode == DImode)
9959 if (arm_arch3m
9960 && GET_CODE (XEXP (x, 0)) == MULT
9961 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9962 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9963 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9964 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9966 *cost = COSTS_N_INSNS (1);
9967 if (speed_p)
9968 *cost += extra_cost->mult[1].extend_add;
9969 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9970 ZERO_EXTEND, 0, speed_p)
9971 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9972 ZERO_EXTEND, 0, speed_p)
9973 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9974 return true;
9977 *cost = COSTS_N_INSNS (2);
9979 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9980 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9982 if (speed_p)
9983 *cost += (extra_cost->alu.arith
9984 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9985 ? extra_cost->alu.arith
9986 : extra_cost->alu.arith_shift));
9988 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9989 speed_p)
9990 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9991 return true;
9994 if (speed_p)
9995 *cost += 2 * extra_cost->alu.arith;
9996 return false;
9999 /* Vector mode? */
10000 *cost = LIBCALL_COST (2);
10001 return false;
10002 case IOR:
10003 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10005 *cost = COSTS_N_INSNS (1);
10006 if (speed_p)
10007 *cost += extra_cost->alu.rev;
10009 return true;
10011 /* Fall through. */
10012 case AND: case XOR:
10013 if (mode == SImode)
10015 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10016 rtx op0 = XEXP (x, 0);
10017 rtx shift_op, shift_reg;
10019 *cost = COSTS_N_INSNS (1);
10021 if (subcode == NOT
10022 && (code == AND
10023 || (code == IOR && TARGET_THUMB2)))
10024 op0 = XEXP (op0, 0);
10026 shift_reg = NULL;
10027 shift_op = shifter_op_p (op0, &shift_reg);
10028 if (shift_op != NULL)
10030 if (shift_reg)
10032 if (speed_p)
10033 *cost += extra_cost->alu.log_shift_reg;
10034 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10036 else if (speed_p)
10037 *cost += extra_cost->alu.log_shift;
10039 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10040 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10041 return true;
10044 if (CONST_INT_P (XEXP (x, 1)))
10046 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10047 INTVAL (XEXP (x, 1)), NULL_RTX,
10048 NULL_RTX, 1, 0);
10050 *cost = COSTS_N_INSNS (insns);
10051 if (speed_p)
10052 *cost += insns * extra_cost->alu.logical;
10053 *cost += rtx_cost (op0, code, 0, speed_p);
10054 return true;
10057 if (speed_p)
10058 *cost += extra_cost->alu.logical;
10059 *cost += (rtx_cost (op0, code, 0, speed_p)
10060 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10061 return true;
10064 if (mode == DImode)
10066 rtx op0 = XEXP (x, 0);
10067 enum rtx_code subcode = GET_CODE (op0);
10069 *cost = COSTS_N_INSNS (2);
10071 if (subcode == NOT
10072 && (code == AND
10073 || (code == IOR && TARGET_THUMB2)))
10074 op0 = XEXP (op0, 0);
10076 if (GET_CODE (op0) == ZERO_EXTEND)
10078 if (speed_p)
10079 *cost += 2 * extra_cost->alu.logical;
10081 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10082 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10083 return true;
10085 else if (GET_CODE (op0) == SIGN_EXTEND)
10087 if (speed_p)
10088 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10090 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10091 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10092 return true;
10095 if (speed_p)
10096 *cost += 2 * extra_cost->alu.logical;
10098 return true;
10100 /* Vector mode? */
10102 *cost = LIBCALL_COST (2);
10103 return false;
10105 case MULT:
10106 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10107 && (mode == SFmode || !TARGET_VFP_SINGLE))
10109 rtx op0 = XEXP (x, 0);
10111 *cost = COSTS_N_INSNS (1);
10113 if (GET_CODE (op0) == NEG)
10114 op0 = XEXP (op0, 0);
10116 if (speed_p)
10117 *cost += extra_cost->fp[mode != SFmode].mult;
10119 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10120 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10121 return true;
10123 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10125 *cost = LIBCALL_COST (2);
10126 return false;
10129 if (mode == SImode)
10131 *cost = COSTS_N_INSNS (1);
10132 if (TARGET_DSP_MULTIPLY
10133 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10134 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10135 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10136 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10137 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10138 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10139 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10140 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10141 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10142 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10143 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10144 && (INTVAL (XEXP (XEXP (x, 1), 1))
10145 == 16))))))
10147 /* SMUL[TB][TB]. */
10148 if (speed_p)
10149 *cost += extra_cost->mult[0].extend;
10150 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10151 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10152 return true;
10154 if (speed_p)
10155 *cost += extra_cost->mult[0].simple;
10156 return false;
10159 if (mode == DImode)
10161 if (arm_arch3m
10162 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10163 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10164 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10165 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10167 *cost = COSTS_N_INSNS (1);
10168 if (speed_p)
10169 *cost += extra_cost->mult[1].extend;
10170 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10171 ZERO_EXTEND, 0, speed_p)
10172 + rtx_cost (XEXP (XEXP (x, 1), 0),
10173 ZERO_EXTEND, 0, speed_p));
10174 return true;
10177 *cost = LIBCALL_COST (2);
10178 return false;
10181 /* Vector mode? */
10182 *cost = LIBCALL_COST (2);
10183 return false;
10185 case NEG:
10186 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10187 && (mode == SFmode || !TARGET_VFP_SINGLE))
10189 *cost = COSTS_N_INSNS (1);
10190 if (speed_p)
10191 *cost += extra_cost->fp[mode != SFmode].neg;
10193 return false;
10195 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10197 *cost = LIBCALL_COST (1);
10198 return false;
10201 if (mode == SImode)
10203 if (GET_CODE (XEXP (x, 0)) == ABS)
10205 *cost = COSTS_N_INSNS (2);
10206 /* Assume the non-flag-changing variant. */
10207 if (speed_p)
10208 *cost += (extra_cost->alu.log_shift
10209 + extra_cost->alu.arith_shift);
10210 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10211 return true;
10214 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10215 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10217 *cost = COSTS_N_INSNS (2);
10218 /* No extra cost for MOV imm and MVN imm. */
10219 /* If the comparison op is using the flags, there's no further
10220 cost, otherwise we need to add the cost of the comparison. */
10221 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10222 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10223 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10225 *cost += (COSTS_N_INSNS (1)
10226 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10227 speed_p)
10228 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10229 speed_p));
10230 if (speed_p)
10231 *cost += extra_cost->alu.arith;
10233 return true;
10235 *cost = COSTS_N_INSNS (1);
10236 if (speed_p)
10237 *cost += extra_cost->alu.arith;
10238 return false;
10241 if (GET_MODE_CLASS (mode) == MODE_INT
10242 && GET_MODE_SIZE (mode) < 4)
10244 /* Slightly disparage, as we might need an extend operation. */
10245 *cost = 1 + COSTS_N_INSNS (1);
10246 if (speed_p)
10247 *cost += extra_cost->alu.arith;
10248 return false;
10251 if (mode == DImode)
10253 *cost = COSTS_N_INSNS (2);
10254 if (speed_p)
10255 *cost += 2 * extra_cost->alu.arith;
10256 return false;
10259 /* Vector mode? */
10260 *cost = LIBCALL_COST (1);
10261 return false;
10263 case NOT:
10264 if (mode == SImode)
10266 rtx shift_op;
10267 rtx shift_reg = NULL;
10269 *cost = COSTS_N_INSNS (1);
10270 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10272 if (shift_op)
10274 if (shift_reg != NULL)
10276 if (speed_p)
10277 *cost += extra_cost->alu.log_shift_reg;
10278 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10280 else if (speed_p)
10281 *cost += extra_cost->alu.log_shift;
10282 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10283 return true;
10286 if (speed_p)
10287 *cost += extra_cost->alu.logical;
10288 return false;
10290 if (mode == DImode)
10292 *cost = COSTS_N_INSNS (2);
10293 return false;
10296 /* Vector mode? */
10298 *cost += LIBCALL_COST (1);
10299 return false;
10301 case IF_THEN_ELSE:
10303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10305 *cost = COSTS_N_INSNS (4);
10306 return true;
10308 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10309 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10311 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10312 /* Assume that if one arm of the if_then_else is a register,
10313 it will be tied to the result and the conditional insn
10314 will be eliminated. */
10315 if (REG_P (XEXP (x, 1)))
10316 *cost += op2cost;
10317 else if (REG_P (XEXP (x, 2)))
10318 *cost += op1cost;
10319 else
10321 if (speed_p)
10323 if (extra_cost->alu.non_exec_costs_exec)
10324 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10325 else
10326 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10328 else
10329 *cost += op1cost + op2cost;
10332 return true;
10334 case COMPARE:
10335 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10336 *cost = 0;
10337 else
10339 enum machine_mode op0mode;
10340 /* We'll mostly assume that the cost of a compare is the cost of the
10341 LHS. However, there are some notable exceptions. */
10343 /* Floating point compares are never done as side-effects. */
10344 op0mode = GET_MODE (XEXP (x, 0));
10345 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10346 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10348 *cost = COSTS_N_INSNS (1);
10349 if (speed_p)
10350 *cost += extra_cost->fp[op0mode != SFmode].compare;
10352 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10354 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10355 return true;
10358 return false;
10360 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10362 *cost = LIBCALL_COST (2);
10363 return false;
10366 /* DImode compares normally take two insns. */
10367 if (op0mode == DImode)
10369 *cost = COSTS_N_INSNS (2);
10370 if (speed_p)
10371 *cost += 2 * extra_cost->alu.arith;
10372 return false;
10375 if (op0mode == SImode)
10377 rtx shift_op;
10378 rtx shift_reg;
10380 if (XEXP (x, 1) == const0_rtx
10381 && !(REG_P (XEXP (x, 0))
10382 || (GET_CODE (XEXP (x, 0)) == SUBREG
10383 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10385 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10387 /* Multiply operations that set the flags are often
10388 significantly more expensive. */
10389 if (speed_p
10390 && GET_CODE (XEXP (x, 0)) == MULT
10391 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10392 *cost += extra_cost->mult[0].flag_setting;
10394 if (speed_p
10395 && GET_CODE (XEXP (x, 0)) == PLUS
10396 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10397 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10398 0), 1), mode))
10399 *cost += extra_cost->mult[0].flag_setting;
10400 return true;
10403 shift_reg = NULL;
10404 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10405 if (shift_op != NULL)
10407 *cost = COSTS_N_INSNS (1);
10408 if (shift_reg != NULL)
10410 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10411 if (speed_p)
10412 *cost += extra_cost->alu.arith_shift_reg;
10414 else if (speed_p)
10415 *cost += extra_cost->alu.arith_shift;
10416 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10417 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10418 return true;
10421 *cost = COSTS_N_INSNS (1);
10422 if (speed_p)
10423 *cost += extra_cost->alu.arith;
10424 if (CONST_INT_P (XEXP (x, 1))
10425 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10427 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10428 return true;
10430 return false;
10433 /* Vector mode? */
10435 *cost = LIBCALL_COST (2);
10436 return false;
10438 return true;
10440 case EQ:
10441 case NE:
10442 case LT:
10443 case LE:
10444 case GT:
10445 case GE:
10446 case LTU:
10447 case LEU:
10448 case GEU:
10449 case GTU:
10450 case ORDERED:
10451 case UNORDERED:
10452 case UNEQ:
10453 case UNLE:
10454 case UNLT:
10455 case UNGE:
10456 case UNGT:
10457 case LTGT:
10458 if (outer_code == SET)
10460 /* Is it a store-flag operation? */
10461 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10462 && XEXP (x, 1) == const0_rtx)
10464 /* Thumb also needs an IT insn. */
10465 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10466 return true;
10468 if (XEXP (x, 1) == const0_rtx)
10470 switch (code)
10472 case LT:
10473 /* LSR Rd, Rn, #31. */
10474 *cost = COSTS_N_INSNS (1);
10475 if (speed_p)
10476 *cost += extra_cost->alu.shift;
10477 break;
10479 case EQ:
10480 /* RSBS T1, Rn, #0
10481 ADC Rd, Rn, T1. */
10483 case NE:
10484 /* SUBS T1, Rn, #1
10485 SBC Rd, Rn, T1. */
10486 *cost = COSTS_N_INSNS (2);
10487 break;
10489 case LE:
10490 /* RSBS T1, Rn, Rn, LSR #31
10491 ADC Rd, Rn, T1. */
10492 *cost = COSTS_N_INSNS (2);
10493 if (speed_p)
10494 *cost += extra_cost->alu.arith_shift;
10495 break;
10497 case GT:
10498 /* RSB Rd, Rn, Rn, ASR #1
10499 LSR Rd, Rd, #31. */
10500 *cost = COSTS_N_INSNS (2);
10501 if (speed_p)
10502 *cost += (extra_cost->alu.arith_shift
10503 + extra_cost->alu.shift);
10504 break;
10506 case GE:
10507 /* ASR Rd, Rn, #31
10508 ADD Rd, Rd, #1. */
10509 *cost = COSTS_N_INSNS (2);
10510 if (speed_p)
10511 *cost += extra_cost->alu.shift;
10512 break;
10514 default:
10515 /* Remaining cases are either meaningless or would take
10516 three insns anyway. */
10517 *cost = COSTS_N_INSNS (3);
10518 break;
10520 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10521 return true;
10523 else
10525 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10526 if (CONST_INT_P (XEXP (x, 1))
10527 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10529 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10530 return true;
10533 return false;
10536 /* Not directly inside a set. If it involves the condition code
10537 register it must be the condition for a branch, cond_exec or
10538 I_T_E operation. Since the comparison is performed elsewhere
10539 this is just the control part which has no additional
10540 cost. */
10541 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10542 && XEXP (x, 1) == const0_rtx)
10544 *cost = 0;
10545 return true;
10547 return false;
10549 case ABS:
10550 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10551 && (mode == SFmode || !TARGET_VFP_SINGLE))
10553 *cost = COSTS_N_INSNS (1);
10554 if (speed_p)
10555 *cost += extra_cost->fp[mode != SFmode].neg;
10557 return false;
10559 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10561 *cost = LIBCALL_COST (1);
10562 return false;
10565 if (mode == SImode)
10567 *cost = COSTS_N_INSNS (1);
10568 if (speed_p)
10569 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10570 return false;
10572 /* Vector mode? */
10573 *cost = LIBCALL_COST (1);
10574 return false;
10576 case SIGN_EXTEND:
10577 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10578 && MEM_P (XEXP (x, 0)))
10580 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10582 if (mode == DImode)
10583 *cost += COSTS_N_INSNS (1);
10585 if (!speed_p)
10586 return true;
10588 if (GET_MODE (XEXP (x, 0)) == SImode)
10589 *cost += extra_cost->ldst.load;
10590 else
10591 *cost += extra_cost->ldst.load_sign_extend;
10593 if (mode == DImode)
10594 *cost += extra_cost->alu.shift;
10596 return true;
10599 /* Widening from less than 32-bits requires an extend operation. */
10600 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10602 /* We have SXTB/SXTH. */
10603 *cost = COSTS_N_INSNS (1);
10604 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10605 if (speed_p)
10606 *cost += extra_cost->alu.extend;
10608 else if (GET_MODE (XEXP (x, 0)) != SImode)
10610 /* Needs two shifts. */
10611 *cost = COSTS_N_INSNS (2);
10612 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10613 if (speed_p)
10614 *cost += 2 * extra_cost->alu.shift;
10617 /* Widening beyond 32-bits requires one more insn. */
10618 if (mode == DImode)
10620 *cost += COSTS_N_INSNS (1);
10621 if (speed_p)
10622 *cost += extra_cost->alu.shift;
10625 return true;
10627 case ZERO_EXTEND:
10628 if ((arm_arch4
10629 || GET_MODE (XEXP (x, 0)) == SImode
10630 || GET_MODE (XEXP (x, 0)) == QImode)
10631 && MEM_P (XEXP (x, 0)))
10633 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10635 if (mode == DImode)
10636 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10638 return true;
10641 /* Widening from less than 32-bits requires an extend operation. */
10642 if (GET_MODE (XEXP (x, 0)) == QImode)
10644 /* UXTB can be a shorter instruction in Thumb2, but it might
10645 be slower than the AND Rd, Rn, #255 alternative. When
10646 optimizing for speed it should never be slower to use
10647 AND, and we don't really model 16-bit vs 32-bit insns
10648 here. */
10649 *cost = COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += extra_cost->alu.logical;
10653 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10655 /* We have UXTB/UXTH. */
10656 *cost = COSTS_N_INSNS (1);
10657 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10658 if (speed_p)
10659 *cost += extra_cost->alu.extend;
10661 else if (GET_MODE (XEXP (x, 0)) != SImode)
10663 /* Needs two shifts. It's marginally preferable to use
10664 shifts rather than two BIC instructions as the second
10665 shift may merge with a subsequent insn as a shifter
10666 op. */
10667 *cost = COSTS_N_INSNS (2);
10668 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10669 if (speed_p)
10670 *cost += 2 * extra_cost->alu.shift;
10672 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10673 *cost = COSTS_N_INSNS (1);
10675 /* Widening beyond 32-bits requires one more insn. */
10676 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10681 return true;
10683 case CONST_INT:
10684 *cost = 0;
10685 /* CONST_INT has no mode, so we cannot tell for sure how many
10686 insns are really going to be needed. The best we can do is
10687 look at the value passed. If it fits in SImode, then assume
10688 that's the mode it will be used for. Otherwise assume it
10689 will be used in DImode. */
10690 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10691 mode = SImode;
10692 else
10693 mode = DImode;
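/* For example, a CONST_INT of 0x7fffffff (or -1) is unchanged by
   trunc_int_for_mode (..., SImode) and so is costed as an SImode
   constant, whereas 0x100000000 truncates to 0 and is costed as a
   DImode constant below.  */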
10695 /* Avoid blowing up in arm_gen_constant (). */
10696 if (!(outer_code == PLUS
10697 || outer_code == AND
10698 || outer_code == IOR
10699 || outer_code == XOR
10700 || outer_code == MINUS))
10701 outer_code = SET;
10703 const_int_cost:
10704 if (mode == SImode)
10706 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10707 INTVAL (x), NULL, NULL,
10708 0, 0));
10709 /* Extra costs? */
10711 else
10713 *cost += COSTS_N_INSNS (arm_gen_constant
10714 (outer_code, SImode, NULL,
10715 trunc_int_for_mode (INTVAL (x), SImode),
10716 NULL, NULL, 0, 0)
10717 + arm_gen_constant (outer_code, SImode, NULL,
10718 INTVAL (x) >> 32, NULL,
10719 NULL, 0, 0));
10720 /* Extra costs? */
10723 return true;
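/* For instance, for a DImode value such as 0x100000001 the low and high
   halves (1 and 1) are costed independently through arm_gen_constant,
   giving roughly two insns in total; a half that is not a valid
   immediate would add further insns.  */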
10725 case CONST:
10726 case LABEL_REF:
10727 case SYMBOL_REF:
10728 if (speed_p)
10730 if (arm_arch_thumb2 && !flag_pic)
10731 *cost = COSTS_N_INSNS (2);
10732 else
10733 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10735 else
10736 *cost = COSTS_N_INSNS (2);
10738 if (flag_pic)
10740 *cost += COSTS_N_INSNS (1);
10741 if (speed_p)
10742 *cost += extra_cost->alu.arith;
10745 return true;
10747 case CONST_FIXED:
10748 *cost = COSTS_N_INSNS (4);
10749 /* Fixme. */
10750 return true;
10752 case CONST_DOUBLE:
10753 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10754 && (mode == SFmode || !TARGET_VFP_SINGLE))
10756 if (vfp3_const_double_rtx (x))
10758 *cost = COSTS_N_INSNS (1);
10759 if (speed_p)
10760 *cost += extra_cost->fp[mode == DFmode].fpconst;
10761 return true;
10764 if (speed_p)
10766 *cost = COSTS_N_INSNS (1);
10767 if (mode == DFmode)
10768 *cost += extra_cost->ldst.loadd;
10769 else
10770 *cost += extra_cost->ldst.loadf;
10772 else
10773 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10775 return true;
10777 *cost = COSTS_N_INSNS (4);
10778 return true;
10780 case CONST_VECTOR:
10781 /* Fixme. */
10782 if (TARGET_NEON
10783 && TARGET_HARD_FLOAT
10784 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10785 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10786 *cost = COSTS_N_INSNS (1);
10787 else
10788 *cost = COSTS_N_INSNS (4);
10789 return true;
10791 case HIGH:
10792 case LO_SUM:
10793 *cost = COSTS_N_INSNS (1);
10794 /* When optimizing for size, we prefer constant pool entries to
10795 MOVW/MOVT pairs, so bump the cost of these slightly. */
10796 if (!speed_p)
10797 *cost += 1;
10798 return true;
10800 case CLZ:
10801 *cost = COSTS_N_INSNS (1);
10802 if (speed_p)
10803 *cost += extra_cost->alu.clz;
10804 return false;
10806 case SMIN:
10807 if (XEXP (x, 1) == const0_rtx)
10809 *cost = COSTS_N_INSNS (1);
10810 if (speed_p)
10811 *cost += extra_cost->alu.log_shift;
10812 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10813 return true;
10815 /* Fall through. */
10816 case SMAX:
10817 case UMIN:
10818 case UMAX:
10819 *cost = COSTS_N_INSNS (2);
10820 return false;
10822 case TRUNCATE:
10823 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10824 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10825 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10826 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10827 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10828 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10829 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10830 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10831 == ZERO_EXTEND))))
10833 *cost = COSTS_N_INSNS (1);
10834 if (speed_p)
10835 *cost += extra_cost->mult[1].extend;
10836 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10837 speed_p)
10838 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10839 0, speed_p));
10840 return true;
10842 *cost = LIBCALL_COST (1);
10843 return false;
10845 case UNSPEC:
10846 return arm_unspec_cost (x, outer_code, speed_p, cost);
10848 case PC:
10849 /* Reading the PC is like reading any other register. Writing it
10850 is more expensive, but we take that into account elsewhere. */
10851 *cost = 0;
10852 return true;
10854 case ZERO_EXTRACT:
10855 /* TODO: Simple zero_extract of bottom bits using AND. */
10856 /* Fall through. */
10857 case SIGN_EXTRACT:
10858 if (arm_arch6
10859 && mode == SImode
10860 && CONST_INT_P (XEXP (x, 1))
10861 && CONST_INT_P (XEXP (x, 2)))
10863 *cost = COSTS_N_INSNS (1);
10864 if (speed_p)
10865 *cost += extra_cost->alu.bfx;
10866 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10867 return true;
10869 /* Without UBFX/SBFX, need to resort to shift operations. */
10870 *cost = COSTS_N_INSNS (2);
10871 if (speed_p)
10872 *cost += 2 * extra_cost->alu.shift;
10873 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10874 return true;
10876 case FLOAT_EXTEND:
10877 if (TARGET_HARD_FLOAT)
10879 *cost = COSTS_N_INSNS (1);
10880 if (speed_p)
10881 *cost += extra_cost->fp[mode == DFmode].widen;
10882 if (!TARGET_FPU_ARMV8
10883 && GET_MODE (XEXP (x, 0)) == HFmode)
10885 /* Pre v8, widening HF->DF is a two-step process, first
10886 widening to SFmode. */
10887 *cost += COSTS_N_INSNS (1);
10888 if (speed_p)
10889 *cost += extra_cost->fp[0].widen;
10891 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10892 return true;
10895 *cost = LIBCALL_COST (1);
10896 return false;
10898 case FLOAT_TRUNCATE:
10899 if (TARGET_HARD_FLOAT)
10901 *cost = COSTS_N_INSNS (1);
10902 if (speed_p)
10903 *cost += extra_cost->fp[mode == DFmode].narrow;
10904 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10905 return true;
10906 /* Vector modes? */
10908 *cost = LIBCALL_COST (1);
10909 return false;
10911 case FMA:
10912 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10914 rtx op0 = XEXP (x, 0);
10915 rtx op1 = XEXP (x, 1);
10916 rtx op2 = XEXP (x, 2);
10918 *cost = COSTS_N_INSNS (1);
10920 /* vfms or vfnma. */
10921 if (GET_CODE (op0) == NEG)
10922 op0 = XEXP (op0, 0);
10924 /* vfnms or vfnma. */
10925 if (GET_CODE (op2) == NEG)
10926 op2 = XEXP (op2, 0);
10928 *cost += rtx_cost (op0, FMA, 0, speed_p);
10929 *cost += rtx_cost (op1, FMA, 1, speed_p);
10930 *cost += rtx_cost (op2, FMA, 2, speed_p);
10932 if (speed_p)
10933 *cost += extra_cost->fp[mode == DFmode].fma;
10935 return true;
10938 *cost = LIBCALL_COST (3);
10939 return false;
10941 case FIX:
10942 case UNSIGNED_FIX:
10943 if (TARGET_HARD_FLOAT)
10945 if (GET_MODE_CLASS (mode) == MODE_INT)
10947 *cost = COSTS_N_INSNS (1);
10948 if (speed_p)
10949 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10950 /* Strip off the 'cost' of rounding towards zero. */
10951 if (GET_CODE (XEXP (x, 0)) == FIX)
10952 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10953 else
10954 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10955 /* ??? Increase the cost to deal with transferring from
10956 FP -> CORE registers? */
10957 return true;
10959 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10960 && TARGET_FPU_ARMV8)
10962 *cost = COSTS_N_INSNS (1);
10963 if (speed_p)
10964 *cost += extra_cost->fp[mode == DFmode].roundint;
10965 return false;
10967 /* Vector costs? */
10969 *cost = LIBCALL_COST (1);
10970 return false;
10972 case FLOAT:
10973 case UNSIGNED_FLOAT:
10974 if (TARGET_HARD_FLOAT)
10976 /* ??? Increase the cost to deal with transferring from CORE
10977 -> FP registers? */
10978 *cost = COSTS_N_INSNS (1);
10979 if (speed_p)
10980 *cost += extra_cost->fp[mode == DFmode].fromint;
10981 return false;
10983 *cost = LIBCALL_COST (1);
10984 return false;
10986 case CALL:
10987 *cost = COSTS_N_INSNS (1);
10988 return true;
10990 case ASM_OPERANDS:
10992 /* Just a guess: the number of instructions in the asm template
10993 plus one insn per input, with a minimum of COSTS_N_INSNS (1)
10994 for the template (see PR60663). */
10995 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10996 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10998 *cost = COSTS_N_INSNS (asm_length + num_operands);
10999 return true;
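/* For example, an asm whose template expands to two instructions and
   which has three inputs is costed as COSTS_N_INSNS (2 + 3); an empty
   template still counts as one instruction because of the MAX above.  */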
11001 default:
11002 if (mode != VOIDmode)
11003 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11004 else
11005 *cost = COSTS_N_INSNS (4); /* Who knows? */
11006 return false;
11010 #undef HANDLE_NARROW_SHIFT_ARITH
11012 /* RTX costs. Dispatch to the cost routine appropriate for the current tuning, for both speed and size. */
11013 static bool
11014 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11015 int *total, bool speed)
11017 bool result;
11019 if (TARGET_OLD_RTX_COSTS
11020 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11022 /* Old way. (Deprecated.) */
11023 if (!speed)
11024 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11025 (enum rtx_code) outer_code, total);
11026 else
11027 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11028 (enum rtx_code) outer_code, total,
11029 speed);
11031 else
11033 /* New way. */
11034 if (current_tune->insn_extra_cost)
11035 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11036 (enum rtx_code) outer_code,
11037 current_tune->insn_extra_cost,
11038 total, speed);
11039 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11040 && current_tune->insn_extra_cost == NULL */
11041 else
11042 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11043 (enum rtx_code) outer_code,
11044 &generic_extra_costs, total, speed);
11047 if (dump_file && (dump_flags & TDF_DETAILS))
11049 print_rtl_single (dump_file, x);
11050 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11051 *total, result ? "final" : "partial");
11053 return result;
11056 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11057 supported on any "slowmul" cores, so it can be ignored. */
11059 static bool
11060 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11061 int *total, bool speed)
11063 enum machine_mode mode = GET_MODE (x);
11065 if (TARGET_THUMB)
11067 *total = thumb1_rtx_costs (x, code, outer_code);
11068 return true;
11071 switch (code)
11073 case MULT:
11074 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11075 || mode == DImode)
11077 *total = COSTS_N_INSNS (20);
11078 return false;
11081 if (CONST_INT_P (XEXP (x, 1)))
11083 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11084 & (unsigned HOST_WIDE_INT) 0xffffffff);
11085 int cost, const_ok = const_ok_for_arm (i);
11086 int j, booth_unit_size;
11088 /* Tune as appropriate. */
11089 cost = const_ok ? 4 : 8;
11090 booth_unit_size = 2;
11091 for (j = 0; i && j < 32; j += booth_unit_size)
11093 i >>= booth_unit_size;
11094 cost++;
11097 *total = COSTS_N_INSNS (cost);
11098 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11099 return true;
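/* As an illustration of the loop above: multiplying by 0xff shifts the
   constant out two bits at a time, so the loop runs four times and the
   result is 4 (const_ok) + 4 == COSTS_N_INSNS (8), plus the cost of
   operand 0.  */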
11102 *total = COSTS_N_INSNS (20);
11103 return false;
11105 default:
11106 return arm_rtx_costs_1 (x, outer_code, total, speed);
11111 /* RTX cost for cores with a fast multiply unit (M variants). */
11113 static bool
11114 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11115 int *total, bool speed)
11117 enum machine_mode mode = GET_MODE (x);
11119 if (TARGET_THUMB1)
11121 *total = thumb1_rtx_costs (x, code, outer_code);
11122 return true;
11125 /* ??? Should Thumb-2 use different costs? */
11126 switch (code)
11128 case MULT:
11129 /* There is no point basing this on the tuning, since it is always the
11130 fast variant if it exists at all. */
11131 if (mode == DImode
11132 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11133 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11134 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11136 *total = COSTS_N_INSNS (2);
11137 return false;
11141 if (mode == DImode)
11143 *total = COSTS_N_INSNS (5);
11144 return false;
11147 if (CONST_INT_P (XEXP (x, 1)))
11149 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11150 & (unsigned HOST_WIDE_INT) 0xffffffff);
11151 int cost, const_ok = const_ok_for_arm (i);
11152 int j, booth_unit_size;
11154 /* Tune as appropriate. */
11155 cost = const_ok ? 4 : 8;
11156 booth_unit_size = 8;
11157 for (j = 0; i && j < 32; j += booth_unit_size)
11159 i >>= booth_unit_size;
11160 cost++;
11163 *total = COSTS_N_INSNS (cost);
11164 return false;
11167 if (mode == SImode)
11169 *total = COSTS_N_INSNS (4);
11170 return false;
11173 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11175 if (TARGET_HARD_FLOAT
11176 && (mode == SFmode
11177 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11179 *total = COSTS_N_INSNS (1);
11180 return false;
11184 /* Requires a lib call */
11185 *total = COSTS_N_INSNS (20);
11186 return false;
11188 default:
11189 return arm_rtx_costs_1 (x, outer_code, total, speed);
11194 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11195 so it can be ignored. */
11197 static bool
11198 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11199 int *total, bool speed)
11201 enum machine_mode mode = GET_MODE (x);
11203 if (TARGET_THUMB)
11205 *total = thumb1_rtx_costs (x, code, outer_code);
11206 return true;
11209 switch (code)
11211 case COMPARE:
11212 if (GET_CODE (XEXP (x, 0)) != MULT)
11213 return arm_rtx_costs_1 (x, outer_code, total, speed);
11215 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11216 will stall until the multiplication is complete. */
11217 *total = COSTS_N_INSNS (3);
11218 return false;
11220 case MULT:
11221 /* There is no point basing this on the tuning, since it is always the
11222 fast variant if it exists at all. */
11223 if (mode == DImode
11224 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11225 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11226 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11228 *total = COSTS_N_INSNS (2);
11229 return false;
11233 if (mode == DImode)
11235 *total = COSTS_N_INSNS (5);
11236 return false;
11239 if (CONST_INT_P (XEXP (x, 1)))
11241 /* If operand 1 is a constant we can more accurately
11242 calculate the cost of the multiply. The multiplier can
11243 retire 15 bits on the first cycle and a further 12 on the
11244 second. We do, of course, have to load the constant into
11245 a register first. */
11246 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11247 /* There's a general overhead of one cycle. */
11248 int cost = 1;
11249 unsigned HOST_WIDE_INT masked_const;
11251 if (i & 0x80000000)
11252 i = ~i;
11254 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11256 masked_const = i & 0xffff8000;
11257 if (masked_const != 0)
11259 cost++;
11260 masked_const = i & 0xf8000000;
11261 if (masked_const != 0)
11262 cost++;
11264 *total = COSTS_N_INSNS (cost);
11265 return false;
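/* For example, multiplying by 0x12345 needs more than the 15 bits
   retired in the first cycle (0x12345 & 0xffff8000 != 0) but fits in
   the further 12 bits of the second (0x12345 & 0xf8000000 == 0), so
   the cost is 1 + 1 == COSTS_N_INSNS (2).  */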
11268 if (mode == SImode)
11270 *total = COSTS_N_INSNS (3);
11271 return false;
11274 /* Requires a lib call */
11275 *total = COSTS_N_INSNS (20);
11276 return false;
11278 default:
11279 return arm_rtx_costs_1 (x, outer_code, total, speed);
11284 /* RTX costs for 9e (and later) cores. */
11286 static bool
11287 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11288 int *total, bool speed)
11290 enum machine_mode mode = GET_MODE (x);
11292 if (TARGET_THUMB1)
11294 switch (code)
11296 case MULT:
11297 *total = COSTS_N_INSNS (3);
11298 return true;
11300 default:
11301 *total = thumb1_rtx_costs (x, code, outer_code);
11302 return true;
11306 switch (code)
11308 case MULT:
11309 /* There is no point basing this on the tuning, since it is always the
11310 fast variant if it exists at all. */
11311 if (mode == DImode
11312 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11313 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11314 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11316 *total = COSTS_N_INSNS (2);
11317 return false;
11321 if (mode == DImode)
11323 *total = COSTS_N_INSNS (5);
11324 return false;
11327 if (mode == SImode)
11329 *total = COSTS_N_INSNS (2);
11330 return false;
11333 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11335 if (TARGET_HARD_FLOAT
11336 && (mode == SFmode
11337 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11339 *total = COSTS_N_INSNS (1);
11340 return false;
11344 *total = COSTS_N_INSNS (20);
11345 return false;
11347 default:
11348 return arm_rtx_costs_1 (x, outer_code, total, speed);
11351 /* All address computations that can be done are free, but rtx cost returns
11352 the same for practically all of them. So we weight the different types
11353 of address here in the order (most pref first):
11354 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11355 static inline int
11356 arm_arm_address_cost (rtx x)
11358 enum rtx_code c = GET_CODE (x);
11360 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11361 return 0;
11362 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11363 return 10;
11365 if (c == PLUS)
11367 if (CONST_INT_P (XEXP (x, 1)))
11368 return 2;
11370 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11371 return 3;
11373 return 4;
11376 return 6;
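/* Under the weighting above, for example: a POST_INC address such as
   [r0], #4 costs 0; [r1, #8] (register plus constant) costs 2;
   [r1, r2, lsl #2] (register plus shifted register) costs 3; a plain
   register costs 6; and a SYMBOL_REF or LABEL_REF address costs 10.  */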
11379 static inline int
11380 arm_thumb_address_cost (rtx x)
11382 enum rtx_code c = GET_CODE (x);
11384 if (c == REG)
11385 return 1;
11386 if (c == PLUS
11387 && REG_P (XEXP (x, 0))
11388 && CONST_INT_P (XEXP (x, 1)))
11389 return 1;
11391 return 2;
11394 static int
11395 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11396 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11398 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11401 /* Adjust cost hook for XScale. */
11402 static bool
11403 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11405 /* Some true dependencies can have a higher cost depending
11406 on precisely how certain input operands are used. */
11407 if (REG_NOTE_KIND(link) == 0
11408 && recog_memoized (insn) >= 0
11409 && recog_memoized (dep) >= 0)
11411 int shift_opnum = get_attr_shift (insn);
11412 enum attr_type attr_type = get_attr_type (dep);
11414 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11415 operand for INSN. If we have a shifted input operand and the
11416 instruction we depend on is another ALU instruction, then we may
11417 have to account for an additional stall. */
11418 if (shift_opnum != 0
11419 && (attr_type == TYPE_ALU_SHIFT_IMM
11420 || attr_type == TYPE_ALUS_SHIFT_IMM
11421 || attr_type == TYPE_LOGIC_SHIFT_IMM
11422 || attr_type == TYPE_LOGICS_SHIFT_IMM
11423 || attr_type == TYPE_ALU_SHIFT_REG
11424 || attr_type == TYPE_ALUS_SHIFT_REG
11425 || attr_type == TYPE_LOGIC_SHIFT_REG
11426 || attr_type == TYPE_LOGICS_SHIFT_REG
11427 || attr_type == TYPE_MOV_SHIFT
11428 || attr_type == TYPE_MVN_SHIFT
11429 || attr_type == TYPE_MOV_SHIFT_REG
11430 || attr_type == TYPE_MVN_SHIFT_REG))
11432 rtx shifted_operand;
11433 int opno;
11435 /* Get the shifted operand. */
11436 extract_insn (insn);
11437 shifted_operand = recog_data.operand[shift_opnum];
11439 /* Iterate over all the operands in DEP. If we write an operand
11440 that overlaps with SHIFTED_OPERAND, then we have to increase the
11441 cost of this dependency. */
11442 extract_insn (dep);
11443 preprocess_constraints (dep);
11444 for (opno = 0; opno < recog_data.n_operands; opno++)
11446 /* We can ignore strict inputs. */
11447 if (recog_data.operand_type[opno] == OP_IN)
11448 continue;
11450 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11451 shifted_operand))
11453 *cost = 2;
11454 return false;
11459 return true;
11462 /* Adjust cost hook for Cortex A9. */
11463 static bool
11464 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11466 switch (REG_NOTE_KIND (link))
11468 case REG_DEP_ANTI:
11469 *cost = 0;
11470 return false;
11472 case REG_DEP_TRUE:
11473 case REG_DEP_OUTPUT:
11474 if (recog_memoized (insn) >= 0
11475 && recog_memoized (dep) >= 0)
11477 if (GET_CODE (PATTERN (insn)) == SET)
11479 if (GET_MODE_CLASS
11480 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11481 || GET_MODE_CLASS
11482 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11484 enum attr_type attr_type_insn = get_attr_type (insn);
11485 enum attr_type attr_type_dep = get_attr_type (dep);
11487 /* By default all dependencies of the form
11488 s0 = s0 <op> s1
11489 s0 = s0 <op> s2
11490 have an extra latency of 1 cycle because
11491 of the input and output dependency in this
11492 case. However, this gets modeled as a true
11493 dependency and hence all these checks. */
11494 if (REG_P (SET_DEST (PATTERN (insn)))
11495 && REG_P (SET_DEST (PATTERN (dep)))
11496 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11497 SET_DEST (PATTERN (dep))))
11499 /* FMACS is a special case where the dependent
11500 instruction can be issued 3 cycles before
11501 the normal latency in case of an output
11502 dependency. */
11503 if ((attr_type_insn == TYPE_FMACS
11504 || attr_type_insn == TYPE_FMACD)
11505 && (attr_type_dep == TYPE_FMACS
11506 || attr_type_dep == TYPE_FMACD))
11508 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11509 *cost = insn_default_latency (dep) - 3;
11510 else
11511 *cost = insn_default_latency (dep);
11512 return false;
11514 else
11516 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11517 *cost = insn_default_latency (dep) + 1;
11518 else
11519 *cost = insn_default_latency (dep);
11521 return false;
11526 break;
11528 default:
11529 gcc_unreachable ();
11532 return true;
11535 /* Adjust cost hook for FA726TE. */
11536 static bool
11537 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11539 /* For FA726TE, a true dependency on CPSR (i.e. a cond-setting insn followed
11540 by a predicated one) has a penalty of 3. */
11541 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11542 && recog_memoized (insn) >= 0
11543 && recog_memoized (dep) >= 0
11544 && get_attr_conds (dep) == CONDS_SET)
11546 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11547 if (get_attr_conds (insn) == CONDS_USE
11548 && get_attr_type (insn) != TYPE_BRANCH)
11550 *cost = 3;
11551 return false;
11554 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11555 || get_attr_conds (insn) == CONDS_USE)
11557 *cost = 0;
11558 return false;
11562 return true;
11565 /* Implement TARGET_REGISTER_MOVE_COST.
11567 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11568 such a move is typically more expensive than a single memory access.
11569 We set the cost to less than that of two memory accesses so that
11570 floating point to integer conversion does not go through memory. */
11572 static int
11573 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11574 reg_class_t from, reg_class_t to)
11576 if (TARGET_32BIT)
11578 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11579 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11580 return 15;
11581 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11582 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11583 return 4;
11584 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11585 return 20;
11586 else
11587 return 2;
11589 else
11591 if (from == HI_REGS || to == HI_REGS)
11592 return 4;
11593 else
11594 return 2;
11598 /* Implement TARGET_MEMORY_MOVE_COST. */
11600 static int
11601 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11602 bool in ATTRIBUTE_UNUSED)
11604 if (TARGET_32BIT)
11605 return 10;
11606 else
11608 if (GET_MODE_SIZE (mode) < 4)
11609 return 8;
11610 else
11611 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
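/* Note how the two hooks interact: on a 32-bit target a memory move is
   costed at 10, so the VFP<->core register move cost of 15 above is
   indeed less than the two memory accesses (20) mentioned in the
   comment before arm_register_move_cost.  */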
11615 /* Vectorizer cost model implementation. */
11617 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11618 static int
11619 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11620 tree vectype,
11621 int misalign ATTRIBUTE_UNUSED)
11623 unsigned elements;
11625 switch (type_of_cost)
11627 case scalar_stmt:
11628 return current_tune->vec_costs->scalar_stmt_cost;
11630 case scalar_load:
11631 return current_tune->vec_costs->scalar_load_cost;
11633 case scalar_store:
11634 return current_tune->vec_costs->scalar_store_cost;
11636 case vector_stmt:
11637 return current_tune->vec_costs->vec_stmt_cost;
11639 case vector_load:
11640 return current_tune->vec_costs->vec_align_load_cost;
11642 case vector_store:
11643 return current_tune->vec_costs->vec_store_cost;
11645 case vec_to_scalar:
11646 return current_tune->vec_costs->vec_to_scalar_cost;
11648 case scalar_to_vec:
11649 return current_tune->vec_costs->scalar_to_vec_cost;
11651 case unaligned_load:
11652 return current_tune->vec_costs->vec_unalign_load_cost;
11654 case unaligned_store:
11655 return current_tune->vec_costs->vec_unalign_store_cost;
11657 case cond_branch_taken:
11658 return current_tune->vec_costs->cond_taken_branch_cost;
11660 case cond_branch_not_taken:
11661 return current_tune->vec_costs->cond_not_taken_branch_cost;
11663 case vec_perm:
11664 case vec_promote_demote:
11665 return current_tune->vec_costs->vec_stmt_cost;
11667 case vec_construct:
11668 elements = TYPE_VECTOR_SUBPARTS (vectype);
11669 return elements / 2 + 1;
11671 default:
11672 gcc_unreachable ();
11676 /* Implement targetm.vectorize.add_stmt_cost. */
11678 static unsigned
11679 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11680 struct _stmt_vec_info *stmt_info, int misalign,
11681 enum vect_cost_model_location where)
11683 unsigned *cost = (unsigned *) data;
11684 unsigned retval = 0;
11686 if (flag_vect_cost_model)
11688 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11689 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11691 /* Statements in an inner loop relative to the loop being
11692 vectorized are weighted more heavily. The value here is
11693 arbitrary and could potentially be improved with analysis. */
11694 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11695 count *= 50; /* FIXME. */
11697 retval = (unsigned) (count * stmt_cost);
11698 cost[where] += retval;
11701 return retval;
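/* Example of the accounting above (illustrative): a vector_load statement
   sitting in an inner loop relative to the loop being vectorized is charged
   50 * vec_align_load_cost and added to cost[vect_body]; the same statement
   outside an inner loop is charged just vec_align_load_cost.  */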
11704 /* Return true if and only if this insn can dual-issue only as older. */
11705 static bool
11706 cortexa7_older_only (rtx insn)
11708 if (recog_memoized (insn) < 0)
11709 return false;
11711 switch (get_attr_type (insn))
11713 case TYPE_ALU_DSP_REG:
11714 case TYPE_ALU_SREG:
11715 case TYPE_ALUS_SREG:
11716 case TYPE_LOGIC_REG:
11717 case TYPE_LOGICS_REG:
11718 case TYPE_ADC_REG:
11719 case TYPE_ADCS_REG:
11720 case TYPE_ADR:
11721 case TYPE_BFM:
11722 case TYPE_REV:
11723 case TYPE_MVN_REG:
11724 case TYPE_SHIFT_IMM:
11725 case TYPE_SHIFT_REG:
11726 case TYPE_LOAD_BYTE:
11727 case TYPE_LOAD1:
11728 case TYPE_STORE1:
11729 case TYPE_FFARITHS:
11730 case TYPE_FADDS:
11731 case TYPE_FFARITHD:
11732 case TYPE_FADDD:
11733 case TYPE_FMOV:
11734 case TYPE_F_CVT:
11735 case TYPE_FCMPS:
11736 case TYPE_FCMPD:
11737 case TYPE_FCONSTS:
11738 case TYPE_FCONSTD:
11739 case TYPE_FMULS:
11740 case TYPE_FMACS:
11741 case TYPE_FMULD:
11742 case TYPE_FMACD:
11743 case TYPE_FDIVS:
11744 case TYPE_FDIVD:
11745 case TYPE_F_MRC:
11746 case TYPE_F_MRRC:
11747 case TYPE_F_FLAG:
11748 case TYPE_F_LOADS:
11749 case TYPE_F_STORES:
11750 return true;
11751 default:
11752 return false;
11756 /* Return true if and only if this insn can dual-issue as younger. */
11757 static bool
11758 cortexa7_younger (FILE *file, int verbose, rtx insn)
11760 if (recog_memoized (insn) < 0)
11762 if (verbose > 5)
11763 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11764 return false;
11767 switch (get_attr_type (insn))
11769 case TYPE_ALU_IMM:
11770 case TYPE_ALUS_IMM:
11771 case TYPE_LOGIC_IMM:
11772 case TYPE_LOGICS_IMM:
11773 case TYPE_EXTEND:
11774 case TYPE_MVN_IMM:
11775 case TYPE_MOV_IMM:
11776 case TYPE_MOV_REG:
11777 case TYPE_MOV_SHIFT:
11778 case TYPE_MOV_SHIFT_REG:
11779 case TYPE_BRANCH:
11780 case TYPE_CALL:
11781 return true;
11782 default:
11783 return false;
11788 /* Look for an instruction that can dual issue only as an older
11789 instruction, and move it in front of any instructions that can
11790 dual-issue as younger, while preserving the relative order of all
11791    other instructions in the ready list.  This is a heuristic to help
11792 dual-issue in later cycles, by postponing issue of more flexible
11793 instructions. This heuristic may affect dual issue opportunities
11794 in the current cycle. */
11795 static void
11796 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11797 int clock)
11799 int i;
11800 int first_older_only = -1, first_younger = -1;
11802 if (verbose > 5)
11803 fprintf (file,
11804 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11805 clock,
11806 *n_readyp);
11808 /* Traverse the ready list from the head (the instruction to issue
11809      first), looking for the first instruction that can issue as
11810 younger and the first instruction that can dual-issue only as
11811 older. */
11812 for (i = *n_readyp - 1; i >= 0; i--)
11814 rtx insn = ready[i];
11815 if (cortexa7_older_only (insn))
11817 first_older_only = i;
11818 if (verbose > 5)
11819 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11820 break;
11822 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11823 first_younger = i;
11826 /* Nothing to reorder because either no younger insn found or insn
11827 that can dual-issue only as older appears before any insn that
11828 can dual-issue as younger. */
11829 if (first_younger == -1)
11831 if (verbose > 5)
11832 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11833 return;
11836 /* Nothing to reorder because no older-only insn in the ready list. */
11837 if (first_older_only == -1)
11839 if (verbose > 5)
11840 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11841 return;
11844 /* Move first_older_only insn before first_younger. */
11845 if (verbose > 5)
11846 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11847 INSN_UID(ready [first_older_only]),
11848 INSN_UID(ready [first_younger]));
11849 rtx first_older_only_insn = ready [first_older_only];
11850 for (i = first_older_only; i < first_younger; i++)
11852 ready[i] = ready[i+1];
11855 ready[i] = first_older_only_insn;
11856 return;
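/* Illustrative example of the rotation above (not from the original
   source): suppose the ready list is { A, O, Y } with Y at the head
   (highest index, issued first), O dual-issuable only as older and Y as
   younger.  Then first_older_only indexes O, first_younger indexes Y, and
   the loop shifts Y one slot towards the tail and places O at the head, so
   the inflexible insn issues now and the flexible one is kept for a later
   dual-issue slot.  */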
11859 /* Implement TARGET_SCHED_REORDER. */
11860 static int
11861 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11862 int clock)
11864 switch (arm_tune)
11866 case cortexa7:
11867 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11868 break;
11869 default:
11870 /* Do nothing for other cores. */
11871 break;
11874 return arm_issue_rate ();
11877 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11878 It corrects the value of COST based on the relationship between
11879 INSN and DEP through the dependence LINK. It returns the new
11880 value. There is a per-core adjust_cost hook to adjust scheduler costs
11881 and the per-core hook can choose to completely override the generic
11882 adjust_cost function. Only put bits of code into arm_adjust_cost that
11883 are common across all cores. */
11884 static int
11885 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11887 rtx i_pat, d_pat;
11889 /* When generating Thumb-1 code, we want to place flag-setting operations
11890 close to a conditional branch which depends on them, so that we can
11891 omit the comparison. */
11892 if (TARGET_THUMB1
11893 && REG_NOTE_KIND (link) == 0
11894 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11895 && recog_memoized (dep) >= 0
11896 && get_attr_conds (dep) == CONDS_SET)
11897 return 0;
11899 if (current_tune->sched_adjust_cost != NULL)
11901 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11902 return cost;
11905 /* XXX Is this strictly true? */
11906 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11907 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11908 return 0;
11910 /* Call insns don't incur a stall, even if they follow a load. */
11911 if (REG_NOTE_KIND (link) == 0
11912 && CALL_P (insn))
11913 return 1;
11915 if ((i_pat = single_set (insn)) != NULL
11916 && MEM_P (SET_SRC (i_pat))
11917 && (d_pat = single_set (dep)) != NULL
11918 && MEM_P (SET_DEST (d_pat)))
11920 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11921 /* This is a load after a store, there is no conflict if the load reads
11922 from a cached area. Assume that loads from the stack, and from the
11923 constant pool are cached, and that others will miss. This is a
11924 hack. */
11926 if ((GET_CODE (src_mem) == SYMBOL_REF
11927 && CONSTANT_POOL_ADDRESS_P (src_mem))
11928 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11929 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11930 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11931 return 1;
11934 return cost;
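/* Illustrative case of the load-after-store heuristic above (not from the
   original source): a store followed by a dependent load whose address is
   stack-relative or a constant-pool reference is assumed to hit the cache
   and gets the minimum cost of 1; a load through an arbitrary pointer keeps
   the cost supplied by the scheduler.  */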
11938 arm_max_conditional_execute (void)
11940 return max_insns_skipped;
11943 static int
11944 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11946 if (TARGET_32BIT)
11947 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11948 else
11949 return (optimize > 0) ? 2 : 0;
11952 static int
11953 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11955 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11958 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11959 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11960 sequences of non-executed instructions in IT blocks probably take the same
11961 amount of time as executed instructions (and the IT instruction itself takes
11962 space in icache). This function was experimentally determined to give good
11963 results on a popular embedded benchmark. */
11965 static int
11966 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11968 return (TARGET_32BIT && speed_p) ? 1
11969 : arm_default_branch_cost (speed_p, predictable_p);
11972 static bool fp_consts_inited = false;
11974 static REAL_VALUE_TYPE value_fp0;
11976 static void
11977 init_fp_table (void)
11979 REAL_VALUE_TYPE r;
11981 r = REAL_VALUE_ATOF ("0", DFmode);
11982 value_fp0 = r;
11983 fp_consts_inited = true;
11986 /* Return TRUE if rtx X is a valid immediate FP constant. */
11988 arm_const_double_rtx (rtx x)
11990 REAL_VALUE_TYPE r;
11992 if (!fp_consts_inited)
11993 init_fp_table ();
11995 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11996 if (REAL_VALUE_MINUS_ZERO (r))
11997 return 0;
11999 if (REAL_VALUES_EQUAL (r, value_fp0))
12000 return 1;
12002 return 0;
12005 /* VFPv3 has a fairly wide range of representable immediates, formed from
12006 "quarter-precision" floating-point values. These can be evaluated using this
12007 formula (with ^ for exponentiation):
12009 -1^s * n * 2^-r
12011 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12012 16 <= n <= 31 and 0 <= r <= 7.
12014 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12016 - A (most-significant) is the sign bit.
12017 - BCD are the exponent (encoded as r XOR 3).
12018 - EFGH are the mantissa (encoded as n - 16).
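/* Worked example (illustrative): 1.0 can be written as 16 * 2^-4, so s = 0,
   n = 16 and r = 4.  The encoding above gives A = 0, BCD = (4 XOR 3) = 111b
   and EFGH = (16 - 16) = 0000b, i.e. the index 0x70, which matches what
   vfp3_const_double_index below computes for CONST_DOUBLE 1.0.  */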
12021 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12022 fconst[sd] instruction, or -1 if X isn't suitable. */
12023 static int
12024 vfp3_const_double_index (rtx x)
12026 REAL_VALUE_TYPE r, m;
12027 int sign, exponent;
12028 unsigned HOST_WIDE_INT mantissa, mant_hi;
12029 unsigned HOST_WIDE_INT mask;
12030 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12031 bool fail;
12033 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12034 return -1;
12036 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12038 /* We can't represent these things, so detect them first. */
12039 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12040 return -1;
12042 /* Extract sign, exponent and mantissa. */
12043 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12044 r = real_value_abs (&r);
12045 exponent = REAL_EXP (&r);
12046 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12047 highest (sign) bit, with a fixed binary point at bit point_pos.
12048 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12049 bits for the mantissa, this may fail (low bits would be lost). */
12050 real_ldexp (&m, &r, point_pos - exponent);
12051 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12052 mantissa = w.elt (0);
12053 mant_hi = w.elt (1);
12055 /* If there are bits set in the low part of the mantissa, we can't
12056 represent this value. */
12057 if (mantissa != 0)
12058 return -1;
12060 /* Now make it so that mantissa contains the most-significant bits, and move
12061 the point_pos to indicate that the least-significant bits have been
12062 discarded. */
12063 point_pos -= HOST_BITS_PER_WIDE_INT;
12064 mantissa = mant_hi;
12066 /* We can permit four significant bits of mantissa only, plus a high bit
12067 which is always 1. */
12068 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12069 if ((mantissa & mask) != 0)
12070 return -1;
12072 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12073 mantissa >>= point_pos - 5;
12075 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12076 floating-point immediate zero with Neon using an integer-zero load, but
12077 that case is handled elsewhere.) */
12078 if (mantissa == 0)
12079 return -1;
12081 gcc_assert (mantissa >= 16 && mantissa <= 31);
12083 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12084 normalized significands are in the range [1, 2). (Our mantissa is shifted
12085 left 4 places at this point relative to normalized IEEE754 values). GCC
12086 internally uses [0.5, 1) (see real.c), so the exponent returned from
12087 REAL_EXP must be altered. */
12088 exponent = 5 - exponent;
12090 if (exponent < 0 || exponent > 7)
12091 return -1;
12093 /* Sign, mantissa and exponent are now in the correct form to plug into the
12094 formula described in the comment above. */
12095 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12098 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12100 vfp3_const_double_rtx (rtx x)
12102 if (!TARGET_VFP3)
12103 return 0;
12105 return vfp3_const_double_index (x) != -1;
12108 /* Recognize immediates which can be used in various Neon instructions. Legal
12109 immediates are described by the following table (for VMVN variants, the
12110 bitwise inverse of the constant shown is recognized. In either case, VMOV
12111 is output and the correct instruction to use for a given constant is chosen
12112 by the assembler). The constant shown is replicated across all elements of
12113 the destination vector.
12115 insn elems variant constant (binary)
12116 ---- ----- ------- -----------------
12117 vmov i32 0 00000000 00000000 00000000 abcdefgh
12118 vmov i32 1 00000000 00000000 abcdefgh 00000000
12119 vmov i32 2 00000000 abcdefgh 00000000 00000000
12120 vmov i32 3 abcdefgh 00000000 00000000 00000000
12121 vmov i16 4 00000000 abcdefgh
12122 vmov i16 5 abcdefgh 00000000
12123 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12124 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12125 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12126 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12127 vmvn i16 10 00000000 abcdefgh
12128 vmvn i16 11 abcdefgh 00000000
12129 vmov i32 12 00000000 00000000 abcdefgh 11111111
12130 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12131 vmov i32 14 00000000 abcdefgh 11111111 11111111
12132 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12133 vmov i8 16 abcdefgh
12134 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12135 eeeeeeee ffffffff gggggggg hhhhhhhh
12136 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12137 vmov f32 19 00000000 00000000 00000000 00000000
12139 For case 18, B = !b. Representable values are exactly those accepted by
12140 vfp3_const_double_index, but are output as floating-point numbers rather
12141 than indices.
12143 For case 19, we will change it to vmov.i32 when assembling.
12145 Variants 0-5 (inclusive) may also be used as immediates for the second
12146 operand of VORR/VBIC instructions.
12148 The INVERSE argument causes the bitwise inverse of the given operand to be
12149 recognized instead (used for recognizing legal immediates for the VAND/VORN
12150 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12151 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12152 output, rather than the real insns vbic/vorr).
12154 INVERSE makes no difference to the recognition of float vectors.
12156 The return value is the variant of immediate as shown in the above table, or
12157 -1 if the given value doesn't match any of the listed patterns.
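/* Worked example (illustrative, not part of the original comment): a
   V4SImode CONST_VECTOR whose elements are all 0x000000AB splats to the
   byte pattern AB 00 00 00 repeated four times, which matches variant 0
   above; the function returns 0 with *ELEMENTWIDTH set to 32 and *MODCONST
   set to 0xAB, and the assembler emits a vmov.i32 with that immediate.  */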
12159 static int
12160 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12161 rtx *modconst, int *elementwidth)
12163 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12164 matches = 1; \
12165 for (i = 0; i < idx; i += (STRIDE)) \
12166 if (!(TEST)) \
12167 matches = 0; \
12168 if (matches) \
12170 immtype = (CLASS); \
12171 elsize = (ELSIZE); \
12172 break; \
12175 unsigned int i, elsize = 0, idx = 0, n_elts;
12176 unsigned int innersize;
12177 unsigned char bytes[16];
12178 int immtype = -1, matches;
12179 unsigned int invmask = inverse ? 0xff : 0;
12180 bool vector = GET_CODE (op) == CONST_VECTOR;
12182 if (vector)
12184 n_elts = CONST_VECTOR_NUNITS (op);
12185 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12187 else
12189 n_elts = 1;
12190 if (mode == VOIDmode)
12191 mode = DImode;
12192 innersize = GET_MODE_SIZE (mode);
12195 /* Vectors of float constants. */
12196 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12198 rtx el0 = CONST_VECTOR_ELT (op, 0);
12199 REAL_VALUE_TYPE r0;
12201 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12202 return -1;
12204 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12206 for (i = 1; i < n_elts; i++)
12208 rtx elt = CONST_VECTOR_ELT (op, i);
12209 REAL_VALUE_TYPE re;
12211 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12213 if (!REAL_VALUES_EQUAL (r0, re))
12214 return -1;
12217 if (modconst)
12218 *modconst = CONST_VECTOR_ELT (op, 0);
12220 if (elementwidth)
12221 *elementwidth = 0;
12223 if (el0 == CONST0_RTX (GET_MODE (el0)))
12224 return 19;
12225 else
12226 return 18;
12229 /* Splat vector constant out into a byte vector. */
12230 for (i = 0; i < n_elts; i++)
12232 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12233 unsigned HOST_WIDE_INT elpart;
12234 unsigned int part, parts;
12236 if (CONST_INT_P (el))
12238 elpart = INTVAL (el);
12239 parts = 1;
12241 else if (CONST_DOUBLE_P (el))
12243 elpart = CONST_DOUBLE_LOW (el);
12244 parts = 2;
12246 else
12247 gcc_unreachable ();
12249 for (part = 0; part < parts; part++)
12251 unsigned int byte;
12252 for (byte = 0; byte < innersize; byte++)
12254 bytes[idx++] = (elpart & 0xff) ^ invmask;
12255 elpart >>= BITS_PER_UNIT;
12257 if (CONST_DOUBLE_P (el))
12258 elpart = CONST_DOUBLE_HIGH (el);
12262 /* Sanity check. */
12263 gcc_assert (idx == GET_MODE_SIZE (mode));
12267 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12268 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12270 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12271 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12273 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12274 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12276 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12277 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12279 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12281 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12283 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12284 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12286 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12287 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12289 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12290 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12292 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12293 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12295 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12297 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12299 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12300 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12302 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12303 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12305 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12306 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12308 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12309 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12311 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12313 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12314 && bytes[i] == bytes[(i + 8) % idx]);
12316 while (0);
12318 if (immtype == -1)
12319 return -1;
12321 if (elementwidth)
12322 *elementwidth = elsize;
12324 if (modconst)
12326 unsigned HOST_WIDE_INT imm = 0;
12328 /* Un-invert bytes of recognized vector, if necessary. */
12329 if (invmask != 0)
12330 for (i = 0; i < idx; i++)
12331 bytes[i] ^= invmask;
12333 if (immtype == 17)
12335 /* FIXME: Broken on 32-bit H_W_I hosts. */
12336 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12338 for (i = 0; i < 8; i++)
12339 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12340 << (i * BITS_PER_UNIT);
12342 *modconst = GEN_INT (imm);
12344 else
12346 unsigned HOST_WIDE_INT imm = 0;
12348 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12349 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12351 *modconst = GEN_INT (imm);
12355 return immtype;
12356 #undef CHECK
12359 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12360 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12361 float elements), and a modified constant (whatever should be output for a
12362 VMOV) in *MODCONST. */
12365 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12366 rtx *modconst, int *elementwidth)
12368 rtx tmpconst;
12369 int tmpwidth;
12370 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12372 if (retval == -1)
12373 return 0;
12375 if (modconst)
12376 *modconst = tmpconst;
12378 if (elementwidth)
12379 *elementwidth = tmpwidth;
12381 return 1;
12384 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12385 the immediate is valid, write a constant suitable for using as an operand
12386 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12387 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12390 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12391 rtx *modconst, int *elementwidth)
12393 rtx tmpconst;
12394 int tmpwidth;
12395 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12397 if (retval < 0 || retval > 5)
12398 return 0;
12400 if (modconst)
12401 *modconst = tmpconst;
12403 if (elementwidth)
12404 *elementwidth = tmpwidth;
12406 return 1;
12409 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12410 the immediate is valid, write a constant suitable for using as an operand
12411 to VSHR/VSHL to *MODCONST and the corresponding element width to
12412    *ELEMENTWIDTH.  ISLEFTSHIFT says whether this is a left or a right shift,
12413    because the two shift forms have different immediate ranges.  */
12416 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12417 rtx *modconst, int *elementwidth,
12418 bool isleftshift)
12420 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12421 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12422 unsigned HOST_WIDE_INT last_elt = 0;
12423 unsigned HOST_WIDE_INT maxshift;
12425 /* Split vector constant out into a byte vector. */
12426 for (i = 0; i < n_elts; i++)
12428 rtx el = CONST_VECTOR_ELT (op, i);
12429 unsigned HOST_WIDE_INT elpart;
12431 if (CONST_INT_P (el))
12432 elpart = INTVAL (el);
12433 else if (CONST_DOUBLE_P (el))
12434 return 0;
12435 else
12436 gcc_unreachable ();
12438 if (i != 0 && elpart != last_elt)
12439 return 0;
12441 last_elt = elpart;
12444 /* Shift less than element size. */
12445 maxshift = innersize * 8;
12447 if (isleftshift)
12449 /* Left shift immediate value can be from 0 to <size>-1. */
12450 if (last_elt >= maxshift)
12451 return 0;
12453 else
12455 /* Right shift immediate value can be from 1 to <size>. */
12456 if (last_elt == 0 || last_elt > maxshift)
12457 return 0;
12460 if (elementwidth)
12461 *elementwidth = innersize * 8;
12463 if (modconst)
12464 *modconst = CONST_VECTOR_ELT (op, 0);
12466 return 1;
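/* Example of the shift limits above (illustrative): for a V8HImode vector
   (16-bit elements) a VSHL immediate must lie in 0..15 and a VSHR immediate
   in 1..16; a right shift by 0 or a left shift by 16 makes the function
   return 0.  */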
12469 /* Return a string suitable for output of Neon immediate logic operation
12470 MNEM. */
12472 char *
12473 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12474 int inverse, int quad)
12476 int width, is_valid;
12477 static char templ[40];
12479 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12481 gcc_assert (is_valid != 0);
12483 if (quad)
12484 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12485 else
12486 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12488 return templ;
12491 /* Return a string suitable for output of Neon immediate shift operation
12492 (VSHR or VSHL) MNEM. */
12494 char *
12495 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12496 enum machine_mode mode, int quad,
12497 bool isleftshift)
12499 int width, is_valid;
12500 static char templ[40];
12502 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12503 gcc_assert (is_valid != 0);
12505 if (quad)
12506 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12507 else
12508 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12510 return templ;
12513 /* Output a sequence of pairwise operations to implement a reduction.
12514 NOTE: We do "too much work" here, because pairwise operations work on two
12515 registers-worth of operands in one go. Unfortunately we can't exploit those
12516 extra calculations to do the full operation in fewer steps, I don't think.
12517 Although all vector elements of the result but the first are ignored, we
12518 actually calculate the same result in each of the elements. An alternative
12519 such as initially loading a vector with zero to use as each of the second
12520 operands would use up an additional register and take an extra instruction,
12521 for no particular gain. */
12523 void
12524 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12525 rtx (*reduc) (rtx, rtx, rtx))
12527 enum machine_mode inner = GET_MODE_INNER (mode);
12528 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12529 rtx tmpsum = op1;
12531 for (i = parts / 2; i >= 1; i /= 2)
12533 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12534 emit_insn (reduc (dest, tmpsum, tmpsum));
12535 tmpsum = dest;
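/* Illustrative walk-through (not from the original source): for a mode with
   four elements the loop runs with i == 2 and then i == 1, emitting two
   pairwise REDUC operations; the first writes a fresh scratch register and
   the second writes OP0, so every element of OP0 ends up holding the
   complete reduction, as described above.  */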
12539 /* If VALS is a vector constant that can be loaded into a register
12540 using VDUP, generate instructions to do so and return an RTX to
12541 assign to the register. Otherwise return NULL_RTX. */
12543 static rtx
12544 neon_vdup_constant (rtx vals)
12546 enum machine_mode mode = GET_MODE (vals);
12547 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12548 int n_elts = GET_MODE_NUNITS (mode);
12549 bool all_same = true;
12550 rtx x;
12551 int i;
12553 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12554 return NULL_RTX;
12556 for (i = 0; i < n_elts; ++i)
12558 x = XVECEXP (vals, 0, i);
12559 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12560 all_same = false;
12563 if (!all_same)
12564 /* The elements are not all the same. We could handle repeating
12565 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12566 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12567 vdup.i16). */
12568 return NULL_RTX;
12570 /* We can load this constant by using VDUP and a constant in a
12571 single ARM register. This will be cheaper than a vector
12572 load. */
12574 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12575 return gen_rtx_VEC_DUPLICATE (mode, x);
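/* Illustrative example (not from the original source): for the V4SImode
   constant { 5, 5, 5, 5 } all elements compare equal, so the scalar 5 is
   copied into a core register and a VEC_DUPLICATE of it is returned, which
   the vector move patterns emit as a single vdup.32.  The mixed constant
   { 5, 6, 5, 6 } fails the all_same test and yields NULL_RTX.  */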
12578 /* Generate code to load VALS, which is a PARALLEL containing only
12579 constants (for vec_init) or CONST_VECTOR, efficiently into a
12580 register. Returns an RTX to copy into the register, or NULL_RTX
12581    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
12584 neon_make_constant (rtx vals)
12586 enum machine_mode mode = GET_MODE (vals);
12587 rtx target;
12588 rtx const_vec = NULL_RTX;
12589 int n_elts = GET_MODE_NUNITS (mode);
12590 int n_const = 0;
12591 int i;
12593 if (GET_CODE (vals) == CONST_VECTOR)
12594 const_vec = vals;
12595 else if (GET_CODE (vals) == PARALLEL)
12597 /* A CONST_VECTOR must contain only CONST_INTs and
12598 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12599 Only store valid constants in a CONST_VECTOR. */
12600 for (i = 0; i < n_elts; ++i)
12602 rtx x = XVECEXP (vals, 0, i);
12603 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12604 n_const++;
12606 if (n_const == n_elts)
12607 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12609 else
12610 gcc_unreachable ();
12612 if (const_vec != NULL
12613 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12614 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12615 return const_vec;
12616 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12617 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12618 pipeline cycle; creating the constant takes one or two ARM
12619 pipeline cycles. */
12620 return target;
12621 else if (const_vec != NULL_RTX)
12622 /* Load from constant pool. On Cortex-A8 this takes two cycles
12623    (for either double or quad vectors).  We cannot take advantage
12624 of single-cycle VLD1 because we need a PC-relative addressing
12625 mode. */
12626 return const_vec;
12627 else
12628 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12629    We cannot construct an initializer.  */
12630 return NULL_RTX;
12633 /* Initialize vector TARGET to VALS. */
12635 void
12636 neon_expand_vector_init (rtx target, rtx vals)
12638 enum machine_mode mode = GET_MODE (target);
12639 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12640 int n_elts = GET_MODE_NUNITS (mode);
12641 int n_var = 0, one_var = -1;
12642 bool all_same = true;
12643 rtx x, mem;
12644 int i;
12646 for (i = 0; i < n_elts; ++i)
12648 x = XVECEXP (vals, 0, i);
12649 if (!CONSTANT_P (x))
12650 ++n_var, one_var = i;
12652 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12653 all_same = false;
12656 if (n_var == 0)
12658 rtx constant = neon_make_constant (vals);
12659 if (constant != NULL_RTX)
12661 emit_move_insn (target, constant);
12662 return;
12666 /* Splat a single non-constant element if we can. */
12667 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12669 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12670 emit_insn (gen_rtx_SET (VOIDmode, target,
12671 gen_rtx_VEC_DUPLICATE (mode, x)));
12672 return;
12675 /* One field is non-constant. Load constant then overwrite varying
12676 field. This is more efficient than using the stack. */
12677 if (n_var == 1)
12679 rtx copy = copy_rtx (vals);
12680 rtx index = GEN_INT (one_var);
12682 /* Load constant part of vector, substitute neighboring value for
12683 varying element. */
12684 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12685 neon_expand_vector_init (target, copy);
12687 /* Insert variable. */
12688 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12689 switch (mode)
12691 case V8QImode:
12692 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12693 break;
12694 case V16QImode:
12695 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12696 break;
12697 case V4HImode:
12698 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12699 break;
12700 case V8HImode:
12701 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12702 break;
12703 case V2SImode:
12704 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12705 break;
12706 case V4SImode:
12707 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12708 break;
12709 case V2SFmode:
12710 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12711 break;
12712 case V4SFmode:
12713 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12714 break;
12715 case V2DImode:
12716 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12717 break;
12718 default:
12719 gcc_unreachable ();
12721 return;
12724 /* Construct the vector in memory one field at a time
12725 and load the whole vector. */
12726 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12727 for (i = 0; i < n_elts; i++)
12728 emit_move_insn (adjust_address_nv (mem, inner_mode,
12729 i * GET_MODE_SIZE (inner_mode)),
12730 XVECEXP (vals, 0, i));
12731 emit_move_insn (target, mem);
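/* Illustrative example of the n_var == 1 path above (not from the original
   source): initializing a V4SImode vector to { 1, x, 3, 4 }, where only x
   is non-constant, first loads the constant vector { 1, 3, 3, 4 } (the
   varying slot is filled with its neighbour) and then emits one vset_lane
   to overwrite lane 1 with x, avoiding the stack temporary used in the
   fully variable case.  */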
12734 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12735 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12736 reported source locations are bogus. */
12738 static void
12739 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12740 const char *err)
12742 HOST_WIDE_INT lane;
12744 gcc_assert (CONST_INT_P (operand));
12746 lane = INTVAL (operand);
12748 if (lane < low || lane >= high)
12749 error (err);
12752 /* Bounds-check lanes. */
12754 void
12755 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12757 bounds_check (operand, low, high, "lane out of range");
12760 /* Bounds-check constants. */
12762 void
12763 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12765 bounds_check (operand, low, high, "constant out of range");
12768 HOST_WIDE_INT
12769 neon_element_bits (enum machine_mode mode)
12771 if (mode == DImode)
12772 return GET_MODE_BITSIZE (mode);
12773 else
12774 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12778 /* Predicates for `match_operand' and `match_operator'. */
12780 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12781 WB is true if full writeback address modes are allowed and is false
12782 if limited writeback address modes (POST_INC and PRE_DEC) are
12783 allowed. */
12786 arm_coproc_mem_operand (rtx op, bool wb)
12788 rtx ind;
12790 /* Reject eliminable registers. */
12791 if (! (reload_in_progress || reload_completed || lra_in_progress)
12792 && ( reg_mentioned_p (frame_pointer_rtx, op)
12793 || reg_mentioned_p (arg_pointer_rtx, op)
12794 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12795 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12796 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12797 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12798 return FALSE;
12800 /* Constants are converted into offsets from labels. */
12801 if (!MEM_P (op))
12802 return FALSE;
12804 ind = XEXP (op, 0);
12806 if (reload_completed
12807 && (GET_CODE (ind) == LABEL_REF
12808 || (GET_CODE (ind) == CONST
12809 && GET_CODE (XEXP (ind, 0)) == PLUS
12810 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12811 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12812 return TRUE;
12814 /* Match: (mem (reg)). */
12815 if (REG_P (ind))
12816 return arm_address_register_rtx_p (ind, 0);
12818   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12819 acceptable in any case (subject to verification by
12820 arm_address_register_rtx_p). We need WB to be true to accept
12821 PRE_INC and POST_DEC. */
12822 if (GET_CODE (ind) == POST_INC
12823 || GET_CODE (ind) == PRE_DEC
12824 || (wb
12825 && (GET_CODE (ind) == PRE_INC
12826 || GET_CODE (ind) == POST_DEC)))
12827 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12829 if (wb
12830 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12831 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12832 && GET_CODE (XEXP (ind, 1)) == PLUS
12833 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12834 ind = XEXP (ind, 1);
12836 /* Match:
12837 (plus (reg)
12838 (const)). */
12839 if (GET_CODE (ind) == PLUS
12840 && REG_P (XEXP (ind, 0))
12841 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12842 && CONST_INT_P (XEXP (ind, 1))
12843 && INTVAL (XEXP (ind, 1)) > -1024
12844 && INTVAL (XEXP (ind, 1)) < 1024
12845 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12846 return TRUE;
12848 return FALSE;
12851 /* Return TRUE if OP is a memory operand which we can load or store a vector
12852 to/from. TYPE is one of the following values:
12853    0 - Vector load/store (vldr)
12854 1 - Core registers (ldm)
12855 2 - Element/structure loads (vld1)
12858 neon_vector_mem_operand (rtx op, int type, bool strict)
12860 rtx ind;
12862 /* Reject eliminable registers. */
12863 if (! (reload_in_progress || reload_completed)
12864 && ( reg_mentioned_p (frame_pointer_rtx, op)
12865 || reg_mentioned_p (arg_pointer_rtx, op)
12866 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12867 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12868 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12869 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12870 return !strict;
12872 /* Constants are converted into offsets from labels. */
12873 if (!MEM_P (op))
12874 return FALSE;
12876 ind = XEXP (op, 0);
12878 if (reload_completed
12879 && (GET_CODE (ind) == LABEL_REF
12880 || (GET_CODE (ind) == CONST
12881 && GET_CODE (XEXP (ind, 0)) == PLUS
12882 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12883 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12884 return TRUE;
12886 /* Match: (mem (reg)). */
12887 if (REG_P (ind))
12888 return arm_address_register_rtx_p (ind, 0);
12890 /* Allow post-increment with Neon registers. */
12891 if ((type != 1 && GET_CODE (ind) == POST_INC)
12892 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12893 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12895 /* Allow post-increment by register for VLDn */
12896 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12897 && GET_CODE (XEXP (ind, 1)) == PLUS
12898 && REG_P (XEXP (XEXP (ind, 1), 1)))
12899 return true;
12901 /* Match:
12902 (plus (reg)
12903 (const)). */
12904 if (type == 0
12905 && GET_CODE (ind) == PLUS
12906 && REG_P (XEXP (ind, 0))
12907 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12908 && CONST_INT_P (XEXP (ind, 1))
12909 && INTVAL (XEXP (ind, 1)) > -1024
12910 /* For quad modes, we restrict the constant offset to be slightly less
12911 than what the instruction format permits. We have no such constraint
12912 on double mode offsets. (This must match arm_legitimate_index_p.) */
12913 && (INTVAL (XEXP (ind, 1))
12914 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12915 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12916 return TRUE;
12918 return FALSE;
12921 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12922 type. */
12924 neon_struct_mem_operand (rtx op)
12926 rtx ind;
12928 /* Reject eliminable registers. */
12929 if (! (reload_in_progress || reload_completed)
12930 && ( reg_mentioned_p (frame_pointer_rtx, op)
12931 || reg_mentioned_p (arg_pointer_rtx, op)
12932 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12933 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12934 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12935 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12936 return FALSE;
12938 /* Constants are converted into offsets from labels. */
12939 if (!MEM_P (op))
12940 return FALSE;
12942 ind = XEXP (op, 0);
12944 if (reload_completed
12945 && (GET_CODE (ind) == LABEL_REF
12946 || (GET_CODE (ind) == CONST
12947 && GET_CODE (XEXP (ind, 0)) == PLUS
12948 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12949 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12950 return TRUE;
12952 /* Match: (mem (reg)). */
12953 if (REG_P (ind))
12954 return arm_address_register_rtx_p (ind, 0);
12956 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12957 if (GET_CODE (ind) == POST_INC
12958 || GET_CODE (ind) == PRE_DEC)
12959 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12961 return FALSE;
12964 /* Return true if X is a register that will be eliminated later on. */
12966 arm_eliminable_register (rtx x)
12968 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12969 || REGNO (x) == ARG_POINTER_REGNUM
12970 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12971 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12974 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12975 coprocessor registers. Otherwise return NO_REGS. */
12977 enum reg_class
12978 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12980 if (mode == HFmode)
12982 if (!TARGET_NEON_FP16)
12983 return GENERAL_REGS;
12984 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12985 return NO_REGS;
12986 return GENERAL_REGS;
12989 /* The neon move patterns handle all legitimate vector and struct
12990 addresses. */
12991 if (TARGET_NEON
12992 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12993 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12994 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12995 || VALID_NEON_STRUCT_MODE (mode)))
12996 return NO_REGS;
12998 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12999 return NO_REGS;
13001 return GENERAL_REGS;
13004 /* Values which must be returned in the most-significant end of the return
13005 register. */
13007 static bool
13008 arm_return_in_msb (const_tree valtype)
13010 return (TARGET_AAPCS_BASED
13011 && BYTES_BIG_ENDIAN
13012 && (AGGREGATE_TYPE_P (valtype)
13013 || TREE_CODE (valtype) == COMPLEX_TYPE
13014 || FIXED_POINT_TYPE_P (valtype)));
13017 /* Return TRUE if X references a SYMBOL_REF. */
13019 symbol_mentioned_p (rtx x)
13021 const char * fmt;
13022 int i;
13024 if (GET_CODE (x) == SYMBOL_REF)
13025 return 1;
13027 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13028 are constant offsets, not symbols. */
13029 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13030 return 0;
13032 fmt = GET_RTX_FORMAT (GET_CODE (x));
13034 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13036 if (fmt[i] == 'E')
13038 int j;
13040 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13041 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13042 return 1;
13044 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13045 return 1;
13048 return 0;
13051 /* Return TRUE if X references a LABEL_REF. */
13053 label_mentioned_p (rtx x)
13055 const char * fmt;
13056 int i;
13058 if (GET_CODE (x) == LABEL_REF)
13059 return 1;
13061 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13062 instruction, but they are constant offsets, not symbols. */
13063 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13064 return 0;
13066 fmt = GET_RTX_FORMAT (GET_CODE (x));
13067 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13069 if (fmt[i] == 'E')
13071 int j;
13073 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13074 if (label_mentioned_p (XVECEXP (x, i, j)))
13075 return 1;
13077 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13078 return 1;
13081 return 0;
13085 tls_mentioned_p (rtx x)
13087 switch (GET_CODE (x))
13089 case CONST:
13090 return tls_mentioned_p (XEXP (x, 0));
13092 case UNSPEC:
13093 if (XINT (x, 1) == UNSPEC_TLS)
13094 return 1;
13096 default:
13097 return 0;
13101 /* Must not copy any rtx that uses a pc-relative address. */
13103 static int
13104 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13106 if (GET_CODE (*x) == UNSPEC
13107 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13108 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13109 return 1;
13110 return 0;
13113 static bool
13114 arm_cannot_copy_insn_p (rtx insn)
13116 /* The tls call insn cannot be copied, as it is paired with a data
13117 word. */
13118 if (recog_memoized (insn) == CODE_FOR_tlscall)
13119 return true;
13121 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13124 enum rtx_code
13125 minmax_code (rtx x)
13127 enum rtx_code code = GET_CODE (x);
13129 switch (code)
13131 case SMAX:
13132 return GE;
13133 case SMIN:
13134 return LE;
13135 case UMIN:
13136 return LEU;
13137 case UMAX:
13138 return GEU;
13139 default:
13140 gcc_unreachable ();
13144 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13146 bool
13147 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13148 int *mask, bool *signed_sat)
13150 /* The high bound must be a power of two minus one. */
13151 int log = exact_log2 (INTVAL (hi_bound) + 1);
13152 if (log == -1)
13153 return false;
13155 /* The low bound is either zero (for usat) or one less than the
13156 negation of the high bound (for ssat). */
13157 if (INTVAL (lo_bound) == 0)
13159 if (mask)
13160 *mask = log;
13161 if (signed_sat)
13162 *signed_sat = false;
13164 return true;
13167 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13169 if (mask)
13170 *mask = log + 1;
13171 if (signed_sat)
13172 *signed_sat = true;
13174 return true;
13177 return false;
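/* Worked examples (illustrative): clamping to [0, 255] gives
   exact_log2 (256) == 8 with a zero low bound, so the operation maps to
   usat with *MASK == 8; clamping to [-128, 127] satisfies lo == -hi - 1,
   so it maps to ssat with *MASK == 7 + 1 == 8.  */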
13180 /* Return 1 if memory locations are adjacent. */
13182 adjacent_mem_locations (rtx a, rtx b)
13184 /* We don't guarantee to preserve the order of these memory refs. */
13185 if (volatile_refs_p (a) || volatile_refs_p (b))
13186 return 0;
13188 if ((REG_P (XEXP (a, 0))
13189 || (GET_CODE (XEXP (a, 0)) == PLUS
13190 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13191 && (REG_P (XEXP (b, 0))
13192 || (GET_CODE (XEXP (b, 0)) == PLUS
13193 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13195 HOST_WIDE_INT val0 = 0, val1 = 0;
13196 rtx reg0, reg1;
13197 int val_diff;
13199 if (GET_CODE (XEXP (a, 0)) == PLUS)
13201 reg0 = XEXP (XEXP (a, 0), 0);
13202 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13204 else
13205 reg0 = XEXP (a, 0);
13207 if (GET_CODE (XEXP (b, 0)) == PLUS)
13209 reg1 = XEXP (XEXP (b, 0), 0);
13210 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13212 else
13213 reg1 = XEXP (b, 0);
13215 /* Don't accept any offset that will require multiple
13216 instructions to handle, since this would cause the
13217 arith_adjacentmem pattern to output an overlong sequence. */
13218 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13219 return 0;
13221 /* Don't allow an eliminable register: register elimination can make
13222 the offset too large. */
13223 if (arm_eliminable_register (reg0))
13224 return 0;
13226 val_diff = val1 - val0;
13228 if (arm_ld_sched)
13230 /* If the target has load delay slots, then there's no benefit
13231 to using an ldm instruction unless the offset is zero and
13232 we are optimizing for size. */
13233 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13234 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13235 && (val_diff == 4 || val_diff == -4));
13238 return ((REGNO (reg0) == REGNO (reg1))
13239 && (val_diff == 4 || val_diff == -4));
13242 return 0;
13245 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13246 for load operations, false for store operations. CONSECUTIVE is true
13247 if the register numbers in the operation must be consecutive in the register
13248 bank. RETURN_PC is true if value is to be loaded in PC.
13249 The pattern we are trying to match for load is:
13250 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13251 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13254 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13256 where
13257 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13258 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13259 3. If consecutive is TRUE, then for kth register being loaded,
13260 REGNO (R_dk) = REGNO (R_d0) + k.
13261 The pattern for store is similar. */
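/* Illustrative instance of the pattern above (not from the original
   source): the write-back load ldmia r0!, {r4, r5} is a PARALLEL whose
   first element updates the base register,
     (set (reg r0) (plus (reg r0) (const_int 8)))
   followed by the two loads from r0 and r0 + 4; the write-back check below
   requires the adjustment to equal (count - 1 - offset_adj) * reg_increment,
   i.e. 2 * 4 == 8 here.  */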
13262 bool
13263 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13264 bool consecutive, bool return_pc)
13266 HOST_WIDE_INT count = XVECLEN (op, 0);
13267 rtx reg, mem, addr;
13268 unsigned regno;
13269 unsigned first_regno;
13270 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13271 rtx elt;
13272 bool addr_reg_in_reglist = false;
13273 bool update = false;
13274 int reg_increment;
13275 int offset_adj;
13276 int regs_per_val;
13278 /* If not in SImode, then registers must be consecutive
13279 (e.g., VLDM instructions for DFmode). */
13280 gcc_assert ((mode == SImode) || consecutive);
13281 /* Setting return_pc for stores is illegal. */
13282 gcc_assert (!return_pc || load);
13284 /* Set up the increments and the regs per val based on the mode. */
13285 reg_increment = GET_MODE_SIZE (mode);
13286 regs_per_val = reg_increment / 4;
13287 offset_adj = return_pc ? 1 : 0;
13289 if (count <= 1
13290 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13291 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13292 return false;
13294 /* Check if this is a write-back. */
13295 elt = XVECEXP (op, 0, offset_adj);
13296 if (GET_CODE (SET_SRC (elt)) == PLUS)
13298 i++;
13299 base = 1;
13300 update = true;
13302 /* The offset adjustment must be the number of registers being
13303 popped times the size of a single register. */
13304 if (!REG_P (SET_DEST (elt))
13305 || !REG_P (XEXP (SET_SRC (elt), 0))
13306 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13307 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13308 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13309 ((count - 1 - offset_adj) * reg_increment))
13310 return false;
13313 i = i + offset_adj;
13314 base = base + offset_adj;
13315 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13316 success depends on the type: VLDM can do just one reg,
13317 LDM must do at least two. */
13318 if ((count <= i) && (mode == SImode))
13319 return false;
13321 elt = XVECEXP (op, 0, i - 1);
13322 if (GET_CODE (elt) != SET)
13323 return false;
13325 if (load)
13327 reg = SET_DEST (elt);
13328 mem = SET_SRC (elt);
13330 else
13332 reg = SET_SRC (elt);
13333 mem = SET_DEST (elt);
13336 if (!REG_P (reg) || !MEM_P (mem))
13337 return false;
13339 regno = REGNO (reg);
13340 first_regno = regno;
13341 addr = XEXP (mem, 0);
13342 if (GET_CODE (addr) == PLUS)
13344 if (!CONST_INT_P (XEXP (addr, 1)))
13345 return false;
13347 offset = INTVAL (XEXP (addr, 1));
13348 addr = XEXP (addr, 0);
13351 if (!REG_P (addr))
13352 return false;
13354 /* Don't allow SP to be loaded unless it is also the base register. It
13355 guarantees that SP is reset correctly when an LDM instruction
13356 is interrupted. Otherwise, we might end up with a corrupt stack. */
13357 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13358 return false;
13360 for (; i < count; i++)
13362 elt = XVECEXP (op, 0, i);
13363 if (GET_CODE (elt) != SET)
13364 return false;
13366 if (load)
13368 reg = SET_DEST (elt);
13369 mem = SET_SRC (elt);
13371 else
13373 reg = SET_SRC (elt);
13374 mem = SET_DEST (elt);
13377 if (!REG_P (reg)
13378 || GET_MODE (reg) != mode
13379 || REGNO (reg) <= regno
13380 || (consecutive
13381 && (REGNO (reg) !=
13382 (unsigned int) (first_regno + regs_per_val * (i - base))))
13383 /* Don't allow SP to be loaded unless it is also the base register. It
13384 guarantees that SP is reset correctly when an LDM instruction
13385 is interrupted. Otherwise, we might end up with a corrupt stack. */
13386 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13387 || !MEM_P (mem)
13388 || GET_MODE (mem) != mode
13389 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13390 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13391 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13392 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13393 offset + (i - base) * reg_increment))
13394 && (!REG_P (XEXP (mem, 0))
13395 || offset + (i - base) * reg_increment != 0)))
13396 return false;
13398 regno = REGNO (reg);
13399 if (regno == REGNO (addr))
13400 addr_reg_in_reglist = true;
13403 if (load)
13405 if (update && addr_reg_in_reglist)
13406 return false;
13408   /* For Thumb-1, the address register is always modified, either by write-back
13409      or by an explicit load.  If the pattern does not describe an update,
13410 then the address register must be in the list of loaded registers. */
13411 if (TARGET_THUMB1)
13412 return update || addr_reg_in_reglist;
13415 return true;
13418 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13419 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13420 instruction. ADD_OFFSET is nonzero if the base address register needs
13421 to be modified with an add instruction before we can use it. */
13423 static bool
13424 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13425 int nops, HOST_WIDE_INT add_offset)
13427 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13428 if the offset isn't small enough. The reason 2 ldrs are faster
13429 is because these ARMs are able to do more than one cache access
13430 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13431 whilst the ARM8 has a double bandwidth cache. This means that
13432 these cores can do both an instruction fetch and a data fetch in
13433 a single cycle, so the trick of calculating the address into a
13434 scratch register (one of the result regs) and then doing a load
13435 multiple actually becomes slower (and no smaller in code size).
13436 That is the transformation
13438 ldr rd1, [rbase + offset]
13439 ldr rd2, [rbase + offset + 4]
13443 add rd1, rbase, offset
13444 ldmia rd1, {rd1, rd2}
13446 produces worse code -- '3 cycles + any stalls on rd2' instead of
13447 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13448 access per cycle, the first sequence could never complete in less
13449 than 6 cycles, whereas the ldm sequence would only take 5 and
13450 would make better use of sequential accesses if not hitting the
13451 cache.
13453 We cheat here and test 'arm_ld_sched' which we currently know to
13454 only be true for the ARM8, ARM9 and StrongARM. If this ever
13455 changes, then the test below needs to be reworked. */
13456 if (nops == 2 && arm_ld_sched && add_offset != 0)
13457 return false;
13459 /* XScale has load-store double instructions, but they have stricter
13460 alignment requirements than load-store multiple, so we cannot
13461 use them.
13463 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13464 the pipeline until completion.
13466    NREGS           CYCLES
           1                3
           2                4
           3                5
           4                6
13472 An ldr instruction takes 1-3 cycles, but does not block the
13473 pipeline.
13475 NREGS CYCLES
13476 1 1-3
13477 2 2-6
13478 3 3-9
13479 4 4-12
13481 Best case ldr will always win. However, the more ldr instructions
13482 we issue, the less likely we are to be able to schedule them well.
13483 Using ldr instructions also increases code size.
13485 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13486 for counts of 3 or 4 regs. */
13487 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13488 return false;
13489 return true;
13492 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13493 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13494 an array ORDER which describes the sequence to use when accessing the
13495 offsets that produces an ascending order. In this sequence, each
13496 offset must be larger by exactly 4 than the previous one. ORDER[0]
13497 must have been filled in with the lowest offset by the caller.
13498 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13499 we use to verify that ORDER produces an ascending order of registers.
13500 Return true if it was possible to construct such an order, false if
13501 not. */
13503 static bool
13504 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13505 int *unsorted_regs)
13507 int i;
13508 for (i = 1; i < nops; i++)
13510 int j;
13512 order[i] = order[i - 1];
13513 for (j = 0; j < nops; j++)
13514 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13516 /* We must find exactly one offset that is higher than the
13517 previous one by 4. */
13518 if (order[i] != order[i - 1])
13519 return false;
13520 order[i] = j;
13522 if (order[i] == order[i - 1])
13523 return false;
13524 /* The register numbers must be ascending. */
13525 if (unsorted_regs != NULL
13526 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13527 return false;
13529 return true;
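/* Worked example (illustrative): with UNSORTED_OFFSETS = { 8, 0, 12, 4 }
   the caller sets ORDER[0] = 1 (the lowest offset, 0); the loop above then
   finds the unique offsets 4, 8 and 12 in turn, producing
   ORDER = { 1, 3, 0, 2 }.  If UNSORTED_REGS is supplied, the register
   numbers visited in that order must also be strictly increasing, otherwise
   false is returned.  */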
13532 /* Used to determine in a peephole whether a sequence of load
13533 instructions can be changed into a load-multiple instruction.
13534 NOPS is the number of separate load instructions we are examining. The
13535 first NOPS entries in OPERANDS are the destination registers, the
13536 next NOPS entries are memory operands. If this function is
13537 successful, *BASE is set to the common base register of the memory
13538 accesses; *LOAD_OFFSET is set to the first memory location's offset
13539 from that base register.
13540 REGS is an array filled in with the destination register numbers.
13541 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13542 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13543 the sequence of registers in REGS matches the loads from ascending memory
13544 locations, and the function verifies that the register numbers are
13545 themselves ascending. If CHECK_REGS is false, the register numbers
13546 are stored in the order they are found in the operands. */
13547 static int
13548 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13549 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13551 int unsorted_regs[MAX_LDM_STM_OPS];
13552 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13553 int order[MAX_LDM_STM_OPS];
13554 rtx base_reg_rtx = NULL;
13555 int base_reg = -1;
13556 int i, ldm_case;
13558 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13559 easily extended if required. */
13560 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13562 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13564 /* Loop over the operands and check that the memory references are
13565 suitable (i.e. immediate offsets from the same base register). At
13566 the same time, extract the target register, and the memory
13567 offsets. */
13568 for (i = 0; i < nops; i++)
13570 rtx reg;
13571 rtx offset;
13573 /* Convert a subreg of a mem into the mem itself. */
13574 if (GET_CODE (operands[nops + i]) == SUBREG)
13575 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13577 gcc_assert (MEM_P (operands[nops + i]));
13579 /* Don't reorder volatile memory references; it doesn't seem worth
13580 looking for the case where the order is ok anyway. */
13581 if (MEM_VOLATILE_P (operands[nops + i]))
13582 return 0;
13584 offset = const0_rtx;
13586 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13587 || (GET_CODE (reg) == SUBREG
13588 && REG_P (reg = SUBREG_REG (reg))))
13589 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13590 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13591 || (GET_CODE (reg) == SUBREG
13592 && REG_P (reg = SUBREG_REG (reg))))
13593 && (CONST_INT_P (offset
13594 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13596 if (i == 0)
13598 base_reg = REGNO (reg);
13599 base_reg_rtx = reg;
13600 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13601 return 0;
13603 else if (base_reg != (int) REGNO (reg))
13604 /* Not addressed from the same base register. */
13605 return 0;
13607 unsorted_regs[i] = (REG_P (operands[i])
13608 ? REGNO (operands[i])
13609 : REGNO (SUBREG_REG (operands[i])));
13611 /* If it isn't an integer register, or if it overwrites the
13612 base register but isn't the last insn in the list, then
13613 we can't do this. */
13614 if (unsorted_regs[i] < 0
13615 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13616 || unsorted_regs[i] > 14
13617 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13618 return 0;
13620 /* Don't allow SP to be loaded unless it is also the base
13621 register. It guarantees that SP is reset correctly when
13622 an LDM instruction is interrupted. Otherwise, we might
13623 end up with a corrupt stack. */
13624 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13625 return 0;
13627 unsorted_offsets[i] = INTVAL (offset);
13628 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13629 order[0] = i;
13631 else
13632 /* Not a suitable memory address. */
13633 return 0;
13636 /* All the useful information has now been extracted from the
13637 operands into unsorted_regs and unsorted_offsets; additionally,
13638 order[0] has been set to the lowest offset in the list. Sort
13639 the offsets into order, verifying that they are adjacent, and
13640 check that the register numbers are ascending. */
13641 if (!compute_offset_order (nops, unsorted_offsets, order,
13642 check_regs ? unsorted_regs : NULL))
13643 return 0;
13645 if (saved_order)
13646 memcpy (saved_order, order, sizeof order);
13648 if (base)
13650 *base = base_reg;
13652 for (i = 0; i < nops; i++)
13653 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13655 *load_offset = unsorted_offsets[order[0]];
13658 if (TARGET_THUMB1
13659 && !peep2_reg_dead_p (nops, base_reg_rtx))
13660 return 0;
13662 if (unsorted_offsets[order[0]] == 0)
13663 ldm_case = 1; /* ldmia */
13664 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13665 ldm_case = 2; /* ldmib */
13666 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13667 ldm_case = 3; /* ldmda */
13668 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13669 ldm_case = 4; /* ldmdb */
13670 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13671 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13672 ldm_case = 5;
13673 else
13674 return 0;
13676 if (!multiple_operation_profitable_p (false, nops,
13677 ldm_case == 5
13678 ? unsorted_offsets[order[0]] : 0))
13679 return 0;
13681 return ldm_case;
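/* Summary of the LDM_CASE values chosen above (illustrative):
     1  ldmia  lowest offset 0                 (increment after)
     2  ldmib  lowest offset 4, ARM only       (increment before)
     3  ldmda  highest offset 0, ARM only      (decrement after)
     4  ldmdb  highest offset -4               (decrement before)
     5  any other lowest offset that fits an add/sub immediate; the offset
        is first folded into the base with an ADD and an ldmia is then
        used from the adjusted base.  */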
13684 /* Used to determine in a peephole whether a sequence of store instructions can
13685 be changed into a store-multiple instruction.
13686 NOPS is the number of separate store instructions we are examining.
13687 NOPS_TOTAL is the total number of instructions recognized by the peephole
13688 pattern.
13689 The first NOPS entries in OPERANDS are the source registers, the next
13690 NOPS entries are memory operands. If this function is successful, *BASE is
13691 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13692 to the first memory location's offset from that base register. REGS is an
13693 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13694 likewise filled with the corresponding rtx's.
13695 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13696 numbers to an ascending order of stores.
13697 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13698 from ascending memory locations, and the function verifies that the register
13699 numbers are themselves ascending. If CHECK_REGS is false, the register
13700 numbers are stored in the order they are found in the operands. */
13701 static int
13702 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13703 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13704 HOST_WIDE_INT *load_offset, bool check_regs)
13706 int unsorted_regs[MAX_LDM_STM_OPS];
13707 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13708 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13709 int order[MAX_LDM_STM_OPS];
13710 int base_reg = -1;
13711 rtx base_reg_rtx = NULL;
13712 int i, stm_case;
13714 /* Write back of base register is currently only supported for Thumb 1. */
13715 int base_writeback = TARGET_THUMB1;
13717 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13718 easily extended if required. */
13719 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13721 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13723 /* Loop over the operands and check that the memory references are
13724 suitable (i.e. immediate offsets from the same base register). At
13725 the same time, extract the target register, and the memory
13726 offsets. */
13727 for (i = 0; i < nops; i++)
13729 rtx reg;
13730 rtx offset;
13732 /* Convert a subreg of a mem into the mem itself. */
13733 if (GET_CODE (operands[nops + i]) == SUBREG)
13734 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13736 gcc_assert (MEM_P (operands[nops + i]));
13738 /* Don't reorder volatile memory references; it doesn't seem worth
13739 looking for the case where the order is ok anyway. */
13740 if (MEM_VOLATILE_P (operands[nops + i]))
13741 return 0;
13743 offset = const0_rtx;
13745 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13746 || (GET_CODE (reg) == SUBREG
13747 && REG_P (reg = SUBREG_REG (reg))))
13748 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13749 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13750 || (GET_CODE (reg) == SUBREG
13751 && REG_P (reg = SUBREG_REG (reg))))
13752 && (CONST_INT_P (offset
13753 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13755 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13756 ? operands[i] : SUBREG_REG (operands[i]));
13757 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13759 if (i == 0)
13761 base_reg = REGNO (reg);
13762 base_reg_rtx = reg;
13763 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13764 return 0;
13766 else if (base_reg != (int) REGNO (reg))
13767 /* Not addressed from the same base register. */
13768 return 0;
13770 /* If it isn't an integer register, then we can't do this. */
13771 if (unsorted_regs[i] < 0
13772 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13773 /* The effects are unpredictable if the base register is
13774 both updated and stored. */
13775 || (base_writeback && unsorted_regs[i] == base_reg)
13776 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13777 || unsorted_regs[i] > 14)
13778 return 0;
13780 unsorted_offsets[i] = INTVAL (offset);
13781 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13782 order[0] = i;
13784 else
13785 /* Not a suitable memory address. */
13786 return 0;
13789 /* All the useful information has now been extracted from the
13790 operands into unsorted_regs and unsorted_offsets; additionally,
13791 order[0] has been set to the lowest offset in the list. Sort
13792 the offsets into order, verifying that they are adjacent, and
13793 check that the register numbers are ascending. */
13794 if (!compute_offset_order (nops, unsorted_offsets, order,
13795 check_regs ? unsorted_regs : NULL))
13796 return 0;
13798 if (saved_order)
13799 memcpy (saved_order, order, sizeof order);
13801 if (base)
13803 *base = base_reg;
13805 for (i = 0; i < nops; i++)
13807 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13808 if (reg_rtxs)
13809 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13812 *load_offset = unsorted_offsets[order[0]];
13815 if (TARGET_THUMB1
13816 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13817 return 0;
13819 if (unsorted_offsets[order[0]] == 0)
13820 stm_case = 1; /* stmia */
13821 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13822 stm_case = 2; /* stmib */
13823 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13824 stm_case = 3; /* stmda */
13825 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13826 stm_case = 4; /* stmdb */
13827 else
13828 return 0;
13830 if (!multiple_operation_profitable_p (false, nops, 0))
13831 return 0;
13833 return stm_case;
13836 /* Routines for use in generating RTL. */
13838 /* Generate a load-multiple instruction. COUNT is the number of loads in
13839 the instruction; REGS and MEMS are arrays containing the operands.
13840 BASEREG is the base register to be used in addressing the memory operands.
13841 WBACK_OFFSET is nonzero if the instruction should update the base
13842 register. */
13844 static rtx
13845 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13846 HOST_WIDE_INT wback_offset)
13848 int i = 0, j;
13849 rtx result;
13851 if (!multiple_operation_profitable_p (false, count, 0))
13853 rtx seq;
13855 start_sequence ();
13857 for (i = 0; i < count; i++)
13858 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13860 if (wback_offset != 0)
13861 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13863 seq = get_insns ();
13864 end_sequence ();
13866 return seq;
13869 result = gen_rtx_PARALLEL (VOIDmode,
13870 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13871 if (wback_offset != 0)
13873 XVECEXP (result, 0, 0)
13874 = gen_rtx_SET (VOIDmode, basereg,
13875 plus_constant (Pmode, basereg, wback_offset));
13876 i = 1;
13877 count++;
13880 for (j = 0; i < count; i++, j++)
13881 XVECEXP (result, 0, i)
13882 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13884 return result;
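/* Shape of the RTL built above (illustrative): for COUNT == 2 with a
   writeback of 8 the result is roughly
     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                (set (reg regs[0]) (mem mems[0]))
                (set (reg regs[1]) (mem mems[1]))])
   which the load-multiple patterns (see ldmstm.md) match as a single
   instruction.  When the multiple operation is not profitable, a plain
   sequence of SImode moves is returned instead.  */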
13887 /* Generate a store-multiple instruction. COUNT is the number of stores in
13888 the instruction; REGS and MEMS are arrays containing the operands.
13889 BASEREG is the base register to be used in addressing the memory operands.
13890 WBACK_OFFSET is nonzero if the instruction should update the base
13891 register. */
13893 static rtx
13894 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13895 HOST_WIDE_INT wback_offset)
13897 int i = 0, j;
13898 rtx result;
13900 if (GET_CODE (basereg) == PLUS)
13901 basereg = XEXP (basereg, 0);
13903 if (!multiple_operation_profitable_p (false, count, 0))
13905 rtx seq;
13907 start_sequence ();
13909 for (i = 0; i < count; i++)
13910 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13912 if (wback_offset != 0)
13913 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13915 seq = get_insns ();
13916 end_sequence ();
13918 return seq;
13921 result = gen_rtx_PARALLEL (VOIDmode,
13922 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13923 if (wback_offset != 0)
13925 XVECEXP (result, 0, 0)
13926 = gen_rtx_SET (VOIDmode, basereg,
13927 plus_constant (Pmode, basereg, wback_offset));
13928 i = 1;
13929 count++;
13932 for (j = 0; i < count; i++, j++)
13933 XVECEXP (result, 0, i)
13934 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13936 return result;
13939 /* Generate either a load-multiple or a store-multiple instruction. This
13940 function can be used in situations where we can start with a single MEM
13941 rtx and adjust its address upwards.
13942 COUNT is the number of operations in the instruction, not counting a
13943 possible update of the base register. REGS is an array containing the
13944 register operands.
13945 BASEREG is the base register to be used in addressing the memory operands,
13946 which are constructed from BASEMEM.
13947 WRITE_BACK specifies whether the generated instruction should include an
13948 update of the base register.
13949 OFFSETP is used to pass an offset to and from this function; this offset
13950 is not used when constructing the address (instead BASEMEM should have an
13951 appropriate offset in its address), it is used only for setting
13952 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13954 static rtx
13955 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13956 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13958 rtx mems[MAX_LDM_STM_OPS];
13959 HOST_WIDE_INT offset = *offsetp;
13960 int i;
13962 gcc_assert (count <= MAX_LDM_STM_OPS);
13964 if (GET_CODE (basereg) == PLUS)
13965 basereg = XEXP (basereg, 0);
13967 for (i = 0; i < count; i++)
13969 rtx addr = plus_constant (Pmode, basereg, i * 4);
13970 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13971 offset += 4;
13974 if (write_back)
13975 *offsetp = offset;
13977 if (is_load)
13978 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13979 write_back ? 4 * count : 0);
13980 else
13981 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13982 write_back ? 4 * count : 0);
13985 rtx
13986 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13987 rtx basemem, HOST_WIDE_INT *offsetp)
13989 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13990 offsetp);
13993 rtx
13994 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13995 rtx basemem, HOST_WIDE_INT *offsetp)
13997 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13998 offsetp);
14001 /* Called from a peephole2 expander to turn a sequence of loads into an
14002 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14003 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14004 is true if we may reorder the registers because their subsequent uses are
14005 commutative, so the order in which they are loaded does not matter.
14006 Returns true iff we could generate a new instruction. */
14008 bool
14009 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14011 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14012 rtx mems[MAX_LDM_STM_OPS];
14013 int i, j, base_reg;
14014 rtx base_reg_rtx;
14015 HOST_WIDE_INT offset;
14016 int write_back = FALSE;
14017 int ldm_case;
14018 rtx addr;
14020 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14021 &base_reg, &offset, !sort_regs);
14023 if (ldm_case == 0)
14024 return false;
14026 if (sort_regs)
14027 for (i = 0; i < nops - 1; i++)
14028 for (j = i + 1; j < nops; j++)
14029 if (regs[i] > regs[j])
14031 int t = regs[i];
14032 regs[i] = regs[j];
14033 regs[j] = t;
14035 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14037 if (TARGET_THUMB1)
14039 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14040 gcc_assert (ldm_case == 1 || ldm_case == 5);
14041 write_back = TRUE;
14044 if (ldm_case == 5)
14046 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14047 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14048 offset = 0;
14049 if (!TARGET_THUMB1)
14051 base_reg = regs[0];
14052 base_reg_rtx = newbase;
14056 for (i = 0; i < nops; i++)
14058 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14059 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14060 SImode, addr, 0);
14062 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14063 write_back ? offset + i * 4 : 0));
14064 return true;
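/* Illustrative peephole transformation (ARM mode):
     ldr r0, [r3]       ldr r1, [r3, #4]
     ldr r2, [r3, #8]   ldr r4, [r3, #12]
   becomes
     ldmia r3, {r0, r1, r2, r4}
   For Thumb-1 only the writeback form "ldmia r3!, {...}" is generated,
   which is why the base register must be dead after the sequence there.  */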
14067 /* Called from a peephole2 expander to turn a sequence of stores into an
14068 STM instruction. OPERANDS are the operands found by the peephole matcher;
14069 NOPS indicates how many separate stores we are trying to combine.
14070 Returns true iff we could generate a new instruction. */
14072 bool
14073 gen_stm_seq (rtx *operands, int nops)
14075 int i;
14076 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14077 rtx mems[MAX_LDM_STM_OPS];
14078 int base_reg;
14079 rtx base_reg_rtx;
14080 HOST_WIDE_INT offset;
14081 int write_back = FALSE;
14082 int stm_case;
14083 rtx addr;
14084 bool base_reg_dies;
14086 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14087 mem_order, &base_reg, &offset, true);
14089 if (stm_case == 0)
14090 return false;
14092 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14094 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14095 if (TARGET_THUMB1)
14097 gcc_assert (base_reg_dies);
14098 write_back = TRUE;
14101 if (stm_case == 5)
14103 gcc_assert (base_reg_dies);
14104 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14105 offset = 0;
14108 addr = plus_constant (Pmode, base_reg_rtx, offset);
14110 for (i = 0; i < nops; i++)
14112 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14113 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14114 SImode, addr, 0);
14116 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14117 write_back ? offset + i * 4 : 0));
14118 return true;
14121 /* Called from a peephole2 expander to turn a sequence of stores that are
14122 preceded by constant loads into an STM instruction. OPERANDS are the
14123 operands found by the peephole matcher; NOPS indicates how many
14124 separate stores we are trying to combine; there are 2 * NOPS
14125 instructions in the peephole.
14126 Returns true iff we could generate a new instruction. */
14128 bool
14129 gen_const_stm_seq (rtx *operands, int nops)
14131 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14132 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14133 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14134 rtx mems[MAX_LDM_STM_OPS];
14135 int base_reg;
14136 rtx base_reg_rtx;
14137 HOST_WIDE_INT offset;
14138 int write_back = FALSE;
14139 int stm_case;
14140 rtx addr;
14141 bool base_reg_dies;
14142 int i, j;
14143 HARD_REG_SET allocated;
14145 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14146 mem_order, &base_reg, &offset, false);
14148 if (stm_case == 0)
14149 return false;
14151 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14153 /* If the same register is used more than once, try to find a free
14154 register. */
14155 CLEAR_HARD_REG_SET (allocated);
14156 for (i = 0; i < nops; i++)
14158 for (j = i + 1; j < nops; j++)
14159 if (regs[i] == regs[j])
14161 rtx t = peep2_find_free_register (0, nops * 2,
14162 TARGET_THUMB1 ? "l" : "r",
14163 SImode, &allocated);
14164 if (t == NULL_RTX)
14165 return false;
14166 reg_rtxs[i] = t;
14167 regs[i] = REGNO (t);
14171 /* Compute an ordering that maps the register numbers to an ascending
14172 sequence. */
14173 reg_order[0] = 0;
14174 for (i = 0; i < nops; i++)
14175 if (regs[i] < regs[reg_order[0]])
14176 reg_order[0] = i;
14178 for (i = 1; i < nops; i++)
14180 int this_order = reg_order[i - 1];
14181 for (j = 0; j < nops; j++)
14182 if (regs[j] > regs[reg_order[i - 1]]
14183 && (this_order == reg_order[i - 1]
14184 || regs[j] < regs[this_order]))
14185 this_order = j;
14186 reg_order[i] = this_order;
14189 /* Ensure that registers that must be live after the instruction end
14190 up with the correct value. */
14191 for (i = 0; i < nops; i++)
14193 int this_order = reg_order[i];
14194 if ((this_order != mem_order[i]
14195 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14196 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14197 return false;
14200 /* Load the constants. */
14201 for (i = 0; i < nops; i++)
14203 rtx op = operands[2 * nops + mem_order[i]];
14204 sorted_regs[i] = regs[reg_order[i]];
14205 emit_move_insn (reg_rtxs[reg_order[i]], op);
14208 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14210 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14211 if (TARGET_THUMB1)
14213 gcc_assert (base_reg_dies);
14214 write_back = TRUE;
14217 if (stm_case == 5)
14219 gcc_assert (base_reg_dies);
14220 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14221 offset = 0;
14224 addr = plus_constant (Pmode, base_reg_rtx, offset);
14226 for (i = 0; i < nops; i++)
14228 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14229 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14230 SImode, addr, 0);
14232 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14233 write_back ? offset + i * 4 : 0));
14234 return true;
14237 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14238 unaligned copies on processors which support unaligned semantics for those
14239 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14240 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14241 An interleave factor of 1 (the minimum) will perform no interleaving.
14242 Load/store multiple are used for aligned addresses where possible. */
14244 static void
14245 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14246 HOST_WIDE_INT length,
14247 unsigned int interleave_factor)
14249 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14250 int *regnos = XALLOCAVEC (int, interleave_factor);
14251 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14252 HOST_WIDE_INT i, j;
14253 HOST_WIDE_INT remaining = length, words;
14254 rtx halfword_tmp = NULL, byte_tmp = NULL;
14255 rtx dst, src;
14256 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14257 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14258 HOST_WIDE_INT srcoffset, dstoffset;
14259 HOST_WIDE_INT src_autoinc, dst_autoinc;
14260 rtx mem, addr;
14262 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14264 /* Use hard registers if we have aligned source or destination so we can use
14265 load/store multiple with contiguous registers. */
14266 if (dst_aligned || src_aligned)
14267 for (i = 0; i < interleave_factor; i++)
14268 regs[i] = gen_rtx_REG (SImode, i);
14269 else
14270 for (i = 0; i < interleave_factor; i++)
14271 regs[i] = gen_reg_rtx (SImode);
14273 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14274 src = copy_addr_to_reg (XEXP (srcbase, 0));
14276 srcoffset = dstoffset = 0;
14278 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14279 For copying the last bytes we want to subtract this offset again. */
14280 src_autoinc = dst_autoinc = 0;
14282 for (i = 0; i < interleave_factor; i++)
14283 regnos[i] = i;
14285 /* Copy BLOCK_SIZE_BYTES chunks. */
14287 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14289 /* Load words. */
14290 if (src_aligned && interleave_factor > 1)
14292 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14293 TRUE, srcbase, &srcoffset));
14294 src_autoinc += UNITS_PER_WORD * interleave_factor;
14296 else
14298 for (j = 0; j < interleave_factor; j++)
14300 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14301 - src_autoinc));
14302 mem = adjust_automodify_address (srcbase, SImode, addr,
14303 srcoffset + j * UNITS_PER_WORD);
14304 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14306 srcoffset += block_size_bytes;
14309 /* Store words. */
14310 if (dst_aligned && interleave_factor > 1)
14312 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14313 TRUE, dstbase, &dstoffset));
14314 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14316 else
14318 for (j = 0; j < interleave_factor; j++)
14320 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14321 - dst_autoinc));
14322 mem = adjust_automodify_address (dstbase, SImode, addr,
14323 dstoffset + j * UNITS_PER_WORD);
14324 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14326 dstoffset += block_size_bytes;
14329 remaining -= block_size_bytes;
14332 /* Copy any whole words left (note these aren't interleaved with any
14333 subsequent halfword/byte load/stores in the interests of simplicity). */
14335 words = remaining / UNITS_PER_WORD;
14337 gcc_assert (words < interleave_factor);
14339 if (src_aligned && words > 1)
14341 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14342 &srcoffset));
14343 src_autoinc += UNITS_PER_WORD * words;
14345 else
14347 for (j = 0; j < words; j++)
14349 addr = plus_constant (Pmode, src,
14350 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14351 mem = adjust_automodify_address (srcbase, SImode, addr,
14352 srcoffset + j * UNITS_PER_WORD);
14353 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14355 srcoffset += words * UNITS_PER_WORD;
14358 if (dst_aligned && words > 1)
14360 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14361 &dstoffset));
14362 dst_autoinc += words * UNITS_PER_WORD;
14364 else
14366 for (j = 0; j < words; j++)
14368 addr = plus_constant (Pmode, dst,
14369 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14370 mem = adjust_automodify_address (dstbase, SImode, addr,
14371 dstoffset + j * UNITS_PER_WORD);
14372 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14374 dstoffset += words * UNITS_PER_WORD;
14377 remaining -= words * UNITS_PER_WORD;
14379 gcc_assert (remaining < 4);
14381 /* Copy a halfword if necessary. */
14383 if (remaining >= 2)
14385 halfword_tmp = gen_reg_rtx (SImode);
14387 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14388 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14389 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14391 /* Either write out immediately, or delay until we've loaded the last
14392 byte, depending on interleave factor. */
14393 if (interleave_factor == 1)
14395 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14396 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14397 emit_insn (gen_unaligned_storehi (mem,
14398 gen_lowpart (HImode, halfword_tmp)));
14399 halfword_tmp = NULL;
14400 dstoffset += 2;
14403 remaining -= 2;
14404 srcoffset += 2;
14407 gcc_assert (remaining < 2);
14409 /* Copy last byte. */
14411 if ((remaining & 1) != 0)
14413 byte_tmp = gen_reg_rtx (SImode);
14415 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14416 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14417 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14419 if (interleave_factor == 1)
14421 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14422 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14423 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14424 byte_tmp = NULL;
14425 dstoffset++;
14428 remaining--;
14429 srcoffset++;
14432 /* Store last halfword if we haven't done so already. */
14434 if (halfword_tmp)
14436 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14437 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14438 emit_insn (gen_unaligned_storehi (mem,
14439 gen_lowpart (HImode, halfword_tmp)));
14440 dstoffset += 2;
14443 /* Likewise for last byte. */
14445 if (byte_tmp)
14447 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14448 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14449 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14450 dstoffset++;
14453 gcc_assert (remaining == 0 && srcoffset == dstoffset);
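/* Worked example (illustrative): LENGTH == 11 with INTERLEAVE_FACTOR == 2
   and both ends unaligned copies
     - one 8-byte block: two unaligned word loads, then two unaligned word
       stores,
     - no whole words (11 - 8 = 3 < 4),
     - one halfword (ldrh/strh) and one final byte (ldrb/strb).
   With INTERLEAVE_FACTOR == 1 the trailing halfword and byte are stored as
   soon as they are loaded; with a larger factor the stores are delayed so
   that the loads can issue back to back.  */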
14456 /* From mips_adjust_block_mem:
14458 Helper function for doing a loop-based block operation on memory
14459 reference MEM. Each iteration of the loop will operate on LENGTH
14460 bytes of MEM.
14462 Create a new base register for use within the loop and point it to
14463 the start of MEM. Create a new memory reference that uses this
14464 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14466 static void
14467 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14468 rtx *loop_mem)
14470 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14472 /* Although the new mem does not refer to a known location,
14473 it does keep up to LENGTH bytes of alignment. */
14474 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14475 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14478 /* From mips_block_move_loop:
14480 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14481 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14482 the memory regions do not overlap. */
14484 static void
14485 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14486 unsigned int interleave_factor,
14487 HOST_WIDE_INT bytes_per_iter)
14489 rtx label, src_reg, dest_reg, final_src, test;
14490 HOST_WIDE_INT leftover;
14492 leftover = length % bytes_per_iter;
14493 length -= leftover;
14495 /* Create registers and memory references for use within the loop. */
14496 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14497 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14499 /* Calculate the value that SRC_REG should have after the last iteration of
14500 the loop. */
14501 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14502 0, 0, OPTAB_WIDEN);
14504 /* Emit the start of the loop. */
14505 label = gen_label_rtx ();
14506 emit_label (label);
14508 /* Emit the loop body. */
14509 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14510 interleave_factor);
14512 /* Move on to the next block. */
14513 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14514 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14516 /* Emit the loop condition. */
14517 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14518 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14520 /* Mop up any left-over bytes. */
14521 if (leftover)
14522 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
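/* Illustrative use: LENGTH == 100 with BYTES_PER_ITER == 16 runs the
   16-byte straight-line body six times, advancing SRC_REG and DEST_REG by
   16 per iteration, and the remaining 4 bytes are handled by the
   straight-line call after the loop.  */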
14525 /* Emit a block move when either the source or destination is unaligned (not
14526 aligned to a four-byte boundary). This may need further tuning depending on
14527 core type, optimize_size setting, etc. */
14529 static int
14530 arm_movmemqi_unaligned (rtx *operands)
14532 HOST_WIDE_INT length = INTVAL (operands[2]);
14534 if (optimize_size)
14536 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14537 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14538 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14539 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14540 or dst_aligned though: allow more interleaving in those cases since the
14541 resulting code can be smaller. */
14542 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14543 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14545 if (length > 12)
14546 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14547 interleave_factor, bytes_per_iter);
14548 else
14549 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14550 interleave_factor);
14552 else
14554 /* Note that the loop created by arm_block_move_unaligned_loop may be
14555 subject to loop unrolling, which makes tuning this condition a little
14556 redundant. */
14557 if (length > 32)
14558 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14559 else
14560 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14563 return 1;
14566 int
14567 arm_gen_movmemqi (rtx *operands)
14569 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14570 HOST_WIDE_INT srcoffset, dstoffset;
14571 int i;
14572 rtx src, dst, srcbase, dstbase;
14573 rtx part_bytes_reg = NULL;
14574 rtx mem;
14576 if (!CONST_INT_P (operands[2])
14577 || !CONST_INT_P (operands[3])
14578 || INTVAL (operands[2]) > 64)
14579 return 0;
14581 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14582 return arm_movmemqi_unaligned (operands);
14584 if (INTVAL (operands[3]) & 3)
14585 return 0;
14587 dstbase = operands[0];
14588 srcbase = operands[1];
14590 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14591 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14593 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14594 out_words_to_go = INTVAL (operands[2]) / 4;
14595 last_bytes = INTVAL (operands[2]) & 3;
14596 dstoffset = srcoffset = 0;
14598 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14599 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14601 for (i = 0; in_words_to_go >= 2; i+=4)
14603 if (in_words_to_go > 4)
14604 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14605 TRUE, srcbase, &srcoffset));
14606 else
14607 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14608 src, FALSE, srcbase,
14609 &srcoffset));
14611 if (out_words_to_go)
14613 if (out_words_to_go > 4)
14614 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14615 TRUE, dstbase, &dstoffset));
14616 else if (out_words_to_go != 1)
14617 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14618 out_words_to_go, dst,
14619 (last_bytes == 0
14620 ? FALSE : TRUE),
14621 dstbase, &dstoffset));
14622 else
14624 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14625 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14626 if (last_bytes != 0)
14628 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14629 dstoffset += 4;
14634 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14635 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14638 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14639 if (out_words_to_go)
14641 rtx sreg;
14643 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14644 sreg = copy_to_reg (mem);
14646 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14647 emit_move_insn (mem, sreg);
14648 in_words_to_go--;
14650 gcc_assert (!in_words_to_go); /* Sanity check */
14653 if (in_words_to_go)
14655 gcc_assert (in_words_to_go > 0);
14657 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14658 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14661 gcc_assert (!last_bytes || part_bytes_reg);
14663 if (BYTES_BIG_ENDIAN && last_bytes)
14665 rtx tmp = gen_reg_rtx (SImode);
14667 /* The bytes we want are in the top end of the word. */
14668 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14669 GEN_INT (8 * (4 - last_bytes))));
14670 part_bytes_reg = tmp;
14672 while (last_bytes)
14674 mem = adjust_automodify_address (dstbase, QImode,
14675 plus_constant (Pmode, dst,
14676 last_bytes - 1),
14677 dstoffset + last_bytes - 1);
14678 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14680 if (--last_bytes)
14682 tmp = gen_reg_rtx (SImode);
14683 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14684 part_bytes_reg = tmp;
14689 else
14691 if (last_bytes > 1)
14693 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14694 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14695 last_bytes -= 2;
14696 if (last_bytes)
14698 rtx tmp = gen_reg_rtx (SImode);
14699 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14700 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14701 part_bytes_reg = tmp;
14702 dstoffset += 2;
14706 if (last_bytes)
14708 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14709 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14713 return 1;
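/* Worked example (illustrative): an aligned 6-byte copy gives
   in_words_to_go == 2, out_words_to_go == 1 and last_bytes == 2.  The loop
   loads two words into r0/r1, stores the first with a single str and
   advances DST by 4; the trailing two bytes then come from part_bytes_reg
   (r1), stored with strh on little-endian targets or shifted down and
   stored byte by byte on big-endian ones.  */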
14716 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14717 by mode size. */
14718 inline static rtx
14719 next_consecutive_mem (rtx mem)
14721 enum machine_mode mode = GET_MODE (mem);
14722 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14723 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14725 return adjust_automodify_address (mem, mode, addr, offset);
14728 /* Copy using LDRD/STRD instructions whenever possible.
14729 Returns true upon success. */
14730 bool
14731 gen_movmem_ldrd_strd (rtx *operands)
14733 unsigned HOST_WIDE_INT len;
14734 HOST_WIDE_INT align;
14735 rtx src, dst, base;
14736 rtx reg0;
14737 bool src_aligned, dst_aligned;
14738 bool src_volatile, dst_volatile;
14740 gcc_assert (CONST_INT_P (operands[2]));
14741 gcc_assert (CONST_INT_P (operands[3]));
14743 len = UINTVAL (operands[2]);
14744 if (len > 64)
14745 return false;
14747 /* Maximum alignment we can assume for both src and dst buffers. */
14748 align = INTVAL (operands[3]);
14750 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14751 return false;
14753 /* Place src and dst addresses in registers
14754 and update the corresponding mem rtx. */
14755 dst = operands[0];
14756 dst_volatile = MEM_VOLATILE_P (dst);
14757 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14758 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14759 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14761 src = operands[1];
14762 src_volatile = MEM_VOLATILE_P (src);
14763 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14764 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14765 src = adjust_automodify_address (src, VOIDmode, base, 0);
14767 if (!unaligned_access && !(src_aligned && dst_aligned))
14768 return false;
14770 if (src_volatile || dst_volatile)
14771 return false;
14773 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14774 if (!(dst_aligned || src_aligned))
14775 return arm_gen_movmemqi (operands);
14777 src = adjust_address (src, DImode, 0);
14778 dst = adjust_address (dst, DImode, 0);
14779 while (len >= 8)
14781 len -= 8;
14782 reg0 = gen_reg_rtx (DImode);
14783 if (src_aligned)
14784 emit_move_insn (reg0, src);
14785 else
14786 emit_insn (gen_unaligned_loaddi (reg0, src));
14788 if (dst_aligned)
14789 emit_move_insn (dst, reg0);
14790 else
14791 emit_insn (gen_unaligned_storedi (dst, reg0));
14793 src = next_consecutive_mem (src);
14794 dst = next_consecutive_mem (dst);
14797 gcc_assert (len < 8);
14798 if (len >= 4)
14800 /* At least a word but less than a double-word left to copy. Copy one word. */
14801 reg0 = gen_reg_rtx (SImode);
14802 src = adjust_address (src, SImode, 0);
14803 dst = adjust_address (dst, SImode, 0);
14804 if (src_aligned)
14805 emit_move_insn (reg0, src);
14806 else
14807 emit_insn (gen_unaligned_loadsi (reg0, src));
14809 if (dst_aligned)
14810 emit_move_insn (dst, reg0);
14811 else
14812 emit_insn (gen_unaligned_storesi (dst, reg0));
14814 src = next_consecutive_mem (src);
14815 dst = next_consecutive_mem (dst);
14816 len -= 4;
14819 if (len == 0)
14820 return true;
14822 /* Copy the remaining bytes. */
14823 if (len >= 2)
14825 dst = adjust_address (dst, HImode, 0);
14826 src = adjust_address (src, HImode, 0);
14827 reg0 = gen_reg_rtx (SImode);
14828 if (src_aligned)
14829 emit_insn (gen_zero_extendhisi2 (reg0, src));
14830 else
14831 emit_insn (gen_unaligned_loadhiu (reg0, src));
14833 if (dst_aligned)
14834 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14835 else
14836 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14838 src = next_consecutive_mem (src);
14839 dst = next_consecutive_mem (dst);
14840 if (len == 2)
14841 return true;
14844 dst = adjust_address (dst, QImode, 0);
14845 src = adjust_address (src, QImode, 0);
14846 reg0 = gen_reg_rtx (QImode);
14847 emit_move_insn (reg0, src);
14848 emit_move_insn (dst, reg0);
14849 return true;
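/* Illustrative expansion, assuming both buffers are word-aligned: a 14-byte
   copy becomes one DImode move (ldrd/strd where available), one SImode move
   (ldr/str) and one HImode move (ldrh/strh).  If a buffer is not known to
   be aligned, the unaligned load/store expanders are used for the
   word-sized pieces instead, and any final odd byte is always copied with
   ldrb/strb.  */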
14852 /* Select a dominance comparison mode if possible for a test of the general
14853 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14854 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14855 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14856 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14857 In all cases OP will be either EQ or NE, but we don't need to know which
14858 here. If we are unable to support a dominance comparison we return
14859 CCmode. This will then fail to match for the RTL expressions that
14860 generate this call. */
14861 enum machine_mode
14862 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14864 enum rtx_code cond1, cond2;
14865 int swapped = 0;
14867 /* Currently we will probably get the wrong result if the individual
14868 comparisons are not simple. This also ensures that it is safe to
14869 reverse a comparison if necessary. */
14870 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14871 != CCmode)
14872 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14873 != CCmode))
14874 return CCmode;
14876 /* The if_then_else variant of this tests the second condition if the
14877 first passes, but is true if the first fails. Reverse the first
14878 condition to get a true "inclusive-or" expression. */
14879 if (cond_or == DOM_CC_NX_OR_Y)
14880 cond1 = reverse_condition (cond1);
14882 /* If the comparisons are not equal, and one doesn't dominate the other,
14883 then we can't do this. */
14884 if (cond1 != cond2
14885 && !comparison_dominates_p (cond1, cond2)
14886 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14887 return CCmode;
14889 if (swapped)
14891 enum rtx_code temp = cond1;
14892 cond1 = cond2;
14893 cond2 = temp;
14896 switch (cond1)
14898 case EQ:
14899 if (cond_or == DOM_CC_X_AND_Y)
14900 return CC_DEQmode;
14902 switch (cond2)
14904 case EQ: return CC_DEQmode;
14905 case LE: return CC_DLEmode;
14906 case LEU: return CC_DLEUmode;
14907 case GE: return CC_DGEmode;
14908 case GEU: return CC_DGEUmode;
14909 default: gcc_unreachable ();
14912 case LT:
14913 if (cond_or == DOM_CC_X_AND_Y)
14914 return CC_DLTmode;
14916 switch (cond2)
14918 case LT:
14919 return CC_DLTmode;
14920 case LE:
14921 return CC_DLEmode;
14922 case NE:
14923 return CC_DNEmode;
14924 default:
14925 gcc_unreachable ();
14928 case GT:
14929 if (cond_or == DOM_CC_X_AND_Y)
14930 return CC_DGTmode;
14932 switch (cond2)
14934 case GT:
14935 return CC_DGTmode;
14936 case GE:
14937 return CC_DGEmode;
14938 case NE:
14939 return CC_DNEmode;
14940 default:
14941 gcc_unreachable ();
14944 case LTU:
14945 if (cond_or == DOM_CC_X_AND_Y)
14946 return CC_DLTUmode;
14948 switch (cond2)
14950 case LTU:
14951 return CC_DLTUmode;
14952 case LEU:
14953 return CC_DLEUmode;
14954 case NE:
14955 return CC_DNEmode;
14956 default:
14957 gcc_unreachable ();
14960 case GTU:
14961 if (cond_or == DOM_CC_X_AND_Y)
14962 return CC_DGTUmode;
14964 switch (cond2)
14966 case GTU:
14967 return CC_DGTUmode;
14968 case GEU:
14969 return CC_DGEUmode;
14970 case NE:
14971 return CC_DNEmode;
14972 default:
14973 gcc_unreachable ();
14976 /* The remaining cases only occur when both comparisons are the
14977 same. */
14978 case NE:
14979 gcc_assert (cond1 == cond2);
14980 return CC_DNEmode;
14982 case LE:
14983 gcc_assert (cond1 == cond2);
14984 return CC_DLEmode;
14986 case GE:
14987 gcc_assert (cond1 == cond2);
14988 return CC_DGEmode;
14990 case LEU:
14991 gcc_assert (cond1 == cond2);
14992 return CC_DLEUmode;
14994 case GEU:
14995 gcc_assert (cond1 == cond2);
14996 return CC_DGEUmode;
14998 default:
14999 gcc_unreachable ();
15003 enum machine_mode
15004 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15006 /* All floating point compares return CCFP if it is an equality
15007 comparison, and CCFPE otherwise. */
15008 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15010 switch (op)
15012 case EQ:
15013 case NE:
15014 case UNORDERED:
15015 case ORDERED:
15016 case UNLT:
15017 case UNLE:
15018 case UNGT:
15019 case UNGE:
15020 case UNEQ:
15021 case LTGT:
15022 return CCFPmode;
15024 case LT:
15025 case LE:
15026 case GT:
15027 case GE:
15028 return CCFPEmode;
15030 default:
15031 gcc_unreachable ();
15035 /* A compare with a shifted operand. Because of canonicalization, the
15036 comparison will have to be swapped when we emit the assembler. */
15037 if (GET_MODE (y) == SImode
15038 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15039 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15040 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15041 || GET_CODE (x) == ROTATERT))
15042 return CC_SWPmode;
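/* Example of the CC_SWPmode case above (illustrative): for
   (compare (ashift x (const_int 2)) y) the shifted operand can only be the
   second operand of a cmp instruction, so the assembler emitted is
   "cmp y, x, lsl #2" and the condition must be interpreted with its
   operands swapped, which is what CC_SWPmode records.  */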
15044 /* This operation is performed swapped, but since we only rely on the Z
15045 flag we don't need an additional mode. */
15046 if (GET_MODE (y) == SImode
15047 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15048 && GET_CODE (x) == NEG
15049 && (op == EQ || op == NE))
15050 return CC_Zmode;
15052 /* This is a special case that is used by combine to allow a
15053 comparison of a shifted byte load to be split into a zero-extend
15054 followed by a comparison of the shifted integer (only valid for
15055 equalities and unsigned inequalities). */
15056 if (GET_MODE (x) == SImode
15057 && GET_CODE (x) == ASHIFT
15058 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15059 && GET_CODE (XEXP (x, 0)) == SUBREG
15060 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15061 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15062 && (op == EQ || op == NE
15063 || op == GEU || op == GTU || op == LTU || op == LEU)
15064 && CONST_INT_P (y))
15065 return CC_Zmode;
15067 /* A construct for a conditional compare, if the false arm contains
15068 0, then both conditions must be true, otherwise either condition
15069 must be true. Not all conditions are possible, so CCmode is
15070 returned if it can't be done. */
15071 if (GET_CODE (x) == IF_THEN_ELSE
15072 && (XEXP (x, 2) == const0_rtx
15073 || XEXP (x, 2) == const1_rtx)
15074 && COMPARISON_P (XEXP (x, 0))
15075 && COMPARISON_P (XEXP (x, 1)))
15076 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15077 INTVAL (XEXP (x, 2)));
15079 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15080 if (GET_CODE (x) == AND
15081 && (op == EQ || op == NE)
15082 && COMPARISON_P (XEXP (x, 0))
15083 && COMPARISON_P (XEXP (x, 1)))
15084 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15085 DOM_CC_X_AND_Y);
15087 if (GET_CODE (x) == IOR
15088 && (op == EQ || op == NE)
15089 && COMPARISON_P (XEXP (x, 0))
15090 && COMPARISON_P (XEXP (x, 1)))
15091 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15092 DOM_CC_X_OR_Y);
15094 /* An operation (on Thumb) where we want to test for a single bit.
15095 This is done by shifting that bit up into the top bit of a
15096 scratch register; we can then branch on the sign bit. */
15097 if (TARGET_THUMB1
15098 && GET_MODE (x) == SImode
15099 && (op == EQ || op == NE)
15100 && GET_CODE (x) == ZERO_EXTRACT
15101 && XEXP (x, 1) == const1_rtx)
15102 return CC_Nmode;
15104 /* An operation that sets the condition codes as a side-effect, the
15105 V flag is not set correctly, so we can only use comparisons where
15106 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15107 instead.) */
15108 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15109 if (GET_MODE (x) == SImode
15110 && y == const0_rtx
15111 && (op == EQ || op == NE || op == LT || op == GE)
15112 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15113 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15114 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15115 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15116 || GET_CODE (x) == LSHIFTRT
15117 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15118 || GET_CODE (x) == ROTATERT
15119 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15120 return CC_NOOVmode;
15122 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15123 return CC_Zmode;
15125 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15126 && GET_CODE (x) == PLUS
15127 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15128 return CC_Cmode;
15130 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15132 switch (op)
15134 case EQ:
15135 case NE:
15136 /* A DImode comparison against zero can be implemented by
15137 or'ing the two halves together. */
15138 if (y == const0_rtx)
15139 return CC_Zmode;
15141 /* We can do an equality test in three Thumb instructions. */
15142 if (!TARGET_32BIT)
15143 return CC_Zmode;
15145 /* FALLTHROUGH */
15147 case LTU:
15148 case LEU:
15149 case GTU:
15150 case GEU:
15151 /* DImode unsigned comparisons can be implemented by cmp +
15152 cmpeq without a scratch register. Not worth doing in
15153 Thumb-2. */
15154 if (TARGET_32BIT)
15155 return CC_CZmode;
15157 /* FALLTHROUGH */
15159 case LT:
15160 case LE:
15161 case GT:
15162 case GE:
15163 /* DImode signed and unsigned comparisons can be implemented
15164 by cmp + sbcs with a scratch register, but that does not
15165 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15166 gcc_assert (op != EQ && op != NE);
15167 return CC_NCVmode;
15169 default:
15170 gcc_unreachable ();
15174 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15175 return GET_MODE (x);
15177 return CCmode;
15180 /* X and Y are two things to compare using CODE. Emit the compare insn and
15181 return the rtx for the condition code register in the proper mode.
15182 SCRATCH may supply a scratch register needed by some DImode comparisons;
15183 after reload it must be provided by the caller. */
rtx
15184 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15186 enum machine_mode mode;
15187 rtx cc_reg;
15188 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15190 /* We might have X as a constant, Y as a register because of the predicates
15191 used for cmpdi. If so, force X to a register here. */
15192 if (dimode_comparison && !REG_P (x))
15193 x = force_reg (DImode, x);
15195 mode = SELECT_CC_MODE (code, x, y);
15196 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15198 if (dimode_comparison
15199 && mode != CC_CZmode)
15201 rtx clobber, set;
15203 /* To compare two non-zero values for equality, XOR them and
15204 then compare against zero. Not used for ARM mode; there
15205 CC_CZmode is cheaper. */
15206 if (mode == CC_Zmode && y != const0_rtx)
15208 gcc_assert (!reload_completed);
15209 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15210 y = const0_rtx;
15213 /* A scratch register is required. */
15214 if (reload_completed)
15215 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15216 else
15217 scratch = gen_rtx_SCRATCH (SImode);
15219 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15220 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15221 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15223 else
15224 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15226 return cc_reg;
15229 /* Generate a sequence of insns that will generate the correct return
15230 address mask depending on the physical architecture that the program
15231 is running on. */
15232 rtx
15233 arm_gen_return_addr_mask (void)
15235 rtx reg = gen_reg_rtx (Pmode);
15237 emit_insn (gen_return_addr_mask (reg));
15238 return reg;
15241 void
15242 arm_reload_in_hi (rtx *operands)
15244 rtx ref = operands[1];
15245 rtx base, scratch;
15246 HOST_WIDE_INT offset = 0;
15248 if (GET_CODE (ref) == SUBREG)
15250 offset = SUBREG_BYTE (ref);
15251 ref = SUBREG_REG (ref);
15254 if (REG_P (ref))
15256 /* We have a pseudo which has been spilt onto the stack; there
15257 are two cases here: the first where there is a simple
15258 stack-slot replacement and a second where the stack-slot is
15259 out of range, or is used as a subreg. */
15260 if (reg_equiv_mem (REGNO (ref)))
15262 ref = reg_equiv_mem (REGNO (ref));
15263 base = find_replacement (&XEXP (ref, 0));
15265 else
15266 /* The slot is out of range, or was dressed up in a SUBREG. */
15267 base = reg_equiv_address (REGNO (ref));
15269 else
15270 base = find_replacement (&XEXP (ref, 0));
15272 /* Handle the case where the address is too complex to be offset by 1. */
15273 if (GET_CODE (base) == MINUS
15274 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15276 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15278 emit_set_insn (base_plus, base);
15279 base = base_plus;
15281 else if (GET_CODE (base) == PLUS)
15283 /* The addend must be CONST_INT, or we would have dealt with it above. */
15284 HOST_WIDE_INT hi, lo;
15286 offset += INTVAL (XEXP (base, 1));
15287 base = XEXP (base, 0);
15289 /* Rework the address into a legal sequence of insns. */
15290 /* Valid range for lo is -4095 -> 4095 */
15291 lo = (offset >= 0
15292 ? (offset & 0xfff)
15293 : -((-offset) & 0xfff));
15295 /* Corner case, if lo is the max offset then we would be out of range
15296 once we have added the additional 1 below, so bump the msb into the
15297 pre-loading insn(s). */
15298 if (lo == 4095)
15299 lo &= 0x7ff;
15301 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15302 ^ (HOST_WIDE_INT) 0x80000000)
15303 - (HOST_WIDE_INT) 0x80000000);
15305 gcc_assert (hi + lo == offset);
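/* Worked example of the split above (illustrative): offset 0x2001 gives
   lo = 1, hi = 0x2000; offset -4097 gives lo = -1, hi = -4096.  The corner
   case turns offset 4095 into lo = 2047, hi = 2048 so that both lo and
   lo + 1 stay within the byte-load offset range.  HI is folded into the
   base register by the addsi3 below and only LO remains in the final
   addresses.  */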
15307 if (hi != 0)
15309 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15311 /* Get the base address; addsi3 knows how to handle constants
15312 that require more than one insn. */
15313 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15314 base = base_plus;
15315 offset = lo;
15319 /* Operands[2] may overlap operands[0] (though it won't overlap
15320 operands[1]), that's why we asked for a DImode reg -- so we can
15321 use the bit that does not overlap. */
15322 if (REGNO (operands[2]) == REGNO (operands[0]))
15323 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15324 else
15325 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15327 emit_insn (gen_zero_extendqisi2 (scratch,
15328 gen_rtx_MEM (QImode,
15329 plus_constant (Pmode, base,
15330 offset))));
15331 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15332 gen_rtx_MEM (QImode,
15333 plus_constant (Pmode, base,
15334 offset + 1))));
15335 if (!BYTES_BIG_ENDIAN)
15336 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15337 gen_rtx_IOR (SImode,
15338 gen_rtx_ASHIFT
15339 (SImode,
15340 gen_rtx_SUBREG (SImode, operands[0], 0),
15341 GEN_INT (8)),
15342 scratch));
15343 else
15344 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15345 gen_rtx_IOR (SImode,
15346 gen_rtx_ASHIFT (SImode, scratch,
15347 GEN_INT (8)),
15348 gen_rtx_SUBREG (SImode, operands[0], 0)));
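/* Resulting code (illustrative, little-endian): two zero-extending byte
   loads followed by a merge, roughly
     ldrb scratch, [base, #offset]
     ldrb out,     [base, #offset + 1]
     orr  out, scratch, out, lsl #8
   On big-endian targets the same two loads are emitted but SCRATCH, which
   then holds the more significant byte, is the operand shifted left by 8
   in the final IOR.  */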
15351 /* Handle storing a half-word to memory during reload by synthesizing as two
15352 byte stores. Take care not to clobber the input values until after we
15353 have moved them somewhere safe. This code assumes that if the DImode
15354 scratch in operands[2] overlaps either the input value or output address
15355 in some way, then that value must die in this insn (we absolutely need
15356 two scratch registers for some corner cases). */
15357 void
15358 arm_reload_out_hi (rtx *operands)
15360 rtx ref = operands[0];
15361 rtx outval = operands[1];
15362 rtx base, scratch;
15363 HOST_WIDE_INT offset = 0;
15365 if (GET_CODE (ref) == SUBREG)
15367 offset = SUBREG_BYTE (ref);
15368 ref = SUBREG_REG (ref);
15371 if (REG_P (ref))
15373 /* We have a pseudo which has been spilt onto the stack; there
15374 are two cases here: the first where there is a simple
15375 stack-slot replacement and a second where the stack-slot is
15376 out of range, or is used as a subreg. */
15377 if (reg_equiv_mem (REGNO (ref)))
15379 ref = reg_equiv_mem (REGNO (ref));
15380 base = find_replacement (&XEXP (ref, 0));
15382 else
15383 /* The slot is out of range, or was dressed up in a SUBREG. */
15384 base = reg_equiv_address (REGNO (ref));
15386 else
15387 base = find_replacement (&XEXP (ref, 0));
15389 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15391 /* Handle the case where the address is too complex to be offset by 1. */
15392 if (GET_CODE (base) == MINUS
15393 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15395 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15397 /* Be careful not to destroy OUTVAL. */
15398 if (reg_overlap_mentioned_p (base_plus, outval))
15400 /* Updating base_plus might destroy outval, see if we can
15401 swap the scratch and base_plus. */
15402 if (!reg_overlap_mentioned_p (scratch, outval))
15404 rtx tmp = scratch;
15405 scratch = base_plus;
15406 base_plus = tmp;
15408 else
15410 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15412 /* Be conservative and copy OUTVAL into the scratch now,
15413 this should only be necessary if outval is a subreg
15414 of something larger than a word. */
15415 /* XXX Might this clobber base? I can't see how it can,
15416 since scratch is known to overlap with OUTVAL, and
15417 must be wider than a word. */
15418 emit_insn (gen_movhi (scratch_hi, outval));
15419 outval = scratch_hi;
15423 emit_set_insn (base_plus, base);
15424 base = base_plus;
15426 else if (GET_CODE (base) == PLUS)
15428 /* The addend must be CONST_INT, or we would have dealt with it above. */
15429 HOST_WIDE_INT hi, lo;
15431 offset += INTVAL (XEXP (base, 1));
15432 base = XEXP (base, 0);
15434 /* Rework the address into a legal sequence of insns. */
15435 /* Valid range for lo is -4095 -> 4095 */
15436 lo = (offset >= 0
15437 ? (offset & 0xfff)
15438 : -((-offset) & 0xfff));
15440 /* Corner case, if lo is the max offset then we would be out of range
15441 once we have added the additional 1 below, so bump the msb into the
15442 pre-loading insn(s). */
15443 if (lo == 4095)
15444 lo &= 0x7ff;
15446 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15447 ^ (HOST_WIDE_INT) 0x80000000)
15448 - (HOST_WIDE_INT) 0x80000000);
15450 gcc_assert (hi + lo == offset);
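      /* Worked example (editorial addition): for offset = 0x1234 the code
	 above yields lo = 0x234 and hi = 0x1000; for offset = -0x1234 it
	 yields lo = -0x234 and hi = -0x1000.  The XOR with 0x80000000
	 followed by the subtraction simply sign-extends the low 32 bits of
	 (offset - lo) into a HOST_WIDE_INT, so hi + lo == offset always
	 holds.  */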
15452 if (hi != 0)
15454 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15456 /* Be careful not to destroy OUTVAL. */
15457 if (reg_overlap_mentioned_p (base_plus, outval))
15459 /* Updating base_plus might destroy outval, see if we
15460 can swap the scratch and base_plus. */
15461 if (!reg_overlap_mentioned_p (scratch, outval))
15463 rtx tmp = scratch;
15464 scratch = base_plus;
15465 base_plus = tmp;
15467 else
15469 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15471 /* Be conservative and copy outval into scratch now,
15472 this should only be necessary if outval is a
15473 subreg of something larger than a word. */
15474 /* XXX Might this clobber base? I can't see how it
15475 can, since scratch is known to overlap with
15476 outval. */
15477 emit_insn (gen_movhi (scratch_hi, outval));
15478 outval = scratch_hi;
15482 /* Get the base address; addsi3 knows how to handle constants
15483 that require more than one insn. */
15484 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15485 base = base_plus;
15486 offset = lo;
15490 if (BYTES_BIG_ENDIAN)
15492 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15493 plus_constant (Pmode, base,
15494 offset + 1)),
15495 gen_lowpart (QImode, outval)));
15496 emit_insn (gen_lshrsi3 (scratch,
15497 gen_rtx_SUBREG (SImode, outval, 0),
15498 GEN_INT (8)));
15499 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15500 offset)),
15501 gen_lowpart (QImode, scratch)));
15503 else
15505 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15506 offset)),
15507 gen_lowpart (QImode, outval)));
15508 emit_insn (gen_lshrsi3 (scratch,
15509 gen_rtx_SUBREG (SImode, outval, 0),
15510 GEN_INT (8)));
15511 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15512 plus_constant (Pmode, base,
15513 offset + 1)),
15514 gen_lowpart (QImode, scratch)));
15518 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15519 (padded to the size of a word) should be passed in a register. */
15521 static bool
15522 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15524 if (TARGET_AAPCS_BASED)
15525 return must_pass_in_stack_var_size (mode, type);
15526 else
15527 return must_pass_in_stack_var_size_or_pad (mode, type);
15531 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15532 Return true if an argument passed on the stack should be padded upwards,
15533 i.e. if the least-significant byte has useful data.
15534 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15535 aggregate types are placed in the lowest memory address. */
15537 bool
15538 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15540 if (!TARGET_AAPCS_BASED)
15541 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15543 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15544 return false;
15546 return true;
15550 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15551 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15552 register has useful data, and return the opposite if the most
15553 significant byte does. */
15555 bool
15556 arm_pad_reg_upward (enum machine_mode mode,
15557 tree type, int first ATTRIBUTE_UNUSED)
15559 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15561 /* For AAPCS, small aggregates, small fixed-point types,
15562 and small complex types are always padded upwards. */
15563 if (type)
15565 if ((AGGREGATE_TYPE_P (type)
15566 || TREE_CODE (type) == COMPLEX_TYPE
15567 || FIXED_POINT_TYPE_P (type))
15568 && int_size_in_bytes (type) <= 4)
15569 return true;
15571 else
15573 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15574 && GET_MODE_SIZE (mode) <= 4)
15575 return true;
15579 /* Otherwise, use default padding. */
15580 return !BYTES_BIG_ENDIAN;
15583 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15584 assuming that the address in the base register is word aligned. */
15585 bool
15586 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15588 HOST_WIDE_INT max_offset;
15590 /* Offset must be a multiple of 4 in Thumb mode. */
15591 if (TARGET_THUMB2 && ((offset & 3) != 0))
15592 return false;
15594 if (TARGET_THUMB2)
15595 max_offset = 1020;
15596 else if (TARGET_ARM)
15597 max_offset = 255;
15598 else
15599 return false;
15601 return ((offset <= max_offset) && (offset >= -max_offset));
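/* Examples (editorial addition): under the checks above, offsets
   -1020, 0 and 1020 are accepted for Thumb-2 LDRD/STRD, while 1022
   (not a multiple of 4) and 1024 (out of range) are rejected.  In ARM
   state the offset must lie in [-255, 255], so 252 is accepted and
   256 is rejected.  */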
15604 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15605 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15606 Assumes that the address in the base register RN is word aligned. Pattern
15607 guarantees that both memory accesses use the same base register,
15608 the offsets are constants within the range, and the gap between the offsets is 4.
15609 If reload is complete then check that registers are legal. WBACK indicates whether
15610 address is updated. LOAD indicates whether memory access is load or store. */
15611 bool
15612 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15613 bool wback, bool load)
15615 unsigned int t, t2, n;
15617 if (!reload_completed)
15618 return true;
15620 if (!offset_ok_for_ldrd_strd (offset))
15621 return false;
15623 t = REGNO (rt);
15624 t2 = REGNO (rt2);
15625 n = REGNO (rn);
15627 if ((TARGET_THUMB2)
15628 && ((wback && (n == t || n == t2))
15629 || (t == SP_REGNUM)
15630 || (t == PC_REGNUM)
15631 || (t2 == SP_REGNUM)
15632 || (t2 == PC_REGNUM)
15633 || (!load && (n == PC_REGNUM))
15634 || (load && (t == t2))
15635 /* Triggers Cortex-M3 LDRD errata. */
15636 || (!wback && load && fix_cm3_ldrd && (n == t))))
15637 return false;
15639 if ((TARGET_ARM)
15640 && ((wback && (n == t || n == t2))
15641 || (t2 == PC_REGNUM)
15642 || (t % 2 != 0) /* First destination register is not even. */
15643 || (t2 != t + 1)
15644 /* PC can be used as base register (for offset addressing only),
15645 but it is deprecated. */
15646 || (n == PC_REGNUM)))
15647 return false;
15649 return true;
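/* Examples (editorial addition): after reload, in ARM state the checks
   above accept e.g. LDRD r4, r5, [r2, #8] but reject
   LDRD r3, r4, [r2] (first register odd), LDRD r4, r6, [r2]
   (registers not consecutive) and any use of PC as the base register.
   In Thumb-2 state the register pair is unconstrained, but SP or PC
   destinations, and a written-back base that is also a destination,
   are refused.  */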
15652 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15653 operand MEM's address contains an immediate offset from the base
15654 register and has no side effects, in which case it sets BASE and
15655 OFFSET accordingly. */
15656 static bool
15657 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15659 rtx addr;
15661 gcc_assert (base != NULL && offset != NULL);
15663 /* TODO: Handle more general memory operand patterns, such as
15664 PRE_DEC and PRE_INC. */
15666 if (side_effects_p (mem))
15667 return false;
15669 /* Can't deal with subregs. */
15670 if (GET_CODE (mem) == SUBREG)
15671 return false;
15673 gcc_assert (MEM_P (mem));
15675 *offset = const0_rtx;
15677 addr = XEXP (mem, 0);
15679 /* If addr isn't valid for DImode, then we can't handle it. */
15680 if (!arm_legitimate_address_p (DImode, addr,
15681 reload_in_progress || reload_completed))
15682 return false;
15684 if (REG_P (addr))
15686 *base = addr;
15687 return true;
15689 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15691 *base = XEXP (addr, 0);
15692 *offset = XEXP (addr, 1);
15693 return (REG_P (*base) && CONST_INT_P (*offset));
15696 return false;
15699 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15701 /* Called from a peephole2 to replace two word-size accesses with a
15702 single LDRD/STRD instruction. Returns true iff we can generate a
15703 new instruction sequence. That is, both accesses use the same base
15704 register and the gap between constant offsets is 4. This function
15705 may reorder its operands to match ldrd/strd RTL templates.
15706 OPERANDS are the operands found by the peephole matcher;
15707 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15708 corresponding memory operands. LOAD indicates whether the access
15709 is load or store. CONST_STORE indicates a store of constant
15710 integer values held in OPERANDS[4,5] and assumes that the pattern
15711 is 4 insns long, for the purpose of checking dead registers.
15712 COMMUTE indicates that register operands may be reordered. */
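/* Illustrative example (editorial addition): a typical pair of
   accesses matched by the peephole is

	ldr	r0, [r2]
	ldr	r1, [r2, #4]

   which, when the checks below succeed, is replaced by the single
   instruction

	ldrd	r0, r1, [r2]

   possibly after swapping operands so that the lower address comes
   first.  */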
15713 bool
15714 gen_operands_ldrd_strd (rtx *operands, bool load,
15715 bool const_store, bool commute)
15717 int nops = 2;
15718 HOST_WIDE_INT offsets[2], offset;
15719 rtx base = NULL_RTX;
15720 rtx cur_base, cur_offset, tmp;
15721 int i, gap;
15722 HARD_REG_SET regset;
15724 gcc_assert (!const_store || !load);
15725 /* Check that the memory references are immediate offsets from the
15726 same base register. Extract the base register, the destination
15727 registers, and the corresponding memory offsets. */
15728 for (i = 0; i < nops; i++)
15730 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15731 return false;
15733 if (i == 0)
15734 base = cur_base;
15735 else if (REGNO (base) != REGNO (cur_base))
15736 return false;
15738 offsets[i] = INTVAL (cur_offset);
15739 if (GET_CODE (operands[i]) == SUBREG)
15741 tmp = SUBREG_REG (operands[i]);
15742 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15743 operands[i] = tmp;
15747 /* Make sure there is no dependency between the individual loads. */
15748 if (load && REGNO (operands[0]) == REGNO (base))
15749 return false; /* RAW */
15751 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15752 return false; /* WAW */
15754 /* If the same input register is used in both stores
15755 when storing different constants, try to find a free register.
15756 For example, the code
15757 mov r0, 0
15758 str r0, [r2]
15759 mov r0, 1
15760 str r0, [r2, #4]
15761 can be transformed into
15762 mov r1, 0
15763 strd r1, r0, [r2]
15764 in Thumb mode assuming that r1 is free. */
15765 if (const_store
15766 && REGNO (operands[0]) == REGNO (operands[1])
15767 && INTVAL (operands[4]) != INTVAL (operands[5]))
15769 if (TARGET_THUMB2)
15771 CLEAR_HARD_REG_SET (regset);
15772 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15773 if (tmp == NULL_RTX)
15774 return false;
15776 /* Use the new register in the first load to ensure that
15777 if the original input register is not dead after peephole,
15778 then it will have the correct constant value. */
15779 operands[0] = tmp;
15781 else if (TARGET_ARM)
15783 return false;
15784 int regno = REGNO (operands[0]);
15785 if (!peep2_reg_dead_p (4, operands[0]))
15787 /* When the input register is even and is not dead after the
15788 pattern, it has to hold the second constant but we cannot
15789 form a legal STRD in ARM mode with this register as the second
15790 register. */
15791 if (regno % 2 == 0)
15792 return false;
15794 /* Is regno-1 free? */
15795 SET_HARD_REG_SET (regset);
15796 CLEAR_HARD_REG_BIT(regset, regno - 1);
15797 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15798 if (tmp == NULL_RTX)
15799 return false;
15801 operands[0] = tmp;
15803 else
15805 /* Find a DImode register. */
15806 CLEAR_HARD_REG_SET (regset);
15807 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15808 if (tmp != NULL_RTX)
15810 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15811 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15813 else
15815 /* Can we use the input register to form a DI register? */
15816 SET_HARD_REG_SET (regset);
15817 CLEAR_HARD_REG_BIT(regset,
15818 regno % 2 == 0 ? regno + 1 : regno - 1);
15819 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15820 if (tmp == NULL_RTX)
15821 return false;
15822 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15826 gcc_assert (operands[0] != NULL_RTX);
15827 gcc_assert (operands[1] != NULL_RTX);
15828 gcc_assert (REGNO (operands[0]) % 2 == 0);
15829 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15833 /* Make sure the instructions are ordered with lower memory access first. */
15834 if (offsets[0] > offsets[1])
15836 gap = offsets[0] - offsets[1];
15837 offset = offsets[1];
15839 /* Swap the instructions such that lower memory is accessed first. */
15840 SWAP_RTX (operands[0], operands[1]);
15841 SWAP_RTX (operands[2], operands[3]);
15842 if (const_store)
15843 SWAP_RTX (operands[4], operands[5]);
15845 else
15847 gap = offsets[1] - offsets[0];
15848 offset = offsets[0];
15851 /* Make sure accesses are to consecutive memory locations. */
15852 if (gap != 4)
15853 return false;
15855 /* Make sure we generate legal instructions. */
15856 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15857 false, load))
15858 return true;
15860 /* In Thumb-2 state, where the LDRD/STRD register constraints are
15861 already loose, reordering the operands is unlikely to help. */
15862 if (TARGET_THUMB2)
15863 return false;
15865 if (load && commute)
15867 /* Try reordering registers. */
15868 SWAP_RTX (operands[0], operands[1]);
15869 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15870 false, load))
15871 return true;
15874 if (const_store)
15876 /* If input registers are dead after this pattern, they can be
15877 reordered or replaced by other registers that are free in the
15878 current pattern. */
15879 if (!peep2_reg_dead_p (4, operands[0])
15880 || !peep2_reg_dead_p (4, operands[1]))
15881 return false;
15883 /* Try to reorder the input registers. */
15884 /* For example, the code
15885 mov r0, 0
15886 mov r1, 1
15887 str r1, [r2]
15888 str r0, [r2, #4]
15889 can be transformed into
15890 mov r1, 0
15891 mov r0, 1
15892 strd r0, [r2]
15894 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15895 false, false))
15897 SWAP_RTX (operands[0], operands[1]);
15898 return true;
15901 /* Try to find a free DI register. */
15902 CLEAR_HARD_REG_SET (regset);
15903 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15904 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15905 while (true)
15907 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15908 if (tmp == NULL_RTX)
15909 return false;
15911 /* DREG must be an even-numbered register in DImode.
15912 Split it into SI registers. */
15913 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15914 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15915 gcc_assert (operands[0] != NULL_RTX);
15916 gcc_assert (operands[1] != NULL_RTX);
15917 gcc_assert (REGNO (operands[0]) % 2 == 0);
15918 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15920 return (operands_ok_ldrd_strd (operands[0], operands[1],
15921 base, offset,
15922 false, load));
15926 return false;
15928 #undef SWAP_RTX
15933 /* Print a symbolic form of X to the debug file, F. */
15934 static void
15935 arm_print_value (FILE *f, rtx x)
15937 switch (GET_CODE (x))
15939 case CONST_INT:
15940 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15941 return;
15943 case CONST_DOUBLE:
15944 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15945 return;
15947 case CONST_VECTOR:
15949 int i;
15951 fprintf (f, "<");
15952 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15954 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15955 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15956 fputc (',', f);
15958 fprintf (f, ">");
15960 return;
15962 case CONST_STRING:
15963 fprintf (f, "\"%s\"", XSTR (x, 0));
15964 return;
15966 case SYMBOL_REF:
15967 fprintf (f, "`%s'", XSTR (x, 0));
15968 return;
15970 case LABEL_REF:
15971 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15972 return;
15974 case CONST:
15975 arm_print_value (f, XEXP (x, 0));
15976 return;
15978 case PLUS:
15979 arm_print_value (f, XEXP (x, 0));
15980 fprintf (f, "+");
15981 arm_print_value (f, XEXP (x, 1));
15982 return;
15984 case PC:
15985 fprintf (f, "pc");
15986 return;
15988 default:
15989 fprintf (f, "????");
15990 return;
15994 /* Routines for manipulation of the constant pool. */
15996 /* Arm instructions cannot load a large constant directly into a
15997 register; they have to come from a pc relative load. The constant
15998 must therefore be placed in the addressable range of the pc
15999 relative load. Depending on the precise pc relative load
16000 instruction the range is somewhere between 256 bytes and 4k. This
16001 means that we often have to dump a constant inside a function, and
16002 generate code to branch around it.
16004 It is important to minimize this, since the branches will slow
16005 things down and make the code larger.
16007 Normally we can hide the table after an existing unconditional
16008 branch so that there is no interruption of the flow, but in the
16009 worst case the code looks like this:
16011 ldr rn, L1
16013 b L2
16014 align
16015 L1: .long value
16019 ldr rn, L3
16021 b L4
16022 align
16023 L3: .long value
16027 We fix this by performing a scan after scheduling, which notices
16028 which instructions need to have their operands fetched from the
16029 constant table and builds the table.
16031 The algorithm starts by building a table of all the constants that
16032 need fixing up and all the natural barriers in the function (places
16033 where a constant table can be dropped without breaking the flow).
16034 For each fixup we note how far the pc-relative replacement will be
16035 able to reach and the offset of the instruction into the function.
16037 Having built the table we then group the fixes together to form
16038 tables that are as large as possible (subject to addressing
16039 constraints) and emit each table of constants after the last
16040 barrier that is within range of all the instructions in the group.
16041 If a group does not contain a barrier, then we forcibly create one
16042 by inserting a jump instruction into the flow. Once the table has
16043 been inserted, the insns are then modified to reference the
16044 relevant entry in the pool.
16046 Possible enhancements to the algorithm (not implemented) are:
16048 1) For some processors and object formats, there may be benefit in
16049 aligning the pools to the start of cache lines; this alignment
16050 would need to be taken into account when calculating addressability
16051 of a pool. */
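/* Illustrative sketch (editorial addition): after this pass a fixed-up
   instruction loads directly from the emitted table, e.g.

	ldr	rn, .LCP0+4
	...
	b	.Lskip
	.align	2
   .LCP0:
	.word	<constant 0>
	.word	<constant 1>
   .Lskip:

   where .LCP0 stands for the minipool label created below and the
   branch is only inserted when no natural barrier is in range.  The
   label names are placeholders.  */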
16053 /* These typedefs are located at the start of this file, so that
16054 they can be used in the prototypes there. This comment is to
16055 remind readers of that fact so that the following structures
16056 can be understood more easily.
16058 typedef struct minipool_node Mnode;
16059 typedef struct minipool_fixup Mfix; */
16061 struct minipool_node
16063 /* Doubly linked chain of entries. */
16064 Mnode * next;
16065 Mnode * prev;
16066 /* The maximum offset into the code that this entry can be placed. While
16067 pushing fixes for forward references, all entries are sorted in order
16068 of increasing max_address. */
16069 HOST_WIDE_INT max_address;
16070 /* Similarly for an entry inserted for a backwards ref. */
16071 HOST_WIDE_INT min_address;
16072 /* The number of fixes referencing this entry. This can become zero
16073 if we "unpush" an entry. In this case we ignore the entry when we
16074 come to emit the code. */
16075 int refcount;
16076 /* The offset from the start of the minipool. */
16077 HOST_WIDE_INT offset;
16078 /* The value in table. */
16079 rtx value;
16080 /* The mode of value. */
16081 enum machine_mode mode;
16082 /* The size of the value. With iWMMXt enabled
16083 sizes > 4 also imply an alignment of 8 bytes. */
16084 int fix_size;
16087 struct minipool_fixup
16089 Mfix * next;
16090 rtx insn;
16091 HOST_WIDE_INT address;
16092 rtx * loc;
16093 enum machine_mode mode;
16094 int fix_size;
16095 rtx value;
16096 Mnode * minipool;
16097 HOST_WIDE_INT forwards;
16098 HOST_WIDE_INT backwards;
16101 /* Fixes less than a word need padding out to a word boundary. */
16102 #define MINIPOOL_FIX_SIZE(mode) \
16103 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
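/* For example (editorial addition), MINIPOOL_FIX_SIZE (HImode) is 4
   (a 2-byte value padded to a word), while MINIPOOL_FIX_SIZE (DImode)
   and MINIPOOL_FIX_SIZE (DFmode) are both 8.  */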
16105 static Mnode * minipool_vector_head;
16106 static Mnode * minipool_vector_tail;
16107 static rtx minipool_vector_label;
16108 static int minipool_pad;
16110 /* The linked list of all minipool fixes required for this function. */
16111 Mfix * minipool_fix_head;
16112 Mfix * minipool_fix_tail;
16113 /* The fix entry for the current minipool, once it has been placed. */
16114 Mfix * minipool_barrier;
16116 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16117 #define JUMP_TABLES_IN_TEXT_SECTION 0
16118 #endif
16120 static HOST_WIDE_INT
16121 get_jump_table_size (rtx insn)
16123 /* ADDR_VECs only take room if read-only data goes into the text
16124 section. */
16125 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16127 rtx body = PATTERN (insn);
16128 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16129 HOST_WIDE_INT size;
16130 HOST_WIDE_INT modesize;
16132 modesize = GET_MODE_SIZE (GET_MODE (body));
16133 size = modesize * XVECLEN (body, elt);
16134 switch (modesize)
16136 case 1:
16137 /* Round up size of TBB table to a halfword boundary. */
16138 size = (size + 1) & ~(HOST_WIDE_INT)1;
16139 break;
16140 case 2:
16141 /* No padding necessary for TBH. */
16142 break;
16143 case 4:
16144 /* Add two bytes for alignment on Thumb. */
16145 if (TARGET_THUMB)
16146 size += 2;
16147 break;
16148 default:
16149 gcc_unreachable ();
16151 return size;
16154 return 0;
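/* Examples (editorial addition): a 5-entry QImode (TBB) table occupies
   5 bytes and is rounded up to 6 by the code above; a 5-entry HImode
   (TBH) table needs exactly 10 bytes; a 5-entry SImode table takes
   20 bytes, plus 2 bytes for alignment on Thumb.  */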
16157 /* Return the maximum amount of padding that will be inserted before
16158 label LABEL. */
16160 static HOST_WIDE_INT
16161 get_label_padding (rtx label)
16163 HOST_WIDE_INT align, min_insn_size;
16165 align = 1 << label_to_alignment (label);
16166 min_insn_size = TARGET_THUMB ? 2 : 4;
16167 return align > min_insn_size ? align - min_insn_size : 0;
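/* Example (editorial addition): a label aligned to 8 bytes may be
   preceded by up to 8 - 2 = 6 bytes of padding on Thumb (minimum insn
   size 2) and up to 4 bytes on ARM; a label with no extra alignment
   contributes no padding.  */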
16170 /* Move a minipool fix MP from its current location to before MAX_MP.
16171 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16172 constraints may need updating. */
16173 static Mnode *
16174 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16175 HOST_WIDE_INT max_address)
16177 /* The code below assumes these are different. */
16178 gcc_assert (mp != max_mp);
16180 if (max_mp == NULL)
16182 if (max_address < mp->max_address)
16183 mp->max_address = max_address;
16185 else
16187 if (max_address > max_mp->max_address - mp->fix_size)
16188 mp->max_address = max_mp->max_address - mp->fix_size;
16189 else
16190 mp->max_address = max_address;
16192 /* Unlink MP from its current position. Since max_mp is non-null,
16193 mp->prev must be non-null. */
16194 mp->prev->next = mp->next;
16195 if (mp->next != NULL)
16196 mp->next->prev = mp->prev;
16197 else
16198 minipool_vector_tail = mp->prev;
16200 /* Re-insert it before MAX_MP. */
16201 mp->next = max_mp;
16202 mp->prev = max_mp->prev;
16203 max_mp->prev = mp;
16205 if (mp->prev != NULL)
16206 mp->prev->next = mp;
16207 else
16208 minipool_vector_head = mp;
16211 /* Save the new entry. */
16212 max_mp = mp;
16214 /* Scan over the preceding entries and adjust their addresses as
16215 required. */
16216 while (mp->prev != NULL
16217 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16219 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16220 mp = mp->prev;
16223 return max_mp;
16226 /* Add a constant to the minipool for a forward reference. Returns the
16227 node added or NULL if the constant will not fit in this pool. */
16228 static Mnode *
16229 add_minipool_forward_ref (Mfix *fix)
16231 /* If set, max_mp is the first pool_entry that has a lower
16232 constraint than the one we are trying to add. */
16233 Mnode * max_mp = NULL;
16234 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16235 Mnode * mp;
16237 /* If the minipool starts before the end of FIX->INSN then this FIX
16238 cannot be placed into the current pool. Furthermore, adding the
16239 new constant pool entry may cause the pool to start FIX_SIZE bytes
16240 earlier. */
16241 if (minipool_vector_head &&
16242 (fix->address + get_attr_length (fix->insn)
16243 >= minipool_vector_head->max_address - fix->fix_size))
16244 return NULL;
16246 /* Scan the pool to see if a constant with the same value has
16247 already been added. While we are doing this, also note the
16248 location where we must insert the constant if it doesn't already
16249 exist. */
16250 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16252 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16253 && fix->mode == mp->mode
16254 && (!LABEL_P (fix->value)
16255 || (CODE_LABEL_NUMBER (fix->value)
16256 == CODE_LABEL_NUMBER (mp->value)))
16257 && rtx_equal_p (fix->value, mp->value))
16259 /* More than one fix references this entry. */
16260 mp->refcount++;
16261 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16264 /* Note the insertion point if necessary. */
16265 if (max_mp == NULL
16266 && mp->max_address > max_address)
16267 max_mp = mp;
16269 /* If we are inserting an 8-byte aligned quantity and
16270 we have not already found an insertion point, then
16271 make sure that all such 8-byte aligned quantities are
16272 placed at the start of the pool. */
16273 if (ARM_DOUBLEWORD_ALIGN
16274 && max_mp == NULL
16275 && fix->fix_size >= 8
16276 && mp->fix_size < 8)
16278 max_mp = mp;
16279 max_address = mp->max_address;
16283 /* The value is not currently in the minipool, so we need to create
16284 a new entry for it. If MAX_MP is NULL, the entry will be put on
16285 the end of the list since the placement is less constrained than
16286 any existing entry. Otherwise, we insert the new fix before
16287 MAX_MP and, if necessary, adjust the constraints on the other
16288 entries. */
16289 mp = XNEW (Mnode);
16290 mp->fix_size = fix->fix_size;
16291 mp->mode = fix->mode;
16292 mp->value = fix->value;
16293 mp->refcount = 1;
16294 /* Not yet required for a backwards ref. */
16295 mp->min_address = -65536;
16297 if (max_mp == NULL)
16299 mp->max_address = max_address;
16300 mp->next = NULL;
16301 mp->prev = minipool_vector_tail;
16303 if (mp->prev == NULL)
16305 minipool_vector_head = mp;
16306 minipool_vector_label = gen_label_rtx ();
16308 else
16309 mp->prev->next = mp;
16311 minipool_vector_tail = mp;
16313 else
16315 if (max_address > max_mp->max_address - mp->fix_size)
16316 mp->max_address = max_mp->max_address - mp->fix_size;
16317 else
16318 mp->max_address = max_address;
16320 mp->next = max_mp;
16321 mp->prev = max_mp->prev;
16322 max_mp->prev = mp;
16323 if (mp->prev != NULL)
16324 mp->prev->next = mp;
16325 else
16326 minipool_vector_head = mp;
16329 /* Save the new entry. */
16330 max_mp = mp;
16332 /* Scan over the preceding entries and adjust their addresses as
16333 required. */
16334 while (mp->prev != NULL
16335 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16337 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16338 mp = mp->prev;
16341 return max_mp;
16344 static Mnode *
16345 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16346 HOST_WIDE_INT min_address)
16348 HOST_WIDE_INT offset;
16350 /* The code below assumes these are different. */
16351 gcc_assert (mp != min_mp);
16353 if (min_mp == NULL)
16355 if (min_address > mp->min_address)
16356 mp->min_address = min_address;
16358 else
16360 /* We will adjust this below if it is too loose. */
16361 mp->min_address = min_address;
16363 /* Unlink MP from its current position. Since min_mp is non-null,
16364 mp->next must be non-null. */
16365 mp->next->prev = mp->prev;
16366 if (mp->prev != NULL)
16367 mp->prev->next = mp->next;
16368 else
16369 minipool_vector_head = mp->next;
16371 /* Reinsert it after MIN_MP. */
16372 mp->prev = min_mp;
16373 mp->next = min_mp->next;
16374 min_mp->next = mp;
16375 if (mp->next != NULL)
16376 mp->next->prev = mp;
16377 else
16378 minipool_vector_tail = mp;
16381 min_mp = mp;
16383 offset = 0;
16384 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16386 mp->offset = offset;
16387 if (mp->refcount > 0)
16388 offset += mp->fix_size;
16390 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16391 mp->next->min_address = mp->min_address + mp->fix_size;
16394 return min_mp;
16397 /* Add a constant to the minipool for a backward reference. Returns the
16398 node added or NULL if the constant will not fit in this pool.
16400 Note that the code for insertion for a backwards reference can be
16401 somewhat confusing because the calculated offsets for each fix do
16402 not take into account the size of the pool (which is still under
16403 construction). */
16404 static Mnode *
16405 add_minipool_backward_ref (Mfix *fix)
16407 /* If set, min_mp is the last pool_entry that has a lower constraint
16408 than the one we are trying to add. */
16409 Mnode *min_mp = NULL;
16410 /* This can be negative, since it is only a constraint. */
16411 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16412 Mnode *mp;
16414 /* If we can't reach the current pool from this insn, or if we can't
16415 insert this entry at the end of the pool without pushing other
16416 fixes out of range, then we don't try. This ensures that we
16417 can't fail later on. */
16418 if (min_address >= minipool_barrier->address
16419 || (minipool_vector_tail->min_address + fix->fix_size
16420 >= minipool_barrier->address))
16421 return NULL;
16423 /* Scan the pool to see if a constant with the same value has
16424 already been added. While we are doing this, also note the
16425 location where we must insert the constant if it doesn't already
16426 exist. */
16427 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16429 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16430 && fix->mode == mp->mode
16431 && (!LABEL_P (fix->value)
16432 || (CODE_LABEL_NUMBER (fix->value)
16433 == CODE_LABEL_NUMBER (mp->value)))
16434 && rtx_equal_p (fix->value, mp->value)
16435 /* Check that there is enough slack to move this entry to the
16436 end of the table (this is conservative). */
16437 && (mp->max_address
16438 > (minipool_barrier->address
16439 + minipool_vector_tail->offset
16440 + minipool_vector_tail->fix_size)))
16442 mp->refcount++;
16443 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16446 if (min_mp != NULL)
16447 mp->min_address += fix->fix_size;
16448 else
16450 /* Note the insertion point if necessary. */
16451 if (mp->min_address < min_address)
16453 /* For now, we do not allow the insertion of 8-byte alignment
16454 requiring nodes anywhere but at the start of the pool. */
16455 if (ARM_DOUBLEWORD_ALIGN
16456 && fix->fix_size >= 8 && mp->fix_size < 8)
16457 return NULL;
16458 else
16459 min_mp = mp;
16461 else if (mp->max_address
16462 < minipool_barrier->address + mp->offset + fix->fix_size)
16464 /* Inserting before this entry would push the fix beyond
16465 its maximum address (which can happen if we have
16466 re-located a forwards fix); force the new fix to come
16467 after it. */
16468 if (ARM_DOUBLEWORD_ALIGN
16469 && fix->fix_size >= 8 && mp->fix_size < 8)
16470 return NULL;
16471 else
16473 min_mp = mp;
16474 min_address = mp->min_address + fix->fix_size;
16477 /* Do not insert a non-8-byte aligned quantity before 8-byte
16478 aligned quantities. */
16479 else if (ARM_DOUBLEWORD_ALIGN
16480 && fix->fix_size < 8
16481 && mp->fix_size >= 8)
16483 min_mp = mp;
16484 min_address = mp->min_address + fix->fix_size;
16489 /* We need to create a new entry. */
16490 mp = XNEW (Mnode);
16491 mp->fix_size = fix->fix_size;
16492 mp->mode = fix->mode;
16493 mp->value = fix->value;
16494 mp->refcount = 1;
16495 mp->max_address = minipool_barrier->address + 65536;
16497 mp->min_address = min_address;
16499 if (min_mp == NULL)
16501 mp->prev = NULL;
16502 mp->next = minipool_vector_head;
16504 if (mp->next == NULL)
16506 minipool_vector_tail = mp;
16507 minipool_vector_label = gen_label_rtx ();
16509 else
16510 mp->next->prev = mp;
16512 minipool_vector_head = mp;
16514 else
16516 mp->next = min_mp->next;
16517 mp->prev = min_mp;
16518 min_mp->next = mp;
16520 if (mp->next != NULL)
16521 mp->next->prev = mp;
16522 else
16523 minipool_vector_tail = mp;
16526 /* Save the new entry. */
16527 min_mp = mp;
16529 if (mp->prev)
16530 mp = mp->prev;
16531 else
16532 mp->offset = 0;
16534 /* Scan over the following entries and adjust their offsets. */
16535 while (mp->next != NULL)
16537 if (mp->next->min_address < mp->min_address + mp->fix_size)
16538 mp->next->min_address = mp->min_address + mp->fix_size;
16540 if (mp->refcount)
16541 mp->next->offset = mp->offset + mp->fix_size;
16542 else
16543 mp->next->offset = mp->offset;
16545 mp = mp->next;
16548 return min_mp;
16551 static void
16552 assign_minipool_offsets (Mfix *barrier)
16554 HOST_WIDE_INT offset = 0;
16555 Mnode *mp;
16557 minipool_barrier = barrier;
16559 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16561 mp->offset = offset;
16563 if (mp->refcount > 0)
16564 offset += mp->fix_size;
16568 /* Output the literal table */
16569 static void
16570 dump_minipool (rtx scan)
16572 Mnode * mp;
16573 Mnode * nmp;
16574 int align64 = 0;
16576 if (ARM_DOUBLEWORD_ALIGN)
16577 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16578 if (mp->refcount > 0 && mp->fix_size >= 8)
16580 align64 = 1;
16581 break;
16584 if (dump_file)
16585 fprintf (dump_file,
16586 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16587 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16589 scan = emit_label_after (gen_label_rtx (), scan);
16590 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16591 scan = emit_label_after (minipool_vector_label, scan);
16593 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16595 if (mp->refcount > 0)
16597 if (dump_file)
16599 fprintf (dump_file,
16600 ";; Offset %u, min %ld, max %ld ",
16601 (unsigned) mp->offset, (unsigned long) mp->min_address,
16602 (unsigned long) mp->max_address);
16603 arm_print_value (dump_file, mp->value);
16604 fputc ('\n', dump_file);
16607 switch (mp->fix_size)
16609 #ifdef HAVE_consttable_1
16610 case 1:
16611 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16612 break;
16614 #endif
16615 #ifdef HAVE_consttable_2
16616 case 2:
16617 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16618 break;
16620 #endif
16621 #ifdef HAVE_consttable_4
16622 case 4:
16623 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16624 break;
16626 #endif
16627 #ifdef HAVE_consttable_8
16628 case 8:
16629 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16630 break;
16632 #endif
16633 #ifdef HAVE_consttable_16
16634 case 16:
16635 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16636 break;
16638 #endif
16639 default:
16640 gcc_unreachable ();
16644 nmp = mp->next;
16645 free (mp);
16648 minipool_vector_head = minipool_vector_tail = NULL;
16649 scan = emit_insn_after (gen_consttable_end (), scan);
16650 scan = emit_barrier_after (scan);
16653 /* Return the cost of forcibly inserting a barrier after INSN. */
16654 static int
16655 arm_barrier_cost (rtx insn)
16657 /* Basing the location of the pool on the loop depth is preferable,
16658 but at the moment, the basic block information seems to be
16659 corrupted by this stage of the compilation. */
16660 int base_cost = 50;
16661 rtx next = next_nonnote_insn (insn);
16663 if (next != NULL && LABEL_P (next))
16664 base_cost -= 20;
16666 switch (GET_CODE (insn))
16668 case CODE_LABEL:
16669 /* It will always be better to place the table before the label, rather
16670 than after it. */
16671 return 50;
16673 case INSN:
16674 case CALL_INSN:
16675 return base_cost;
16677 case JUMP_INSN:
16678 return base_cost - 10;
16680 default:
16681 return base_cost + 10;
16685 /* Find the best place in the insn stream in the range
16686 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16687 Create the barrier by inserting a jump and add a new fix entry for
16688 it. */
16689 static Mfix *
16690 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16692 HOST_WIDE_INT count = 0;
16693 rtx barrier;
16694 rtx from = fix->insn;
16695 /* The instruction after which we will insert the jump. */
16696 rtx selected = NULL;
16697 int selected_cost;
16698 /* The address at which the jump instruction will be placed. */
16699 HOST_WIDE_INT selected_address;
16700 Mfix * new_fix;
16701 HOST_WIDE_INT max_count = max_address - fix->address;
16702 rtx label = gen_label_rtx ();
16704 selected_cost = arm_barrier_cost (from);
16705 selected_address = fix->address;
16707 while (from && count < max_count)
16709 rtx tmp;
16710 int new_cost;
16712 /* This code shouldn't have been called if there was a natural barrier
16713 within range. */
16714 gcc_assert (!BARRIER_P (from));
16716 /* Count the length of this insn. This must stay in sync with the
16717 code that pushes minipool fixes. */
16718 if (LABEL_P (from))
16719 count += get_label_padding (from);
16720 else
16721 count += get_attr_length (from);
16723 /* If there is a jump table, add its length. */
16724 if (tablejump_p (from, NULL, &tmp))
16726 count += get_jump_table_size (tmp);
16728 /* Jump tables aren't in a basic block, so base the cost on
16729 the dispatch insn. If we select this location, we will
16730 still put the pool after the table. */
16731 new_cost = arm_barrier_cost (from);
16733 if (count < max_count
16734 && (!selected || new_cost <= selected_cost))
16736 selected = tmp;
16737 selected_cost = new_cost;
16738 selected_address = fix->address + count;
16741 /* Continue after the dispatch table. */
16742 from = NEXT_INSN (tmp);
16743 continue;
16746 new_cost = arm_barrier_cost (from);
16748 if (count < max_count
16749 && (!selected || new_cost <= selected_cost))
16751 selected = from;
16752 selected_cost = new_cost;
16753 selected_address = fix->address + count;
16756 from = NEXT_INSN (from);
16759 /* Make sure that we found a place to insert the jump. */
16760 gcc_assert (selected);
16762 /* Make sure we do not split a call and its corresponding
16763 CALL_ARG_LOCATION note. */
16764 if (CALL_P (selected))
16766 rtx next = NEXT_INSN (selected);
16767 if (next && NOTE_P (next)
16768 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16769 selected = next;
16772 /* Create a new JUMP_INSN that branches around a barrier. */
16773 from = emit_jump_insn_after (gen_jump (label), selected);
16774 JUMP_LABEL (from) = label;
16775 barrier = emit_barrier_after (from);
16776 emit_label_after (label, barrier);
16778 /* Create a minipool barrier entry for the new barrier. */
16779 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16780 new_fix->insn = barrier;
16781 new_fix->address = selected_address;
16782 new_fix->next = fix->next;
16783 fix->next = new_fix;
16785 return new_fix;
16788 /* Record that there is a natural barrier in the insn stream at
16789 ADDRESS. */
16790 static void
16791 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16793 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16795 fix->insn = insn;
16796 fix->address = address;
16798 fix->next = NULL;
16799 if (minipool_fix_head != NULL)
16800 minipool_fix_tail->next = fix;
16801 else
16802 minipool_fix_head = fix;
16804 minipool_fix_tail = fix;
16807 /* Record INSN, which will need fixing up to load a value from the
16808 minipool. ADDRESS is the offset of the insn since the start of the
16809 function; LOC is a pointer to the part of the insn which requires
16810 fixing; VALUE is the constant that must be loaded, which is of type
16811 MODE. */
16812 static void
16813 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16814 enum machine_mode mode, rtx value)
16816 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16818 fix->insn = insn;
16819 fix->address = address;
16820 fix->loc = loc;
16821 fix->mode = mode;
16822 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16823 fix->value = value;
16824 fix->forwards = get_attr_pool_range (insn);
16825 fix->backwards = get_attr_neg_pool_range (insn);
16826 fix->minipool = NULL;
16828 /* If an insn doesn't have a range defined for it, then it isn't
16829 expecting to be reworked by this code. Better to stop now than
16830 to generate duff assembly code. */
16831 gcc_assert (fix->forwards || fix->backwards);
16833 /* If an entry requires 8-byte alignment then assume all constant pools
16834 require 4 bytes of padding. Trying to do this later on a per-pool
16835 basis is awkward because existing pool entries have to be modified. */
16836 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16837 minipool_pad = 4;
16839 if (dump_file)
16841 fprintf (dump_file,
16842 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16843 GET_MODE_NAME (mode),
16844 INSN_UID (insn), (unsigned long) address,
16845 -1 * (long)fix->backwards, (long)fix->forwards);
16846 arm_print_value (dump_file, fix->value);
16847 fprintf (dump_file, "\n");
16850 /* Add it to the chain of fixes. */
16851 fix->next = NULL;
16853 if (minipool_fix_head != NULL)
16854 minipool_fix_tail->next = fix;
16855 else
16856 minipool_fix_head = fix;
16858 minipool_fix_tail = fix;
16861 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16862 Returns the number of insns needed, or 99 if we always want to synthesize
16863 the value. */
16865 arm_max_const_double_inline_cost ()
16867 /* Let the value get synthesized to avoid the use of literal pools. */
16868 if (arm_disable_literal_pool)
16869 return 99;
16871 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16874 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16875 Returns the number of insns needed, or 99 if we don't know how to
16876 do it. */
16878 arm_const_double_inline_cost (rtx val)
16880 rtx lowpart, highpart;
16881 enum machine_mode mode;
16883 mode = GET_MODE (val);
16885 if (mode == VOIDmode)
16886 mode = DImode;
16888 gcc_assert (GET_MODE_SIZE (mode) == 8);
16890 lowpart = gen_lowpart (SImode, val);
16891 highpart = gen_highpart_mode (SImode, mode, val);
16893 gcc_assert (CONST_INT_P (lowpart));
16894 gcc_assert (CONST_INT_P (highpart));
16896 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16897 NULL_RTX, NULL_RTX, 0, 0)
16898 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16899 NULL_RTX, NULL_RTX, 0, 0));
16902 /* Return true if it is worthwhile to split a 64-bit constant into two
16903 32-bit operations. This is the case if optimizing for size, or
16904 if we have load delay slots, or if one 32-bit part can be done with
16905 a single data operation. */
16906 bool
16907 arm_const_double_by_parts (rtx val)
16909 enum machine_mode mode = GET_MODE (val);
16910 rtx part;
16912 if (optimize_size || arm_ld_sched)
16913 return true;
16915 if (mode == VOIDmode)
16916 mode = DImode;
16918 part = gen_highpart_mode (SImode, mode, val);
16920 gcc_assert (CONST_INT_P (part));
16922 if (const_ok_for_arm (INTVAL (part))
16923 || const_ok_for_arm (~INTVAL (part)))
16924 return true;
16926 part = gen_lowpart (SImode, val);
16928 gcc_assert (CONST_INT_P (part));
16930 if (const_ok_for_arm (INTVAL (part))
16931 || const_ok_for_arm (~INTVAL (part)))
16932 return true;
16934 return false;
16937 /* Return true if it is possible to inline both the high and low parts
16938 of a 64-bit constant into 32-bit data processing instructions. */
16939 bool
16940 arm_const_double_by_immediates (rtx val)
16942 enum machine_mode mode = GET_MODE (val);
16943 rtx part;
16945 if (mode == VOIDmode)
16946 mode = DImode;
16948 part = gen_highpart_mode (SImode, mode, val);
16950 gcc_assert (CONST_INT_P (part));
16952 if (!const_ok_for_arm (INTVAL (part)))
16953 return false;
16955 part = gen_lowpart (SImode, val);
16957 gcc_assert (CONST_INT_P (part));
16959 if (!const_ok_for_arm (INTVAL (part)))
16960 return false;
16962 return true;
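/* Examples (editorial addition): for 0x000000FF00000012 both 32-bit
   halves (0xFF and 0x12) are valid ARM data-processing immediates, so
   the function above returns true; for 0x0000000012345678 it returns
   false because 0x12345678 cannot be encoded as a single immediate.  */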
16965 /* Scan INSN and note any of its operands that need fixing.
16966 If DO_PUSHES is false we do not actually push any of the fixups
16967 needed. */
16968 static void
16969 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16971 int opno;
16973 extract_insn (insn);
16975 if (!constrain_operands (1))
16976 fatal_insn_not_found (insn);
16978 if (recog_data.n_alternatives == 0)
16979 return;
16981 /* Fill in recog_op_alt with information about the constraints of
16982 this insn. */
16983 preprocess_constraints (insn);
16985 const operand_alternative *op_alt = which_op_alt ();
16986 for (opno = 0; opno < recog_data.n_operands; opno++)
16988 /* Things we need to fix can only occur in inputs. */
16989 if (recog_data.operand_type[opno] != OP_IN)
16990 continue;
16992 /* If this alternative is a memory reference, then any mention
16993 of constants in this alternative is really to fool reload
16994 into allowing us to accept one there. We need to fix them up
16995 now so that we output the right code. */
16996 if (op_alt[opno].memory_ok)
16998 rtx op = recog_data.operand[opno];
17000 if (CONSTANT_P (op))
17002 if (do_pushes)
17003 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17004 recog_data.operand_mode[opno], op);
17006 else if (MEM_P (op)
17007 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17008 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17010 if (do_pushes)
17012 rtx cop = avoid_constant_pool_reference (op);
17014 /* Casting the address of something to a mode narrower
17015 than a word can cause avoid_constant_pool_reference()
17016 to return the pool reference itself. That's no good to
17017 us here. Lets just hope that we can use the
17018 constant pool value directly. */
17019 if (op == cop)
17020 cop = get_pool_constant (XEXP (op, 0));
17022 push_minipool_fix (insn, address,
17023 recog_data.operand_loc[opno],
17024 recog_data.operand_mode[opno], cop);
17031 return;
17034 /* Rewrite move insn into subtract of 0 if the condition codes will
17035 be useful in next conditional jump insn. */
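/* Illustrative example (editorial addition, register names are
   placeholders): for Thumb-1 code such as

	movs	r3, r2
	...
	cmp	r3, #0
	bne	.L1

   the move is rewritten below as a subtract of zero,

	subs	r3, r2, #0

   so that the condition codes it sets allow a later pass to drop the
   explicit comparison against zero.  */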
17037 static void
17038 thumb1_reorg (void)
17040 basic_block bb;
17042 FOR_EACH_BB_FN (bb, cfun)
17044 rtx dest, src;
17045 rtx pat, op0, set = NULL;
17046 rtx prev, insn = BB_END (bb);
17047 bool insn_clobbered = false;
17049 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17050 insn = PREV_INSN (insn);
17052 /* Find the last cbranchsi4_insn in basic block BB. */
17053 if (insn == BB_HEAD (bb)
17054 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17055 continue;
17057 /* Get the register with which we are comparing. */
17058 pat = PATTERN (insn);
17059 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17061 /* Find the first flag setting insn before INSN in basic block BB. */
17062 gcc_assert (insn != BB_HEAD (bb));
17063 for (prev = PREV_INSN (insn);
17064 (!insn_clobbered
17065 && prev != BB_HEAD (bb)
17066 && (NOTE_P (prev)
17067 || DEBUG_INSN_P (prev)
17068 || ((set = single_set (prev)) != NULL
17069 && get_attr_conds (prev) == CONDS_NOCOND)));
17070 prev = PREV_INSN (prev))
17072 if (reg_set_p (op0, prev))
17073 insn_clobbered = true;
17076 /* Skip if op0 is clobbered by insn other than prev. */
17077 if (insn_clobbered)
17078 continue;
17080 if (!set)
17081 continue;
17083 dest = SET_DEST (set);
17084 src = SET_SRC (set);
17085 if (!low_register_operand (dest, SImode)
17086 || !low_register_operand (src, SImode))
17087 continue;
17089 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17090 in INSN. Both src and dest of the move insn are checked. */
17091 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17093 dest = copy_rtx (dest);
17094 src = copy_rtx (src);
17095 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17096 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17097 INSN_CODE (prev) = -1;
17098 /* Set test register in INSN to dest. */
17099 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17100 INSN_CODE (insn) = -1;
17105 /* Convert instructions to their cc-clobbering variant if possible, since
17106 that allows us to use smaller encodings. */
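/* Illustrative example (editorial addition): when the condition flags
   are dead after the insn, a 32-bit Thumb-2 instruction such as

	and	r1, r1, r3

   is rewritten below into its flag-setting form

	ands	r1, r3

   which has a 16-bit encoding.  The conversion is skipped where the
   tuning tables disparage these encodings when optimizing for speed.  */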
17108 static void
17109 thumb2_reorg (void)
17111 basic_block bb;
17112 regset_head live;
17114 INIT_REG_SET (&live);
17116 /* We are freeing block_for_insn in the toplev to keep compatibility
17117 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17118 compute_bb_for_insn ();
17119 df_analyze ();
17121 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17123 FOR_EACH_BB_FN (bb, cfun)
17125 if (current_tune->disparage_flag_setting_t16_encodings
17126 && optimize_bb_for_speed_p (bb))
17127 continue;
17129 rtx insn;
17130 Convert_Action action = SKIP;
17131 Convert_Action action_for_partial_flag_setting
17132 = (current_tune->disparage_partial_flag_setting_t16_encodings
17133 && optimize_bb_for_speed_p (bb))
17134 ? SKIP : CONV;
17136 COPY_REG_SET (&live, DF_LR_OUT (bb));
17137 df_simulate_initialize_backwards (bb, &live);
17138 FOR_BB_INSNS_REVERSE (bb, insn)
17140 if (NONJUMP_INSN_P (insn)
17141 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17142 && GET_CODE (PATTERN (insn)) == SET)
17144 action = SKIP;
17145 rtx pat = PATTERN (insn);
17146 rtx dst = XEXP (pat, 0);
17147 rtx src = XEXP (pat, 1);
17148 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17150 if (!OBJECT_P (src))
17151 op0 = XEXP (src, 0);
17153 if (BINARY_P (src))
17154 op1 = XEXP (src, 1);
17156 if (low_register_operand (dst, SImode))
17158 switch (GET_CODE (src))
17160 case PLUS:
17161 /* Adding two registers and storing the result
17162 in the first source is already a 16-bit
17163 operation. */
17164 if (rtx_equal_p (dst, op0)
17165 && register_operand (op1, SImode))
17166 break;
17168 if (low_register_operand (op0, SImode))
17170 /* ADDS <Rd>,<Rn>,<Rm> */
17171 if (low_register_operand (op1, SImode))
17172 action = CONV;
17173 /* ADDS <Rdn>,#<imm8> */
17174 /* SUBS <Rdn>,#<imm8> */
17175 else if (rtx_equal_p (dst, op0)
17176 && CONST_INT_P (op1)
17177 && IN_RANGE (INTVAL (op1), -255, 255))
17178 action = CONV;
17179 /* ADDS <Rd>,<Rn>,#<imm3> */
17180 /* SUBS <Rd>,<Rn>,#<imm3> */
17181 else if (CONST_INT_P (op1)
17182 && IN_RANGE (INTVAL (op1), -7, 7))
17183 action = CONV;
17185 /* ADCS <Rd>, <Rn> */
17186 else if (GET_CODE (XEXP (src, 0)) == PLUS
17187 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17188 && low_register_operand (XEXP (XEXP (src, 0), 1),
17189 SImode)
17190 && COMPARISON_P (op1)
17191 && cc_register (XEXP (op1, 0), VOIDmode)
17192 && maybe_get_arm_condition_code (op1) == ARM_CS
17193 && XEXP (op1, 1) == const0_rtx)
17194 action = CONV;
17195 break;
17197 case MINUS:
17198 /* RSBS <Rd>,<Rn>,#0
17199 Not handled here: see NEG below. */
17200 /* SUBS <Rd>,<Rn>,#<imm3>
17201 SUBS <Rdn>,#<imm8>
17202 Not handled here: see PLUS above. */
17203 /* SUBS <Rd>,<Rn>,<Rm> */
17204 if (low_register_operand (op0, SImode)
17205 && low_register_operand (op1, SImode))
17206 action = CONV;
17207 break;
17209 case MULT:
17210 /* MULS <Rdm>,<Rn>,<Rdm>
17211 As an exception to the rule, this is only used
17212 when optimizing for size since MULS is slow on all
17213 known implementations. We do not even want to use
17214 MULS in cold code, if optimizing for speed, so we
17215 test the global flag here. */
17216 if (!optimize_size)
17217 break;
17218 /* else fall through. */
17219 case AND:
17220 case IOR:
17221 case XOR:
17222 /* ANDS <Rdn>,<Rm> */
17223 if (rtx_equal_p (dst, op0)
17224 && low_register_operand (op1, SImode))
17225 action = action_for_partial_flag_setting;
17226 else if (rtx_equal_p (dst, op1)
17227 && low_register_operand (op0, SImode))
17228 action = action_for_partial_flag_setting == SKIP
17229 ? SKIP : SWAP_CONV;
17230 break;
17232 case ASHIFTRT:
17233 case ASHIFT:
17234 case LSHIFTRT:
17235 /* ASRS <Rdn>,<Rm> */
17236 /* LSRS <Rdn>,<Rm> */
17237 /* LSLS <Rdn>,<Rm> */
17238 if (rtx_equal_p (dst, op0)
17239 && low_register_operand (op1, SImode))
17240 action = action_for_partial_flag_setting;
17241 /* ASRS <Rd>,<Rm>,#<imm5> */
17242 /* LSRS <Rd>,<Rm>,#<imm5> */
17243 /* LSLS <Rd>,<Rm>,#<imm5> */
17244 else if (low_register_operand (op0, SImode)
17245 && CONST_INT_P (op1)
17246 && IN_RANGE (INTVAL (op1), 0, 31))
17247 action = action_for_partial_flag_setting;
17248 break;
17250 case ROTATERT:
17251 /* RORS <Rdn>,<Rm> */
17252 if (rtx_equal_p (dst, op0)
17253 && low_register_operand (op1, SImode))
17254 action = action_for_partial_flag_setting;
17255 break;
17257 case NOT:
17258 /* MVNS <Rd>,<Rm> */
17259 if (low_register_operand (op0, SImode))
17260 action = action_for_partial_flag_setting;
17261 break;
17263 case NEG:
17264 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17265 if (low_register_operand (op0, SImode))
17266 action = CONV;
17267 break;
17269 case CONST_INT:
17270 /* MOVS <Rd>,#<imm8> */
17271 if (CONST_INT_P (src)
17272 && IN_RANGE (INTVAL (src), 0, 255))
17273 action = action_for_partial_flag_setting;
17274 break;
17276 case REG:
17277 /* MOVS and MOV<c> with registers have different
17278 encodings, so are not relevant here. */
17279 break;
17281 default:
17282 break;
17286 if (action != SKIP)
17288 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17289 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17290 rtvec vec;
17292 if (action == SWAP_CONV)
17294 src = copy_rtx (src);
17295 XEXP (src, 0) = op1;
17296 XEXP (src, 1) = op0;
17297 pat = gen_rtx_SET (VOIDmode, dst, src);
17298 vec = gen_rtvec (2, pat, clobber);
17300 else /* action == CONV */
17301 vec = gen_rtvec (2, pat, clobber);
17303 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17304 INSN_CODE (insn) = -1;
17308 if (NONDEBUG_INSN_P (insn))
17309 df_simulate_one_insn_backwards (bb, insn, &live);
17313 CLEAR_REG_SET (&live);
17316 /* GCC puts the pool in the wrong place for ARM, since we can only
17317 load addresses a limited distance around the pc. We do some
17318 special munging to move the constant pool values to the correct
17319 point in the code. */
17320 static void
17321 arm_reorg (void)
17323 rtx insn;
17324 HOST_WIDE_INT address = 0;
17325 Mfix * fix;
17327 if (TARGET_THUMB1)
17328 thumb1_reorg ();
17329 else if (TARGET_THUMB2)
17330 thumb2_reorg ();
17332 /* Ensure all insns that must be split have been split at this point.
17333 Otherwise, the pool placement code below may compute incorrect
17334 insn lengths. Note that when optimizing, all insns have already
17335 been split at this point. */
17336 if (!optimize)
17337 split_all_insns_noflow ();
17339 minipool_fix_head = minipool_fix_tail = NULL;
17341 /* The first insn must always be a note, or the code below won't
17342 scan it properly. */
17343 insn = get_insns ();
17344 gcc_assert (NOTE_P (insn));
17345 minipool_pad = 0;
17347 /* Scan all the insns and record the operands that will need fixing. */
17348 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17350 if (BARRIER_P (insn))
17351 push_minipool_barrier (insn, address);
17352 else if (INSN_P (insn))
17354 rtx table;
17356 note_invalid_constants (insn, address, true);
17357 address += get_attr_length (insn);
17359 /* If the insn is a vector jump, add the size of the table
17360 and skip the table. */
17361 if (tablejump_p (insn, NULL, &table))
17363 address += get_jump_table_size (table);
17364 insn = table;
17367 else if (LABEL_P (insn))
17368 /* Add the worst-case padding due to alignment. We don't add
17369 the _current_ padding because the minipool insertions
17370 themselves might change it. */
17371 address += get_label_padding (insn);
17374 fix = minipool_fix_head;
17376 /* Now scan the fixups and perform the required changes. */
17377 while (fix)
17379 Mfix * ftmp;
17380 Mfix * fdel;
17381 Mfix * last_added_fix;
17382 Mfix * last_barrier = NULL;
17383 Mfix * this_fix;
17385 /* Skip any further barriers before the next fix. */
17386 while (fix && BARRIER_P (fix->insn))
17387 fix = fix->next;
17389 /* No more fixes. */
17390 if (fix == NULL)
17391 break;
17393 last_added_fix = NULL;
17395 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17397 if (BARRIER_P (ftmp->insn))
17399 if (ftmp->address >= minipool_vector_head->max_address)
17400 break;
17402 last_barrier = ftmp;
17404 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17405 break;
17407 last_added_fix = ftmp; /* Keep track of the last fix added. */
17410 /* If we found a barrier, drop back to that; any fixes that we
17411 could have reached but come after the barrier will now go in
17412 the next mini-pool. */
17413 if (last_barrier != NULL)
17415 /* Reduce the refcount for those fixes that won't go into this
17416 pool after all. */
17417 for (fdel = last_barrier->next;
17418 fdel && fdel != ftmp;
17419 fdel = fdel->next)
17421 fdel->minipool->refcount--;
17422 fdel->minipool = NULL;
17425 ftmp = last_barrier;
17427 else
17429 /* ftmp is the first fix that we can't fit into this pool and
17430 there are no natural barriers that we could use. Insert a
17431 new barrier in the code somewhere between the previous
17432 fix and this one, and arrange to jump around it. */
17433 HOST_WIDE_INT max_address;
17435 /* The last item on the list of fixes must be a barrier, so
17436 we can never run off the end of the list of fixes without
17437 last_barrier being set. */
17438 gcc_assert (ftmp);
17440 max_address = minipool_vector_head->max_address;
17441 /* Check that there isn't another fix that is in range that
17442 we couldn't fit into this pool because the pool was
17443 already too large: we need to put the pool before such an
17444 instruction. The pool itself may come just after the
17445 fix because create_fix_barrier also allows space for a
17446 jump instruction. */
17447 if (ftmp->address < max_address)
17448 max_address = ftmp->address + 1;
17450 last_barrier = create_fix_barrier (last_added_fix, max_address);
17453 assign_minipool_offsets (last_barrier);
17455 while (ftmp)
17457 if (!BARRIER_P (ftmp->insn)
17458 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17459 == NULL))
17460 break;
17462 ftmp = ftmp->next;
17465 /* Scan over the fixes we have identified for this pool, fixing them
17466 up and adding the constants to the pool itself. */
17467 for (this_fix = fix; this_fix && ftmp != this_fix;
17468 this_fix = this_fix->next)
17469 if (!BARRIER_P (this_fix->insn))
17471 rtx addr
17472 = plus_constant (Pmode,
17473 gen_rtx_LABEL_REF (VOIDmode,
17474 minipool_vector_label),
17475 this_fix->minipool->offset);
17476 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17479 dump_minipool (last_barrier->insn);
17480 fix = ftmp;
17483 /* From now on we must synthesize any constants that we can't handle
17484 directly. This can happen if the RTL gets split during final
17485 instruction generation. */
17486 cfun->machine->after_arm_reorg = 1;
17488 /* Free the minipool memory. */
17489 obstack_free (&minipool_obstack, minipool_startobj);
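/* Illustrative sketch only; it is not part of the backend.  This is a
   simplified model of the pool-placement decision made by arm_reorg above,
   assuming a hypothetical fix record that carries just the insn address, the
   highest address at which its pool entry can still be reached, and a flag
   for existing barriers.  Real fixes also track modes, values and refcounts,
   and the reachability limit tightens as entries are added to the pool.  */

struct example_fix
{
  unsigned long address;	/* Address of the referencing insn.  */
  unsigned long max_address;	/* Last address usable for its pool entry.  */
  int is_barrier;		/* Nonzero for an existing barrier.  */
};

/* Return the index of the fix after which the next minipool should be
   dumped: the last natural barrier still in range if there is one, otherwise
   the last fix that fits (a new barrier must then be created after it, much
   as create_fix_barrier does above).  Returns -1 if nothing fits.  */
static int
example_choose_pool_point (const struct example_fix *fixes, int n_fixes)
{
  unsigned long limit = fixes[0].max_address;
  int last_barrier = -1;
  int i;

  for (i = 0; i < n_fixes; i++)
    {
      if (fixes[i].is_barrier)
	{
	  if (fixes[i].address >= limit)
	    break;
	  last_barrier = i;
	}
      else if (fixes[i].address >= limit)
	break;
    }

  return last_barrier >= 0 ? last_barrier : i - 1;
}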
17492 /* Routines to output assembly language. */
17494 /* If the rtx is the correct value then return the string of the number.
17495 In this way we can ensure that valid double constants are generated even
17496 when cross compiling. */
17497 const char *
17498 fp_immediate_constant (rtx x)
17500 REAL_VALUE_TYPE r;
17502 if (!fp_consts_inited)
17503 init_fp_table ();
17505 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17507 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17508 return "0";
17511 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17512 static const char *
17513 fp_const_from_val (REAL_VALUE_TYPE *r)
17515 if (!fp_consts_inited)
17516 init_fp_table ();
17518 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17519 return "0";
17522 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17523 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17524 is in the list, and UPDATE is true iff the list contains an explicit
17525 update of the base register. */
17526 void
17527 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17528 bool update)
17530 int i;
17531 char pattern[100];
17532 int offset;
17533 const char *conditional;
17534 int num_saves = XVECLEN (operands[0], 0);
17535 unsigned int regno;
17536 unsigned int regno_base = REGNO (operands[1]);
17538 offset = 0;
17539 offset += update ? 1 : 0;
17540 offset += return_pc ? 1 : 0;
17542 /* Is the base register in the list? */
17543 for (i = offset; i < num_saves; i++)
17545 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17546 /* If SP is in the list, then the base register must be SP. */
17547 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17548 /* If base register is in the list, there must be no explicit update. */
17549 if (regno == regno_base)
17550 gcc_assert (!update);
17553 conditional = reverse ? "%?%D0" : "%?%d0";
17554 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17556 /* Output pop (not ldmfd) because it has a shorter encoding. */
17557 gcc_assert (update);
17558 sprintf (pattern, "pop%s\t{", conditional);
17560 else
17562 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17563 It's just a convention; their semantics are identical. */
17564 if (regno_base == SP_REGNUM)
17565 sprintf (pattern, "ldm%sfd\t", conditional);
17566 else if (TARGET_UNIFIED_ASM)
17567 sprintf (pattern, "ldmia%s\t", conditional);
17568 else
17569 sprintf (pattern, "ldm%sia\t", conditional);
17571 strcat (pattern, reg_names[regno_base]);
17572 if (update)
17573 strcat (pattern, "!, {");
17574 else
17575 strcat (pattern, ", {");
17578 /* Output the first destination register. */
17579 strcat (pattern,
17580 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17582 /* Output the rest of the destination registers. */
17583 for (i = offset + 1; i < num_saves; i++)
17585 strcat (pattern, ", ");
17586 strcat (pattern,
17587 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17590 strcat (pattern, "}");
17592 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17593 strcat (pattern, "^");
17595 output_asm_insn (pattern, &cond);
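/* Illustrative sketch only; it is not part of the backend.  It shows the
   register-list formatting performed above, but driven by a plain bit mask
   rather than by the PARALLEL rtx.  The helper name is hypothetical;
   reg_names is the usual target register-name table, so a mask of
   (1 << 4) | (1 << 5) | (1 << PC_REGNUM) yields "{r4, r5, pc}".  */
static void
example_format_reg_list (char *buf, unsigned long mask)
{
  int reg;
  int first = 1;

  strcpy (buf, "{");
  for (reg = 0; reg <= 15; reg++)
    if (mask & (1UL << reg))
      {
	if (!first)
	  strcat (buf, ", ");
	strcat (buf, reg_names[reg]);
	first = 0;
      }
  strcat (buf, "}");
}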
17599 /* Output the assembly for a store multiple. */
17601 const char *
17602 vfp_output_fstmd (rtx * operands)
17604 char pattern[100];
17605 int p;
17606 int base;
17607 int i;
17609 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17610 p = strlen (pattern);
17612 gcc_assert (REG_P (operands[1]));
17614 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17615 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17617 p += sprintf (&pattern[p], ", d%d", base + i);
17619 strcpy (&pattern[p], "}");
17621 output_asm_insn (pattern, operands);
17622 return "";
17626 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17627 number of bytes pushed. */
17629 static int
17630 vfp_emit_fstmd (int base_reg, int count)
17632 rtx par;
17633 rtx dwarf;
17634 rtx tmp, reg;
17635 int i;
17637 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17638 register pairs are stored by a store multiple insn. We avoid this
17639 by pushing an extra pair. */
17640 if (count == 2 && !arm_arch6)
17642 if (base_reg == LAST_VFP_REGNUM - 3)
17643 base_reg -= 2;
17644 count++;
17647 /* FSTMD may not store more than 16 doubleword registers at once. Split
17648 larger stores into multiple parts (up to a maximum of two, in
17649 practice). */
17650 if (count > 16)
17652 int saved;
17653 /* NOTE: base_reg is an internal register number, so each D register
17654 counts as 2. */
17655 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17656 saved += vfp_emit_fstmd (base_reg, 16);
17657 return saved;
17660 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17661 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17663 reg = gen_rtx_REG (DFmode, base_reg);
17664 base_reg += 2;
17666 XVECEXP (par, 0, 0)
17667 = gen_rtx_SET (VOIDmode,
17668 gen_frame_mem
17669 (BLKmode,
17670 gen_rtx_PRE_MODIFY (Pmode,
17671 stack_pointer_rtx,
17672 plus_constant
17673 (Pmode, stack_pointer_rtx,
17674 - (count * 8)))
17676 gen_rtx_UNSPEC (BLKmode,
17677 gen_rtvec (1, reg),
17678 UNSPEC_PUSH_MULT));
17680 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17681 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17682 RTX_FRAME_RELATED_P (tmp) = 1;
17683 XVECEXP (dwarf, 0, 0) = tmp;
17685 tmp = gen_rtx_SET (VOIDmode,
17686 gen_frame_mem (DFmode, stack_pointer_rtx),
17687 reg);
17688 RTX_FRAME_RELATED_P (tmp) = 1;
17689 XVECEXP (dwarf, 0, 1) = tmp;
17691 for (i = 1; i < count; i++)
17693 reg = gen_rtx_REG (DFmode, base_reg);
17694 base_reg += 2;
17695 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17697 tmp = gen_rtx_SET (VOIDmode,
17698 gen_frame_mem (DFmode,
17699 plus_constant (Pmode,
17700 stack_pointer_rtx,
17701 i * 8)),
17702 reg);
17703 RTX_FRAME_RELATED_P (tmp) = 1;
17704 XVECEXP (dwarf, 0, i + 1) = tmp;
17707 par = emit_insn (par);
17708 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17709 RTX_FRAME_RELATED_P (par) = 1;
17711 return count * 8;
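/* Illustrative sketch only; it is not part of the backend.  It repeats the
   size computation performed by vfp_emit_fstmd above without generating any
   RTL: COUNT is the number of D registers to push and ARCH6 mimics
   arm_arch6.  The function name is hypothetical.  */
static int
example_vfp_push_bytes (int count, int arch6)
{
  /* ARM10 VFPr1 erratum: never store exactly two register pairs with a
     single store-multiple; push an extra pair instead.  */
  if (count == 2 && !arch6)
    count++;

  /* FSTMD handles at most 16 D registers, so larger pushes are split.  The
     split does not change the total: it is still 8 bytes per register.  */
  if (count > 16)
    return example_vfp_push_bytes (count - 16, arch6)
	   + example_vfp_push_bytes (16, arch6);

  return count * 8;
}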
17714 /* Emit a call instruction with pattern PAT. ADDR is the address of
17715 the call target. */
17717 void
17718 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17720 rtx insn;
17722 insn = emit_call_insn (pat);
17724 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17725 If the call might use such an entry, add a use of the PIC register
17726 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17727 if (TARGET_VXWORKS_RTP
17728 && flag_pic
17729 && !sibcall
17730 && GET_CODE (addr) == SYMBOL_REF
17731 && (SYMBOL_REF_DECL (addr)
17732 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17733 : !SYMBOL_REF_LOCAL_P (addr)))
17735 require_pic_register ();
17736 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17739 if (TARGET_AAPCS_BASED)
17741 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17742 linker. We need to add an IP clobber to allow setting
17743 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17744 is not needed since it's a fixed register. */
17745 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17746 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17750 /* Output a 'call' insn. */
17751 const char *
17752 output_call (rtx *operands)
17754 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17756 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17757 if (REGNO (operands[0]) == LR_REGNUM)
17759 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17760 output_asm_insn ("mov%?\t%0, %|lr", operands);
17763 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17765 if (TARGET_INTERWORK || arm_arch4t)
17766 output_asm_insn ("bx%?\t%0", operands);
17767 else
17768 output_asm_insn ("mov%?\t%|pc, %0", operands);
17770 return "";
17773 /* Output a 'call' insn that is a reference in memory. This is
17774 disabled for ARMv5, and we prefer a blx instead because otherwise
17775 there's a significant performance overhead. */
17776 const char *
17777 output_call_mem (rtx *operands)
17779 gcc_assert (!arm_arch5);
17780 if (TARGET_INTERWORK)
17782 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17783 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17784 output_asm_insn ("bx%?\t%|ip", operands);
17786 else if (regno_use_in (LR_REGNUM, operands[0]))
17788 /* LR is used in the memory address. We load the address in the
17789 first instruction. It's safe to use IP as the target of the
17790 load since the call will kill it anyway. */
17791 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17792 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17793 if (arm_arch4t)
17794 output_asm_insn ("bx%?\t%|ip", operands);
17795 else
17796 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17798 else
17800 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17801 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17804 return "";
17808 /* Output a move from arm registers to arm registers of a long double
17809 OPERANDS[0] is the destination.
17810 OPERANDS[1] is the source. */
17811 const char *
17812 output_mov_long_double_arm_from_arm (rtx *operands)
17814 /* We have to be careful here because the two might overlap. */
17815 int dest_start = REGNO (operands[0]);
17816 int src_start = REGNO (operands[1]);
17817 rtx ops[2];
17818 int i;
17820 if (dest_start < src_start)
17822 for (i = 0; i < 3; i++)
17824 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17825 ops[1] = gen_rtx_REG (SImode, src_start + i);
17826 output_asm_insn ("mov%?\t%0, %1", ops);
17829 else
17831 for (i = 2; i >= 0; i--)
17833 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17834 ops[1] = gen_rtx_REG (SImode, src_start + i);
17835 output_asm_insn ("mov%?\t%0, %1", ops);
17839 return "";
17842 void
17843 arm_emit_movpair (rtx dest, rtx src)
17845 /* If the src is an immediate, simplify it. */
17846 if (CONST_INT_P (src))
17848 HOST_WIDE_INT val = INTVAL (src);
17849 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17850 if ((val >> 16) & 0x0000ffff)
17851 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17852 GEN_INT (16)),
17853 GEN_INT ((val >> 16) & 0x0000ffff));
17854 return;
17856 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17857 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
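/* Illustrative sketch only; it is not part of the backend.  It shows how a
   32-bit constant is split for the movw/movt pair generated above: the low
   halfword is set first, and the high halfword is written with a
   zero-extract (movt) only when it is nonzero.  The helper name is
   hypothetical; the backend emits the equivalent SETs directly.  */
static void
example_movw_movt_split (unsigned long val,
			 unsigned int *movw_imm, unsigned int *movt_imm)
{
  *movw_imm = val & 0x0000ffff;		  /* movw rd, #lo16  */
  *movt_imm = (val >> 16) & 0x0000ffff;	  /* movt rd, #hi16, skipped if 0  */
}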
17860 /* Output a move between double words. It must be REG<-MEM
17861 or MEM<-REG. */
17862 const char *
17863 output_move_double (rtx *operands, bool emit, int *count)
17865 enum rtx_code code0 = GET_CODE (operands[0]);
17866 enum rtx_code code1 = GET_CODE (operands[1]);
17867 rtx otherops[3];
17868 if (count)
17869 *count = 1;
17871 /* The only case when this might happen is when
17872 you are looking at the length of a DImode instruction
17873 that has an invalid constant in it. */
17874 if (code0 == REG && code1 != MEM)
17876 gcc_assert (!emit);
17877 *count = 2;
17878 return "";
17881 if (code0 == REG)
17883 unsigned int reg0 = REGNO (operands[0]);
17885 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17887 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17889 switch (GET_CODE (XEXP (operands[1], 0)))
17891 case REG:
17893 if (emit)
17895 if (TARGET_LDRD
17896 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17897 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17898 else
17899 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17901 break;
17903 case PRE_INC:
17904 gcc_assert (TARGET_LDRD);
17905 if (emit)
17906 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17907 break;
17909 case PRE_DEC:
17910 if (emit)
17912 if (TARGET_LDRD)
17913 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17914 else
17915 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17917 break;
17919 case POST_INC:
17920 if (emit)
17922 if (TARGET_LDRD)
17923 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17924 else
17925 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17927 break;
17929 case POST_DEC:
17930 gcc_assert (TARGET_LDRD);
17931 if (emit)
17932 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17933 break;
17935 case PRE_MODIFY:
17936 case POST_MODIFY:
17937 /* Autoincrement addressing modes should never have overlapping
17938 base and destination registers, and overlapping index registers
17939 are already prohibited, so this doesn't need to worry about
17940 fix_cm3_ldrd. */
17941 otherops[0] = operands[0];
17942 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17943 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17945 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17947 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17949 /* Registers overlap so split out the increment. */
17950 if (emit)
17952 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17953 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17955 if (count)
17956 *count = 2;
17958 else
17960 /* Use a single insn if we can.
17961 FIXME: IWMMXT allows offsets larger than ldrd can
17962 handle, fix these up with a pair of ldr. */
17963 if (TARGET_THUMB2
17964 || !CONST_INT_P (otherops[2])
17965 || (INTVAL (otherops[2]) > -256
17966 && INTVAL (otherops[2]) < 256))
17968 if (emit)
17969 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17971 else
17973 if (emit)
17975 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17978 if (count)
17979 *count = 2;
17984 else
17986 /* Use a single insn if we can.
17987 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17988 fix these up with a pair of ldr. */
17989 if (TARGET_THUMB2
17990 || !CONST_INT_P (otherops[2])
17991 || (INTVAL (otherops[2]) > -256
17992 && INTVAL (otherops[2]) < 256))
17994 if (emit)
17995 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17997 else
17999 if (emit)
18001 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18002 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18004 if (count)
18005 *count = 2;
18008 break;
18010 case LABEL_REF:
18011 case CONST:
18012 /* We might be able to use ldrd %0, %1 here. However, the range is
18013 different to ldr/adr, and it is broken on some ARMv7-M
18014 implementations. */
18015 /* Use the second register of the pair to avoid problematic
18016 overlap. */
18017 otherops[1] = operands[1];
18018 if (emit)
18019 output_asm_insn ("adr%?\t%0, %1", otherops);
18020 operands[1] = otherops[0];
18021 if (emit)
18023 if (TARGET_LDRD)
18024 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18025 else
18026 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18029 if (count)
18030 *count = 2;
18031 break;
18033 /* ??? This needs checking for thumb2. */
18034 default:
18035 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18036 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18038 otherops[0] = operands[0];
18039 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18040 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18042 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18044 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18046 switch ((int) INTVAL (otherops[2]))
18048 case -8:
18049 if (emit)
18050 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18051 return "";
18052 case -4:
18053 if (TARGET_THUMB2)
18054 break;
18055 if (emit)
18056 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18057 return "";
18058 case 4:
18059 if (TARGET_THUMB2)
18060 break;
18061 if (emit)
18062 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18063 return "";
18066 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18067 operands[1] = otherops[0];
18068 if (TARGET_LDRD
18069 && (REG_P (otherops[2])
18070 || TARGET_THUMB2
18071 || (CONST_INT_P (otherops[2])
18072 && INTVAL (otherops[2]) > -256
18073 && INTVAL (otherops[2]) < 256)))
18075 if (reg_overlap_mentioned_p (operands[0],
18076 otherops[2]))
18078 rtx tmp;
18079 /* Swap base and index registers over to
18080 avoid a conflict. */
18081 tmp = otherops[1];
18082 otherops[1] = otherops[2];
18083 otherops[2] = tmp;
18085 /* If both registers conflict, it will usually
18086 have been fixed by a splitter. */
18087 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18088 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18090 if (emit)
18092 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18093 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18095 if (count)
18096 *count = 2;
18098 else
18100 otherops[0] = operands[0];
18101 if (emit)
18102 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18104 return "";
18107 if (CONST_INT_P (otherops[2]))
18109 if (emit)
18111 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18112 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18113 else
18114 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18117 else
18119 if (emit)
18120 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18123 else
18125 if (emit)
18126 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18129 if (count)
18130 *count = 2;
18132 if (TARGET_LDRD)
18133 return "ldr%(d%)\t%0, [%1]";
18135 return "ldm%(ia%)\t%1, %M0";
18137 else
18139 otherops[1] = adjust_address (operands[1], SImode, 4);
18140 /* Take care of overlapping base/data reg. */
18141 if (reg_mentioned_p (operands[0], operands[1]))
18143 if (emit)
18145 output_asm_insn ("ldr%?\t%0, %1", otherops);
18146 output_asm_insn ("ldr%?\t%0, %1", operands);
18148 if (count)
18149 *count = 2;
18152 else
18154 if (emit)
18156 output_asm_insn ("ldr%?\t%0, %1", operands);
18157 output_asm_insn ("ldr%?\t%0, %1", otherops);
18159 if (count)
18160 *count = 2;
18165 else
18167 /* Constraints should ensure this. */
18168 gcc_assert (code0 == MEM && code1 == REG);
18169 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18170 || (TARGET_ARM && TARGET_LDRD));
18172 switch (GET_CODE (XEXP (operands[0], 0)))
18174 case REG:
18175 if (emit)
18177 if (TARGET_LDRD)
18178 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18179 else
18180 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18182 break;
18184 case PRE_INC:
18185 gcc_assert (TARGET_LDRD);
18186 if (emit)
18187 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18188 break;
18190 case PRE_DEC:
18191 if (emit)
18193 if (TARGET_LDRD)
18194 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18195 else
18196 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18198 break;
18200 case POST_INC:
18201 if (emit)
18203 if (TARGET_LDRD)
18204 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18205 else
18206 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18208 break;
18210 case POST_DEC:
18211 gcc_assert (TARGET_LDRD);
18212 if (emit)
18213 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18214 break;
18216 case PRE_MODIFY:
18217 case POST_MODIFY:
18218 otherops[0] = operands[1];
18219 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18220 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18222 /* IWMMXT allows offsets larger than strd can handle,
18223 fix these up with a pair of str. */
18224 if (!TARGET_THUMB2
18225 && CONST_INT_P (otherops[2])
18226 && (INTVAL(otherops[2]) <= -256
18227 || INTVAL(otherops[2]) >= 256))
18229 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18231 if (emit)
18233 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18234 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18236 if (count)
18237 *count = 2;
18239 else
18241 if (emit)
18243 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18244 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18246 if (count)
18247 *count = 2;
18250 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18252 if (emit)
18253 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18255 else
18257 if (emit)
18258 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18260 break;
18262 case PLUS:
18263 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18264 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18266 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18268 case -8:
18269 if (emit)
18270 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18271 return "";
18273 case -4:
18274 if (TARGET_THUMB2)
18275 break;
18276 if (emit)
18277 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18278 return "";
18280 case 4:
18281 if (TARGET_THUMB2)
18282 break;
18283 if (emit)
18284 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18285 return "";
18288 if (TARGET_LDRD
18289 && (REG_P (otherops[2])
18290 || TARGET_THUMB2
18291 || (CONST_INT_P (otherops[2])
18292 && INTVAL (otherops[2]) > -256
18293 && INTVAL (otherops[2]) < 256)))
18295 otherops[0] = operands[1];
18296 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18297 if (emit)
18298 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18299 return "";
18301 /* Fall through */
18303 default:
18304 otherops[0] = adjust_address (operands[0], SImode, 4);
18305 otherops[1] = operands[1];
18306 if (emit)
18308 output_asm_insn ("str%?\t%1, %0", operands);
18309 output_asm_insn ("str%?\t%H1, %0", otherops);
18311 if (count)
18312 *count = 2;
18316 return "";
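/* Illustrative sketch only; it is not part of the backend.  It restates the
   immediate-offset test applied to LDRD/STRD addresses in output_move_double
   above (Thumb-2 and register offsets bypass it); the helper name is
   hypothetical.  */
static int
example_arm_ldrd_offset_ok (long offset)
{
  /* ARM-mode LDRD/STRD take an 8-bit offset magnitude, so only offsets
     strictly between -256 and 256 are emitted directly; anything larger is
     handled with an add/sub or a pair of single-word accesses.  */
  return offset > -256 && offset < 256;
}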
18319 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18320 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18322 const char *
18323 output_move_quad (rtx *operands)
18325 if (REG_P (operands[0]))
18327 /* Load, or reg->reg move. */
18329 if (MEM_P (operands[1]))
18331 switch (GET_CODE (XEXP (operands[1], 0)))
18333 case REG:
18334 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18335 break;
18337 case LABEL_REF:
18338 case CONST:
18339 output_asm_insn ("adr%?\t%0, %1", operands);
18340 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18341 break;
18343 default:
18344 gcc_unreachable ();
18347 else
18349 rtx ops[2];
18350 int dest, src, i;
18352 gcc_assert (REG_P (operands[1]));
18354 dest = REGNO (operands[0]);
18355 src = REGNO (operands[1]);
18357 /* This seems pretty dumb, but hopefully GCC won't try to do it
18358 very often. */
18359 if (dest < src)
18360 for (i = 0; i < 4; i++)
18362 ops[0] = gen_rtx_REG (SImode, dest + i);
18363 ops[1] = gen_rtx_REG (SImode, src + i);
18364 output_asm_insn ("mov%?\t%0, %1", ops);
18366 else
18367 for (i = 3; i >= 0; i--)
18369 ops[0] = gen_rtx_REG (SImode, dest + i);
18370 ops[1] = gen_rtx_REG (SImode, src + i);
18371 output_asm_insn ("mov%?\t%0, %1", ops);
18375 else
18377 gcc_assert (MEM_P (operands[0]));
18378 gcc_assert (REG_P (operands[1]));
18379 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18381 switch (GET_CODE (XEXP (operands[0], 0)))
18383 case REG:
18384 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18385 break;
18387 default:
18388 gcc_unreachable ();
18392 return "";
18395 /* Output a VFP load or store instruction. */
18397 const char *
18398 output_move_vfp (rtx *operands)
18400 rtx reg, mem, addr, ops[2];
18401 int load = REG_P (operands[0]);
18402 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18403 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18404 const char *templ;
18405 char buff[50];
18406 enum machine_mode mode;
18408 reg = operands[!load];
18409 mem = operands[load];
18411 mode = GET_MODE (reg);
18413 gcc_assert (REG_P (reg));
18414 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18415 gcc_assert (mode == SFmode
18416 || mode == DFmode
18417 || mode == SImode
18418 || mode == DImode
18419 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18420 gcc_assert (MEM_P (mem));
18422 addr = XEXP (mem, 0);
18424 switch (GET_CODE (addr))
18426 case PRE_DEC:
18427 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18428 ops[0] = XEXP (addr, 0);
18429 ops[1] = reg;
18430 break;
18432 case POST_INC:
18433 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18434 ops[0] = XEXP (addr, 0);
18435 ops[1] = reg;
18436 break;
18438 default:
18439 templ = "f%s%c%%?\t%%%s0, %%1%s";
18440 ops[0] = reg;
18441 ops[1] = mem;
18442 break;
18445 sprintf (buff, templ,
18446 load ? "ld" : "st",
18447 dp ? 'd' : 's',
18448 dp ? "P" : "",
18449 integer_p ? "\t%@ int" : "");
18450 output_asm_insn (buff, ops);
18452 return "";
18455 /* Output a Neon double-word or quad-word load or store, or a load
18456 or store for larger structure modes.
18458 WARNING: The ordering of elements is weird in big-endian mode,
18459 because the EABI requires that vectors stored in memory appear
18460 as though they were stored by a VSTM instruction.
18461 GCC RTL defines element ordering based on in-memory order.
18462 This can be different from the architectural ordering of elements
18463 within a NEON register. The intrinsics defined in arm_neon.h use the
18464 NEON register element ordering, not the GCC RTL element ordering.
18466 For example, the in-memory ordering of a big-endian quadword
18467 vector with 16-bit elements when stored from register pair {d0,d1}
18468 will be (lowest address first, d0[N] is NEON register element N):
18470 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18472 When necessary, quadword registers (dN, dN+1) are moved to ARM
18473 registers from rN in the order:
18475 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18477 So that STM/LDM can be used on vectors in ARM registers, and the
18478 same memory layout will result as if VSTM/VLDM were used.
18480 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18481 possible, which allows use of appropriate alignment tags.
18482 Note that the choice of "64" is independent of the actual vector
18483 element size; this size simply ensures that the behavior is
18484 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18486 Due to limitations of those instructions, use of VST1.64/VLD1.64
18487 is not possible if:
18488 - the address contains PRE_DEC, or
18489 - the mode refers to more than 4 double-word registers
18491 In those cases, it would be possible to replace VSTM/VLDM by a
18492 sequence of instructions; this is not currently implemented since
18493 this is not certain to actually improve performance. */
18495 const char *
18496 output_move_neon (rtx *operands)
18498 rtx reg, mem, addr, ops[2];
18499 int regno, nregs, load = REG_P (operands[0]);
18500 const char *templ;
18501 char buff[50];
18502 enum machine_mode mode;
18504 reg = operands[!load];
18505 mem = operands[load];
18507 mode = GET_MODE (reg);
18509 gcc_assert (REG_P (reg));
18510 regno = REGNO (reg);
18511 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18512 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18513 || NEON_REGNO_OK_FOR_QUAD (regno));
18514 gcc_assert (VALID_NEON_DREG_MODE (mode)
18515 || VALID_NEON_QREG_MODE (mode)
18516 || VALID_NEON_STRUCT_MODE (mode));
18517 gcc_assert (MEM_P (mem));
18519 addr = XEXP (mem, 0);
18521 /* Strip off const from addresses like (const (plus (...))). */
18522 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18523 addr = XEXP (addr, 0);
18525 switch (GET_CODE (addr))
18527 case POST_INC:
18528 /* We have to use vldm / vstm for too-large modes. */
18529 if (nregs > 4)
18531 templ = "v%smia%%?\t%%0!, %%h1";
18532 ops[0] = XEXP (addr, 0);
18534 else
18536 templ = "v%s1.64\t%%h1, %%A0";
18537 ops[0] = mem;
18539 ops[1] = reg;
18540 break;
18542 case PRE_DEC:
18543 /* We have to use vldm / vstm in this case, since there is no
18544 pre-decrement form of the vld1 / vst1 instructions. */
18545 templ = "v%smdb%%?\t%%0!, %%h1";
18546 ops[0] = XEXP (addr, 0);
18547 ops[1] = reg;
18548 break;
18550 case POST_MODIFY:
18551 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18552 gcc_unreachable ();
18554 case LABEL_REF:
18555 case PLUS:
18557 int i;
18558 int overlap = -1;
18559 for (i = 0; i < nregs; i++)
18561 /* We're only using DImode here because it's a convenient size. */
18562 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18563 ops[1] = adjust_address (mem, DImode, 8 * i);
18564 if (reg_overlap_mentioned_p (ops[0], mem))
18566 gcc_assert (overlap == -1);
18567 overlap = i;
18569 else
18571 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18572 output_asm_insn (buff, ops);
18575 if (overlap != -1)
18577 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18578 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18579 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18580 output_asm_insn (buff, ops);
18583 return "";
18586 default:
18587 /* We have to use vldm / vstm for too-large modes. */
18588 if (nregs > 4)
18589 templ = "v%smia%%?\t%%m0, %%h1";
18590 else
18591 templ = "v%s1.64\t%%h1, %%A0";
18593 ops[0] = mem;
18594 ops[1] = reg;
18597 sprintf (buff, templ, load ? "ld" : "st");
18598 output_asm_insn (buff, ops);
18600 return "";
18603 /* Compute and return the length of neon_mov<mode>, where <mode> is
18604 one of VSTRUCT modes: EI, OI, CI or XI. */
18606 arm_attr_length_move_neon (rtx insn)
18608 rtx reg, mem, addr;
18609 int load;
18610 enum machine_mode mode;
18612 extract_insn_cached (insn);
18614 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18616 mode = GET_MODE (recog_data.operand[0]);
18617 switch (mode)
18619 case EImode:
18620 case OImode:
18621 return 8;
18622 case CImode:
18623 return 12;
18624 case XImode:
18625 return 16;
18626 default:
18627 gcc_unreachable ();
18631 load = REG_P (recog_data.operand[0]);
18632 reg = recog_data.operand[!load];
18633 mem = recog_data.operand[load];
18635 gcc_assert (MEM_P (mem));
18637 mode = GET_MODE (reg);
18638 addr = XEXP (mem, 0);
18640 /* Strip off const from addresses like (const (plus (...))). */
18641 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18642 addr = XEXP (addr, 0);
18644 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18646 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18647 return insns * 4;
18649 else
18650 return 4;
18653 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18654 return zero. */
18657 arm_address_offset_is_imm (rtx insn)
18659 rtx mem, addr;
18661 extract_insn_cached (insn);
18663 if (REG_P (recog_data.operand[0]))
18664 return 0;
18666 mem = recog_data.operand[0];
18668 gcc_assert (MEM_P (mem));
18670 addr = XEXP (mem, 0);
18672 if (REG_P (addr)
18673 || (GET_CODE (addr) == PLUS
18674 && REG_P (XEXP (addr, 0))
18675 && CONST_INT_P (XEXP (addr, 1))))
18676 return 1;
18677 else
18678 return 0;
18681 /* Output an ADD r, s, #n where n may be too big for one instruction.
18682 If adding zero to one register, output nothing. */
18683 const char *
18684 output_add_immediate (rtx *operands)
18686 HOST_WIDE_INT n = INTVAL (operands[2]);
18688 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18690 if (n < 0)
18691 output_multi_immediate (operands,
18692 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18693 -n);
18694 else
18695 output_multi_immediate (operands,
18696 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18700 return "";
18703 /* Output a multiple immediate operation.
18704 OPERANDS is the vector of operands referred to in the output patterns.
18705 INSTR1 is the output pattern to use for the first constant.
18706 INSTR2 is the output pattern to use for subsequent constants.
18707 IMMED_OP is the index of the constant slot in OPERANDS.
18708 N is the constant value. */
18709 static const char *
18710 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18711 int immed_op, HOST_WIDE_INT n)
18713 #if HOST_BITS_PER_WIDE_INT > 32
18714 n &= 0xffffffff;
18715 #endif
18717 if (n == 0)
18719 /* Quick and easy output. */
18720 operands[immed_op] = const0_rtx;
18721 output_asm_insn (instr1, operands);
18723 else
18725 int i;
18726 const char * instr = instr1;
18728 /* Note that n is never zero here (which would give no output). */
18729 for (i = 0; i < 32; i += 2)
18731 if (n & (3 << i))
18733 operands[immed_op] = GEN_INT (n & (255 << i));
18734 output_asm_insn (instr, operands);
18735 instr = instr2;
18736 i += 6;
18741 return "";
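/* Illustrative sketch only; it is not part of the backend.  It shows the
   chunking used by output_multi_immediate above: each ARM data-processing
   immediate is an 8-bit value at an even bit position, so N is consumed
   eight bits at a time starting from the lowest even position that is set,
   giving at most four chunks.  The helper name is hypothetical.  */
static int
example_split_arm_immediate (unsigned long n, unsigned long chunks[4])
{
  int n_chunks = 0;
  int i;

  n &= 0xffffffff;
  for (i = 0; i < 32; i += 2)
    if (n & (3UL << i))
      {
	chunks[n_chunks++] = n & (255UL << i);
	i += 6;			/* Skip the rest of this 8-bit window.  */
      }

  return n_chunks;
}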
18744 /* Return the name of a shifter operation. */
18745 static const char *
18746 arm_shift_nmem(enum rtx_code code)
18748 switch (code)
18750 case ASHIFT:
18751 return ARM_LSL_NAME;
18753 case ASHIFTRT:
18754 return "asr";
18756 case LSHIFTRT:
18757 return "lsr";
18759 case ROTATERT:
18760 return "ror";
18762 default:
18763 abort();
18767 /* Return the appropriate ARM instruction for the operation code.
18768 The returned result should not be overwritten. OP is the rtx of the
18769 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18770 was shifted. */
18771 const char *
18772 arithmetic_instr (rtx op, int shift_first_arg)
18774 switch (GET_CODE (op))
18776 case PLUS:
18777 return "add";
18779 case MINUS:
18780 return shift_first_arg ? "rsb" : "sub";
18782 case IOR:
18783 return "orr";
18785 case XOR:
18786 return "eor";
18788 case AND:
18789 return "and";
18791 case ASHIFT:
18792 case ASHIFTRT:
18793 case LSHIFTRT:
18794 case ROTATERT:
18795 return arm_shift_nmem(GET_CODE(op));
18797 default:
18798 gcc_unreachable ();
18802 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18803 for the operation code. The returned result should not be overwritten.
18804 OP is the rtx of the shift.
18805 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18806 constant shift amount otherwise. */
18807 static const char *
18808 shift_op (rtx op, HOST_WIDE_INT *amountp)
18810 const char * mnem;
18811 enum rtx_code code = GET_CODE (op);
18813 switch (code)
18815 case ROTATE:
18816 if (!CONST_INT_P (XEXP (op, 1)))
18818 output_operand_lossage ("invalid shift operand");
18819 return NULL;
18822 code = ROTATERT;
18823 *amountp = 32 - INTVAL (XEXP (op, 1));
18824 mnem = "ror";
18825 break;
18827 case ASHIFT:
18828 case ASHIFTRT:
18829 case LSHIFTRT:
18830 case ROTATERT:
18831 mnem = arm_shift_nmem(code);
18832 if (CONST_INT_P (XEXP (op, 1)))
18834 *amountp = INTVAL (XEXP (op, 1));
18836 else if (REG_P (XEXP (op, 1)))
18838 *amountp = -1;
18839 return mnem;
18841 else
18843 output_operand_lossage ("invalid shift operand");
18844 return NULL;
18846 break;
18848 case MULT:
18849 /* We never have to worry about the amount being other than a
18850 power of 2, since this case can never be reloaded from a reg. */
18851 if (!CONST_INT_P (XEXP (op, 1)))
18853 output_operand_lossage ("invalid shift operand");
18854 return NULL;
18857 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18859 /* Amount must be a power of two. */
18860 if (*amountp & (*amountp - 1))
18862 output_operand_lossage ("invalid shift operand");
18863 return NULL;
18866 *amountp = int_log2 (*amountp);
18867 return ARM_LSL_NAME;
18869 default:
18870 output_operand_lossage ("invalid shift operand");
18871 return NULL;
18874 /* This is not 100% correct, but follows from the desire to merge
18875 multiplication by a power of 2 with the recognizer for a
18876 shift. >=32 is not a valid shift for "lsl", so we must try to
18877 output a shift that produces the correct arithmetical result.
18878 Using lsr #32 is identical except for the fact that the carry bit
18879 is not set correctly if we set the flags; but we never use the
18880 carry bit from such an operation, so we can ignore that. */
18881 if (code == ROTATERT)
18882 /* Rotate is just modulo 32. */
18883 *amountp &= 31;
18884 else if (*amountp != (*amountp & 31))
18886 if (code == ASHIFT)
18887 mnem = "lsr";
18888 *amountp = 32;
18891 /* Shifts of 0 are no-ops. */
18892 if (*amountp == 0)
18893 return NULL;
18895 return mnem;
18898 /* Obtain the shift from the POWER of two. */
18900 static HOST_WIDE_INT
18901 int_log2 (HOST_WIDE_INT power)
18903 HOST_WIDE_INT shift = 0;
18905 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18907 gcc_assert (shift <= 31);
18908 shift++;
18911 return shift;
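/* Illustrative sketch only; it is not part of the backend.  It combines the
   power-of-two test and the log2 computation used when shift_op above turns
   a multiplication into ARM_LSL_NAME: for example, a multiply by 8 becomes
   "lsl #3".  Returns -1 for amounts that are not a nonzero power of two.
   The helper name is hypothetical.  */
static int
example_mult_to_lsl_amount (unsigned long amount)
{
  int shift = 0;

  /* Reject zero and non-powers of two, as shift_op does.  */
  if (amount == 0 || (amount & (amount - 1)) != 0)
    return -1;

  while ((amount & 1) == 0)
    {
      amount >>= 1;
      shift++;
    }

  return shift;
}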
18914 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18915 because /bin/as is horribly restrictive. The judgement about
18916 whether or not each character is 'printable' (and can be output as
18917 is) or not (and must be printed with an octal escape) must be made
18918 with reference to the *host* character set -- the situation is
18919 similar to that discussed in the comments above pp_c_char in
18920 c-pretty-print.c. */
18922 #define MAX_ASCII_LEN 51
18924 void
18925 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18927 int i;
18928 int len_so_far = 0;
18930 fputs ("\t.ascii\t\"", stream);
18932 for (i = 0; i < len; i++)
18934 int c = p[i];
18936 if (len_so_far >= MAX_ASCII_LEN)
18938 fputs ("\"\n\t.ascii\t\"", stream);
18939 len_so_far = 0;
18942 if (ISPRINT (c))
18944 if (c == '\\' || c == '\"')
18946 putc ('\\', stream);
18947 len_so_far++;
18949 putc (c, stream);
18950 len_so_far++;
18952 else
18954 fprintf (stream, "\\%03o", c);
18955 len_so_far += 4;
18959 fputs ("\"\n", stream);
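/* Illustrative usage only, with a hypothetical wrapper name: a short string
   containing a quote and a newline, and the directive output_ascii_pseudo_op
   produces for it (the quote is backslash-escaped, the newline becomes an
   octal escape).  */
static void
example_emit_ascii (FILE *stream)
{
  /* Emits:	.ascii	"a\"b\012"  */
  output_ascii_pseudo_op (stream, (const unsigned char *) "a\"b\n", 4);
}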
18962 /* Compute the register save mask for registers 0 through 12
18963 inclusive. This code is used by arm_compute_save_reg_mask. */
18965 static unsigned long
18966 arm_compute_save_reg0_reg12_mask (void)
18968 unsigned long func_type = arm_current_func_type ();
18969 unsigned long save_reg_mask = 0;
18970 unsigned int reg;
18972 if (IS_INTERRUPT (func_type))
18974 unsigned int max_reg;
18975 /* Interrupt functions must not corrupt any registers,
18976 even call clobbered ones. If this is a leaf function
18977 we can just examine the registers used by the RTL, but
18978 otherwise we have to assume that whatever function is
18979 called might clobber anything, and so we have to save
18980 all the call-clobbered registers as well. */
18981 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18982 /* FIQ handlers have registers r8 - r12 banked, so
18983 we only need to check r0 - r7. Normal ISRs only
18984 bank r14 and r15, so we must check up to r12.
18985 r13 is the stack pointer which is always preserved,
18986 so we do not need to consider it here. */
18987 max_reg = 7;
18988 else
18989 max_reg = 12;
18991 for (reg = 0; reg <= max_reg; reg++)
18992 if (df_regs_ever_live_p (reg)
18993 || (! crtl->is_leaf && call_used_regs[reg]))
18994 save_reg_mask |= (1 << reg);
18996 /* Also save the pic base register if necessary. */
18997 if (flag_pic
18998 && !TARGET_SINGLE_PIC_BASE
18999 && arm_pic_register != INVALID_REGNUM
19000 && crtl->uses_pic_offset_table)
19001 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19003 else if (IS_VOLATILE(func_type))
19005 /* For noreturn functions we historically omitted register saves
19006 altogether. However, this really messes up debugging. As a
19007 compromise save just the frame pointers. Combined with the link
19008 register saved elsewhere this should be sufficient to get
19009 a backtrace. */
19010 if (frame_pointer_needed)
19011 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19012 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19013 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19014 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19015 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19017 else
19019 /* In the normal case we only need to save those registers
19020 which are call saved and which are used by this function. */
19021 for (reg = 0; reg <= 11; reg++)
19022 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19023 save_reg_mask |= (1 << reg);
19025 /* Handle the frame pointer as a special case. */
19026 if (frame_pointer_needed)
19027 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19029 /* If we aren't loading the PIC register,
19030 don't stack it even though it may be live. */
19031 if (flag_pic
19032 && !TARGET_SINGLE_PIC_BASE
19033 && arm_pic_register != INVALID_REGNUM
19034 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19035 || crtl->uses_pic_offset_table))
19036 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19038 /* The prologue will copy SP into R0, so save it. */
19039 if (IS_STACKALIGN (func_type))
19040 save_reg_mask |= 1;
19043 /* Save registers so the exception handler can modify them. */
19044 if (crtl->calls_eh_return)
19046 unsigned int i;
19048 for (i = 0; ; i++)
19050 reg = EH_RETURN_DATA_REGNO (i);
19051 if (reg == INVALID_REGNUM)
19052 break;
19053 save_reg_mask |= 1 << reg;
19057 return save_reg_mask;
19060 /* Return true if r3 is live at the start of the function. */
19062 static bool
19063 arm_r3_live_at_start_p (void)
19065 /* Just look at cfg info, which is still close enough to correct at this
19066 point. This gives false positives for broken functions that might use
19067 uninitialized data that happens to be allocated in r3, but who cares? */
19068 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19071 /* Compute the number of bytes used to store the static chain register on the
19072 stack, above the stack frame. We need to know this accurately to get the
19073 alignment of the rest of the stack frame correct. */
19075 static int
19076 arm_compute_static_chain_stack_bytes (void)
19078 /* See the defining assertion in arm_expand_prologue. */
19079 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19080 && IS_NESTED (arm_current_func_type ())
19081 && arm_r3_live_at_start_p ()
19082 && crtl->args.pretend_args_size == 0)
19083 return 4;
19085 return 0;
19088 /* Compute a bit mask of which registers need to be
19089 saved on the stack for the current function.
19090 This is used by arm_get_frame_offsets, which may add extra registers. */
19092 static unsigned long
19093 arm_compute_save_reg_mask (void)
19095 unsigned int save_reg_mask = 0;
19096 unsigned long func_type = arm_current_func_type ();
19097 unsigned int reg;
19099 if (IS_NAKED (func_type))
19100 /* This should never really happen. */
19101 return 0;
19103 /* If we are creating a stack frame, then we must save the frame pointer,
19104 IP (which will hold the old stack pointer), LR and the PC. */
19105 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19106 save_reg_mask |=
19107 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19108 | (1 << IP_REGNUM)
19109 | (1 << LR_REGNUM)
19110 | (1 << PC_REGNUM);
19112 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19114 /* Decide if we need to save the link register.
19115 Interrupt routines have their own banked link register,
19116 so they never need to save it.
19117 Otherwise if we do not use the link register we do not need to save
19118 it. If we are pushing other registers onto the stack, however, we
19119 can save an instruction in the epilogue by pushing the link register
19120 now and then popping it back into the PC. This incurs extra memory
19121 accesses though, so we only do it when optimizing for size, and only
19122 if we know that we will not need a fancy return sequence. */
19123 if (df_regs_ever_live_p (LR_REGNUM)
19124 || (save_reg_mask
19125 && optimize_size
19126 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19127 && !crtl->calls_eh_return))
19128 save_reg_mask |= 1 << LR_REGNUM;
19130 if (cfun->machine->lr_save_eliminated)
19131 save_reg_mask &= ~ (1 << LR_REGNUM);
19133 if (TARGET_REALLY_IWMMXT
19134 && ((bit_count (save_reg_mask)
19135 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19136 arm_compute_static_chain_stack_bytes())
19137 ) % 2) != 0)
19139 /* The total number of registers that are going to be pushed
19140 onto the stack is odd. We need to ensure that the stack
19141 is 64-bit aligned before we start to save iWMMXt registers,
19142 and also before we start to create locals. (A local variable
19143 might be a double or long long which we will load/store using
19144 an iWMMXt instruction). Therefore we need to push another
19145 ARM register, so that the stack will be 64-bit aligned. We
19146 try to avoid using the arg registers (r0 - r3) as they might be
19147 used to pass values in a tail call. */
19148 for (reg = 4; reg <= 12; reg++)
19149 if ((save_reg_mask & (1 << reg)) == 0)
19150 break;
19152 if (reg <= 12)
19153 save_reg_mask |= (1 << reg);
19154 else
19156 cfun->machine->sibcall_blocked = 1;
19157 save_reg_mask |= (1 << 3);
19161 /* We may need to push an additional register for use in initializing the
19162 PIC base register. */
19163 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19164 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19166 reg = thumb_find_work_register (1 << 4);
19167 if (!call_used_regs[reg])
19168 save_reg_mask |= (1 << reg);
19171 return save_reg_mask;
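/* Illustrative sketch only; it is not part of the backend.  When the iWMMXt
   parity test above finds that an odd number of words would be pushed, one
   extra core register is added so the save area and locals stay 64-bit
   aligned; this sketch shows the choice of that register.  The helper name
   is hypothetical.  */
static int
example_pick_alignment_pad_reg (unsigned long save_mask)
{
  int reg;

  /* Prefer r4..r12 so the argument registers stay free for tail calls.  */
  for (reg = 4; reg <= 12; reg++)
    if ((save_mask & (1UL << reg)) == 0)
      return reg;

  /* Otherwise fall back to r3, which blocks sibling calls as noted above.  */
  return 3;
}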
19175 /* Compute a bit mask of which registers need to be
19176 saved on the stack for the current function. */
19177 static unsigned long
19178 thumb1_compute_save_reg_mask (void)
19180 unsigned long mask;
19181 unsigned reg;
19183 mask = 0;
19184 for (reg = 0; reg < 12; reg ++)
19185 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19186 mask |= 1 << reg;
19188 if (flag_pic
19189 && !TARGET_SINGLE_PIC_BASE
19190 && arm_pic_register != INVALID_REGNUM
19191 && crtl->uses_pic_offset_table)
19192 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19194 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19195 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19196 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19198 /* LR will also be pushed if any lo regs are pushed. */
19199 if (mask & 0xff || thumb_force_lr_save ())
19200 mask |= (1 << LR_REGNUM);
19202 /* Make sure we have a low work register if we need one.
19203 We will need one if we are going to push a high register,
19204 but we are not currently intending to push a low register. */
19205 if ((mask & 0xff) == 0
19206 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19208 /* Use thumb_find_work_register to choose which register
19209 we will use. If the register is live then we will
19210 have to push it. Use LAST_LO_REGNUM as our fallback
19211 choice for the register to select. */
19212 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19213 /* Make sure the register returned by thumb_find_work_register is
19214 not part of the return value. */
19215 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19216 reg = LAST_LO_REGNUM;
19218 if (! call_used_regs[reg])
19219 mask |= 1 << reg;
19222 /* The 504 below is 8 bytes less than 512 because there are two possible
19223 alignment words. We can't tell here if they will be present or not, so we
19224 have to play it safe and assume that they are. */
19225 if ((CALLER_INTERWORKING_SLOT_SIZE +
19226 ROUND_UP_WORD (get_frame_size ()) +
19227 crtl->outgoing_args_size) >= 504)
19229 /* This is the same as the code in thumb1_expand_prologue() which
19230 determines which register to use for stack decrement. */
19231 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19232 if (mask & (1 << reg))
19233 break;
19235 if (reg > LAST_LO_REGNUM)
19237 /* Make sure we have a register available for stack decrement. */
19238 mask |= 1 << LAST_LO_REGNUM;
19242 return mask;
19246 /* Return the number of bytes required to save VFP registers. */
19247 static int
19248 arm_get_vfp_saved_size (void)
19250 unsigned int regno;
19251 int count;
19252 int saved;
19254 saved = 0;
19255 /* Space for saved VFP registers. */
19256 if (TARGET_HARD_FLOAT && TARGET_VFP)
19258 count = 0;
19259 for (regno = FIRST_VFP_REGNUM;
19260 regno < LAST_VFP_REGNUM;
19261 regno += 2)
19263 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19264 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19266 if (count > 0)
19268 /* Work around the ARM10 VFPr1 bug. */
19269 if (count == 2 && !arm_arch6)
19270 count++;
19271 saved += count * 8;
19273 count = 0;
19275 else
19276 count++;
19278 if (count > 0)
19280 if (count == 2 && !arm_arch6)
19281 count++;
19282 saved += count * 8;
19285 return saved;
19289 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19290 everything bar the final return instruction. If SIMPLE_RETURN is true,
19291 then do not output the epilogue, because it has already been emitted in RTL. */
19292 const char *
19293 output_return_instruction (rtx operand, bool really_return, bool reverse,
19294 bool simple_return)
19296 char conditional[10];
19297 char instr[100];
19298 unsigned reg;
19299 unsigned long live_regs_mask;
19300 unsigned long func_type;
19301 arm_stack_offsets *offsets;
19303 func_type = arm_current_func_type ();
19305 if (IS_NAKED (func_type))
19306 return "";
19308 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19310 /* If this function was declared non-returning, and we have
19311 found a tail call, then we have to trust that the called
19312 function won't return. */
19313 if (really_return)
19315 rtx ops[2];
19317 /* Otherwise, trap an attempted return by aborting. */
19318 ops[0] = operand;
19319 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19320 : "abort");
19321 assemble_external_libcall (ops[1]);
19322 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19325 return "";
19328 gcc_assert (!cfun->calls_alloca || really_return);
19330 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19332 cfun->machine->return_used_this_function = 1;
19334 offsets = arm_get_frame_offsets ();
19335 live_regs_mask = offsets->saved_regs_mask;
19337 if (!simple_return && live_regs_mask)
19339 const char * return_reg;
19341 /* If we do not have any special requirements for function exit
19342 (e.g. interworking) then we can load the return address
19343 directly into the PC. Otherwise we must load it into LR. */
19344 if (really_return
19345 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19346 return_reg = reg_names[PC_REGNUM];
19347 else
19348 return_reg = reg_names[LR_REGNUM];
19350 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19352 /* There are three possible reasons for the IP register
19353 being saved. 1) a stack frame was created, in which case
19354 IP contains the old stack pointer, or 2) an ISR routine
19355 corrupted it, or 3) it was saved to align the stack on
19356 iWMMXt. In case 1, restore IP into SP, otherwise just
19357 restore IP. */
19358 if (frame_pointer_needed)
19360 live_regs_mask &= ~ (1 << IP_REGNUM);
19361 live_regs_mask |= (1 << SP_REGNUM);
19363 else
19364 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19367 /* On some ARM architectures it is faster to use LDR rather than
19368 LDM to load a single register. On other architectures, the
19369 cost is the same. In 26 bit mode, or for exception handlers,
19370 we have to use LDM to load the PC so that the CPSR is also
19371 restored. */
19372 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19373 if (live_regs_mask == (1U << reg))
19374 break;
19376 if (reg <= LAST_ARM_REGNUM
19377 && (reg != LR_REGNUM
19378 || ! really_return
19379 || ! IS_INTERRUPT (func_type)))
19381 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19382 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19384 else
19386 char *p;
19387 int first = 1;
19389 /* Generate the load multiple instruction to restore the
19390 registers. Note we can get here, even if
19391 frame_pointer_needed is true, but only if sp already
19392 points to the base of the saved core registers. */
19393 if (live_regs_mask & (1 << SP_REGNUM))
19395 unsigned HOST_WIDE_INT stack_adjust;
19397 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19398 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19400 if (stack_adjust && arm_arch5 && TARGET_ARM)
19401 if (TARGET_UNIFIED_ASM)
19402 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19403 else
19404 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19405 else
19407 /* If we can't use ldmib (SA110 bug),
19408 then try to pop r3 instead. */
19409 if (stack_adjust)
19410 live_regs_mask |= 1 << 3;
19412 if (TARGET_UNIFIED_ASM)
19413 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19414 else
19415 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19418 else
19419 if (TARGET_UNIFIED_ASM)
19420 sprintf (instr, "pop%s\t{", conditional);
19421 else
19422 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19424 p = instr + strlen (instr);
19426 for (reg = 0; reg <= SP_REGNUM; reg++)
19427 if (live_regs_mask & (1 << reg))
19429 int l = strlen (reg_names[reg]);
19431 if (first)
19432 first = 0;
19433 else
19435 memcpy (p, ", ", 2);
19436 p += 2;
19439 memcpy (p, "%|", 2);
19440 memcpy (p + 2, reg_names[reg], l);
19441 p += l + 2;
19444 if (live_regs_mask & (1 << LR_REGNUM))
19446 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19447 /* If returning from an interrupt, restore the CPSR. */
19448 if (IS_INTERRUPT (func_type))
19449 strcat (p, "^");
19451 else
19452 strcpy (p, "}");
19455 output_asm_insn (instr, & operand);
19457 /* See if we need to generate an extra instruction to
19458 perform the actual function return. */
19459 if (really_return
19460 && func_type != ARM_FT_INTERWORKED
19461 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19463 /* The return has already been handled
19464 by loading the LR into the PC. */
19465 return "";
19469 if (really_return)
19471 switch ((int) ARM_FUNC_TYPE (func_type))
19473 case ARM_FT_ISR:
19474 case ARM_FT_FIQ:
19475 /* ??? This is wrong for unified assembly syntax. */
19476 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19477 break;
19479 case ARM_FT_INTERWORKED:
19480 sprintf (instr, "bx%s\t%%|lr", conditional);
19481 break;
19483 case ARM_FT_EXCEPTION:
19484 /* ??? This is wrong for unified assembly syntax. */
19485 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19486 break;
19488 default:
19489 /* Use bx if it's available. */
19490 if (arm_arch5 || arm_arch4t)
19491 sprintf (instr, "bx%s\t%%|lr", conditional);
19492 else
19493 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19494 break;
19497 output_asm_insn (instr, & operand);
19500 return "";
19503 /* Write the function name into the code section, directly preceding
19504 the function prologue.
19506 Code will be output similar to this:
19508 .ascii "arm_poke_function_name", 0
19509 .align
19511 .word 0xff000000 + (t1 - t0)
19512 arm_poke_function_name
19513 mov ip, sp
19514 stmfd sp!, {fp, ip, lr, pc}
19515 sub fp, ip, #4
19517 When performing a stack backtrace, code can inspect the value
19518 of 'pc' stored at 'fp' + 0. If the trace function then looks
19519 at location pc - 12 and the top 8 bits are set, then we know
19520 that there is a function name embedded immediately preceding this
19521 location, and its length is ((pc[-3]) & ~0xff000000).
19523 We assume that pc is declared as a pointer to an unsigned long.
19525 It is of no benefit to output the function name if we are assembling
19526 a leaf function. These function types will not contain a stack
19527 backtrace structure, so it is not possible to determine the
19528 function name. */
19529 void
19530 arm_poke_function_name (FILE *stream, const char *name)
19532 unsigned long alignlength;
19533 unsigned long length;
19534 rtx x;
19536 length = strlen (name) + 1;
19537 alignlength = ROUND_UP_WORD (length);
19539 ASM_OUTPUT_ASCII (stream, name, length);
19540 ASM_OUTPUT_ALIGN (stream, 2);
19541 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19542 assemble_aligned_integer (UNITS_PER_WORD, x);
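/* Illustrative sketch only; it is not part of the backend.  It shows how a
   backtrace routine might recover a name poked by arm_poke_function_name,
   assuming PC is the saved program-counter value read from the frame,
   declared as a pointer to unsigned long as the comment above suggests, on a
   target where unsigned long is 32 bits wide.  The function name is
   hypothetical.  */
static const char *
example_read_poked_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];

  /* A top byte of 0xff marks the length word; the low 24 bits give the
     padded length of the name that immediately precedes it.  */
  if ((marker & 0xff000000) != 0xff000000)
    return NULL;

  return (const char *) pc - 12 - (marker & ~0xff000000);
}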
19545 /* Place some comments into the assembler stream
19546 describing the current function. */
19547 static void
19548 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19550 unsigned long func_type;
19552 /* ??? Do we want to print some of the below anyway? */
19553 if (TARGET_THUMB1)
19554 return;
19556 /* Sanity check. */
19557 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19559 func_type = arm_current_func_type ();
19561 switch ((int) ARM_FUNC_TYPE (func_type))
19563 default:
19564 case ARM_FT_NORMAL:
19565 break;
19566 case ARM_FT_INTERWORKED:
19567 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19568 break;
19569 case ARM_FT_ISR:
19570 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19571 break;
19572 case ARM_FT_FIQ:
19573 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19574 break;
19575 case ARM_FT_EXCEPTION:
19576 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19577 break;
19580 if (IS_NAKED (func_type))
19581 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19583 if (IS_VOLATILE (func_type))
19584 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19586 if (IS_NESTED (func_type))
19587 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19588 if (IS_STACKALIGN (func_type))
19589 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19591 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19592 crtl->args.size,
19593 crtl->args.pretend_args_size, frame_size);
19595 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19596 frame_pointer_needed,
19597 cfun->machine->uses_anonymous_args);
19599 if (cfun->machine->lr_save_eliminated)
19600 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19602 if (crtl->calls_eh_return)
19603 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
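/* Editorial illustration (not part of the original sources): for a typical
   function the hook above produces assembler comments along the lines of

     @ args = 0, pretend = 0, frame = 8
     @ frame_needed = 1, uses_anonymous_args = 0

   possibly preceded by notes such as "@ Interrupt Service Routine." or
   "@ Naked Function: ..." depending on the function type.  */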
19607 static void
19608 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19609 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19611 arm_stack_offsets *offsets;
19613 if (TARGET_THUMB1)
19615 int regno;
19617 /* Emit any call-via-reg trampolines that are needed for v4t support
19618 of call_reg and call_value_reg type insns. */
19619 for (regno = 0; regno < LR_REGNUM; regno++)
19621 rtx label = cfun->machine->call_via[regno];
19623 if (label != NULL)
19625 switch_to_section (function_section (current_function_decl));
19626 targetm.asm_out.internal_label (asm_out_file, "L",
19627 CODE_LABEL_NUMBER (label));
19628 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19632 /* ??? Probably not safe to set this here, since it assumes that a
19633 function will be emitted as assembly immediately after we generate
19634 RTL for it. This does not happen for inline functions. */
19635 cfun->machine->return_used_this_function = 0;
19637 else /* TARGET_32BIT */
19639 /* We need to take into account any stack-frame rounding. */
19640 offsets = arm_get_frame_offsets ();
19642 gcc_assert (!use_return_insn (FALSE, NULL)
19643 || (cfun->machine->return_used_this_function != 0)
19644 || offsets->saved_regs == offsets->outgoing_args
19645 || frame_pointer_needed);
19649 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19650 STR and STRD. If an even number of registers is being pushed, an
19651 STRD pattern is created for each register pair. If an
19652 odd number of registers is pushed, an initial STR is emitted, followed by
19653 as many STRD instructions as are needed. This works best when the
19654 stack is initially 64-bit aligned (the normal case), since it
19655 ensures that each STRD is also 64-bit aligned. */
19656 static void
19657 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19659 int num_regs = 0;
19660 int i;
19661 int regno;
19662 rtx par = NULL_RTX;
19663 rtx dwarf = NULL_RTX;
19664 rtx tmp;
19665 bool first = true;
19667 num_regs = bit_count (saved_regs_mask);
19669 /* Must be at least one register to save, and can't save SP or PC. */
19670 gcc_assert (num_regs > 0 && num_regs <= 14);
19671 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19672 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19674 /* Create sequence for DWARF info. All the frame-related data for
19675 debugging is held in this wrapper. */
19676 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19678 /* Describe the stack adjustment. */
19679 tmp = gen_rtx_SET (VOIDmode,
19680 stack_pointer_rtx,
19681 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19682 RTX_FRAME_RELATED_P (tmp) = 1;
19683 XVECEXP (dwarf, 0, 0) = tmp;
19685 /* Find the first register. */
19686 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19689 i = 0;
19691 /* If there's an odd number of registers to push, start off by
19692 pushing a single register. This ensures that subsequent strd
19693 operations are dword aligned (assuming that SP was originally
19694 64-bit aligned). */
19695 if ((num_regs & 1) != 0)
19697 rtx reg, mem, insn;
19699 reg = gen_rtx_REG (SImode, regno);
19700 if (num_regs == 1)
19701 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19702 stack_pointer_rtx));
19703 else
19704 mem = gen_frame_mem (Pmode,
19705 gen_rtx_PRE_MODIFY
19706 (Pmode, stack_pointer_rtx,
19707 plus_constant (Pmode, stack_pointer_rtx,
19708 -4 * num_regs)));
19710 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19711 RTX_FRAME_RELATED_P (tmp) = 1;
19712 insn = emit_insn (tmp);
19713 RTX_FRAME_RELATED_P (insn) = 1;
19714 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19715 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19716 reg);
19717 RTX_FRAME_RELATED_P (tmp) = 1;
19718 i++;
19719 regno++;
19720 XVECEXP (dwarf, 0, i) = tmp;
19721 first = false;
19724 while (i < num_regs)
19725 if (saved_regs_mask & (1 << regno))
19727 rtx reg1, reg2, mem1, mem2;
19728 rtx tmp0, tmp1, tmp2;
19729 int regno2;
19731 /* Find the register to pair with this one. */
19732 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19733 regno2++)
19736 reg1 = gen_rtx_REG (SImode, regno);
19737 reg2 = gen_rtx_REG (SImode, regno2);
19739 if (first)
19741 rtx insn;
19743 first = false;
19744 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19745 stack_pointer_rtx,
19746 -4 * num_regs));
19747 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19748 stack_pointer_rtx,
19749 -4 * (num_regs - 1)));
19750 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19751 plus_constant (Pmode, stack_pointer_rtx,
19752 -4 * (num_regs)));
19753 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19754 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19755 RTX_FRAME_RELATED_P (tmp0) = 1;
19756 RTX_FRAME_RELATED_P (tmp1) = 1;
19757 RTX_FRAME_RELATED_P (tmp2) = 1;
19758 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19759 XVECEXP (par, 0, 0) = tmp0;
19760 XVECEXP (par, 0, 1) = tmp1;
19761 XVECEXP (par, 0, 2) = tmp2;
19762 insn = emit_insn (par);
19763 RTX_FRAME_RELATED_P (insn) = 1;
19764 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19766 else
19768 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19769 stack_pointer_rtx,
19770 4 * i));
19771 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19772 stack_pointer_rtx,
19773 4 * (i + 1)));
19774 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19775 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19776 RTX_FRAME_RELATED_P (tmp1) = 1;
19777 RTX_FRAME_RELATED_P (tmp2) = 1;
19778 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19779 XVECEXP (par, 0, 0) = tmp1;
19780 XVECEXP (par, 0, 1) = tmp2;
19781 emit_insn (par);
19784 /* Create unwind information. This is an approximation. */
19785 tmp1 = gen_rtx_SET (VOIDmode,
19786 gen_frame_mem (Pmode,
19787 plus_constant (Pmode,
19788 stack_pointer_rtx,
19789 4 * i)),
19790 reg1);
19791 tmp2 = gen_rtx_SET (VOIDmode,
19792 gen_frame_mem (Pmode,
19793 plus_constant (Pmode,
19794 stack_pointer_rtx,
19795 4 * (i + 1))),
19796 reg2);
19798 RTX_FRAME_RELATED_P (tmp1) = 1;
19799 RTX_FRAME_RELATED_P (tmp2) = 1;
19800 XVECEXP (dwarf, 0, i + 1) = tmp1;
19801 XVECEXP (dwarf, 0, i + 2) = tmp2;
19802 i += 2;
19803 regno = regno2 + 1;
19805 else
19806 regno++;
19808 return;
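/* Editorial illustration (not part of the original sources): for
   SAVED_REGS_MASK covering {r4, r5, r6} the code above would emit
   something equivalent to

     str    r4, [sp, #-12]!       @ single STR first, keeping SP 8-byte aligned
     strd   r5, r6, [sp, #4]      @ remaining registers as 64-bit aligned pairs

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   to the unwinder as one SP decrement plus individual word stores.  */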
19811 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19812 whenever possible, otherwise it emits single-word stores. The first store
19813 also allocates stack space for all saved registers, using writeback with
19814 post-addressing mode. All other stores use offset addressing. If no STRD
19815 can be emitted, this function emits a sequence of single-word stores,
19816 and not an STM as before, because single-word stores provide more
19817 scheduling freedom and can be turned into an STM by peephole optimizations. */
19818 static void
19819 arm_emit_strd_push (unsigned long saved_regs_mask)
19821 int num_regs = 0;
19822 int i, j, dwarf_index = 0;
19823 int offset = 0;
19824 rtx dwarf = NULL_RTX;
19825 rtx insn = NULL_RTX;
19826 rtx tmp, mem;
19828 /* TODO: More efficient code can be emitted by changing the
19829 layout, e.g., first push all pairs that can use STRD to keep the
19830 stack aligned, and then push all other registers. */
19831 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19832 if (saved_regs_mask & (1 << i))
19833 num_regs++;
19835 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19836 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19837 gcc_assert (num_regs > 0);
19839 /* Create sequence for DWARF info. */
19840 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19842 /* For dwarf info, we generate explicit stack update. */
19843 tmp = gen_rtx_SET (VOIDmode,
19844 stack_pointer_rtx,
19845 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19846 RTX_FRAME_RELATED_P (tmp) = 1;
19847 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19849 /* Save registers. */
19850 offset = - 4 * num_regs;
19851 j = 0;
19852 while (j <= LAST_ARM_REGNUM)
19853 if (saved_regs_mask & (1 << j))
19855 if ((j % 2 == 0)
19856 && (saved_regs_mask & (1 << (j + 1))))
19858 /* Current register and the next register form a register pair for
19859 which STRD can be generated. */
19860 if (offset < 0)
19862 /* Allocate stack space for all saved registers. */
19863 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19864 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19865 mem = gen_frame_mem (DImode, tmp);
19866 offset = 0;
19868 else if (offset > 0)
19869 mem = gen_frame_mem (DImode,
19870 plus_constant (Pmode,
19871 stack_pointer_rtx,
19872 offset));
19873 else
19874 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19876 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19877 RTX_FRAME_RELATED_P (tmp) = 1;
19878 tmp = emit_insn (tmp);
19880 /* Record the first store insn. */
19881 if (dwarf_index == 1)
19882 insn = tmp;
19884 /* Generate dwarf info. */
19885 mem = gen_frame_mem (SImode,
19886 plus_constant (Pmode,
19887 stack_pointer_rtx,
19888 offset));
19889 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19890 RTX_FRAME_RELATED_P (tmp) = 1;
19891 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19893 mem = gen_frame_mem (SImode,
19894 plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 offset + 4));
19897 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19898 RTX_FRAME_RELATED_P (tmp) = 1;
19899 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19901 offset += 8;
19902 j += 2;
19904 else
19906 /* Emit a single word store. */
19907 if (offset < 0)
19909 /* Allocate stack space for all saved registers. */
19910 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19911 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19912 mem = gen_frame_mem (SImode, tmp);
19913 offset = 0;
19915 else if (offset > 0)
19916 mem = gen_frame_mem (SImode,
19917 plus_constant (Pmode,
19918 stack_pointer_rtx,
19919 offset));
19920 else
19921 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19923 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19924 RTX_FRAME_RELATED_P (tmp) = 1;
19925 tmp = emit_insn (tmp);
19927 /* Record the first store insn. */
19928 if (dwarf_index == 1)
19929 insn = tmp;
19931 /* Generate dwarf info. */
19932 mem = gen_frame_mem (SImode,
19933 plus_constant(Pmode,
19934 stack_pointer_rtx,
19935 offset));
19936 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19937 RTX_FRAME_RELATED_P (tmp) = 1;
19938 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19940 offset += 4;
19941 j += 1;
19944 else
19945 j++;
19947 /* Attach dwarf info to the first insn we generate. */
19948 gcc_assert (insn != NULL_RTX);
19949 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19950 RTX_FRAME_RELATED_P (insn) = 1;
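/* Editorial illustration (not part of the original sources): for
   SAVED_REGS_MASK covering {r4, r5, r6, r7} the loop above would emit
   something equivalent to

     strd   r4, r5, [sp, #-16]!   @ first store allocates the whole area
     strd   r6, r7, [sp, #8]      @ later stores use offset addressing

   with the DWARF note attached to the first store describing the SP
   adjustment and the four individual word stores.  */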
19953 /* Generate and emit an insn that we will recognize as a push_multi.
19954 Unfortunately, since this insn does not reflect very well the actual
19955 semantics of the operation, we need to annotate the insn for the benefit
19956 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19957 MASK for registers that should be annotated for DWARF2 frame unwind
19958 information. */
19959 static rtx
19960 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19962 int num_regs = 0;
19963 int num_dwarf_regs = 0;
19964 int i, j;
19965 rtx par;
19966 rtx dwarf;
19967 int dwarf_par_index;
19968 rtx tmp, reg;
19970 /* We don't record the PC in the dwarf frame information. */
19971 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19973 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19975 if (mask & (1 << i))
19976 num_regs++;
19977 if (dwarf_regs_mask & (1 << i))
19978 num_dwarf_regs++;
19981 gcc_assert (num_regs && num_regs <= 16);
19982 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19984 /* For the body of the insn we are going to generate an UNSPEC in
19985 parallel with several USEs. This allows the insn to be recognized
19986 by the push_multi pattern in the arm.md file.
19988 The body of the insn looks something like this:
19990 (parallel [
19991 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19992 (const_int:SI <num>)))
19993 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19994 (use (reg:SI XX))
19995 (use (reg:SI YY))
19999 For the frame note however, we try to be more explicit and actually
20000 show each register being stored into the stack frame, plus a (single)
20001 decrement of the stack pointer. We do it this way in order to be
20002 friendly to the stack unwinding code, which only wants to see a single
20003 stack decrement per instruction. The RTL we generate for the note looks
20004 something like this:
20006 (sequence [
20007 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20008 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20009 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20010 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20014 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20015 instead we'd have a parallel expression detailing all
20016 the stores to the various memory addresses so that debug
20017 information is more up-to-date. Remember however while writing
20018 this to take care of the constraints with the push instruction.
20020 Note also that this has to be taken care of for the VFP registers.
20022 For more see PR43399. */
20024 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20025 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20026 dwarf_par_index = 1;
20028 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20030 if (mask & (1 << i))
20032 reg = gen_rtx_REG (SImode, i);
20034 XVECEXP (par, 0, 0)
20035 = gen_rtx_SET (VOIDmode,
20036 gen_frame_mem
20037 (BLKmode,
20038 gen_rtx_PRE_MODIFY (Pmode,
20039 stack_pointer_rtx,
20040 plus_constant
20041 (Pmode, stack_pointer_rtx,
20042 -4 * num_regs))
20044 gen_rtx_UNSPEC (BLKmode,
20045 gen_rtvec (1, reg),
20046 UNSPEC_PUSH_MULT));
20048 if (dwarf_regs_mask & (1 << i))
20050 tmp = gen_rtx_SET (VOIDmode,
20051 gen_frame_mem (SImode, stack_pointer_rtx),
20052 reg);
20053 RTX_FRAME_RELATED_P (tmp) = 1;
20054 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20057 break;
20061 for (j = 1, i++; j < num_regs; i++)
20063 if (mask & (1 << i))
20065 reg = gen_rtx_REG (SImode, i);
20067 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20069 if (dwarf_regs_mask & (1 << i))
20072 = gen_rtx_SET (VOIDmode,
20073 gen_frame_mem
20074 (SImode,
20075 plus_constant (Pmode, stack_pointer_rtx,
20076 4 * j)),
20077 reg);
20078 RTX_FRAME_RELATED_P (tmp) = 1;
20079 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20082 j++;
20086 par = emit_insn (par);
20088 tmp = gen_rtx_SET (VOIDmode,
20089 stack_pointer_rtx,
20090 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20091 RTX_FRAME_RELATED_P (tmp) = 1;
20092 XVECEXP (dwarf, 0, 0) = tmp;
20094 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20096 return par;
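/* Editorial illustration (not part of the original sources): for MASK
   covering {r4, r5, lr} the parallel built above is matched by the
   push_multi pattern and assembles to something like

     push   {r4, r5, lr}          @ or stmfd sp!, {r4, r5, lr}

   while the REG_FRAME_RELATED_EXPR note records the equivalent
   "sp = sp - 12" plus one word store per register for the unwinder.  */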
20099 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20100 SIZE is the offset to be adjusted.
20101 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20102 static void
20103 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20105 rtx dwarf;
20107 RTX_FRAME_RELATED_P (insn) = 1;
20108 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20109 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20112 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20113 SAVED_REGS_MASK shows which registers need to be restored.
20115 Unfortunately, since this insn does not reflect very well the actual
20116 semantics of the operation, we need to annotate the insn for the benefit
20117 of DWARF2 frame unwind information. */
20118 static void
20119 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20121 int num_regs = 0;
20122 int i, j;
20123 rtx par;
20124 rtx dwarf = NULL_RTX;
20125 rtx tmp, reg;
20126 bool return_in_pc;
20127 int offset_adj;
20128 int emit_update;
20130 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20131 offset_adj = return_in_pc ? 1 : 0;
20132 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20133 if (saved_regs_mask & (1 << i))
20134 num_regs++;
20136 gcc_assert (num_regs && num_regs <= 16);
20138 /* If SP is in reglist, then we don't emit SP update insn. */
20139 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20141 /* The parallel needs to hold num_regs SETs
20142 and one SET for the stack update. */
20143 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20145 if (return_in_pc)
20147 tmp = ret_rtx;
20148 XVECEXP (par, 0, 0) = tmp;
20151 if (emit_update)
20153 /* Increment the stack pointer, based on there being
20154 num_regs 4-byte registers to restore. */
20155 tmp = gen_rtx_SET (VOIDmode,
20156 stack_pointer_rtx,
20157 plus_constant (Pmode,
20158 stack_pointer_rtx,
20159 4 * num_regs));
20160 RTX_FRAME_RELATED_P (tmp) = 1;
20161 XVECEXP (par, 0, offset_adj) = tmp;
20164 /* Now restore every reg, which may include PC. */
20165 for (j = 0, i = 0; j < num_regs; i++)
20166 if (saved_regs_mask & (1 << i))
20168 reg = gen_rtx_REG (SImode, i);
20169 if ((num_regs == 1) && emit_update && !return_in_pc)
20171 /* Emit single load with writeback. */
20172 tmp = gen_frame_mem (SImode,
20173 gen_rtx_POST_INC (Pmode,
20174 stack_pointer_rtx));
20175 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20176 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20177 return;
20180 tmp = gen_rtx_SET (VOIDmode,
20181 reg,
20182 gen_frame_mem
20183 (SImode,
20184 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20185 RTX_FRAME_RELATED_P (tmp) = 1;
20186 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20188 /* We need to maintain a sequence for DWARF info too. As dwarf info
20189 should not have PC, skip PC. */
20190 if (i != PC_REGNUM)
20191 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20193 j++;
20196 if (return_in_pc)
20197 par = emit_jump_insn (par);
20198 else
20199 par = emit_insn (par);
20201 REG_NOTES (par) = dwarf;
20202 if (!return_in_pc)
20203 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20204 stack_pointer_rtx, stack_pointer_rtx);
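/* Editorial illustration (not part of the original sources): for
   SAVED_REGS_MASK covering {r4, r5, pc} the parallel built above is matched
   by a pop_multi pattern and assembles to something like

     pop    {r4, r5, pc}          @ or ldmfd sp!, {r4, r5, pc}

   For the non-PC case a REG_CFA_ADJUST_CFA note records the 4 * num_regs
   stack-pointer adjustment in addition to the REG_CFA_RESTORE notes.  */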
20207 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20208 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20210 Unfortunately, since this insn does not reflect very well the actual
20211 semantics of the operation, we need to annotate the insn for the benefit
20212 of DWARF2 frame unwind information. */
20213 static void
20214 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20216 int i, j;
20217 rtx par;
20218 rtx dwarf = NULL_RTX;
20219 rtx tmp, reg;
20221 gcc_assert (num_regs && num_regs <= 32);
20223 /* Workaround ARM10 VFPr1 bug. */
20224 if (num_regs == 2 && !arm_arch6)
20226 if (first_reg == 15)
20227 first_reg--;
20229 num_regs++;
20232 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20233 there could be up to 32 D-registers to restore.
20234 If there are more than 16 D-registers, make two recursive calls,
20235 each of which emits one pop_multi instruction. */
20236 if (num_regs > 16)
20238 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20239 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20240 return;
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20247 /* Increment the stack pointer, based on there being
20248 num_regs 8-byte registers to restore. */
20249 tmp = gen_rtx_SET (VOIDmode,
20250 base_reg,
20251 plus_constant (Pmode, base_reg, 8 * num_regs));
20252 RTX_FRAME_RELATED_P (tmp) = 1;
20253 XVECEXP (par, 0, 0) = tmp;
20255 /* Now show every reg that will be restored, using a SET for each. */
20256 for (j = 0, i=first_reg; j < num_regs; i += 2)
20258 reg = gen_rtx_REG (DFmode, i);
20260 tmp = gen_rtx_SET (VOIDmode,
20261 reg,
20262 gen_frame_mem
20263 (DFmode,
20264 plus_constant (Pmode, base_reg, 8 * j)));
20265 RTX_FRAME_RELATED_P (tmp) = 1;
20266 XVECEXP (par, 0, j + 1) = tmp;
20268 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20270 j++;
20273 par = emit_insn (par);
20274 REG_NOTES (par) = dwarf;
20276 /* Make sure the CFA does not remain based on IP_REGNUM, to allow unwinding from FP. */
20277 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20279 RTX_FRAME_RELATED_P (par) = 1;
20280 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20282 else
20283 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20284 base_reg, base_reg);
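/* Editorial illustration (not part of the original sources): restoring
   d8..d11 relative to SP would be emitted as something like

     vldmia sp!, {d8-d11}         @ fldmfdd in pre-unified syntax

   i.e. one pop_multi-style instruction restoring NUM_REGS D-registers and
   advancing BASE_REG by 8 * NUM_REGS.  */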
20287 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20288 even number of registers is being popped, an LDRD pattern is created for
20289 each register pair. If an odd number of registers is popped, the last register
20290 is loaded using an LDR pattern. */
20291 static void
20292 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20294 int num_regs = 0;
20295 int i, j;
20296 rtx par = NULL_RTX;
20297 rtx dwarf = NULL_RTX;
20298 rtx tmp, reg, tmp1;
20299 bool return_in_pc;
20301 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20302 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20303 if (saved_regs_mask & (1 << i))
20304 num_regs++;
20306 gcc_assert (num_regs && num_regs <= 16);
20308 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20309 to be popped. So, if num_regs is even, now it will become odd,
20310 and we can generate pop with PC. If num_regs is odd, it will be
20311 even now, and ldr with return can be generated for PC. */
20312 if (return_in_pc)
20313 num_regs--;
20315 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20317 /* Var j iterates over all the registers to gather all the registers in
20318 saved_regs_mask. Var i gives index of saved registers in stack frame.
20319 A PARALLEL RTX of register-pair is created here, so that pattern for
20320 LDRD can be matched. As PC is always last register to be popped, and
20321 we have already decremented num_regs if PC, we don't have to worry
20322 about PC in this loop. */
20323 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20324 if (saved_regs_mask & (1 << j))
20326 /* Create RTX for memory load. */
20327 reg = gen_rtx_REG (SImode, j);
20328 tmp = gen_rtx_SET (SImode,
20329 reg,
20330 gen_frame_mem (SImode,
20331 plus_constant (Pmode,
20332 stack_pointer_rtx, 4 * i)));
20333 RTX_FRAME_RELATED_P (tmp) = 1;
20335 if (i % 2 == 0)
20337 /* When saved-register index (i) is even, the RTX to be emitted is
20338 yet to be created. Hence create it first. The LDRD pattern we
20339 are generating is :
20340 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20341 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20342 where target registers need not be consecutive. */
20343 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20344 dwarf = NULL_RTX;
20347 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20348 added as 0th element and if i is odd, reg_i is added as 1st element
20349 of LDRD pattern shown above. */
20350 XVECEXP (par, 0, (i % 2)) = tmp;
20351 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20353 if ((i % 2) == 1)
20355 /* When saved-register index (i) is odd, RTXs for both the registers
20356 to be loaded are generated in above given LDRD pattern, and the
20357 pattern can be emitted now. */
20358 par = emit_insn (par);
20359 REG_NOTES (par) = dwarf;
20360 RTX_FRAME_RELATED_P (par) = 1;
20363 i++;
20366 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20367 number of registers is even AND return_in_pc is true, the last register is
20368 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20369 then emit an LDR with post-increment.
20371 /* Increment the stack pointer, based on there being
20372 num_regs 4-byte registers to restore. */
20373 tmp = gen_rtx_SET (VOIDmode,
20374 stack_pointer_rtx,
20375 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20376 RTX_FRAME_RELATED_P (tmp) = 1;
20377 tmp = emit_insn (tmp);
20378 if (!return_in_pc)
20380 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20381 stack_pointer_rtx, stack_pointer_rtx);
20384 dwarf = NULL_RTX;
20386 if (((num_regs % 2) == 1 && !return_in_pc)
20387 || ((num_regs % 2) == 0 && return_in_pc))
20389 /* Scan for the single register to be popped. Skip until the saved
20390 register is found. */
20391 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20393 /* Gen LDR with post increment here. */
20394 tmp1 = gen_rtx_MEM (SImode,
20395 gen_rtx_POST_INC (SImode,
20396 stack_pointer_rtx));
20397 set_mem_alias_set (tmp1, get_frame_alias_set ());
20399 reg = gen_rtx_REG (SImode, j);
20400 tmp = gen_rtx_SET (SImode, reg, tmp1);
20401 RTX_FRAME_RELATED_P (tmp) = 1;
20402 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20404 if (return_in_pc)
20406 /* If return_in_pc, j must be PC_REGNUM. */
20407 gcc_assert (j == PC_REGNUM);
20408 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20409 XVECEXP (par, 0, 0) = ret_rtx;
20410 XVECEXP (par, 0, 1) = tmp;
20411 par = emit_jump_insn (par);
20413 else
20415 par = emit_insn (tmp);
20416 REG_NOTES (par) = dwarf;
20417 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20418 stack_pointer_rtx, stack_pointer_rtx);
20422 else if ((num_regs % 2) == 1 && return_in_pc)
20424 /* There are 2 registers to be popped. So, generate the pattern
20425 pop_multiple_with_stack_update_and_return to pop in PC. */
20426 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20429 return;
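/* Editorial illustration (not part of the original sources): for
   SAVED_REGS_MASK covering {r4, r5, r6} the code above would emit
   something equivalent to

     ldrd   r4, r5, [sp]          @ register pairs via LDRD
     add    sp, sp, #8            @ explicit stack adjustment
     ldr    r6, [sp], #4          @ odd trailing register, post-increment

   with REG_CFA_RESTORE / REG_CFA_ADJUST_CFA notes attached for unwinding.  */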
20432 /* LDRD in ARM mode needs consecutive registers as operands. This function
20433 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20434 offset addressing and then generates one separate stack update. This provides
20435 more scheduling freedom, compared to writeback on every load. However,
20436 if the function returns using load into PC directly
20437 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20438 before the last load. TODO: Add a peephole optimization to recognize
20439 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20440 peephole optimization to merge the load at stack-offset zero
20441 with the stack update instruction using load with writeback
20442 in post-index addressing mode. */
20443 static void
20444 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20446 int j = 0;
20447 int offset = 0;
20448 rtx par = NULL_RTX;
20449 rtx dwarf = NULL_RTX;
20450 rtx tmp, mem;
20452 /* Restore saved registers. */
20453 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20454 j = 0;
20455 while (j <= LAST_ARM_REGNUM)
20456 if (saved_regs_mask & (1 << j))
20458 if ((j % 2) == 0
20459 && (saved_regs_mask & (1 << (j + 1)))
20460 && (j + 1) != PC_REGNUM)
20462 /* Current register and next register form register pair for which
20463 LDRD can be generated. PC is always the last register popped, and
20464 we handle it separately. */
20465 if (offset > 0)
20466 mem = gen_frame_mem (DImode,
20467 plus_constant (Pmode,
20468 stack_pointer_rtx,
20469 offset));
20470 else
20471 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20473 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20474 tmp = emit_insn (tmp);
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20477 /* Generate dwarf info. */
20479 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20480 gen_rtx_REG (SImode, j),
20481 NULL_RTX);
20482 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20483 gen_rtx_REG (SImode, j + 1),
20484 dwarf);
20486 REG_NOTES (tmp) = dwarf;
20488 offset += 8;
20489 j += 2;
20491 else if (j != PC_REGNUM)
20493 /* Emit a single word load. */
20494 if (offset > 0)
20495 mem = gen_frame_mem (SImode,
20496 plus_constant (Pmode,
20497 stack_pointer_rtx,
20498 offset));
20499 else
20500 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20502 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20503 tmp = emit_insn (tmp);
20504 RTX_FRAME_RELATED_P (tmp) = 1;
20506 /* Generate dwarf info. */
20507 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20508 gen_rtx_REG (SImode, j),
20509 NULL_RTX);
20511 offset += 4;
20512 j += 1;
20514 else /* j == PC_REGNUM */
20515 j++;
20517 else
20518 j++;
20520 /* Update the stack. */
20521 if (offset > 0)
20523 tmp = gen_rtx_SET (Pmode,
20524 stack_pointer_rtx,
20525 plus_constant (Pmode,
20526 stack_pointer_rtx,
20527 offset));
20528 tmp = emit_insn (tmp);
20529 arm_add_cfa_adjust_cfa_note (tmp, offset,
20530 stack_pointer_rtx, stack_pointer_rtx);
20531 offset = 0;
20534 if (saved_regs_mask & (1 << PC_REGNUM))
20536 /* Only PC is to be popped. */
20537 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20538 XVECEXP (par, 0, 0) = ret_rtx;
20539 tmp = gen_rtx_SET (SImode,
20540 gen_rtx_REG (SImode, PC_REGNUM),
20541 gen_frame_mem (SImode,
20542 gen_rtx_POST_INC (SImode,
20543 stack_pointer_rtx)));
20544 RTX_FRAME_RELATED_P (tmp) = 1;
20545 XVECEXP (par, 0, 1) = tmp;
20546 par = emit_jump_insn (par);
20548 /* Generate dwarf info. */
20549 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20550 gen_rtx_REG (SImode, PC_REGNUM),
20551 NULL_RTX);
20552 REG_NOTES (par) = dwarf;
20553 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20554 stack_pointer_rtx, stack_pointer_rtx);
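/* Editorial illustration (not part of the original sources): for
   SAVED_REGS_MASK covering {r4, r5, r6, pc} the code above would emit
   something equivalent to

     ldrd   r4, r5, [sp]          @ consecutive pair via LDRD
     ldr    r6, [sp, #8]          @ single word load
     add    sp, sp, #12           @ one separate stack update
     ldr    pc, [sp], #4          @ final load into PC performs the return

   again with the corresponding REG_CFA_RESTORE / REG_CFA_ADJUST_CFA notes.  */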
20558 /* Calculate the size of the return value that is passed in registers. */
20559 static unsigned
20560 arm_size_return_regs (void)
20562 enum machine_mode mode;
20564 if (crtl->return_rtx != 0)
20565 mode = GET_MODE (crtl->return_rtx);
20566 else
20567 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20569 return GET_MODE_SIZE (mode);
20572 /* Return true if the current function needs to save/restore LR. */
20573 static bool
20574 thumb_force_lr_save (void)
20576 return !cfun->machine->lr_save_eliminated
20577 && (!leaf_function_p ()
20578 || thumb_far_jump_used_p ()
20579 || df_regs_ever_live_p (LR_REGNUM));
20582 /* We do not know if r3 will be available, because an
20583 indirect tail call is happening in this
20584 particular case. */
20585 static bool
20586 is_indirect_tailcall_p (rtx call)
20588 rtx pat = PATTERN (call);
20590 /* Indirect tail call. */
20591 pat = XVECEXP (pat, 0, 0);
20592 if (GET_CODE (pat) == SET)
20593 pat = SET_SRC (pat);
20595 pat = XEXP (XEXP (pat, 0), 0);
20596 return REG_P (pat);
20599 /* Return true if r3 is used by any of the tail call insns in the
20600 current function. */
20601 static bool
20602 any_sibcall_could_use_r3 (void)
20604 edge_iterator ei;
20605 edge e;
20607 if (!crtl->tail_call_emit)
20608 return false;
20609 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20610 if (e->flags & EDGE_SIBCALL)
20612 rtx call = BB_END (e->src);
20613 if (!CALL_P (call))
20614 call = prev_nonnote_nondebug_insn (call);
20615 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20616 if (find_regno_fusage (call, USE, 3)
20617 || is_indirect_tailcall_p (call))
20618 return true;
20620 return false;
20624 /* Compute the distance from register FROM to register TO.
20625 These can be the arg pointer (26), the soft frame pointer (25),
20626 the stack pointer (13) or the hard frame pointer (11).
20627 In thumb mode r7 is used as the soft frame pointer, if needed.
20628 Typical stack layout looks like this:
20630 old stack pointer -> | |
20631 ----
20632 | | \
20633 | | saved arguments for
20634 | | vararg functions
20635 | | /
20637 hard FP & arg pointer -> | | \
20638 | | stack
20639 | | frame
20640 | | /
20642 | | \
20643 | | call saved
20644 | | registers
20645 soft frame pointer -> | | /
20647 | | \
20648 | | local
20649 | | variables
20650 locals base pointer -> | | /
20652 | | \
20653 | | outgoing
20654 | | arguments
20655 current stack pointer -> | | /
20658 For a given function some or all of these stack components
20659 may not be needed, giving rise to the possibility of
20660 eliminating some of the registers.
20662 The values returned by this function must reflect the behavior
20663 of arm_expand_prologue() and arm_compute_save_reg_mask().
20665 The sign of the number returned reflects the direction of stack
20666 growth, so the values are positive for all eliminations except
20667 from the soft frame pointer to the hard frame pointer.
20669 SFP may point just inside the local variables block to ensure correct
20670 alignment. */
20673 /* Calculate stack offsets. These are used to calculate register elimination
20674 offsets and in prologue/epilogue code. Also calculates which registers
20675 should be saved. */
20677 static arm_stack_offsets *
20678 arm_get_frame_offsets (void)
20680 struct arm_stack_offsets *offsets;
20681 unsigned long func_type;
20682 int leaf;
20683 int saved;
20684 int core_saved;
20685 HOST_WIDE_INT frame_size;
20686 int i;
20688 offsets = &cfun->machine->stack_offsets;
20690 /* We need to know if we are a leaf function. Unfortunately, it
20691 is possible to be called after start_sequence has been called,
20692 which causes get_insns to return the insns for the sequence,
20693 not the function, which will cause leaf_function_p to return
20694 the incorrect result.
20696 However, we only need to know about leaf functions once reload has completed, and the
20697 frame size cannot be changed after that time, so we can safely
20698 use the cached value. */
20700 if (reload_completed)
20701 return offsets;
20703 /* Initially this is the size of the local variables. It will be translated
20704 into an offset once we have determined the size of preceding data. */
20705 frame_size = ROUND_UP_WORD (get_frame_size ());
20707 leaf = leaf_function_p ();
20709 /* Space for variadic functions. */
20710 offsets->saved_args = crtl->args.pretend_args_size;
20712 /* In Thumb mode this is incorrect, but never used. */
20713 offsets->frame
20714 = (offsets->saved_args
20715 + arm_compute_static_chain_stack_bytes ()
20716 + (frame_pointer_needed ? 4 : 0));
20718 if (TARGET_32BIT)
20720 unsigned int regno;
20722 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20723 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20724 saved = core_saved;
20726 /* We know that SP will be doubleword aligned on entry, and we must
20727 preserve that condition at any subroutine call. We also require the
20728 soft frame pointer to be doubleword aligned. */
20730 if (TARGET_REALLY_IWMMXT)
20732 /* Check for the call-saved iWMMXt registers. */
20733 for (regno = FIRST_IWMMXT_REGNUM;
20734 regno <= LAST_IWMMXT_REGNUM;
20735 regno++)
20736 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20737 saved += 8;
20740 func_type = arm_current_func_type ();
20741 /* Space for saved VFP registers. */
20742 if (! IS_VOLATILE (func_type)
20743 && TARGET_HARD_FLOAT && TARGET_VFP)
20744 saved += arm_get_vfp_saved_size ();
20746 else /* TARGET_THUMB1 */
20748 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20749 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20750 saved = core_saved;
20751 if (TARGET_BACKTRACE)
20752 saved += 16;
20755 /* Saved registers include the stack frame. */
20756 offsets->saved_regs
20757 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20758 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20760 /* A leaf function does not need any stack alignment if it has nothing
20761 on the stack. */
20762 if (leaf && frame_size == 0
20763 /* However if it calls alloca(), we have a dynamically allocated
20764 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20765 && ! cfun->calls_alloca)
20767 offsets->outgoing_args = offsets->soft_frame;
20768 offsets->locals_base = offsets->soft_frame;
20769 return offsets;
20772 /* Ensure SFP has the correct alignment. */
20773 if (ARM_DOUBLEWORD_ALIGN
20774 && (offsets->soft_frame & 7))
20776 offsets->soft_frame += 4;
20777 /* Try to align stack by pushing an extra reg. Don't bother doing this
20778 when there is a stack frame as the alignment will be rolled into
20779 the normal stack adjustment. */
20780 if (frame_size + crtl->outgoing_args_size == 0)
20782 int reg = -1;
20784 /* Register r3 is caller-saved. Normally it does not need to be
20785 saved on entry by the prologue. However if we choose to save
20786 it for padding then we may confuse the compiler into thinking
20787 a prologue sequence is required when in fact it is not. This
20788 will occur when shrink-wrapping if r3 is used as a scratch
20789 register and there are no other callee-saved writes.
20791 This situation can be avoided when other callee-saved registers
20792 are available and r3 is not mandatory if we choose a callee-saved
20793 register for padding. */
20794 bool prefer_callee_reg_p = false;
20796 /* If it is safe to use r3, then do so. This sometimes
20797 generates better code on Thumb-2 by avoiding the need to
20798 use 32-bit push/pop instructions. */
20799 if (! any_sibcall_could_use_r3 ()
20800 && arm_size_return_regs () <= 12
20801 && (offsets->saved_regs_mask & (1 << 3)) == 0
20802 && (TARGET_THUMB2
20803 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20805 reg = 3;
20806 if (!TARGET_THUMB2)
20807 prefer_callee_reg_p = true;
20809 if (reg == -1
20810 || prefer_callee_reg_p)
20812 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20814 /* Avoid fixed registers; they may be changed at
20815 arbitrary times so it's unsafe to restore them
20816 during the epilogue. */
20817 if (!fixed_regs[i]
20818 && (offsets->saved_regs_mask & (1 << i)) == 0)
20820 reg = i;
20821 break;
20826 if (reg != -1)
20828 offsets->saved_regs += 4;
20829 offsets->saved_regs_mask |= (1 << reg);
20834 offsets->locals_base = offsets->soft_frame + frame_size;
20835 offsets->outgoing_args = (offsets->locals_base
20836 + crtl->outgoing_args_size);
20838 if (ARM_DOUBLEWORD_ALIGN)
20840 /* Ensure SP remains doubleword aligned. */
20841 if (offsets->outgoing_args & 7)
20842 offsets->outgoing_args += 4;
20843 gcc_assert (!(offsets->outgoing_args & 7));
20846 return offsets;
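/* Editorial worked example (not part of the original sources): assuming an
   ARM-mode function with no frame pointer, no static chain, no interworking
   slot, 8 bytes of locals, no outgoing arguments and {r4, lr} in
   saved_regs_mask, the code above would compute roughly

     saved_args = 0, frame = 0, saved_regs = 8,
     soft_frame = 8, locals_base = 16, outgoing_args = 16

   i.e. the layout sketched in the elimination-offset comment above.  */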
20850 /* Calculate the relative offsets for the different stack pointers. Positive
20851 offsets are in the direction of stack growth. */
20853 HOST_WIDE_INT
20854 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20856 arm_stack_offsets *offsets;
20858 offsets = arm_get_frame_offsets ();
20860 /* OK, now we have enough information to compute the distances.
20861 There must be an entry in these switch tables for each pair
20862 of registers in ELIMINABLE_REGS, even if some of the entries
20863 seem to be redundant or useless. */
20864 switch (from)
20866 case ARG_POINTER_REGNUM:
20867 switch (to)
20869 case THUMB_HARD_FRAME_POINTER_REGNUM:
20870 return 0;
20872 case FRAME_POINTER_REGNUM:
20873 /* This is the reverse of the soft frame pointer
20874 to hard frame pointer elimination below. */
20875 return offsets->soft_frame - offsets->saved_args;
20877 case ARM_HARD_FRAME_POINTER_REGNUM:
20878 /* This is only non-zero in the case where the static chain register
20879 is stored above the frame. */
20880 return offsets->frame - offsets->saved_args - 4;
20882 case STACK_POINTER_REGNUM:
20883 /* If nothing has been pushed on the stack at all
20884 then this will return -4. This *is* correct! */
20885 return offsets->outgoing_args - (offsets->saved_args + 4);
20887 default:
20888 gcc_unreachable ();
20890 gcc_unreachable ();
20892 case FRAME_POINTER_REGNUM:
20893 switch (to)
20895 case THUMB_HARD_FRAME_POINTER_REGNUM:
20896 return 0;
20898 case ARM_HARD_FRAME_POINTER_REGNUM:
20899 /* The hard frame pointer points to the top entry in the
20900 stack frame. The soft frame pointer to the bottom entry
20901 in the stack frame. If there is no stack frame at all,
20902 then they are identical. */
20904 return offsets->frame - offsets->soft_frame;
20906 case STACK_POINTER_REGNUM:
20907 return offsets->outgoing_args - offsets->soft_frame;
20909 default:
20910 gcc_unreachable ();
20912 gcc_unreachable ();
20914 default:
20915 /* You cannot eliminate from the stack pointer.
20916 In theory you could eliminate from the hard frame
20917 pointer to the stack pointer, but this will never
20918 happen, since if a stack frame is not needed the
20919 hard frame pointer will never be used. */
20920 gcc_unreachable ();
20924 /* Given FROM and TO register numbers, say whether this elimination is
20925 allowed. Frame pointer elimination is automatically handled.
20927 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20928 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20929 pointer, we must eliminate FRAME_POINTER_REGNUM into
20930 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20931 ARG_POINTER_REGNUM. */
20933 bool
20934 arm_can_eliminate (const int from, const int to)
20936 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20937 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20938 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20939 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20940 true);
20943 /* Emit RTL to save coprocessor registers on function entry. Returns the
20944 number of bytes pushed. */
20946 static int
20947 arm_save_coproc_regs(void)
20949 int saved_size = 0;
20950 unsigned reg;
20951 unsigned start_reg;
20952 rtx insn;
20954 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20955 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20957 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20958 insn = gen_rtx_MEM (V2SImode, insn);
20959 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20960 RTX_FRAME_RELATED_P (insn) = 1;
20961 saved_size += 8;
20964 if (TARGET_HARD_FLOAT && TARGET_VFP)
20966 start_reg = FIRST_VFP_REGNUM;
20968 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20970 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20971 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20973 if (start_reg != reg)
20974 saved_size += vfp_emit_fstmd (start_reg,
20975 (reg - start_reg) / 2);
20976 start_reg = reg + 2;
20979 if (start_reg != reg)
20980 saved_size += vfp_emit_fstmd (start_reg,
20981 (reg - start_reg) / 2);
20983 return saved_size;
20987 /* Set the Thumb frame pointer from the stack pointer. */
20989 static void
20990 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20992 HOST_WIDE_INT amount;
20993 rtx insn, dwarf;
20995 amount = offsets->outgoing_args - offsets->locals_base;
20996 if (amount < 1024)
20997 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20998 stack_pointer_rtx, GEN_INT (amount)));
20999 else
21001 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21002 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21003 expects the first two operands to be the same. */
21004 if (TARGET_THUMB2)
21006 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21007 stack_pointer_rtx,
21008 hard_frame_pointer_rtx));
21010 else
21012 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21013 hard_frame_pointer_rtx,
21014 stack_pointer_rtx));
21016 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21017 plus_constant (Pmode, stack_pointer_rtx, amount));
21018 RTX_FRAME_RELATED_P (dwarf) = 1;
21019 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21022 RTX_FRAME_RELATED_P (insn) = 1;
21025 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21026 function. */
21027 void
21028 arm_expand_prologue (void)
21030 rtx amount;
21031 rtx insn;
21032 rtx ip_rtx;
21033 unsigned long live_regs_mask;
21034 unsigned long func_type;
21035 int fp_offset = 0;
21036 int saved_pretend_args = 0;
21037 int saved_regs = 0;
21038 unsigned HOST_WIDE_INT args_to_push;
21039 arm_stack_offsets *offsets;
21041 func_type = arm_current_func_type ();
21043 /* Naked functions don't have prologues. */
21044 if (IS_NAKED (func_type))
21045 return;
21047 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21048 args_to_push = crtl->args.pretend_args_size;
21050 /* Compute which register we will have to save onto the stack. */
21051 offsets = arm_get_frame_offsets ();
21052 live_regs_mask = offsets->saved_regs_mask;
21054 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21056 if (IS_STACKALIGN (func_type))
21058 rtx r0, r1;
21060 /* Handle a word-aligned stack pointer. We generate the following:
21062 mov r0, sp
21063 bic r1, r0, #7
21064 mov sp, r1
21065 <save and restore r0 in normal prologue/epilogue>
21066 mov sp, r0
21067 bx lr
21069 The unwinder doesn't need to know about the stack realignment.
21070 Just tell it we saved SP in r0. */
21071 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21073 r0 = gen_rtx_REG (SImode, 0);
21074 r1 = gen_rtx_REG (SImode, 1);
21076 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21077 RTX_FRAME_RELATED_P (insn) = 1;
21078 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21080 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21082 /* ??? The CFA changes here, which may cause GDB to conclude that it
21083 has entered a different function. That said, the unwind info is
21084 correct, individually, before and after this instruction because
21085 we've described the save of SP, which will override the default
21086 handling of SP as restoring from the CFA. */
21087 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21090 /* For APCS frames, if IP register is clobbered
21091 when creating frame, save that register in a special
21092 way. */
21093 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21095 if (IS_INTERRUPT (func_type))
21097 /* Interrupt functions must not corrupt any registers.
21098 Creating a frame pointer however, corrupts the IP
21099 register, so we must push it first. */
21100 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21102 /* Do not set RTX_FRAME_RELATED_P on this insn.
21103 The dwarf stack unwinding code only wants to see one
21104 stack decrement per function, and this is not it. If
21105 this instruction is labeled as being part of the frame
21106 creation sequence then dwarf2out_frame_debug_expr will
21107 die when it encounters the assignment of IP to FP
21108 later on, since the use of SP here establishes SP as
21109 the CFA register and not IP.
21111 Anyway this instruction is not really part of the stack
21112 frame creation although it is part of the prologue. */
21114 else if (IS_NESTED (func_type))
21116 /* The static chain register is the same as the IP register
21117 used as a scratch register during stack frame creation.
21118 To get around this need to find somewhere to store IP
21119 whilst the frame is being created. We try the following
21120 places in order:
21122 1. The last argument register r3 if it is available.
21123 2. A slot on the stack above the frame if there are no
21124 arguments to push onto the stack.
21125 3. Register r3 again, after pushing the argument registers
21126 onto the stack, if this is a varargs function.
21127 4. The last slot on the stack created for the arguments to
21128 push, if this isn't a varargs function.
21130 Note - we only need to tell the dwarf2 backend about the SP
21131 adjustment in the second variant; the static chain register
21132 doesn't need to be unwound, as it doesn't contain a value
21133 inherited from the caller. */
21135 if (!arm_r3_live_at_start_p ())
21136 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21137 else if (args_to_push == 0)
21139 rtx addr, dwarf;
21141 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21142 saved_regs += 4;
21144 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21145 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21146 fp_offset = 4;
21148 /* Just tell the dwarf backend that we adjusted SP. */
21149 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21150 plus_constant (Pmode, stack_pointer_rtx,
21151 -fp_offset));
21152 RTX_FRAME_RELATED_P (insn) = 1;
21153 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21155 else
21157 /* Store the args on the stack. */
21158 if (cfun->machine->uses_anonymous_args)
21160 insn
21161 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21162 (0xf0 >> (args_to_push / 4)) & 0xf);
21163 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21164 saved_pretend_args = 1;
21166 else
21168 rtx addr, dwarf;
21170 if (args_to_push == 4)
21171 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21172 else
21173 addr
21174 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21175 plus_constant (Pmode,
21176 stack_pointer_rtx,
21177 -args_to_push));
21179 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21181 /* Just tell the dwarf backend that we adjusted SP. */
21182 dwarf
21183 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21184 plus_constant (Pmode, stack_pointer_rtx,
21185 -args_to_push));
21186 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21189 RTX_FRAME_RELATED_P (insn) = 1;
21190 fp_offset = args_to_push;
21191 args_to_push = 0;
21195 insn = emit_set_insn (ip_rtx,
21196 plus_constant (Pmode, stack_pointer_rtx,
21197 fp_offset));
21198 RTX_FRAME_RELATED_P (insn) = 1;
21201 if (args_to_push)
21203 /* Push the argument registers, or reserve space for them. */
21204 if (cfun->machine->uses_anonymous_args)
21205 insn = emit_multi_reg_push
21206 ((0xf0 >> (args_to_push / 4)) & 0xf,
21207 (0xf0 >> (args_to_push / 4)) & 0xf);
21208 else
21209 insn = emit_insn
21210 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21211 GEN_INT (- args_to_push)));
21212 RTX_FRAME_RELATED_P (insn) = 1;
21215 /* If this is an interrupt service routine, and the link register
21216 is going to be pushed, and we're not generating the extra
21217 push of IP (needed when a frame is needed and the frame layout is APCS),
21218 subtracting four from LR now will mean that the function return
21219 can be done with a single instruction. */
21220 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21221 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21222 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21223 && TARGET_ARM)
21225 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21227 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21230 if (live_regs_mask)
21232 unsigned long dwarf_regs_mask = live_regs_mask;
21234 saved_regs += bit_count (live_regs_mask) * 4;
21235 if (optimize_size && !frame_pointer_needed
21236 && saved_regs == offsets->saved_regs - offsets->saved_args)
21238 /* If no coprocessor registers are being pushed and we don't have
21239 to worry about a frame pointer then push extra registers to
21240 create the stack frame. This is done in a way that does not
21241 alter the frame layout, so is independent of the epilogue. */
21242 int n;
21243 int frame;
21244 n = 0;
21245 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21246 n++;
21247 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21248 if (frame && n * 4 >= frame)
21250 n = frame / 4;
21251 live_regs_mask |= (1 << n) - 1;
21252 saved_regs += frame;
21256 if (TARGET_LDRD
21257 && current_tune->prefer_ldrd_strd
21258 && !optimize_function_for_size_p (cfun))
21260 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21261 if (TARGET_THUMB2)
21262 thumb2_emit_strd_push (live_regs_mask);
21263 else if (TARGET_ARM
21264 && !TARGET_APCS_FRAME
21265 && !IS_INTERRUPT (func_type))
21266 arm_emit_strd_push (live_regs_mask);
21267 else
21269 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21270 RTX_FRAME_RELATED_P (insn) = 1;
21273 else
21275 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21276 RTX_FRAME_RELATED_P (insn) = 1;
21280 if (! IS_VOLATILE (func_type))
21281 saved_regs += arm_save_coproc_regs ();
21283 if (frame_pointer_needed && TARGET_ARM)
21285 /* Create the new frame pointer. */
21286 if (TARGET_APCS_FRAME)
21288 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21289 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21290 RTX_FRAME_RELATED_P (insn) = 1;
21292 if (IS_NESTED (func_type))
21294 /* Recover the static chain register. */
21295 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21296 insn = gen_rtx_REG (SImode, 3);
21297 else
21299 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21300 insn = gen_frame_mem (SImode, insn);
21302 emit_set_insn (ip_rtx, insn);
21303 /* Add a USE to stop propagate_one_insn() from barfing. */
21304 emit_insn (gen_force_register_use (ip_rtx));
21307 else
21309 insn = GEN_INT (saved_regs - 4);
21310 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21311 stack_pointer_rtx, insn));
21312 RTX_FRAME_RELATED_P (insn) = 1;
21316 if (flag_stack_usage_info)
21317 current_function_static_stack_size
21318 = offsets->outgoing_args - offsets->saved_args;
21320 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21322 /* This add can produce multiple insns for a large constant, so we
21323 need to get tricky. */
21324 rtx last = get_last_insn ();
21326 amount = GEN_INT (offsets->saved_args + saved_regs
21327 - offsets->outgoing_args);
21329 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21330 amount));
21333 last = last ? NEXT_INSN (last) : get_insns ();
21334 RTX_FRAME_RELATED_P (last) = 1;
21336 while (last != insn);
21338 /* If the frame pointer is needed, emit a special barrier that
21339 will prevent the scheduler from moving stores to the frame
21340 before the stack adjustment. */
21341 if (frame_pointer_needed)
21342 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21343 hard_frame_pointer_rtx));
21347 if (frame_pointer_needed && TARGET_THUMB2)
21348 thumb_set_frame_pointer (offsets);
21350 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21352 unsigned long mask;
21354 mask = live_regs_mask;
21355 mask &= THUMB2_WORK_REGS;
21356 if (!IS_NESTED (func_type))
21357 mask |= (1 << IP_REGNUM);
21358 arm_load_pic_register (mask);
21361 /* If we are profiling, make sure no instructions are scheduled before
21362 the call to mcount. Similarly if the user has requested no
21363 scheduling in the prolog. Similarly if we want non-call exceptions
21364 using the EABI unwinder, to prevent faulting instructions from being
21365 swapped with a stack adjustment. */
21366 if (crtl->profile || !TARGET_SCHED_PROLOG
21367 || (arm_except_unwind_info (&global_options) == UI_TARGET
21368 && cfun->can_throw_non_call_exceptions))
21369 emit_insn (gen_blockage ());
21371 /* If the link register is being kept alive, with the return address in it,
21372 then make sure that it does not get reused by the ce2 pass. */
21373 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21374 cfun->machine->lr_save_eliminated = 1;
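/* Editorial illustration (not part of the original sources): for a plain
   APCS-frame ARM function with a small frame, the RTL emitted above
   typically assembles to the classic prologue

     mov    ip, sp
     stmfd  sp!, {fp, ip, lr, pc}
     sub    fp, ip, #4
     sub    sp, sp, #8            @ allocate locals / outgoing arguments

   with interrupt, nested, stack-realigned and Thumb-2 variants adding or
   reordering steps as described in the comments above.  */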
21377 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21378 static void
21379 arm_print_condition (FILE *stream)
21381 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21383 /* Branch conversion is not implemented for Thumb-2. */
21384 if (TARGET_THUMB)
21386 output_operand_lossage ("predicated Thumb instruction");
21387 return;
21389 if (current_insn_predicate != NULL)
21391 output_operand_lossage
21392 ("predicated instruction in conditional sequence");
21393 return;
21396 fputs (arm_condition_codes[arm_current_cc], stream);
21398 else if (current_insn_predicate)
21400 enum arm_cond_code code;
21402 if (TARGET_THUMB1)
21404 output_operand_lossage ("predicated Thumb instruction");
21405 return;
21408 code = get_arm_condition_code (current_insn_predicate);
21409 fputs (arm_condition_codes[code], stream);
21414 /* Globally reserved letters: acln
21415 Punctuation letters currently used: @_|?().!#
21416 Lower case letters currently used: bcdefhimpqtvwxyz
21417 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21418 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21420 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21422 If CODE is 'd', then the X is a condition operand and the instruction
21423 should only be executed if the condition is true.
21424 If CODE is 'D', then the X is a condition operand and the instruction
21425 should only be executed if the condition is false: however, if the mode
21426 of the comparison is CCFPEmode, then always execute the instruction -- we
21427 do this because in these circumstances !GE does not necessarily imply LT;
21428 in these cases the instruction pattern will take care to make sure that
21429 an instruction containing %d will follow, thereby undoing the effects of
21430 doing this instruction unconditionally.
21431 If CODE is 'N' then X is a floating point operand that must be negated
21432 before output.
21433 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21434 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
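/* Illustrative note (not part of the original comment): these codes appear in
   insn output templates as %<code><operand-number>.  For instance, a
   hypothetical template "mvn%?\t%0, #%B1" would print the current condition
   via the '?' punctuation code and the bitwise inverse of const_int operand 1
   via the 'B' code handled below.  */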
21435 static void
21436 arm_print_operand (FILE *stream, rtx x, int code)
21438 switch (code)
21440 case '@':
21441 fputs (ASM_COMMENT_START, stream);
21442 return;
21444 case '_':
21445 fputs (user_label_prefix, stream);
21446 return;
21448 case '|':
21449 fputs (REGISTER_PREFIX, stream);
21450 return;
21452 case '?':
21453 arm_print_condition (stream);
21454 return;
21456 case '(':
21457 /* Nothing in unified syntax, otherwise the current condition code. */
21458 if (!TARGET_UNIFIED_ASM)
21459 arm_print_condition (stream);
21460 break;
21462 case ')':
21463 /* The current condition code in unified syntax, otherwise nothing. */
21464 if (TARGET_UNIFIED_ASM)
21465 arm_print_condition (stream);
21466 break;
21468 case '.':
21469 /* The current condition code for a condition code setting instruction.
21470 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21471 if (TARGET_UNIFIED_ASM)
21473 fputc('s', stream);
21474 arm_print_condition (stream);
21476 else
21478 arm_print_condition (stream);
21479 fputc('s', stream);
21481 return;
21483 case '!':
21484 /* If the instruction is conditionally executed then print
21485 the current condition code, otherwise print 's'. */
21486 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21487 if (current_insn_predicate)
21488 arm_print_condition (stream);
21489 else
21490 fputc('s', stream);
21491 break;
21493 /* %# is a "break" sequence. It doesn't output anything, but is used to
21494 separate e.g. operand numbers from following text, if that text consists
21495 of further digits which we don't want to be part of the operand
21496 number. */
21497 case '#':
21498 return;
21500 case 'N':
21502 REAL_VALUE_TYPE r;
21503 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21504 r = real_value_negate (&r);
21505 fprintf (stream, "%s", fp_const_from_val (&r));
21507 return;
21509 /* An integer or symbol address without a preceding # sign. */
21510 case 'c':
21511 switch (GET_CODE (x))
21513 case CONST_INT:
21514 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21515 break;
21517 case SYMBOL_REF:
21518 output_addr_const (stream, x);
21519 break;
21521 case CONST:
21522 if (GET_CODE (XEXP (x, 0)) == PLUS
21523 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21525 output_addr_const (stream, x);
21526 break;
21528 /* Fall through. */
21530 default:
21531 output_operand_lossage ("Unsupported operand for code '%c'", code);
21533 return;
21535 /* An integer that we want to print in HEX. */
21536 case 'x':
21537 switch (GET_CODE (x))
21539 case CONST_INT:
21540 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21541 break;
21543 default:
21544 output_operand_lossage ("Unsupported operand for code '%c'", code);
21546 return;
21548 case 'B':
21549 if (CONST_INT_P (x))
21551 HOST_WIDE_INT val;
21552 val = ARM_SIGN_EXTEND (~INTVAL (x));
21553 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21555 else
21557 putc ('~', stream);
21558 output_addr_const (stream, x);
21560 return;
21562 case 'b':
21563 /* Print the log2 of a CONST_INT. */
21565 HOST_WIDE_INT val;
21567 if (!CONST_INT_P (x)
21568 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21569 output_operand_lossage ("Unsupported operand for code '%c'", code);
21570 else
21571 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21573 return;
21575 case 'L':
21576 /* The low 16 bits of an immediate constant. */
21577 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21578 return;
21580 case 'i':
21581 fprintf (stream, "%s", arithmetic_instr (x, 1));
21582 return;
21584 case 'I':
21585 fprintf (stream, "%s", arithmetic_instr (x, 0));
21586 return;
21588 case 'S':
21590 HOST_WIDE_INT val;
21591 const char *shift;
21593 shift = shift_op (x, &val);
21595 if (shift)
21597 fprintf (stream, ", %s ", shift);
21598 if (val == -1)
21599 arm_print_operand (stream, XEXP (x, 1), 0);
21600 else
21601 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21604 return;
21606 /* An explanation of the 'Q', 'R' and 'H' register operands:
21608 In a pair of registers containing a DI or DF value the 'Q'
21609 operand returns the register number of the register containing
21610 the least significant part of the value. The 'R' operand returns
21611 the register number of the register containing the most
21612 significant part of the value.
21614 The 'H' operand returns the higher of the two register numbers.
21615 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21616 same as the 'Q' operand, since the most significant part of the
21617 value is held in the lower number register. The reverse is true
21618 on systems where WORDS_BIG_ENDIAN is false.
21620 The purpose of these operands is to distinguish between cases
21621 where the endian-ness of the values is important (for example
21622 when they are added together), and cases where the endian-ness
21623 is irrelevant, but the order of register operations is important.
21624 For example when loading a value from memory into a register
21625 pair, the endian-ness does not matter. Provided that the value
21626 from the lower memory address is put into the lower numbered
21627 register, and the value from the higher address is put into the
21628 higher numbered register, the load will work regardless of whether
21629 the value being loaded is big-wordian or little-wordian. The
21630 order of the two register loads can matter however, if the address
21631 of the memory location is actually held in one of the registers
21632 being overwritten by the load.
21634 The 'Q' and 'R' constraints are also available for 64-bit
21635 constants. */
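/* Worked example (illustrative only): with a DImode value held in the pair
   {r4, r5} on a target where WORDS_BIG_ENDIAN is false, '%Q' prints r4 (the
   least significant word), '%R' prints r5 (the most significant word) and
   '%H' prints r5 (the higher-numbered register).  With WORDS_BIG_ENDIAN true,
   '%Q' prints r5 and '%R' prints r4, while '%H' still prints r5.  */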
21636 case 'Q':
21637 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21639 rtx part = gen_lowpart (SImode, x);
21640 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21641 return;
21644 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21646 output_operand_lossage ("invalid operand for code '%c'", code);
21647 return;
21650 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21651 return;
21653 case 'R':
21654 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21656 enum machine_mode mode = GET_MODE (x);
21657 rtx part;
21659 if (mode == VOIDmode)
21660 mode = DImode;
21661 part = gen_highpart_mode (SImode, mode, x);
21662 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21663 return;
21666 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21668 output_operand_lossage ("invalid operand for code '%c'", code);
21669 return;
21672 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21673 return;
21675 case 'H':
21676 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21678 output_operand_lossage ("invalid operand for code '%c'", code);
21679 return;
21682 asm_fprintf (stream, "%r", REGNO (x) + 1);
21683 return;
21685 case 'J':
21686 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21688 output_operand_lossage ("invalid operand for code '%c'", code);
21689 return;
21692 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21693 return;
21695 case 'K':
21696 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21698 output_operand_lossage ("invalid operand for code '%c'", code);
21699 return;
21702 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21703 return;
21705 case 'm':
21706 asm_fprintf (stream, "%r",
21707 REG_P (XEXP (x, 0))
21708 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21709 return;
21711 case 'M':
21712 asm_fprintf (stream, "{%r-%r}",
21713 REGNO (x),
21714 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21715 return;
21717 /* Like 'M', but writing doubleword vector registers, for use by Neon
21718 insns. */
21719 case 'h':
21721 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21722 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21723 if (numregs == 1)
21724 asm_fprintf (stream, "{d%d}", regno);
21725 else
21726 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21728 return;
21730 case 'd':
21731 /* CONST_TRUE_RTX means always -- that's the default. */
21732 if (x == const_true_rtx)
21733 return;
21735 if (!COMPARISON_P (x))
21737 output_operand_lossage ("invalid operand for code '%c'", code);
21738 return;
21741 fputs (arm_condition_codes[get_arm_condition_code (x)],
21742 stream);
21743 return;
21745 case 'D':
21746 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21747 want to do that. */
21748 if (x == const_true_rtx)
21750 output_operand_lossage ("instruction never executed");
21751 return;
21753 if (!COMPARISON_P (x))
21755 output_operand_lossage ("invalid operand for code '%c'", code);
21756 return;
21759 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21760 (get_arm_condition_code (x))],
21761 stream);
21762 return;
21764 case 's':
21765 case 'V':
21766 case 'W':
21767 case 'X':
21768 case 'Y':
21769 case 'Z':
21770 /* Former Maverick support, removed after GCC-4.7. */
21771 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21772 return;
21774 case 'U':
21775 if (!REG_P (x)
21776 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21777 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21778 /* Bad value for wCG register number. */
21780 output_operand_lossage ("invalid operand for code '%c'", code);
21781 return;
21784 else
21785 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21786 return;
21788 /* Print an iWMMXt control register name. */
21789 case 'w':
21790 if (!CONST_INT_P (x)
21791 || INTVAL (x) < 0
21792 || INTVAL (x) >= 16)
21793 /* Bad value for wC register number. */
21795 output_operand_lossage ("invalid operand for code '%c'", code);
21796 return;
21799 else
21801 static const char * wc_reg_names [16] =
21803 "wCID", "wCon", "wCSSF", "wCASF",
21804 "wC4", "wC5", "wC6", "wC7",
21805 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21806 "wC12", "wC13", "wC14", "wC15"
21809 fputs (wc_reg_names [INTVAL (x)], stream);
21811 return;
21813 /* Print the high single-precision register of a VFP double-precision
21814 register. */
21815 case 'p':
21817 enum machine_mode mode = GET_MODE (x);
21818 int regno;
21820 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21822 output_operand_lossage ("invalid operand for code '%c'", code);
21823 return;
21826 regno = REGNO (x);
21827 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21829 output_operand_lossage ("invalid operand for code '%c'", code);
21830 return;
21833 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21835 return;
21837 /* Print a VFP/Neon double precision or quad precision register name. */
21838 case 'P':
21839 case 'q':
21841 enum machine_mode mode = GET_MODE (x);
21842 int is_quad = (code == 'q');
21843 int regno;
21845 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21847 output_operand_lossage ("invalid operand for code '%c'", code);
21848 return;
21851 if (!REG_P (x)
21852 || !IS_VFP_REGNUM (REGNO (x)))
21854 output_operand_lossage ("invalid operand for code '%c'", code);
21855 return;
21858 regno = REGNO (x);
21859 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21860 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21862 output_operand_lossage ("invalid operand for code '%c'", code);
21863 return;
21866 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21867 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21869 return;
21871 /* These two codes print the low/high doubleword register of a Neon quad
21872 register, respectively. For pair-structure types, can also print
21873 low/high quadword registers. */
21874 case 'e':
21875 case 'f':
21877 enum machine_mode mode = GET_MODE (x);
21878 int regno;
21880 if ((GET_MODE_SIZE (mode) != 16
21881 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21883 output_operand_lossage ("invalid operand for code '%c'", code);
21884 return;
21887 regno = REGNO (x);
21888 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21890 output_operand_lossage ("invalid operand for code '%c'", code);
21891 return;
21894 if (GET_MODE_SIZE (mode) == 16)
21895 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21896 + (code == 'f' ? 1 : 0));
21897 else
21898 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21899 + (code == 'f' ? 1 : 0));
21901 return;
21903 /* Print a VFPv3 floating-point constant, represented as an integer
21904 index. */
21905 case 'G':
21907 int index = vfp3_const_double_index (x);
21908 gcc_assert (index != -1);
21909 fprintf (stream, "%d", index);
21911 return;
21913 /* Print bits representing opcode features for Neon.
21915 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21916 and polynomials as unsigned.
21918 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21920 Bit 2 is 1 for rounding functions, 0 otherwise. */
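/* Example (illustrative): an operand with the value 5 (binary 101) encodes a
   signed, rounding variant: bits 0-1 make the 'T' code below print 's', and
   bit 2 makes the 'O' code print "r".  */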
21922 /* Identify the type as 's', 'u', 'p' or 'f'. */
21923 case 'T':
21925 HOST_WIDE_INT bits = INTVAL (x);
21926 fputc ("uspf"[bits & 3], stream);
21928 return;
21930 /* Likewise, but signed and unsigned integers are both 'i'. */
21931 case 'F':
21933 HOST_WIDE_INT bits = INTVAL (x);
21934 fputc ("iipf"[bits & 3], stream);
21936 return;
21938 /* As for 'T', but emit 'u' instead of 'p'. */
21939 case 't':
21941 HOST_WIDE_INT bits = INTVAL (x);
21942 fputc ("usuf"[bits & 3], stream);
21944 return;
21946 /* Bit 2: rounding (vs none). */
21947 case 'O':
21949 HOST_WIDE_INT bits = INTVAL (x);
21950 fputs ((bits & 4) != 0 ? "r" : "", stream);
21952 return;
21954 /* Memory operand for vld1/vst1 instruction. */
21955 case 'A':
21957 rtx addr;
21958 bool postinc = FALSE;
21959 rtx postinc_reg = NULL;
21960 unsigned align, memsize, align_bits;
21962 gcc_assert (MEM_P (x));
21963 addr = XEXP (x, 0);
21964 if (GET_CODE (addr) == POST_INC)
21966 postinc = 1;
21967 addr = XEXP (addr, 0);
21969 if (GET_CODE (addr) == POST_MODIFY)
21971 postinc_reg = XEXP( XEXP (addr, 1), 1);
21972 addr = XEXP (addr, 0);
21974 asm_fprintf (stream, "[%r", REGNO (addr));
21976 /* We know the alignment of this access, so we can emit a hint in the
21977 instruction (for some alignments) as an aid to the memory subsystem
21978 of the target. */
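/* For example (illustrative), a 16-byte access known to be 16-byte aligned is
   printed as "[rN:128]"; if the alignment is unknown or too small, no
   ":<bits>" hint is emitted at all.  */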
21979 align = MEM_ALIGN (x) >> 3;
21980 memsize = MEM_SIZE (x);
21982 /* Only certain alignment specifiers are supported by the hardware. */
21983 if (memsize == 32 && (align % 32) == 0)
21984 align_bits = 256;
21985 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21986 align_bits = 128;
21987 else if (memsize >= 8 && (align % 8) == 0)
21988 align_bits = 64;
21989 else
21990 align_bits = 0;
21992 if (align_bits != 0)
21993 asm_fprintf (stream, ":%d", align_bits);
21995 asm_fprintf (stream, "]");
21997 if (postinc)
21998 fputs("!", stream);
21999 if (postinc_reg)
22000 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22002 return;
22004 case 'C':
22006 rtx addr;
22008 gcc_assert (MEM_P (x));
22009 addr = XEXP (x, 0);
22010 gcc_assert (REG_P (addr));
22011 asm_fprintf (stream, "[%r]", REGNO (addr));
22013 return;
22015 /* Translate an S register number into a D register number and element index. */
22016 case 'y':
22018 enum machine_mode mode = GET_MODE (x);
22019 int regno;
22021 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22023 output_operand_lossage ("invalid operand for code '%c'", code);
22024 return;
22027 regno = REGNO (x);
22028 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22030 output_operand_lossage ("invalid operand for code '%c'", code);
22031 return;
22034 regno = regno - FIRST_VFP_REGNUM;
22035 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22037 return;
22039 case 'v':
22040 gcc_assert (CONST_DOUBLE_P (x));
22041 int result;
22042 result = vfp3_const_double_for_fract_bits (x);
22043 if (result == 0)
22044 result = vfp3_const_double_for_bits (x);
22045 fprintf (stream, "#%d", result);
22046 return;
22048 /* Register specifier for vld1.16/vst1.16. Translate the S register
22049 number into a D register number and element index. */
22050 case 'z':
22052 enum machine_mode mode = GET_MODE (x);
22053 int regno;
22055 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22057 output_operand_lossage ("invalid operand for code '%c'", code);
22058 return;
22061 regno = REGNO (x);
22062 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22064 output_operand_lossage ("invalid operand for code '%c'", code);
22065 return;
22068 regno = regno - FIRST_VFP_REGNUM;
22069 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22071 return;
22073 default:
22074 if (x == 0)
22076 output_operand_lossage ("missing operand");
22077 return;
22080 switch (GET_CODE (x))
22082 case REG:
22083 asm_fprintf (stream, "%r", REGNO (x));
22084 break;
22086 case MEM:
22087 output_memory_reference_mode = GET_MODE (x);
22088 output_address (XEXP (x, 0));
22089 break;
22091 case CONST_DOUBLE:
22092 if (TARGET_NEON)
22094 char fpstr[20];
22095 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22096 sizeof (fpstr), 0, 1);
22097 fprintf (stream, "#%s", fpstr);
22099 else
22100 fprintf (stream, "#%s", fp_immediate_constant (x));
22101 break;
22103 default:
22104 gcc_assert (GET_CODE (x) != NEG);
22105 fputc ('#', stream);
22106 if (GET_CODE (x) == HIGH)
22108 fputs (":lower16:", stream);
22109 x = XEXP (x, 0);
22112 output_addr_const (stream, x);
22113 break;
22118 /* Target hook for printing a memory address. */
22119 static void
22120 arm_print_operand_address (FILE *stream, rtx x)
22122 if (TARGET_32BIT)
22124 int is_minus = GET_CODE (x) == MINUS;
22126 if (REG_P (x))
22127 asm_fprintf (stream, "[%r]", REGNO (x));
22128 else if (GET_CODE (x) == PLUS || is_minus)
22130 rtx base = XEXP (x, 0);
22131 rtx index = XEXP (x, 1);
22132 HOST_WIDE_INT offset = 0;
22133 if (!REG_P (base)
22134 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22136 /* Ensure that BASE is a register. */
22137 /* (one of them must be). */
22138 /* Also ensure the SP is not used as an index register. */
22139 rtx temp = base;
22140 base = index;
22141 index = temp;
22143 switch (GET_CODE (index))
22145 case CONST_INT:
22146 offset = INTVAL (index);
22147 if (is_minus)
22148 offset = -offset;
22149 asm_fprintf (stream, "[%r, #%wd]",
22150 REGNO (base), offset);
22151 break;
22153 case REG:
22154 asm_fprintf (stream, "[%r, %s%r]",
22155 REGNO (base), is_minus ? "-" : "",
22156 REGNO (index));
22157 break;
22159 case MULT:
22160 case ASHIFTRT:
22161 case LSHIFTRT:
22162 case ASHIFT:
22163 case ROTATERT:
22165 asm_fprintf (stream, "[%r, %s%r",
22166 REGNO (base), is_minus ? "-" : "",
22167 REGNO (XEXP (index, 0)));
22168 arm_print_operand (stream, index, 'S');
22169 fputs ("]", stream);
22170 break;
22173 default:
22174 gcc_unreachable ();
22177 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22178 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22180 extern enum machine_mode output_memory_reference_mode;
22182 gcc_assert (REG_P (XEXP (x, 0)));
22184 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22185 asm_fprintf (stream, "[%r, #%s%d]!",
22186 REGNO (XEXP (x, 0)),
22187 GET_CODE (x) == PRE_DEC ? "-" : "",
22188 GET_MODE_SIZE (output_memory_reference_mode));
22189 else
22190 asm_fprintf (stream, "[%r], #%s%d",
22191 REGNO (XEXP (x, 0)),
22192 GET_CODE (x) == POST_DEC ? "-" : "",
22193 GET_MODE_SIZE (output_memory_reference_mode));
22195 else if (GET_CODE (x) == PRE_MODIFY)
22197 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22198 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22199 asm_fprintf (stream, "#%wd]!",
22200 INTVAL (XEXP (XEXP (x, 1), 1)));
22201 else
22202 asm_fprintf (stream, "%r]!",
22203 REGNO (XEXP (XEXP (x, 1), 1)));
22205 else if (GET_CODE (x) == POST_MODIFY)
22207 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22208 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22209 asm_fprintf (stream, "#%wd",
22210 INTVAL (XEXP (XEXP (x, 1), 1)));
22211 else
22212 asm_fprintf (stream, "%r",
22213 REGNO (XEXP (XEXP (x, 1), 1)));
22215 else output_addr_const (stream, x);
22217 else
22219 if (REG_P (x))
22220 asm_fprintf (stream, "[%r]", REGNO (x));
22221 else if (GET_CODE (x) == POST_INC)
22222 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22223 else if (GET_CODE (x) == PLUS)
22225 gcc_assert (REG_P (XEXP (x, 0)));
22226 if (CONST_INT_P (XEXP (x, 1)))
22227 asm_fprintf (stream, "[%r, #%wd]",
22228 REGNO (XEXP (x, 0)),
22229 INTVAL (XEXP (x, 1)));
22230 else
22231 asm_fprintf (stream, "[%r, %r]",
22232 REGNO (XEXP (x, 0)),
22233 REGNO (XEXP (x, 1)));
22235 else
22236 output_addr_const (stream, x);
22240 /* Target hook for indicating whether a punctuation character for
22241 TARGET_PRINT_OPERAND is valid. */
22242 static bool
22243 arm_print_operand_punct_valid_p (unsigned char code)
22245 return (code == '@' || code == '|' || code == '.'
22246 || code == '(' || code == ')' || code == '#'
22247 || (TARGET_32BIT && (code == '?'))
22248 || (TARGET_THUMB2 && (code == '!'))
22249 || (TARGET_THUMB && (code == '_')));
22252 /* Target hook for assembling integer objects. The ARM version needs to
22253 handle word-sized values specially. */
22254 static bool
22255 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22257 enum machine_mode mode;
22259 if (size == UNITS_PER_WORD && aligned_p)
22261 fputs ("\t.word\t", asm_out_file);
22262 output_addr_const (asm_out_file, x);
22264 /* Mark symbols as position independent. We only do this in the
22265 .text segment, not in the .data segment. */
22266 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22267 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22269 /* See legitimize_pic_address for an explanation of the
22270 TARGET_VXWORKS_RTP check. */
22271 if (!arm_pic_data_is_text_relative
22272 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22273 fputs ("(GOT)", asm_out_file);
22274 else
22275 fputs ("(GOTOFF)", asm_out_file);
22277 fputc ('\n', asm_out_file);
22278 return true;
22281 mode = GET_MODE (x);
22283 if (arm_vector_mode_supported_p (mode))
22285 int i, units;
22287 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22289 units = CONST_VECTOR_NUNITS (x);
22290 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22292 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22293 for (i = 0; i < units; i++)
22295 rtx elt = CONST_VECTOR_ELT (x, i);
22296 assemble_integer
22297 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22299 else
22300 for (i = 0; i < units; i++)
22302 rtx elt = CONST_VECTOR_ELT (x, i);
22303 REAL_VALUE_TYPE rval;
22305 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22307 assemble_real
22308 (rval, GET_MODE_INNER (mode),
22309 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22312 return true;
22315 return default_assemble_integer (x, size, aligned_p);
22318 static void
22319 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22321 section *s;
22323 if (!TARGET_AAPCS_BASED)
22325 (is_ctor ?
22326 default_named_section_asm_out_constructor
22327 : default_named_section_asm_out_destructor) (symbol, priority);
22328 return;
22331 /* Put these in the .init_array section, using a special relocation. */
22332 if (priority != DEFAULT_INIT_PRIORITY)
22334 char buf[18];
22335 sprintf (buf, "%s.%.5u",
22336 is_ctor ? ".init_array" : ".fini_array",
22337 priority);
22338 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22340 else if (is_ctor)
22341 s = ctors_section;
22342 else
22343 s = dtors_section;
22345 switch_to_section (s);
22346 assemble_align (POINTER_SIZE);
22347 fputs ("\t.word\t", asm_out_file);
22348 output_addr_const (asm_out_file, symbol);
22349 fputs ("(target1)\n", asm_out_file);
22352 /* Add a function to the list of static constructors. */
22354 static void
22355 arm_elf_asm_constructor (rtx symbol, int priority)
22357 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22360 /* Add a function to the list of static destructors. */
22362 static void
22363 arm_elf_asm_destructor (rtx symbol, int priority)
22365 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22368 /* A finite state machine takes care of noticing whether or not instructions
22369 can be conditionally executed, and thus decrease execution time and code
22370 size by deleting branch instructions. The fsm is controlled by
22371 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22373 /* The states of the fsm controlling condition codes are:
22374 0: normal, do nothing special
22375 1: make ASM_OUTPUT_OPCODE not output this instruction
22376 2: make ASM_OUTPUT_OPCODE not output this instruction
22377 3: make instructions conditional
22378 4: make instructions conditional
22380 State transitions (state->state by whom under condition):
22381 0 -> 1 final_prescan_insn if the `target' is a label
22382 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22383 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22384 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22385 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22386 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22387 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22388 (the target insn is arm_target_insn).
22390 If the jump clobbers the conditions then we use states 2 and 4.
22392 A similar thing can be done with conditional return insns.
22394 XXX In case the `target' is an unconditional branch, this conditionalising
22395 of the instructions always reduces code size, but not always execution
22396 time. But then, I want to reduce the code size to somewhere near what
22397 /bin/cc produces. */
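/* Illustrative example (not from the original comment): in ARM state a
   sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   can have the conditional branch deleted and the skipped instruction output
   as "addne r1, r1, #1", i.e. predicated with the inverse of the branch
   condition.  */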
22399 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22400 instructions. When a COND_EXEC instruction is seen the subsequent
22401 instructions are scanned so that multiple conditional instructions can be
22402 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22403 specify the length and true/false mask for the IT block. These will be
22404 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
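/* For instance (illustrative), three conditionally executed instructions with
   predicates <cond>, <cond>, !<cond> give arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b011, which thumb2_asm_output_opcode below prints as
   "itte <cond>" ahead of the first instruction of the block.  */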
22406 /* Returns the index of the ARM condition code string in
22407 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22408 COMPARISON should be an rtx like `(eq (...) (...))'. */
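/* For example (illustrative), a comparison rtx of the form
   (eq (reg:CC CC_REGNUM) (const_int 0)) is in CCmode and yields ARM_EQ.  */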
22410 enum arm_cond_code
22411 maybe_get_arm_condition_code (rtx comparison)
22413 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22414 enum arm_cond_code code;
22415 enum rtx_code comp_code = GET_CODE (comparison);
22417 if (GET_MODE_CLASS (mode) != MODE_CC)
22418 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22419 XEXP (comparison, 1));
22421 switch (mode)
22423 case CC_DNEmode: code = ARM_NE; goto dominance;
22424 case CC_DEQmode: code = ARM_EQ; goto dominance;
22425 case CC_DGEmode: code = ARM_GE; goto dominance;
22426 case CC_DGTmode: code = ARM_GT; goto dominance;
22427 case CC_DLEmode: code = ARM_LE; goto dominance;
22428 case CC_DLTmode: code = ARM_LT; goto dominance;
22429 case CC_DGEUmode: code = ARM_CS; goto dominance;
22430 case CC_DGTUmode: code = ARM_HI; goto dominance;
22431 case CC_DLEUmode: code = ARM_LS; goto dominance;
22432 case CC_DLTUmode: code = ARM_CC;
22434 dominance:
22435 if (comp_code == EQ)
22436 return ARM_INVERSE_CONDITION_CODE (code);
22437 if (comp_code == NE)
22438 return code;
22439 return ARM_NV;
22441 case CC_NOOVmode:
22442 switch (comp_code)
22444 case NE: return ARM_NE;
22445 case EQ: return ARM_EQ;
22446 case GE: return ARM_PL;
22447 case LT: return ARM_MI;
22448 default: return ARM_NV;
22451 case CC_Zmode:
22452 switch (comp_code)
22454 case NE: return ARM_NE;
22455 case EQ: return ARM_EQ;
22456 default: return ARM_NV;
22459 case CC_Nmode:
22460 switch (comp_code)
22462 case NE: return ARM_MI;
22463 case EQ: return ARM_PL;
22464 default: return ARM_NV;
22467 case CCFPEmode:
22468 case CCFPmode:
22469 /* We can handle all cases except UNEQ and LTGT. */
22470 switch (comp_code)
22472 case GE: return ARM_GE;
22473 case GT: return ARM_GT;
22474 case LE: return ARM_LS;
22475 case LT: return ARM_MI;
22476 case NE: return ARM_NE;
22477 case EQ: return ARM_EQ;
22478 case ORDERED: return ARM_VC;
22479 case UNORDERED: return ARM_VS;
22480 case UNLT: return ARM_LT;
22481 case UNLE: return ARM_LE;
22482 case UNGT: return ARM_HI;
22483 case UNGE: return ARM_PL;
22484 /* UNEQ and LTGT do not have a representation. */
22485 case UNEQ: /* Fall through. */
22486 case LTGT: /* Fall through. */
22487 default: return ARM_NV;
22490 case CC_SWPmode:
22491 switch (comp_code)
22493 case NE: return ARM_NE;
22494 case EQ: return ARM_EQ;
22495 case GE: return ARM_LE;
22496 case GT: return ARM_LT;
22497 case LE: return ARM_GE;
22498 case LT: return ARM_GT;
22499 case GEU: return ARM_LS;
22500 case GTU: return ARM_CC;
22501 case LEU: return ARM_CS;
22502 case LTU: return ARM_HI;
22503 default: return ARM_NV;
22506 case CC_Cmode:
22507 switch (comp_code)
22509 case LTU: return ARM_CS;
22510 case GEU: return ARM_CC;
22511 default: return ARM_NV;
22514 case CC_CZmode:
22515 switch (comp_code)
22517 case NE: return ARM_NE;
22518 case EQ: return ARM_EQ;
22519 case GEU: return ARM_CS;
22520 case GTU: return ARM_HI;
22521 case LEU: return ARM_LS;
22522 case LTU: return ARM_CC;
22523 default: return ARM_NV;
22526 case CC_NCVmode:
22527 switch (comp_code)
22529 case GE: return ARM_GE;
22530 case LT: return ARM_LT;
22531 case GEU: return ARM_CS;
22532 case LTU: return ARM_CC;
22533 default: return ARM_NV;
22536 case CCmode:
22537 switch (comp_code)
22539 case NE: return ARM_NE;
22540 case EQ: return ARM_EQ;
22541 case GE: return ARM_GE;
22542 case GT: return ARM_GT;
22543 case LE: return ARM_LE;
22544 case LT: return ARM_LT;
22545 case GEU: return ARM_CS;
22546 case GTU: return ARM_HI;
22547 case LEU: return ARM_LS;
22548 case LTU: return ARM_CC;
22549 default: return ARM_NV;
22552 default: gcc_unreachable ();
22556 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22557 static enum arm_cond_code
22558 get_arm_condition_code (rtx comparison)
22560 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22561 gcc_assert (code != ARM_NV);
22562 return code;
22565 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22566 instructions. */
22567 void
22568 thumb2_final_prescan_insn (rtx insn)
22570 rtx first_insn = insn;
22571 rtx body = PATTERN (insn);
22572 rtx predicate;
22573 enum arm_cond_code code;
22574 int n;
22575 int mask;
22576 int max;
22578 /* max_insns_skipped in the tune was already taken into account in the
22579 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22580 just emit the IT blocks as we can. It does not make sense to split
22581 the IT blocks. */
22582 max = MAX_INSN_PER_IT_BLOCK;
22584 /* Remove the previous insn from the count of insns to be output. */
22585 if (arm_condexec_count)
22586 arm_condexec_count--;
22588 /* Nothing to do if we are already inside a conditional block. */
22589 if (arm_condexec_count)
22590 return;
22592 if (GET_CODE (body) != COND_EXEC)
22593 return;
22595 /* Conditional jumps are implemented directly. */
22596 if (JUMP_P (insn))
22597 return;
22599 predicate = COND_EXEC_TEST (body);
22600 arm_current_cc = get_arm_condition_code (predicate);
22602 n = get_attr_ce_count (insn);
22603 arm_condexec_count = 1;
22604 arm_condexec_mask = (1 << n) - 1;
22605 arm_condexec_masklen = n;
22606 /* See if subsequent instructions can be combined into the same block. */
22607 for (;;)
22609 insn = next_nonnote_insn (insn);
22611 /* Jumping into the middle of an IT block is illegal, so a label or
22612 barrier terminates the block. */
22613 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22614 break;
22616 body = PATTERN (insn);
22617 /* USE and CLOBBER aren't really insns, so just skip them. */
22618 if (GET_CODE (body) == USE
22619 || GET_CODE (body) == CLOBBER)
22620 continue;
22622 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22623 if (GET_CODE (body) != COND_EXEC)
22624 break;
22625 /* Maximum number of conditionally executed instructions in a block. */
22626 n = get_attr_ce_count (insn);
22627 if (arm_condexec_masklen + n > max)
22628 break;
22630 predicate = COND_EXEC_TEST (body);
22631 code = get_arm_condition_code (predicate);
22632 mask = (1 << n) - 1;
22633 if (arm_current_cc == code)
22634 arm_condexec_mask |= (mask << arm_condexec_masklen);
22635 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22636 break;
22638 arm_condexec_count++;
22639 arm_condexec_masklen += n;
22641 /* A jump must be the last instruction in a conditional block. */
22642 if (JUMP_P (insn))
22643 break;
22645 /* Restore recog_data (getting the attributes of other insns can
22646 destroy this array, but final.c assumes that it remains intact
22647 across this call). */
22648 extract_constrain_insn_cached (first_insn);
22651 void
22652 arm_final_prescan_insn (rtx insn)
22654 /* BODY will hold the body of INSN. */
22655 rtx body = PATTERN (insn);
22657 /* This will be 1 if trying to repeat the trick, and things need to be
22658 reversed if it appears to fail. */
22659 int reverse = 0;
22661 /* If we start with a return insn, we only succeed if we find another one. */
22662 int seeking_return = 0;
22663 enum rtx_code return_code = UNKNOWN;
22665 /* START_INSN will hold the insn from where we start looking. This is the
22666 first insn after the following code_label if REVERSE is true. */
22667 rtx start_insn = insn;
22669 /* If in state 4, check if the target branch is reached, in order to
22670 change back to state 0. */
22671 if (arm_ccfsm_state == 4)
22673 if (insn == arm_target_insn)
22675 arm_target_insn = NULL;
22676 arm_ccfsm_state = 0;
22678 return;
22681 /* If in state 3, it is possible to repeat the trick, if this insn is an
22682 unconditional branch to a label, and immediately following this branch
22683 is the previous target label which is only used once, and the label this
22684 branch jumps to is not too far off. */
22685 if (arm_ccfsm_state == 3)
22687 if (simplejump_p (insn))
22689 start_insn = next_nonnote_insn (start_insn);
22690 if (BARRIER_P (start_insn))
22692 /* XXX Isn't this always a barrier? */
22693 start_insn = next_nonnote_insn (start_insn);
22695 if (LABEL_P (start_insn)
22696 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22697 && LABEL_NUSES (start_insn) == 1)
22698 reverse = TRUE;
22699 else
22700 return;
22702 else if (ANY_RETURN_P (body))
22704 start_insn = next_nonnote_insn (start_insn);
22705 if (BARRIER_P (start_insn))
22706 start_insn = next_nonnote_insn (start_insn);
22707 if (LABEL_P (start_insn)
22708 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22709 && LABEL_NUSES (start_insn) == 1)
22711 reverse = TRUE;
22712 seeking_return = 1;
22713 return_code = GET_CODE (body);
22715 else
22716 return;
22718 else
22719 return;
22722 gcc_assert (!arm_ccfsm_state || reverse);
22723 if (!JUMP_P (insn))
22724 return;
22726 /* This jump might be paralleled with a clobber of the condition
22727 codes; the jump should always come first. */
22728 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22729 body = XVECEXP (body, 0, 0);
22731 if (reverse
22732 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22733 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22735 int insns_skipped;
22736 int fail = FALSE, succeed = FALSE;
22737 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22738 int then_not_else = TRUE;
22739 rtx this_insn = start_insn, label = 0;
22741 /* Register the insn jumped to. */
22742 if (reverse)
22744 if (!seeking_return)
22745 label = XEXP (SET_SRC (body), 0);
22747 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22748 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22749 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22751 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22752 then_not_else = FALSE;
22754 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22756 seeking_return = 1;
22757 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22759 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22761 seeking_return = 1;
22762 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22763 then_not_else = FALSE;
22765 else
22766 gcc_unreachable ();
22768 /* See how many insns this branch skips, and what kind of insns. If all
22769 insns are okay, and the label or unconditional branch to the same
22770 label is not too far away, succeed. */
22771 for (insns_skipped = 0;
22772 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22774 rtx scanbody;
22776 this_insn = next_nonnote_insn (this_insn);
22777 if (!this_insn)
22778 break;
22780 switch (GET_CODE (this_insn))
22782 case CODE_LABEL:
22783 /* Succeed if it is the target label, otherwise fail since
22784 control falls in from somewhere else. */
22785 if (this_insn == label)
22787 arm_ccfsm_state = 1;
22788 succeed = TRUE;
22790 else
22791 fail = TRUE;
22792 break;
22794 case BARRIER:
22795 /* Succeed if the following insn is the target label.
22796 Otherwise fail.
22797 If return insns are used then the last insn in a function
22798 will be a barrier. */
22799 this_insn = next_nonnote_insn (this_insn);
22800 if (this_insn && this_insn == label)
22802 arm_ccfsm_state = 1;
22803 succeed = TRUE;
22805 else
22806 fail = TRUE;
22807 break;
22809 case CALL_INSN:
22810 /* The AAPCS says that conditional calls should not be
22811 used since they make interworking inefficient (the
22812 linker can't transform BL<cond> into BLX). That's
22813 only a problem if the machine has BLX. */
22814 if (arm_arch5)
22816 fail = TRUE;
22817 break;
22820 /* Succeed if the following insn is the target label, or
22821 if the following two insns are a barrier and the
22822 target label. */
22823 this_insn = next_nonnote_insn (this_insn);
22824 if (this_insn && BARRIER_P (this_insn))
22825 this_insn = next_nonnote_insn (this_insn);
22827 if (this_insn && this_insn == label
22828 && insns_skipped < max_insns_skipped)
22830 arm_ccfsm_state = 1;
22831 succeed = TRUE;
22833 else
22834 fail = TRUE;
22835 break;
22837 case JUMP_INSN:
22838 /* If this is an unconditional branch to the same label, succeed.
22839 If it is to another label, do nothing. If it is conditional,
22840 fail. */
22841 /* XXX Probably, the tests for SET and the PC are
22842 unnecessary. */
22844 scanbody = PATTERN (this_insn);
22845 if (GET_CODE (scanbody) == SET
22846 && GET_CODE (SET_DEST (scanbody)) == PC)
22848 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22849 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22851 arm_ccfsm_state = 2;
22852 succeed = TRUE;
22854 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22855 fail = TRUE;
22857 /* Fail if a conditional return is undesirable (e.g. on a
22858 StrongARM), but still allow this if optimizing for size. */
22859 else if (GET_CODE (scanbody) == return_code
22860 && !use_return_insn (TRUE, NULL)
22861 && !optimize_size)
22862 fail = TRUE;
22863 else if (GET_CODE (scanbody) == return_code)
22865 arm_ccfsm_state = 2;
22866 succeed = TRUE;
22868 else if (GET_CODE (scanbody) == PARALLEL)
22870 switch (get_attr_conds (this_insn))
22872 case CONDS_NOCOND:
22873 break;
22874 default:
22875 fail = TRUE;
22876 break;
22879 else
22880 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22882 break;
22884 case INSN:
22885 /* Instructions using or affecting the condition codes make it
22886 fail. */
22887 scanbody = PATTERN (this_insn);
22888 if (!(GET_CODE (scanbody) == SET
22889 || GET_CODE (scanbody) == PARALLEL)
22890 || get_attr_conds (this_insn) != CONDS_NOCOND)
22891 fail = TRUE;
22892 break;
22894 default:
22895 break;
22898 if (succeed)
22900 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22901 arm_target_label = CODE_LABEL_NUMBER (label);
22902 else
22904 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22906 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22908 this_insn = next_nonnote_insn (this_insn);
22909 gcc_assert (!this_insn
22910 || (!BARRIER_P (this_insn)
22911 && !LABEL_P (this_insn)));
22913 if (!this_insn)
22915 /* Oh, dear! We ran off the end... give up. */
22916 extract_constrain_insn_cached (insn);
22917 arm_ccfsm_state = 0;
22918 arm_target_insn = NULL;
22919 return;
22921 arm_target_insn = this_insn;
22924 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22925 what it was. */
22926 if (!reverse)
22927 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22929 if (reverse || then_not_else)
22930 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22933 /* Restore recog_data (getting the attributes of other insns can
22934 destroy this array, but final.c assumes that it remains intact
22935 across this call). */
22936 extract_constrain_insn_cached (insn);
22940 /* Output IT instructions. */
22941 void
22942 thumb2_asm_output_opcode (FILE * stream)
22944 char buff[5];
22945 int n;
22947 if (arm_condexec_mask)
22949 for (n = 0; n < arm_condexec_masklen; n++)
22950 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22951 buff[n] = 0;
22952 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22953 arm_condition_codes[arm_current_cc]);
22954 arm_condexec_mask = 0;
22958 /* Returns true if REGNO is a valid register
22959 for holding a quantity of type MODE. */
22960 int
22961 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22963 if (GET_MODE_CLASS (mode) == MODE_CC)
22964 return (regno == CC_REGNUM
22965 || (TARGET_HARD_FLOAT && TARGET_VFP
22966 && regno == VFPCC_REGNUM));
22968 if (TARGET_THUMB1)
22969 /* For the Thumb we only allow values bigger than SImode in
22970 registers 0 - 6, so that there is always a second low
22971 register available to hold the upper part of the value.
22972 We probably ought to ensure that the register is the
22973 start of an even numbered register pair. */
22974 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22976 if (TARGET_HARD_FLOAT && TARGET_VFP
22977 && IS_VFP_REGNUM (regno))
22979 if (mode == SFmode || mode == SImode)
22980 return VFP_REGNO_OK_FOR_SINGLE (regno);
22982 if (mode == DFmode)
22983 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22985 /* VFP registers can hold HFmode values, but there is no point in
22986 putting them there unless we have hardware conversion insns. */
22987 if (mode == HFmode)
22988 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22990 if (TARGET_NEON)
22991 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22992 || (VALID_NEON_QREG_MODE (mode)
22993 && NEON_REGNO_OK_FOR_QUAD (regno))
22994 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22995 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22996 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22997 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22998 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23000 return FALSE;
23003 if (TARGET_REALLY_IWMMXT)
23005 if (IS_IWMMXT_GR_REGNUM (regno))
23006 return mode == SImode;
23008 if (IS_IWMMXT_REGNUM (regno))
23009 return VALID_IWMMXT_REG_MODE (mode);
23012 /* We allow almost any value to be stored in the general registers.
23013 Restrict doubleword quantities to even register pairs in ARM state
23014 so that we can use ldrd. Do not allow very large Neon structure
23015 opaque modes in general registers; they would use too many. */
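/* For example (illustrative): with TARGET_LDRD in ARM state a DImode value
   may live in the even-starting pair {r4, r5} but not in {r5, r6}, matching
   the (regno & 1) test below.  */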
23016 if (regno <= LAST_ARM_REGNUM)
23018 if (ARM_NUM_REGS (mode) > 4)
23019 return FALSE;
23021 if (TARGET_THUMB2)
23022 return TRUE;
23024 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23027 if (regno == FRAME_POINTER_REGNUM
23028 || regno == ARG_POINTER_REGNUM)
23029 /* We only allow integers in the fake hard registers. */
23030 return GET_MODE_CLASS (mode) == MODE_INT;
23032 return FALSE;
23035 /* Implement MODES_TIEABLE_P. */
23037 bool
23038 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
23040 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23041 return true;
23043 /* We specifically want to allow elements of "structure" modes to
23044 be tieable to the structure. This more general condition allows
23045 other rarer situations too. */
23046 if (TARGET_NEON
23047 && (VALID_NEON_DREG_MODE (mode1)
23048 || VALID_NEON_QREG_MODE (mode1)
23049 || VALID_NEON_STRUCT_MODE (mode1))
23050 && (VALID_NEON_DREG_MODE (mode2)
23051 || VALID_NEON_QREG_MODE (mode2)
23052 || VALID_NEON_STRUCT_MODE (mode2)))
23053 return true;
23055 return false;
23058 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23059 not used in arm mode. */
23061 enum reg_class
23062 arm_regno_class (int regno)
23064 if (TARGET_THUMB1)
23066 if (regno == STACK_POINTER_REGNUM)
23067 return STACK_REG;
23068 if (regno == CC_REGNUM)
23069 return CC_REG;
23070 if (regno < 8)
23071 return LO_REGS;
23072 return HI_REGS;
23075 if (TARGET_THUMB2 && regno < 8)
23076 return LO_REGS;
23078 if ( regno <= LAST_ARM_REGNUM
23079 || regno == FRAME_POINTER_REGNUM
23080 || regno == ARG_POINTER_REGNUM)
23081 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23083 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23084 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23086 if (IS_VFP_REGNUM (regno))
23088 if (regno <= D7_VFP_REGNUM)
23089 return VFP_D0_D7_REGS;
23090 else if (regno <= LAST_LO_VFP_REGNUM)
23091 return VFP_LO_REGS;
23092 else
23093 return VFP_HI_REGS;
23096 if (IS_IWMMXT_REGNUM (regno))
23097 return IWMMXT_REGS;
23099 if (IS_IWMMXT_GR_REGNUM (regno))
23100 return IWMMXT_GR_REGS;
23102 return NO_REGS;
23105 /* Handle a special case when computing the offset
23106 of an argument from the frame pointer. */
23107 int
23108 arm_debugger_arg_offset (int value, rtx addr)
23110 rtx insn;
23112 /* We are only interested if dbxout_parms() failed to compute the offset. */
23113 if (value != 0)
23114 return 0;
23116 /* We can only cope with the case where the address is held in a register. */
23117 if (!REG_P (addr))
23118 return 0;
23120 /* If we are using the frame pointer to point at the argument, then
23121 an offset of 0 is correct. */
23122 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23123 return 0;
23125 /* If we are using the stack pointer to point at the
23126 argument, then an offset of 0 is correct. */
23127 /* ??? Check this is consistent with thumb2 frame layout. */
23128 if ((TARGET_THUMB || !frame_pointer_needed)
23129 && REGNO (addr) == SP_REGNUM)
23130 return 0;
23132 /* Oh dear. The argument is pointed to by a register rather
23133 than being held in a register, or being stored at a known
23134 offset from the frame pointer. Since GDB only understands
23135 those two kinds of argument we must translate the address
23136 held in the register into an offset from the frame pointer.
23137 We do this by searching through the insns for the function
23138 looking to see where this register gets its value. If the
23139 register is initialized from the frame pointer plus an offset
23140 then we are in luck and we can continue, otherwise we give up.
23142 This code is exercised by producing debugging information
23143 for a function with arguments like this:
23145 double func (double a, double b, int c, double d) {return d;}
23147 Without this code the stab for parameter 'd' will be set to
23148 an offset of 0 from the frame pointer, rather than 8. */
23150 /* The if() statement says:
23152 If the insn is a normal instruction
23153 and if the insn is setting the value in a register
23154 and if the register being set is the register holding the address of the argument
23155 and if the address is computed by an addition
23156 that involves adding to a register
23157 which is the frame pointer
23158 a constant integer
23160 then... */
23162 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23164 if ( NONJUMP_INSN_P (insn)
23165 && GET_CODE (PATTERN (insn)) == SET
23166 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23167 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23168 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23169 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23170 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23173 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23175 break;
23179 if (value == 0)
23181 debug_rtx (addr);
23182 warning (0, "unable to compute real location of stacked parameter");
23183 value = 8; /* XXX magic hack */
23186 return value;
23189 typedef enum {
23190 T_V8QI,
23191 T_V4HI,
23192 T_V4HF,
23193 T_V2SI,
23194 T_V2SF,
23195 T_DI,
23196 T_V16QI,
23197 T_V8HI,
23198 T_V4SI,
23199 T_V4SF,
23200 T_V2DI,
23201 T_TI,
23202 T_EI,
23203 T_OI,
23204 T_MAX /* Size of enum. Keep last. */
23205 } neon_builtin_type_mode;
23207 #define TYPE_MODE_BIT(X) (1 << (X))
23209 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23210 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23211 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23212 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23213 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23214 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23216 #define v8qi_UP T_V8QI
23217 #define v4hi_UP T_V4HI
23218 #define v4hf_UP T_V4HF
23219 #define v2si_UP T_V2SI
23220 #define v2sf_UP T_V2SF
23221 #define di_UP T_DI
23222 #define v16qi_UP T_V16QI
23223 #define v8hi_UP T_V8HI
23224 #define v4si_UP T_V4SI
23225 #define v4sf_UP T_V4SF
23226 #define v2di_UP T_V2DI
23227 #define ti_UP T_TI
23228 #define ei_UP T_EI
23229 #define oi_UP T_OI
23231 #define UP(X) X##_UP
23233 typedef enum {
23234 NEON_BINOP,
23235 NEON_TERNOP,
23236 NEON_UNOP,
23237 NEON_BSWAP,
23238 NEON_GETLANE,
23239 NEON_SETLANE,
23240 NEON_CREATE,
23241 NEON_RINT,
23242 NEON_DUP,
23243 NEON_DUPLANE,
23244 NEON_COMBINE,
23245 NEON_SPLIT,
23246 NEON_LANEMUL,
23247 NEON_LANEMULL,
23248 NEON_LANEMULH,
23249 NEON_LANEMAC,
23250 NEON_SCALARMUL,
23251 NEON_SCALARMULL,
23252 NEON_SCALARMULH,
23253 NEON_SCALARMAC,
23254 NEON_CONVERT,
23255 NEON_FLOAT_WIDEN,
23256 NEON_FLOAT_NARROW,
23257 NEON_FIXCONV,
23258 NEON_SELECT,
23259 NEON_REINTERP,
23260 NEON_VTBL,
23261 NEON_VTBX,
23262 NEON_LOAD1,
23263 NEON_LOAD1LANE,
23264 NEON_STORE1,
23265 NEON_STORE1LANE,
23266 NEON_LOADSTRUCT,
23267 NEON_LOADSTRUCTLANE,
23268 NEON_STORESTRUCT,
23269 NEON_STORESTRUCTLANE,
23270 NEON_LOGICBINOP,
23271 NEON_SHIFTINSERT,
23272 NEON_SHIFTIMM,
23273 NEON_SHIFTACC
23274 } neon_itype;
23276 typedef struct {
23277 const char *name;
23278 const neon_itype itype;
23279 const neon_builtin_type_mode mode;
23280 const enum insn_code code;
23281 unsigned int fcode;
23282 } neon_builtin_datum;
23284 #define CF(N,X) CODE_FOR_neon_##N##X
23286 #define VAR1(T, N, A) \
23287 {#N, NEON_##T, UP (A), CF (N, A), 0}
23288 #define VAR2(T, N, A, B) \
23289 VAR1 (T, N, A), \
23290 {#N, NEON_##T, UP (B), CF (N, B), 0}
23291 #define VAR3(T, N, A, B, C) \
23292 VAR2 (T, N, A, B), \
23293 {#N, NEON_##T, UP (C), CF (N, C), 0}
23294 #define VAR4(T, N, A, B, C, D) \
23295 VAR3 (T, N, A, B, C), \
23296 {#N, NEON_##T, UP (D), CF (N, D), 0}
23297 #define VAR5(T, N, A, B, C, D, E) \
23298 VAR4 (T, N, A, B, C, D), \
23299 {#N, NEON_##T, UP (E), CF (N, E), 0}
23300 #define VAR6(T, N, A, B, C, D, E, F) \
23301 VAR5 (T, N, A, B, C, D, E), \
23302 {#N, NEON_##T, UP (F), CF (N, F), 0}
23303 #define VAR7(T, N, A, B, C, D, E, F, G) \
23304 VAR6 (T, N, A, B, C, D, E, F), \
23305 {#N, NEON_##T, UP (G), CF (N, G), 0}
23306 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23307 VAR7 (T, N, A, B, C, D, E, F, G), \
23308 {#N, NEON_##T, UP (H), CF (N, H), 0}
23309 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23310 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23311 {#N, NEON_##T, UP (I), CF (N, I), 0}
23312 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23313 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23314 {#N, NEON_##T, UP (J), CF (N, J), 0}
23316 /* The NEON builtin data can be found in arm_neon_builtins.def.
23317 The mode entries in the following table correspond to the "key" type of the
23318 instruction variant, i.e. equivalent to that which would be specified after
23319 the assembler mnemonic, which usually refers to the last vector operand.
23320 (Signed/unsigned/polynomial types are not differentiated between though, and
23321 are all mapped onto the same mode for a given element size.) The modes
23322 listed per instruction should be the same as those defined for that
23323 instruction's pattern in neon.md. */
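/* Illustrative expansion (the entry shown is hypothetical and not necessarily
   present in arm_neon_builtins.def): a line such as

     VAR2 (BINOP, vadd, v8qi, v16qi)

   would expand, via the VARn/CF/UP macros above, into the two table entries
   {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0} and
   {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}.  */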
23325 static neon_builtin_datum neon_builtin_data[] =
23327 #include "arm_neon_builtins.def"
23330 #undef CF
23331 #undef VAR1
23332 #undef VAR2
23333 #undef VAR3
23334 #undef VAR4
23335 #undef VAR5
23336 #undef VAR6
23337 #undef VAR7
23338 #undef VAR8
23339 #undef VAR9
23340 #undef VAR10
23342 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23343 #define VAR1(T, N, A) \
23344 CF (N, A)
23345 #define VAR2(T, N, A, B) \
23346 VAR1 (T, N, A), \
23347 CF (N, B)
23348 #define VAR3(T, N, A, B, C) \
23349 VAR2 (T, N, A, B), \
23350 CF (N, C)
23351 #define VAR4(T, N, A, B, C, D) \
23352 VAR3 (T, N, A, B, C), \
23353 CF (N, D)
23354 #define VAR5(T, N, A, B, C, D, E) \
23355 VAR4 (T, N, A, B, C, D), \
23356 CF (N, E)
23357 #define VAR6(T, N, A, B, C, D, E, F) \
23358 VAR5 (T, N, A, B, C, D, E), \
23359 CF (N, F)
23360 #define VAR7(T, N, A, B, C, D, E, F, G) \
23361 VAR6 (T, N, A, B, C, D, E, F), \
23362 CF (N, G)
23363 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23364 VAR7 (T, N, A, B, C, D, E, F, G), \
23365 CF (N, H)
23366 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23367 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23368 CF (N, I)
23369 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23370 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23371 CF (N, J)
23372 enum arm_builtins
23374 ARM_BUILTIN_GETWCGR0,
23375 ARM_BUILTIN_GETWCGR1,
23376 ARM_BUILTIN_GETWCGR2,
23377 ARM_BUILTIN_GETWCGR3,
23379 ARM_BUILTIN_SETWCGR0,
23380 ARM_BUILTIN_SETWCGR1,
23381 ARM_BUILTIN_SETWCGR2,
23382 ARM_BUILTIN_SETWCGR3,
23384 ARM_BUILTIN_WZERO,
23386 ARM_BUILTIN_WAVG2BR,
23387 ARM_BUILTIN_WAVG2HR,
23388 ARM_BUILTIN_WAVG2B,
23389 ARM_BUILTIN_WAVG2H,
23391 ARM_BUILTIN_WACCB,
23392 ARM_BUILTIN_WACCH,
23393 ARM_BUILTIN_WACCW,
23395 ARM_BUILTIN_WMACS,
23396 ARM_BUILTIN_WMACSZ,
23397 ARM_BUILTIN_WMACU,
23398 ARM_BUILTIN_WMACUZ,
23400 ARM_BUILTIN_WSADB,
23401 ARM_BUILTIN_WSADBZ,
23402 ARM_BUILTIN_WSADH,
23403 ARM_BUILTIN_WSADHZ,
23405 ARM_BUILTIN_WALIGNI,
23406 ARM_BUILTIN_WALIGNR0,
23407 ARM_BUILTIN_WALIGNR1,
23408 ARM_BUILTIN_WALIGNR2,
23409 ARM_BUILTIN_WALIGNR3,
23411 ARM_BUILTIN_TMIA,
23412 ARM_BUILTIN_TMIAPH,
23413 ARM_BUILTIN_TMIABB,
23414 ARM_BUILTIN_TMIABT,
23415 ARM_BUILTIN_TMIATB,
23416 ARM_BUILTIN_TMIATT,
23418 ARM_BUILTIN_TMOVMSKB,
23419 ARM_BUILTIN_TMOVMSKH,
23420 ARM_BUILTIN_TMOVMSKW,
23422 ARM_BUILTIN_TBCSTB,
23423 ARM_BUILTIN_TBCSTH,
23424 ARM_BUILTIN_TBCSTW,
23426 ARM_BUILTIN_WMADDS,
23427 ARM_BUILTIN_WMADDU,
23429 ARM_BUILTIN_WPACKHSS,
23430 ARM_BUILTIN_WPACKWSS,
23431 ARM_BUILTIN_WPACKDSS,
23432 ARM_BUILTIN_WPACKHUS,
23433 ARM_BUILTIN_WPACKWUS,
23434 ARM_BUILTIN_WPACKDUS,
23436 ARM_BUILTIN_WADDB,
23437 ARM_BUILTIN_WADDH,
23438 ARM_BUILTIN_WADDW,
23439 ARM_BUILTIN_WADDSSB,
23440 ARM_BUILTIN_WADDSSH,
23441 ARM_BUILTIN_WADDSSW,
23442 ARM_BUILTIN_WADDUSB,
23443 ARM_BUILTIN_WADDUSH,
23444 ARM_BUILTIN_WADDUSW,
23445 ARM_BUILTIN_WSUBB,
23446 ARM_BUILTIN_WSUBH,
23447 ARM_BUILTIN_WSUBW,
23448 ARM_BUILTIN_WSUBSSB,
23449 ARM_BUILTIN_WSUBSSH,
23450 ARM_BUILTIN_WSUBSSW,
23451 ARM_BUILTIN_WSUBUSB,
23452 ARM_BUILTIN_WSUBUSH,
23453 ARM_BUILTIN_WSUBUSW,
23455 ARM_BUILTIN_WAND,
23456 ARM_BUILTIN_WANDN,
23457 ARM_BUILTIN_WOR,
23458 ARM_BUILTIN_WXOR,
23460 ARM_BUILTIN_WCMPEQB,
23461 ARM_BUILTIN_WCMPEQH,
23462 ARM_BUILTIN_WCMPEQW,
23463 ARM_BUILTIN_WCMPGTUB,
23464 ARM_BUILTIN_WCMPGTUH,
23465 ARM_BUILTIN_WCMPGTUW,
23466 ARM_BUILTIN_WCMPGTSB,
23467 ARM_BUILTIN_WCMPGTSH,
23468 ARM_BUILTIN_WCMPGTSW,
23470 ARM_BUILTIN_TEXTRMSB,
23471 ARM_BUILTIN_TEXTRMSH,
23472 ARM_BUILTIN_TEXTRMSW,
23473 ARM_BUILTIN_TEXTRMUB,
23474 ARM_BUILTIN_TEXTRMUH,
23475 ARM_BUILTIN_TEXTRMUW,
23476 ARM_BUILTIN_TINSRB,
23477 ARM_BUILTIN_TINSRH,
23478 ARM_BUILTIN_TINSRW,
23480 ARM_BUILTIN_WMAXSW,
23481 ARM_BUILTIN_WMAXSH,
23482 ARM_BUILTIN_WMAXSB,
23483 ARM_BUILTIN_WMAXUW,
23484 ARM_BUILTIN_WMAXUH,
23485 ARM_BUILTIN_WMAXUB,
23486 ARM_BUILTIN_WMINSW,
23487 ARM_BUILTIN_WMINSH,
23488 ARM_BUILTIN_WMINSB,
23489 ARM_BUILTIN_WMINUW,
23490 ARM_BUILTIN_WMINUH,
23491 ARM_BUILTIN_WMINUB,
23493 ARM_BUILTIN_WMULUM,
23494 ARM_BUILTIN_WMULSM,
23495 ARM_BUILTIN_WMULUL,
23497 ARM_BUILTIN_PSADBH,
23498 ARM_BUILTIN_WSHUFH,
23500 ARM_BUILTIN_WSLLH,
23501 ARM_BUILTIN_WSLLW,
23502 ARM_BUILTIN_WSLLD,
23503 ARM_BUILTIN_WSRAH,
23504 ARM_BUILTIN_WSRAW,
23505 ARM_BUILTIN_WSRAD,
23506 ARM_BUILTIN_WSRLH,
23507 ARM_BUILTIN_WSRLW,
23508 ARM_BUILTIN_WSRLD,
23509 ARM_BUILTIN_WRORH,
23510 ARM_BUILTIN_WRORW,
23511 ARM_BUILTIN_WRORD,
23512 ARM_BUILTIN_WSLLHI,
23513 ARM_BUILTIN_WSLLWI,
23514 ARM_BUILTIN_WSLLDI,
23515 ARM_BUILTIN_WSRAHI,
23516 ARM_BUILTIN_WSRAWI,
23517 ARM_BUILTIN_WSRADI,
23518 ARM_BUILTIN_WSRLHI,
23519 ARM_BUILTIN_WSRLWI,
23520 ARM_BUILTIN_WSRLDI,
23521 ARM_BUILTIN_WRORHI,
23522 ARM_BUILTIN_WRORWI,
23523 ARM_BUILTIN_WRORDI,
23525 ARM_BUILTIN_WUNPCKIHB,
23526 ARM_BUILTIN_WUNPCKIHH,
23527 ARM_BUILTIN_WUNPCKIHW,
23528 ARM_BUILTIN_WUNPCKILB,
23529 ARM_BUILTIN_WUNPCKILH,
23530 ARM_BUILTIN_WUNPCKILW,
23532 ARM_BUILTIN_WUNPCKEHSB,
23533 ARM_BUILTIN_WUNPCKEHSH,
23534 ARM_BUILTIN_WUNPCKEHSW,
23535 ARM_BUILTIN_WUNPCKEHUB,
23536 ARM_BUILTIN_WUNPCKEHUH,
23537 ARM_BUILTIN_WUNPCKEHUW,
23538 ARM_BUILTIN_WUNPCKELSB,
23539 ARM_BUILTIN_WUNPCKELSH,
23540 ARM_BUILTIN_WUNPCKELSW,
23541 ARM_BUILTIN_WUNPCKELUB,
23542 ARM_BUILTIN_WUNPCKELUH,
23543 ARM_BUILTIN_WUNPCKELUW,
23545 ARM_BUILTIN_WABSB,
23546 ARM_BUILTIN_WABSH,
23547 ARM_BUILTIN_WABSW,
23549 ARM_BUILTIN_WADDSUBHX,
23550 ARM_BUILTIN_WSUBADDHX,
23552 ARM_BUILTIN_WABSDIFFB,
23553 ARM_BUILTIN_WABSDIFFH,
23554 ARM_BUILTIN_WABSDIFFW,
23556 ARM_BUILTIN_WADDCH,
23557 ARM_BUILTIN_WADDCW,
23559 ARM_BUILTIN_WAVG4,
23560 ARM_BUILTIN_WAVG4R,
23562 ARM_BUILTIN_WMADDSX,
23563 ARM_BUILTIN_WMADDUX,
23565 ARM_BUILTIN_WMADDSN,
23566 ARM_BUILTIN_WMADDUN,
23568 ARM_BUILTIN_WMULWSM,
23569 ARM_BUILTIN_WMULWUM,
23571 ARM_BUILTIN_WMULWSMR,
23572 ARM_BUILTIN_WMULWUMR,
23574 ARM_BUILTIN_WMULWL,
23576 ARM_BUILTIN_WMULSMR,
23577 ARM_BUILTIN_WMULUMR,
23579 ARM_BUILTIN_WQMULM,
23580 ARM_BUILTIN_WQMULMR,
23582 ARM_BUILTIN_WQMULWM,
23583 ARM_BUILTIN_WQMULWMR,
23585 ARM_BUILTIN_WADDBHUSM,
23586 ARM_BUILTIN_WADDBHUSL,
23588 ARM_BUILTIN_WQMIABB,
23589 ARM_BUILTIN_WQMIABT,
23590 ARM_BUILTIN_WQMIATB,
23591 ARM_BUILTIN_WQMIATT,
23593 ARM_BUILTIN_WQMIABBN,
23594 ARM_BUILTIN_WQMIABTN,
23595 ARM_BUILTIN_WQMIATBN,
23596 ARM_BUILTIN_WQMIATTN,
23598 ARM_BUILTIN_WMIABB,
23599 ARM_BUILTIN_WMIABT,
23600 ARM_BUILTIN_WMIATB,
23601 ARM_BUILTIN_WMIATT,
23603 ARM_BUILTIN_WMIABBN,
23604 ARM_BUILTIN_WMIABTN,
23605 ARM_BUILTIN_WMIATBN,
23606 ARM_BUILTIN_WMIATTN,
23608 ARM_BUILTIN_WMIAWBB,
23609 ARM_BUILTIN_WMIAWBT,
23610 ARM_BUILTIN_WMIAWTB,
23611 ARM_BUILTIN_WMIAWTT,
23613 ARM_BUILTIN_WMIAWBBN,
23614 ARM_BUILTIN_WMIAWBTN,
23615 ARM_BUILTIN_WMIAWTBN,
23616 ARM_BUILTIN_WMIAWTTN,
23618 ARM_BUILTIN_WMERGE,
23620 ARM_BUILTIN_CRC32B,
23621 ARM_BUILTIN_CRC32H,
23622 ARM_BUILTIN_CRC32W,
23623 ARM_BUILTIN_CRC32CB,
23624 ARM_BUILTIN_CRC32CH,
23625 ARM_BUILTIN_CRC32CW,
23627 ARM_BUILTIN_GET_FPSCR,
23628 ARM_BUILTIN_SET_FPSCR,
23630 #undef CRYPTO1
23631 #undef CRYPTO2
23632 #undef CRYPTO3
23634 #define CRYPTO1(L, U, M1, M2) \
23635 ARM_BUILTIN_CRYPTO_##U,
23636 #define CRYPTO2(L, U, M1, M2, M3) \
23637 ARM_BUILTIN_CRYPTO_##U,
23638 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23639 ARM_BUILTIN_CRYPTO_##U,
23641 #include "crypto.def"
23643 #undef CRYPTO1
23644 #undef CRYPTO2
23645 #undef CRYPTO3
23647 #include "arm_neon_builtins.def"
23649 ,ARM_BUILTIN_MAX
23652 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
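/* The NEON function codes therefore occupy the last
   ARRAY_SIZE (neon_builtin_data) slots of the enumeration, immediately
   before ARM_BUILTIN_MAX; arm_init_neon_builtins hands out fcodes starting
   at ARM_BUILTIN_NEON_BASE, so index I into neon_builtin_data corresponds
   to function code ARM_BUILTIN_NEON_BASE + I.  */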
23654 #undef CF
23655 #undef VAR1
23656 #undef VAR2
23657 #undef VAR3
23658 #undef VAR4
23659 #undef VAR5
23660 #undef VAR6
23661 #undef VAR7
23662 #undef VAR8
23663 #undef VAR9
23664 #undef VAR10
23666 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23668 #define NUM_DREG_TYPES 5
23669 #define NUM_QREG_TYPES 6
23671 static void
23672 arm_init_neon_builtins (void)
23674 unsigned int i, fcode;
23675 tree decl;
23677 tree neon_intQI_type_node;
23678 tree neon_intHI_type_node;
23679 tree neon_floatHF_type_node;
23680 tree neon_polyQI_type_node;
23681 tree neon_polyHI_type_node;
23682 tree neon_intSI_type_node;
23683 tree neon_intDI_type_node;
23684 tree neon_intUTI_type_node;
23685 tree neon_float_type_node;
23687 tree intQI_pointer_node;
23688 tree intHI_pointer_node;
23689 tree intSI_pointer_node;
23690 tree intDI_pointer_node;
23691 tree float_pointer_node;
23693 tree const_intQI_node;
23694 tree const_intHI_node;
23695 tree const_intSI_node;
23696 tree const_intDI_node;
23697 tree const_float_node;
23699 tree const_intQI_pointer_node;
23700 tree const_intHI_pointer_node;
23701 tree const_intSI_pointer_node;
23702 tree const_intDI_pointer_node;
23703 tree const_float_pointer_node;
23705 tree V8QI_type_node;
23706 tree V4HI_type_node;
23707 tree V4UHI_type_node;
23708 tree V4HF_type_node;
23709 tree V2SI_type_node;
23710 tree V2USI_type_node;
23711 tree V2SF_type_node;
23712 tree V16QI_type_node;
23713 tree V8HI_type_node;
23714 tree V8UHI_type_node;
23715 tree V4SI_type_node;
23716 tree V4USI_type_node;
23717 tree V4SF_type_node;
23718 tree V2DI_type_node;
23719 tree V2UDI_type_node;
23721 tree intUQI_type_node;
23722 tree intUHI_type_node;
23723 tree intUSI_type_node;
23724 tree intUDI_type_node;
23726 tree intEI_type_node;
23727 tree intOI_type_node;
23728 tree intCI_type_node;
23729 tree intXI_type_node;
23731 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23732 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23733 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23735 /* Create distinguished type nodes for NEON vector element types,
23736 and pointers to values of such types, so we can detect them later. */
23737 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23738 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23739 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23740 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23741 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23742 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23743 neon_float_type_node = make_node (REAL_TYPE);
23744 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23745 layout_type (neon_float_type_node);
23746 neon_floatHF_type_node = make_node (REAL_TYPE);
23747 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23748 layout_type (neon_floatHF_type_node);
23750 /* Define typedefs which exactly correspond to the modes we are basing vector
23751 types on. If you change these names you'll need to change
23752 the table used by arm_mangle_type too. */
23753 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23754 "__builtin_neon_qi");
23755 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23756 "__builtin_neon_hi");
23757 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23758 "__builtin_neon_hf");
23759 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23760 "__builtin_neon_si");
23761 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23762 "__builtin_neon_sf");
23763 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23764 "__builtin_neon_di");
23765 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23766 "__builtin_neon_poly8");
23767 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23768 "__builtin_neon_poly16");
23770 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23771 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23772 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23773 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23774 float_pointer_node = build_pointer_type (neon_float_type_node);
23776 /* Next create constant-qualified versions of the above types. */
23777 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23778 TYPE_QUAL_CONST);
23779 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23780 TYPE_QUAL_CONST);
23781 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23782 TYPE_QUAL_CONST);
23783 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23784 TYPE_QUAL_CONST);
23785 const_float_node = build_qualified_type (neon_float_type_node,
23786 TYPE_QUAL_CONST);
23788 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23789 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23790 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23791 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23792 const_float_pointer_node = build_pointer_type (const_float_node);
23794 /* Unsigned integer types for various mode sizes. */
23795 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23796 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23797 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23798 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23799 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23800 /* Now create vector types based on our NEON element types. */
23801 /* 64-bit vectors. */
23802 V8QI_type_node =
23803 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23804 V4HI_type_node =
23805 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23806 V4UHI_type_node =
23807 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23808 V4HF_type_node =
23809 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23810 V2SI_type_node =
23811 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23812 V2USI_type_node =
23813 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23814 V2SF_type_node =
23815 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23816 /* 128-bit vectors. */
23817 V16QI_type_node =
23818 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23819 V8HI_type_node =
23820 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23821 V8UHI_type_node =
23822 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23823 V4SI_type_node =
23824 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23825 V4USI_type_node =
23826 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23827 V4SF_type_node =
23828 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23829 V2DI_type_node =
23830 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23831 V2UDI_type_node =
23832 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23835 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23836 "__builtin_neon_uqi");
23837 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23838 "__builtin_neon_uhi");
23839 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23840 "__builtin_neon_usi");
23841 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23842 "__builtin_neon_udi");
23843 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23844 "__builtin_neon_poly64");
23845 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23846 "__builtin_neon_poly128");
23848 /* Opaque integer types for structures of vectors. */
23849 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23850 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23851 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23852 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23854 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23855 "__builtin_neon_ti");
23856 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23857 "__builtin_neon_ei");
23858 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23859 "__builtin_neon_oi");
23860 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23861 "__builtin_neon_ci");
23862 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23863 "__builtin_neon_xi");
23865 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23868 tree V16UQI_type_node =
23869 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23871 tree v16uqi_ftype_v16uqi
23872 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23874 tree v16uqi_ftype_v16uqi_v16uqi
23875 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23876 V16UQI_type_node, NULL_TREE);
23878 tree v4usi_ftype_v4usi
23879 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23881 tree v4usi_ftype_v4usi_v4usi
23882 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23883 V4USI_type_node, NULL_TREE);
23885 tree v4usi_ftype_v4usi_v4usi_v4usi
23886 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23887 V4USI_type_node, V4USI_type_node, NULL_TREE);
23889 tree uti_ftype_udi_udi
23890 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23891 intUDI_type_node, NULL_TREE);
23893 #undef CRYPTO1
23894 #undef CRYPTO2
23895 #undef CRYPTO3
23896 #undef C
23897 #undef N
23898 #undef CF
23899 #undef FT1
23900 #undef FT2
23901 #undef FT3
23903 #define C(U) \
23904 ARM_BUILTIN_CRYPTO_##U
23905 #define N(L) \
23906 "__builtin_arm_crypto_"#L
23907 #define FT1(R, A) \
23908 R##_ftype_##A
23909 #define FT2(R, A1, A2) \
23910 R##_ftype_##A1##_##A2
23911 #define FT3(R, A1, A2, A3) \
23912 R##_ftype_##A1##_##A2##_##A3
23913 #define CRYPTO1(L, U, R, A) \
23914 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23915 C (U), BUILT_IN_MD, \
23916 NULL, NULL_TREE);
23917 #define CRYPTO2(L, U, R, A1, A2) \
23918 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23919 C (U), BUILT_IN_MD, \
23920 NULL, NULL_TREE);
23922 #define CRYPTO3(L, U, R, A1, A2, A3) \
23923 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23924 C (U), BUILT_IN_MD, \
23925 NULL, NULL_TREE);
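/* For illustration (assuming crypto.def contains an entry of this shape):

     CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi)

   would expand, via the C/N/FTn helpers above, to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESE]
       = add_builtin_function ("__builtin_arm_crypto_aese",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESE, BUILT_IN_MD,
                               NULL, NULL_TREE);  */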
23926 #include "crypto.def"
23928 #undef CRYPTO1
23929 #undef CRYPTO2
23930 #undef CRYPTO3
23931 #undef C
23932 #undef N
23933 #undef FT1
23934 #undef FT2
23935 #undef FT3
23937 dreg_types[0] = V8QI_type_node;
23938 dreg_types[1] = V4HI_type_node;
23939 dreg_types[2] = V2SI_type_node;
23940 dreg_types[3] = V2SF_type_node;
23941 dreg_types[4] = neon_intDI_type_node;
23943 qreg_types[0] = V16QI_type_node;
23944 qreg_types[1] = V8HI_type_node;
23945 qreg_types[2] = V4SI_type_node;
23946 qreg_types[3] = V4SF_type_node;
23947 qreg_types[4] = V2DI_type_node;
23948 qreg_types[5] = neon_intUTI_type_node;
23950 for (i = 0; i < NUM_QREG_TYPES; i++)
23952 int j;
23953 for (j = 0; j < NUM_QREG_TYPES; j++)
23955 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23956 reinterp_ftype_dreg[i][j]
23957 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23959 reinterp_ftype_qreg[i][j]
23960 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23964 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23965 i < ARRAY_SIZE (neon_builtin_data);
23966 i++, fcode++)
23968 neon_builtin_datum *d = &neon_builtin_data[i];
23970 const char* const modenames[] = {
23971 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23972 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23973 "ti", "ei", "oi"
23975 char namebuf[60];
23976 tree ftype = NULL;
23977 int is_load = 0, is_store = 0;
23979 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23981 d->fcode = fcode;
23983 switch (d->itype)
23985 case NEON_LOAD1:
23986 case NEON_LOAD1LANE:
23987 case NEON_LOADSTRUCT:
23988 case NEON_LOADSTRUCTLANE:
23989 is_load = 1;
23990 /* Fall through. */
23991 case NEON_STORE1:
23992 case NEON_STORE1LANE:
23993 case NEON_STORESTRUCT:
23994 case NEON_STORESTRUCTLANE:
23995 if (!is_load)
23996 is_store = 1;
23997 /* Fall through. */
23998 case NEON_UNOP:
23999 case NEON_RINT:
24000 case NEON_BINOP:
24001 case NEON_LOGICBINOP:
24002 case NEON_SHIFTINSERT:
24003 case NEON_TERNOP:
24004 case NEON_GETLANE:
24005 case NEON_SETLANE:
24006 case NEON_CREATE:
24007 case NEON_DUP:
24008 case NEON_DUPLANE:
24009 case NEON_SHIFTIMM:
24010 case NEON_SHIFTACC:
24011 case NEON_COMBINE:
24012 case NEON_SPLIT:
24013 case NEON_CONVERT:
24014 case NEON_FIXCONV:
24015 case NEON_LANEMUL:
24016 case NEON_LANEMULL:
24017 case NEON_LANEMULH:
24018 case NEON_LANEMAC:
24019 case NEON_SCALARMUL:
24020 case NEON_SCALARMULL:
24021 case NEON_SCALARMULH:
24022 case NEON_SCALARMAC:
24023 case NEON_SELECT:
24024 case NEON_VTBL:
24025 case NEON_VTBX:
24027 int k;
24028 tree return_type = void_type_node, args = void_list_node;
24030 /* Build a function type directly from the insn_data for
24031 this builtin. The build_function_type() function takes
24032 care of removing duplicates for us. */
24033 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24035 tree eltype;
24037 if (is_load && k == 1)
24039 /* Neon load patterns always have the memory
24040 operand in the operand 1 position. */
24041 gcc_assert (insn_data[d->code].operand[k].predicate
24042 == neon_struct_operand);
24044 switch (d->mode)
24046 case T_V8QI:
24047 case T_V16QI:
24048 eltype = const_intQI_pointer_node;
24049 break;
24051 case T_V4HI:
24052 case T_V8HI:
24053 eltype = const_intHI_pointer_node;
24054 break;
24056 case T_V2SI:
24057 case T_V4SI:
24058 eltype = const_intSI_pointer_node;
24059 break;
24061 case T_V2SF:
24062 case T_V4SF:
24063 eltype = const_float_pointer_node;
24064 break;
24066 case T_DI:
24067 case T_V2DI:
24068 eltype = const_intDI_pointer_node;
24069 break;
24071 default: gcc_unreachable ();
24074 else if (is_store && k == 0)
24076 /* Similarly, Neon store patterns use operand 0 as
24077 the memory location to store to. */
24078 gcc_assert (insn_data[d->code].operand[k].predicate
24079 == neon_struct_operand);
24081 switch (d->mode)
24083 case T_V8QI:
24084 case T_V16QI:
24085 eltype = intQI_pointer_node;
24086 break;
24088 case T_V4HI:
24089 case T_V8HI:
24090 eltype = intHI_pointer_node;
24091 break;
24093 case T_V2SI:
24094 case T_V4SI:
24095 eltype = intSI_pointer_node;
24096 break;
24098 case T_V2SF:
24099 case T_V4SF:
24100 eltype = float_pointer_node;
24101 break;
24103 case T_DI:
24104 case T_V2DI:
24105 eltype = intDI_pointer_node;
24106 break;
24108 default: gcc_unreachable ();
24111 else
24113 switch (insn_data[d->code].operand[k].mode)
24115 case VOIDmode: eltype = void_type_node; break;
24116 /* Scalars. */
24117 case QImode: eltype = neon_intQI_type_node; break;
24118 case HImode: eltype = neon_intHI_type_node; break;
24119 case SImode: eltype = neon_intSI_type_node; break;
24120 case SFmode: eltype = neon_float_type_node; break;
24121 case DImode: eltype = neon_intDI_type_node; break;
24122 case TImode: eltype = intTI_type_node; break;
24123 case EImode: eltype = intEI_type_node; break;
24124 case OImode: eltype = intOI_type_node; break;
24125 case CImode: eltype = intCI_type_node; break;
24126 case XImode: eltype = intXI_type_node; break;
24127 /* 64-bit vectors. */
24128 case V8QImode: eltype = V8QI_type_node; break;
24129 case V4HImode: eltype = V4HI_type_node; break;
24130 case V2SImode: eltype = V2SI_type_node; break;
24131 case V2SFmode: eltype = V2SF_type_node; break;
24132 /* 128-bit vectors. */
24133 case V16QImode: eltype = V16QI_type_node; break;
24134 case V8HImode: eltype = V8HI_type_node; break;
24135 case V4SImode: eltype = V4SI_type_node; break;
24136 case V4SFmode: eltype = V4SF_type_node; break;
24137 case V2DImode: eltype = V2DI_type_node; break;
24138 default: gcc_unreachable ();
24142 if (k == 0 && !is_store)
24143 return_type = eltype;
24144 else
24145 args = tree_cons (NULL_TREE, eltype, args);
24148 ftype = build_function_type (return_type, args);
24150 break;
24152 case NEON_REINTERP:
24154 /* We iterate over NUM_DREG_TYPES doubleword types,
24155 then NUM_QREG_TYPES quadword types.
24156 V4HF is not a type used in reinterpret, so we translate
24157 d->mode to the correct index in reinterp_ftype_dreg. */
24158 bool qreg_p
24159 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24160 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24161 % NUM_QREG_TYPES;
24162 switch (insn_data[d->code].operand[0].mode)
24164 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24165 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24166 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24167 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24168 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24169 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24170 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24171 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24172 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24173 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24174 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24175 default: gcc_unreachable ();
24178 break;
24179 case NEON_FLOAT_WIDEN:
24181 tree eltype = NULL_TREE;
24182 tree return_type = NULL_TREE;
24184 switch (insn_data[d->code].operand[1].mode)
24186 case V4HFmode:
24187 eltype = V4HF_type_node;
24188 return_type = V4SF_type_node;
24189 break;
24190 default: gcc_unreachable ();
24192 ftype = build_function_type_list (return_type, eltype, NULL);
24193 break;
24195 case NEON_FLOAT_NARROW:
24197 tree eltype = NULL_TREE;
24198 tree return_type = NULL_TREE;
24200 switch (insn_data[d->code].operand[1].mode)
24202 case V4SFmode:
24203 eltype = V4SF_type_node;
24204 return_type = V4HF_type_node;
24205 break;
24206 default: gcc_unreachable ();
24208 ftype = build_function_type_list (return_type, eltype, NULL);
24209 break;
24211 case NEON_BSWAP:
24213 tree eltype = NULL_TREE;
24214 switch (insn_data[d->code].operand[1].mode)
24216 case V4HImode:
24217 eltype = V4UHI_type_node;
24218 break;
24219 case V8HImode:
24220 eltype = V8UHI_type_node;
24221 break;
24222 case V2SImode:
24223 eltype = V2USI_type_node;
24224 break;
24225 case V4SImode:
24226 eltype = V4USI_type_node;
24227 break;
24228 case V2DImode:
24229 eltype = V2UDI_type_node;
24230 break;
24231 default: gcc_unreachable ();
24233 ftype = build_function_type_list (eltype, eltype, NULL);
24234 break;
24236 default:
24237 gcc_unreachable ();
24240 gcc_assert (ftype != NULL);
24242 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24244 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24245 NULL_TREE);
24246 arm_builtin_decls[fcode] = decl;
24250 #undef NUM_DREG_TYPES
24251 #undef NUM_QREG_TYPES
24253 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24254 do \
24256 if ((MASK) & insn_flags) \
24258 tree bdecl; \
24259 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24260 BUILT_IN_MD, NULL, NULL_TREE); \
24261 arm_builtin_decls[CODE] = bdecl; \
24264 while (0)
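/* For example,

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   registers __builtin_arm_wzero only when FL_IWMMXT is set in insn_flags,
   and records the resulting decl in arm_builtin_decls so that
   arm_builtin_decl can later return it.  */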
24266 struct builtin_description
24268 const unsigned int mask;
24269 const enum insn_code icode;
24270 const char * const name;
24271 const enum arm_builtins code;
24272 const enum rtx_code comparison;
24273 const unsigned int flag;
24276 static const struct builtin_description bdesc_2arg[] =
24278 #define IWMMXT_BUILTIN(code, string, builtin) \
24279 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24280 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24282 #define IWMMXT2_BUILTIN(code, string, builtin) \
24283 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24284 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24286 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24287 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24288 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24289 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24290 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24291 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24292 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24293 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24294 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24295 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24296 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24297 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24298 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24299 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24300 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24301 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24302 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24303 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24304 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24305 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24306 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24307 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24308 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24309 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24310 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24311 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24312 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24313 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24314 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24315 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24316 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24317 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24318 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24319 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24320 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24321 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24322 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24323 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24324 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24325 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24326 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24327 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24328 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24329 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24330 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24331 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24332 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24333 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24334 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24335 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24336 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24337 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24338 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24339 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24340 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24341 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24342 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24343 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24344 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24345 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24346 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24347 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24348 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24349 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24350 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24351 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24352 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24353 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24354 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24355 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24356 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24357 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24358 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24359 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24360 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24361 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24362 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24363 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24365 #define IWMMXT_BUILTIN2(code, builtin) \
24366 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24368 #define IWMMXT2_BUILTIN2(code, builtin) \
24369 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24371 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24372 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24373 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24374 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24375 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24376 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24377 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24378 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24379 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24380 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24383 #define FP_BUILTIN(L, U) \
24384 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24385 UNKNOWN, 0},
24387 FP_BUILTIN (get_fpscr, GET_FPSCR)
24388 FP_BUILTIN (set_fpscr, SET_FPSCR)
24389 #undef FP_BUILTIN
24391 #define CRC32_BUILTIN(L, U) \
24392 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24393 UNKNOWN, 0},
24394 CRC32_BUILTIN (crc32b, CRC32B)
24395 CRC32_BUILTIN (crc32h, CRC32H)
24396 CRC32_BUILTIN (crc32w, CRC32W)
24397 CRC32_BUILTIN (crc32cb, CRC32CB)
24398 CRC32_BUILTIN (crc32ch, CRC32CH)
24399 CRC32_BUILTIN (crc32cw, CRC32CW)
24400 #undef CRC32_BUILTIN
24403 #define CRYPTO_BUILTIN(L, U) \
24404 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24405 UNKNOWN, 0},
24406 #undef CRYPTO1
24407 #undef CRYPTO2
24408 #undef CRYPTO3
24409 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24410 #define CRYPTO1(L, U, R, A)
24411 #define CRYPTO3(L, U, R, A1, A2, A3)
24412 #include "crypto.def"
24413 #undef CRYPTO1
24414 #undef CRYPTO2
24415 #undef CRYPTO3
24419 static const struct builtin_description bdesc_1arg[] =
24421 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24422 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24423 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24424 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24425 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24426 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24427 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24428 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24429 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24430 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24431 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24432 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24433 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24434 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24435 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24436 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24437 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24438 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24439 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24440 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24441 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24442 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24443 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24444 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24446 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24447 #define CRYPTO2(L, U, R, A1, A2)
24448 #define CRYPTO3(L, U, R, A1, A2, A3)
24449 #include "crypto.def"
24450 #undef CRYPTO1
24451 #undef CRYPTO2
24452 #undef CRYPTO3
24455 static const struct builtin_description bdesc_3arg[] =
24457 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24458 #define CRYPTO1(L, U, R, A)
24459 #define CRYPTO2(L, U, R, A1, A2)
24460 #include "crypto.def"
24461 #undef CRYPTO1
24462 #undef CRYPTO2
24463 #undef CRYPTO3
24465 #undef CRYPTO_BUILTIN
24467 /* Set up all the iWMMXt builtins. This is not called if
24468 TARGET_IWMMXT is zero. */
24470 static void
24471 arm_init_iwmmxt_builtins (void)
24473 const struct builtin_description * d;
24474 size_t i;
24476 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24477 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24478 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24480 tree v8qi_ftype_v8qi_v8qi_int
24481 = build_function_type_list (V8QI_type_node,
24482 V8QI_type_node, V8QI_type_node,
24483 integer_type_node, NULL_TREE);
24484 tree v4hi_ftype_v4hi_int
24485 = build_function_type_list (V4HI_type_node,
24486 V4HI_type_node, integer_type_node, NULL_TREE);
24487 tree v2si_ftype_v2si_int
24488 = build_function_type_list (V2SI_type_node,
24489 V2SI_type_node, integer_type_node, NULL_TREE);
24490 tree v2si_ftype_di_di
24491 = build_function_type_list (V2SI_type_node,
24492 long_long_integer_type_node,
24493 long_long_integer_type_node,
24494 NULL_TREE);
24495 tree di_ftype_di_int
24496 = build_function_type_list (long_long_integer_type_node,
24497 long_long_integer_type_node,
24498 integer_type_node, NULL_TREE);
24499 tree di_ftype_di_int_int
24500 = build_function_type_list (long_long_integer_type_node,
24501 long_long_integer_type_node,
24502 integer_type_node,
24503 integer_type_node, NULL_TREE);
24504 tree int_ftype_v8qi
24505 = build_function_type_list (integer_type_node,
24506 V8QI_type_node, NULL_TREE);
24507 tree int_ftype_v4hi
24508 = build_function_type_list (integer_type_node,
24509 V4HI_type_node, NULL_TREE);
24510 tree int_ftype_v2si
24511 = build_function_type_list (integer_type_node,
24512 V2SI_type_node, NULL_TREE);
24513 tree int_ftype_v8qi_int
24514 = build_function_type_list (integer_type_node,
24515 V8QI_type_node, integer_type_node, NULL_TREE);
24516 tree int_ftype_v4hi_int
24517 = build_function_type_list (integer_type_node,
24518 V4HI_type_node, integer_type_node, NULL_TREE);
24519 tree int_ftype_v2si_int
24520 = build_function_type_list (integer_type_node,
24521 V2SI_type_node, integer_type_node, NULL_TREE);
24522 tree v8qi_ftype_v8qi_int_int
24523 = build_function_type_list (V8QI_type_node,
24524 V8QI_type_node, integer_type_node,
24525 integer_type_node, NULL_TREE);
24526 tree v4hi_ftype_v4hi_int_int
24527 = build_function_type_list (V4HI_type_node,
24528 V4HI_type_node, integer_type_node,
24529 integer_type_node, NULL_TREE);
24530 tree v2si_ftype_v2si_int_int
24531 = build_function_type_list (V2SI_type_node,
24532 V2SI_type_node, integer_type_node,
24533 integer_type_node, NULL_TREE);
24534 /* Miscellaneous. */
24535 tree v8qi_ftype_v4hi_v4hi
24536 = build_function_type_list (V8QI_type_node,
24537 V4HI_type_node, V4HI_type_node, NULL_TREE);
24538 tree v4hi_ftype_v2si_v2si
24539 = build_function_type_list (V4HI_type_node,
24540 V2SI_type_node, V2SI_type_node, NULL_TREE);
24541 tree v8qi_ftype_v4hi_v8qi
24542 = build_function_type_list (V8QI_type_node,
24543 V4HI_type_node, V8QI_type_node, NULL_TREE);
24544 tree v2si_ftype_v4hi_v4hi
24545 = build_function_type_list (V2SI_type_node,
24546 V4HI_type_node, V4HI_type_node, NULL_TREE);
24547 tree v2si_ftype_v8qi_v8qi
24548 = build_function_type_list (V2SI_type_node,
24549 V8QI_type_node, V8QI_type_node, NULL_TREE);
24550 tree v4hi_ftype_v4hi_di
24551 = build_function_type_list (V4HI_type_node,
24552 V4HI_type_node, long_long_integer_type_node,
24553 NULL_TREE);
24554 tree v2si_ftype_v2si_di
24555 = build_function_type_list (V2SI_type_node,
24556 V2SI_type_node, long_long_integer_type_node,
24557 NULL_TREE);
24558 tree di_ftype_void
24559 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24560 tree int_ftype_void
24561 = build_function_type_list (integer_type_node, NULL_TREE);
24562 tree di_ftype_v8qi
24563 = build_function_type_list (long_long_integer_type_node,
24564 V8QI_type_node, NULL_TREE);
24565 tree di_ftype_v4hi
24566 = build_function_type_list (long_long_integer_type_node,
24567 V4HI_type_node, NULL_TREE);
24568 tree di_ftype_v2si
24569 = build_function_type_list (long_long_integer_type_node,
24570 V2SI_type_node, NULL_TREE);
24571 tree v2si_ftype_v4hi
24572 = build_function_type_list (V2SI_type_node,
24573 V4HI_type_node, NULL_TREE);
24574 tree v4hi_ftype_v8qi
24575 = build_function_type_list (V4HI_type_node,
24576 V8QI_type_node, NULL_TREE);
24577 tree v8qi_ftype_v8qi
24578 = build_function_type_list (V8QI_type_node,
24579 V8QI_type_node, NULL_TREE);
24580 tree v4hi_ftype_v4hi
24581 = build_function_type_list (V4HI_type_node,
24582 V4HI_type_node, NULL_TREE);
24583 tree v2si_ftype_v2si
24584 = build_function_type_list (V2SI_type_node,
24585 V2SI_type_node, NULL_TREE);
24587 tree di_ftype_di_v4hi_v4hi
24588 = build_function_type_list (long_long_unsigned_type_node,
24589 long_long_unsigned_type_node,
24590 V4HI_type_node, V4HI_type_node,
24591 NULL_TREE);
24593 tree di_ftype_v4hi_v4hi
24594 = build_function_type_list (long_long_unsigned_type_node,
24595 V4HI_type_node,V4HI_type_node,
24596 NULL_TREE);
24598 tree v2si_ftype_v2si_v4hi_v4hi
24599 = build_function_type_list (V2SI_type_node,
24600 V2SI_type_node, V4HI_type_node,
24601 V4HI_type_node, NULL_TREE);
24603 tree v2si_ftype_v2si_v8qi_v8qi
24604 = build_function_type_list (V2SI_type_node,
24605 V2SI_type_node, V8QI_type_node,
24606 V8QI_type_node, NULL_TREE);
24608 tree di_ftype_di_v2si_v2si
24609 = build_function_type_list (long_long_unsigned_type_node,
24610 long_long_unsigned_type_node,
24611 V2SI_type_node, V2SI_type_node,
24612 NULL_TREE);
24614 tree di_ftype_di_di_int
24615 = build_function_type_list (long_long_unsigned_type_node,
24616 long_long_unsigned_type_node,
24617 long_long_unsigned_type_node,
24618 integer_type_node, NULL_TREE);
24620 tree void_ftype_int
24621 = build_function_type_list (void_type_node,
24622 integer_type_node, NULL_TREE);
24624 tree v8qi_ftype_char
24625 = build_function_type_list (V8QI_type_node,
24626 signed_char_type_node, NULL_TREE);
24628 tree v4hi_ftype_short
24629 = build_function_type_list (V4HI_type_node,
24630 short_integer_type_node, NULL_TREE);
24632 tree v2si_ftype_int
24633 = build_function_type_list (V2SI_type_node,
24634 integer_type_node, NULL_TREE);
24636 /* Normal vector binops. */
24637 tree v8qi_ftype_v8qi_v8qi
24638 = build_function_type_list (V8QI_type_node,
24639 V8QI_type_node, V8QI_type_node, NULL_TREE);
24640 tree v4hi_ftype_v4hi_v4hi
24641 = build_function_type_list (V4HI_type_node,
24642 V4HI_type_node,V4HI_type_node, NULL_TREE);
24643 tree v2si_ftype_v2si_v2si
24644 = build_function_type_list (V2SI_type_node,
24645 V2SI_type_node, V2SI_type_node, NULL_TREE);
24646 tree di_ftype_di_di
24647 = build_function_type_list (long_long_unsigned_type_node,
24648 long_long_unsigned_type_node,
24649 long_long_unsigned_type_node,
24650 NULL_TREE);
24652 /* Add all builtins that are more or less simple operations on two
24653 operands. */
24654 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24656 /* Use one of the operands; the target can have a different mode for
24657 mask-generating compares. */
24658 enum machine_mode mode;
24659 tree type;
24661 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24662 continue;
24664 mode = insn_data[d->icode].operand[1].mode;
24666 switch (mode)
24668 case V8QImode:
24669 type = v8qi_ftype_v8qi_v8qi;
24670 break;
24671 case V4HImode:
24672 type = v4hi_ftype_v4hi_v4hi;
24673 break;
24674 case V2SImode:
24675 type = v2si_ftype_v2si_v2si;
24676 break;
24677 case DImode:
24678 type = di_ftype_di_di;
24679 break;
24681 default:
24682 gcc_unreachable ();
24685 def_mbuiltin (d->mask, d->name, type, d->code);
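      /* For example, the IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) entry in
         bdesc_2arg has operand 1 in V8QImode, so this loop registers
         __builtin_arm_waddb with type v8qi_ftype_v8qi_v8qi.  */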
24688 /* Add the remaining MMX insns with somewhat more complicated types. */
24689 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24690 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24691 ARM_BUILTIN_ ## CODE)
24693 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24694 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24695 ARM_BUILTIN_ ## CODE)
24697 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24698 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24699 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24700 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24701 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24702 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24703 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24704 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24705 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24707 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24708 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24709 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24710 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24711 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24712 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24714 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24715 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24716 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24717 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24718 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24719 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24721 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24722 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24723 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24724 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24725 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24726 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24728 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24729 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24730 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24731 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24732 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24733 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24735 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24737 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24738 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24739 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24740 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24741 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24742 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24743 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24744 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24745 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24746 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24748 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24749 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24750 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24751 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24752 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24753 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24754 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24755 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24756 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24758 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24759 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24760 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24762 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24763 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24764 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24766 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24767 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24769 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24770 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24771 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24772 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24773 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24774 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24776 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24777 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24778 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24779 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24780 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24781 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24782 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24783 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24784 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24785 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24786 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24787 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24789 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24790 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24791 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24792 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24794 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24795 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24796 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24797 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24798 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24799 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24800 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24802 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24803 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24804 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24806 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24807 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24808 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24809 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24811 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24812 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24813 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24814 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24816 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24817 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24818 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24819 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24821 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24822 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24823 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24824 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24826 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24827 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24828 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24829 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24831 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24832 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24833 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24834 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24836 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24838 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24839 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24840 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24842 #undef iwmmx_mbuiltin
24843 #undef iwmmx2_mbuiltin
24846 static void
24847 arm_init_fp16_builtins (void)
24849 tree fp16_type = make_node (REAL_TYPE);
24850 TYPE_PRECISION (fp16_type) = 16;
24851 layout_type (fp16_type);
24852 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
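  /* Only the 16-bit width is fixed here; whether __fp16 uses the IEEE or
     the ARM alternative encoding is controlled separately by
     arm_fp16_format (the -mfp16-format option).  */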
24855 static void
24856 arm_init_crc32_builtins ()
24858 tree si_ftype_si_qi
24859 = build_function_type_list (unsigned_intSI_type_node,
24860 unsigned_intSI_type_node,
24861 unsigned_intQI_type_node, NULL_TREE);
24862 tree si_ftype_si_hi
24863 = build_function_type_list (unsigned_intSI_type_node,
24864 unsigned_intSI_type_node,
24865 unsigned_intHI_type_node, NULL_TREE);
24866 tree si_ftype_si_si
24867 = build_function_type_list (unsigned_intSI_type_node,
24868 unsigned_intSI_type_node,
24869 unsigned_intSI_type_node, NULL_TREE);
24871 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24872 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24873 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24874 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24875 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24876 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24877 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24878 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24879 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24880 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24881 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24882 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24883 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24884 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24885 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24886 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24887 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24888 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
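/* For illustration, code compiled for a CRC-capable core can then call
   these builtins directly, e.g.

     unsigned int
     crc32_byte (unsigned int crc, unsigned char data)
     {
       return __builtin_arm_crc32b (crc, data);
     }

   which matches the si_ftype_si_qi signature registered above.  */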
24891 static void
24892 arm_init_builtins (void)
24894 if (TARGET_REALLY_IWMMXT)
24895 arm_init_iwmmxt_builtins ();
24897 if (TARGET_NEON)
24898 arm_init_neon_builtins ();
24900 if (arm_fp16_format)
24901 arm_init_fp16_builtins ();
24903 if (TARGET_CRC32)
24904 arm_init_crc32_builtins ();
24906 if (TARGET_VFP && TARGET_HARD_FLOAT)
24908 tree ftype_set_fpscr
24909 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24910 tree ftype_get_fpscr
24911 = build_function_type_list (unsigned_type_node, NULL);
24913 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24914 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24915 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24916 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24917 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24918 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
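      /* These two decls are also picked up by arm_atomic_assign_expand_fenv
         to save and restore the floating-point status and control register
         around the sequences it builds.  */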
24922 /* Return the ARM builtin for CODE. */
24924 static tree
24925 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24927 if (code >= ARM_BUILTIN_MAX)
24928 return error_mark_node;
24930 return arm_builtin_decls[code];
24933 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24935 static const char *
24936 arm_invalid_parameter_type (const_tree t)
24938 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24939 return N_("function parameters cannot have __fp16 type");
24940 return NULL;
24943 /* Implement TARGET_INVALID_RETURN_TYPE. */
24945 static const char *
24946 arm_invalid_return_type (const_tree t)
24948 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24949 return N_("functions cannot return __fp16 type");
24950 return NULL;
24953 /* Implement TARGET_PROMOTED_TYPE. */
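/* For __fp16 this returns float: __fp16 is a storage-only format here, so
   values are promoted to float before arithmetic is performed on them.  */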
24955 static tree
24956 arm_promoted_type (const_tree t)
24958 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24959 return float_type_node;
24960 return NULL_TREE;
24963 /* Implement TARGET_CONVERT_TO_TYPE.
24964 Specifically, this hook implements the peculiarity of the ARM
24965 half-precision floating-point C semantics: a conversion between
24966 __fp16 and double, in either direction, must go through an intermediate conversion to float. */
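/* For example, given __fp16 x, the conversion in

     double d = x;

   is expanded by this hook as (double) (float) x rather than as a single
   HFmode-to-DFmode conversion.  */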
24968 static tree
24969 arm_convert_to_type (tree type, tree expr)
24971 tree fromtype = TREE_TYPE (expr);
24972 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24973 return NULL_TREE;
24974 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24975 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24976 return convert (type, convert (float_type_node, expr));
24977 return NULL_TREE;
24980 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24981 This simply adds HFmode as a supported mode; even though we don't
24982 implement arithmetic on this type directly, it's supported by
24983 optabs conversions, much the way the double-word arithmetic is
24984 special-cased in the default hook. */
24986 static bool
24987 arm_scalar_mode_supported_p (enum machine_mode mode)
24989 if (mode == HFmode)
24990 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24991 else if (ALL_FIXED_POINT_MODE_P (mode))
24992 return true;
24993 else
24994 return default_scalar_mode_supported_p (mode);
24997 /* Errors in the source file can cause expand_expr to return const0_rtx
24998 where we expect a vector. To avoid crashing, use one of the vector
24999 clear instructions. */
25001 static rtx
25002 safe_vector_operand (rtx x, enum machine_mode mode)
25004 if (x != const0_rtx)
25005 return x;
25006 x = gen_reg_rtx (mode);
25008 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25009 : gen_rtx_SUBREG (DImode, x, 0)));
25010 return x;
25013 /* Function to expand ternary builtins. */
25014 static rtx
25015 arm_expand_ternop_builtin (enum insn_code icode,
25016 tree exp, rtx target)
25018 rtx pat;
25019 tree arg0 = CALL_EXPR_ARG (exp, 0);
25020 tree arg1 = CALL_EXPR_ARG (exp, 1);
25021 tree arg2 = CALL_EXPR_ARG (exp, 2);
25023 rtx op0 = expand_normal (arg0);
25024 rtx op1 = expand_normal (arg1);
25025 rtx op2 = expand_normal (arg2);
25026 rtx op3 = NULL_RTX;
25028 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25029 lane operand depending on endianness. */
25030 bool builtin_sha1cpm_p = false;
25032 if (insn_data[icode].n_operands == 5)
25034 gcc_assert (icode == CODE_FOR_crypto_sha1c
25035 || icode == CODE_FOR_crypto_sha1p
25036 || icode == CODE_FOR_crypto_sha1m);
25037 builtin_sha1cpm_p = true;
25039 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25040 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25041 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25042 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
25045 if (VECTOR_MODE_P (mode0))
25046 op0 = safe_vector_operand (op0, mode0);
25047 if (VECTOR_MODE_P (mode1))
25048 op1 = safe_vector_operand (op1, mode1);
25049 if (VECTOR_MODE_P (mode2))
25050 op2 = safe_vector_operand (op2, mode2);
25052 if (! target
25053 || GET_MODE (target) != tmode
25054 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25055 target = gen_reg_rtx (tmode);
25057 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25058 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25059 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25061 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25062 op0 = copy_to_mode_reg (mode0, op0);
25063 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25064 op1 = copy_to_mode_reg (mode1, op1);
25065 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25066 op2 = copy_to_mode_reg (mode2, op2);
25067 if (builtin_sha1cpm_p)
25068 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25070 if (builtin_sha1cpm_p)
25071 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25072 else
25073 pat = GEN_FCN (icode) (target, op0, op1, op2);
25074 if (! pat)
25075 return 0;
25076 emit_insn (pat);
25077 return target;
25080 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25082 static rtx
25083 arm_expand_binop_builtin (enum insn_code icode,
25084 tree exp, rtx target)
25086 rtx pat;
25087 tree arg0 = CALL_EXPR_ARG (exp, 0);
25088 tree arg1 = CALL_EXPR_ARG (exp, 1);
25089 rtx op0 = expand_normal (arg0);
25090 rtx op1 = expand_normal (arg1);
25091 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25092 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25093 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25095 if (VECTOR_MODE_P (mode0))
25096 op0 = safe_vector_operand (op0, mode0);
25097 if (VECTOR_MODE_P (mode1))
25098 op1 = safe_vector_operand (op1, mode1);
25100 if (! target
25101 || GET_MODE (target) != tmode
25102 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25103 target = gen_reg_rtx (tmode);
25105 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25106 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25108 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25109 op0 = copy_to_mode_reg (mode0, op0);
25110 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25111 op1 = copy_to_mode_reg (mode1, op1);
25113 pat = GEN_FCN (icode) (target, op0, op1);
25114 if (! pat)
25115 return 0;
25116 emit_insn (pat);
25117 return target;
25120 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25122 static rtx
25123 arm_expand_unop_builtin (enum insn_code icode,
25124 tree exp, rtx target, int do_load)
25126 rtx pat;
25127 tree arg0 = CALL_EXPR_ARG (exp, 0);
25128 rtx op0 = expand_normal (arg0);
25129 rtx op1 = NULL_RTX;
25130 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25131 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25132 bool builtin_sha1h_p = false;
25134 if (insn_data[icode].n_operands == 3)
25136 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25137 builtin_sha1h_p = true;
25140 if (! target
25141 || GET_MODE (target) != tmode
25142 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25143 target = gen_reg_rtx (tmode);
25144 if (do_load)
25145 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25146 else
25148 if (VECTOR_MODE_P (mode0))
25149 op0 = safe_vector_operand (op0, mode0);
25151 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25152 op0 = copy_to_mode_reg (mode0, op0);
25154 if (builtin_sha1h_p)
25155 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25157 if (builtin_sha1h_p)
25158 pat = GEN_FCN (icode) (target, op0, op1);
25159 else
25160 pat = GEN_FCN (icode) (target, op0);
25161 if (! pat)
25162 return 0;
25163 emit_insn (pat);
25164 return target;
25167 typedef enum {
25168 NEON_ARG_COPY_TO_REG,
25169 NEON_ARG_CONSTANT,
25170 NEON_ARG_MEMORY,
25171 NEON_ARG_STOP
25172 } builtin_arg;
25174 #define NEON_MAX_BUILTIN_ARGS 5
25176 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25177 and return an expression for the accessed memory.
25179 The intrinsic function operates on a block of registers that has
25180 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25181 function references the memory at EXP of type TYPE and in mode
25182 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25183 available. */
25185 static tree
25186 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25187 enum machine_mode reg_mode,
25188 neon_builtin_type_mode type_mode)
25190 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25191 tree elem_type, upper_bound, array_type;
25193 /* Work out the size of the register block in bytes. */
25194 reg_size = GET_MODE_SIZE (reg_mode);
25196 /* Work out the size of each vector in bytes. */
25197 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25198 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25200 /* Work out how many vectors there are. */
25201 gcc_assert (reg_size % vector_size == 0);
25202 nvectors = reg_size / vector_size;
25204 /* Work out the type of each element. */
25205 gcc_assert (POINTER_TYPE_P (type));
25206 elem_type = TREE_TYPE (type);
25208 /* Work out how many elements are being loaded or stored.
25209 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25210 and memory elements; anything else implies a lane load or store. */
25211 if (mem_mode == reg_mode)
25212 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25213 else
25214 nelems = nvectors;
25216 /* Create a type that describes the full access. */
25217 upper_bound = build_int_cst (size_type_node, nelems - 1);
25218 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25220 /* Dereference EXP using that type. */
25221 return fold_build2 (MEM_REF, array_type, exp,
25222 build_int_cst (build_pointer_type (array_type), 0));
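/* Illustrative sketch, not part of the original file: the size arithmetic
   above worked through with concrete numbers.  The mapping to a two-vector
   quad-word load of 32-bit elements (something like vld2q_u32) is an
   assumption used only to pick the numbers.  */
#if 0
#include <assert.h>

static void
neon_dereference_pointer_example (void)
{
  long reg_size = 32;       /* Register block: two q registers, 2 * 16.   */
  long vector_size = 16;    /* TB_QREG vectors are 16 bytes each.         */
  long nvectors = reg_size / vector_size;            /* 2 vectors.        */
  long elem_size = 4;       /* 32-bit elements.                           */

  /* Full load (MEM_MODE == REG_MODE): every element is accessed.  */
  long nelems_full = vector_size * nvectors / elem_size;     /* 8 */

  /* Lane load or store: one element per vector is accessed.  */
  long nelems_lane = nvectors;                               /* 2 */

  assert (nelems_full == 8 && nelems_lane == 2);
}
#endif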
25225 /* Expand a Neon builtin. */
25226 static rtx
25227 arm_expand_neon_args (rtx target, int icode, int have_retval,
25228 neon_builtin_type_mode type_mode,
25229 tree exp, int fcode, ...)
25231 va_list ap;
25232 rtx pat;
25233 tree arg[NEON_MAX_BUILTIN_ARGS];
25234 rtx op[NEON_MAX_BUILTIN_ARGS];
25235 tree arg_type;
25236 tree formals;
25237 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25238 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25239 enum machine_mode other_mode;
25240 int argc = 0;
25241 int opno;
25243 if (have_retval
25244 && (!target
25245 || GET_MODE (target) != tmode
25246 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25247 target = gen_reg_rtx (tmode);
25249 va_start (ap, fcode);
25251 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25253 for (;;)
25255 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25257 if (thisarg == NEON_ARG_STOP)
25258 break;
25259 else
25261 opno = argc + have_retval;
25262 mode[argc] = insn_data[icode].operand[opno].mode;
25263 arg[argc] = CALL_EXPR_ARG (exp, argc);
25264 arg_type = TREE_VALUE (formals);
25265 if (thisarg == NEON_ARG_MEMORY)
25267 other_mode = insn_data[icode].operand[1 - opno].mode;
25268 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25269 mode[argc], other_mode,
25270 type_mode);
25273 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
25274 is returned. */
25275 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25276 (thisarg == NEON_ARG_MEMORY
25277 ? EXPAND_MEMORY : EXPAND_NORMAL));
25279 switch (thisarg)
25281 case NEON_ARG_COPY_TO_REG:
25282 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25283 if (!(*insn_data[icode].operand[opno].predicate)
25284 (op[argc], mode[argc]))
25285 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25286 break;
25288 case NEON_ARG_CONSTANT:
25289 /* FIXME: This error message is somewhat unhelpful. */
25290 if (!(*insn_data[icode].operand[opno].predicate)
25291 (op[argc], mode[argc]))
25292 error ("argument must be a constant");
25293 break;
25295 case NEON_ARG_MEMORY:
25296 /* Check if expand failed. */
25297 if (op[argc] == const0_rtx)
25298 return 0;
25299 gcc_assert (MEM_P (op[argc]));
25300 PUT_MODE (op[argc], mode[argc]);
25301 /* ??? arm_neon.h uses the same built-in functions for signed
25302 and unsigned accesses, casting where necessary. This isn't
25303 alias safe. */
25304 set_mem_alias_set (op[argc], 0);
25305 if (!(*insn_data[icode].operand[opno].predicate)
25306 (op[argc], mode[argc]))
25307 op[argc] = (replace_equiv_address
25308 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25309 break;
25311 case NEON_ARG_STOP:
25312 gcc_unreachable ();
25315 argc++;
25316 formals = TREE_CHAIN (formals);
25320 va_end (ap);
25322 if (have_retval)
25323 switch (argc)
25325 case 1:
25326 pat = GEN_FCN (icode) (target, op[0]);
25327 break;
25329 case 2:
25330 pat = GEN_FCN (icode) (target, op[0], op[1]);
25331 break;
25333 case 3:
25334 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25335 break;
25337 case 4:
25338 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25339 break;
25341 case 5:
25342 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25343 break;
25345 default:
25346 gcc_unreachable ();
25348 else
25349 switch (argc)
25351 case 1:
25352 pat = GEN_FCN (icode) (op[0]);
25353 break;
25355 case 2:
25356 pat = GEN_FCN (icode) (op[0], op[1]);
25357 break;
25359 case 3:
25360 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25361 break;
25363 case 4:
25364 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25365 break;
25367 case 5:
25368 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25369 break;
25371 default:
25372 gcc_unreachable ();
25375 if (!pat)
25376 return 0;
25378 emit_insn (pat);
25380 return target;
25383 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25384 constants defined per-instruction or per instruction-variant. Instead, the
25385 required info is looked up in the table neon_builtin_data. */
25386 static rtx
25387 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25389 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25390 neon_itype itype = d->itype;
25391 enum insn_code icode = d->code;
25392 neon_builtin_type_mode type_mode = d->mode;
25394 switch (itype)
25396 case NEON_UNOP:
25397 case NEON_CONVERT:
25398 case NEON_DUPLANE:
25399 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25400 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25402 case NEON_BINOP:
25403 case NEON_SETLANE:
25404 case NEON_SCALARMUL:
25405 case NEON_SCALARMULL:
25406 case NEON_SCALARMULH:
25407 case NEON_SHIFTINSERT:
25408 case NEON_LOGICBINOP:
25409 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25410 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25411 NEON_ARG_STOP);
25413 case NEON_TERNOP:
25414 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25415 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25416 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25418 case NEON_GETLANE:
25419 case NEON_FIXCONV:
25420 case NEON_SHIFTIMM:
25421 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25422 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25423 NEON_ARG_STOP);
25425 case NEON_CREATE:
25426 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25427 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25429 case NEON_DUP:
25430 case NEON_RINT:
25431 case NEON_SPLIT:
25432 case NEON_FLOAT_WIDEN:
25433 case NEON_FLOAT_NARROW:
25434 case NEON_BSWAP:
25435 case NEON_REINTERP:
25436 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25437 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25439 case NEON_COMBINE:
25440 case NEON_VTBL:
25441 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25442 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25444 case NEON_LANEMUL:
25445 case NEON_LANEMULL:
25446 case NEON_LANEMULH:
25447 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25448 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25449 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25451 case NEON_LANEMAC:
25452 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25453 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25454 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25456 case NEON_SHIFTACC:
25457 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25458 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25459 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25461 case NEON_SCALARMAC:
25462 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25463 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25464 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25466 case NEON_SELECT:
25467 case NEON_VTBX:
25468 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25469 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25470 NEON_ARG_STOP);
25472 case NEON_LOAD1:
25473 case NEON_LOADSTRUCT:
25474 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25475 NEON_ARG_MEMORY, NEON_ARG_STOP);
25477 case NEON_LOAD1LANE:
25478 case NEON_LOADSTRUCTLANE:
25479 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25480 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25481 NEON_ARG_STOP);
25483 case NEON_STORE1:
25484 case NEON_STORESTRUCT:
25485 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25486 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25488 case NEON_STORE1LANE:
25489 case NEON_STORESTRUCTLANE:
25490 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25491 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25492 NEON_ARG_STOP);
25495 gcc_unreachable ();
25498 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25499 void
25500 neon_reinterpret (rtx dest, rtx src)
25502 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
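/* Illustrative sketch, not part of the original file: neon_reinterpret is
   a plain register move, so a reinterpret only changes how the bytes are
   typed, never their values.  The same property shown in portable C.  */
#if 0
#include <stdint.h>
#include <string.h>
#include <assert.h>

static void
reinterpret_example (void)
{
  uint32_t bits = 0x3f800000u;   /* Bit pattern of the float 1.0f.  */
  float as_float;

  memcpy (&as_float, &bits, sizeof as_float);  /* Same bits, new type.  */
  assert (as_float == 1.0f);
}
#endif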
25505 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25506 not to early-clobber SRC registers in the process.
25508 We assume that the operands described by SRC and DEST represent a
25509 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25510 number of components into which the copy has been decomposed. */
25511 void
25512 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25514 unsigned int i;
25516 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25517 || REGNO (operands[0]) < REGNO (operands[1]))
25519 for (i = 0; i < count; i++)
25521 operands[2 * i] = dest[i];
25522 operands[2 * i + 1] = src[i];
25525 else
25527 for (i = 0; i < count; i++)
25529 operands[2 * i] = dest[count - i - 1];
25530 operands[2 * i + 1] = src[count - i - 1];
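/* Illustrative sketch, not part of the original file: the same ordering
   rule as above expressed with plain array slots standing in for hard
   registers.  When the destination range starts above the source range,
   copying from the highest component downwards guarantees that no source
   component is overwritten before it has been read.  */
#if 0
#include <stddef.h>

static void
overlap_safe_copy (unsigned *slots, size_t dest, size_t src, size_t count)
{
  size_t i;

  if (dest <= src)
    for (i = 0; i < count; i++)          /* Forward order cannot clobber. */
      slots[dest + i] = slots[src + i];
  else
    for (i = count; i-- > 0; )           /* Otherwise copy backwards.     */
      slots[dest + i] = slots[src + i];
}
#endif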
25535 /* Split operands into moves from op[1] + op[2] into op[0]. */
25537 void
25538 neon_split_vcombine (rtx operands[3])
25540 unsigned int dest = REGNO (operands[0]);
25541 unsigned int src1 = REGNO (operands[1]);
25542 unsigned int src2 = REGNO (operands[2]);
25543 enum machine_mode halfmode = GET_MODE (operands[1]);
25544 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25545 rtx destlo, desthi;
25547 if (src1 == dest && src2 == dest + halfregs)
25549 /* No-op move. Can't split to nothing; emit something. */
25550 emit_note (NOTE_INSN_DELETED);
25551 return;
25554 /* Preserve register attributes for variable tracking. */
25555 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25556 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25557 GET_MODE_SIZE (halfmode));
25559 /* Special case of reversed high/low parts. Use VSWP. */
25560 if (src2 == dest && src1 == dest + halfregs)
25562 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25563 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25564 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25565 return;
25568 if (!reg_overlap_mentioned_p (operands[2], destlo))
25570 /* Try to avoid unnecessary moves if part of the result
25571 is in the right place already. */
25572 if (src1 != dest)
25573 emit_move_insn (destlo, operands[1]);
25574 if (src2 != dest + halfregs)
25575 emit_move_insn (desthi, operands[2]);
25577 else
25579 if (src2 != dest + halfregs)
25580 emit_move_insn (desthi, operands[2]);
25581 if (src1 != dest)
25582 emit_move_insn (destlo, operands[1]);
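/* Illustrative sketch, not part of the original file: the case analysis
   above restated over abstract hard-register numbers.  DEST is the first
   register of the destination pair, SRC1/SRC2 the two sources, HALFREGS
   the number of hard registers in one half; the overlap test is reduced
   to a simple range check for the sketch.  */
#if 0
enum vcombine_plan { PLAN_NOP, PLAN_SWAP, PLAN_LO_THEN_HI, PLAN_HI_THEN_LO };

static enum vcombine_plan
classify_vcombine (unsigned dest, unsigned src1, unsigned src2,
                   unsigned halfregs)
{
  if (src1 == dest && src2 == dest + halfregs)
    return PLAN_NOP;            /* Both halves are already in place.      */
  if (src2 == dest && src1 == dest + halfregs)
    return PLAN_SWAP;           /* Halves present but reversed: use VSWP. */
  if (src2 + halfregs <= dest || src2 >= dest + halfregs)
    return PLAN_LO_THEN_HI;     /* Writing the low half first cannot
                                   clobber the source of the high half.   */
  return PLAN_HI_THEN_LO;       /* Otherwise write the high half first.   */
}
#endif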
25586 /* Expand an expression EXP that calls a built-in function,
25587 with result going to TARGET if that's convenient
25588 (and in mode MODE if that's convenient).
25589 SUBTARGET may be used as the target for computing one of EXP's operands.
25590 IGNORE is nonzero if the value is to be ignored. */
25592 static rtx
25593 arm_expand_builtin (tree exp,
25594 rtx target,
25595 rtx subtarget ATTRIBUTE_UNUSED,
25596 enum machine_mode mode ATTRIBUTE_UNUSED,
25597 int ignore ATTRIBUTE_UNUSED)
25599 const struct builtin_description * d;
25600 enum insn_code icode;
25601 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25602 tree arg0;
25603 tree arg1;
25604 tree arg2;
25605 rtx op0;
25606 rtx op1;
25607 rtx op2;
25608 rtx pat;
25609 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25610 size_t i;
25611 enum machine_mode tmode;
25612 enum machine_mode mode0;
25613 enum machine_mode mode1;
25614 enum machine_mode mode2;
25615 int opint;
25616 int selector;
25617 int mask;
25618 int imm;
25620 if (fcode >= ARM_BUILTIN_NEON_BASE)
25621 return arm_expand_neon_builtin (fcode, exp, target);
25623 switch (fcode)
25625 case ARM_BUILTIN_GET_FPSCR:
25626 case ARM_BUILTIN_SET_FPSCR:
25627 if (fcode == ARM_BUILTIN_GET_FPSCR)
25629 icode = CODE_FOR_get_fpscr;
25630 target = gen_reg_rtx (SImode);
25631 pat = GEN_FCN (icode) (target);
25633 else
25635 target = NULL_RTX;
25636 icode = CODE_FOR_set_fpscr;
25637 arg0 = CALL_EXPR_ARG (exp, 0);
25638 op0 = expand_normal (arg0);
25639 pat = GEN_FCN (icode) (op0);
25641 emit_insn (pat);
25642 return target;
25644 case ARM_BUILTIN_TEXTRMSB:
25645 case ARM_BUILTIN_TEXTRMUB:
25646 case ARM_BUILTIN_TEXTRMSH:
25647 case ARM_BUILTIN_TEXTRMUH:
25648 case ARM_BUILTIN_TEXTRMSW:
25649 case ARM_BUILTIN_TEXTRMUW:
25650 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25651 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25652 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25653 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25654 : CODE_FOR_iwmmxt_textrmw);
25656 arg0 = CALL_EXPR_ARG (exp, 0);
25657 arg1 = CALL_EXPR_ARG (exp, 1);
25658 op0 = expand_normal (arg0);
25659 op1 = expand_normal (arg1);
25660 tmode = insn_data[icode].operand[0].mode;
25661 mode0 = insn_data[icode].operand[1].mode;
25662 mode1 = insn_data[icode].operand[2].mode;
25664 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25665 op0 = copy_to_mode_reg (mode0, op0);
25666 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25668 /* @@@ better error message */
25669 error ("selector must be an immediate");
25670 return gen_reg_rtx (tmode);
25673 opint = INTVAL (op1);
25674 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25676 if (opint > 7 || opint < 0)
25677 error ("the range of selector should be in 0 to 7");
25679 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25681 if (opint > 3 || opint < 0)
25682 error ("the range of selector should be in 0 to 3");
25684 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25686 if (opint > 1 || opint < 0)
25687 error ("the range of selector should be in 0 to 1");
25690 if (target == 0
25691 || GET_MODE (target) != tmode
25692 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25693 target = gen_reg_rtx (tmode);
25694 pat = GEN_FCN (icode) (target, op0, op1);
25695 if (! pat)
25696 return 0;
25697 emit_insn (pat);
25698 return target;
25700 case ARM_BUILTIN_WALIGNI:
25701 /* If op2 is immediate, call waligni, else call walignr. */
25702 arg0 = CALL_EXPR_ARG (exp, 0);
25703 arg1 = CALL_EXPR_ARG (exp, 1);
25704 arg2 = CALL_EXPR_ARG (exp, 2);
25705 op0 = expand_normal (arg0);
25706 op1 = expand_normal (arg1);
25707 op2 = expand_normal (arg2);
25708 if (CONST_INT_P (op2))
25710 icode = CODE_FOR_iwmmxt_waligni;
25711 tmode = insn_data[icode].operand[0].mode;
25712 mode0 = insn_data[icode].operand[1].mode;
25713 mode1 = insn_data[icode].operand[2].mode;
25714 mode2 = insn_data[icode].operand[3].mode;
25715 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25716 op0 = copy_to_mode_reg (mode0, op0);
25717 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25718 op1 = copy_to_mode_reg (mode1, op1);
25719 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25720 selector = INTVAL (op2);
25721 if (selector > 7 || selector < 0)
25722 error ("the range of selector should be in 0 to 7");
25724 else
25726 icode = CODE_FOR_iwmmxt_walignr;
25727 tmode = insn_data[icode].operand[0].mode;
25728 mode0 = insn_data[icode].operand[1].mode;
25729 mode1 = insn_data[icode].operand[2].mode;
25730 mode2 = insn_data[icode].operand[3].mode;
25731 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25732 op0 = copy_to_mode_reg (mode0, op0);
25733 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25734 op1 = copy_to_mode_reg (mode1, op1);
25735 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25736 op2 = copy_to_mode_reg (mode2, op2);
25738 if (target == 0
25739 || GET_MODE (target) != tmode
25740 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25741 target = gen_reg_rtx (tmode);
25742 pat = GEN_FCN (icode) (target, op0, op1, op2);
25743 if (!pat)
25744 return 0;
25745 emit_insn (pat);
25746 return target;
25748 case ARM_BUILTIN_TINSRB:
25749 case ARM_BUILTIN_TINSRH:
25750 case ARM_BUILTIN_TINSRW:
25751 case ARM_BUILTIN_WMERGE:
25752 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25753 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25754 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25755 : CODE_FOR_iwmmxt_tinsrw);
25756 arg0 = CALL_EXPR_ARG (exp, 0);
25757 arg1 = CALL_EXPR_ARG (exp, 1);
25758 arg2 = CALL_EXPR_ARG (exp, 2);
25759 op0 = expand_normal (arg0);
25760 op1 = expand_normal (arg1);
25761 op2 = expand_normal (arg2);
25762 tmode = insn_data[icode].operand[0].mode;
25763 mode0 = insn_data[icode].operand[1].mode;
25764 mode1 = insn_data[icode].operand[2].mode;
25765 mode2 = insn_data[icode].operand[3].mode;
25767 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25768 op0 = copy_to_mode_reg (mode0, op0);
25769 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25770 op1 = copy_to_mode_reg (mode1, op1);
25771 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25773 error ("selector must be an immediate");
25774 return const0_rtx;
25776 if (icode == CODE_FOR_iwmmxt_wmerge)
25778 selector = INTVAL (op2);
25779 if (selector > 7 || selector < 0)
25780 error ("the range of selector should be in 0 to 7");
25782 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25783 || (icode == CODE_FOR_iwmmxt_tinsrh)
25784 || (icode == CODE_FOR_iwmmxt_tinsrw))
25786 mask = 0x01;
25787 selector= INTVAL (op2);
25788 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25789 error ("the range of selector should be in 0 to 7");
25790 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25791 error ("the range of selector should be in 0 to 3");
25792 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25793 error ("the range of selector should be in 0 to 1");
25794 mask <<= selector;
25795 op2 = GEN_INT (mask);
25797 if (target == 0
25798 || GET_MODE (target) != tmode
25799 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25800 target = gen_reg_rtx (tmode);
25801 pat = GEN_FCN (icode) (target, op0, op1, op2);
25802 if (! pat)
25803 return 0;
25804 emit_insn (pat);
25805 return target;
25807 case ARM_BUILTIN_SETWCGR0:
25808 case ARM_BUILTIN_SETWCGR1:
25809 case ARM_BUILTIN_SETWCGR2:
25810 case ARM_BUILTIN_SETWCGR3:
25811 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25812 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25813 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25814 : CODE_FOR_iwmmxt_setwcgr3);
25815 arg0 = CALL_EXPR_ARG (exp, 0);
25816 op0 = expand_normal (arg0);
25817 mode0 = insn_data[icode].operand[0].mode;
25818 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25819 op0 = copy_to_mode_reg (mode0, op0);
25820 pat = GEN_FCN (icode) (op0);
25821 if (!pat)
25822 return 0;
25823 emit_insn (pat);
25824 return 0;
25826 case ARM_BUILTIN_GETWCGR0:
25827 case ARM_BUILTIN_GETWCGR1:
25828 case ARM_BUILTIN_GETWCGR2:
25829 case ARM_BUILTIN_GETWCGR3:
25830 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25831 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25832 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25833 : CODE_FOR_iwmmxt_getwcgr3);
25834 tmode = insn_data[icode].operand[0].mode;
25835 if (target == 0
25836 || GET_MODE (target) != tmode
25837 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25838 target = gen_reg_rtx (tmode);
25839 pat = GEN_FCN (icode) (target);
25840 if (!pat)
25841 return 0;
25842 emit_insn (pat);
25843 return target;
25845 case ARM_BUILTIN_WSHUFH:
25846 icode = CODE_FOR_iwmmxt_wshufh;
25847 arg0 = CALL_EXPR_ARG (exp, 0);
25848 arg1 = CALL_EXPR_ARG (exp, 1);
25849 op0 = expand_normal (arg0);
25850 op1 = expand_normal (arg1);
25851 tmode = insn_data[icode].operand[0].mode;
25852 mode1 = insn_data[icode].operand[1].mode;
25853 mode2 = insn_data[icode].operand[2].mode;
25855 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25856 op0 = copy_to_mode_reg (mode1, op0);
25857 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25859 error ("mask must be an immediate");
25860 return const0_rtx;
25862 selector = INTVAL (op1);
25863 if (selector < 0 || selector > 255)
25864 error ("the range of mask should be in 0 to 255");
25865 if (target == 0
25866 || GET_MODE (target) != tmode
25867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25868 target = gen_reg_rtx (tmode);
25869 pat = GEN_FCN (icode) (target, op0, op1);
25870 if (! pat)
25871 return 0;
25872 emit_insn (pat);
25873 return target;
25875 case ARM_BUILTIN_WMADDS:
25876 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25877 case ARM_BUILTIN_WMADDSX:
25878 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25879 case ARM_BUILTIN_WMADDSN:
25880 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25881 case ARM_BUILTIN_WMADDU:
25882 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25883 case ARM_BUILTIN_WMADDUX:
25884 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25885 case ARM_BUILTIN_WMADDUN:
25886 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25887 case ARM_BUILTIN_WSADBZ:
25888 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25889 case ARM_BUILTIN_WSADHZ:
25890 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25892 /* Several three-argument builtins. */
25893 case ARM_BUILTIN_WMACS:
25894 case ARM_BUILTIN_WMACU:
25895 case ARM_BUILTIN_TMIA:
25896 case ARM_BUILTIN_TMIAPH:
25897 case ARM_BUILTIN_TMIATT:
25898 case ARM_BUILTIN_TMIATB:
25899 case ARM_BUILTIN_TMIABT:
25900 case ARM_BUILTIN_TMIABB:
25901 case ARM_BUILTIN_WQMIABB:
25902 case ARM_BUILTIN_WQMIABT:
25903 case ARM_BUILTIN_WQMIATB:
25904 case ARM_BUILTIN_WQMIATT:
25905 case ARM_BUILTIN_WQMIABBN:
25906 case ARM_BUILTIN_WQMIABTN:
25907 case ARM_BUILTIN_WQMIATBN:
25908 case ARM_BUILTIN_WQMIATTN:
25909 case ARM_BUILTIN_WMIABB:
25910 case ARM_BUILTIN_WMIABT:
25911 case ARM_BUILTIN_WMIATB:
25912 case ARM_BUILTIN_WMIATT:
25913 case ARM_BUILTIN_WMIABBN:
25914 case ARM_BUILTIN_WMIABTN:
25915 case ARM_BUILTIN_WMIATBN:
25916 case ARM_BUILTIN_WMIATTN:
25917 case ARM_BUILTIN_WMIAWBB:
25918 case ARM_BUILTIN_WMIAWBT:
25919 case ARM_BUILTIN_WMIAWTB:
25920 case ARM_BUILTIN_WMIAWTT:
25921 case ARM_BUILTIN_WMIAWBBN:
25922 case ARM_BUILTIN_WMIAWBTN:
25923 case ARM_BUILTIN_WMIAWTBN:
25924 case ARM_BUILTIN_WMIAWTTN:
25925 case ARM_BUILTIN_WSADB:
25926 case ARM_BUILTIN_WSADH:
25927 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25928 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25929 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25930 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25931 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25932 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25933 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25934 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25935 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25936 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25937 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25938 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25939 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25940 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25941 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25942 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25943 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25944 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25945 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25946 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25947 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25948 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25949 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25950 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25951 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25952 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25953 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25954 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25955 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25956 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25957 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25958 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25959 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25960 : CODE_FOR_iwmmxt_wsadh);
25961 arg0 = CALL_EXPR_ARG (exp, 0);
25962 arg1 = CALL_EXPR_ARG (exp, 1);
25963 arg2 = CALL_EXPR_ARG (exp, 2);
25964 op0 = expand_normal (arg0);
25965 op1 = expand_normal (arg1);
25966 op2 = expand_normal (arg2);
25967 tmode = insn_data[icode].operand[0].mode;
25968 mode0 = insn_data[icode].operand[1].mode;
25969 mode1 = insn_data[icode].operand[2].mode;
25970 mode2 = insn_data[icode].operand[3].mode;
25972 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25973 op0 = copy_to_mode_reg (mode0, op0);
25974 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25975 op1 = copy_to_mode_reg (mode1, op1);
25976 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25977 op2 = copy_to_mode_reg (mode2, op2);
25978 if (target == 0
25979 || GET_MODE (target) != tmode
25980 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25981 target = gen_reg_rtx (tmode);
25982 pat = GEN_FCN (icode) (target, op0, op1, op2);
25983 if (! pat)
25984 return 0;
25985 emit_insn (pat);
25986 return target;
25988 case ARM_BUILTIN_WZERO:
25989 target = gen_reg_rtx (DImode);
25990 emit_insn (gen_iwmmxt_clrdi (target));
25991 return target;
25993 case ARM_BUILTIN_WSRLHI:
25994 case ARM_BUILTIN_WSRLWI:
25995 case ARM_BUILTIN_WSRLDI:
25996 case ARM_BUILTIN_WSLLHI:
25997 case ARM_BUILTIN_WSLLWI:
25998 case ARM_BUILTIN_WSLLDI:
25999 case ARM_BUILTIN_WSRAHI:
26000 case ARM_BUILTIN_WSRAWI:
26001 case ARM_BUILTIN_WSRADI:
26002 case ARM_BUILTIN_WRORHI:
26003 case ARM_BUILTIN_WRORWI:
26004 case ARM_BUILTIN_WRORDI:
26005 case ARM_BUILTIN_WSRLH:
26006 case ARM_BUILTIN_WSRLW:
26007 case ARM_BUILTIN_WSRLD:
26008 case ARM_BUILTIN_WSLLH:
26009 case ARM_BUILTIN_WSLLW:
26010 case ARM_BUILTIN_WSLLD:
26011 case ARM_BUILTIN_WSRAH:
26012 case ARM_BUILTIN_WSRAW:
26013 case ARM_BUILTIN_WSRAD:
26014 case ARM_BUILTIN_WRORH:
26015 case ARM_BUILTIN_WRORW:
26016 case ARM_BUILTIN_WRORD:
26017 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26018 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26019 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26020 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26021 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26022 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26023 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26024 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26025 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26026 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26027 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26028 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26029 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26030 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26031 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26032 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26033 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26034 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26035 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26036 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26037 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26038 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26039 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26040 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26041 : CODE_FOR_nothing);
26042 arg1 = CALL_EXPR_ARG (exp, 1);
26043 op1 = expand_normal (arg1);
26044 if (GET_MODE (op1) == VOIDmode)
26046 imm = INTVAL (op1);
26047 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26048 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26049 && (imm < 0 || imm > 32))
26051 if (fcode == ARM_BUILTIN_WRORHI)
26052 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26053 else if (fcode == ARM_BUILTIN_WRORWI)
26054 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26055 else if (fcode == ARM_BUILTIN_WRORH)
26056 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26057 else
26058 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26060 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26061 && (imm < 0 || imm > 64))
26063 if (fcode == ARM_BUILTIN_WRORDI)
26064 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26065 else
26066 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26068 else if (imm < 0)
26070 if (fcode == ARM_BUILTIN_WSRLHI)
26071 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26072 else if (fcode == ARM_BUILTIN_WSRLWI)
26073 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26074 else if (fcode == ARM_BUILTIN_WSRLDI)
26075 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26076 else if (fcode == ARM_BUILTIN_WSLLHI)
26077 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26078 else if (fcode == ARM_BUILTIN_WSLLWI)
26079 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26080 else if (fcode == ARM_BUILTIN_WSLLDI)
26081 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26082 else if (fcode == ARM_BUILTIN_WSRAHI)
26083 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26084 else if (fcode == ARM_BUILTIN_WSRAWI)
26085 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26086 else if (fcode == ARM_BUILTIN_WSRADI)
26087 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26088 else if (fcode == ARM_BUILTIN_WSRLH)
26089 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26090 else if (fcode == ARM_BUILTIN_WSRLW)
26091 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26092 else if (fcode == ARM_BUILTIN_WSRLD)
26093 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26094 else if (fcode == ARM_BUILTIN_WSLLH)
26095 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26096 else if (fcode == ARM_BUILTIN_WSLLW)
26097 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26098 else if (fcode == ARM_BUILTIN_WSLLD)
26099 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26100 else if (fcode == ARM_BUILTIN_WSRAH)
26101 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26102 else if (fcode == ARM_BUILTIN_WSRAW)
26103 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26104 else
26105 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26108 return arm_expand_binop_builtin (icode, exp, target);
26110 default:
26111 break;
26114 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26115 if (d->code == (const enum arm_builtins) fcode)
26116 return arm_expand_binop_builtin (d->icode, exp, target);
26118 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26119 if (d->code == (const enum arm_builtins) fcode)
26120 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26122 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26123 if (d->code == (const enum arm_builtins) fcode)
26124 return arm_expand_ternop_builtin (d->icode, exp, target);
26126 /* @@@ Should really do something sensible here. */
26127 return NULL_RTX;
26130 /* Return the number (counting from 0) of
26131 the least significant set bit in MASK. */
26133 inline static int
26134 number_of_first_bit_set (unsigned mask)
26136 return ctz_hwi (mask);
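/* Illustrative sketch, not part of the original file: a quick check of the
   helper above using GCC's __builtin_ctz, which performs the same
   count-trailing-zeros operation as ctz_hwi.  */
#if 0
#include <assert.h>

static void
number_of_first_bit_set_example (void)
{
  unsigned mask = 0x28;              /* Binary 101000: lowest set bit 3.  */
  assert (__builtin_ctz (mask) == 3);
}
#endif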
26139 /* Like emit_multi_reg_push, but allowing for a different set of
26140 registers to be described as saved. MASK is the set of registers
26141 to be saved; REAL_REGS is the set of registers to be described as
26142 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26144 static rtx
26145 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26147 unsigned long regno;
26148 rtx par[10], tmp, reg, insn;
26149 int i, j;
26151 /* Build the parallel of the registers actually being stored. */
26152 for (i = 0; mask; ++i, mask &= mask - 1)
26154 regno = ctz_hwi (mask);
26155 reg = gen_rtx_REG (SImode, regno);
26157 if (i == 0)
26158 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26159 else
26160 tmp = gen_rtx_USE (VOIDmode, reg);
26162 par[i] = tmp;
26165 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26166 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26167 tmp = gen_frame_mem (BLKmode, tmp);
26168 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26169 par[0] = tmp;
26171 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26172 insn = emit_insn (tmp);
26174 /* Always build the stack adjustment note for unwind info. */
26175 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26176 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26177 par[0] = tmp;
26179 /* Build the parallel of the registers recorded as saved for unwind. */
26180 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26182 regno = ctz_hwi (real_regs);
26183 reg = gen_rtx_REG (SImode, regno);
26185 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26186 tmp = gen_frame_mem (SImode, tmp);
26187 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26188 RTX_FRAME_RELATED_P (tmp) = 1;
26189 par[j + 1] = tmp;
26192 if (j == 0)
26193 tmp = par[0];
26194 else
26196 RTX_FRAME_RELATED_P (par[0]) = 1;
26197 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26200 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26202 return insn;
26205 /* Emit code to push or pop registers to or from the stack. F is the
26206 assembly file. MASK is the registers to pop. */
26207 static void
26208 thumb_pop (FILE *f, unsigned long mask)
26210 int regno;
26211 int lo_mask = mask & 0xFF;
26212 int pushed_words = 0;
26214 gcc_assert (mask);
26216 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26218 /* Special case. Do not generate a POP PC statement here; do it in
26219 thumb_exit(). */
26220 thumb_exit (f, -1);
26221 return;
26224 fprintf (f, "\tpop\t{");
26226 /* Look at the low registers first. */
26227 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26229 if (lo_mask & 1)
26231 asm_fprintf (f, "%r", regno);
26233 if ((lo_mask & ~1) != 0)
26234 fprintf (f, ", ");
26236 pushed_words++;
26240 if (mask & (1 << PC_REGNUM))
26242 /* Catch popping the PC. */
26243 if (TARGET_INTERWORK || TARGET_BACKTRACE
26244 || crtl->calls_eh_return)
26246 /* The PC is never popped directly; instead
26247 it is popped into r3 and then BX is used. */
26248 fprintf (f, "}\n");
26250 thumb_exit (f, -1);
26252 return;
26254 else
26256 if (mask & 0xFF)
26257 fprintf (f, ", ");
26259 asm_fprintf (f, "%r", PC_REGNUM);
26263 fprintf (f, "}\n");
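/* Illustrative sketch, not part of the original file: the mask-to-text
   mapping of the function above reduced to its simplest case (low
   registers only, no PC, no interworking), using plain printf instead of
   asm_fprintf.  */
#if 0
#include <stdio.h>

static void
print_simple_pop (unsigned long mask)    /* e.g. (1UL << 4) | (1UL << 6) */
{
  int regno;
  int first = 1;

  printf ("\tpop\t{");
  for (regno = 0; regno <= 7; regno++)
    if (mask & (1UL << regno))
      {
        printf ("%sr%d", first ? "" : ", ", regno);
        first = 0;
      }
  printf ("}\n");       /* Prints "pop {r4, r6}" for the example mask.  */
}
#endif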
26266 /* Generate code to return from a thumb function.
26267 If 'reg_containing_return_addr' is -1, then the return address is
26268 actually on the stack, at the stack pointer. */
26269 static void
26270 thumb_exit (FILE *f, int reg_containing_return_addr)
26272 unsigned regs_available_for_popping;
26273 unsigned regs_to_pop;
26274 int pops_needed;
26275 unsigned available;
26276 unsigned required;
26277 enum machine_mode mode;
26278 int size;
26279 int restore_a4 = FALSE;
26281 /* Compute the registers we need to pop. */
26282 regs_to_pop = 0;
26283 pops_needed = 0;
26285 if (reg_containing_return_addr == -1)
26287 regs_to_pop |= 1 << LR_REGNUM;
26288 ++pops_needed;
26291 if (TARGET_BACKTRACE)
26293 /* Restore the (ARM) frame pointer and stack pointer. */
26294 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26295 pops_needed += 2;
26298 /* If there is nothing to pop then just emit the BX instruction and
26299 return. */
26300 if (pops_needed == 0)
26302 if (crtl->calls_eh_return)
26303 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26305 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26306 return;
26308 /* Otherwise if we are not supporting interworking and we have not created
26309 a backtrace structure and the function was not entered in ARM mode then
26310 just pop the return address straight into the PC. */
26311 else if (!TARGET_INTERWORK
26312 && !TARGET_BACKTRACE
26313 && !is_called_in_ARM_mode (current_function_decl)
26314 && !crtl->calls_eh_return)
26316 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26317 return;
26320 /* Find out how many of the (return) argument registers we can corrupt. */
26321 regs_available_for_popping = 0;
26323 /* If returning via __builtin_eh_return, the bottom three registers
26324 all contain information needed for the return. */
26325 if (crtl->calls_eh_return)
26326 size = 12;
26327 else
26329 /* Deduce the registers used from the function's return value where
26330 possible. This is more reliable than examining
26331 df_regs_ever_live_p () because that will be set if the register is
26332 ever used in the function, not just if the register is used
26333 to hold a return value. */
26335 if (crtl->return_rtx != 0)
26336 mode = GET_MODE (crtl->return_rtx);
26337 else
26338 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26340 size = GET_MODE_SIZE (mode);
26342 if (size == 0)
26344 /* In a void function we can use any argument register.
26345 In a function that returns a structure on the stack
26346 we can use the second and third argument registers. */
26347 if (mode == VOIDmode)
26348 regs_available_for_popping =
26349 (1 << ARG_REGISTER (1))
26350 | (1 << ARG_REGISTER (2))
26351 | (1 << ARG_REGISTER (3));
26352 else
26353 regs_available_for_popping =
26354 (1 << ARG_REGISTER (2))
26355 | (1 << ARG_REGISTER (3));
26357 else if (size <= 4)
26358 regs_available_for_popping =
26359 (1 << ARG_REGISTER (2))
26360 | (1 << ARG_REGISTER (3));
26361 else if (size <= 8)
26362 regs_available_for_popping =
26363 (1 << ARG_REGISTER (3));
26366 /* Match registers to be popped with registers into which we pop them. */
26367 for (available = regs_available_for_popping,
26368 required = regs_to_pop;
26369 required != 0 && available != 0;
26370 available &= ~(available & - available),
26371 required &= ~(required & - required))
26372 -- pops_needed;
26374 /* If we have any popping registers left over, remove them. */
26375 if (available > 0)
26376 regs_available_for_popping &= ~available;
26378 /* Otherwise if we need another popping register we can use
26379 the fourth argument register. */
26380 else if (pops_needed)
26382 /* If we have not found any free argument registers and
26383 reg a4 contains the return address, we must move it. */
26384 if (regs_available_for_popping == 0
26385 && reg_containing_return_addr == LAST_ARG_REGNUM)
26387 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26388 reg_containing_return_addr = LR_REGNUM;
26390 else if (size > 12)
26392 /* Register a4 is being used to hold part of the return value,
26393 but we have dire need of a free, low register. */
26394 restore_a4 = TRUE;
26396 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26399 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26401 /* The fourth argument register is available. */
26402 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26404 --pops_needed;
26408 /* Pop as many registers as we can. */
26409 thumb_pop (f, regs_available_for_popping);
26411 /* Process the registers we popped. */
26412 if (reg_containing_return_addr == -1)
26414 /* The return address was popped into the lowest numbered register. */
26415 regs_to_pop &= ~(1 << LR_REGNUM);
26417 reg_containing_return_addr =
26418 number_of_first_bit_set (regs_available_for_popping);
26420 /* Remove this register from the mask of available registers, so that
26421 the return address will not be corrupted by further pops. */
26422 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26425 /* If we popped other registers then handle them here. */
26426 if (regs_available_for_popping)
26428 int frame_pointer;
26430 /* Work out which register currently contains the frame pointer. */
26431 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26433 /* Move it into the correct place. */
26434 asm_fprintf (f, "\tmov\t%r, %r\n",
26435 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26437 /* (Temporarily) remove it from the mask of popped registers. */
26438 regs_available_for_popping &= ~(1 << frame_pointer);
26439 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26441 if (regs_available_for_popping)
26443 int stack_pointer;
26445 /* We popped the stack pointer as well,
26446 find the register that contains it. */
26447 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26449 /* Move it into the stack register. */
26450 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26452 /* At this point we have popped all necessary registers, so
26453 do not worry about restoring regs_available_for_popping
26454 to its correct value:
26456 assert (pops_needed == 0)
26457 assert (regs_available_for_popping == (1 << frame_pointer))
26458 assert (regs_to_pop == (1 << STACK_POINTER)) */
26460 else
26462 /* Since we have just moved the popped value into the frame
26463 pointer, the popping register is available for reuse, and
26464 we know that we still have the stack pointer left to pop. */
26465 regs_available_for_popping |= (1 << frame_pointer);
26469 /* If we still have registers left on the stack, but we no longer have
26470 any registers into which we can pop them, then we must move the return
26471 address into the link register and make available the register that
26472 contained it. */
26473 if (regs_available_for_popping == 0 && pops_needed > 0)
26475 regs_available_for_popping |= 1 << reg_containing_return_addr;
26477 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26478 reg_containing_return_addr);
26480 reg_containing_return_addr = LR_REGNUM;
26483 /* If we have registers left on the stack then pop some more.
26484 We know that at most we will want to pop FP and SP. */
26485 if (pops_needed > 0)
26487 int popped_into;
26488 int move_to;
26490 thumb_pop (f, regs_available_for_popping);
26492 /* We have popped either FP or SP.
26493 Move whichever one it is into the correct register. */
26494 popped_into = number_of_first_bit_set (regs_available_for_popping);
26495 move_to = number_of_first_bit_set (regs_to_pop);
26497 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26499 regs_to_pop &= ~(1 << move_to);
26501 --pops_needed;
26504 /* If we still have not popped everything then we must have only
26505 had one register available to us and we are now popping the SP. */
26506 if (pops_needed > 0)
26508 int popped_into;
26510 thumb_pop (f, regs_available_for_popping);
26512 popped_into = number_of_first_bit_set (regs_available_for_popping);
26514 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26516 assert (regs_to_pop == (1 << STACK_POINTER))
26517 assert (pops_needed == 1)
26521 /* If necessary restore the a4 register. */
26522 if (restore_a4)
26524 if (reg_containing_return_addr != LR_REGNUM)
26526 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26527 reg_containing_return_addr = LR_REGNUM;
26530 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26533 if (crtl->calls_eh_return)
26534 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26536 /* Return to caller. */
26537 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26540 /* Scan INSN just before assembler is output for it.
26541 For Thumb-1, we track the status of the condition codes; this
26542 information is used in the cbranchsi4_insn pattern. */
26543 void
26544 thumb1_final_prescan_insn (rtx insn)
26546 if (flag_print_asm_name)
26547 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26548 INSN_ADDRESSES (INSN_UID (insn)));
26549 /* Don't overwrite the previous setter when we get to a cbranch. */
26550 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26552 enum attr_conds conds;
26554 if (cfun->machine->thumb1_cc_insn)
26556 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26557 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26558 CC_STATUS_INIT;
26560 conds = get_attr_conds (insn);
26561 if (conds == CONDS_SET)
26563 rtx set = single_set (insn);
26564 cfun->machine->thumb1_cc_insn = insn;
26565 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26566 cfun->machine->thumb1_cc_op1 = const0_rtx;
26567 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26568 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26570 rtx src1 = XEXP (SET_SRC (set), 1);
26571 if (src1 == const0_rtx)
26572 cfun->machine->thumb1_cc_mode = CCmode;
26574 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26576 /* Record the src register operand instead of dest because
26577 cprop_hardreg pass propagates src. */
26578 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26581 else if (conds != CONDS_NOCOND)
26582 cfun->machine->thumb1_cc_insn = NULL_RTX;
26585 /* Check if unexpected far jump is used. */
26586 if (cfun->machine->lr_save_eliminated
26587 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26588 internal_error("Unexpected thumb1 far jump");
26592 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26594 unsigned HOST_WIDE_INT mask = 0xff;
26595 int i;
26597 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26598 if (val == 0) /* XXX */
26599 return 0;
26601 for (i = 0; i < 25; i++)
26602 if ((val & (mask << i)) == val)
26603 return 1;
26605 return 0;
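/* Illustrative sketch, not part of the original file: what the predicate
   above accepts is any 32-bit value whose set bits fit inside a single
   contiguous 8-bit window, i.e. a byte value shifted left by some amount.  */
#if 0
#include <assert.h>

static int
shiftable_const_example (unsigned long val)
{
  unsigned long mask = 0xff;
  int i;

  val &= 0xffffffffu;
  if (val == 0)
    return 0;
  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;
  return 0;
}

static void
shiftable_const_checks (void)
{
  assert (shiftable_const_example (0x1fe00));  /* 0xff << 9: accepted.    */
  assert (!shiftable_const_example (0x101));   /* Bits 0 and 8 span nine
                                                  bits: rejected.         */
}
#endif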
26608 /* Returns nonzero if the current function contains,
26609 or might contain, a far jump. */
26610 static int
26611 thumb_far_jump_used_p (void)
26613 rtx insn;
26614 bool far_jump = false;
26615 unsigned int func_size = 0;
26617 /* This test is only important for leaf functions. */
26618 /* assert (!leaf_function_p ()); */
26620 /* If we have already decided that far jumps may be used,
26621 do not bother checking again, and always return true even if
26622 it turns out that they are not being used. Once we have made
26623 the decision that far jumps are present (and that hence the link
26624 register will be pushed onto the stack) we cannot go back on it. */
26625 if (cfun->machine->far_jump_used)
26626 return 1;
26628 /* If this function is not being called from the prologue/epilogue
26629 generation code then it must be being called from the
26630 INITIAL_ELIMINATION_OFFSET macro. */
26631 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26633 /* In this case we know that we are being asked about the elimination
26634 of the arg pointer register. If that register is not being used,
26635 then there are no arguments on the stack, and we do not have to
26636 worry that a far jump might force the prologue to push the link
26637 register, changing the stack offsets. In this case we can just
26638 return false, since the presence of far jumps in the function will
26639 not affect stack offsets.
26641 If the arg pointer is live (or if it was live, but has now been
26642 eliminated and so set to dead) then we do have to test to see if
26643 the function might contain a far jump. This test can lead to some
26644 false negatives, since before reload is completed, the length of
26645 branch instructions is not known, so gcc defaults to returning their
26646 longest length, which in turn sets the far jump attribute to true.
26648 A false negative will not result in bad code being generated, but it
26649 will result in a needless push and pop of the link register. We
26650 hope that this does not occur too often.
26652 If we need doubleword stack alignment this could affect the other
26653 elimination offsets so we can't risk getting it wrong. */
26654 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26655 cfun->machine->arg_pointer_live = 1;
26656 else if (!cfun->machine->arg_pointer_live)
26657 return 0;
26660 /* We should not change far_jump_used during or after reload, as there is
26661 no chance to change stack frame layout. */
26662 if (reload_in_progress || reload_completed)
26663 return 0;
26665 /* Check to see if the function contains a branch
26666 insn with the far jump attribute set. */
26667 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26669 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26671 far_jump = true;
26673 func_size += get_attr_length (insn);
26676 /* Attribute far_jump will always be true for thumb1 before
26677 shorten_branch pass. So checking far_jump attribute before
26678 shorten_branch is not very useful.
26680 The following heuristic tries to estimate more accurately whether a far
26681 jump may finally be used. The heuristic is very conservative, as there is
26682 no chance to roll back the decision not to use far jumps.
26684 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26685 2-byte insn is associated with a 4 byte constant pool. Using
26686 function size 2048/3 as the threshold is conservative enough. */
26687 if (far_jump)
26689 if ((func_size * 3) >= 2048)
26691 /* Record the fact that we have decided that
26692 the function does use far jumps. */
26693 cfun->machine->far_jump_used = 1;
26694 return 1;
26698 return 0;
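/* Illustrative sketch, not part of the original file: the 2048/3 heuristic
   above with concrete numbers.  A Thumb-1 conditional branch reaches about
   -2048..2046 bytes, and in the worst case every 2-byte instruction drags
   a 4-byte constant-pool entry with it, so the emitted span can approach
   three times the summed instruction lengths.  */
#if 0
#include <assert.h>

static void
far_jump_heuristic_example (void)
{
  unsigned int func_size = 700;                  /* Summed insn lengths.  */
  unsigned int worst_case_span = func_size * 3;  /* Up to 2100 bytes.     */

  /* 2100 >= 2048, so a branch might be out of range and the function is
     pessimistically marked as using far jumps.  */
  assert (worst_case_span >= 2048);
}
#endif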
26701 /* Return nonzero if FUNC must be entered in ARM mode. */
26703 is_called_in_ARM_mode (tree func)
26705 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26707 /* Ignore the problem of functions whose address is taken. */
26708 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26709 return TRUE;
26711 #ifdef ARM_PE
26712 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26713 #else
26714 return FALSE;
26715 #endif
26718 /* Given the stack offsets and register mask in OFFSETS, decide how
26719 many additional registers to push instead of subtracting a constant
26720 from SP. For epilogues the principle is the same except we use pop.
26721 FOR_PROLOGUE indicates which we're generating. */
26722 static int
26723 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26725 HOST_WIDE_INT amount;
26726 unsigned long live_regs_mask = offsets->saved_regs_mask;
26727 /* Extract a mask of the ones we can give to the Thumb's push/pop
26728 instruction. */
26729 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26730 /* Then count how many other high registers will need to be pushed. */
26731 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26732 int n_free, reg_base, size;
26734 if (!for_prologue && frame_pointer_needed)
26735 amount = offsets->locals_base - offsets->saved_regs;
26736 else
26737 amount = offsets->outgoing_args - offsets->saved_regs;
26739 /* If the stack frame size is 512 exactly, we can save one load
26740 instruction, which should make this a win even when optimizing
26741 for speed. */
26742 if (!optimize_size && amount != 512)
26743 return 0;
26745 /* Can't do this if there are high registers to push. */
26746 if (high_regs_pushed != 0)
26747 return 0;
26749 /* Shouldn't do it in the prologue if no registers would normally
26750 be pushed at all. In the epilogue, also allow it if we'll have
26751 a pop insn for the PC. */
26752 if (l_mask == 0
26753 && (for_prologue
26754 || TARGET_BACKTRACE
26755 || (live_regs_mask & 1 << LR_REGNUM) == 0
26756 || TARGET_INTERWORK
26757 || crtl->args.pretend_args_size != 0))
26758 return 0;
26760 /* Don't do this if thumb_expand_prologue wants to emit instructions
26761 between the push and the stack frame allocation. */
26762 if (for_prologue
26763 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26764 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26765 return 0;
26767 reg_base = 0;
26768 n_free = 0;
26769 if (!for_prologue)
26771 size = arm_size_return_regs ();
26772 reg_base = ARM_NUM_INTS (size);
26773 live_regs_mask >>= reg_base;
26776 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26777 && (for_prologue || call_used_regs[reg_base + n_free]))
26779 live_regs_mask >>= 1;
26780 n_free++;
26783 if (n_free == 0)
26784 return 0;
26785 gcc_assert (amount / 4 * 4 == amount);
26787 if (amount >= 512 && (amount - n_free * 4) < 512)
26788 return (amount - 508) / 4;
26789 if (amount <= n_free * 4)
26790 return amount / 4;
26791 return 0;
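/* Illustrative sketch, not part of the original file: the arithmetic above
   for the case the earlier comment singles out, a 512-byte frame with one
   free low register.  It is assumed here that 508 is the largest immediate
   a single Thumb-1 "sub sp, #imm" can encode, so pushing one extra
   register brings the remaining adjustment down to a one-instruction
   subtract.  */
#if 0
#include <assert.h>

static void
extra_regs_pushed_example (void)
{
  int amount = 512;   /* Frame bytes otherwise subtracted from SP.  */
  int n_free = 1;     /* One unused, call-clobbered low register.   */
  int extra;

  if (amount >= 512 && (amount - n_free * 4) < 512)
    extra = (amount - 508) / 4;     /* Push just enough to reach 508.  */
  else if (amount <= n_free * 4)
    extra = amount / 4;             /* Pushes cover the whole frame.   */
  else
    extra = 0;

  assert (extra == 1 && amount - extra * 4 == 508);
}
#endif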
26794 /* The bits which aren't usefully expanded as rtl. */
26795 const char *
26796 thumb1_unexpanded_epilogue (void)
26798 arm_stack_offsets *offsets;
26799 int regno;
26800 unsigned long live_regs_mask = 0;
26801 int high_regs_pushed = 0;
26802 int extra_pop;
26803 int had_to_push_lr;
26804 int size;
26806 if (cfun->machine->return_used_this_function != 0)
26807 return "";
26809 if (IS_NAKED (arm_current_func_type ()))
26810 return "";
26812 offsets = arm_get_frame_offsets ();
26813 live_regs_mask = offsets->saved_regs_mask;
26814 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26816 /* If possible, deduce the registers used from the function's return value.
26817 This is more reliable than examining df_regs_ever_live_p () because that
26818 will be set if the register is ever used in the function, not just if
26819 the register is used to hold a return value. */
26820 size = arm_size_return_regs ();
26822 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26823 if (extra_pop > 0)
26825 unsigned long extra_mask = (1 << extra_pop) - 1;
26826 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26829 /* The prolog may have pushed some high registers to use as
26830 work registers. e.g. the testsuite file:
26831 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26832 compiles to produce:
26833 push {r4, r5, r6, r7, lr}
26834 mov r7, r9
26835 mov r6, r8
26836 push {r6, r7}
26837 as part of the prolog. We have to undo that pushing here. */
26839 if (high_regs_pushed)
26841 unsigned long mask = live_regs_mask & 0xff;
26842 int next_hi_reg;
26844 /* The available low registers depend on the size of the value we are
26845 returning. */
26846 if (size <= 12)
26847 mask |= 1 << 3;
26848 if (size <= 8)
26849 mask |= 1 << 2;
26851 if (mask == 0)
26852 /* Oh dear! We have no low registers into which we can pop
26853 high registers! */
26854 internal_error
26855 ("no low registers available for popping high registers");
26857 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26858 if (live_regs_mask & (1 << next_hi_reg))
26859 break;
26861 while (high_regs_pushed)
26863 /* Find lo register(s) into which the high register(s) can
26864 be popped. */
26865 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26867 if (mask & (1 << regno))
26868 high_regs_pushed--;
26869 if (high_regs_pushed == 0)
26870 break;
26873 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26875 /* Pop the values into the low register(s). */
26876 thumb_pop (asm_out_file, mask);
26878 /* Move the value(s) into the high registers. */
26879 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26881 if (mask & (1 << regno))
26883 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26884 regno);
26886 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26887 if (live_regs_mask & (1 << next_hi_reg))
26888 break;
26892 live_regs_mask &= ~0x0f00;
26895 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26896 live_regs_mask &= 0xff;
26898 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26900 /* Pop the return address into the PC. */
26901 if (had_to_push_lr)
26902 live_regs_mask |= 1 << PC_REGNUM;
26904 /* Either no argument registers were pushed or a backtrace
26905 structure was created which includes an adjusted stack
26906 pointer, so just pop everything. */
26907 if (live_regs_mask)
26908 thumb_pop (asm_out_file, live_regs_mask);
26910 /* We have either just popped the return address into the
26911 PC or it was kept in LR for the entire function.
26912 Note that thumb_pop has already called thumb_exit if the
26913 PC was in the list. */
26914 if (!had_to_push_lr)
26915 thumb_exit (asm_out_file, LR_REGNUM);
26917 else
26919 /* Pop everything but the return address. */
26920 if (live_regs_mask)
26921 thumb_pop (asm_out_file, live_regs_mask);
26923 if (had_to_push_lr)
26925 if (size > 12)
26927 /* We have no free low regs, so save one. */
26928 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26929 LAST_ARG_REGNUM);
26932 /* Get the return address into a temporary register. */
26933 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26935 if (size > 12)
26937 /* Move the return address to lr. */
26938 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26939 LAST_ARG_REGNUM);
26940 /* Restore the low register. */
26941 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26942 IP_REGNUM);
26943 regno = LR_REGNUM;
26945 else
26946 regno = LAST_ARG_REGNUM;
26948 else
26949 regno = LR_REGNUM;
26951 /* Remove the argument registers that were pushed onto the stack. */
26952 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26953 SP_REGNUM, SP_REGNUM,
26954 crtl->args.pretend_args_size);
26956 thumb_exit (asm_out_file, regno);
26959 return "";
26962 /* Functions to save and restore machine-specific function data. */
26963 static struct machine_function *
26964 arm_init_machine_status (void)
26966 struct machine_function *machine;
26967 machine = ggc_cleared_alloc<machine_function> ();
26969 #if ARM_FT_UNKNOWN != 0
26970 machine->func_type = ARM_FT_UNKNOWN;
26971 #endif
26972 return machine;
26975 /* Return an RTX indicating where the return address to the
26976 calling function can be found. */
26978 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26980 if (count != 0)
26981 return NULL_RTX;
26983 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26986 /* Do anything needed before RTL is emitted for each function. */
26987 void
26988 arm_init_expanders (void)
26990 /* Arrange to initialize and mark the machine per-function status. */
26991 init_machine_status = arm_init_machine_status;
26993 /* This is to stop the combine pass optimizing away the alignment
26994 adjustment of va_arg. */
26995 /* ??? It is claimed that this should not be necessary. */
26996 if (cfun)
26997 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27001 /* Like arm_compute_initial_elimination_offset. Simpler because there
27002 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27003 to point at the base of the local variables after static stack
27004 space for a function has been allocated. */
27006 HOST_WIDE_INT
27007 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27009 arm_stack_offsets *offsets;
27011 offsets = arm_get_frame_offsets ();
27013 switch (from)
27015 case ARG_POINTER_REGNUM:
27016 switch (to)
27018 case STACK_POINTER_REGNUM:
27019 return offsets->outgoing_args - offsets->saved_args;
27021 case FRAME_POINTER_REGNUM:
27022 return offsets->soft_frame - offsets->saved_args;
27024 case ARM_HARD_FRAME_POINTER_REGNUM:
27025 return offsets->saved_regs - offsets->saved_args;
27027 case THUMB_HARD_FRAME_POINTER_REGNUM:
27028 return offsets->locals_base - offsets->saved_args;
27030 default:
27031 gcc_unreachable ();
27033 break;
27035 case FRAME_POINTER_REGNUM:
27036 switch (to)
27038 case STACK_POINTER_REGNUM:
27039 return offsets->outgoing_args - offsets->soft_frame;
27041 case ARM_HARD_FRAME_POINTER_REGNUM:
27042 return offsets->saved_regs - offsets->soft_frame;
27044 case THUMB_HARD_FRAME_POINTER_REGNUM:
27045 return offsets->locals_base - offsets->soft_frame;
27047 default:
27048 gcc_unreachable ();
27050 break;
27052 default:
27053 gcc_unreachable ();
27057 /* Generate the function's prologue. */
27059 void
27060 thumb1_expand_prologue (void)
27062 rtx insn;
27064 HOST_WIDE_INT amount;
27065 arm_stack_offsets *offsets;
27066 unsigned long func_type;
27067 int regno;
27068 unsigned long live_regs_mask;
27069 unsigned long l_mask;
27070 unsigned high_regs_pushed = 0;
27072 func_type = arm_current_func_type ();
27074 /* Naked functions don't have prologues. */
27075 if (IS_NAKED (func_type))
27076 return;
27078 if (IS_INTERRUPT (func_type))
27080 error ("interrupt Service Routines cannot be coded in Thumb mode");
27081 return;
27084 if (is_called_in_ARM_mode (current_function_decl))
27085 emit_insn (gen_prologue_thumb1_interwork ());
27087 offsets = arm_get_frame_offsets ();
27088 live_regs_mask = offsets->saved_regs_mask;
27090 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27091 l_mask = live_regs_mask & 0x40ff;
27092 /* Then count how many other high registers will need to be pushed. */
27093 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27095 if (crtl->args.pretend_args_size)
27097 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27099 if (cfun->machine->uses_anonymous_args)
27101 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27102 unsigned long mask;
27104 mask = 1ul << (LAST_ARG_REGNUM + 1);
27105 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27107 insn = thumb1_emit_multi_reg_push (mask, 0);
27109 else
27111 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27112 stack_pointer_rtx, x));
27114 RTX_FRAME_RELATED_P (insn) = 1;
27117 if (TARGET_BACKTRACE)
27119 HOST_WIDE_INT offset = 0;
27120 unsigned work_register;
27121 rtx work_reg, x, arm_hfp_rtx;
27123 /* We have been asked to create a stack backtrace structure.
27124 The code looks like this:
27126 0 .align 2
27127 0 func:
27128 0 sub SP, #16 Reserve space for 4 registers.
27129 2 push {R7} Push low registers.
27130 4 add R7, SP, #20 Get the stack pointer before the push.
27131 6 str R7, [SP, #8] Store the stack pointer
27132 (before reserving the space).
27133 8 mov R7, PC Get hold of the start of this code + 12.
27134 10 str R7, [SP, #16] Store it.
27135 12 mov R7, FP Get hold of the current frame pointer.
27136 14 str R7, [SP, #4] Store it.
27137 16 mov R7, LR Get hold of the current return address.
27138 18 str R7, [SP, #12] Store it.
27139 20 add R7, SP, #16 Point at the start of the
27140 backtrace structure.
27141 22 mov FP, R7 Put this value into the frame pointer. */
27143 work_register = thumb_find_work_register (live_regs_mask);
27144 work_reg = gen_rtx_REG (SImode, work_register);
27145 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27147 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27148 stack_pointer_rtx, GEN_INT (-16)));
27149 RTX_FRAME_RELATED_P (insn) = 1;
27151 if (l_mask)
27153 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27154 RTX_FRAME_RELATED_P (insn) = 1;
27156 offset = bit_count (l_mask) * UNITS_PER_WORD;
27159 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27160 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27162 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27163 x = gen_frame_mem (SImode, x);
27164 emit_move_insn (x, work_reg);
27166 /* Make sure that the instruction fetching the PC is in the right place
27167 to calculate "start of backtrace creation code + 12". */
27168 /* ??? The stores using the common WORK_REG ought to be enough to
27169 prevent the scheduler from doing anything weird. Failing that
27170 we could always move all of the following into an UNSPEC_VOLATILE. */
27171 if (l_mask)
27173 x = gen_rtx_REG (SImode, PC_REGNUM);
27174 emit_move_insn (work_reg, x);
27176 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27177 x = gen_frame_mem (SImode, x);
27178 emit_move_insn (x, work_reg);
27180 emit_move_insn (work_reg, arm_hfp_rtx);
27182 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27183 x = gen_frame_mem (SImode, x);
27184 emit_move_insn (x, work_reg);
27186 else
27188 emit_move_insn (work_reg, arm_hfp_rtx);
27190 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27191 x = gen_frame_mem (SImode, x);
27192 emit_move_insn (x, work_reg);
27194 x = gen_rtx_REG (SImode, PC_REGNUM);
27195 emit_move_insn (work_reg, x);
27197 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27198 x = gen_frame_mem (SImode, x);
27199 emit_move_insn (x, work_reg);
27202 x = gen_rtx_REG (SImode, LR_REGNUM);
27203 emit_move_insn (work_reg, x);
27205 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27206 x = gen_frame_mem (SImode, x);
27207 emit_move_insn (x, work_reg);
27209 x = GEN_INT (offset + 12);
27210 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27212 emit_move_insn (arm_hfp_rtx, work_reg);
27214 /* Optimization: If we are not pushing any low registers but we are going
27215 to push some high registers then delay our first push. This will just
27216 be a push of LR and we can combine it with the push of the first high
27217 register. */
27218 else if ((l_mask & 0xff) != 0
27219 || (high_regs_pushed == 0 && l_mask))
27221 unsigned long mask = l_mask;
27222 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27223 insn = thumb1_emit_multi_reg_push (mask, mask);
27224 RTX_FRAME_RELATED_P (insn) = 1;
27227 if (high_regs_pushed)
27229 unsigned pushable_regs;
27230 unsigned next_hi_reg;
27231 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27232 : crtl->args.info.nregs;
27233 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27235 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27236 if (live_regs_mask & (1 << next_hi_reg))
27237 break;
27239 /* Here we need to mask out registers used for passing arguments, even
27240 if they could otherwise be pushed: using an argument register to stash
27241 a high register would clobber the argument value. */
27242 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27244 if (pushable_regs == 0)
27245 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27247 while (high_regs_pushed > 0)
27249 unsigned long real_regs_mask = 0;
27251 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27253 if (pushable_regs & (1 << regno))
27255 emit_move_insn (gen_rtx_REG (SImode, regno),
27256 gen_rtx_REG (SImode, next_hi_reg));
27258 high_regs_pushed --;
27259 real_regs_mask |= (1 << next_hi_reg);
27261 if (high_regs_pushed)
27263 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27264 next_hi_reg --)
27265 if (live_regs_mask & (1 << next_hi_reg))
27266 break;
27268 else
27270 pushable_regs &= ~((1 << regno) - 1);
27271 break;
27276 /* If we had to find a work register and we have not yet
27277 saved the LR then add it to the list of regs to push. */
27278 if (l_mask == (1 << LR_REGNUM))
27280 pushable_regs |= l_mask;
27281 real_regs_mask |= l_mask;
27282 l_mask = 0;
27285 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27286 RTX_FRAME_RELATED_P (insn) = 1;
27290 /* Load the pic register before setting the frame pointer,
27291 so we can use r7 as a temporary work register. */
27292 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27293 arm_load_pic_register (live_regs_mask);
27295 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27296 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27297 stack_pointer_rtx);
27299 if (flag_stack_usage_info)
27300 current_function_static_stack_size
27301 = offsets->outgoing_args - offsets->saved_args;
27303 amount = offsets->outgoing_args - offsets->saved_regs;
27304 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27305 if (amount)
27307 if (amount < 512)
27309 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27310 GEN_INT (- amount)));
27311 RTX_FRAME_RELATED_P (insn) = 1;
27313 else
27315 rtx reg, dwarf;
27317 /* The stack decrement is too big for an immediate value in a single
27318 insn. In theory we could issue multiple subtracts, but after
27319 three of them it becomes more space efficient to place the full
27320 value in the constant pool and load into a register. (Also the
27321 ARM debugger really likes to see only one stack decrement per
27322 function). So instead we look for a scratch register into which
27323 we can load the decrement, and then we subtract this from the
27324 stack pointer. Unfortunately on the thumb the only available
27325 scratch registers are the argument registers, and we cannot use
27326 these as they may hold arguments to the function. Instead we
27327 attempt to locate a call preserved register which is used by this
27328 function. If we can find one, then we know that it will have
27329 been pushed at the start of the prologue and so we can corrupt
27330 it now. */
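/* For illustration (hypothetical register and label): for a 1024-byte
   frame this typically becomes a literal-pool load such as
   "ldr r4, .LCn", with .LCn holding -1024, followed by "add sp, r4". */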
27331 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27332 if (live_regs_mask & (1 << regno))
27333 break;
27335 gcc_assert(regno <= LAST_LO_REGNUM);
27337 reg = gen_rtx_REG (SImode, regno);
27339 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27341 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27342 stack_pointer_rtx, reg));
27344 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27345 plus_constant (Pmode, stack_pointer_rtx,
27346 -amount));
27347 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27348 RTX_FRAME_RELATED_P (insn) = 1;
27352 if (frame_pointer_needed)
27353 thumb_set_frame_pointer (offsets);
27355 /* If we are profiling, make sure no instructions are scheduled before
27356 the call to mcount. Similarly if the user has requested no
27357 scheduling in the prolog. Similarly if we want non-call exceptions
27358 using the EABI unwinder, to prevent faulting instructions from being
27359 swapped with a stack adjustment. */
27360 if (crtl->profile || !TARGET_SCHED_PROLOG
27361 || (arm_except_unwind_info (&global_options) == UI_TARGET
27362 && cfun->can_throw_non_call_exceptions))
27363 emit_insn (gen_blockage ());
27365 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27366 if (live_regs_mask & 0xff)
27367 cfun->machine->lr_save_eliminated = 0;
27370 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27371 POP instruction can be generated. LR should be replaced by PC. All
27372 the checks required are already done by USE_RETURN_INSN (). Hence,
27373 all we really need to check here is whether a single register or
27374 multiple registers are being popped. */
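/* Illustrative examples (not from the original source): a function that
   saved {r4, r5, lr} returns with a single "pop {r4, r5, pc}", while a
   frame whose only saved register is LR pops the return address straight
   into the PC (typically "ldr pc, [sp], #4"). */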
27375 void
27376 thumb2_expand_return (bool simple_return)
27378 int i, num_regs;
27379 unsigned long saved_regs_mask;
27380 arm_stack_offsets *offsets;
27382 offsets = arm_get_frame_offsets ();
27383 saved_regs_mask = offsets->saved_regs_mask;
27385 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27386 if (saved_regs_mask & (1 << i))
27387 num_regs++;
27389 if (!simple_return && saved_regs_mask)
27391 if (num_regs == 1)
27393 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27394 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27395 rtx addr = gen_rtx_MEM (SImode,
27396 gen_rtx_POST_INC (SImode,
27397 stack_pointer_rtx));
27398 set_mem_alias_set (addr, get_frame_alias_set ());
27399 XVECEXP (par, 0, 0) = ret_rtx;
27400 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27401 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27402 emit_jump_insn (par);
27404 else
27406 saved_regs_mask &= ~ (1 << LR_REGNUM);
27407 saved_regs_mask |= (1 << PC_REGNUM);
27408 arm_emit_multi_reg_pop (saved_regs_mask);
27411 else
27413 emit_jump_insn (simple_return_rtx);
27417 void
27418 thumb1_expand_epilogue (void)
27420 HOST_WIDE_INT amount;
27421 arm_stack_offsets *offsets;
27422 int regno;
27424 /* Naked functions don't have epilogues. */
27425 if (IS_NAKED (arm_current_func_type ()))
27426 return;
27428 offsets = arm_get_frame_offsets ();
27429 amount = offsets->outgoing_args - offsets->saved_regs;
27431 if (frame_pointer_needed)
27433 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27434 amount = offsets->locals_base - offsets->saved_regs;
27436 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27438 gcc_assert (amount >= 0);
27439 if (amount)
27441 emit_insn (gen_blockage ());
27443 if (amount < 512)
27444 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27445 GEN_INT (amount)));
27446 else
27448 /* r3 is always free in the epilogue. */
27449 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27451 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27452 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27456 /* Emit a USE (stack_pointer_rtx), so that
27457 the stack adjustment will not be deleted. */
27458 emit_insn (gen_force_register_use (stack_pointer_rtx));
27460 if (crtl->profile || !TARGET_SCHED_PROLOG)
27461 emit_insn (gen_blockage ());
27463 /* Emit a clobber for each register that will be restored in the epilogue,
27464 so that flow2 will get register lifetimes correct. */
27465 for (regno = 0; regno < 13; regno++)
27466 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27467 emit_clobber (gen_rtx_REG (SImode, regno));
27469 if (! df_regs_ever_live_p (LR_REGNUM))
27470 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27473 /* Epilogue code for APCS frame. */
27474 static void
27475 arm_expand_epilogue_apcs_frame (bool really_return)
27477 unsigned long func_type;
27478 unsigned long saved_regs_mask;
27479 int num_regs = 0;
27480 int i;
27481 int floats_from_frame = 0;
27482 arm_stack_offsets *offsets;
27484 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27485 func_type = arm_current_func_type ();
27487 /* Get frame offsets for ARM. */
27488 offsets = arm_get_frame_offsets ();
27489 saved_regs_mask = offsets->saved_regs_mask;
27491 /* Find the offset of the floating-point save area in the frame. */
27492 floats_from_frame
27493 = (offsets->saved_args
27494 + arm_compute_static_chain_stack_bytes ()
27495 - offsets->frame);
27497 /* Compute how many core registers are saved and how far away the floats are. */
27498 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27499 if (saved_regs_mask & (1 << i))
27501 num_regs++;
27502 floats_from_frame += 4;
27505 if (TARGET_HARD_FLOAT && TARGET_VFP)
27507 int start_reg;
27508 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27510 /* The offset is from IP_REGNUM. */
27511 int saved_size = arm_get_vfp_saved_size ();
27512 if (saved_size > 0)
27514 rtx insn;
27515 floats_from_frame += saved_size;
27516 insn = emit_insn (gen_addsi3 (ip_rtx,
27517 hard_frame_pointer_rtx,
27518 GEN_INT (-floats_from_frame)));
27519 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27520 ip_rtx, hard_frame_pointer_rtx);
27523 /* Generate VFP register multi-pop. */
27524 start_reg = FIRST_VFP_REGNUM;
27526 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27527 /* Look for a case where a reg does not need restoring. */
27528 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27529 && (!df_regs_ever_live_p (i + 1)
27530 || call_used_regs[i + 1]))
27532 if (start_reg != i)
27533 arm_emit_vfp_multi_reg_pop (start_reg,
27534 (i - start_reg) / 2,
27535 gen_rtx_REG (SImode,
27536 IP_REGNUM));
27537 start_reg = i + 2;
27540 /* Restore the remaining regs that we have discovered (or possibly
27541 even all of them, if the conditional in the for loop never
27542 fired). */
27543 if (start_reg != i)
27544 arm_emit_vfp_multi_reg_pop (start_reg,
27545 (i - start_reg) / 2,
27546 gen_rtx_REG (SImode, IP_REGNUM));
27549 if (TARGET_IWMMXT)
27551 /* The frame pointer is guaranteed to be non-double-word aligned, as
27552 it is set to double-word-aligned old_stack_pointer - 4. */
27553 rtx insn;
27554 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27556 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27557 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27559 rtx addr = gen_frame_mem (V2SImode,
27560 plus_constant (Pmode, hard_frame_pointer_rtx,
27561 - lrm_count * 4));
27562 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27563 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27564 gen_rtx_REG (V2SImode, i),
27565 NULL_RTX);
27566 lrm_count += 2;
27570 /* saved_regs_mask should contain IP, which holds the old stack pointer
27571 from the time the activation record was created. Since SP and IP are
27572 adjacent registers, we can restore the value directly into SP. */
27573 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27574 saved_regs_mask &= ~(1 << IP_REGNUM);
27575 saved_regs_mask |= (1 << SP_REGNUM);
27577 /* There are two registers left in saved_regs_mask - LR and PC. We
27578 only need to restore LR (the return address), but to
27579 save time we can load it directly into PC, unless we need a
27580 special function exit sequence, or we are not really returning. */
27581 if (really_return
27582 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27583 && !crtl->calls_eh_return)
27584 /* Delete LR from the register mask, so that LR on
27585 the stack is loaded into the PC in the register mask. */
27586 saved_regs_mask &= ~(1 << LR_REGNUM);
27587 else
27588 saved_regs_mask &= ~(1 << PC_REGNUM);
27590 num_regs = bit_count (saved_regs_mask);
27591 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27593 rtx insn;
27594 emit_insn (gen_blockage ());
27595 /* Unwind the stack to just below the saved registers. */
27596 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27597 hard_frame_pointer_rtx,
27598 GEN_INT (- 4 * num_regs)));
27600 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27601 stack_pointer_rtx, hard_frame_pointer_rtx);
27604 arm_emit_multi_reg_pop (saved_regs_mask);
27606 if (IS_INTERRUPT (func_type))
27608 /* Interrupt handlers will have pushed the
27609 IP onto the stack, so restore it now. */
27610 rtx insn;
27611 rtx addr = gen_rtx_MEM (SImode,
27612 gen_rtx_POST_INC (SImode,
27613 stack_pointer_rtx));
27614 set_mem_alias_set (addr, get_frame_alias_set ());
27615 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27616 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27617 gen_rtx_REG (SImode, IP_REGNUM),
27618 NULL_RTX);
27621 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27622 return;
27624 if (crtl->calls_eh_return)
27625 emit_insn (gen_addsi3 (stack_pointer_rtx,
27626 stack_pointer_rtx,
27627 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27629 if (IS_STACKALIGN (func_type))
27630 /* Restore the original stack pointer. Before prologue, the stack was
27631 realigned and the original stack pointer saved in r0. For details,
27632 see comment in arm_expand_prologue. */
27633 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27635 emit_jump_insn (simple_return_rtx);
27638 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27639 function is not a sibcall. */
27640 void
27641 arm_expand_epilogue (bool really_return)
27643 unsigned long func_type;
27644 unsigned long saved_regs_mask;
27645 int num_regs = 0;
27646 int i;
27647 int amount;
27648 arm_stack_offsets *offsets;
27650 func_type = arm_current_func_type ();
27652 /* Naked functions don't have an epilogue. Hence, generate a return pattern and
27653 let output_return_instruction take care of instruction emission if any. */
27654 if (IS_NAKED (func_type)
27655 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27657 if (really_return)
27658 emit_jump_insn (simple_return_rtx);
27659 return;
27662 /* If we are throwing an exception, then we really must be doing a
27663 return, so we can't tail-call. */
27664 gcc_assert (!crtl->calls_eh_return || really_return);
27666 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27668 arm_expand_epilogue_apcs_frame (really_return);
27669 return;
27672 /* Get frame offsets for ARM. */
27673 offsets = arm_get_frame_offsets ();
27674 saved_regs_mask = offsets->saved_regs_mask;
27675 num_regs = bit_count (saved_regs_mask);
27677 if (frame_pointer_needed)
27679 rtx insn;
27680 /* Restore stack pointer if necessary. */
27681 if (TARGET_ARM)
27683 /* In ARM mode, frame pointer points to first saved register.
27684 Restore stack pointer to last saved register. */
27685 amount = offsets->frame - offsets->saved_regs;
27687 /* Force out any pending memory operations that reference stacked data
27688 before stack de-allocation occurs. */
27689 emit_insn (gen_blockage ());
27690 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27691 hard_frame_pointer_rtx,
27692 GEN_INT (amount)));
27693 arm_add_cfa_adjust_cfa_note (insn, amount,
27694 stack_pointer_rtx,
27695 hard_frame_pointer_rtx);
27697 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27698 deleted. */
27699 emit_insn (gen_force_register_use (stack_pointer_rtx));
27701 else
27703 /* In Thumb-2 mode, the frame pointer points to the last saved
27704 register. */
27705 amount = offsets->locals_base - offsets->saved_regs;
27706 if (amount)
27708 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27709 hard_frame_pointer_rtx,
27710 GEN_INT (amount)));
27711 arm_add_cfa_adjust_cfa_note (insn, amount,
27712 hard_frame_pointer_rtx,
27713 hard_frame_pointer_rtx);
27716 /* Force out any pending memory operations that reference stacked data
27717 before stack de-allocation occurs. */
27718 emit_insn (gen_blockage ());
27719 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27720 hard_frame_pointer_rtx));
27721 arm_add_cfa_adjust_cfa_note (insn, 0,
27722 stack_pointer_rtx,
27723 hard_frame_pointer_rtx);
27724 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27725 deleted. */
27726 emit_insn (gen_force_register_use (stack_pointer_rtx));
27729 else
27731 /* Pop off outgoing args and local frame to adjust stack pointer to
27732 last saved register. */
27733 amount = offsets->outgoing_args - offsets->saved_regs;
27734 if (amount)
27736 rtx tmp;
27737 /* Force out any pending memory operations that reference stacked data
27738 before stack de-allocation occurs. */
27739 emit_insn (gen_blockage ());
27740 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27741 stack_pointer_rtx,
27742 GEN_INT (amount)));
27743 arm_add_cfa_adjust_cfa_note (tmp, amount,
27744 stack_pointer_rtx, stack_pointer_rtx);
27745 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27746 not deleted. */
27747 emit_insn (gen_force_register_use (stack_pointer_rtx));
27751 if (TARGET_HARD_FLOAT && TARGET_VFP)
27753 /* Generate VFP register multi-pop. */
27754 int end_reg = LAST_VFP_REGNUM + 1;
27756 /* Scan the registers in reverse order. We need to match
27757 any groupings made in the prologue and generate matching
27758 vldm operations. The need to match groups is because,
27759 unlike pop, vldm can only do consecutive regs. */
27760 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27761 /* Look for a case where a reg does not need restoring. */
27762 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27763 && (!df_regs_ever_live_p (i + 1)
27764 || call_used_regs[i + 1]))
27766 /* Restore the regs discovered so far (from reg+2 to
27767 end_reg). */
27768 if (end_reg > i + 2)
27769 arm_emit_vfp_multi_reg_pop (i + 2,
27770 (end_reg - (i + 2)) / 2,
27771 stack_pointer_rtx);
27772 end_reg = i;
27775 /* Restore the remaining regs that we have discovered (or possibly
27776 even all of them, if the conditional in the for loop never
27777 fired). */
27778 if (end_reg > i + 2)
27779 arm_emit_vfp_multi_reg_pop (i + 2,
27780 (end_reg - (i + 2)) / 2,
27781 stack_pointer_rtx);
27784 if (TARGET_IWMMXT)
27785 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27786 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27788 rtx insn;
27789 rtx addr = gen_rtx_MEM (V2SImode,
27790 gen_rtx_POST_INC (SImode,
27791 stack_pointer_rtx));
27792 set_mem_alias_set (addr, get_frame_alias_set ());
27793 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27794 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27795 gen_rtx_REG (V2SImode, i),
27796 NULL_RTX);
27797 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27798 stack_pointer_rtx, stack_pointer_rtx);
27801 if (saved_regs_mask)
27803 rtx insn;
27804 bool return_in_pc = false;
27806 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27807 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27808 && !IS_STACKALIGN (func_type)
27809 && really_return
27810 && crtl->args.pretend_args_size == 0
27811 && saved_regs_mask & (1 << LR_REGNUM)
27812 && !crtl->calls_eh_return)
27814 saved_regs_mask &= ~(1 << LR_REGNUM);
27815 saved_regs_mask |= (1 << PC_REGNUM);
27816 return_in_pc = true;
27819 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27821 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27822 if (saved_regs_mask & (1 << i))
27824 rtx addr = gen_rtx_MEM (SImode,
27825 gen_rtx_POST_INC (SImode,
27826 stack_pointer_rtx));
27827 set_mem_alias_set (addr, get_frame_alias_set ());
27829 if (i == PC_REGNUM)
27831 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27832 XVECEXP (insn, 0, 0) = ret_rtx;
27833 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27834 gen_rtx_REG (SImode, i),
27835 addr);
27836 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27837 insn = emit_jump_insn (insn);
27839 else
27841 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27842 addr));
27843 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27844 gen_rtx_REG (SImode, i),
27845 NULL_RTX);
27846 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27847 stack_pointer_rtx,
27848 stack_pointer_rtx);
27852 else
27854 if (TARGET_LDRD
27855 && current_tune->prefer_ldrd_strd
27856 && !optimize_function_for_size_p (cfun))
27858 if (TARGET_THUMB2)
27859 thumb2_emit_ldrd_pop (saved_regs_mask);
27860 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27861 arm_emit_ldrd_pop (saved_regs_mask);
27862 else
27863 arm_emit_multi_reg_pop (saved_regs_mask);
27865 else
27866 arm_emit_multi_reg_pop (saved_regs_mask);
27869 if (return_in_pc == true)
27870 return;
27873 if (crtl->args.pretend_args_size)
27875 int i, j;
27876 rtx dwarf = NULL_RTX;
27877 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27878 stack_pointer_rtx,
27879 GEN_INT (crtl->args.pretend_args_size)));
27881 RTX_FRAME_RELATED_P (tmp) = 1;
27883 if (cfun->machine->uses_anonymous_args)
27885 /* Restore pretend args. See arm_expand_prologue for how the
27886 pretend args are saved on the stack. */
27887 int num_regs = crtl->args.pretend_args_size / 4;
27888 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27889 for (j = 0, i = 0; j < num_regs; i++)
27890 if (saved_regs_mask & (1 << i))
27892 rtx reg = gen_rtx_REG (SImode, i);
27893 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27894 j++;
27896 REG_NOTES (tmp) = dwarf;
27898 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27899 stack_pointer_rtx, stack_pointer_rtx);
27902 if (!really_return)
27903 return;
27905 if (crtl->calls_eh_return)
27906 emit_insn (gen_addsi3 (stack_pointer_rtx,
27907 stack_pointer_rtx,
27908 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27910 if (IS_STACKALIGN (func_type))
27911 /* Restore the original stack pointer. Before prologue, the stack was
27912 realigned and the original stack pointer saved in r0. For details,
27913 see comment in arm_expand_prologue. */
27914 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27916 emit_jump_insn (simple_return_rtx);
27919 /* Implementation of insn prologue_thumb1_interwork. This is the first
27920 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27922 const char *
27923 thumb1_output_interwork (void)
27925 const char * name;
27926 FILE *f = asm_out_file;
27928 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27929 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27930 == SYMBOL_REF);
27931 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27933 /* Generate code sequence to switch us into Thumb mode. */
27934 /* The .code 32 directive has already been emitted by
27935 ASM_DECLARE_FUNCTION_NAME. */
27936 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27937 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27939 /* Generate a label, so that the debugger will notice the
27940 change in instruction sets. This label is also used by
27941 the assembler to bypass the ARM code when this function
27942 is called from a Thumb encoded function elsewhere in the
27943 same file. Hence the definition of STUB_NAME here must
27944 agree with the definition in gas/config/tc-arm.c. */
27946 #define STUB_NAME ".real_start_of"
27948 fprintf (f, "\t.code\t16\n");
27949 #ifdef ARM_PE
27950 if (arm_dllexport_name_p (name))
27951 name = arm_strip_name_encoding (name);
27952 #endif
27953 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27954 fprintf (f, "\t.thumb_func\n");
27955 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27957 return "";
27960 /* Handle the case of a double word load into a low register from
27961 a computed memory address. The computed address may involve a
27962 register which is overwritten by the load. */
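/* For instance (illustration only), if the destination pair is r2/r3 and
   the address is held in r2 itself, the high word must be loaded first:
   roughly "ldr r3, [r2, #4]" then "ldr r2, [r2]"; loading r2 first would
   overwrite the base before the second load. */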
27963 const char *
27964 thumb_load_double_from_address (rtx *operands)
27966 rtx addr;
27967 rtx base;
27968 rtx offset;
27969 rtx arg1;
27970 rtx arg2;
27972 gcc_assert (REG_P (operands[0]));
27973 gcc_assert (MEM_P (operands[1]));
27975 /* Get the memory address. */
27976 addr = XEXP (operands[1], 0);
27978 /* Work out how the memory address is computed. */
27979 switch (GET_CODE (addr))
27981 case REG:
27982 operands[2] = adjust_address (operands[1], SImode, 4);
27984 if (REGNO (operands[0]) == REGNO (addr))
27986 output_asm_insn ("ldr\t%H0, %2", operands);
27987 output_asm_insn ("ldr\t%0, %1", operands);
27989 else
27991 output_asm_insn ("ldr\t%0, %1", operands);
27992 output_asm_insn ("ldr\t%H0, %2", operands);
27994 break;
27996 case CONST:
27997 /* Compute <address> + 4 for the high order load. */
27998 operands[2] = adjust_address (operands[1], SImode, 4);
28000 output_asm_insn ("ldr\t%0, %1", operands);
28001 output_asm_insn ("ldr\t%H0, %2", operands);
28002 break;
28004 case PLUS:
28005 arg1 = XEXP (addr, 0);
28006 arg2 = XEXP (addr, 1);
28008 if (CONSTANT_P (arg1))
28009 base = arg2, offset = arg1;
28010 else
28011 base = arg1, offset = arg2;
28013 gcc_assert (REG_P (base));
28015 /* Catch the case of <address> = <reg> + <reg> */
28016 if (REG_P (offset))
28018 int reg_offset = REGNO (offset);
28019 int reg_base = REGNO (base);
28020 int reg_dest = REGNO (operands[0]);
28022 /* Add the base and offset registers together into the
28023 higher destination register. */
28024 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28025 reg_dest + 1, reg_base, reg_offset);
28027 /* Load the lower destination register from the address in
28028 the higher destination register. */
28029 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28030 reg_dest, reg_dest + 1);
28032 /* Load the higher destination register from its own address
28033 plus 4. */
28034 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28035 reg_dest + 1, reg_dest + 1);
28037 else
28039 /* Compute <address> + 4 for the high order load. */
28040 operands[2] = adjust_address (operands[1], SImode, 4);
28042 /* If the computed address is held in the low order register
28043 then load the high order register first, otherwise always
28044 load the low order register first. */
28045 if (REGNO (operands[0]) == REGNO (base))
28047 output_asm_insn ("ldr\t%H0, %2", operands);
28048 output_asm_insn ("ldr\t%0, %1", operands);
28050 else
28052 output_asm_insn ("ldr\t%0, %1", operands);
28053 output_asm_insn ("ldr\t%H0, %2", operands);
28056 break;
28058 case LABEL_REF:
28059 /* With no registers to worry about we can just load the value
28060 directly. */
28061 operands[2] = adjust_address (operands[1], SImode, 4);
28063 output_asm_insn ("ldr\t%H0, %2", operands);
28064 output_asm_insn ("ldr\t%0, %1", operands);
28065 break;
28067 default:
28068 gcc_unreachable ();
28071 return "";
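/* Output the assembler for a 2- or 3-word block move (the movmem8b and
   movmem12b patterns used by thumb_expand_movmemqi).  The scratch
   registers among the operands are sorted into ascending order first so
   that the printed ldmia/stmia register lists name registers in
   ascending order, as the assembler expects. */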
28074 const char *
28075 thumb_output_move_mem_multiple (int n, rtx *operands)
28077 rtx tmp;
28079 switch (n)
28081 case 2:
28082 if (REGNO (operands[4]) > REGNO (operands[5]))
28084 tmp = operands[4];
28085 operands[4] = operands[5];
28086 operands[5] = tmp;
28088 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28089 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28090 break;
28092 case 3:
28093 if (REGNO (operands[4]) > REGNO (operands[5]))
28095 tmp = operands[4];
28096 operands[4] = operands[5];
28097 operands[5] = tmp;
28099 if (REGNO (operands[5]) > REGNO (operands[6]))
28101 tmp = operands[5];
28102 operands[5] = operands[6];
28103 operands[6] = tmp;
28105 if (REGNO (operands[4]) > REGNO (operands[5]))
28107 tmp = operands[4];
28108 operands[4] = operands[5];
28109 operands[5] = tmp;
28112 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28113 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28114 break;
28116 default:
28117 gcc_unreachable ();
28120 return "";
28123 /* Output a call-via instruction for thumb state. */
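/* For example (illustrative label name), an indirect call through r4 is
   emitted as "bl .Lnn", where the shared stub ".Lnn: bx r4" is emitted
   once for the text section by arm_file_end, or once per function when
   function sections are in use (via cfun->machine->call_via). */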
28124 const char *
28125 thumb_call_via_reg (rtx reg)
28127 int regno = REGNO (reg);
28128 rtx *labelp;
28130 gcc_assert (regno < LR_REGNUM);
28132 /* If we are in the normal text section we can use a single instance
28133 per compilation unit. If we are doing function sections, then we need
28134 an entry per section, since we can't rely on reachability. */
28135 if (in_section == text_section)
28137 thumb_call_reg_needed = 1;
28139 if (thumb_call_via_label[regno] == NULL)
28140 thumb_call_via_label[regno] = gen_label_rtx ();
28141 labelp = thumb_call_via_label + regno;
28143 else
28145 if (cfun->machine->call_via[regno] == NULL)
28146 cfun->machine->call_via[regno] = gen_label_rtx ();
28147 labelp = cfun->machine->call_via + regno;
28150 output_asm_insn ("bl\t%a0", labelp);
28151 return "";
28154 /* Routines for generating rtl. */
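/* thumb_expand_movmemqi below decomposes a fixed-length copy greedily;
   for illustration, a 23-byte copy becomes one 12-byte block move, one
   8-byte block move, a halfword copy and a byte copy (12 + 8 + 2 + 1). */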
28155 void
28156 thumb_expand_movmemqi (rtx *operands)
28158 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28159 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28160 HOST_WIDE_INT len = INTVAL (operands[2]);
28161 HOST_WIDE_INT offset = 0;
28163 while (len >= 12)
28165 emit_insn (gen_movmem12b (out, in, out, in));
28166 len -= 12;
28169 if (len >= 8)
28171 emit_insn (gen_movmem8b (out, in, out, in));
28172 len -= 8;
28175 if (len >= 4)
28177 rtx reg = gen_reg_rtx (SImode);
28178 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28179 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28180 len -= 4;
28181 offset += 4;
28184 if (len >= 2)
28186 rtx reg = gen_reg_rtx (HImode);
28187 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28188 plus_constant (Pmode, in,
28189 offset))));
28190 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28191 offset)),
28192 reg));
28193 len -= 2;
28194 offset += 2;
28197 if (len)
28199 rtx reg = gen_reg_rtx (QImode);
28200 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28201 plus_constant (Pmode, in,
28202 offset))));
28203 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28204 offset)),
28205 reg));
28209 void
28210 thumb_reload_out_hi (rtx *operands)
28212 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28215 /* Handle reading a half-word from memory during reload. */
28216 void
28217 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28219 gcc_unreachable ();
28222 /* Return the length of a function name prefix
28223 that starts with the character 'c'. */
28224 static int
28225 arm_get_strip_length (int c)
28227 switch (c)
28229 ARM_NAME_ENCODING_LENGTHS
28230 default: return 0;
28234 /* Return a pointer to a function's name with any
28235 and all prefix encodings stripped from it. */
28236 const char *
28237 arm_strip_name_encoding (const char *name)
28239 int skip;
28241 while ((skip = arm_get_strip_length (* name)))
28242 name += skip;
28244 return name;
28247 /* If there is a '*' anywhere in the name's prefix, then
28248 emit the stripped name verbatim, otherwise prepend an
28249 underscore if leading underscores are being used. */
28250 void
28251 arm_asm_output_labelref (FILE *stream, const char *name)
28253 int skip;
28254 int verbatim = 0;
28256 while ((skip = arm_get_strip_length (* name)))
28258 verbatim |= (*name == '*');
28259 name += skip;
28262 if (verbatim)
28263 fputs (name, stream);
28264 else
28265 asm_fprintf (stream, "%U%s", name);
28268 /* This function is used to emit an EABI tag and its associated value.
28269 We emit the numerical value of the tag in case the assembler does not
28270 support textual tags. (Eg gas prior to 2.20). If requested we include
28271 the tag name in a comment so that anyone reading the assembler output
28272 will know which tag is being set.
28274 This function is not static because arm-c.c needs it too. */
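/* Example of the output produced with -fverbose-asm (illustrative):
       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals  */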
28276 void
28277 arm_emit_eabi_attribute (const char *name, int num, int val)
28279 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28280 if (flag_verbose_asm || flag_debug_asm)
28281 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28282 asm_fprintf (asm_out_file, "\n");
28285 static void
28286 arm_file_start (void)
28288 int val;
28290 if (TARGET_UNIFIED_ASM)
28291 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28293 if (TARGET_BPABI)
28295 const char *fpu_name;
28296 if (arm_selected_arch)
28298 /* armv7ve doesn't support any extensions. */
28299 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28301 /* Keep backward compatibility for assemblers
28302 which don't support armv7ve. */
28303 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28304 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28305 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28306 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28307 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28309 else
28311 const char* pos = strchr (arm_selected_arch->name, '+');
28312 if (pos)
28314 char buf[15];
28315 gcc_assert (strlen (arm_selected_arch->name)
28316 <= sizeof (buf) / sizeof (*pos));
28317 strncpy (buf, arm_selected_arch->name,
28318 (pos - arm_selected_arch->name) * sizeof (*pos));
28319 buf[pos - arm_selected_arch->name] = '\0';
28320 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28321 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28323 else
28324 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28327 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28328 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28329 else
28331 const char* truncated_name
28332 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28333 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28336 if (TARGET_SOFT_FLOAT)
28338 fpu_name = "softvfp";
28340 else
28342 fpu_name = arm_fpu_desc->name;
28343 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28345 if (TARGET_HARD_FLOAT)
28346 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28347 if (TARGET_HARD_FLOAT_ABI)
28348 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28351 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28353 /* Some of these attributes only apply when the corresponding features
28354 are used. However we don't have any easy way of figuring this out.
28355 Conservatively record the setting that would have been used. */
28357 if (flag_rounding_math)
28358 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28360 if (!flag_unsafe_math_optimizations)
28362 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28363 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28365 if (flag_signaling_nans)
28366 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28368 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28369 flag_finite_math_only ? 1 : 3);
28371 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28372 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28373 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28374 flag_short_enums ? 1 : 2);
28376 /* Tag_ABI_optimization_goals. */
28377 if (optimize_size)
28378 val = 4;
28379 else if (optimize >= 2)
28380 val = 2;
28381 else if (optimize)
28382 val = 1;
28383 else
28384 val = 6;
28385 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28387 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28388 unaligned_access);
28390 if (arm_fp16_format)
28391 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28392 (int) arm_fp16_format);
28394 if (arm_lang_output_object_attributes_hook)
28395 arm_lang_output_object_attributes_hook();
28398 default_file_start ();
28401 static void
28402 arm_file_end (void)
28404 int regno;
28406 if (NEED_INDICATE_EXEC_STACK)
28407 /* Add .note.GNU-stack. */
28408 file_end_indicate_exec_stack ();
28410 if (! thumb_call_reg_needed)
28411 return;
28413 switch_to_section (text_section);
28414 asm_fprintf (asm_out_file, "\t.code 16\n");
28415 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28417 for (regno = 0; regno < LR_REGNUM; regno++)
28419 rtx label = thumb_call_via_label[regno];
28421 if (label != 0)
28423 targetm.asm_out.internal_label (asm_out_file, "L",
28424 CODE_LABEL_NUMBER (label));
28425 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28430 #ifndef ARM_PE
28431 /* Symbols in the text segment can be accessed without indirecting via the
28432 constant pool; it may take an extra binary operation, but this is still
28433 faster than indirecting via memory. Don't do this when not optimizing,
28434 since we won't be calculating all of the offsets necessary to do this
28435 simplification. */
28437 static void
28438 arm_encode_section_info (tree decl, rtx rtl, int first)
28440 if (optimize > 0 && TREE_CONSTANT (decl))
28441 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28443 default_encode_section_info (decl, rtl, first);
28445 #endif /* !ARM_PE */
28447 static void
28448 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28450 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28451 && !strcmp (prefix, "L"))
28453 arm_ccfsm_state = 0;
28454 arm_target_insn = NULL;
28456 default_internal_label (stream, prefix, labelno);
28459 /* Output code to add DELTA to the first argument, and then jump
28460 to FUNCTION. Used for C++ multiple inheritance. */
28461 static void
28462 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28463 HOST_WIDE_INT delta,
28464 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28465 tree function)
28467 static int thunk_label = 0;
28468 char label[256];
28469 char labelpc[256];
28470 int mi_delta = delta;
28471 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28472 int shift = 0;
28473 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28474 ? 1 : 0);
28475 if (mi_delta < 0)
28476 mi_delta = - mi_delta;
28478 final_start_function (emit_barrier (), file, 1);
28480 if (TARGET_THUMB1)
28482 int labelno = thunk_label++;
28483 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28484 /* Thunks are entered in ARM mode when available. */
28485 if (TARGET_THUMB1_ONLY)
28487 /* push r3 so we can use it as a temporary. */
28488 /* TODO: Omit this save if r3 is not used. */
28489 fputs ("\tpush {r3}\n", file);
28490 fputs ("\tldr\tr3, ", file);
28492 else
28494 fputs ("\tldr\tr12, ", file);
28496 assemble_name (file, label);
28497 fputc ('\n', file);
28498 if (flag_pic)
28500 /* If we are generating PIC, the ldr instruction below loads
28501 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28502 the address of the add + 8, so we have:
28504 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28505 = target + 1.
28507 Note that we have "+ 1" because some versions of GNU ld
28508 don't set the low bit of the result for R_ARM_REL32
28509 relocations against thumb function symbols.
28510 On ARMv6M this is +4, not +8. */
28511 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28512 assemble_name (file, labelpc);
28513 fputs (":\n", file);
28514 if (TARGET_THUMB1_ONLY)
28516 /* This is 2 insns after the start of the thunk, so we know it
28517 is 4-byte aligned. */
28518 fputs ("\tadd\tr3, pc, r3\n", file);
28519 fputs ("\tmov r12, r3\n", file);
28521 else
28522 fputs ("\tadd\tr12, pc, r12\n", file);
28524 else if (TARGET_THUMB1_ONLY)
28525 fputs ("\tmov r12, r3\n", file);
28527 if (TARGET_THUMB1_ONLY)
28529 if (mi_delta > 255)
28531 fputs ("\tldr\tr3, ", file);
28532 assemble_name (file, label);
28533 fputs ("+4\n", file);
28534 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28535 mi_op, this_regno, this_regno);
28537 else if (mi_delta != 0)
28539 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28540 mi_op, this_regno, this_regno,
28541 mi_delta);
28544 else
28546 /* TODO: Use movw/movt for large constants when available. */
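/* Illustration (not in the original): assuming the this pointer is in r0,
   a delta of 0x1234 is emitted by the loop below as "add r0, r0, #564"
   followed by "add r0, r0, #4096" (the chunks 0x234 and 0x1000), each of
   which is a valid rotated 8-bit ARM immediate. */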
28547 while (mi_delta != 0)
28549 if ((mi_delta & (3 << shift)) == 0)
28550 shift += 2;
28551 else
28553 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28554 mi_op, this_regno, this_regno,
28555 mi_delta & (0xff << shift));
28556 mi_delta &= ~(0xff << shift);
28557 shift += 8;
28561 if (TARGET_THUMB1)
28563 if (TARGET_THUMB1_ONLY)
28564 fputs ("\tpop\t{r3}\n", file);
28566 fprintf (file, "\tbx\tr12\n");
28567 ASM_OUTPUT_ALIGN (file, 2);
28568 assemble_name (file, label);
28569 fputs (":\n", file);
28570 if (flag_pic)
28572 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28573 rtx tem = XEXP (DECL_RTL (function), 0);
28574 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28575 pipeline offset is four rather than eight. Adjust the offset
28576 accordingly. */
28577 tem = plus_constant (GET_MODE (tem), tem,
28578 TARGET_THUMB1_ONLY ? -3 : -7);
28579 tem = gen_rtx_MINUS (GET_MODE (tem),
28580 tem,
28581 gen_rtx_SYMBOL_REF (Pmode,
28582 ggc_strdup (labelpc)));
28583 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28585 else
28586 /* Output ".word .LTHUNKn". */
28587 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28589 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28590 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28592 else
28594 fputs ("\tb\t", file);
28595 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28596 if (NEED_PLT_RELOC)
28597 fputs ("(PLT)", file);
28598 fputc ('\n', file);
28601 final_end_function ();
28605 arm_emit_vector_const (FILE *file, rtx x)
28607 int i;
28608 const char * pattern;
28610 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28612 switch (GET_MODE (x))
28614 case V2SImode: pattern = "%08x"; break;
28615 case V4HImode: pattern = "%04x"; break;
28616 case V8QImode: pattern = "%02x"; break;
28617 default: gcc_unreachable ();
28620 fprintf (file, "0x");
28621 for (i = CONST_VECTOR_NUNITS (x); i--;)
28623 rtx element;
28625 element = CONST_VECTOR_ELT (x, i);
28626 fprintf (file, pattern, INTVAL (element));
28629 return 1;
28632 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
28633 HFmode constant pool entries are actually loaded with ldr. */
28634 void
28635 arm_emit_fp16_const (rtx c)
28637 REAL_VALUE_TYPE r;
28638 long bits;
28640 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28641 bits = real_to_target (NULL, &r, HFmode);
28642 if (WORDS_BIG_ENDIAN)
28643 assemble_zeros (2);
28644 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28645 if (!WORDS_BIG_ENDIAN)
28646 assemble_zeros (2);
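/* Output the assembler for loading an iWMMXt GR register (operand 0) from
   memory (operand 1).  When the address offset is within range, a single
   wldrw is emitted; otherwise the base register is spilled to the stack,
   the value is loaded into it with a plain ldr, transferred to the GR
   register with tmcr, and the base register is then restored. */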
28649 const char *
28650 arm_output_load_gr (rtx *operands)
28652 rtx reg;
28653 rtx offset;
28654 rtx wcgr;
28655 rtx sum;
28657 if (!MEM_P (operands [1])
28658 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28659 || !REG_P (reg = XEXP (sum, 0))
28660 || !CONST_INT_P (offset = XEXP (sum, 1))
28661 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28662 return "wldrw%?\t%0, %1";
28664 /* Fix up an out-of-range load of a GR register. */
28665 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28666 wcgr = operands[0];
28667 operands[0] = reg;
28668 output_asm_insn ("ldr%?\t%0, %1", operands);
28670 operands[0] = wcgr;
28671 operands[1] = reg;
28672 output_asm_insn ("tmcr%?\t%0, %1", operands);
28673 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28675 return "";
28678 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28680 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28681 named arg and all anonymous args onto the stack.
28682 XXX I know the prologue shouldn't be pushing registers, but it is faster
28683 that way. */
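/* For illustration (hypothetical signature): for "int f (int a, ...)" the
   named argument occupies r0 only, so nregs == 1 and *pretend_size becomes
   (NUM_ARG_REGS - 1) * UNITS_PER_WORD == 12, telling the prologue to save
   r1-r3 contiguously with any anonymous arguments already on the stack. */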
28685 static void
28686 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28687 enum machine_mode mode,
28688 tree type,
28689 int *pretend_size,
28690 int second_time ATTRIBUTE_UNUSED)
28692 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28693 int nregs;
28695 cfun->machine->uses_anonymous_args = 1;
28696 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28698 nregs = pcum->aapcs_ncrn;
28699 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28700 nregs++;
28702 else
28703 nregs = pcum->nregs;
28705 if (nregs < NUM_ARG_REGS)
28706 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28709 /* We can't rely on the caller doing the proper promotion when
28710 using APCS or ATPCS. */
28712 static bool
28713 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28715 return !TARGET_AAPCS_BASED;
28718 static enum machine_mode
28719 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28720 enum machine_mode mode,
28721 int *punsignedp ATTRIBUTE_UNUSED,
28722 const_tree fntype ATTRIBUTE_UNUSED,
28723 int for_return ATTRIBUTE_UNUSED)
28725 if (GET_MODE_CLASS (mode) == MODE_INT
28726 && GET_MODE_SIZE (mode) < 4)
28727 return SImode;
28729 return mode;
28732 /* AAPCS based ABIs use short enums by default. */
28734 static bool
28735 arm_default_short_enums (void)
28737 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28741 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28743 static bool
28744 arm_align_anon_bitfield (void)
28746 return TARGET_AAPCS_BASED;
28750 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28752 static tree
28753 arm_cxx_guard_type (void)
28755 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28759 /* The EABI says test the least significant bit of a guard variable. */
28761 static bool
28762 arm_cxx_guard_mask_bit (void)
28764 return TARGET_AAPCS_BASED;
28768 /* The EABI specifies that all array cookies are 8 bytes long. */
28770 static tree
28771 arm_get_cookie_size (tree type)
28773 tree size;
28775 if (!TARGET_AAPCS_BASED)
28776 return default_cxx_get_cookie_size (type);
28778 size = build_int_cst (sizetype, 8);
28779 return size;
28783 /* The EABI says that array cookies should also contain the element size. */
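/* Illustrative example (layout per the ARM C++ ABI, \S 3.2.2): a call such
   as `new int[10]' therefore allocates an 8-byte cookie in front of the
   array data holding the element size (4) and the element count (10),
   whereas the generic C++ ABI cookie records only the count.  */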
28785 static bool
28786 arm_cookie_has_size (void)
28788 return TARGET_AAPCS_BASED;
28792 /* The EABI says constructors and destructors should return a pointer to
28793 the object constructed/destroyed. */
28795 static bool
28796 arm_cxx_cdtor_returns_this (void)
28798 return TARGET_AAPCS_BASED;
28801 /* The EABI says that an inline function may never be the key
28802 method. */
28804 static bool
28805 arm_cxx_key_method_may_be_inline (void)
28807 return !TARGET_AAPCS_BASED;
28810 static void
28811 arm_cxx_determine_class_data_visibility (tree decl)
28813 if (!TARGET_AAPCS_BASED
28814 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28815 return;
28817 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28818 is exported. However, on systems without dynamic vague linkage,
28819 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28820 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28821 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28822 else
28823 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28824 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28827 static bool
28828 arm_cxx_class_data_always_comdat (void)
28830 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28831 vague linkage if the class has no key function. */
28832 return !TARGET_AAPCS_BASED;
28836 /* The EABI says __aeabi_atexit should be used to register static
28837 destructors. */
28839 static bool
28840 arm_cxx_use_aeabi_atexit (void)
28842 return TARGET_AAPCS_BASED;
28846 void
28847 arm_set_return_address (rtx source, rtx scratch)
28849 arm_stack_offsets *offsets;
28850 HOST_WIDE_INT delta;
28851 rtx addr;
28852 unsigned long saved_regs;
28854 offsets = arm_get_frame_offsets ();
28855 saved_regs = offsets->saved_regs_mask;
28857 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28858 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28859 else
28861 if (frame_pointer_needed)
28862 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28863 else
28865 /* LR will be the first saved register. */
28866 delta = offsets->outgoing_args - (offsets->frame + 4);
28869 if (delta >= 4096)
28871 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28872 GEN_INT (delta & ~4095)));
28873 addr = scratch;
28874 delta &= 4095;
28876 else
28877 addr = stack_pointer_rtx;
28879 addr = plus_constant (Pmode, addr, delta);
28881 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28886 void
28887 thumb_set_return_address (rtx source, rtx scratch)
28889 arm_stack_offsets *offsets;
28890 HOST_WIDE_INT delta;
28891 HOST_WIDE_INT limit;
28892 int reg;
28893 rtx addr;
28894 unsigned long mask;
28896 emit_use (source);
28898 offsets = arm_get_frame_offsets ();
28899 mask = offsets->saved_regs_mask;
28900 if (mask & (1 << LR_REGNUM))
28902 limit = 1024;
28903 /* Find the saved regs. */
28904 if (frame_pointer_needed)
28906 delta = offsets->soft_frame - offsets->saved_args;
28907 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28908 if (TARGET_THUMB1)
28909 limit = 128;
28911 else
28913 delta = offsets->outgoing_args - offsets->saved_args;
28914 reg = SP_REGNUM;
28916 /* Allow for the stack frame. */
28917 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28918 delta -= 16;
28919 /* The link register is always the first saved register. */
28920 delta -= 4;
28922 /* Construct the address. */
28923 addr = gen_rtx_REG (SImode, reg);
28924 if (delta > limit)
28926 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28927 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28928 addr = scratch;
28930 else
28931 addr = plus_constant (Pmode, addr, delta);
28933 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28935 else
28936 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28939 /* Implements target hook vector_mode_supported_p. */
28940 bool
28941 arm_vector_mode_supported_p (enum machine_mode mode)
28943 /* Neon also supports V2SImode, etc. listed in the clause below. */
28944 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28945 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28946 return true;
28948 if ((TARGET_NEON || TARGET_IWMMXT)
28949 && ((mode == V2SImode)
28950 || (mode == V4HImode)
28951 || (mode == V8QImode)))
28952 return true;
28954 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28955 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28956 || mode == V2HAmode))
28957 return true;
28959 return false;
28962 /* Implements target hook array_mode_supported_p. */
28964 static bool
28965 arm_array_mode_supported_p (enum machine_mode mode,
28966 unsigned HOST_WIDE_INT nelems)
28968 if (TARGET_NEON
28969 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28970 && (nelems >= 2 && nelems <= 4))
28971 return true;
28973 return false;
28976 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28977 registers when autovectorizing for Neon, at least until multiple vector
28978 widths are supported properly by the middle-end. */
28980 static enum machine_mode
28981 arm_preferred_simd_mode (enum machine_mode mode)
28983 if (TARGET_NEON)
28984 switch (mode)
28986 case SFmode:
28987 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28988 case SImode:
28989 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28990 case HImode:
28991 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28992 case QImode:
28993 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28994 case DImode:
28995 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28996 return V2DImode;
28997 break;
28999 default:;
29002 if (TARGET_REALLY_IWMMXT)
29003 switch (mode)
29005 case SImode:
29006 return V2SImode;
29007 case HImode:
29008 return V4HImode;
29009 case QImode:
29010 return V8QImode;
29012 default:;
29015 return word_mode;
29018 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29020 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29021 using r0-r4 for function arguments, r7 for the stack frame and don't have
29022 enough left over to do doubleword arithmetic. For Thumb-2 all the
29023 potentially problematic instructions accept high registers so this is not
29024 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29025 that require many low registers. */
29026 static bool
29027 arm_class_likely_spilled_p (reg_class_t rclass)
29029 if ((TARGET_THUMB1 && rclass == LO_REGS)
29030 || rclass == CC_REG)
29031 return true;
29033 return false;
29036 /* Implements target hook small_register_classes_for_mode_p. */
29037 bool
29038 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
29040 return TARGET_THUMB1;
29043 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29044 ARM insns and therefore guarantee that the shift count is modulo 256.
29045 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29046 guarantee no particular behavior for out-of-range counts. */
29048 static unsigned HOST_WIDE_INT
29049 arm_shift_truncation_mask (enum machine_mode mode)
29051 return mode == SImode ? 255 : 0;
29055 /* Map internal gcc register numbers to DWARF2 register numbers. */
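/* Illustrative mapping: r0-r15 keep their numbers 0-15, s0 (the first VFP
   register) becomes 64, and a double-only register such as d16 becomes
   256 + 16 = 272; the iWMMXt wCGR and wR banks start at 104 and 112
   respectively.  */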
29057 unsigned int
29058 arm_dbx_register_number (unsigned int regno)
29060 if (regno < 16)
29061 return regno;
29063 if (IS_VFP_REGNUM (regno))
29065 /* See comment in arm_dwarf_register_span. */
29066 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29067 return 64 + regno - FIRST_VFP_REGNUM;
29068 else
29069 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29072 if (IS_IWMMXT_GR_REGNUM (regno))
29073 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29075 if (IS_IWMMXT_REGNUM (regno))
29076 return 112 + regno - FIRST_IWMMXT_REGNUM;
29078 gcc_unreachable ();
29081 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29082 GCC models them as 64 32-bit registers, so we need to describe this to
29083 the DWARF generation code. Other registers can use the default. */
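/* Illustrative example: a DFmode value living in d0 is described to DWARF
   as a PARALLEL of its two SImode halves, roughly
   (parallel [(reg:SI s0) (reg:SI s1)]) on a little-endian target, with the
   halves swapped for big-endian; a value in d16-d31 is described with
   DImode pieces instead.  */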
29084 static rtx
29085 arm_dwarf_register_span (rtx rtl)
29087 enum machine_mode mode;
29088 unsigned regno;
29089 rtx parts[16];
29090 int nregs;
29091 int i;
29093 regno = REGNO (rtl);
29094 if (!IS_VFP_REGNUM (regno))
29095 return NULL_RTX;
29097 /* XXX FIXME: The EABI defines two VFP register ranges:
29098 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29099 256-287: D0-D31
29100 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29101 corresponding D register. Until GDB supports this, we shall use the
29102 legacy encodings. We also use these encodings for D0-D15 for
29103 compatibility with older debuggers. */
29104 mode = GET_MODE (rtl);
29105 if (GET_MODE_SIZE (mode) < 8)
29106 return NULL_RTX;
29108 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29110 nregs = GET_MODE_SIZE (mode) / 4;
29111 for (i = 0; i < nregs; i += 2)
29112 if (TARGET_BIG_END)
29114 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29115 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29117 else
29119 parts[i] = gen_rtx_REG (SImode, regno + i);
29120 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29123 else
29125 nregs = GET_MODE_SIZE (mode) / 8;
29126 for (i = 0; i < nregs; i++)
29127 parts[i] = gen_rtx_REG (DImode, regno + i);
29130 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29133 #if ARM_UNWIND_INFO
29134 /* Emit unwind directives for a store-multiple instruction or stack pointer
29135 push during alignment.
29136 These should only ever be generated by the function prologue code, so
29137 expect them to have a particular form.
29138 The store-multiple instruction sometimes pushes pc as the last register,
29139 even though pc should not be recorded in the unwind information, and for
29140 -Os it sometimes pushes dummy registers before the first register that
29141 needs to be tracked in the unwind information; such dummy registers are
29142 there only to avoid a separate stack adjustment, and will not be restored
29143 in the epilogue. */
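/* Illustrative example: a prologue `push {r4, r5, lr}' is annotated as
   "\t.save {r4, r5, lr}\n", a `vpush {d8, d9}' as "\t.vsave {d8, d9}\n",
   and a push whose last register is pc gets an extra "\t.pad #4\n"
   instead of recording pc itself.  */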
29145 static void
29146 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29148 int i;
29149 HOST_WIDE_INT offset;
29150 HOST_WIDE_INT nregs;
29151 int reg_size;
29152 unsigned reg;
29153 unsigned lastreg;
29154 unsigned padfirst = 0, padlast = 0;
29155 rtx e;
29157 e = XVECEXP (p, 0, 0);
29158 gcc_assert (GET_CODE (e) == SET);
29160 /* First insn will adjust the stack pointer. */
29161 gcc_assert (GET_CODE (e) == SET
29162 && REG_P (SET_DEST (e))
29163 && REGNO (SET_DEST (e)) == SP_REGNUM
29164 && GET_CODE (SET_SRC (e)) == PLUS);
29166 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29167 nregs = XVECLEN (p, 0) - 1;
29168 gcc_assert (nregs);
29170 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29171 if (reg < 16)
29173 /* For -Os dummy registers can be pushed at the beginning to
29174 avoid separate stack pointer adjustment. */
29175 e = XVECEXP (p, 0, 1);
29176 e = XEXP (SET_DEST (e), 0);
29177 if (GET_CODE (e) == PLUS)
29178 padfirst = INTVAL (XEXP (e, 1));
29179 gcc_assert (padfirst == 0 || optimize_size);
29180 /* The function prologue may also push pc, but not annotate it as it is
29181 never restored. We turn this into a stack pointer adjustment. */
29182 e = XVECEXP (p, 0, nregs);
29183 e = XEXP (SET_DEST (e), 0);
29184 if (GET_CODE (e) == PLUS)
29185 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29186 else
29187 padlast = offset - 4;
29188 gcc_assert (padlast == 0 || padlast == 4);
29189 if (padlast == 4)
29190 fprintf (asm_out_file, "\t.pad #4\n");
29191 reg_size = 4;
29192 fprintf (asm_out_file, "\t.save {");
29194 else if (IS_VFP_REGNUM (reg))
29196 reg_size = 8;
29197 fprintf (asm_out_file, "\t.vsave {");
29199 else
29200 /* Unknown register type. */
29201 gcc_unreachable ();
29203 /* If the stack increment doesn't match the size of the saved registers,
29204 something has gone horribly wrong. */
29205 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29207 offset = padfirst;
29208 lastreg = 0;
29209 /* The remaining insns will describe the stores. */
29210 for (i = 1; i <= nregs; i++)
29212 /* Expect (set (mem <addr>) (reg)).
29213 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29214 e = XVECEXP (p, 0, i);
29215 gcc_assert (GET_CODE (e) == SET
29216 && MEM_P (SET_DEST (e))
29217 && REG_P (SET_SRC (e)));
29219 reg = REGNO (SET_SRC (e));
29220 gcc_assert (reg >= lastreg);
29222 if (i != 1)
29223 fprintf (asm_out_file, ", ");
29224 /* We can't use %r for vfp because we need to use the
29225 double precision register names. */
29226 if (IS_VFP_REGNUM (reg))
29227 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29228 else
29229 asm_fprintf (asm_out_file, "%r", reg);
29231 #ifdef ENABLE_CHECKING
29232 /* Check that the addresses are consecutive. */
29233 e = XEXP (SET_DEST (e), 0);
29234 if (GET_CODE (e) == PLUS)
29235 gcc_assert (REG_P (XEXP (e, 0))
29236 && REGNO (XEXP (e, 0)) == SP_REGNUM
29237 && CONST_INT_P (XEXP (e, 1))
29238 && offset == INTVAL (XEXP (e, 1)));
29239 else
29240 gcc_assert (i == 1
29241 && REG_P (e)
29242 && REGNO (e) == SP_REGNUM);
29243 offset += reg_size;
29244 #endif
29246 fprintf (asm_out_file, "}\n");
29247 if (padfirst)
29248 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29251 /* Emit unwind directives for a SET. */
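/* Illustrative examples (directive spelling per the ARM EHABI assembler
   syntax): `str r4, [sp, #-4]!' produces "\t.save {r4}\n",
   `sub sp, sp, #16' produces "\t.pad #16\n", and setting the frame
   pointer from sp plus a constant produces something like
   "\t.setfp fp, sp, #8\n".  */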
29253 static void
29254 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29256 rtx e0;
29257 rtx e1;
29258 unsigned reg;
29260 e0 = XEXP (p, 0);
29261 e1 = XEXP (p, 1);
29262 switch (GET_CODE (e0))
29264 case MEM:
29265 /* Pushing a single register. */
29266 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29267 || !REG_P (XEXP (XEXP (e0, 0), 0))
29268 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29269 abort ();
29271 asm_fprintf (asm_out_file, "\t.save ");
29272 if (IS_VFP_REGNUM (REGNO (e1)))
29273 asm_fprintf(asm_out_file, "{d%d}\n",
29274 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29275 else
29276 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29277 break;
29279 case REG:
29280 if (REGNO (e0) == SP_REGNUM)
29282 /* A stack increment. */
29283 if (GET_CODE (e1) != PLUS
29284 || !REG_P (XEXP (e1, 0))
29285 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29286 || !CONST_INT_P (XEXP (e1, 1)))
29287 abort ();
29289 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29290 -INTVAL (XEXP (e1, 1)));
29292 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29294 HOST_WIDE_INT offset;
29296 if (GET_CODE (e1) == PLUS)
29298 if (!REG_P (XEXP (e1, 0))
29299 || !CONST_INT_P (XEXP (e1, 1)))
29300 abort ();
29301 reg = REGNO (XEXP (e1, 0));
29302 offset = INTVAL (XEXP (e1, 1));
29303 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29304 HARD_FRAME_POINTER_REGNUM, reg,
29305 offset);
29307 else if (REG_P (e1))
29309 reg = REGNO (e1);
29310 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29311 HARD_FRAME_POINTER_REGNUM, reg);
29313 else
29314 abort ();
29316 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29318 /* Move from sp to reg. */
29319 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29321 else if (GET_CODE (e1) == PLUS
29322 && REG_P (XEXP (e1, 0))
29323 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29324 && CONST_INT_P (XEXP (e1, 1)))
29326 /* Set reg to offset from sp. */
29327 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29328 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29330 else
29331 abort ();
29332 break;
29334 default:
29335 abort ();
29340 /* Emit unwind directives for the given insn. */
29342 static void
29343 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29345 rtx note, pat;
29346 bool handled_one = false;
29348 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29349 return;
29351 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29352 && (TREE_NOTHROW (current_function_decl)
29353 || crtl->all_throwers_are_sibcalls))
29354 return;
29356 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29357 return;
29359 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29361 switch (REG_NOTE_KIND (note))
29363 case REG_FRAME_RELATED_EXPR:
29364 pat = XEXP (note, 0);
29365 goto found;
29367 case REG_CFA_REGISTER:
29368 pat = XEXP (note, 0);
29369 if (pat == NULL)
29371 pat = PATTERN (insn);
29372 if (GET_CODE (pat) == PARALLEL)
29373 pat = XVECEXP (pat, 0, 0);
29376 /* Only emitted for IS_STACKALIGN re-alignment. */
29378 rtx dest, src;
29379 unsigned reg;
29381 src = SET_SRC (pat);
29382 dest = SET_DEST (pat);
29384 gcc_assert (src == stack_pointer_rtx);
29385 reg = REGNO (dest);
29386 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29387 reg + 0x90, reg);
29389 handled_one = true;
29390 break;
29392 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29393 to get correct dwarf information for shrink-wrapping. We should not
29394 emit unwind information for it because these notes are used either for
29395 pretend arguments or to adjust the stack pointer and restore registers
29396 from the stack. */
29397 case REG_CFA_DEF_CFA:
29398 case REG_CFA_ADJUST_CFA:
29399 case REG_CFA_RESTORE:
29400 return;
29402 case REG_CFA_EXPRESSION:
29403 case REG_CFA_OFFSET:
29404 /* ??? Only handling here what we actually emit. */
29405 gcc_unreachable ();
29407 default:
29408 break;
29411 if (handled_one)
29412 return;
29413 pat = PATTERN (insn);
29414 found:
29416 switch (GET_CODE (pat))
29418 case SET:
29419 arm_unwind_emit_set (asm_out_file, pat);
29420 break;
29422 case SEQUENCE:
29423 /* Store multiple. */
29424 arm_unwind_emit_sequence (asm_out_file, pat);
29425 break;
29427 default:
29428 abort();
29433 /* Output a reference from a function exception table to the type_info
29434 object X. The EABI specifies that the symbol should be relocated by
29435 an R_ARM_TARGET2 relocation. */
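/* Illustrative output: for a catch clause matching `int' the table entry
   would be emitted as something like "\t.word\t_ZTIi(TARGET2)\n", leaving
   the linker/loader to resolve the R_ARM_TARGET2 relocation; plain
   integer entries are emitted without the (TARGET2) decoration.  */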
29437 static bool
29438 arm_output_ttype (rtx x)
29440 fputs ("\t.word\t", asm_out_file);
29441 output_addr_const (asm_out_file, x);
29442 /* Use special relocations for symbol references. */
29443 if (!CONST_INT_P (x))
29444 fputs ("(TARGET2)", asm_out_file);
29445 fputc ('\n', asm_out_file);
29447 return TRUE;
29450 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29452 static void
29453 arm_asm_emit_except_personality (rtx personality)
29455 fputs ("\t.personality\t", asm_out_file);
29456 output_addr_const (asm_out_file, personality);
29457 fputc ('\n', asm_out_file);
29460 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29462 static void
29463 arm_asm_init_sections (void)
29465 exception_section = get_unnamed_section (0, output_section_asm_op,
29466 "\t.handlerdata");
29468 #endif /* ARM_UNWIND_INFO */
29470 /* Output unwind directives for the start/end of a function. */
29472 void
29473 arm_output_fn_unwind (FILE * f, bool prologue)
29475 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29476 return;
29478 if (prologue)
29479 fputs ("\t.fnstart\n", f);
29480 else
29482 /* If this function will never be unwound, then mark it as such.
29483 The same condition is used in arm_unwind_emit to suppress
29484 the frame annotations. */
29485 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29486 && (TREE_NOTHROW (current_function_decl)
29487 || crtl->all_throwers_are_sibcalls))
29488 fputs("\t.cantunwind\n", f);
29490 fputs ("\t.fnend\n", f);
29494 static bool
29495 arm_emit_tls_decoration (FILE *fp, rtx x)
29497 enum tls_reloc reloc;
29498 rtx val;
29500 val = XVECEXP (x, 0, 0);
29501 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29503 output_addr_const (fp, val);
29505 switch (reloc)
29507 case TLS_GD32:
29508 fputs ("(tlsgd)", fp);
29509 break;
29510 case TLS_LDM32:
29511 fputs ("(tlsldm)", fp);
29512 break;
29513 case TLS_LDO32:
29514 fputs ("(tlsldo)", fp);
29515 break;
29516 case TLS_IE32:
29517 fputs ("(gottpoff)", fp);
29518 break;
29519 case TLS_LE32:
29520 fputs ("(tpoff)", fp);
29521 break;
29522 case TLS_DESCSEQ:
29523 fputs ("(tlsdesc)", fp);
29524 break;
29525 default:
29526 gcc_unreachable ();
29529 switch (reloc)
29531 case TLS_GD32:
29532 case TLS_LDM32:
29533 case TLS_IE32:
29534 case TLS_DESCSEQ:
29535 fputs (" + (. - ", fp);
29536 output_addr_const (fp, XVECEXP (x, 0, 2));
29537 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29538 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29539 output_addr_const (fp, XVECEXP (x, 0, 3));
29540 fputc (')', fp);
29541 break;
29542 default:
29543 break;
29546 return TRUE;
29549 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29551 static void
29552 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29554 gcc_assert (size == 4);
29555 fputs ("\t.word\t", file);
29556 output_addr_const (file, x);
29557 fputs ("(tlsldo)", file);
29560 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29562 static bool
29563 arm_output_addr_const_extra (FILE *fp, rtx x)
29565 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29566 return arm_emit_tls_decoration (fp, x);
29567 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29569 char label[256];
29570 int labelno = INTVAL (XVECEXP (x, 0, 0));
29572 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29573 assemble_name_raw (fp, label);
29575 return TRUE;
29577 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29579 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29580 if (GOT_PCREL)
29581 fputs ("+.", fp);
29582 fputs ("-(", fp);
29583 output_addr_const (fp, XVECEXP (x, 0, 0));
29584 fputc (')', fp);
29585 return TRUE;
29587 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29589 output_addr_const (fp, XVECEXP (x, 0, 0));
29590 if (GOT_PCREL)
29591 fputs ("+.", fp);
29592 fputs ("-(", fp);
29593 output_addr_const (fp, XVECEXP (x, 0, 1));
29594 fputc (')', fp);
29595 return TRUE;
29597 else if (GET_CODE (x) == CONST_VECTOR)
29598 return arm_emit_vector_const (fp, x);
29600 return FALSE;
29603 /* Output assembly for a shift instruction.
29604 SET_FLAGS determines how the instruction modifies the condition codes.
29605 0 - Do not set condition codes.
29606 1 - Set condition codes.
29607 2 - Use smallest instruction. */
29608 const char *
29609 arm_output_shift(rtx * operands, int set_flags)
29611 char pattern[100];
29612 static const char flag_chars[3] = {'?', '.', '!'};
29613 const char *shift;
29614 HOST_WIDE_INT val;
29615 char c;
29617 c = flag_chars[set_flags];
29618 if (TARGET_UNIFIED_ASM)
29620 shift = shift_op(operands[3], &val);
29621 if (shift)
29623 if (val != -1)
29624 operands[2] = GEN_INT(val);
29625 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29627 else
29628 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29630 else
29631 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29632 output_asm_insn (pattern, operands);
29633 return "";
29636 /* Output assembly for a WMMX immediate shift instruction. */
29637 const char *
29638 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29640 int shift = INTVAL (operands[2]);
29641 char templ[50];
29642 enum machine_mode opmode = GET_MODE (operands[0]);
29644 gcc_assert (shift >= 0);
29646 /* If the shift value in the register versions exceeds 63 (for the D qualifier),
29647 31 (for the W qualifier) or 15 (for the H qualifier), handle it specially. */
29648 if (((opmode == V4HImode) && (shift > 15))
29649 || ((opmode == V2SImode) && (shift > 31))
29650 || ((opmode == DImode) && (shift > 63)))
29652 if (wror_or_wsra)
29654 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29655 output_asm_insn (templ, operands);
29656 if (opmode == DImode)
29658 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29659 output_asm_insn (templ, operands);
29662 else
29664 /* The destination register will contain all zeros. */
29665 sprintf (templ, "wzero\t%%0");
29666 output_asm_insn (templ, operands);
29668 return "";
29671 if ((opmode == DImode) && (shift > 32))
29673 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29674 output_asm_insn (templ, operands);
29675 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29676 output_asm_insn (templ, operands);
29678 else
29680 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29681 output_asm_insn (templ, operands);
29683 return "";
29686 /* Output assembly for a WMMX tinsr instruction. */
29687 const char *
29688 arm_output_iwmmxt_tinsr (rtx *operands)
29690 int mask = INTVAL (operands[3]);
29691 int i;
29692 char templ[50];
29693 int units = mode_nunits[GET_MODE (operands[0])];
29694 gcc_assert ((mask & (mask - 1)) == 0);
29695 for (i = 0; i < units; ++i)
29697 if ((mask & 0x01) == 1)
29699 break;
29701 mask >>= 1;
29703 gcc_assert (i < units);
29705 switch (GET_MODE (operands[0]))
29707 case V8QImode:
29708 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29709 break;
29710 case V4HImode:
29711 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29712 break;
29713 case V2SImode:
29714 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29715 break;
29716 default:
29717 gcc_unreachable ();
29718 break;
29720 output_asm_insn (templ, operands);
29722 return "";
29725 /* Output a Thumb-1 casesi dispatch sequence. */
29726 const char *
29727 thumb1_output_casesi (rtx *operands)
29729 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29731 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29733 switch (GET_MODE(diff_vec))
29735 case QImode:
29736 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29737 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29738 case HImode:
29739 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29740 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29741 case SImode:
29742 return "bl\t%___gnu_thumb1_case_si";
29743 default:
29744 gcc_unreachable ();
29748 /* Output a Thumb-2 casesi instruction. */
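/* Illustrative sketch of the dispatch sequence emitted below for a QImode
   difference vector:
       cmp   r0, r1          @ index against table size
       bhi   .Ldefault       @ out of range -> default label
       tbb   [pc, r0]        @ branch via byte offset table
   HImode tables use tbh, and SImode tables fall back to an adr/ldr
   sequence (plus an add/bx pair when compiling PIC).  */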
29749 const char *
29750 thumb2_output_casesi (rtx *operands)
29752 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29754 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29756 output_asm_insn ("cmp\t%0, %1", operands);
29757 output_asm_insn ("bhi\t%l3", operands);
29758 switch (GET_MODE(diff_vec))
29760 case QImode:
29761 return "tbb\t[%|pc, %0]";
29762 case HImode:
29763 return "tbh\t[%|pc, %0, lsl #1]";
29764 case SImode:
29765 if (flag_pic)
29767 output_asm_insn ("adr\t%4, %l2", operands);
29768 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29769 output_asm_insn ("add\t%4, %4, %5", operands);
29770 return "bx\t%4";
29772 else
29774 output_asm_insn ("adr\t%4, %l2", operands);
29775 return "ldr\t%|pc, [%4, %0, lsl #2]";
29777 default:
29778 gcc_unreachable ();
29782 /* Most ARM cores are single issue, but some newer ones can dual issue.
29783 The scheduler descriptions rely on this being correct. */
29784 static int
29785 arm_issue_rate (void)
29787 switch (arm_tune)
29789 case cortexa15:
29790 case cortexa57:
29791 return 3;
29793 case cortexr4:
29794 case cortexr4f:
29795 case cortexr5:
29796 case genericv7a:
29797 case cortexa5:
29798 case cortexa7:
29799 case cortexa8:
29800 case cortexa9:
29801 case cortexa12:
29802 case cortexa53:
29803 case fa726te:
29804 case marvell_pj4:
29805 return 2;
29807 default:
29808 return 1;
29812 /* A table and a function to perform ARM-specific name mangling for
29813 NEON vector types in order to conform to the AAPCS (see "Procedure
29814 Call Standard for the ARM Architecture", Appendix A). To qualify
29815 for emission with the mangled names defined in that document, a
29816 vector type must not only be of the correct mode but also be
29817 composed of NEON vector element types (e.g. __builtin_neon_qi). */
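/* Illustrative example: int8x8_t has V8QImode and element type
   __builtin_neon_qi, so the table below maps it to "15__simd64_int8_t";
   a function `void f (int8x8_t)' therefore mangles to something like
   _Z1f15__simd64_int8_t.  */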
29818 typedef struct
29820 enum machine_mode mode;
29821 const char *element_type_name;
29822 const char *aapcs_name;
29823 } arm_mangle_map_entry;
29825 static arm_mangle_map_entry arm_mangle_map[] = {
29826 /* 64-bit containerized types. */
29827 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29828 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29829 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29830 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29831 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29832 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29833 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29834 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29835 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29836 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29838 /* 128-bit containerized types. */
29839 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29840 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29841 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29842 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29843 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29844 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29845 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29846 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29847 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29848 { VOIDmode, NULL, NULL }
29851 const char *
29852 arm_mangle_type (const_tree type)
29854 arm_mangle_map_entry *pos = arm_mangle_map;
29856 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29857 has to be mangled as if it is in the "std" namespace. */
29858 if (TARGET_AAPCS_BASED
29859 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29860 return "St9__va_list";
29862 /* Half-precision float. */
29863 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29864 return "Dh";
29866 if (TREE_CODE (type) != VECTOR_TYPE)
29867 return NULL;
29869 /* Check the mode of the vector type, and the name of the vector
29870 element type, against the table. */
29871 while (pos->mode != VOIDmode)
29873 tree elt_type = TREE_TYPE (type);
29875 if (pos->mode == TYPE_MODE (type)
29876 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29877 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29878 pos->element_type_name))
29879 return pos->aapcs_name;
29881 pos++;
29884 /* Use the default mangling for unrecognized (possibly user-defined)
29885 vector types. */
29886 return NULL;
29889 /* Order of allocation of core registers for Thumb: this allocation is
29890 written over the corresponding initial entries of the array
29891 initialized with REG_ALLOC_ORDER. We allocate all low registers
29892 first. Saving and restoring a low register is usually cheaper than
29893 using a call-clobbered high register. */
29895 static const int thumb_core_reg_alloc_order[] =
29897 3, 2, 1, 0, 4, 5, 6, 7,
29898 14, 12, 8, 9, 10, 11
29901 /* Adjust register allocation order when compiling for Thumb. */
29903 void
29904 arm_order_regs_for_local_alloc (void)
29906 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29907 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29908 if (TARGET_THUMB)
29909 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29910 sizeof (thumb_core_reg_alloc_order));
29913 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29915 bool
29916 arm_frame_pointer_required (void)
29918 return (cfun->has_nonlocal_label
29919 || SUBTARGET_FRAME_POINTER_REQUIRED
29920 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29923 /* Only thumb1 can't support conditional execution, so return true if
29924 the target is not thumb1. */
29925 static bool
29926 arm_have_conditional_execution (void)
29928 return !TARGET_THUMB1;
29931 tree
29932 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29934 enum machine_mode in_mode, out_mode;
29935 int in_n, out_n;
29937 if (TREE_CODE (type_out) != VECTOR_TYPE
29938 || TREE_CODE (type_in) != VECTOR_TYPE)
29939 return NULL_TREE;
29941 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29942 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29943 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29944 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29946 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29947 decl of the vectorized builtin for the appropriate vector mode.
29948 NULL_TREE is returned if no such builtin is available. */
29949 #undef ARM_CHECK_BUILTIN_MODE
29950 #define ARM_CHECK_BUILTIN_MODE(C) \
29951 (TARGET_NEON && TARGET_FPU_ARMV8 \
29952 && flag_unsafe_math_optimizations \
29953 && ARM_CHECK_BUILTIN_MODE_1 (C))
29955 #undef ARM_CHECK_BUILTIN_MODE_1
29956 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29957 (out_mode == SFmode && out_n == C \
29958 && in_mode == SFmode && in_n == C)
29960 #undef ARM_FIND_VRINT_VARIANT
29961 #define ARM_FIND_VRINT_VARIANT(N) \
29962 (ARM_CHECK_BUILTIN_MODE (2) \
29963 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29964 : (ARM_CHECK_BUILTIN_MODE (4) \
29965 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29966 : NULL_TREE))
29968 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29970 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29971 switch (fn)
29973 case BUILT_IN_FLOORF:
29974 return ARM_FIND_VRINT_VARIANT (vrintm);
29975 case BUILT_IN_CEILF:
29976 return ARM_FIND_VRINT_VARIANT (vrintp);
29977 case BUILT_IN_TRUNCF:
29978 return ARM_FIND_VRINT_VARIANT (vrintz);
29979 case BUILT_IN_ROUNDF:
29980 return ARM_FIND_VRINT_VARIANT (vrinta);
29981 #undef ARM_CHECK_BUILTIN_MODE
29982 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29983 (out_mode == N##Imode && out_n == C \
29984 && in_mode == N##Imode && in_n == C)
29985 case BUILT_IN_BSWAP16:
29986 if (ARM_CHECK_BUILTIN_MODE (4, H))
29987 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
29988 else if (ARM_CHECK_BUILTIN_MODE (8, H))
29989 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
29990 else
29991 return NULL_TREE;
29992 case BUILT_IN_BSWAP32:
29993 if (ARM_CHECK_BUILTIN_MODE (2, S))
29994 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
29995 else if (ARM_CHECK_BUILTIN_MODE (4, S))
29996 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
29997 else
29998 return NULL_TREE;
29999 case BUILT_IN_BSWAP64:
30000 if (ARM_CHECK_BUILTIN_MODE (2, D))
30001 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30002 else
30003 return NULL_TREE;
30005 default:
30006 return NULL_TREE;
30009 return NULL_TREE;
30011 #undef ARM_CHECK_BUILTIN_MODE
30012 #undef ARM_FIND_VRINT_VARIANT
30014 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30015 static HOST_WIDE_INT
30016 arm_vector_alignment (const_tree type)
30018 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30020 if (TARGET_AAPCS_BASED)
30021 align = MIN (align, 64);
30023 return align;
30026 static unsigned int
30027 arm_autovectorize_vector_sizes (void)
30029 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30032 static bool
30033 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30035 /* Vectors which aren't in packed structures will not be less aligned than
30036 the natural alignment of their element type, so this is safe. */
30037 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30038 return !is_packed;
30040 return default_builtin_vector_alignment_reachable (type, is_packed);
30043 static bool
30044 arm_builtin_support_vector_misalignment (enum machine_mode mode,
30045 const_tree type, int misalignment,
30046 bool is_packed)
30048 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30050 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30052 if (is_packed)
30053 return align == 1;
30055 /* If the misalignment is unknown, we should be able to handle the access
30056 so long as it is not to a member of a packed data structure. */
30057 if (misalignment == -1)
30058 return true;
30060 /* Return true if the misalignment is a multiple of the natural alignment
30061 of the vector's element type. This is probably always going to be
30062 true in practice, since we've already established that this isn't a
30063 packed access. */
30064 return ((misalignment % align) == 0);
30067 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30068 is_packed);
30071 static void
30072 arm_conditional_register_usage (void)
30074 int regno;
30076 if (TARGET_THUMB1 && optimize_size)
30078 /* When optimizing for size on Thumb-1, it's better not
30079 to use the HI regs, because of the overhead of
30080 stacking them. */
30081 for (regno = FIRST_HI_REGNUM;
30082 regno <= LAST_HI_REGNUM; ++regno)
30083 fixed_regs[regno] = call_used_regs[regno] = 1;
30086 /* The link register can be clobbered by any branch insn,
30087 but we have no way to track that at present, so mark
30088 it as unavailable. */
30089 if (TARGET_THUMB1)
30090 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30092 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30094 /* VFPv3 registers are disabled when earlier VFP
30095 versions are selected due to the definition of
30096 LAST_VFP_REGNUM. */
30097 for (regno = FIRST_VFP_REGNUM;
30098 regno <= LAST_VFP_REGNUM; ++ regno)
30100 fixed_regs[regno] = 0;
30101 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30102 || regno >= FIRST_VFP_REGNUM + 32;
30106 if (TARGET_REALLY_IWMMXT)
30108 regno = FIRST_IWMMXT_GR_REGNUM;
30109 /* The 2002/10/09 revision of the XScale ABI has wCG0
30110 and wCG1 as call-preserved registers. The 2002/11/21
30111 revision changed this so that all wCG registers are
30112 scratch registers. */
30113 for (regno = FIRST_IWMMXT_GR_REGNUM;
30114 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30115 fixed_regs[regno] = 0;
30116 /* The XScale ABI has wR0 - wR9 as scratch registers,
30117 the rest as call-preserved registers. */
30118 for (regno = FIRST_IWMMXT_REGNUM;
30119 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30121 fixed_regs[regno] = 0;
30122 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30126 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30128 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30129 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30131 else if (TARGET_APCS_STACK)
30133 fixed_regs[10] = 1;
30134 call_used_regs[10] = 1;
30136 /* -mcaller-super-interworking reserves r11 for calls to
30137 _interwork_r11_call_via_rN(). Making the register global
30138 is an easy way of ensuring that it remains valid for all
30139 calls. */
30140 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30141 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30143 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30144 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30145 if (TARGET_CALLER_INTERWORKING)
30146 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30148 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30151 static reg_class_t
30152 arm_preferred_rename_class (reg_class_t rclass)
30154 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30155 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
30156 and code size can be reduced. */
30157 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30158 return LO_REGS;
30159 else
30160 return NO_REGS;
30163 /* Compute the attribute "length" of insn "*push_multi".
30164 So this function MUST be kept in sync with that insn pattern. */
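/* Illustrative example: in Thumb-2, `push {r4-r7, lr}' fits the 16-bit
   encoding, so the length computed below is 2, whereas `push {r4, r8}'
   names a high register other than lr and needs the 32-bit encoding,
   giving length 4.  */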
30166 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30168 int i, regno, hi_reg;
30169 int num_saves = XVECLEN (parallel_op, 0);
30171 /* ARM mode. */
30172 if (TARGET_ARM)
30173 return 4;
30174 /* Thumb1 mode. */
30175 if (TARGET_THUMB1)
30176 return 2;
30178 /* Thumb2 mode. */
30179 regno = REGNO (first_op);
30180 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30181 for (i = 1; i < num_saves && !hi_reg; i++)
30183 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30184 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30187 if (!hi_reg)
30188 return 2;
30189 return 4;
30192 /* Compute the number of instructions emitted by output_move_double. */
30194 arm_count_output_move_double_insns (rtx *operands)
30196 int count;
30197 rtx ops[2];
30198 /* output_move_double may modify the operands array, so call it
30199 here on a copy of the array. */
30200 ops[0] = operands[0];
30201 ops[1] = operands[1];
30202 output_move_double (ops, false, &count);
30203 return count;
30207 vfp3_const_double_for_fract_bits (rtx operand)
30209 REAL_VALUE_TYPE r0;
30211 if (!CONST_DOUBLE_P (operand))
30212 return 0;
30214 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30215 if (exact_real_inverse (DFmode, &r0))
30217 if (exact_real_truncate (DFmode, &r0))
30219 HOST_WIDE_INT value = real_to_integer (&r0);
30220 value = value & 0xffffffff;
30221 if ((value != 0) && ( (value & (value - 1)) == 0))
30222 return int_log2 (value);
30225 return 0;
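/* Illustrative example for vfp3_const_double_for_fract_bits above: the
   constant 0.125 has exact reciprocal 8.0, a power of two, so the routine
   returns int_log2 (8) = 3; constants whose reciprocal is not an exact
   power of two yield 0.  */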
30229 vfp3_const_double_for_bits (rtx operand)
30231 REAL_VALUE_TYPE r0;
30233 if (!CONST_DOUBLE_P (operand))
30234 return 0;
30236 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30237 if (exact_real_truncate (DFmode, &r0))
30239 HOST_WIDE_INT value = real_to_integer (&r0);
30240 value = value & 0xffffffff;
30241 if ((value != 0) && ( (value & (value - 1)) == 0))
30242 return int_log2 (value);
30245 return 0;
30248 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30250 static void
30251 arm_pre_atomic_barrier (enum memmodel model)
30253 if (need_atomic_barrier_p (model, true))
30254 emit_insn (gen_memory_barrier ());
30257 static void
30258 arm_post_atomic_barrier (enum memmodel model)
30260 if (need_atomic_barrier_p (model, false))
30261 emit_insn (gen_memory_barrier ());
30264 /* Emit the load-exclusive and store-exclusive instructions.
30265 Use acquire and release versions if necessary. */
30267 static void
30268 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30270 rtx (*gen) (rtx, rtx);
30272 if (acq)
30274 switch (mode)
30276 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30277 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30278 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30279 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30280 default:
30281 gcc_unreachable ();
30284 else
30286 switch (mode)
30288 case QImode: gen = gen_arm_load_exclusiveqi; break;
30289 case HImode: gen = gen_arm_load_exclusivehi; break;
30290 case SImode: gen = gen_arm_load_exclusivesi; break;
30291 case DImode: gen = gen_arm_load_exclusivedi; break;
30292 default:
30293 gcc_unreachable ();
30297 emit_insn (gen (rval, mem));
30300 static void
30301 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30302 rtx mem, bool rel)
30304 rtx (*gen) (rtx, rtx, rtx);
30306 if (rel)
30308 switch (mode)
30310 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30311 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30312 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30313 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30314 default:
30315 gcc_unreachable ();
30318 else
30320 switch (mode)
30322 case QImode: gen = gen_arm_store_exclusiveqi; break;
30323 case HImode: gen = gen_arm_store_exclusivehi; break;
30324 case SImode: gen = gen_arm_store_exclusivesi; break;
30325 case DImode: gen = gen_arm_store_exclusivedi; break;
30326 default:
30327 gcc_unreachable ();
30331 emit_insn (gen (bval, rval, mem));
30334 /* Mark the previous jump instruction as unlikely. */
30336 static void
30337 emit_unlikely_jump (rtx insn)
30339 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30341 insn = emit_jump_insn (insn);
30342 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30345 /* Expand a compare and swap pattern. */
30347 void
30348 arm_expand_compare_and_swap (rtx operands[])
30350 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30351 enum machine_mode mode;
30352 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30354 bval = operands[0];
30355 rval = operands[1];
30356 mem = operands[2];
30357 oldval = operands[3];
30358 newval = operands[4];
30359 is_weak = operands[5];
30360 mod_s = operands[6];
30361 mod_f = operands[7];
30362 mode = GET_MODE (mem);
30364 /* Normally the succ memory model must be stronger than fail, but in the
30365 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30366 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30368 if (TARGET_HAVE_LDACQ
30369 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30370 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30371 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30373 switch (mode)
30375 case QImode:
30376 case HImode:
30377 /* For narrow modes, we're going to perform the comparison in SImode,
30378 so do the zero-extension now. */
30379 rval = gen_reg_rtx (SImode);
30380 oldval = convert_modes (SImode, mode, oldval, true);
30381 /* FALLTHRU */
30383 case SImode:
30384 /* Force the value into a register if needed. We waited until after
30385 the zero-extension above to do this properly. */
30386 if (!arm_add_operand (oldval, SImode))
30387 oldval = force_reg (SImode, oldval);
30388 break;
30390 case DImode:
30391 if (!cmpdi_operand (oldval, mode))
30392 oldval = force_reg (mode, oldval);
30393 break;
30395 default:
30396 gcc_unreachable ();
30399 switch (mode)
30401 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30402 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30403 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30404 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30405 default:
30406 gcc_unreachable ();
30409 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30411 if (mode == QImode || mode == HImode)
30412 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30414 /* In all cases, we arrange for success to be signaled by Z set.
30415 This arrangement allows for the boolean result to be used directly
30416 in a subsequent branch, post optimization. */
30417 x = gen_rtx_REG (CCmode, CC_REGNUM);
30418 x = gen_rtx_EQ (SImode, x, const0_rtx);
30419 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30422 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30423 another memory store between the load-exclusive and store-exclusive can
30424 reset the monitor from Exclusive to Open state. This means we must wait
30425 until after reload to split the pattern, lest we get a register spill in
30426 the middle of the atomic sequence. */
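/* Illustrative sketch (barriers and acquire/release variants omitted) of
   what the splitter below produces for a strong SImode compare-and-swap:
       1:  ldrex   rval, [mem]
           cmp     rval, oldval
           bne     2f
           strex   scratch, newval, [mem]
           cmp     scratch, #0
           bne     1b
       2:                        @ Z set signals success
   A weak compare-and-swap simply omits the backward branch.  */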
30428 void
30429 arm_split_compare_and_swap (rtx operands[])
30431 rtx rval, mem, oldval, newval, scratch;
30432 enum machine_mode mode;
30433 enum memmodel mod_s, mod_f;
30434 bool is_weak;
30435 rtx label1, label2, x, cond;
30437 rval = operands[0];
30438 mem = operands[1];
30439 oldval = operands[2];
30440 newval = operands[3];
30441 is_weak = (operands[4] != const0_rtx);
30442 mod_s = (enum memmodel) INTVAL (operands[5]);
30443 mod_f = (enum memmodel) INTVAL (operands[6]);
30444 scratch = operands[7];
30445 mode = GET_MODE (mem);
30447 bool use_acquire = TARGET_HAVE_LDACQ
30448 && !(mod_s == MEMMODEL_RELAXED
30449 || mod_s == MEMMODEL_CONSUME
30450 || mod_s == MEMMODEL_RELEASE);
30452 bool use_release = TARGET_HAVE_LDACQ
30453 && !(mod_s == MEMMODEL_RELAXED
30454 || mod_s == MEMMODEL_CONSUME
30455 || mod_s == MEMMODEL_ACQUIRE);
30457 /* Checks whether a barrier is needed and emits one accordingly. */
30458 if (!(use_acquire || use_release))
30459 arm_pre_atomic_barrier (mod_s);
30461 label1 = NULL_RTX;
30462 if (!is_weak)
30464 label1 = gen_label_rtx ();
30465 emit_label (label1);
30467 label2 = gen_label_rtx ();
30469 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30471 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30472 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30473 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30474 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30475 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30477 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30479 /* Weak or strong, we want EQ to be true for success, so that we
30480 match the flags that we got from the compare above. */
30481 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30482 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30483 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30485 if (!is_weak)
30487 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30488 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30489 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30490 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30493 if (mod_f != MEMMODEL_RELAXED)
30494 emit_label (label2);
30496 /* Checks whether a barrier is needed and emits one accordingly. */
30497 if (!(use_acquire || use_release))
30498 arm_post_atomic_barrier (mod_s);
30500 if (mod_f == MEMMODEL_RELAXED)
30501 emit_label (label2);
30504 void
30505 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30506 rtx value, rtx model_rtx, rtx cond)
30508 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30509 enum machine_mode mode = GET_MODE (mem);
30510 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30511 rtx label, x;
30513 bool use_acquire = TARGET_HAVE_LDACQ
30514 && !(model == MEMMODEL_RELAXED
30515 || model == MEMMODEL_CONSUME
30516 || model == MEMMODEL_RELEASE);
30518 bool use_release = TARGET_HAVE_LDACQ
30519 && !(model == MEMMODEL_RELAXED
30520 || model == MEMMODEL_CONSUME
30521 || model == MEMMODEL_ACQUIRE);
30523 /* Checks whether a barrier is needed and emits one accordingly. */
30524 if (!(use_acquire || use_release))
30525 arm_pre_atomic_barrier (model);
30527 label = gen_label_rtx ();
30528 emit_label (label);
30530 if (new_out)
30531 new_out = gen_lowpart (wmode, new_out);
30532 if (old_out)
30533 old_out = gen_lowpart (wmode, old_out);
30534 else
30535 old_out = new_out;
30536 value = simplify_gen_subreg (wmode, value, mode, 0);
30538 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30540 switch (code)
30542 case SET:
30543 new_out = value;
30544 break;
30546 case NOT:
30547 x = gen_rtx_AND (wmode, old_out, value);
30548 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30549 x = gen_rtx_NOT (wmode, new_out);
30550 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30551 break;
30553 case MINUS:
30554 if (CONST_INT_P (value))
30556 value = GEN_INT (-INTVAL (value));
30557 code = PLUS;
30559 /* FALLTHRU */
30561 case PLUS:
30562 if (mode == DImode)
30564 /* DImode plus/minus need to clobber flags. */
30565 /* The adddi3 and subdi3 patterns are incorrectly written so that
30566 they require matching operands, even when we could easily support
30567 three operands. Thankfully, this can be fixed up post-splitting,
30568 as the individual add+adc patterns do accept three operands and
30569 post-reload cprop can make these moves go away. */
30570 emit_move_insn (new_out, old_out);
30571 if (code == PLUS)
30572 x = gen_adddi3 (new_out, new_out, value);
30573 else
30574 x = gen_subdi3 (new_out, new_out, value);
30575 emit_insn (x);
30576 break;
30578 /* FALLTHRU */
30580 default:
30581 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30582 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30583 break;
30586 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30587 use_release);
30589 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30590 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30592 /* Checks whether a barrier is needed and emits one accordingly. */
30593 if (!(use_acquire || use_release))
30594 arm_post_atomic_barrier (model);
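/* Illustrative sketch (barriers omitted) of the loop arm_split_atomic_op
   above emits for, e.g., an atomic SImode add:
       1:  ldrex   old, [mem]
           add     new, old, value
           strex   tmp, new, [mem]
           cmp     tmp, #0
           bne     1b
   The NOT (nand) case performs an AND followed by an MVN before the
   store-exclusive.  */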
30597 #define MAX_VECT_LEN 16
30599 struct expand_vec_perm_d
30601 rtx target, op0, op1;
30602 unsigned char perm[MAX_VECT_LEN];
30603 enum machine_mode vmode;
30604 unsigned char nelt;
30605 bool one_vector_p;
30606 bool testing_p;
30609 /* Generate a variable permutation. */
30611 static void
30612 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30614 enum machine_mode vmode = GET_MODE (target);
30615 bool one_vector_p = rtx_equal_p (op0, op1);
30617 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30618 gcc_checking_assert (GET_MODE (op0) == vmode);
30619 gcc_checking_assert (GET_MODE (op1) == vmode);
30620 gcc_checking_assert (GET_MODE (sel) == vmode);
30621 gcc_checking_assert (TARGET_NEON);
30623 if (one_vector_p)
30625 if (vmode == V8QImode)
30626 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30627 else
30628 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30630 else
30632 rtx pair;
30634 if (vmode == V8QImode)
30636 pair = gen_reg_rtx (V16QImode);
30637 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30638 pair = gen_lowpart (TImode, pair);
30639 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30641 else
30643 pair = gen_reg_rtx (OImode);
30644 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30645 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30650 void
30651 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30653 enum machine_mode vmode = GET_MODE (target);
30654 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30655 bool one_vector_p = rtx_equal_p (op0, op1);
30656 rtx rmask[MAX_VECT_LEN], mask;
30658 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30659 numbering of elements for big-endian, we must reverse the order. */
30660 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30662 /* The VTBL instruction does not use a modulo index, so we must take care
30663 of that ourselves. */
30664 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30665 for (i = 0; i < nelt; ++i)
30666 rmask[i] = mask;
30667 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30668 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30670 arm_expand_vec_perm_1 (target, op0, op1, sel);
30673 /* Generate or test for an insn that supports a constant permutation. */
30675 /* Recognize patterns for the VUZP insns. */
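/* Illustrative example: on V4SImode operands the permutation {0, 2, 4, 6}
   selects the even-numbered elements of the two inputs and {1, 3, 5, 7}
   the odd-numbered ones, which are exactly the two results of a vuzp.32
   instruction.  */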
30677 static bool
30678 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30680 unsigned int i, odd, mask, nelt = d->nelt;
30681 rtx out0, out1, in0, in1, x;
30682 rtx (*gen)(rtx, rtx, rtx, rtx);
30684 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30685 return false;
30687 /* Note that these are little-endian tests. Adjust for big-endian later. */
30688 if (d->perm[0] == 0)
30689 odd = 0;
30690 else if (d->perm[0] == 1)
30691 odd = 1;
30692 else
30693 return false;
30694 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30696 for (i = 0; i < nelt; i++)
30698 unsigned elt = (i * 2 + odd) & mask;
30699 if (d->perm[i] != elt)
30700 return false;
30703 /* Success! */
30704 if (d->testing_p)
30705 return true;
30707 switch (d->vmode)
30709 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30710 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30711 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30712 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30713 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30714 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30715 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30716 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30717 default:
30718 gcc_unreachable ();
30721 in0 = d->op0;
30722 in1 = d->op1;
30723 if (BYTES_BIG_ENDIAN)
30725 x = in0, in0 = in1, in1 = x;
30726 odd = !odd;
30729 out0 = d->target;
30730 out1 = gen_reg_rtx (d->vmode);
30731 if (odd)
30732 x = out0, out0 = out1, out1 = x;
30734 emit_insn (gen (out0, in0, in1, out1));
30735 return true;
30738 /* Recognize patterns for the VZIP insns. */
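/* Illustrative example: on V4SImode operands the permutation {0, 4, 1, 5}
   interleaves the low halves of the two inputs and {2, 6, 3, 7} the high
   halves, matching the two results of a vzip.32 instruction.  */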
30740 static bool
30741 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30743 unsigned int i, high, mask, nelt = d->nelt;
30744 rtx out0, out1, in0, in1, x;
30745 rtx (*gen)(rtx, rtx, rtx, rtx);
30747 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30748 return false;
30750 /* Note that these are little-endian tests. Adjust for big-endian later. */
30751 high = nelt / 2;
30752 if (d->perm[0] == high)
30754 else if (d->perm[0] == 0)
30755 high = 0;
30756 else
30757 return false;
30758 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30760 for (i = 0; i < nelt / 2; i++)
30762 unsigned elt = (i + high) & mask;
30763 if (d->perm[i * 2] != elt)
30764 return false;
30765 elt = (elt + nelt) & mask;
30766 if (d->perm[i * 2 + 1] != elt)
30767 return false;
30770 /* Success! */
30771 if (d->testing_p)
30772 return true;
30774 switch (d->vmode)
30776 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30777 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30778 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30779 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30780 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30781 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30782 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30783 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30784 default:
30785 gcc_unreachable ();
30788 in0 = d->op0;
30789 in1 = d->op1;
30790 if (BYTES_BIG_ENDIAN)
30792 x = in0, in0 = in1, in1 = x;
30793 high = !high;
30796 out0 = d->target;
30797 out1 = gen_reg_rtx (d->vmode);
30798 if (high)
30799 x = out0, out0 = out1, out1 = x;
30801 emit_insn (gen (out0, in0, in1, out1));
30802 return true;
30805 /* Recognize patterns for the VREV insns. */
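/* As an illustration, a single-operand V8QImode selector of
   {7, 6, 5, 4, 3, 2, 1, 0} (diff == 7) matches below and maps onto
   VREV64, which reverses the elements within each 64-bit group; for
   V8QImode, diff values of 3 and 1 select VREV32 and VREV16 instead.  */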
30807 static bool
30808 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30810 unsigned int i, j, diff, nelt = d->nelt;
30811 rtx (*gen)(rtx, rtx, rtx);
30813 if (!d->one_vector_p)
30814 return false;
30816 diff = d->perm[0];
30817 switch (diff)
30819 case 7:
30820 switch (d->vmode)
30822 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30823 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30824 default:
30825 return false;
30827 break;
30828 case 3:
30829 switch (d->vmode)
30831 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30832 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30833 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30834 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30835 default:
30836 return false;
30838 break;
30839 case 1:
30840 switch (d->vmode)
30842 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30843 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30844 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30845 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30846 case V4SImode: gen = gen_neon_vrev64v4si; break;
30847 case V2SImode: gen = gen_neon_vrev64v2si; break;
30848 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30849 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30850 default:
30851 return false;
30853 break;
30854 default:
30855 return false;
30858 for (i = 0; i < nelt ; i += diff + 1)
30859 for (j = 0; j <= diff; j += 1)
30861 /* This is guaranteed to be true, as diff can only be 7, 3 or 1
30862 here and the vector has enough elements for the access. A
30863 selector whose diff value is anything other than these by the
30864 time we get here implies that something has gone wrong
30865 earlier. */
30866 gcc_assert (i + j < nelt);
30867 if (d->perm[i + j] != i + diff - j)
30868 return false;
30871 /* Success! */
30872 if (d->testing_p)
30873 return true;
30875 /* ??? The third operand is an artifact of the builtin infrastructure
30876 and is ignored by the actual instruction. */
30877 emit_insn (gen (d->target, d->op0, const0_rtx));
30878 return true;
30881 /* Recognize patterns for the VTRN insns. */
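/* As an illustration (little-endian), a two-operand V8QImode selector
   of {0, 8, 2, 10, 4, 12, 6, 14} (odd == 0) or
   {1, 9, 3, 11, 5, 13, 7, 15} (odd == 1) matches below: VTRN treats
   each pair of elements from the two operands as a 2x2 matrix and
   transposes it.  */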
30883 static bool
30884 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30886 unsigned int i, odd, mask, nelt = d->nelt;
30887 rtx out0, out1, in0, in1, x;
30888 rtx (*gen)(rtx, rtx, rtx, rtx);
30890 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30891 return false;
30893 /* Note that these are little-endian tests. Adjust for big-endian later. */
30894 if (d->perm[0] == 0)
30895 odd = 0;
30896 else if (d->perm[0] == 1)
30897 odd = 1;
30898 else
30899 return false;
30900 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30902 for (i = 0; i < nelt; i += 2)
30904 if (d->perm[i] != i + odd)
30905 return false;
30906 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30907 return false;
30910 /* Success! */
30911 if (d->testing_p)
30912 return true;
30914 switch (d->vmode)
30916 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30917 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30918 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30919 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30920 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30921 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30922 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30923 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30924 default:
30925 gcc_unreachable ();
30928 in0 = d->op0;
30929 in1 = d->op1;
30930 if (BYTES_BIG_ENDIAN)
30932 x = in0, in0 = in1, in1 = x;
30933 odd = !odd;
30936 out0 = d->target;
30937 out1 = gen_reg_rtx (d->vmode);
30938 if (odd)
30939 x = out0, out0 = out1, out1 = x;
30941 emit_insn (gen (out0, in0, in1, out1));
30942 return true;
30945 /* Recognize patterns for the VEXT insns. */
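/* As an illustration, a two-operand V8QImode selector of
   {3, 4, 5, 6, 7, 8, 9, 10} extracts a contiguous run starting at
   element 3 of the concatenated operands and maps onto VEXT with an
   immediate of 3; a one-operand selector such as
   {3, 4, 5, 6, 7, 0, 1, 2} is the rotation form handled by the
   wrap-around check below.  */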
30947 static bool
30948 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30950 unsigned int i, nelt = d->nelt;
30951 rtx (*gen) (rtx, rtx, rtx, rtx);
30952 rtx offset;
30954 unsigned int location;
30956 unsigned int next = d->perm[0] + 1;
30958 /* TODO: Handle GCC's numbering of elements for big-endian. */
30959 if (BYTES_BIG_ENDIAN)
30960 return false;
30962 /* Check if the extracted indexes are increasing by one. */
30963 for (i = 1; i < nelt; next++, i++)
30965 /* If we hit the most significant element of the 2nd vector in
30966 the previous iteration, no need to test further. */
30967 if (next == 2 * nelt)
30968 return false;
30970 /* If we are operating on only one vector, it could be a
30971 rotation. If there are only two elements of size < 64, let
30972 arm_evpc_neon_vrev catch it. */
30973 if (d->one_vector_p && (next == nelt))
30975 if ((nelt == 2) && (d->vmode != V2DImode))
30976 return false;
30977 else
30978 next = 0;
30981 if (d->perm[i] != next)
30982 return false;
30985 location = d->perm[0];
30987 switch (d->vmode)
30989 case V16QImode: gen = gen_neon_vextv16qi; break;
30990 case V8QImode: gen = gen_neon_vextv8qi; break;
30991 case V4HImode: gen = gen_neon_vextv4hi; break;
30992 case V8HImode: gen = gen_neon_vextv8hi; break;
30993 case V2SImode: gen = gen_neon_vextv2si; break;
30994 case V4SImode: gen = gen_neon_vextv4si; break;
30995 case V2SFmode: gen = gen_neon_vextv2sf; break;
30996 case V4SFmode: gen = gen_neon_vextv4sf; break;
30997 case V2DImode: gen = gen_neon_vextv2di; break;
30998 default:
30999 return false;
31002 /* Success! */
31003 if (d->testing_p)
31004 return true;
31006 offset = GEN_INT (location);
31007 emit_insn (gen (d->target, d->op0, d->op1, offset));
31008 return true;
31011 /* The NEON VTBL instruction is a fully variable permutation that's even
31012 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31013 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31014 can do slightly better by expanding this as a constant where we don't
31015 have to apply a mask. */
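/* As an illustration, a constant two-operand V8QImode selector such
   as {0, 9, 2, 11, 4, 13, 6, 15} is loaded below as a literal index
   vector and fed straight to VTBL: the values are already in range,
   so no run-time AND is needed.  */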
31017 static bool
31018 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31020 rtx rperm[MAX_VECT_LEN], sel;
31021 enum machine_mode vmode = d->vmode;
31022 unsigned int i, nelt = d->nelt;
31024 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31025 numbering of elements for big-endian, we must reverse the order. */
31026 if (BYTES_BIG_ENDIAN)
31027 return false;
31029 if (d->testing_p)
31030 return true;
31032 /* Generic code will try constant permutation twice: once with the
31033 original mode and again with the elements lowered to QImode.
31034 So wait, and don't do the selector expansion ourselves. */
31035 if (vmode != V8QImode && vmode != V16QImode)
31036 return false;
31038 for (i = 0; i < nelt; ++i)
31039 rperm[i] = GEN_INT (d->perm[i]);
31040 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31041 sel = force_reg (vmode, sel);
31043 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31044 return true;
31047 static bool
31048 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31050 /* Check if the input mask matches vext before reordering the
31051 operands. */
31052 if (TARGET_NEON)
31053 if (arm_evpc_neon_vext (d))
31054 return true;
31056 /* The pattern matching functions above are written to look for a small
31057 number to begin the sequence (0, 1, N/2). If we begin with an index
31058 from the second operand, we can swap the operands. */
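/* For example, a two-operand V8QImode selector of
   {8, 0, 9, 1, 10, 2, 11, 3} begins with an index into the second
   operand; adding nelt modulo 2 * nelt turns it into
   {0, 8, 1, 9, 2, 10, 3, 11} with the operands exchanged, which the
   VZIP matcher below accepts.  */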
31059 if (d->perm[0] >= d->nelt)
31061 unsigned i, nelt = d->nelt;
31062 rtx x;
31064 for (i = 0; i < nelt; ++i)
31065 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31067 x = d->op0;
31068 d->op0 = d->op1;
31069 d->op1 = x;
31072 if (TARGET_NEON)
31074 if (arm_evpc_neon_vuzp (d))
31075 return true;
31076 if (arm_evpc_neon_vzip (d))
31077 return true;
31078 if (arm_evpc_neon_vrev (d))
31079 return true;
31080 if (arm_evpc_neon_vtrn (d))
31081 return true;
31082 return arm_evpc_neon_vtbl (d);
31084 return false;
31087 /* Expand a vec_perm_const pattern. */
31089 bool
31090 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31092 struct expand_vec_perm_d d;
31093 int i, nelt, which;
31095 d.target = target;
31096 d.op0 = op0;
31097 d.op1 = op1;
31099 d.vmode = GET_MODE (target);
31100 gcc_assert (VECTOR_MODE_P (d.vmode));
31101 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31102 d.testing_p = false;
31104 for (i = which = 0; i < nelt; ++i)
31106 rtx e = XVECEXP (sel, 0, i);
31107 int ei = INTVAL (e) & (2 * nelt - 1);
31108 which |= (ei < nelt ? 1 : 2);
31109 d.perm[i] = ei;
31112 switch (which)
31114 default:
31115 gcc_unreachable();
31117 case 3:
31118 d.one_vector_p = false;
31119 if (!rtx_equal_p (op0, op1))
31120 break;
31122 /* The elements of PERM do not suggest that only the first operand
31123 is used, but both operands are identical. Allow easier matching
31124 of the permutation by folding the permutation into the single
31125 input vector. */
31126 /* FALLTHRU */
31127 case 2:
31128 for (i = 0; i < nelt; ++i)
31129 d.perm[i] &= nelt - 1;
31130 d.op0 = op1;
31131 d.one_vector_p = true;
31132 break;
31134 case 1:
31135 d.op1 = op0;
31136 d.one_vector_p = true;
31137 break;
31140 return arm_expand_vec_perm_const_1 (&d);
31143 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31145 static bool
31146 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31147 const unsigned char *sel)
31149 struct expand_vec_perm_d d;
31150 unsigned int i, nelt, which;
31151 bool ret;
31153 d.vmode = vmode;
31154 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31155 d.testing_p = true;
31156 memcpy (d.perm, sel, nelt);
31158 /* Categorize the set of elements in the selector. */
31159 for (i = which = 0; i < nelt; ++i)
31161 unsigned char e = d.perm[i];
31162 gcc_assert (e < 2 * nelt);
31163 which |= (e < nelt ? 1 : 2);
31166 /* For all elements from second vector, fold the elements to first. */
31167 if (which == 2)
31168 for (i = 0; i < nelt; ++i)
31169 d.perm[i] -= nelt;
31171 /* Check whether the mask can be applied to the vector type. */
31172 d.one_vector_p = (which != 3);
31174 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31175 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31176 if (!d.one_vector_p)
31177 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31179 start_sequence ();
31180 ret = arm_expand_vec_perm_const_1 (&d);
31181 end_sequence ();
31183 return ret;
31186 bool
31187 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31189 /* If we are soft float and we either have ldrd or the mode is no
31190 wider than a word, then all auto increment forms are ok. */
31191 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31192 return true;
31194 switch (code)
31196 /* Post increment and Pre Decrement are supported for all
31197 instruction forms except for vector forms. */
31198 case ARM_POST_INC:
31199 case ARM_PRE_DEC:
31200 if (VECTOR_MODE_P (mode))
31202 if (code != ARM_PRE_DEC)
31203 return true;
31204 else
31205 return false;
31208 return true;
31210 case ARM_POST_DEC:
31211 case ARM_PRE_INC:
31212 /* Without LDRD and mode size greater than
31213 word size, there is no point in auto-incrementing
31214 because ldm and stm will not have these forms. */
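/* For example, on a soft-float target without LDRD, DImode is wider
   than a word, so ARM_PRE_INC and ARM_POST_DEC are rejected here even
   though the ARM_POST_INC case above still allows DImode.  */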
31215 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31216 return false;
31218 /* Vector and floating point modes do not support
31219 these auto increment forms. */
31220 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31221 return false;
31223 return true;
31225 default:
31226 return false;
31230 return false;
31233 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31234 on ARM, since we know that shifts by negative amounts are no-ops.
31235 Additionally, the default expansion code is not available or suitable
31236 for post-reload insn splits (this can occur when the register allocator
31237 chooses not to do a shift in NEON).
31239 This function is used in both initial expand and post-reload splits, and
31240 handles all kinds of 64-bit shifts.
31242 Input requirements:
31243 - It is safe for the input and output to be the same register, but
31244 early-clobber rules apply for the shift amount and scratch registers.
31245 - Shift by register requires both scratch registers. In all other cases
31246 the scratch registers may be NULL.
31247 - Ashiftrt by a register also clobbers the CC register. */
31248 void
31249 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31250 rtx amount, rtx scratch1, rtx scratch2)
31252 rtx out_high = gen_highpart (SImode, out);
31253 rtx out_low = gen_lowpart (SImode, out);
31254 rtx in_high = gen_highpart (SImode, in);
31255 rtx in_low = gen_lowpart (SImode, in);
31257 /* Terminology:
31258 in = the register pair containing the input value.
31259 out = the destination register pair.
31260 up = the high- or low-part of each pair.
31261 down = the opposite part to "up".
31262 In a shift, we can consider bits to shift from "up"-stream to
31263 "down"-stream, so in a left-shift "up" is the low-part and "down"
31264 is the high-part of each register pair. */
31266 rtx out_up = code == ASHIFT ? out_low : out_high;
31267 rtx out_down = code == ASHIFT ? out_high : out_low;
31268 rtx in_up = code == ASHIFT ? in_low : in_high;
31269 rtx in_down = code == ASHIFT ? in_high : in_low;
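/* For example, in a left shift bits flow from the low word towards
   the high word, so in_up is in_low and out_down is out_high; for the
   right shifts the assignments above are reversed.  */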
31271 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31272 gcc_assert (out
31273 && (REG_P (out) || GET_CODE (out) == SUBREG)
31274 && GET_MODE (out) == DImode);
31275 gcc_assert (in
31276 && (REG_P (in) || GET_CODE (in) == SUBREG)
31277 && GET_MODE (in) == DImode);
31278 gcc_assert (amount
31279 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31280 && GET_MODE (amount) == SImode)
31281 || CONST_INT_P (amount)));
31282 gcc_assert (scratch1 == NULL
31283 || (GET_CODE (scratch1) == SCRATCH)
31284 || (GET_MODE (scratch1) == SImode
31285 && REG_P (scratch1)));
31286 gcc_assert (scratch2 == NULL
31287 || (GET_CODE (scratch2) == SCRATCH)
31288 || (GET_MODE (scratch2) == SImode
31289 && REG_P (scratch2)));
31290 gcc_assert (!REG_P (out) || !REG_P (amount)
31291 || !HARD_REGISTER_P (out)
31292 || (REGNO (out) != REGNO (amount)
31293 && REGNO (out) + 1 != REGNO (amount)));
31295 /* Macros to make following code more readable. */
31296 #define SUB_32(DEST,SRC) \
31297 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31298 #define RSB_32(DEST,SRC) \
31299 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31300 #define SUB_S_32(DEST,SRC) \
31301 gen_addsi3_compare0 ((DEST), (SRC), \
31302 GEN_INT (-32))
31303 #define SET(DEST,SRC) \
31304 gen_rtx_SET (SImode, (DEST), (SRC))
31305 #define SHIFT(CODE,SRC,AMOUNT) \
31306 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31307 #define LSHIFT(CODE,SRC,AMOUNT) \
31308 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31309 SImode, (SRC), (AMOUNT))
31310 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31311 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31312 SImode, (SRC), (AMOUNT))
31313 #define ORR(A,B) \
31314 gen_rtx_IOR (SImode, (A), (B))
31315 #define BRANCH(COND,LABEL) \
31316 gen_arm_cond_branch ((LABEL), \
31317 gen_rtx_ ## COND (CCmode, cc_reg, \
31318 const0_rtx), \
31319 cc_reg)
31321 /* Shifts by register and shifts by constant are handled separately. */
31322 if (CONST_INT_P (amount))
31324 /* We have a shift-by-constant. */
31326 /* First, handle out-of-range shift amounts.
31327 In both cases we try to match the result an ARM instruction in a
31328 shift-by-register would give. This helps reduce execution
31329 differences between optimization levels, but it won't stop other
31330 parts of the compiler doing different things. This is "undefined
31331 behaviour", in any case. */
31332 if (INTVAL (amount) <= 0)
31333 emit_insn (gen_movdi (out, in));
31334 else if (INTVAL (amount) >= 64)
31336 if (code == ASHIFTRT)
31338 rtx const31_rtx = GEN_INT (31);
31339 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31340 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31342 else
31343 emit_insn (gen_movdi (out, const0_rtx));
31346 /* Now handle valid shifts. */
31347 else if (INTVAL (amount) < 32)
31349 /* Shifts by a constant less than 32. */
31350 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31352 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31353 emit_insn (SET (out_down,
31354 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31355 out_down)));
31356 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
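/* As a concrete illustration, an ASHIFT by 8 emits roughly:
   out_high = (in_high << 8) | ((unsigned) in_low >> 24);
   out_low = in_low << 8;  */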
31358 else
31360 /* Shifts by a constant greater than 31. */
31361 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31363 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31364 if (code == ASHIFTRT)
31365 emit_insn (gen_ashrsi3 (out_up, in_up,
31366 GEN_INT (31)));
31367 else
31368 emit_insn (SET (out_up, const0_rtx));
31371 else
31373 /* We have a shift-by-register. */
31374 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31376 /* This alternative requires the scratch registers. */
31377 gcc_assert (scratch1 && REG_P (scratch1));
31378 gcc_assert (scratch2 && REG_P (scratch2));
31380 /* We will need the values "amount-32" and "32-amount" later.
31381 Swapping them around now allows the later code to be more general. */
31382 switch (code)
31384 case ASHIFT:
31385 emit_insn (SUB_32 (scratch1, amount));
31386 emit_insn (RSB_32 (scratch2, amount));
31387 break;
31388 case ASHIFTRT:
31389 emit_insn (RSB_32 (scratch1, amount));
31390 /* Also set CC = amount > 32. */
31391 emit_insn (SUB_S_32 (scratch2, amount));
31392 break;
31393 case LSHIFTRT:
31394 emit_insn (RSB_32 (scratch1, amount));
31395 emit_insn (SUB_32 (scratch2, amount));
31396 break;
31397 default:
31398 gcc_unreachable ();
31401 /* Emit code like this:
31403 arithmetic-left:
31404 out_down = in_down << amount;
31405 out_down = (in_up << (amount - 32)) | out_down;
31406 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31407 out_up = in_up << amount;
31409 arithmetic-right:
31410 out_down = in_down >> amount;
31411 out_down = (in_up << (32 - amount)) | out_down;
31412 if (amount < 32)
31413 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31414 out_up = in_up << amount;
31416 logical-right:
31417 out_down = in_down >> amount;
31418 out_down = (in_up << (32 - amount)) | out_down;
31419 if (amount < 32)
31420 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31421 out_up = in_up << amount;
31423 The ARM and Thumb2 variants are the same but implemented slightly
31424 differently. If this were only called during expand we could just
31425 use the Thumb2 case and let combine do the right thing, but this
31426 can also be called from post-reload splitters. */
31428 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31430 if (!TARGET_THUMB2)
31432 /* Emit code for ARM mode. */
31433 emit_insn (SET (out_down,
31434 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31435 if (code == ASHIFTRT)
31437 rtx done_label = gen_label_rtx ();
31438 emit_jump_insn (BRANCH (LT, done_label));
31439 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31440 out_down)));
31441 emit_label (done_label);
31443 else
31444 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31445 out_down)));
31447 else
31449 /* Emit code for Thumb2 mode.
31450 Thumb2 can't do shift and or in one insn. */
31451 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31452 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31454 if (code == ASHIFTRT)
31456 rtx done_label = gen_label_rtx ();
31457 emit_jump_insn (BRANCH (LT, done_label));
31458 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31459 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31460 emit_label (done_label);
31462 else
31464 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31465 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31469 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31472 #undef SUB_32
31473 #undef RSB_32
31474 #undef SUB_S_32
31475 #undef SET
31476 #undef SHIFT
31477 #undef LSHIFT
31478 #undef REV_LSHIFT
31479 #undef ORR
31480 #undef BRANCH
31484 /* Return true if *COMPARISON is a comparison that the target can handle,
31485 putting *OP1 and *OP2 into a form that is valid for it. */
31486 bool
31487 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31489 enum rtx_code code = GET_CODE (*comparison);
31490 int code_int;
31491 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31492 ? GET_MODE (*op2) : GET_MODE (*op1);
31494 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31496 if (code == UNEQ || code == LTGT)
31497 return false;
31499 code_int = (int)code;
31500 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31501 PUT_CODE (*comparison, (enum rtx_code)code_int);
31503 switch (mode)
31505 case SImode:
31506 if (!arm_add_operand (*op1, mode))
31507 *op1 = force_reg (mode, *op1);
31508 if (!arm_add_operand (*op2, mode))
31509 *op2 = force_reg (mode, *op2);
31510 return true;
31512 case DImode:
31513 if (!cmpdi_operand (*op1, mode))
31514 *op1 = force_reg (mode, *op1);
31515 if (!cmpdi_operand (*op2, mode))
31516 *op2 = force_reg (mode, *op2);
31517 return true;
31519 case SFmode:
31520 case DFmode:
31521 if (!arm_float_compare_operand (*op1, mode))
31522 *op1 = force_reg (mode, *op1);
31523 if (!arm_float_compare_operand (*op2, mode))
31524 *op2 = force_reg (mode, *op2);
31525 return true;
31526 default:
31527 break;
31530 return false;
31534 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
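/* AddressSanitizer computes a shadow address roughly as
   (address >> 3) + this offset, so returning 1 << 29 places the ARM
   shadow region at 0x20000000.  */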
31536 static unsigned HOST_WIDE_INT
31537 arm_asan_shadow_offset (void)
31539 return (unsigned HOST_WIDE_INT) 1 << 29;
31543 /* This is a temporary fix for PR60655. Ideally we need
31544 to handle most of these cases in the generic part but
31545 currently we reject minus (..) (sym_ref). We try to
31546 ameliorate the case with minus (sym_ref1) (sym_ref2)
31547 where they are in the same section. */
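/* For example, (minus (symbol_ref B) (symbol_ref A)) is an
   assembly-time constant when both variables are placed in the same
   section, so it can safely appear in debug expressions; when the
   sections differ, or we cannot tell, we conservatively report the
   expression as not OK.  */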
31549 static bool
31550 arm_const_not_ok_for_debug_p (rtx p)
31552 tree decl_op0 = NULL;
31553 tree decl_op1 = NULL;
31555 if (GET_CODE (p) == MINUS)
31557 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31559 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31560 if (decl_op1
31561 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31562 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31564 if ((TREE_CODE (decl_op1) == VAR_DECL
31565 || TREE_CODE (decl_op1) == CONST_DECL)
31566 && (TREE_CODE (decl_op0) == VAR_DECL
31567 || TREE_CODE (decl_op0) == CONST_DECL))
31568 return (get_variable_section (decl_op1, false)
31569 != get_variable_section (decl_op0, false));
31571 if (TREE_CODE (decl_op1) == LABEL_DECL
31572 && TREE_CODE (decl_op0) == LABEL_DECL)
31573 return (DECL_CONTEXT (decl_op1)
31574 != DECL_CONTEXT (decl_op0));
31577 return true;
31581 return false;
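/* Implement the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  HOLD, CLEAR
   and UPDATE receive statement sequences that, roughly, save the FP
   environment and silence exceptions before an atomic compound
   assignment on a floating-point object, clear any exceptions raised
   by a failed retry, and finally restore the environment and re-raise
   the exceptions from the successful attempt.  */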
31584 static void
31585 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
31587 const unsigned ARM_FE_INVALID = 1;
31588 const unsigned ARM_FE_DIVBYZERO = 2;
31589 const unsigned ARM_FE_OVERFLOW = 4;
31590 const unsigned ARM_FE_UNDERFLOW = 8;
31591 const unsigned ARM_FE_INEXACT = 16;
31592 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
31593 | ARM_FE_DIVBYZERO
31594 | ARM_FE_OVERFLOW
31595 | ARM_FE_UNDERFLOW
31596 | ARM_FE_INEXACT);
31597 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
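/* These values mirror the VFP FPSCR layout: the cumulative exception
   flags occupy bits 0-4 and the corresponding trap-enable bits sit 8
   positions higher, which is why the mask built below clears both
   ARM_FE_ALL_EXCEPT and ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT.  */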
31598 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
31599 tree new_fenv_var, reload_fenv, restore_fnenv;
31600 tree update_call, atomic_feraiseexcept, hold_fnclex;
31602 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
31603 return default_atomic_assign_expand_fenv (hold, clear, update);
31605 /* Generate the equivalent of:
31606 unsigned int fenv_var;
31607 fenv_var = __builtin_arm_get_fpscr ();
31609 unsigned int masked_fenv;
31610 masked_fenv = fenv_var & mask;
31612 __builtin_arm_set_fpscr (masked_fenv); */
31614 fenv_var = create_tmp_var (unsigned_type_node, NULL);
31615 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
31616 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
31617 mask = build_int_cst (unsigned_type_node,
31618 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
31619 | ARM_FE_ALL_EXCEPT));
31620 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
31621 fenv_var, build_call_expr (get_fpscr, 0));
31622 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
31623 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
31624 *hold = build2 (COMPOUND_EXPR, void_type_node,
31625 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
31626 hold_fnclex);
31628 /* Store the value of masked_fenv to clear the exceptions:
31629 __builtin_arm_set_fpscr (masked_fenv); */
31631 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
31633 /* Generate the equivalent of:
31634 unsigned int new_fenv_var;
31635 new_fenv_var = __builtin_arm_get_fpscr ();
31637 __builtin_arm_set_fpscr (fenv_var);
31639 __atomic_feraiseexcept (new_fenv_var); */
31641 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
31642 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
31643 build_call_expr (get_fpscr, 0));
31644 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
31645 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
31646 update_call = build_call_expr (atomic_feraiseexcept, 1,
31647 fold_convert (integer_type_node, new_fenv_var));
31648 *update = build2 (COMPOUND_EXPR, void_type_node,
31649 build2 (COMPOUND_EXPR, void_type_node,
31650 reload_fenv, restore_fnenv), update_call);
31653 /* Return TRUE if X is a reference to a value in a constant pool. */
31654 extern bool
31655 arm_is_constant_pool_ref (rtx x)
31657 return (MEM_P (x)
31658 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
31659 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
31662 #include "gt-arm.h"