gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
94 {
95 int i[4];
96 };
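/* Editorial note (not part of the original file): four_ints is the return
   vehicle for optimal_immediate_sequence below -- it holds the up to four
   immediate values from which arm_gen_constant can synthesise a 32-bit
   constant using a short sequence of data-processing instructions.  */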
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx safe_vector_operand (rtx, machine_mode);
199 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
200 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
201 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
202 static tree arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond, rtx pattern);
204 static rtx_insn *emit_set_insn (rtx, rtx);
205 static rtx emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
207 tree, bool);
208 static rtx arm_function_arg (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
213 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
214 const_tree);
215 static rtx aapcs_libcall_value (machine_mode);
216 static int aapcs_select_return_coproc (const_tree, const_tree);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
220 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
221 #endif
222 #ifndef ARM_PE
223 static void arm_encode_section_info (tree, rtx, int);
224 #endif
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
230 tree, int *, int);
231 static bool arm_pass_by_reference (cumulative_args_t,
232 machine_mode, const_tree, bool);
233 static bool arm_promote_prototypes (const_tree);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree);
237 static bool arm_must_pass_in_stack (machine_mode, const_tree);
238 static bool arm_return_in_memory (const_tree, const_tree);
239 #if ARM_UNWIND_INFO
240 static void arm_unwind_emit (FILE *, rtx_insn *);
241 static bool arm_output_ttype (rtx);
242 static void arm_asm_emit_except_personality (rtx);
243 static void arm_asm_init_sections (void);
244 #endif
245 static rtx arm_dwarf_register_span (rtx);
247 static tree arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree arm_get_cookie_size (tree);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree, rtx);
259 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static const char *arm_invalid_parameter_type (const_tree t);
269 static const char *arm_invalid_return_type (const_tree t);
270 static tree arm_promoted_type (const_tree t);
271 static tree arm_convert_to_type (tree type, tree expr);
272 static bool arm_scalar_mode_supported_p (machine_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (machine_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table[] =
317 {
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
321 it may lie outside of the 26 bit addressing range of a normal function
322 call. */
323 { "long_call", 0, 0, false, true, true, NULL, false },
324 /* Whereas these functions are always known to reside within the 26 bit
325 addressing range. */
326 { "short_call", 0, 0, false, true, true, NULL, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
329 false },
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
336 false },
337 #ifdef ARM_PE
338 /* ARM/PE has three new attributes:
339 interfacearm - ?
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
343 Microsoft allows multiple declspecs in one __declspec, separating
344 them with spaces. We do NOT support this. Instead, use __declspec
345 multiple times.
346 */
347 { "dllimport", 0, 0, true, false, false, NULL, false },
348 { "dllexport", 0, 0, true, false, false, NULL, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
355 false },
356 #endif
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
369 #undef TARGET_LRA_P
370 #define TARGET_LRA_P arm_lra_p
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
401 #undef TARGET_OPTION_OVERRIDE
402 #define TARGET_OPTION_OVERRIDE arm_option_override
404 #undef TARGET_COMP_TYPE_ATTRIBUTES
405 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
407 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
408 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
410 #undef TARGET_SCHED_ADJUST_COST
411 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
413 #undef TARGET_SCHED_REORDER
414 #define TARGET_SCHED_REORDER arm_sched_reorder
416 #undef TARGET_REGISTER_MOVE_COST
417 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
419 #undef TARGET_MEMORY_MOVE_COST
420 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
422 #undef TARGET_ENCODE_SECTION_INFO
423 #ifdef ARM_PE
424 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
425 #else
426 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
427 #endif
429 #undef TARGET_STRIP_NAME_ENCODING
430 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
432 #undef TARGET_ASM_INTERNAL_LABEL
433 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
438 #undef TARGET_FUNCTION_VALUE
439 #define TARGET_FUNCTION_VALUE arm_function_value
441 #undef TARGET_LIBCALL_VALUE
442 #define TARGET_LIBCALL_VALUE arm_libcall_value
444 #undef TARGET_FUNCTION_VALUE_REGNO_P
445 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
447 #undef TARGET_ASM_OUTPUT_MI_THUNK
448 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
452 #undef TARGET_RTX_COSTS
453 #define TARGET_RTX_COSTS arm_rtx_costs
454 #undef TARGET_ADDRESS_COST
455 #define TARGET_ADDRESS_COST arm_address_cost
457 #undef TARGET_SHIFT_TRUNCATION_MASK
458 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
459 #undef TARGET_VECTOR_MODE_SUPPORTED_P
460 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
461 #undef TARGET_ARRAY_MODE_SUPPORTED_P
462 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
463 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
464 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
465 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
466 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
467 arm_autovectorize_vector_sizes
469 #undef TARGET_MACHINE_DEPENDENT_REORG
470 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS arm_init_builtins
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
476 #undef TARGET_BUILTIN_DECL
477 #define TARGET_BUILTIN_DECL arm_builtin_decl
479 #undef TARGET_INIT_LIBFUNCS
480 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
482 #undef TARGET_PROMOTE_FUNCTION_MODE
483 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
486 #undef TARGET_PASS_BY_REFERENCE
487 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
488 #undef TARGET_ARG_PARTIAL_BYTES
489 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
490 #undef TARGET_FUNCTION_ARG
491 #define TARGET_FUNCTION_ARG arm_function_arg
492 #undef TARGET_FUNCTION_ARG_ADVANCE
493 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
494 #undef TARGET_FUNCTION_ARG_BOUNDARY
495 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
497 #undef TARGET_SETUP_INCOMING_VARARGS
498 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
500 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
501 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
503 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
504 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
505 #undef TARGET_TRAMPOLINE_INIT
506 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
507 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
508 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
510 #undef TARGET_WARN_FUNC_RETURN
511 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
513 #undef TARGET_DEFAULT_SHORT_ENUMS
514 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
516 #undef TARGET_ALIGN_ANON_BITFIELD
517 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
519 #undef TARGET_NARROW_VOLATILE_BITFIELD
520 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
522 #undef TARGET_CXX_GUARD_TYPE
523 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
525 #undef TARGET_CXX_GUARD_MASK_BIT
526 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
528 #undef TARGET_CXX_GET_COOKIE_SIZE
529 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
531 #undef TARGET_CXX_COOKIE_HAS_SIZE
532 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
534 #undef TARGET_CXX_CDTOR_RETURNS_THIS
535 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
537 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
538 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
540 #undef TARGET_CXX_USE_AEABI_ATEXIT
541 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
543 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
544 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
545 arm_cxx_determine_class_data_visibility
547 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
548 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
550 #undef TARGET_RETURN_IN_MSB
551 #define TARGET_RETURN_IN_MSB arm_return_in_msb
553 #undef TARGET_RETURN_IN_MEMORY
554 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
556 #undef TARGET_MUST_PASS_IN_STACK
557 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
559 #if ARM_UNWIND_INFO
560 #undef TARGET_ASM_UNWIND_EMIT
561 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
563 /* EABI unwinding tables use a different format for the typeinfo tables. */
564 #undef TARGET_ASM_TTYPE
565 #define TARGET_ASM_TTYPE arm_output_ttype
567 #undef TARGET_ARM_EABI_UNWINDER
568 #define TARGET_ARM_EABI_UNWINDER true
570 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
571 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
573 #undef TARGET_ASM_INIT_SECTIONS
574 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
575 #endif /* ARM_UNWIND_INFO */
577 #undef TARGET_DWARF_REGISTER_SPAN
578 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
580 #undef TARGET_CANNOT_COPY_INSN_P
581 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
583 #ifdef HAVE_AS_TLS
584 #undef TARGET_HAVE_TLS
585 #define TARGET_HAVE_TLS true
586 #endif
588 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
589 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
591 #undef TARGET_LEGITIMATE_CONSTANT_P
592 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
594 #undef TARGET_CANNOT_FORCE_CONST_MEM
595 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
597 #undef TARGET_MAX_ANCHOR_OFFSET
598 #define TARGET_MAX_ANCHOR_OFFSET 4095
600 /* The minimum is set such that the total size of the block
601 for a particular anchor is -4088 + 1 + 4095 bytes, which is
602 divisible by eight, ensuring natural spacing of anchors. */
603 #undef TARGET_MIN_ANCHOR_OFFSET
604 #define TARGET_MIN_ANCHOR_OFFSET -4088
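/* Editorial note (not part of the original file): the two limits above give
   an anchor a reach of -4088 .. +4095, i.e. 4088 + 1 + 4095 = 8184 bytes in
   total, and 8184 = 8 * 1023, so consecutive section anchors remain
   naturally 8-byte aligned.  */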
606 #undef TARGET_SCHED_ISSUE_RATE
607 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE arm_mangle_type
612 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
613 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
615 #undef TARGET_BUILD_BUILTIN_VA_LIST
616 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
617 #undef TARGET_EXPAND_BUILTIN_VA_START
618 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
619 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
620 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
622 #ifdef HAVE_AS_TLS
623 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
624 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
625 #endif
627 #undef TARGET_LEGITIMATE_ADDRESS_P
628 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
633 #undef TARGET_INVALID_PARAMETER_TYPE
634 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
636 #undef TARGET_INVALID_RETURN_TYPE
637 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
639 #undef TARGET_PROMOTED_TYPE
640 #define TARGET_PROMOTED_TYPE arm_promoted_type
642 #undef TARGET_CONVERT_TO_TYPE
643 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
645 #undef TARGET_SCALAR_MODE_SUPPORTED_P
646 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
648 #undef TARGET_FRAME_POINTER_REQUIRED
649 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
651 #undef TARGET_CAN_ELIMINATE
652 #define TARGET_CAN_ELIMINATE arm_can_eliminate
654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
655 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
657 #undef TARGET_CLASS_LIKELY_SPILLED_P
658 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
660 #undef TARGET_VECTORIZE_BUILTINS
661 #define TARGET_VECTORIZE_BUILTINS
663 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
664 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
665 arm_builtin_vectorized_function
667 #undef TARGET_VECTOR_ALIGNMENT
668 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
670 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
671 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
672 arm_vector_alignment_reachable
674 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
675 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
676 arm_builtin_support_vector_misalignment
678 #undef TARGET_PREFERRED_RENAME_CLASS
679 #define TARGET_PREFERRED_RENAME_CLASS \
680 arm_preferred_rename_class
682 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
683 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
684 arm_vectorize_vec_perm_const_ok
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
688 arm_builtin_vectorization_cost
689 #undef TARGET_VECTORIZE_ADD_STMT_COST
690 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
692 #undef TARGET_CANONICALIZE_COMPARISON
693 #define TARGET_CANONICALIZE_COMPARISON \
694 arm_canonicalize_comparison
696 #undef TARGET_ASAN_SHADOW_OFFSET
697 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
699 #undef MAX_INSN_PER_IT_BLOCK
700 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
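/* Editorial note (not part of the original file): -mrestrict-it limits a
   Thumb-2 IT block to a single conditional instruction, as recommended for
   ARMv8; without that option up to four instructions may share one IT
   block, hence the 1 : 4 choice above.  */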
702 #undef TARGET_CAN_USE_DOLOOP_P
703 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
705 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
706 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
708 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
709 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
711 struct gcc_target targetm = TARGET_INITIALIZER;
713 /* Obstack for minipool constant handling. */
714 static struct obstack minipool_obstack;
715 static char * minipool_startobj;
717 /* The maximum number of insns skipped which
718 will be conditionalised if possible. */
719 static int max_insns_skipped = 5;
721 extern FILE * asm_out_file;
723 /* True if we are currently building a constant table. */
724 int making_const_table;
726 /* The processor for which instructions should be scheduled. */
727 enum processor_type arm_tune = arm_none;
729 /* The current tuning set. */
730 const struct tune_params *current_tune;
732 /* Which floating point hardware to schedule for. */
733 int arm_fpu_attr;
735 /* Which floating point hardware to use. */
736 const struct arm_fpu_desc *arm_fpu_desc;
738 /* Used for Thumb call_via trampolines. */
739 rtx thumb_call_via_label[14];
740 static int thumb_call_reg_needed;
742 /* Bit values used to identify processor capabilities. */
743 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
744 #define FL_ARCH3M (1 << 1) /* Extended multiply */
745 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
746 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
747 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
748 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
749 #define FL_THUMB (1 << 6) /* Thumb aware */
750 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
751 #define FL_STRONG (1 << 8) /* StrongARM */
752 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
753 #define FL_XSCALE (1 << 10) /* XScale */
754 /* spare (1 << 11) */
755 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
756 media instructions. */
757 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
758 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
759 Note: ARM6 & 7 derivatives only. */
760 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
761 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
762 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
763 profile. */
764 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
765 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
766 #define FL_NEON (1 << 20) /* Neon instructions. */
767 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
768 architecture. */
769 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
770 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
771 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
772 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
774 #define FL_SMALLMUL (1 << 26) /* Small multiply supported. */
776 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
777 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
779 /* Flags that only affect tuning, not available instructions. */
780 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
781 | FL_CO_PROC)
783 #define FL_FOR_ARCH2 FL_NOTM
784 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
785 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
786 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
787 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
788 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
789 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
790 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
791 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
792 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
793 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
794 #define FL_FOR_ARCH6J FL_FOR_ARCH6
795 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
796 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
797 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
798 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
799 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
800 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
801 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
802 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
803 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
804 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
805 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
806 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
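/* Editorial sketch (assumption, not part of the original file): the FL_*
   capability bits above are OR-ed together into the FL_FOR_ARCH* masks for
   each architecture, stored in insn_flags when the target is selected, and
   then used to derive the arm_arch* feature globals, roughly as follows.  */
#if 0
  insn_flags = FL_FOR_ARCH7A;                     /* e.g. for -march=armv7-a */
  arm_arch5       = (insn_flags & FL_ARCH5)  != 0;
  arm_arch6       = (insn_flags & FL_ARCH6)  != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
#endif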
808 /* The bits in this mask specify which
809 instructions we are allowed to generate. */
810 static unsigned long insn_flags = 0;
812 /* The bits in this mask specify which instruction scheduling options should
813 be used. */
814 static unsigned long tune_flags = 0;
816 /* The highest ARM architecture version supported by the
817 target. */
818 enum base_architecture arm_base_arch = BASE_ARCH_0;
820 /* The following are used in the arm.md file as equivalents to bits
821 in the above two flag variables. */
823 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
824 int arm_arch3m = 0;
826 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
827 int arm_arch4 = 0;
829 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
830 int arm_arch4t = 0;
832 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
833 int arm_arch5 = 0;
835 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
836 int arm_arch5e = 0;
838 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
839 int arm_arch6 = 0;
841 /* Nonzero if this chip supports the ARM 6K extensions. */
842 int arm_arch6k = 0;
844 /* Nonzero if instructions present in ARMv6-M can be used. */
845 int arm_arch6m = 0;
847 /* Nonzero if this chip supports the ARM 7 extensions. */
848 int arm_arch7 = 0;
850 /* Nonzero if instructions not present in the 'M' profile can be used. */
851 int arm_arch_notm = 0;
853 /* Nonzero if instructions present in ARMv7E-M can be used. */
854 int arm_arch7em = 0;
856 /* Nonzero if instructions present in ARMv8 can be used. */
857 int arm_arch8 = 0;
859 /* Nonzero if this chip can benefit from load scheduling. */
860 int arm_ld_sched = 0;
862 /* Nonzero if this chip is a StrongARM. */
863 int arm_tune_strongarm = 0;
865 /* Nonzero if this chip supports Intel Wireless MMX technology. */
866 int arm_arch_iwmmxt = 0;
868 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
869 int arm_arch_iwmmxt2 = 0;
871 /* Nonzero if this chip is an XScale. */
872 int arm_arch_xscale = 0;
874 /* Nonzero if tuning for XScale */
875 int arm_tune_xscale = 0;
877 /* Nonzero if we want to tune for stores that access the write-buffer.
878 This typically means an ARM6 or ARM7 with MMU or MPU. */
879 int arm_tune_wbuf = 0;
881 /* Nonzero if tuning for Cortex-A9. */
882 int arm_tune_cortex_a9 = 0;
884 /* Nonzero if generating Thumb instructions. */
885 int thumb_code = 0;
887 /* Nonzero if generating Thumb-1 instructions. */
888 int thumb1_code = 0;
890 /* Nonzero if we should define __THUMB_INTERWORK__ in the
891 preprocessor.
892 XXX This is a bit of a hack, it's intended to help work around
893 problems in GLD which doesn't understand that armv5t code is
894 interworking clean. */
895 int arm_cpp_interwork = 0;
897 /* Nonzero if chip supports Thumb 2. */
898 int arm_arch_thumb2;
900 /* Nonzero if chip supports integer division instruction. */
901 int arm_arch_arm_hwdiv;
902 int arm_arch_thumb_hwdiv;
904 /* Nonzero if we should use Neon to handle 64-bit operations rather
905 than core registers. */
906 int prefer_neon_for_64bits = 0;
908 /* Nonzero if we shouldn't use literal pools. */
909 bool arm_disable_literal_pool = false;
911 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
912 we must report the mode of the memory reference from
913 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
914 machine_mode output_memory_reference_mode;
916 /* The register number to be used for the PIC offset register. */
917 unsigned arm_pic_register = INVALID_REGNUM;
919 enum arm_pcs arm_pcs_default;
921 /* For an explanation of these variables, see final_prescan_insn below. */
922 int arm_ccfsm_state;
923 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
924 enum arm_cond_code arm_current_cc;
926 rtx arm_target_insn;
927 int arm_target_label;
928 /* The number of conditionally executed insns, including the current insn. */
929 int arm_condexec_count = 0;
930 /* A bitmask specifying the patterns for the IT block.
931 Zero means do not output an IT block before this insn. */
932 int arm_condexec_mask = 0;
933 /* The number of bits used in arm_condexec_mask. */
934 int arm_condexec_masklen = 0;
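/* Editorial note (assumption, not part of the original file): for Thumb-2,
   final_prescan_insn accumulates one bit per conditional instruction in
   arm_condexec_mask, recording whether that instruction uses the block's
   base condition or its inverse; arm_condexec_masklen counts how many such
   bits have been gathered, and the pair is then emitted as one IT
   instruction covering the block.  */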
936 /* Nonzero if chip supports the ARMv8 CRC instructions. */
937 int arm_arch_crc = 0;
939 /* Nonzero if the core has a very small, high-latency, multiply unit. */
940 int arm_m_profile_small_mul = 0;
942 /* The condition codes of the ARM, and the inverse function. */
943 static const char * const arm_condition_codes[] =
944 {
945 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
946 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
947 };
949 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
950 int arm_regs_in_sequence[] =
951 {
952 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
953 };
955 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
956 #define streq(string1, string2) (strcmp (string1, string2) == 0)
958 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
959 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
960 | (1 << PIC_OFFSET_TABLE_REGNUM)))
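/* Editorial note (assumption, not part of the original file): 0xff selects
   the Thumb low registers r0-r7; the hard frame pointer, stack pointer,
   program counter and PIC base are then cleared from the mask so they are
   never picked as Thumb-2 work (scratch) registers.  */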
962 /* Initialization code. */
964 struct processors
965 {
966 const char *const name;
967 enum processor_type core;
968 const char *arch;
969 enum base_architecture base_arch;
970 const unsigned long flags;
971 const struct tune_params *const tune;
972 };
975 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
976 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
977 prefetch_slots, \
978 l1_size, \
979 l1_line_size
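/* Editorial note (assumption, not part of the original file): these macros
   expand to the three prefetch-related initialisers of struct tune_params
   (number of prefetch slots, L1 cache size, L1 cache line size), so
   ARM_PREFETCH_NOT_BENEFICIAL supplies 0, -1, -1 in the tune tables
   further down.  */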
981 /* arm generic vectorizer costs. */
982 static const
983 struct cpu_vec_costs arm_default_vec_cost = {
984 1, /* scalar_stmt_cost. */
985 1, /* scalar load_cost. */
986 1, /* scalar_store_cost. */
987 1, /* vec_stmt_cost. */
988 1, /* vec_to_scalar_cost. */
989 1, /* scalar_to_vec_cost. */
990 1, /* vec_align_load_cost. */
991 1, /* vec_unalign_load_cost. */
992 1, /* vec_unalign_store_cost. */
993 1, /* vec_store_cost. */
994 3, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
998 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
999 #include "aarch-cost-tables.h"
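/* Editorial note (assumption, not part of the original file): the entries
   in the cpu_cost_table structures below are extra costs relative to a
   single fast instruction, expressed in COSTS_N_INSNS units, so a value of
   0 means "no cost beyond the baseline instruction".  */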
1003 const struct cpu_cost_table cortexa9_extra_costs =
1005 /* ALU */
1007 0, /* arith. */
1008 0, /* logical. */
1009 0, /* shift. */
1010 COSTS_N_INSNS (1), /* shift_reg. */
1011 COSTS_N_INSNS (1), /* arith_shift. */
1012 COSTS_N_INSNS (2), /* arith_shift_reg. */
1013 0, /* log_shift. */
1014 COSTS_N_INSNS (1), /* log_shift_reg. */
1015 COSTS_N_INSNS (1), /* extend. */
1016 COSTS_N_INSNS (2), /* extend_arith. */
1017 COSTS_N_INSNS (1), /* bfi. */
1018 COSTS_N_INSNS (1), /* bfx. */
1019 0, /* clz. */
1020 0, /* rev. */
1021 0, /* non_exec. */
1022 true /* non_exec_costs_exec. */
1025 /* MULT SImode */
1027 COSTS_N_INSNS (3), /* simple. */
1028 COSTS_N_INSNS (3), /* flag_setting. */
1029 COSTS_N_INSNS (2), /* extend. */
1030 COSTS_N_INSNS (3), /* add. */
1031 COSTS_N_INSNS (2), /* extend_add. */
1032 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1034 /* MULT DImode */
1036 0, /* simple (N/A). */
1037 0, /* flag_setting (N/A). */
1038 COSTS_N_INSNS (4), /* extend. */
1039 0, /* add (N/A). */
1040 COSTS_N_INSNS (4), /* extend_add. */
1041 0 /* idiv (N/A). */
1044 /* LD/ST */
1046 COSTS_N_INSNS (2), /* load. */
1047 COSTS_N_INSNS (2), /* load_sign_extend. */
1048 COSTS_N_INSNS (2), /* ldrd. */
1049 COSTS_N_INSNS (2), /* ldm_1st. */
1050 1, /* ldm_regs_per_insn_1st. */
1051 2, /* ldm_regs_per_insn_subsequent. */
1052 COSTS_N_INSNS (5), /* loadf. */
1053 COSTS_N_INSNS (5), /* loadd. */
1054 COSTS_N_INSNS (1), /* load_unaligned. */
1055 COSTS_N_INSNS (2), /* store. */
1056 COSTS_N_INSNS (2), /* strd. */
1057 COSTS_N_INSNS (2), /* stm_1st. */
1058 1, /* stm_regs_per_insn_1st. */
1059 2, /* stm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (1), /* storef. */
1061 COSTS_N_INSNS (1), /* stored. */
1062 COSTS_N_INSNS (1) /* store_unaligned. */
1065 /* FP SFmode */
1067 COSTS_N_INSNS (14), /* div. */
1068 COSTS_N_INSNS (4), /* mult. */
1069 COSTS_N_INSNS (7), /* mult_addsub. */
1070 COSTS_N_INSNS (30), /* fma. */
1071 COSTS_N_INSNS (3), /* addsub. */
1072 COSTS_N_INSNS (1), /* fpconst. */
1073 COSTS_N_INSNS (1), /* neg. */
1074 COSTS_N_INSNS (3), /* compare. */
1075 COSTS_N_INSNS (3), /* widen. */
1076 COSTS_N_INSNS (3), /* narrow. */
1077 COSTS_N_INSNS (3), /* toint. */
1078 COSTS_N_INSNS (3), /* fromint. */
1079 COSTS_N_INSNS (3) /* roundint. */
1081 /* FP DFmode */
1083 COSTS_N_INSNS (24), /* div. */
1084 COSTS_N_INSNS (5), /* mult. */
1085 COSTS_N_INSNS (8), /* mult_addsub. */
1086 COSTS_N_INSNS (30), /* fma. */
1087 COSTS_N_INSNS (3), /* addsub. */
1088 COSTS_N_INSNS (1), /* fpconst. */
1089 COSTS_N_INSNS (1), /* neg. */
1090 COSTS_N_INSNS (3), /* compare. */
1091 COSTS_N_INSNS (3), /* widen. */
1092 COSTS_N_INSNS (3), /* narrow. */
1093 COSTS_N_INSNS (3), /* toint. */
1094 COSTS_N_INSNS (3), /* fromint. */
1095 COSTS_N_INSNS (3) /* roundint. */
1098 /* Vector */
1100 COSTS_N_INSNS (1) /* alu. */
1104 const struct cpu_cost_table cortexa8_extra_costs =
1106 /* ALU */
1108 0, /* arith. */
1109 0, /* logical. */
1110 COSTS_N_INSNS (1), /* shift. */
1111 0, /* shift_reg. */
1112 COSTS_N_INSNS (1), /* arith_shift. */
1113 0, /* arith_shift_reg. */
1114 COSTS_N_INSNS (1), /* log_shift. */
1115 0, /* log_shift_reg. */
1116 0, /* extend. */
1117 0, /* extend_arith. */
1118 0, /* bfi. */
1119 0, /* bfx. */
1120 0, /* clz. */
1121 0, /* rev. */
1122 0, /* non_exec. */
1123 true /* non_exec_costs_exec. */
1126 /* MULT SImode */
1128 COSTS_N_INSNS (1), /* simple. */
1129 COSTS_N_INSNS (1), /* flag_setting. */
1130 COSTS_N_INSNS (1), /* extend. */
1131 COSTS_N_INSNS (1), /* add. */
1132 COSTS_N_INSNS (1), /* extend_add. */
1133 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1135 /* MULT DImode */
1137 0, /* simple (N/A). */
1138 0, /* flag_setting (N/A). */
1139 COSTS_N_INSNS (2), /* extend. */
1140 0, /* add (N/A). */
1141 COSTS_N_INSNS (2), /* extend_add. */
1142 0 /* idiv (N/A). */
1145 /* LD/ST */
1147 COSTS_N_INSNS (1), /* load. */
1148 COSTS_N_INSNS (1), /* load_sign_extend. */
1149 COSTS_N_INSNS (1), /* ldrd. */
1150 COSTS_N_INSNS (1), /* ldm_1st. */
1151 1, /* ldm_regs_per_insn_1st. */
1152 2, /* ldm_regs_per_insn_subsequent. */
1153 COSTS_N_INSNS (1), /* loadf. */
1154 COSTS_N_INSNS (1), /* loadd. */
1155 COSTS_N_INSNS (1), /* load_unaligned. */
1156 COSTS_N_INSNS (1), /* store. */
1157 COSTS_N_INSNS (1), /* strd. */
1158 COSTS_N_INSNS (1), /* stm_1st. */
1159 1, /* stm_regs_per_insn_1st. */
1160 2, /* stm_regs_per_insn_subsequent. */
1161 COSTS_N_INSNS (1), /* storef. */
1162 COSTS_N_INSNS (1), /* stored. */
1163 COSTS_N_INSNS (1) /* store_unaligned. */
1166 /* FP SFmode */
1168 COSTS_N_INSNS (36), /* div. */
1169 COSTS_N_INSNS (11), /* mult. */
1170 COSTS_N_INSNS (20), /* mult_addsub. */
1171 COSTS_N_INSNS (30), /* fma. */
1172 COSTS_N_INSNS (9), /* addsub. */
1173 COSTS_N_INSNS (3), /* fpconst. */
1174 COSTS_N_INSNS (3), /* neg. */
1175 COSTS_N_INSNS (6), /* compare. */
1176 COSTS_N_INSNS (4), /* widen. */
1177 COSTS_N_INSNS (4), /* narrow. */
1178 COSTS_N_INSNS (8), /* toint. */
1179 COSTS_N_INSNS (8), /* fromint. */
1180 COSTS_N_INSNS (8) /* roundint. */
1182 /* FP DFmode */
1184 COSTS_N_INSNS (64), /* div. */
1185 COSTS_N_INSNS (16), /* mult. */
1186 COSTS_N_INSNS (25), /* mult_addsub. */
1187 COSTS_N_INSNS (30), /* fma. */
1188 COSTS_N_INSNS (9), /* addsub. */
1189 COSTS_N_INSNS (3), /* fpconst. */
1190 COSTS_N_INSNS (3), /* neg. */
1191 COSTS_N_INSNS (6), /* compare. */
1192 COSTS_N_INSNS (6), /* widen. */
1193 COSTS_N_INSNS (6), /* narrow. */
1194 COSTS_N_INSNS (8), /* toint. */
1195 COSTS_N_INSNS (8), /* fromint. */
1196 COSTS_N_INSNS (8) /* roundint. */
1199 /* Vector */
1201 COSTS_N_INSNS (1) /* alu. */
1205 const struct cpu_cost_table cortexa5_extra_costs =
1207 /* ALU */
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 COSTS_N_INSNS (1), /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 COSTS_N_INSNS (1), /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 COSTS_N_INSNS (1), /* log_shift_reg. */
1217 COSTS_N_INSNS (1), /* extend. */
1218 COSTS_N_INSNS (1), /* extend_arith. */
1219 COSTS_N_INSNS (1), /* bfi. */
1220 COSTS_N_INSNS (1), /* bfx. */
1221 COSTS_N_INSNS (1), /* clz. */
1222 COSTS_N_INSNS (1), /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1228 /* MULT SImode */
1230 0, /* simple. */
1231 COSTS_N_INSNS (1), /* flag_setting. */
1232 COSTS_N_INSNS (1), /* extend. */
1233 COSTS_N_INSNS (1), /* add. */
1234 COSTS_N_INSNS (1), /* extend_add. */
1235 COSTS_N_INSNS (7) /* idiv. */
1237 /* MULT DImode */
1239 0, /* simple (N/A). */
1240 0, /* flag_setting (N/A). */
1241 COSTS_N_INSNS (1), /* extend. */
1242 0, /* add. */
1243 COSTS_N_INSNS (2), /* extend_add. */
1244 0 /* idiv (N/A). */
1247 /* LD/ST */
1249 COSTS_N_INSNS (1), /* load. */
1250 COSTS_N_INSNS (1), /* load_sign_extend. */
1251 COSTS_N_INSNS (6), /* ldrd. */
1252 COSTS_N_INSNS (1), /* ldm_1st. */
1253 1, /* ldm_regs_per_insn_1st. */
1254 2, /* ldm_regs_per_insn_subsequent. */
1255 COSTS_N_INSNS (2), /* loadf. */
1256 COSTS_N_INSNS (4), /* loadd. */
1257 COSTS_N_INSNS (1), /* load_unaligned. */
1258 COSTS_N_INSNS (1), /* store. */
1259 COSTS_N_INSNS (3), /* strd. */
1260 COSTS_N_INSNS (1), /* stm_1st. */
1261 1, /* stm_regs_per_insn_1st. */
1262 2, /* stm_regs_per_insn_subsequent. */
1263 COSTS_N_INSNS (2), /* storef. */
1264 COSTS_N_INSNS (2), /* stored. */
1265 COSTS_N_INSNS (1) /* store_unaligned. */
1268 /* FP SFmode */
1270 COSTS_N_INSNS (15), /* div. */
1271 COSTS_N_INSNS (3), /* mult. */
1272 COSTS_N_INSNS (7), /* mult_addsub. */
1273 COSTS_N_INSNS (7), /* fma. */
1274 COSTS_N_INSNS (3), /* addsub. */
1275 COSTS_N_INSNS (3), /* fpconst. */
1276 COSTS_N_INSNS (3), /* neg. */
1277 COSTS_N_INSNS (3), /* compare. */
1278 COSTS_N_INSNS (3), /* widen. */
1279 COSTS_N_INSNS (3), /* narrow. */
1280 COSTS_N_INSNS (3), /* toint. */
1281 COSTS_N_INSNS (3), /* fromint. */
1282 COSTS_N_INSNS (3) /* roundint. */
1284 /* FP DFmode */
1286 COSTS_N_INSNS (30), /* div. */
1287 COSTS_N_INSNS (6), /* mult. */
1288 COSTS_N_INSNS (10), /* mult_addsub. */
1289 COSTS_N_INSNS (7), /* fma. */
1290 COSTS_N_INSNS (3), /* addsub. */
1291 COSTS_N_INSNS (3), /* fpconst. */
1292 COSTS_N_INSNS (3), /* neg. */
1293 COSTS_N_INSNS (3), /* compare. */
1294 COSTS_N_INSNS (3), /* widen. */
1295 COSTS_N_INSNS (3), /* narrow. */
1296 COSTS_N_INSNS (3), /* toint. */
1297 COSTS_N_INSNS (3), /* fromint. */
1298 COSTS_N_INSNS (3) /* roundint. */
1301 /* Vector */
1303 COSTS_N_INSNS (1) /* alu. */
1308 const struct cpu_cost_table cortexa7_extra_costs =
1310 /* ALU */
1312 0, /* arith. */
1313 0, /* logical. */
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1326 0, /* non_exec. */
1327 true /* non_exec_costs_exec. */
1331 /* MULT SImode */
1333 0, /* simple. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1340 /* MULT DImode */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1345 0, /* add. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1347 0 /* idiv (N/A). */
1350 /* LD/ST */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (3), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (2), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1) /* store_unaligned. */
1371 /* FP SFmode */
1373 COSTS_N_INSNS (15), /* div. */
1374 COSTS_N_INSNS (3), /* mult. */
1375 COSTS_N_INSNS (7), /* mult_addsub. */
1376 COSTS_N_INSNS (7), /* fma. */
1377 COSTS_N_INSNS (3), /* addsub. */
1378 COSTS_N_INSNS (3), /* fpconst. */
1379 COSTS_N_INSNS (3), /* neg. */
1380 COSTS_N_INSNS (3), /* compare. */
1381 COSTS_N_INSNS (3), /* widen. */
1382 COSTS_N_INSNS (3), /* narrow. */
1383 COSTS_N_INSNS (3), /* toint. */
1384 COSTS_N_INSNS (3), /* fromint. */
1385 COSTS_N_INSNS (3) /* roundint. */
1387 /* FP DFmode */
1389 COSTS_N_INSNS (30), /* div. */
1390 COSTS_N_INSNS (6), /* mult. */
1391 COSTS_N_INSNS (10), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1404 /* Vector */
1406 COSTS_N_INSNS (1) /* alu. */
1410 const struct cpu_cost_table cortexa12_extra_costs =
1412 /* ALU */
1414 0, /* arith. */
1415 0, /* logical. */
1416 0, /* shift. */
1417 COSTS_N_INSNS (1), /* shift_reg. */
1418 COSTS_N_INSNS (1), /* arith_shift. */
1419 COSTS_N_INSNS (1), /* arith_shift_reg. */
1420 COSTS_N_INSNS (1), /* log_shift. */
1421 COSTS_N_INSNS (1), /* log_shift_reg. */
1422 0, /* extend. */
1423 COSTS_N_INSNS (1), /* extend_arith. */
1424 0, /* bfi. */
1425 COSTS_N_INSNS (1), /* bfx. */
1426 COSTS_N_INSNS (1), /* clz. */
1427 COSTS_N_INSNS (1), /* rev. */
1428 0, /* non_exec. */
1429 true /* non_exec_costs_exec. */
1431 /* MULT SImode */
1434 COSTS_N_INSNS (2), /* simple. */
1435 COSTS_N_INSNS (3), /* flag_setting. */
1436 COSTS_N_INSNS (2), /* extend. */
1437 COSTS_N_INSNS (3), /* add. */
1438 COSTS_N_INSNS (2), /* extend_add. */
1439 COSTS_N_INSNS (18) /* idiv. */
1441 /* MULT DImode */
1443 0, /* simple (N/A). */
1444 0, /* flag_setting (N/A). */
1445 COSTS_N_INSNS (3), /* extend. */
1446 0, /* add (N/A). */
1447 COSTS_N_INSNS (3), /* extend_add. */
1448 0 /* idiv (N/A). */
1451 /* LD/ST */
1453 COSTS_N_INSNS (3), /* load. */
1454 COSTS_N_INSNS (3), /* load_sign_extend. */
1455 COSTS_N_INSNS (3), /* ldrd. */
1456 COSTS_N_INSNS (3), /* ldm_1st. */
1457 1, /* ldm_regs_per_insn_1st. */
1458 2, /* ldm_regs_per_insn_subsequent. */
1459 COSTS_N_INSNS (3), /* loadf. */
1460 COSTS_N_INSNS (3), /* loadd. */
1461 0, /* load_unaligned. */
1462 0, /* store. */
1463 0, /* strd. */
1464 0, /* stm_1st. */
1465 1, /* stm_regs_per_insn_1st. */
1466 2, /* stm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* storef. */
1468 COSTS_N_INSNS (2), /* stored. */
1469 0 /* store_unaligned. */
1472 /* FP SFmode */
1474 COSTS_N_INSNS (17), /* div. */
1475 COSTS_N_INSNS (4), /* mult. */
1476 COSTS_N_INSNS (8), /* mult_addsub. */
1477 COSTS_N_INSNS (8), /* fma. */
1478 COSTS_N_INSNS (4), /* addsub. */
1479 COSTS_N_INSNS (2), /* fpconst. */
1480 COSTS_N_INSNS (2), /* neg. */
1481 COSTS_N_INSNS (2), /* compare. */
1482 COSTS_N_INSNS (4), /* widen. */
1483 COSTS_N_INSNS (4), /* narrow. */
1484 COSTS_N_INSNS (4), /* toint. */
1485 COSTS_N_INSNS (4), /* fromint. */
1486 COSTS_N_INSNS (4) /* roundint. */
1488 /* FP DFmode */
1490 COSTS_N_INSNS (31), /* div. */
1491 COSTS_N_INSNS (4), /* mult. */
1492 COSTS_N_INSNS (8), /* mult_addsub. */
1493 COSTS_N_INSNS (8), /* fma. */
1494 COSTS_N_INSNS (4), /* addsub. */
1495 COSTS_N_INSNS (2), /* fpconst. */
1496 COSTS_N_INSNS (2), /* neg. */
1497 COSTS_N_INSNS (2), /* compare. */
1498 COSTS_N_INSNS (4), /* widen. */
1499 COSTS_N_INSNS (4), /* narrow. */
1500 COSTS_N_INSNS (4), /* toint. */
1501 COSTS_N_INSNS (4), /* fromint. */
1502 COSTS_N_INSNS (4) /* roundint. */
1505 /* Vector */
1507 COSTS_N_INSNS (1) /* alu. */
1511 const struct cpu_cost_table cortexa15_extra_costs =
1513 /* ALU */
1515 0, /* arith. */
1516 0, /* logical. */
1517 0, /* shift. */
1518 0, /* shift_reg. */
1519 COSTS_N_INSNS (1), /* arith_shift. */
1520 COSTS_N_INSNS (1), /* arith_shift_reg. */
1521 COSTS_N_INSNS (1), /* log_shift. */
1522 COSTS_N_INSNS (1), /* log_shift_reg. */
1523 0, /* extend. */
1524 COSTS_N_INSNS (1), /* extend_arith. */
1525 COSTS_N_INSNS (1), /* bfi. */
1526 0, /* bfx. */
1527 0, /* clz. */
1528 0, /* rev. */
1529 0, /* non_exec. */
1530 true /* non_exec_costs_exec. */
1532 /* MULT SImode */
1535 COSTS_N_INSNS (2), /* simple. */
1536 COSTS_N_INSNS (3), /* flag_setting. */
1537 COSTS_N_INSNS (2), /* extend. */
1538 COSTS_N_INSNS (2), /* add. */
1539 COSTS_N_INSNS (2), /* extend_add. */
1540 COSTS_N_INSNS (18) /* idiv. */
1542 /* MULT DImode */
1544 0, /* simple (N/A). */
1545 0, /* flag_setting (N/A). */
1546 COSTS_N_INSNS (3), /* extend. */
1547 0, /* add (N/A). */
1548 COSTS_N_INSNS (3), /* extend_add. */
1549 0 /* idiv (N/A). */
1552 /* LD/ST */
1554 COSTS_N_INSNS (3), /* load. */
1555 COSTS_N_INSNS (3), /* load_sign_extend. */
1556 COSTS_N_INSNS (3), /* ldrd. */
1557 COSTS_N_INSNS (4), /* ldm_1st. */
1558 1, /* ldm_regs_per_insn_1st. */
1559 2, /* ldm_regs_per_insn_subsequent. */
1560 COSTS_N_INSNS (4), /* loadf. */
1561 COSTS_N_INSNS (4), /* loadd. */
1562 0, /* load_unaligned. */
1563 0, /* store. */
1564 0, /* strd. */
1565 COSTS_N_INSNS (1), /* stm_1st. */
1566 1, /* stm_regs_per_insn_1st. */
1567 2, /* stm_regs_per_insn_subsequent. */
1568 0, /* storef. */
1569 0, /* stored. */
1570 0 /* store_unaligned. */
1573 /* FP SFmode */
1575 COSTS_N_INSNS (17), /* div. */
1576 COSTS_N_INSNS (4), /* mult. */
1577 COSTS_N_INSNS (8), /* mult_addsub. */
1578 COSTS_N_INSNS (8), /* fma. */
1579 COSTS_N_INSNS (4), /* addsub. */
1580 COSTS_N_INSNS (2), /* fpconst. */
1581 COSTS_N_INSNS (2), /* neg. */
1582 COSTS_N_INSNS (5), /* compare. */
1583 COSTS_N_INSNS (4), /* widen. */
1584 COSTS_N_INSNS (4), /* narrow. */
1585 COSTS_N_INSNS (4), /* toint. */
1586 COSTS_N_INSNS (4), /* fromint. */
1587 COSTS_N_INSNS (4) /* roundint. */
1589 /* FP DFmode */
1591 COSTS_N_INSNS (31), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1606 /* Vector */
1608 COSTS_N_INSNS (1) /* alu. */
1612 const struct cpu_cost_table v7m_extra_costs =
1614 /* ALU */
1616 0, /* arith. */
1617 0, /* logical. */
1618 0, /* shift. */
1619 0, /* shift_reg. */
1620 0, /* arith_shift. */
1621 COSTS_N_INSNS (1), /* arith_shift_reg. */
1622 0, /* log_shift. */
1623 COSTS_N_INSNS (1), /* log_shift_reg. */
1624 0, /* extend. */
1625 COSTS_N_INSNS (1), /* extend_arith. */
1626 0, /* bfi. */
1627 0, /* bfx. */
1628 0, /* clz. */
1629 0, /* rev. */
1630 COSTS_N_INSNS (1), /* non_exec. */
1631 false /* non_exec_costs_exec. */
1634 /* MULT SImode */
1636 COSTS_N_INSNS (1), /* simple. */
1637 COSTS_N_INSNS (1), /* flag_setting. */
1638 COSTS_N_INSNS (2), /* extend. */
1639 COSTS_N_INSNS (1), /* add. */
1640 COSTS_N_INSNS (3), /* extend_add. */
1641 COSTS_N_INSNS (8) /* idiv. */
1643 /* MULT DImode */
1645 0, /* simple (N/A). */
1646 0, /* flag_setting (N/A). */
1647 COSTS_N_INSNS (2), /* extend. */
1648 0, /* add (N/A). */
1649 COSTS_N_INSNS (3), /* extend_add. */
1650 0 /* idiv (N/A). */
1653 /* LD/ST */
1655 COSTS_N_INSNS (2), /* load. */
1656 0, /* load_sign_extend. */
1657 COSTS_N_INSNS (3), /* ldrd. */
1658 COSTS_N_INSNS (2), /* ldm_1st. */
1659 1, /* ldm_regs_per_insn_1st. */
1660 1, /* ldm_regs_per_insn_subsequent. */
1661 COSTS_N_INSNS (2), /* loadf. */
1662 COSTS_N_INSNS (3), /* loadd. */
1663 COSTS_N_INSNS (1), /* load_unaligned. */
1664 COSTS_N_INSNS (2), /* store. */
1665 COSTS_N_INSNS (3), /* strd. */
1666 COSTS_N_INSNS (2), /* stm_1st. */
1667 1, /* stm_regs_per_insn_1st. */
1668 1, /* stm_regs_per_insn_subsequent. */
1669 COSTS_N_INSNS (2), /* storef. */
1670 COSTS_N_INSNS (3), /* stored. */
1671 COSTS_N_INSNS (1) /* store_unaligned. */
1674 /* FP SFmode */
1676 COSTS_N_INSNS (7), /* div. */
1677 COSTS_N_INSNS (2), /* mult. */
1678 COSTS_N_INSNS (5), /* mult_addsub. */
1679 COSTS_N_INSNS (3), /* fma. */
1680 COSTS_N_INSNS (1), /* addsub. */
1681 0, /* fpconst. */
1682 0, /* neg. */
1683 0, /* compare. */
1684 0, /* widen. */
1685 0, /* narrow. */
1686 0, /* toint. */
1687 0, /* fromint. */
1688 0 /* roundint. */
1690 /* FP DFmode */
1692 COSTS_N_INSNS (15), /* div. */
1693 COSTS_N_INSNS (5), /* mult. */
1694 COSTS_N_INSNS (7), /* mult_addsub. */
1695 COSTS_N_INSNS (7), /* fma. */
1696 COSTS_N_INSNS (3), /* addsub. */
1697 0, /* fpconst. */
1698 0, /* neg. */
1699 0, /* compare. */
1700 0, /* widen. */
1701 0, /* narrow. */
1702 0, /* toint. */
1703 0, /* fromint. */
1704 0 /* roundint. */
1707 /* Vector */
1709 COSTS_N_INSNS (1) /* alu. */
1713 const struct tune_params arm_slowmul_tune =
1715 arm_slowmul_rtx_costs,
1716 NULL,
1717 NULL, /* Sched adj cost. */
1718 3, /* Constant limit. */
1719 5, /* Max cond insns. */
1720 ARM_PREFETCH_NOT_BENEFICIAL,
1721 true, /* Prefer constant pool. */
1722 arm_default_branch_cost,
1723 false, /* Prefer LDRD/STRD. */
1724 {true, true}, /* Prefer non short circuit. */
1725 &arm_default_vec_cost, /* Vectorizer costs. */
1726 false, /* Prefer Neon for 64-bits bitops. */
1727 false, false, /* Prefer 32-bit encodings. */
1728 false, /* Prefer Neon for stringops. */
1729 8 /* Maximum insns to inline memset. */
1732 const struct tune_params arm_fastmul_tune =
1734 arm_fastmul_rtx_costs,
1735 NULL,
1736 NULL, /* Sched adj cost. */
1737 1, /* Constant limit. */
1738 5, /* Max cond insns. */
1739 ARM_PREFETCH_NOT_BENEFICIAL,
1740 true, /* Prefer constant pool. */
1741 arm_default_branch_cost,
1742 false, /* Prefer LDRD/STRD. */
1743 {true, true}, /* Prefer non short circuit. */
1744 &arm_default_vec_cost, /* Vectorizer costs. */
1745 false, /* Prefer Neon for 64-bits bitops. */
1746 false, false, /* Prefer 32-bit encodings. */
1747 false, /* Prefer Neon for stringops. */
1748 8 /* Maximum insns to inline memset. */
1751 /* StrongARM has early execution of branches, so a sequence that is worth
1752 skipping is shorter. Set max_insns_skipped to a lower value. */
1754 const struct tune_params arm_strongarm_tune =
1756 arm_fastmul_rtx_costs,
1757 NULL,
1758 NULL, /* Sched adj cost. */
1759 1, /* Constant limit. */
1760 3, /* Max cond insns. */
1761 ARM_PREFETCH_NOT_BENEFICIAL,
1762 true, /* Prefer constant pool. */
1763 arm_default_branch_cost,
1764 false, /* Prefer LDRD/STRD. */
1765 {true, true}, /* Prefer non short circuit. */
1766 &arm_default_vec_cost, /* Vectorizer costs. */
1767 false, /* Prefer Neon for 64-bits bitops. */
1768 false, false, /* Prefer 32-bit encodings. */
1769 false, /* Prefer Neon for stringops. */
1770 8 /* Maximum insns to inline memset. */
1773 const struct tune_params arm_xscale_tune =
1775 arm_xscale_rtx_costs,
1776 NULL,
1777 xscale_sched_adjust_cost,
1778 2, /* Constant limit. */
1779 3, /* Max cond insns. */
1780 ARM_PREFETCH_NOT_BENEFICIAL,
1781 true, /* Prefer constant pool. */
1782 arm_default_branch_cost,
1783 false, /* Prefer LDRD/STRD. */
1784 {true, true}, /* Prefer non short circuit. */
1785 &arm_default_vec_cost, /* Vectorizer costs. */
1786 false, /* Prefer Neon for 64-bits bitops. */
1787 false, false, /* Prefer 32-bit encodings. */
1788 false, /* Prefer Neon for stringops. */
1789 8 /* Maximum insns to inline memset. */
1792 const struct tune_params arm_9e_tune =
1794 arm_9e_rtx_costs,
1795 NULL,
1796 NULL, /* Sched adj cost. */
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 true, /* Prefer constant pool. */
1801 arm_default_branch_cost,
1802 false, /* Prefer LDRD/STRD. */
1803 {true, true}, /* Prefer non short circuit. */
1804 &arm_default_vec_cost, /* Vectorizer costs. */
1805 false, /* Prefer Neon for 64-bits bitops. */
1806 false, false, /* Prefer 32-bit encodings. */
1807 false, /* Prefer Neon for stringops. */
1808 8 /* Maximum insns to inline memset. */
1811 const struct tune_params arm_v6t2_tune =
1813 arm_9e_rtx_costs,
1814 NULL,
1815 NULL, /* Sched adj cost. */
1816 1, /* Constant limit. */
1817 5, /* Max cond insns. */
1818 ARM_PREFETCH_NOT_BENEFICIAL,
1819 false, /* Prefer constant pool. */
1820 arm_default_branch_cost,
1821 false, /* Prefer LDRD/STRD. */
1822 {true, true}, /* Prefer non short circuit. */
1823 &arm_default_vec_cost, /* Vectorizer costs. */
1824 false, /* Prefer Neon for 64-bits bitops. */
1825 false, false, /* Prefer 32-bit encodings. */
1826 false, /* Prefer Neon for stringops. */
1827 8 /* Maximum insns to inline memset. */
1830 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1831 const struct tune_params arm_cortex_tune =
1833 arm_9e_rtx_costs,
1834 &generic_extra_costs,
1835 NULL, /* Sched adj cost. */
1836 1, /* Constant limit. */
1837 5, /* Max cond insns. */
1838 ARM_PREFETCH_NOT_BENEFICIAL,
1839 false, /* Prefer constant pool. */
1840 arm_default_branch_cost,
1841 false, /* Prefer LDRD/STRD. */
1842 {true, true}, /* Prefer non short circuit. */
1843 &arm_default_vec_cost, /* Vectorizer costs. */
1844 false, /* Prefer Neon for 64-bits bitops. */
1845 false, false, /* Prefer 32-bit encodings. */
1846 false, /* Prefer Neon for stringops. */
1847 8 /* Maximum insns to inline memset. */
1850 const struct tune_params arm_cortex_a8_tune =
1852 arm_9e_rtx_costs,
1853 &cortexa8_extra_costs,
1854 NULL, /* Sched adj cost. */
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 false, /* Prefer constant pool. */
1859 arm_default_branch_cost,
1860 false, /* Prefer LDRD/STRD. */
1861 {true, true}, /* Prefer non short circuit. */
1862 &arm_default_vec_cost, /* Vectorizer costs. */
1863 false, /* Prefer Neon for 64-bits bitops. */
1864 false, false, /* Prefer 32-bit encodings. */
1865 true, /* Prefer Neon for stringops. */
1866 8 /* Maximum insns to inline memset. */
1869 const struct tune_params arm_cortex_a7_tune =
1871 arm_9e_rtx_costs,
1872 &cortexa7_extra_costs,
1873 NULL,
1874 1, /* Constant limit. */
1875 5, /* Max cond insns. */
1876 ARM_PREFETCH_NOT_BENEFICIAL,
1877 false, /* Prefer constant pool. */
1878 arm_default_branch_cost,
1879 false, /* Prefer LDRD/STRD. */
1880 {true, true}, /* Prefer non short circuit. */
1881 &arm_default_vec_cost, /* Vectorizer costs. */
1882 false, /* Prefer Neon for 64-bits bitops. */
1883 false, false, /* Prefer 32-bit encodings. */
1884 true, /* Prefer Neon for stringops. */
1885 8 /* Maximum insns to inline memset. */
1888 const struct tune_params arm_cortex_a15_tune =
1890 arm_9e_rtx_costs,
1891 &cortexa15_extra_costs,
1892 NULL, /* Sched adj cost. */
1893 1, /* Constant limit. */
1894 2, /* Max cond insns. */
1895 ARM_PREFETCH_NOT_BENEFICIAL,
1896 false, /* Prefer constant pool. */
1897 arm_default_branch_cost,
1898 true, /* Prefer LDRD/STRD. */
1899 {true, true}, /* Prefer non short circuit. */
1900 &arm_default_vec_cost, /* Vectorizer costs. */
1901 false, /* Prefer Neon for 64-bits bitops. */
1902 true, true, /* Prefer 32-bit encodings. */
1903 true, /* Prefer Neon for stringops. */
1904 8 /* Maximum insns to inline memset. */
1907 const struct tune_params arm_cortex_a53_tune =
1909 arm_9e_rtx_costs,
1910 &cortexa53_extra_costs,
1911 NULL, /* Scheduler cost adjustment. */
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 ARM_PREFETCH_NOT_BENEFICIAL,
1915 false, /* Prefer constant pool. */
1916 arm_default_branch_cost,
1917 false, /* Prefer LDRD/STRD. */
1918 {true, true}, /* Prefer non short circuit. */
1919 &arm_default_vec_cost, /* Vectorizer costs. */
1920 false, /* Prefer Neon for 64-bits bitops. */
1921 false, false, /* Prefer 32-bit encodings. */
1922 false, /* Prefer Neon for stringops. */
1923 8 /* Maximum insns to inline memset. */
1926 const struct tune_params arm_cortex_a57_tune =
1928 arm_9e_rtx_costs,
1929 &cortexa57_extra_costs,
1930 NULL, /* Scheduler cost adjustment. */
1931 1, /* Constant limit. */
1932 2, /* Max cond insns. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 false, /* Prefer constant pool. */
1935 arm_default_branch_cost,
1936 true, /* Prefer LDRD/STRD. */
1937 {true, true}, /* Prefer non short circuit. */
1938 &arm_default_vec_cost, /* Vectorizer costs. */
1939 false, /* Prefer Neon for 64-bits bitops. */
1940 true, true, /* Prefer 32-bit encodings. */
1941 false, /* Prefer Neon for stringops. */
1942 8 /* Maximum insns to inline memset. */
1945 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1946 less appealing. Set max_insns_skipped to a low value. */
1948 const struct tune_params arm_cortex_a5_tune =
1950 arm_9e_rtx_costs,
1951 &cortexa5_extra_costs,
1952 NULL, /* Sched adj cost. */
1953 1, /* Constant limit. */
1954 1, /* Max cond insns. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 false, /* Prefer constant pool. */
1957 arm_cortex_a5_branch_cost,
1958 false, /* Prefer LDRD/STRD. */
1959 {false, false}, /* Prefer non short circuit. */
1960 &arm_default_vec_cost, /* Vectorizer costs. */
1961 false, /* Prefer Neon for 64-bits bitops. */
1962 false, false, /* Prefer 32-bit encodings. */
1963 true, /* Prefer Neon for stringops. */
1964 8 /* Maximum insns to inline memset. */
1967 const struct tune_params arm_cortex_a9_tune =
1969 arm_9e_rtx_costs,
1970 &cortexa9_extra_costs,
1971 cortex_a9_sched_adjust_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 ARM_PREFETCH_BENEFICIAL(4,32,32),
1975 false, /* Prefer constant pool. */
1976 arm_default_branch_cost,
1977 false, /* Prefer LDRD/STRD. */
1978 {true, true}, /* Prefer non short circuit. */
1979 &arm_default_vec_cost, /* Vectorizer costs. */
1980 false, /* Prefer Neon for 64-bits bitops. */
1981 false, false, /* Prefer 32-bit encodings. */
1982 false, /* Prefer Neon for stringops. */
1983 8 /* Maximum insns to inline memset. */
1986 const struct tune_params arm_cortex_a12_tune =
1988 arm_9e_rtx_costs,
1989 &cortexa12_extra_costs,
1990 NULL,
1991 1, /* Constant limit. */
1992 5, /* Max cond insns. */
1993 ARM_PREFETCH_BENEFICIAL(4,32,32),
1994 false, /* Prefer constant pool. */
1995 arm_default_branch_cost,
1996 true, /* Prefer LDRD/STRD. */
1997 {true, true}, /* Prefer non short circuit. */
1998 &arm_default_vec_cost, /* Vectorizer costs. */
1999 false, /* Prefer Neon for 64-bits bitops. */
2000 false, false, /* Prefer 32-bit encodings. */
2001 true, /* Prefer Neon for stringops. */
2002 8 /* Maximum insns to inline memset. */
2005 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2006 cycle to execute each. An LDR from the constant pool also takes two cycles
2007 to execute, but mildly increases pipelining opportunity (consecutive
2008 loads/stores can be pipelined together, saving one cycle), and may also
2009 improve icache utilisation. Hence we prefer the constant pool for such
2010 processors. */
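/* In other words, a MOVW/MOVT pair costs 1 + 1 = 2 cycles, the same as the
   two-cycle LDR, but the LDR can pipeline with neighbouring loads/stores,
   hence the preference for the constant pool here.  */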
2012 const struct tune_params arm_v7m_tune =
2014 arm_9e_rtx_costs,
2015 &v7m_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 true, /* Prefer constant pool. */
2021 arm_cortex_m_branch_cost,
2022 false, /* Prefer LDRD/STRD. */
2023 {false, false}, /* Prefer non short circuit. */
2024 &arm_default_vec_cost, /* Vectorizer costs. */
2025 false, /* Prefer Neon for 64-bits bitops. */
2026 false, false, /* Prefer 32-bit encodings. */
2027 false, /* Prefer Neon for stringops. */
2028 8 /* Maximum insns to inline memset. */
2031 /* Cortex-M7 tuning. */
2033 const struct tune_params arm_cortex_m7_tune =
2035 arm_9e_rtx_costs,
2036 &v7m_extra_costs,
2037 NULL, /* Sched adj cost. */
2038 0, /* Constant limit. */
2039 0, /* Max cond insns. */
2040 ARM_PREFETCH_NOT_BENEFICIAL,
2041 true, /* Prefer constant pool. */
2042 arm_cortex_m_branch_cost,
2043 false, /* Prefer LDRD/STRD. */
2044 {true, true}, /* Prefer non short circuit. */
2045 &arm_default_vec_cost, /* Vectorizer costs. */
2046 false, /* Prefer Neon for 64-bits bitops. */
2047 false, false, /* Prefer 32-bit encodings. */
2048 false, /* Prefer Neon for stringops. */
2049 8 /* Maximum insns to inline memset. */
2052 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2053 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2054 const struct tune_params arm_v6m_tune =
2056 arm_9e_rtx_costs,
2057 NULL,
2058 NULL, /* Sched adj cost. */
2059 1, /* Constant limit. */
2060 5, /* Max cond insns. */
2061 ARM_PREFETCH_NOT_BENEFICIAL,
2062 false, /* Prefer constant pool. */
2063 arm_default_branch_cost,
2064 false, /* Prefer LDRD/STRD. */
2065 {false, false}, /* Prefer non short circuit. */
2066 &arm_default_vec_cost, /* Vectorizer costs. */
2067 false, /* Prefer Neon for 64-bits bitops. */
2068 false, false, /* Prefer 32-bit encodings. */
2069 false, /* Prefer Neon for stringops. */
2070 8 /* Maximum insns to inline memset. */
2073 const struct tune_params arm_fa726te_tune =
2075 arm_9e_rtx_costs,
2076 NULL,
2077 fa726te_sched_adjust_cost,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 true, /* Prefer constant pool. */
2082 arm_default_branch_cost,
2083 false, /* Prefer LDRD/STRD. */
2084 {true, true}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8 /* Maximum insns to inline memset. */
2093 /* Not all of these give usefully different compilation alternatives,
2094 but there is no simple way of generalizing them. */
2095 static const struct processors all_cores[] =
2097 /* ARM Cores */
2098 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2099 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2100 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2101 #include "arm-cores.def"
2102 #undef ARM_CORE
2103 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
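/* For illustration (a hypothetical entry, not taken from arm-cores.def):
   ARM_CORE("cortex-a9", arm_ca9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
   would expand, via the macro above, to
   {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A, FL_LDSCHED | FL_FOR_ARCH7A,
    &arm_cortex_a9_tune}.  */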
2106 static const struct processors all_architectures[] =
2108 /* ARM Architectures */
2109 /* We don't specify tuning costs here as it will be figured out
2110 from the core. */
2112 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2113 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2114 #include "arm-arches.def"
2115 #undef ARM_ARCH
2116 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2120 /* These are populated as commandline arguments are processed, or NULL
2121 if not specified. */
2122 static const struct processors *arm_selected_arch;
2123 static const struct processors *arm_selected_cpu;
2124 static const struct processors *arm_selected_tune;
2126 /* The name of the preprocessor macro to define for this architecture. */
2128 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2130 /* Available values for -mfpu=. */
2132 static const struct arm_fpu_desc all_fpus[] =
2134 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2135 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2136 #include "arm-fpus.def"
2137 #undef ARM_FPU
2141 /* Supported TLS relocations. */
2143 enum tls_reloc {
2144 TLS_GD32,
2145 TLS_LDM32,
2146 TLS_LDO32,
2147 TLS_IE32,
2148 TLS_LE32,
2149 TLS_DESCSEQ /* GNU scheme */
2152 /* The maximum number of insns to be used when loading a constant. */
2153 inline static int
2154 arm_constant_limit (bool size_p)
2156 return size_p ? 1 : current_tune->constant_limit;
2159 /* Emit an insn that's a simple single-set. Both the operands must be known
2160 to be valid. */
2161 inline static rtx_insn *
2162 emit_set_insn (rtx x, rtx y)
2164 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2167 /* Return the number of bits set in VALUE. */
2168 static unsigned
2169 bit_count (unsigned long value)
2171 unsigned long count = 0;
2173 while (value)
2175 count++;
2176 value &= value - 1; /* Clear the least-significant set bit. */
2179 return count;
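/* Worked example of the loop above (illustrative): for value = 0b101100 the
   successive values after "value &= value - 1" are 0b101000, 0b100000 and 0,
   so the loop runs once per set bit and the function returns 3.  */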
2182 typedef struct
2184 machine_mode mode;
2185 const char *name;
2186 } arm_fixed_mode_set;
2188 /* A small helper for setting fixed-point libfuncs. */
2190 static void
2191 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2192 const char *funcname, const char *modename,
2193 int num_suffix)
2195 char buffer[50];
2197 if (num_suffix == 0)
2198 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2199 else
2200 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2202 set_optab_libfunc (optable, mode, buffer);
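/* For illustration, derived from the sprintf format above: a call such as
   arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3) registers
   the name "__gnu_addsq3" as the SQmode addition libfunc.  */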
2205 static void
2206 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2207 machine_mode from, const char *funcname,
2208 const char *toname, const char *fromname)
2210 char buffer[50];
2211 const char *maybe_suffix_2 = "";
2213 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2214 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2215 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2216 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2217 maybe_suffix_2 = "2";
2219 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2220 maybe_suffix_2);
2222 set_conv_libfunc (optable, to, from, buffer);
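/* For illustration: a signed fract-to-fract conversion such as QQmode ->
   HQmode picks up the "2" suffix and yields "__gnu_fractqqhq2", whereas a
   conversion from QImode (not a fixed-point mode) yields "__gnu_fractqihq"
   with no suffix.  */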
2225 /* Set up library functions unique to ARM. */
2227 static void
2228 arm_init_libfuncs (void)
2230 /* For Linux, we have access to kernel support for atomic operations. */
2231 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2232 init_sync_libfuncs (2 * UNITS_PER_WORD);
2234 /* There are no special library functions unless we are using the
2235 ARM BPABI. */
2236 if (!TARGET_BPABI)
2237 return;
2239 /* The functions below are described in Section 4 of the "Run-Time
2240 ABI for the ARM architecture", Version 1.0. */
2242 /* Double-precision floating-point arithmetic. Table 2. */
2243 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2244 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2245 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2246 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2247 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2249 /* Double-precision comparisons. Table 3. */
2250 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2251 set_optab_libfunc (ne_optab, DFmode, NULL);
2252 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2253 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2254 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2255 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2256 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2258 /* Single-precision floating-point arithmetic. Table 4. */
2259 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2260 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2261 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2262 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2263 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2265 /* Single-precision comparisons. Table 5. */
2266 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2267 set_optab_libfunc (ne_optab, SFmode, NULL);
2268 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2269 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2270 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2271 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2272 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2274 /* Floating-point to integer conversions. Table 6. */
2275 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2276 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2277 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2278 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2279 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2280 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2281 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2282 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2284 /* Conversions between floating types. Table 7. */
2285 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2286 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2288 /* Integer to floating-point conversions. Table 8. */
2289 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2290 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2291 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2292 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2293 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2294 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2295 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2296 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2298 /* Long long. Table 9. */
2299 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2300 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2301 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2302 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2303 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2304 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2305 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2306 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2308 /* Integer (32/32->32) division. \S 4.3.1. */
2309 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2310 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2312 /* The divmod functions are designed so that they can be used for
2313 plain division, even though they return both the quotient and the
2314 remainder. The quotient is returned in the usual location (i.e.,
2315 r0 for SImode, {r0, r1} for DImode), just as would be expected
2316 for an ordinary division routine. Because the AAPCS calling
2317 conventions specify that all of { r0, r1, r2, r3 } are
2318 call-clobbered registers, there is no need to tell the compiler
2319 explicitly that those registers are clobbered by these
2320 routines. */
2321 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2322 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
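/* Sketch of the convention described above: __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1, and __aeabi_ldivmod returns the
   quotient in {r0, r1} and the remainder in {r2, r3}, so plain division can
   simply ignore the remainder half of the result.  */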
2324 /* For SImode division the ABI provides div-without-mod routines,
2325 which are faster. */
2326 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2327 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2329 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2330 divmod libcalls instead. */
2331 set_optab_libfunc (smod_optab, DImode, NULL);
2332 set_optab_libfunc (umod_optab, DImode, NULL);
2333 set_optab_libfunc (smod_optab, SImode, NULL);
2334 set_optab_libfunc (umod_optab, SImode, NULL);
2336 /* Half-precision float operations. The compiler handles all operations
2337 with NULL libfuncs by converting to SFmode. */
2338 switch (arm_fp16_format)
2340 case ARM_FP16_FORMAT_IEEE:
2341 case ARM_FP16_FORMAT_ALTERNATIVE:
2343 /* Conversions. */
2344 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2345 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2346 ? "__gnu_f2h_ieee"
2347 : "__gnu_f2h_alternative"));
2348 set_conv_libfunc (sext_optab, SFmode, HFmode,
2349 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2350 ? "__gnu_h2f_ieee"
2351 : "__gnu_h2f_alternative"));
2353 /* Arithmetic. */
2354 set_optab_libfunc (add_optab, HFmode, NULL);
2355 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2356 set_optab_libfunc (smul_optab, HFmode, NULL);
2357 set_optab_libfunc (neg_optab, HFmode, NULL);
2358 set_optab_libfunc (sub_optab, HFmode, NULL);
2360 /* Comparisons. */
2361 set_optab_libfunc (eq_optab, HFmode, NULL);
2362 set_optab_libfunc (ne_optab, HFmode, NULL);
2363 set_optab_libfunc (lt_optab, HFmode, NULL);
2364 set_optab_libfunc (le_optab, HFmode, NULL);
2365 set_optab_libfunc (ge_optab, HFmode, NULL);
2366 set_optab_libfunc (gt_optab, HFmode, NULL);
2367 set_optab_libfunc (unord_optab, HFmode, NULL);
2368 break;
2370 default:
2371 break;
2374 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2376 const arm_fixed_mode_set fixed_arith_modes[] =
2378 { QQmode, "qq" },
2379 { UQQmode, "uqq" },
2380 { HQmode, "hq" },
2381 { UHQmode, "uhq" },
2382 { SQmode, "sq" },
2383 { USQmode, "usq" },
2384 { DQmode, "dq" },
2385 { UDQmode, "udq" },
2386 { TQmode, "tq" },
2387 { UTQmode, "utq" },
2388 { HAmode, "ha" },
2389 { UHAmode, "uha" },
2390 { SAmode, "sa" },
2391 { USAmode, "usa" },
2392 { DAmode, "da" },
2393 { UDAmode, "uda" },
2394 { TAmode, "ta" },
2395 { UTAmode, "uta" }
2397 const arm_fixed_mode_set fixed_conv_modes[] =
2399 { QQmode, "qq" },
2400 { UQQmode, "uqq" },
2401 { HQmode, "hq" },
2402 { UHQmode, "uhq" },
2403 { SQmode, "sq" },
2404 { USQmode, "usq" },
2405 { DQmode, "dq" },
2406 { UDQmode, "udq" },
2407 { TQmode, "tq" },
2408 { UTQmode, "utq" },
2409 { HAmode, "ha" },
2410 { UHAmode, "uha" },
2411 { SAmode, "sa" },
2412 { USAmode, "usa" },
2413 { DAmode, "da" },
2414 { UDAmode, "uda" },
2415 { TAmode, "ta" },
2416 { UTAmode, "uta" },
2417 { QImode, "qi" },
2418 { HImode, "hi" },
2419 { SImode, "si" },
2420 { DImode, "di" },
2421 { TImode, "ti" },
2422 { SFmode, "sf" },
2423 { DFmode, "df" }
2425 unsigned int i, j;
2427 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2429 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2430 "add", fixed_arith_modes[i].name, 3);
2431 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2432 "ssadd", fixed_arith_modes[i].name, 3);
2433 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2434 "usadd", fixed_arith_modes[i].name, 3);
2435 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2436 "sub", fixed_arith_modes[i].name, 3);
2437 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2438 "sssub", fixed_arith_modes[i].name, 3);
2439 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2440 "ussub", fixed_arith_modes[i].name, 3);
2441 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2442 "mul", fixed_arith_modes[i].name, 3);
2443 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2444 "ssmul", fixed_arith_modes[i].name, 3);
2445 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2446 "usmul", fixed_arith_modes[i].name, 3);
2447 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2448 "div", fixed_arith_modes[i].name, 3);
2449 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2450 "udiv", fixed_arith_modes[i].name, 3);
2451 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2452 "ssdiv", fixed_arith_modes[i].name, 3);
2453 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2454 "usdiv", fixed_arith_modes[i].name, 3);
2455 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2456 "neg", fixed_arith_modes[i].name, 2);
2457 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2458 "ssneg", fixed_arith_modes[i].name, 2);
2459 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2460 "usneg", fixed_arith_modes[i].name, 2);
2461 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2462 "ashl", fixed_arith_modes[i].name, 3);
2463 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2464 "ashr", fixed_arith_modes[i].name, 3);
2465 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2466 "lshr", fixed_arith_modes[i].name, 3);
2467 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2468 "ssashl", fixed_arith_modes[i].name, 3);
2469 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2470 "usashl", fixed_arith_modes[i].name, 3);
2471 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2472 "cmp", fixed_arith_modes[i].name, 2);
2475 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2476 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2478 if (i == j
2479 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2480 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2481 continue;
2483 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2484 fixed_conv_modes[j].mode, "fract",
2485 fixed_conv_modes[i].name,
2486 fixed_conv_modes[j].name);
2487 arm_set_fixed_conv_libfunc (satfract_optab,
2488 fixed_conv_modes[i].mode,
2489 fixed_conv_modes[j].mode, "satfract",
2490 fixed_conv_modes[i].name,
2491 fixed_conv_modes[j].name);
2492 arm_set_fixed_conv_libfunc (fractuns_optab,
2493 fixed_conv_modes[i].mode,
2494 fixed_conv_modes[j].mode, "fractuns",
2495 fixed_conv_modes[i].name,
2496 fixed_conv_modes[j].name);
2497 arm_set_fixed_conv_libfunc (satfractuns_optab,
2498 fixed_conv_modes[i].mode,
2499 fixed_conv_modes[j].mode, "satfractuns",
2500 fixed_conv_modes[i].name,
2501 fixed_conv_modes[j].name);
2505 if (TARGET_AAPCS_BASED)
2506 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2509 /* On AAPCS systems, this is the "struct __va_list". */
2510 static GTY(()) tree va_list_type;
2512 /* Return the type to use as __builtin_va_list. */
2513 static tree
2514 arm_build_builtin_va_list (void)
2516 tree va_list_name;
2517 tree ap_field;
2519 if (!TARGET_AAPCS_BASED)
2520 return std_build_builtin_va_list ();
2522 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2523 defined as:
2525 struct __va_list
2527 void *__ap;
2530 The C Library ABI further reinforces this definition in \S
2531 4.1.
2533 We must follow this definition exactly. The structure tag
2534 name is visible in C++ mangled names, and thus forms a part
2535 of the ABI. The field name may be used by people who
2536 #include <stdarg.h>. */
2537 /* Create the type. */
2538 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2539 /* Give it the required name. */
2540 va_list_name = build_decl (BUILTINS_LOCATION,
2541 TYPE_DECL,
2542 get_identifier ("__va_list"),
2543 va_list_type);
2544 DECL_ARTIFICIAL (va_list_name) = 1;
2545 TYPE_NAME (va_list_type) = va_list_name;
2546 TYPE_STUB_DECL (va_list_type) = va_list_name;
2547 /* Create the __ap field. */
2548 ap_field = build_decl (BUILTINS_LOCATION,
2549 FIELD_DECL,
2550 get_identifier ("__ap"),
2551 ptr_type_node);
2552 DECL_ARTIFICIAL (ap_field) = 1;
2553 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2554 TYPE_FIELDS (va_list_type) = ap_field;
2555 /* Compute its layout. */
2556 layout_type (va_list_type);
2558 return va_list_type;
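/* For orientation, the type built above corresponds to the C declarations
   required by the AAPCS:

       struct __va_list { void *__ap; };
       typedef struct __va_list va_list;  */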
2561 /* Return an expression of type "void *" pointing to the next
2562 available argument in a variable-argument list. VALIST is the
2563 user-level va_list object, of type __builtin_va_list. */
2564 static tree
2565 arm_extract_valist_ptr (tree valist)
2567 if (TREE_TYPE (valist) == error_mark_node)
2568 return error_mark_node;
2570 /* On an AAPCS target, the pointer is stored within "struct
2571 va_list". */
2572 if (TARGET_AAPCS_BASED)
2574 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2575 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2576 valist, ap_field, NULL_TREE);
2579 return valist;
2582 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2583 static void
2584 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2586 valist = arm_extract_valist_ptr (valist);
2587 std_expand_builtin_va_start (valist, nextarg);
2590 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2591 static tree
2592 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2593 gimple_seq *post_p)
2595 valist = arm_extract_valist_ptr (valist);
2596 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2599 /* Fix up any incompatible options that the user has specified. */
2600 static void
2601 arm_option_override (void)
2603 if (global_options_set.x_arm_arch_option)
2604 arm_selected_arch = &all_architectures[arm_arch_option];
2606 if (global_options_set.x_arm_cpu_option)
2608 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2609 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2612 if (global_options_set.x_arm_tune_option)
2613 arm_selected_tune = &all_cores[(int) arm_tune_option];
2615 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2616 SUBTARGET_OVERRIDE_OPTIONS;
2617 #endif
2619 if (arm_selected_arch)
2621 if (arm_selected_cpu)
2623 /* Check for conflict between mcpu and march. */
2624 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2626 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2627 arm_selected_cpu->name, arm_selected_arch->name);
2628 /* -march wins for code generation.
2629 -mcpu wins for default tuning. */
2630 if (!arm_selected_tune)
2631 arm_selected_tune = arm_selected_cpu;
2633 arm_selected_cpu = arm_selected_arch;
2635 else
2636 /* -mcpu wins. */
2637 arm_selected_arch = NULL;
2639 else
2640 /* Pick a CPU based on the architecture. */
2641 arm_selected_cpu = arm_selected_arch;
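/* For example (illustrative): "-mcpu=arm926ej-s -march=armv7-a" triggers the
   conflict warning above; code is then generated for armv7-a while default
   tuning still follows arm926ej-s.  */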
2644 /* If the user did not specify a processor, choose one for them. */
2645 if (!arm_selected_cpu)
2647 const struct processors * sel;
2648 unsigned int sought;
2650 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2651 if (!arm_selected_cpu->name)
2653 #ifdef SUBTARGET_CPU_DEFAULT
2654 /* Use the subtarget default CPU if none was specified by
2655 configure. */
2656 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2657 #endif
2658 /* Default to ARM6. */
2659 if (!arm_selected_cpu->name)
2660 arm_selected_cpu = &all_cores[arm6];
2663 sel = arm_selected_cpu;
2664 insn_flags = sel->flags;
2666 /* Now check to see if the user has specified some command line
2667 switches that require certain abilities from the cpu. */
2668 sought = 0;
2670 if (TARGET_INTERWORK || TARGET_THUMB)
2672 sought |= (FL_THUMB | FL_MODE32);
2674 /* There are no ARM processors that support both APCS-26 and
2675 interworking. Therefore we force FL_MODE26 to be removed
2676 from insn_flags here (if it was set), so that the search
2677 below will always be able to find a compatible processor. */
2678 insn_flags &= ~FL_MODE26;
2681 if (sought != 0 && ((sought & insn_flags) != sought))
2683 /* Try to locate a CPU type that supports all of the abilities
2684 of the default CPU, plus the extra abilities requested by
2685 the user. */
2686 for (sel = all_cores; sel->name != NULL; sel++)
2687 if ((sel->flags & sought) == (sought | insn_flags))
2688 break;
2690 if (sel->name == NULL)
2692 unsigned current_bit_count = 0;
2693 const struct processors * best_fit = NULL;
2695 /* Ideally we would like to issue an error message here
2696 saying that it was not possible to find a CPU compatible
2697 with the default CPU, but which also supports the command
2698 line options specified by the programmer, and so they
2699 ought to use the -mcpu=<name> command line option to
2700 override the default CPU type.
2702 If we cannot find a cpu that has both the
2703 characteristics of the default cpu and the given
2704 command line options we scan the array again looking
2705 for a best match. */
2706 for (sel = all_cores; sel->name != NULL; sel++)
2707 if ((sel->flags & sought) == sought)
2709 unsigned count;
2711 count = bit_count (sel->flags & insn_flags);
2713 if (count >= current_bit_count)
2715 best_fit = sel;
2716 current_bit_count = count;
2720 gcc_assert (best_fit);
2721 sel = best_fit;
2724 arm_selected_cpu = sel;
2728 gcc_assert (arm_selected_cpu);
2729 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2730 if (!arm_selected_tune)
2731 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2733 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2734 insn_flags = arm_selected_cpu->flags;
2735 arm_base_arch = arm_selected_cpu->base_arch;
2737 arm_tune = arm_selected_tune->core;
2738 tune_flags = arm_selected_tune->flags;
2739 current_tune = arm_selected_tune->tune;
2741 /* Make sure that the processor choice does not conflict with any of the
2742 other command line choices. */
2743 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2744 error ("target CPU does not support ARM mode");
2746 /* BPABI targets use linker tricks to allow interworking on cores
2747 without thumb support. */
2748 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2750 warning (0, "target CPU does not support interworking" );
2751 target_flags &= ~MASK_INTERWORK;
2754 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2756 warning (0, "target CPU does not support THUMB instructions");
2757 target_flags &= ~MASK_THUMB;
2760 if (TARGET_APCS_FRAME && TARGET_THUMB)
2762 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2763 target_flags &= ~MASK_APCS_FRAME;
2766 /* Callee super interworking implies thumb interworking. Adding
2767 this to the flags here simplifies the logic elsewhere. */
2768 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2769 target_flags |= MASK_INTERWORK;
2771 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2772 from here where no function is being compiled currently. */
2773 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2774 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2776 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2777 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2779 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2781 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2782 target_flags |= MASK_APCS_FRAME;
2785 if (TARGET_POKE_FUNCTION_NAME)
2786 target_flags |= MASK_APCS_FRAME;
2788 if (TARGET_APCS_REENT && flag_pic)
2789 error ("-fpic and -mapcs-reent are incompatible");
2791 if (TARGET_APCS_REENT)
2792 warning (0, "APCS reentrant code not supported. Ignored");
2794 /* If this target is normally configured to use APCS frames, warn if they
2795 are turned off and debugging is turned on. */
2796 if (TARGET_ARM
2797 && write_symbols != NO_DEBUG
2798 && !TARGET_APCS_FRAME
2799 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2800 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2802 if (TARGET_APCS_FLOAT)
2803 warning (0, "passing floating point arguments in fp regs not yet supported");
2805 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2806 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2807 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2808 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2809 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2810 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2811 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2812 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2813 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2814 arm_arch6m = arm_arch6 && !arm_arch_notm;
2815 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2816 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2817 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2818 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2819 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2821 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2822 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2823 thumb_code = TARGET_ARM == 0;
2824 thumb1_code = TARGET_THUMB1 != 0;
2825 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2826 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2827 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2828 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2829 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2830 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2831 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2832 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2833 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2834 if (arm_restrict_it == 2)
2835 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2837 if (!TARGET_THUMB2)
2838 arm_restrict_it = 0;
2840 /* If we are not using the default (ARM mode) section anchor offset
2841 ranges, then set the correct ranges now. */
2842 if (TARGET_THUMB1)
2844 /* Thumb-1 LDR instructions cannot have negative offsets.
2845 Permissible positive offset ranges are 5-bit (for byte loads),
2846 6-bit (for halfword loads), or 7-bit (for word loads).
2847 Empirical results suggest a 7-bit anchor range gives the best
2848 overall code size. */
2849 targetm.min_anchor_offset = 0;
2850 targetm.max_anchor_offset = 127;
2852 else if (TARGET_THUMB2)
2854 /* The minimum is set such that the total size of the block
2855 for a particular anchor is 248 + 1 + 4095 bytes, which is
2856 divisible by eight, ensuring natural spacing of anchors. */
2857 targetm.min_anchor_offset = -248;
2858 targetm.max_anchor_offset = 4095;
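/* Checking the arithmetic above: 248 + 1 + 4095 = 4344 = 8 * 543, so the
   per-anchor block size is indeed divisible by eight.  */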
2861 /* V5 code we generate is completely interworking capable, so we turn off
2862 TARGET_INTERWORK here to avoid many tests later on. */
2864 /* XXX However, we must pass the right pre-processor defines to CPP
2865 or GLD can get confused. This is a hack. */
2866 if (TARGET_INTERWORK)
2867 arm_cpp_interwork = 1;
2869 if (arm_arch5)
2870 target_flags &= ~MASK_INTERWORK;
2872 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2873 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2875 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2876 error ("iwmmxt abi requires an iwmmxt capable cpu");
2878 if (!global_options_set.x_arm_fpu_index)
2880 const char *target_fpu_name;
2881 bool ok;
2883 #ifdef FPUTYPE_DEFAULT
2884 target_fpu_name = FPUTYPE_DEFAULT;
2885 #else
2886 target_fpu_name = "vfp";
2887 #endif
2889 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2890 CL_TARGET);
2891 gcc_assert (ok);
2894 arm_fpu_desc = &all_fpus[arm_fpu_index];
2896 if (TARGET_NEON && !arm_arch7)
2897 error ("target CPU does not support NEON");
2899 switch (arm_fpu_desc->model)
2901 case ARM_FP_MODEL_VFP:
2902 arm_fpu_attr = FPU_VFP;
2903 break;
2905 default:
2906 gcc_unreachable();
2909 if (TARGET_AAPCS_BASED)
2911 if (TARGET_CALLER_INTERWORKING)
2912 error ("AAPCS does not support -mcaller-super-interworking");
2913 else
2914 if (TARGET_CALLEE_INTERWORKING)
2915 error ("AAPCS does not support -mcallee-super-interworking");
2918 /* iWMMXt and NEON are incompatible. */
2919 if (TARGET_IWMMXT && TARGET_NEON)
2920 error ("iWMMXt and NEON are incompatible");
2922 /* iWMMXt unsupported under Thumb mode. */
2923 if (TARGET_THUMB && TARGET_IWMMXT)
2924 error ("iWMMXt unsupported under Thumb mode");
2926 /* __fp16 support currently assumes the core has ldrh. */
2927 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2928 sorry ("__fp16 and no ldrh");
2930 /* If soft-float is specified then don't use FPU. */
2931 if (TARGET_SOFT_FLOAT)
2932 arm_fpu_attr = FPU_NONE;
2934 if (TARGET_AAPCS_BASED)
2936 if (arm_abi == ARM_ABI_IWMMXT)
2937 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2938 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2939 && TARGET_HARD_FLOAT
2940 && TARGET_VFP)
2941 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2942 else
2943 arm_pcs_default = ARM_PCS_AAPCS;
2945 else
2947 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2948 sorry ("-mfloat-abi=hard and VFP");
2950 if (arm_abi == ARM_ABI_APCS)
2951 arm_pcs_default = ARM_PCS_APCS;
2952 else
2953 arm_pcs_default = ARM_PCS_ATPCS;
2956 /* For arm2/3 there is no need to do any scheduling if we are doing
2957 software floating-point. */
2958 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2959 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2961 /* Use the cp15 method if it is available. */
2962 if (target_thread_pointer == TP_AUTO)
2964 if (arm_arch6k && !TARGET_THUMB1)
2965 target_thread_pointer = TP_CP15;
2966 else
2967 target_thread_pointer = TP_SOFT;
2970 if (TARGET_HARD_TP && TARGET_THUMB1)
2971 error ("can not use -mtp=cp15 with 16-bit Thumb");
2973 /* Override the default structure alignment for AAPCS ABI. */
2974 if (!global_options_set.x_arm_structure_size_boundary)
2976 if (TARGET_AAPCS_BASED)
2977 arm_structure_size_boundary = 8;
2979 else
2981 if (arm_structure_size_boundary != 8
2982 && arm_structure_size_boundary != 32
2983 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2985 if (ARM_DOUBLEWORD_ALIGN)
2986 warning (0,
2987 "structure size boundary can only be set to 8, 32 or 64");
2988 else
2989 warning (0, "structure size boundary can only be set to 8 or 32");
2990 arm_structure_size_boundary
2991 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2995 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2997 error ("RTP PIC is incompatible with Thumb");
2998 flag_pic = 0;
3001 /* If stack checking is disabled, we can use r10 as the PIC register,
3002 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3003 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3005 if (TARGET_VXWORKS_RTP)
3006 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3007 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3010 if (flag_pic && TARGET_VXWORKS_RTP)
3011 arm_pic_register = 9;
3013 if (arm_pic_register_string != NULL)
3015 int pic_register = decode_reg_name (arm_pic_register_string);
3017 if (!flag_pic)
3018 warning (0, "-mpic-register= is useless without -fpic");
3020 /* Prevent the user from choosing an obviously stupid PIC register. */
3021 else if (pic_register < 0 || call_used_regs[pic_register]
3022 || pic_register == HARD_FRAME_POINTER_REGNUM
3023 || pic_register == STACK_POINTER_REGNUM
3024 || pic_register >= PC_REGNUM
3025 || (TARGET_VXWORKS_RTP
3026 && (unsigned int) pic_register != arm_pic_register))
3027 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3028 else
3029 arm_pic_register = pic_register;
3032 if (TARGET_VXWORKS_RTP
3033 && !global_options_set.x_arm_pic_data_is_text_relative)
3034 arm_pic_data_is_text_relative = 0;
3036 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3037 if (fix_cm3_ldrd == 2)
3039 if (arm_selected_cpu->core == cortexm3)
3040 fix_cm3_ldrd = 1;
3041 else
3042 fix_cm3_ldrd = 0;
3045 /* Enable -munaligned-access by default for
3046 - all ARMv6 architecture-based processors
3047 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3048 - ARMv8 architecture-based processors.
3050 Disable -munaligned-access by default for
3051 - all pre-ARMv6 architecture-based processors
3052 - ARMv6-M architecture-based processors. */
3054 if (unaligned_access == 2)
3056 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3057 unaligned_access = 1;
3058 else
3059 unaligned_access = 0;
3061 else if (unaligned_access == 1
3062 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3064 warning (0, "target CPU does not support unaligned accesses");
3065 unaligned_access = 0;
3068 if (TARGET_THUMB1 && flag_schedule_insns)
3070 /* Don't warn since it's on by default in -O2. */
3071 flag_schedule_insns = 0;
3074 if (optimize_size)
3076 /* If optimizing for size, bump the number of instructions that we
3077 are prepared to conditionally execute (even on a StrongARM). */
3078 max_insns_skipped = 6;
3080 /* For THUMB2, we limit the conditional sequence to one IT block. */
3081 if (TARGET_THUMB2)
3082 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3084 else
3085 max_insns_skipped = current_tune->max_insns_skipped;
3087 /* Hot/Cold partitioning is not currently supported, since we can't
3088 handle literal pool placement in that case. */
3089 if (flag_reorder_blocks_and_partition)
3091 inform (input_location,
3092 "-freorder-blocks-and-partition not supported on this architecture");
3093 flag_reorder_blocks_and_partition = 0;
3094 flag_reorder_blocks = 1;
3097 if (flag_pic)
3098 /* Hoisting PIC address calculations more aggressively provides a small,
3099 but measurable, size reduction for PIC code. Therefore, we decrease
3100 the bar for unrestricted expression hoisting to the cost of PIC address
3101 calculation, which is 2 instructions. */
3102 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3103 global_options.x_param_values,
3104 global_options_set.x_param_values);
3106 /* ARM EABI defaults to strict volatile bitfields. */
3107 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3108 && abi_version_at_least(2))
3109 flag_strict_volatile_bitfields = 1;
3111 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3112 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3113 if (flag_prefetch_loop_arrays < 0
3114 && HAVE_prefetch
3115 && optimize >= 3
3116 && current_tune->num_prefetch_slots > 0)
3117 flag_prefetch_loop_arrays = 1;
3119 /* Set up the parameters used by the prefetching algorithm. Do not override the
3120 defaults unless we are tuning for a core we have researched values for. */
3121 if (current_tune->num_prefetch_slots > 0)
3122 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3123 current_tune->num_prefetch_slots,
3124 global_options.x_param_values,
3125 global_options_set.x_param_values);
3126 if (current_tune->l1_cache_line_size >= 0)
3127 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3128 current_tune->l1_cache_line_size,
3129 global_options.x_param_values,
3130 global_options_set.x_param_values);
3131 if (current_tune->l1_cache_size >= 0)
3132 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3133 current_tune->l1_cache_size,
3134 global_options.x_param_values,
3135 global_options_set.x_param_values);
3137 /* Use Neon rather than core registers to perform 64-bit
3138 operations. */
3139 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3140 if (use_neon_for_64bits == 1)
3141 prefer_neon_for_64bits = true;
3143 /* Use the alternative scheduling-pressure algorithm by default. */
3144 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3145 global_options.x_param_values,
3146 global_options_set.x_param_values);
3148 /* Disable shrink-wrap when optimizing function for size, since it tends to
3149 generate additional returns. */
3150 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3151 flag_shrink_wrap = false;
3152 /* TBD: Dwarf info for apcs frame is not handled yet. */
3153 if (TARGET_APCS_FRAME)
3154 flag_shrink_wrap = false;
3156 /* We only support -mslow-flash-data on armv7-m targets. */
3157 if (target_slow_flash_data
3158 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3159 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3160 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3162 /* Currently, for slow flash data, we just disable literal pools. */
3163 if (target_slow_flash_data)
3164 arm_disable_literal_pool = true;
3166 /* Thumb2 inline assembly code should always use unified syntax.
3167 This will apply to ARM and Thumb1 eventually. */
3168 if (TARGET_THUMB2)
3169 inline_asm_unified = 1;
3171 /* Register global variables with the garbage collector. */
3172 arm_add_gc_roots ();
3175 static void
3176 arm_add_gc_roots (void)
3178 gcc_obstack_init(&minipool_obstack);
3179 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3182 /* A table of known ARM exception types.
3183 For use with the interrupt function attribute. */
3185 typedef struct
3187 const char *const arg;
3188 const unsigned long return_value;
3190 isr_attribute_arg;
3192 static const isr_attribute_arg isr_attribute_args [] =
3194 { "IRQ", ARM_FT_ISR },
3195 { "irq", ARM_FT_ISR },
3196 { "FIQ", ARM_FT_FIQ },
3197 { "fiq", ARM_FT_FIQ },
3198 { "ABORT", ARM_FT_ISR },
3199 { "abort", ARM_FT_ISR },
3200 { "ABORT", ARM_FT_ISR },
3201 { "abort", ARM_FT_ISR },
3202 { "UNDEF", ARM_FT_EXCEPTION },
3203 { "undef", ARM_FT_EXCEPTION },
3204 { "SWI", ARM_FT_EXCEPTION },
3205 { "swi", ARM_FT_EXCEPTION },
3206 { NULL, ARM_FT_NORMAL }
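/* For illustration, these strings come from the "interrupt" (or "isr")
   function attribute, e.g.:

       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value below maps the string to the corresponding ARM_FT_* type.  */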
3209 /* Returns the (interrupt) function type of the current
3210 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3212 static unsigned long
3213 arm_isr_value (tree argument)
3215 const isr_attribute_arg * ptr;
3216 const char * arg;
3218 if (!arm_arch_notm)
3219 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3221 /* No argument - default to IRQ. */
3222 if (argument == NULL_TREE)
3223 return ARM_FT_ISR;
3225 /* Get the value of the argument. */
3226 if (TREE_VALUE (argument) == NULL_TREE
3227 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3228 return ARM_FT_UNKNOWN;
3230 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3232 /* Check it against the list of known arguments. */
3233 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3234 if (streq (arg, ptr->arg))
3235 return ptr->return_value;
3237 /* An unrecognized interrupt type. */
3238 return ARM_FT_UNKNOWN;
3241 /* Computes the type of the current function. */
3243 static unsigned long
3244 arm_compute_func_type (void)
3246 unsigned long type = ARM_FT_UNKNOWN;
3247 tree a;
3248 tree attr;
3250 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3252 /* Decide if the current function is volatile. Such functions
3253 never return, and many memory cycles can be saved by not storing
3254 register values that will never be needed again. This optimization
3255 was added to speed up context switching in a kernel application. */
3256 if (optimize > 0
3257 && (TREE_NOTHROW (current_function_decl)
3258 || !(flag_unwind_tables
3259 || (flag_exceptions
3260 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3261 && TREE_THIS_VOLATILE (current_function_decl))
3262 type |= ARM_FT_VOLATILE;
3264 if (cfun->static_chain_decl != NULL)
3265 type |= ARM_FT_NESTED;
3267 attr = DECL_ATTRIBUTES (current_function_decl);
3269 a = lookup_attribute ("naked", attr);
3270 if (a != NULL_TREE)
3271 type |= ARM_FT_NAKED;
3273 a = lookup_attribute ("isr", attr);
3274 if (a == NULL_TREE)
3275 a = lookup_attribute ("interrupt", attr);
3277 if (a == NULL_TREE)
3278 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3279 else
3280 type |= arm_isr_value (TREE_VALUE (a));
3282 return type;
3285 /* Returns the type of the current function. */
3287 unsigned long
3288 arm_current_func_type (void)
3290 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3291 cfun->machine->func_type = arm_compute_func_type ();
3293 return cfun->machine->func_type;
3296 bool
3297 arm_allocate_stack_slots_for_args (void)
3299 /* Naked functions should not allocate stack slots for arguments. */
3300 return !IS_NAKED (arm_current_func_type ());
3303 static bool
3304 arm_warn_func_return (tree decl)
3306 /* Naked functions are implemented entirely in assembly, including the
3307 return sequence, so suppress warnings about this. */
3308 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3312 /* Output assembler code for a block containing the constant parts
3313 of a trampoline, leaving space for the variable parts.
3315 On the ARM, (if r8 is the static chain regnum, and remembering that
3316 referencing pc adds an offset of 8) the trampoline looks like:
3317 ldr r8, [pc, #0]
3318 ldr pc, [pc]
3319 .word static chain value
3320 .word function's address
3321 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3323 static void
3324 arm_asm_trampoline_template (FILE *f)
3326 if (TARGET_ARM)
3328 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3329 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3331 else if (TARGET_THUMB2)
3333 /* The Thumb-2 trampoline is similar to the arm implementation.
3334 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3335 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3336 STATIC_CHAIN_REGNUM, PC_REGNUM);
3337 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3339 else
3341 ASM_OUTPUT_ALIGN (f, 2);
3342 fprintf (f, "\t.code\t16\n");
3343 fprintf (f, ".Ltrampoline_start:\n");
3344 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3345 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3346 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3347 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3348 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3349 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3351 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3352 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3355 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3357 static void
3358 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3360 rtx fnaddr, mem, a_tramp;
3362 emit_block_move (m_tramp, assemble_trampoline_template (),
3363 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3365 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3366 emit_move_insn (mem, chain_value);
3368 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3369 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3370 emit_move_insn (mem, fnaddr);
3372 a_tramp = XEXP (m_tramp, 0);
3373 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3374 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3375 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
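/* Worked example (illustrative, not part of the original source): after
   arm_trampoline_init runs on a 32-bit target the trampoline occupies four
   words.  In ARM state the pc reads as the instruction address plus 8, so
   the code and data line up as follows (assuming r8 is the static chain
   register):

       offset 0:  ldr r8, [pc, #0]    ; pc reads as 0 + 8  -> loads offset 8
       offset 4:  ldr pc, [pc]        ; pc reads as 4 + 8  -> loads offset 12
       offset 8:  <static chain value>       ; stored by the code above
       offset 12: <target function address>  ; stored by the code above

   In Thumb-2 state the pc reads as the instruction address plus 4, which is
   why the template above uses a #4 displacement to reach the same words.  */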
3378 /* Thumb trampolines should be entered in thumb mode, so set
3379 the bottom bit of the address. */
3381 static rtx
3382 arm_trampoline_adjust_address (rtx addr)
3384 if (TARGET_THUMB)
3385 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3386 NULL, 0, OPTAB_LIB_WIDEN);
3387 return addr;
3390 /* Return 1 if it is possible to return using a single instruction.
3391 If SIBLING is non-null, this is a test for a return before a sibling
3392 call. SIBLING is the call insn, so we can examine its register usage. */
3394 int
3395 use_return_insn (int iscond, rtx sibling)
3397 int regno;
3398 unsigned int func_type;
3399 unsigned long saved_int_regs;
3400 unsigned HOST_WIDE_INT stack_adjust;
3401 arm_stack_offsets *offsets;
3403 /* Never use a return instruction before reload has run. */
3404 if (!reload_completed)
3405 return 0;
3407 func_type = arm_current_func_type ();
3409 /* Naked, volatile and stack alignment functions need special
3410 consideration. */
3411 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3412 return 0;
3414 /* So do interrupt functions that use the frame pointer and Thumb
3415 interrupt functions. */
3416 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3417 return 0;
3419 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3420 && !optimize_function_for_size_p (cfun))
3421 return 0;
3423 offsets = arm_get_frame_offsets ();
3424 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3426 /* As do variadic functions. */
3427 if (crtl->args.pretend_args_size
3428 || cfun->machine->uses_anonymous_args
3429 /* Or if the function calls __builtin_eh_return () */
3430 || crtl->calls_eh_return
3431 /* Or if the function calls alloca */
3432 || cfun->calls_alloca
3433 /* Or if there is a stack adjustment. However, if the stack pointer
3434 is saved on the stack, we can use a pre-incrementing stack load. */
3435 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3436 && stack_adjust == 4)))
3437 return 0;
3439 saved_int_regs = offsets->saved_regs_mask;
3441 /* Unfortunately, the insn
3443 ldmib sp, {..., sp, ...}
3445 triggers a bug on most SA-110 based devices, such that the stack
3446 pointer won't be correctly restored if the instruction takes a
3447 page fault. We work around this problem by popping r3 along with
3448 the other registers, since that is never slower than executing
3449 another instruction.
3451 We test for !arm_arch5 here, because code for any architecture
3452 less than this could potentially be run on one of the buggy
3453 chips. */
3454 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3456 /* Validate that r3 is a call-clobbered register (always true in
3457 the default ABI) ... */
3458 if (!call_used_regs[3])
3459 return 0;
3461 /* ... that it isn't being used for a return value ... */
3462 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3463 return 0;
3465 /* ... or for a tail-call argument ... */
3466 if (sibling)
3468 gcc_assert (CALL_P (sibling));
3470 if (find_regno_fusage (sibling, USE, 3))
3471 return 0;
3474 /* ... and that there are no call-saved registers in r0-r2
3475 (always true in the default ABI). */
3476 if (saved_int_regs & 0x7)
3477 return 0;
3480 /* Can't be done if interworking with Thumb, and any registers have been
3481 stacked. */
3482 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3483 return 0;
3485 /* On StrongARM, conditional returns are expensive if they aren't
3486 taken and multiple registers have been stacked. */
3487 if (iscond && arm_tune_strongarm)
3489 /* Conditional return when just the LR is stored is a simple
3490 conditional-load instruction, that's not expensive. */
3491 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3492 return 0;
3494 if (flag_pic
3495 && arm_pic_register != INVALID_REGNUM
3496 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3497 return 0;
3500 /* If there are saved registers but the LR isn't saved, then we need
3501 two instructions for the return. */
3502 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3503 return 0;
3505 /* Can't be done if any of the VFP regs are pushed,
3506 since this also requires an insn. */
3507 if (TARGET_HARD_FLOAT && TARGET_VFP)
3508 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3509 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3510 return 0;
3512 if (TARGET_REALLY_IWMMXT)
3513 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3514 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3515 return 0;
3517 return 1;
3520 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3521 shrink-wrapping if possible. This is the case if we need to emit a
3522 prologue, which we can test by looking at the offsets. */
3523 bool
3524 use_simple_return_p (void)
3526 arm_stack_offsets *offsets;
3528 offsets = arm_get_frame_offsets ();
3529 return offsets->outgoing_args != 0;
3532 /* Return TRUE if int I is a valid immediate ARM constant. */
3534 int
3535 const_ok_for_arm (HOST_WIDE_INT i)
3537 int lowbit;
3539 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3540 be all zero, or all one. */
3541 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3542 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3543 != ((~(unsigned HOST_WIDE_INT) 0)
3544 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3545 return FALSE;
3547 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3549 /* Fast return for 0 and small values. We must do this for zero, since
3550 the code below can't handle that one case. */
3551 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3552 return TRUE;
3554 /* Get the number of trailing zeros. */
3555 lowbit = ffs((int) i) - 1;
3557 /* Only even shifts are allowed in ARM mode so round down to the
3558 nearest even number. */
3559 if (TARGET_ARM)
3560 lowbit &= ~1;
3562 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3563 return TRUE;
3565 if (TARGET_ARM)
3567 /* Allow rotated constants in ARM mode. */
3568 if (lowbit <= 4
3569 && ((i & ~0xc000003f) == 0
3570 || (i & ~0xf000000f) == 0
3571 || (i & ~0xfc000003) == 0))
3572 return TRUE;
3574 else
3576 HOST_WIDE_INT v;
3578 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3579 v = i & 0xff;
3580 v |= v << 16;
3581 if (i == v || i == (v | (v << 8)))
3582 return TRUE;
3584 /* Allow repeated pattern 0xXY00XY00. */
3585 v = i & 0xff00;
3586 v |= v << 16;
3587 if (i == v)
3588 return TRUE;
3591 return FALSE;
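/* A minimal standalone sketch of the ARM-mode rule tested above: a value is
   a valid immediate if it is an 8-bit value rotated right by an even amount.
   The helper below is illustrative only (a hypothetical function, kept under
   "#if 0" so it plays no part in the build).  */
#if 0
static int
arm_rotated_imm8_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT bits, undoing a rotate-right encoding.  */
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if ((v & ~0xffu) == 0)
        return 1;   /* E.g. 0xff, 0xff000000 and 0x000ff000 all pass.  */
    }
  return 0;         /* E.g. 0x101 and 0x00ffff00 do not.  */
}
#endif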
3594 /* Return true if I is a valid constant for the operation CODE. */
3595 int
3596 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3598 if (const_ok_for_arm (i))
3599 return 1;
3601 switch (code)
3603 case SET:
3604 /* See if we can use movw. */
3605 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3606 return 1;
3607 else
3608 /* Otherwise, try mvn. */
3609 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3611 case PLUS:
3612 /* See if we can use addw or subw. */
3613 if (TARGET_THUMB2
3614 && ((i & 0xfffff000) == 0
3615 || ((-i) & 0xfffff000) == 0))
3616 return 1;
3617 /* else fall through. */
3619 case COMPARE:
3620 case EQ:
3621 case NE:
3622 case GT:
3623 case LE:
3624 case LT:
3625 case GE:
3626 case GEU:
3627 case LTU:
3628 case GTU:
3629 case LEU:
3630 case UNORDERED:
3631 case ORDERED:
3632 case UNEQ:
3633 case UNGE:
3634 case UNLT:
3635 case UNGT:
3636 case UNLE:
3637 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3639 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3640 case XOR:
3641 return 0;
3643 case IOR:
3644 if (TARGET_THUMB2)
3645 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3646 return 0;
3648 case AND:
3649 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3651 default:
3652 gcc_unreachable ();
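/* Worked examples of the rules above (illustrative only):
     SET  with i = 0x1234     : not an 8-bit rotated value, but a target
                                with movw/movt (arm_arch_thumb2) loads it
                                with a single movw.
     SET  with i = 0xffffff00 : ~i = 0xff is valid, so one mvn suffices.
     PLUS with i = 0xfff      : fits the 12-bit addw/subw range on Thumb-2.
     COMPARE with i = -200    : -i = 200 is valid, so cmp becomes cmn #200.
     AND  with i = 0xffffff00 : ~i = 0xff is valid, so bic #0xff is used.  */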
3656 /* Return true if I is a valid di mode constant for the operation CODE. */
3657 int
3658 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3660 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3661 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3662 rtx hi = GEN_INT (hi_val);
3663 rtx lo = GEN_INT (lo_val);
3665 if (TARGET_THUMB1)
3666 return 0;
3668 switch (code)
3670 case AND:
3671 case IOR:
3672 case XOR:
3673 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3674 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3675 case PLUS:
3676 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3678 default:
3679 return 0;
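/* Worked DImode example (illustrative only): for IOR with the constant
   0x00000000FFFFFFFF, hi_val is 0 and lo_val is 0xFFFFFFFF.  Both halves
   are acceptable (an all-ones word is special-cased, since ORing with all
   ones just sets that word to -1), so the 64-bit operation can be split
   into two 32-bit operations without first building the constant in a
   register pair.  */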
3683 /* Emit a sequence of insns to handle a large constant.
3684 CODE is the code of the operation required, it can be any of SET, PLUS,
3685 IOR, AND, XOR, MINUS;
3686 MODE is the mode in which the operation is being performed;
3687 VAL is the integer to operate on;
3688 SOURCE is the other operand (a register, or a null-pointer for SET);
3689 SUBTARGETS means it is safe to create scratch registers if that will
3690 either produce a simpler sequence, or we will want to cse the values.
3691 Return value is the number of insns emitted. */
3693 /* ??? Tweak this for thumb2. */
3694 int
3695 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3696 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3698 rtx cond;
3700 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3701 cond = COND_EXEC_TEST (PATTERN (insn));
3702 else
3703 cond = NULL_RTX;
3705 if (subtargets || code == SET
3706 || (REG_P (target) && REG_P (source)
3707 && REGNO (target) != REGNO (source)))
3709 /* After arm_reorg has been called, we can't fix up expensive
3710 constants by pushing them into memory so we must synthesize
3711 them in-line, regardless of the cost. This is only likely to
3712 be more costly on chips that have load delay slots and we are
3713 compiling without running the scheduler (so no splitting
3714 occurred before the final instruction emission).
3716 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3718 if (!cfun->machine->after_arm_reorg
3719 && !cond
3720 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3721 1, 0)
3722 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3723 + (code != SET))))
3725 if (code == SET)
3727 /* Currently SET is the only monadic value for CODE; all
3728 the rest are dyadic. */
3729 if (TARGET_USE_MOVT)
3730 arm_emit_movpair (target, GEN_INT (val));
3731 else
3732 emit_set_insn (target, GEN_INT (val));
3734 return 1;
3736 else
3738 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3740 if (TARGET_USE_MOVT)
3741 arm_emit_movpair (temp, GEN_INT (val));
3742 else
3743 emit_set_insn (temp, GEN_INT (val));
3745 /* For MINUS, the value is subtracted from, since we never
3746 have subtraction of a constant. */
3747 if (code == MINUS)
3748 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3749 else
3750 emit_set_insn (target,
3751 gen_rtx_fmt_ee (code, mode, source, temp));
3752 return 2;
3757 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3761 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3762 ARM/THUMB2 immediates, and add up to VAL.
3763 The function return value gives the number of insns required. */
3764 static int
3765 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3766 struct four_ints *return_sequence)
3768 int best_consecutive_zeros = 0;
3769 int i;
3770 int best_start = 0;
3771 int insns1, insns2;
3772 struct four_ints tmp_sequence;
3774 /* If we aren't targeting ARM, the best place to start is always at
3775 the bottom, otherwise look more closely. */
3776 if (TARGET_ARM)
3778 for (i = 0; i < 32; i += 2)
3780 int consecutive_zeros = 0;
3782 if (!(val & (3 << i)))
3784 while ((i < 32) && !(val & (3 << i)))
3786 consecutive_zeros += 2;
3787 i += 2;
3789 if (consecutive_zeros > best_consecutive_zeros)
3791 best_consecutive_zeros = consecutive_zeros;
3792 best_start = i - consecutive_zeros;
3794 i -= 2;
3799 /* So long as it won't require any more insns to do so, it's
3800 desirable to emit a small constant (in bits 0...9) in the last
3801 insn. This way there is more chance that it can be combined with
3802 a later addressing insn to form a pre-indexed load or store
3803 operation. Consider:
3805 *((volatile int *)0xe0000100) = 1;
3806 *((volatile int *)0xe0000110) = 2;
3808 We want this to wind up as:
3810 mov rA, #0xe0000000
3811 mov rB, #1
3812 str rB, [rA, #0x100]
3813 mov rB, #2
3814 str rB, [rA, #0x110]
3816 rather than having to synthesize both large constants from scratch.
3818 Therefore, we calculate how many insns would be required to emit
3819 the constant starting from `best_start', and also starting from
3820 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3821 yield a shorter sequence, we may as well use zero. */
3822 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3823 if (best_start != 0
3824 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3826 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3827 if (insns2 <= insns1)
3829 *return_sequence = tmp_sequence;
3830 insns1 = insns2;
3834 return insns1;
3837 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3838 static int
3839 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3840 struct four_ints *return_sequence, int i)
3842 int remainder = val & 0xffffffff;
3843 int insns = 0;
3845 /* Try and find a way of doing the job in either two or three
3846 instructions.
3848 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3849 location. We start at position I. This may be the MSB, or
3850 optimal_immediate_sequence may have positioned it at the largest block
3851 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3852 wrapping around to the top of the word when we drop off the bottom.
3853 In the worst case this code should produce no more than four insns.
3855 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3856 constants, shifted to any arbitrary location. We should always start
3857 at the MSB. */
3860 int end;
3861 unsigned int b1, b2, b3, b4;
3862 unsigned HOST_WIDE_INT result;
3863 int loc;
3865 gcc_assert (insns < 4);
3867 if (i <= 0)
3868 i += 32;
3870 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3871 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3873 loc = i;
3874 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3875 /* We can use addw/subw for the last 12 bits. */
3876 result = remainder;
3877 else
3879 /* Use an 8-bit shifted/rotated immediate. */
3880 end = i - 8;
3881 if (end < 0)
3882 end += 32;
3883 result = remainder & ((0x0ff << end)
3884 | ((i < end) ? (0xff >> (32 - end))
3885 : 0));
3886 i -= 8;
3889 else
3891 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3892 arbitrary shifts. */
3893 i -= TARGET_ARM ? 2 : 1;
3894 continue;
3897 /* Next, see if we can do a better job with a thumb2 replicated
3898 constant.
3900 We do it this way around to catch cases like 0x01F001E0 where
3901 two 8-bit immediates would work, but a replicated constant would
3902 make it worse.
3904 TODO: 16-bit constants that don't clear all the bits, but still win.
3905 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3906 if (TARGET_THUMB2)
3908 b1 = (remainder & 0xff000000) >> 24;
3909 b2 = (remainder & 0x00ff0000) >> 16;
3910 b3 = (remainder & 0x0000ff00) >> 8;
3911 b4 = remainder & 0xff;
3913 if (loc > 24)
3915 /* The 8-bit immediate already found clears b1 (and maybe b2),
3916 but must leave b3 and b4 alone. */
3918 /* First try to find a 32-bit replicated constant that clears
3919 almost everything. We can assume that we can't do it in one,
3920 or else we wouldn't be here. */
3921 unsigned int tmp = b1 & b2 & b3 & b4;
3922 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3923 + (tmp << 24);
3924 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3925 + (tmp == b3) + (tmp == b4);
3926 if (tmp
3927 && (matching_bytes >= 3
3928 || (matching_bytes == 2
3929 && const_ok_for_op (remainder & ~tmp2, code))))
3931 /* At least 3 of the bytes match, and the fourth has at
3932 least as many bits set, or two of the bytes match
3933 and it will only require one more insn to finish. */
3934 result = tmp2;
3935 i = tmp != b1 ? 32
3936 : tmp != b2 ? 24
3937 : tmp != b3 ? 16
3938 : 8;
3941 /* Second, try to find a 16-bit replicated constant that can
3942 leave three of the bytes clear. If b2 or b4 is already
3943 zero, then we can. If the 8-bit from above would not
3944 clear b2 anyway, then we still win. */
3945 else if (b1 == b3 && (!b2 || !b4
3946 || (remainder & 0x00ff0000 & ~result)))
3948 result = remainder & 0xff00ff00;
3949 i = 24;
3952 else if (loc > 16)
3954 /* The 8-bit immediate already found clears b2 (and maybe b3)
3955 and we don't get here unless b1 is already clear, but it will
3956 leave b4 unchanged. */
3958 /* If we can clear b2 and b4 at once, then we win, since the
3959 8-bits couldn't possibly reach that far. */
3960 if (b2 == b4)
3962 result = remainder & 0x00ff00ff;
3963 i = 16;
3968 return_sequence->i[insns++] = result;
3969 remainder &= ~result;
3971 if (code == SET || code == MINUS)
3972 code = PLUS;
3974 while (remainder);
3976 return insns;
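/* Worked example (illustrative only): synthesizing 0x00000fff in ARM mode.
   No single 8-bit rotated immediate covers twelve set bits, so the loop
   above peels off two chunks, e.g. 0xff0 and 0x00f, giving two-insn
   sequences such as

       mov rN, #0xff0          for SET:  followed by  add rN, rN, #0xf
       orr rN, rM, #0xff0      for IOR:  followed by  orr rN, rN, #0xf

   On Thumb-2 a replicated-byte value such as 0x00ff00ff needs no splitting
   at all, since it is itself a valid modified immediate.  */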
3979 /* Emit an instruction with the indicated PATTERN. If COND is
3980 non-NULL, conditionalize the execution of the instruction on COND
3981 being true. */
3983 static void
3984 emit_constant_insn (rtx cond, rtx pattern)
3986 if (cond)
3987 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3988 emit_insn (pattern);
3991 /* As above, but extra parameter GENERATE which, if clear, suppresses
3992 RTL generation. */
3994 static int
3995 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3996 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3997 int generate)
3999 int can_invert = 0;
4000 int can_negate = 0;
4001 int final_invert = 0;
4002 int i;
4003 int set_sign_bit_copies = 0;
4004 int clear_sign_bit_copies = 0;
4005 int clear_zero_bit_copies = 0;
4006 int set_zero_bit_copies = 0;
4007 int insns = 0, neg_insns, inv_insns;
4008 unsigned HOST_WIDE_INT temp1, temp2;
4009 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4010 struct four_ints *immediates;
4011 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4013 /* Find out which operations are safe for a given CODE. Also do a quick
4014 check for degenerate cases; these can occur when DImode operations
4015 are split. */
4016 switch (code)
4018 case SET:
4019 can_invert = 1;
4020 break;
4022 case PLUS:
4023 can_negate = 1;
4024 break;
4026 case IOR:
4027 if (remainder == 0xffffffff)
4029 if (generate)
4030 emit_constant_insn (cond,
4031 gen_rtx_SET (VOIDmode, target,
4032 GEN_INT (ARM_SIGN_EXTEND (val))));
4033 return 1;
4036 if (remainder == 0)
4038 if (reload_completed && rtx_equal_p (target, source))
4039 return 0;
4041 if (generate)
4042 emit_constant_insn (cond,
4043 gen_rtx_SET (VOIDmode, target, source));
4044 return 1;
4046 break;
4048 case AND:
4049 if (remainder == 0)
4051 if (generate)
4052 emit_constant_insn (cond,
4053 gen_rtx_SET (VOIDmode, target, const0_rtx));
4054 return 1;
4056 if (remainder == 0xffffffff)
4058 if (reload_completed && rtx_equal_p (target, source))
4059 return 0;
4060 if (generate)
4061 emit_constant_insn (cond,
4062 gen_rtx_SET (VOIDmode, target, source));
4063 return 1;
4065 can_invert = 1;
4066 break;
4068 case XOR:
4069 if (remainder == 0)
4071 if (reload_completed && rtx_equal_p (target, source))
4072 return 0;
4073 if (generate)
4074 emit_constant_insn (cond,
4075 gen_rtx_SET (VOIDmode, target, source));
4076 return 1;
4079 if (remainder == 0xffffffff)
4081 if (generate)
4082 emit_constant_insn (cond,
4083 gen_rtx_SET (VOIDmode, target,
4084 gen_rtx_NOT (mode, source)));
4085 return 1;
4087 final_invert = 1;
4088 break;
4090 case MINUS:
4091 /* We treat MINUS as (val - source), since (source - val) is always
4092 passed as (source + (-val)). */
4093 if (remainder == 0)
4095 if (generate)
4096 emit_constant_insn (cond,
4097 gen_rtx_SET (VOIDmode, target,
4098 gen_rtx_NEG (mode, source)));
4099 return 1;
4101 if (const_ok_for_arm (val))
4103 if (generate)
4104 emit_constant_insn (cond,
4105 gen_rtx_SET (VOIDmode, target,
4106 gen_rtx_MINUS (mode, GEN_INT (val),
4107 source)));
4108 return 1;
4111 break;
4113 default:
4114 gcc_unreachable ();
4117 /* If we can do it in one insn get out quickly. */
4118 if (const_ok_for_op (val, code))
4120 if (generate)
4121 emit_constant_insn (cond,
4122 gen_rtx_SET (VOIDmode, target,
4123 (source
4124 ? gen_rtx_fmt_ee (code, mode, source,
4125 GEN_INT (val))
4126 : GEN_INT (val))));
4127 return 1;
4130 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4131 insn. */
4132 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4133 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4135 if (generate)
4137 if (mode == SImode && i == 16)
4138 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4139 smaller insn. */
4140 emit_constant_insn (cond,
4141 gen_zero_extendhisi2
4142 (target, gen_lowpart (HImode, source)));
4143 else
4144 /* The extzv pattern only supports SImode, but we can coerce
4145 the operands into that mode. */
4146 emit_constant_insn (cond,
4147 gen_extzv_t2 (gen_lowpart (SImode, target),
4148 gen_lowpart (SImode, source),
4149 GEN_INT (i), const0_rtx));
4152 return 1;
4155 /* Calculate a few attributes that may be useful for specific
4156 optimizations. */
4157 /* Count number of leading zeros. */
4158 for (i = 31; i >= 0; i--)
4160 if ((remainder & (1 << i)) == 0)
4161 clear_sign_bit_copies++;
4162 else
4163 break;
4166 /* Count number of leading 1's. */
4167 for (i = 31; i >= 0; i--)
4169 if ((remainder & (1 << i)) != 0)
4170 set_sign_bit_copies++;
4171 else
4172 break;
4175 /* Count number of trailing zero's. */
4176 for (i = 0; i <= 31; i++)
4178 if ((remainder & (1 << i)) == 0)
4179 clear_zero_bit_copies++;
4180 else
4181 break;
4184 /* Count number of trailing 1's. */
4185 for (i = 0; i <= 31; i++)
4187 if ((remainder & (1 << i)) != 0)
4188 set_zero_bit_copies++;
4189 else
4190 break;
4193 switch (code)
4195 case SET:
4196 /* See if we can do this by sign_extending a constant that is known
4197 to be negative. This is a good way of doing it, since the shift
4198 may well merge into a subsequent insn. */
4199 if (set_sign_bit_copies > 1)
4201 if (const_ok_for_arm
4202 (temp1 = ARM_SIGN_EXTEND (remainder
4203 << (set_sign_bit_copies - 1))))
4205 if (generate)
4207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (VOIDmode, new_src,
4210 GEN_INT (temp1)));
4211 emit_constant_insn (cond,
4212 gen_ashrsi3 (target, new_src,
4213 GEN_INT (set_sign_bit_copies - 1)));
4215 return 2;
4217 /* For an inverted constant, we will need to set the low bits,
4218 these will be shifted out of harm's way. */
4219 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4220 if (const_ok_for_arm (~temp1))
4222 if (generate)
4224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (VOIDmode, new_src,
4227 GEN_INT (temp1)));
4228 emit_constant_insn (cond,
4229 gen_ashrsi3 (target, new_src,
4230 GEN_INT (set_sign_bit_copies - 1)));
4232 return 2;
4236 /* See if we can calculate the value as the difference between two
4237 valid immediates. */
4238 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4240 int topshift = clear_sign_bit_copies & ~1;
4242 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4243 & (0xff000000 >> topshift));
4245 /* If temp1 is zero, then that means the 9 most significant
4246 bits of remainder were 1 and we've caused it to overflow.
4247 When topshift is 0 we don't need to do anything since we
4248 can borrow from 'bit 32'. */
4249 if (temp1 == 0 && topshift != 0)
4250 temp1 = 0x80000000 >> (topshift - 1);
4252 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4254 if (const_ok_for_arm (temp2))
4256 if (generate)
4258 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4259 emit_constant_insn (cond,
4260 gen_rtx_SET (VOIDmode, new_src,
4261 GEN_INT (temp1)));
4262 emit_constant_insn (cond,
4263 gen_addsi3 (target, new_src,
4264 GEN_INT (-temp2)));
4267 return 2;
4271 /* See if we can generate this by setting the bottom (or the top)
4272 16 bits, and then shifting these into the other half of the
4273 word. We only look for the simplest cases; to do more would cost
4274 too much. Be careful, however, not to generate this when the
4275 alternative would take fewer insns. */
4276 if (val & 0xffff0000)
4278 temp1 = remainder & 0xffff0000;
4279 temp2 = remainder & 0x0000ffff;
4281 /* Overlaps outside this range are best done using other methods. */
4282 for (i = 9; i < 24; i++)
4284 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4285 && !const_ok_for_arm (temp2))
4287 rtx new_src = (subtargets
4288 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4289 : target);
4290 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4291 source, subtargets, generate);
4292 source = new_src;
4293 if (generate)
4294 emit_constant_insn
4295 (cond,
4296 gen_rtx_SET
4297 (VOIDmode, target,
4298 gen_rtx_IOR (mode,
4299 gen_rtx_ASHIFT (mode, source,
4300 GEN_INT (i)),
4301 source)));
4302 return insns + 1;
4306 /* Don't duplicate cases already considered. */
4307 for (i = 17; i < 24; i++)
4309 if (((temp1 | (temp1 >> i)) == remainder)
4310 && !const_ok_for_arm (temp1))
4312 rtx new_src = (subtargets
4313 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4314 : target);
4315 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4316 source, subtargets, generate);
4317 source = new_src;
4318 if (generate)
4319 emit_constant_insn
4320 (cond,
4321 gen_rtx_SET (VOIDmode, target,
4322 gen_rtx_IOR
4323 (mode,
4324 gen_rtx_LSHIFTRT (mode, source,
4325 GEN_INT (i)),
4326 source)));
4327 return insns + 1;
4331 break;
4333 case IOR:
4334 case XOR:
4335 /* If we have IOR or XOR, and the constant can be loaded in a
4336 single instruction, and we can find a temporary to put it in,
4337 then this can be done in two instructions instead of 3-4. */
4338 if (subtargets
4339 /* TARGET can't be NULL if SUBTARGETS is 0 */
4340 || (reload_completed && !reg_mentioned_p (target, source)))
4342 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4344 if (generate)
4346 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4348 emit_constant_insn (cond,
4349 gen_rtx_SET (VOIDmode, sub,
4350 GEN_INT (val)));
4351 emit_constant_insn (cond,
4352 gen_rtx_SET (VOIDmode, target,
4353 gen_rtx_fmt_ee (code, mode,
4354 source, sub)));
4356 return 2;
4360 if (code == XOR)
4361 break;
4363 /* Convert.
4364 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4365 and the remainder 0s for e.g. 0xfff00000)
4366 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4368 This can be done in 2 instructions by using shifts with mov or mvn.
4369 e.g. for
4370 x = x | 0xfff00000;
4371 we generate.
4372 mvn r0, r0, asl #12
4373 mvn r0, r0, lsr #12 */
4374 if (set_sign_bit_copies > 8
4375 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4377 if (generate)
4379 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4380 rtx shift = GEN_INT (set_sign_bit_copies);
4382 emit_constant_insn
4383 (cond,
4384 gen_rtx_SET (VOIDmode, sub,
4385 gen_rtx_NOT (mode,
4386 gen_rtx_ASHIFT (mode,
4387 source,
4388 shift))));
4389 emit_constant_insn
4390 (cond,
4391 gen_rtx_SET (VOIDmode, target,
4392 gen_rtx_NOT (mode,
4393 gen_rtx_LSHIFTRT (mode, sub,
4394 shift))));
4396 return 2;
4399 /* Convert
4400 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4402 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4404 E.g. for r0 = r0 | 0xfff
4405 mvn r0, r0, lsr #12
4406 mvn r0, r0, asl #12
4409 if (set_zero_bit_copies > 8
4410 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4412 if (generate)
4414 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4415 rtx shift = GEN_INT (set_zero_bit_copies);
4417 emit_constant_insn
4418 (cond,
4419 gen_rtx_SET (VOIDmode, sub,
4420 gen_rtx_NOT (mode,
4421 gen_rtx_LSHIFTRT (mode,
4422 source,
4423 shift))));
4424 emit_constant_insn
4425 (cond,
4426 gen_rtx_SET (VOIDmode, target,
4427 gen_rtx_NOT (mode,
4428 gen_rtx_ASHIFT (mode, sub,
4429 shift))));
4431 return 2;
4434 /* This will never be reached for Thumb2 because orn is a valid
4435 instruction. This is for Thumb1 and the 32-bit ARM cases.
4437 x = y | constant (such that ~constant is a valid constant)
4438 Transform this to
4439 x = ~(~y & ~constant).
4441 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4443 if (generate)
4445 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4446 emit_constant_insn (cond,
4447 gen_rtx_SET (VOIDmode, sub,
4448 gen_rtx_NOT (mode, source)));
4449 source = sub;
4450 if (subtargets)
4451 sub = gen_reg_rtx (mode);
4452 emit_constant_insn (cond,
4453 gen_rtx_SET (VOIDmode, sub,
4454 gen_rtx_AND (mode, source,
4455 GEN_INT (temp1))));
4456 emit_constant_insn (cond,
4457 gen_rtx_SET (VOIDmode, target,
4458 gen_rtx_NOT (mode, sub)));
4460 return 3;
4462 break;
4464 case AND:
4465 /* See if two shifts will do two or more insns' worth of work. */
4466 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4468 HOST_WIDE_INT shift_mask = ((0xffffffff
4469 << (32 - clear_sign_bit_copies))
4470 & 0xffffffff);
4472 if ((remainder | shift_mask) != 0xffffffff)
4474 if (generate)
4476 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4477 insns = arm_gen_constant (AND, mode, cond,
4478 remainder | shift_mask,
4479 new_src, source, subtargets, 1);
4480 source = new_src;
4482 else
4484 rtx targ = subtargets ? NULL_RTX : target;
4485 insns = arm_gen_constant (AND, mode, cond,
4486 remainder | shift_mask,
4487 targ, source, subtargets, 0);
4491 if (generate)
4493 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4494 rtx shift = GEN_INT (clear_sign_bit_copies);
4496 emit_insn (gen_ashlsi3 (new_src, source, shift));
4497 emit_insn (gen_lshrsi3 (target, new_src, shift));
4500 return insns + 2;
4503 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4505 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4507 if ((remainder | shift_mask) != 0xffffffff)
4509 if (generate)
4511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4513 insns = arm_gen_constant (AND, mode, cond,
4514 remainder | shift_mask,
4515 new_src, source, subtargets, 1);
4516 source = new_src;
4518 else
4520 rtx targ = subtargets ? NULL_RTX : target;
4522 insns = arm_gen_constant (AND, mode, cond,
4523 remainder | shift_mask,
4524 targ, source, subtargets, 0);
4528 if (generate)
4530 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4531 rtx shift = GEN_INT (clear_zero_bit_copies);
4533 emit_insn (gen_lshrsi3 (new_src, source, shift));
4534 emit_insn (gen_ashlsi3 (target, new_src, shift));
4537 return insns + 2;
4540 break;
4542 default:
4543 break;
4546 /* Calculate what the instruction sequences would be if we generated it
4547 normally, negated, or inverted. */
4548 if (code == AND)
4549 /* AND cannot be split into multiple insns, so invert and use BIC. */
4550 insns = 99;
4551 else
4552 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4554 if (can_negate)
4555 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4556 &neg_immediates);
4557 else
4558 neg_insns = 99;
4560 if (can_invert || final_invert)
4561 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4562 &inv_immediates);
4563 else
4564 inv_insns = 99;
4566 immediates = &pos_immediates;
4568 /* Is the negated immediate sequence more efficient? */
4569 if (neg_insns < insns && neg_insns <= inv_insns)
4571 insns = neg_insns;
4572 immediates = &neg_immediates;
4574 else
4575 can_negate = 0;
4577 /* Is the inverted immediate sequence more efficient?
4578 We must allow for an extra NOT instruction for XOR operations, although
4579 there is some chance that the final 'mvn' will get optimized later. */
4580 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4582 insns = inv_insns;
4583 immediates = &inv_immediates;
4585 else
4587 can_invert = 0;
4588 final_invert = 0;
4591 /* Now output the chosen sequence as instructions. */
4592 if (generate)
4594 for (i = 0; i < insns; i++)
4596 rtx new_src, temp1_rtx;
4598 temp1 = immediates->i[i];
4600 if (code == SET || code == MINUS)
4601 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4602 else if ((final_invert || i < (insns - 1)) && subtargets)
4603 new_src = gen_reg_rtx (mode);
4604 else
4605 new_src = target;
4607 if (can_invert)
4608 temp1 = ~temp1;
4609 else if (can_negate)
4610 temp1 = -temp1;
4612 temp1 = trunc_int_for_mode (temp1, mode);
4613 temp1_rtx = GEN_INT (temp1);
4615 if (code == SET)
4617 else if (code == MINUS)
4618 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4619 else
4620 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (VOIDmode, new_src,
4624 temp1_rtx));
4625 source = new_src;
4627 if (code == SET)
4629 can_negate = can_invert;
4630 can_invert = 0;
4631 code = PLUS;
4633 else if (code == MINUS)
4634 code = PLUS;
4638 if (final_invert)
4640 if (generate)
4641 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4642 gen_rtx_NOT (mode, source)));
4643 insns++;
4646 return insns;
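/* Worked example of the AND shift trick above (illustrative only): for
   x &= 0x0000ffff on a core without the uxth/ubfx patterns handled
   earlier, the mask has sixteen leading zeros (clear_sign_bit_copies ==
   16), so rather than building the constant the function emits

       mov rN, rM, asl #16
       mov rN, rN, lsr #16

   which clears the top sixteen bits in two insns, mirroring the mvn/shift
   sequences already shown for the IOR cases.  */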
4649 /* Canonicalize a comparison so that we are more likely to recognize it.
4650 This can be done for a few constant compares, where we can make the
4651 immediate value easier to load. */
4653 static void
4654 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4655 bool op0_preserve_value)
4657 machine_mode mode;
4658 unsigned HOST_WIDE_INT i, maxval;
4660 mode = GET_MODE (*op0);
4661 if (mode == VOIDmode)
4662 mode = GET_MODE (*op1);
4664 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4666 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4667 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4668 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4669 for GTU/LEU in Thumb mode. */
4670 if (mode == DImode)
4672 rtx tem;
4674 if (*code == GT || *code == LE
4675 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4677 /* Missing comparison. First try to use an available
4678 comparison. */
4679 if (CONST_INT_P (*op1))
4681 i = INTVAL (*op1);
4682 switch (*code)
4684 case GT:
4685 case LE:
4686 if (i != maxval
4687 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4689 *op1 = GEN_INT (i + 1);
4690 *code = *code == GT ? GE : LT;
4691 return;
4693 break;
4694 case GTU:
4695 case LEU:
4696 if (i != ~((unsigned HOST_WIDE_INT) 0)
4697 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4699 *op1 = GEN_INT (i + 1);
4700 *code = *code == GTU ? GEU : LTU;
4701 return;
4703 break;
4704 default:
4705 gcc_unreachable ();
4709 /* If that did not work, reverse the condition. */
4710 if (!op0_preserve_value)
4712 tem = *op0;
4713 *op0 = *op1;
4714 *op1 = tem;
4715 *code = (int)swap_condition ((enum rtx_code)*code);
4718 return;
4721 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4722 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4723 to facilitate possible combining with a cmp into 'ands'. */
4724 if (mode == SImode
4725 && GET_CODE (*op0) == ZERO_EXTEND
4726 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4727 && GET_MODE (XEXP (*op0, 0)) == QImode
4728 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4729 && subreg_lowpart_p (XEXP (*op0, 0))
4730 && *op1 == const0_rtx)
4731 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4732 GEN_INT (255));
4734 /* Comparisons smaller than DImode. Only adjust comparisons against
4735 an out-of-range constant. */
4736 if (!CONST_INT_P (*op1)
4737 || const_ok_for_arm (INTVAL (*op1))
4738 || const_ok_for_arm (- INTVAL (*op1)))
4739 return;
4741 i = INTVAL (*op1);
4743 switch (*code)
4745 case EQ:
4746 case NE:
4747 return;
4749 case GT:
4750 case LE:
4751 if (i != maxval
4752 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4754 *op1 = GEN_INT (i + 1);
4755 *code = *code == GT ? GE : LT;
4756 return;
4758 break;
4760 case GE:
4761 case LT:
4762 if (i != ~maxval
4763 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4765 *op1 = GEN_INT (i - 1);
4766 *code = *code == GE ? GT : LE;
4767 return;
4769 break;
4771 case GTU:
4772 case LEU:
4773 if (i != ~((unsigned HOST_WIDE_INT) 0)
4774 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4776 *op1 = GEN_INT (i + 1);
4777 *code = *code == GTU ? GEU : LTU;
4778 return;
4780 break;
4782 case GEU:
4783 case LTU:
4784 if (i != 0
4785 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4787 *op1 = GEN_INT (i - 1);
4788 *code = *code == GEU ? GTU : LEU;
4789 return;
4791 break;
4793 default:
4794 gcc_unreachable ();
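/* Worked example (illustrative only): for an SImode test "x > 0xfffff",
   neither 0xfffff nor its negation is a valid immediate, so the code above
   rewrites the comparison as "x >= 0x100000" (GT becomes GE with i + 1);
   0x100000 is a single rotated 8-bit immediate, so the compare needs no
   extra constant-building insns.  */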
4799 /* Define how to find the value returned by a function. */
4801 static rtx
4802 arm_function_value(const_tree type, const_tree func,
4803 bool outgoing ATTRIBUTE_UNUSED)
4805 machine_mode mode;
4806 int unsignedp ATTRIBUTE_UNUSED;
4807 rtx r ATTRIBUTE_UNUSED;
4809 mode = TYPE_MODE (type);
4811 if (TARGET_AAPCS_BASED)
4812 return aapcs_allocate_return_reg (mode, type, func);
4814 /* Promote integer types. */
4815 if (INTEGRAL_TYPE_P (type))
4816 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4818 /* Promote small structs returned in a register to full-word size
4819 for big-endian AAPCS. */
4820 if (arm_return_in_msb (type))
4822 HOST_WIDE_INT size = int_size_in_bytes (type);
4823 if (size % UNITS_PER_WORD != 0)
4825 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4826 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4830 return arm_libcall_value_1 (mode);
4833 /* libcall hashtable helpers. */
4835 struct libcall_hasher : typed_noop_remove <rtx_def>
4837 typedef rtx_def value_type;
4838 typedef rtx_def compare_type;
4839 static inline hashval_t hash (const value_type *);
4840 static inline bool equal (const value_type *, const compare_type *);
4841 static inline void remove (value_type *);
4844 inline bool
4845 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4847 return rtx_equal_p (p1, p2);
4850 inline hashval_t
4851 libcall_hasher::hash (const value_type *p1)
4853 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4856 typedef hash_table<libcall_hasher> libcall_table_type;
4858 static void
4859 add_libcall (libcall_table_type *htab, rtx libcall)
4861 *htab->find_slot (libcall, INSERT) = libcall;
4864 static bool
4865 arm_libcall_uses_aapcs_base (const_rtx libcall)
4867 static bool init_done = false;
4868 static libcall_table_type *libcall_htab = NULL;
4870 if (!init_done)
4872 init_done = true;
4874 libcall_htab = new libcall_table_type (31);
4875 add_libcall (libcall_htab,
4876 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4877 add_libcall (libcall_htab,
4878 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4879 add_libcall (libcall_htab,
4880 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4881 add_libcall (libcall_htab,
4882 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4884 add_libcall (libcall_htab,
4885 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4886 add_libcall (libcall_htab,
4887 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4888 add_libcall (libcall_htab,
4889 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4890 add_libcall (libcall_htab,
4891 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4893 add_libcall (libcall_htab,
4894 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4895 add_libcall (libcall_htab,
4896 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4897 add_libcall (libcall_htab,
4898 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4899 add_libcall (libcall_htab,
4900 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4901 add_libcall (libcall_htab,
4902 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4903 add_libcall (libcall_htab,
4904 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4905 add_libcall (libcall_htab,
4906 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4907 add_libcall (libcall_htab,
4908 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4910 /* Values from double-precision helper functions are returned in core
4911 registers if the selected core only supports single-precision
4912 arithmetic, even if we are using the hard-float ABI. The same is
4913 true for single-precision helpers, but we will never be using the
4914 hard-float ABI on a CPU which doesn't support single-precision
4915 operations in hardware. */
4916 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4917 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4918 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4919 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4920 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4921 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4922 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4923 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4924 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4925 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4926 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4927 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4928 SFmode));
4929 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4930 DFmode));
4933 return libcall && libcall_htab->find (libcall) != NULL;
4936 static rtx
4937 arm_libcall_value_1 (machine_mode mode)
4939 if (TARGET_AAPCS_BASED)
4940 return aapcs_libcall_value (mode);
4941 else if (TARGET_IWMMXT_ABI
4942 && arm_vector_mode_supported_p (mode))
4943 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4944 else
4945 return gen_rtx_REG (mode, ARG_REGISTER (1));
4948 /* Define how to find the value returned by a library function
4949 assuming the value has mode MODE. */
4951 static rtx
4952 arm_libcall_value (machine_mode mode, const_rtx libcall)
4954 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4955 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4957 /* The following libcalls return their result in integer registers,
4958 even though they return a floating point value. */
4959 if (arm_libcall_uses_aapcs_base (libcall))
4960 return gen_rtx_REG (mode, ARG_REGISTER(1));
4964 return arm_libcall_value_1 (mode);
4967 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4969 static bool
4970 arm_function_value_regno_p (const unsigned int regno)
4972 if (regno == ARG_REGISTER (1)
4973 || (TARGET_32BIT
4974 && TARGET_AAPCS_BASED
4975 && TARGET_VFP
4976 && TARGET_HARD_FLOAT
4977 && regno == FIRST_VFP_REGNUM)
4978 || (TARGET_IWMMXT_ABI
4979 && regno == FIRST_IWMMXT_REGNUM))
4980 return true;
4982 return false;
4985 /* Determine the amount of memory needed to store the possible return
4986 registers of an untyped call. */
4987 int
4988 arm_apply_result_size (void)
4990 int size = 16;
4992 if (TARGET_32BIT)
4994 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4995 size += 32;
4996 if (TARGET_IWMMXT_ABI)
4997 size += 8;
5000 return size;
5003 /* Decide whether TYPE should be returned in memory (true)
5004 or in a register (false). FNTYPE is the type of the function making
5005 the call. */
5006 static bool
5007 arm_return_in_memory (const_tree type, const_tree fntype)
5009 HOST_WIDE_INT size;
5011 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5013 if (TARGET_AAPCS_BASED)
5015 /* Simple, non-aggregate types (i.e. not including vector and
5016 complex types) are always returned in a register (or registers).
5017 We don't care about which register here, so we can short-cut
5018 some of the detail. */
5019 if (!AGGREGATE_TYPE_P (type)
5020 && TREE_CODE (type) != VECTOR_TYPE
5021 && TREE_CODE (type) != COMPLEX_TYPE)
5022 return false;
5024 /* Any return value that is no larger than one word can be
5025 returned in r0. */
5026 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5027 return false;
5029 /* Check any available co-processors to see if they accept the
5030 type as a register candidate (VFP, for example, can return
5031 some aggregates in consecutive registers). These aren't
5032 available if the call is variadic. */
5033 if (aapcs_select_return_coproc (type, fntype) >= 0)
5034 return false;
5036 /* Vector values should be returned using ARM registers, not
5037 memory (unless they're over 16 bytes, which will break since
5038 we only have four call-clobbered registers to play with). */
5039 if (TREE_CODE (type) == VECTOR_TYPE)
5040 return (size < 0 || size > (4 * UNITS_PER_WORD));
5042 /* The rest go in memory. */
5043 return true;
5046 if (TREE_CODE (type) == VECTOR_TYPE)
5047 return (size < 0 || size > (4 * UNITS_PER_WORD));
5049 if (!AGGREGATE_TYPE_P (type) &&
5050 (TREE_CODE (type) != VECTOR_TYPE))
5051 /* All simple types are returned in registers. */
5052 return false;
5054 if (arm_abi != ARM_ABI_APCS)
5056 /* ATPCS and later return aggregate types in memory only if they are
5057 larger than a word (or are variable size). */
5058 return (size < 0 || size > UNITS_PER_WORD);
5061 /* For the arm-wince targets we choose to be compatible with Microsoft's
5062 ARM and Thumb compilers, which always return aggregates in memory. */
5063 #ifndef ARM_WINCE
5064 /* All structures/unions bigger than one word are returned in memory.
5065 Also catch the case where int_size_in_bytes returns -1. In this case
5066 the aggregate is either huge or of variable size, and in either case
5067 we will want to return it via memory and not in a register. */
5068 if (size < 0 || size > UNITS_PER_WORD)
5069 return true;
5071 if (TREE_CODE (type) == RECORD_TYPE)
5073 tree field;
5075 /* For a struct the APCS says that we only return in a register
5076 if the type is 'integer like' and every addressable element
5077 has an offset of zero. For practical purposes this means
5078 that the structure can have at most one non bit-field element
5079 and that this element must be the first one in the structure. */
5081 /* Find the first field, ignoring non FIELD_DECL things which will
5082 have been created by C++. */
5083 for (field = TYPE_FIELDS (type);
5084 field && TREE_CODE (field) != FIELD_DECL;
5085 field = DECL_CHAIN (field))
5086 continue;
5088 if (field == NULL)
5089 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5091 /* Check that the first field is valid for returning in a register. */
5093 /* ... Floats are not allowed */
5094 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5095 return true;
5097 /* ... Aggregates that are not themselves valid for returning in
5098 a register are not allowed. */
5099 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5100 return true;
5102 /* Now check the remaining fields, if any. Only bitfields are allowed,
5103 since they are not addressable. */
5104 for (field = DECL_CHAIN (field);
5105 field;
5106 field = DECL_CHAIN (field))
5108 if (TREE_CODE (field) != FIELD_DECL)
5109 continue;
5111 if (!DECL_BIT_FIELD_TYPE (field))
5112 return true;
5115 return false;
5118 if (TREE_CODE (type) == UNION_TYPE)
5120 tree field;
5122 /* Unions can be returned in registers if every element is
5123 integral, or can be returned in an integer register. */
5124 for (field = TYPE_FIELDS (type);
5125 field;
5126 field = DECL_CHAIN (field))
5128 if (TREE_CODE (field) != FIELD_DECL)
5129 continue;
5131 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5132 return true;
5134 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5135 return true;
5138 return false;
5140 #endif /* not ARM_WINCE */
5142 /* Return all other types in memory. */
5143 return true;
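/* Illustrative examples of the rules above (assumptions, not exhaustive):
   under AAPCS a "struct { char c; }" fits in one word and comes back in r0,
   while "struct { int a; int b; }" is larger than a word and goes to
   memory; "struct { double d[2]; }" can instead come back in VFP registers
   when the hard-float variant accepts it as a homogeneous aggregate.  Under
   the old APCS rules "struct { int a; }" is "integer like" and is returned
   in a register, whereas "struct { float f; }" is returned in memory
   because its first field is a float.  */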
5146 const struct pcs_attribute_arg
5148 const char *arg;
5149 enum arm_pcs value;
5150 } pcs_attribute_args[] =
5152 {"aapcs", ARM_PCS_AAPCS},
5153 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5154 #if 0
5155 /* We could recognize these, but changes would be needed elsewhere
5156 * to implement them. */
5157 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5158 {"atpcs", ARM_PCS_ATPCS},
5159 {"apcs", ARM_PCS_APCS},
5160 #endif
5161 {NULL, ARM_PCS_UNKNOWN}
5164 static enum arm_pcs
5165 arm_pcs_from_attribute (tree attr)
5167 const struct pcs_attribute_arg *ptr;
5168 const char *arg;
5170 /* Get the value of the argument. */
5171 if (TREE_VALUE (attr) == NULL_TREE
5172 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5173 return ARM_PCS_UNKNOWN;
5175 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5177 /* Check it against the list of known arguments. */
5178 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5179 if (streq (arg, ptr->arg))
5180 return ptr->value;
5182 /* An unrecognized PCS variant. */
5183 return ARM_PCS_UNKNOWN;
5186 /* Get the PCS variant to use for this call. TYPE is the function's type
5187 specification, DECL is the specific declaration. DECL may be null if
5188 the call could be indirect or if this is a library call. */
5189 static enum arm_pcs
5190 arm_get_pcs_model (const_tree type, const_tree decl)
5192 bool user_convention = false;
5193 enum arm_pcs user_pcs = arm_pcs_default;
5194 tree attr;
5196 gcc_assert (type);
5198 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5199 if (attr)
5201 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5202 user_convention = true;
5205 if (TARGET_AAPCS_BASED)
5207 /* Detect varargs functions. These always use the base rules
5208 (no argument is ever a candidate for a co-processor
5209 register). */
5210 bool base_rules = stdarg_p (type);
5212 if (user_convention)
5214 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5215 sorry ("non-AAPCS derived PCS variant");
5216 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5217 error ("variadic functions must use the base AAPCS variant");
5220 if (base_rules)
5221 return ARM_PCS_AAPCS;
5222 else if (user_convention)
5223 return user_pcs;
5224 else if (decl && flag_unit_at_a_time)
5226 /* Local functions never leak outside this compilation unit,
5227 so we are free to use whatever conventions are
5228 appropriate. */
5229 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5230 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5231 if (i && i->local)
5232 return ARM_PCS_AAPCS_LOCAL;
5235 else if (user_convention && user_pcs != arm_pcs_default)
5236 sorry ("PCS variant");
5238 /* For everything else we use the target's default. */
5239 return arm_pcs_default;
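/* Example of the "pcs" attribute that the code above interprets
   (illustrative only):

       double f (double) __attribute__ ((pcs ("aapcs")));

   forces the base-variant (core-register) argument and return passing for
   calls to f, even in a translation unit otherwise compiled for the VFP
   variant.  */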
5243 static void
5244 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5245 const_tree fntype ATTRIBUTE_UNUSED,
5246 rtx libcall ATTRIBUTE_UNUSED,
5247 const_tree fndecl ATTRIBUTE_UNUSED)
5249 /* Record the unallocated VFP registers. */
5250 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5251 pcum->aapcs_vfp_reg_alloc = 0;
5254 /* Walk down the type tree of TYPE counting consecutive base elements.
5255 If *MODEP is VOIDmode, then set it to the first valid floating point
5256 type. If a non-floating point type is found, or if a floating point
5257 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5258 otherwise return the count in the sub-tree. */
5259 static int
5260 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5262 machine_mode mode;
5263 HOST_WIDE_INT size;
5265 switch (TREE_CODE (type))
5267 case REAL_TYPE:
5268 mode = TYPE_MODE (type);
5269 if (mode != DFmode && mode != SFmode)
5270 return -1;
5272 if (*modep == VOIDmode)
5273 *modep = mode;
5275 if (*modep == mode)
5276 return 1;
5278 break;
5280 case COMPLEX_TYPE:
5281 mode = TYPE_MODE (TREE_TYPE (type));
5282 if (mode != DFmode && mode != SFmode)
5283 return -1;
5285 if (*modep == VOIDmode)
5286 *modep = mode;
5288 if (*modep == mode)
5289 return 2;
5291 break;
5293 case VECTOR_TYPE:
5294 /* Use V2SImode and V4SImode as representatives of all 64-bit
5295 and 128-bit vector types, whether or not those modes are
5296 supported with the present options. */
5297 size = int_size_in_bytes (type);
5298 switch (size)
5300 case 8:
5301 mode = V2SImode;
5302 break;
5303 case 16:
5304 mode = V4SImode;
5305 break;
5306 default:
5307 return -1;
5310 if (*modep == VOIDmode)
5311 *modep = mode;
5313 /* Vector modes are considered to be opaque: two vectors are
5314 equivalent for the purposes of being homogeneous aggregates
5315 if they are the same size. */
5316 if (*modep == mode)
5317 return 1;
5319 break;
5321 case ARRAY_TYPE:
5323 int count;
5324 tree index = TYPE_DOMAIN (type);
5326 /* Can't handle incomplete types nor sizes that are not
5327 fixed. */
5328 if (!COMPLETE_TYPE_P (type)
5329 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5330 return -1;
5332 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5333 if (count == -1
5334 || !index
5335 || !TYPE_MAX_VALUE (index)
5336 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5337 || !TYPE_MIN_VALUE (index)
5338 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5339 || count < 0)
5340 return -1;
5342 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5343 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5345 /* There must be no padding. */
5346 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5347 return -1;
5349 return count;
5352 case RECORD_TYPE:
5354 int count = 0;
5355 int sub_count;
5356 tree field;
5358 /* Can't handle incomplete types nor sizes that are not
5359 fixed. */
5360 if (!COMPLETE_TYPE_P (type)
5361 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5362 return -1;
5364 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5366 if (TREE_CODE (field) != FIELD_DECL)
5367 continue;
5369 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5370 if (sub_count < 0)
5371 return -1;
5372 count += sub_count;
5375 /* There must be no padding. */
5376 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5377 return -1;
5379 return count;
5382 case UNION_TYPE:
5383 case QUAL_UNION_TYPE:
5385 /* These aren't very interesting except in a degenerate case. */
5386 int count = 0;
5387 int sub_count;
5388 tree field;
5390 /* Can't handle incomplete types nor sizes that are not
5391 fixed. */
5392 if (!COMPLETE_TYPE_P (type)
5393 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5394 return -1;
5396 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5398 if (TREE_CODE (field) != FIELD_DECL)
5399 continue;
5401 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5402 if (sub_count < 0)
5403 return -1;
5404 count = count > sub_count ? count : sub_count;
5407 /* There must be no padding. */
5408 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5409 return -1;
5411 return count;
5414 default:
5415 break;
5418 return -1;
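/* Worked examples of the walk above (illustrative only):

     struct { float x, y, z; }      -> *MODEP = SFmode, count = 3
     struct { double re, im; }      -> *MODEP = DFmode, count = 2
     _Complex double                -> *MODEP = DFmode, count = 2
     struct { double d; float f; }  -> -1 (mixed base types)

   A count of 1 to 4 with a single base mode is what later makes a type a
   homogeneous-aggregate candidate for the VFP PCS.  */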
5421 /* Return true if PCS_VARIANT should use VFP registers. */
5422 static bool
5423 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5425 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5427 static bool seen_thumb1_vfp = false;
5429 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5431 sorry ("Thumb-1 hard-float VFP ABI");
5432 /* sorry() is not immediately fatal, so only display this once. */
5433 seen_thumb1_vfp = true;
5436 return true;
5439 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5440 return false;
5442 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5443 (TARGET_VFP_DOUBLE || !is_double));
5446 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5447 suitable for passing or returning in VFP registers for the PCS
5448 variant selected. If it is, then *BASE_MODE is updated to contain
5449 a machine mode describing each element of the argument's type and
5450 *COUNT to hold the number of such elements. */
5451 static bool
5452 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5453 machine_mode mode, const_tree type,
5454 machine_mode *base_mode, int *count)
5456 machine_mode new_mode = VOIDmode;
5458 /* If we have the type information, prefer that to working things
5459 out from the mode. */
5460 if (type)
5462 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5464 if (ag_count > 0 && ag_count <= 4)
5465 *count = ag_count;
5466 else
5467 return false;
5469 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5470 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5471 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5473 *count = 1;
5474 new_mode = mode;
5476 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5478 *count = 2;
5479 new_mode = (mode == DCmode ? DFmode : SFmode);
5481 else
5482 return false;
5485 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5486 return false;
5488 *base_mode = new_mode;
5489 return true;
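/* As an illustrative sketch of the candidate check above: a
   homogeneous floating-point aggregate such as

       struct rgb { float r, g, b; };

   decomposes into three SFmode elements, so *base_mode is set to
   SFmode and *count to 3; an aggregate with more than four such
   elements is rejected and falls back to the core-register rules.  */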
5492 static bool
5493 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5494 machine_mode mode, const_tree type)
5496 int count ATTRIBUTE_UNUSED;
5497 machine_mode ag_mode ATTRIBUTE_UNUSED;
5499 if (!use_vfp_abi (pcs_variant, false))
5500 return false;
5501 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5502 &ag_mode, &count);
5505 static bool
5506 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5507 const_tree type)
5509 if (!use_vfp_abi (pcum->pcs_variant, false))
5510 return false;
5512 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5513 &pcum->aapcs_vfp_rmode,
5514 &pcum->aapcs_vfp_rcount);
5517 static bool
5518 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5519 const_tree type ATTRIBUTE_UNUSED)
5521 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5522 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5523 int regno;
5525 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5526 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5528 pcum->aapcs_vfp_reg_alloc = mask << regno;
5529 if (mode == BLKmode
5530 || (mode == TImode && ! TARGET_NEON)
5531 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5533 int i;
5534 int rcount = pcum->aapcs_vfp_rcount;
5535 int rshift = shift;
5536 machine_mode rmode = pcum->aapcs_vfp_rmode;
5537 rtx par;
5538 if (!TARGET_NEON)
5540 /* Avoid using unsupported vector modes. */
5541 if (rmode == V2SImode)
5542 rmode = DImode;
5543 else if (rmode == V4SImode)
5545 rmode = DImode;
5546 rcount *= 2;
5547 rshift /= 2;
5550 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5551 for (i = 0; i < rcount; i++)
5553 rtx tmp = gen_rtx_REG (rmode,
5554 FIRST_VFP_REGNUM + regno + i * rshift);
5555 tmp = gen_rtx_EXPR_LIST
5556 (VOIDmode, tmp,
5557 GEN_INT (i * GET_MODE_SIZE (rmode)));
5558 XVECEXP (par, 0, i) = tmp;
5561 pcum->aapcs_reg = par;
5563 else
5564 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5565 return true;
5567 return false;
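/* A worked illustration of the allocation above: for a single DFmode
   argument, shift is 2 (one D register spans two S registers) and
   mask is 0x3, so the loop scans s0/s1, s2/s3, ... for the first free
   even-numbered pair and records it in aapcs_vfp_reg_alloc.  */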
5570 static rtx
5571 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5572 machine_mode mode,
5573 const_tree type ATTRIBUTE_UNUSED)
5575 if (!use_vfp_abi (pcs_variant, false))
5576 return NULL;
5578 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5580 int count;
5581 machine_mode ag_mode;
5582 int i;
5583 rtx par;
5584 int shift;
5586 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5587 &ag_mode, &count);
5589 if (!TARGET_NEON)
5591 if (ag_mode == V2SImode)
5592 ag_mode = DImode;
5593 else if (ag_mode == V4SImode)
5595 ag_mode = DImode;
5596 count *= 2;
5599 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5601 for (i = 0; i < count; i++)
5603 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5605 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5606 XVECEXP (par, 0, i) = tmp;
5609 return par;
5612 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5615 static void
5616 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5617 machine_mode mode ATTRIBUTE_UNUSED,
5618 const_tree type ATTRIBUTE_UNUSED)
5620 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5621 pcum->aapcs_vfp_reg_alloc = 0;
5622 return;
5625 #define AAPCS_CP(X) \
5627 aapcs_ ## X ## _cum_init, \
5628 aapcs_ ## X ## _is_call_candidate, \
5629 aapcs_ ## X ## _allocate, \
5630 aapcs_ ## X ## _is_return_candidate, \
5631 aapcs_ ## X ## _allocate_return_reg, \
5632 aapcs_ ## X ## _advance \
5635 /* Table of co-processors that can be used to pass arguments in
5636 registers.  Ideally no argument should be a candidate for more than
5637 one co-processor table entry, but the table is processed in order
5638 and stops after the first match. If that entry then fails to put
5639 the argument into a co-processor register, the argument will go on
5640 the stack. */
5641 static struct
5643 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5644 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5646 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5647 BLKmode) is a candidate for this co-processor's registers; this
5648 function should ignore any position-dependent state in
5649 CUMULATIVE_ARGS and only use call-type dependent information. */
5650 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5652 /* Return true if the argument does get a co-processor register; it
5653 should set aapcs_reg to an RTX of the register allocated as is
5654 required for a return from FUNCTION_ARG. */
5655 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5657 /* Return true if a result of mode MODE (or type TYPE if MODE is
5658 BLKmode) can be returned in this co-processor's registers. */
5659 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5661 /* Allocate and return an RTX element to hold the return type of a
5662 call; this routine must not fail and will only be called if
5663 is_return_candidate returned true with the same parameters. */
5664 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5666 /* Finish processing this argument and prepare to start processing
5667 the next one. */
5668 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5669 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5671 AAPCS_CP(vfp)
5674 #undef AAPCS_CP
5676 static int
5677 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5678 const_tree type)
5680 int i;
5682 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5683 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5684 return i;
5686 return -1;
5689 static int
5690 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5692 /* We aren't passed a decl, so we can't check that a call is local.
5693 However, it isn't clear that that would be a win anyway, since it
5694 might limit some tail-calling opportunities. */
5695 enum arm_pcs pcs_variant;
5697 if (fntype)
5699 const_tree fndecl = NULL_TREE;
5701 if (TREE_CODE (fntype) == FUNCTION_DECL)
5703 fndecl = fntype;
5704 fntype = TREE_TYPE (fntype);
5707 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5709 else
5710 pcs_variant = arm_pcs_default;
5712 if (pcs_variant != ARM_PCS_AAPCS)
5714 int i;
5716 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5717 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5718 TYPE_MODE (type),
5719 type))
5720 return i;
5722 return -1;
5725 static rtx
5726 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5727 const_tree fntype)
5729 /* We aren't passed a decl, so we can't check that a call is local.
5730 However, it isn't clear that that would be a win anyway, since it
5731 might limit some tail-calling opportunities. */
5732 enum arm_pcs pcs_variant;
5733 int unsignedp ATTRIBUTE_UNUSED;
5735 if (fntype)
5737 const_tree fndecl = NULL_TREE;
5739 if (TREE_CODE (fntype) == FUNCTION_DECL)
5741 fndecl = fntype;
5742 fntype = TREE_TYPE (fntype);
5745 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5747 else
5748 pcs_variant = arm_pcs_default;
5750 /* Promote integer types. */
5751 if (type && INTEGRAL_TYPE_P (type))
5752 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5754 if (pcs_variant != ARM_PCS_AAPCS)
5756 int i;
5758 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5759 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5760 type))
5761 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5762 mode, type);
5765 /* Promotes small structs returned in a register to full-word size
5766 for big-endian AAPCS. */
5767 if (type && arm_return_in_msb (type))
5769 HOST_WIDE_INT size = int_size_in_bytes (type);
5770 if (size % UNITS_PER_WORD != 0)
5772 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5773 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5777 return gen_rtx_REG (mode, R0_REGNUM);
5780 static rtx
5781 aapcs_libcall_value (machine_mode mode)
5783 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5784 && GET_MODE_SIZE (mode) <= 4)
5785 mode = SImode;
5787 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5790 /* Lay out a function argument using the AAPCS rules. The rule
5791 numbers referred to here are those in the AAPCS. */
5792 static void
5793 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5794 const_tree type, bool named)
5796 int nregs, nregs2;
5797 int ncrn;
5799 /* We only need to do this once per argument. */
5800 if (pcum->aapcs_arg_processed)
5801 return;
5803 pcum->aapcs_arg_processed = true;
5805 /* Special case: if named is false then we are handling an incoming
5806 anonymous argument which is on the stack. */
5807 if (!named)
5808 return;
5810 /* Is this a potential co-processor register candidate? */
5811 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5813 int slot = aapcs_select_call_coproc (pcum, mode, type);
5814 pcum->aapcs_cprc_slot = slot;
5816 /* We don't have to apply any of the rules from part B of the
5817 preparation phase; these are handled elsewhere in the
5818 compiler. */
5820 if (slot >= 0)
5822 /* A Co-processor register candidate goes either in its own
5823 class of registers or on the stack. */
5824 if (!pcum->aapcs_cprc_failed[slot])
5826 /* C1.cp - Try to allocate the argument to co-processor
5827 registers. */
5828 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5829 return;
5831 /* C2.cp - Put the argument on the stack and note that we
5832 can't assign any more candidates in this slot. We also
5833 need to note that we have allocated stack space, so that
5834 we won't later try to split a non-cprc candidate between
5835 core registers and the stack. */
5836 pcum->aapcs_cprc_failed[slot] = true;
5837 pcum->can_split = false;
5840 /* We didn't get a register, so this argument goes on the
5841 stack. */
5842 gcc_assert (pcum->can_split == false);
5843 return;
5847 /* C3 - For double-word aligned arguments, round the NCRN up to the
5848 next even number. */
5849 ncrn = pcum->aapcs_ncrn;
5850 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5851 ncrn++;
5853 nregs = ARM_NUM_REGS2(mode, type);
5855 /* Sigh, this test should really assert that nregs > 0, but a GCC
5856 extension allows empty structs and then gives them zero size; it
5857 then allows such a structure to be passed by value. For some of
5858 the code below we have to pretend that such an argument has
5859 non-zero size so that we 'locate' it correctly either in
5860 registers or on the stack. */
5861 gcc_assert (nregs >= 0);
5863 nregs2 = nregs ? nregs : 1;
5865 /* C4 - Argument fits entirely in core registers. */
5866 if (ncrn + nregs2 <= NUM_ARG_REGS)
5868 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5869 pcum->aapcs_next_ncrn = ncrn + nregs;
5870 return;
5873 /* C5 - Some core registers left and there are no arguments already
5874 on the stack: split this argument between the remaining core
5875 registers and the stack. */
5876 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5878 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5879 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5880 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5881 return;
5884 /* C6 - NCRN is set to 4. */
5885 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5887 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
5888 return;
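/* A worked example of rules C3-C5 above (illustrative only): for a
   call such as f (int a, double b) under the base AAPCS with software
   floating point, 'a' is allocated r0 and the NCRN becomes 1; 'b'
   needs doubleword alignment, so C3 rounds the NCRN up to 2 and 'b'
   occupies r2/r3, with nothing left over for the stack.  */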
5891 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5892 for a call to a function whose data type is FNTYPE.
5893 For a library call, FNTYPE is NULL. */
5894 void
5895 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5896 rtx libname,
5897 tree fndecl ATTRIBUTE_UNUSED)
5899 /* Long call handling. */
5900 if (fntype)
5901 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5902 else
5903 pcum->pcs_variant = arm_pcs_default;
5905 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5907 if (arm_libcall_uses_aapcs_base (libname))
5908 pcum->pcs_variant = ARM_PCS_AAPCS;
5910 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5911 pcum->aapcs_reg = NULL_RTX;
5912 pcum->aapcs_partial = 0;
5913 pcum->aapcs_arg_processed = false;
5914 pcum->aapcs_cprc_slot = -1;
5915 pcum->can_split = true;
5917 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5919 int i;
5921 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5923 pcum->aapcs_cprc_failed[i] = false;
5924 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5927 return;
5930 /* Legacy ABIs */
5932 /* On the ARM, the offset starts at 0. */
5933 pcum->nregs = 0;
5934 pcum->iwmmxt_nregs = 0;
5935 pcum->can_split = true;
5937 /* Varargs vectors are treated the same as long long.
5938 named_count avoids having to change the way arm handles 'named'.  */
5939 pcum->named_count = 0;
5940 pcum->nargs = 0;
5942 if (TARGET_REALLY_IWMMXT && fntype)
5944 tree fn_arg;
5946 for (fn_arg = TYPE_ARG_TYPES (fntype);
5947 fn_arg;
5948 fn_arg = TREE_CHAIN (fn_arg))
5949 pcum->named_count += 1;
5951 if (! pcum->named_count)
5952 pcum->named_count = INT_MAX;
5956 /* Return true if we use LRA instead of reload pass. */
5957 static bool
5958 arm_lra_p (void)
5960 return arm_lra_flag;
5963 /* Return true if mode/type need doubleword alignment. */
5964 static bool
5965 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5967 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5968 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
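/* For example, under the AAPCS types such as long long and double
   carry 64-bit alignment, which exceeds PARM_BOUNDARY (32 bits), so
   they are reported as needing doubleword alignment and are placed in
   an even-numbered core register pair by the callers of this
   function.  */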
5972 /* Determine where to put an argument to a function.
5973 Value is zero to push the argument on the stack,
5974 or a hard register in which to store the argument.
5976 MODE is the argument's machine mode.
5977 TYPE is the data type of the argument (as a tree).
5978 This is null for libcalls where that information may
5979 not be available.
5980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5981 the preceding args and about the function being called.
5982 NAMED is nonzero if this argument is a named parameter
5983 (otherwise it is an extra parameter matching an ellipsis).
5985 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5986 other arguments are passed on the stack. If (NAMED == 0) (which happens
5987 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5988 defined), say it is passed on the stack (function_prologue will
5989 indeed make it be passed on the stack if necessary). */
5991 static rtx
5992 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5993 const_tree type, bool named)
5995 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5996 int nregs;
5998 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5999 a call insn (op3 of a call_value insn). */
6000 if (mode == VOIDmode)
6001 return const0_rtx;
6003 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6005 aapcs_layout_arg (pcum, mode, type, named);
6006 return pcum->aapcs_reg;
6009 /* Varargs vectors are treated the same as long long.
6010 named_count avoids having to change the way arm handles 'named'.  */
6011 if (TARGET_IWMMXT_ABI
6012 && arm_vector_mode_supported_p (mode)
6013 && pcum->named_count > pcum->nargs + 1)
6015 if (pcum->iwmmxt_nregs <= 9)
6016 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6017 else
6019 pcum->can_split = false;
6020 return NULL_RTX;
6024 /* Put doubleword aligned quantities in even register pairs. */
6025 if (pcum->nregs & 1
6026 && ARM_DOUBLEWORD_ALIGN
6027 && arm_needs_doubleword_align (mode, type))
6028 pcum->nregs++;
6030 /* Only allow splitting an arg between regs and memory if all preceding
6031 args were allocated to regs. For args passed by reference we only count
6032 the reference pointer. */
6033 if (pcum->can_split)
6034 nregs = 1;
6035 else
6036 nregs = ARM_NUM_REGS2 (mode, type);
6038 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6039 return NULL_RTX;
6041 return gen_rtx_REG (mode, pcum->nregs);
6044 static unsigned int
6045 arm_function_arg_boundary (machine_mode mode, const_tree type)
6047 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6048 ? DOUBLEWORD_ALIGNMENT
6049 : PARM_BOUNDARY);
6052 static int
6053 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6054 tree type, bool named)
6056 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6057 int nregs = pcum->nregs;
6059 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6061 aapcs_layout_arg (pcum, mode, type, named);
6062 return pcum->aapcs_partial;
6065 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6066 return 0;
6068 if (NUM_ARG_REGS > nregs
6069 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6070 && pcum->can_split)
6071 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6073 return 0;
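/* As a sketch of the legacy-ABI path above: a 16-byte structure whose
   first word would land in r2 has ARM_NUM_REGS2 equal to 4, so the
   test succeeds and (NUM_ARG_REGS - nregs) * UNITS_PER_WORD reports 8
   bytes passed in r2/r3, with the remaining 8 bytes on the stack.  */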
6076 /* Update the data in PCUM to advance over an argument
6077 of mode MODE and data type TYPE.
6078 (TYPE is null for libcalls where that information may not be available.) */
6080 static void
6081 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6082 const_tree type, bool named)
6084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6086 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6088 aapcs_layout_arg (pcum, mode, type, named);
6090 if (pcum->aapcs_cprc_slot >= 0)
6092 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6093 type);
6094 pcum->aapcs_cprc_slot = -1;
6097 /* Generic stuff. */
6098 pcum->aapcs_arg_processed = false;
6099 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6100 pcum->aapcs_reg = NULL_RTX;
6101 pcum->aapcs_partial = 0;
6103 else
6105 pcum->nargs += 1;
6106 if (arm_vector_mode_supported_p (mode)
6107 && pcum->named_count > pcum->nargs
6108 && TARGET_IWMMXT_ABI)
6109 pcum->iwmmxt_nregs += 1;
6110 else
6111 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6115 /* Variable sized types are passed by reference. This is a GCC
6116 extension to the ARM ABI. */
6118 static bool
6119 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6120 machine_mode mode ATTRIBUTE_UNUSED,
6121 const_tree type, bool named ATTRIBUTE_UNUSED)
6123 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6126 /* Encode the current state of the #pragma [no_]long_calls. */
6127 typedef enum
6129 OFF, /* No #pragma [no_]long_calls is in effect. */
6130 LONG, /* #pragma long_calls is in effect. */
6131 SHORT /* #pragma no_long_calls is in effect. */
6132 } arm_pragma_enum;
6134 static arm_pragma_enum arm_pragma_long_calls = OFF;
6136 void
6137 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6139 arm_pragma_long_calls = LONG;
6142 void
6143 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6145 arm_pragma_long_calls = SHORT;
6148 void
6149 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6151 arm_pragma_long_calls = OFF;
6154 /* Handle an attribute requiring a FUNCTION_DECL;
6155 arguments as in struct attribute_spec.handler. */
6156 static tree
6157 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6158 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6160 if (TREE_CODE (*node) != FUNCTION_DECL)
6162 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6163 name);
6164 *no_add_attrs = true;
6167 return NULL_TREE;
6170 /* Handle an "interrupt" or "isr" attribute;
6171 arguments as in struct attribute_spec.handler. */
6172 static tree
6173 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6174 bool *no_add_attrs)
6176 if (DECL_P (*node))
6178 if (TREE_CODE (*node) != FUNCTION_DECL)
6180 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6181 name);
6182 *no_add_attrs = true;
6184 /* FIXME: the argument if any is checked for type attributes;
6185 should it be checked for decl ones? */
6187 else
6189 if (TREE_CODE (*node) == FUNCTION_TYPE
6190 || TREE_CODE (*node) == METHOD_TYPE)
6192 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6194 warning (OPT_Wattributes, "%qE attribute ignored",
6195 name);
6196 *no_add_attrs = true;
6199 else if (TREE_CODE (*node) == POINTER_TYPE
6200 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6201 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6202 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6204 *node = build_variant_type_copy (*node);
6205 TREE_TYPE (*node) = build_type_attribute_variant
6206 (TREE_TYPE (*node),
6207 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6208 *no_add_attrs = true;
6210 else
6212 /* Possibly pass this attribute on from the type to a decl. */
6213 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6214 | (int) ATTR_FLAG_FUNCTION_NEXT
6215 | (int) ATTR_FLAG_ARRAY_NEXT))
6217 *no_add_attrs = true;
6218 return tree_cons (name, args, NULL_TREE);
6220 else
6222 warning (OPT_Wattributes, "%qE attribute ignored",
6223 name);
6228 return NULL_TREE;
6231 /* Handle a "pcs" attribute; arguments as in struct
6232 attribute_spec.handler. */
6233 static tree
6234 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6235 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6237 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6239 warning (OPT_Wattributes, "%qE attribute ignored", name);
6240 *no_add_attrs = true;
6242 return NULL_TREE;
6245 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6246 /* Handle the "notshared" attribute. This attribute is another way of
6247 requesting hidden visibility. ARM's compiler supports
6248 "__declspec(notshared)"; we support the same thing via an
6249 attribute. */
6251 static tree
6252 arm_handle_notshared_attribute (tree *node,
6253 tree name ATTRIBUTE_UNUSED,
6254 tree args ATTRIBUTE_UNUSED,
6255 int flags ATTRIBUTE_UNUSED,
6256 bool *no_add_attrs)
6258 tree decl = TYPE_NAME (*node);
6260 if (decl)
6262 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6263 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6264 *no_add_attrs = false;
6266 return NULL_TREE;
6268 #endif
6270 /* Return 0 if the attributes for two types are incompatible, 1 if they
6271 are compatible, and 2 if they are nearly compatible (which causes a
6272 warning to be generated). */
6273 static int
6274 arm_comp_type_attributes (const_tree type1, const_tree type2)
6276 int l1, l2, s1, s2;
6278 /* Check for mismatch of non-default calling convention. */
6279 if (TREE_CODE (type1) != FUNCTION_TYPE)
6280 return 1;
6282 /* Check for mismatched call attributes. */
6283 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6284 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6285 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6286 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6288 /* Only bother to check if an attribute is defined. */
6289 if (l1 | l2 | s1 | s2)
6291 /* If one type has an attribute, the other must have the same attribute. */
6292 if ((l1 != l2) || (s1 != s2))
6293 return 0;
6295 /* Disallow mixed attributes. */
6296 if ((l1 & s2) || (l2 & s1))
6297 return 0;
6300 /* Check for mismatched ISR attribute. */
6301 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6302 if (! l1)
6303 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6304 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6305 if (! l2)
6306 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6307 if (l1 != l2)
6308 return 0;
6310 return 1;
6313 /* Assigns default attributes to a newly defined type.  This is used to
6314 set short_call/long_call attributes for function types of
6315 functions defined inside corresponding #pragma scopes. */
6316 static void
6317 arm_set_default_type_attributes (tree type)
6319 /* Add __attribute__ ((long_call)) to all functions when inside
6320 #pragma long_calls, or __attribute__ ((short_call)) when inside
6321 #pragma no_long_calls. */
6322 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6324 tree type_attr_list, attr_name;
6325 type_attr_list = TYPE_ATTRIBUTES (type);
6327 if (arm_pragma_long_calls == LONG)
6328 attr_name = get_identifier ("long_call");
6329 else if (arm_pragma_long_calls == SHORT)
6330 attr_name = get_identifier ("short_call");
6331 else
6332 return;
6334 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6335 TYPE_ATTRIBUTES (type) = type_attr_list;
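/* Illustrative use of the pragmas handled above:

       #pragma long_calls
       void far_away (void);   -- its type gets the long_call attribute
       #pragma long_calls_off

   Function types declared inside a #pragma long_calls region receive
   the long_call attribute, while #pragma no_long_calls gives them
   short_call instead.  */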
6339 /* Return true if DECL is known to be linked into section SECTION. */
6341 static bool
6342 arm_function_in_section_p (tree decl, section *section)
6344 /* We can only be certain about functions defined in the same
6345 compilation unit. */
6346 if (!TREE_STATIC (decl))
6347 return false;
6349 /* Make sure that SYMBOL always binds to the definition in this
6350 compilation unit. */
6351 if (!targetm.binds_local_p (decl))
6352 return false;
6354 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6355 if (!DECL_SECTION_NAME (decl))
6357 /* Make sure that we will not create a unique section for DECL. */
6358 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6359 return false;
6362 return function_section (decl) == section;
6365 /* Return nonzero if a 32-bit "long_call" should be generated for
6366 a call from the current function to DECL. We generate a long_call
6367 if the function:
6369 a. has an __attribute__ ((long_call))
6370 or b. is within the scope of a #pragma long_calls
6371 or c. the -mlong-calls command line switch has been specified
6373 However we do not generate a long call if the function:
6375 d. has an __attribute__ ((short_call))
6376 or e. is inside the scope of a #pragma no_long_calls
6377 or f. is defined in the same section as the current function. */
6379 bool
6380 arm_is_long_call_p (tree decl)
6382 tree attrs;
6384 if (!decl)
6385 return TARGET_LONG_CALLS;
6387 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6388 if (lookup_attribute ("short_call", attrs))
6389 return false;
6391 /* For "f", be conservative, and only cater for cases in which the
6392 whole of the current function is placed in the same section. */
6393 if (!flag_reorder_blocks_and_partition
6394 && TREE_CODE (decl) == FUNCTION_DECL
6395 && arm_function_in_section_p (decl, current_function_section ()))
6396 return false;
6398 if (lookup_attribute ("long_call", attrs))
6399 return true;
6401 return TARGET_LONG_CALLS;
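/* A typical use of the attribute tested above (illustrative only):

       extern void flash_handler (void) __attribute__ ((long_call));

   Calls to flash_handler are then emitted as a longer sequence that
   loads the full address and branches via a register, rather than a
   single BL, so they are not limited to the BL instruction's +/-32MB
   range.  */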
6404 /* Return nonzero if it is ok to make a tail-call to DECL. */
6405 static bool
6406 arm_function_ok_for_sibcall (tree decl, tree exp)
6408 unsigned long func_type;
6410 if (cfun->machine->sibcall_blocked)
6411 return false;
6413 /* Never tailcall something if we are generating code for Thumb-1. */
6414 if (TARGET_THUMB1)
6415 return false;
6417 /* The PIC register is live on entry to VxWorks PLT entries, so we
6418 must make the call before restoring the PIC register. */
6419 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6420 return false;
6422 /* If we are interworking and the function is not declared static
6423 then we can't tail-call it unless we know that it exists in this
6424 compilation unit (since it might be a Thumb routine). */
6425 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6426 && !TREE_ASM_WRITTEN (decl))
6427 return false;
6429 func_type = arm_current_func_type ();
6430 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6431 if (IS_INTERRUPT (func_type))
6432 return false;
6434 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6436 /* Check that the return value locations are the same. For
6437 example that we aren't returning a value from the sibling in
6438 a VFP register but then need to transfer it to a core
6439 register. */
6440 rtx a, b;
6442 a = arm_function_value (TREE_TYPE (exp), decl, false);
6443 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6444 cfun->decl, false);
6445 if (!rtx_equal_p (a, b))
6446 return false;
6449 /* Never tailcall if function may be called with a misaligned SP. */
6450 if (IS_STACKALIGN (func_type))
6451 return false;
6453 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6454 references should become a NOP. Don't convert such calls into
6455 sibling calls. */
6456 if (TARGET_AAPCS_BASED
6457 && arm_abi == ARM_ABI_AAPCS
6458 && decl
6459 && DECL_WEAK (decl))
6460 return false;
6462 /* Everything else is ok. */
6463 return true;
6467 /* Addressing mode support functions. */
6469 /* Return nonzero if X is a legitimate immediate operand when compiling
6470 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6471 int
6472 legitimate_pic_operand_p (rtx x)
6474 if (GET_CODE (x) == SYMBOL_REF
6475 || (GET_CODE (x) == CONST
6476 && GET_CODE (XEXP (x, 0)) == PLUS
6477 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6478 return 0;
6480 return 1;
6483 /* Record that the current function needs a PIC register. Initialize
6484 cfun->machine->pic_reg if we have not already done so. */
6486 static void
6487 require_pic_register (void)
6489 /* A lot of the logic here is made obscure by the fact that this
6490 routine gets called as part of the rtx cost estimation process.
6491 We don't want those calls to affect any assumptions about the real
6492 function; and further, we can't call entry_of_function() until we
6493 start the real expansion process. */
6494 if (!crtl->uses_pic_offset_table)
6496 gcc_assert (can_create_pseudo_p ());
6497 if (arm_pic_register != INVALID_REGNUM
6498 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6500 if (!cfun->machine->pic_reg)
6501 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6503 /* Play games to avoid marking the function as needing pic
6504 if we are being called as part of the cost-estimation
6505 process. */
6506 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6507 crtl->uses_pic_offset_table = 1;
6509 else
6511 rtx_insn *seq, *insn;
6513 if (!cfun->machine->pic_reg)
6514 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6516 /* Play games to avoid marking the function as needing pic
6517 if we are being called as part of the cost-estimation
6518 process. */
6519 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6521 crtl->uses_pic_offset_table = 1;
6522 start_sequence ();
6524 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6525 && arm_pic_register > LAST_LO_REGNUM)
6526 emit_move_insn (cfun->machine->pic_reg,
6527 gen_rtx_REG (Pmode, arm_pic_register));
6528 else
6529 arm_load_pic_register (0UL);
6531 seq = get_insns ();
6532 end_sequence ();
6534 for (insn = seq; insn; insn = NEXT_INSN (insn))
6535 if (INSN_P (insn))
6536 INSN_LOCATION (insn) = prologue_location;
6538 /* We can be called during expansion of PHI nodes, where
6539 we can't yet emit instructions directly in the final
6540 insn stream. Queue the insns on the entry edge, they will
6541 be committed after everything else is expanded. */
6542 insert_insn_on_edge (seq,
6543 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6550 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6552 if (GET_CODE (orig) == SYMBOL_REF
6553 || GET_CODE (orig) == LABEL_REF)
6555 rtx insn;
6557 if (reg == 0)
6559 gcc_assert (can_create_pseudo_p ());
6560 reg = gen_reg_rtx (Pmode);
6563 /* VxWorks does not impose a fixed gap between segments; the run-time
6564 gap can be different from the object-file gap. We therefore can't
6565 use GOTOFF unless we are absolutely sure that the symbol is in the
6566 same segment as the GOT. Unfortunately, the flexibility of linker
6567 scripts means that we can't be sure of that in general, so assume
6568 that GOTOFF is never valid on VxWorks. */
6569 if ((GET_CODE (orig) == LABEL_REF
6570 || (GET_CODE (orig) == SYMBOL_REF &&
6571 SYMBOL_REF_LOCAL_P (orig)))
6572 && NEED_GOT_RELOC
6573 && arm_pic_data_is_text_relative)
6574 insn = arm_pic_static_addr (orig, reg);
6575 else
6577 rtx pat;
6578 rtx mem;
6580 /* If this function doesn't have a pic register, create one now. */
6581 require_pic_register ();
6583 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6585 /* Make the MEM as close to a constant as possible. */
6586 mem = SET_SRC (pat);
6587 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6588 MEM_READONLY_P (mem) = 1;
6589 MEM_NOTRAP_P (mem) = 1;
6591 insn = emit_insn (pat);
6594 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6595 by loop. */
6596 set_unique_reg_note (insn, REG_EQUAL, orig);
6598 return reg;
6600 else if (GET_CODE (orig) == CONST)
6602 rtx base, offset;
6604 if (GET_CODE (XEXP (orig, 0)) == PLUS
6605 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6606 return orig;
6608 /* Handle the case where we have: const (UNSPEC_TLS). */
6609 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6610 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6611 return orig;
6613 /* Handle the case where we have:
6614 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6615 CONST_INT. */
6616 if (GET_CODE (XEXP (orig, 0)) == PLUS
6617 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6618 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6620 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6621 return orig;
6624 if (reg == 0)
6626 gcc_assert (can_create_pseudo_p ());
6627 reg = gen_reg_rtx (Pmode);
6630 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6632 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6633 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6634 base == reg ? 0 : reg);
6636 if (CONST_INT_P (offset))
6638 /* The base register doesn't really matter, we only want to
6639 test the index for the appropriate mode. */
6640 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6642 gcc_assert (can_create_pseudo_p ());
6643 offset = force_reg (Pmode, offset);
6646 if (CONST_INT_P (offset))
6647 return plus_constant (Pmode, base, INTVAL (offset));
6650 if (GET_MODE_SIZE (mode) > 4
6651 && (GET_MODE_CLASS (mode) == MODE_INT
6652 || TARGET_SOFT_FLOAT))
6654 emit_insn (gen_addsi3 (reg, base, offset));
6655 return reg;
6658 return gen_rtx_PLUS (Pmode, base, offset);
6661 return orig;
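/* In outline, the effect of legitimize_pic_address on a global symbol
   is a load through the PIC register, conceptually

       (set (reg Rn) (mem (plus pic_reg <GOT offset of the symbol>)))

   (see calculate_pic_address in arm.md), while local symbols and
   labels take the cheaper PC-relative path via arm_pic_static_addr.  */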
6665 /* Find a spare register to use during the prolog of a function. */
6667 static int
6668 thumb_find_work_register (unsigned long pushed_regs_mask)
6670 int reg;
6672 /* Check the argument registers first as these are call-used. The
6673 register allocation order means that sometimes r3 might be used
6674 but earlier argument registers might not, so check them all. */
6675 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6676 if (!df_regs_ever_live_p (reg))
6677 return reg;
6679 /* Before going on to check the call-saved registers we can try a couple
6680 more ways of deducing that r3 is available. The first is when we are
6681 pushing anonymous arguments onto the stack and we have less than 4
6682 registers worth of fixed arguments(*). In this case r3 will be part of
6683 the variable argument list and so we can be sure that it will be
6684 pushed right at the start of the function. Hence it will be available
6685 for the rest of the prologue.
6686 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6687 if (cfun->machine->uses_anonymous_args
6688 && crtl->args.pretend_args_size > 0)
6689 return LAST_ARG_REGNUM;
6691 /* The other case is when we have fixed arguments but less than 4 registers
6692 worth. In this case r3 might be used in the body of the function, but
6693 it is not being used to convey an argument into the function. In theory
6694 we could just check crtl->args.size to see how many bytes are
6695 being passed in argument registers, but it seems that it is unreliable.
6696 Sometimes it will have the value 0 when in fact arguments are being
6697 passed. (See testcase execute/20021111-1.c for an example). So we also
6698 check the args_info.nregs field as well. The problem with this field is
6699 that it makes no allowances for arguments that are passed to the
6700 function but which are not used. Hence we could miss an opportunity
6701 when a function has an unused argument in r3. But it is better to be
6702 safe than to be sorry. */
6703 if (! cfun->machine->uses_anonymous_args
6704 && crtl->args.size >= 0
6705 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6706 && (TARGET_AAPCS_BASED
6707 ? crtl->args.info.aapcs_ncrn < 4
6708 : crtl->args.info.nregs < 4))
6709 return LAST_ARG_REGNUM;
6711 /* Otherwise look for a call-saved register that is going to be pushed. */
6712 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6713 if (pushed_regs_mask & (1 << reg))
6714 return reg;
6716 if (TARGET_THUMB2)
6718 /* Thumb-2 can use high regs. */
6719 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6720 if (pushed_regs_mask & (1 << reg))
6721 return reg;
6723 /* Something went wrong - thumb_compute_save_reg_mask()
6724 should have arranged for a suitable register to be pushed. */
6725 gcc_unreachable ();
6728 static GTY(()) int pic_labelno;
6730 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6731 low register. */
6733 void
6734 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6736 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6738 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6739 return;
6741 gcc_assert (flag_pic);
6743 pic_reg = cfun->machine->pic_reg;
6744 if (TARGET_VXWORKS_RTP)
6746 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6747 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6748 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6750 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6752 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6753 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6755 else
6757 /* We use an UNSPEC rather than a LABEL_REF because this label
6758 never appears in the code stream. */
6760 labelno = GEN_INT (pic_labelno++);
6761 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6762 l1 = gen_rtx_CONST (VOIDmode, l1);
6764 /* On the ARM the PC register contains 'dot + 8' at the time of the
6765 addition; on the Thumb it is 'dot + 4'. */
6766 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6767 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6768 UNSPEC_GOTSYM_OFF);
6769 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6771 if (TARGET_32BIT)
6773 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6775 else /* TARGET_THUMB1 */
6777 if (arm_pic_register != INVALID_REGNUM
6778 && REGNO (pic_reg) > LAST_LO_REGNUM)
6780 /* We will have pushed the pic register, so we should always be
6781 able to find a work register. */
6782 pic_tmp = gen_rtx_REG (SImode,
6783 thumb_find_work_register (saved_regs));
6784 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6785 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6786 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6788 else if (arm_pic_register != INVALID_REGNUM
6789 && arm_pic_register > LAST_LO_REGNUM
6790 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6792 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6793 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6794 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6796 else
6797 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6801 /* Need to emit this whether or not we obey regdecls,
6802 since setjmp/longjmp can cause life info to screw up. */
6803 emit_use (pic_reg);
6806 /* Generate code to load the address of a static var when flag_pic is set. */
6807 static rtx
6808 arm_pic_static_addr (rtx orig, rtx reg)
6810 rtx l1, labelno, offset_rtx, insn;
6812 gcc_assert (flag_pic);
6814 /* We use an UNSPEC rather than a LABEL_REF because this label
6815 never appears in the code stream. */
6816 labelno = GEN_INT (pic_labelno++);
6817 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6818 l1 = gen_rtx_CONST (VOIDmode, l1);
6820 /* On the ARM the PC register contains 'dot + 8' at the time of the
6821 addition; on the Thumb it is 'dot + 4'. */
6822 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6823 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6824 UNSPEC_SYMBOL_OFFSET);
6825 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6827 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6828 return insn;
6831 /* Return nonzero if X is valid as an ARM state addressing register. */
6832 static int
6833 arm_address_register_rtx_p (rtx x, int strict_p)
6835 int regno;
6837 if (!REG_P (x))
6838 return 0;
6840 regno = REGNO (x);
6842 if (strict_p)
6843 return ARM_REGNO_OK_FOR_BASE_P (regno);
6845 return (regno <= LAST_ARM_REGNUM
6846 || regno >= FIRST_PSEUDO_REGISTER
6847 || regno == FRAME_POINTER_REGNUM
6848 || regno == ARG_POINTER_REGNUM);
6851 /* Return TRUE if this rtx is the difference of a symbol and a label,
6852 and will reduce to a PC-relative relocation in the object file.
6853 Expressions like this can be left alone when generating PIC, rather
6854 than forced through the GOT. */
6855 static int
6856 pcrel_constant_p (rtx x)
6858 if (GET_CODE (x) == MINUS)
6859 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6861 return FALSE;
6864 /* Return true if X will surely end up in an index register after next
6865 splitting pass. */
6866 static bool
6867 will_be_in_index_register (const_rtx x)
6869 /* arm.md: calculate_pic_address will split this into a register. */
6870 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6873 /* Return nonzero if X is a valid ARM state address operand. */
6874 int
6875 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6876 int strict_p)
6878 bool use_ldrd;
6879 enum rtx_code code = GET_CODE (x);
6881 if (arm_address_register_rtx_p (x, strict_p))
6882 return 1;
6884 use_ldrd = (TARGET_LDRD
6885 && (mode == DImode
6886 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6888 if (code == POST_INC || code == PRE_DEC
6889 || ((code == PRE_INC || code == POST_DEC)
6890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6895 && GET_CODE (XEXP (x, 1)) == PLUS
6896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6898 rtx addend = XEXP (XEXP (x, 1), 1);
6900 /* Don't allow ldrd post-increment by register because it's hard
6901 to fix up invalid register choices. */
6902 if (use_ldrd
6903 && GET_CODE (x) == POST_MODIFY
6904 && REG_P (addend))
6905 return 0;
6907 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6908 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6911 /* After reload constants split into minipools will have addresses
6912 from a LABEL_REF. */
6913 else if (reload_completed
6914 && (code == LABEL_REF
6915 || (code == CONST
6916 && GET_CODE (XEXP (x, 0)) == PLUS
6917 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6918 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6919 return 1;
6921 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6922 return 0;
6924 else if (code == PLUS)
6926 rtx xop0 = XEXP (x, 0);
6927 rtx xop1 = XEXP (x, 1);
6929 return ((arm_address_register_rtx_p (xop0, strict_p)
6930 && ((CONST_INT_P (xop1)
6931 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6932 || (!strict_p && will_be_in_index_register (xop1))))
6933 || (arm_address_register_rtx_p (xop1, strict_p)
6934 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6937 #if 0
6938 /* Reload currently can't handle MINUS, so disable this for now */
6939 else if (GET_CODE (x) == MINUS)
6941 rtx xop0 = XEXP (x, 0);
6942 rtx xop1 = XEXP (x, 1);
6944 return (arm_address_register_rtx_p (xop0, strict_p)
6945 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6947 #endif
6949 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6950 && code == SYMBOL_REF
6951 && CONSTANT_POOL_ADDRESS_P (x)
6952 && ! (flag_pic
6953 && symbol_mentioned_p (get_pool_constant (x))
6954 && ! pcrel_constant_p (get_pool_constant (x))))
6955 return 1;
6957 return 0;
6960 /* Return nonzero if X is a valid Thumb-2 address operand. */
6961 static int
6962 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6964 bool use_ldrd;
6965 enum rtx_code code = GET_CODE (x);
6967 if (arm_address_register_rtx_p (x, strict_p))
6968 return 1;
6970 use_ldrd = (TARGET_LDRD
6971 && (mode == DImode
6972 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6974 if (code == POST_INC || code == PRE_DEC
6975 || ((code == PRE_INC || code == POST_DEC)
6976 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6977 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6979 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6980 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6981 && GET_CODE (XEXP (x, 1)) == PLUS
6982 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6984 /* Thumb-2 only has autoincrement by constant. */
6985 rtx addend = XEXP (XEXP (x, 1), 1);
6986 HOST_WIDE_INT offset;
6988 if (!CONST_INT_P (addend))
6989 return 0;
6991 offset = INTVAL(addend);
6992 if (GET_MODE_SIZE (mode) <= 4)
6993 return (offset > -256 && offset < 256);
6995 return (use_ldrd && offset > -1024 && offset < 1024
6996 && (offset & 3) == 0);
6999 /* After reload constants split into minipools will have addresses
7000 from a LABEL_REF. */
7001 else if (reload_completed
7002 && (code == LABEL_REF
7003 || (code == CONST
7004 && GET_CODE (XEXP (x, 0)) == PLUS
7005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7006 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7007 return 1;
7009 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7010 return 0;
7012 else if (code == PLUS)
7014 rtx xop0 = XEXP (x, 0);
7015 rtx xop1 = XEXP (x, 1);
7017 return ((arm_address_register_rtx_p (xop0, strict_p)
7018 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7019 || (!strict_p && will_be_in_index_register (xop1))))
7020 || (arm_address_register_rtx_p (xop1, strict_p)
7021 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7024 /* Normally we can assign constant values to target registers without
7025 the help of the constant pool.  But there are cases where we have to
7026 use the constant pool, for example:
7027 1) assigning a label to a register.
7028 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7030 A constant pool access in the format:
7031 (set (reg r0) (mem (symbol_ref (".LC0"))))
7032 will cause the use of the literal pool (later in function arm_reorg).
7033 So here we mark such a format as invalid; the compiler will then
7034 adjust it into:
7035 (set (reg r0) (symbol_ref (".LC0")))
7036 (set (reg r0) (mem (reg r0))).
7037 No extra register is required, and (mem (reg r0)) won't cause the use
7038 of literal pools. */
7039 else if (arm_disable_literal_pool && code == SYMBOL_REF
7040 && CONSTANT_POOL_ADDRESS_P (x))
7041 return 0;
7043 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7044 && code == SYMBOL_REF
7045 && CONSTANT_POOL_ADDRESS_P (x)
7046 && ! (flag_pic
7047 && symbol_mentioned_p (get_pool_constant (x))
7048 && ! pcrel_constant_p (get_pool_constant (x))))
7049 return 1;
7051 return 0;
7054 /* Return nonzero if INDEX is valid for an address index operand in
7055 ARM state. */
7056 static int
7057 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7058 int strict_p)
7060 HOST_WIDE_INT range;
7061 enum rtx_code code = GET_CODE (index);
7063 /* Standard coprocessor addressing modes. */
7064 if (TARGET_HARD_FLOAT
7065 && TARGET_VFP
7066 && (mode == SFmode || mode == DFmode))
7067 return (code == CONST_INT && INTVAL (index) < 1024
7068 && INTVAL (index) > -1024
7069 && (INTVAL (index) & 3) == 0);
7071 /* For quad modes, we restrict the constant offset to be slightly less
7072 than what the instruction format permits. We do this because for
7073 quad mode moves, we will actually decompose them into two separate
7074 double-mode reads or writes. INDEX must therefore be a valid
7075 (double-mode) offset and so should INDEX+8. */
7076 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7077 return (code == CONST_INT
7078 && INTVAL (index) < 1016
7079 && INTVAL (index) > -1024
7080 && (INTVAL (index) & 3) == 0);
7082 /* We have no such constraint on double mode offsets, so we permit the
7083 full range of the instruction format. */
7084 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7085 return (code == CONST_INT
7086 && INTVAL (index) < 1024
7087 && INTVAL (index) > -1024
7088 && (INTVAL (index) & 3) == 0);
7090 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7091 return (code == CONST_INT
7092 && INTVAL (index) < 1024
7093 && INTVAL (index) > -1024
7094 && (INTVAL (index) & 3) == 0);
7096 if (arm_address_register_rtx_p (index, strict_p)
7097 && (GET_MODE_SIZE (mode) <= 4))
7098 return 1;
7100 if (mode == DImode || mode == DFmode)
7102 if (code == CONST_INT)
7104 HOST_WIDE_INT val = INTVAL (index);
7106 if (TARGET_LDRD)
7107 return val > -256 && val < 256;
7108 else
7109 return val > -4096 && val < 4092;
7112 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7115 if (GET_MODE_SIZE (mode) <= 4
7116 && ! (arm_arch4
7117 && (mode == HImode
7118 || mode == HFmode
7119 || (mode == QImode && outer == SIGN_EXTEND))))
7121 if (code == MULT)
7123 rtx xiop0 = XEXP (index, 0);
7124 rtx xiop1 = XEXP (index, 1);
7126 return ((arm_address_register_rtx_p (xiop0, strict_p)
7127 && power_of_two_operand (xiop1, SImode))
7128 || (arm_address_register_rtx_p (xiop1, strict_p)
7129 && power_of_two_operand (xiop0, SImode)));
7131 else if (code == LSHIFTRT || code == ASHIFTRT
7132 || code == ASHIFT || code == ROTATERT)
7134 rtx op = XEXP (index, 1);
7136 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7137 && CONST_INT_P (op)
7138 && INTVAL (op) > 0
7139 && INTVAL (op) <= 31);
7143 /* For ARM v4 we may be doing a sign-extend operation during the
7144 load. */
7145 if (arm_arch4)
7147 if (mode == HImode
7148 || mode == HFmode
7149 || (outer == SIGN_EXTEND && mode == QImode))
7150 range = 256;
7151 else
7152 range = 4096;
7154 else
7155 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7157 return (code == CONST_INT
7158 && INTVAL (index) < range
7159 && INTVAL (index) > -range);
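/* Examples of index forms accepted above (illustrative): a scaled
   register index such as the one in

       ldr r0, [r1, r2, lsl #2]

   matches the MULT/shift cases via power_of_two_operand, while an
   SImode constant offset is accepted anywhere in the open range
   (-4096, 4096).  */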
7162 /* Return true if OP is a valid index scaling factor for a Thumb-2
7163 address index operand, i.e. 1, 2, 4 or 8. */
7164 static bool
7165 thumb2_index_mul_operand (rtx op)
7167 HOST_WIDE_INT val;
7169 if (!CONST_INT_P (op))
7170 return false;
7172 val = INTVAL(op);
7173 return (val == 1 || val == 2 || val == 4 || val == 8);
7176 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7177 static int
7178 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7180 enum rtx_code code = GET_CODE (index);
7182 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7183 /* Standard coprocessor addressing modes. */
7184 if (TARGET_HARD_FLOAT
7185 && TARGET_VFP
7186 && (mode == SFmode || mode == DFmode))
7187 return (code == CONST_INT && INTVAL (index) < 1024
7188 /* Thumb-2 allows only > -256 index range for its core register
7189 load/stores. Since we allow SF/DF in core registers, we have
7190 to use the intersection between -256~4096 (core) and -1024~1024
7191 (coprocessor). */
7192 && INTVAL (index) > -256
7193 && (INTVAL (index) & 3) == 0);
7195 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7197 /* For DImode assume values will usually live in core regs
7198 and only allow LDRD addressing modes. */
7199 if (!TARGET_LDRD || mode != DImode)
7200 return (code == CONST_INT
7201 && INTVAL (index) < 1024
7202 && INTVAL (index) > -1024
7203 && (INTVAL (index) & 3) == 0);
7206 /* For quad modes, we restrict the constant offset to be slightly less
7207 than what the instruction format permits. We do this because for
7208 quad mode moves, we will actually decompose them into two separate
7209 double-mode reads or writes. INDEX must therefore be a valid
7210 (double-mode) offset and so should INDEX+8. */
7211 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7212 return (code == CONST_INT
7213 && INTVAL (index) < 1016
7214 && INTVAL (index) > -1024
7215 && (INTVAL (index) & 3) == 0);
7217 /* We have no such constraint on double mode offsets, so we permit the
7218 full range of the instruction format. */
7219 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7220 return (code == CONST_INT
7221 && INTVAL (index) < 1024
7222 && INTVAL (index) > -1024
7223 && (INTVAL (index) & 3) == 0);
7225 if (arm_address_register_rtx_p (index, strict_p)
7226 && (GET_MODE_SIZE (mode) <= 4))
7227 return 1;
7229 if (mode == DImode || mode == DFmode)
7231 if (code == CONST_INT)
7233 HOST_WIDE_INT val = INTVAL (index);
7234 /* ??? Can we assume ldrd for thumb2? */
7235 /* Thumb-2 ldrd only has reg+const addressing modes. */
7236 /* ldrd supports offsets of +-1020.
7237 However the ldr fallback does not. */
7238 return val > -256 && val < 256 && (val & 3) == 0;
7240 else
7241 return 0;
7244 if (code == MULT)
7246 rtx xiop0 = XEXP (index, 0);
7247 rtx xiop1 = XEXP (index, 1);
7249 return ((arm_address_register_rtx_p (xiop0, strict_p)
7250 && thumb2_index_mul_operand (xiop1))
7251 || (arm_address_register_rtx_p (xiop1, strict_p)
7252 && thumb2_index_mul_operand (xiop0)));
7254 else if (code == ASHIFT)
7256 rtx op = XEXP (index, 1);
7258 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7259 && CONST_INT_P (op)
7260 && INTVAL (op) > 0
7261 && INTVAL (op) <= 3);
7264 return (code == CONST_INT
7265 && INTVAL (index) < 4096
7266 && INTVAL (index) > -256);
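/* By way of illustration: Thumb-2 allows a scaled register index only
   up to a shift of three, so

       ldr r0, [r1, r2, lsl #3]

   is accepted by the MULT/ASHIFT checks above, whereas a constant
   offset must lie in the asymmetric open range (-256, 4096).  */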
7269 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7270 static int
7271 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7273 int regno;
7275 if (!REG_P (x))
7276 return 0;
7278 regno = REGNO (x);
7280 if (strict_p)
7281 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7283 return (regno <= LAST_LO_REGNUM
7284 || regno > LAST_VIRTUAL_REGISTER
7285 || regno == FRAME_POINTER_REGNUM
7286 || (GET_MODE_SIZE (mode) >= 4
7287 && (regno == STACK_POINTER_REGNUM
7288 || regno >= FIRST_PSEUDO_REGISTER
7289 || x == hard_frame_pointer_rtx
7290 || x == arg_pointer_rtx)));
7293 /* Return nonzero if x is a legitimate index register. This is the case
7294 for any base register that can access a QImode object. */
7295 inline static int
7296 thumb1_index_register_rtx_p (rtx x, int strict_p)
7298 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7301 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7303 The AP may be eliminated to either the SP or the FP, so we use the
7304 least common denominator, e.g. SImode, and offsets from 0 to 64.
7306 ??? Verify whether the above is the right approach.
7308 ??? Also, the FP may be eliminated to the SP, so perhaps that
7309 needs special handling also.
7311 ??? Look at how the mips16 port solves this problem. It probably uses
7312 better ways to solve some of these problems.
7314 Although it is not incorrect, we don't accept QImode and HImode
7315 addresses based on the frame pointer or arg pointer until the
7316 reload pass starts. This is so that eliminating such addresses
7317 into stack based ones won't produce impossible code. */
7318 int
7319 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7321 /* ??? Not clear if this is right. Experiment. */
7322 if (GET_MODE_SIZE (mode) < 4
7323 && !(reload_in_progress || reload_completed)
7324 && (reg_mentioned_p (frame_pointer_rtx, x)
7325 || reg_mentioned_p (arg_pointer_rtx, x)
7326 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7327 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7328 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7329 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7330 return 0;
7332 /* Accept any base register. SP only in SImode or larger. */
7333 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7334 return 1;
7336 /* This is PC relative data before arm_reorg runs. */
7337 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7338 && GET_CODE (x) == SYMBOL_REF
7339 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7340 return 1;
7342 /* This is PC relative data after arm_reorg runs. */
7343 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7344 && reload_completed
7345 && (GET_CODE (x) == LABEL_REF
7346 || (GET_CODE (x) == CONST
7347 && GET_CODE (XEXP (x, 0)) == PLUS
7348 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7349 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7350 return 1;
7352 /* Post-inc indexing only supported for SImode and larger. */
7353 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7354 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7355 return 1;
7357 else if (GET_CODE (x) == PLUS)
7359 /* REG+REG address can be any two index registers. */
7360 /* We disallow FRAME+REG addressing since we know that FRAME
7361 will be replaced with STACK, and SP relative addressing only
7362 permits SP+OFFSET. */
7363 if (GET_MODE_SIZE (mode) <= 4
7364 && XEXP (x, 0) != frame_pointer_rtx
7365 && XEXP (x, 1) != frame_pointer_rtx
7366 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7367 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7368 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7369 return 1;
7371 /* REG+const has 5-7 bit offset for non-SP registers. */
7372 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7373 || XEXP (x, 0) == arg_pointer_rtx)
7374 && CONST_INT_P (XEXP (x, 1))
7375 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7376 return 1;
7378 /* REG+const has 10-bit offset for SP, but only SImode and
7379 larger is supported. */
7380 /* ??? Should probably check for DI/DFmode overflow here
7381 just like GO_IF_LEGITIMATE_OFFSET does. */
7382 else if (REG_P (XEXP (x, 0))
7383 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7384 && GET_MODE_SIZE (mode) >= 4
7385 && CONST_INT_P (XEXP (x, 1))
7386 && INTVAL (XEXP (x, 1)) >= 0
7387 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7388 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7389 return 1;
7391 else if (REG_P (XEXP (x, 0))
7392 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7393 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7394 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7395 && REGNO (XEXP (x, 0))
7396 <= LAST_VIRTUAL_POINTER_REGISTER))
7397 && GET_MODE_SIZE (mode) >= 4
7398 && CONST_INT_P (XEXP (x, 1))
7399 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7400 return 1;
7403 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7404 && GET_MODE_SIZE (mode) == 4
7405 && GET_CODE (x) == SYMBOL_REF
7406 && CONSTANT_POOL_ADDRESS_P (x)
7407 && ! (flag_pic
7408 && symbol_mentioned_p (get_pool_constant (x))
7409 && ! pcrel_constant_p (get_pool_constant (x))))
7410 return 1;
7412 return 0;
7415 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7416 instruction of mode MODE. */
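/* In summary, the switch below accepts byte offsets 0-31, halfword
   offsets 0-62 (even values only), and for word and larger modes any
   multiple of four for which the whole access stays within 128 bytes
   (0-124 for SImode).  */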
7418 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7420 switch (GET_MODE_SIZE (mode))
7422 case 1:
7423 return val >= 0 && val < 32;
7425 case 2:
7426 return val >= 0 && val < 64 && (val & 1) == 0;
7428 default:
7429 return (val >= 0
7430 && (val + GET_MODE_SIZE (mode)) <= 128
7431 && (val & 3) == 0);
7435 bool
7436 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7438 if (TARGET_ARM)
7439 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7440 else if (TARGET_THUMB2)
7441 return thumb2_legitimate_address_p (mode, x, strict_p);
7442 else /* if (TARGET_THUMB1) */
7443 return thumb1_legitimate_address_p (mode, x, strict_p);
7446 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7448 Given an rtx X being reloaded into a reg required to be
7449 in class CLASS, return the class of reg to actually use.
7450 In general this is just CLASS, but for the Thumb core registers and
7451 immediate constants we prefer a LO_REGS class or a subset. */
7453 static reg_class_t
7454 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7456 if (TARGET_32BIT)
7457 return rclass;
7458 else
7460 if (rclass == GENERAL_REGS)
7461 return LO_REGS;
7462 else
7463 return rclass;
7467 /* Build the SYMBOL_REF for __tls_get_addr. */
7469 static GTY(()) rtx tls_get_addr_libfunc;
7471 static rtx
7472 get_tls_get_addr (void)
7474 if (!tls_get_addr_libfunc)
7475 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7476 return tls_get_addr_libfunc;
7480 arm_load_tp (rtx target)
7482 if (!target)
7483 target = gen_reg_rtx (SImode);
7485 if (TARGET_HARD_TP)
7487 /* Can return in any reg. */
7488 emit_insn (gen_load_tp_hard (target));
7490 else
7492 /* Always returned in r0. Immediately copy the result into a pseudo,
7493 otherwise other uses of r0 (e.g. setting up function arguments) may
7494 clobber the value. */
7496 rtx tmp;
7498 emit_insn (gen_load_tp_soft ());
7500 tmp = gen_rtx_REG (SImode, 0);
7501 emit_move_insn (target, tmp);
7503 return target;
7506 static rtx
7507 load_tls_operand (rtx x, rtx reg)
7509 rtx tmp;
7511 if (reg == NULL_RTX)
7512 reg = gen_reg_rtx (SImode);
7514 tmp = gen_rtx_CONST (SImode, x);
7516 emit_move_insn (reg, tmp);
7518 return reg;
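/* Emit the insn sequence that computes the address of TLS symbol X by
   calling __tls_get_addr with relocation RELOC (TLS_GD32 or TLS_LDM32,
   never TLS_DESCSEQ).  The rtx holding the call's return value is
   stored in *VALUEP, and the emitted sequence itself is returned.  */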
7521 static rtx
7522 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7524 rtx insns, label, labelno, sum;
7526 gcc_assert (reloc != TLS_DESCSEQ);
7527 start_sequence ();
7529 labelno = GEN_INT (pic_labelno++);
7530 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7531 label = gen_rtx_CONST (VOIDmode, label);
7533 sum = gen_rtx_UNSPEC (Pmode,
7534 gen_rtvec (4, x, GEN_INT (reloc), label,
7535 GEN_INT (TARGET_ARM ? 8 : 4)),
7536 UNSPEC_TLS);
7537 reg = load_tls_operand (sum, reg);
7539 if (TARGET_ARM)
7540 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7541 else
7542 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7544 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7545 LCT_PURE, /* LCT_CONST? */
7546 Pmode, 1, reg, Pmode);
7548 insns = get_insns ();
7549 end_sequence ();
7551 return insns;
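/* Emit a GNU2 TLS descriptor sequence for symbol X.  The tlscall
   pattern leaves its result in r0, so the value is copied into REG
   (or into a fresh pseudo if REG is NULL) and that register is
   returned.  */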
7554 static rtx
7555 arm_tls_descseq_addr (rtx x, rtx reg)
7557 rtx labelno = GEN_INT (pic_labelno++);
7558 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7559 rtx sum = gen_rtx_UNSPEC (Pmode,
7560 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7561 gen_rtx_CONST (VOIDmode, label),
7562 GEN_INT (!TARGET_ARM)),
7563 UNSPEC_TLS);
7564 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7566 emit_insn (gen_tlscall (x, labelno));
7567 if (!reg)
7568 reg = gen_reg_rtx (SImode);
7569 else
7570 gcc_assert (REGNO (reg) != 0);
7572 emit_move_insn (reg, reg0);
7574 return reg;
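/* Legitimize a reference to the TLS symbol X, using REG as a scratch
   register when one is supplied.  The global-dynamic, local-dynamic,
   initial-exec and local-exec models are handled below; for the two
   dynamic models, TARGET_GNU2_TLS selects the TLS descriptor sequence
   instead of a call to __tls_get_addr.  */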
7578 legitimize_tls_address (rtx x, rtx reg)
7580 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7581 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7583 switch (model)
7585 case TLS_MODEL_GLOBAL_DYNAMIC:
7586 if (TARGET_GNU2_TLS)
7588 reg = arm_tls_descseq_addr (x, reg);
7590 tp = arm_load_tp (NULL_RTX);
7592 dest = gen_rtx_PLUS (Pmode, tp, reg);
7594 else
7596 /* Original scheme */
7597 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7598 dest = gen_reg_rtx (Pmode);
7599 emit_libcall_block (insns, dest, ret, x);
7601 return dest;
7603 case TLS_MODEL_LOCAL_DYNAMIC:
7604 if (TARGET_GNU2_TLS)
7606 reg = arm_tls_descseq_addr (x, reg);
7608 tp = arm_load_tp (NULL_RTX);
7610 dest = gen_rtx_PLUS (Pmode, tp, reg);
7612 else
7614 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7616 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7617 share the LDM result with other LD model accesses. */
7618 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7619 UNSPEC_TLS);
7620 dest = gen_reg_rtx (Pmode);
7621 emit_libcall_block (insns, dest, ret, eqv);
7623 /* Load the addend. */
7624 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7625 GEN_INT (TLS_LDO32)),
7626 UNSPEC_TLS);
7627 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7628 dest = gen_rtx_PLUS (Pmode, dest, addend);
7630 return dest;
7632 case TLS_MODEL_INITIAL_EXEC:
7633 labelno = GEN_INT (pic_labelno++);
7634 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7635 label = gen_rtx_CONST (VOIDmode, label);
7636 sum = gen_rtx_UNSPEC (Pmode,
7637 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7638 GEN_INT (TARGET_ARM ? 8 : 4)),
7639 UNSPEC_TLS);
7640 reg = load_tls_operand (sum, reg);
7642 if (TARGET_ARM)
7643 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7644 else if (TARGET_THUMB2)
7645 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7646 else
7648 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7649 emit_move_insn (reg, gen_const_mem (SImode, reg));
7652 tp = arm_load_tp (NULL_RTX);
7654 return gen_rtx_PLUS (Pmode, tp, reg);
7656 case TLS_MODEL_LOCAL_EXEC:
7657 tp = arm_load_tp (NULL_RTX);
7659 reg = gen_rtx_UNSPEC (Pmode,
7660 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7661 UNSPEC_TLS);
7662 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7664 return gen_rtx_PLUS (Pmode, tp, reg);
7666 default:
7667 abort ();
7671 /* Try machine-dependent ways of modifying an illegitimate address
7672 to be legitimate. If we find one, return the new, valid address. */
7674 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7676 if (arm_tls_referenced_p (x))
7678 rtx addend = NULL;
7680 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7682 addend = XEXP (XEXP (x, 0), 1);
7683 x = XEXP (XEXP (x, 0), 0);
7686 if (GET_CODE (x) != SYMBOL_REF)
7687 return x;
7689 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7691 x = legitimize_tls_address (x, NULL_RTX);
7693 if (addend)
7695 x = gen_rtx_PLUS (SImode, x, addend);
7696 orig_x = x;
7698 else
7699 return x;
7702 if (!TARGET_ARM)
7704 /* TODO: legitimize_address for Thumb2. */
7705 if (TARGET_THUMB2)
7706 return x;
7707 return thumb_legitimize_address (x, orig_x, mode);
7710 if (GET_CODE (x) == PLUS)
7712 rtx xop0 = XEXP (x, 0);
7713 rtx xop1 = XEXP (x, 1);
7715 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7716 xop0 = force_reg (SImode, xop0);
7718 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7719 && !symbol_mentioned_p (xop1))
7720 xop1 = force_reg (SImode, xop1);
7722 if (ARM_BASE_REGISTER_RTX_P (xop0)
7723 && CONST_INT_P (xop1))
7725 HOST_WIDE_INT n, low_n;
7726 rtx base_reg, val;
7727 n = INTVAL (xop1);
7729 /* VFP addressing modes actually allow greater offsets, but for
7730 now we just stick with the lowest common denominator. */
7731 if (mode == DImode
7732 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7734 low_n = n & 0x0f;
7735 n &= ~0x0f;
7736 if (low_n > 4)
7738 n += 16;
7739 low_n -= 16;
7742 else
7744 low_n = ((mode) == TImode ? 0
7745 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7746 n -= low_n;
7749 base_reg = gen_reg_rtx (SImode);
7750 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7751 emit_move_insn (base_reg, val);
7752 x = plus_constant (Pmode, base_reg, low_n);
7754 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7755 x = gen_rtx_PLUS (SImode, xop0, xop1);
7758 /* XXX We don't allow MINUS any more -- see comment in
7759 arm_legitimate_address_outer_p (). */
7760 else if (GET_CODE (x) == MINUS)
7762 rtx xop0 = XEXP (x, 0);
7763 rtx xop1 = XEXP (x, 1);
7765 if (CONSTANT_P (xop0))
7766 xop0 = force_reg (SImode, xop0);
7768 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7769 xop1 = force_reg (SImode, xop1);
7771 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7772 x = gen_rtx_MINUS (SImode, xop0, xop1);
7775 /* Make sure to take full advantage of the pre-indexed addressing mode
7776 with absolute addresses which often allows for the base register to
7777 be factorized for multiple adjacent memory references, and it might
7778 even allow for the mini pool to be avoided entirely. */
7779 else if (CONST_INT_P (x) && optimize > 0)
7781 unsigned int bits;
7782 HOST_WIDE_INT mask, base, index;
7783 rtx base_reg;
7785 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7786 use an 8-bit index. So let's use a 12-bit index for SImode only and
7787 hope that arm_gen_constant will enable ldrb to use more bits. */
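/* As an illustration of the splitting below: an SImode access to the
   constant address 0x1234 uses bits = 12, giving base = 0x1000 and
   index = 0x234; the base has only one bit set, so it is kept as-is
   and the final address becomes (base_reg + 0x234).  */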
7788 bits = (mode == SImode) ? 12 : 8;
7789 mask = (1 << bits) - 1;
7790 base = INTVAL (x) & ~mask;
7791 index = INTVAL (x) & mask;
7792 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7794 /* It'll most probably be more efficient to generate the base
7795 with more bits set and use a negative index instead. */
7796 base |= mask;
7797 index -= mask;
7799 base_reg = force_reg (SImode, GEN_INT (base));
7800 x = plus_constant (Pmode, base_reg, index);
7803 if (flag_pic)
7805 /* We need to find and carefully transform any SYMBOL and LABEL
7806 references; so go back to the original address expression. */
7807 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7809 if (new_x != orig_x)
7810 x = new_x;
7813 return x;
7817 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7818 to be legitimate. If we find one, return the new, valid address. */
7820 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7822 if (GET_CODE (x) == PLUS
7823 && CONST_INT_P (XEXP (x, 1))
7824 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7825 || INTVAL (XEXP (x, 1)) < 0))
7827 rtx xop0 = XEXP (x, 0);
7828 rtx xop1 = XEXP (x, 1);
7829 HOST_WIDE_INT offset = INTVAL (xop1);
7831 /* Try and fold the offset into a biasing of the base register and
7832 then offsetting that. Don't do this when optimizing for space
7833 since it can cause too many CSEs. */
7834 if (optimize_size && offset >= 0
7835 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7837 HOST_WIDE_INT delta;
7839 if (offset >= 256)
7840 delta = offset - (256 - GET_MODE_SIZE (mode));
7841 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7842 delta = 31 * GET_MODE_SIZE (mode);
7843 else
7844 delta = offset & (~31 * GET_MODE_SIZE (mode));
7846 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7847 NULL_RTX);
7848 x = plus_constant (Pmode, xop0, delta);
7850 else if (offset < 0 && offset > -256)
7851 /* Small negative offsets are best done with a subtract before the
7852 dereference; forcing these into a register normally takes two
7853 instructions. */
7854 x = force_operand (x, NULL_RTX);
7855 else
7857 /* For the remaining cases, force the constant into a register. */
7858 xop1 = force_reg (SImode, xop1);
7859 x = gen_rtx_PLUS (SImode, xop0, xop1);
7862 else if (GET_CODE (x) == PLUS
7863 && s_register_operand (XEXP (x, 1), SImode)
7864 && !s_register_operand (XEXP (x, 0), SImode))
7866 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7868 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7871 if (flag_pic)
7873 /* We need to find and carefully transform any SYMBOL and LABEL
7874 references; so go back to the original address expression. */
7875 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7877 if (new_x != orig_x)
7878 x = new_x;
7881 return x;
7884 bool
7885 arm_legitimize_reload_address (rtx *p,
7886 machine_mode mode,
7887 int opnum, int type,
7888 int ind_levels ATTRIBUTE_UNUSED)
7890 /* We must recognize output that we have already generated ourselves. */
7891 if (GET_CODE (*p) == PLUS
7892 && GET_CODE (XEXP (*p, 0)) == PLUS
7893 && REG_P (XEXP (XEXP (*p, 0), 0))
7894 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7895 && CONST_INT_P (XEXP (*p, 1)))
7897 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7898 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7899 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7900 return true;
7903 if (GET_CODE (*p) == PLUS
7904 && REG_P (XEXP (*p, 0))
7905 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7906 /* If the base register is equivalent to a constant, let the generic
7907 code handle it. Otherwise we will run into problems if a future
7908 reload pass decides to rematerialize the constant. */
7909 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7910 && CONST_INT_P (XEXP (*p, 1)))
7912 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7913 HOST_WIDE_INT low, high;
7915 /* Detect coprocessor load/stores. */
7916 bool coproc_p = ((TARGET_HARD_FLOAT
7917 && TARGET_VFP
7918 && (mode == SFmode || mode == DFmode))
7919 || (TARGET_REALLY_IWMMXT
7920 && VALID_IWMMXT_REG_MODE (mode))
7921 || (TARGET_NEON
7922 && (VALID_NEON_DREG_MODE (mode)
7923 || VALID_NEON_QREG_MODE (mode))));
7925 /* For some cases, bail out when the lower two bits of the offset are set, i.e. the offset is unaligned. */
7926 if ((val & 0x3) != 0
7927 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7928 && (coproc_p
7929 /* For DI, and DF under soft-float: */
7930 || ((mode == DImode || mode == DFmode)
7931 /* Without ldrd, we use stm/ldm, which does not
7932 fare well with unaligned bits. */
7933 && (! TARGET_LDRD
7934 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7935 || TARGET_THUMB2))))
7936 return false;
7938 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7939 of which the (reg+high) gets turned into a reload add insn,
7940 we try to decompose the index into high/low values that can often
7941 also lead to better reload CSE.
7942 For example:
7943 ldr r0, [r2, #4100] // Offset too large
7944 ldr r1, [r2, #4104] // Offset too large
7946 is best reloaded as:
7947 add t1, r2, #4096
7948 ldr r0, [t1, #4]
7949 add t2, r2, #4096
7950 ldr r1, [t2, #8]
7952 which post-reload CSE can simplify in most cases to eliminate the
7953 second add instruction:
7954 add t1, r2, #4096
7955 ldr r0, [t1, #4]
7956 ldr r1, [t1, #8]
7958 The idea here is that we want to split out the bits of the constant
7959 as a mask, rather than by subtracting the maximum offset that the
7960 respective type of load/store used can handle.
7962 A negative offset can still be utilized even if
7963 the overall offset is positive; sometimes this may lead to an immediate
7964 that can be constructed with fewer instructions.
7965 For example:
7966 ldr r0, [r2, #0x3FFFFC]
7968 This is best reloaded as:
7969 add t1, r2, #0x400000
7970 ldr r0, [t1, #-4]
7972 The trick for spotting this for a load insn with N bits of offset
7973 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7974 negative offset that is going to make bit N and all the bits below
7975 it become zero in the remainder part.
7977 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7978 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7979 used in most cases of ARM load/store instructions. */
7981 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7982 (((VAL) & ((1 << (N)) - 1)) \
7983 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7984 : 0)
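/* For the 0x3FFFFC example above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
   yields -4, leaving a high part of 0x400000, which is a valid
   single-instruction immediate.  */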
7986 if (coproc_p)
7988 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7990 /* NEON quad-word load/stores are made of two double-word accesses,
7991 so the valid index range is reduced by 8. Treat as 9-bit range if
7992 we go over it. */
7993 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7994 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7996 else if (GET_MODE_SIZE (mode) == 8)
7998 if (TARGET_LDRD)
7999 low = (TARGET_THUMB2
8000 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8001 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8002 else
8003 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8004 to access doublewords. The supported load/store offsets are
8005 -8, -4, and 4, which we try to produce here. */
8006 low = ((val & 0xf) ^ 0x8) - 0x8;
8008 else if (GET_MODE_SIZE (mode) < 8)
8010 /* NEON element load/stores do not have an offset. */
8011 if (TARGET_NEON_FP16 && mode == HFmode)
8012 return false;
8014 if (TARGET_THUMB2)
8016 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8017 Try the wider 12-bit range first, and re-try if the result
8018 is out of range. */
8019 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8020 if (low < -255)
8021 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8023 else
8025 if (mode == HImode || mode == HFmode)
8027 if (arm_arch4)
8028 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8029 else
8031 /* The storehi/movhi_bytes fallbacks can use only
8032 [-4094,+4094] of the full ldrb/strb index range. */
8033 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8034 if (low == 4095 || low == -4095)
8035 return false;
8038 else
8039 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8042 else
8043 return false;
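/* Sign-extend (val - low) from 32 bits so that, on a 64-bit host,
   high + low still reconstructs val exactly.  */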
8045 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8046 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8047 - (unsigned HOST_WIDE_INT) 0x80000000);
8048 /* Check for overflow or zero.  */
8049 if (low == 0 || high == 0 || (high + low != val))
8050 return false;
8052 /* Reload the high part into a base reg; leave the low part
8053 in the mem.
8054 Note that replacing this gen_rtx_PLUS with plus_constant is
8055 wrong in this case because we rely on the
8056 (plus (plus reg c1) c2) structure being preserved so that
8057 XEXP (*p, 0) in push_reload below uses the correct term. */
8058 *p = gen_rtx_PLUS (GET_MODE (*p),
8059 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8060 GEN_INT (high)),
8061 GEN_INT (low));
8062 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8063 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8064 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8065 return true;
8068 return false;
8072 thumb_legitimize_reload_address (rtx *x_p,
8073 machine_mode mode,
8074 int opnum, int type,
8075 int ind_levels ATTRIBUTE_UNUSED)
8077 rtx x = *x_p;
8079 if (GET_CODE (x) == PLUS
8080 && GET_MODE_SIZE (mode) < 4
8081 && REG_P (XEXP (x, 0))
8082 && XEXP (x, 0) == stack_pointer_rtx
8083 && CONST_INT_P (XEXP (x, 1))
8084 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8086 rtx orig_x = x;
8088 x = copy_rtx (x);
8089 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8090 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8091 return x;
8094 /* If both registers are hi-regs, then it's better to reload the
8095 entire expression rather than each register individually. That
8096 only requires one reload register rather than two. */
8097 if (GET_CODE (x) == PLUS
8098 && REG_P (XEXP (x, 0))
8099 && REG_P (XEXP (x, 1))
8100 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8101 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8103 rtx orig_x = x;
8105 x = copy_rtx (x);
8106 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8107 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8108 return x;
8111 return NULL;
8114 /* Return TRUE if X contains any TLS symbol references. */
8116 bool
8117 arm_tls_referenced_p (rtx x)
8119 if (! TARGET_HAVE_TLS)
8120 return false;
8122 subrtx_iterator::array_type array;
8123 FOR_EACH_SUBRTX (iter, array, x, ALL)
8125 const_rtx x = *iter;
8126 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8127 return true;
8129 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8130 TLS offsets, not real symbol references. */
8131 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8132 iter.skip_subrtxes ();
8134 return false;
8137 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8139 On the ARM, allow any integer (invalid ones are removed later by insn
8140 patterns), nice doubles and symbol_refs which refer to the function's
8141 constant pool XXX.
8143 When generating pic allow anything. */
8145 static bool
8146 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8148 /* At present, we have no support for Neon structure constants, so forbid
8149 them here. It might be possible to handle simple cases like 0 and -1
8150 in future. */
8151 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8152 return false;
8154 return flag_pic || !label_mentioned_p (x);
8157 static bool
8158 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8160 return (CONST_INT_P (x)
8161 || CONST_DOUBLE_P (x)
8162 || CONSTANT_ADDRESS_P (x)
8163 || flag_pic);
8166 static bool
8167 arm_legitimate_constant_p (machine_mode mode, rtx x)
8169 return (!arm_cannot_force_const_mem (mode, x)
8170 && (TARGET_32BIT
8171 ? arm_legitimate_constant_p_1 (mode, x)
8172 : thumb_legitimate_constant_p (mode, x)));
8175 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8177 static bool
8178 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8180 rtx base, offset;
8182 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8184 split_const (x, &base, &offset);
8185 if (GET_CODE (base) == SYMBOL_REF
8186 && !offset_within_block_p (base, INTVAL (offset)))
8187 return true;
8189 return arm_tls_referenced_p (x);
8192 #define REG_OR_SUBREG_REG(X) \
8193 (REG_P (X) \
8194 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8196 #define REG_OR_SUBREG_RTX(X) \
8197 (REG_P (X) ? (X) : SUBREG_REG (X))
8199 static inline int
8200 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8202 machine_mode mode = GET_MODE (x);
8203 int total, words;
8205 switch (code)
8207 case ASHIFT:
8208 case ASHIFTRT:
8209 case LSHIFTRT:
8210 case ROTATERT:
8211 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8213 case PLUS:
8214 case MINUS:
8215 case COMPARE:
8216 case NEG:
8217 case NOT:
8218 return COSTS_N_INSNS (1);
8220 case MULT:
8221 if (CONST_INT_P (XEXP (x, 1)))
8223 int cycles = 0;
8224 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
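/* Estimate roughly one extra cycle for every two significant bits of
   the constant multiplier.  */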
8226 while (i)
8228 i >>= 2;
8229 cycles++;
8231 return COSTS_N_INSNS (2) + cycles;
8233 return COSTS_N_INSNS (1) + 16;
8235 case SET:
8236 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8237 the mode. */
8238 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8239 return (COSTS_N_INSNS (words)
8240 + 4 * ((MEM_P (SET_SRC (x)))
8241 + MEM_P (SET_DEST (x))));
8243 case CONST_INT:
8244 if (outer == SET)
8246 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8247 return 0;
8248 if (thumb_shiftable_const (INTVAL (x)))
8249 return COSTS_N_INSNS (2);
8250 return COSTS_N_INSNS (3);
8252 else if ((outer == PLUS || outer == COMPARE)
8253 && INTVAL (x) < 256 && INTVAL (x) > -256)
8254 return 0;
8255 else if ((outer == IOR || outer == XOR || outer == AND)
8256 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8257 return COSTS_N_INSNS (1);
8258 else if (outer == AND)
8260 int i;
8261 /* This duplicates the tests in the andsi3 expander. */
8262 for (i = 9; i <= 31; i++)
8263 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8264 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8265 return COSTS_N_INSNS (2);
8267 else if (outer == ASHIFT || outer == ASHIFTRT
8268 || outer == LSHIFTRT)
8269 return 0;
8270 return COSTS_N_INSNS (2);
8272 case CONST:
8273 case CONST_DOUBLE:
8274 case LABEL_REF:
8275 case SYMBOL_REF:
8276 return COSTS_N_INSNS (3);
8278 case UDIV:
8279 case UMOD:
8280 case DIV:
8281 case MOD:
8282 return 100;
8284 case TRUNCATE:
8285 return 99;
8287 case AND:
8288 case XOR:
8289 case IOR:
8290 /* XXX guess. */
8291 return 8;
8293 case MEM:
8294 /* XXX another guess. */
8295 /* Memory costs quite a lot for the first word, but subsequent words
8296 load at the equivalent of a single insn each. */
8297 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8298 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8299 ? 4 : 0));
8301 case IF_THEN_ELSE:
8302 /* XXX a guess. */
8303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8304 return 14;
8305 return 2;
8307 case SIGN_EXTEND:
8308 case ZERO_EXTEND:
8309 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8310 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8312 if (mode == SImode)
8313 return total;
8315 if (arm_arch6)
8316 return total + COSTS_N_INSNS (1);
8318 /* Assume a two-shift sequence. Increase the cost slightly so
8319 we prefer actual shifts over an extend operation. */
8320 return total + 1 + COSTS_N_INSNS (2);
8322 default:
8323 return 99;
8327 static inline bool
8328 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8330 machine_mode mode = GET_MODE (x);
8331 enum rtx_code subcode;
8332 rtx operand;
8333 enum rtx_code code = GET_CODE (x);
8334 *total = 0;
8336 switch (code)
8338 case MEM:
8339 /* Memory costs quite a lot for the first word, but subsequent words
8340 load at the equivalent of a single insn each. */
8341 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8342 return true;
8344 case DIV:
8345 case MOD:
8346 case UDIV:
8347 case UMOD:
8348 if (TARGET_HARD_FLOAT && mode == SFmode)
8349 *total = COSTS_N_INSNS (2);
8350 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8351 *total = COSTS_N_INSNS (4);
8352 else
8353 *total = COSTS_N_INSNS (20);
8354 return false;
8356 case ROTATE:
8357 if (REG_P (XEXP (x, 1)))
8358 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8359 else if (!CONST_INT_P (XEXP (x, 1)))
8360 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8362 /* Fall through */
8363 case ROTATERT:
8364 if (mode != SImode)
8366 *total += COSTS_N_INSNS (4);
8367 return true;
8370 /* Fall through */
8371 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8372 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8373 if (mode == DImode)
8375 *total += COSTS_N_INSNS (3);
8376 return true;
8379 *total += COSTS_N_INSNS (1);
8380 /* Increase the cost of complex shifts because they aren't any faster,
8381 and reduce dual issue opportunities. */
8382 if (arm_tune_cortex_a9
8383 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8384 ++*total;
8386 return true;
8388 case MINUS:
8389 if (mode == DImode)
8391 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8392 if (CONST_INT_P (XEXP (x, 0))
8393 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8395 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8396 return true;
8399 if (CONST_INT_P (XEXP (x, 1))
8400 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8402 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8403 return true;
8406 return false;
8409 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8411 if (TARGET_HARD_FLOAT
8412 && (mode == SFmode
8413 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8415 *total = COSTS_N_INSNS (1);
8416 if (CONST_DOUBLE_P (XEXP (x, 0))
8417 && arm_const_double_rtx (XEXP (x, 0)))
8419 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8420 return true;
8423 if (CONST_DOUBLE_P (XEXP (x, 1))
8424 && arm_const_double_rtx (XEXP (x, 1)))
8426 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8427 return true;
8430 return false;
8432 *total = COSTS_N_INSNS (20);
8433 return false;
8436 *total = COSTS_N_INSNS (1);
8437 if (CONST_INT_P (XEXP (x, 0))
8438 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8440 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8441 return true;
8444 subcode = GET_CODE (XEXP (x, 1));
8445 if (subcode == ASHIFT || subcode == ASHIFTRT
8446 || subcode == LSHIFTRT
8447 || subcode == ROTATE || subcode == ROTATERT)
8449 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8450 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8451 return true;
8454 /* A shift as a part of RSB costs no more than RSB itself. */
8455 if (GET_CODE (XEXP (x, 0)) == MULT
8456 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8458 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8459 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8460 return true;
8463 if (subcode == MULT
8464 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8466 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8467 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8468 return true;
8471 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8472 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8474 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8475 if (REG_P (XEXP (XEXP (x, 1), 0))
8476 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8477 *total += COSTS_N_INSNS (1);
8479 return true;
8482 /* Fall through */
8484 case PLUS:
8485 if (code == PLUS && arm_arch6 && mode == SImode
8486 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8487 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8489 *total = COSTS_N_INSNS (1);
8490 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8491 0, speed);
8492 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8493 return true;
8496 /* MLA: All arguments must be registers. We filter out
8497 multiplication by a power of two, so that we fall through to
8498 the code below. */
8499 if (GET_CODE (XEXP (x, 0)) == MULT
8500 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8502 /* The cost comes from the cost of the multiply. */
8503 return false;
8506 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8508 if (TARGET_HARD_FLOAT
8509 && (mode == SFmode
8510 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8512 *total = COSTS_N_INSNS (1);
8513 if (CONST_DOUBLE_P (XEXP (x, 1))
8514 && arm_const_double_rtx (XEXP (x, 1)))
8516 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8517 return true;
8520 return false;
8523 *total = COSTS_N_INSNS (20);
8524 return false;
8527 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8528 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8530 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8531 if (REG_P (XEXP (XEXP (x, 0), 0))
8532 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8533 *total += COSTS_N_INSNS (1);
8534 return true;
8537 /* Fall through */
8539 case AND: case XOR: case IOR:
8541 /* Normally the frame registers will be spilt into reg+const during
8542 reload, so it is a bad idea to combine them with other instructions,
8543 since then they might not be moved outside of loops. As a compromise
8544 we allow integration with ops that have a constant as their second
8545 operand. */
8546 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8547 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8548 && !CONST_INT_P (XEXP (x, 1)))
8549 *total = COSTS_N_INSNS (1);
8551 if (mode == DImode)
8553 *total += COSTS_N_INSNS (2);
8554 if (CONST_INT_P (XEXP (x, 1))
8555 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8557 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8558 return true;
8561 return false;
8564 *total += COSTS_N_INSNS (1);
8565 if (CONST_INT_P (XEXP (x, 1))
8566 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8568 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8569 return true;
8571 subcode = GET_CODE (XEXP (x, 0));
8572 if (subcode == ASHIFT || subcode == ASHIFTRT
8573 || subcode == LSHIFTRT
8574 || subcode == ROTATE || subcode == ROTATERT)
8576 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8577 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8578 return true;
8581 if (subcode == MULT
8582 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8584 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8586 return true;
8589 if (subcode == UMIN || subcode == UMAX
8590 || subcode == SMIN || subcode == SMAX)
8592 *total = COSTS_N_INSNS (3);
8593 return true;
8596 return false;
8598 case MULT:
8599 /* This should have been handled by the CPU specific routines. */
8600 gcc_unreachable ();
8602 case TRUNCATE:
8603 if (arm_arch3m && mode == SImode
8604 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8605 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8606 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8607 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8608 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8609 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8611 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8612 return true;
8614 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8615 return false;
8617 case NEG:
8618 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8620 if (TARGET_HARD_FLOAT
8621 && (mode == SFmode
8622 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8624 *total = COSTS_N_INSNS (1);
8625 return false;
8627 *total = COSTS_N_INSNS (2);
8628 return false;
8631 /* Fall through */
8632 case NOT:
8633 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8634 if (mode == SImode && code == NOT)
8636 subcode = GET_CODE (XEXP (x, 0));
8637 if (subcode == ASHIFT || subcode == ASHIFTRT
8638 || subcode == LSHIFTRT
8639 || subcode == ROTATE || subcode == ROTATERT
8640 || (subcode == MULT
8641 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8644 /* Register shifts cost an extra cycle. */
8645 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8646 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8647 subcode, 1, speed);
8648 return true;
8652 return false;
8654 case IF_THEN_ELSE:
8655 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8657 *total = COSTS_N_INSNS (4);
8658 return true;
8661 operand = XEXP (x, 0);
8663 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8664 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8665 && REG_P (XEXP (operand, 0))
8666 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8667 *total += COSTS_N_INSNS (1);
8668 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8669 + rtx_cost (XEXP (x, 2), code, 2, speed));
8670 return true;
8672 case NE:
8673 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8675 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8676 return true;
8678 goto scc_insn;
8680 case GE:
8681 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8682 && mode == SImode && XEXP (x, 1) == const0_rtx)
8684 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8685 return true;
8687 goto scc_insn;
8689 case LT:
8690 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8691 && mode == SImode && XEXP (x, 1) == const0_rtx)
8693 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8694 return true;
8696 goto scc_insn;
8698 case EQ:
8699 case GT:
8700 case LE:
8701 case GEU:
8702 case LTU:
8703 case GTU:
8704 case LEU:
8705 case UNORDERED:
8706 case ORDERED:
8707 case UNEQ:
8708 case UNGE:
8709 case UNLT:
8710 case UNGT:
8711 case UNLE:
8712 scc_insn:
8713 /* SCC insns. If the comparison has already been performed,
8714 they cost 2 instructions. Otherwise they need
8715 an additional comparison before them. */
8716 *total = COSTS_N_INSNS (2);
8717 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8719 return true;
8722 /* Fall through */
8723 case COMPARE:
8724 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8726 *total = 0;
8727 return true;
8730 *total += COSTS_N_INSNS (1);
8731 if (CONST_INT_P (XEXP (x, 1))
8732 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8734 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8735 return true;
8738 subcode = GET_CODE (XEXP (x, 0));
8739 if (subcode == ASHIFT || subcode == ASHIFTRT
8740 || subcode == LSHIFTRT
8741 || subcode == ROTATE || subcode == ROTATERT)
8743 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8744 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8745 return true;
8748 if (subcode == MULT
8749 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8751 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8752 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8753 return true;
8756 return false;
8758 case UMIN:
8759 case UMAX:
8760 case SMIN:
8761 case SMAX:
8762 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8763 if (!CONST_INT_P (XEXP (x, 1))
8764 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8765 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8766 return true;
8768 case ABS:
8769 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8771 if (TARGET_HARD_FLOAT
8772 && (mode == SFmode
8773 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8775 *total = COSTS_N_INSNS (1);
8776 return false;
8778 *total = COSTS_N_INSNS (20);
8779 return false;
8781 *total = COSTS_N_INSNS (1);
8782 if (mode == DImode)
8783 *total += COSTS_N_INSNS (3);
8784 return false;
8786 case SIGN_EXTEND:
8787 case ZERO_EXTEND:
8788 *total = 0;
8789 if (GET_MODE_CLASS (mode) == MODE_INT)
8791 rtx op = XEXP (x, 0);
8792 machine_mode opmode = GET_MODE (op);
8794 if (mode == DImode)
8795 *total += COSTS_N_INSNS (1);
8797 if (opmode != SImode)
8799 if (MEM_P (op))
8801 /* If !arm_arch4, we use one of the extendhisi2_mem
8802 or movhi_bytes patterns for HImode. For a QImode
8803 sign extension, we first zero-extend from memory
8804 and then perform a shift sequence. */
8805 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8806 *total += COSTS_N_INSNS (2);
8808 else if (arm_arch6)
8809 *total += COSTS_N_INSNS (1);
8811 /* We don't have the necessary insn, so we need to perform some
8812 other operation. */
8813 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8814 /* An and with constant 255. */
8815 *total += COSTS_N_INSNS (1);
8816 else
8817 /* A shift sequence. Increase costs slightly to avoid
8818 combining two shifts into an extend operation. */
8819 *total += COSTS_N_INSNS (2) + 1;
8822 return false;
8825 switch (GET_MODE (XEXP (x, 0)))
8827 case V8QImode:
8828 case V4HImode:
8829 case V2SImode:
8830 case V4QImode:
8831 case V2HImode:
8832 *total = COSTS_N_INSNS (1);
8833 return false;
8835 default:
8836 gcc_unreachable ();
8838 gcc_unreachable ();
8840 case ZERO_EXTRACT:
8841 case SIGN_EXTRACT:
8842 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8843 return true;
8845 case CONST_INT:
8846 if (const_ok_for_arm (INTVAL (x))
8847 || const_ok_for_arm (~INTVAL (x)))
8848 *total = COSTS_N_INSNS (1);
8849 else
8850 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8851 INTVAL (x), NULL_RTX,
8852 NULL_RTX, 0, 0));
8853 return true;
8855 case CONST:
8856 case LABEL_REF:
8857 case SYMBOL_REF:
8858 *total = COSTS_N_INSNS (3);
8859 return true;
8861 case HIGH:
8862 *total = COSTS_N_INSNS (1);
8863 return true;
8865 case LO_SUM:
8866 *total = COSTS_N_INSNS (1);
8867 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8868 return true;
8870 case CONST_DOUBLE:
8871 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8872 && (mode == SFmode || !TARGET_VFP_SINGLE))
8873 *total = COSTS_N_INSNS (1);
8874 else
8875 *total = COSTS_N_INSNS (4);
8876 return true;
8878 case SET:
8879 /* The vec_extract patterns accept memory operands that require an
8880 address reload. Account for the cost of that reload to give the
8881 auto-inc-dec pass an incentive to try to replace them. */
8882 if (TARGET_NEON && MEM_P (SET_DEST (x))
8883 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8885 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8886 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8887 *total += COSTS_N_INSNS (1);
8888 return true;
8890 /* Likewise for the vec_set patterns. */
8891 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8892 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8893 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8895 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8896 *total = rtx_cost (mem, code, 0, speed);
8897 if (!neon_vector_mem_operand (mem, 2, true))
8898 *total += COSTS_N_INSNS (1);
8899 return true;
8901 return false;
8903 case UNSPEC:
8904 /* We cost this the same as a memory access so that it can
8905 be hoisted out of loops. */
8906 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8908 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8910 return true;
8912 case CONST_VECTOR:
8913 if (TARGET_NEON
8914 && TARGET_HARD_FLOAT
8915 && outer == SET
8916 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8917 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8918 *total = COSTS_N_INSNS (1);
8919 else
8920 *total = COSTS_N_INSNS (4);
8921 return true;
8923 default:
8924 *total = COSTS_N_INSNS (4);
8925 return false;
8929 /* Estimates the size cost of thumb1 instructions.
8930 For now most of the code is copied from thumb1_rtx_costs. We need more
8931 fine-grained tuning when we have more related test cases. */
8932 static inline int
8933 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8935 machine_mode mode = GET_MODE (x);
8936 int words;
8938 switch (code)
8940 case ASHIFT:
8941 case ASHIFTRT:
8942 case LSHIFTRT:
8943 case ROTATERT:
8944 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8946 case PLUS:
8947 case MINUS:
8948 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8949 defined by RTL expansion, especially for the expansion of
8950 multiplication. */
8951 if ((GET_CODE (XEXP (x, 0)) == MULT
8952 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8953 || (GET_CODE (XEXP (x, 1)) == MULT
8954 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8955 return COSTS_N_INSNS (2);
8956 /* Deliberately fall through for a normal RTX. */
8957 case COMPARE:
8958 case NEG:
8959 case NOT:
8960 return COSTS_N_INSNS (1);
8962 case MULT:
8963 if (CONST_INT_P (XEXP (x, 1)))
8965 /* The Thumb1 mul instruction can't operate on a constant; we must load it
8966 into a register first. */
8967 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8968 /* For the targets which have a very small and high-latency multiply
8969 unit, we prefer to synthesize the mult with up to 5 instructions,
8970 giving a good balance between size and performance. */
8971 if (arm_arch6m && arm_m_profile_small_mul)
8972 return COSTS_N_INSNS (5);
8973 else
8974 return COSTS_N_INSNS (1) + const_size;
8976 return COSTS_N_INSNS (1);
8978 case SET:
8979 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8980 the mode. */
8981 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8982 return COSTS_N_INSNS (words)
8983 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8984 || satisfies_constraint_K (SET_SRC (x))
8985 /* thumb1_movdi_insn. */
8986 || ((words > 1) && MEM_P (SET_SRC (x))));
8988 case CONST_INT:
8989 if (outer == SET)
8991 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8992 return COSTS_N_INSNS (1);
8993 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8994 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8995 return COSTS_N_INSNS (2);
8996 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8997 if (thumb_shiftable_const (INTVAL (x)))
8998 return COSTS_N_INSNS (2);
8999 return COSTS_N_INSNS (3);
9001 else if ((outer == PLUS || outer == COMPARE)
9002 && INTVAL (x) < 256 && INTVAL (x) > -256)
9003 return 0;
9004 else if ((outer == IOR || outer == XOR || outer == AND)
9005 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9006 return COSTS_N_INSNS (1);
9007 else if (outer == AND)
9009 int i;
9010 /* This duplicates the tests in the andsi3 expander. */
9011 for (i = 9; i <= 31; i++)
9012 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9013 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9014 return COSTS_N_INSNS (2);
9016 else if (outer == ASHIFT || outer == ASHIFTRT
9017 || outer == LSHIFTRT)
9018 return 0;
9019 return COSTS_N_INSNS (2);
9021 case CONST:
9022 case CONST_DOUBLE:
9023 case LABEL_REF:
9024 case SYMBOL_REF:
9025 return COSTS_N_INSNS (3);
9027 case UDIV:
9028 case UMOD:
9029 case DIV:
9030 case MOD:
9031 return 100;
9033 case TRUNCATE:
9034 return 99;
9036 case AND:
9037 case XOR:
9038 case IOR:
9039 return COSTS_N_INSNS (1);
9041 case MEM:
9042 return (COSTS_N_INSNS (1)
9043 + COSTS_N_INSNS (1)
9044 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9045 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9046 ? COSTS_N_INSNS (1) : 0));
9048 case IF_THEN_ELSE:
9049 /* XXX a guess. */
9050 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9051 return 14;
9052 return 2;
9054 case ZERO_EXTEND:
9055 /* XXX still guessing. */
9056 switch (GET_MODE (XEXP (x, 0)))
9058 case QImode:
9059 return (1 + (mode == DImode ? 4 : 0)
9060 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9062 case HImode:
9063 return (4 + (mode == DImode ? 4 : 0)
9064 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9066 case SImode:
9067 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9069 default:
9070 return 99;
9073 default:
9074 return 99;
9078 /* RTX costs when optimizing for size. */
9079 static bool
9080 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9081 int *total)
9083 machine_mode mode = GET_MODE (x);
9084 if (TARGET_THUMB1)
9086 *total = thumb1_size_rtx_costs (x, code, outer_code);
9087 return true;
9090 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9091 switch (code)
9093 case MEM:
9094 /* A memory access costs 1 insn if the mode is small, or the address is
9095 a single register; otherwise it costs one insn per word. */
9096 if (REG_P (XEXP (x, 0)))
9097 *total = COSTS_N_INSNS (1);
9098 else if (flag_pic
9099 && GET_CODE (XEXP (x, 0)) == PLUS
9100 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9101 /* This will be split into two instructions.
9102 See arm.md:calculate_pic_address. */
9103 *total = COSTS_N_INSNS (2);
9104 else
9105 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9106 return true;
9108 case DIV:
9109 case MOD:
9110 case UDIV:
9111 case UMOD:
9112 /* Needs a libcall, so it costs about this. */
9113 *total = COSTS_N_INSNS (2);
9114 return false;
9116 case ROTATE:
9117 if (mode == SImode && REG_P (XEXP (x, 1)))
9119 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9120 return true;
9122 /* Fall through */
9123 case ROTATERT:
9124 case ASHIFT:
9125 case LSHIFTRT:
9126 case ASHIFTRT:
9127 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9129 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9130 return true;
9132 else if (mode == SImode)
9134 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9135 /* Slightly disparage register shifts, but not by much. */
9136 if (!CONST_INT_P (XEXP (x, 1)))
9137 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9138 return true;
9141 /* Needs a libcall. */
9142 *total = COSTS_N_INSNS (2);
9143 return false;
9145 case MINUS:
9146 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9147 && (mode == SFmode || !TARGET_VFP_SINGLE))
9149 *total = COSTS_N_INSNS (1);
9150 return false;
9153 if (mode == SImode)
9155 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9156 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9158 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9159 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9160 || subcode1 == ROTATE || subcode1 == ROTATERT
9161 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9162 || subcode1 == ASHIFTRT)
9164 /* It's just the cost of the two operands. */
9165 *total = 0;
9166 return false;
9169 *total = COSTS_N_INSNS (1);
9170 return false;
9173 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9174 return false;
9176 case PLUS:
9177 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9178 && (mode == SFmode || !TARGET_VFP_SINGLE))
9180 *total = COSTS_N_INSNS (1);
9181 return false;
9184 /* A shift as a part of ADD costs nothing. */
9185 if (GET_CODE (XEXP (x, 0)) == MULT
9186 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9188 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9189 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9190 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9191 return true;
9194 /* Fall through */
9195 case AND: case XOR: case IOR:
9196 if (mode == SImode)
9198 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9200 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9201 || subcode == LSHIFTRT || subcode == ASHIFTRT
9202 || (code == AND && subcode == NOT))
9204 /* It's just the cost of the two operands. */
9205 *total = 0;
9206 return false;
9210 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9211 return false;
9213 case MULT:
9214 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9215 return false;
9217 case NEG:
9218 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9219 && (mode == SFmode || !TARGET_VFP_SINGLE))
9221 *total = COSTS_N_INSNS (1);
9222 return false;
9225 /* Fall through */
9226 case NOT:
9227 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9229 return false;
9231 case IF_THEN_ELSE:
9232 *total = 0;
9233 return false;
9235 case COMPARE:
9236 if (cc_register (XEXP (x, 0), VOIDmode))
9237 *total = 0;
9238 else
9239 *total = COSTS_N_INSNS (1);
9240 return false;
9242 case ABS:
9243 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9244 && (mode == SFmode || !TARGET_VFP_SINGLE))
9245 *total = COSTS_N_INSNS (1);
9246 else
9247 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9248 return false;
9250 case SIGN_EXTEND:
9251 case ZERO_EXTEND:
9252 return arm_rtx_costs_1 (x, outer_code, total, 0);
9254 case CONST_INT:
9255 if (const_ok_for_arm (INTVAL (x)))
9256 /* A multiplication by a constant requires another instruction
9257 to load the constant to a register. */
9258 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9259 ? 1 : 0);
9260 else if (const_ok_for_arm (~INTVAL (x)))
9261 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9262 else if (const_ok_for_arm (-INTVAL (x)))
9264 if (outer_code == COMPARE || outer_code == PLUS
9265 || outer_code == MINUS)
9266 *total = 0;
9267 else
9268 *total = COSTS_N_INSNS (1);
9270 else
9271 *total = COSTS_N_INSNS (2);
9272 return true;
9274 case CONST:
9275 case LABEL_REF:
9276 case SYMBOL_REF:
9277 *total = COSTS_N_INSNS (2);
9278 return true;
9280 case CONST_DOUBLE:
9281 *total = COSTS_N_INSNS (4);
9282 return true;
9284 case CONST_VECTOR:
9285 if (TARGET_NEON
9286 && TARGET_HARD_FLOAT
9287 && outer_code == SET
9288 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9289 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9290 *total = COSTS_N_INSNS (1);
9291 else
9292 *total = COSTS_N_INSNS (4);
9293 return true;
9295 case HIGH:
9296 case LO_SUM:
9297 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9298 cost of these slightly. */
9299 *total = COSTS_N_INSNS (1) + 1;
9300 return true;
9302 case SET:
9303 return false;
9305 default:
9306 if (mode != VOIDmode)
9307 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9308 else
9309 *total = COSTS_N_INSNS (4); /* Who knows? */
9310 return false;
9314 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9315 operand, then return the operand that is being shifted. If the shift
9316 is not by a constant, then set SHIFT_REG to point to the operand.
9317 Return NULL if OP is not a shifter operand. */
9318 static rtx
9319 shifter_op_p (rtx op, rtx *shift_reg)
9321 enum rtx_code code = GET_CODE (op);
9323 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9324 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9325 return XEXP (op, 0);
9326 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9327 return XEXP (op, 0);
9328 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9329 || code == ASHIFTRT)
9331 if (!CONST_INT_P (XEXP (op, 1)))
9332 *shift_reg = XEXP (op, 1);
9333 return XEXP (op, 0);
9336 return NULL;
9339 static bool
9340 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9342 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9343 gcc_assert (GET_CODE (x) == UNSPEC);
9345 switch (XINT (x, 1))
9347 case UNSPEC_UNALIGNED_LOAD:
9348 /* We can only do unaligned loads into the integer unit, and we can't
9349 use LDM or LDRD. */
9350 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9351 if (speed_p)
9352 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9353 + extra_cost->ldst.load_unaligned);
9355 #ifdef NOT_YET
9356 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9357 ADDR_SPACE_GENERIC, speed_p);
9358 #endif
9359 return true;
9361 case UNSPEC_UNALIGNED_STORE:
9362 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9363 if (speed_p)
9364 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9365 + extra_cost->ldst.store_unaligned);
9367 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9368 #ifdef NOT_YET
9369 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9370 ADDR_SPACE_GENERIC, speed_p);
9371 #endif
9372 return true;
9374 case UNSPEC_VRINTZ:
9375 case UNSPEC_VRINTP:
9376 case UNSPEC_VRINTM:
9377 case UNSPEC_VRINTR:
9378 case UNSPEC_VRINTX:
9379 case UNSPEC_VRINTA:
9380 *cost = COSTS_N_INSNS (1);
9381 if (speed_p)
9382 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9384 return true;
9385 default:
9386 *cost = COSTS_N_INSNS (2);
9387 break;
9389 return false;
9392 /* Cost of a libcall. We assume one insn per argument, an amount for the
9393 call (one insn for -Os) and then one for processing the result. */
9394 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
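/* Helper for the cost calculations below: if operand IDX of X is a left
   shift (or an equivalent power-of-two multiply) feeding the arithmetic
   operation OP, add the combined shift-and-operate cost together with
   the costs of both operands, then return from the calling function.  */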
9396 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9397 do \
9399 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9400 if (shift_op != NULL \
9401 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9403 if (shift_reg) \
9405 if (speed_p) \
9406 *cost += extra_cost->alu.arith_shift_reg; \
9407 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9409 else if (speed_p) \
9410 *cost += extra_cost->alu.arith_shift; \
9412 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9413 + rtx_cost (XEXP (x, 1 - IDX), \
9414 OP, 1, speed_p)); \
9415 return true; \
9418 while (0);
9420 /* RTX costs. Make an estimate of the cost of executing the operation
9421 X, which is contained within an operation with code OUTER_CODE.
9422 SPEED_P indicates whether the cost desired is the performance cost,
9423 or the size cost. The estimate is stored in COST and the return
9424 value is TRUE if the cost calculation is final, or FALSE if the
9425 caller should recurse through the operands of X to add additional
9426 costs.
9428 We currently make no attempt to model the size savings of Thumb-2
9429 16-bit instructions. At the normal points in compilation where
9430 this code is called we have no measure of whether the condition
9431 flags are live or not, and thus no realistic way to determine what
9432 the size will eventually be. */
9433 static bool
9434 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9435 const struct cpu_cost_table *extra_cost,
9436 int *cost, bool speed_p)
9438 machine_mode mode = GET_MODE (x);
9440 if (TARGET_THUMB1)
9442 if (speed_p)
9443 *cost = thumb1_rtx_costs (x, code, outer_code);
9444 else
9445 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9446 return true;
9449 switch (code)
9451 case SET:
9452 *cost = 0;
9453 /* SET RTXs don't have a mode so we get it from the destination. */
9454 mode = GET_MODE (SET_DEST (x));
9456 if (REG_P (SET_SRC (x))
9457 && REG_P (SET_DEST (x)))
9459 /* Assume that most copies can be done with a single insn,
9460 unless we don't have HW FP, in which case everything
9461 larger than word mode will require two insns. */
9462 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9463 && GET_MODE_SIZE (mode) > 4)
9464 || mode == DImode)
9465 ? 2 : 1);
9466 /* Conditional register moves can be encoded
9467 in 16 bits in Thumb mode. */
9468 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9469 *cost >>= 1;
9471 return true;
9474 if (CONST_INT_P (SET_SRC (x)))
9476 /* Handle CONST_INT here, since the value doesn't have a mode
9477 and we would otherwise be unable to work out the true cost. */
9478 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9479 outer_code = SET;
9480 /* Slightly lower the cost of setting a core reg to a constant.
9481 This helps break up chains and allows for better scheduling. */
9482 if (REG_P (SET_DEST (x))
9483 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9484 *cost -= 1;
9485 x = SET_SRC (x);
9486 /* Immediate moves with an immediate in the range [0, 255] can be
9487 encoded in 16 bits in Thumb mode. */
9488 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9489 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9490 *cost >>= 1;
9491 goto const_int_cost;
9494 return false;
9496 case MEM:
9497 /* A memory access costs 1 insn if the mode is small, or the address is
9498 a single register, otherwise it costs one insn per word. */
9499 if (REG_P (XEXP (x, 0)))
9500 *cost = COSTS_N_INSNS (1);
9501 else if (flag_pic
9502 && GET_CODE (XEXP (x, 0)) == PLUS
9503 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9504 /* This will be split into two instructions.
9505 See arm.md:calculate_pic_address. */
9506 *cost = COSTS_N_INSNS (2);
9507 else
9508 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9510 /* For speed optimizations, add the costs of the address and
9511 accessing memory. */
9512 if (speed_p)
9513 #ifdef NOT_YET
9514 *cost += (extra_cost->ldst.load
9515 + arm_address_cost (XEXP (x, 0), mode,
9516 ADDR_SPACE_GENERIC, speed_p));
9517 #else
9518 *cost += extra_cost->ldst.load;
9519 #endif
9520 return true;
9522 case PARALLEL:
9524 /* Calculations of LDM costs are complex. We assume an initial cost
9525 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9526 registers; each additional group of ldm_regs_per_insn_subsequent
9527 registers then costs one more insn. The
9528 formula for N regs is thus:
9530 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9531 + ldm_regs_per_insn_subsequent - 1)
9532 / ldm_regs_per_insn_subsequent).
9534 Additional costs may also be added for addressing. A similar
9535 formula is used for STM. */
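/* Worked example of the formula above, using hypothetical tuning values
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2: loading
   N == 6 registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */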
9537 bool is_ldm = load_multiple_operation (x, SImode);
9538 bool is_stm = store_multiple_operation (x, SImode);
9540 *cost = COSTS_N_INSNS (1);
9542 if (is_ldm || is_stm)
9544 if (speed_p)
9546 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9547 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9548 ? extra_cost->ldst.ldm_regs_per_insn_1st
9549 : extra_cost->ldst.stm_regs_per_insn_1st;
9550 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9551 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9552 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9554 *cost += regs_per_insn_1st
9555 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9556 + regs_per_insn_sub - 1)
9557 / regs_per_insn_sub);
9558 return true;
9562 return false;
9564 case DIV:
9565 case UDIV:
9566 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9567 && (mode == SFmode || !TARGET_VFP_SINGLE))
9568 *cost = COSTS_N_INSNS (speed_p
9569 ? extra_cost->fp[mode != SFmode].div : 1);
9570 else if (mode == SImode && TARGET_IDIV)
9571 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9572 else
9573 *cost = LIBCALL_COST (2);
9574 return false; /* All arguments must be in registers. */
9576 case MOD:
9577 case UMOD:
9578 *cost = LIBCALL_COST (2);
9579 return false; /* All arguments must be in registers. */
9581 case ROTATE:
9582 if (mode == SImode && REG_P (XEXP (x, 1)))
9584 *cost = (COSTS_N_INSNS (2)
9585 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9586 if (speed_p)
9587 *cost += extra_cost->alu.shift_reg;
9588 return true;
9590 /* Fall through */
9591 case ROTATERT:
9592 case ASHIFT:
9593 case LSHIFTRT:
9594 case ASHIFTRT:
9595 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9597 *cost = (COSTS_N_INSNS (3)
9598 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9599 if (speed_p)
9600 *cost += 2 * extra_cost->alu.shift;
9601 return true;
9603 else if (mode == SImode)
9605 *cost = (COSTS_N_INSNS (1)
9606 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9607 /* Slightly disparage register shifts at -Os, but not by much. */
9608 if (!CONST_INT_P (XEXP (x, 1)))
9609 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9610 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9611 return true;
9613 else if (GET_MODE_CLASS (mode) == MODE_INT
9614 && GET_MODE_SIZE (mode) < 4)
9616 if (code == ASHIFT)
9618 *cost = (COSTS_N_INSNS (1)
9619 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9620 /* Slightly disparage register shifts at -Os, but not by
9621 much. */
9622 if (!CONST_INT_P (XEXP (x, 1)))
9623 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9624 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9626 else if (code == LSHIFTRT || code == ASHIFTRT)
9628 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9630 /* Can use SBFX/UBFX. */
9631 *cost = COSTS_N_INSNS (1);
9632 if (speed_p)
9633 *cost += extra_cost->alu.bfx;
9634 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9636 else
9638 *cost = COSTS_N_INSNS (2);
9639 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9640 if (speed_p)
9642 if (CONST_INT_P (XEXP (x, 1)))
9643 *cost += 2 * extra_cost->alu.shift;
9644 else
9645 *cost += (extra_cost->alu.shift
9646 + extra_cost->alu.shift_reg);
9648 else
9649 /* Slightly disparage register shifts. */
9650 *cost += !CONST_INT_P (XEXP (x, 1));
9653 else /* Rotates. */
9655 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9656 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9657 if (speed_p)
9659 if (CONST_INT_P (XEXP (x, 1)))
9660 *cost += (2 * extra_cost->alu.shift
9661 + extra_cost->alu.log_shift);
9662 else
9663 *cost += (extra_cost->alu.shift
9664 + extra_cost->alu.shift_reg
9665 + extra_cost->alu.log_shift_reg);
9668 return true;
9671 *cost = LIBCALL_COST (2);
9672 return false;
9674 case BSWAP:
9675 if (arm_arch6)
9677 if (mode == SImode)
9679 *cost = COSTS_N_INSNS (1);
9680 if (speed_p)
9681 *cost += extra_cost->alu.rev;
9683 return false;
9686 else
9688 /* No rev instruction available. Look at arm_legacy_rev
9689 and thumb_legacy_rev for the form of RTL used then. */
9690 if (TARGET_THUMB)
9692 *cost = COSTS_N_INSNS (10);
9694 if (speed_p)
9696 *cost += 6 * extra_cost->alu.shift;
9697 *cost += 3 * extra_cost->alu.logical;
9700 else
9702 *cost = COSTS_N_INSNS (5);
9704 if (speed_p)
9706 *cost += 2 * extra_cost->alu.shift;
9707 *cost += extra_cost->alu.arith_shift;
9708 *cost += 2 * extra_cost->alu.logical;
9711 return true;
9713 return false;
9715 case MINUS:
9716 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9717 && (mode == SFmode || !TARGET_VFP_SINGLE))
9719 *cost = COSTS_N_INSNS (1);
9720 if (GET_CODE (XEXP (x, 0)) == MULT
9721 || GET_CODE (XEXP (x, 1)) == MULT)
9723 rtx mul_op0, mul_op1, sub_op;
9725 if (speed_p)
9726 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9728 if (GET_CODE (XEXP (x, 0)) == MULT)
9730 mul_op0 = XEXP (XEXP (x, 0), 0);
9731 mul_op1 = XEXP (XEXP (x, 0), 1);
9732 sub_op = XEXP (x, 1);
9734 else
9736 mul_op0 = XEXP (XEXP (x, 1), 0);
9737 mul_op1 = XEXP (XEXP (x, 1), 1);
9738 sub_op = XEXP (x, 0);
9741 /* The first operand of the multiply may be optionally
9742 negated. */
9743 if (GET_CODE (mul_op0) == NEG)
9744 mul_op0 = XEXP (mul_op0, 0);
9746 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9747 + rtx_cost (mul_op1, code, 0, speed_p)
9748 + rtx_cost (sub_op, code, 0, speed_p));
9750 return true;
9753 if (speed_p)
9754 *cost += extra_cost->fp[mode != SFmode].addsub;
9755 return false;
9758 if (mode == SImode)
9760 rtx shift_by_reg = NULL;
9761 rtx shift_op;
9762 rtx non_shift_op;
9764 *cost = COSTS_N_INSNS (1);
9766 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9767 if (shift_op == NULL)
9769 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9770 non_shift_op = XEXP (x, 0);
9772 else
9773 non_shift_op = XEXP (x, 1);
9775 if (shift_op != NULL)
9777 if (shift_by_reg != NULL)
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith_shift_reg;
9781 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9783 else if (speed_p)
9784 *cost += extra_cost->alu.arith_shift;
9786 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9787 + rtx_cost (non_shift_op, code, 0, speed_p));
9788 return true;
9791 if (arm_arch_thumb2
9792 && GET_CODE (XEXP (x, 1)) == MULT)
9794 /* MLS. */
9795 if (speed_p)
9796 *cost += extra_cost->mult[0].add;
9797 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9798 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9799 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9800 return true;
9803 if (CONST_INT_P (XEXP (x, 0)))
9805 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9806 INTVAL (XEXP (x, 0)), NULL_RTX,
9807 NULL_RTX, 1, 0);
9808 *cost = COSTS_N_INSNS (insns);
9809 if (speed_p)
9810 *cost += insns * extra_cost->alu.arith;
9811 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9812 return true;
9815 return false;
9818 if (GET_MODE_CLASS (mode) == MODE_INT
9819 && GET_MODE_SIZE (mode) < 4)
9821 rtx shift_op, shift_reg;
9822 shift_reg = NULL;
9824 /* We check both sides of the MINUS for shifter operands since,
9825 unlike PLUS, it's not commutative. */
9827 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9828 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9830 /* Slightly disparage, as we might need to widen the result. */
9831 *cost = 1 + COSTS_N_INSNS (1);
9832 if (speed_p)
9833 *cost += extra_cost->alu.arith;
9835 if (CONST_INT_P (XEXP (x, 0)))
9837 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9838 return true;
9841 return false;
9844 if (mode == DImode)
9846 *cost = COSTS_N_INSNS (2);
9848 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9850 rtx op1 = XEXP (x, 1);
9852 if (speed_p)
9853 *cost += 2 * extra_cost->alu.arith;
9855 if (GET_CODE (op1) == ZERO_EXTEND)
9856 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9857 else
9858 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9859 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9860 0, speed_p);
9861 return true;
9863 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9865 if (speed_p)
9866 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9867 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9868 0, speed_p)
9869 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9870 return true;
9872 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9873 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9875 if (speed_p)
9876 *cost += (extra_cost->alu.arith
9877 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9878 ? extra_cost->alu.arith
9879 : extra_cost->alu.arith_shift));
9880 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9881 + rtx_cost (XEXP (XEXP (x, 1), 0),
9882 GET_CODE (XEXP (x, 1)), 0, speed_p));
9883 return true;
9886 if (speed_p)
9887 *cost += 2 * extra_cost->alu.arith;
9888 return false;
9891 /* Vector mode? */
9893 *cost = LIBCALL_COST (2);
9894 return false;
9896 case PLUS:
9897 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9898 && (mode == SFmode || !TARGET_VFP_SINGLE))
9900 *cost = COSTS_N_INSNS (1);
9901 if (GET_CODE (XEXP (x, 0)) == MULT)
9903 rtx mul_op0, mul_op1, add_op;
9905 if (speed_p)
9906 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9908 mul_op0 = XEXP (XEXP (x, 0), 0);
9909 mul_op1 = XEXP (XEXP (x, 0), 1);
9910 add_op = XEXP (x, 1);
9912 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9913 + rtx_cost (mul_op1, code, 0, speed_p)
9914 + rtx_cost (add_op, code, 0, speed_p));
9916 return true;
9919 if (speed_p)
9920 *cost += extra_cost->fp[mode != SFmode].addsub;
9921 return false;
9923 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9925 *cost = LIBCALL_COST (2);
9926 return false;
9929 /* Narrow modes can be synthesized in SImode, but the range
9930 of useful sub-operations is limited. Check for shift operations
9931 on one of the operands. Only left shifts can be used in the
9932 narrow modes. */
9933 if (GET_MODE_CLASS (mode) == MODE_INT
9934 && GET_MODE_SIZE (mode) < 4)
9936 rtx shift_op, shift_reg;
9937 shift_reg = NULL;
9939 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9941 if (CONST_INT_P (XEXP (x, 1)))
9943 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9944 INTVAL (XEXP (x, 1)), NULL_RTX,
9945 NULL_RTX, 1, 0);
9946 *cost = COSTS_N_INSNS (insns);
9947 if (speed_p)
9948 *cost += insns * extra_cost->alu.arith;
9949 /* Slightly penalize a narrow operation as the result may
9950 need widening. */
9951 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9952 return true;
9955 /* Slightly penalize a narrow operation as the result may
9956 need widening. */
9957 *cost = 1 + COSTS_N_INSNS (1);
9958 if (speed_p)
9959 *cost += extra_cost->alu.arith;
9961 return false;
9964 if (mode == SImode)
9966 rtx shift_op, shift_reg;
9968 *cost = COSTS_N_INSNS (1);
9969 if (TARGET_INT_SIMD
9970 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9971 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9973 /* UXTA[BH] or SXTA[BH]. */
9974 if (speed_p)
9975 *cost += extra_cost->alu.extend_arith;
9976 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9977 speed_p)
9978 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9979 return true;
9982 shift_reg = NULL;
9983 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9984 if (shift_op != NULL)
9986 if (shift_reg)
9988 if (speed_p)
9989 *cost += extra_cost->alu.arith_shift_reg;
9990 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9992 else if (speed_p)
9993 *cost += extra_cost->alu.arith_shift;
9995 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9996 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9997 return true;
9999 if (GET_CODE (XEXP (x, 0)) == MULT)
10001 rtx mul_op = XEXP (x, 0);
10003 *cost = COSTS_N_INSNS (1);
10005 if (TARGET_DSP_MULTIPLY
10006 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10007 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10008 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10009 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10010 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10011 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10012 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10013 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10014 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10015 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10016 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10017 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10018 == 16))))))
10020 /* SMLA[BT][BT]. */
10021 if (speed_p)
10022 *cost += extra_cost->mult[0].extend_add;
10023 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10024 SIGN_EXTEND, 0, speed_p)
10025 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10026 SIGN_EXTEND, 0, speed_p)
10027 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10028 return true;
10031 if (speed_p)
10032 *cost += extra_cost->mult[0].add;
10033 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10034 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10038 if (CONST_INT_P (XEXP (x, 1)))
10040 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10041 INTVAL (XEXP (x, 1)), NULL_RTX,
10042 NULL_RTX, 1, 0);
10043 *cost = COSTS_N_INSNS (insns);
10044 if (speed_p)
10045 *cost += insns * extra_cost->alu.arith;
10046 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10047 return true;
10049 return false;
10052 if (mode == DImode)
10054 if (arm_arch3m
10055 && GET_CODE (XEXP (x, 0)) == MULT
10056 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10058 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10059 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10061 *cost = COSTS_N_INSNS (1);
10062 if (speed_p)
10063 *cost += extra_cost->mult[1].extend_add;
10064 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10065 ZERO_EXTEND, 0, speed_p)
10066 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10067 ZERO_EXTEND, 0, speed_p)
10068 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10069 return true;
10072 *cost = COSTS_N_INSNS (2);
10074 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10075 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10077 if (speed_p)
10078 *cost += (extra_cost->alu.arith
10079 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10080 ? extra_cost->alu.arith
10081 : extra_cost->alu.arith_shift));
10083 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10084 speed_p)
10085 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10086 return true;
10089 if (speed_p)
10090 *cost += 2 * extra_cost->alu.arith;
10091 return false;
10094 /* Vector mode? */
10095 *cost = LIBCALL_COST (2);
10096 return false;
10097 case IOR:
10098 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10100 *cost = COSTS_N_INSNS (1);
10101 if (speed_p)
10102 *cost += extra_cost->alu.rev;
10104 return true;
10106 /* Fall through. */
10107 case AND: case XOR:
10108 if (mode == SImode)
10110 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10111 rtx op0 = XEXP (x, 0);
10112 rtx shift_op, shift_reg;
10114 *cost = COSTS_N_INSNS (1);
10116 if (subcode == NOT
10117 && (code == AND
10118 || (code == IOR && TARGET_THUMB2)))
10119 op0 = XEXP (op0, 0);
10121 shift_reg = NULL;
10122 shift_op = shifter_op_p (op0, &shift_reg);
10123 if (shift_op != NULL)
10125 if (shift_reg)
10127 if (speed_p)
10128 *cost += extra_cost->alu.log_shift_reg;
10129 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10131 else if (speed_p)
10132 *cost += extra_cost->alu.log_shift;
10134 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10135 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10136 return true;
10139 if (CONST_INT_P (XEXP (x, 1)))
10141 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10142 INTVAL (XEXP (x, 1)), NULL_RTX,
10143 NULL_RTX, 1, 0);
10145 *cost = COSTS_N_INSNS (insns);
10146 if (speed_p)
10147 *cost += insns * extra_cost->alu.logical;
10148 *cost += rtx_cost (op0, code, 0, speed_p);
10149 return true;
10152 if (speed_p)
10153 *cost += extra_cost->alu.logical;
10154 *cost += (rtx_cost (op0, code, 0, speed_p)
10155 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10156 return true;
10159 if (mode == DImode)
10161 rtx op0 = XEXP (x, 0);
10162 enum rtx_code subcode = GET_CODE (op0);
10164 *cost = COSTS_N_INSNS (2);
10166 if (subcode == NOT
10167 && (code == AND
10168 || (code == IOR && TARGET_THUMB2)))
10169 op0 = XEXP (op0, 0);
10171 if (GET_CODE (op0) == ZERO_EXTEND)
10173 if (speed_p)
10174 *cost += 2 * extra_cost->alu.logical;
10176 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10177 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10178 return true;
10180 else if (GET_CODE (op0) == SIGN_EXTEND)
10182 if (speed_p)
10183 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10185 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10186 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10187 return true;
10190 if (speed_p)
10191 *cost += 2 * extra_cost->alu.logical;
10193 return true;
10195 /* Vector mode? */
10197 *cost = LIBCALL_COST (2);
10198 return false;
10200 case MULT:
10201 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10202 && (mode == SFmode || !TARGET_VFP_SINGLE))
10204 rtx op0 = XEXP (x, 0);
10206 *cost = COSTS_N_INSNS (1);
10208 if (GET_CODE (op0) == NEG)
10209 op0 = XEXP (op0, 0);
10211 if (speed_p)
10212 *cost += extra_cost->fp[mode != SFmode].mult;
10214 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10215 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10216 return true;
10218 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10220 *cost = LIBCALL_COST (2);
10221 return false;
10224 if (mode == SImode)
10226 *cost = COSTS_N_INSNS (1);
10227 if (TARGET_DSP_MULTIPLY
10228 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10229 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10230 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10231 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10232 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10233 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10234 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10235 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10236 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10237 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10238 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10239 && (INTVAL (XEXP (XEXP (x, 1), 1))
10240 == 16))))))
10242 /* SMUL[TB][TB]. */
10243 if (speed_p)
10244 *cost += extra_cost->mult[0].extend;
10245 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10246 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10247 return true;
10249 if (speed_p)
10250 *cost += extra_cost->mult[0].simple;
10251 return false;
10254 if (mode == DImode)
10256 if (arm_arch3m
10257 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10258 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10259 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10260 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10262 *cost = COSTS_N_INSNS (1);
10263 if (speed_p)
10264 *cost += extra_cost->mult[1].extend;
10265 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10266 ZERO_EXTEND, 0, speed_p)
10267 + rtx_cost (XEXP (XEXP (x, 1), 0),
10268 ZERO_EXTEND, 0, speed_p));
10269 return true;
10272 *cost = LIBCALL_COST (2);
10273 return false;
10276 /* Vector mode? */
10277 *cost = LIBCALL_COST (2);
10278 return false;
10280 case NEG:
10281 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10282 && (mode == SFmode || !TARGET_VFP_SINGLE))
10284 *cost = COSTS_N_INSNS (1);
10285 if (speed_p)
10286 *cost += extra_cost->fp[mode != SFmode].neg;
10288 return false;
10290 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10292 *cost = LIBCALL_COST (1);
10293 return false;
10296 if (mode == SImode)
10298 if (GET_CODE (XEXP (x, 0)) == ABS)
10300 *cost = COSTS_N_INSNS (2);
10301 /* Assume the non-flag-changing variant. */
10302 if (speed_p)
10303 *cost += (extra_cost->alu.log_shift
10304 + extra_cost->alu.arith_shift);
10305 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10306 return true;
10309 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10310 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10312 *cost = COSTS_N_INSNS (2);
10313 /* No extra cost for MOV imm and MVN imm. */
10314 /* If the comparison op is using the flags, there's no further
10315 cost, otherwise we need to add the cost of the comparison. */
10316 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10317 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10318 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10320 *cost += (COSTS_N_INSNS (1)
10321 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10322 speed_p)
10323 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10324 speed_p));
10325 if (speed_p)
10326 *cost += extra_cost->alu.arith;
10328 return true;
10330 *cost = COSTS_N_INSNS (1);
10331 if (speed_p)
10332 *cost += extra_cost->alu.arith;
10333 return false;
10336 if (GET_MODE_CLASS (mode) == MODE_INT
10337 && GET_MODE_SIZE (mode) < 4)
10339 /* Slightly disparage, as we might need an extend operation. */
10340 *cost = 1 + COSTS_N_INSNS (1);
10341 if (speed_p)
10342 *cost += extra_cost->alu.arith;
10343 return false;
10346 if (mode == DImode)
10348 *cost = COSTS_N_INSNS (2);
10349 if (speed_p)
10350 *cost += 2 * extra_cost->alu.arith;
10351 return false;
10354 /* Vector mode? */
10355 *cost = LIBCALL_COST (1);
10356 return false;
10358 case NOT:
10359 if (mode == SImode)
10361 rtx shift_op;
10362 rtx shift_reg = NULL;
10364 *cost = COSTS_N_INSNS (1);
10365 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10367 if (shift_op)
10369 if (shift_reg != NULL)
10371 if (speed_p)
10372 *cost += extra_cost->alu.log_shift_reg;
10373 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10375 else if (speed_p)
10376 *cost += extra_cost->alu.log_shift;
10377 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10378 return true;
10381 if (speed_p)
10382 *cost += extra_cost->alu.logical;
10383 return false;
10385 if (mode == DImode)
10387 *cost = COSTS_N_INSNS (2);
10388 return false;
10391 /* Vector mode? */
10393 *cost += LIBCALL_COST (1);
10394 return false;
10396 case IF_THEN_ELSE:
10398 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10400 *cost = COSTS_N_INSNS (4);
10401 return true;
10403 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10404 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10406 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10407 /* Assume that if one arm of the if_then_else is a register,
10408 that it will be tied with the result and eliminate the
10409 conditional insn. */
10410 if (REG_P (XEXP (x, 1)))
10411 *cost += op2cost;
10412 else if (REG_P (XEXP (x, 2)))
10413 *cost += op1cost;
10414 else
10416 if (speed_p)
10418 if (extra_cost->alu.non_exec_costs_exec)
10419 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10420 else
10421 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10423 else
10424 *cost += op1cost + op2cost;
10427 return true;
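/* Example of the assumption above: for (if_then_else (cond) (reg:SI r1)
   (const_int 5)) only the cost of the condition and of the constant arm is
   accumulated, since the register arm is expected to be tied to the result
   and its conditional move eliminated.  */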
10429 case COMPARE:
10430 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10431 *cost = 0;
10432 else
10434 machine_mode op0mode;
10435 /* We'll mostly assume that the cost of a compare is the cost of the
10436 LHS. However, there are some notable exceptions. */
10438 /* Floating point compares are never done as side-effects. */
10439 op0mode = GET_MODE (XEXP (x, 0));
10440 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10441 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10443 *cost = COSTS_N_INSNS (1);
10444 if (speed_p)
10445 *cost += extra_cost->fp[op0mode != SFmode].compare;
10447 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10449 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10450 return true;
10453 return false;
10455 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10457 *cost = LIBCALL_COST (2);
10458 return false;
10461 /* DImode compares normally take two insns. */
10462 if (op0mode == DImode)
10464 *cost = COSTS_N_INSNS (2);
10465 if (speed_p)
10466 *cost += 2 * extra_cost->alu.arith;
10467 return false;
10470 if (op0mode == SImode)
10472 rtx shift_op;
10473 rtx shift_reg;
10475 if (XEXP (x, 1) == const0_rtx
10476 && !(REG_P (XEXP (x, 0))
10477 || (GET_CODE (XEXP (x, 0)) == SUBREG
10478 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10480 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10482 /* Multiply operations that set the flags are often
10483 significantly more expensive. */
10484 if (speed_p
10485 && GET_CODE (XEXP (x, 0)) == MULT
10486 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10487 *cost += extra_cost->mult[0].flag_setting;
10489 if (speed_p
10490 && GET_CODE (XEXP (x, 0)) == PLUS
10491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10492 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10493 0), 1), mode))
10494 *cost += extra_cost->mult[0].flag_setting;
10495 return true;
10498 shift_reg = NULL;
10499 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10500 if (shift_op != NULL)
10502 *cost = COSTS_N_INSNS (1);
10503 if (shift_reg != NULL)
10505 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.arith_shift_reg;
10509 else if (speed_p)
10510 *cost += extra_cost->alu.arith_shift;
10511 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10512 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10513 return true;
10516 *cost = COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.arith;
10519 if (CONST_INT_P (XEXP (x, 1))
10520 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10522 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10523 return true;
10525 return false;
10528 /* Vector mode? */
10530 *cost = LIBCALL_COST (2);
10531 return false;
10533 return true;
10535 case EQ:
10536 case NE:
10537 case LT:
10538 case LE:
10539 case GT:
10540 case GE:
10541 case LTU:
10542 case LEU:
10543 case GEU:
10544 case GTU:
10545 case ORDERED:
10546 case UNORDERED:
10547 case UNEQ:
10548 case UNLE:
10549 case UNLT:
10550 case UNGE:
10551 case UNGT:
10552 case LTGT:
10553 if (outer_code == SET)
10555 /* Is it a store-flag operation? */
10556 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10557 && XEXP (x, 1) == const0_rtx)
10559 /* Thumb also needs an IT insn. */
10560 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10561 return true;
10563 if (XEXP (x, 1) == const0_rtx)
10565 switch (code)
10567 case LT:
10568 /* LSR Rd, Rn, #31. */
10569 *cost = COSTS_N_INSNS (1);
10570 if (speed_p)
10571 *cost += extra_cost->alu.shift;
10572 break;
10574 case EQ:
10575 /* RSBS T1, Rn, #0
10576 ADC Rd, Rn, T1. */
10578 case NE:
10579 /* SUBS T1, Rn, #1
10580 SBC Rd, Rn, T1. */
10581 *cost = COSTS_N_INSNS (2);
10582 break;
10584 case LE:
10585 /* RSBS T1, Rn, Rn, LSR #31
10586 ADC Rd, Rn, T1. */
10587 *cost = COSTS_N_INSNS (2);
10588 if (speed_p)
10589 *cost += extra_cost->alu.arith_shift;
10590 break;
10592 case GT:
10593 /* RSB Rd, Rn, Rn, ASR #1
10594 LSR Rd, Rd, #31. */
10595 *cost = COSTS_N_INSNS (2);
10596 if (speed_p)
10597 *cost += (extra_cost->alu.arith_shift
10598 + extra_cost->alu.shift);
10599 break;
10601 case GE:
10602 /* ASR Rd, Rn, #31
10603 ADD Rd, Rn, #1. */
10604 *cost = COSTS_N_INSNS (2);
10605 if (speed_p)
10606 *cost += extra_cost->alu.shift;
10607 break;
10609 default:
10610 /* Remaining cases are either meaningless or would take
10611 three insns anyway. */
10612 *cost = COSTS_N_INSNS (3);
10613 break;
10615 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10616 return true;
10618 else
10620 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10621 if (CONST_INT_P (XEXP (x, 1))
10622 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10624 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10625 return true;
10628 return false;
10631 /* Not directly inside a set. If it involves the condition code
10632 register it must be the condition for a branch, cond_exec or
10633 I_T_E operation. Since the comparison is performed elsewhere
10634 this is just the control part which has no additional
10635 cost. */
10636 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10637 && XEXP (x, 1) == const0_rtx)
10639 *cost = 0;
10640 return true;
10642 return false;
10644 case ABS:
10645 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10646 && (mode == SFmode || !TARGET_VFP_SINGLE))
10648 *cost = COSTS_N_INSNS (1);
10649 if (speed_p)
10650 *cost += extra_cost->fp[mode != SFmode].neg;
10652 return false;
10654 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10656 *cost = LIBCALL_COST (1);
10657 return false;
10660 if (mode == SImode)
10662 *cost = COSTS_N_INSNS (1);
10663 if (speed_p)
10664 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10665 return false;
10667 /* Vector mode? */
10668 *cost = LIBCALL_COST (1);
10669 return false;
10671 case SIGN_EXTEND:
10672 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10673 && MEM_P (XEXP (x, 0)))
10675 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10677 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1);
10680 if (!speed_p)
10681 return true;
10683 if (GET_MODE (XEXP (x, 0)) == SImode)
10684 *cost += extra_cost->ldst.load;
10685 else
10686 *cost += extra_cost->ldst.load_sign_extend;
10688 if (mode == DImode)
10689 *cost += extra_cost->alu.shift;
10691 return true;
10694 /* Widening from less than 32-bits requires an extend operation. */
10695 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10697 /* We have SXTB/SXTH. */
10698 *cost = COSTS_N_INSNS (1);
10699 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10705 /* Needs two shifts. */
10706 *cost = COSTS_N_INSNS (2);
10707 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10708 if (speed_p)
10709 *cost += 2 * extra_cost->alu.shift;
10712 /* Widening beyond 32-bits requires one more insn. */
10713 if (mode == DImode)
10715 *cost += COSTS_N_INSNS (1);
10716 if (speed_p)
10717 *cost += extra_cost->alu.shift;
10720 return true;
10722 case ZERO_EXTEND:
10723 if ((arm_arch4
10724 || GET_MODE (XEXP (x, 0)) == SImode
10725 || GET_MODE (XEXP (x, 0)) == QImode)
10726 && MEM_P (XEXP (x, 0)))
10728 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10730 if (mode == DImode)
10731 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10733 return true;
10736 /* Widening from less than 32-bits requires an extend operation. */
10737 if (GET_MODE (XEXP (x, 0)) == QImode)
10739 /* UXTB can be a shorter instruction in Thumb2, but it might
10740 be slower than the AND Rd, Rn, #255 alternative. When
10741 optimizing for speed it should never be slower to use
10742 AND, and we don't really model 16-bit vs 32-bit insns
10743 here. */
10744 *cost = COSTS_N_INSNS (1);
10745 if (speed_p)
10746 *cost += extra_cost->alu.logical;
10748 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10750 /* We have UXTB/UXTH. */
10751 *cost = COSTS_N_INSNS (1);
10752 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10753 if (speed_p)
10754 *cost += extra_cost->alu.extend;
10756 else if (GET_MODE (XEXP (x, 0)) != SImode)
10758 /* Needs two shifts. It's marginally preferable to use
10759 shifts rather than two BIC instructions as the second
10760 shift may merge with a subsequent insn as a shifter
10761 op. */
10762 *cost = COSTS_N_INSNS (2);
10763 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10764 if (speed_p)
10765 *cost += 2 * extra_cost->alu.shift;
10767 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10768 *cost = COSTS_N_INSNS (1);
10770 /* Widening beyond 32-bits requires one more insn. */
10771 if (mode == DImode)
10773 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10776 return true;
10778 case CONST_INT:
10779 *cost = 0;
10780 /* CONST_INT has no mode, so we cannot tell for sure how many
10781 insns are really going to be needed. The best we can do is
10782 look at the value passed. If it fits in SImode, then assume
10783 that's the mode it will be used for. Otherwise assume it
10784 will be used in DImode. */
10785 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10786 mode = SImode;
10787 else
10788 mode = DImode;
10790 /* Avoid blowing up in arm_gen_constant (). */
10791 if (!(outer_code == PLUS
10792 || outer_code == AND
10793 || outer_code == IOR
10794 || outer_code == XOR
10795 || outer_code == MINUS))
10796 outer_code = SET;
10798 const_int_cost:
10799 if (mode == SImode)
10801 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10802 INTVAL (x), NULL, NULL,
10803 0, 0));
10804 /* Extra costs? */
10806 else
10808 *cost += COSTS_N_INSNS (arm_gen_constant
10809 (outer_code, SImode, NULL,
10810 trunc_int_for_mode (INTVAL (x), SImode),
10811 NULL, NULL, 0, 0)
10812 + arm_gen_constant (outer_code, SImode, NULL,
10813 INTVAL (x) >> 32, NULL,
10814 NULL, 0, 0));
10815 /* Extra costs? */
10818 return true;
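/* For instance, the SImode constant 0x00ff00ff is typically synthesized as
   two instructions (MOV of one rotated 8-bit immediate, ORR of the other),
   so arm_gen_constant reports 2 and the cost becomes COSTS_N_INSNS (2); the
   exact count is whatever arm_gen_constant computes for the value at hand.  */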
10820 case CONST:
10821 case LABEL_REF:
10822 case SYMBOL_REF:
10823 if (speed_p)
10825 if (arm_arch_thumb2 && !flag_pic)
10826 *cost = COSTS_N_INSNS (2);
10827 else
10828 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10830 else
10831 *cost = COSTS_N_INSNS (2);
10833 if (flag_pic)
10835 *cost += COSTS_N_INSNS (1);
10836 if (speed_p)
10837 *cost += extra_cost->alu.arith;
10840 return true;
10842 case CONST_FIXED:
10843 *cost = COSTS_N_INSNS (4);
10844 /* Fixme. */
10845 return true;
10847 case CONST_DOUBLE:
10848 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10849 && (mode == SFmode || !TARGET_VFP_SINGLE))
10851 if (vfp3_const_double_rtx (x))
10853 *cost = COSTS_N_INSNS (1);
10854 if (speed_p)
10855 *cost += extra_cost->fp[mode == DFmode].fpconst;
10856 return true;
10859 if (speed_p)
10861 *cost = COSTS_N_INSNS (1);
10862 if (mode == DFmode)
10863 *cost += extra_cost->ldst.loadd;
10864 else
10865 *cost += extra_cost->ldst.loadf;
10867 else
10868 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10870 return true;
10872 *cost = COSTS_N_INSNS (4);
10873 return true;
10875 case CONST_VECTOR:
10876 /* Fixme. */
10877 if (TARGET_NEON
10878 && TARGET_HARD_FLOAT
10879 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10880 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10881 *cost = COSTS_N_INSNS (1);
10882 else
10883 *cost = COSTS_N_INSNS (4);
10884 return true;
10886 case HIGH:
10887 case LO_SUM:
10888 *cost = COSTS_N_INSNS (1);
10889 /* When optimizing for size, we prefer constant pool entries to
10890 MOVW/MOVT pairs, so bump the cost of these slightly. */
10891 if (!speed_p)
10892 *cost += 1;
10893 return true;
10895 case CLZ:
10896 *cost = COSTS_N_INSNS (1);
10897 if (speed_p)
10898 *cost += extra_cost->alu.clz;
10899 return false;
10901 case SMIN:
10902 if (XEXP (x, 1) == const0_rtx)
10904 *cost = COSTS_N_INSNS (1);
10905 if (speed_p)
10906 *cost += extra_cost->alu.log_shift;
10907 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10908 return true;
10910 /* Fall through. */
10911 case SMAX:
10912 case UMIN:
10913 case UMAX:
10914 *cost = COSTS_N_INSNS (2);
10915 return false;
10917 case TRUNCATE:
10918 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10919 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10920 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10922 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10923 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10924 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10925 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10926 == ZERO_EXTEND))))
10928 *cost = COSTS_N_INSNS (1);
10929 if (speed_p)
10930 *cost += extra_cost->mult[1].extend;
10931 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10932 speed_p)
10933 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10934 0, speed_p));
10935 return true;
10937 *cost = LIBCALL_COST (1);
10938 return false;
10940 case UNSPEC:
10941 return arm_unspec_cost (x, outer_code, speed_p, cost);
10943 case PC:
10944 /* Reading the PC is like reading any other register. Writing it
10945 is more expensive, but we take that into account elsewhere. */
10946 *cost = 0;
10947 return true;
10949 case ZERO_EXTRACT:
10950 /* TODO: Simple zero_extract of bottom bits using AND. */
10951 /* Fall through. */
10952 case SIGN_EXTRACT:
10953 if (arm_arch6
10954 && mode == SImode
10955 && CONST_INT_P (XEXP (x, 1))
10956 && CONST_INT_P (XEXP (x, 2)))
10958 *cost = COSTS_N_INSNS (1);
10959 if (speed_p)
10960 *cost += extra_cost->alu.bfx;
10961 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10962 return true;
10964 /* Without UBFX/SBFX, need to resort to shift operations. */
10965 *cost = COSTS_N_INSNS (2);
10966 if (speed_p)
10967 *cost += 2 * extra_cost->alu.shift;
10968 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10969 return true;
10971 case FLOAT_EXTEND:
10972 if (TARGET_HARD_FLOAT)
10974 *cost = COSTS_N_INSNS (1);
10975 if (speed_p)
10976 *cost += extra_cost->fp[mode == DFmode].widen;
10977 if (!TARGET_FPU_ARMV8
10978 && GET_MODE (XEXP (x, 0)) == HFmode)
10980 /* Pre v8, widening HF->DF is a two-step process, first
10981 widening to SFmode. */
10982 *cost += COSTS_N_INSNS (1);
10983 if (speed_p)
10984 *cost += extra_cost->fp[0].widen;
10986 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10987 return true;
10990 *cost = LIBCALL_COST (1);
10991 return false;
10993 case FLOAT_TRUNCATE:
10994 if (TARGET_HARD_FLOAT)
10996 *cost = COSTS_N_INSNS (1);
10997 if (speed_p)
10998 *cost += extra_cost->fp[mode == DFmode].narrow;
10999 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11000 return true;
11001 /* Vector modes? */
11003 *cost = LIBCALL_COST (1);
11004 return false;
11006 case FMA:
11007 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11009 rtx op0 = XEXP (x, 0);
11010 rtx op1 = XEXP (x, 1);
11011 rtx op2 = XEXP (x, 2);
11013 *cost = COSTS_N_INSNS (1);
11015 /* vfms or vfnma. */
11016 if (GET_CODE (op0) == NEG)
11017 op0 = XEXP (op0, 0);
11019 /* vfnms or vfnma. */
11020 if (GET_CODE (op2) == NEG)
11021 op2 = XEXP (op2, 0);
11023 *cost += rtx_cost (op0, FMA, 0, speed_p);
11024 *cost += rtx_cost (op1, FMA, 1, speed_p);
11025 *cost += rtx_cost (op2, FMA, 2, speed_p);
11027 if (speed_p)
11028 *cost += extra_cost->fp[mode == DFmode].fma;
11030 return true;
11033 *cost = LIBCALL_COST (3);
11034 return false;
11036 case FIX:
11037 case UNSIGNED_FIX:
11038 if (TARGET_HARD_FLOAT)
11040 if (GET_MODE_CLASS (mode) == MODE_INT)
11042 *cost = COSTS_N_INSNS (1);
11043 if (speed_p)
11044 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11045 /* Strip off the 'cost' of rounding towards zero. */
11046 if (GET_CODE (XEXP (x, 0)) == FIX)
11047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11048 else
11049 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11050 /* ??? Increase the cost to deal with transferring from
11051 FP -> CORE registers? */
11052 return true;
11054 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11055 && TARGET_FPU_ARMV8)
11057 *cost = COSTS_N_INSNS (1);
11058 if (speed_p)
11059 *cost += extra_cost->fp[mode == DFmode].roundint;
11060 return false;
11062 /* Vector costs? */
11064 *cost = LIBCALL_COST (1);
11065 return false;
11067 case FLOAT:
11068 case UNSIGNED_FLOAT:
11069 if (TARGET_HARD_FLOAT)
11071 /* ??? Increase the cost to deal with transferring from CORE
11072 -> FP registers? */
11073 *cost = COSTS_N_INSNS (1);
11074 if (speed_p)
11075 *cost += extra_cost->fp[mode == DFmode].fromint;
11076 return false;
11078 *cost = LIBCALL_COST (1);
11079 return false;
11081 case CALL:
11082 *cost = COSTS_N_INSNS (1);
11083 return true;
11085 case ASM_OPERANDS:
11087 /* Just a guess. Guess number of instructions in the asm
11088 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11089 though (see PR60663). */
11090 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11091 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11093 *cost = COSTS_N_INSNS (asm_length + num_operands);
11094 return true;
11096 default:
11097 if (mode != VOIDmode)
11098 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11099 else
11100 *cost = COSTS_N_INSNS (4); /* Who knows? */
11101 return false;
11105 #undef HANDLE_NARROW_SHIFT_ARITH
11107 /* RTX costs. Dispatch to the cost implementation appropriate for the current tuning, for both the speed and size cases. */
11108 static bool
11109 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11110 int *total, bool speed)
11112 bool result;
11114 if (TARGET_OLD_RTX_COSTS
11115 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11117 /* Old way. (Deprecated.) */
11118 if (!speed)
11119 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11120 (enum rtx_code) outer_code, total);
11121 else
11122 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11123 (enum rtx_code) outer_code, total,
11124 speed);
11126 else
11128 /* New way. */
11129 if (current_tune->insn_extra_cost)
11130 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11131 (enum rtx_code) outer_code,
11132 current_tune->insn_extra_cost,
11133 total, speed);
11134 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11135 && current_tune->insn_extra_cost == NULL */
11136 else
11137 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11138 (enum rtx_code) outer_code,
11139 &generic_extra_costs, total, speed);
11142 if (dump_file && (dump_flags & TDF_DETAILS))
11144 print_rtl_single (dump_file, x);
11145 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11146 *total, result ? "final" : "partial");
11148 return result;
11151 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11152 supported on any "slowmul" cores, so it can be ignored. */
11154 static bool
11155 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11156 int *total, bool speed)
11158 machine_mode mode = GET_MODE (x);
11160 if (TARGET_THUMB)
11162 *total = thumb1_rtx_costs (x, code, outer_code);
11163 return true;
11166 switch (code)
11168 case MULT:
11169 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11170 || mode == DImode)
11172 *total = COSTS_N_INSNS (20);
11173 return false;
11176 if (CONST_INT_P (XEXP (x, 1)))
11178 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11179 & (unsigned HOST_WIDE_INT) 0xffffffff);
11180 int cost, const_ok = const_ok_for_arm (i);
11181 int j, booth_unit_size;
11183 /* Tune as appropriate. */
11184 cost = const_ok ? 4 : 8;
11185 booth_unit_size = 2;
11186 for (j = 0; i && j < 32; j += booth_unit_size)
11188 i >>= booth_unit_size;
11189 cost++;
11192 *total = COSTS_N_INSNS (cost);
11193 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11194 return true;
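/* Worked example for the loop above: for XEXP (x, 1) == 100 (0x64, seven
   significant bits) the two-bit Booth steps run four times, so with
   const_ok_for_arm giving a base of 4 the multiply is costed at
   COSTS_N_INSNS (8) plus the cost of operand 0.  */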
11197 *total = COSTS_N_INSNS (20);
11198 return false;
11200 default:
11201 return arm_rtx_costs_1 (x, outer_code, total, speed);
11206 /* RTX cost for cores with a fast multiply unit (M variants). */
11208 static bool
11209 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11210 int *total, bool speed)
11212 machine_mode mode = GET_MODE (x);
11214 if (TARGET_THUMB1)
11216 *total = thumb1_rtx_costs (x, code, outer_code);
11217 return true;
11220 /* ??? should thumb2 use different costs? */
11221 switch (code)
11223 case MULT:
11224 /* There is no point basing this on the tuning, since it is always the
11225 fast variant if it exists at all. */
11226 if (mode == DImode
11227 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11228 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11229 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11231 *total = COSTS_N_INSNS (2);
11232 return false;
11236 if (mode == DImode)
11238 *total = COSTS_N_INSNS (5);
11239 return false;
11242 if (CONST_INT_P (XEXP (x, 1)))
11244 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11245 & (unsigned HOST_WIDE_INT) 0xffffffff);
11246 int cost, const_ok = const_ok_for_arm (i);
11247 int j, booth_unit_size;
11249 /* Tune as appropriate. */
11250 cost = const_ok ? 4 : 8;
11251 booth_unit_size = 8;
11252 for (j = 0; i && j < 32; j += booth_unit_size)
11254 i >>= booth_unit_size;
11255 cost++;
11258 *total = COSTS_N_INSNS (cost);
11259 return false;
11262 if (mode == SImode)
11264 *total = COSTS_N_INSNS (4);
11265 return false;
11268 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11270 if (TARGET_HARD_FLOAT
11271 && (mode == SFmode
11272 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11274 *total = COSTS_N_INSNS (1);
11275 return false;
11279 /* Requires a lib call */
11280 *total = COSTS_N_INSNS (20);
11281 return false;
11283 default:
11284 return arm_rtx_costs_1 (x, outer_code, total, speed);
11289 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11290 so it can be ignored. */
11292 static bool
11293 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11294 int *total, bool speed)
11296 machine_mode mode = GET_MODE (x);
11298 if (TARGET_THUMB)
11300 *total = thumb1_rtx_costs (x, code, outer_code);
11301 return true;
11304 switch (code)
11306 case COMPARE:
11307 if (GET_CODE (XEXP (x, 0)) != MULT)
11308 return arm_rtx_costs_1 (x, outer_code, total, speed);
11310 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11311 will stall until the multiplication is complete. */
11312 *total = COSTS_N_INSNS (3);
11313 return false;
11315 case MULT:
11316 /* There is no point basing this on the tuning, since it is always the
11317 fast variant if it exists at all. */
11318 if (mode == DImode
11319 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11320 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11321 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11323 *total = COSTS_N_INSNS (2);
11324 return false;
11328 if (mode == DImode)
11330 *total = COSTS_N_INSNS (5);
11331 return false;
11334 if (CONST_INT_P (XEXP (x, 1)))
11336 /* If operand 1 is a constant we can more accurately
11337 calculate the cost of the multiply. The multiplier can
11338 retire 15 bits on the first cycle and a further 12 on the
11339 second. We do, of course, have to load the constant into
11340 a register first. */
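/* Worked examples of the costing below: a constant that fits in the low
   15 bits (e.g. 100) costs just the 1-cycle overhead; 0x12345 sets bits
   under the 0xffff8000 mask and costs 2; 0x12345678 additionally sets bits
   under the 0xf8000000 mask and costs 3.  */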
11341 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11342 /* There's a general overhead of one cycle. */
11343 int cost = 1;
11344 unsigned HOST_WIDE_INT masked_const;
11346 if (i & 0x80000000)
11347 i = ~i;
11349 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11351 masked_const = i & 0xffff8000;
11352 if (masked_const != 0)
11354 cost++;
11355 masked_const = i & 0xf8000000;
11356 if (masked_const != 0)
11357 cost++;
11359 *total = COSTS_N_INSNS (cost);
11360 return false;
11363 if (mode == SImode)
11365 *total = COSTS_N_INSNS (3);
11366 return false;
11369 /* Requires a lib call */
11370 *total = COSTS_N_INSNS (20);
11371 return false;
11373 default:
11374 return arm_rtx_costs_1 (x, outer_code, total, speed);
11379 /* RTX costs for 9e (and later) cores. */
11381 static bool
11382 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11383 int *total, bool speed)
11385 machine_mode mode = GET_MODE (x);
11387 if (TARGET_THUMB1)
11389 switch (code)
11391 case MULT:
11392 /* Small multiply: 32 cycles for an integer multiply inst. */
11393 if (arm_arch6m && arm_m_profile_small_mul)
11394 *total = COSTS_N_INSNS (32);
11395 else
11396 *total = COSTS_N_INSNS (3);
11397 return true;
11399 default:
11400 *total = thumb1_rtx_costs (x, code, outer_code);
11401 return true;
11405 switch (code)
11407 case MULT:
11408 /* There is no point basing this on the tuning, since it is always the
11409 fast variant if it exists at all. */
11410 if (mode == DImode
11411 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11412 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11413 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11415 *total = COSTS_N_INSNS (2);
11416 return false;
11420 if (mode == DImode)
11422 *total = COSTS_N_INSNS (5);
11423 return false;
11426 if (mode == SImode)
11428 *total = COSTS_N_INSNS (2);
11429 return false;
11432 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11434 if (TARGET_HARD_FLOAT
11435 && (mode == SFmode
11436 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11438 *total = COSTS_N_INSNS (1);
11439 return false;
11443 *total = COSTS_N_INSNS (20);
11444 return false;
11446 default:
11447 return arm_rtx_costs_1 (x, outer_code, total, speed);
11450 /* All address computations that can be done are free, but rtx cost returns
11451 the same for practically all of them. So we weight the different types
11452 of address here in the order (most pref first):
11453 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11454 static inline int
11455 arm_arm_address_cost (rtx x)
11457 enum rtx_code c = GET_CODE (x);
11459 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11460 return 0;
11461 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11462 return 10;
11464 if (c == PLUS)
11466 if (CONST_INT_P (XEXP (x, 1)))
11467 return 2;
11469 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11470 return 3;
11472 return 4;
11475 return 6;
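/* Some concrete values returned by the weighting above: (post_inc (reg)) -> 0,
   (plus (reg) (const_int 4)) -> 2, (plus (reg) (reg)) -> 4, a plain (reg) -> 6,
   and a (symbol_ref) -> 10.  */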
11478 static inline int
11479 arm_thumb_address_cost (rtx x)
11481 enum rtx_code c = GET_CODE (x);
11483 if (c == REG)
11484 return 1;
11485 if (c == PLUS
11486 && REG_P (XEXP (x, 0))
11487 && CONST_INT_P (XEXP (x, 1)))
11488 return 1;
11490 return 2;
11493 static int
11494 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11495 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11497 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11500 /* Adjust cost hook for XScale. */
11501 static bool
11502 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11504 /* Some true dependencies can have a higher cost depending
11505 on precisely how certain input operands are used. */
11506 if (REG_NOTE_KIND(link) == 0
11507 && recog_memoized (insn) >= 0
11508 && recog_memoized (dep) >= 0)
11510 int shift_opnum = get_attr_shift (insn);
11511 enum attr_type attr_type = get_attr_type (dep);
11513 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11514 operand for INSN. If we have a shifted input operand and the
11515 instruction we depend on is another ALU instruction, then we may
11516 have to account for an additional stall. */
11517 if (shift_opnum != 0
11518 && (attr_type == TYPE_ALU_SHIFT_IMM
11519 || attr_type == TYPE_ALUS_SHIFT_IMM
11520 || attr_type == TYPE_LOGIC_SHIFT_IMM
11521 || attr_type == TYPE_LOGICS_SHIFT_IMM
11522 || attr_type == TYPE_ALU_SHIFT_REG
11523 || attr_type == TYPE_ALUS_SHIFT_REG
11524 || attr_type == TYPE_LOGIC_SHIFT_REG
11525 || attr_type == TYPE_LOGICS_SHIFT_REG
11526 || attr_type == TYPE_MOV_SHIFT
11527 || attr_type == TYPE_MVN_SHIFT
11528 || attr_type == TYPE_MOV_SHIFT_REG
11529 || attr_type == TYPE_MVN_SHIFT_REG))
11531 rtx shifted_operand;
11532 int opno;
11534 /* Get the shifted operand. */
11535 extract_insn (insn);
11536 shifted_operand = recog_data.operand[shift_opnum];
11538 /* Iterate over all the operands in DEP. If we write an operand
11539 that overlaps with SHIFTED_OPERAND, then we have to increase the
11540 cost of this dependency. */
11541 extract_insn (dep);
11542 preprocess_constraints (dep);
11543 for (opno = 0; opno < recog_data.n_operands; opno++)
11545 /* We can ignore strict inputs. */
11546 if (recog_data.operand_type[opno] == OP_IN)
11547 continue;
11549 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11550 shifted_operand))
11552 *cost = 2;
11553 return false;
11558 return true;
11561 /* Adjust cost hook for Cortex A9. */
11562 static bool
11563 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11565 switch (REG_NOTE_KIND (link))
11567 case REG_DEP_ANTI:
11568 *cost = 0;
11569 return false;
11571 case REG_DEP_TRUE:
11572 case REG_DEP_OUTPUT:
11573 if (recog_memoized (insn) >= 0
11574 && recog_memoized (dep) >= 0)
11576 if (GET_CODE (PATTERN (insn)) == SET)
11578 if (GET_MODE_CLASS
11579 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11580 || GET_MODE_CLASS
11581 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11583 enum attr_type attr_type_insn = get_attr_type (insn);
11584 enum attr_type attr_type_dep = get_attr_type (dep);
11586 /* By default all dependencies of the form
11587 s0 = s0 <op> s1
11588 s0 = s0 <op> s2
11589 have an extra latency of 1 cycle because
11590 of the input and output dependency in this
11591 case. However, this gets modeled as a true
11592 dependency and hence all these checks. */
11593 if (REG_P (SET_DEST (PATTERN (insn)))
11594 && REG_P (SET_DEST (PATTERN (dep)))
11595 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11596 SET_DEST (PATTERN (dep))))
11598 /* FMACS is a special case where the dependent
11599 instruction can be issued 3 cycles before
11600 the normal latency in case of an output
11601 dependency. */
11602 if ((attr_type_insn == TYPE_FMACS
11603 || attr_type_insn == TYPE_FMACD)
11604 && (attr_type_dep == TYPE_FMACS
11605 || attr_type_dep == TYPE_FMACD))
11607 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11608 *cost = insn_default_latency (dep) - 3;
11609 else
11610 *cost = insn_default_latency (dep);
11611 return false;
11613 else
11615 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11616 *cost = insn_default_latency (dep) + 1;
11617 else
11618 *cost = insn_default_latency (dep);
11620 return false;
11625 break;
11627 default:
11628 gcc_unreachable ();
11631 return true;
11634 /* Adjust cost hook for FA726TE. */
11635 static bool
11636 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11638 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11639 followed by a predicated one) has a penalty of 3. */
11640 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11641 && recog_memoized (insn) >= 0
11642 && recog_memoized (dep) >= 0
11643 && get_attr_conds (dep) == CONDS_SET)
11645 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11646 if (get_attr_conds (insn) == CONDS_USE
11647 && get_attr_type (insn) != TYPE_BRANCH)
11649 *cost = 3;
11650 return false;
11653 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11654 || get_attr_conds (insn) == CONDS_USE)
11656 *cost = 0;
11657 return false;
11661 return true;
11664 /* Implement TARGET_REGISTER_MOVE_COST.
11666 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11667 such a move is typically more expensive than a single memory access. We set
11668 the cost to less than two memory accesses so that floating
11669 point to integer conversion does not go through memory. */
11672 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11673 reg_class_t from, reg_class_t to)
11675 if (TARGET_32BIT)
11677 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11678 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11679 return 15;
11680 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11681 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11682 return 4;
11683 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11684 return 20;
11685 else
11686 return 2;
11688 else
11690 if (from == HI_REGS || to == HI_REGS)
11691 return 4;
11692 else
11693 return 2;
11697 /* Implement TARGET_MEMORY_MOVE_COST. */
11700 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11701 bool in ATTRIBUTE_UNUSED)
11703 if (TARGET_32BIT)
11704 return 10;
11705 else
11707 if (GET_MODE_SIZE (mode) < 4)
11708 return 8;
11709 else
11710 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11714 /* Vectorizer cost model implementation. */
11716 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11717 static int
11718 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11719 tree vectype,
11720 int misalign ATTRIBUTE_UNUSED)
11722 unsigned elements;
11724 switch (type_of_cost)
11726 case scalar_stmt:
11727 return current_tune->vec_costs->scalar_stmt_cost;
11729 case scalar_load:
11730 return current_tune->vec_costs->scalar_load_cost;
11732 case scalar_store:
11733 return current_tune->vec_costs->scalar_store_cost;
11735 case vector_stmt:
11736 return current_tune->vec_costs->vec_stmt_cost;
11738 case vector_load:
11739 return current_tune->vec_costs->vec_align_load_cost;
11741 case vector_store:
11742 return current_tune->vec_costs->vec_store_cost;
11744 case vec_to_scalar:
11745 return current_tune->vec_costs->vec_to_scalar_cost;
11747 case scalar_to_vec:
11748 return current_tune->vec_costs->scalar_to_vec_cost;
11750 case unaligned_load:
11751 return current_tune->vec_costs->vec_unalign_load_cost;
11753 case unaligned_store:
11754 return current_tune->vec_costs->vec_unalign_store_cost;
11756 case cond_branch_taken:
11757 return current_tune->vec_costs->cond_taken_branch_cost;
11759 case cond_branch_not_taken:
11760 return current_tune->vec_costs->cond_not_taken_branch_cost;
11762 case vec_perm:
11763 case vec_promote_demote:
11764 return current_tune->vec_costs->vec_stmt_cost;
11766 case vec_construct:
11767 elements = TYPE_VECTOR_SUBPARTS (vectype);
11768 return elements / 2 + 1;
11770 default:
11771 gcc_unreachable ();
11775 /* Implement targetm.vectorize.add_stmt_cost. */
11777 static unsigned
11778 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11779 struct _stmt_vec_info *stmt_info, int misalign,
11780 enum vect_cost_model_location where)
11782 unsigned *cost = (unsigned *) data;
11783 unsigned retval = 0;
11785 if (flag_vect_cost_model)
11787 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11788 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11790 /* Statements in an inner loop relative to the loop being
11791 vectorized are weighted more heavily. The value here is
11792 arbitrary and could potentially be improved with analysis. */
11793 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11794 count *= 50; /* FIXME. */
11796 retval = (unsigned) (count * stmt_cost);
11797 cost[where] += retval;
11800 return retval;
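/* A minimal illustrative sketch (not part of arm.c or GCC): the weighting
   arm_add_stmt_cost above applies before accumulating into the cost bucket
   for WHERE.  A statement in a loop nested inside the loop being vectorized
   has its count scaled by the arbitrary factor of 50 noted in the FIXME.
   The function name and parameters here are hypothetical.  */

unsigned
weighted_stmt_cost (int count, int stmt_cost, int in_inner_loop)
{
  if (in_inner_loop)
    count *= 50;

  return (unsigned) (count * stmt_cost);
}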
11803 /* Return true if and only if this insn can dual-issue only as older. */
11804 static bool
11805 cortexa7_older_only (rtx_insn *insn)
11807 if (recog_memoized (insn) < 0)
11808 return false;
11810 switch (get_attr_type (insn))
11812 case TYPE_ALU_DSP_REG:
11813 case TYPE_ALU_SREG:
11814 case TYPE_ALUS_SREG:
11815 case TYPE_LOGIC_REG:
11816 case TYPE_LOGICS_REG:
11817 case TYPE_ADC_REG:
11818 case TYPE_ADCS_REG:
11819 case TYPE_ADR:
11820 case TYPE_BFM:
11821 case TYPE_REV:
11822 case TYPE_MVN_REG:
11823 case TYPE_SHIFT_IMM:
11824 case TYPE_SHIFT_REG:
11825 case TYPE_LOAD_BYTE:
11826 case TYPE_LOAD1:
11827 case TYPE_STORE1:
11828 case TYPE_FFARITHS:
11829 case TYPE_FADDS:
11830 case TYPE_FFARITHD:
11831 case TYPE_FADDD:
11832 case TYPE_FMOV:
11833 case TYPE_F_CVT:
11834 case TYPE_FCMPS:
11835 case TYPE_FCMPD:
11836 case TYPE_FCONSTS:
11837 case TYPE_FCONSTD:
11838 case TYPE_FMULS:
11839 case TYPE_FMACS:
11840 case TYPE_FMULD:
11841 case TYPE_FMACD:
11842 case TYPE_FDIVS:
11843 case TYPE_FDIVD:
11844 case TYPE_F_MRC:
11845 case TYPE_F_MRRC:
11846 case TYPE_F_FLAG:
11847 case TYPE_F_LOADS:
11848 case TYPE_F_STORES:
11849 return true;
11850 default:
11851 return false;
11855 /* Return true if and only if this insn can dual-issue as younger. */
11856 static bool
11857 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11859 if (recog_memoized (insn) < 0)
11861 if (verbose > 5)
11862 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11863 return false;
11866 switch (get_attr_type (insn))
11868 case TYPE_ALU_IMM:
11869 case TYPE_ALUS_IMM:
11870 case TYPE_LOGIC_IMM:
11871 case TYPE_LOGICS_IMM:
11872 case TYPE_EXTEND:
11873 case TYPE_MVN_IMM:
11874 case TYPE_MOV_IMM:
11875 case TYPE_MOV_REG:
11876 case TYPE_MOV_SHIFT:
11877 case TYPE_MOV_SHIFT_REG:
11878 case TYPE_BRANCH:
11879 case TYPE_CALL:
11880 return true;
11881 default:
11882 return false;
11887 /* Look for an instruction that can dual issue only as an older
11888 instruction, and move it in front of any instructions that can
11889 dual-issue as younger, while preserving the relative order of all
11890 other instructions in the ready list. This is a heuristic to help
11891 dual-issue in later cycles, by postponing issue of more flexible
11892 instructions. This heuristic may affect dual issue opportunities
11893 in the current cycle. */
11894 static void
11895 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11896 int *n_readyp, int clock)
11898 int i;
11899 int first_older_only = -1, first_younger = -1;
11901 if (verbose > 5)
11902 fprintf (file,
11903 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11904 clock,
11905 *n_readyp);
11907 /* Traverse the ready list from the head (the instruction to issue
11908 first), looking for the first instruction that can issue as
11909 younger and the first instruction that can dual-issue only as
11910 older. */
11911 for (i = *n_readyp - 1; i >= 0; i--)
11913 rtx_insn *insn = ready[i];
11914 if (cortexa7_older_only (insn))
11916 first_older_only = i;
11917 if (verbose > 5)
11918 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11919 break;
11921 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11922 first_younger = i;
11925 /* Nothing to reorder because either no younger insn found or insn
11926 that can dual-issue only as older appears before any insn that
11927 can dual-issue as younger. */
11928 if (first_younger == -1)
11930 if (verbose > 5)
11931 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11932 return;
11935 /* Nothing to reorder because no older-only insn in the ready list. */
11936 if (first_older_only == -1)
11938 if (verbose > 5)
11939 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11940 return;
11943 /* Move first_older_only insn before first_younger. */
11944 if (verbose > 5)
11945 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11946 INSN_UID(ready [first_older_only]),
11947 INSN_UID(ready [first_younger]));
11948 rtx_insn *first_older_only_insn = ready [first_older_only];
11949 for (i = first_older_only; i < first_younger; i++)
11951 ready[i] = ready[i+1];
11954 ready[i] = first_older_only_insn;
11955 return;
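/* A minimal illustrative sketch (not part of arm.c or GCC) of the array
   rotation performed by cortexa7_sched_reorder above.  The element at
   OLD_POS (the older-only insn) moves up to NEW_POS (the slot of the first
   younger-capable insn, nearer the issue end of the ready list), and the
   elements in between shift down one slot, keeping their relative order.
   The function name is hypothetical; OLD_POS must be less than NEW_POS.  */

void
move_element_up (int *ready, int old_pos, int new_pos)
{
  int saved = ready[old_pos];
  int i;

  for (i = old_pos; i < new_pos; i++)
    ready[i] = ready[i + 1];

  ready[new_pos] = saved;
}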
11958 /* Implement TARGET_SCHED_REORDER. */
11959 static int
11960 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11961 int clock)
11963 switch (arm_tune)
11965 case cortexa7:
11966 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11967 break;
11968 default:
11969 /* Do nothing for other cores. */
11970 break;
11973 return arm_issue_rate ();
11976 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11977 It corrects the value of COST based on the relationship between
11978 INSN and DEP through the dependence LINK. It returns the new
11979 value. There is a per-core adjust_cost hook to adjust scheduler costs
11980 and the per-core hook can choose to completely override the generic
11981 adjust_cost function. Only put bits of code into arm_adjust_cost that
11982 are common across all cores. */
11983 static int
11984 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11986 rtx i_pat, d_pat;
11988 /* When generating Thumb-1 code, we want to place flag-setting operations
11989 close to a conditional branch which depends on them, so that we can
11990 omit the comparison. */
11991 if (TARGET_THUMB1
11992 && REG_NOTE_KIND (link) == 0
11993 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11994 && recog_memoized (dep) >= 0
11995 && get_attr_conds (dep) == CONDS_SET)
11996 return 0;
11998 if (current_tune->sched_adjust_cost != NULL)
12000 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12001 return cost;
12004 /* XXX Is this strictly true? */
12005 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12006 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12007 return 0;
12009 /* Call insns don't incur a stall, even if they follow a load. */
12010 if (REG_NOTE_KIND (link) == 0
12011 && CALL_P (insn))
12012 return 1;
12014 if ((i_pat = single_set (insn)) != NULL
12015 && MEM_P (SET_SRC (i_pat))
12016 && (d_pat = single_set (dep)) != NULL
12017 && MEM_P (SET_DEST (d_pat)))
12019 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12020 /* This is a load after a store; there is no conflict if the load reads
12021 from a cached area. Assume that loads from the stack and from the
12022 constant pool are cached, and that others will miss. This is a
12023 hack. */
12025 if ((GET_CODE (src_mem) == SYMBOL_REF
12026 && CONSTANT_POOL_ADDRESS_P (src_mem))
12027 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12028 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12029 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12030 return 1;
12033 return cost;
12037 arm_max_conditional_execute (void)
12039 return max_insns_skipped;
12042 static int
12043 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12045 if (TARGET_32BIT)
12046 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12047 else
12048 return (optimize > 0) ? 2 : 0;
12051 static int
12052 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12054 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12057 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12058 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12059 sequences of non-executed instructions in IT blocks probably take the same
12060 amount of time as executed instructions (and the IT instruction itself takes
12061 space in icache). This function was experimentally determined to give good
12062 results on a popular embedded benchmark. */
12064 static int
12065 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12067 return (TARGET_32BIT && speed_p) ? 1
12068 : arm_default_branch_cost (speed_p, predictable_p);
12071 static bool fp_consts_inited = false;
12073 static REAL_VALUE_TYPE value_fp0;
12075 static void
12076 init_fp_table (void)
12078 REAL_VALUE_TYPE r;
12080 r = REAL_VALUE_ATOF ("0", DFmode);
12081 value_fp0 = r;
12082 fp_consts_inited = true;
12085 /* Return TRUE if rtx X is a valid immediate FP constant. */
12087 arm_const_double_rtx (rtx x)
12089 REAL_VALUE_TYPE r;
12091 if (!fp_consts_inited)
12092 init_fp_table ();
12094 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12095 if (REAL_VALUE_MINUS_ZERO (r))
12096 return 0;
12098 if (REAL_VALUES_EQUAL (r, value_fp0))
12099 return 1;
12101 return 0;
12104 /* VFPv3 has a fairly wide range of representable immediates, formed from
12105 "quarter-precision" floating-point values. These can be evaluated using this
12106 formula (with ^ for exponentiation):
12108 -1^s * n * 2^-r
12110 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12111 16 <= n <= 31 and 0 <= r <= 7.
12113 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12115 - A (most-significant) is the sign bit.
12116 - BCD are the exponent (encoded as r XOR 3).
12117 - EFGH are the mantissa (encoded as n - 16).
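/* A minimal illustrative sketch (not part of arm.c or GCC): decode an 8-bit
   VFPv3 immediate ABCDEFGH back into the value it represents, using the
   formula above.  For example, 0x70 decodes to 1.0 and 0x60 to 0.5;
   vfp3_const_double_index below computes the inverse mapping.  The function
   name is hypothetical.  */

double
vfp3_decode_imm8 (unsigned int imm8)
{
  int sign = (imm8 >> 7) & 1;			/* A: the sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;		/* BCD: exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;			/* EFGH: mantissa, stored as n - 16.  */
  double value = (double) n / (double) (1 << r);	/* n * 2^-r.  */

  return sign ? -value : value;
}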
12120 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12121 fconst[sd] instruction, or -1 if X isn't suitable. */
12122 static int
12123 vfp3_const_double_index (rtx x)
12125 REAL_VALUE_TYPE r, m;
12126 int sign, exponent;
12127 unsigned HOST_WIDE_INT mantissa, mant_hi;
12128 unsigned HOST_WIDE_INT mask;
12129 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12130 bool fail;
12132 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12133 return -1;
12135 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12137 /* We can't represent these things, so detect them first. */
12138 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12139 return -1;
12141 /* Extract sign, exponent and mantissa. */
12142 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12143 r = real_value_abs (&r);
12144 exponent = REAL_EXP (&r);
12145 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12146 highest (sign) bit, with a fixed binary point at bit point_pos.
12147 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12148 bits for the mantissa, this may fail (low bits would be lost). */
12149 real_ldexp (&m, &r, point_pos - exponent);
12150 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12151 mantissa = w.elt (0);
12152 mant_hi = w.elt (1);
12154 /* If there are bits set in the low part of the mantissa, we can't
12155 represent this value. */
12156 if (mantissa != 0)
12157 return -1;
12159 /* Now make it so that mantissa contains the most-significant bits, and move
12160 the point_pos to indicate that the least-significant bits have been
12161 discarded. */
12162 point_pos -= HOST_BITS_PER_WIDE_INT;
12163 mantissa = mant_hi;
12165 /* We can permit four significant bits of mantissa only, plus a high bit
12166 which is always 1. */
12167 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12168 if ((mantissa & mask) != 0)
12169 return -1;
12171 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12172 mantissa >>= point_pos - 5;
12174 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12175 floating-point immediate zero with Neon using an integer-zero load, but
12176 that case is handled elsewhere.) */
12177 if (mantissa == 0)
12178 return -1;
12180 gcc_assert (mantissa >= 16 && mantissa <= 31);
12182 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12183 normalized significands are in the range [1, 2). (Our mantissa is shifted
12184 left 4 places at this point relative to normalized IEEE754 values). GCC
12185 internally uses [0.5, 1) (see real.c), so the exponent returned from
12186 REAL_EXP must be altered. */
12187 exponent = 5 - exponent;
12189 if (exponent < 0 || exponent > 7)
12190 return -1;
12192 /* Sign, mantissa and exponent are now in the correct form to plug into the
12193 formula described in the comment above. */
12194 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12197 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12199 vfp3_const_double_rtx (rtx x)
12201 if (!TARGET_VFP3)
12202 return 0;
12204 return vfp3_const_double_index (x) != -1;
12207 /* Recognize immediates which can be used in various Neon instructions. Legal
12208 immediates are described by the following table (for VMVN variants, the
12209 bitwise inverse of the constant shown is recognized. In either case, VMOV
12210 is output and the correct instruction to use for a given constant is chosen
12211 by the assembler). The constant shown is replicated across all elements of
12212 the destination vector.
12214 insn elems variant constant (binary)
12215 ---- ----- ------- -----------------
12216 vmov i32 0 00000000 00000000 00000000 abcdefgh
12217 vmov i32 1 00000000 00000000 abcdefgh 00000000
12218 vmov i32 2 00000000 abcdefgh 00000000 00000000
12219 vmov i32 3 abcdefgh 00000000 00000000 00000000
12220 vmov i16 4 00000000 abcdefgh
12221 vmov i16 5 abcdefgh 00000000
12222 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12223 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12224 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12225 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12226 vmvn i16 10 00000000 abcdefgh
12227 vmvn i16 11 abcdefgh 00000000
12228 vmov i32 12 00000000 00000000 abcdefgh 11111111
12229 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12230 vmov i32 14 00000000 abcdefgh 11111111 11111111
12231 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12232 vmov i8 16 abcdefgh
12233 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12234 eeeeeeee ffffffff gggggggg hhhhhhhh
12235 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12236 vmov f32 19 00000000 00000000 00000000 00000000
12238 For case 18, B = !b. Representable values are exactly those accepted by
12239 vfp3_const_double_index, but are output as floating-point numbers rather
12240 than indices.
12242 For case 19, we will change it to vmov.i32 when assembling.
12244 Variants 0-5 (inclusive) may also be used as immediates for the second
12245 operand of VORR/VBIC instructions.
12247 The INVERSE argument causes the bitwise inverse of the given operand to be
12248 recognized instead (used for recognizing legal immediates for the VAND/VORN
12249 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12250 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12251 output, rather than the real insns vbic/vorr).
12253 INVERSE makes no difference to the recognition of float vectors.
12255 The return value is the variant of immediate as shown in the above table, or
12256 -1 if the given value doesn't match any of the listed patterns.
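/* A minimal illustrative sketch (not part of arm.c or GCC): classify a
   32-bit element replicated across a vector into the "single non-zero byte"
   VMOV variants 0-3 of the table above.  The real classifier,
   neon_valid_immediate below, additionally handles the VMVN, i16, i8, i64
   and float cases.  The function name is hypothetical.  */

int
vmov_i32_single_byte_variant (unsigned int elt)
{
  int byte;

  for (byte = 0; byte < 4; byte++)
    {
      unsigned int payload_mask = 0xffu << (byte * 8);

      /* Variant N carries abcdefgh in byte N and requires every other byte
	 of the element to be zero.  */
      if ((elt & ~payload_mask) == 0)
	return byte;
    }

  return -1;
}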
12258 static int
12259 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12260 rtx *modconst, int *elementwidth)
12262 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12263 matches = 1; \
12264 for (i = 0; i < idx; i += (STRIDE)) \
12265 if (!(TEST)) \
12266 matches = 0; \
12267 if (matches) \
12269 immtype = (CLASS); \
12270 elsize = (ELSIZE); \
12271 break; \
12274 unsigned int i, elsize = 0, idx = 0, n_elts;
12275 unsigned int innersize;
12276 unsigned char bytes[16];
12277 int immtype = -1, matches;
12278 unsigned int invmask = inverse ? 0xff : 0;
12279 bool vector = GET_CODE (op) == CONST_VECTOR;
12281 if (vector)
12283 n_elts = CONST_VECTOR_NUNITS (op);
12284 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12286 else
12288 n_elts = 1;
12289 if (mode == VOIDmode)
12290 mode = DImode;
12291 innersize = GET_MODE_SIZE (mode);
12294 /* Vectors of float constants. */
12295 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12297 rtx el0 = CONST_VECTOR_ELT (op, 0);
12298 REAL_VALUE_TYPE r0;
12300 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12301 return -1;
12303 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12305 for (i = 1; i < n_elts; i++)
12307 rtx elt = CONST_VECTOR_ELT (op, i);
12308 REAL_VALUE_TYPE re;
12310 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12312 if (!REAL_VALUES_EQUAL (r0, re))
12313 return -1;
12316 if (modconst)
12317 *modconst = CONST_VECTOR_ELT (op, 0);
12319 if (elementwidth)
12320 *elementwidth = 0;
12322 if (el0 == CONST0_RTX (GET_MODE (el0)))
12323 return 19;
12324 else
12325 return 18;
12328 /* Splat vector constant out into a byte vector. */
12329 for (i = 0; i < n_elts; i++)
12331 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12332 unsigned HOST_WIDE_INT elpart;
12333 unsigned int part, parts;
12335 if (CONST_INT_P (el))
12337 elpart = INTVAL (el);
12338 parts = 1;
12340 else if (CONST_DOUBLE_P (el))
12342 elpart = CONST_DOUBLE_LOW (el);
12343 parts = 2;
12345 else
12346 gcc_unreachable ();
12348 for (part = 0; part < parts; part++)
12350 unsigned int byte;
12351 for (byte = 0; byte < innersize; byte++)
12353 bytes[idx++] = (elpart & 0xff) ^ invmask;
12354 elpart >>= BITS_PER_UNIT;
12356 if (CONST_DOUBLE_P (el))
12357 elpart = CONST_DOUBLE_HIGH (el);
12361 /* Sanity check. */
12362 gcc_assert (idx == GET_MODE_SIZE (mode));
12366 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12367 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12369 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12370 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12372 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12373 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12375 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12376 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12378 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12380 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12382 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12383 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12385 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12386 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12388 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12389 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12391 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12392 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12394 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12396 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12398 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12399 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12401 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12402 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12404 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12405 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12407 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12408 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12410 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12412 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12413 && bytes[i] == bytes[(i + 8) % idx]);
12415 while (0);
12417 if (immtype == -1)
12418 return -1;
12420 if (elementwidth)
12421 *elementwidth = elsize;
12423 if (modconst)
12425 unsigned HOST_WIDE_INT imm = 0;
12427 /* Un-invert bytes of recognized vector, if necessary. */
12428 if (invmask != 0)
12429 for (i = 0; i < idx; i++)
12430 bytes[i] ^= invmask;
12432 if (immtype == 17)
12434 /* FIXME: Broken on 32-bit H_W_I hosts. */
12435 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12437 for (i = 0; i < 8; i++)
12438 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12439 << (i * BITS_PER_UNIT);
12441 *modconst = GEN_INT (imm);
12443 else
12445 unsigned HOST_WIDE_INT imm = 0;
12447 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12448 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12450 *modconst = GEN_INT (imm);
12454 return immtype;
12455 #undef CHECK
12458 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12459 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12460 float elements), and a modified constant (whatever should be output for a
12461 VMOV) in *MODCONST. */
12464 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12465 rtx *modconst, int *elementwidth)
12467 rtx tmpconst;
12468 int tmpwidth;
12469 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12471 if (retval == -1)
12472 return 0;
12474 if (modconst)
12475 *modconst = tmpconst;
12477 if (elementwidth)
12478 *elementwidth = tmpwidth;
12480 return 1;
12483 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12484 the immediate is valid, write a constant suitable for using as an operand
12485 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12486 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12489 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12490 rtx *modconst, int *elementwidth)
12492 rtx tmpconst;
12493 int tmpwidth;
12494 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12496 if (retval < 0 || retval > 5)
12497 return 0;
12499 if (modconst)
12500 *modconst = tmpconst;
12502 if (elementwidth)
12503 *elementwidth = tmpwidth;
12505 return 1;
12508 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12509 the immediate is valid, write a constant suitable for using as an operand
12510 to VSHR/VSHL to *MODCONST and the corresponding element width to
12511 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or right shift,
12512 because they have different limitations. */
12515 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12516 rtx *modconst, int *elementwidth,
12517 bool isleftshift)
12519 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12520 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12521 unsigned HOST_WIDE_INT last_elt = 0;
12522 unsigned HOST_WIDE_INT maxshift;
12524 /* Split vector constant out into a byte vector. */
12525 for (i = 0; i < n_elts; i++)
12527 rtx el = CONST_VECTOR_ELT (op, i);
12528 unsigned HOST_WIDE_INT elpart;
12530 if (CONST_INT_P (el))
12531 elpart = INTVAL (el);
12532 else if (CONST_DOUBLE_P (el))
12533 return 0;
12534 else
12535 gcc_unreachable ();
12537 if (i != 0 && elpart != last_elt)
12538 return 0;
12540 last_elt = elpart;
12543 /* Shift less than element size. */
12544 maxshift = innersize * 8;
12546 if (isleftshift)
12548 /* Left shift immediate value can be from 0 to <size>-1. */
12549 if (last_elt >= maxshift)
12550 return 0;
12552 else
12554 /* Right shift immediate value can be from 1 to <size>. */
12555 if (last_elt == 0 || last_elt > maxshift)
12556 return 0;
12559 if (elementwidth)
12560 *elementwidth = innersize * 8;
12562 if (modconst)
12563 *modconst = CONST_VECTOR_ELT (op, 0);
12565 return 1;
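/* A minimal illustrative sketch (not part of arm.c or GCC): the immediate
   range rule enforced by neon_immediate_valid_for_shift above.  For an
   element of ELEM_BITS bits, VSHL accepts shift counts 0 .. ELEM_BITS - 1
   while VSHR accepts 1 .. ELEM_BITS.  The function name is hypothetical.  */

int
neon_shift_imm_in_range (unsigned int shift, unsigned int elem_bits,
			 int is_left_shift)
{
  if (is_left_shift)
    return shift < elem_bits;

  return shift >= 1 && shift <= elem_bits;
}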
12568 /* Return a string suitable for output of Neon immediate logic operation
12569 MNEM. */
12571 char *
12572 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12573 int inverse, int quad)
12575 int width, is_valid;
12576 static char templ[40];
12578 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12580 gcc_assert (is_valid != 0);
12582 if (quad)
12583 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12584 else
12585 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12587 return templ;
12590 /* Return a string suitable for output of Neon immediate shift operation
12591 (VSHR or VSHL) MNEM. */
12593 char *
12594 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12595 machine_mode mode, int quad,
12596 bool isleftshift)
12598 int width, is_valid;
12599 static char templ[40];
12601 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12602 gcc_assert (is_valid != 0);
12604 if (quad)
12605 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12606 else
12607 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12609 return templ;
12612 /* Output a sequence of pairwise operations to implement a reduction.
12613 NOTE: We do "too much work" here, because pairwise operations work on two
12614 registers-worth of operands in one go. Unfortunately we don't think we can
12615 exploit those extra calculations to do the full operation in fewer steps.
12616 Although all vector elements of the result but the first are ignored, we
12617 actually calculate the same result in each of the elements. An alternative
12618 such as initially loading a vector with zero to use as each of the second
12619 operands would use up an additional register and take an extra instruction,
12620 for no particular gain. */
12622 void
12623 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12624 rtx (*reduc) (rtx, rtx, rtx))
12626 machine_mode inner = GET_MODE_INNER (mode);
12627 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12628 rtx tmpsum = op1;
12630 for (i = parts / 2; i >= 1; i /= 2)
12632 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12633 emit_insn (reduc (dest, tmpsum, tmpsum));
12634 tmpsum = dest;
12638 /* If VALS is a vector constant that can be loaded into a register
12639 using VDUP, generate instructions to do so and return an RTX to
12640 assign to the register. Otherwise return NULL_RTX. */
12642 static rtx
12643 neon_vdup_constant (rtx vals)
12645 machine_mode mode = GET_MODE (vals);
12646 machine_mode inner_mode = GET_MODE_INNER (mode);
12647 int n_elts = GET_MODE_NUNITS (mode);
12648 bool all_same = true;
12649 rtx x;
12650 int i;
12652 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12653 return NULL_RTX;
12655 for (i = 0; i < n_elts; ++i)
12657 x = XVECEXP (vals, 0, i);
12658 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12659 all_same = false;
12662 if (!all_same)
12663 /* The elements are not all the same. We could handle repeating
12664 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12665 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12666 vdup.i16). */
12667 return NULL_RTX;
12669 /* We can load this constant by using VDUP and a constant in a
12670 single ARM register. This will be cheaper than a vector
12671 load. */
12673 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12674 return gen_rtx_VEC_DUPLICATE (mode, x);
12677 /* Generate code to load VALS, which is a PARALLEL containing only
12678 constants (for vec_init) or CONST_VECTOR, efficiently into a
12679 register. Returns an RTX to copy into the register, or NULL_RTX
12680 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12683 neon_make_constant (rtx vals)
12685 machine_mode mode = GET_MODE (vals);
12686 rtx target;
12687 rtx const_vec = NULL_RTX;
12688 int n_elts = GET_MODE_NUNITS (mode);
12689 int n_const = 0;
12690 int i;
12692 if (GET_CODE (vals) == CONST_VECTOR)
12693 const_vec = vals;
12694 else if (GET_CODE (vals) == PARALLEL)
12696 /* A CONST_VECTOR must contain only CONST_INTs and
12697 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12698 Only store valid constants in a CONST_VECTOR. */
12699 for (i = 0; i < n_elts; ++i)
12701 rtx x = XVECEXP (vals, 0, i);
12702 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12703 n_const++;
12705 if (n_const == n_elts)
12706 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12708 else
12709 gcc_unreachable ();
12711 if (const_vec != NULL
12712 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12713 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12714 return const_vec;
12715 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12716 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12717 pipeline cycle; creating the constant takes one or two ARM
12718 pipeline cycles. */
12719 return target;
12720 else if (const_vec != NULL_RTX)
12721 /* Load from constant pool. On Cortex-A8 this takes two cycles
12722 (for either double or quad vectors). We can not take advantage
12723 of single-cycle VLD1 because we need a PC-relative addressing
12724 mode. */
12725 return const_vec;
12726 else
12727 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12728 We can not construct an initializer. */
12729 return NULL_RTX;
12732 /* Initialize vector TARGET to VALS. */
12734 void
12735 neon_expand_vector_init (rtx target, rtx vals)
12737 machine_mode mode = GET_MODE (target);
12738 machine_mode inner_mode = GET_MODE_INNER (mode);
12739 int n_elts = GET_MODE_NUNITS (mode);
12740 int n_var = 0, one_var = -1;
12741 bool all_same = true;
12742 rtx x, mem;
12743 int i;
12745 for (i = 0; i < n_elts; ++i)
12747 x = XVECEXP (vals, 0, i);
12748 if (!CONSTANT_P (x))
12749 ++n_var, one_var = i;
12751 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12752 all_same = false;
12755 if (n_var == 0)
12757 rtx constant = neon_make_constant (vals);
12758 if (constant != NULL_RTX)
12760 emit_move_insn (target, constant);
12761 return;
12765 /* Splat a single non-constant element if we can. */
12766 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12768 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12769 emit_insn (gen_rtx_SET (VOIDmode, target,
12770 gen_rtx_VEC_DUPLICATE (mode, x)));
12771 return;
12774 /* One field is non-constant. Load constant then overwrite varying
12775 field. This is more efficient than using the stack. */
12776 if (n_var == 1)
12778 rtx copy = copy_rtx (vals);
12779 rtx index = GEN_INT (one_var);
12781 /* Load constant part of vector, substitute neighboring value for
12782 varying element. */
12783 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12784 neon_expand_vector_init (target, copy);
12786 /* Insert variable. */
12787 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12788 switch (mode)
12790 case V8QImode:
12791 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12792 break;
12793 case V16QImode:
12794 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12795 break;
12796 case V4HImode:
12797 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12798 break;
12799 case V8HImode:
12800 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12801 break;
12802 case V2SImode:
12803 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12804 break;
12805 case V4SImode:
12806 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12807 break;
12808 case V2SFmode:
12809 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12810 break;
12811 case V4SFmode:
12812 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12813 break;
12814 case V2DImode:
12815 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12816 break;
12817 default:
12818 gcc_unreachable ();
12820 return;
12823 /* Construct the vector in memory one field at a time
12824 and load the whole vector. */
12825 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12826 for (i = 0; i < n_elts; i++)
12827 emit_move_insn (adjust_address_nv (mem, inner_mode,
12828 i * GET_MODE_SIZE (inner_mode)),
12829 XVECEXP (vals, 0, i));
12830 emit_move_insn (target, mem);
12833 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12834 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12835 reported source locations are bogus. */
12837 static void
12838 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12839 const char *err)
12841 HOST_WIDE_INT lane;
12843 gcc_assert (CONST_INT_P (operand));
12845 lane = INTVAL (operand);
12847 if (lane < low || lane >= high)
12848 error (err);
12851 /* Bounds-check lanes. */
12853 void
12854 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12856 bounds_check (operand, low, high, "lane out of range");
12859 /* Bounds-check constants. */
12861 void
12862 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12864 bounds_check (operand, low, high, "constant out of range");
12867 HOST_WIDE_INT
12868 neon_element_bits (machine_mode mode)
12870 if (mode == DImode)
12871 return GET_MODE_BITSIZE (mode);
12872 else
12873 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12877 /* Predicates for `match_operand' and `match_operator'. */
12879 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12880 WB is true if full writeback address modes are allowed and is false
12881 if limited writeback address modes (POST_INC and PRE_DEC) are
12882 allowed. */
12885 arm_coproc_mem_operand (rtx op, bool wb)
12887 rtx ind;
12889 /* Reject eliminable registers. */
12890 if (! (reload_in_progress || reload_completed || lra_in_progress)
12891 && ( reg_mentioned_p (frame_pointer_rtx, op)
12892 || reg_mentioned_p (arg_pointer_rtx, op)
12893 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12894 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12895 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12896 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12897 return FALSE;
12899 /* Constants are converted into offsets from labels. */
12900 if (!MEM_P (op))
12901 return FALSE;
12903 ind = XEXP (op, 0);
12905 if (reload_completed
12906 && (GET_CODE (ind) == LABEL_REF
12907 || (GET_CODE (ind) == CONST
12908 && GET_CODE (XEXP (ind, 0)) == PLUS
12909 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12910 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12911 return TRUE;
12913 /* Match: (mem (reg)). */
12914 if (REG_P (ind))
12915 return arm_address_register_rtx_p (ind, 0);
12917 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12918 acceptable in any case (subject to verification by
12919 arm_address_register_rtx_p). We need WB to be true to accept
12920 PRE_INC and POST_DEC. */
12921 if (GET_CODE (ind) == POST_INC
12922 || GET_CODE (ind) == PRE_DEC
12923 || (wb
12924 && (GET_CODE (ind) == PRE_INC
12925 || GET_CODE (ind) == POST_DEC)))
12926 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12928 if (wb
12929 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12930 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12931 && GET_CODE (XEXP (ind, 1)) == PLUS
12932 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12933 ind = XEXP (ind, 1);
12935 /* Match:
12936 (plus (reg)
12937 (const)). */
12938 if (GET_CODE (ind) == PLUS
12939 && REG_P (XEXP (ind, 0))
12940 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12941 && CONST_INT_P (XEXP (ind, 1))
12942 && INTVAL (XEXP (ind, 1)) > -1024
12943 && INTVAL (XEXP (ind, 1)) < 1024
12944 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12945 return TRUE;
12947 return FALSE;
12950 /* Return TRUE if OP is a memory operand which we can load or store a vector
12951 to/from. TYPE is one of the following values:
12952 0 - Vector load/store (vldr)
12953 1 - Core registers (ldm)
12954 2 - Element/structure loads (vld1)
12957 neon_vector_mem_operand (rtx op, int type, bool strict)
12959 rtx ind;
12961 /* Reject eliminable registers. */
12962 if (! (reload_in_progress || reload_completed)
12963 && ( reg_mentioned_p (frame_pointer_rtx, op)
12964 || reg_mentioned_p (arg_pointer_rtx, op)
12965 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12966 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12967 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12968 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12969 return !strict;
12971 /* Constants are converted into offsets from labels. */
12972 if (!MEM_P (op))
12973 return FALSE;
12975 ind = XEXP (op, 0);
12977 if (reload_completed
12978 && (GET_CODE (ind) == LABEL_REF
12979 || (GET_CODE (ind) == CONST
12980 && GET_CODE (XEXP (ind, 0)) == PLUS
12981 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12982 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12983 return TRUE;
12985 /* Match: (mem (reg)). */
12986 if (REG_P (ind))
12987 return arm_address_register_rtx_p (ind, 0);
12989 /* Allow post-increment with Neon registers. */
12990 if ((type != 1 && GET_CODE (ind) == POST_INC)
12991 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12992 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12994 /* Allow post-increment by register for VLDn */
12995 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12996 && GET_CODE (XEXP (ind, 1)) == PLUS
12997 && REG_P (XEXP (XEXP (ind, 1), 1)))
12998 return true;
13000 /* Match:
13001 (plus (reg)
13002 (const)). */
13003 if (type == 0
13004 && GET_CODE (ind) == PLUS
13005 && REG_P (XEXP (ind, 0))
13006 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13007 && CONST_INT_P (XEXP (ind, 1))
13008 && INTVAL (XEXP (ind, 1)) > -1024
13009 /* For quad modes, we restrict the constant offset to be slightly less
13010 than what the instruction format permits. We have no such constraint
13011 on double mode offsets. (This must match arm_legitimate_index_p.) */
13012 && (INTVAL (XEXP (ind, 1))
13013 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13014 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13015 return TRUE;
13017 return FALSE;
13020 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13021 type. */
13023 neon_struct_mem_operand (rtx op)
13025 rtx ind;
13027 /* Reject eliminable registers. */
13028 if (! (reload_in_progress || reload_completed)
13029 && ( reg_mentioned_p (frame_pointer_rtx, op)
13030 || reg_mentioned_p (arg_pointer_rtx, op)
13031 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13032 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13033 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13034 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13035 return FALSE;
13037 /* Constants are converted into offsets from labels. */
13038 if (!MEM_P (op))
13039 return FALSE;
13041 ind = XEXP (op, 0);
13043 if (reload_completed
13044 && (GET_CODE (ind) == LABEL_REF
13045 || (GET_CODE (ind) == CONST
13046 && GET_CODE (XEXP (ind, 0)) == PLUS
13047 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13048 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13049 return TRUE;
13051 /* Match: (mem (reg)). */
13052 if (REG_P (ind))
13053 return arm_address_register_rtx_p (ind, 0);
13055 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13056 if (GET_CODE (ind) == POST_INC
13057 || GET_CODE (ind) == PRE_DEC)
13058 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13060 return FALSE;
13063 /* Return true if X is a register that will be eliminated later on. */
13065 arm_eliminable_register (rtx x)
13067 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13068 || REGNO (x) == ARG_POINTER_REGNUM
13069 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13070 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13073 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13074 coprocessor registers. Otherwise return NO_REGS. */
13076 enum reg_class
13077 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13079 if (mode == HFmode)
13081 if (!TARGET_NEON_FP16)
13082 return GENERAL_REGS;
13083 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13084 return NO_REGS;
13085 return GENERAL_REGS;
13088 /* The neon move patterns handle all legitimate vector and struct
13089 addresses. */
13090 if (TARGET_NEON
13091 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13092 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13093 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13094 || VALID_NEON_STRUCT_MODE (mode)))
13095 return NO_REGS;
13097 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13098 return NO_REGS;
13100 return GENERAL_REGS;
13103 /* Values which must be returned in the most-significant end of the return
13104 register. */
13106 static bool
13107 arm_return_in_msb (const_tree valtype)
13109 return (TARGET_AAPCS_BASED
13110 && BYTES_BIG_ENDIAN
13111 && (AGGREGATE_TYPE_P (valtype)
13112 || TREE_CODE (valtype) == COMPLEX_TYPE
13113 || FIXED_POINT_TYPE_P (valtype)));
13116 /* Return TRUE if X references a SYMBOL_REF. */
13118 symbol_mentioned_p (rtx x)
13120 const char * fmt;
13121 int i;
13123 if (GET_CODE (x) == SYMBOL_REF)
13124 return 1;
13126 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13127 are constant offsets, not symbols. */
13128 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13129 return 0;
13131 fmt = GET_RTX_FORMAT (GET_CODE (x));
13133 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13135 if (fmt[i] == 'E')
13137 int j;
13139 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13140 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13141 return 1;
13143 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13144 return 1;
13147 return 0;
13150 /* Return TRUE if X references a LABEL_REF. */
13152 label_mentioned_p (rtx x)
13154 const char * fmt;
13155 int i;
13157 if (GET_CODE (x) == LABEL_REF)
13158 return 1;
13160 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13161 instruction, but they are constant offsets, not symbols. */
13162 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13163 return 0;
13165 fmt = GET_RTX_FORMAT (GET_CODE (x));
13166 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13168 if (fmt[i] == 'E')
13170 int j;
13172 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13173 if (label_mentioned_p (XVECEXP (x, i, j)))
13174 return 1;
13176 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13177 return 1;
13180 return 0;
13184 tls_mentioned_p (rtx x)
13186 switch (GET_CODE (x))
13188 case CONST:
13189 return tls_mentioned_p (XEXP (x, 0));
13191 case UNSPEC:
13192 if (XINT (x, 1) == UNSPEC_TLS)
13193 return 1;
13195 default:
13196 return 0;
13200 /* Must not copy any rtx that uses a pc-relative address. */
13202 static int
13203 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13205 if (GET_CODE (*x) == UNSPEC
13206 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13207 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13208 return 1;
13209 return 0;
13212 static bool
13213 arm_cannot_copy_insn_p (rtx_insn *insn)
13215 /* The tls call insn cannot be copied, as it is paired with a data
13216 word. */
13217 if (recog_memoized (insn) == CODE_FOR_tlscall)
13218 return true;
13220 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13223 enum rtx_code
13224 minmax_code (rtx x)
13226 enum rtx_code code = GET_CODE (x);
13228 switch (code)
13230 case SMAX:
13231 return GE;
13232 case SMIN:
13233 return LE;
13234 case UMIN:
13235 return LEU;
13236 case UMAX:
13237 return GEU;
13238 default:
13239 gcc_unreachable ();
13243 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13245 bool
13246 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13247 int *mask, bool *signed_sat)
13249 /* The high bound must be a power of two minus one. */
13250 int log = exact_log2 (INTVAL (hi_bound) + 1);
13251 if (log == -1)
13252 return false;
13254 /* The low bound is either zero (for usat) or one less than the
13255 negation of the high bound (for ssat). */
13256 if (INTVAL (lo_bound) == 0)
13258 if (mask)
13259 *mask = log;
13260 if (signed_sat)
13261 *signed_sat = false;
13263 return true;
13266 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13268 if (mask)
13269 *mask = log + 1;
13270 if (signed_sat)
13271 *signed_sat = true;
13273 return true;
13276 return false;
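/* A minimal illustrative sketch (not part of arm.c or GCC) of the bound
   shapes accepted by arm_sat_operator_match above: usat #n clamps to
   [0, 2^n - 1] and ssat #n clamps to [-2^(n-1), 2^(n-1) - 1], so the pair
   (0, 255) matches usat #8 and (-128, 127) matches ssat #8.  The function
   name and parameters here are hypothetical.  */

int
sat_bounds_match (long lo, long hi, int *bits, int *is_signed)
{
  int log;

  /* The high bound must be one less than a power of two.  */
  for (log = 0; log < 32; log++)
    if (hi == (1L << log) - 1)
      break;
  if (log == 32)
    return 0;

  if (lo == 0)
    {
      *bits = log;		/* usat #log.  */
      *is_signed = 0;
      return 1;
    }

  if (lo == -hi - 1)
    {
      *bits = log + 1;		/* ssat #(log + 1).  */
      *is_signed = 1;
      return 1;
    }

  return 0;
}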
13279 /* Return 1 if memory locations are adjacent. */
13281 adjacent_mem_locations (rtx a, rtx b)
13283 /* We don't guarantee to preserve the order of these memory refs. */
13284 if (volatile_refs_p (a) || volatile_refs_p (b))
13285 return 0;
13287 if ((REG_P (XEXP (a, 0))
13288 || (GET_CODE (XEXP (a, 0)) == PLUS
13289 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13290 && (REG_P (XEXP (b, 0))
13291 || (GET_CODE (XEXP (b, 0)) == PLUS
13292 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13294 HOST_WIDE_INT val0 = 0, val1 = 0;
13295 rtx reg0, reg1;
13296 int val_diff;
13298 if (GET_CODE (XEXP (a, 0)) == PLUS)
13300 reg0 = XEXP (XEXP (a, 0), 0);
13301 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13303 else
13304 reg0 = XEXP (a, 0);
13306 if (GET_CODE (XEXP (b, 0)) == PLUS)
13308 reg1 = XEXP (XEXP (b, 0), 0);
13309 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13311 else
13312 reg1 = XEXP (b, 0);
13314 /* Don't accept any offset that will require multiple
13315 instructions to handle, since this would cause the
13316 arith_adjacentmem pattern to output an overlong sequence. */
13317 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13318 return 0;
13320 /* Don't allow an eliminable register: register elimination can make
13321 the offset too large. */
13322 if (arm_eliminable_register (reg0))
13323 return 0;
13325 val_diff = val1 - val0;
13327 if (arm_ld_sched)
13329 /* If the target has load delay slots, then there's no benefit
13330 to using an ldm instruction unless the offset is zero and
13331 we are optimizing for size. */
13332 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13333 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13334 && (val_diff == 4 || val_diff == -4));
13337 return ((REGNO (reg0) == REGNO (reg1))
13338 && (val_diff == 4 || val_diff == -4));
13341 return 0;
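/* A minimal illustrative sketch (not part of arm.c or GCC) of the core test
   in adjacent_mem_locations above: both references must use the same base
   register and their offsets must differ by exactly one word.  The real
   function also rejects volatile references, eliminable base registers and
   offsets needing multiple instructions, and applies extra restrictions on
   cores with load delay slots.  The function name is hypothetical.  */

int
mem_refs_adjacent_p (int base_regno_a, long offset_a,
		     int base_regno_b, long offset_b)
{
  long diff = offset_b - offset_a;

  return base_regno_a == base_regno_b && (diff == 4 || diff == -4);
}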
13344 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13345 for load operations, false for store operations. CONSECUTIVE is true
13346 if the register numbers in the operation must be consecutive in the register
13347 bank. RETURN_PC is true if the value is to be loaded into the PC.
13348 The pattern we are trying to match for load is:
13349 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13350 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13353 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13355 where
13356 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13357 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13358 3. If consecutive is TRUE, then for kth register being loaded,
13359 REGNO (R_dk) = REGNO (R_d0) + k.
13360 The pattern for store is similar. */
13361 bool
13362 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13363 bool consecutive, bool return_pc)
13365 HOST_WIDE_INT count = XVECLEN (op, 0);
13366 rtx reg, mem, addr;
13367 unsigned regno;
13368 unsigned first_regno;
13369 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13370 rtx elt;
13371 bool addr_reg_in_reglist = false;
13372 bool update = false;
13373 int reg_increment;
13374 int offset_adj;
13375 int regs_per_val;
13377 /* If not in SImode, then registers must be consecutive
13378 (e.g., VLDM instructions for DFmode). */
13379 gcc_assert ((mode == SImode) || consecutive);
13380 /* Setting return_pc for stores is illegal. */
13381 gcc_assert (!return_pc || load);
13383 /* Set up the increments and the regs per val based on the mode. */
13384 reg_increment = GET_MODE_SIZE (mode);
13385 regs_per_val = reg_increment / 4;
13386 offset_adj = return_pc ? 1 : 0;
13388 if (count <= 1
13389 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13390 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13391 return false;
13393 /* Check if this is a write-back. */
13394 elt = XVECEXP (op, 0, offset_adj);
13395 if (GET_CODE (SET_SRC (elt)) == PLUS)
13397 i++;
13398 base = 1;
13399 update = true;
13401 /* The offset adjustment must be the number of registers being
13402 popped times the size of a single register. */
13403 if (!REG_P (SET_DEST (elt))
13404 || !REG_P (XEXP (SET_SRC (elt), 0))
13405 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13406 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13407 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13408 ((count - 1 - offset_adj) * reg_increment))
13409 return false;
13412 i = i + offset_adj;
13413 base = base + offset_adj;
13414 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13415 success depends on the type: VLDM can do just one reg,
13416 LDM must do at least two. */
13417 if ((count <= i) && (mode == SImode))
13418 return false;
13420 elt = XVECEXP (op, 0, i - 1);
13421 if (GET_CODE (elt) != SET)
13422 return false;
13424 if (load)
13426 reg = SET_DEST (elt);
13427 mem = SET_SRC (elt);
13429 else
13431 reg = SET_SRC (elt);
13432 mem = SET_DEST (elt);
13435 if (!REG_P (reg) || !MEM_P (mem))
13436 return false;
13438 regno = REGNO (reg);
13439 first_regno = regno;
13440 addr = XEXP (mem, 0);
13441 if (GET_CODE (addr) == PLUS)
13443 if (!CONST_INT_P (XEXP (addr, 1)))
13444 return false;
13446 offset = INTVAL (XEXP (addr, 1));
13447 addr = XEXP (addr, 0);
13450 if (!REG_P (addr))
13451 return false;
13453 /* Don't allow SP to be loaded unless it is also the base register. It
13454 guarantees that SP is reset correctly when an LDM instruction
13455 is interrupted. Otherwise, we might end up with a corrupt stack. */
13456 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13457 return false;
13459 for (; i < count; i++)
13461 elt = XVECEXP (op, 0, i);
13462 if (GET_CODE (elt) != SET)
13463 return false;
13465 if (load)
13467 reg = SET_DEST (elt);
13468 mem = SET_SRC (elt);
13470 else
13472 reg = SET_SRC (elt);
13473 mem = SET_DEST (elt);
13476 if (!REG_P (reg)
13477 || GET_MODE (reg) != mode
13478 || REGNO (reg) <= regno
13479 || (consecutive
13480 && (REGNO (reg) !=
13481 (unsigned int) (first_regno + regs_per_val * (i - base))))
13482 /* Don't allow SP to be loaded unless it is also the base register. It
13483 guarantees that SP is reset correctly when an LDM instruction
13484 is interrupted. Otherwise, we might end up with a corrupt stack. */
13485 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13486 || !MEM_P (mem)
13487 || GET_MODE (mem) != mode
13488 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13489 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13490 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13491 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13492 offset + (i - base) * reg_increment))
13493 && (!REG_P (XEXP (mem, 0))
13494 || offset + (i - base) * reg_increment != 0)))
13495 return false;
13497 regno = REGNO (reg);
13498 if (regno == REGNO (addr))
13499 addr_reg_in_reglist = true;
13502 if (load)
13504 if (update && addr_reg_in_reglist)
13505 return false;
13507 /* For Thumb-1, the address register is always modified, either by write-back
13508 or by explicit load. If the pattern does not describe an update,
13509 then the address register must be in the list of loaded registers. */
13510 if (TARGET_THUMB1)
13511 return update || addr_reg_in_reglist;
13514 return true;
13517 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13518 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13519 instruction. ADD_OFFSET is nonzero if the base address register needs
13520 to be modified with an add instruction before we can use it. */
13522 static bool
13523 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13524 int nops, HOST_WIDE_INT add_offset)
13526 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13527 if the offset isn't small enough. The reason 2 ldrs are faster
13528 is because these ARMs are able to do more than one cache access
13529 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13530 whilst the ARM8 has a double bandwidth cache. This means that
13531 these cores can do both an instruction fetch and a data fetch in
13532 a single cycle, so the trick of calculating the address into a
13533 scratch register (one of the result regs) and then doing a load
13534 multiple actually becomes slower (and no smaller in code size).
13535 That is the transformation
13537 ldr rd1, [rbase + offset]
13538 ldr rd2, [rbase + offset + 4]
13542 add rd1, rbase, offset
13543 ldmia rd1, {rd1, rd2}
13545 produces worse code -- '3 cycles + any stalls on rd2' instead of
13546 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13547 access per cycle, the first sequence could never complete in less
13548 than 6 cycles, whereas the ldm sequence would only take 5 and
13549 would make better use of sequential accesses if not hitting the
13550 cache.
13552 We cheat here and test 'arm_ld_sched' which we currently know to
13553 only be true for the ARM8, ARM9 and StrongARM. If this ever
13554 changes, then the test below needs to be reworked. */
13555 if (nops == 2 && arm_ld_sched && add_offset != 0)
13556 return false;
13558 /* XScale has load-store double instructions, but they have stricter
13559 alignment requirements than load-store multiple, so we cannot
13560 use them.
13562 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13563 the pipeline until completion.
13565 NREGS CYCLES
13566 1 3
13567 2 4
13568 3 5
13569 4 6
13571 An ldr instruction takes 1-3 cycles, but does not block the
13572 pipeline.
13574 NREGS CYCLES
13575 1 1-3
13576 2 2-6
13577 3 3-9
13578 4 4-12
13580 Best case ldr will always win. However, the more ldr instructions
13581 we issue, the less likely we are to be able to schedule them well.
13582 Using ldr instructions also increases code size.
13584 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13585 for counts of 3 or 4 regs. */
13586 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13587 return false;
13588 return true;
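/* A minimal illustrative sketch (not part of arm.c or GCC) of the XScale
   cycle counts quoted above: an ldm of NREGS registers blocks the pipeline
   for 2 + NREGS cycles, while NREGS separate ldr instructions take between
   NREGS and 3 * NREGS cycles but do not block it, which is why ldr is
   preferred for 1 or 2 registers unless optimizing for size.  The function
   name is hypothetical.  */

void
xscale_ldm_vs_ldr_cycles (int nregs, int *ldm_cycles,
			  int *ldr_cycles_min, int *ldr_cycles_max)
{
  *ldm_cycles = 2 + nregs;	/* Blocks the pipeline until completion.  */
  *ldr_cycles_min = nregs;	/* Each ldr takes 1-3 cycles...  */
  *ldr_cycles_max = 3 * nregs;	/* ...and does not block the pipeline.  */
}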
13591 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13592 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13593 an array ORDER which describes the sequence to use when accessing the
13594 offsets that produces an ascending order. In this sequence, each
13595 offset must be larger by exactly 4 than the previous one. ORDER[0]
13596 must have been filled in with the lowest offset by the caller.
13597 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13598 we use to verify that ORDER produces an ascending order of registers.
13599 Return true if it was possible to construct such an order, false if
13600 not. */
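/* Illustrative example (editorial, values chosen arbitrarily): with
   NOPS == 4 and UNSORTED_OFFSETS == {8, 0, 4, 12}, the caller sets
   ORDER[0] = 1 (the index of offset 0) and this function fills in
   ORDER == {1, 2, 0, 3}, i.e. it visits the offsets 0, 4, 8, 12.  If
   UNSORTED_REGS is supplied, the register numbers at indices 1, 2, 0, 3
   must also be strictly ascending for the function to succeed.  */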
13602 static bool
13603 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13604 int *unsorted_regs)
13606 int i;
13607 for (i = 1; i < nops; i++)
13609 int j;
13611 order[i] = order[i - 1];
13612 for (j = 0; j < nops; j++)
13613 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13615 /* We must find exactly one offset that is higher than the
13616 previous one by 4. */
13617 if (order[i] != order[i - 1])
13618 return false;
13619 order[i] = j;
13621 if (order[i] == order[i - 1])
13622 return false;
13623 /* The register numbers must be ascending. */
13624 if (unsorted_regs != NULL
13625 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13626 return false;
13628 return true;
13631 /* Used to determine in a peephole whether a sequence of load
13632 instructions can be changed into a load-multiple instruction.
13633 NOPS is the number of separate load instructions we are examining. The
13634 first NOPS entries in OPERANDS are the destination registers, the
13635 next NOPS entries are memory operands. If this function is
13636 successful, *BASE is set to the common base register of the memory
13637 accesses; *LOAD_OFFSET is set to the first memory location's offset
13638 from that base register.
13639 REGS is an array filled in with the destination register numbers.
13640 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13641 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13642 the sequence of registers in REGS matches the loads from ascending memory
13643 locations, and the function verifies that the register numbers are
13644 themselves ascending. If CHECK_REGS is false, the register numbers
13645 are stored in the order they are found in the operands. */
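/* Illustrative example (editorial): for the two loads
       ldr r0, [r4]
       ldr r1, [r4, #4]
   the function returns 1 (the ldmia case) with *BASE == 4 (r4),
   *LOAD_OFFSET == 0 and REGS == {0, 1}, provided the target-specific
   cost check in multiple_operation_profitable_p accepts a two-register
   transfer.  */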
13646 static int
13647 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13648 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13650 int unsorted_regs[MAX_LDM_STM_OPS];
13651 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13652 int order[MAX_LDM_STM_OPS];
13653 rtx base_reg_rtx = NULL;
13654 int base_reg = -1;
13655 int i, ldm_case;
13657 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13658 easily extended if required. */
13659 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13661 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13663 /* Loop over the operands and check that the memory references are
13664 suitable (i.e. immediate offsets from the same base register). At
13665 the same time, extract the target register, and the memory
13666 offsets. */
13667 for (i = 0; i < nops; i++)
13669 rtx reg;
13670 rtx offset;
13672 /* Convert a subreg of a mem into the mem itself. */
13673 if (GET_CODE (operands[nops + i]) == SUBREG)
13674 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13676 gcc_assert (MEM_P (operands[nops + i]));
13678 /* Don't reorder volatile memory references; it doesn't seem worth
13679 looking for the case where the order is ok anyway. */
13680 if (MEM_VOLATILE_P (operands[nops + i]))
13681 return 0;
13683 offset = const0_rtx;
13685 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13686 || (GET_CODE (reg) == SUBREG
13687 && REG_P (reg = SUBREG_REG (reg))))
13688 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13689 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13690 || (GET_CODE (reg) == SUBREG
13691 && REG_P (reg = SUBREG_REG (reg))))
13692 && (CONST_INT_P (offset
13693 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13695 if (i == 0)
13697 base_reg = REGNO (reg);
13698 base_reg_rtx = reg;
13699 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13700 return 0;
13702 else if (base_reg != (int) REGNO (reg))
13703 /* Not addressed from the same base register. */
13704 return 0;
13706 unsorted_regs[i] = (REG_P (operands[i])
13707 ? REGNO (operands[i])
13708 : REGNO (SUBREG_REG (operands[i])));
13710 /* If it isn't an integer register, or if it overwrites the
13711 base register but isn't the last insn in the list, then
13712 we can't do this. */
13713 if (unsorted_regs[i] < 0
13714 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13715 || unsorted_regs[i] > 14
13716 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13717 return 0;
13719 /* Don't allow SP to be loaded unless it is also the base
13720 register. This guarantees that SP is reset correctly when
13721 an LDM instruction is interrupted. Otherwise, we might
13722 end up with a corrupt stack. */
13723 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13724 return 0;
13726 unsorted_offsets[i] = INTVAL (offset);
13727 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13728 order[0] = i;
13730 else
13731 /* Not a suitable memory address. */
13732 return 0;
13735 /* All the useful information has now been extracted from the
13736 operands into unsorted_regs and unsorted_offsets; additionally,
13737 order[0] has been set to the lowest offset in the list. Sort
13738 the offsets into order, verifying that they are adjacent, and
13739 check that the register numbers are ascending. */
13740 if (!compute_offset_order (nops, unsorted_offsets, order,
13741 check_regs ? unsorted_regs : NULL))
13742 return 0;
13744 if (saved_order)
13745 memcpy (saved_order, order, sizeof order);
13747 if (base)
13749 *base = base_reg;
13751 for (i = 0; i < nops; i++)
13752 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13754 *load_offset = unsorted_offsets[order[0]];
13757 if (TARGET_THUMB1
13758 && !peep2_reg_dead_p (nops, base_reg_rtx))
13759 return 0;
13761 if (unsorted_offsets[order[0]] == 0)
13762 ldm_case = 1; /* ldmia */
13763 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13764 ldm_case = 2; /* ldmib */
13765 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13766 ldm_case = 3; /* ldmda */
13767 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13768 ldm_case = 4; /* ldmdb */
13769 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13770 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13771 ldm_case = 5;
13772 else
13773 return 0;
13775 if (!multiple_operation_profitable_p (false, nops,
13776 ldm_case == 5
13777 ? unsorted_offsets[order[0]] : 0))
13778 return 0;
13780 return ldm_case;
13783 /* Used to determine in a peephole whether a sequence of store instructions can
13784 be changed into a store-multiple instruction.
13785 NOPS is the number of separate store instructions we are examining.
13786 NOPS_TOTAL is the total number of instructions recognized by the peephole
13787 pattern.
13788 The first NOPS entries in OPERANDS are the source registers, the next
13789 NOPS entries are memory operands. If this function is successful, *BASE is
13790 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13791 to the first memory location's offset from that base register. REGS is an
13792 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13793 likewise filled with the corresponding rtx's.
13794 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13795 numbers to an ascending order of stores.
13796 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13797 from ascending memory locations, and the function verifies that the register
13798 numbers are themselves ascending. If CHECK_REGS is false, the register
13799 numbers are stored in the order they are found in the operands. */
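/* Illustrative example (editorial): "str r2, [r5]" followed by
   "str r3, [r5, #4]" yields stm_case 1 (stmia) with *BASE == 5,
   *LOAD_OFFSET == 0 and REGS == {2, 3}, subject to the same
   profitability check as for loads.  */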
13800 static int
13801 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13802 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13803 HOST_WIDE_INT *load_offset, bool check_regs)
13805 int unsorted_regs[MAX_LDM_STM_OPS];
13806 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13807 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13808 int order[MAX_LDM_STM_OPS];
13809 int base_reg = -1;
13810 rtx base_reg_rtx = NULL;
13811 int i, stm_case;
13813 /* Write-back of the base register is currently only supported for Thumb-1. */
13814 int base_writeback = TARGET_THUMB1;
13816 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13817 easily extended if required. */
13818 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13820 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13822 /* Loop over the operands and check that the memory references are
13823 suitable (i.e. immediate offsets from the same base register). At
13824 the same time, extract the target register, and the memory
13825 offsets. */
13826 for (i = 0; i < nops; i++)
13828 rtx reg;
13829 rtx offset;
13831 /* Convert a subreg of a mem into the mem itself. */
13832 if (GET_CODE (operands[nops + i]) == SUBREG)
13833 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13835 gcc_assert (MEM_P (operands[nops + i]));
13837 /* Don't reorder volatile memory references; it doesn't seem worth
13838 looking for the case where the order is ok anyway. */
13839 if (MEM_VOLATILE_P (operands[nops + i]))
13840 return 0;
13842 offset = const0_rtx;
13844 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13845 || (GET_CODE (reg) == SUBREG
13846 && REG_P (reg = SUBREG_REG (reg))))
13847 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13848 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13849 || (GET_CODE (reg) == SUBREG
13850 && REG_P (reg = SUBREG_REG (reg))))
13851 && (CONST_INT_P (offset
13852 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13854 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13855 ? operands[i] : SUBREG_REG (operands[i]));
13856 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13858 if (i == 0)
13860 base_reg = REGNO (reg);
13861 base_reg_rtx = reg;
13862 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13863 return 0;
13865 else if (base_reg != (int) REGNO (reg))
13866 /* Not addressed from the same base register. */
13867 return 0;
13869 /* If it isn't an integer register, then we can't do this. */
13870 if (unsorted_regs[i] < 0
13871 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13872 /* The effects are unpredictable if the base register is
13873 both updated and stored. */
13874 || (base_writeback && unsorted_regs[i] == base_reg)
13875 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13876 || unsorted_regs[i] > 14)
13877 return 0;
13879 unsorted_offsets[i] = INTVAL (offset);
13880 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13881 order[0] = i;
13883 else
13884 /* Not a suitable memory address. */
13885 return 0;
13888 /* All the useful information has now been extracted from the
13889 operands into unsorted_regs and unsorted_offsets; additionally,
13890 order[0] has been set to the lowest offset in the list. Sort
13891 the offsets into order, verifying that they are adjacent, and
13892 check that the register numbers are ascending. */
13893 if (!compute_offset_order (nops, unsorted_offsets, order,
13894 check_regs ? unsorted_regs : NULL))
13895 return 0;
13897 if (saved_order)
13898 memcpy (saved_order, order, sizeof order);
13900 if (base)
13902 *base = base_reg;
13904 for (i = 0; i < nops; i++)
13906 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13907 if (reg_rtxs)
13908 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13911 *load_offset = unsorted_offsets[order[0]];
13914 if (TARGET_THUMB1
13915 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13916 return 0;
13918 if (unsorted_offsets[order[0]] == 0)
13919 stm_case = 1; /* stmia */
13920 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13921 stm_case = 2; /* stmib */
13922 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13923 stm_case = 3; /* stmda */
13924 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13925 stm_case = 4; /* stmdb */
13926 else
13927 return 0;
13929 if (!multiple_operation_profitable_p (false, nops, 0))
13930 return 0;
13932 return stm_case;
13935 /* Routines for use in generating RTL. */
13937 /* Generate a load-multiple instruction. COUNT is the number of loads in
13938 the instruction; REGS and MEMS are arrays containing the operands.
13939 BASEREG is the base register to be used in addressing the memory operands.
13940 WBACK_OFFSET is nonzero if the instruction should update the base
13941 register. */
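/* Shape of the result (editorial summary of the code below): when the
   transfer is profitable, a single PARALLEL is returned whose first
   element is an optional SET advancing BASEREG by WBACK_OFFSET,
   followed by one SET per loaded register; otherwise a sequence of
   individual SImode moves (plus the base update, if any) is returned
   instead.  */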
13943 static rtx
13944 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13945 HOST_WIDE_INT wback_offset)
13947 int i = 0, j;
13948 rtx result;
13950 if (!multiple_operation_profitable_p (false, count, 0))
13952 rtx seq;
13954 start_sequence ();
13956 for (i = 0; i < count; i++)
13957 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13959 if (wback_offset != 0)
13960 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13962 seq = get_insns ();
13963 end_sequence ();
13965 return seq;
13968 result = gen_rtx_PARALLEL (VOIDmode,
13969 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13970 if (wback_offset != 0)
13972 XVECEXP (result, 0, 0)
13973 = gen_rtx_SET (VOIDmode, basereg,
13974 plus_constant (Pmode, basereg, wback_offset));
13975 i = 1;
13976 count++;
13979 for (j = 0; i < count; i++, j++)
13980 XVECEXP (result, 0, i)
13981 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13983 return result;
13986 /* Generate a store-multiple instruction. COUNT is the number of stores in
13987 the instruction; REGS and MEMS are arrays containing the operands.
13988 BASEREG is the base register to be used in addressing the memory operands.
13989 WBACK_OFFSET is nonzero if the instruction should update the base
13990 register. */
13992 static rtx
13993 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13994 HOST_WIDE_INT wback_offset)
13996 int i = 0, j;
13997 rtx result;
13999 if (GET_CODE (basereg) == PLUS)
14000 basereg = XEXP (basereg, 0);
14002 if (!multiple_operation_profitable_p (false, count, 0))
14004 rtx seq;
14006 start_sequence ();
14008 for (i = 0; i < count; i++)
14009 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14011 if (wback_offset != 0)
14012 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14014 seq = get_insns ();
14015 end_sequence ();
14017 return seq;
14020 result = gen_rtx_PARALLEL (VOIDmode,
14021 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14022 if (wback_offset != 0)
14024 XVECEXP (result, 0, 0)
14025 = gen_rtx_SET (VOIDmode, basereg,
14026 plus_constant (Pmode, basereg, wback_offset));
14027 i = 1;
14028 count++;
14031 for (j = 0; i < count; i++, j++)
14032 XVECEXP (result, 0, i)
14033 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14035 return result;
14038 /* Generate either a load-multiple or a store-multiple instruction. This
14039 function can be used in situations where we can start with a single MEM
14040 rtx and adjust its address upwards.
14041 COUNT is the number of operations in the instruction, not counting a
14042 possible update of the base register. REGS is an array containing the
14043 register operands.
14044 BASEREG is the base register to be used in addressing the memory operands,
14045 which are constructed from BASEMEM.
14046 WRITE_BACK specifies whether the generated instruction should include an
14047 update of the base register.
14048 OFFSETP is used to pass an offset to and from this function; this offset
14049 is not used when constructing the address (instead BASEMEM should have an
14050 appropriate offset in its address); it is used only for setting
14051 MEM_OFFSET, and is updated only if WRITE_BACK is true. */
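/* Illustrative example (editorial): with COUNT == 4, WRITE_BACK true
   and BASEREG == r4, the memory operands cover [r4], [r4, #4],
   [r4, #8] and [r4, #12], the generated instruction writes r4 back by
   16 bytes, and *OFFSETP is advanced by 16.  */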
14053 static rtx
14054 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14055 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14057 rtx mems[MAX_LDM_STM_OPS];
14058 HOST_WIDE_INT offset = *offsetp;
14059 int i;
14061 gcc_assert (count <= MAX_LDM_STM_OPS);
14063 if (GET_CODE (basereg) == PLUS)
14064 basereg = XEXP (basereg, 0);
14066 for (i = 0; i < count; i++)
14068 rtx addr = plus_constant (Pmode, basereg, i * 4);
14069 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14070 offset += 4;
14073 if (write_back)
14074 *offsetp = offset;
14076 if (is_load)
14077 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14078 write_back ? 4 * count : 0);
14079 else
14080 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14081 write_back ? 4 * count : 0);
14084 rtx
14085 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14086 rtx basemem, HOST_WIDE_INT *offsetp)
14088 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14089 offsetp);
14092 rtx
14093 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14094 rtx basemem, HOST_WIDE_INT *offsetp)
14096 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14097 offsetp);
14100 /* Called from a peephole2 expander to turn a sequence of loads into an
14101 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14102 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14103 is true if we can reorder the registers because their subsequent uses are
14104 commutative.
14105 Returns true iff we could generate a new instruction. */
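/* Notes derived from the code below (editorial): for Thumb-1 the base
   register must be dead after the sequence and write-back is always
   used.  For ldm_case 5 the offset is first added into a new base
   register (REGS[0] for ARM/Thumb-2, the original base for Thumb-1)
   and the multiple load then uses an offset of zero.  */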
14107 bool
14108 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14110 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14111 rtx mems[MAX_LDM_STM_OPS];
14112 int i, j, base_reg;
14113 rtx base_reg_rtx;
14114 HOST_WIDE_INT offset;
14115 int write_back = FALSE;
14116 int ldm_case;
14117 rtx addr;
14119 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14120 &base_reg, &offset, !sort_regs);
14122 if (ldm_case == 0)
14123 return false;
14125 if (sort_regs)
14126 for (i = 0; i < nops - 1; i++)
14127 for (j = i + 1; j < nops; j++)
14128 if (regs[i] > regs[j])
14130 int t = regs[i];
14131 regs[i] = regs[j];
14132 regs[j] = t;
14134 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14136 if (TARGET_THUMB1)
14138 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14139 gcc_assert (ldm_case == 1 || ldm_case == 5);
14140 write_back = TRUE;
14143 if (ldm_case == 5)
14145 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14146 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14147 offset = 0;
14148 if (!TARGET_THUMB1)
14150 base_reg = regs[0];
14151 base_reg_rtx = newbase;
14155 for (i = 0; i < nops; i++)
14157 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14158 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14159 SImode, addr, 0);
14161 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14162 write_back ? offset + i * 4 : 0));
14163 return true;
14166 /* Called from a peephole2 expander to turn a sequence of stores into an
14167 STM instruction. OPERANDS are the operands found by the peephole matcher;
14168 NOPS indicates how many separate stores we are trying to combine.
14169 Returns true iff we could generate a new instruction. */
14171 bool
14172 gen_stm_seq (rtx *operands, int nops)
14174 int i;
14175 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14176 rtx mems[MAX_LDM_STM_OPS];
14177 int base_reg;
14178 rtx base_reg_rtx;
14179 HOST_WIDE_INT offset;
14180 int write_back = FALSE;
14181 int stm_case;
14182 rtx addr;
14183 bool base_reg_dies;
14185 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14186 mem_order, &base_reg, &offset, true);
14188 if (stm_case == 0)
14189 return false;
14191 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14193 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14194 if (TARGET_THUMB1)
14196 gcc_assert (base_reg_dies);
14197 write_back = TRUE;
14200 if (stm_case == 5)
14202 gcc_assert (base_reg_dies);
14203 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14204 offset = 0;
14207 addr = plus_constant (Pmode, base_reg_rtx, offset);
14209 for (i = 0; i < nops; i++)
14211 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14212 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14213 SImode, addr, 0);
14215 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14216 write_back ? offset + i * 4 : 0));
14217 return true;
14220 /* Called from a peephole2 expander to turn a sequence of stores that are
14221 preceded by constant loads into an STM instruction. OPERANDS are the
14222 operands found by the peephole matcher; NOPS indicates how many
14223 separate stores we are trying to combine; there are 2 * NOPS
14224 instructions in the peephole.
14225 Returns true iff we could generate a new instruction. */
14227 bool
14228 gen_const_stm_seq (rtx *operands, int nops)
14230 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14231 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14232 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14233 rtx mems[MAX_LDM_STM_OPS];
14234 int base_reg;
14235 rtx base_reg_rtx;
14236 HOST_WIDE_INT offset;
14237 int write_back = FALSE;
14238 int stm_case;
14239 rtx addr;
14240 bool base_reg_dies;
14241 int i, j;
14242 HARD_REG_SET allocated;
14244 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14245 mem_order, &base_reg, &offset, false);
14247 if (stm_case == 0)
14248 return false;
14250 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14252 /* If the same register is used more than once, try to find a free
14253 register. */
14254 CLEAR_HARD_REG_SET (allocated);
14255 for (i = 0; i < nops; i++)
14257 for (j = i + 1; j < nops; j++)
14258 if (regs[i] == regs[j])
14260 rtx t = peep2_find_free_register (0, nops * 2,
14261 TARGET_THUMB1 ? "l" : "r",
14262 SImode, &allocated);
14263 if (t == NULL_RTX)
14264 return false;
14265 reg_rtxs[i] = t;
14266 regs[i] = REGNO (t);
14270 /* Compute an ordering that maps the register numbers to an ascending
14271 sequence. */
14272 reg_order[0] = 0;
14273 for (i = 0; i < nops; i++)
14274 if (regs[i] < regs[reg_order[0]])
14275 reg_order[0] = i;
14277 for (i = 1; i < nops; i++)
14279 int this_order = reg_order[i - 1];
14280 for (j = 0; j < nops; j++)
14281 if (regs[j] > regs[reg_order[i - 1]]
14282 && (this_order == reg_order[i - 1]
14283 || regs[j] < regs[this_order]))
14284 this_order = j;
14285 reg_order[i] = this_order;
14288 /* Ensure that registers that must be live after the instruction end
14289 up with the correct value. */
14290 for (i = 0; i < nops; i++)
14292 int this_order = reg_order[i];
14293 if ((this_order != mem_order[i]
14294 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14295 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14296 return false;
14299 /* Load the constants. */
14300 for (i = 0; i < nops; i++)
14302 rtx op = operands[2 * nops + mem_order[i]];
14303 sorted_regs[i] = regs[reg_order[i]];
14304 emit_move_insn (reg_rtxs[reg_order[i]], op);
14307 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14309 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14310 if (TARGET_THUMB1)
14312 gcc_assert (base_reg_dies);
14313 write_back = TRUE;
14316 if (stm_case == 5)
14318 gcc_assert (base_reg_dies);
14319 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14320 offset = 0;
14323 addr = plus_constant (Pmode, base_reg_rtx, offset);
14325 for (i = 0; i < nops; i++)
14327 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14328 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14329 SImode, addr, 0);
14331 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14332 write_back ? offset + i * 4 : 0));
14333 return true;
14336 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14337 unaligned copies on processors which support unaligned semantics for those
14338 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14339 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14340 An interleave factor of 1 (the minimum) will perform no interleaving.
14341 Load/store multiple are used for aligned addresses where possible. */
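/* Outline (editorial): the code below first copies whole blocks of
   INTERLEAVE_FACTOR words (using ldm/stm on whichever side is
   word-aligned, unaligned ldr/str otherwise), then any remaining whole
   words, then a trailing halfword and/or byte.  With an interleave
   factor of 2, each block is copied as load, load, store, store.  */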
14343 static void
14344 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14345 HOST_WIDE_INT length,
14346 unsigned int interleave_factor)
14348 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14349 int *regnos = XALLOCAVEC (int, interleave_factor);
14350 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14351 HOST_WIDE_INT i, j;
14352 HOST_WIDE_INT remaining = length, words;
14353 rtx halfword_tmp = NULL, byte_tmp = NULL;
14354 rtx dst, src;
14355 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14356 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14357 HOST_WIDE_INT srcoffset, dstoffset;
14358 HOST_WIDE_INT src_autoinc, dst_autoinc;
14359 rtx mem, addr;
14361 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14363 /* Use hard registers if we have aligned source or destination so we can use
14364 load/store multiple with contiguous registers. */
14365 if (dst_aligned || src_aligned)
14366 for (i = 0; i < interleave_factor; i++)
14367 regs[i] = gen_rtx_REG (SImode, i);
14368 else
14369 for (i = 0; i < interleave_factor; i++)
14370 regs[i] = gen_reg_rtx (SImode);
14372 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14373 src = copy_addr_to_reg (XEXP (srcbase, 0));
14375 srcoffset = dstoffset = 0;
14377 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14378 For copying the last bytes we want to subtract this offset again. */
14379 src_autoinc = dst_autoinc = 0;
14381 for (i = 0; i < interleave_factor; i++)
14382 regnos[i] = i;
14384 /* Copy BLOCK_SIZE_BYTES chunks. */
14386 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14388 /* Load words. */
14389 if (src_aligned && interleave_factor > 1)
14391 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14392 TRUE, srcbase, &srcoffset));
14393 src_autoinc += UNITS_PER_WORD * interleave_factor;
14395 else
14397 for (j = 0; j < interleave_factor; j++)
14399 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14400 - src_autoinc));
14401 mem = adjust_automodify_address (srcbase, SImode, addr,
14402 srcoffset + j * UNITS_PER_WORD);
14403 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14405 srcoffset += block_size_bytes;
14408 /* Store words. */
14409 if (dst_aligned && interleave_factor > 1)
14411 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14412 TRUE, dstbase, &dstoffset));
14413 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14415 else
14417 for (j = 0; j < interleave_factor; j++)
14419 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14420 - dst_autoinc));
14421 mem = adjust_automodify_address (dstbase, SImode, addr,
14422 dstoffset + j * UNITS_PER_WORD);
14423 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14425 dstoffset += block_size_bytes;
14428 remaining -= block_size_bytes;
14431 /* Copy any whole words left (note these aren't interleaved with any
14432 subsequent halfword/byte load/stores in the interests of simplicity). */
14434 words = remaining / UNITS_PER_WORD;
14436 gcc_assert (words < interleave_factor);
14438 if (src_aligned && words > 1)
14440 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14441 &srcoffset));
14442 src_autoinc += UNITS_PER_WORD * words;
14444 else
14446 for (j = 0; j < words; j++)
14448 addr = plus_constant (Pmode, src,
14449 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14450 mem = adjust_automodify_address (srcbase, SImode, addr,
14451 srcoffset + j * UNITS_PER_WORD);
14452 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14454 srcoffset += words * UNITS_PER_WORD;
14457 if (dst_aligned && words > 1)
14459 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14460 &dstoffset));
14461 dst_autoinc += words * UNITS_PER_WORD;
14463 else
14465 for (j = 0; j < words; j++)
14467 addr = plus_constant (Pmode, dst,
14468 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14469 mem = adjust_automodify_address (dstbase, SImode, addr,
14470 dstoffset + j * UNITS_PER_WORD);
14471 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14473 dstoffset += words * UNITS_PER_WORD;
14476 remaining -= words * UNITS_PER_WORD;
14478 gcc_assert (remaining < 4);
14480 /* Copy a halfword if necessary. */
14482 if (remaining >= 2)
14484 halfword_tmp = gen_reg_rtx (SImode);
14486 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14487 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14488 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14490 /* Either write out immediately, or delay until we've loaded the last
14491 byte, depending on interleave factor. */
14492 if (interleave_factor == 1)
14494 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14495 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14496 emit_insn (gen_unaligned_storehi (mem,
14497 gen_lowpart (HImode, halfword_tmp)));
14498 halfword_tmp = NULL;
14499 dstoffset += 2;
14502 remaining -= 2;
14503 srcoffset += 2;
14506 gcc_assert (remaining < 2);
14508 /* Copy last byte. */
14510 if ((remaining & 1) != 0)
14512 byte_tmp = gen_reg_rtx (SImode);
14514 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14515 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14516 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14518 if (interleave_factor == 1)
14520 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14521 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14522 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14523 byte_tmp = NULL;
14524 dstoffset++;
14527 remaining--;
14528 srcoffset++;
14531 /* Store last halfword if we haven't done so already. */
14533 if (halfword_tmp)
14535 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14536 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14537 emit_insn (gen_unaligned_storehi (mem,
14538 gen_lowpart (HImode, halfword_tmp)));
14539 dstoffset += 2;
14542 /* Likewise for last byte. */
14544 if (byte_tmp)
14546 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14547 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14548 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14549 dstoffset++;
14552 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14555 /* From mips_adjust_block_mem:
14557 Helper function for doing a loop-based block operation on memory
14558 reference MEM. Each iteration of the loop will operate on LENGTH
14559 bytes of MEM.
14561 Create a new base register for use within the loop and point it to
14562 the start of MEM. Create a new memory reference that uses this
14563 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14565 static void
14566 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14567 rtx *loop_mem)
14569 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14571 /* Although the new mem does not refer to a known location,
14572 it does keep up to LENGTH bytes of alignment. */
14573 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14574 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14577 /* From mips_block_move_loop:
14579 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14580 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14581 the memory regions do not overlap. */
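/* Illustrative example (editorial): with LENGTH == 100 and
   BYTES_PER_ITER == 16, the loop below executes six times (96 bytes)
   and the remaining 4 bytes are handled by a final call to
   arm_block_move_unaligned_straight.  */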
14583 static void
14584 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14585 unsigned int interleave_factor,
14586 HOST_WIDE_INT bytes_per_iter)
14588 rtx src_reg, dest_reg, final_src, test;
14589 HOST_WIDE_INT leftover;
14591 leftover = length % bytes_per_iter;
14592 length -= leftover;
14594 /* Create registers and memory references for use within the loop. */
14595 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14596 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14598 /* Calculate the value that SRC_REG should have after the last iteration of
14599 the loop. */
14600 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14601 0, 0, OPTAB_WIDEN);
14603 /* Emit the start of the loop. */
14604 rtx_code_label *label = gen_label_rtx ();
14605 emit_label (label);
14607 /* Emit the loop body. */
14608 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14609 interleave_factor);
14611 /* Move on to the next block. */
14612 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14613 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14615 /* Emit the loop condition. */
14616 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14617 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14619 /* Mop up any left-over bytes. */
14620 if (leftover)
14621 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14624 /* Emit a block move when either the source or destination is unaligned (not
14625 aligned to a four-byte boundary). This may need further tuning depending on
14626 core type, optimize_size setting, etc. */
14628 static int
14629 arm_movmemqi_unaligned (rtx *operands)
14631 HOST_WIDE_INT length = INTVAL (operands[2]);
14633 if (optimize_size)
14635 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14636 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14637 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14638 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14639 or dst_aligned though: allow more interleaving in those cases since the
14640 resulting code can be smaller. */
14641 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14642 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14644 if (length > 12)
14645 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14646 interleave_factor, bytes_per_iter);
14647 else
14648 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14649 interleave_factor);
14651 else
14653 /* Note that the loop created by arm_block_move_unaligned_loop may be
14654 subject to loop unrolling, which makes tuning this condition a little
14655 redundant. */
14656 if (length > 32)
14657 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14658 else
14659 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14662 return 1;
14665 int
14666 arm_gen_movmemqi (rtx *operands)
14668 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14669 HOST_WIDE_INT srcoffset, dstoffset;
14670 int i;
14671 rtx src, dst, srcbase, dstbase;
14672 rtx part_bytes_reg = NULL;
14673 rtx mem;
14675 if (!CONST_INT_P (operands[2])
14676 || !CONST_INT_P (operands[3])
14677 || INTVAL (operands[2]) > 64)
14678 return 0;
14680 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14681 return arm_movmemqi_unaligned (operands);
14683 if (INTVAL (operands[3]) & 3)
14684 return 0;
14686 dstbase = operands[0];
14687 srcbase = operands[1];
14689 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14690 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14692 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14693 out_words_to_go = INTVAL (operands[2]) / 4;
14694 last_bytes = INTVAL (operands[2]) & 3;
14695 dstoffset = srcoffset = 0;
14697 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14698 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14700 for (i = 0; in_words_to_go >= 2; i+=4)
14702 if (in_words_to_go > 4)
14703 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14704 TRUE, srcbase, &srcoffset));
14705 else
14706 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14707 src, FALSE, srcbase,
14708 &srcoffset));
14710 if (out_words_to_go)
14712 if (out_words_to_go > 4)
14713 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14714 TRUE, dstbase, &dstoffset));
14715 else if (out_words_to_go != 1)
14716 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14717 out_words_to_go, dst,
14718 (last_bytes == 0
14719 ? FALSE : TRUE),
14720 dstbase, &dstoffset));
14721 else
14723 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14724 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14725 if (last_bytes != 0)
14727 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14728 dstoffset += 4;
14733 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14734 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14737 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14738 if (out_words_to_go)
14740 rtx sreg;
14742 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14743 sreg = copy_to_reg (mem);
14745 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14746 emit_move_insn (mem, sreg);
14747 in_words_to_go--;
14749 gcc_assert (!in_words_to_go); /* Sanity check */
14752 if (in_words_to_go)
14754 gcc_assert (in_words_to_go > 0);
14756 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14757 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14760 gcc_assert (!last_bytes || part_bytes_reg);
14762 if (BYTES_BIG_ENDIAN && last_bytes)
14764 rtx tmp = gen_reg_rtx (SImode);
14766 /* The bytes we want are in the top end of the word. */
14767 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14768 GEN_INT (8 * (4 - last_bytes))));
14769 part_bytes_reg = tmp;
14771 while (last_bytes)
14773 mem = adjust_automodify_address (dstbase, QImode,
14774 plus_constant (Pmode, dst,
14775 last_bytes - 1),
14776 dstoffset + last_bytes - 1);
14777 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14779 if (--last_bytes)
14781 tmp = gen_reg_rtx (SImode);
14782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14783 part_bytes_reg = tmp;
14788 else
14790 if (last_bytes > 1)
14792 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14793 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14794 last_bytes -= 2;
14795 if (last_bytes)
14797 rtx tmp = gen_reg_rtx (SImode);
14798 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14799 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14800 part_bytes_reg = tmp;
14801 dstoffset += 2;
14805 if (last_bytes)
14807 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14808 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14812 return 1;
14815 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory
14816 rtx MEM by the size of its mode. */
14817 inline static rtx
14818 next_consecutive_mem (rtx mem)
14820 machine_mode mode = GET_MODE (mem);
14821 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14822 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14824 return adjust_automodify_address (mem, mode, addr, offset);
14827 /* Copy using LDRD/STRD instructions whenever possible.
14828 Returns true upon success. */
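/* Illustrative example (editorial): a 14-byte copy whose operands
   permit this path is emitted as one DImode copy, one SImode copy and
   one HImode copy, using the unaligned variants of the accesses
   whenever the corresponding side is not word-aligned.  */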
14829 bool
14830 gen_movmem_ldrd_strd (rtx *operands)
14832 unsigned HOST_WIDE_INT len;
14833 HOST_WIDE_INT align;
14834 rtx src, dst, base;
14835 rtx reg0;
14836 bool src_aligned, dst_aligned;
14837 bool src_volatile, dst_volatile;
14839 gcc_assert (CONST_INT_P (operands[2]));
14840 gcc_assert (CONST_INT_P (operands[3]));
14842 len = UINTVAL (operands[2]);
14843 if (len > 64)
14844 return false;
14846 /* Maximum alignment we can assume for both src and dst buffers. */
14847 align = INTVAL (operands[3]);
14849 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14850 return false;
14852 /* Place src and dst addresses in registers
14853 and update the corresponding mem rtx. */
14854 dst = operands[0];
14855 dst_volatile = MEM_VOLATILE_P (dst);
14856 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14857 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14858 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14860 src = operands[1];
14861 src_volatile = MEM_VOLATILE_P (src);
14862 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14863 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14864 src = adjust_automodify_address (src, VOIDmode, base, 0);
14866 if (!unaligned_access && !(src_aligned && dst_aligned))
14867 return false;
14869 if (src_volatile || dst_volatile)
14870 return false;
14872 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14873 if (!(dst_aligned || src_aligned))
14874 return arm_gen_movmemqi (operands);
14876 src = adjust_address (src, DImode, 0);
14877 dst = adjust_address (dst, DImode, 0);
14878 while (len >= 8)
14880 len -= 8;
14881 reg0 = gen_reg_rtx (DImode);
14882 if (src_aligned)
14883 emit_move_insn (reg0, src);
14884 else
14885 emit_insn (gen_unaligned_loaddi (reg0, src));
14887 if (dst_aligned)
14888 emit_move_insn (dst, reg0);
14889 else
14890 emit_insn (gen_unaligned_storedi (dst, reg0));
14892 src = next_consecutive_mem (src);
14893 dst = next_consecutive_mem (dst);
14896 gcc_assert (len < 8);
14897 if (len >= 4)
14899 /* More than a word but less than a double-word to copy. Copy a word. */
14900 reg0 = gen_reg_rtx (SImode);
14901 src = adjust_address (src, SImode, 0);
14902 dst = adjust_address (dst, SImode, 0);
14903 if (src_aligned)
14904 emit_move_insn (reg0, src);
14905 else
14906 emit_insn (gen_unaligned_loadsi (reg0, src));
14908 if (dst_aligned)
14909 emit_move_insn (dst, reg0);
14910 else
14911 emit_insn (gen_unaligned_storesi (dst, reg0));
14913 src = next_consecutive_mem (src);
14914 dst = next_consecutive_mem (dst);
14915 len -= 4;
14918 if (len == 0)
14919 return true;
14921 /* Copy the remaining bytes. */
14922 if (len >= 2)
14924 dst = adjust_address (dst, HImode, 0);
14925 src = adjust_address (src, HImode, 0);
14926 reg0 = gen_reg_rtx (SImode);
14927 if (src_aligned)
14928 emit_insn (gen_zero_extendhisi2 (reg0, src));
14929 else
14930 emit_insn (gen_unaligned_loadhiu (reg0, src));
14932 if (dst_aligned)
14933 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14934 else
14935 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14937 src = next_consecutive_mem (src);
14938 dst = next_consecutive_mem (dst);
14939 if (len == 2)
14940 return true;
14943 dst = adjust_address (dst, QImode, 0);
14944 src = adjust_address (src, QImode, 0);
14945 reg0 = gen_reg_rtx (QImode);
14946 emit_move_insn (reg0, src);
14947 emit_move_insn (dst, reg0);
14948 return true;
14951 /* Select a dominance comparison mode if possible for a test of the general
14952 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14953 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14954 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14955 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14956 In all cases OP will be either EQ or NE, but we don't need to know which
14957 here. If we are unable to support a dominance comparison we return
14958 CC mode. This will then fail to match for the RTL expressions that
14959 generate this call. */
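/* Illustrative example (editorial): for (LT a b) || (LE c d) with
   COND_OR == DOM_CC_X_OR_Y, LT dominates LE and CC_DLEmode is
   returned; if neither comparison equals or dominates the other (even
   after swapping them), CCmode is returned and the caller's pattern
   fails to match.  */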
14960 machine_mode
14961 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14963 enum rtx_code cond1, cond2;
14964 int swapped = 0;
14966 /* Currently we will probably get the wrong result if the individual
14967 comparisons are not simple. This also ensures that it is safe to
14968 reverse a comparison if necessary. */
14969 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14970 != CCmode)
14971 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14972 != CCmode))
14973 return CCmode;
14975 /* The if_then_else variant of this tests the second condition if the
14976 first passes, but is true if the first fails. Reverse the first
14977 condition to get a true "inclusive-or" expression. */
14978 if (cond_or == DOM_CC_NX_OR_Y)
14979 cond1 = reverse_condition (cond1);
14981 /* If the comparisons are not equal, and one doesn't dominate the other,
14982 then we can't do this. */
14983 if (cond1 != cond2
14984 && !comparison_dominates_p (cond1, cond2)
14985 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14986 return CCmode;
14988 if (swapped)
14990 enum rtx_code temp = cond1;
14991 cond1 = cond2;
14992 cond2 = temp;
14995 switch (cond1)
14997 case EQ:
14998 if (cond_or == DOM_CC_X_AND_Y)
14999 return CC_DEQmode;
15001 switch (cond2)
15003 case EQ: return CC_DEQmode;
15004 case LE: return CC_DLEmode;
15005 case LEU: return CC_DLEUmode;
15006 case GE: return CC_DGEmode;
15007 case GEU: return CC_DGEUmode;
15008 default: gcc_unreachable ();
15011 case LT:
15012 if (cond_or == DOM_CC_X_AND_Y)
15013 return CC_DLTmode;
15015 switch (cond2)
15017 case LT:
15018 return CC_DLTmode;
15019 case LE:
15020 return CC_DLEmode;
15021 case NE:
15022 return CC_DNEmode;
15023 default:
15024 gcc_unreachable ();
15027 case GT:
15028 if (cond_or == DOM_CC_X_AND_Y)
15029 return CC_DGTmode;
15031 switch (cond2)
15033 case GT:
15034 return CC_DGTmode;
15035 case GE:
15036 return CC_DGEmode;
15037 case NE:
15038 return CC_DNEmode;
15039 default:
15040 gcc_unreachable ();
15043 case LTU:
15044 if (cond_or == DOM_CC_X_AND_Y)
15045 return CC_DLTUmode;
15047 switch (cond2)
15049 case LTU:
15050 return CC_DLTUmode;
15051 case LEU:
15052 return CC_DLEUmode;
15053 case NE:
15054 return CC_DNEmode;
15055 default:
15056 gcc_unreachable ();
15059 case GTU:
15060 if (cond_or == DOM_CC_X_AND_Y)
15061 return CC_DGTUmode;
15063 switch (cond2)
15065 case GTU:
15066 return CC_DGTUmode;
15067 case GEU:
15068 return CC_DGEUmode;
15069 case NE:
15070 return CC_DNEmode;
15071 default:
15072 gcc_unreachable ();
15075 /* The remaining cases only occur when both comparisons are the
15076 same. */
15077 case NE:
15078 gcc_assert (cond1 == cond2);
15079 return CC_DNEmode;
15081 case LE:
15082 gcc_assert (cond1 == cond2);
15083 return CC_DLEmode;
15085 case GE:
15086 gcc_assert (cond1 == cond2);
15087 return CC_DGEmode;
15089 case LEU:
15090 gcc_assert (cond1 == cond2);
15091 return CC_DLEUmode;
15093 case GEU:
15094 gcc_assert (cond1 == cond2);
15095 return CC_DGEUmode;
15097 default:
15098 gcc_unreachable ();
15102 machine_mode
15103 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15105 /* All floating point compares return CCFP if it is an equality
15106 comparison, and CCFPE otherwise. */
15107 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15109 switch (op)
15111 case EQ:
15112 case NE:
15113 case UNORDERED:
15114 case ORDERED:
15115 case UNLT:
15116 case UNLE:
15117 case UNGT:
15118 case UNGE:
15119 case UNEQ:
15120 case LTGT:
15121 return CCFPmode;
15123 case LT:
15124 case LE:
15125 case GT:
15126 case GE:
15127 return CCFPEmode;
15129 default:
15130 gcc_unreachable ();
15134 /* A compare with a shifted operand. Because of canonicalization, the
15135 comparison will have to be swapped when we emit the assembler. */
15136 if (GET_MODE (y) == SImode
15137 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15138 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15139 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15140 || GET_CODE (x) == ROTATERT))
15141 return CC_SWPmode;
15143 /* This operation is performed swapped, but since we only rely on the Z
15144 flag we don't need an additional mode. */
15145 if (GET_MODE (y) == SImode
15146 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15147 && GET_CODE (x) == NEG
15148 && (op == EQ || op == NE))
15149 return CC_Zmode;
15151 /* This is a special case that is used by combine to allow a
15152 comparison of a shifted byte load to be split into a zero-extend
15153 followed by a comparison of the shifted integer (only valid for
15154 equalities and unsigned inequalities). */
15155 if (GET_MODE (x) == SImode
15156 && GET_CODE (x) == ASHIFT
15157 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15158 && GET_CODE (XEXP (x, 0)) == SUBREG
15159 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15160 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15161 && (op == EQ || op == NE
15162 || op == GEU || op == GTU || op == LTU || op == LEU)
15163 && CONST_INT_P (y))
15164 return CC_Zmode;
15166 /* A construct for a conditional compare, if the false arm contains
15167 0, then both conditions must be true, otherwise either condition
15168 must be true. Not all conditions are possible, so CCmode is
15169 returned if it can't be done. */
15170 if (GET_CODE (x) == IF_THEN_ELSE
15171 && (XEXP (x, 2) == const0_rtx
15172 || XEXP (x, 2) == const1_rtx)
15173 && COMPARISON_P (XEXP (x, 0))
15174 && COMPARISON_P (XEXP (x, 1)))
15175 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15176 INTVAL (XEXP (x, 2)));
15178 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15179 if (GET_CODE (x) == AND
15180 && (op == EQ || op == NE)
15181 && COMPARISON_P (XEXP (x, 0))
15182 && COMPARISON_P (XEXP (x, 1)))
15183 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15184 DOM_CC_X_AND_Y);
15186 if (GET_CODE (x) == IOR
15187 && (op == EQ || op == NE)
15188 && COMPARISON_P (XEXP (x, 0))
15189 && COMPARISON_P (XEXP (x, 1)))
15190 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15191 DOM_CC_X_OR_Y);
15193 /* An operation (on Thumb) where we want to test for a single bit.
15194 This is done by shifting that bit up into the top bit of a
15195 scratch register; we can then branch on the sign bit. */
15196 if (TARGET_THUMB1
15197 && GET_MODE (x) == SImode
15198 && (op == EQ || op == NE)
15199 && GET_CODE (x) == ZERO_EXTRACT
15200 && XEXP (x, 1) == const1_rtx)
15201 return CC_Nmode;
15203 /* An operation that sets the condition codes as a side-effect; the
15204 V flag is not set correctly, so we can only use comparisons where
15205 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15206 instead.) */
15207 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15208 if (GET_MODE (x) == SImode
15209 && y == const0_rtx
15210 && (op == EQ || op == NE || op == LT || op == GE)
15211 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15212 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15213 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15214 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15215 || GET_CODE (x) == LSHIFTRT
15216 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15217 || GET_CODE (x) == ROTATERT
15218 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15219 return CC_NOOVmode;
15221 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15222 return CC_Zmode;
15224 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15225 && GET_CODE (x) == PLUS
15226 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15227 return CC_Cmode;
15229 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15231 switch (op)
15233 case EQ:
15234 case NE:
15235 /* A DImode comparison against zero can be implemented by
15236 or'ing the two halves together. */
15237 if (y == const0_rtx)
15238 return CC_Zmode;
15240 /* We can do an equality test in three Thumb instructions. */
15241 if (!TARGET_32BIT)
15242 return CC_Zmode;
15244 /* FALLTHROUGH */
15246 case LTU:
15247 case LEU:
15248 case GTU:
15249 case GEU:
15250 /* DImode unsigned comparisons can be implemented by cmp +
15251 cmpeq without a scratch register. Not worth doing in
15252 Thumb-2. */
15253 if (TARGET_32BIT)
15254 return CC_CZmode;
15256 /* FALLTHROUGH */
15258 case LT:
15259 case LE:
15260 case GT:
15261 case GE:
15262 /* DImode signed and unsigned comparisons can be implemented
15263 by cmp + sbcs with a scratch register, but that does not
15264 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15265 gcc_assert (op != EQ && op != NE);
15266 return CC_NCVmode;
15268 default:
15269 gcc_unreachable ();
15273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15274 return GET_MODE (x);
15276 return CCmode;
15279 /* X and Y are two things to compare using CODE. Emit the compare insn and
15280 return the rtx for the CC register in the appropriate mode. SCRATCH is
15281 a scratch register, which may be needed for DImode comparisons. */
15282 rtx
15283 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15285 machine_mode mode;
15286 rtx cc_reg;
15287 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15289 /* We might have X as a constant, Y as a register because of the predicates
15290 used for cmpdi. If so, force X to a register here. */
15291 if (dimode_comparison && !REG_P (x))
15292 x = force_reg (DImode, x);
15294 mode = SELECT_CC_MODE (code, x, y);
15295 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15297 if (dimode_comparison
15298 && mode != CC_CZmode)
15300 rtx clobber, set;
15302 /* To compare two non-zero values for equality, XOR them and
15303 then compare against zero. Not used for ARM mode; there
15304 CC_CZmode is cheaper. */
15305 if (mode == CC_Zmode && y != const0_rtx)
15307 gcc_assert (!reload_completed);
15308 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15309 y = const0_rtx;
15312 /* A scratch register is required. */
15313 if (reload_completed)
15314 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15315 else
15316 scratch = gen_rtx_SCRATCH (SImode);
15318 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15319 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15320 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15322 else
15323 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15325 return cc_reg;
15328 /* Generate a sequence of insns that will generate the correct return
15329 address mask depending on the physical architecture that the program
15330 is running on. */
15331 rtx
15332 arm_gen_return_addr_mask (void)
15334 rtx reg = gen_reg_rtx (Pmode);
15336 emit_insn (gen_return_addr_mask (reg));
15337 return reg;
15340 void
15341 arm_reload_in_hi (rtx *operands)
15343 rtx ref = operands[1];
15344 rtx base, scratch;
15345 HOST_WIDE_INT offset = 0;
15347 if (GET_CODE (ref) == SUBREG)
15349 offset = SUBREG_BYTE (ref);
15350 ref = SUBREG_REG (ref);
15353 if (REG_P (ref))
15355 /* We have a pseudo which has been spilt onto the stack; there
15356 are two cases here: the first where there is a simple
15357 stack-slot replacement and a second where the stack-slot is
15358 out of range, or is used as a subreg. */
15359 if (reg_equiv_mem (REGNO (ref)))
15361 ref = reg_equiv_mem (REGNO (ref));
15362 base = find_replacement (&XEXP (ref, 0));
15364 else
15365 /* The slot is out of range, or was dressed up in a SUBREG. */
15366 base = reg_equiv_address (REGNO (ref));
15368 else
15369 base = find_replacement (&XEXP (ref, 0));
15371 /* Handle the case where the address is too complex to be offset by 1. */
15372 if (GET_CODE (base) == MINUS
15373 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15375 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15377 emit_set_insn (base_plus, base);
15378 base = base_plus;
15380 else if (GET_CODE (base) == PLUS)
15382 /* The addend must be CONST_INT, or we would have dealt with it above. */
15383 HOST_WIDE_INT hi, lo;
15385 offset += INTVAL (XEXP (base, 1));
15386 base = XEXP (base, 0);
15388 /* Rework the address into a legal sequence of insns. */
15389 /* Valid range for lo is -4095 -> 4095 */
15390 lo = (offset >= 0
15391 ? (offset & 0xfff)
15392 : -((-offset) & 0xfff));
15394 /* Corner case: if lo is the max offset then we would be out of range
15395 once we have added the additional 1 below, so bump the msb into the
15396 pre-loading insn(s). */
15397 if (lo == 4095)
15398 lo &= 0x7ff;
15400 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15401 ^ (HOST_WIDE_INT) 0x80000000)
15402 - (HOST_WIDE_INT) 0x80000000);
15404 gcc_assert (hi + lo == offset);
15406 if (hi != 0)
15408 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15410 /* Get the base address; addsi3 knows how to handle constants
15411 that require more than one insn. */
15412 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15413 base = base_plus;
15414 offset = lo;
15418 /* Operands[2] may overlap operands[0] (though it won't overlap
15419 operands[1]); that's why we asked for a DImode reg -- so we can
15420 use the half that does not overlap. */
15421 if (REGNO (operands[2]) == REGNO (operands[0]))
15422 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15423 else
15424 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15426 emit_insn (gen_zero_extendqisi2 (scratch,
15427 gen_rtx_MEM (QImode,
15428 plus_constant (Pmode, base,
15429 offset))));
15430 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15431 gen_rtx_MEM (QImode,
15432 plus_constant (Pmode, base,
15433 offset + 1))));
15434 if (!BYTES_BIG_ENDIAN)
15435 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15436 gen_rtx_IOR (SImode,
15437 gen_rtx_ASHIFT
15438 (SImode,
15439 gen_rtx_SUBREG (SImode, operands[0], 0),
15440 GEN_INT (8)),
15441 scratch));
15442 else
15443 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15444 gen_rtx_IOR (SImode,
15445 gen_rtx_ASHIFT (SImode, scratch,
15446 GEN_INT (8)),
15447 gen_rtx_SUBREG (SImode, operands[0], 0)));
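/* A worked example of the HI/LO offset split used above (and again in
   arm_reload_out_hi below); this is an illustrative sketch only and it
   glosses over the 32-bit sign-extension step the real code applies to HI:
   an offset of 5000 splits into lo = 5000 & 0xfff = 904 and hi = 4096, so
   the base register is first advanced by 4096 and the two byte accesses
   then use offsets 904 and 905, both within the +/-4095 range; for the
   corner case offset = 4095, lo is clipped to 4095 & 0x7ff = 2047 and hi
   becomes 2048, keeping the "offset + 1" access in range as well. */
#if 0 /* Standalone sketch of the split (simplified, hypothetical helper). */
static void
split_hi_lo (long offset, long *hi, long *lo)
{
  /* LO keeps the low 12 bits, preserving the sign of OFFSET. */
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  /* Leave room for the later "offset + 1" access. */
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = offset - *lo;
}
#endif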
15450 /* Handle storing a half-word to memory during reload by synthesizing it as two
15451 byte stores. Take care not to clobber the input values until after we
15452 have moved them somewhere safe. This code assumes that if the DImode
15453 scratch in operands[2] overlaps either the input value or output address
15454 in some way, then that value must die in this insn (we absolutely need
15455 two scratch registers for some corner cases). */
15456 void
15457 arm_reload_out_hi (rtx *operands)
15459 rtx ref = operands[0];
15460 rtx outval = operands[1];
15461 rtx base, scratch;
15462 HOST_WIDE_INT offset = 0;
15464 if (GET_CODE (ref) == SUBREG)
15466 offset = SUBREG_BYTE (ref);
15467 ref = SUBREG_REG (ref);
15470 if (REG_P (ref))
15472 /* We have a pseudo which has been spilt onto the stack; there
15473 are two cases here: the first where there is a simple
15474 stack-slot replacement and a second where the stack-slot is
15475 out of range, or is used as a subreg. */
15476 if (reg_equiv_mem (REGNO (ref)))
15478 ref = reg_equiv_mem (REGNO (ref));
15479 base = find_replacement (&XEXP (ref, 0));
15481 else
15482 /* The slot is out of range, or was dressed up in a SUBREG. */
15483 base = reg_equiv_address (REGNO (ref));
15485 else
15486 base = find_replacement (&XEXP (ref, 0));
15488 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15490 /* Handle the case where the address is too complex to be offset by 1. */
15491 if (GET_CODE (base) == MINUS
15492 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15494 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15496 /* Be careful not to destroy OUTVAL. */
15497 if (reg_overlap_mentioned_p (base_plus, outval))
15499 /* Updating base_plus might destroy outval, see if we can
15500 swap the scratch and base_plus. */
15501 if (!reg_overlap_mentioned_p (scratch, outval))
15503 rtx tmp = scratch;
15504 scratch = base_plus;
15505 base_plus = tmp;
15507 else
15509 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15511 /* Be conservative and copy OUTVAL into the scratch now,
15512 this should only be necessary if outval is a subreg
15513 of something larger than a word. */
15514 /* XXX Might this clobber base? I can't see how it can,
15515 since scratch is known to overlap with OUTVAL, and
15516 must be wider than a word. */
15517 emit_insn (gen_movhi (scratch_hi, outval));
15518 outval = scratch_hi;
15522 emit_set_insn (base_plus, base);
15523 base = base_plus;
15525 else if (GET_CODE (base) == PLUS)
15527 /* The addend must be CONST_INT, or we would have dealt with it above. */
15528 HOST_WIDE_INT hi, lo;
15530 offset += INTVAL (XEXP (base, 1));
15531 base = XEXP (base, 0);
15533 /* Rework the address into a legal sequence of insns. */
15534 /* Valid range for lo is -4095 -> 4095 */
15535 lo = (offset >= 0
15536 ? (offset & 0xfff)
15537 : -((-offset) & 0xfff));
15539 /* Corner case: if lo is the maximum offset, then we would be out of range
15540 once we have added the additional 1 below, so bump the msb into the
15541 pre-loading insn(s). */
15542 if (lo == 4095)
15543 lo &= 0x7ff;
15545 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15546 ^ (HOST_WIDE_INT) 0x80000000)
15547 - (HOST_WIDE_INT) 0x80000000);
15549 gcc_assert (hi + lo == offset);
15551 if (hi != 0)
15553 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15555 /* Be careful not to destroy OUTVAL. */
15556 if (reg_overlap_mentioned_p (base_plus, outval))
15558 /* Updating base_plus might destroy outval, see if we
15559 can swap the scratch and base_plus. */
15560 if (!reg_overlap_mentioned_p (scratch, outval))
15562 rtx tmp = scratch;
15563 scratch = base_plus;
15564 base_plus = tmp;
15566 else
15568 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15570 /* Be conservative and copy outval into scratch now,
15571 this should only be necessary if outval is a
15572 subreg of something larger than a word. */
15573 /* XXX Might this clobber base? I can't see how it
15574 can, since scratch is known to overlap with
15575 outval. */
15576 emit_insn (gen_movhi (scratch_hi, outval));
15577 outval = scratch_hi;
15581 /* Get the base address; addsi3 knows how to handle constants
15582 that require more than one insn. */
15583 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15584 base = base_plus;
15585 offset = lo;
15589 if (BYTES_BIG_ENDIAN)
15591 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15592 plus_constant (Pmode, base,
15593 offset + 1)),
15594 gen_lowpart (QImode, outval)));
15595 emit_insn (gen_lshrsi3 (scratch,
15596 gen_rtx_SUBREG (SImode, outval, 0),
15597 GEN_INT (8)));
15598 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15599 offset)),
15600 gen_lowpart (QImode, scratch)));
15602 else
15604 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15605 offset)),
15606 gen_lowpart (QImode, outval)));
15607 emit_insn (gen_lshrsi3 (scratch,
15608 gen_rtx_SUBREG (SImode, outval, 0),
15609 GEN_INT (8)));
15610 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15611 plus_constant (Pmode, base,
15612 offset + 1)),
15613 gen_lowpart (QImode, scratch)));
15617 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15618 (padded to the size of a word) should be passed in a register. */
15620 static bool
15621 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15623 if (TARGET_AAPCS_BASED)
15624 return must_pass_in_stack_var_size (mode, type);
15625 else
15626 return must_pass_in_stack_var_size_or_pad (mode, type);
15630 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15631 Return true if an argument passed on the stack should be padded upwards,
15632 i.e. if the least-significant byte has useful data.
15633 For legacy APCS ABIs we use the default. For AAPCS-based ABIs, small
15634 aggregate types are placed at the lowest memory address. */
15636 bool
15637 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15639 if (!TARGET_AAPCS_BASED)
15640 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15642 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15643 return false;
15645 return true;
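/* A sketch of what the rule above means in practice on a big-endian AAPCS
   target (hypothetical example types):

     struct s3 { char c[3]; };   small aggregate: padded upward, so its
                                 bytes sit at the low addresses of the
                                 word-sized stack slot;
     short h;                    integral type: padded downward, so the
                                 useful bytes end up in the high-addressed,
                                 least significant part of the slot.

   On little-endian AAPCS targets everything is padded upward; legacy APCS
   ABIs simply use the target default. */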
15649 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15650 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15651 register has useful data, and return the opposite if the most
15652 significant byte does. */
15654 bool
15655 arm_pad_reg_upward (machine_mode mode,
15656 tree type, int first ATTRIBUTE_UNUSED)
15658 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15660 /* For AAPCS, small aggregates, small fixed-point types,
15661 and small complex types are always padded upwards. */
15662 if (type)
15664 if ((AGGREGATE_TYPE_P (type)
15665 || TREE_CODE (type) == COMPLEX_TYPE
15666 || FIXED_POINT_TYPE_P (type))
15667 && int_size_in_bytes (type) <= 4)
15668 return true;
15670 else
15672 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15673 && GET_MODE_SIZE (mode) <= 4)
15674 return true;
15678 /* Otherwise, use default padding. */
15679 return !BYTES_BIG_ENDIAN;
15682 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15683 assuming that the address in the base register is word aligned. */
15684 bool
15685 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15687 HOST_WIDE_INT max_offset;
15689 /* Offset must be a multiple of 4 in Thumb mode. */
15690 if (TARGET_THUMB2 && ((offset & 3) != 0))
15691 return false;
15693 if (TARGET_THUMB2)
15694 max_offset = 1020;
15695 else if (TARGET_ARM)
15696 max_offset = 255;
15697 else
15698 return false;
15700 return ((offset <= max_offset) && (offset >= -max_offset));
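/* Illustrative checks against the ranges above (assuming the usual Thumb-2
   and ARM encodings of LDRD/STRD): in Thumb-2, offsets 0, 4 and -1020 are
   accepted, while 1024 is out of range and 2 is rejected because it is not
   a multiple of 4; in ARM state any offset in [-255, 255] is accepted,
   e.g. 248 or -200; Thumb-1 is always rejected, since it has no LDRD/STRD. */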
15703 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15704 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15705 Assumes that the address in the base register RN is word aligned. Pattern
15706 guarantees that both memory accesses use the same base register,
15707 the offsets are constants within range, and the gap between the offsets is 4.
15708 If reload is complete, then check that the registers are legal. WBACK indicates whether
15709 address is updated. LOAD indicates whether memory access is load or store. */
15710 bool
15711 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15712 bool wback, bool load)
15714 unsigned int t, t2, n;
15716 if (!reload_completed)
15717 return true;
15719 if (!offset_ok_for_ldrd_strd (offset))
15720 return false;
15722 t = REGNO (rt);
15723 t2 = REGNO (rt2);
15724 n = REGNO (rn);
15726 if ((TARGET_THUMB2)
15727 && ((wback && (n == t || n == t2))
15728 || (t == SP_REGNUM)
15729 || (t == PC_REGNUM)
15730 || (t2 == SP_REGNUM)
15731 || (t2 == PC_REGNUM)
15732 || (!load && (n == PC_REGNUM))
15733 || (load && (t == t2))
15734 /* Triggers the Cortex-M3 LDRD erratum. */
15735 || (!wback && load && fix_cm3_ldrd && (n == t))))
15736 return false;
15738 if ((TARGET_ARM)
15739 && ((wback && (n == t || n == t2))
15740 || (t2 == PC_REGNUM)
15741 || (t % 2 != 0) /* First destination register is not even. */
15742 || (t2 != t + 1)
15743 /* PC can be used as a base register (for offset addressing only),
15744 but it is deprecated. */
15745 || (n == PC_REGNUM)))
15746 return false;
15748 return true;
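/* For example, once reload has completed, an ARM-state "ldrd r0, r1, [r2]"
   passes the checks above (even first destination register, consecutive
   pair, base register not PC), whereas "ldrd r1, r2, [r3]" fails because
   the first destination register is odd, and a write-back form such as
   "ldrd r0, r1, [r0]!" fails because the base overlaps a destination.
   In Thumb-2 state the even/consecutive restriction does not apply, but
   SP and PC are excluded and the Cortex-M3 erratum check may still
   reject a load. */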
15751 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15752 operand MEM's address contains an immediate offset from the base
15753 register and has no side effects, in which case it sets BASE and
15754 OFFSET accordingly. */
15755 static bool
15756 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15758 rtx addr;
15760 gcc_assert (base != NULL && offset != NULL);
15762 /* TODO: Handle more general memory operand patterns, such as
15763 PRE_DEC and PRE_INC. */
15765 if (side_effects_p (mem))
15766 return false;
15768 /* Can't deal with subregs. */
15769 if (GET_CODE (mem) == SUBREG)
15770 return false;
15772 gcc_assert (MEM_P (mem));
15774 *offset = const0_rtx;
15776 addr = XEXP (mem, 0);
15778 /* If addr isn't valid for DImode, then we can't handle it. */
15779 if (!arm_legitimate_address_p (DImode, addr,
15780 reload_in_progress || reload_completed))
15781 return false;
15783 if (REG_P (addr))
15785 *base = addr;
15786 return true;
15788 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15790 *base = XEXP (addr, 0);
15791 *offset = XEXP (addr, 1);
15792 return (REG_P (*base) && CONST_INT_P (*offset));
15795 return false;
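/* In other words, the helper above accepts plain register addresses and
   register plus/minus constant addresses, e.g. the RTL for [r4] or
   [r4, #8], and rejects auto-modify addresses (pre/post increment or
   decrement), subregs, and anything else. */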
15798 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15800 /* Called from a peephole2 to replace two word-size accesses with a
15801 single LDRD/STRD instruction. Returns true iff we can generate a
15802 new instruction sequence. That is, both accesses use the same base
15803 register and the gap between constant offsets is 4. This function
15804 may reorder its operands to match ldrd/strd RTL templates.
15805 OPERANDS are the operands found by the peephole matcher;
15806 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15807 corresponding memory operands. LOAD indicates whether the access
15808 is a load or a store. CONST_STORE indicates a store of constant
15809 integer values held in OPERANDS[4,5] and assumes that the pattern
15810 is 4 insns long, for the purpose of checking dead registers.
15811 COMMUTE indicates that register operands may be reordered. */
15812 bool
15813 gen_operands_ldrd_strd (rtx *operands, bool load,
15814 bool const_store, bool commute)
15816 int nops = 2;
15817 HOST_WIDE_INT offsets[2], offset;
15818 rtx base = NULL_RTX;
15819 rtx cur_base, cur_offset, tmp;
15820 int i, gap;
15821 HARD_REG_SET regset;
15823 gcc_assert (!const_store || !load);
15824 /* Check that the memory references are immediate offsets from the
15825 same base register. Extract the base register, the destination
15826 registers, and the corresponding memory offsets. */
15827 for (i = 0; i < nops; i++)
15829 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15830 return false;
15832 if (i == 0)
15833 base = cur_base;
15834 else if (REGNO (base) != REGNO (cur_base))
15835 return false;
15837 offsets[i] = INTVAL (cur_offset);
15838 if (GET_CODE (operands[i]) == SUBREG)
15840 tmp = SUBREG_REG (operands[i]);
15841 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15842 operands[i] = tmp;
15846 /* Make sure there is no dependency between the individual loads. */
15847 if (load && REGNO (operands[0]) == REGNO (base))
15848 return false; /* RAW */
15850 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15851 return false; /* WAW */
15853 /* If the same input register is used in both stores
15854 when storing different constants, try to find a free register.
15855 For example, the code
15856 mov r0, 0
15857 str r0, [r2]
15858 mov r0, 1
15859 str r0, [r2, #4]
15860 can be transformed into
15861 mov r1, 0
15862 strd r1, r0, [r2]
15863 in Thumb mode assuming that r1 is free. */
15864 if (const_store
15865 && REGNO (operands[0]) == REGNO (operands[1])
15866 && INTVAL (operands[4]) != INTVAL (operands[5]))
15868 if (TARGET_THUMB2)
15870 CLEAR_HARD_REG_SET (regset);
15871 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15872 if (tmp == NULL_RTX)
15873 return false;
15875 /* Use the new register in the first load to ensure that
15876 if the original input register is not dead after the peephole,
15877 then it will have the correct constant value. */
15878 operands[0] = tmp;
15880 else if (TARGET_ARM)
15882 return false;
15883 int regno = REGNO (operands[0]);
15884 if (!peep2_reg_dead_p (4, operands[0]))
15886 /* When the input register is even and is not dead after the
15887 pattern, it has to hold the second constant but we cannot
15888 form a legal STRD in ARM mode with this register as the second
15889 register. */
15890 if (regno % 2 == 0)
15891 return false;
15893 /* Is regno-1 free? */
15894 SET_HARD_REG_SET (regset);
15895 CLEAR_HARD_REG_BIT(regset, regno - 1);
15896 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15897 if (tmp == NULL_RTX)
15898 return false;
15900 operands[0] = tmp;
15902 else
15904 /* Find a DImode register. */
15905 CLEAR_HARD_REG_SET (regset);
15906 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15907 if (tmp != NULL_RTX)
15909 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15910 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15912 else
15914 /* Can we use the input register to form a DI register? */
15915 SET_HARD_REG_SET (regset);
15916 CLEAR_HARD_REG_BIT(regset,
15917 regno % 2 == 0 ? regno + 1 : regno - 1);
15918 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15919 if (tmp == NULL_RTX)
15920 return false;
15921 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15925 gcc_assert (operands[0] != NULL_RTX);
15926 gcc_assert (operands[1] != NULL_RTX);
15927 gcc_assert (REGNO (operands[0]) % 2 == 0);
15928 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15932 /* Make sure the instructions are ordered with the lower memory access first. */
15933 if (offsets[0] > offsets[1])
15935 gap = offsets[0] - offsets[1];
15936 offset = offsets[1];
15938 /* Swap the instructions such that lower memory is accessed first. */
15939 SWAP_RTX (operands[0], operands[1]);
15940 SWAP_RTX (operands[2], operands[3]);
15941 if (const_store)
15942 SWAP_RTX (operands[4], operands[5]);
15944 else
15946 gap = offsets[1] - offsets[0];
15947 offset = offsets[0];
15950 /* Make sure accesses are to consecutive memory locations. */
15951 if (gap != 4)
15952 return false;
15954 /* Make sure we generate legal instructions. */
15955 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15956 false, load))
15957 return true;
15959 /* In Thumb state, where registers are almost unconstrained, there
15960 is little hope of fixing it. */
15961 if (TARGET_THUMB2)
15962 return false;
15964 if (load && commute)
15966 /* Try reordering registers. */
15967 SWAP_RTX (operands[0], operands[1]);
15968 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15969 false, load))
15970 return true;
15973 if (const_store)
15975 /* If input registers are dead after this pattern, they can be
15976 reordered or replaced by other registers that are free in the
15977 current pattern. */
15978 if (!peep2_reg_dead_p (4, operands[0])
15979 || !peep2_reg_dead_p (4, operands[1]))
15980 return false;
15982 /* Try to reorder the input registers. */
15983 /* For example, the code
15984 mov r0, 0
15985 mov r1, 1
15986 str r1, [r2]
15987 str r0, [r2, #4]
15988 can be transformed into
15989 mov r1, 0
15990 mov r0, 1
15991 strd r0, r1, [r2]
15993 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15994 false, false))
15996 SWAP_RTX (operands[0], operands[1]);
15997 return true;
16000 /* Try to find a free DI register. */
16001 CLEAR_HARD_REG_SET (regset);
16002 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16003 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16004 while (true)
16006 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16007 if (tmp == NULL_RTX)
16008 return false;
16010 /* DREG must be an even-numbered register in DImode.
16011 Split it into SI registers. */
16012 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16013 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16014 gcc_assert (operands[0] != NULL_RTX);
16015 gcc_assert (operands[1] != NULL_RTX);
16016 gcc_assert (REGNO (operands[0]) % 2 == 0);
16017 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16019 return (operands_ok_ldrd_strd (operands[0], operands[1],
16020 base, offset,
16021 false, load));
16025 return false;
16027 #undef SWAP_RTX
16032 /* Print a symbolic form of X to the debug file, F. */
16033 static void
16034 arm_print_value (FILE *f, rtx x)
16036 switch (GET_CODE (x))
16038 case CONST_INT:
16039 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16040 return;
16042 case CONST_DOUBLE:
16043 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16044 return;
16046 case CONST_VECTOR:
16048 int i;
16050 fprintf (f, "<");
16051 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16053 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16054 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16055 fputc (',', f);
16057 fprintf (f, ">");
16059 return;
16061 case CONST_STRING:
16062 fprintf (f, "\"%s\"", XSTR (x, 0));
16063 return;
16065 case SYMBOL_REF:
16066 fprintf (f, "`%s'", XSTR (x, 0));
16067 return;
16069 case LABEL_REF:
16070 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16071 return;
16073 case CONST:
16074 arm_print_value (f, XEXP (x, 0));
16075 return;
16077 case PLUS:
16078 arm_print_value (f, XEXP (x, 0));
16079 fprintf (f, "+");
16080 arm_print_value (f, XEXP (x, 1));
16081 return;
16083 case PC:
16084 fprintf (f, "pc");
16085 return;
16087 default:
16088 fprintf (f, "????");
16089 return;
16093 /* Routines for manipulation of the constant pool. */
16095 /* Arm instructions cannot load a large constant directly into a
16096 register; they have to come from a pc relative load. The constant
16097 must therefore be placed in the addressable range of the pc
16098 relative load. Depending on the precise pc relative load
16099 instruction, the range is somewhere between 256 bytes and 4k. This
16100 means that we often have to dump a constant inside a function, and
16101 generate code to branch around it.
16103 It is important to minimize this, since the branches will slow
16104 things down and make the code larger.
16106 Normally we can hide the table after an existing unconditional
16107 branch so that there is no interruption of the flow, but in the
16108 worst case the code looks like this:
16110 ldr rn, L1
16112 b L2
16113 align
16114 L1: .long value
16118 ldr rn, L3
16120 b L4
16121 align
16122 L3: .long value
16126 We fix this by performing a scan after scheduling, which notices
16127 which instructions need to have their operands fetched from the
16128 constant table and builds the table.
16130 The algorithm starts by building a table of all the constants that
16131 need fixing up and all the natural barriers in the function (places
16132 where a constant table can be dropped without breaking the flow).
16133 For each fixup we note how far the pc-relative replacement will be
16134 able to reach and the offset of the instruction into the function.
16136 Having built the table we then group the fixes together to form
16137 tables that are as large as possible (subject to addressing
16138 constraints) and emit each table of constants after the last
16139 barrier that is within range of all the instructions in the group.
16140 If a group does not contain a barrier, then we forcibly create one
16141 by inserting a jump instruction into the flow. Once the table has
16142 been inserted, the insns are then modified to reference the
16143 relevant entry in the pool.
16145 Possible enhancements to the algorithm (not implemented) are:
16147 1) For some processors and object formats, there may be benefit in
16148 aligning the pools to the start of cache lines; this alignment
16149 would need to be taken into account when calculating addressability
16150 of a pool. */
16152 /* These typedefs are located at the start of this file, so that
16153 they can be used in the prototypes there. This comment is to
16154 remind readers of that fact so that the following structures
16155 can be understood more easily.
16157 typedef struct minipool_node Mnode;
16158 typedef struct minipool_fixup Mfix; */
16160 struct minipool_node
16162 /* Doubly linked chain of entries. */
16163 Mnode * next;
16164 Mnode * prev;
16165 /* The maximum offset into the code at which this entry can be placed. While
16166 pushing fixes for forward references, all entries are sorted in order
16167 of increasing max_address. */
16168 HOST_WIDE_INT max_address;
16169 /* Similarly for an entry inserted for a backwards ref. */
16170 HOST_WIDE_INT min_address;
16171 /* The number of fixes referencing this entry. This can become zero
16172 if we "unpush" an entry. In this case we ignore the entry when we
16173 come to emit the code. */
16174 int refcount;
16175 /* The offset from the start of the minipool. */
16176 HOST_WIDE_INT offset;
16177 /* The value in the table. */
16178 rtx value;
16179 /* The mode of value. */
16180 machine_mode mode;
16181 /* The size of the value. With iWMMXt enabled
16182 sizes > 4 also imply an alignment of 8-bytes. */
16183 int fix_size;
16186 struct minipool_fixup
16188 Mfix * next;
16189 rtx_insn * insn;
16190 HOST_WIDE_INT address;
16191 rtx * loc;
16192 machine_mode mode;
16193 int fix_size;
16194 rtx value;
16195 Mnode * minipool;
16196 HOST_WIDE_INT forwards;
16197 HOST_WIDE_INT backwards;
16200 /* Fixes less than a word need padding out to a word boundary. */
16201 #define MINIPOOL_FIX_SIZE(mode) \
16202 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
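/* So, for example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (sub-word entries are padded out to a full word), while
   MINIPOOL_FIX_SIZE (DImode) is 8 and MINIPOOL_FIX_SIZE (V4SImode) is 16. */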
16204 static Mnode * minipool_vector_head;
16205 static Mnode * minipool_vector_tail;
16206 static rtx_code_label *minipool_vector_label;
16207 static int minipool_pad;
16209 /* The linked list of all minipool fixes required for this function. */
16210 Mfix * minipool_fix_head;
16211 Mfix * minipool_fix_tail;
16212 /* The fix entry for the current minipool, once it has been placed. */
16213 Mfix * minipool_barrier;
16215 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16216 #define JUMP_TABLES_IN_TEXT_SECTION 0
16217 #endif
16219 static HOST_WIDE_INT
16220 get_jump_table_size (rtx_jump_table_data *insn)
16222 /* ADDR_VECs only take room if read-only data goes into the text
16223 section. */
16224 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16226 rtx body = PATTERN (insn);
16227 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16228 HOST_WIDE_INT size;
16229 HOST_WIDE_INT modesize;
16231 modesize = GET_MODE_SIZE (GET_MODE (body));
16232 size = modesize * XVECLEN (body, elt);
16233 switch (modesize)
16235 case 1:
16236 /* Round up size of TBB table to a halfword boundary. */
16237 size = (size + 1) & ~(HOST_WIDE_INT)1;
16238 break;
16239 case 2:
16240 /* No padding necessary for TBH. */
16241 break;
16242 case 4:
16243 /* Add two bytes for alignment on Thumb. */
16244 if (TARGET_THUMB)
16245 size += 2;
16246 break;
16247 default:
16248 gcc_unreachable ();
16250 return size;
16253 return 0;
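/* Worked examples of the computation above, assuming the table is emitted
   into the text section: a 5-entry QImode (TBB) table takes 5 bytes,
   rounded up to 6; a 5-entry HImode (TBH) table takes exactly 10 bytes;
   and a 5-entry SImode table on Thumb takes 20 + 2 = 22 bytes to allow
   for alignment. */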
16256 /* Return the maximum amount of padding that will be inserted before
16257 label LABEL. */
16259 static HOST_WIDE_INT
16260 get_label_padding (rtx label)
16262 HOST_WIDE_INT align, min_insn_size;
16264 align = 1 << label_to_alignment (label);
16265 min_insn_size = TARGET_THUMB ? 2 : 4;
16266 return align > min_insn_size ? align - min_insn_size : 0;
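/* For instance, a label aligned to 8 bytes in Thumb state (minimum insn
   size 2) may be preceded by up to 8 - 2 = 6 bytes of padding, whereas a
   word-aligned label in ARM state needs none, because the 4-byte alignment
   does not exceed the 4-byte minimum instruction size. */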
16269 /* Move a minipool fix MP from its current location to before MAX_MP.
16270 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16271 constraints may need updating. */
16272 static Mnode *
16273 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16274 HOST_WIDE_INT max_address)
16276 /* The code below assumes these are different. */
16277 gcc_assert (mp != max_mp);
16279 if (max_mp == NULL)
16281 if (max_address < mp->max_address)
16282 mp->max_address = max_address;
16284 else
16286 if (max_address > max_mp->max_address - mp->fix_size)
16287 mp->max_address = max_mp->max_address - mp->fix_size;
16288 else
16289 mp->max_address = max_address;
16291 /* Unlink MP from its current position. Since max_mp is non-null,
16292 mp->prev must be non-null. */
16293 mp->prev->next = mp->next;
16294 if (mp->next != NULL)
16295 mp->next->prev = mp->prev;
16296 else
16297 minipool_vector_tail = mp->prev;
16299 /* Re-insert it before MAX_MP. */
16300 mp->next = max_mp;
16301 mp->prev = max_mp->prev;
16302 max_mp->prev = mp;
16304 if (mp->prev != NULL)
16305 mp->prev->next = mp;
16306 else
16307 minipool_vector_head = mp;
16310 /* Save the new entry. */
16311 max_mp = mp;
16313 /* Scan over the preceding entries and adjust their addresses as
16314 required. */
16315 while (mp->prev != NULL
16316 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16318 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16319 mp = mp->prev;
16322 return max_mp;
16325 /* Add a constant to the minipool for a forward reference. Returns the
16326 node added or NULL if the constant will not fit in this pool. */
16327 static Mnode *
16328 add_minipool_forward_ref (Mfix *fix)
16330 /* If set, max_mp is the first pool_entry that has a lower
16331 constraint than the one we are trying to add. */
16332 Mnode * max_mp = NULL;
16333 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16334 Mnode * mp;
16336 /* If the minipool starts before the end of FIX->INSN then this FIX
16337 cannot be placed into the current pool. Furthermore, adding the
16338 new constant pool entry may cause the pool to start FIX_SIZE bytes
16339 earlier. */
16340 if (minipool_vector_head &&
16341 (fix->address + get_attr_length (fix->insn)
16342 >= minipool_vector_head->max_address - fix->fix_size))
16343 return NULL;
16345 /* Scan the pool to see if a constant with the same value has
16346 already been added. While we are doing this, also note the
16347 location where we must insert the constant if it doesn't already
16348 exist. */
16349 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16351 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16352 && fix->mode == mp->mode
16353 && (!LABEL_P (fix->value)
16354 || (CODE_LABEL_NUMBER (fix->value)
16355 == CODE_LABEL_NUMBER (mp->value)))
16356 && rtx_equal_p (fix->value, mp->value))
16358 /* More than one fix references this entry. */
16359 mp->refcount++;
16360 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16363 /* Note the insertion point if necessary. */
16364 if (max_mp == NULL
16365 && mp->max_address > max_address)
16366 max_mp = mp;
16368 /* If we are inserting an 8-byte aligned quantity and
16369 we have not already found an insertion point, then
16370 make sure that all such 8-byte aligned quantities are
16371 placed at the start of the pool. */
16372 if (ARM_DOUBLEWORD_ALIGN
16373 && max_mp == NULL
16374 && fix->fix_size >= 8
16375 && mp->fix_size < 8)
16377 max_mp = mp;
16378 max_address = mp->max_address;
16382 /* The value is not currently in the minipool, so we need to create
16383 a new entry for it. If MAX_MP is NULL, the entry will be put on
16384 the end of the list since the placement is less constrained than
16385 any existing entry. Otherwise, we insert the new fix before
16386 MAX_MP and, if necessary, adjust the constraints on the other
16387 entries. */
16388 mp = XNEW (Mnode);
16389 mp->fix_size = fix->fix_size;
16390 mp->mode = fix->mode;
16391 mp->value = fix->value;
16392 mp->refcount = 1;
16393 /* Not yet required for a backwards ref. */
16394 mp->min_address = -65536;
16396 if (max_mp == NULL)
16398 mp->max_address = max_address;
16399 mp->next = NULL;
16400 mp->prev = minipool_vector_tail;
16402 if (mp->prev == NULL)
16404 minipool_vector_head = mp;
16405 minipool_vector_label = gen_label_rtx ();
16407 else
16408 mp->prev->next = mp;
16410 minipool_vector_tail = mp;
16412 else
16414 if (max_address > max_mp->max_address - mp->fix_size)
16415 mp->max_address = max_mp->max_address - mp->fix_size;
16416 else
16417 mp->max_address = max_address;
16419 mp->next = max_mp;
16420 mp->prev = max_mp->prev;
16421 max_mp->prev = mp;
16422 if (mp->prev != NULL)
16423 mp->prev->next = mp;
16424 else
16425 minipool_vector_head = mp;
16428 /* Save the new entry. */
16429 max_mp = mp;
16431 /* Scan over the preceding entries and adjust their addresses as
16432 required. */
16433 while (mp->prev != NULL
16434 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16436 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16437 mp = mp->prev;
16440 return max_mp;
16443 static Mnode *
16444 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16445 HOST_WIDE_INT min_address)
16447 HOST_WIDE_INT offset;
16449 /* The code below assumes these are different. */
16450 gcc_assert (mp != min_mp);
16452 if (min_mp == NULL)
16454 if (min_address > mp->min_address)
16455 mp->min_address = min_address;
16457 else
16459 /* We will adjust this below if it is too loose. */
16460 mp->min_address = min_address;
16462 /* Unlink MP from its current position. Since min_mp is non-null,
16463 mp->next must be non-null. */
16464 mp->next->prev = mp->prev;
16465 if (mp->prev != NULL)
16466 mp->prev->next = mp->next;
16467 else
16468 minipool_vector_head = mp->next;
16470 /* Reinsert it after MIN_MP. */
16471 mp->prev = min_mp;
16472 mp->next = min_mp->next;
16473 min_mp->next = mp;
16474 if (mp->next != NULL)
16475 mp->next->prev = mp;
16476 else
16477 minipool_vector_tail = mp;
16480 min_mp = mp;
16482 offset = 0;
16483 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16485 mp->offset = offset;
16486 if (mp->refcount > 0)
16487 offset += mp->fix_size;
16489 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16490 mp->next->min_address = mp->min_address + mp->fix_size;
16493 return min_mp;
16496 /* Add a constant to the minipool for a backward reference. Returns the
16497 node added or NULL if the constant will not fit in this pool.
16499 Note that the code for insertion of a backwards reference can be
16500 somewhat confusing because the calculated offsets for each fix do
16501 not take into account the size of the pool (which is still under
16502 construction). */
16503 static Mnode *
16504 add_minipool_backward_ref (Mfix *fix)
16506 /* If set, min_mp is the last pool_entry that has a lower constraint
16507 than the one we are trying to add. */
16508 Mnode *min_mp = NULL;
16509 /* This can be negative, since it is only a constraint. */
16510 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16511 Mnode *mp;
16513 /* If we can't reach the current pool from this insn, or if we can't
16514 insert this entry at the end of the pool without pushing other
16515 fixes out of range, then we don't try. This ensures that we
16516 can't fail later on. */
16517 if (min_address >= minipool_barrier->address
16518 || (minipool_vector_tail->min_address + fix->fix_size
16519 >= minipool_barrier->address))
16520 return NULL;
16522 /* Scan the pool to see if a constant with the same value has
16523 already been added. While we are doing this, also note the
16524 location where we must insert the constant if it doesn't already
16525 exist. */
16526 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16528 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16529 && fix->mode == mp->mode
16530 && (!LABEL_P (fix->value)
16531 || (CODE_LABEL_NUMBER (fix->value)
16532 == CODE_LABEL_NUMBER (mp->value)))
16533 && rtx_equal_p (fix->value, mp->value)
16534 /* Check that there is enough slack to move this entry to the
16535 end of the table (this is conservative). */
16536 && (mp->max_address
16537 > (minipool_barrier->address
16538 + minipool_vector_tail->offset
16539 + minipool_vector_tail->fix_size)))
16541 mp->refcount++;
16542 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16545 if (min_mp != NULL)
16546 mp->min_address += fix->fix_size;
16547 else
16549 /* Note the insertion point if necessary. */
16550 if (mp->min_address < min_address)
16552 /* For now, we do not allow the insertion of nodes requiring 8-byte
16553 alignment anywhere but at the start of the pool. */
16554 if (ARM_DOUBLEWORD_ALIGN
16555 && fix->fix_size >= 8 && mp->fix_size < 8)
16556 return NULL;
16557 else
16558 min_mp = mp;
16560 else if (mp->max_address
16561 < minipool_barrier->address + mp->offset + fix->fix_size)
16563 /* Inserting before this entry would push the fix beyond
16564 its maximum address (which can happen if we have
16565 re-located a forwards fix); force the new fix to come
16566 after it. */
16567 if (ARM_DOUBLEWORD_ALIGN
16568 && fix->fix_size >= 8 && mp->fix_size < 8)
16569 return NULL;
16570 else
16572 min_mp = mp;
16573 min_address = mp->min_address + fix->fix_size;
16576 /* Do not insert a non-8-byte aligned quantity before 8-byte
16577 aligned quantities. */
16578 else if (ARM_DOUBLEWORD_ALIGN
16579 && fix->fix_size < 8
16580 && mp->fix_size >= 8)
16582 min_mp = mp;
16583 min_address = mp->min_address + fix->fix_size;
16588 /* We need to create a new entry. */
16589 mp = XNEW (Mnode);
16590 mp->fix_size = fix->fix_size;
16591 mp->mode = fix->mode;
16592 mp->value = fix->value;
16593 mp->refcount = 1;
16594 mp->max_address = minipool_barrier->address + 65536;
16596 mp->min_address = min_address;
16598 if (min_mp == NULL)
16600 mp->prev = NULL;
16601 mp->next = minipool_vector_head;
16603 if (mp->next == NULL)
16605 minipool_vector_tail = mp;
16606 minipool_vector_label = gen_label_rtx ();
16608 else
16609 mp->next->prev = mp;
16611 minipool_vector_head = mp;
16613 else
16615 mp->next = min_mp->next;
16616 mp->prev = min_mp;
16617 min_mp->next = mp;
16619 if (mp->next != NULL)
16620 mp->next->prev = mp;
16621 else
16622 minipool_vector_tail = mp;
16625 /* Save the new entry. */
16626 min_mp = mp;
16628 if (mp->prev)
16629 mp = mp->prev;
16630 else
16631 mp->offset = 0;
16633 /* Scan over the following entries and adjust their offsets. */
16634 while (mp->next != NULL)
16636 if (mp->next->min_address < mp->min_address + mp->fix_size)
16637 mp->next->min_address = mp->min_address + mp->fix_size;
16639 if (mp->refcount)
16640 mp->next->offset = mp->offset + mp->fix_size;
16641 else
16642 mp->next->offset = mp->offset;
16644 mp = mp->next;
16647 return min_mp;
16650 static void
16651 assign_minipool_offsets (Mfix *barrier)
16653 HOST_WIDE_INT offset = 0;
16654 Mnode *mp;
16656 minipool_barrier = barrier;
16658 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16660 mp->offset = offset;
16662 if (mp->refcount > 0)
16663 offset += mp->fix_size;
16667 /* Output the literal table. */
16668 static void
16669 dump_minipool (rtx_insn *scan)
16671 Mnode * mp;
16672 Mnode * nmp;
16673 int align64 = 0;
16675 if (ARM_DOUBLEWORD_ALIGN)
16676 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16677 if (mp->refcount > 0 && mp->fix_size >= 8)
16679 align64 = 1;
16680 break;
16683 if (dump_file)
16684 fprintf (dump_file,
16685 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16686 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16688 scan = emit_label_after (gen_label_rtx (), scan);
16689 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16690 scan = emit_label_after (minipool_vector_label, scan);
16692 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16694 if (mp->refcount > 0)
16696 if (dump_file)
16698 fprintf (dump_file,
16699 ";; Offset %u, min %ld, max %ld ",
16700 (unsigned) mp->offset, (unsigned long) mp->min_address,
16701 (unsigned long) mp->max_address);
16702 arm_print_value (dump_file, mp->value);
16703 fputc ('\n', dump_file);
16706 switch (mp->fix_size)
16708 #ifdef HAVE_consttable_1
16709 case 1:
16710 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16711 break;
16713 #endif
16714 #ifdef HAVE_consttable_2
16715 case 2:
16716 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16717 break;
16719 #endif
16720 #ifdef HAVE_consttable_4
16721 case 4:
16722 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16723 break;
16725 #endif
16726 #ifdef HAVE_consttable_8
16727 case 8:
16728 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16729 break;
16731 #endif
16732 #ifdef HAVE_consttable_16
16733 case 16:
16734 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16735 break;
16737 #endif
16738 default:
16739 gcc_unreachable ();
16743 nmp = mp->next;
16744 free (mp);
16747 minipool_vector_head = minipool_vector_tail = NULL;
16748 scan = emit_insn_after (gen_consttable_end (), scan);
16749 scan = emit_barrier_after (scan);
16752 /* Return the cost of forcibly inserting a barrier after INSN. */
16753 static int
16754 arm_barrier_cost (rtx insn)
16756 /* Basing the location of the pool on the loop depth is preferable,
16757 but at the moment, the basic block information seems to be
16758 corrupt by this stage of the compilation. */
16759 int base_cost = 50;
16760 rtx next = next_nonnote_insn (insn);
16762 if (next != NULL && LABEL_P (next))
16763 base_cost -= 20;
16765 switch (GET_CODE (insn))
16767 case CODE_LABEL:
16768 /* It will always be better to place the table before the label, rather
16769 than after it. */
16770 return 50;
16772 case INSN:
16773 case CALL_INSN:
16774 return base_cost;
16776 case JUMP_INSN:
16777 return base_cost - 10;
16779 default:
16780 return base_cost + 10;
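/* Under this heuristic a lower score is a better place for the barrier:
   a JUMP_INSN scores 50 - 10 = 40, an ordinary INSN or CALL_INSN scores 50,
   and either score improves by a further 20 when the next real insn is a
   label; a CODE_LABEL itself always scores 50, so the position just before
   a label is preferred to the one just after it. */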
16784 /* Find the best place in the insn stream in the range
16785 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16786 Create the barrier by inserting a jump and add a new fix entry for
16787 it. */
16788 static Mfix *
16789 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16791 HOST_WIDE_INT count = 0;
16792 rtx_barrier *barrier;
16793 rtx_insn *from = fix->insn;
16794 /* The instruction after which we will insert the jump. */
16795 rtx_insn *selected = NULL;
16796 int selected_cost;
16797 /* The address at which the jump instruction will be placed. */
16798 HOST_WIDE_INT selected_address;
16799 Mfix * new_fix;
16800 HOST_WIDE_INT max_count = max_address - fix->address;
16801 rtx_code_label *label = gen_label_rtx ();
16803 selected_cost = arm_barrier_cost (from);
16804 selected_address = fix->address;
16806 while (from && count < max_count)
16808 rtx_jump_table_data *tmp;
16809 int new_cost;
16811 /* This code shouldn't have been called if there was a natural barrier
16812 within range. */
16813 gcc_assert (!BARRIER_P (from));
16815 /* Count the length of this insn. This must stay in sync with the
16816 code that pushes minipool fixes. */
16817 if (LABEL_P (from))
16818 count += get_label_padding (from);
16819 else
16820 count += get_attr_length (from);
16822 /* If there is a jump table, add its length. */
16823 if (tablejump_p (from, NULL, &tmp))
16825 count += get_jump_table_size (tmp);
16827 /* Jump tables aren't in a basic block, so base the cost on
16828 the dispatch insn. If we select this location, we will
16829 still put the pool after the table. */
16830 new_cost = arm_barrier_cost (from);
16832 if (count < max_count
16833 && (!selected || new_cost <= selected_cost))
16835 selected = tmp;
16836 selected_cost = new_cost;
16837 selected_address = fix->address + count;
16840 /* Continue after the dispatch table. */
16841 from = NEXT_INSN (tmp);
16842 continue;
16845 new_cost = arm_barrier_cost (from);
16847 if (count < max_count
16848 && (!selected || new_cost <= selected_cost))
16850 selected = from;
16851 selected_cost = new_cost;
16852 selected_address = fix->address + count;
16855 from = NEXT_INSN (from);
16858 /* Make sure that we found a place to insert the jump. */
16859 gcc_assert (selected);
16861 /* Make sure we do not split a call and its corresponding
16862 CALL_ARG_LOCATION note. */
16863 if (CALL_P (selected))
16865 rtx_insn *next = NEXT_INSN (selected);
16866 if (next && NOTE_P (next)
16867 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16868 selected = next;
16871 /* Create a new JUMP_INSN that branches around a barrier. */
16872 from = emit_jump_insn_after (gen_jump (label), selected);
16873 JUMP_LABEL (from) = label;
16874 barrier = emit_barrier_after (from);
16875 emit_label_after (label, barrier);
16877 /* Create a minipool barrier entry for the new barrier. */
16878 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16879 new_fix->insn = barrier;
16880 new_fix->address = selected_address;
16881 new_fix->next = fix->next;
16882 fix->next = new_fix;
16884 return new_fix;
16887 /* Record that there is a natural barrier in the insn stream at
16888 ADDRESS. */
16889 static void
16890 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16892 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16894 fix->insn = insn;
16895 fix->address = address;
16897 fix->next = NULL;
16898 if (minipool_fix_head != NULL)
16899 minipool_fix_tail->next = fix;
16900 else
16901 minipool_fix_head = fix;
16903 minipool_fix_tail = fix;
16906 /* Record INSN, which will need fixing up to load a value from the
16907 minipool. ADDRESS is the offset of the insn from the start of the
16908 function; LOC is a pointer to the part of the insn which requires
16909 fixing; VALUE is the constant that must be loaded, which is of type
16910 MODE. */
16911 static void
16912 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16913 machine_mode mode, rtx value)
16915 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16917 fix->insn = insn;
16918 fix->address = address;
16919 fix->loc = loc;
16920 fix->mode = mode;
16921 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16922 fix->value = value;
16923 fix->forwards = get_attr_pool_range (insn);
16924 fix->backwards = get_attr_neg_pool_range (insn);
16925 fix->minipool = NULL;
16927 /* If an insn doesn't have a range defined for it, then it isn't
16928 expecting to be reworked by this code. Better to stop now than
16929 to generate duff assembly code. */
16930 gcc_assert (fix->forwards || fix->backwards);
16932 /* If an entry requires 8-byte alignment then assume all constant pools
16933 require 4 bytes of padding. Trying to do this later on a per-pool
16934 basis is awkward because existing pool entries have to be modified. */
16935 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16936 minipool_pad = 4;
16938 if (dump_file)
16940 fprintf (dump_file,
16941 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16942 GET_MODE_NAME (mode),
16943 INSN_UID (insn), (unsigned long) address,
16944 -1 * (long)fix->backwards, (long)fix->forwards);
16945 arm_print_value (dump_file, fix->value);
16946 fprintf (dump_file, "\n");
16949 /* Add it to the chain of fixes. */
16950 fix->next = NULL;
16952 if (minipool_fix_head != NULL)
16953 minipool_fix_tail->next = fix;
16954 else
16955 minipool_fix_head = fix;
16957 minipool_fix_tail = fix;
16960 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16961 Returns the number of insns needed, or 99 if we always want to synthesize
16962 the value. */
16964 arm_max_const_double_inline_cost ()
16966 /* Let the value get synthesized to avoid the use of literal pools. */
16967 if (arm_disable_literal_pool)
16968 return 99;
16970 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16973 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16974 Returns the number of insns needed, or 99 if we don't know how to
16975 do it. */
16977 arm_const_double_inline_cost (rtx val)
16979 rtx lowpart, highpart;
16980 machine_mode mode;
16982 mode = GET_MODE (val);
16984 if (mode == VOIDmode)
16985 mode = DImode;
16987 gcc_assert (GET_MODE_SIZE (mode) == 8);
16989 lowpart = gen_lowpart (SImode, val);
16990 highpart = gen_highpart_mode (SImode, mode, val);
16992 gcc_assert (CONST_INT_P (lowpart));
16993 gcc_assert (CONST_INT_P (highpart));
16995 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16996 NULL_RTX, NULL_RTX, 0, 0)
16997 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16998 NULL_RTX, NULL_RTX, 0, 0));
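/* As an example, the 64-bit value 0x0000000100000001 costs 1 + 1 = 2 here,
   since each 32-bit half is a single valid ARM immediate, and so it is
   normally synthesized inline; something like 0x12345678deadbeef costs
   more than arm_max_const_double_inline_cost allows and is loaded from
   the literal pool instead (unless literal pools are disabled). */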
17001 /* Cost of loading a SImode constant. */
17002 static inline int
17003 arm_const_inline_cost (enum rtx_code code, rtx val)
17005 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17006 NULL_RTX, NULL_RTX, 1, 0);
17009 /* Return true if it is worthwhile to split a 64-bit constant into two
17010 32-bit operations. This is the case if optimizing for size, or
17011 if we have load delay slots, or if one 32-bit part can be done with
17012 a single data operation. */
17013 bool
17014 arm_const_double_by_parts (rtx val)
17016 machine_mode mode = GET_MODE (val);
17017 rtx part;
17019 if (optimize_size || arm_ld_sched)
17020 return true;
17022 if (mode == VOIDmode)
17023 mode = DImode;
17025 part = gen_highpart_mode (SImode, mode, val);
17027 gcc_assert (CONST_INT_P (part));
17029 if (const_ok_for_arm (INTVAL (part))
17030 || const_ok_for_arm (~INTVAL (part)))
17031 return true;
17033 part = gen_lowpart (SImode, val);
17035 gcc_assert (CONST_INT_P (part));
17037 if (const_ok_for_arm (INTVAL (part))
17038 || const_ok_for_arm (~INTVAL (part)))
17039 return true;
17041 return false;
17044 /* Return true if it is possible to inline both the high and low parts
17045 of a 64-bit constant into 32-bit data processing instructions. */
17046 bool
17047 arm_const_double_by_immediates (rtx val)
17049 machine_mode mode = GET_MODE (val);
17050 rtx part;
17052 if (mode == VOIDmode)
17053 mode = DImode;
17055 part = gen_highpart_mode (SImode, mode, val);
17057 gcc_assert (CONST_INT_P (part));
17059 if (!const_ok_for_arm (INTVAL (part)))
17060 return false;
17062 part = gen_lowpart (SImode, val);
17064 gcc_assert (CONST_INT_P (part));
17066 if (!const_ok_for_arm (INTVAL (part)))
17067 return false;
17069 return true;
17072 /* Scan INSN and note any of its operands that need fixing.
17073 If DO_PUSHES is false we do not actually push any of the fixups
17074 needed. */
17075 static void
17076 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17078 int opno;
17080 extract_constrain_insn (insn);
17082 if (recog_data.n_alternatives == 0)
17083 return;
17085 /* Fill in recog_op_alt with information about the constraints of
17086 this insn. */
17087 preprocess_constraints (insn);
17089 const operand_alternative *op_alt = which_op_alt ();
17090 for (opno = 0; opno < recog_data.n_operands; opno++)
17092 /* Things we need to fix can only occur in inputs. */
17093 if (recog_data.operand_type[opno] != OP_IN)
17094 continue;
17096 /* If this alternative is a memory reference, then any mention
17097 of constants in this alternative is really to fool reload
17098 into allowing us to accept one there. We need to fix them up
17099 now so that we output the right code. */
17100 if (op_alt[opno].memory_ok)
17102 rtx op = recog_data.operand[opno];
17104 if (CONSTANT_P (op))
17106 if (do_pushes)
17107 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17108 recog_data.operand_mode[opno], op);
17110 else if (MEM_P (op)
17111 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17112 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17114 if (do_pushes)
17116 rtx cop = avoid_constant_pool_reference (op);
17118 /* Casting the address of something to a mode narrower
17119 than a word can cause avoid_constant_pool_reference()
17120 to return the pool reference itself. That's no good to
17121 us here. Let's just hope that we can use the
17122 constant pool value directly. */
17123 if (op == cop)
17124 cop = get_pool_constant (XEXP (op, 0));
17126 push_minipool_fix (insn, address,
17127 recog_data.operand_loc[opno],
17128 recog_data.operand_mode[opno], cop);
17135 return;
17138 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17139 be useful in the next conditional jump insn. */
17141 static void
17142 thumb1_reorg (void)
17144 basic_block bb;
17146 FOR_EACH_BB_FN (bb, cfun)
17148 rtx dest, src;
17149 rtx pat, op0, set = NULL;
17150 rtx_insn *prev, *insn = BB_END (bb);
17151 bool insn_clobbered = false;
17153 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17154 insn = PREV_INSN (insn);
17156 /* Find the last cbranchsi4_insn in basic block BB. */
17157 if (insn == BB_HEAD (bb)
17158 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17159 continue;
17161 /* Get the register with which we are comparing. */
17162 pat = PATTERN (insn);
17163 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17165 /* Find the first flag setting insn before INSN in basic block BB. */
17166 gcc_assert (insn != BB_HEAD (bb));
17167 for (prev = PREV_INSN (insn);
17168 (!insn_clobbered
17169 && prev != BB_HEAD (bb)
17170 && (NOTE_P (prev)
17171 || DEBUG_INSN_P (prev)
17172 || ((set = single_set (prev)) != NULL
17173 && get_attr_conds (prev) == CONDS_NOCOND)));
17174 prev = PREV_INSN (prev))
17176 if (reg_set_p (op0, prev))
17177 insn_clobbered = true;
17180 /* Skip if op0 is clobbered by an insn other than prev. */
17181 if (insn_clobbered)
17182 continue;
17184 if (!set)
17185 continue;
17187 dest = SET_DEST (set);
17188 src = SET_SRC (set);
17189 if (!low_register_operand (dest, SImode)
17190 || !low_register_operand (src, SImode))
17191 continue;
17193 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17194 in INSN. Both src and dest of the move insn are checked. */
17195 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17197 dest = copy_rtx (dest);
17198 src = copy_rtx (src);
17199 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17200 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17201 INSN_CODE (prev) = -1;
17202 /* Set test register in INSN to dest. */
17203 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17204 INSN_CODE (insn) = -1;
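/* Roughly, the rewrite above turns a Thumb-1 sequence along the lines of

       movs  r1, r0
       cmp   r1, #0
       beq   .L2

   into

       subs  r1, r0, #0
       beq   .L2

   because the subtract is modelled as setting the condition codes, which
   lets the following compare-and-branch pattern drop its explicit CMP
   against zero. */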
17209 /* Convert instructions to their cc-clobbering variant if possible, since
17210 that allows us to use smaller encodings. */
17212 static void
17213 thumb2_reorg (void)
17215 basic_block bb;
17216 regset_head live;
17218 INIT_REG_SET (&live);
17220 /* We are freeing block_for_insn in the toplev to keep compatibility
17221 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17222 compute_bb_for_insn ();
17223 df_analyze ();
17225 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17227 FOR_EACH_BB_FN (bb, cfun)
17229 if (current_tune->disparage_flag_setting_t16_encodings
17230 && optimize_bb_for_speed_p (bb))
17231 continue;
17233 rtx_insn *insn;
17234 Convert_Action action = SKIP;
17235 Convert_Action action_for_partial_flag_setting
17236 = (current_tune->disparage_partial_flag_setting_t16_encodings
17237 && optimize_bb_for_speed_p (bb))
17238 ? SKIP : CONV;
17240 COPY_REG_SET (&live, DF_LR_OUT (bb));
17241 df_simulate_initialize_backwards (bb, &live);
17242 FOR_BB_INSNS_REVERSE (bb, insn)
17244 if (NONJUMP_INSN_P (insn)
17245 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17246 && GET_CODE (PATTERN (insn)) == SET)
17248 action = SKIP;
17249 rtx pat = PATTERN (insn);
17250 rtx dst = XEXP (pat, 0);
17251 rtx src = XEXP (pat, 1);
17252 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17254 if (!OBJECT_P (src))
17255 op0 = XEXP (src, 0);
17257 if (BINARY_P (src))
17258 op1 = XEXP (src, 1);
17260 if (low_register_operand (dst, SImode))
17262 switch (GET_CODE (src))
17264 case PLUS:
17265 /* Adding two registers and storing the result
17266 in the first source is already a 16-bit
17267 operation. */
17268 if (rtx_equal_p (dst, op0)
17269 && register_operand (op1, SImode))
17270 break;
17272 if (low_register_operand (op0, SImode))
17274 /* ADDS <Rd>,<Rn>,<Rm> */
17275 if (low_register_operand (op1, SImode))
17276 action = CONV;
17277 /* ADDS <Rdn>,#<imm8> */
17278 /* SUBS <Rdn>,#<imm8> */
17279 else if (rtx_equal_p (dst, op0)
17280 && CONST_INT_P (op1)
17281 && IN_RANGE (INTVAL (op1), -255, 255))
17282 action = CONV;
17283 /* ADDS <Rd>,<Rn>,#<imm3> */
17284 /* SUBS <Rd>,<Rn>,#<imm3> */
17285 else if (CONST_INT_P (op1)
17286 && IN_RANGE (INTVAL (op1), -7, 7))
17287 action = CONV;
17289 /* ADCS <Rd>, <Rn> */
17290 else if (GET_CODE (XEXP (src, 0)) == PLUS
17291 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17292 && low_register_operand (XEXP (XEXP (src, 0), 1),
17293 SImode)
17294 && COMPARISON_P (op1)
17295 && cc_register (XEXP (op1, 0), VOIDmode)
17296 && maybe_get_arm_condition_code (op1) == ARM_CS
17297 && XEXP (op1, 1) == const0_rtx)
17298 action = CONV;
17299 break;
17301 case MINUS:
17302 /* RSBS <Rd>,<Rn>,#0
17303 Not handled here: see NEG below. */
17304 /* SUBS <Rd>,<Rn>,#<imm3>
17305 SUBS <Rdn>,#<imm8>
17306 Not handled here: see PLUS above. */
17307 /* SUBS <Rd>,<Rn>,<Rm> */
17308 if (low_register_operand (op0, SImode)
17309 && low_register_operand (op1, SImode))
17310 action = CONV;
17311 break;
17313 case MULT:
17314 /* MULS <Rdm>,<Rn>,<Rdm>
17315 As an exception to the rule, this is only used
17316 when optimizing for size since MULS is slow on all
17317 known implementations. We do not even want to use
17318 MULS in cold code, if optimizing for speed, so we
17319 test the global flag here. */
17320 if (!optimize_size)
17321 break;
17322 /* else fall through. */
17323 case AND:
17324 case IOR:
17325 case XOR:
17326 /* ANDS <Rdn>,<Rm> */
17327 if (rtx_equal_p (dst, op0)
17328 && low_register_operand (op1, SImode))
17329 action = action_for_partial_flag_setting;
17330 else if (rtx_equal_p (dst, op1)
17331 && low_register_operand (op0, SImode))
17332 action = action_for_partial_flag_setting == SKIP
17333 ? SKIP : SWAP_CONV;
17334 break;
17336 case ASHIFTRT:
17337 case ASHIFT:
17338 case LSHIFTRT:
17339 /* ASRS <Rdn>,<Rm> */
17340 /* LSRS <Rdn>,<Rm> */
17341 /* LSLS <Rdn>,<Rm> */
17342 if (rtx_equal_p (dst, op0)
17343 && low_register_operand (op1, SImode))
17344 action = action_for_partial_flag_setting;
17345 /* ASRS <Rd>,<Rm>,#<imm5> */
17346 /* LSRS <Rd>,<Rm>,#<imm5> */
17347 /* LSLS <Rd>,<Rm>,#<imm5> */
17348 else if (low_register_operand (op0, SImode)
17349 && CONST_INT_P (op1)
17350 && IN_RANGE (INTVAL (op1), 0, 31))
17351 action = action_for_partial_flag_setting;
17352 break;
17354 case ROTATERT:
17355 /* RORS <Rdn>,<Rm> */
17356 if (rtx_equal_p (dst, op0)
17357 && low_register_operand (op1, SImode))
17358 action = action_for_partial_flag_setting;
17359 break;
17361 case NOT:
17362 /* MVNS <Rd>,<Rm> */
17363 if (low_register_operand (op0, SImode))
17364 action = action_for_partial_flag_setting;
17365 break;
17367 case NEG:
17368 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17369 if (low_register_operand (op0, SImode))
17370 action = CONV;
17371 break;
17373 case CONST_INT:
17374 /* MOVS <Rd>,#<imm8> */
17375 if (CONST_INT_P (src)
17376 && IN_RANGE (INTVAL (src), 0, 255))
17377 action = action_for_partial_flag_setting;
17378 break;
17380 case REG:
17381 /* MOVS and MOV<c> with registers have different
17382 encodings, so are not relevant here. */
17383 break;
17385 default:
17386 break;
17390 if (action != SKIP)
17392 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17393 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17394 rtvec vec;
17396 if (action == SWAP_CONV)
17398 src = copy_rtx (src);
17399 XEXP (src, 0) = op1;
17400 XEXP (src, 1) = op0;
17401 pat = gen_rtx_SET (VOIDmode, dst, src);
17402 vec = gen_rtvec (2, pat, clobber);
17404 else /* action == CONV */
17405 vec = gen_rtvec (2, pat, clobber);
17407 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17408 INSN_CODE (insn) = -1;
17412 if (NONDEBUG_INSN_P (insn))
17413 df_simulate_one_insn_backwards (bb, insn, &live);
17417 CLEAR_REG_SET (&live);
17420 /* GCC puts the constant pool in the wrong place for ARM, since we can
17421 only load addresses within a limited distance of the pc. We do some
17422 special munging to move the constant pool values to the correct
17423 point in the code. */
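/* For illustration only (register names, labels and the constant are
   made up): a constant that cannot be encoded as an immediate is
   rewritten to be loaded from a nearby "minipool", e.g.

           ldr     r4, .LP0        @ pc-relative load, must stay in range
           ...
           b       .LP1            @ branch around the pool when it lands mid-code
   .LP0:   .word   0x12345678      @ minipool entry
   .LP1:

   The pass below records each such load as a "fix" and then places the
   pools so that every load stays within the addressing range of its
   instruction. */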
17424 static void
17425 arm_reorg (void)
17427 rtx_insn *insn;
17428 HOST_WIDE_INT address = 0;
17429 Mfix * fix;
17431 if (TARGET_THUMB1)
17432 thumb1_reorg ();
17433 else if (TARGET_THUMB2)
17434 thumb2_reorg ();
17436 /* Ensure all insns that must be split have been split at this point.
17437 Otherwise, the pool placement code below may compute incorrect
17438 insn lengths. Note that when optimizing, all insns have already
17439 been split at this point. */
17440 if (!optimize)
17441 split_all_insns_noflow ();
17443 minipool_fix_head = minipool_fix_tail = NULL;
17445 /* The first insn must always be a note, or the code below won't
17446 scan it properly. */
17447 insn = get_insns ();
17448 gcc_assert (NOTE_P (insn));
17449 minipool_pad = 0;
17451 /* Scan all the insns and record the operands that will need fixing. */
17452 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17454 if (BARRIER_P (insn))
17455 push_minipool_barrier (insn, address);
17456 else if (INSN_P (insn))
17458 rtx_jump_table_data *table;
17460 note_invalid_constants (insn, address, true);
17461 address += get_attr_length (insn);
17463 /* If the insn is a vector jump, add the size of the table
17464 and skip the table. */
17465 if (tablejump_p (insn, NULL, &table))
17467 address += get_jump_table_size (table);
17468 insn = table;
17471 else if (LABEL_P (insn))
17472 /* Add the worst-case padding due to alignment. We don't add
17473 the _current_ padding because the minipool insertions
17474 themselves might change it. */
17475 address += get_label_padding (insn);
17478 fix = minipool_fix_head;
17480 /* Now scan the fixups and perform the required changes. */
17481 while (fix)
17483 Mfix * ftmp;
17484 Mfix * fdel;
17485 Mfix * last_added_fix;
17486 Mfix * last_barrier = NULL;
17487 Mfix * this_fix;
17489 /* Skip any further barriers before the next fix. */
17490 while (fix && BARRIER_P (fix->insn))
17491 fix = fix->next;
17493 /* No more fixes. */
17494 if (fix == NULL)
17495 break;
17497 last_added_fix = NULL;
17499 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17501 if (BARRIER_P (ftmp->insn))
17503 if (ftmp->address >= minipool_vector_head->max_address)
17504 break;
17506 last_barrier = ftmp;
17508 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17509 break;
17511 last_added_fix = ftmp; /* Keep track of the last fix added. */
17514 /* If we found a barrier, drop back to that; any fixes that we
17515 could have reached but come after the barrier will now go in
17516 the next mini-pool. */
17517 if (last_barrier != NULL)
17519 /* Reduce the refcount for those fixes that won't go into this
17520 pool after all. */
17521 for (fdel = last_barrier->next;
17522 fdel && fdel != ftmp;
17523 fdel = fdel->next)
17525 fdel->minipool->refcount--;
17526 fdel->minipool = NULL;
17529 ftmp = last_barrier;
17531 else
17533 /* ftmp is the first fix that we can't fit into this pool and
17534 there are no natural barriers that we could use. Insert a
17535 new barrier in the code somewhere between the previous
17536 fix and this one, and arrange to jump around it. */
17537 HOST_WIDE_INT max_address;
17539 /* The last item on the list of fixes must be a barrier, so
17540 we can never run off the end of the list of fixes without
17541 last_barrier being set. */
17542 gcc_assert (ftmp);
17544 max_address = minipool_vector_head->max_address;
17545 /* Check that there isn't another fix that is in range that
17546 we couldn't fit into this pool because the pool was
17547 already too large: we need to put the pool before such an
17548 instruction. The pool itself may come just after the
17549 fix because create_fix_barrier also allows space for a
17550 jump instruction. */
17551 if (ftmp->address < max_address)
17552 max_address = ftmp->address + 1;
17554 last_barrier = create_fix_barrier (last_added_fix, max_address);
17557 assign_minipool_offsets (last_barrier);
17559 while (ftmp)
17561 if (!BARRIER_P (ftmp->insn)
17562 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17563 == NULL))
17564 break;
17566 ftmp = ftmp->next;
17569 /* Scan over the fixes we have identified for this pool, fixing them
17570 up and adding the constants to the pool itself. */
17571 for (this_fix = fix; this_fix && ftmp != this_fix;
17572 this_fix = this_fix->next)
17573 if (!BARRIER_P (this_fix->insn))
17575 rtx addr
17576 = plus_constant (Pmode,
17577 gen_rtx_LABEL_REF (VOIDmode,
17578 minipool_vector_label),
17579 this_fix->minipool->offset);
17580 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17583 dump_minipool (last_barrier->insn);
17584 fix = ftmp;
17587 /* From now on we must synthesize any constants that we can't handle
17588 directly. This can happen if the RTL gets split during final
17589 instruction generation. */
17590 cfun->machine->after_arm_reorg = 1;
17592 /* Free the minipool memory. */
17593 obstack_free (&minipool_obstack, minipool_startobj);
17596 /* Routines to output assembly language. */
17598 /* Return the string representation of the passed-in real value. Only 0.0 is expected here (see the assert below). */
17599 static const char *
17600 fp_const_from_val (REAL_VALUE_TYPE *r)
17602 if (!fp_consts_inited)
17603 init_fp_table ();
17605 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17606 return "0";
17609 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17610 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17611 insn is in the list, and UPDATE is true iff the list contains an
17612 explicit update of the base register. */
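/* For example (illustrative only), depending on the base register and
   the UPDATE/RETURN_PC flags this emits forms such as
           pop     {r4, r5, pc}
           ldmfd   sp!, {r4, r5, pc}
           ldmia   r3, {r4, r5}
   with a trailing "^" when returning from an interrupt handler. */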
17613 void
17614 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17615 bool update)
17617 int i;
17618 char pattern[100];
17619 int offset;
17620 const char *conditional;
17621 int num_saves = XVECLEN (operands[0], 0);
17622 unsigned int regno;
17623 unsigned int regno_base = REGNO (operands[1]);
17625 offset = 0;
17626 offset += update ? 1 : 0;
17627 offset += return_pc ? 1 : 0;
17629 /* Is the base register in the list? */
17630 for (i = offset; i < num_saves; i++)
17632 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17633 /* If SP is in the list, then the base register must be SP. */
17634 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17635 /* If base register is in the list, there must be no explicit update. */
17636 if (regno == regno_base)
17637 gcc_assert (!update);
17640 conditional = reverse ? "%?%D0" : "%?%d0";
17641 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17643 /* Output pop (not ldmfd) because it has a shorter encoding. */
17644 gcc_assert (update);
17645 sprintf (pattern, "pop%s\t{", conditional);
17647 else
17649 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17650 It's just a convention; their semantics are identical. */
17651 if (regno_base == SP_REGNUM)
17652 sprintf (pattern, "ldm%sfd\t", conditional);
17653 else if (TARGET_UNIFIED_ASM)
17654 sprintf (pattern, "ldmia%s\t", conditional);
17655 else
17656 sprintf (pattern, "ldm%sia\t", conditional);
17658 strcat (pattern, reg_names[regno_base]);
17659 if (update)
17660 strcat (pattern, "!, {");
17661 else
17662 strcat (pattern, ", {");
17665 /* Output the first destination register. */
17666 strcat (pattern,
17667 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17669 /* Output the rest of the destination registers. */
17670 for (i = offset + 1; i < num_saves; i++)
17672 strcat (pattern, ", ");
17673 strcat (pattern,
17674 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17677 strcat (pattern, "}");
17679 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17680 strcat (pattern, "^");
17682 output_asm_insn (pattern, &cond);
17686 /* Output the assembly for a VFP store multiple of D registers: vpush when the base register is the stack pointer, otherwise vstmdb with writeback. */
17688 const char *
17689 vfp_output_vstmd (rtx * operands)
17691 char pattern[100];
17692 int p;
17693 int base;
17694 int i;
17695 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17696 ? XEXP (operands[0], 0)
17697 : XEXP (XEXP (operands[0], 0), 0);
17698 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17700 if (push_p)
17701 strcpy (pattern, "vpush%?.64\t{%P1");
17702 else
17703 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17705 p = strlen (pattern);
17707 gcc_assert (REG_P (operands[1]));
17709 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17710 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17712 p += sprintf (&pattern[p], ", d%d", base + i);
17714 strcpy (&pattern[p], "}");
17716 output_asm_insn (pattern, operands);
17717 return "";
17721 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17722 number of bytes pushed. */
17724 static int
17725 vfp_emit_fstmd (int base_reg, int count)
17727 rtx par;
17728 rtx dwarf;
17729 rtx tmp, reg;
17730 int i;
17732 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17733 register pairs are stored by a store multiple insn. We avoid this
17734 by pushing an extra pair. */
17735 if (count == 2 && !arm_arch6)
17737 if (base_reg == LAST_VFP_REGNUM - 3)
17738 base_reg -= 2;
17739 count++;
17742 /* FSTMD may not store more than 16 doubleword registers at once. Split
17743 larger stores into multiple parts (up to a maximum of two, in
17744 practice). */
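/* Illustrative example: a request to push 20 D registers is split into
   a push of the upper four registers followed by a push of the
   remaining sixteen, so each FSTMD stays within the limit. */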
17745 if (count > 16)
17747 int saved;
17748 /* NOTE: base_reg is an internal register number, so each D register
17749 counts as 2. */
17750 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17751 saved += vfp_emit_fstmd (base_reg, 16);
17752 return saved;
17755 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17756 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17758 reg = gen_rtx_REG (DFmode, base_reg);
17759 base_reg += 2;
17761 XVECEXP (par, 0, 0)
17762 = gen_rtx_SET (VOIDmode,
17763 gen_frame_mem
17764 (BLKmode,
17765 gen_rtx_PRE_MODIFY (Pmode,
17766 stack_pointer_rtx,
17767 plus_constant
17768 (Pmode, stack_pointer_rtx,
17769 - (count * 8)))
17771 gen_rtx_UNSPEC (BLKmode,
17772 gen_rtvec (1, reg),
17773 UNSPEC_PUSH_MULT));
17775 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17776 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17777 RTX_FRAME_RELATED_P (tmp) = 1;
17778 XVECEXP (dwarf, 0, 0) = tmp;
17780 tmp = gen_rtx_SET (VOIDmode,
17781 gen_frame_mem (DFmode, stack_pointer_rtx),
17782 reg);
17783 RTX_FRAME_RELATED_P (tmp) = 1;
17784 XVECEXP (dwarf, 0, 1) = tmp;
17786 for (i = 1; i < count; i++)
17788 reg = gen_rtx_REG (DFmode, base_reg);
17789 base_reg += 2;
17790 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17792 tmp = gen_rtx_SET (VOIDmode,
17793 gen_frame_mem (DFmode,
17794 plus_constant (Pmode,
17795 stack_pointer_rtx,
17796 i * 8)),
17797 reg);
17798 RTX_FRAME_RELATED_P (tmp) = 1;
17799 XVECEXP (dwarf, 0, i + 1) = tmp;
17802 par = emit_insn (par);
17803 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17804 RTX_FRAME_RELATED_P (par) = 1;
17806 return count * 8;
17809 /* Emit a call instruction with pattern PAT. ADDR is the address of
17810 the call target. SIBCALL is true if this is a sibling call. */
17812 void
17813 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17815 rtx insn;
17817 insn = emit_call_insn (pat);
17819 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17820 If the call might use such an entry, add a use of the PIC register
17821 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17822 if (TARGET_VXWORKS_RTP
17823 && flag_pic
17824 && !sibcall
17825 && GET_CODE (addr) == SYMBOL_REF
17826 && (SYMBOL_REF_DECL (addr)
17827 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17828 : !SYMBOL_REF_LOCAL_P (addr)))
17830 require_pic_register ();
17831 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17834 if (TARGET_AAPCS_BASED)
17836 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17837 linker. We need to add an IP clobber to allow setting
17838 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17839 is not needed since it's a fixed register. */
17840 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17841 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17845 /* Output a 'call' insn. */
17846 const char *
17847 output_call (rtx *operands)
17849 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17851 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17852 if (REGNO (operands[0]) == LR_REGNUM)
17854 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17855 output_asm_insn ("mov%?\t%0, %|lr", operands);
17858 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17860 if (TARGET_INTERWORK || arm_arch4t)
17861 output_asm_insn ("bx%?\t%0", operands);
17862 else
17863 output_asm_insn ("mov%?\t%|pc, %0", operands);
17865 return "";
17868 /* Output a 'call' insn whose target is a reference in memory. This is
17869 disabled for ARMv5, where we prefer a blx instead, because otherwise
17870 there is a significant performance overhead. */
17871 const char *
17872 output_call_mem (rtx *operands)
17874 gcc_assert (!arm_arch5);
17875 if (TARGET_INTERWORK)
17877 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17878 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17879 output_asm_insn ("bx%?\t%|ip", operands);
17881 else if (regno_use_in (LR_REGNUM, operands[0]))
17883 /* LR is used in the memory address. We load the address in the
17884 first instruction. It's safe to use IP as the target of the
17885 load since the call will kill it anyway. */
17886 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17887 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17888 if (arm_arch4t)
17889 output_asm_insn ("bx%?\t%|ip", operands);
17890 else
17891 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17893 else
17895 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17896 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17899 return "";
17903 /* Output a move from arm registers to arm registers of a long double.
17904 OPERANDS[0] is the destination.
17905 OPERANDS[1] is the source. */
17906 const char *
17907 output_mov_long_double_arm_from_arm (rtx *operands)
17909 /* We have to be careful here because the two might overlap. */
17910 int dest_start = REGNO (operands[0]);
17911 int src_start = REGNO (operands[1]);
17912 rtx ops[2];
17913 int i;
17915 if (dest_start < src_start)
17917 for (i = 0; i < 3; i++)
17919 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17920 ops[1] = gen_rtx_REG (SImode, src_start + i);
17921 output_asm_insn ("mov%?\t%0, %1", ops);
17924 else
17926 for (i = 2; i >= 0; i--)
17928 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17929 ops[1] = gen_rtx_REG (SImode, src_start + i);
17930 output_asm_insn ("mov%?\t%0, %1", ops);
17934 return "";
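/* Emit DEST := SRC as a pair of SImode sets (typically assembled as a
   movw/movt pair).  For a constant source the low 16 bits are set
   first and the high 16 bits are then inserted with a zero_extract,
   e.g. (illustrative) 0x12345678 becomes a move of 0x5678 followed by
   an insertion of 0x1234 into bits 16-31; the insertion is skipped
   when the high half is zero.  For a symbolic source a HIGH/LO_SUM
   pair is emitted instead. */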
17937 void
17938 arm_emit_movpair (rtx dest, rtx src)
17940 /* If the src is an immediate, simplify it. */
17941 if (CONST_INT_P (src))
17943 HOST_WIDE_INT val = INTVAL (src);
17944 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17945 if ((val >> 16) & 0x0000ffff)
17946 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17947 GEN_INT (16)),
17948 GEN_INT ((val >> 16) & 0x0000ffff));
17949 return;
17951 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17952 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17955 /* Output a move between double words. It must be REG<-MEM
17956 or MEM<-REG. */
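/* For example (illustrative), a REG<-MEM move may come out as
           ldrd    r4, [r2]          @ when LDRD is available
   or      ldmia   r2, {r4, r5}
   or as two single-word ldr instructions when the addressing mode or
   register-overlap rules require it; the MEM<-REG direction uses the
   corresponding strd/stm/str forms. */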
17957 const char *
17958 output_move_double (rtx *operands, bool emit, int *count)
17960 enum rtx_code code0 = GET_CODE (operands[0]);
17961 enum rtx_code code1 = GET_CODE (operands[1]);
17962 rtx otherops[3];
17963 if (count)
17964 *count = 1;
17966 /* The only case when this might happen is when
17967 you are looking at the length of a DImode instruction
17968 that has an invalid constant in it. */
17969 if (code0 == REG && code1 != MEM)
17971 gcc_assert (!emit);
17972 *count = 2;
17973 return "";
17976 if (code0 == REG)
17978 unsigned int reg0 = REGNO (operands[0]);
17980 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17982 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17984 switch (GET_CODE (XEXP (operands[1], 0)))
17986 case REG:
17988 if (emit)
17990 if (TARGET_LDRD
17991 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17992 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17993 else
17994 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17996 break;
17998 case PRE_INC:
17999 gcc_assert (TARGET_LDRD);
18000 if (emit)
18001 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18002 break;
18004 case PRE_DEC:
18005 if (emit)
18007 if (TARGET_LDRD)
18008 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18009 else
18010 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18012 break;
18014 case POST_INC:
18015 if (emit)
18017 if (TARGET_LDRD)
18018 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18019 else
18020 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18022 break;
18024 case POST_DEC:
18025 gcc_assert (TARGET_LDRD);
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18028 break;
18030 case PRE_MODIFY:
18031 case POST_MODIFY:
18032 /* Autoincrement addressing modes should never have overlapping
18033 base and destination registers, and overlapping index registers
18034 are already prohibited, so this doesn't need to worry about
18035 fix_cm3_ldrd. */
18036 otherops[0] = operands[0];
18037 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18038 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18040 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18042 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18044 /* Registers overlap so split out the increment. */
18045 if (emit)
18047 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18048 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18050 if (count)
18051 *count = 2;
18053 else
18055 /* Use a single insn if we can.
18056 FIXME: IWMMXT allows offsets larger than ldrd can
18057 handle, fix these up with a pair of ldr. */
18058 if (TARGET_THUMB2
18059 || !CONST_INT_P (otherops[2])
18060 || (INTVAL (otherops[2]) > -256
18061 && INTVAL (otherops[2]) < 256))
18063 if (emit)
18064 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18066 else
18068 if (emit)
18070 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18071 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18073 if (count)
18074 *count = 2;
18079 else
18081 /* Use a single insn if we can.
18082 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18083 fix these up with a pair of ldr. */
18084 if (TARGET_THUMB2
18085 || !CONST_INT_P (otherops[2])
18086 || (INTVAL (otherops[2]) > -256
18087 && INTVAL (otherops[2]) < 256))
18089 if (emit)
18090 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18092 else
18094 if (emit)
18096 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18097 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18099 if (count)
18100 *count = 2;
18103 break;
18105 case LABEL_REF:
18106 case CONST:
18107 /* We might be able to use ldrd %0, %1 here. However the range is
18108 different to ldr/adr, and it is broken on some ARMv7-M
18109 implementations. */
18110 /* Use the second register of the pair to avoid problematic
18111 overlap. */
18112 otherops[1] = operands[1];
18113 if (emit)
18114 output_asm_insn ("adr%?\t%0, %1", otherops);
18115 operands[1] = otherops[0];
18116 if (emit)
18118 if (TARGET_LDRD)
18119 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18120 else
18121 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18124 if (count)
18125 *count = 2;
18126 break;
18128 /* ??? This needs checking for thumb2. */
18129 default:
18130 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18131 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18133 otherops[0] = operands[0];
18134 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18135 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18137 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18139 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18141 switch ((int) INTVAL (otherops[2]))
18143 case -8:
18144 if (emit)
18145 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18146 return "";
18147 case -4:
18148 if (TARGET_THUMB2)
18149 break;
18150 if (emit)
18151 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18152 return "";
18153 case 4:
18154 if (TARGET_THUMB2)
18155 break;
18156 if (emit)
18157 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18158 return "";
18161 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18162 operands[1] = otherops[0];
18163 if (TARGET_LDRD
18164 && (REG_P (otherops[2])
18165 || TARGET_THUMB2
18166 || (CONST_INT_P (otherops[2])
18167 && INTVAL (otherops[2]) > -256
18168 && INTVAL (otherops[2]) < 256)))
18170 if (reg_overlap_mentioned_p (operands[0],
18171 otherops[2]))
18173 rtx tmp;
18174 /* Swap base and index registers over to
18175 avoid a conflict. */
18176 tmp = otherops[1];
18177 otherops[1] = otherops[2];
18178 otherops[2] = tmp;
18180 /* If both registers conflict, it will usually
18181 have been fixed by a splitter. */
18182 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18183 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18185 if (emit)
18187 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18188 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18190 if (count)
18191 *count = 2;
18193 else
18195 otherops[0] = operands[0];
18196 if (emit)
18197 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18199 return "";
18202 if (CONST_INT_P (otherops[2]))
18204 if (emit)
18206 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18207 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18208 else
18209 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18212 else
18214 if (emit)
18215 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18218 else
18220 if (emit)
18221 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18224 if (count)
18225 *count = 2;
18227 if (TARGET_LDRD)
18228 return "ldr%(d%)\t%0, [%1]";
18230 return "ldm%(ia%)\t%1, %M0";
18232 else
18234 otherops[1] = adjust_address (operands[1], SImode, 4);
18235 /* Take care of overlapping base/data reg. */
18236 if (reg_mentioned_p (operands[0], operands[1]))
18238 if (emit)
18240 output_asm_insn ("ldr%?\t%0, %1", otherops);
18241 output_asm_insn ("ldr%?\t%0, %1", operands);
18243 if (count)
18244 *count = 2;
18247 else
18249 if (emit)
18251 output_asm_insn ("ldr%?\t%0, %1", operands);
18252 output_asm_insn ("ldr%?\t%0, %1", otherops);
18254 if (count)
18255 *count = 2;
18260 else
18262 /* Constraints should ensure this. */
18263 gcc_assert (code0 == MEM && code1 == REG);
18264 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18265 || (TARGET_ARM && TARGET_LDRD));
18267 switch (GET_CODE (XEXP (operands[0], 0)))
18269 case REG:
18270 if (emit)
18272 if (TARGET_LDRD)
18273 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18274 else
18275 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18277 break;
18279 case PRE_INC:
18280 gcc_assert (TARGET_LDRD);
18281 if (emit)
18282 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18283 break;
18285 case PRE_DEC:
18286 if (emit)
18288 if (TARGET_LDRD)
18289 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18290 else
18291 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18293 break;
18295 case POST_INC:
18296 if (emit)
18298 if (TARGET_LDRD)
18299 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18300 else
18301 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18303 break;
18305 case POST_DEC:
18306 gcc_assert (TARGET_LDRD);
18307 if (emit)
18308 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18309 break;
18311 case PRE_MODIFY:
18312 case POST_MODIFY:
18313 otherops[0] = operands[1];
18314 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18315 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18317 /* IWMMXT allows offsets larger than strd can handle,
18318 fix these up with a pair of str. */
18319 if (!TARGET_THUMB2
18320 && CONST_INT_P (otherops[2])
18321 && (INTVAL(otherops[2]) <= -256
18322 || INTVAL(otherops[2]) >= 256))
18324 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18326 if (emit)
18328 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18329 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18331 if (count)
18332 *count = 2;
18334 else
18336 if (emit)
18338 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18339 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18341 if (count)
18342 *count = 2;
18345 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18347 if (emit)
18348 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18350 else
18352 if (emit)
18353 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18355 break;
18357 case PLUS:
18358 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18359 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18361 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18363 case -8:
18364 if (emit)
18365 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18366 return "";
18368 case -4:
18369 if (TARGET_THUMB2)
18370 break;
18371 if (emit)
18372 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18373 return "";
18375 case 4:
18376 if (TARGET_THUMB2)
18377 break;
18378 if (emit)
18379 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18380 return "";
18383 if (TARGET_LDRD
18384 && (REG_P (otherops[2])
18385 || TARGET_THUMB2
18386 || (CONST_INT_P (otherops[2])
18387 && INTVAL (otherops[2]) > -256
18388 && INTVAL (otherops[2]) < 256)))
18390 otherops[0] = operands[1];
18391 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18392 if (emit)
18393 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18394 return "";
18396 /* Fall through */
18398 default:
18399 otherops[0] = adjust_address (operands[0], SImode, 4);
18400 otherops[1] = operands[1];
18401 if (emit)
18403 output_asm_insn ("str%?\t%1, %0", operands);
18404 output_asm_insn ("str%?\t%H1, %0", otherops);
18406 if (count)
18407 *count = 2;
18411 return "";
18414 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18415 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
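/* For example (illustrative), a load from a plain register address
   comes out as an ldmia of four core registers, a store as the
   matching stmia, and a reg->reg move as four mov instructions ordered
   to cope with overlap between source and destination. */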
18417 const char *
18418 output_move_quad (rtx *operands)
18420 if (REG_P (operands[0]))
18422 /* Load, or reg->reg move. */
18424 if (MEM_P (operands[1]))
18426 switch (GET_CODE (XEXP (operands[1], 0)))
18428 case REG:
18429 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18430 break;
18432 case LABEL_REF:
18433 case CONST:
18434 output_asm_insn ("adr%?\t%0, %1", operands);
18435 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18436 break;
18438 default:
18439 gcc_unreachable ();
18442 else
18444 rtx ops[2];
18445 int dest, src, i;
18447 gcc_assert (REG_P (operands[1]));
18449 dest = REGNO (operands[0]);
18450 src = REGNO (operands[1]);
18452 /* This seems pretty dumb, but hopefully GCC won't try to do it
18453 very often. */
18454 if (dest < src)
18455 for (i = 0; i < 4; i++)
18457 ops[0] = gen_rtx_REG (SImode, dest + i);
18458 ops[1] = gen_rtx_REG (SImode, src + i);
18459 output_asm_insn ("mov%?\t%0, %1", ops);
18461 else
18462 for (i = 3; i >= 0; i--)
18464 ops[0] = gen_rtx_REG (SImode, dest + i);
18465 ops[1] = gen_rtx_REG (SImode, src + i);
18466 output_asm_insn ("mov%?\t%0, %1", ops);
18470 else
18472 gcc_assert (MEM_P (operands[0]));
18473 gcc_assert (REG_P (operands[1]));
18474 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18476 switch (GET_CODE (XEXP (operands[0], 0)))
18478 case REG:
18479 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18480 break;
18482 default:
18483 gcc_unreachable ();
18487 return "";
18490 /* Output a VFP load or store instruction. */
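/* Depending on the addressing mode this emits (illustrative forms)
   vldr/vstr for a plain or offset address, vldmia/vstmia with
   writeback for POST_INC, and vldmdb/vstmdb with writeback for
   PRE_DEC, with a .32 or .64 size suffix chosen from the operand
   mode. */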
18492 const char *
18493 output_move_vfp (rtx *operands)
18495 rtx reg, mem, addr, ops[2];
18496 int load = REG_P (operands[0]);
18497 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18498 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18499 const char *templ;
18500 char buff[50];
18501 machine_mode mode;
18503 reg = operands[!load];
18504 mem = operands[load];
18506 mode = GET_MODE (reg);
18508 gcc_assert (REG_P (reg));
18509 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18510 gcc_assert (mode == SFmode
18511 || mode == DFmode
18512 || mode == SImode
18513 || mode == DImode
18514 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18515 gcc_assert (MEM_P (mem));
18517 addr = XEXP (mem, 0);
18519 switch (GET_CODE (addr))
18521 case PRE_DEC:
18522 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18523 ops[0] = XEXP (addr, 0);
18524 ops[1] = reg;
18525 break;
18527 case POST_INC:
18528 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18529 ops[0] = XEXP (addr, 0);
18530 ops[1] = reg;
18531 break;
18533 default:
18534 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18535 ops[0] = reg;
18536 ops[1] = mem;
18537 break;
18540 sprintf (buff, templ,
18541 load ? "ld" : "st",
18542 dp ? "64" : "32",
18543 dp ? "P" : "",
18544 integer_p ? "\t%@ int" : "");
18545 output_asm_insn (buff, ops);
18547 return "";
18550 /* Output a Neon double-word or quad-word load or store, or a load
18551 or store for larger structure modes.
18553 WARNING: The ordering of elements is weird in big-endian mode,
18554 because the EABI requires that vectors stored in memory appear
18555 as though they were stored by a VSTM, as required by the EABI.
18556 GCC RTL defines element ordering based on in-memory order.
18557 This can be different from the architectural ordering of elements
18558 within a NEON register. The intrinsics defined in arm_neon.h use the
18559 NEON register element ordering, not the GCC RTL element ordering.
18561 For example, the in-memory ordering of a big-endian quadword
18562 vector with 16-bit elements when stored from register pair {d0,d1}
18563 will be (lowest address first, d0[N] is NEON register element N):
18565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18567 When necessary, quadword registers (dN, dN+1) are moved to ARM
18568 registers from rN in the order:
18570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18572 So that STM/LDM can be used on vectors in ARM registers, and the
18573 same memory layout will result as if VSTM/VLDM were used.
18575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18576 possible, which allows use of appropriate alignment tags.
18577 Note that the choice of "64" is independent of the actual vector
18578 element size; this size simply ensures that the behavior is
18579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18581 Due to limitations of those instructions, use of VST1.64/VLD1.64
18582 is not possible if:
18583 - the address contains PRE_DEC, or
18584 - the mode refers to more than 4 double-word registers
18586 In those cases, it would be possible to replace VSTM/VLDM by a
18587 sequence of instructions; this is not currently implemented since
18588 this is not certain to actually improve performance. */
18590 const char *
18591 output_move_neon (rtx *operands)
18593 rtx reg, mem, addr, ops[2];
18594 int regno, nregs, load = REG_P (operands[0]);
18595 const char *templ;
18596 char buff[50];
18597 machine_mode mode;
18599 reg = operands[!load];
18600 mem = operands[load];
18602 mode = GET_MODE (reg);
18604 gcc_assert (REG_P (reg));
18605 regno = REGNO (reg);
18606 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18608 || NEON_REGNO_OK_FOR_QUAD (regno));
18609 gcc_assert (VALID_NEON_DREG_MODE (mode)
18610 || VALID_NEON_QREG_MODE (mode)
18611 || VALID_NEON_STRUCT_MODE (mode));
18612 gcc_assert (MEM_P (mem));
18614 addr = XEXP (mem, 0);
18616 /* Strip off const from addresses like (const (plus (...))). */
18617 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18618 addr = XEXP (addr, 0);
18620 switch (GET_CODE (addr))
18622 case POST_INC:
18623 /* We have to use vldm / vstm for too-large modes. */
18624 if (nregs > 4)
18626 templ = "v%smia%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18629 else
18631 templ = "v%s1.64\t%%h1, %%A0";
18632 ops[0] = mem;
18634 ops[1] = reg;
18635 break;
18637 case PRE_DEC:
18638 /* We have to use vldm / vstm in this case, since there is no
18639 pre-decrement form of the vld1 / vst1 instructions. */
18640 templ = "v%smdb%%?\t%%0!, %%h1";
18641 ops[0] = XEXP (addr, 0);
18642 ops[1] = reg;
18643 break;
18645 case POST_MODIFY:
18646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18647 gcc_unreachable ();
18649 case REG:
18650 /* We have to use vldm / vstm for too-large modes. */
18651 if (nregs > 1)
18653 if (nregs > 4)
18654 templ = "v%smia%%?\t%%m0, %%h1";
18655 else
18656 templ = "v%s1.64\t%%h1, %%A0";
18658 ops[0] = mem;
18659 ops[1] = reg;
18660 break;
18662 /* Fall through. */
18663 case LABEL_REF:
18664 case PLUS:
18666 int i;
18667 int overlap = -1;
18668 for (i = 0; i < nregs; i++)
18670 /* We're only using DImode here because it's a convenient size. */
18671 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18672 ops[1] = adjust_address (mem, DImode, 8 * i);
18673 if (reg_overlap_mentioned_p (ops[0], mem))
18675 gcc_assert (overlap == -1);
18676 overlap = i;
18678 else
18680 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18681 output_asm_insn (buff, ops);
18684 if (overlap != -1)
18686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18687 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18688 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18689 output_asm_insn (buff, ops);
18692 return "";
18695 default:
18696 gcc_unreachable ();
18699 sprintf (buff, templ, load ? "ld" : "st");
18700 output_asm_insn (buff, ops);
18702 return "";
18705 /* Compute and return the length of neon_mov<mode>, where <mode> is
18706 one of VSTRUCT modes: EI, OI, CI or XI. */
18708 arm_attr_length_move_neon (rtx_insn *insn)
18710 rtx reg, mem, addr;
18711 int load;
18712 machine_mode mode;
18714 extract_insn_cached (insn);
18716 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18718 mode = GET_MODE (recog_data.operand[0]);
18719 switch (mode)
18721 case EImode:
18722 case OImode:
18723 return 8;
18724 case CImode:
18725 return 12;
18726 case XImode:
18727 return 16;
18728 default:
18729 gcc_unreachable ();
18733 load = REG_P (recog_data.operand[0]);
18734 reg = recog_data.operand[!load];
18735 mem = recog_data.operand[load];
18737 gcc_assert (MEM_P (mem));
18739 mode = GET_MODE (reg);
18740 addr = XEXP (mem, 0);
18742 /* Strip off const from addresses like (const (plus (...))). */
18743 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18744 addr = XEXP (addr, 0);
18746 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18748 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18749 return insns * 4;
18751 else
18752 return 4;
18755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18756 return zero. */
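/* Illustrative examples: a store to [r3] or to [r3, #8] returns 1,
   while a store to a register-indexed address such as [r3, r2]
   returns 0, as does any insn whose first operand is a register
   (i.e. a load). */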
18759 arm_address_offset_is_imm (rtx_insn *insn)
18761 rtx mem, addr;
18763 extract_insn_cached (insn);
18765 if (REG_P (recog_data.operand[0]))
18766 return 0;
18768 mem = recog_data.operand[0];
18770 gcc_assert (MEM_P (mem));
18772 addr = XEXP (mem, 0);
18774 if (REG_P (addr)
18775 || (GET_CODE (addr) == PLUS
18776 && REG_P (XEXP (addr, 0))
18777 && CONST_INT_P (XEXP (addr, 1))))
18778 return 1;
18779 else
18780 return 0;
18783 /* Output an ADD r, s, #n where n may be too big for one instruction.
18784 If adding zero and the destination is the same register as the source, output nothing. */
18785 const char *
18786 output_add_immediate (rtx *operands)
18788 HOST_WIDE_INT n = INTVAL (operands[2]);
18790 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18792 if (n < 0)
18793 output_multi_immediate (operands,
18794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18795 -n);
18796 else
18797 output_multi_immediate (operands,
18798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18802 return "";
18805 /* Output a multiple immediate operation.
18806 OPERANDS is the vector of operands referred to in the output patterns.
18807 INSTR1 is the output pattern to use for the first constant.
18808 INSTR2 is the output pattern to use for subsequent constants.
18809 IMMED_OP is the index of the constant slot in OPERANDS.
18810 N is the constant value. */
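/* Worked example (illustrative): N == 0x45F is peeled off in byte-sized
   chunks at even bit positions, 0x5F and then 0x400, each of which is a
   valid ARM immediate, so output_add_immediate would emit something like
           add     r0, r1, #95
           add     r0, r0, #1024  */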
18811 static const char *
18812 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18813 int immed_op, HOST_WIDE_INT n)
18815 #if HOST_BITS_PER_WIDE_INT > 32
18816 n &= 0xffffffff;
18817 #endif
18819 if (n == 0)
18821 /* Quick and easy output. */
18822 operands[immed_op] = const0_rtx;
18823 output_asm_insn (instr1, operands);
18825 else
18827 int i;
18828 const char * instr = instr1;
18830 /* Note that n is never zero here (which would give no output). */
18831 for (i = 0; i < 32; i += 2)
18833 if (n & (3 << i))
18835 operands[immed_op] = GEN_INT (n & (255 << i));
18836 output_asm_insn (instr, operands);
18837 instr = instr2;
18838 i += 6;
18843 return "";
18846 /* Return the name of a shifter operation. */
18847 static const char *
18848 arm_shift_nmem(enum rtx_code code)
18850 switch (code)
18852 case ASHIFT:
18853 return ARM_LSL_NAME;
18855 case ASHIFTRT:
18856 return "asr";
18858 case LSHIFTRT:
18859 return "lsr";
18861 case ROTATERT:
18862 return "ror";
18864 default:
18865 abort();
18869 /* Return the appropriate ARM instruction for the operation code.
18870 The returned result should not be overwritten. OP is the rtx of the
18871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18872 was shifted. */
18873 const char *
18874 arithmetic_instr (rtx op, int shift_first_arg)
18876 switch (GET_CODE (op))
18878 case PLUS:
18879 return "add";
18881 case MINUS:
18882 return shift_first_arg ? "rsb" : "sub";
18884 case IOR:
18885 return "orr";
18887 case XOR:
18888 return "eor";
18890 case AND:
18891 return "and";
18893 case ASHIFT:
18894 case ASHIFTRT:
18895 case LSHIFTRT:
18896 case ROTATERT:
18897 return arm_shift_nmem(GET_CODE(op));
18899 default:
18900 gcc_unreachable ();
18904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18905 for the operation code. The returned result should not be overwritten.
18906 OP is the rtx of the shift.
18907 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18908 constant shift amount otherwise. */
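/* For example (illustrative), an ASHIFT by the constant 3 yields "lsl"
   with *AMOUNTP set to 3, a shift by a register yields the mnemonic
   with *AMOUNTP set to -1, and (mult x 8) is likewise folded to "lsl"
   with an amount of 3. */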
18909 static const char *
18910 shift_op (rtx op, HOST_WIDE_INT *amountp)
18912 const char * mnem;
18913 enum rtx_code code = GET_CODE (op);
18915 switch (code)
18917 case ROTATE:
18918 if (!CONST_INT_P (XEXP (op, 1)))
18920 output_operand_lossage ("invalid shift operand");
18921 return NULL;
18924 code = ROTATERT;
18925 *amountp = 32 - INTVAL (XEXP (op, 1));
18926 mnem = "ror";
18927 break;
18929 case ASHIFT:
18930 case ASHIFTRT:
18931 case LSHIFTRT:
18932 case ROTATERT:
18933 mnem = arm_shift_nmem(code);
18934 if (CONST_INT_P (XEXP (op, 1)))
18936 *amountp = INTVAL (XEXP (op, 1));
18938 else if (REG_P (XEXP (op, 1)))
18940 *amountp = -1;
18941 return mnem;
18943 else
18945 output_operand_lossage ("invalid shift operand");
18946 return NULL;
18948 break;
18950 case MULT:
18951 /* We never have to worry about the amount being other than a
18952 power of 2, since this case can never be reloaded from a reg. */
18953 if (!CONST_INT_P (XEXP (op, 1)))
18955 output_operand_lossage ("invalid shift operand");
18956 return NULL;
18959 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18961 /* Amount must be a power of two. */
18962 if (*amountp & (*amountp - 1))
18964 output_operand_lossage ("invalid shift operand");
18965 return NULL;
18968 *amountp = int_log2 (*amountp);
18969 return ARM_LSL_NAME;
18971 default:
18972 output_operand_lossage ("invalid shift operand");
18973 return NULL;
18976 /* This is not 100% correct, but follows from the desire to merge
18977 multiplication by a power of 2 with the recognizer for a
18978 shift. >=32 is not a valid shift for "lsl", so we must try and
18979 output a shift that produces the correct arithmetical result.
18980 Using lsr #32 is identical except for the fact that the carry bit
18981 is not set correctly if we set the flags; but we never use the
18982 carry bit from such an operation, so we can ignore that. */
18983 if (code == ROTATERT)
18984 /* Rotate is just modulo 32. */
18985 *amountp &= 31;
18986 else if (*amountp != (*amountp & 31))
18988 if (code == ASHIFT)
18989 mnem = "lsr";
18990 *amountp = 32;
18993 /* Shifts of 0 are no-ops. */
18994 if (*amountp == 0)
18995 return NULL;
18997 return mnem;
19000 /* Obtain the shift count from the power of two POWER, e.g. int_log2 (8) == 3. */
19002 static HOST_WIDE_INT
19003 int_log2 (HOST_WIDE_INT power)
19005 HOST_WIDE_INT shift = 0;
19007 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19009 gcc_assert (shift <= 31);
19010 shift++;
19013 return shift;
19016 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19017 because /bin/as is horribly restrictive. The judgement about
19018 whether or not each character is 'printable' (and can be output as
19019 is) or not (and must be printed with an octal escape) must be made
19020 with reference to the *host* character set -- the situation is
19021 similar to that discussed in the comments above pp_c_char in
19022 c-pretty-print.c. */
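/* Illustrative example: the three input bytes 'a', '"' and '\n' are
   emitted as
           .ascii  "a\"\012"
   and the directive is split and restarted whenever the running
   length reaches MAX_ASCII_LEN. */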
19024 #define MAX_ASCII_LEN 51
19026 void
19027 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19029 int i;
19030 int len_so_far = 0;
19032 fputs ("\t.ascii\t\"", stream);
19034 for (i = 0; i < len; i++)
19036 int c = p[i];
19038 if (len_so_far >= MAX_ASCII_LEN)
19040 fputs ("\"\n\t.ascii\t\"", stream);
19041 len_so_far = 0;
19044 if (ISPRINT (c))
19046 if (c == '\\' || c == '\"')
19048 putc ('\\', stream);
19049 len_so_far++;
19051 putc (c, stream);
19052 len_so_far++;
19054 else
19056 fprintf (stream, "\\%03o", c);
19057 len_so_far += 4;
19061 fputs ("\"\n", stream);
19064 /* Compute the register save mask for registers 0 through 12
19065 inclusive. This code is used by arm_compute_save_reg_mask. */
19067 static unsigned long
19068 arm_compute_save_reg0_reg12_mask (void)
19070 unsigned long func_type = arm_current_func_type ();
19071 unsigned long save_reg_mask = 0;
19072 unsigned int reg;
19074 if (IS_INTERRUPT (func_type))
19076 unsigned int max_reg;
19077 /* Interrupt functions must not corrupt any registers,
19078 even call clobbered ones. If this is a leaf function
19079 we can just examine the registers used by the RTL, but
19080 otherwise we have to assume that whatever function is
19081 called might clobber anything, and so we have to save
19082 all the call-clobbered registers as well. */
19083 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19084 /* FIQ handlers have registers r8 - r12 banked, so
19085 we only need to check r0 - r7. Normal ISRs only
19086 bank r14 and r15, so we must check up to r12.
19087 r13 is the stack pointer which is always preserved,
19088 so we do not need to consider it here. */
19089 max_reg = 7;
19090 else
19091 max_reg = 12;
19093 for (reg = 0; reg <= max_reg; reg++)
19094 if (df_regs_ever_live_p (reg)
19095 || (! crtl->is_leaf && call_used_regs[reg]))
19096 save_reg_mask |= (1 << reg);
19098 /* Also save the pic base register if necessary. */
19099 if (flag_pic
19100 && !TARGET_SINGLE_PIC_BASE
19101 && arm_pic_register != INVALID_REGNUM
19102 && crtl->uses_pic_offset_table)
19103 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19105 else if (IS_VOLATILE(func_type))
19107 /* For noreturn functions we historically omitted register saves
19108 altogether. However this really messes up debugging. As a
19109 compromise save just the frame pointers. Combined with the link
19110 register saved elsewhere this should be sufficient to get
19111 a backtrace. */
19112 if (frame_pointer_needed)
19113 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19114 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19115 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19116 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19117 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19119 else
19121 /* In the normal case we only need to save those registers
19122 which are call saved and which are used by this function. */
19123 for (reg = 0; reg <= 11; reg++)
19124 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19125 save_reg_mask |= (1 << reg);
19127 /* Handle the frame pointer as a special case. */
19128 if (frame_pointer_needed)
19129 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19131 /* If we aren't loading the PIC register,
19132 don't stack it even though it may be live. */
19133 if (flag_pic
19134 && !TARGET_SINGLE_PIC_BASE
19135 && arm_pic_register != INVALID_REGNUM
19136 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19137 || crtl->uses_pic_offset_table))
19138 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19140 /* The prologue will copy SP into R0, so save it. */
19141 if (IS_STACKALIGN (func_type))
19142 save_reg_mask |= 1;
19145 /* Save registers so the exception handler can modify them. */
19146 if (crtl->calls_eh_return)
19148 unsigned int i;
19150 for (i = 0; ; i++)
19152 reg = EH_RETURN_DATA_REGNO (i);
19153 if (reg == INVALID_REGNUM)
19154 break;
19155 save_reg_mask |= 1 << reg;
19159 return save_reg_mask;
19162 /* Return true if r3 is live at the start of the function. */
19164 static bool
19165 arm_r3_live_at_start_p (void)
19167 /* Just look at cfg info, which is still close enough to correct at this
19168 point. This gives false positives for broken functions that might use
19169 uninitialized data that happens to be allocated in r3, but who cares? */
19170 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19173 /* Compute the number of bytes used to store the static chain register on the
19174 stack, above the stack frame. We need to know this accurately to get the
19175 alignment of the rest of the stack frame correct. */
19177 static int
19178 arm_compute_static_chain_stack_bytes (void)
19180 /* See the defining assertion in arm_expand_prologue. */
19181 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19182 && IS_NESTED (arm_current_func_type ())
19183 && arm_r3_live_at_start_p ()
19184 && crtl->args.pretend_args_size == 0)
19185 return 4;
19187 return 0;
19190 /* Compute a bit mask of which registers need to be
19191 saved on the stack for the current function.
19192 This is used by arm_get_frame_offsets, which may add extra registers. */
19194 static unsigned long
19195 arm_compute_save_reg_mask (void)
19197 unsigned int save_reg_mask = 0;
19198 unsigned long func_type = arm_current_func_type ();
19199 unsigned int reg;
19201 if (IS_NAKED (func_type))
19202 /* This should never really happen. */
19203 return 0;
19205 /* If we are creating a stack frame, then we must save the frame pointer,
19206 IP (which will hold the old stack pointer), LR and the PC. */
19207 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19208 save_reg_mask |=
19209 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19210 | (1 << IP_REGNUM)
19211 | (1 << LR_REGNUM)
19212 | (1 << PC_REGNUM);
19214 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19216 /* Decide if we need to save the link register.
19217 Interrupt routines have their own banked link register,
19218 so they never need to save it.
19219 Otherwise if we do not use the link register we do not need to save
19220 it. If we are pushing other registers onto the stack however, we
19221 can save an instruction in the epilogue by pushing the link register
19222 now and then popping it back into the PC. This incurs extra memory
19223 accesses though, so we only do it when optimizing for size, and only
19224 if we know that we will not need a fancy return sequence. */
19225 if (df_regs_ever_live_p (LR_REGNUM)
19226 || (save_reg_mask
19227 && optimize_size
19228 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19229 && !crtl->calls_eh_return))
19230 save_reg_mask |= 1 << LR_REGNUM;
19232 if (cfun->machine->lr_save_eliminated)
19233 save_reg_mask &= ~ (1 << LR_REGNUM);
19235 if (TARGET_REALLY_IWMMXT
19236 && ((bit_count (save_reg_mask)
19237 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19238 arm_compute_static_chain_stack_bytes())
19239 ) % 2) != 0)
19241 /* The total number of registers that are going to be pushed
19242 onto the stack is odd. We need to ensure that the stack
19243 is 64-bit aligned before we start to save iWMMXt registers,
19244 and also before we start to create locals. (A local variable
19245 might be a double or long long which we will load/store using
19246 an iWMMXt instruction). Therefore we need to push another
19247 ARM register, so that the stack will be 64-bit aligned. We
19248 try to avoid using the arg registers (r0 - r3) as they might be
19249 used to pass values in a tail call. */
19250 for (reg = 4; reg <= 12; reg++)
19251 if ((save_reg_mask & (1 << reg)) == 0)
19252 break;
19254 if (reg <= 12)
19255 save_reg_mask |= (1 << reg);
19256 else
19258 cfun->machine->sibcall_blocked = 1;
19259 save_reg_mask |= (1 << 3);
19263 /* We may need to push an additional register for use initializing the
19264 PIC base register. */
19265 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19266 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19268 reg = thumb_find_work_register (1 << 4);
19269 if (!call_used_regs[reg])
19270 save_reg_mask |= (1 << reg);
19273 return save_reg_mask;
19277 /* Compute a bit mask of which registers need to be
19278 saved on the stack for the current function. */
19279 static unsigned long
19280 thumb1_compute_save_reg_mask (void)
19282 unsigned long mask;
19283 unsigned reg;
19285 mask = 0;
19286 for (reg = 0; reg < 12; reg ++)
19287 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19288 mask |= 1 << reg;
19290 if (flag_pic
19291 && !TARGET_SINGLE_PIC_BASE
19292 && arm_pic_register != INVALID_REGNUM
19293 && crtl->uses_pic_offset_table)
19294 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19296 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19297 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19298 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19300 /* LR will also be pushed if any lo regs are pushed. */
19301 if (mask & 0xff || thumb_force_lr_save ())
19302 mask |= (1 << LR_REGNUM);
19304 /* Make sure we have a low work register if we need one.
19305 We will need one if we are going to push a high register,
19306 but we are not currently intending to push a low register. */
19307 if ((mask & 0xff) == 0
19308 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19310 /* Use thumb_find_work_register to choose which register
19311 we will use. If the register is live then we will
19312 have to push it. Use LAST_LO_REGNUM as our fallback
19313 choice for the register to select. */
19314 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19315 /* Make sure the register returned by thumb_find_work_register is
19316 not part of the return value. */
19317 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19318 reg = LAST_LO_REGNUM;
19320 if (! call_used_regs[reg])
19321 mask |= 1 << reg;
19324 /* The 504 below is 8 bytes less than 512 because there are two possible
19325 alignment words. We can't tell here if they will be present or not so we
19326 have to play it safe and assume that they are. */
19327 if ((CALLER_INTERWORKING_SLOT_SIZE +
19328 ROUND_UP_WORD (get_frame_size ()) +
19329 crtl->outgoing_args_size) >= 504)
19331 /* This is the same as the code in thumb1_expand_prologue() which
19332 determines which register to use for stack decrement. */
19333 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19334 if (mask & (1 << reg))
19335 break;
19337 if (reg > LAST_LO_REGNUM)
19339 /* Make sure we have a register available for stack decrement. */
19340 mask |= 1 << LAST_LO_REGNUM;
19344 return mask;
19348 /* Return the number of bytes required to save VFP registers. */
19349 static int
19350 arm_get_vfp_saved_size (void)
19352 unsigned int regno;
19353 int count;
19354 int saved;
19356 saved = 0;
19357 /* Space for saved VFP registers. */
19358 if (TARGET_HARD_FLOAT && TARGET_VFP)
19360 count = 0;
19361 for (regno = FIRST_VFP_REGNUM;
19362 regno < LAST_VFP_REGNUM;
19363 regno += 2)
19365 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19366 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19368 if (count > 0)
19370 /* Workaround ARM10 VFPr1 bug. */
19371 if (count == 2 && !arm_arch6)
19372 count++;
19373 saved += count * 8;
19375 count = 0;
19377 else
19378 count++;
19380 if (count > 0)
19382 if (count == 2 && !arm_arch6)
19383 count++;
19384 saved += count * 8;
19387 return saved;
19391 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19392 everything bar the final return instruction. If simple_return is true,
19393 then do not output the epilogue, because it has already been emitted in RTL. */
19394 const char *
19395 output_return_instruction (rtx operand, bool really_return, bool reverse,
19396 bool simple_return)
19398 char conditional[10];
19399 char instr[100];
19400 unsigned reg;
19401 unsigned long live_regs_mask;
19402 unsigned long func_type;
19403 arm_stack_offsets *offsets;
19405 func_type = arm_current_func_type ();
19407 if (IS_NAKED (func_type))
19408 return "";
19410 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19412 /* If this function was declared non-returning, and we have
19413 found a tail call, then we have to trust that the called
19414 function won't return. */
19415 if (really_return)
19417 rtx ops[2];
19419 /* Otherwise, trap an attempted return by aborting. */
19420 ops[0] = operand;
19421 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19422 : "abort");
19423 assemble_external_libcall (ops[1]);
19424 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19427 return "";
19430 gcc_assert (!cfun->calls_alloca || really_return);
19432 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19434 cfun->machine->return_used_this_function = 1;
19436 offsets = arm_get_frame_offsets ();
19437 live_regs_mask = offsets->saved_regs_mask;
19439 if (!simple_return && live_regs_mask)
19441 const char * return_reg;
19443 /* If we do not have any special requirements for function exit
19444 (e.g. interworking) then we can load the return address
19445 directly into the PC. Otherwise we must load it into LR. */
19446 if (really_return
19447 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19448 return_reg = reg_names[PC_REGNUM];
19449 else
19450 return_reg = reg_names[LR_REGNUM];
19452 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19454 /* There are three possible reasons for the IP register
19455 being saved. 1) a stack frame was created, in which case
19456 IP contains the old stack pointer, or 2) an ISR routine
19457 corrupted it, or 3) it was saved to align the stack on
19458 iWMMXt. In case 1, restore IP into SP, otherwise just
19459 restore IP. */
19460 if (frame_pointer_needed)
19462 live_regs_mask &= ~ (1 << IP_REGNUM);
19463 live_regs_mask |= (1 << SP_REGNUM);
19465 else
19466 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19469 /* On some ARM architectures it is faster to use LDR rather than
19470 LDM to load a single register. On other architectures, the
19471 cost is the same. In 26 bit mode, or for exception handlers,
19472 we have to use LDM to load the PC so that the CPSR is also
19473 restored. */
19474 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19475 if (live_regs_mask == (1U << reg))
19476 break;
19478 if (reg <= LAST_ARM_REGNUM
19479 && (reg != LR_REGNUM
19480 || ! really_return
19481 || ! IS_INTERRUPT (func_type)))
19483 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19484 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19486 else
19488 char *p;
19489 int first = 1;
19491 /* Generate the load multiple instruction to restore the
19492 registers. Note we can get here, even if
19493 frame_pointer_needed is true, but only if sp already
19494 points to the base of the saved core registers. */
19495 if (live_regs_mask & (1 << SP_REGNUM))
19497 unsigned HOST_WIDE_INT stack_adjust;
19499 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19500 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19502 if (stack_adjust && arm_arch5 && TARGET_ARM)
19503 if (TARGET_UNIFIED_ASM)
19504 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19505 else
19506 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19507 else
19509 /* If we can't use ldmib (SA110 bug),
19510 then try to pop r3 instead. */
19511 if (stack_adjust)
19512 live_regs_mask |= 1 << 3;
19514 if (TARGET_UNIFIED_ASM)
19515 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19516 else
19517 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19520 else
19521 if (TARGET_UNIFIED_ASM)
19522 sprintf (instr, "pop%s\t{", conditional);
19523 else
19524 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19526 p = instr + strlen (instr);
19528 for (reg = 0; reg <= SP_REGNUM; reg++)
19529 if (live_regs_mask & (1 << reg))
19531 int l = strlen (reg_names[reg]);
19533 if (first)
19534 first = 0;
19535 else
19537 memcpy (p, ", ", 2);
19538 p += 2;
19541 memcpy (p, "%|", 2);
19542 memcpy (p + 2, reg_names[reg], l);
19543 p += l + 2;
19546 if (live_regs_mask & (1 << LR_REGNUM))
19548 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19549 /* If returning from an interrupt, restore the CPSR. */
19550 if (IS_INTERRUPT (func_type))
19551 strcat (p, "^");
19553 else
19554 strcpy (p, "}");
19557 output_asm_insn (instr, & operand);
19559 /* See if we need to generate an extra instruction to
19560 perform the actual function return. */
19561 if (really_return
19562 && func_type != ARM_FT_INTERWORKED
19563 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19565 /* The return has already been handled
19566 by loading the LR into the PC. */
19567 return "";
19571 if (really_return)
19573 switch ((int) ARM_FUNC_TYPE (func_type))
19575 case ARM_FT_ISR:
19576 case ARM_FT_FIQ:
19577 /* ??? This is wrong for unified assembly syntax. */
19578 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19579 break;
19581 case ARM_FT_INTERWORKED:
19582 sprintf (instr, "bx%s\t%%|lr", conditional);
19583 break;
19585 case ARM_FT_EXCEPTION:
19586 /* ??? This is wrong for unified assembly syntax. */
19587 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19588 break;
19590 default:
19591 /* Use bx if it's available. */
19592 if (arm_arch5 || arm_arch4t)
19593 sprintf (instr, "bx%s\t%%|lr", conditional);
19594 else
19595 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19596 break;
19599 output_asm_insn (instr, & operand);
19602 return "";
19605 /* Write the function name into the code section, directly preceding
19606 the function prologue.
19608 Code will be output similar to this:
19610 .ascii "arm_poke_function_name", 0
19611 .align
19613 .word 0xff000000 + (t1 - t0)
19614 arm_poke_function_name
19615 mov ip, sp
19616 stmfd sp!, {fp, ip, lr, pc}
19617 sub fp, ip, #4
19619 When performing a stack backtrace, code can inspect the value
19620 of 'pc' stored at 'fp' + 0. If the trace function then looks
19621 at location pc - 12 and the top 8 bits are set, then we know
19622 that there is a function name embedded immediately preceding this
19623 location, and that its (padded) length is ((pc[-3]) & ~0xff000000).
19625 We assume that pc is declared as a pointer to an unsigned long.
19627 It is of no benefit to output the function name if we are assembling
19628 a leaf function. These function types will not contain a stack
19629 backtrace structure, therefore it is not possible to determine the
19630 function name. */
19631 void
19632 arm_poke_function_name (FILE *stream, const char *name)
19634 unsigned long alignlength;
19635 unsigned long length;
19636 rtx x;
19638 length = strlen (name) + 1;
19639 alignlength = ROUND_UP_WORD (length);
19641 ASM_OUTPUT_ASCII (stream, name, length);
19642 ASM_OUTPUT_ALIGN (stream, 2);
19643 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19644 assemble_aligned_integer (UNITS_PER_WORD, x);
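/* Illustrative sketch only (not part of the compiler): how a backtrace
   routine could recover the name embedded by the layout described above,
   assuming a 32-bit target where PC is fetched from the frame as a
   pointer to unsigned long.  The helper name below is hypothetical.  */
#if 0
static const char *
backtrace_function_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];	/* The word at pc - 12.  */
  if ((marker & 0xff000000) != 0xff000000)
    return NULL;			/* No name was embedded here.  */
  /* The low 24 bits hold the padded length of the name string, which
     sits immediately before the marker word.  */
  return (const char *) pc - 12 - (marker & 0x00ffffff);
}
#endif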
19647 /* Place some comments into the assembler stream
19648 describing the current function. */
19649 static void
19650 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19652 unsigned long func_type;
19654 /* ??? Do we want to print some of the below anyway? */
19655 if (TARGET_THUMB1)
19656 return;
19658 /* Sanity check. */
19659 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19661 func_type = arm_current_func_type ();
19663 switch ((int) ARM_FUNC_TYPE (func_type))
19665 default:
19666 case ARM_FT_NORMAL:
19667 break;
19668 case ARM_FT_INTERWORKED:
19669 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19670 break;
19671 case ARM_FT_ISR:
19672 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19673 break;
19674 case ARM_FT_FIQ:
19675 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19676 break;
19677 case ARM_FT_EXCEPTION:
19678 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19679 break;
19682 if (IS_NAKED (func_type))
19683 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19685 if (IS_VOLATILE (func_type))
19686 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19688 if (IS_NESTED (func_type))
19689 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19690 if (IS_STACKALIGN (func_type))
19691 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19693 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19694 crtl->args.size,
19695 crtl->args.pretend_args_size, frame_size);
19697 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19698 frame_pointer_needed,
19699 cfun->machine->uses_anonymous_args);
19701 if (cfun->machine->lr_save_eliminated)
19702 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19704 if (crtl->calls_eh_return)
19705 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19709 static void
19710 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19711 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19713 arm_stack_offsets *offsets;
19715 if (TARGET_THUMB1)
19717 int regno;
19719 /* Emit any call-via-reg trampolines that are needed for v4t support
19720 of call_reg and call_value_reg type insns. */
19721 for (regno = 0; regno < LR_REGNUM; regno++)
19723 rtx label = cfun->machine->call_via[regno];
19725 if (label != NULL)
19727 switch_to_section (function_section (current_function_decl));
19728 targetm.asm_out.internal_label (asm_out_file, "L",
19729 CODE_LABEL_NUMBER (label));
19730 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19734 /* ??? Probably not safe to set this here, since it assumes that a
19735 function will be emitted as assembly immediately after we generate
19736 RTL for it. This does not happen for inline functions. */
19737 cfun->machine->return_used_this_function = 0;
19739 else /* TARGET_32BIT */
19741 /* We need to take into account any stack-frame rounding. */
19742 offsets = arm_get_frame_offsets ();
19744 gcc_assert (!use_return_insn (FALSE, NULL)
19745 || (cfun->machine->return_used_this_function != 0)
19746 || offsets->saved_regs == offsets->outgoing_args
19747 || frame_pointer_needed);
19751 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19752 STR and STRD. If an even number of registers are being pushed, one
19753 or more STRD patterns are created for each register pair. If an
19754 odd number of registers are pushed, emit an initial STR followed by
19755 as many STRD instructions as are needed. This works best when the
19756 stack is initially 64-bit aligned (the normal case), since it
19757 ensures that each STRD is also 64-bit aligned. */
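/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r6},
   the sequence emitted by the function below is

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the odd register is pushed first with writeback for the whole
   12 bytes, leaving the following STRD on a doubleword-aligned slot.  */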
19758 static void
19759 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19761 int num_regs = 0;
19762 int i;
19763 int regno;
19764 rtx par = NULL_RTX;
19765 rtx dwarf = NULL_RTX;
19766 rtx tmp;
19767 bool first = true;
19769 num_regs = bit_count (saved_regs_mask);
19771 /* Must be at least one register to save, and can't save SP or PC. */
19772 gcc_assert (num_regs > 0 && num_regs <= 14);
19773 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19774 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19776 /* Create sequence for DWARF info. All the frame-related data for
19777 debugging is held in this wrapper. */
19778 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19780 /* Describe the stack adjustment. */
19781 tmp = gen_rtx_SET (VOIDmode,
19782 stack_pointer_rtx,
19783 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19784 RTX_FRAME_RELATED_P (tmp) = 1;
19785 XVECEXP (dwarf, 0, 0) = tmp;
19787 /* Find the first register. */
19788 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19791 i = 0;
19793 /* If there's an odd number of registers to push, start off by
19794 pushing a single register. This ensures that subsequent strd
19795 operations are dword aligned (assuming that SP was originally
19796 64-bit aligned). */
19797 if ((num_regs & 1) != 0)
19799 rtx reg, mem, insn;
19801 reg = gen_rtx_REG (SImode, regno);
19802 if (num_regs == 1)
19803 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19804 stack_pointer_rtx));
19805 else
19806 mem = gen_frame_mem (Pmode,
19807 gen_rtx_PRE_MODIFY
19808 (Pmode, stack_pointer_rtx,
19809 plus_constant (Pmode, stack_pointer_rtx,
19810 -4 * num_regs)));
19812 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19813 RTX_FRAME_RELATED_P (tmp) = 1;
19814 insn = emit_insn (tmp);
19815 RTX_FRAME_RELATED_P (insn) = 1;
19816 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19817 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19818 reg);
19819 RTX_FRAME_RELATED_P (tmp) = 1;
19820 i++;
19821 regno++;
19822 XVECEXP (dwarf, 0, i) = tmp;
19823 first = false;
19826 while (i < num_regs)
19827 if (saved_regs_mask & (1 << regno))
19829 rtx reg1, reg2, mem1, mem2;
19830 rtx tmp0, tmp1, tmp2;
19831 int regno2;
19833 /* Find the register to pair with this one. */
19834 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19835 regno2++)
19838 reg1 = gen_rtx_REG (SImode, regno);
19839 reg2 = gen_rtx_REG (SImode, regno2);
19841 if (first)
19843 rtx insn;
19845 first = false;
19846 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19847 stack_pointer_rtx,
19848 -4 * num_regs));
19849 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19850 stack_pointer_rtx,
19851 -4 * (num_regs - 1)));
19852 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19853 plus_constant (Pmode, stack_pointer_rtx,
19854 -4 * (num_regs)));
19855 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19856 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19857 RTX_FRAME_RELATED_P (tmp0) = 1;
19858 RTX_FRAME_RELATED_P (tmp1) = 1;
19859 RTX_FRAME_RELATED_P (tmp2) = 1;
19860 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19861 XVECEXP (par, 0, 0) = tmp0;
19862 XVECEXP (par, 0, 1) = tmp1;
19863 XVECEXP (par, 0, 2) = tmp2;
19864 insn = emit_insn (par);
19865 RTX_FRAME_RELATED_P (insn) = 1;
19866 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19868 else
19870 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19871 stack_pointer_rtx,
19872 4 * i));
19873 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19874 stack_pointer_rtx,
19875 4 * (i + 1)));
19876 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19877 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19878 RTX_FRAME_RELATED_P (tmp1) = 1;
19879 RTX_FRAME_RELATED_P (tmp2) = 1;
19880 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19881 XVECEXP (par, 0, 0) = tmp1;
19882 XVECEXP (par, 0, 1) = tmp2;
19883 emit_insn (par);
19886 /* Create unwind information. This is an approximation. */
19887 tmp1 = gen_rtx_SET (VOIDmode,
19888 gen_frame_mem (Pmode,
19889 plus_constant (Pmode,
19890 stack_pointer_rtx,
19891 4 * i)),
19892 reg1);
19893 tmp2 = gen_rtx_SET (VOIDmode,
19894 gen_frame_mem (Pmode,
19895 plus_constant (Pmode,
19896 stack_pointer_rtx,
19897 4 * (i + 1))),
19898 reg2);
19900 RTX_FRAME_RELATED_P (tmp1) = 1;
19901 RTX_FRAME_RELATED_P (tmp2) = 1;
19902 XVECEXP (dwarf, 0, i + 1) = tmp1;
19903 XVECEXP (dwarf, 0, i + 2) = tmp2;
19904 i += 2;
19905 regno = regno2 + 1;
19907 else
19908 regno++;
19910 return;
19913 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19914 whenever possible, otherwise it emits single-word stores. The first store
19915 also allocates stack space for all saved registers, using writeback with
19916 post-addressing mode. All other stores use offset addressing. If no STRD
19917 can be emitted, this function emits a sequence of single-word stores,
19918 and not an STM as before, because single-word stores provide more
19919 scheduling freedom and can be turned into an STM by peephole optimizations. */
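/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r7},
   the function below emits

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   where the first store also allocates the full 12 bytes of stack.  */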
19920 static void
19921 arm_emit_strd_push (unsigned long saved_regs_mask)
19923 int num_regs = 0;
19924 int i, j, dwarf_index = 0;
19925 int offset = 0;
19926 rtx dwarf = NULL_RTX;
19927 rtx insn = NULL_RTX;
19928 rtx tmp, mem;
19930 /* TODO: More efficient code can be emitted by changing the
19931 layout, e.g., first push all pairs that can use STRD to keep the
19932 stack aligned, and then push all other registers. */
19933 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19934 if (saved_regs_mask & (1 << i))
19935 num_regs++;
19937 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19938 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19939 gcc_assert (num_regs > 0);
19941 /* Create sequence for DWARF info. */
19942 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19944 /* For dwarf info, we generate explicit stack update. */
19945 tmp = gen_rtx_SET (VOIDmode,
19946 stack_pointer_rtx,
19947 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19948 RTX_FRAME_RELATED_P (tmp) = 1;
19949 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19951 /* Save registers. */
19952 offset = - 4 * num_regs;
19953 j = 0;
19954 while (j <= LAST_ARM_REGNUM)
19955 if (saved_regs_mask & (1 << j))
19957 if ((j % 2 == 0)
19958 && (saved_regs_mask & (1 << (j + 1))))
19960 /* The current register and the next register form a register pair
19961 for which STRD can be generated. */
19962 if (offset < 0)
19964 /* Allocate stack space for all saved registers. */
19965 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19966 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19967 mem = gen_frame_mem (DImode, tmp);
19968 offset = 0;
19970 else if (offset > 0)
19971 mem = gen_frame_mem (DImode,
19972 plus_constant (Pmode,
19973 stack_pointer_rtx,
19974 offset));
19975 else
19976 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19978 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19979 RTX_FRAME_RELATED_P (tmp) = 1;
19980 tmp = emit_insn (tmp);
19982 /* Record the first store insn. */
19983 if (dwarf_index == 1)
19984 insn = tmp;
19986 /* Generate dwarf info. */
19987 mem = gen_frame_mem (SImode,
19988 plus_constant (Pmode,
19989 stack_pointer_rtx,
19990 offset));
19991 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19992 RTX_FRAME_RELATED_P (tmp) = 1;
19993 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19995 mem = gen_frame_mem (SImode,
19996 plus_constant (Pmode,
19997 stack_pointer_rtx,
19998 offset + 4));
19999 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20000 RTX_FRAME_RELATED_P (tmp) = 1;
20001 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20003 offset += 8;
20004 j += 2;
20006 else
20008 /* Emit a single word store. */
20009 if (offset < 0)
20011 /* Allocate stack space for all saved registers. */
20012 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20013 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20014 mem = gen_frame_mem (SImode, tmp);
20015 offset = 0;
20017 else if (offset > 0)
20018 mem = gen_frame_mem (SImode,
20019 plus_constant (Pmode,
20020 stack_pointer_rtx,
20021 offset));
20022 else
20023 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20025 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20026 RTX_FRAME_RELATED_P (tmp) = 1;
20027 tmp = emit_insn (tmp);
20029 /* Record the first store insn. */
20030 if (dwarf_index == 1)
20031 insn = tmp;
20033 /* Generate dwarf info. */
20034 mem = gen_frame_mem (SImode,
20035 plus_constant(Pmode,
20036 stack_pointer_rtx,
20037 offset));
20038 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20039 RTX_FRAME_RELATED_P (tmp) = 1;
20040 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20042 offset += 4;
20043 j += 1;
20046 else
20047 j++;
20049 /* Attach dwarf info to the first insn we generate. */
20050 gcc_assert (insn != NULL_RTX);
20051 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20052 RTX_FRAME_RELATED_P (insn) = 1;
20055 /* Generate and emit an insn that we will recognize as a push_multi.
20056 Unfortunately, since this insn does not reflect very well the actual
20057 semantics of the operation, we need to annotate the insn for the benefit
20058 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20059 MASK for registers that should be annotated for DWARF2 frame unwind
20060 information. */
20061 static rtx
20062 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20064 int num_regs = 0;
20065 int num_dwarf_regs = 0;
20066 int i, j;
20067 rtx par;
20068 rtx dwarf;
20069 int dwarf_par_index;
20070 rtx tmp, reg;
20072 /* We don't record the PC in the dwarf frame information. */
20073 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20077 if (mask & (1 << i))
20078 num_regs++;
20079 if (dwarf_regs_mask & (1 << i))
20080 num_dwarf_regs++;
20083 gcc_assert (num_regs && num_regs <= 16);
20084 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20086 /* For the body of the insn we are going to generate an UNSPEC in
20087 parallel with several USEs. This allows the insn to be recognized
20088 by the push_multi pattern in the arm.md file.
20090 The body of the insn looks something like this:
20092 (parallel [
20093 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20094 (const_int:SI <num>)))
20095 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20096 (use (reg:SI XX))
20097 (use (reg:SI YY))
20101 For the frame note however, we try to be more explicit and actually
20102 show each register being stored into the stack frame, plus a (single)
20103 decrement of the stack pointer. We do it this way in order to be
20104 friendly to the stack unwinding code, which only wants to see a single
20105 stack decrement per instruction. The RTL we generate for the note looks
20106 something like this:
20108 (sequence [
20109 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20110 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20111 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20112 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20116 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20117 instead we'd have a parallel expression detailing all
20118 the stores to the various memory addresses so that debug
20119 information is more up-to-date. Remember however while writing
20120 this to take care of the constraints with the push instruction.
20122 Note also that this has to be taken care of for the VFP registers.
20124 For more see PR43399. */
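  /* As a concrete (hypothetical) instance, for MASK = {r4, r5, lr} the
     body built below is

       (parallel [
	  (set (mem:BLK (pre_modify:SI (reg:SI sp) (const_int:SI -12)))
	       (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	  (use (reg:SI r5))
	  (use (reg:SI lr))])

     the attached note describes a single decrement of sp by 12 plus the
     three individual stores, and the insn assembles to "push {r4, r5, lr}".  */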
20126 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20127 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20128 dwarf_par_index = 1;
20130 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20132 if (mask & (1 << i))
20134 reg = gen_rtx_REG (SImode, i);
20136 XVECEXP (par, 0, 0)
20137 = gen_rtx_SET (VOIDmode,
20138 gen_frame_mem
20139 (BLKmode,
20140 gen_rtx_PRE_MODIFY (Pmode,
20141 stack_pointer_rtx,
20142 plus_constant
20143 (Pmode, stack_pointer_rtx,
20144 -4 * num_regs))
20146 gen_rtx_UNSPEC (BLKmode,
20147 gen_rtvec (1, reg),
20148 UNSPEC_PUSH_MULT));
20150 if (dwarf_regs_mask & (1 << i))
20152 tmp = gen_rtx_SET (VOIDmode,
20153 gen_frame_mem (SImode, stack_pointer_rtx),
20154 reg);
20155 RTX_FRAME_RELATED_P (tmp) = 1;
20156 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20159 break;
20163 for (j = 1, i++; j < num_regs; i++)
20165 if (mask & (1 << i))
20167 reg = gen_rtx_REG (SImode, i);
20169 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20171 if (dwarf_regs_mask & (1 << i))
20174 = gen_rtx_SET (VOIDmode,
20175 gen_frame_mem
20176 (SImode,
20177 plus_constant (Pmode, stack_pointer_rtx,
20178 4 * j)),
20179 reg);
20180 RTX_FRAME_RELATED_P (tmp) = 1;
20181 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20184 j++;
20188 par = emit_insn (par);
20190 tmp = gen_rtx_SET (VOIDmode,
20191 stack_pointer_rtx,
20192 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20193 RTX_FRAME_RELATED_P (tmp) = 1;
20194 XVECEXP (dwarf, 0, 0) = tmp;
20196 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20198 return par;
20201 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20202 SIZE is the offset to be adjusted.
20203 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20204 static void
20205 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20207 rtx dwarf;
20209 RTX_FRAME_RELATED_P (insn) = 1;
20210 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20211 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20214 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20215 SAVED_REGS_MASK shows which registers need to be restored.
20217 Unfortunately, since this insn does not reflect very well the actual
20218 semantics of the operation, we need to annotate the insn for the benefit
20219 of DWARF2 frame unwind information. */
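/* For example (hypothetical mask): SAVED_REGS_MASK covering {r4, r5, pc}
   yields the parallel

       [(return)
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
	(set (reg:SI r4) (mem:SI (reg:SI sp)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
	(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))]

   which is recognized as a pop_multi and assembles to "pop {r4, r5, pc}",
   performing the function return as well.  */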
20220 static void
20221 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20223 int num_regs = 0;
20224 int i, j;
20225 rtx par;
20226 rtx dwarf = NULL_RTX;
20227 rtx tmp, reg;
20228 bool return_in_pc;
20229 int offset_adj;
20230 int emit_update;
20232 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20233 offset_adj = return_in_pc ? 1 : 0;
20234 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20235 if (saved_regs_mask & (1 << i))
20236 num_regs++;
20238 gcc_assert (num_regs && num_regs <= 16);
20240 /* If SP is in reglist, then we don't emit SP update insn. */
20241 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20247 if (return_in_pc)
20249 tmp = ret_rtx;
20250 XVECEXP (par, 0, 0) = tmp;
20253 if (emit_update)
20255 /* Increment the stack pointer, based on there being
20256 num_regs 4-byte registers to restore. */
20257 tmp = gen_rtx_SET (VOIDmode,
20258 stack_pointer_rtx,
20259 plus_constant (Pmode,
20260 stack_pointer_rtx,
20261 4 * num_regs));
20262 RTX_FRAME_RELATED_P (tmp) = 1;
20263 XVECEXP (par, 0, offset_adj) = tmp;
20266 /* Now restore every reg, which may include PC. */
20267 for (j = 0, i = 0; j < num_regs; i++)
20268 if (saved_regs_mask & (1 << i))
20270 reg = gen_rtx_REG (SImode, i);
20271 if ((num_regs == 1) && emit_update && !return_in_pc)
20273 /* Emit single load with writeback. */
20274 tmp = gen_frame_mem (SImode,
20275 gen_rtx_POST_INC (Pmode,
20276 stack_pointer_rtx));
20277 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20278 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20279 return;
20282 tmp = gen_rtx_SET (VOIDmode,
20283 reg,
20284 gen_frame_mem
20285 (SImode,
20286 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20287 RTX_FRAME_RELATED_P (tmp) = 1;
20288 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20290 /* We need to maintain a sequence for DWARF info too. As dwarf info
20291 should not have PC, skip PC. */
20292 if (i != PC_REGNUM)
20293 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20295 j++;
20298 if (return_in_pc)
20299 par = emit_jump_insn (par);
20300 else
20301 par = emit_insn (par);
20303 REG_NOTES (par) = dwarf;
20304 if (!return_in_pc)
20305 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20306 stack_pointer_rtx, stack_pointer_rtx);
20309 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20310 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20312 Unfortunately, since this insn does not reflect very well the actual
20313 semantics of the operation, we need to annotate the insn for the benefit
20314 of DWARF2 frame unwind information. */
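/* For example (hypothetical values): popping four D-registers, say d8-d11,
   with BASE_REG = sp builds one parallel that restores them from
   [sp] .. [sp, #24] and advances sp by 32, i.e. roughly the effect of a
   single "vldm sp!, {d8-d11}".  */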
20315 static void
20316 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20318 int i, j;
20319 rtx par;
20320 rtx dwarf = NULL_RTX;
20321 rtx tmp, reg;
20323 gcc_assert (num_regs && num_regs <= 32);
20325 /* Workaround ARM10 VFPr1 bug. */
20326 if (num_regs == 2 && !arm_arch6)
20328 if (first_reg == 15)
20329 first_reg--;
20331 num_regs++;
20334 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20335 there could be up to 32 D-registers to restore.
20336 If there are more than 16 D-registers, make two recursive calls,
20337 each of which emits one pop_multi instruction. */
20338 if (num_regs > 16)
20340 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20341 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20342 return;
20345 /* The parallel needs to hold num_regs SETs
20346 and one SET for the stack update. */
20347 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20349 /* Increment the stack pointer, based on there being
20350 num_regs 8-byte registers to restore. */
20351 tmp = gen_rtx_SET (VOIDmode,
20352 base_reg,
20353 plus_constant (Pmode, base_reg, 8 * num_regs));
20354 RTX_FRAME_RELATED_P (tmp) = 1;
20355 XVECEXP (par, 0, 0) = tmp;
20357 /* Now show every reg that will be restored, using a SET for each. */
20358 for (j = 0, i=first_reg; j < num_regs; i += 2)
20360 reg = gen_rtx_REG (DFmode, i);
20362 tmp = gen_rtx_SET (VOIDmode,
20363 reg,
20364 gen_frame_mem
20365 (DFmode,
20366 plus_constant (Pmode, base_reg, 8 * j)));
20367 RTX_FRAME_RELATED_P (tmp) = 1;
20368 XVECEXP (par, 0, j + 1) = tmp;
20370 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20372 j++;
20375 par = emit_insn (par);
20376 REG_NOTES (par) = dwarf;
20378 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20379 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20381 RTX_FRAME_RELATED_P (par) = 1;
20382 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20384 else
20385 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20386 base_reg, base_reg);
20389 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20390 even number of registers is being popped, multiple LDRD patterns are created
20391 for all register pairs. If an odd number of registers is popped, the last
20392 register is loaded using an LDR pattern. */
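/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r6}
   and there is no return in PC, the function below emits

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the odd register handled by the final LDR with post-increment.  */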
20393 static void
20394 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20396 int num_regs = 0;
20397 int i, j;
20398 rtx par = NULL_RTX;
20399 rtx dwarf = NULL_RTX;
20400 rtx tmp, reg, tmp1;
20401 bool return_in_pc;
20403 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20404 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20405 if (saved_regs_mask & (1 << i))
20406 num_regs++;
20408 gcc_assert (num_regs && num_regs <= 16);
20410 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20411 to be popped. So, if num_regs is even, now it will become odd,
20412 and we can generate pop with PC. If num_regs is odd, it will be
20413 even now, and ldr with return can be generated for PC. */
20414 if (return_in_pc)
20415 num_regs--;
20417 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20419 /* Var j iterates over all the registers to gather all the registers in
20420 saved_regs_mask. Var i gives index of saved registers in stack frame.
20421 A PARALLEL RTX of register-pair is created here, so that pattern for
20422 LDRD can be matched. As PC is always the last register to be popped, and
20423 we have already decremented num_regs if PC is in the mask, we don't have
20424 to worry about PC in this loop. */
20425 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20426 if (saved_regs_mask & (1 << j))
20428 /* Create RTX for memory load. */
20429 reg = gen_rtx_REG (SImode, j);
20430 tmp = gen_rtx_SET (SImode,
20431 reg,
20432 gen_frame_mem (SImode,
20433 plus_constant (Pmode,
20434 stack_pointer_rtx, 4 * i)));
20435 RTX_FRAME_RELATED_P (tmp) = 1;
20437 if (i % 2 == 0)
20439 /* When saved-register index (i) is even, the RTX to be emitted is
20440 yet to be created. Hence create it first. The LDRD pattern we
20441 are generating is :
20442 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20443 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20444 where target registers need not be consecutive. */
20445 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20446 dwarf = NULL_RTX;
20449 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20450 added as 0th element and if i is odd, reg_i is added as 1st element
20451 of LDRD pattern shown above. */
20452 XVECEXP (par, 0, (i % 2)) = tmp;
20453 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20455 if ((i % 2) == 1)
20457 /* When saved-register index (i) is odd, RTXs for both the registers
20458 to be loaded are generated in above given LDRD pattern, and the
20459 pattern can be emitted now. */
20460 par = emit_insn (par);
20461 REG_NOTES (par) = dwarf;
20462 RTX_FRAME_RELATED_P (par) = 1;
20465 i++;
20468 /* If the number of registers pushed is odd and return_in_pc is false, or
20469 the number of registers is even and return_in_pc is true, the last register
20470 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20471 and then use LDR with post increment. */
20473 /* Increment the stack pointer, based on there being
20474 num_regs 4-byte registers to restore. */
20475 tmp = gen_rtx_SET (VOIDmode,
20476 stack_pointer_rtx,
20477 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20478 RTX_FRAME_RELATED_P (tmp) = 1;
20479 tmp = emit_insn (tmp);
20480 if (!return_in_pc)
20482 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20483 stack_pointer_rtx, stack_pointer_rtx);
20486 dwarf = NULL_RTX;
20488 if (((num_regs % 2) == 1 && !return_in_pc)
20489 || ((num_regs % 2) == 0 && return_in_pc))
20491 /* Scan for the single register to be popped. Skip until the saved
20492 register is found. */
20493 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20495 /* Gen LDR with post increment here. */
20496 tmp1 = gen_rtx_MEM (SImode,
20497 gen_rtx_POST_INC (SImode,
20498 stack_pointer_rtx));
20499 set_mem_alias_set (tmp1, get_frame_alias_set ());
20501 reg = gen_rtx_REG (SImode, j);
20502 tmp = gen_rtx_SET (SImode, reg, tmp1);
20503 RTX_FRAME_RELATED_P (tmp) = 1;
20504 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20506 if (return_in_pc)
20508 /* If return_in_pc, j must be PC_REGNUM. */
20509 gcc_assert (j == PC_REGNUM);
20510 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20511 XVECEXP (par, 0, 0) = ret_rtx;
20512 XVECEXP (par, 0, 1) = tmp;
20513 par = emit_jump_insn (par);
20515 else
20517 par = emit_insn (tmp);
20518 REG_NOTES (par) = dwarf;
20519 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20520 stack_pointer_rtx, stack_pointer_rtx);
20524 else if ((num_regs % 2) == 1 && return_in_pc)
20526 /* There are 2 registers to be popped. So, generate the pattern
20527 pop_multiple_with_stack_update_and_return to pop in PC. */
20528 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20531 return;
20534 /* LDRD in ARM mode needs consecutive registers as operands. This function
20535 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20536 offset addressing and then generates one separate stack update. This provides
20537 more scheduling freedom, compared to writeback on every load. However,
20538 if the function returns by loading directly into PC
20539 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20540 before the last load. TODO: Add a peephole optimization to recognize
20541 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20542 peephole optimization to merge the load at stack-offset zero
20543 with the stack update instruction using load with writeback
20544 in post-index addressing mode. */
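/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r6},
   the function below emits

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   with a single stack update after all the loads.  */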
20545 static void
20546 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20548 int j = 0;
20549 int offset = 0;
20550 rtx par = NULL_RTX;
20551 rtx dwarf = NULL_RTX;
20552 rtx tmp, mem;
20554 /* Restore saved registers. */
20555 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20556 j = 0;
20557 while (j <= LAST_ARM_REGNUM)
20558 if (saved_regs_mask & (1 << j))
20560 if ((j % 2) == 0
20561 && (saved_regs_mask & (1 << (j + 1)))
20562 && (j + 1) != PC_REGNUM)
20564 /* Current register and next register form register pair for which
20565 LDRD can be generated. PC is always the last register popped, and
20566 we handle it separately. */
20567 if (offset > 0)
20568 mem = gen_frame_mem (DImode,
20569 plus_constant (Pmode,
20570 stack_pointer_rtx,
20571 offset));
20572 else
20573 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20575 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20576 tmp = emit_insn (tmp);
20577 RTX_FRAME_RELATED_P (tmp) = 1;
20579 /* Generate dwarf info. */
20581 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20582 gen_rtx_REG (SImode, j),
20583 NULL_RTX);
20584 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20585 gen_rtx_REG (SImode, j + 1),
20586 dwarf);
20588 REG_NOTES (tmp) = dwarf;
20590 offset += 8;
20591 j += 2;
20593 else if (j != PC_REGNUM)
20595 /* Emit a single word load. */
20596 if (offset > 0)
20597 mem = gen_frame_mem (SImode,
20598 plus_constant (Pmode,
20599 stack_pointer_rtx,
20600 offset));
20601 else
20602 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20604 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20605 tmp = emit_insn (tmp);
20606 RTX_FRAME_RELATED_P (tmp) = 1;
20608 /* Generate dwarf info. */
20609 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20610 gen_rtx_REG (SImode, j),
20611 NULL_RTX);
20613 offset += 4;
20614 j += 1;
20616 else /* j == PC_REGNUM */
20617 j++;
20619 else
20620 j++;
20622 /* Update the stack. */
20623 if (offset > 0)
20625 tmp = gen_rtx_SET (Pmode,
20626 stack_pointer_rtx,
20627 plus_constant (Pmode,
20628 stack_pointer_rtx,
20629 offset));
20630 tmp = emit_insn (tmp);
20631 arm_add_cfa_adjust_cfa_note (tmp, offset,
20632 stack_pointer_rtx, stack_pointer_rtx);
20633 offset = 0;
20636 if (saved_regs_mask & (1 << PC_REGNUM))
20638 /* Only PC is to be popped. */
20639 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20640 XVECEXP (par, 0, 0) = ret_rtx;
20641 tmp = gen_rtx_SET (SImode,
20642 gen_rtx_REG (SImode, PC_REGNUM),
20643 gen_frame_mem (SImode,
20644 gen_rtx_POST_INC (SImode,
20645 stack_pointer_rtx)));
20646 RTX_FRAME_RELATED_P (tmp) = 1;
20647 XVECEXP (par, 0, 1) = tmp;
20648 par = emit_jump_insn (par);
20650 /* Generate dwarf info. */
20651 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20652 gen_rtx_REG (SImode, PC_REGNUM),
20653 NULL_RTX);
20654 REG_NOTES (par) = dwarf;
20655 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20656 stack_pointer_rtx, stack_pointer_rtx);
20660 /* Calculate the size of the return value that is passed in registers. */
20661 static unsigned
20662 arm_size_return_regs (void)
20664 machine_mode mode;
20666 if (crtl->return_rtx != 0)
20667 mode = GET_MODE (crtl->return_rtx);
20668 else
20669 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20671 return GET_MODE_SIZE (mode);
20674 /* Return true if the current function needs to save/restore LR. */
20675 static bool
20676 thumb_force_lr_save (void)
20678 return !cfun->machine->lr_save_eliminated
20679 && (!leaf_function_p ()
20680 || thumb_far_jump_used_p ()
20681 || df_regs_ever_live_p (LR_REGNUM));
20684 /* Return true if CALL is an indirect tail call; in that case we do
20685 not know whether r3 will be available, so we must assume that it
20686 could be used. */
20687 static bool
20688 is_indirect_tailcall_p (rtx call)
20690 rtx pat = PATTERN (call);
20692 /* Indirect tail call. */
20693 pat = XVECEXP (pat, 0, 0);
20694 if (GET_CODE (pat) == SET)
20695 pat = SET_SRC (pat);
20697 pat = XEXP (XEXP (pat, 0), 0);
20698 return REG_P (pat);
20701 /* Return true if r3 is used by any of the tail call insns in the
20702 current function. */
20703 static bool
20704 any_sibcall_could_use_r3 (void)
20706 edge_iterator ei;
20707 edge e;
20709 if (!crtl->tail_call_emit)
20710 return false;
20711 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20712 if (e->flags & EDGE_SIBCALL)
20714 rtx call = BB_END (e->src);
20715 if (!CALL_P (call))
20716 call = prev_nonnote_nondebug_insn (call);
20717 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20718 if (find_regno_fusage (call, USE, 3)
20719 || is_indirect_tailcall_p (call))
20720 return true;
20722 return false;
20726 /* Compute the distance from register FROM to register TO.
20727 These can be the arg pointer (26), the soft frame pointer (25),
20728 the stack pointer (13) or the hard frame pointer (11).
20729 In thumb mode r7 is used as the soft frame pointer, if needed.
20730 Typical stack layout looks like this:
20732 old stack pointer -> | |
20733 ----
20734 | | \
20735 | | saved arguments for
20736 | | vararg functions
20737 | | /
20739 hard FP & arg pointer -> | | \
20740 | | stack
20741 | | frame
20742 | | /
20744 | | \
20745 | | call saved
20746 | | registers
20747 soft frame pointer -> | | /
20749 | | \
20750 | | local
20751 | | variables
20752 locals base pointer -> | | /
20754 | | \
20755 | | outgoing
20756 | | arguments
20757 current stack pointer -> | | /
20760 For a given function some or all of these stack components
20761 may not be needed, giving rise to the possibility of
20762 eliminating some of the registers.
20764 The values returned by this function must reflect the behavior
20765 of arm_expand_prologue() and arm_compute_save_reg_mask().
20767 The sign of the number returned reflects the direction of stack
20768 growth, so the values are positive for all eliminations except
20769 from the soft frame pointer to the hard frame pointer.
20771 SFP may point just inside the local variables block to ensure correct
20772 alignment. */
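/* As a hypothetical worked example (ARM state, doubleword-aligned stack,
   no frame pointer, no static chain, no caller-interworking slot, no
   VFP/iWMMXt saves): a function with no pretend args that saves
   {r4, r5, lr} (12 bytes), has 8 bytes of locals and no outgoing
   arguments gets

       saved_args    = 0
       saved_regs    = 12
       soft_frame    = 16	(12 rounded up for doubleword alignment)
       locals_base   = 24
       outgoing_args = 24

   so eliminating ARG_POINTER into STACK_POINTER yields 24 - 4 = 20 and
   FRAME_POINTER into STACK_POINTER yields 24 - 16 = 8.  */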
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
20779 static arm_stack_offsets *
20780 arm_get_frame_offsets (void)
20782 struct arm_stack_offsets *offsets;
20783 unsigned long func_type;
20784 int leaf;
20785 int saved;
20786 int core_saved;
20787 HOST_WIDE_INT frame_size;
20788 int i;
20790 offsets = &cfun->machine->stack_offsets;
20792 /* We need to know if we are a leaf function. Unfortunately, it
20793 is possible to be called after start_sequence has been called,
20794 which causes get_insns to return the insns for the sequence,
20795 not the function, which will cause leaf_function_p to return
20796 the incorrect result. Fortunately, we only need
20798 to know about leaf functions once reload has completed, and the
20799 frame size cannot be changed after that time, so we can safely
20800 use the cached value. */
20802 if (reload_completed)
20803 return offsets;
20805 /* Initially this is the size of the local variables. It will be translated
20806 into an offset once we have determined the size of preceding data. */
20807 frame_size = ROUND_UP_WORD (get_frame_size ());
20809 leaf = leaf_function_p ();
20811 /* Space for variadic functions. */
20812 offsets->saved_args = crtl->args.pretend_args_size;
20814 /* In Thumb mode this is incorrect, but never used. */
20815 offsets->frame
20816 = (offsets->saved_args
20817 + arm_compute_static_chain_stack_bytes ()
20818 + (frame_pointer_needed ? 4 : 0));
20820 if (TARGET_32BIT)
20822 unsigned int regno;
20824 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20825 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20826 saved = core_saved;
20828 /* We know that SP will be doubleword aligned on entry, and we must
20829 preserve that condition at any subroutine call. We also require the
20830 soft frame pointer to be doubleword aligned. */
20832 if (TARGET_REALLY_IWMMXT)
20834 /* Check for the call-saved iWMMXt registers. */
20835 for (regno = FIRST_IWMMXT_REGNUM;
20836 regno <= LAST_IWMMXT_REGNUM;
20837 regno++)
20838 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20839 saved += 8;
20842 func_type = arm_current_func_type ();
20843 /* Space for saved VFP registers. */
20844 if (! IS_VOLATILE (func_type)
20845 && TARGET_HARD_FLOAT && TARGET_VFP)
20846 saved += arm_get_vfp_saved_size ();
20848 else /* TARGET_THUMB1 */
20850 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20851 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20852 saved = core_saved;
20853 if (TARGET_BACKTRACE)
20854 saved += 16;
20857 /* Saved registers include the stack frame. */
20858 offsets->saved_regs
20859 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20860 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20862 /* A leaf function does not need any stack alignment if it has nothing
20863 on the stack. */
20864 if (leaf && frame_size == 0
20865 /* However if it calls alloca(), we have a dynamically allocated
20866 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20867 && ! cfun->calls_alloca)
20869 offsets->outgoing_args = offsets->soft_frame;
20870 offsets->locals_base = offsets->soft_frame;
20871 return offsets;
20874 /* Ensure SFP has the correct alignment. */
20875 if (ARM_DOUBLEWORD_ALIGN
20876 && (offsets->soft_frame & 7))
20878 offsets->soft_frame += 4;
20879 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20880 when there is a stack frame as the alignment will be rolled into
20881 the normal stack adjustment. */
20882 if (frame_size + crtl->outgoing_args_size == 0)
20884 int reg = -1;
20886 /* Register r3 is caller-saved. Normally it does not need to be
20887 saved on entry by the prologue. However if we choose to save
20888 it for padding then we may confuse the compiler into thinking
20889 a prologue sequence is required when in fact it is not. This
20890 will occur when shrink-wrapping if r3 is used as a scratch
20891 register and there are no other callee-saved writes.
20893 This situation can be avoided when other callee-saved registers
20894 are available and r3 is not mandatory if we choose a callee-saved
20895 register for padding. */
20896 bool prefer_callee_reg_p = false;
20898 /* If it is safe to use r3, then do so. This sometimes
20899 generates better code on Thumb-2 by avoiding the need to
20900 use 32-bit push/pop instructions. */
20901 if (! any_sibcall_could_use_r3 ()
20902 && arm_size_return_regs () <= 12
20903 && (offsets->saved_regs_mask & (1 << 3)) == 0
20904 && (TARGET_THUMB2
20905 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20907 reg = 3;
20908 if (!TARGET_THUMB2)
20909 prefer_callee_reg_p = true;
20911 if (reg == -1
20912 || prefer_callee_reg_p)
20914 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20916 /* Avoid fixed registers; they may be changed at
20917 arbitrary times so it's unsafe to restore them
20918 during the epilogue. */
20919 if (!fixed_regs[i]
20920 && (offsets->saved_regs_mask & (1 << i)) == 0)
20922 reg = i;
20923 break;
20928 if (reg != -1)
20930 offsets->saved_regs += 4;
20931 offsets->saved_regs_mask |= (1 << reg);
20936 offsets->locals_base = offsets->soft_frame + frame_size;
20937 offsets->outgoing_args = (offsets->locals_base
20938 + crtl->outgoing_args_size);
20940 if (ARM_DOUBLEWORD_ALIGN)
20942 /* Ensure SP remains doubleword aligned. */
20943 if (offsets->outgoing_args & 7)
20944 offsets->outgoing_args += 4;
20945 gcc_assert (!(offsets->outgoing_args & 7));
20948 return offsets;
20952 /* Calculate the relative offsets for the different stack pointers. Positive
20953 offsets are in the direction of stack growth. */
20955 HOST_WIDE_INT
20956 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20958 arm_stack_offsets *offsets;
20960 offsets = arm_get_frame_offsets ();
20962 /* OK, now we have enough information to compute the distances.
20963 There must be an entry in these switch tables for each pair
20964 of registers in ELIMINABLE_REGS, even if some of the entries
20965 seem to be redundant or useless. */
20966 switch (from)
20968 case ARG_POINTER_REGNUM:
20969 switch (to)
20971 case THUMB_HARD_FRAME_POINTER_REGNUM:
20972 return 0;
20974 case FRAME_POINTER_REGNUM:
20975 /* This is the reverse of the soft frame pointer
20976 to hard frame pointer elimination below. */
20977 return offsets->soft_frame - offsets->saved_args;
20979 case ARM_HARD_FRAME_POINTER_REGNUM:
20980 /* This is only non-zero in the case where the static chain register
20981 is stored above the frame. */
20982 return offsets->frame - offsets->saved_args - 4;
20984 case STACK_POINTER_REGNUM:
20985 /* If nothing has been pushed on the stack at all
20986 then this will return -4. This *is* correct! */
20987 return offsets->outgoing_args - (offsets->saved_args + 4);
20989 default:
20990 gcc_unreachable ();
20992 gcc_unreachable ();
20994 case FRAME_POINTER_REGNUM:
20995 switch (to)
20997 case THUMB_HARD_FRAME_POINTER_REGNUM:
20998 return 0;
21000 case ARM_HARD_FRAME_POINTER_REGNUM:
21001 /* The hard frame pointer points to the top entry in the
21002 stack frame. The soft frame pointer to the bottom entry
21003 in the stack frame. If there is no stack frame at all,
21004 then they are identical. */
21006 return offsets->frame - offsets->soft_frame;
21008 case STACK_POINTER_REGNUM:
21009 return offsets->outgoing_args - offsets->soft_frame;
21011 default:
21012 gcc_unreachable ();
21014 gcc_unreachable ();
21016 default:
21017 /* You cannot eliminate from the stack pointer.
21018 In theory you could eliminate from the hard frame
21019 pointer to the stack pointer, but this will never
21020 happen, since if a stack frame is not needed the
21021 hard frame pointer will never be used. */
21022 gcc_unreachable ();
21026 /* Given FROM and TO register numbers, say whether this elimination is
21027 allowed. Frame pointer elimination is automatically handled.
21029 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21030 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21031 pointer, we must eliminate FRAME_POINTER_REGNUM into
21032 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21033 ARG_POINTER_REGNUM. */
21035 bool
21036 arm_can_eliminate (const int from, const int to)
21038 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21039 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21040 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21041 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21042 true);
21045 /* Emit RTL to save coprocessor registers on function entry. Returns the
21046 number of bytes pushed. */
21048 static int
21049 arm_save_coproc_regs(void)
21051 int saved_size = 0;
21052 unsigned reg;
21053 unsigned start_reg;
21054 rtx insn;
21056 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21057 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21059 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21060 insn = gen_rtx_MEM (V2SImode, insn);
21061 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21062 RTX_FRAME_RELATED_P (insn) = 1;
21063 saved_size += 8;
21066 if (TARGET_HARD_FLOAT && TARGET_VFP)
21068 start_reg = FIRST_VFP_REGNUM;
21070 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21072 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21073 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21075 if (start_reg != reg)
21076 saved_size += vfp_emit_fstmd (start_reg,
21077 (reg - start_reg) / 2);
21078 start_reg = reg + 2;
21081 if (start_reg != reg)
21082 saved_size += vfp_emit_fstmd (start_reg,
21083 (reg - start_reg) / 2);
21085 return saved_size;
21089 /* Set the Thumb frame pointer from the stack pointer. */
21091 static void
21092 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21094 HOST_WIDE_INT amount;
21095 rtx insn, dwarf;
21097 amount = offsets->outgoing_args - offsets->locals_base;
21098 if (amount < 1024)
21099 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21100 stack_pointer_rtx, GEN_INT (amount)));
21101 else
21103 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21104 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21105 expects the first two operands to be the same. */
21106 if (TARGET_THUMB2)
21108 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21109 stack_pointer_rtx,
21110 hard_frame_pointer_rtx));
21112 else
21114 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21115 hard_frame_pointer_rtx,
21116 stack_pointer_rtx));
21118 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21119 plus_constant (Pmode, stack_pointer_rtx, amount));
21120 RTX_FRAME_RELATED_P (dwarf) = 1;
21121 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21124 RTX_FRAME_RELATED_P (insn) = 1;
21127 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21128 function. */
21129 void
21130 arm_expand_prologue (void)
21132 rtx amount;
21133 rtx insn;
21134 rtx ip_rtx;
21135 unsigned long live_regs_mask;
21136 unsigned long func_type;
21137 int fp_offset = 0;
21138 int saved_pretend_args = 0;
21139 int saved_regs = 0;
21140 unsigned HOST_WIDE_INT args_to_push;
21141 arm_stack_offsets *offsets;
21143 func_type = arm_current_func_type ();
21145 /* Naked functions don't have prologues. */
21146 if (IS_NAKED (func_type))
21147 return;
21149 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21150 args_to_push = crtl->args.pretend_args_size;
21152 /* Compute which register we will have to save onto the stack. */
21153 offsets = arm_get_frame_offsets ();
21154 live_regs_mask = offsets->saved_regs_mask;
21156 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21158 if (IS_STACKALIGN (func_type))
21160 rtx r0, r1;
21162 /* Handle a word-aligned stack pointer. We generate the following:
21164 mov r0, sp
21165 bic r1, r0, #7
21166 mov sp, r1
21167 <save and restore r0 in normal prologue/epilogue>
21168 mov sp, r0
21169 bx lr
21171 The unwinder doesn't need to know about the stack realignment.
21172 Just tell it we saved SP in r0. */
21173 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21175 r0 = gen_rtx_REG (SImode, 0);
21176 r1 = gen_rtx_REG (SImode, 1);
21178 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21179 RTX_FRAME_RELATED_P (insn) = 1;
21180 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21182 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21184 /* ??? The CFA changes here, which may cause GDB to conclude that it
21185 has entered a different function. That said, the unwind info is
21186 correct, individually, before and after this instruction because
21187 we've described the save of SP, which will override the default
21188 handling of SP as restoring from the CFA. */
21189 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21192 /* For APCS frames, if IP register is clobbered
21193 when creating frame, save that register in a special
21194 way. */
21195 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21197 if (IS_INTERRUPT (func_type))
21199 /* Interrupt functions must not corrupt any registers.
21200 Creating a frame pointer however, corrupts the IP
21201 register, so we must push it first. */
21202 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21204 /* Do not set RTX_FRAME_RELATED_P on this insn.
21205 The dwarf stack unwinding code only wants to see one
21206 stack decrement per function, and this is not it. If
21207 this instruction is labeled as being part of the frame
21208 creation sequence then dwarf2out_frame_debug_expr will
21209 die when it encounters the assignment of IP to FP
21210 later on, since the use of SP here establishes SP as
21211 the CFA register and not IP.
21213 Anyway this instruction is not really part of the stack
21214 frame creation although it is part of the prologue. */
21216 else if (IS_NESTED (func_type))
21218 /* The static chain register is the same as the IP register
21219 used as a scratch register during stack frame creation.
21220 To get around this need to find somewhere to store IP
21221 whilst the frame is being created. We try the following
21222 places in order:
21224 1. The last argument register r3 if it is available.
21225 2. A slot on the stack above the frame if there are no
21226 arguments to push onto the stack.
21227 3. Register r3 again, after pushing the argument registers
21228 onto the stack, if this is a varargs function.
21229 4. The last slot on the stack created for the arguments to
21230 push, if this isn't a varargs function.
21232 Note - we only need to tell the dwarf2 backend about the SP
21233 adjustment in the second variant; the static chain register
21234 doesn't need to be unwound, as it doesn't contain a value
21235 inherited from the caller. */
21237 if (!arm_r3_live_at_start_p ())
21238 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21239 else if (args_to_push == 0)
21241 rtx addr, dwarf;
21243 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21244 saved_regs += 4;
21246 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21247 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21248 fp_offset = 4;
21250 /* Just tell the dwarf backend that we adjusted SP. */
21251 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21252 plus_constant (Pmode, stack_pointer_rtx,
21253 -fp_offset));
21254 RTX_FRAME_RELATED_P (insn) = 1;
21255 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21257 else
21259 /* Store the args on the stack. */
21260 if (cfun->machine->uses_anonymous_args)
21262 insn
21263 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21264 (0xf0 >> (args_to_push / 4)) & 0xf);
21265 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21266 saved_pretend_args = 1;
21268 else
21270 rtx addr, dwarf;
21272 if (args_to_push == 4)
21273 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21274 else
21275 addr
21276 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21277 plus_constant (Pmode,
21278 stack_pointer_rtx,
21279 -args_to_push));
21281 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21283 /* Just tell the dwarf backend that we adjusted SP. */
21284 dwarf
21285 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21286 plus_constant (Pmode, stack_pointer_rtx,
21287 -args_to_push));
21288 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21291 RTX_FRAME_RELATED_P (insn) = 1;
21292 fp_offset = args_to_push;
21293 args_to_push = 0;
21297 insn = emit_set_insn (ip_rtx,
21298 plus_constant (Pmode, stack_pointer_rtx,
21299 fp_offset));
21300 RTX_FRAME_RELATED_P (insn) = 1;
21303 if (args_to_push)
21305 /* Push the argument registers, or reserve space for them. */
21306 if (cfun->machine->uses_anonymous_args)
21307 insn = emit_multi_reg_push
21308 ((0xf0 >> (args_to_push / 4)) & 0xf,
21309 (0xf0 >> (args_to_push / 4)) & 0xf);
21310 else
21311 insn = emit_insn
21312 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21313 GEN_INT (- args_to_push)));
21314 RTX_FRAME_RELATED_P (insn) = 1;
21317 /* If this is an interrupt service routine, and the link register
21318 is going to be pushed, and we're not generating extra
21319 push of IP (needed when a frame is needed and the frame layout is APCS),
21320 subtracting four from LR now will mean that the function return
21321 can be done with a single instruction. */
21322 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21323 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21324 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21325 && TARGET_ARM)
21327 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21329 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21332 if (live_regs_mask)
21334 unsigned long dwarf_regs_mask = live_regs_mask;
21336 saved_regs += bit_count (live_regs_mask) * 4;
21337 if (optimize_size && !frame_pointer_needed
21338 && saved_regs == offsets->saved_regs - offsets->saved_args)
21340 /* If no coprocessor registers are being pushed and we don't have
21341 to worry about a frame pointer then push extra registers to
21342 create the stack frame. This is done in a way that does not
21343 alter the frame layout, so is independent of the epilogue. */
21344 int n;
21345 int frame;
21346 n = 0;
21347 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21348 n++;
21349 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21350 if (frame && n * 4 >= frame)
21352 n = frame / 4;
21353 live_regs_mask |= (1 << n) - 1;
21354 saved_regs += frame;
21358 if (TARGET_LDRD
21359 && current_tune->prefer_ldrd_strd
21360 && !optimize_function_for_size_p (cfun))
21362 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21363 if (TARGET_THUMB2)
21364 thumb2_emit_strd_push (live_regs_mask);
21365 else if (TARGET_ARM
21366 && !TARGET_APCS_FRAME
21367 && !IS_INTERRUPT (func_type))
21368 arm_emit_strd_push (live_regs_mask);
21369 else
21371 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21372 RTX_FRAME_RELATED_P (insn) = 1;
21375 else
21377 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21378 RTX_FRAME_RELATED_P (insn) = 1;
21382 if (! IS_VOLATILE (func_type))
21383 saved_regs += arm_save_coproc_regs ();
21385 if (frame_pointer_needed && TARGET_ARM)
21387 /* Create the new frame pointer. */
21388 if (TARGET_APCS_FRAME)
21390 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21391 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21392 RTX_FRAME_RELATED_P (insn) = 1;
21394 if (IS_NESTED (func_type))
21396 /* Recover the static chain register. */
21397 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21398 insn = gen_rtx_REG (SImode, 3);
21399 else
21401 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21402 insn = gen_frame_mem (SImode, insn);
21404 emit_set_insn (ip_rtx, insn);
21405 /* Add a USE to stop propagate_one_insn() from barfing. */
21406 emit_insn (gen_force_register_use (ip_rtx));
21409 else
21411 insn = GEN_INT (saved_regs - 4);
21412 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21413 stack_pointer_rtx, insn));
21414 RTX_FRAME_RELATED_P (insn) = 1;
21418 if (flag_stack_usage_info)
21419 current_function_static_stack_size
21420 = offsets->outgoing_args - offsets->saved_args;
21422 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21424 /* This add can produce multiple insns for a large constant, so we
21425 need to get tricky. */
21426 rtx_insn *last = get_last_insn ();
21428 amount = GEN_INT (offsets->saved_args + saved_regs
21429 - offsets->outgoing_args);
21431 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21432 amount));
21435 last = last ? NEXT_INSN (last) : get_insns ();
21436 RTX_FRAME_RELATED_P (last) = 1;
21438 while (last != insn);
21440 /* If the frame pointer is needed, emit a special barrier that
21441 will prevent the scheduler from moving stores to the frame
21442 before the stack adjustment. */
21443 if (frame_pointer_needed)
21444 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21445 hard_frame_pointer_rtx));
21449 if (frame_pointer_needed && TARGET_THUMB2)
21450 thumb_set_frame_pointer (offsets);
21452 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21454 unsigned long mask;
21456 mask = live_regs_mask;
21457 mask &= THUMB2_WORK_REGS;
21458 if (!IS_NESTED (func_type))
21459 mask |= (1 << IP_REGNUM);
21460 arm_load_pic_register (mask);
21463 /* If we are profiling, make sure no instructions are scheduled before
21464 the call to mcount. Similarly if the user has requested no
21465 scheduling in the prolog. Similarly if we want non-call exceptions
21466 using the EABI unwinder, to prevent faulting instructions from being
21467 swapped with a stack adjustment. */
21468 if (crtl->profile || !TARGET_SCHED_PROLOG
21469 || (arm_except_unwind_info (&global_options) == UI_TARGET
21470 && cfun->can_throw_non_call_exceptions))
21471 emit_insn (gen_blockage ());
21473 /* If the link register is being kept alive, with the return address in it,
21474 then make sure that it does not get reused by the ce2 pass. */
21475 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21476 cfun->machine->lr_save_eliminated = 1;
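/* Illustrative sketch (assumed typical output, not taken from this
   file): for an APCS-frame function the code above usually produces a
   prologue of roughly this shape, with the exact register list and
   offsets depending on the live registers and frame size:

        mov     ip, sp
        stmfd   sp!, {fp, ip, lr, pc}
        sub     fp, ip, #4
        sub     sp, sp, #LOCALS        @ LOCALS is a placeholder

   IP snapshots the incoming stack pointer, the frame registers are
   pushed, the new frame pointer is derived from IP, and the stack is
   then dropped for outgoing arguments and locals.  */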
21479 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21480 static void
21481 arm_print_condition (FILE *stream)
21483 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21485 /* Branch conversion is not implemented for Thumb-2. */
21486 if (TARGET_THUMB)
21488 output_operand_lossage ("predicated Thumb instruction");
21489 return;
21491 if (current_insn_predicate != NULL)
21493 output_operand_lossage
21494 ("predicated instruction in conditional sequence");
21495 return;
21498 fputs (arm_condition_codes[arm_current_cc], stream);
21500 else if (current_insn_predicate)
21502 enum arm_cond_code code;
21504 if (TARGET_THUMB1)
21506 output_operand_lossage ("predicated Thumb instruction");
21507 return;
21510 code = get_arm_condition_code (current_insn_predicate);
21511 fputs (arm_condition_codes[code], stream);
21516 /* Globally reserved letters: acln
21517 Punctuation letters currently used: @_|?().!#
21518 Lower case letters currently used: bcdefhimpqtvwxyz
21519 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21520 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21522 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21524 If CODE is 'd', then the X is a condition operand and the instruction
21525 should only be executed if the condition is true.
21526 If CODE is 'D', then the X is a condition operand and the instruction
21527 should only be executed if the condition is false: however, if the mode
21528 of the comparison is CCFPEmode, then always execute the instruction -- we
21529 do this because in these circumstances !GE does not necessarily imply LT;
21530 in these cases the instruction pattern will take care to make sure that
21531 an instruction containing %d will follow, thereby undoing the effects of
21532 doing this instruction unconditionally.
21533 If CODE is 'N' then X is a floating point operand that must be negated
21534 before output.
21535 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21536 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
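/* Worked example for a few of the codes handled by arm_print_operand
   below (the output template fragment is hypothetical): with operand 1
   being the CONST_INT 5, "#%B1" prints "#-6" (the bitwise inverse of 5,
   sign-extended), "#%L1" prints "#5" (the low 16 bits), and "%?" prints
   the current condition code, or nothing when the instruction is
   unconditional.  */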
21537 static void
21538 arm_print_operand (FILE *stream, rtx x, int code)
21540 switch (code)
21542 case '@':
21543 fputs (ASM_COMMENT_START, stream);
21544 return;
21546 case '_':
21547 fputs (user_label_prefix, stream);
21548 return;
21550 case '|':
21551 fputs (REGISTER_PREFIX, stream);
21552 return;
21554 case '?':
21555 arm_print_condition (stream);
21556 return;
21558 case '(':
21559 /* Nothing in unified syntax, otherwise the current condition code. */
21560 if (!TARGET_UNIFIED_ASM)
21561 arm_print_condition (stream);
21562 break;
21564 case ')':
21565 /* The current condition code in unified syntax, otherwise nothing. */
21566 if (TARGET_UNIFIED_ASM)
21567 arm_print_condition (stream);
21568 break;
21570 case '.':
21571 /* The current condition code for a condition code setting instruction.
21572 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21573 if (TARGET_UNIFIED_ASM)
21575 fputc('s', stream);
21576 arm_print_condition (stream);
21578 else
21580 arm_print_condition (stream);
21581 fputc('s', stream);
21583 return;
21585 case '!':
21586 /* If the instruction is conditionally executed then print
21587 the current condition code, otherwise print 's'. */
21588 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21589 if (current_insn_predicate)
21590 arm_print_condition (stream);
21591 else
21592 fputc('s', stream);
21593 break;
21595 /* %# is a "break" sequence. It doesn't output anything, but is used to
21596 separate e.g. operand numbers from following text, if that text consists
21597 of further digits which we don't want to be part of the operand
21598 number. */
21599 case '#':
21600 return;
21602 case 'N':
21604 REAL_VALUE_TYPE r;
21605 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21606 r = real_value_negate (&r);
21607 fprintf (stream, "%s", fp_const_from_val (&r));
21609 return;
21611 /* An integer or symbol address without a preceding # sign. */
21612 case 'c':
21613 switch (GET_CODE (x))
21615 case CONST_INT:
21616 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21617 break;
21619 case SYMBOL_REF:
21620 output_addr_const (stream, x);
21621 break;
21623 case CONST:
21624 if (GET_CODE (XEXP (x, 0)) == PLUS
21625 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21627 output_addr_const (stream, x);
21628 break;
21630 /* Fall through. */
21632 default:
21633 output_operand_lossage ("Unsupported operand for code '%c'", code);
21635 return;
21637 /* An integer that we want to print in HEX. */
21638 case 'x':
21639 switch (GET_CODE (x))
21641 case CONST_INT:
21642 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21643 break;
21645 default:
21646 output_operand_lossage ("Unsupported operand for code '%c'", code);
21648 return;
21650 case 'B':
21651 if (CONST_INT_P (x))
21653 HOST_WIDE_INT val;
21654 val = ARM_SIGN_EXTEND (~INTVAL (x));
21655 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21657 else
21659 putc ('~', stream);
21660 output_addr_const (stream, x);
21662 return;
21664 case 'b':
21665 /* Print the log2 of a CONST_INT. */
21667 HOST_WIDE_INT val;
21669 if (!CONST_INT_P (x)
21670 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21671 output_operand_lossage ("Unsupported operand for code '%c'", code);
21672 else
21673 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21675 return;
21677 case 'L':
21678 /* The low 16 bits of an immediate constant. */
21679 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21680 return;
21682 case 'i':
21683 fprintf (stream, "%s", arithmetic_instr (x, 1));
21684 return;
21686 case 'I':
21687 fprintf (stream, "%s", arithmetic_instr (x, 0));
21688 return;
21690 case 'S':
21692 HOST_WIDE_INT val;
21693 const char *shift;
21695 shift = shift_op (x, &val);
21697 if (shift)
21699 fprintf (stream, ", %s ", shift);
21700 if (val == -1)
21701 arm_print_operand (stream, XEXP (x, 1), 0);
21702 else
21703 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21706 return;
21708 /* An explanation of the 'Q', 'R' and 'H' register operands:
21710 In a pair of registers containing a DI or DF value the 'Q'
21711 operand returns the register number of the register containing
21712 the least significant part of the value. The 'R' operand returns
21713 the register number of the register containing the most
21714 significant part of the value.
21716 The 'H' operand returns the higher of the two register numbers.
21717 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21718 same as the 'Q' operand, since the most significant part of the
21719 value is held in the lower number register. The reverse is true
21720 on systems where WORDS_BIG_ENDIAN is false.
21722 The purpose of these operands is to distinguish between cases
21723 where the endian-ness of the values is important (for example
21724 when they are added together), and cases where the endian-ness
21725 is irrelevant, but the order of register operations is important.
21726 For example when loading a value from memory into a register
21727 pair, the endian-ness does not matter. Provided that the value
21728 from the lower memory address is put into the lower numbered
21729 register, and the value from the higher address is put into the
21730 higher numbered register, the load will work regardless of whether
21731 the value being loaded is big-wordian or little-wordian. The
21732 order of the two register loads can matter however, if the address
21733 of the memory location is actually held in one of the registers
21734 being overwritten by the load.
21736 The 'Q' and 'R' constraints are also available for 64-bit
21737 constants. */
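/* Worked example (illustrative): for a DImode value held in the
   register pair r0/r1 on a little-endian target, %Q prints r0 (the
   least significant half), %R prints r1 (the most significant half)
   and %H prints r1 (the higher register number).  When
   WORDS_BIG_ENDIAN is true, %Q prints r1 and %R prints r0, while %H
   still prints r1.  */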
21738 case 'Q':
21739 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21741 rtx part = gen_lowpart (SImode, x);
21742 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21743 return;
21746 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21748 output_operand_lossage ("invalid operand for code '%c'", code);
21749 return;
21752 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21753 return;
21755 case 'R':
21756 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21758 machine_mode mode = GET_MODE (x);
21759 rtx part;
21761 if (mode == VOIDmode)
21762 mode = DImode;
21763 part = gen_highpart_mode (SImode, mode, x);
21764 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21765 return;
21768 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21770 output_operand_lossage ("invalid operand for code '%c'", code);
21771 return;
21774 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21775 return;
21777 case 'H':
21778 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21780 output_operand_lossage ("invalid operand for code '%c'", code);
21781 return;
21784 asm_fprintf (stream, "%r", REGNO (x) + 1);
21785 return;
21787 case 'J':
21788 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21790 output_operand_lossage ("invalid operand for code '%c'", code);
21791 return;
21794 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21795 return;
21797 case 'K':
21798 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21800 output_operand_lossage ("invalid operand for code '%c'", code);
21801 return;
21804 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21805 return;
21807 case 'm':
21808 asm_fprintf (stream, "%r",
21809 REG_P (XEXP (x, 0))
21810 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21811 return;
21813 case 'M':
21814 asm_fprintf (stream, "{%r-%r}",
21815 REGNO (x),
21816 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21817 return;
21819 /* Like 'M', but writing doubleword vector registers, for use by Neon
21820 insns. */
21821 case 'h':
21823 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21824 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21825 if (numregs == 1)
21826 asm_fprintf (stream, "{d%d}", regno);
21827 else
21828 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21830 return;
21832 case 'd':
21833 /* CONST_TRUE_RTX means always -- that's the default. */
21834 if (x == const_true_rtx)
21835 return;
21837 if (!COMPARISON_P (x))
21839 output_operand_lossage ("invalid operand for code '%c'", code);
21840 return;
21843 fputs (arm_condition_codes[get_arm_condition_code (x)],
21844 stream);
21845 return;
21847 case 'D':
21848 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21849 want to do that. */
21850 if (x == const_true_rtx)
21852 output_operand_lossage ("instruction never executed");
21853 return;
21855 if (!COMPARISON_P (x))
21857 output_operand_lossage ("invalid operand for code '%c'", code);
21858 return;
21861 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21862 (get_arm_condition_code (x))],
21863 stream);
21864 return;
21866 case 's':
21867 case 'V':
21868 case 'W':
21869 case 'X':
21870 case 'Y':
21871 case 'Z':
21872 /* Former Maverick support, removed after GCC-4.7. */
21873 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21874 return;
21876 case 'U':
21877 if (!REG_P (x)
21878 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21879 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21880 /* Bad value for wCG register number. */
21882 output_operand_lossage ("invalid operand for code '%c'", code);
21883 return;
21886 else
21887 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21888 return;
21890 /* Print an iWMMXt control register name. */
21891 case 'w':
21892 if (!CONST_INT_P (x)
21893 || INTVAL (x) < 0
21894 || INTVAL (x) >= 16)
21895 /* Bad value for wC register number. */
21897 output_operand_lossage ("invalid operand for code '%c'", code);
21898 return;
21901 else
21903 static const char * wc_reg_names [16] =
21905 "wCID", "wCon", "wCSSF", "wCASF",
21906 "wC4", "wC5", "wC6", "wC7",
21907 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21908 "wC12", "wC13", "wC14", "wC15"
21911 fputs (wc_reg_names [INTVAL (x)], stream);
21913 return;
21915 /* Print the high single-precision register of a VFP double-precision
21916 register. */
21917 case 'p':
21919 machine_mode mode = GET_MODE (x);
21920 int regno;
21922 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21924 output_operand_lossage ("invalid operand for code '%c'", code);
21925 return;
21928 regno = REGNO (x);
21929 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21931 output_operand_lossage ("invalid operand for code '%c'", code);
21932 return;
21935 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21937 return;
21939 /* Print a VFP/Neon double precision or quad precision register name. */
21940 case 'P':
21941 case 'q':
21943 machine_mode mode = GET_MODE (x);
21944 int is_quad = (code == 'q');
21945 int regno;
21947 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21949 output_operand_lossage ("invalid operand for code '%c'", code);
21950 return;
21953 if (!REG_P (x)
21954 || !IS_VFP_REGNUM (REGNO (x)))
21956 output_operand_lossage ("invalid operand for code '%c'", code);
21957 return;
21960 regno = REGNO (x);
21961 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21962 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21964 output_operand_lossage ("invalid operand for code '%c'", code);
21965 return;
21968 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21969 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21971 return;
21973 /* These two codes print the low/high doubleword register of a Neon quad
21974 register, respectively. For pair-structure types, can also print
21975 low/high quadword registers. */
21976 case 'e':
21977 case 'f':
21979 machine_mode mode = GET_MODE (x);
21980 int regno;
21982 if ((GET_MODE_SIZE (mode) != 16
21983 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21985 output_operand_lossage ("invalid operand for code '%c'", code);
21986 return;
21989 regno = REGNO (x);
21990 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21992 output_operand_lossage ("invalid operand for code '%c'", code);
21993 return;
21996 if (GET_MODE_SIZE (mode) == 16)
21997 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21998 + (code == 'f' ? 1 : 0));
21999 else
22000 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22001 + (code == 'f' ? 1 : 0));
22003 return;
22005 /* Print a VFPv3 floating-point constant, represented as an integer
22006 index. */
22007 case 'G':
22009 int index = vfp3_const_double_index (x);
22010 gcc_assert (index != -1);
22011 fprintf (stream, "%d", index);
22013 return;
22015 /* Print bits representing opcode features for Neon.
22017 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22018 and polynomials as unsigned.
22020 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22022 Bit 2 is 1 for rounding functions, 0 otherwise. */
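/* Worked example (illustrative): an operand with INTVAL 5
   (binary 101: signed, integer, rounding) makes %T and %t print 's',
   %F print 'i' and %O print 'r'; an operand with INTVAL 2
   (polynomial) makes %T and %F print 'p' while %t prints 'u' and
   %O prints nothing.  */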
22024 /* Identify the type as 's', 'u', 'p' or 'f'. */
22025 case 'T':
22027 HOST_WIDE_INT bits = INTVAL (x);
22028 fputc ("uspf"[bits & 3], stream);
22030 return;
22032 /* Likewise, but signed and unsigned integers are both 'i'. */
22033 case 'F':
22035 HOST_WIDE_INT bits = INTVAL (x);
22036 fputc ("iipf"[bits & 3], stream);
22038 return;
22040 /* As for 'T', but emit 'u' instead of 'p'. */
22041 case 't':
22043 HOST_WIDE_INT bits = INTVAL (x);
22044 fputc ("usuf"[bits & 3], stream);
22046 return;
22048 /* Bit 2: rounding (vs none). */
22049 case 'O':
22051 HOST_WIDE_INT bits = INTVAL (x);
22052 fputs ((bits & 4) != 0 ? "r" : "", stream);
22054 return;
22056 /* Memory operand for vld1/vst1 instruction. */
22057 case 'A':
22059 rtx addr;
22060 bool postinc = FALSE;
22061 rtx postinc_reg = NULL;
22062 unsigned align, memsize, align_bits;
22064 gcc_assert (MEM_P (x));
22065 addr = XEXP (x, 0);
22066 if (GET_CODE (addr) == POST_INC)
22068 postinc = 1;
22069 addr = XEXP (addr, 0);
22071 if (GET_CODE (addr) == POST_MODIFY)
22073 postinc_reg = XEXP( XEXP (addr, 1), 1);
22074 addr = XEXP (addr, 0);
22076 asm_fprintf (stream, "[%r", REGNO (addr));
22078 /* We know the alignment of this access, so we can emit a hint in the
22079 instruction (for some alignments) as an aid to the memory subsystem
22080 of the target. */
22081 align = MEM_ALIGN (x) >> 3;
22082 memsize = MEM_SIZE (x);
22084 /* Only certain alignment specifiers are supported by the hardware. */
22085 if (memsize == 32 && (align % 32) == 0)
22086 align_bits = 256;
22087 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22088 align_bits = 128;
22089 else if (memsize >= 8 && (align % 8) == 0)
22090 align_bits = 64;
22091 else
22092 align_bits = 0;
22094 if (align_bits != 0)
22095 asm_fprintf (stream, ":%d", align_bits);
22097 asm_fprintf (stream, "]");
22099 if (postinc)
22100 fputs("!", stream);
22101 if (postinc_reg)
22102 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22104 return;
22106 case 'C':
22108 rtx addr;
22110 gcc_assert (MEM_P (x));
22111 addr = XEXP (x, 0);
22112 gcc_assert (REG_P (addr));
22113 asm_fprintf (stream, "[%r]", REGNO (addr));
22115 return;
22117 /* Translate an S register number into a D register number and element index. */
22118 case 'y':
22120 machine_mode mode = GET_MODE (x);
22121 int regno;
22123 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22125 output_operand_lossage ("invalid operand for code '%c'", code);
22126 return;
22129 regno = REGNO (x);
22130 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22132 output_operand_lossage ("invalid operand for code '%c'", code);
22133 return;
22136 regno = regno - FIRST_VFP_REGNUM;
22137 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22139 return;
22141 case 'v':
22142 gcc_assert (CONST_DOUBLE_P (x));
22143 int result;
22144 result = vfp3_const_double_for_fract_bits (x);
22145 if (result == 0)
22146 result = vfp3_const_double_for_bits (x);
22147 fprintf (stream, "#%d", result);
22148 return;
22150 /* Register specifier for vld1.16/vst1.16. Translate the S register
22151 number into a D register number and element index. */
22152 case 'z':
22154 machine_mode mode = GET_MODE (x);
22155 int regno;
22157 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22159 output_operand_lossage ("invalid operand for code '%c'", code);
22160 return;
22163 regno = REGNO (x);
22164 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22166 output_operand_lossage ("invalid operand for code '%c'", code);
22167 return;
22170 regno = regno - FIRST_VFP_REGNUM;
22171 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22173 return;
22175 default:
22176 if (x == 0)
22178 output_operand_lossage ("missing operand");
22179 return;
22182 switch (GET_CODE (x))
22184 case REG:
22185 asm_fprintf (stream, "%r", REGNO (x));
22186 break;
22188 case MEM:
22189 output_memory_reference_mode = GET_MODE (x);
22190 output_address (XEXP (x, 0));
22191 break;
22193 case CONST_DOUBLE:
22195 char fpstr[20];
22196 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22197 sizeof (fpstr), 0, 1);
22198 fprintf (stream, "#%s", fpstr);
22200 break;
22202 default:
22203 gcc_assert (GET_CODE (x) != NEG);
22204 fputc ('#', stream);
22205 if (GET_CODE (x) == HIGH)
22207 fputs (":lower16:", stream);
22208 x = XEXP (x, 0);
22211 output_addr_const (stream, x);
22212 break;
22217 /* Target hook for printing a memory address. */
22218 static void
22219 arm_print_operand_address (FILE *stream, rtx x)
22221 if (TARGET_32BIT)
22223 int is_minus = GET_CODE (x) == MINUS;
22225 if (REG_P (x))
22226 asm_fprintf (stream, "[%r]", REGNO (x));
22227 else if (GET_CODE (x) == PLUS || is_minus)
22229 rtx base = XEXP (x, 0);
22230 rtx index = XEXP (x, 1);
22231 HOST_WIDE_INT offset = 0;
22232 if (!REG_P (base)
22233 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22235 /* Ensure that BASE is a register. */
22236 /* (one of them must be). */
22237 /* Also ensure the SP is not used as an index register. */
22238 rtx temp = base;
22239 base = index;
22240 index = temp;
22242 switch (GET_CODE (index))
22244 case CONST_INT:
22245 offset = INTVAL (index);
22246 if (is_minus)
22247 offset = -offset;
22248 asm_fprintf (stream, "[%r, #%wd]",
22249 REGNO (base), offset);
22250 break;
22252 case REG:
22253 asm_fprintf (stream, "[%r, %s%r]",
22254 REGNO (base), is_minus ? "-" : "",
22255 REGNO (index));
22256 break;
22258 case MULT:
22259 case ASHIFTRT:
22260 case LSHIFTRT:
22261 case ASHIFT:
22262 case ROTATERT:
22264 asm_fprintf (stream, "[%r, %s%r",
22265 REGNO (base), is_minus ? "-" : "",
22266 REGNO (XEXP (index, 0)));
22267 arm_print_operand (stream, index, 'S');
22268 fputs ("]", stream);
22269 break;
22272 default:
22273 gcc_unreachable ();
22276 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22277 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22279 extern machine_mode output_memory_reference_mode;
22281 gcc_assert (REG_P (XEXP (x, 0)));
22283 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22284 asm_fprintf (stream, "[%r, #%s%d]!",
22285 REGNO (XEXP (x, 0)),
22286 GET_CODE (x) == PRE_DEC ? "-" : "",
22287 GET_MODE_SIZE (output_memory_reference_mode));
22288 else
22289 asm_fprintf (stream, "[%r], #%s%d",
22290 REGNO (XEXP (x, 0)),
22291 GET_CODE (x) == POST_DEC ? "-" : "",
22292 GET_MODE_SIZE (output_memory_reference_mode));
22294 else if (GET_CODE (x) == PRE_MODIFY)
22296 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22297 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22298 asm_fprintf (stream, "#%wd]!",
22299 INTVAL (XEXP (XEXP (x, 1), 1)));
22300 else
22301 asm_fprintf (stream, "%r]!",
22302 REGNO (XEXP (XEXP (x, 1), 1)));
22304 else if (GET_CODE (x) == POST_MODIFY)
22306 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22307 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22308 asm_fprintf (stream, "#%wd",
22309 INTVAL (XEXP (XEXP (x, 1), 1)));
22310 else
22311 asm_fprintf (stream, "%r",
22312 REGNO (XEXP (XEXP (x, 1), 1)));
22314 else output_addr_const (stream, x);
22316 else
22318 if (REG_P (x))
22319 asm_fprintf (stream, "[%r]", REGNO (x));
22320 else if (GET_CODE (x) == POST_INC)
22321 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22322 else if (GET_CODE (x) == PLUS)
22324 gcc_assert (REG_P (XEXP (x, 0)));
22325 if (CONST_INT_P (XEXP (x, 1)))
22326 asm_fprintf (stream, "[%r, #%wd]",
22327 REGNO (XEXP (x, 0)),
22328 INTVAL (XEXP (x, 1)));
22329 else
22330 asm_fprintf (stream, "[%r, %r]",
22331 REGNO (XEXP (x, 0)),
22332 REGNO (XEXP (x, 1)));
22334 else
22335 output_addr_const (stream, x);
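/* A few illustrative mappings for the 32-bit path above (a sketch,
   not exhaustive; register numbers are arbitrary):
     (reg r4)                                      -> "[r4]"
     (plus (reg r4) (const_int 8))                 -> "[r4, #8]"
     (minus (reg r4) (reg r5))                     -> "[r4, -r5]"
     (post_inc (reg r4)), SImode access            -> "[r4], #4"
     (pre_modify (reg r4)
                 (plus (reg r4) (const_int -16)))  -> "[r4, #-16]!"  */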
22339 /* Target hook for indicating whether a punctuation character for
22340 TARGET_PRINT_OPERAND is valid. */
22341 static bool
22342 arm_print_operand_punct_valid_p (unsigned char code)
22344 return (code == '@' || code == '|' || code == '.'
22345 || code == '(' || code == ')' || code == '#'
22346 || (TARGET_32BIT && (code == '?'))
22347 || (TARGET_THUMB2 && (code == '!'))
22348 || (TARGET_THUMB && (code == '_')));
22351 /* Target hook for assembling integer objects. The ARM version needs to
22352 handle word-sized values specially. */
22353 static bool
22354 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22356 machine_mode mode;
22358 if (size == UNITS_PER_WORD && aligned_p)
22360 fputs ("\t.word\t", asm_out_file);
22361 output_addr_const (asm_out_file, x);
22363 /* Mark symbols as position independent. We only do this in the
22364 .text segment, not in the .data segment. */
22365 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22366 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22368 /* See legitimize_pic_address for an explanation of the
22369 TARGET_VXWORKS_RTP check. */
22370 if (!arm_pic_data_is_text_relative
22371 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22372 fputs ("(GOT)", asm_out_file);
22373 else
22374 fputs ("(GOTOFF)", asm_out_file);
22376 fputc ('\n', asm_out_file);
22377 return true;
22380 mode = GET_MODE (x);
22382 if (arm_vector_mode_supported_p (mode))
22384 int i, units;
22386 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22388 units = CONST_VECTOR_NUNITS (x);
22389 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22391 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22392 for (i = 0; i < units; i++)
22394 rtx elt = CONST_VECTOR_ELT (x, i);
22395 assemble_integer
22396 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22398 else
22399 for (i = 0; i < units; i++)
22401 rtx elt = CONST_VECTOR_ELT (x, i);
22402 REAL_VALUE_TYPE rval;
22404 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22406 assemble_real
22407 (rval, GET_MODE_INNER (mode),
22408 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22411 return true;
22414 return default_assemble_integer (x, size, aligned_p);
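/* Example of the word-sized case above (illustrative; "foo" is a
   hypothetical symbol): while emitting a PIC constant table, a local,
   text-relative SYMBOL_REF produces
        .word   foo(GOTOFF)
   whereas a non-local symbol, or non-text-relative data, produces
        .word   foo(GOT)
   Outside those conditions the plain ".word foo" form is used.  */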
22417 static void
22418 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22420 section *s;
22422 if (!TARGET_AAPCS_BASED)
22424 (is_ctor ?
22425 default_named_section_asm_out_constructor
22426 : default_named_section_asm_out_destructor) (symbol, priority);
22427 return;
22430 /* Put these in the .init_array section, using a special relocation. */
22431 if (priority != DEFAULT_INIT_PRIORITY)
22433 char buf[18];
22434 sprintf (buf, "%s.%.5u",
22435 is_ctor ? ".init_array" : ".fini_array",
22436 priority);
22437 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22439 else if (is_ctor)
22440 s = ctors_section;
22441 else
22442 s = dtors_section;
22444 switch_to_section (s);
22445 assemble_align (POINTER_SIZE);
22446 fputs ("\t.word\t", asm_out_file);
22447 output_addr_const (asm_out_file, symbol);
22448 fputs ("(target1)\n", asm_out_file);
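/* Illustration (a sketch): on an AAPCS-based target a constructor
   with priority 101 goes into a section named ".init_array.00101"
   and is emitted as
        .word   ctor_symbol(target1)
   where ctor_symbol stands for the constructor's assembler name;
   destructors with a non-default priority use ".fini_array.NNNNN"
   in the same way.  */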
22451 /* Add a function to the list of static constructors. */
22453 static void
22454 arm_elf_asm_constructor (rtx symbol, int priority)
22456 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22459 /* Add a function to the list of static destructors. */
22461 static void
22462 arm_elf_asm_destructor (rtx symbol, int priority)
22464 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22467 /* A finite state machine takes care of noticing whether or not instructions
22468 can be conditionally executed, thus decreasing execution time and code
22469 size by deleting branch instructions. The fsm is controlled by
22470 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22472 /* The states of the fsm controlling condition codes are:
22473 0: normal, do nothing special
22474 1: make ASM_OUTPUT_OPCODE not output this instruction
22475 2: make ASM_OUTPUT_OPCODE not output this instruction
22476 3: make instructions conditional
22477 4: make instructions conditional
22479 State transitions (state->state by whom under condition):
22480 0 -> 1 final_prescan_insn if the `target' is a label
22481 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22482 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22483 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22484 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22485 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22486 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22487 (the target insn is arm_target_insn).
22489 If the jump clobbers the conditions then we use states 2 and 4.
22491 A similar thing can be done with conditional return insns.
22493 XXX In case the `target' is an unconditional branch, this conditionalising
22494 of the instructions always reduces code size, but not always execution
22495 time. But then, I want to reduce the code size to somewhere near what
22496 /bin/cc produces. */
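/* Worked example (illustrative): a branch skipping two flag-preserving
   instructions, such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        mov     r2, #0
   .L1:

   is rewritten by this machinery into

        cmp     r0, #0
        addne   r1, r1, #1
        movne   r2, #0

   The branch is suppressed (states 1 and 3) and the skipped
   instructions execute under the inverse of the branch condition.  */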
22498 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22499 instructions. When a COND_EXEC instruction is seen the subsequent
22500 instructions are scanned so that multiple conditional instructions can be
22501 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22502 specify the length and true/false mask for the IT block. These will be
22503 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
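/* Worked example (illustrative): three consecutive COND_EXEC insns
   predicated on [eq, ne, eq] leave arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101, so thumb2_asm_output_opcode prints
   "itet eq" before the first instruction; each 't'/'e' letter mirrors
   a mask bit (1 = same condition, 0 = inverse condition).  */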
22505 /* Returns the index of the ARM condition code string in
22506 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22507 COMPARISON should be an rtx like `(eq (...) (...))'. */
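/* For example (illustrative): a comparison such as (geu (reg) (reg))
   evaluated in CCmode maps to ARM_CS, printed as the "cs" suffix,
   while the same GEU in CC_SWPmode (operands swapped) maps to ARM_LS
   instead.  */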
22509 enum arm_cond_code
22510 maybe_get_arm_condition_code (rtx comparison)
22512 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22513 enum arm_cond_code code;
22514 enum rtx_code comp_code = GET_CODE (comparison);
22516 if (GET_MODE_CLASS (mode) != MODE_CC)
22517 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22518 XEXP (comparison, 1));
22520 switch (mode)
22522 case CC_DNEmode: code = ARM_NE; goto dominance;
22523 case CC_DEQmode: code = ARM_EQ; goto dominance;
22524 case CC_DGEmode: code = ARM_GE; goto dominance;
22525 case CC_DGTmode: code = ARM_GT; goto dominance;
22526 case CC_DLEmode: code = ARM_LE; goto dominance;
22527 case CC_DLTmode: code = ARM_LT; goto dominance;
22528 case CC_DGEUmode: code = ARM_CS; goto dominance;
22529 case CC_DGTUmode: code = ARM_HI; goto dominance;
22530 case CC_DLEUmode: code = ARM_LS; goto dominance;
22531 case CC_DLTUmode: code = ARM_CC;
22533 dominance:
22534 if (comp_code == EQ)
22535 return ARM_INVERSE_CONDITION_CODE (code);
22536 if (comp_code == NE)
22537 return code;
22538 return ARM_NV;
22540 case CC_NOOVmode:
22541 switch (comp_code)
22543 case NE: return ARM_NE;
22544 case EQ: return ARM_EQ;
22545 case GE: return ARM_PL;
22546 case LT: return ARM_MI;
22547 default: return ARM_NV;
22550 case CC_Zmode:
22551 switch (comp_code)
22553 case NE: return ARM_NE;
22554 case EQ: return ARM_EQ;
22555 default: return ARM_NV;
22558 case CC_Nmode:
22559 switch (comp_code)
22561 case NE: return ARM_MI;
22562 case EQ: return ARM_PL;
22563 default: return ARM_NV;
22566 case CCFPEmode:
22567 case CCFPmode:
22568 /* We can handle all cases except UNEQ and LTGT. */
22569 switch (comp_code)
22571 case GE: return ARM_GE;
22572 case GT: return ARM_GT;
22573 case LE: return ARM_LS;
22574 case LT: return ARM_MI;
22575 case NE: return ARM_NE;
22576 case EQ: return ARM_EQ;
22577 case ORDERED: return ARM_VC;
22578 case UNORDERED: return ARM_VS;
22579 case UNLT: return ARM_LT;
22580 case UNLE: return ARM_LE;
22581 case UNGT: return ARM_HI;
22582 case UNGE: return ARM_PL;
22583 /* UNEQ and LTGT do not have a representation. */
22584 case UNEQ: /* Fall through. */
22585 case LTGT: /* Fall through. */
22586 default: return ARM_NV;
22589 case CC_SWPmode:
22590 switch (comp_code)
22592 case NE: return ARM_NE;
22593 case EQ: return ARM_EQ;
22594 case GE: return ARM_LE;
22595 case GT: return ARM_LT;
22596 case LE: return ARM_GE;
22597 case LT: return ARM_GT;
22598 case GEU: return ARM_LS;
22599 case GTU: return ARM_CC;
22600 case LEU: return ARM_CS;
22601 case LTU: return ARM_HI;
22602 default: return ARM_NV;
22605 case CC_Cmode:
22606 switch (comp_code)
22608 case LTU: return ARM_CS;
22609 case GEU: return ARM_CC;
22610 default: return ARM_NV;
22613 case CC_CZmode:
22614 switch (comp_code)
22616 case NE: return ARM_NE;
22617 case EQ: return ARM_EQ;
22618 case GEU: return ARM_CS;
22619 case GTU: return ARM_HI;
22620 case LEU: return ARM_LS;
22621 case LTU: return ARM_CC;
22622 default: return ARM_NV;
22625 case CC_NCVmode:
22626 switch (comp_code)
22628 case GE: return ARM_GE;
22629 case LT: return ARM_LT;
22630 case GEU: return ARM_CS;
22631 case LTU: return ARM_CC;
22632 default: return ARM_NV;
22635 case CCmode:
22636 switch (comp_code)
22638 case NE: return ARM_NE;
22639 case EQ: return ARM_EQ;
22640 case GE: return ARM_GE;
22641 case GT: return ARM_GT;
22642 case LE: return ARM_LE;
22643 case LT: return ARM_LT;
22644 case GEU: return ARM_CS;
22645 case GTU: return ARM_HI;
22646 case LEU: return ARM_LS;
22647 case LTU: return ARM_CC;
22648 default: return ARM_NV;
22651 default: gcc_unreachable ();
22655 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22656 static enum arm_cond_code
22657 get_arm_condition_code (rtx comparison)
22659 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22660 gcc_assert (code != ARM_NV);
22661 return code;
22664 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22665 instructions. */
22666 void
22667 thumb2_final_prescan_insn (rtx_insn *insn)
22669 rtx_insn *first_insn = insn;
22670 rtx body = PATTERN (insn);
22671 rtx predicate;
22672 enum arm_cond_code code;
22673 int n;
22674 int mask;
22675 int max;
22677 /* max_insns_skipped in the tune was already taken into account in the
22678 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22679 just emit the IT blocks as we can. It does not make sense to split
22680 the IT blocks. */
22681 max = MAX_INSN_PER_IT_BLOCK;
22683 /* Remove the previous insn from the count of insns to be output. */
22684 if (arm_condexec_count)
22685 arm_condexec_count--;
22687 /* Nothing to do if we are already inside a conditional block. */
22688 if (arm_condexec_count)
22689 return;
22691 if (GET_CODE (body) != COND_EXEC)
22692 return;
22694 /* Conditional jumps are implemented directly. */
22695 if (JUMP_P (insn))
22696 return;
22698 predicate = COND_EXEC_TEST (body);
22699 arm_current_cc = get_arm_condition_code (predicate);
22701 n = get_attr_ce_count (insn);
22702 arm_condexec_count = 1;
22703 arm_condexec_mask = (1 << n) - 1;
22704 arm_condexec_masklen = n;
22705 /* See if subsequent instructions can be combined into the same block. */
22706 for (;;)
22708 insn = next_nonnote_insn (insn);
22710 /* Jumping into the middle of an IT block is illegal, so a label or
22711 barrier terminates the block. */
22712 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22713 break;
22715 body = PATTERN (insn);
22716 /* USE and CLOBBER aren't really insns, so just skip them. */
22717 if (GET_CODE (body) == USE
22718 || GET_CODE (body) == CLOBBER)
22719 continue;
22721 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22722 if (GET_CODE (body) != COND_EXEC)
22723 break;
22724 /* Maximum number of conditionally executed instructions in a block. */
22725 n = get_attr_ce_count (insn);
22726 if (arm_condexec_masklen + n > max)
22727 break;
22729 predicate = COND_EXEC_TEST (body);
22730 code = get_arm_condition_code (predicate);
22731 mask = (1 << n) - 1;
22732 if (arm_current_cc == code)
22733 arm_condexec_mask |= (mask << arm_condexec_masklen);
22734 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22735 break;
22737 arm_condexec_count++;
22738 arm_condexec_masklen += n;
22740 /* A jump must be the last instruction in a conditional block. */
22741 if (JUMP_P (insn))
22742 break;
22744 /* Restore recog_data (getting the attributes of other insns can
22745 destroy this array, but final.c assumes that it remains intact
22746 across this call). */
22747 extract_constrain_insn_cached (first_insn);
22750 void
22751 arm_final_prescan_insn (rtx_insn *insn)
22753 /* BODY will hold the body of INSN. */
22754 rtx body = PATTERN (insn);
22756 /* This will be 1 if trying to repeat the trick, and things need to be
22757 reversed if it appears to fail. */
22758 int reverse = 0;
22760 /* If we start with a return insn, we only succeed if we find another one. */
22761 int seeking_return = 0;
22762 enum rtx_code return_code = UNKNOWN;
22764 /* START_INSN will hold the insn from where we start looking. This is the
22765 first insn after the following code_label if REVERSE is true. */
22766 rtx_insn *start_insn = insn;
22768 /* If in state 4, check if the target branch is reached, in order to
22769 change back to state 0. */
22770 if (arm_ccfsm_state == 4)
22772 if (insn == arm_target_insn)
22774 arm_target_insn = NULL;
22775 arm_ccfsm_state = 0;
22777 return;
22780 /* If in state 3, it is possible to repeat the trick, if this insn is an
22781 unconditional branch to a label, and immediately following this branch
22782 is the previous target label which is only used once, and the label this
22783 branch jumps to is not too far off. */
22784 if (arm_ccfsm_state == 3)
22786 if (simplejump_p (insn))
22788 start_insn = next_nonnote_insn (start_insn);
22789 if (BARRIER_P (start_insn))
22791 /* XXX Isn't this always a barrier? */
22792 start_insn = next_nonnote_insn (start_insn);
22794 if (LABEL_P (start_insn)
22795 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22796 && LABEL_NUSES (start_insn) == 1)
22797 reverse = TRUE;
22798 else
22799 return;
22801 else if (ANY_RETURN_P (body))
22803 start_insn = next_nonnote_insn (start_insn);
22804 if (BARRIER_P (start_insn))
22805 start_insn = next_nonnote_insn (start_insn);
22806 if (LABEL_P (start_insn)
22807 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22808 && LABEL_NUSES (start_insn) == 1)
22810 reverse = TRUE;
22811 seeking_return = 1;
22812 return_code = GET_CODE (body);
22814 else
22815 return;
22817 else
22818 return;
22821 gcc_assert (!arm_ccfsm_state || reverse);
22822 if (!JUMP_P (insn))
22823 return;
22825 /* This jump might be paralleled with a clobber of the condition codes;
22826 the jump should always come first. */
22827 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22828 body = XVECEXP (body, 0, 0);
22830 if (reverse
22831 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22832 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22834 int insns_skipped;
22835 int fail = FALSE, succeed = FALSE;
22836 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22837 int then_not_else = TRUE;
22838 rtx_insn *this_insn = start_insn;
22839 rtx label = 0;
22841 /* Register the insn jumped to. */
22842 if (reverse)
22844 if (!seeking_return)
22845 label = XEXP (SET_SRC (body), 0);
22847 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22848 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22849 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22851 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22852 then_not_else = FALSE;
22854 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22856 seeking_return = 1;
22857 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22859 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22861 seeking_return = 1;
22862 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22863 then_not_else = FALSE;
22865 else
22866 gcc_unreachable ();
22868 /* See how many insns this branch skips, and what kind of insns. If all
22869 insns are okay, and the label or unconditional branch to the same
22870 label is not too far away, succeed. */
22871 for (insns_skipped = 0;
22872 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22874 rtx scanbody;
22876 this_insn = next_nonnote_insn (this_insn);
22877 if (!this_insn)
22878 break;
22880 switch (GET_CODE (this_insn))
22882 case CODE_LABEL:
22883 /* Succeed if it is the target label, otherwise fail since
22884 control falls in from somewhere else. */
22885 if (this_insn == label)
22887 arm_ccfsm_state = 1;
22888 succeed = TRUE;
22890 else
22891 fail = TRUE;
22892 break;
22894 case BARRIER:
22895 /* Succeed if the following insn is the target label.
22896 Otherwise fail.
22897 If return insns are used then the last insn in a function
22898 will be a barrier. */
22899 this_insn = next_nonnote_insn (this_insn);
22900 if (this_insn && this_insn == label)
22902 arm_ccfsm_state = 1;
22903 succeed = TRUE;
22905 else
22906 fail = TRUE;
22907 break;
22909 case CALL_INSN:
22910 /* The AAPCS says that conditional calls should not be
22911 used since they make interworking inefficient (the
22912 linker can't transform BL<cond> into BLX). That's
22913 only a problem if the machine has BLX. */
22914 if (arm_arch5)
22916 fail = TRUE;
22917 break;
22920 /* Succeed if the following insn is the target label, or
22921 if the following two insns are a barrier and the
22922 target label. */
22923 this_insn = next_nonnote_insn (this_insn);
22924 if (this_insn && BARRIER_P (this_insn))
22925 this_insn = next_nonnote_insn (this_insn);
22927 if (this_insn && this_insn == label
22928 && insns_skipped < max_insns_skipped)
22930 arm_ccfsm_state = 1;
22931 succeed = TRUE;
22933 else
22934 fail = TRUE;
22935 break;
22937 case JUMP_INSN:
22938 /* If this is an unconditional branch to the same label, succeed.
22939 If it is to another label, do nothing. If it is conditional,
22940 fail. */
22941 /* XXX Probably, the tests for SET and the PC are
22942 unnecessary. */
22944 scanbody = PATTERN (this_insn);
22945 if (GET_CODE (scanbody) == SET
22946 && GET_CODE (SET_DEST (scanbody)) == PC)
22948 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22949 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22951 arm_ccfsm_state = 2;
22952 succeed = TRUE;
22954 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22955 fail = TRUE;
22957 /* Fail if a conditional return is undesirable (e.g. on a
22958 StrongARM), but still allow this if optimizing for size. */
22959 else if (GET_CODE (scanbody) == return_code
22960 && !use_return_insn (TRUE, NULL)
22961 && !optimize_size)
22962 fail = TRUE;
22963 else if (GET_CODE (scanbody) == return_code)
22965 arm_ccfsm_state = 2;
22966 succeed = TRUE;
22968 else if (GET_CODE (scanbody) == PARALLEL)
22970 switch (get_attr_conds (this_insn))
22972 case CONDS_NOCOND:
22973 break;
22974 default:
22975 fail = TRUE;
22976 break;
22979 else
22980 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22982 break;
22984 case INSN:
22985 /* Instructions using or affecting the condition codes make it
22986 fail. */
22987 scanbody = PATTERN (this_insn);
22988 if (!(GET_CODE (scanbody) == SET
22989 || GET_CODE (scanbody) == PARALLEL)
22990 || get_attr_conds (this_insn) != CONDS_NOCOND)
22991 fail = TRUE;
22992 break;
22994 default:
22995 break;
22998 if (succeed)
23000 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23001 arm_target_label = CODE_LABEL_NUMBER (label);
23002 else
23004 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23006 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23008 this_insn = next_nonnote_insn (this_insn);
23009 gcc_assert (!this_insn
23010 || (!BARRIER_P (this_insn)
23011 && !LABEL_P (this_insn)));
23013 if (!this_insn)
23015 /* Oh, dear!  We ran off the end... give up. */
23016 extract_constrain_insn_cached (insn);
23017 arm_ccfsm_state = 0;
23018 arm_target_insn = NULL;
23019 return;
23021 arm_target_insn = this_insn;
23024 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23025 what it was. */
23026 if (!reverse)
23027 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23029 if (reverse || then_not_else)
23030 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23033 /* Restore recog_data (getting the attributes of other insns can
23034 destroy this array, but final.c assumes that it remains intact
23035 across this call). */
23036 extract_constrain_insn_cached (insn);
23040 /* Output IT instructions. */
23041 void
23042 thumb2_asm_output_opcode (FILE * stream)
23044 char buff[5];
23045 int n;
23047 if (arm_condexec_mask)
23049 for (n = 0; n < arm_condexec_masklen; n++)
23050 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23051 buff[n] = 0;
23052 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23053 arm_condition_codes[arm_current_cc]);
23054 arm_condexec_mask = 0;
23058 /* Returns true if REGNO is a valid register
23059 for holding a quantity of type MODE. */
23061 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23063 if (GET_MODE_CLASS (mode) == MODE_CC)
23064 return (regno == CC_REGNUM
23065 || (TARGET_HARD_FLOAT && TARGET_VFP
23066 && regno == VFPCC_REGNUM));
23068 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23069 return false;
23071 if (TARGET_THUMB1)
23072 /* For the Thumb we only allow values bigger than SImode in
23073 registers 0 - 6, so that there is always a second low
23074 register available to hold the upper part of the value.
23075 We probably ought to ensure that the register is the
23076 start of an even numbered register pair. */
23077 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23079 if (TARGET_HARD_FLOAT && TARGET_VFP
23080 && IS_VFP_REGNUM (regno))
23082 if (mode == SFmode || mode == SImode)
23083 return VFP_REGNO_OK_FOR_SINGLE (regno);
23085 if (mode == DFmode)
23086 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23088 /* VFP registers can hold HFmode values, but there is no point in
23089 putting them there unless we have hardware conversion insns. */
23090 if (mode == HFmode)
23091 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23093 if (TARGET_NEON)
23094 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23095 || (VALID_NEON_QREG_MODE (mode)
23096 && NEON_REGNO_OK_FOR_QUAD (regno))
23097 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23098 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23099 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23100 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23101 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23103 return FALSE;
23106 if (TARGET_REALLY_IWMMXT)
23108 if (IS_IWMMXT_GR_REGNUM (regno))
23109 return mode == SImode;
23111 if (IS_IWMMXT_REGNUM (regno))
23112 return VALID_IWMMXT_REG_MODE (mode);
23115 /* We allow almost any value to be stored in the general registers.
23116 Restrict doubleword quantities to even register pairs in ARM state
23117 so that we can use ldrd. Do not allow very large Neon structure
23118 opaque modes in general registers; they would use too many. */
23119 if (regno <= LAST_ARM_REGNUM)
23121 if (ARM_NUM_REGS (mode) > 4)
23122 return FALSE;
23124 if (TARGET_THUMB2)
23125 return TRUE;
23127 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23130 if (regno == FRAME_POINTER_REGNUM
23131 || regno == ARG_POINTER_REGNUM)
23132 /* We only allow integers in the fake hard registers. */
23133 return GET_MODE_CLASS (mode) == MODE_INT;
23135 return FALSE;
23138 /* Implement MODES_TIEABLE_P. */
23140 bool
23141 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23143 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23144 return true;
23146 /* We specifically want to allow elements of "structure" modes to
23147 be tieable to the structure. This more general condition allows
23148 other rarer situations too. */
23149 if (TARGET_NEON
23150 && (VALID_NEON_DREG_MODE (mode1)
23151 || VALID_NEON_QREG_MODE (mode1)
23152 || VALID_NEON_STRUCT_MODE (mode1))
23153 && (VALID_NEON_DREG_MODE (mode2)
23154 || VALID_NEON_QREG_MODE (mode2)
23155 || VALID_NEON_STRUCT_MODE (mode2)))
23156 return true;
23158 return false;
23161 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23162 not used in arm mode. */
23164 enum reg_class
23165 arm_regno_class (int regno)
23167 if (regno == PC_REGNUM)
23168 return NO_REGS;
23170 if (TARGET_THUMB1)
23172 if (regno == STACK_POINTER_REGNUM)
23173 return STACK_REG;
23174 if (regno == CC_REGNUM)
23175 return CC_REG;
23176 if (regno < 8)
23177 return LO_REGS;
23178 return HI_REGS;
23181 if (TARGET_THUMB2 && regno < 8)
23182 return LO_REGS;
23184 if ( regno <= LAST_ARM_REGNUM
23185 || regno == FRAME_POINTER_REGNUM
23186 || regno == ARG_POINTER_REGNUM)
23187 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23189 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23190 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23192 if (IS_VFP_REGNUM (regno))
23194 if (regno <= D7_VFP_REGNUM)
23195 return VFP_D0_D7_REGS;
23196 else if (regno <= LAST_LO_VFP_REGNUM)
23197 return VFP_LO_REGS;
23198 else
23199 return VFP_HI_REGS;
23202 if (IS_IWMMXT_REGNUM (regno))
23203 return IWMMXT_REGS;
23205 if (IS_IWMMXT_GR_REGNUM (regno))
23206 return IWMMXT_GR_REGS;
23208 return NO_REGS;
23211 /* Handle a special case when computing the offset
23212 of an argument from the frame pointer. */
23214 arm_debugger_arg_offset (int value, rtx addr)
23216 rtx_insn *insn;
23218 /* We are only interested if dbxout_parms() failed to compute the offset. */
23219 if (value != 0)
23220 return 0;
23222 /* We can only cope with the case where the address is held in a register. */
23223 if (!REG_P (addr))
23224 return 0;
23226 /* If we are using the frame pointer to point at the argument, then
23227 an offset of 0 is correct. */
23228 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23229 return 0;
23231 /* If we are using the stack pointer to point at the
23232 argument, then an offset of 0 is correct. */
23233 /* ??? Check this is consistent with thumb2 frame layout. */
23234 if ((TARGET_THUMB || !frame_pointer_needed)
23235 && REGNO (addr) == SP_REGNUM)
23236 return 0;
23238 /* Oh dear. The argument is pointed to by a register rather
23239 than being held in a register, or being stored at a known
23240 offset from the frame pointer. Since GDB only understands
23241 those two kinds of argument we must translate the address
23242 held in the register into an offset from the frame pointer.
23243 We do this by searching through the insns for the function
23244 looking to see where this register gets its value. If the
23245 register is initialized from the frame pointer plus an offset
23246 then we are in luck and we can continue, otherwise we give up.
23248 This code is exercised by producing debugging information
23249 for a function with arguments like this:
23251 double func (double a, double b, int c, double d) {return d;}
23253 Without this code the stab for parameter 'd' will be set to
23254 an offset of 0 from the frame pointer, rather than 8. */
23256 /* The if() statement says:
23258 If the insn is a normal instruction
23259 and if the insn is setting the value in a register
23260 and if the register being set is the register holding the address of the argument
23261 and if the address is computed by an addition
23262 that involves adding to a register
23263 which is the frame pointer
23264 a constant integer
23266 then... */
23268 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23270 if ( NONJUMP_INSN_P (insn)
23271 && GET_CODE (PATTERN (insn)) == SET
23272 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23273 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23274 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23275 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23276 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23279 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23281 break;
23285 if (value == 0)
23287 debug_rtx (addr);
23288 warning (0, "unable to compute real location of stacked parameter");
23289 value = 8; /* XXX magic hack */
23292 return value;
23295 typedef enum {
23296 T_V8QI,
23297 T_V4HI,
23298 T_V4HF,
23299 T_V2SI,
23300 T_V2SF,
23301 T_DI,
23302 T_V16QI,
23303 T_V8HI,
23304 T_V4SI,
23305 T_V4SF,
23306 T_V2DI,
23307 T_TI,
23308 T_EI,
23309 T_OI,
23310 T_MAX /* Size of enum. Keep last. */
23311 } neon_builtin_type_mode;
23313 #define TYPE_MODE_BIT(X) (1 << (X))
23315 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23316 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23317 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23318 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23319 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23320 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23322 #define v8qi_UP T_V8QI
23323 #define v4hi_UP T_V4HI
23324 #define v4hf_UP T_V4HF
23325 #define v2si_UP T_V2SI
23326 #define v2sf_UP T_V2SF
23327 #define di_UP T_DI
23328 #define v16qi_UP T_V16QI
23329 #define v8hi_UP T_V8HI
23330 #define v4si_UP T_V4SI
23331 #define v4sf_UP T_V4SF
23332 #define v2di_UP T_V2DI
23333 #define ti_UP T_TI
23334 #define ei_UP T_EI
23335 #define oi_UP T_OI
23337 #define UP(X) X##_UP
23339 typedef enum {
23340 NEON_BINOP,
23341 NEON_TERNOP,
23342 NEON_UNOP,
23343 NEON_BSWAP,
23344 NEON_GETLANE,
23345 NEON_SETLANE,
23346 NEON_CREATE,
23347 NEON_RINT,
23348 NEON_COPYSIGNF,
23349 NEON_DUP,
23350 NEON_DUPLANE,
23351 NEON_COMBINE,
23352 NEON_SPLIT,
23353 NEON_LANEMUL,
23354 NEON_LANEMULL,
23355 NEON_LANEMULH,
23356 NEON_LANEMAC,
23357 NEON_SCALARMUL,
23358 NEON_SCALARMULL,
23359 NEON_SCALARMULH,
23360 NEON_SCALARMAC,
23361 NEON_CONVERT,
23362 NEON_FLOAT_WIDEN,
23363 NEON_FLOAT_NARROW,
23364 NEON_FIXCONV,
23365 NEON_SELECT,
23366 NEON_REINTERP,
23367 NEON_VTBL,
23368 NEON_VTBX,
23369 NEON_LOAD1,
23370 NEON_LOAD1LANE,
23371 NEON_STORE1,
23372 NEON_STORE1LANE,
23373 NEON_LOADSTRUCT,
23374 NEON_LOADSTRUCTLANE,
23375 NEON_STORESTRUCT,
23376 NEON_STORESTRUCTLANE,
23377 NEON_LOGICBINOP,
23378 NEON_SHIFTINSERT,
23379 NEON_SHIFTIMM,
23380 NEON_SHIFTACC
23381 } neon_itype;
23383 typedef struct {
23384 const char *name;
23385 const neon_itype itype;
23386 const neon_builtin_type_mode mode;
23387 const enum insn_code code;
23388 unsigned int fcode;
23389 } neon_builtin_datum;
23391 #define CF(N,X) CODE_FOR_neon_##N##X
23393 #define VAR1(T, N, A) \
23394 {#N, NEON_##T, UP (A), CF (N, A), 0}
23395 #define VAR2(T, N, A, B) \
23396 VAR1 (T, N, A), \
23397 {#N, NEON_##T, UP (B), CF (N, B), 0}
23398 #define VAR3(T, N, A, B, C) \
23399 VAR2 (T, N, A, B), \
23400 {#N, NEON_##T, UP (C), CF (N, C), 0}
23401 #define VAR4(T, N, A, B, C, D) \
23402 VAR3 (T, N, A, B, C), \
23403 {#N, NEON_##T, UP (D), CF (N, D), 0}
23404 #define VAR5(T, N, A, B, C, D, E) \
23405 VAR4 (T, N, A, B, C, D), \
23406 {#N, NEON_##T, UP (E), CF (N, E), 0}
23407 #define VAR6(T, N, A, B, C, D, E, F) \
23408 VAR5 (T, N, A, B, C, D, E), \
23409 {#N, NEON_##T, UP (F), CF (N, F), 0}
23410 #define VAR7(T, N, A, B, C, D, E, F, G) \
23411 VAR6 (T, N, A, B, C, D, E, F), \
23412 {#N, NEON_##T, UP (G), CF (N, G), 0}
23413 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23414 VAR7 (T, N, A, B, C, D, E, F, G), \
23415 {#N, NEON_##T, UP (H), CF (N, H), 0}
23416 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23417 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23418 {#N, NEON_##T, UP (I), CF (N, I), 0}
23419 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23420 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23421 {#N, NEON_##T, UP (J), CF (N, J), 0}
23423 /* The NEON builtin data can be found in arm_neon_builtins.def.
23424 The mode entries in the following table correspond to the "key" type of the
23425 instruction variant, i.e. equivalent to that which would be specified after
23426 the assembler mnemonic, which usually refers to the last vector operand.
23427 (Signed/unsigned/polynomial types are not differentiated between though, and
23428 are all mapped onto the same mode for a given element size.) The modes
23429 listed per instruction should be the same as those defined for that
23430 instruction's pattern in neon.md. */
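/* Expansion example (illustrative; the entry shown is hypothetical and
   need not appear in arm_neon_builtins.def):
     VAR2 (BINOP, vadd, v8qi, v16qi)
   expands to the two table entries
     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0},
   with the fcode field left as zero at this point.  */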
23432 static neon_builtin_datum neon_builtin_data[] =
23434 #include "arm_neon_builtins.def"
23437 #undef CF
23438 #undef VAR1
23439 #undef VAR2
23440 #undef VAR3
23441 #undef VAR4
23442 #undef VAR5
23443 #undef VAR6
23444 #undef VAR7
23445 #undef VAR8
23446 #undef VAR9
23447 #undef VAR10
23449 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23450 #define VAR1(T, N, A) \
23451 CF (N, A)
23452 #define VAR2(T, N, A, B) \
23453 VAR1 (T, N, A), \
23454 CF (N, B)
23455 #define VAR3(T, N, A, B, C) \
23456 VAR2 (T, N, A, B), \
23457 CF (N, C)
23458 #define VAR4(T, N, A, B, C, D) \
23459 VAR3 (T, N, A, B, C), \
23460 CF (N, D)
23461 #define VAR5(T, N, A, B, C, D, E) \
23462 VAR4 (T, N, A, B, C, D), \
23463 CF (N, E)
23464 #define VAR6(T, N, A, B, C, D, E, F) \
23465 VAR5 (T, N, A, B, C, D, E), \
23466 CF (N, F)
23467 #define VAR7(T, N, A, B, C, D, E, F, G) \
23468 VAR6 (T, N, A, B, C, D, E, F), \
23469 CF (N, G)
23470 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23471 VAR7 (T, N, A, B, C, D, E, F, G), \
23472 CF (N, H)
23473 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23474 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23475 CF (N, I)
23476 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23477 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23478 CF (N, J)
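/* With CF and the VARn macros redefined as above, the very same
   arm_neon_builtins.def entries now expand into enumerator names rather than
   table entries; the hypothetical VAR2 (BINOP, vadd, v8qi, v16qi) example
   would contribute ARM_BUILTIN_NEON_vaddv8qi and ARM_BUILTIN_NEON_vaddv16qi
   to the enum below.  */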
23479 enum arm_builtins
23481 ARM_BUILTIN_GETWCGR0,
23482 ARM_BUILTIN_GETWCGR1,
23483 ARM_BUILTIN_GETWCGR2,
23484 ARM_BUILTIN_GETWCGR3,
23486 ARM_BUILTIN_SETWCGR0,
23487 ARM_BUILTIN_SETWCGR1,
23488 ARM_BUILTIN_SETWCGR2,
23489 ARM_BUILTIN_SETWCGR3,
23491 ARM_BUILTIN_WZERO,
23493 ARM_BUILTIN_WAVG2BR,
23494 ARM_BUILTIN_WAVG2HR,
23495 ARM_BUILTIN_WAVG2B,
23496 ARM_BUILTIN_WAVG2H,
23498 ARM_BUILTIN_WACCB,
23499 ARM_BUILTIN_WACCH,
23500 ARM_BUILTIN_WACCW,
23502 ARM_BUILTIN_WMACS,
23503 ARM_BUILTIN_WMACSZ,
23504 ARM_BUILTIN_WMACU,
23505 ARM_BUILTIN_WMACUZ,
23507 ARM_BUILTIN_WSADB,
23508 ARM_BUILTIN_WSADBZ,
23509 ARM_BUILTIN_WSADH,
23510 ARM_BUILTIN_WSADHZ,
23512 ARM_BUILTIN_WALIGNI,
23513 ARM_BUILTIN_WALIGNR0,
23514 ARM_BUILTIN_WALIGNR1,
23515 ARM_BUILTIN_WALIGNR2,
23516 ARM_BUILTIN_WALIGNR3,
23518 ARM_BUILTIN_TMIA,
23519 ARM_BUILTIN_TMIAPH,
23520 ARM_BUILTIN_TMIABB,
23521 ARM_BUILTIN_TMIABT,
23522 ARM_BUILTIN_TMIATB,
23523 ARM_BUILTIN_TMIATT,
23525 ARM_BUILTIN_TMOVMSKB,
23526 ARM_BUILTIN_TMOVMSKH,
23527 ARM_BUILTIN_TMOVMSKW,
23529 ARM_BUILTIN_TBCSTB,
23530 ARM_BUILTIN_TBCSTH,
23531 ARM_BUILTIN_TBCSTW,
23533 ARM_BUILTIN_WMADDS,
23534 ARM_BUILTIN_WMADDU,
23536 ARM_BUILTIN_WPACKHSS,
23537 ARM_BUILTIN_WPACKWSS,
23538 ARM_BUILTIN_WPACKDSS,
23539 ARM_BUILTIN_WPACKHUS,
23540 ARM_BUILTIN_WPACKWUS,
23541 ARM_BUILTIN_WPACKDUS,
23543 ARM_BUILTIN_WADDB,
23544 ARM_BUILTIN_WADDH,
23545 ARM_BUILTIN_WADDW,
23546 ARM_BUILTIN_WADDSSB,
23547 ARM_BUILTIN_WADDSSH,
23548 ARM_BUILTIN_WADDSSW,
23549 ARM_BUILTIN_WADDUSB,
23550 ARM_BUILTIN_WADDUSH,
23551 ARM_BUILTIN_WADDUSW,
23552 ARM_BUILTIN_WSUBB,
23553 ARM_BUILTIN_WSUBH,
23554 ARM_BUILTIN_WSUBW,
23555 ARM_BUILTIN_WSUBSSB,
23556 ARM_BUILTIN_WSUBSSH,
23557 ARM_BUILTIN_WSUBSSW,
23558 ARM_BUILTIN_WSUBUSB,
23559 ARM_BUILTIN_WSUBUSH,
23560 ARM_BUILTIN_WSUBUSW,
23562 ARM_BUILTIN_WAND,
23563 ARM_BUILTIN_WANDN,
23564 ARM_BUILTIN_WOR,
23565 ARM_BUILTIN_WXOR,
23567 ARM_BUILTIN_WCMPEQB,
23568 ARM_BUILTIN_WCMPEQH,
23569 ARM_BUILTIN_WCMPEQW,
23570 ARM_BUILTIN_WCMPGTUB,
23571 ARM_BUILTIN_WCMPGTUH,
23572 ARM_BUILTIN_WCMPGTUW,
23573 ARM_BUILTIN_WCMPGTSB,
23574 ARM_BUILTIN_WCMPGTSH,
23575 ARM_BUILTIN_WCMPGTSW,
23577 ARM_BUILTIN_TEXTRMSB,
23578 ARM_BUILTIN_TEXTRMSH,
23579 ARM_BUILTIN_TEXTRMSW,
23580 ARM_BUILTIN_TEXTRMUB,
23581 ARM_BUILTIN_TEXTRMUH,
23582 ARM_BUILTIN_TEXTRMUW,
23583 ARM_BUILTIN_TINSRB,
23584 ARM_BUILTIN_TINSRH,
23585 ARM_BUILTIN_TINSRW,
23587 ARM_BUILTIN_WMAXSW,
23588 ARM_BUILTIN_WMAXSH,
23589 ARM_BUILTIN_WMAXSB,
23590 ARM_BUILTIN_WMAXUW,
23591 ARM_BUILTIN_WMAXUH,
23592 ARM_BUILTIN_WMAXUB,
23593 ARM_BUILTIN_WMINSW,
23594 ARM_BUILTIN_WMINSH,
23595 ARM_BUILTIN_WMINSB,
23596 ARM_BUILTIN_WMINUW,
23597 ARM_BUILTIN_WMINUH,
23598 ARM_BUILTIN_WMINUB,
23600 ARM_BUILTIN_WMULUM,
23601 ARM_BUILTIN_WMULSM,
23602 ARM_BUILTIN_WMULUL,
23604 ARM_BUILTIN_PSADBH,
23605 ARM_BUILTIN_WSHUFH,
23607 ARM_BUILTIN_WSLLH,
23608 ARM_BUILTIN_WSLLW,
23609 ARM_BUILTIN_WSLLD,
23610 ARM_BUILTIN_WSRAH,
23611 ARM_BUILTIN_WSRAW,
23612 ARM_BUILTIN_WSRAD,
23613 ARM_BUILTIN_WSRLH,
23614 ARM_BUILTIN_WSRLW,
23615 ARM_BUILTIN_WSRLD,
23616 ARM_BUILTIN_WRORH,
23617 ARM_BUILTIN_WRORW,
23618 ARM_BUILTIN_WRORD,
23619 ARM_BUILTIN_WSLLHI,
23620 ARM_BUILTIN_WSLLWI,
23621 ARM_BUILTIN_WSLLDI,
23622 ARM_BUILTIN_WSRAHI,
23623 ARM_BUILTIN_WSRAWI,
23624 ARM_BUILTIN_WSRADI,
23625 ARM_BUILTIN_WSRLHI,
23626 ARM_BUILTIN_WSRLWI,
23627 ARM_BUILTIN_WSRLDI,
23628 ARM_BUILTIN_WRORHI,
23629 ARM_BUILTIN_WRORWI,
23630 ARM_BUILTIN_WRORDI,
23632 ARM_BUILTIN_WUNPCKIHB,
23633 ARM_BUILTIN_WUNPCKIHH,
23634 ARM_BUILTIN_WUNPCKIHW,
23635 ARM_BUILTIN_WUNPCKILB,
23636 ARM_BUILTIN_WUNPCKILH,
23637 ARM_BUILTIN_WUNPCKILW,
23639 ARM_BUILTIN_WUNPCKEHSB,
23640 ARM_BUILTIN_WUNPCKEHSH,
23641 ARM_BUILTIN_WUNPCKEHSW,
23642 ARM_BUILTIN_WUNPCKEHUB,
23643 ARM_BUILTIN_WUNPCKEHUH,
23644 ARM_BUILTIN_WUNPCKEHUW,
23645 ARM_BUILTIN_WUNPCKELSB,
23646 ARM_BUILTIN_WUNPCKELSH,
23647 ARM_BUILTIN_WUNPCKELSW,
23648 ARM_BUILTIN_WUNPCKELUB,
23649 ARM_BUILTIN_WUNPCKELUH,
23650 ARM_BUILTIN_WUNPCKELUW,
23652 ARM_BUILTIN_WABSB,
23653 ARM_BUILTIN_WABSH,
23654 ARM_BUILTIN_WABSW,
23656 ARM_BUILTIN_WADDSUBHX,
23657 ARM_BUILTIN_WSUBADDHX,
23659 ARM_BUILTIN_WABSDIFFB,
23660 ARM_BUILTIN_WABSDIFFH,
23661 ARM_BUILTIN_WABSDIFFW,
23663 ARM_BUILTIN_WADDCH,
23664 ARM_BUILTIN_WADDCW,
23666 ARM_BUILTIN_WAVG4,
23667 ARM_BUILTIN_WAVG4R,
23669 ARM_BUILTIN_WMADDSX,
23670 ARM_BUILTIN_WMADDUX,
23672 ARM_BUILTIN_WMADDSN,
23673 ARM_BUILTIN_WMADDUN,
23675 ARM_BUILTIN_WMULWSM,
23676 ARM_BUILTIN_WMULWUM,
23678 ARM_BUILTIN_WMULWSMR,
23679 ARM_BUILTIN_WMULWUMR,
23681 ARM_BUILTIN_WMULWL,
23683 ARM_BUILTIN_WMULSMR,
23684 ARM_BUILTIN_WMULUMR,
23686 ARM_BUILTIN_WQMULM,
23687 ARM_BUILTIN_WQMULMR,
23689 ARM_BUILTIN_WQMULWM,
23690 ARM_BUILTIN_WQMULWMR,
23692 ARM_BUILTIN_WADDBHUSM,
23693 ARM_BUILTIN_WADDBHUSL,
23695 ARM_BUILTIN_WQMIABB,
23696 ARM_BUILTIN_WQMIABT,
23697 ARM_BUILTIN_WQMIATB,
23698 ARM_BUILTIN_WQMIATT,
23700 ARM_BUILTIN_WQMIABBN,
23701 ARM_BUILTIN_WQMIABTN,
23702 ARM_BUILTIN_WQMIATBN,
23703 ARM_BUILTIN_WQMIATTN,
23705 ARM_BUILTIN_WMIABB,
23706 ARM_BUILTIN_WMIABT,
23707 ARM_BUILTIN_WMIATB,
23708 ARM_BUILTIN_WMIATT,
23710 ARM_BUILTIN_WMIABBN,
23711 ARM_BUILTIN_WMIABTN,
23712 ARM_BUILTIN_WMIATBN,
23713 ARM_BUILTIN_WMIATTN,
23715 ARM_BUILTIN_WMIAWBB,
23716 ARM_BUILTIN_WMIAWBT,
23717 ARM_BUILTIN_WMIAWTB,
23718 ARM_BUILTIN_WMIAWTT,
23720 ARM_BUILTIN_WMIAWBBN,
23721 ARM_BUILTIN_WMIAWBTN,
23722 ARM_BUILTIN_WMIAWTBN,
23723 ARM_BUILTIN_WMIAWTTN,
23725 ARM_BUILTIN_WMERGE,
23727 ARM_BUILTIN_CRC32B,
23728 ARM_BUILTIN_CRC32H,
23729 ARM_BUILTIN_CRC32W,
23730 ARM_BUILTIN_CRC32CB,
23731 ARM_BUILTIN_CRC32CH,
23732 ARM_BUILTIN_CRC32CW,
23734 ARM_BUILTIN_GET_FPSCR,
23735 ARM_BUILTIN_SET_FPSCR,
23737 #undef CRYPTO1
23738 #undef CRYPTO2
23739 #undef CRYPTO3
23741 #define CRYPTO1(L, U, M1, M2) \
23742 ARM_BUILTIN_CRYPTO_##U,
23743 #define CRYPTO2(L, U, M1, M2, M3) \
23744 ARM_BUILTIN_CRYPTO_##U,
23745 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23746 ARM_BUILTIN_CRYPTO_##U,
23748 #include "crypto.def"
23750 #undef CRYPTO1
23751 #undef CRYPTO2
23752 #undef CRYPTO3
23754 #include "arm_neon_builtins.def"
23756 ,ARM_BUILTIN_MAX
23759 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
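/* Hence, if arm_neon_builtins.def yields N table entries, the NEON builtins
   occupy the last N codes of the enum: neon_builtin_data[i] is assigned the
   function code ARM_BUILTIN_NEON_BASE + i (see the fcode loop in
   arm_init_neon_builtins below).  */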
23761 #undef CF
23762 #undef VAR1
23763 #undef VAR2
23764 #undef VAR3
23765 #undef VAR4
23766 #undef VAR5
23767 #undef VAR6
23768 #undef VAR7
23769 #undef VAR8
23770 #undef VAR9
23771 #undef VAR10
23773 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23775 #define NUM_DREG_TYPES 5
23776 #define NUM_QREG_TYPES 6
23778 static void
23779 arm_init_neon_builtins (void)
23781 unsigned int i, fcode;
23782 tree decl;
23784 tree neon_intQI_type_node;
23785 tree neon_intHI_type_node;
23786 tree neon_floatHF_type_node;
23787 tree neon_polyQI_type_node;
23788 tree neon_polyHI_type_node;
23789 tree neon_intSI_type_node;
23790 tree neon_intDI_type_node;
23791 tree neon_intUTI_type_node;
23792 tree neon_float_type_node;
23794 tree intQI_pointer_node;
23795 tree intHI_pointer_node;
23796 tree intSI_pointer_node;
23797 tree intDI_pointer_node;
23798 tree float_pointer_node;
23800 tree const_intQI_node;
23801 tree const_intHI_node;
23802 tree const_intSI_node;
23803 tree const_intDI_node;
23804 tree const_float_node;
23806 tree const_intQI_pointer_node;
23807 tree const_intHI_pointer_node;
23808 tree const_intSI_pointer_node;
23809 tree const_intDI_pointer_node;
23810 tree const_float_pointer_node;
23812 tree V8QI_type_node;
23813 tree V4HI_type_node;
23814 tree V4UHI_type_node;
23815 tree V4HF_type_node;
23816 tree V2SI_type_node;
23817 tree V2USI_type_node;
23818 tree V2SF_type_node;
23819 tree V16QI_type_node;
23820 tree V8HI_type_node;
23821 tree V8UHI_type_node;
23822 tree V4SI_type_node;
23823 tree V4USI_type_node;
23824 tree V4SF_type_node;
23825 tree V2DI_type_node;
23826 tree V2UDI_type_node;
23828 tree intUQI_type_node;
23829 tree intUHI_type_node;
23830 tree intUSI_type_node;
23831 tree intUDI_type_node;
23833 tree intEI_type_node;
23834 tree intOI_type_node;
23835 tree intCI_type_node;
23836 tree intXI_type_node;
23838 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23839 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23840 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23842 /* Create distinguished type nodes for NEON vector element types,
23843 and pointers to values of such types, so we can detect them later. */
23844 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23845 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23846 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23847 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23848 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23849 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23850 neon_float_type_node = make_node (REAL_TYPE);
23851 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23852 layout_type (neon_float_type_node);
23853 neon_floatHF_type_node = make_node (REAL_TYPE);
23854 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23855 layout_type (neon_floatHF_type_node);
23857 /* Define typedefs which exactly correspond to the modes we are basing vector
23858 types on. If you change these names you'll need to change
23859 the table used by arm_mangle_type too. */
23860 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23861 "__builtin_neon_qi");
23862 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23863 "__builtin_neon_hi");
23864 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23865 "__builtin_neon_hf");
23866 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23867 "__builtin_neon_si");
23868 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23869 "__builtin_neon_sf");
23870 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23871 "__builtin_neon_di");
23872 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23873 "__builtin_neon_poly8");
23874 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23875 "__builtin_neon_poly16");
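/* These __builtin_neon_* names are the element types from which arm_neon.h
   builds its user-visible vector types; as a rough sketch (the exact
   typedefs live in arm_neon.h, not here), something along the lines of

       typedef __builtin_neon_qi int8x8_t
           __attribute__ ((__vector_size__ (8)));

   which is why renaming any of these entries also means updating
   arm_mangle_type, as noted above.  */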
23877 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23878 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23879 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23880 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23881 float_pointer_node = build_pointer_type (neon_float_type_node);
23883 /* Next create constant-qualified versions of the above types. */
23884 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23885 TYPE_QUAL_CONST);
23886 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23887 TYPE_QUAL_CONST);
23888 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23889 TYPE_QUAL_CONST);
23890 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23891 TYPE_QUAL_CONST);
23892 const_float_node = build_qualified_type (neon_float_type_node,
23893 TYPE_QUAL_CONST);
23895 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23896 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23897 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23898 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23899 const_float_pointer_node = build_pointer_type (const_float_node);
23901 /* Unsigned integer types for various mode sizes. */
23902 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23903 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23904 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23905 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23906 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23907 /* Now create vector types based on our NEON element types. */
23908 /* 64-bit vectors. */
23909 V8QI_type_node =
23910 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23911 V4HI_type_node =
23912 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23913 V4UHI_type_node =
23914 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23915 V4HF_type_node =
23916 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23917 V2SI_type_node =
23918 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23919 V2USI_type_node =
23920 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23921 V2SF_type_node =
23922 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23923 /* 128-bit vectors. */
23924 V16QI_type_node =
23925 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23926 V8HI_type_node =
23927 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23928 V8UHI_type_node =
23929 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23930 V4SI_type_node =
23931 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23932 V4USI_type_node =
23933 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23934 V4SF_type_node =
23935 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23936 V2DI_type_node =
23937 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23938 V2UDI_type_node =
23939 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23942 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23943 "__builtin_neon_uqi");
23944 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23945 "__builtin_neon_uhi");
23946 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23947 "__builtin_neon_usi");
23948 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23949 "__builtin_neon_udi");
23950 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23951 "__builtin_neon_poly64");
23952 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23953 "__builtin_neon_poly128");
23955 /* Opaque integer types for structures of vectors. */
23956 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23957 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23958 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23959 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23961 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23962 "__builtin_neon_ti");
23963 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23964 "__builtin_neon_ei");
23965 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23966 "__builtin_neon_oi");
23967 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23968 "__builtin_neon_ci");
23969 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23970 "__builtin_neon_xi");
23972 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23975 tree V16UQI_type_node =
23976 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23978 tree v16uqi_ftype_v16uqi
23979 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23981 tree v16uqi_ftype_v16uqi_v16uqi
23982 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23983 V16UQI_type_node, NULL_TREE);
23985 tree v4usi_ftype_v4usi
23986 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23988 tree v4usi_ftype_v4usi_v4usi
23989 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23990 V4USI_type_node, NULL_TREE);
23992 tree v4usi_ftype_v4usi_v4usi_v4usi
23993 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23994 V4USI_type_node, V4USI_type_node, NULL_TREE);
23996 tree uti_ftype_udi_udi
23997 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23998 intUDI_type_node, NULL_TREE);
24000 #undef CRYPTO1
24001 #undef CRYPTO2
24002 #undef CRYPTO3
24003 #undef C
24004 #undef N
24005 #undef CF
24006 #undef FT1
24007 #undef FT2
24008 #undef FT3
24010 #define C(U) \
24011 ARM_BUILTIN_CRYPTO_##U
24012 #define N(L) \
24013 "__builtin_arm_crypto_"#L
24014 #define FT1(R, A) \
24015 R##_ftype_##A
24016 #define FT2(R, A1, A2) \
24017 R##_ftype_##A1##_##A2
24018 #define FT3(R, A1, A2, A3) \
24019 R##_ftype_##A1##_##A2##_##A3
24020 #define CRYPTO1(L, U, R, A) \
24021 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
24022 C (U), BUILT_IN_MD, \
24023 NULL, NULL_TREE);
24024 #define CRYPTO2(L, U, R, A1, A2) \
24025 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
24026 C (U), BUILT_IN_MD, \
24027 NULL, NULL_TREE);
24029 #define CRYPTO3(L, U, R, A1, A2, A3) \
24030 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
24031 C (U), BUILT_IN_MD, \
24032 NULL, NULL_TREE);
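/* As an illustration (the entry shown is hypothetical; the real ones are in
   crypto.def), a two-operand entry such as

       CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   expands through the C, N and FT2 macros above into

       arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
         = add_builtin_function ("__builtin_arm_crypto_aesd",
                                 v16uqi_ftype_v16uqi_v16uqi,
                                 ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                                 NULL, NULL_TREE);  */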
24033 #include "crypto.def"
24035 #undef CRYPTO1
24036 #undef CRYPTO2
24037 #undef CRYPTO3
24038 #undef C
24039 #undef N
24040 #undef FT1
24041 #undef FT2
24042 #undef FT3
24044 dreg_types[0] = V8QI_type_node;
24045 dreg_types[1] = V4HI_type_node;
24046 dreg_types[2] = V2SI_type_node;
24047 dreg_types[3] = V2SF_type_node;
24048 dreg_types[4] = neon_intDI_type_node;
24050 qreg_types[0] = V16QI_type_node;
24051 qreg_types[1] = V8HI_type_node;
24052 qreg_types[2] = V4SI_type_node;
24053 qreg_types[3] = V4SF_type_node;
24054 qreg_types[4] = V2DI_type_node;
24055 qreg_types[5] = neon_intUTI_type_node;
24057 for (i = 0; i < NUM_QREG_TYPES; i++)
24059 int j;
24060 for (j = 0; j < NUM_QREG_TYPES; j++)
24062 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24063 reinterp_ftype_dreg[i][j]
24064 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24066 reinterp_ftype_qreg[i][j]
24067 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24071 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24072 i < ARRAY_SIZE (neon_builtin_data);
24073 i++, fcode++)
24075 neon_builtin_datum *d = &neon_builtin_data[i];
24077 const char* const modenames[] = {
24078 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24079 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24080 "ti", "ei", "oi"
24082 char namebuf[60];
24083 tree ftype = NULL;
24084 int is_load = 0, is_store = 0;
24086 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24088 d->fcode = fcode;
24090 switch (d->itype)
24092 case NEON_LOAD1:
24093 case NEON_LOAD1LANE:
24094 case NEON_LOADSTRUCT:
24095 case NEON_LOADSTRUCTLANE:
24096 is_load = 1;
24097 /* Fall through. */
24098 case NEON_STORE1:
24099 case NEON_STORE1LANE:
24100 case NEON_STORESTRUCT:
24101 case NEON_STORESTRUCTLANE:
24102 if (!is_load)
24103 is_store = 1;
24104 /* Fall through. */
24105 case NEON_UNOP:
24106 case NEON_RINT:
24107 case NEON_BINOP:
24108 case NEON_LOGICBINOP:
24109 case NEON_SHIFTINSERT:
24110 case NEON_TERNOP:
24111 case NEON_GETLANE:
24112 case NEON_SETLANE:
24113 case NEON_CREATE:
24114 case NEON_DUP:
24115 case NEON_DUPLANE:
24116 case NEON_SHIFTIMM:
24117 case NEON_SHIFTACC:
24118 case NEON_COMBINE:
24119 case NEON_SPLIT:
24120 case NEON_CONVERT:
24121 case NEON_FIXCONV:
24122 case NEON_LANEMUL:
24123 case NEON_LANEMULL:
24124 case NEON_LANEMULH:
24125 case NEON_LANEMAC:
24126 case NEON_SCALARMUL:
24127 case NEON_SCALARMULL:
24128 case NEON_SCALARMULH:
24129 case NEON_SCALARMAC:
24130 case NEON_SELECT:
24131 case NEON_VTBL:
24132 case NEON_VTBX:
24134 int k;
24135 tree return_type = void_type_node, args = void_list_node;
24137 /* Build a function type directly from the insn_data for
24138 this builtin. The build_function_type() function takes
24139 care of removing duplicates for us. */
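/* As a sketch: for a hypothetical binary operation whose pattern has three
   V8QImode generator operands, the loop walks k = 2, 1, 0; operands 2 and 1
   are pushed onto ARGS as V8QI_type_node and operand 0 becomes the return
   type, i.e. the equivalent of v8qi_ftype_v8qi_v8qi.  Loads and stores
   instead map their memory operand (operand 1 or 0 respectively) onto an
   element pointer type (const-qualified for loads).  */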
24140 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24142 tree eltype;
24144 if (is_load && k == 1)
24146 /* Neon load patterns always have the memory
24147 operand in the operand 1 position. */
24148 gcc_assert (insn_data[d->code].operand[k].predicate
24149 == neon_struct_operand);
24151 switch (d->mode)
24153 case T_V8QI:
24154 case T_V16QI:
24155 eltype = const_intQI_pointer_node;
24156 break;
24158 case T_V4HI:
24159 case T_V8HI:
24160 eltype = const_intHI_pointer_node;
24161 break;
24163 case T_V2SI:
24164 case T_V4SI:
24165 eltype = const_intSI_pointer_node;
24166 break;
24168 case T_V2SF:
24169 case T_V4SF:
24170 eltype = const_float_pointer_node;
24171 break;
24173 case T_DI:
24174 case T_V2DI:
24175 eltype = const_intDI_pointer_node;
24176 break;
24178 default: gcc_unreachable ();
24181 else if (is_store && k == 0)
24183 /* Similarly, Neon store patterns use operand 0 as
24184 the memory location to store to. */
24185 gcc_assert (insn_data[d->code].operand[k].predicate
24186 == neon_struct_operand);
24188 switch (d->mode)
24190 case T_V8QI:
24191 case T_V16QI:
24192 eltype = intQI_pointer_node;
24193 break;
24195 case T_V4HI:
24196 case T_V8HI:
24197 eltype = intHI_pointer_node;
24198 break;
24200 case T_V2SI:
24201 case T_V4SI:
24202 eltype = intSI_pointer_node;
24203 break;
24205 case T_V2SF:
24206 case T_V4SF:
24207 eltype = float_pointer_node;
24208 break;
24210 case T_DI:
24211 case T_V2DI:
24212 eltype = intDI_pointer_node;
24213 break;
24215 default: gcc_unreachable ();
24218 else
24220 switch (insn_data[d->code].operand[k].mode)
24222 case VOIDmode: eltype = void_type_node; break;
24223 /* Scalars. */
24224 case QImode: eltype = neon_intQI_type_node; break;
24225 case HImode: eltype = neon_intHI_type_node; break;
24226 case SImode: eltype = neon_intSI_type_node; break;
24227 case SFmode: eltype = neon_float_type_node; break;
24228 case DImode: eltype = neon_intDI_type_node; break;
24229 case TImode: eltype = intTI_type_node; break;
24230 case EImode: eltype = intEI_type_node; break;
24231 case OImode: eltype = intOI_type_node; break;
24232 case CImode: eltype = intCI_type_node; break;
24233 case XImode: eltype = intXI_type_node; break;
24234 /* 64-bit vectors. */
24235 case V8QImode: eltype = V8QI_type_node; break;
24236 case V4HImode: eltype = V4HI_type_node; break;
24237 case V2SImode: eltype = V2SI_type_node; break;
24238 case V2SFmode: eltype = V2SF_type_node; break;
24239 /* 128-bit vectors. */
24240 case V16QImode: eltype = V16QI_type_node; break;
24241 case V8HImode: eltype = V8HI_type_node; break;
24242 case V4SImode: eltype = V4SI_type_node; break;
24243 case V4SFmode: eltype = V4SF_type_node; break;
24244 case V2DImode: eltype = V2DI_type_node; break;
24245 default: gcc_unreachable ();
24249 if (k == 0 && !is_store)
24250 return_type = eltype;
24251 else
24252 args = tree_cons (NULL_TREE, eltype, args);
24255 ftype = build_function_type (return_type, args);
24257 break;
24259 case NEON_REINTERP:
24261 /* We iterate over NUM_DREG_TYPES doubleword types,
24262 then NUM_QREG_TYPES quadword types.
24263 V4HF is not a type used in reinterpret, so we translate
24264 d->mode to the correct index in reinterp_ftype_dreg. */
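/* Worked example, assuming the neon_builtin_type_mode enumerators follow the
   same order as the modenames[] table above: for d->mode == T_V2SI (3),
   qreg_p is false and T_V2SI > T_V4HF, so rhs = (3 - 1) % NUM_QREG_TYPES
   == 2, i.e. dreg_types[2] (V2SI); for d->mode == T_TI (11), qreg_p is
   true, so rhs = 11 % NUM_QREG_TYPES == 5, i.e. qreg_types[5] (TI).  */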
24265 bool qreg_p
24266 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24267 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24268 % NUM_QREG_TYPES;
24269 switch (insn_data[d->code].operand[0].mode)
24271 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24272 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24273 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24274 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24275 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24276 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24277 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24278 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24279 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24280 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24281 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24282 default: gcc_unreachable ();
24285 break;
24286 case NEON_FLOAT_WIDEN:
24288 tree eltype = NULL_TREE;
24289 tree return_type = NULL_TREE;
24291 switch (insn_data[d->code].operand[1].mode)
24293 case V4HFmode:
24294 eltype = V4HF_type_node;
24295 return_type = V4SF_type_node;
24296 break;
24297 default: gcc_unreachable ();
24299 ftype = build_function_type_list (return_type, eltype, NULL);
24300 break;
24302 case NEON_FLOAT_NARROW:
24304 tree eltype = NULL_TREE;
24305 tree return_type = NULL_TREE;
24307 switch (insn_data[d->code].operand[1].mode)
24309 case V4SFmode:
24310 eltype = V4SF_type_node;
24311 return_type = V4HF_type_node;
24312 break;
24313 default: gcc_unreachable ();
24315 ftype = build_function_type_list (return_type, eltype, NULL);
24316 break;
24318 case NEON_BSWAP:
24320 tree eltype = NULL_TREE;
24321 switch (insn_data[d->code].operand[1].mode)
24323 case V4HImode:
24324 eltype = V4UHI_type_node;
24325 break;
24326 case V8HImode:
24327 eltype = V8UHI_type_node;
24328 break;
24329 case V2SImode:
24330 eltype = V2USI_type_node;
24331 break;
24332 case V4SImode:
24333 eltype = V4USI_type_node;
24334 break;
24335 case V2DImode:
24336 eltype = V2UDI_type_node;
24337 break;
24338 default: gcc_unreachable ();
24340 ftype = build_function_type_list (eltype, eltype, NULL);
24341 break;
24343 case NEON_COPYSIGNF:
24345 tree eltype = NULL_TREE;
24346 switch (insn_data[d->code].operand[1].mode)
24348 case V2SFmode:
24349 eltype = V2SF_type_node;
24350 break;
24351 case V4SFmode:
24352 eltype = V4SF_type_node;
24353 break;
24354 default: gcc_unreachable ();
24356 ftype = build_function_type_list (eltype, eltype, NULL);
24357 break;
24359 default:
24360 gcc_unreachable ();
24363 gcc_assert (ftype != NULL);
24365 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24367 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24368 NULL_TREE);
24369 arm_builtin_decls[fcode] = decl;
24373 #undef NUM_DREG_TYPES
24374 #undef NUM_QREG_TYPES
24376 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24377 do \
24379 if ((MASK) & insn_flags) \
24381 tree bdecl; \
24382 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24383 BUILT_IN_MD, NULL, NULL_TREE); \
24384 arm_builtin_decls[CODE] = bdecl; \
24387 while (0)
24389 struct builtin_description
24391 const unsigned int mask;
24392 const enum insn_code icode;
24393 const char * const name;
24394 const enum arm_builtins code;
24395 const enum rtx_code comparison;
24396 const unsigned int flag;
24399 static const struct builtin_description bdesc_2arg[] =
24401 #define IWMMXT_BUILTIN(code, string, builtin) \
24402 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24403 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24405 #define IWMMXT2_BUILTIN(code, string, builtin) \
24406 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24407 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24409 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24410 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24411 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24412 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24413 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24414 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24415 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24416 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24417 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24418 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24419 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24420 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24421 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24422 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24423 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24424 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24425 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24426 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24427 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24428 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24429 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24430 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24431 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24432 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24433 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24434 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24435 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24436 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24437 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24438 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24439 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24440 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24441 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24442 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24443 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24444 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24445 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24446 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24447 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24448 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24449 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24450 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24451 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24452 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24453 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24454 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24455 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24456 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24457 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24458 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24459 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24460 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24461 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24462 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24463 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24464 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24465 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24466 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24467 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24468 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24469 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24470 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24471 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24472 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24473 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24474 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24475 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24476 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24477 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24478 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24479 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24480 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24481 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24482 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24483 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24484 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24485 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24486 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24488 #define IWMMXT_BUILTIN2(code, builtin) \
24489 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24491 #define IWMMXT2_BUILTIN2(code, builtin) \
24492 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24494 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24495 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24496 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24497 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24498 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24499 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24500 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24501 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24502 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24503 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24506 #define FP_BUILTIN(L, U) \
24507 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24508 UNKNOWN, 0},
24510 FP_BUILTIN (get_fpscr, GET_FPSCR)
24511 FP_BUILTIN (set_fpscr, SET_FPSCR)
24512 #undef FP_BUILTIN
24514 #define CRC32_BUILTIN(L, U) \
24515 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24516 UNKNOWN, 0},
24517 CRC32_BUILTIN (crc32b, CRC32B)
24518 CRC32_BUILTIN (crc32h, CRC32H)
24519 CRC32_BUILTIN (crc32w, CRC32W)
24520 CRC32_BUILTIN (crc32cb, CRC32CB)
24521 CRC32_BUILTIN (crc32ch, CRC32CH)
24522 CRC32_BUILTIN (crc32cw, CRC32CW)
24523 #undef CRC32_BUILTIN
24526 #define CRYPTO_BUILTIN(L, U) \
24527 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24528 UNKNOWN, 0},
24529 #undef CRYPTO1
24530 #undef CRYPTO2
24531 #undef CRYPTO3
24532 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24533 #define CRYPTO1(L, U, R, A)
24534 #define CRYPTO3(L, U, R, A1, A2, A3)
24535 #include "crypto.def"
24536 #undef CRYPTO1
24537 #undef CRYPTO2
24538 #undef CRYPTO3
24542 static const struct builtin_description bdesc_1arg[] =
24544 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24545 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24546 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24547 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24548 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24549 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24550 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24551 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24552 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24553 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24554 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24555 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24556 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24557 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24558 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24559 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24560 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24561 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24562 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24563 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24564 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24565 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24566 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24567 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24569 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24570 #define CRYPTO2(L, U, R, A1, A2)
24571 #define CRYPTO3(L, U, R, A1, A2, A3)
24572 #include "crypto.def"
24573 #undef CRYPTO1
24574 #undef CRYPTO2
24575 #undef CRYPTO3
24578 static const struct builtin_description bdesc_3arg[] =
24580 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24581 #define CRYPTO1(L, U, R, A)
24582 #define CRYPTO2(L, U, R, A1, A2)
24583 #include "crypto.def"
24584 #undef CRYPTO1
24585 #undef CRYPTO2
24586 #undef CRYPTO3
24588 #undef CRYPTO_BUILTIN
24590 /* Set up all the iWMMXt builtins.  This is only called when
24591 TARGET_REALLY_IWMMXT is nonzero.  */
24593 static void
24594 arm_init_iwmmxt_builtins (void)
24596 const struct builtin_description * d;
24597 size_t i;
24599 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24600 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24601 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24603 tree v8qi_ftype_v8qi_v8qi_int
24604 = build_function_type_list (V8QI_type_node,
24605 V8QI_type_node, V8QI_type_node,
24606 integer_type_node, NULL_TREE);
24607 tree v4hi_ftype_v4hi_int
24608 = build_function_type_list (V4HI_type_node,
24609 V4HI_type_node, integer_type_node, NULL_TREE);
24610 tree v2si_ftype_v2si_int
24611 = build_function_type_list (V2SI_type_node,
24612 V2SI_type_node, integer_type_node, NULL_TREE);
24613 tree v2si_ftype_di_di
24614 = build_function_type_list (V2SI_type_node,
24615 long_long_integer_type_node,
24616 long_long_integer_type_node,
24617 NULL_TREE);
24618 tree di_ftype_di_int
24619 = build_function_type_list (long_long_integer_type_node,
24620 long_long_integer_type_node,
24621 integer_type_node, NULL_TREE);
24622 tree di_ftype_di_int_int
24623 = build_function_type_list (long_long_integer_type_node,
24624 long_long_integer_type_node,
24625 integer_type_node,
24626 integer_type_node, NULL_TREE);
24627 tree int_ftype_v8qi
24628 = build_function_type_list (integer_type_node,
24629 V8QI_type_node, NULL_TREE);
24630 tree int_ftype_v4hi
24631 = build_function_type_list (integer_type_node,
24632 V4HI_type_node, NULL_TREE);
24633 tree int_ftype_v2si
24634 = build_function_type_list (integer_type_node,
24635 V2SI_type_node, NULL_TREE);
24636 tree int_ftype_v8qi_int
24637 = build_function_type_list (integer_type_node,
24638 V8QI_type_node, integer_type_node, NULL_TREE);
24639 tree int_ftype_v4hi_int
24640 = build_function_type_list (integer_type_node,
24641 V4HI_type_node, integer_type_node, NULL_TREE);
24642 tree int_ftype_v2si_int
24643 = build_function_type_list (integer_type_node,
24644 V2SI_type_node, integer_type_node, NULL_TREE);
24645 tree v8qi_ftype_v8qi_int_int
24646 = build_function_type_list (V8QI_type_node,
24647 V8QI_type_node, integer_type_node,
24648 integer_type_node, NULL_TREE);
24649 tree v4hi_ftype_v4hi_int_int
24650 = build_function_type_list (V4HI_type_node,
24651 V4HI_type_node, integer_type_node,
24652 integer_type_node, NULL_TREE);
24653 tree v2si_ftype_v2si_int_int
24654 = build_function_type_list (V2SI_type_node,
24655 V2SI_type_node, integer_type_node,
24656 integer_type_node, NULL_TREE);
24657 /* Miscellaneous. */
24658 tree v8qi_ftype_v4hi_v4hi
24659 = build_function_type_list (V8QI_type_node,
24660 V4HI_type_node, V4HI_type_node, NULL_TREE);
24661 tree v4hi_ftype_v2si_v2si
24662 = build_function_type_list (V4HI_type_node,
24663 V2SI_type_node, V2SI_type_node, NULL_TREE);
24664 tree v8qi_ftype_v4hi_v8qi
24665 = build_function_type_list (V8QI_type_node,
24666 V4HI_type_node, V8QI_type_node, NULL_TREE);
24667 tree v2si_ftype_v4hi_v4hi
24668 = build_function_type_list (V2SI_type_node,
24669 V4HI_type_node, V4HI_type_node, NULL_TREE);
24670 tree v2si_ftype_v8qi_v8qi
24671 = build_function_type_list (V2SI_type_node,
24672 V8QI_type_node, V8QI_type_node, NULL_TREE);
24673 tree v4hi_ftype_v4hi_di
24674 = build_function_type_list (V4HI_type_node,
24675 V4HI_type_node, long_long_integer_type_node,
24676 NULL_TREE);
24677 tree v2si_ftype_v2si_di
24678 = build_function_type_list (V2SI_type_node,
24679 V2SI_type_node, long_long_integer_type_node,
24680 NULL_TREE);
24681 tree di_ftype_void
24682 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24683 tree int_ftype_void
24684 = build_function_type_list (integer_type_node, NULL_TREE);
24685 tree di_ftype_v8qi
24686 = build_function_type_list (long_long_integer_type_node,
24687 V8QI_type_node, NULL_TREE);
24688 tree di_ftype_v4hi
24689 = build_function_type_list (long_long_integer_type_node,
24690 V4HI_type_node, NULL_TREE);
24691 tree di_ftype_v2si
24692 = build_function_type_list (long_long_integer_type_node,
24693 V2SI_type_node, NULL_TREE);
24694 tree v2si_ftype_v4hi
24695 = build_function_type_list (V2SI_type_node,
24696 V4HI_type_node, NULL_TREE);
24697 tree v4hi_ftype_v8qi
24698 = build_function_type_list (V4HI_type_node,
24699 V8QI_type_node, NULL_TREE);
24700 tree v8qi_ftype_v8qi
24701 = build_function_type_list (V8QI_type_node,
24702 V8QI_type_node, NULL_TREE);
24703 tree v4hi_ftype_v4hi
24704 = build_function_type_list (V4HI_type_node,
24705 V4HI_type_node, NULL_TREE);
24706 tree v2si_ftype_v2si
24707 = build_function_type_list (V2SI_type_node,
24708 V2SI_type_node, NULL_TREE);
24710 tree di_ftype_di_v4hi_v4hi
24711 = build_function_type_list (long_long_unsigned_type_node,
24712 long_long_unsigned_type_node,
24713 V4HI_type_node, V4HI_type_node,
24714 NULL_TREE);
24716 tree di_ftype_v4hi_v4hi
24717 = build_function_type_list (long_long_unsigned_type_node,
24718 V4HI_type_node,V4HI_type_node,
24719 NULL_TREE);
24721 tree v2si_ftype_v2si_v4hi_v4hi
24722 = build_function_type_list (V2SI_type_node,
24723 V2SI_type_node, V4HI_type_node,
24724 V4HI_type_node, NULL_TREE);
24726 tree v2si_ftype_v2si_v8qi_v8qi
24727 = build_function_type_list (V2SI_type_node,
24728 V2SI_type_node, V8QI_type_node,
24729 V8QI_type_node, NULL_TREE);
24731 tree di_ftype_di_v2si_v2si
24732 = build_function_type_list (long_long_unsigned_type_node,
24733 long_long_unsigned_type_node,
24734 V2SI_type_node, V2SI_type_node,
24735 NULL_TREE);
24737 tree di_ftype_di_di_int
24738 = build_function_type_list (long_long_unsigned_type_node,
24739 long_long_unsigned_type_node,
24740 long_long_unsigned_type_node,
24741 integer_type_node, NULL_TREE);
24743 tree void_ftype_int
24744 = build_function_type_list (void_type_node,
24745 integer_type_node, NULL_TREE);
24747 tree v8qi_ftype_char
24748 = build_function_type_list (V8QI_type_node,
24749 signed_char_type_node, NULL_TREE);
24751 tree v4hi_ftype_short
24752 = build_function_type_list (V4HI_type_node,
24753 short_integer_type_node, NULL_TREE);
24755 tree v2si_ftype_int
24756 = build_function_type_list (V2SI_type_node,
24757 integer_type_node, NULL_TREE);
24759 /* Normal vector binops. */
24760 tree v8qi_ftype_v8qi_v8qi
24761 = build_function_type_list (V8QI_type_node,
24762 V8QI_type_node, V8QI_type_node, NULL_TREE);
24763 tree v4hi_ftype_v4hi_v4hi
24764 = build_function_type_list (V4HI_type_node,
24765 V4HI_type_node,V4HI_type_node, NULL_TREE);
24766 tree v2si_ftype_v2si_v2si
24767 = build_function_type_list (V2SI_type_node,
24768 V2SI_type_node, V2SI_type_node, NULL_TREE);
24769 tree di_ftype_di_di
24770 = build_function_type_list (long_long_unsigned_type_node,
24771 long_long_unsigned_type_node,
24772 long_long_unsigned_type_node,
24773 NULL_TREE);
24775 /* Add all builtins that are more or less simple operations on two
24776 operands. */
24777 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24779 /* Use one of the operands; the target can have a different mode for
24780 mask-generating compares. */
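/* For example, the addv8qi3 entry above ("waddb") has V8QImode source
   operands, so it ends up registered as __builtin_arm_waddb with type
   v8qi_ftype_v8qi_v8qi.  */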
24781 machine_mode mode;
24782 tree type;
24784 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24785 continue;
24787 mode = insn_data[d->icode].operand[1].mode;
24789 switch (mode)
24791 case V8QImode:
24792 type = v8qi_ftype_v8qi_v8qi;
24793 break;
24794 case V4HImode:
24795 type = v4hi_ftype_v4hi_v4hi;
24796 break;
24797 case V2SImode:
24798 type = v2si_ftype_v2si_v2si;
24799 break;
24800 case DImode:
24801 type = di_ftype_di_di;
24802 break;
24804 default:
24805 gcc_unreachable ();
24808 def_mbuiltin (d->mask, d->name, type, d->code);
24811 /* Add the remaining MMX insns with somewhat more complicated types. */
24812 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24814 ARM_BUILTIN_ ## CODE)
24816 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24817 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24818 ARM_BUILTIN_ ## CODE)
24820 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24821 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24822 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24823 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24824 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24825 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24826 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24827 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24828 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24830 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24831 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24832 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24833 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24834 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24835 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24837 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24838 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24839 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24840 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24841 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24842 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24844 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24845 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24846 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24847 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24848 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24849 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24851 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24852 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24853 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24854 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24855 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24856 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24858 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24860 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24861 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24862 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24863 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24864 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24865 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24866 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24867 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24868 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24869 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24871 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24872 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24873 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24874 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24875 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24876 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24877 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24878 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24879 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24881 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24882 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24883 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24885 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24886 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24887 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24889 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24890 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24892 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24893 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24894 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24895 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24896 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24897 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24899 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24900 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24901 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24902 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24903 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24904 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24905 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24906 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24907 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24908 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24909 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24910 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24912 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24913 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24914 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24915 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24917 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24918 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24919 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24920 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24921 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24922 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24923 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24925 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24926 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24927 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24929 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24930 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24931 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24932 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24934 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24935 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24936 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24937 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24939 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24940 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24941 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24942 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24944 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24945 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24946 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24947 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24949 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24950 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24951 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24952 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24954 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24955 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24956 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24957 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24959 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24961 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24962 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24963 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24965 #undef iwmmx_mbuiltin
24966 #undef iwmmx2_mbuiltin
24969 static void
24970 arm_init_fp16_builtins (void)
24972 tree fp16_type = make_node (REAL_TYPE);
24973 TYPE_PRECISION (fp16_type) = 16;
24974 layout_type (fp16_type);
24975 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
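/* From here on "__fp16" is usable as a scalar type in C whenever a
   half-precision format has been selected (e.g. -mfp16-format=ieee), as in
   "__fp16 h = 1.0f;"; arithmetic on it is done after promotion to float,
   see arm_promoted_type below.  */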
24978 static void
24979 arm_init_crc32_builtins ()
24981 tree si_ftype_si_qi
24982 = build_function_type_list (unsigned_intSI_type_node,
24983 unsigned_intSI_type_node,
24984 unsigned_intQI_type_node, NULL_TREE);
24985 tree si_ftype_si_hi
24986 = build_function_type_list (unsigned_intSI_type_node,
24987 unsigned_intSI_type_node,
24988 unsigned_intHI_type_node, NULL_TREE);
24989 tree si_ftype_si_si
24990 = build_function_type_list (unsigned_intSI_type_node,
24991 unsigned_intSI_type_node,
24992 unsigned_intSI_type_node, NULL_TREE);
24994 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24995 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24996 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24997 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24998 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24999 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
25000 arm_builtin_decls[ARM_BUILTIN_CRC32W]
25001 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
25002 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
25003 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
25004 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
25005 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
25006 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
25007 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
25008 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
25009 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
25010 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
25011 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
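/* A small usage sketch (assumes the target actually has the CRC32
   extension, e.g. -march=armv8-a+crc):

       unsigned int next = __builtin_arm_crc32b (crc, byte);

   where CRC is the running 32-bit value and BYTE the next input byte.  */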
25014 static void
25015 arm_init_builtins (void)
25017 if (TARGET_REALLY_IWMMXT)
25018 arm_init_iwmmxt_builtins ();
25020 if (TARGET_NEON)
25021 arm_init_neon_builtins ();
25023 if (arm_fp16_format)
25024 arm_init_fp16_builtins ();
25026 if (TARGET_CRC32)
25027 arm_init_crc32_builtins ();
25029 if (TARGET_VFP && TARGET_HARD_FLOAT)
25031 tree ftype_set_fpscr
25032 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
25033 tree ftype_get_fpscr
25034 = build_function_type_list (unsigned_type_node, NULL);
25036 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
25037 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
25038 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25039 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
25040 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
25041 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25045 /* Return the ARM builtin for CODE. */
25047 static tree
25048 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25050 if (code >= ARM_BUILTIN_MAX)
25051 return error_mark_node;
25053 return arm_builtin_decls[code];
25056 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25058 static const char *
25059 arm_invalid_parameter_type (const_tree t)
25061 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25062 return N_("function parameters cannot have __fp16 type");
25063 return NULL;
25066 /* Implement TARGET_INVALID_RETURN_TYPE. */
25068 static const char *
25069 arm_invalid_return_type (const_tree t)
25071 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25072 return N_("functions cannot return __fp16 type");
25073 return NULL;
25076 /* Implement TARGET_PROMOTED_TYPE. */
25078 static tree
25079 arm_promoted_type (const_tree t)
25081 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25082 return float_type_node;
25083 return NULL_TREE;
25086 /* Implement TARGET_CONVERT_TO_TYPE.
25087 Specifically, this hook implements the peculiarity of the ARM
25088 half-precision floating-point C semantics that requires conversions between
25089 __fp16 and double to go through an intermediate conversion to float. */
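/* Concretely: with H of type __fp16 and D of type double, (double) H is
   rewritten here as (double) (float) H, and (__fp16) D as (__fp16) (float) D;
   conversions involving only float (precision 32) fall through to the
   default handling, since this hook returns NULL_TREE for them.  */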
25091 static tree
25092 arm_convert_to_type (tree type, tree expr)
25094 tree fromtype = TREE_TYPE (expr);
25095 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25096 return NULL_TREE;
25097 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25098 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25099 return convert (type, convert (float_type_node, expr));
25100 return NULL_TREE;
25103 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25104 This simply adds HFmode as a supported mode; even though we don't
25105 implement arithmetic on this type directly, it's supported by
25106 optabs conversions, much the way the double-word arithmetic is
25107 special-cased in the default hook. */
25109 static bool
25110 arm_scalar_mode_supported_p (machine_mode mode)
25112 if (mode == HFmode)
25113 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25114 else if (ALL_FIXED_POINT_MODE_P (mode))
25115 return true;
25116 else
25117 return default_scalar_mode_supported_p (mode);
25120 /* Errors in the source file can cause expand_expr to return const0_rtx
25121 where we expect a vector. To avoid crashing, use one of the vector
25122 clear instructions. */
25124 static rtx
25125 safe_vector_operand (rtx x, machine_mode mode)
25127 if (x != const0_rtx)
25128 return x;
25129 x = gen_reg_rtx (mode);
25131 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25132 : gen_rtx_SUBREG (DImode, x, 0)));
25133 return x;
25136 /* Function to expand ternary builtins. */
25137 static rtx
25138 arm_expand_ternop_builtin (enum insn_code icode,
25139 tree exp, rtx target)
25141 rtx pat;
25142 tree arg0 = CALL_EXPR_ARG (exp, 0);
25143 tree arg1 = CALL_EXPR_ARG (exp, 1);
25144 tree arg2 = CALL_EXPR_ARG (exp, 2);
25146 rtx op0 = expand_normal (arg0);
25147 rtx op1 = expand_normal (arg1);
25148 rtx op2 = expand_normal (arg2);
25149 rtx op3 = NULL_RTX;
25151 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25152 lane operand depending on endianness. */
25153 bool builtin_sha1cpm_p = false;
25155 if (insn_data[icode].n_operands == 5)
25157 gcc_assert (icode == CODE_FOR_crypto_sha1c
25158 || icode == CODE_FOR_crypto_sha1p
25159 || icode == CODE_FOR_crypto_sha1m);
25160 builtin_sha1cpm_p = true;
25162 machine_mode tmode = insn_data[icode].operand[0].mode;
25163 machine_mode mode0 = insn_data[icode].operand[1].mode;
25164 machine_mode mode1 = insn_data[icode].operand[2].mode;
25165 machine_mode mode2 = insn_data[icode].operand[3].mode;
25168 if (VECTOR_MODE_P (mode0))
25169 op0 = safe_vector_operand (op0, mode0);
25170 if (VECTOR_MODE_P (mode1))
25171 op1 = safe_vector_operand (op1, mode1);
25172 if (VECTOR_MODE_P (mode2))
25173 op2 = safe_vector_operand (op2, mode2);
25175 if (! target
25176 || GET_MODE (target) != tmode
25177 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25178 target = gen_reg_rtx (tmode);
25180 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25181 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25182 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25184 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25185 op0 = copy_to_mode_reg (mode0, op0);
25186 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25187 op1 = copy_to_mode_reg (mode1, op1);
25188 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25189 op2 = copy_to_mode_reg (mode2, op2);
25190 if (builtin_sha1cpm_p)
25191 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25193 if (builtin_sha1cpm_p)
25194 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25195 else
25196 pat = GEN_FCN (icode) (target, op0, op1, op2);
25197 if (! pat)
25198 return 0;
25199 emit_insn (pat);
25200 return target;
25203 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25205 static rtx
25206 arm_expand_binop_builtin (enum insn_code icode,
25207 tree exp, rtx target)
25209 rtx pat;
25210 tree arg0 = CALL_EXPR_ARG (exp, 0);
25211 tree arg1 = CALL_EXPR_ARG (exp, 1);
25212 rtx op0 = expand_normal (arg0);
25213 rtx op1 = expand_normal (arg1);
25214 machine_mode tmode = insn_data[icode].operand[0].mode;
25215 machine_mode mode0 = insn_data[icode].operand[1].mode;
25216 machine_mode mode1 = insn_data[icode].operand[2].mode;
25218 if (VECTOR_MODE_P (mode0))
25219 op0 = safe_vector_operand (op0, mode0);
25220 if (VECTOR_MODE_P (mode1))
25221 op1 = safe_vector_operand (op1, mode1);
25223 if (! target
25224 || GET_MODE (target) != tmode
25225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25226 target = gen_reg_rtx (tmode);
25228 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25229 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25231 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25232 op0 = copy_to_mode_reg (mode0, op0);
25233 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25234 op1 = copy_to_mode_reg (mode1, op1);
25236 pat = GEN_FCN (icode) (target, op0, op1);
25237 if (! pat)
25238 return 0;
25239 emit_insn (pat);
25240 return target;
25243 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25245 static rtx
25246 arm_expand_unop_builtin (enum insn_code icode,
25247 tree exp, rtx target, int do_load)
25249 rtx pat;
25250 tree arg0 = CALL_EXPR_ARG (exp, 0);
25251 rtx op0 = expand_normal (arg0);
25252 rtx op1 = NULL_RTX;
25253 machine_mode tmode = insn_data[icode].operand[0].mode;
25254 machine_mode mode0 = insn_data[icode].operand[1].mode;
25255 bool builtin_sha1h_p = false;
25257 if (insn_data[icode].n_operands == 3)
25259 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25260 builtin_sha1h_p = true;
25263 if (! target
25264 || GET_MODE (target) != tmode
25265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25266 target = gen_reg_rtx (tmode);
25267 if (do_load)
25268 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25269 else
25271 if (VECTOR_MODE_P (mode0))
25272 op0 = safe_vector_operand (op0, mode0);
25274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25275 op0 = copy_to_mode_reg (mode0, op0);
25277 if (builtin_sha1h_p)
25278 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25280 if (builtin_sha1h_p)
25281 pat = GEN_FCN (icode) (target, op0, op1);
25282 else
25283 pat = GEN_FCN (icode) (target, op0);
25284 if (! pat)
25285 return 0;
25286 emit_insn (pat);
25287 return target;
25290 typedef enum {
25291 NEON_ARG_COPY_TO_REG,
25292 NEON_ARG_CONSTANT,
25293 NEON_ARG_MEMORY,
25294 NEON_ARG_STOP
25295 } builtin_arg;
25297 #define NEON_MAX_BUILTIN_ARGS 5
25299 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25300 and return an expression for the accessed memory.
25302 The intrinsic function operates on a block of registers that has
25303 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25304 function references the memory at EXP of type TYPE and in mode
25305 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25306 available. */
25308 static tree
25309 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25310 machine_mode reg_mode,
25311 neon_builtin_type_mode type_mode)
25313 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25314 tree elem_type, upper_bound, array_type;
25316 /* Work out the size of the register block in bytes. */
25317 reg_size = GET_MODE_SIZE (reg_mode);
25319 /* Work out the size of each vector in bytes. */
25320 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25321 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25323 /* Work out how many vectors there are. */
25324 gcc_assert (reg_size % vector_size == 0);
25325 nvectors = reg_size / vector_size;
25327 /* Work out the type of each element. */
25328 gcc_assert (POINTER_TYPE_P (type));
25329 elem_type = TREE_TYPE (type);
25331 /* Work out how many elements are being loaded or stored.
25332 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25333 and memory elements; anything else implies a lane load or store. */
25334 if (mem_mode == reg_mode)
25335 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25336 else
25337 nelems = nvectors;
25339 /* Create a type that describes the full access. */
25340 upper_bound = build_int_cst (size_type_node, nelems - 1);
25341 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25343 /* Dereference EXP using that type. */
25344 return fold_build2 (MEM_REF, array_type, exp,
25345 build_int_cst (build_pointer_type (array_type), 0));
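/* For illustration, a worked example of the computation above, assuming an
   intrinsic such as vld3q_s32 from arm_neon.h: REG_MODE describes a block of
   three quad vectors, so reg_size = 48 and vector_size = 16, giving
   nvectors = 3.  The whole block comes from memory, so MEM_MODE == REG_MODE
   and nelems = 16 * 3 / 4 = 12, i.e. the access is described as an
   int32_t[12] array.  For a lane access such as vld3q_lane_s32 the modes
   differ and nelems is just nvectors = 3.  */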
25348 /* Expand a Neon builtin. */
25349 static rtx
25350 arm_expand_neon_args (rtx target, int icode, int have_retval,
25351 neon_builtin_type_mode type_mode,
25352 tree exp, int fcode, ...)
25354 va_list ap;
25355 rtx pat;
25356 tree arg[NEON_MAX_BUILTIN_ARGS];
25357 rtx op[NEON_MAX_BUILTIN_ARGS];
25358 tree arg_type;
25359 tree formals;
25360 machine_mode tmode = insn_data[icode].operand[0].mode;
25361 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25362 machine_mode other_mode;
25363 int argc = 0;
25364 int opno;
25366 if (have_retval
25367 && (!target
25368 || GET_MODE (target) != tmode
25369 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25370 target = gen_reg_rtx (tmode);
25372 va_start (ap, fcode);
25374 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25376 for (;;)
25378 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25380 if (thisarg == NEON_ARG_STOP)
25381 break;
25382 else
25384 opno = argc + have_retval;
25385 mode[argc] = insn_data[icode].operand[opno].mode;
25386 arg[argc] = CALL_EXPR_ARG (exp, argc);
25387 arg_type = TREE_VALUE (formals);
25388 if (thisarg == NEON_ARG_MEMORY)
25390 other_mode = insn_data[icode].operand[1 - opno].mode;
25391 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25392 mode[argc], other_mode,
25393 type_mode);
25396 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM
25397 rtx is returned. */
25398 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25399 (thisarg == NEON_ARG_MEMORY
25400 ? EXPAND_MEMORY : EXPAND_NORMAL));
25402 switch (thisarg)
25404 case NEON_ARG_COPY_TO_REG:
25405 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25406 if (!(*insn_data[icode].operand[opno].predicate)
25407 (op[argc], mode[argc]))
25408 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25409 break;
25411 case NEON_ARG_CONSTANT:
25412 /* FIXME: This error message is somewhat unhelpful. */
25413 if (!(*insn_data[icode].operand[opno].predicate)
25414 (op[argc], mode[argc]))
25415 error ("argument must be a constant");
25416 break;
25418 case NEON_ARG_MEMORY:
25419 /* Check if expand failed. */
25420 if (op[argc] == const0_rtx)
25421 return 0;
25422 gcc_assert (MEM_P (op[argc]));
25423 PUT_MODE (op[argc], mode[argc]);
25424 /* ??? arm_neon.h uses the same built-in functions for signed
25425 and unsigned accesses, casting where necessary. This isn't
25426 alias safe. */
25427 set_mem_alias_set (op[argc], 0);
25428 if (!(*insn_data[icode].operand[opno].predicate)
25429 (op[argc], mode[argc]))
25430 op[argc] = (replace_equiv_address
25431 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25432 break;
25434 case NEON_ARG_STOP:
25435 gcc_unreachable ();
25438 argc++;
25439 formals = TREE_CHAIN (formals);
25443 va_end (ap);
25445 if (have_retval)
25446 switch (argc)
25448 case 1:
25449 pat = GEN_FCN (icode) (target, op[0]);
25450 break;
25452 case 2:
25453 pat = GEN_FCN (icode) (target, op[0], op[1]);
25454 break;
25456 case 3:
25457 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25458 break;
25460 case 4:
25461 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25462 break;
25464 case 5:
25465 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25466 break;
25468 default:
25469 gcc_unreachable ();
25471 else
25472 switch (argc)
25474 case 1:
25475 pat = GEN_FCN (icode) (op[0]);
25476 break;
25478 case 2:
25479 pat = GEN_FCN (icode) (op[0], op[1]);
25480 break;
25482 case 3:
25483 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25484 break;
25486 case 4:
25487 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25488 break;
25490 case 5:
25491 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25492 break;
25494 default:
25495 gcc_unreachable ();
25498 if (!pat)
25499 return 0;
25501 emit_insn (pat);
25503 return target;
25506 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25507 constants defined per-instruction or per instruction-variant. Instead, the
25508 required info is looked up in the table neon_builtin_data. */
25509 static rtx
25510 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25512 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25513 neon_itype itype = d->itype;
25514 enum insn_code icode = d->code;
25515 neon_builtin_type_mode type_mode = d->mode;
25517 switch (itype)
25519 case NEON_UNOP:
25520 case NEON_CONVERT:
25521 case NEON_DUPLANE:
25522 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25523 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25525 case NEON_BINOP:
25526 case NEON_SETLANE:
25527 case NEON_SCALARMUL:
25528 case NEON_SCALARMULL:
25529 case NEON_SCALARMULH:
25530 case NEON_SHIFTINSERT:
25531 case NEON_LOGICBINOP:
25532 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25533 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25534 NEON_ARG_STOP);
25536 case NEON_TERNOP:
25537 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25538 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25539 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25541 case NEON_GETLANE:
25542 case NEON_FIXCONV:
25543 case NEON_SHIFTIMM:
25544 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25545 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25546 NEON_ARG_STOP);
25548 case NEON_CREATE:
25549 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25550 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25552 case NEON_DUP:
25553 case NEON_RINT:
25554 case NEON_SPLIT:
25555 case NEON_FLOAT_WIDEN:
25556 case NEON_FLOAT_NARROW:
25557 case NEON_BSWAP:
25558 case NEON_REINTERP:
25559 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25560 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25562 case NEON_COPYSIGNF:
25563 case NEON_COMBINE:
25564 case NEON_VTBL:
25565 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25566 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25568 case NEON_LANEMUL:
25569 case NEON_LANEMULL:
25570 case NEON_LANEMULH:
25571 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25572 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25573 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25575 case NEON_LANEMAC:
25576 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25577 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25578 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25580 case NEON_SHIFTACC:
25581 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25582 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25583 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25585 case NEON_SCALARMAC:
25586 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25587 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25588 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25590 case NEON_SELECT:
25591 case NEON_VTBX:
25592 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25593 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25594 NEON_ARG_STOP);
25596 case NEON_LOAD1:
25597 case NEON_LOADSTRUCT:
25598 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25599 NEON_ARG_MEMORY, NEON_ARG_STOP);
25601 case NEON_LOAD1LANE:
25602 case NEON_LOADSTRUCTLANE:
25603 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25604 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25605 NEON_ARG_STOP);
25607 case NEON_STORE1:
25608 case NEON_STORESTRUCT:
25609 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25610 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25612 case NEON_STORE1LANE:
25613 case NEON_STORESTRUCTLANE:
25614 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25615 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25616 NEON_ARG_STOP);
25619 gcc_unreachable ();
25622 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25623 void
25624 neon_reinterpret (rtx dest, rtx src)
25626 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
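/* For illustration: this is the mechanism behind the vreinterpret*
   intrinsics, e.g.

       float32x4_t f = ...;
       int32x4_t   i = vreinterpretq_s32_f32 (f);

   the 128 bits are unchanged; only the type (and mode) differs.  */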
25629 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25630 not to early-clobber SRC registers in the process.
25632 We assume that the operands described by SRC and DEST represent a
25633 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25634 number of components into which the copy has been decomposed. */
25635 void
25636 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25638 unsigned int i;
25640 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25641 || REGNO (operands[0]) < REGNO (operands[1]))
25643 for (i = 0; i < count; i++)
25645 operands[2 * i] = dest[i];
25646 operands[2 * i + 1] = src[i];
25649 else
25651 for (i = 0; i < count; i++)
25653 operands[2 * i] = dest[count - i - 1];
25654 operands[2 * i + 1] = src[count - i - 1];
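/* For illustration, why the ordering above matters: suppose the copy
   decomposes into dest = {d1, d2} and src = {d0, d1}.  Emitting the moves in
   ascending order would give

       d1 = d0;                 clobbers d1
       d2 = d1;                 reads the clobbered value

   Because the destination overlaps the source and starts at a higher
   register number, the else branch emits the moves in reverse order
   (d2 = d1, then d1 = d0), so no source register is overwritten before it
   is read.  */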
25659 /* Split operands into moves from op[1] + op[2] into op[0]. */
25661 void
25662 neon_split_vcombine (rtx operands[3])
25664 unsigned int dest = REGNO (operands[0]);
25665 unsigned int src1 = REGNO (operands[1]);
25666 unsigned int src2 = REGNO (operands[2]);
25667 machine_mode halfmode = GET_MODE (operands[1]);
25668 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25669 rtx destlo, desthi;
25671 if (src1 == dest && src2 == dest + halfregs)
25673 /* No-op move. Can't split to nothing; emit something. */
25674 emit_note (NOTE_INSN_DELETED);
25675 return;
25678 /* Preserve register attributes for variable tracking. */
25679 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25680 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25681 GET_MODE_SIZE (halfmode));
25683 /* Special case of reversed high/low parts. Use VSWP. */
25684 if (src2 == dest && src1 == dest + halfregs)
25686 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25687 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25688 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25689 return;
25692 if (!reg_overlap_mentioned_p (operands[2], destlo))
25694 /* Try to avoid unnecessary moves if part of the result
25695 is in the right place already. */
25696 if (src1 != dest)
25697 emit_move_insn (destlo, operands[1]);
25698 if (src2 != dest + halfregs)
25699 emit_move_insn (desthi, operands[2]);
25701 else
25703 if (src2 != dest + halfregs)
25704 emit_move_insn (desthi, operands[2]);
25705 if (src1 != dest)
25706 emit_move_insn (destlo, operands[1]);
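/* For illustration, the cases above in terms of a vcombine whose destination
   Q register overlaps {d0, d1}:

       q0 = vcombine (d0, d1)   no-op: only a deleted-insn note is emitted
       q0 = vcombine (d1, d0)   reversed halves: emitted as a single VSWP
       q0 = vcombine (d2, d3)   two plain moves, ordered so that no source
                                is clobbered before it is read  */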
25710 /* Expand an expression EXP that calls a built-in function,
25711 with result going to TARGET if that's convenient
25712 (and in mode MODE if that's convenient).
25713 SUBTARGET may be used as the target for computing one of EXP's operands.
25714 IGNORE is nonzero if the value is to be ignored. */
25716 static rtx
25717 arm_expand_builtin (tree exp,
25718 rtx target,
25719 rtx subtarget ATTRIBUTE_UNUSED,
25720 machine_mode mode ATTRIBUTE_UNUSED,
25721 int ignore ATTRIBUTE_UNUSED)
25723 const struct builtin_description * d;
25724 enum insn_code icode;
25725 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25726 tree arg0;
25727 tree arg1;
25728 tree arg2;
25729 rtx op0;
25730 rtx op1;
25731 rtx op2;
25732 rtx pat;
25733 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25734 size_t i;
25735 machine_mode tmode;
25736 machine_mode mode0;
25737 machine_mode mode1;
25738 machine_mode mode2;
25739 int opint;
25740 int selector;
25741 int mask;
25742 int imm;
25744 if (fcode >= ARM_BUILTIN_NEON_BASE)
25745 return arm_expand_neon_builtin (fcode, exp, target);
25747 switch (fcode)
25749 case ARM_BUILTIN_GET_FPSCR:
25750 case ARM_BUILTIN_SET_FPSCR:
25751 if (fcode == ARM_BUILTIN_GET_FPSCR)
25753 icode = CODE_FOR_get_fpscr;
25754 target = gen_reg_rtx (SImode);
25755 pat = GEN_FCN (icode) (target);
25757 else
25759 target = NULL_RTX;
25760 icode = CODE_FOR_set_fpscr;
25761 arg0 = CALL_EXPR_ARG (exp, 0);
25762 op0 = expand_normal (arg0);
25763 pat = GEN_FCN (icode) (op0);
25765 emit_insn (pat);
25766 return target;
25768 case ARM_BUILTIN_TEXTRMSB:
25769 case ARM_BUILTIN_TEXTRMUB:
25770 case ARM_BUILTIN_TEXTRMSH:
25771 case ARM_BUILTIN_TEXTRMUH:
25772 case ARM_BUILTIN_TEXTRMSW:
25773 case ARM_BUILTIN_TEXTRMUW:
25774 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25775 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25776 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25777 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25778 : CODE_FOR_iwmmxt_textrmw);
25780 arg0 = CALL_EXPR_ARG (exp, 0);
25781 arg1 = CALL_EXPR_ARG (exp, 1);
25782 op0 = expand_normal (arg0);
25783 op1 = expand_normal (arg1);
25784 tmode = insn_data[icode].operand[0].mode;
25785 mode0 = insn_data[icode].operand[1].mode;
25786 mode1 = insn_data[icode].operand[2].mode;
25788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25789 op0 = copy_to_mode_reg (mode0, op0);
25790 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25792 /* @@@ better error message */
25793 error ("selector must be an immediate");
25794 return gen_reg_rtx (tmode);
25797 opint = INTVAL (op1);
25798 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25800 if (opint > 7 || opint < 0)
25801 error ("the range of selector should be in 0 to 7");
25803 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25805 if (opint > 3 || opint < 0)
25806 error ("the range of selector should be in 0 to 3");
25808 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25810 if (opint > 1 || opint < 0)
25811 error ("the range of selector should be in 0 to 1");
25814 if (target == 0
25815 || GET_MODE (target) != tmode
25816 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25817 target = gen_reg_rtx (tmode);
25818 pat = GEN_FCN (icode) (target, op0, op1);
25819 if (! pat)
25820 return 0;
25821 emit_insn (pat);
25822 return target;
25824 case ARM_BUILTIN_WALIGNI:
25825 /* If op2 is an immediate, call waligni, else call walignr. */
25826 arg0 = CALL_EXPR_ARG (exp, 0);
25827 arg1 = CALL_EXPR_ARG (exp, 1);
25828 arg2 = CALL_EXPR_ARG (exp, 2);
25829 op0 = expand_normal (arg0);
25830 op1 = expand_normal (arg1);
25831 op2 = expand_normal (arg2);
25832 if (CONST_INT_P (op2))
25834 icode = CODE_FOR_iwmmxt_waligni;
25835 tmode = insn_data[icode].operand[0].mode;
25836 mode0 = insn_data[icode].operand[1].mode;
25837 mode1 = insn_data[icode].operand[2].mode;
25838 mode2 = insn_data[icode].operand[3].mode;
25839 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25840 op0 = copy_to_mode_reg (mode0, op0);
25841 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25842 op1 = copy_to_mode_reg (mode1, op1);
25843 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25844 selector = INTVAL (op2);
25845 if (selector > 7 || selector < 0)
25846 error ("the range of selector should be in 0 to 7");
25848 else
25850 icode = CODE_FOR_iwmmxt_walignr;
25851 tmode = insn_data[icode].operand[0].mode;
25852 mode0 = insn_data[icode].operand[1].mode;
25853 mode1 = insn_data[icode].operand[2].mode;
25854 mode2 = insn_data[icode].operand[3].mode;
25855 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25856 op0 = copy_to_mode_reg (mode0, op0);
25857 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25858 op1 = copy_to_mode_reg (mode1, op1);
25859 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25860 op2 = copy_to_mode_reg (mode2, op2);
25862 if (target == 0
25863 || GET_MODE (target) != tmode
25864 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25865 target = gen_reg_rtx (tmode);
25866 pat = GEN_FCN (icode) (target, op0, op1, op2);
25867 if (!pat)
25868 return 0;
25869 emit_insn (pat);
25870 return target;
25872 case ARM_BUILTIN_TINSRB:
25873 case ARM_BUILTIN_TINSRH:
25874 case ARM_BUILTIN_TINSRW:
25875 case ARM_BUILTIN_WMERGE:
25876 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25877 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25878 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25879 : CODE_FOR_iwmmxt_tinsrw);
25880 arg0 = CALL_EXPR_ARG (exp, 0);
25881 arg1 = CALL_EXPR_ARG (exp, 1);
25882 arg2 = CALL_EXPR_ARG (exp, 2);
25883 op0 = expand_normal (arg0);
25884 op1 = expand_normal (arg1);
25885 op2 = expand_normal (arg2);
25886 tmode = insn_data[icode].operand[0].mode;
25887 mode0 = insn_data[icode].operand[1].mode;
25888 mode1 = insn_data[icode].operand[2].mode;
25889 mode2 = insn_data[icode].operand[3].mode;
25891 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25892 op0 = copy_to_mode_reg (mode0, op0);
25893 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25894 op1 = copy_to_mode_reg (mode1, op1);
25895 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25897 error ("selector must be an immediate");
25898 return const0_rtx;
25900 if (icode == CODE_FOR_iwmmxt_wmerge)
25902 selector = INTVAL (op2);
25903 if (selector > 7 || selector < 0)
25904 error ("the range of selector should be in 0 to 7");
25906 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25907 || (icode == CODE_FOR_iwmmxt_tinsrh)
25908 || (icode == CODE_FOR_iwmmxt_tinsrw))
25910 mask = 0x01;
25911 selector = INTVAL (op2);
25912 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25913 error ("the range of selector should be in 0 to 7");
25914 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25915 error ("the range of selector should be in 0 to 3");
25916 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25917 error ("the range of selector should be in 0 to 1");
25918 mask <<= selector;
25919 op2 = GEN_INT (mask);
25921 if (target == 0
25922 || GET_MODE (target) != tmode
25923 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25924 target = gen_reg_rtx (tmode);
25925 pat = GEN_FCN (icode) (target, op0, op1, op2);
25926 if (! pat)
25927 return 0;
25928 emit_insn (pat);
25929 return target;
25931 case ARM_BUILTIN_SETWCGR0:
25932 case ARM_BUILTIN_SETWCGR1:
25933 case ARM_BUILTIN_SETWCGR2:
25934 case ARM_BUILTIN_SETWCGR3:
25935 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25936 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25937 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25938 : CODE_FOR_iwmmxt_setwcgr3);
25939 arg0 = CALL_EXPR_ARG (exp, 0);
25940 op0 = expand_normal (arg0);
25941 mode0 = insn_data[icode].operand[0].mode;
25942 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25943 op0 = copy_to_mode_reg (mode0, op0);
25944 pat = GEN_FCN (icode) (op0);
25945 if (!pat)
25946 return 0;
25947 emit_insn (pat);
25948 return 0;
25950 case ARM_BUILTIN_GETWCGR0:
25951 case ARM_BUILTIN_GETWCGR1:
25952 case ARM_BUILTIN_GETWCGR2:
25953 case ARM_BUILTIN_GETWCGR3:
25954 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25955 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25956 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25957 : CODE_FOR_iwmmxt_getwcgr3);
25958 tmode = insn_data[icode].operand[0].mode;
25959 if (target == 0
25960 || GET_MODE (target) != tmode
25961 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25962 target = gen_reg_rtx (tmode);
25963 pat = GEN_FCN (icode) (target);
25964 if (!pat)
25965 return 0;
25966 emit_insn (pat);
25967 return target;
25969 case ARM_BUILTIN_WSHUFH:
25970 icode = CODE_FOR_iwmmxt_wshufh;
25971 arg0 = CALL_EXPR_ARG (exp, 0);
25972 arg1 = CALL_EXPR_ARG (exp, 1);
25973 op0 = expand_normal (arg0);
25974 op1 = expand_normal (arg1);
25975 tmode = insn_data[icode].operand[0].mode;
25976 mode1 = insn_data[icode].operand[1].mode;
25977 mode2 = insn_data[icode].operand[2].mode;
25979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25980 op0 = copy_to_mode_reg (mode1, op0);
25981 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25983 error ("mask must be an immediate");
25984 return const0_rtx;
25986 selector = INTVAL (op1);
25987 if (selector < 0 || selector > 255)
25988 error ("the range of mask should be in 0 to 255");
25989 if (target == 0
25990 || GET_MODE (target) != tmode
25991 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25992 target = gen_reg_rtx (tmode);
25993 pat = GEN_FCN (icode) (target, op0, op1);
25994 if (! pat)
25995 return 0;
25996 emit_insn (pat);
25997 return target;
25999 case ARM_BUILTIN_WMADDS:
26000 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
26001 case ARM_BUILTIN_WMADDSX:
26002 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
26003 case ARM_BUILTIN_WMADDSN:
26004 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
26005 case ARM_BUILTIN_WMADDU:
26006 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
26007 case ARM_BUILTIN_WMADDUX:
26008 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
26009 case ARM_BUILTIN_WMADDUN:
26010 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
26011 case ARM_BUILTIN_WSADBZ:
26012 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
26013 case ARM_BUILTIN_WSADHZ:
26014 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
26016 /* Several three-argument builtins. */
26017 case ARM_BUILTIN_WMACS:
26018 case ARM_BUILTIN_WMACU:
26019 case ARM_BUILTIN_TMIA:
26020 case ARM_BUILTIN_TMIAPH:
26021 case ARM_BUILTIN_TMIATT:
26022 case ARM_BUILTIN_TMIATB:
26023 case ARM_BUILTIN_TMIABT:
26024 case ARM_BUILTIN_TMIABB:
26025 case ARM_BUILTIN_WQMIABB:
26026 case ARM_BUILTIN_WQMIABT:
26027 case ARM_BUILTIN_WQMIATB:
26028 case ARM_BUILTIN_WQMIATT:
26029 case ARM_BUILTIN_WQMIABBN:
26030 case ARM_BUILTIN_WQMIABTN:
26031 case ARM_BUILTIN_WQMIATBN:
26032 case ARM_BUILTIN_WQMIATTN:
26033 case ARM_BUILTIN_WMIABB:
26034 case ARM_BUILTIN_WMIABT:
26035 case ARM_BUILTIN_WMIATB:
26036 case ARM_BUILTIN_WMIATT:
26037 case ARM_BUILTIN_WMIABBN:
26038 case ARM_BUILTIN_WMIABTN:
26039 case ARM_BUILTIN_WMIATBN:
26040 case ARM_BUILTIN_WMIATTN:
26041 case ARM_BUILTIN_WMIAWBB:
26042 case ARM_BUILTIN_WMIAWBT:
26043 case ARM_BUILTIN_WMIAWTB:
26044 case ARM_BUILTIN_WMIAWTT:
26045 case ARM_BUILTIN_WMIAWBBN:
26046 case ARM_BUILTIN_WMIAWBTN:
26047 case ARM_BUILTIN_WMIAWTBN:
26048 case ARM_BUILTIN_WMIAWTTN:
26049 case ARM_BUILTIN_WSADB:
26050 case ARM_BUILTIN_WSADH:
26051 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26052 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26053 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26054 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26055 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26056 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26057 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26058 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26059 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26060 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26061 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26062 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26063 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26064 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26065 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26066 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26067 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26068 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26069 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26070 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26071 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26072 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26073 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26074 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26075 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26076 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26077 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26078 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26079 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26080 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26081 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26082 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26083 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26084 : CODE_FOR_iwmmxt_wsadh);
26085 arg0 = CALL_EXPR_ARG (exp, 0);
26086 arg1 = CALL_EXPR_ARG (exp, 1);
26087 arg2 = CALL_EXPR_ARG (exp, 2);
26088 op0 = expand_normal (arg0);
26089 op1 = expand_normal (arg1);
26090 op2 = expand_normal (arg2);
26091 tmode = insn_data[icode].operand[0].mode;
26092 mode0 = insn_data[icode].operand[1].mode;
26093 mode1 = insn_data[icode].operand[2].mode;
26094 mode2 = insn_data[icode].operand[3].mode;
26096 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26097 op0 = copy_to_mode_reg (mode0, op0);
26098 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26099 op1 = copy_to_mode_reg (mode1, op1);
26100 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26101 op2 = copy_to_mode_reg (mode2, op2);
26102 if (target == 0
26103 || GET_MODE (target) != tmode
26104 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26105 target = gen_reg_rtx (tmode);
26106 pat = GEN_FCN (icode) (target, op0, op1, op2);
26107 if (! pat)
26108 return 0;
26109 emit_insn (pat);
26110 return target;
26112 case ARM_BUILTIN_WZERO:
26113 target = gen_reg_rtx (DImode);
26114 emit_insn (gen_iwmmxt_clrdi (target));
26115 return target;
26117 case ARM_BUILTIN_WSRLHI:
26118 case ARM_BUILTIN_WSRLWI:
26119 case ARM_BUILTIN_WSRLDI:
26120 case ARM_BUILTIN_WSLLHI:
26121 case ARM_BUILTIN_WSLLWI:
26122 case ARM_BUILTIN_WSLLDI:
26123 case ARM_BUILTIN_WSRAHI:
26124 case ARM_BUILTIN_WSRAWI:
26125 case ARM_BUILTIN_WSRADI:
26126 case ARM_BUILTIN_WRORHI:
26127 case ARM_BUILTIN_WRORWI:
26128 case ARM_BUILTIN_WRORDI:
26129 case ARM_BUILTIN_WSRLH:
26130 case ARM_BUILTIN_WSRLW:
26131 case ARM_BUILTIN_WSRLD:
26132 case ARM_BUILTIN_WSLLH:
26133 case ARM_BUILTIN_WSLLW:
26134 case ARM_BUILTIN_WSLLD:
26135 case ARM_BUILTIN_WSRAH:
26136 case ARM_BUILTIN_WSRAW:
26137 case ARM_BUILTIN_WSRAD:
26138 case ARM_BUILTIN_WRORH:
26139 case ARM_BUILTIN_WRORW:
26140 case ARM_BUILTIN_WRORD:
26141 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26142 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26143 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26144 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26145 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26146 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26147 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26148 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26149 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26150 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26151 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26152 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26153 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26154 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26155 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26156 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26157 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26158 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26159 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26160 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26161 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26162 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26163 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26164 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26165 : CODE_FOR_nothing);
26166 arg1 = CALL_EXPR_ARG (exp, 1);
26167 op1 = expand_normal (arg1);
26168 if (GET_MODE (op1) == VOIDmode)
26170 imm = INTVAL (op1);
26171 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26172 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26173 && (imm < 0 || imm > 32))
26175 if (fcode == ARM_BUILTIN_WRORHI)
26176 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26177 else if (fcode == ARM_BUILTIN_WRORWI)
26178 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26179 else if (fcode == ARM_BUILTIN_WRORH)
26180 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26181 else
26182 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26184 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26185 && (imm < 0 || imm > 64))
26187 if (fcode == ARM_BUILTIN_WRORDI)
26188 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26189 else
26190 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26192 else if (imm < 0)
26194 if (fcode == ARM_BUILTIN_WSRLHI)
26195 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26196 else if (fcode == ARM_BUILTIN_WSRLWI)
26197 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26198 else if (fcode == ARM_BUILTIN_WSRLDI)
26199 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26200 else if (fcode == ARM_BUILTIN_WSLLHI)
26201 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26202 else if (fcode == ARM_BUILTIN_WSLLWI)
26203 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26204 else if (fcode == ARM_BUILTIN_WSLLDI)
26205 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26206 else if (fcode == ARM_BUILTIN_WSRAHI)
26207 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26208 else if (fcode == ARM_BUILTIN_WSRAWI)
26209 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26210 else if (fcode == ARM_BUILTIN_WSRADI)
26211 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26212 else if (fcode == ARM_BUILTIN_WSRLH)
26213 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26214 else if (fcode == ARM_BUILTIN_WSRLW)
26215 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26216 else if (fcode == ARM_BUILTIN_WSRLD)
26217 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26218 else if (fcode == ARM_BUILTIN_WSLLH)
26219 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26220 else if (fcode == ARM_BUILTIN_WSLLW)
26221 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26222 else if (fcode == ARM_BUILTIN_WSLLD)
26223 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26224 else if (fcode == ARM_BUILTIN_WSRAH)
26225 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26226 else if (fcode == ARM_BUILTIN_WSRAW)
26227 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26228 else
26229 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26232 return arm_expand_binop_builtin (icode, exp, target);
26234 default:
26235 break;
26238 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26239 if (d->code == (const enum arm_builtins) fcode)
26240 return arm_expand_binop_builtin (d->icode, exp, target);
26242 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26243 if (d->code == (const enum arm_builtins) fcode)
26244 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26246 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26247 if (d->code == (const enum arm_builtins) fcode)
26248 return arm_expand_ternop_builtin (d->icode, exp, target);
26250 /* @@@ Should really do something sensible here. */
26251 return NULL_RTX;
26254 /* Return the number (counting from 0) of
26255 the least significant set bit in MASK. */
26257 inline static int
26258 number_of_first_bit_set (unsigned mask)
26260 return ctz_hwi (mask);
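/* For illustration: number_of_first_bit_set (0x14) == 2, since bit 2 is the
   least significant set bit of 0b10100.  */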
26263 /* Like emit_multi_reg_push, but allowing for a different set of
26264 registers to be described as saved. MASK is the set of registers
26265 to be saved; REAL_REGS is the set of registers to be described as
26266 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26268 static rtx_insn *
26269 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26271 unsigned long regno;
26272 rtx par[10], tmp, reg;
26273 rtx_insn *insn;
26274 int i, j;
26276 /* Build the parallel of the registers actually being stored. */
26277 for (i = 0; mask; ++i, mask &= mask - 1)
26279 regno = ctz_hwi (mask);
26280 reg = gen_rtx_REG (SImode, regno);
26282 if (i == 0)
26283 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26284 else
26285 tmp = gen_rtx_USE (VOIDmode, reg);
26287 par[i] = tmp;
26290 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26291 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26292 tmp = gen_frame_mem (BLKmode, tmp);
26293 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26294 par[0] = tmp;
26296 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26297 insn = emit_insn (tmp);
26299 /* Always build the stack adjustment note for unwind info. */
26300 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26301 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26302 par[0] = tmp;
26304 /* Build the parallel of the registers recorded as saved for unwind. */
26305 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26307 regno = ctz_hwi (real_regs);
26308 reg = gen_rtx_REG (SImode, regno);
26310 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26311 tmp = gen_frame_mem (SImode, tmp);
26312 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26313 RTX_FRAME_RELATED_P (tmp) = 1;
26314 par[j + 1] = tmp;
26317 if (j == 0)
26318 tmp = par[0];
26319 else
26321 RTX_FRAME_RELATED_P (par[0]) = 1;
26322 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26327 return insn;
26330 /* Emit code to push or pop registers to or from the stack. F is the
26331 assembly file. MASK is the registers to pop. */
26332 static void
26333 thumb_pop (FILE *f, unsigned long mask)
26335 int regno;
26336 int lo_mask = mask & 0xFF;
26337 int pushed_words = 0;
26339 gcc_assert (mask);
26341 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26343 /* Special case. Do not generate a POP PC statement here, do it in
26344 thumb_exit() */
26345 thumb_exit (f, -1);
26346 return;
26349 fprintf (f, "\tpop\t{");
26351 /* Look at the low registers first. */
26352 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26354 if (lo_mask & 1)
26356 asm_fprintf (f, "%r", regno);
26358 if ((lo_mask & ~1) != 0)
26359 fprintf (f, ", ");
26361 pushed_words++;
26365 if (mask & (1 << PC_REGNUM))
26367 /* Catch popping the PC. */
26368 if (TARGET_INTERWORK || TARGET_BACKTRACE
26369 || crtl->calls_eh_return)
26371 /* The PC is never popped directly; instead
26372 it is popped into r3 and then BX is used. */
26373 fprintf (f, "}\n");
26375 thumb_exit (f, -1);
26377 return;
26379 else
26381 if (mask & 0xFF)
26382 fprintf (f, ", ");
26384 asm_fprintf (f, "%r", PC_REGNUM);
26388 fprintf (f, "}\n");
26391 /* Generate code to return from a thumb function.
26392 If 'reg_containing_return_addr' is -1, then the return address is
26393 actually on the stack, at the stack pointer. */
26394 static void
26395 thumb_exit (FILE *f, int reg_containing_return_addr)
26397 unsigned regs_available_for_popping;
26398 unsigned regs_to_pop;
26399 int pops_needed;
26400 unsigned available;
26401 unsigned required;
26402 machine_mode mode;
26403 int size;
26404 int restore_a4 = FALSE;
26406 /* Compute the registers we need to pop. */
26407 regs_to_pop = 0;
26408 pops_needed = 0;
26410 if (reg_containing_return_addr == -1)
26412 regs_to_pop |= 1 << LR_REGNUM;
26413 ++pops_needed;
26416 if (TARGET_BACKTRACE)
26418 /* Restore the (ARM) frame pointer and stack pointer. */
26419 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26420 pops_needed += 2;
26423 /* If there is nothing to pop then just emit the BX instruction and
26424 return. */
26425 if (pops_needed == 0)
26427 if (crtl->calls_eh_return)
26428 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26430 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26431 return;
26433 /* Otherwise if we are not supporting interworking and we have not created
26434 a backtrace structure and the function was not entered in ARM mode then
26435 just pop the return address straight into the PC. */
26436 else if (!TARGET_INTERWORK
26437 && !TARGET_BACKTRACE
26438 && !is_called_in_ARM_mode (current_function_decl)
26439 && !crtl->calls_eh_return)
26441 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26442 return;
26445 /* Find out how many of the (return) argument registers we can corrupt. */
26446 regs_available_for_popping = 0;
26448 /* If returning via __builtin_eh_return, the bottom three registers
26449 all contain information needed for the return. */
26450 if (crtl->calls_eh_return)
26451 size = 12;
26452 else
26454 /* Try to deduce the registers used from the function's
26455 return value. This is more reliable than examining
26456 df_regs_ever_live_p () because that will be set if the register is
26457 ever used in the function, not just if the register is used
26458 to hold a return value. */
26460 if (crtl->return_rtx != 0)
26461 mode = GET_MODE (crtl->return_rtx);
26462 else
26463 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26465 size = GET_MODE_SIZE (mode);
26467 if (size == 0)
26469 /* In a void function we can use any argument register.
26470 In a function that returns a structure on the stack
26471 we can use the second and third argument registers. */
26472 if (mode == VOIDmode)
26473 regs_available_for_popping =
26474 (1 << ARG_REGISTER (1))
26475 | (1 << ARG_REGISTER (2))
26476 | (1 << ARG_REGISTER (3));
26477 else
26478 regs_available_for_popping =
26479 (1 << ARG_REGISTER (2))
26480 | (1 << ARG_REGISTER (3));
26482 else if (size <= 4)
26483 regs_available_for_popping =
26484 (1 << ARG_REGISTER (2))
26485 | (1 << ARG_REGISTER (3));
26486 else if (size <= 8)
26487 regs_available_for_popping =
26488 (1 << ARG_REGISTER (3));
26491 /* Match registers to be popped with registers into which we pop them. */
26492 for (available = regs_available_for_popping,
26493 required = regs_to_pop;
26494 required != 0 && available != 0;
26495 available &= ~(available & - available),
26496 required &= ~(required & - required))
26497 --pops_needed;
26499 /* If we have any popping registers left over, remove them. */
26500 if (available > 0)
26501 regs_available_for_popping &= ~available;
26503 /* Otherwise if we need another popping register we can use
26504 the fourth argument register. */
26505 else if (pops_needed)
26507 /* If we have not found any free argument registers and
26508 reg a4 contains the return address, we must move it. */
26509 if (regs_available_for_popping == 0
26510 && reg_containing_return_addr == LAST_ARG_REGNUM)
26512 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26513 reg_containing_return_addr = LR_REGNUM;
26515 else if (size > 12)
26517 /* Register a4 is being used to hold part of the return value,
26518 but we have dire need of a free, low register. */
26519 restore_a4 = TRUE;
26521 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26524 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26526 /* The fourth argument register is available. */
26527 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26529 --pops_needed;
26533 /* Pop as many registers as we can. */
26534 thumb_pop (f, regs_available_for_popping);
26536 /* Process the registers we popped. */
26537 if (reg_containing_return_addr == -1)
26539 /* The return address was popped into the lowest numbered register. */
26540 regs_to_pop &= ~(1 << LR_REGNUM);
26542 reg_containing_return_addr =
26543 number_of_first_bit_set (regs_available_for_popping);
26545 /* Remove this register from the mask of available registers, so that
26546 the return address will not be corrupted by further pops. */
26547 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26550 /* If we popped other registers then handle them here. */
26551 if (regs_available_for_popping)
26553 int frame_pointer;
26555 /* Work out which register currently contains the frame pointer. */
26556 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26558 /* Move it into the correct place. */
26559 asm_fprintf (f, "\tmov\t%r, %r\n",
26560 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26562 /* (Temporarily) remove it from the mask of popped registers. */
26563 regs_available_for_popping &= ~(1 << frame_pointer);
26564 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26566 if (regs_available_for_popping)
26568 int stack_pointer;
26570 /* We popped the stack pointer as well,
26571 find the register that contains it. */
26572 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26574 /* Move it into the stack register. */
26575 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26577 /* At this point we have popped all necessary registers, so
26578 do not worry about restoring regs_available_for_popping
26579 to its correct value:
26581 assert (pops_needed == 0)
26582 assert (regs_available_for_popping == (1 << frame_pointer))
26583 assert (regs_to_pop == (1 << STACK_POINTER)) */
26585 else
26587 /* Since we have just moved the popped value into the frame
26588 pointer, the popping register is available for reuse, and
26589 we know that we still have the stack pointer left to pop. */
26590 regs_available_for_popping |= (1 << frame_pointer);
26594 /* If we still have registers left on the stack, but we no longer have
26595 any registers into which we can pop them, then we must move the return
26596 address into the link register and make available the register that
26597 contained it. */
26598 if (regs_available_for_popping == 0 && pops_needed > 0)
26600 regs_available_for_popping |= 1 << reg_containing_return_addr;
26602 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26603 reg_containing_return_addr);
26605 reg_containing_return_addr = LR_REGNUM;
26608 /* If we have registers left on the stack then pop some more.
26609 We know that at most we will want to pop FP and SP. */
26610 if (pops_needed > 0)
26612 int popped_into;
26613 int move_to;
26615 thumb_pop (f, regs_available_for_popping);
26617 /* We have popped either FP or SP.
26618 Move whichever one it is into the correct register. */
26619 popped_into = number_of_first_bit_set (regs_available_for_popping);
26620 move_to = number_of_first_bit_set (regs_to_pop);
26622 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26624 regs_to_pop &= ~(1 << move_to);
26626 --pops_needed;
26629 /* If we still have not popped everything then we must have only
26630 had one register available to us and we are now popping the SP. */
26631 if (pops_needed > 0)
26633 int popped_into;
26635 thumb_pop (f, regs_available_for_popping);
26637 popped_into = number_of_first_bit_set (regs_available_for_popping);
26639 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26641 /* assert (regs_to_pop == (1 << STACK_POINTER))
26642 assert (pops_needed == 1) */
26646 /* If necessary restore the a4 register. */
26647 if (restore_a4)
26649 if (reg_containing_return_addr != LR_REGNUM)
26651 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26652 reg_containing_return_addr = LR_REGNUM;
26655 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26658 if (crtl->calls_eh_return)
26659 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26661 /* Return to caller. */
26662 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26665 /* Scan INSN just before assembler is output for it.
26666 For Thumb-1, we track the status of the condition codes; this
26667 information is used in the cbranchsi4_insn pattern. */
26668 void
26669 thumb1_final_prescan_insn (rtx_insn *insn)
26671 if (flag_print_asm_name)
26672 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26673 INSN_ADDRESSES (INSN_UID (insn)));
26674 /* Don't overwrite the previous setter when we get to a cbranch. */
26675 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26677 enum attr_conds conds;
26679 if (cfun->machine->thumb1_cc_insn)
26681 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26682 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26683 CC_STATUS_INIT;
26685 conds = get_attr_conds (insn);
26686 if (conds == CONDS_SET)
26688 rtx set = single_set (insn);
26689 cfun->machine->thumb1_cc_insn = insn;
26690 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26691 cfun->machine->thumb1_cc_op1 = const0_rtx;
26692 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26693 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26695 rtx src1 = XEXP (SET_SRC (set), 1);
26696 if (src1 == const0_rtx)
26697 cfun->machine->thumb1_cc_mode = CCmode;
26699 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26701 /* Record the src register operand instead of dest because
26702 cprop_hardreg pass propagates src. */
26703 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26706 else if (conds != CONDS_NOCOND)
26707 cfun->machine->thumb1_cc_insn = NULL_RTX;
26710 /* Check whether an unexpected far jump is used. */
26711 if (cfun->machine->lr_save_eliminated
26712 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26713 internal_error ("Unexpected thumb1 far jump");
26717 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26719 unsigned HOST_WIDE_INT mask = 0xff;
26720 int i;
26722 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26723 if (val == 0) /* XXX */
26724 return 0;
26726 for (i = 0; i < 25; i++)
26727 if ((val & (mask << i)) == val)
26728 return 1;
26730 return 0;
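/* For illustration: the loop above accepts any value whose set bits fit in
   a single 8-bit window within the low 32 bits, e.g.

       0x000000ff  -> 1         (i == 0)
       0x00ff0000  -> 1         (i == 16)
       0x00000101  -> 0         (bits 0 and 8 span more than 8 bits)  */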
26733 /* Returns nonzero if the current function contains,
26734 or might contain a far jump. */
26735 static int
26736 thumb_far_jump_used_p (void)
26738 rtx_insn *insn;
26739 bool far_jump = false;
26740 unsigned int func_size = 0;
26742 /* This test is only important for leaf functions. */
26743 /* assert (!leaf_function_p ()); */
26745 /* If we have already decided that far jumps may be used,
26746 do not bother checking again, and always return true even if
26747 it turns out that they are not being used. Once we have made
26748 the decision that far jumps are present (and that hence the link
26749 register will be pushed onto the stack) we cannot go back on it. */
26750 if (cfun->machine->far_jump_used)
26751 return 1;
26753 /* If this function is not being called from the prologue/epilogue
26754 generation code then it must be being called from the
26755 INITIAL_ELIMINATION_OFFSET macro. */
26756 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26758 /* In this case we know that we are being asked about the elimination
26759 of the arg pointer register. If that register is not being used,
26760 then there are no arguments on the stack, and we do not have to
26761 worry that a far jump might force the prologue to push the link
26762 register, changing the stack offsets. In this case we can just
26763 return false, since the presence of far jumps in the function will
26764 not affect stack offsets.
26766 If the arg pointer is live (or if it was live, but has now been
26767 eliminated and so set to dead) then we do have to test to see if
26768 the function might contain a far jump. This test can lead to some
26769 false negatives, since before reload is completed, the length of
26770 branch instructions is not known, so gcc defaults to returning their
26771 longest length, which in turn sets the far jump attribute to true.
26773 A false negative will not result in bad code being generated, but it
26774 will result in a needless push and pop of the link register. We
26775 hope that this does not occur too often.
26777 If we need doubleword stack alignment this could affect the other
26778 elimination offsets so we can't risk getting it wrong. */
26779 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26780 cfun->machine->arg_pointer_live = 1;
26781 else if (!cfun->machine->arg_pointer_live)
26782 return 0;
26785 /* We should not change far_jump_used during or after reload, as there is
26786 no chance to change stack frame layout. */
26787 if (reload_in_progress || reload_completed)
26788 return 0;
26790 /* Check to see if the function contains a branch
26791 insn with the far jump attribute set. */
26792 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26794 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26796 far_jump = true;
26798 func_size += get_attr_length (insn);
26801 /* Attribute far_jump will always be true for thumb1 before
26802 shorten_branch pass. So checking far_jump attribute before
26803 shorten_branch isn't much useful.
26805 Following heuristic tries to estimate more accurately if a far jump
26806 may finally be used. The heuristic is very conservative as there is
26807 no chance to roll back a decision not to use far jumps.
26809 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26810 2-byte insn is associated with a 4-byte constant pool entry. Using
26811 function size 2048/3 as the threshold is conservative enough. */
26812 if (far_jump)
26814 if ((func_size * 3) >= 2048)
26816 /* Record the fact that we have decided that
26817 the function does use far jumps. */
26818 cfun->machine->far_jump_used = 1;
26819 return 1;
26823 return 0;
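/* Worked example of the threshold above (illustrative): a Thumb-1
   unconditional branch reaches roughly -2048..+2046 bytes.  In the
   worst case every 2-byte insn drags a 4-byte literal-pool entry, so a
   function whose insns sum to FUNC_SIZE bytes may occupy up to
   3 * FUNC_SIZE bytes.  With FUNC_SIZE == 700, 3 * 700 = 2100 >= 2048
   and far jumps are assumed; with FUNC_SIZE == 600, 1800 < 2048 and
   the cheaper return sequence can still be used.  */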
26826 /* Return nonzero if FUNC must be entered in ARM mode. */
26828 is_called_in_ARM_mode (tree func)
26830 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26832 /* Ignore the problem about functions whose address is taken. */
26833 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26834 return TRUE;
26836 #ifdef ARM_PE
26837 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26838 #else
26839 return FALSE;
26840 #endif
26843 /* Given the stack offsets and register mask in OFFSETS, decide how
26844 many additional registers to push instead of subtracting a constant
26845 from SP. For epilogues the principle is the same except we use pop.
26846 FOR_PROLOGUE indicates which we're generating. */
26847 static int
26848 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26850 HOST_WIDE_INT amount;
26851 unsigned long live_regs_mask = offsets->saved_regs_mask;
26852 /* Extract a mask of the ones we can give to the Thumb's push/pop
26853 instruction. */
26854 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26855 /* Then count how many other high registers will need to be pushed. */
26856 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26857 int n_free, reg_base, size;
26859 if (!for_prologue && frame_pointer_needed)
26860 amount = offsets->locals_base - offsets->saved_regs;
26861 else
26862 amount = offsets->outgoing_args - offsets->saved_regs;
26864 /* If the stack frame size is 512 exactly, we can save one load
26865 instruction, which should make this a win even when optimizing
26866 for speed. */
26867 if (!optimize_size && amount != 512)
26868 return 0;
26870 /* Can't do this if there are high registers to push. */
26871 if (high_regs_pushed != 0)
26872 return 0;
26874 /* Shouldn't do it in the prologue if no registers would normally
26875 be pushed at all. In the epilogue, also allow it if we'll have
26876 a pop insn for the PC. */
26877 if (l_mask == 0
26878 && (for_prologue
26879 || TARGET_BACKTRACE
26880 || (live_regs_mask & 1 << LR_REGNUM) == 0
26881 || TARGET_INTERWORK
26882 || crtl->args.pretend_args_size != 0))
26883 return 0;
26885 /* Don't do this if thumb_expand_prologue wants to emit instructions
26886 between the push and the stack frame allocation. */
26887 if (for_prologue
26888 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26889 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26890 return 0;
26892 reg_base = 0;
26893 n_free = 0;
26894 if (!for_prologue)
26896 size = arm_size_return_regs ();
26897 reg_base = ARM_NUM_INTS (size);
26898 live_regs_mask >>= reg_base;
26901 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26902 && (for_prologue || call_used_regs[reg_base + n_free]))
26904 live_regs_mask >>= 1;
26905 n_free++;
26908 if (n_free == 0)
26909 return 0;
26910 gcc_assert (amount / 4 * 4 == amount);
26912 if (amount >= 512 && (amount - n_free * 4) < 512)
26913 return (amount - 508) / 4;
26914 if (amount <= n_free * 4)
26915 return amount / 4;
26916 return 0;
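/* Worked examples of the arithmetic above (illustrative):
   - Optimizing for speed, AMOUNT must be exactly 512.  With one free
     register (N_FREE == 1), (512 - 4) < 512 holds and the function
     returns (512 - 508) / 4 == 1: pushing one extra register leaves a
     508-byte adjustment, which fits a single Thumb-1 "sub sp, #imm".
   - Optimizing for size with AMOUNT == 8 and N_FREE == 3, AMOUNT is no
     more than N_FREE * 4, so the function returns 2 and the
     "sub sp, #8" is replaced entirely by pushing two extra registers.  */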
26919 /* The bits which aren't usefully expanded as rtl. */
26920 const char *
26921 thumb1_unexpanded_epilogue (void)
26923 arm_stack_offsets *offsets;
26924 int regno;
26925 unsigned long live_regs_mask = 0;
26926 int high_regs_pushed = 0;
26927 int extra_pop;
26928 int had_to_push_lr;
26929 int size;
26931 if (cfun->machine->return_used_this_function != 0)
26932 return "";
26934 if (IS_NAKED (arm_current_func_type ()))
26935 return "";
26937 offsets = arm_get_frame_offsets ();
26938 live_regs_mask = offsets->saved_regs_mask;
26939 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26941 /* Where possible, deduce the registers used from the function's return
26942 value.  This is more reliable than examining df_regs_ever_live_p ()
26943 because that will be set if the register is ever used in the function,
26944 not just if the register is used to hold a return value. */
26945 size = arm_size_return_regs ();
26947 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26948 if (extra_pop > 0)
26950 unsigned long extra_mask = (1 << extra_pop) - 1;
26951 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26954 /* The prolog may have pushed some high registers to use as
26955 work registers. e.g. the testsuite file:
26956 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26957 compiles to produce:
26958 push {r4, r5, r6, r7, lr}
26959 mov r7, r9
26960 mov r6, r8
26961 push {r6, r7}
26962 as part of the prolog. We have to undo that pushing here. */
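/* For illustration only, a plausible undo sequence for the prologue
   quoted above (assuming the return value is small enough that r2 and
   r3 are free to act as scratch registers):
       pop   {r2, r3}            @ saved values of r8 and r9
       mov   r8, r2
       mov   r9, r3
       pop   {r4, r5, r6, r7, pc}
   The exact low registers chosen depend on the return-value size
   computed below.  */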
26964 if (high_regs_pushed)
26966 unsigned long mask = live_regs_mask & 0xff;
26967 int next_hi_reg;
26969 /* The available low registers depend on the size of the value we are
26970 returning. */
26971 if (size <= 12)
26972 mask |= 1 << 3;
26973 if (size <= 8)
26974 mask |= 1 << 2;
26976 if (mask == 0)
26977 /* Oh dear! We have no low registers into which we can pop
26978 high registers! */
26979 internal_error
26980 ("no low registers available for popping high registers");
26982 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26983 if (live_regs_mask & (1 << next_hi_reg))
26984 break;
26986 while (high_regs_pushed)
26988 /* Find lo register(s) into which the high register(s) can
26989 be popped. */
26990 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26992 if (mask & (1 << regno))
26993 high_regs_pushed--;
26994 if (high_regs_pushed == 0)
26995 break;
26998 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
27000 /* Pop the values into the low register(s). */
27001 thumb_pop (asm_out_file, mask);
27003 /* Move the value(s) into the high registers. */
27004 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
27006 if (mask & (1 << regno))
27008 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
27009 regno);
27011 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
27012 if (live_regs_mask & (1 << next_hi_reg))
27013 break;
27017 live_regs_mask &= ~0x0f00;
27020 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
27021 live_regs_mask &= 0xff;
27023 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
27025 /* Pop the return address into the PC. */
27026 if (had_to_push_lr)
27027 live_regs_mask |= 1 << PC_REGNUM;
27029 /* Either no argument registers were pushed or a backtrace
27030 structure was created which includes an adjusted stack
27031 pointer, so just pop everything. */
27032 if (live_regs_mask)
27033 thumb_pop (asm_out_file, live_regs_mask);
27035 /* We have either just popped the return address into the
27036 PC or it was kept in LR for the entire function.
27037 Note that thumb_pop has already called thumb_exit if the
27038 PC was in the list. */
27039 if (!had_to_push_lr)
27040 thumb_exit (asm_out_file, LR_REGNUM);
27042 else
27044 /* Pop everything but the return address. */
27045 if (live_regs_mask)
27046 thumb_pop (asm_out_file, live_regs_mask);
27048 if (had_to_push_lr)
27050 if (size > 12)
27052 /* We have no free low regs, so save one. */
27053 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27054 LAST_ARG_REGNUM);
27057 /* Get the return address into a temporary register. */
27058 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27060 if (size > 12)
27062 /* Move the return address to lr. */
27063 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27064 LAST_ARG_REGNUM);
27065 /* Restore the low register. */
27066 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27067 IP_REGNUM);
27068 regno = LR_REGNUM;
27070 else
27071 regno = LAST_ARG_REGNUM;
27073 else
27074 regno = LR_REGNUM;
27076 /* Remove the argument registers that were pushed onto the stack. */
27077 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27078 SP_REGNUM, SP_REGNUM,
27079 crtl->args.pretend_args_size);
27081 thumb_exit (asm_out_file, regno);
27084 return "";
27087 /* Functions to save and restore machine-specific function data. */
27088 static struct machine_function *
27089 arm_init_machine_status (void)
27091 struct machine_function *machine;
27092 machine = ggc_cleared_alloc<machine_function> ();
27094 #if ARM_FT_UNKNOWN != 0
27095 machine->func_type = ARM_FT_UNKNOWN;
27096 #endif
27097 return machine;
27100 /* Return an RTX indicating where the return address to the
27101 calling function can be found. */
27103 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27105 if (count != 0)
27106 return NULL_RTX;
27108 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27111 /* Do anything needed before RTL is emitted for each function. */
27112 void
27113 arm_init_expanders (void)
27115 /* Arrange to initialize and mark the machine per-function status. */
27116 init_machine_status = arm_init_machine_status;
27118 /* This is to stop the combine pass optimizing away the alignment
27119 adjustment of va_arg. */
27120 /* ??? It is claimed that this should not be necessary. */
27121 if (cfun)
27122 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27126 /* Like arm_compute_initial_elimination_offset. Simpler because there
27127 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27128 to point at the base of the local variables after static stack
27129 space for a function has been allocated. */
27131 HOST_WIDE_INT
27132 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27134 arm_stack_offsets *offsets;
27136 offsets = arm_get_frame_offsets ();
27138 switch (from)
27140 case ARG_POINTER_REGNUM:
27141 switch (to)
27143 case STACK_POINTER_REGNUM:
27144 return offsets->outgoing_args - offsets->saved_args;
27146 case FRAME_POINTER_REGNUM:
27147 return offsets->soft_frame - offsets->saved_args;
27149 case ARM_HARD_FRAME_POINTER_REGNUM:
27150 return offsets->saved_regs - offsets->saved_args;
27152 case THUMB_HARD_FRAME_POINTER_REGNUM:
27153 return offsets->locals_base - offsets->saved_args;
27155 default:
27156 gcc_unreachable ();
27158 break;
27160 case FRAME_POINTER_REGNUM:
27161 switch (to)
27163 case STACK_POINTER_REGNUM:
27164 return offsets->outgoing_args - offsets->soft_frame;
27166 case ARM_HARD_FRAME_POINTER_REGNUM:
27167 return offsets->saved_regs - offsets->soft_frame;
27169 case THUMB_HARD_FRAME_POINTER_REGNUM:
27170 return offsets->locals_base - offsets->soft_frame;
27172 default:
27173 gcc_unreachable ();
27175 break;
27177 default:
27178 gcc_unreachable ();
27182 /* Generate the function's prologue. */
27184 void
27185 thumb1_expand_prologue (void)
27187 rtx_insn *insn;
27189 HOST_WIDE_INT amount;
27190 arm_stack_offsets *offsets;
27191 unsigned long func_type;
27192 int regno;
27193 unsigned long live_regs_mask;
27194 unsigned long l_mask;
27195 unsigned high_regs_pushed = 0;
27197 func_type = arm_current_func_type ();
27199 /* Naked functions don't have prologues. */
27200 if (IS_NAKED (func_type))
27201 return;
27203 if (IS_INTERRUPT (func_type))
27205 error ("interrupt Service Routines cannot be coded in Thumb mode");
27206 return;
27209 if (is_called_in_ARM_mode (current_function_decl))
27210 emit_insn (gen_prologue_thumb1_interwork ());
27212 offsets = arm_get_frame_offsets ();
27213 live_regs_mask = offsets->saved_regs_mask;
27215 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27216 l_mask = live_regs_mask & 0x40ff;
27217 /* Then count how many other high registers will need to be pushed. */
27218 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27220 if (crtl->args.pretend_args_size)
27222 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27224 if (cfun->machine->uses_anonymous_args)
27226 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27227 unsigned long mask;
27229 mask = 1ul << (LAST_ARG_REGNUM + 1);
27230 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27232 insn = thumb1_emit_multi_reg_push (mask, 0);
27234 else
27236 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27237 stack_pointer_rtx, x));
27239 RTX_FRAME_RELATED_P (insn) = 1;
27242 if (TARGET_BACKTRACE)
27244 HOST_WIDE_INT offset = 0;
27245 unsigned work_register;
27246 rtx work_reg, x, arm_hfp_rtx;
27248 /* We have been asked to create a stack backtrace structure.
27249 The code looks like this:
27251 0 .align 2
27252 0 func:
27253 0 sub SP, #16 Reserve space for 4 registers.
27254 2 push {R7} Push low registers.
27255 4 add R7, SP, #20 Get the stack pointer before the push.
27256 6 str R7, [SP, #8] Store the stack pointer
27257 (before reserving the space).
27258 8 mov R7, PC Get hold of the start of this code + 12.
27259 10 str R7, [SP, #16] Store it.
27260 12 mov R7, FP Get hold of the current frame pointer.
27261 14 str R7, [SP, #4] Store it.
27262 16 mov R7, LR Get hold of the current return address.
27263 18 str R7, [SP, #12] Store it.
27264 20 add R7, SP, #16 Point at the start of the
27265 backtrace structure.
27266 22 mov FP, R7 Put this value into the frame pointer. */
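/* Reading of the sequence above (illustrative; offsets are relative to
   the new frame pointer once R7 has been copied into FP):
       [FP, #0]    saved PC (start of this code + 12)
       [FP, #-4]   saved LR
       [FP, #-8]   caller's SP (value before the 16-byte reservation)
       [FP, #-12]  caller's FP
   i.e. the four-word structure a backtrace walker expects.  */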
27268 work_register = thumb_find_work_register (live_regs_mask);
27269 work_reg = gen_rtx_REG (SImode, work_register);
27270 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27272 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27273 stack_pointer_rtx, GEN_INT (-16)));
27274 RTX_FRAME_RELATED_P (insn) = 1;
27276 if (l_mask)
27278 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27279 RTX_FRAME_RELATED_P (insn) = 1;
27281 offset = bit_count (l_mask) * UNITS_PER_WORD;
27284 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27285 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27287 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27288 x = gen_frame_mem (SImode, x);
27289 emit_move_insn (x, work_reg);
27291 /* Make sure that the instruction fetching the PC is in the right place
27292 to calculate "start of backtrace creation code + 12". */
27293 /* ??? The stores using the common WORK_REG ought to be enough to
27294 prevent the scheduler from doing anything weird. Failing that
27295 we could always move all of the following into an UNSPEC_VOLATILE. */
27296 if (l_mask)
27298 x = gen_rtx_REG (SImode, PC_REGNUM);
27299 emit_move_insn (work_reg, x);
27301 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27302 x = gen_frame_mem (SImode, x);
27303 emit_move_insn (x, work_reg);
27305 emit_move_insn (work_reg, arm_hfp_rtx);
27307 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27308 x = gen_frame_mem (SImode, x);
27309 emit_move_insn (x, work_reg);
27311 else
27313 emit_move_insn (work_reg, arm_hfp_rtx);
27315 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27316 x = gen_frame_mem (SImode, x);
27317 emit_move_insn (x, work_reg);
27319 x = gen_rtx_REG (SImode, PC_REGNUM);
27320 emit_move_insn (work_reg, x);
27322 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27323 x = gen_frame_mem (SImode, x);
27324 emit_move_insn (x, work_reg);
27327 x = gen_rtx_REG (SImode, LR_REGNUM);
27328 emit_move_insn (work_reg, x);
27330 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27331 x = gen_frame_mem (SImode, x);
27332 emit_move_insn (x, work_reg);
27334 x = GEN_INT (offset + 12);
27335 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27337 emit_move_insn (arm_hfp_rtx, work_reg);
27339 /* Optimization: If we are not pushing any low registers but we are going
27340 to push some high registers then delay our first push. This will just
27341 be a push of LR and we can combine it with the push of the first high
27342 register. */
27343 else if ((l_mask & 0xff) != 0
27344 || (high_regs_pushed == 0 && l_mask))
27346 unsigned long mask = l_mask;
27347 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27348 insn = thumb1_emit_multi_reg_push (mask, mask);
27349 RTX_FRAME_RELATED_P (insn) = 1;
27352 if (high_regs_pushed)
27354 unsigned pushable_regs;
27355 unsigned next_hi_reg;
27356 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27357 : crtl->args.info.nregs;
27358 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27360 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27361 if (live_regs_mask & (1 << next_hi_reg))
27362 break;
27364 /* Here we need to mask out registers used for passing arguments,
27365 even if they could otherwise be pushed.  This avoids using them to
27366 stash the high registers; such a stash could clobber incoming arguments. */
27367 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27369 if (pushable_regs == 0)
27370 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27372 while (high_regs_pushed > 0)
27374 unsigned long real_regs_mask = 0;
27376 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27378 if (pushable_regs & (1 << regno))
27380 emit_move_insn (gen_rtx_REG (SImode, regno),
27381 gen_rtx_REG (SImode, next_hi_reg));
27383 high_regs_pushed --;
27384 real_regs_mask |= (1 << next_hi_reg);
27386 if (high_regs_pushed)
27388 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27389 next_hi_reg --)
27390 if (live_regs_mask & (1 << next_hi_reg))
27391 break;
27393 else
27395 pushable_regs &= ~((1 << regno) - 1);
27396 break;
27401 /* If we had to find a work register and we have not yet
27402 saved the LR then add it to the list of regs to push. */
27403 if (l_mask == (1 << LR_REGNUM))
27405 pushable_regs |= l_mask;
27406 real_regs_mask |= l_mask;
27407 l_mask = 0;
27410 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27411 RTX_FRAME_RELATED_P (insn) = 1;
27415 /* Load the pic register before setting the frame pointer,
27416 so we can use r7 as a temporary work register. */
27417 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27418 arm_load_pic_register (live_regs_mask);
27420 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27421 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27422 stack_pointer_rtx);
27424 if (flag_stack_usage_info)
27425 current_function_static_stack_size
27426 = offsets->outgoing_args - offsets->saved_args;
27428 amount = offsets->outgoing_args - offsets->saved_regs;
27429 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27430 if (amount)
27432 if (amount < 512)
27434 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27435 GEN_INT (- amount)));
27436 RTX_FRAME_RELATED_P (insn) = 1;
27438 else
27440 rtx reg, dwarf;
27442 /* The stack decrement is too big for an immediate value in a single
27443 insn. In theory we could issue multiple subtracts, but after
27444 three of them it becomes more space efficient to place the full
27445 value in the constant pool and load into a register. (Also the
27446 ARM debugger really likes to see only one stack decrement per
27447 function). So instead we look for a scratch register into which
27448 we can load the decrement, and then we subtract this from the
27449 stack pointer. Unfortunately on the thumb the only available
27450 scratch registers are the argument registers, and we cannot use
27451 these as they may hold arguments to the function. Instead we
27452 attempt to locate a call preserved register which is used by this
27453 function. If we can find one, then we know that it will have
27454 been pushed at the start of the prologue and so we can corrupt
27455 it now. */
27456 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27457 if (live_regs_mask & (1 << regno))
27458 break;
27460 gcc_assert(regno <= LAST_LO_REGNUM);
27462 reg = gen_rtx_REG (SImode, regno);
27464 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27466 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27467 stack_pointer_rtx, reg));
27469 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27470 plus_constant (Pmode, stack_pointer_rtx,
27471 -amount));
27472 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27473 RTX_FRAME_RELATED_P (insn) = 1;
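/* A sketch of the code this path produces (illustrative; the scratch
   register depends on which call-saved low register is live), for
   AMOUNT == 1024 with r4 live, where the -1024 typically comes from a
   literal-pool load:
       ldr   r4, .Lnnn        @ .Lnnn: .word -1024
       add   sp, sp, r4
   The REG_FRAME_RELATED_EXPR note added above tells the unwinder this
   is equivalent to sp = sp - 1024.  */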
27477 if (frame_pointer_needed)
27478 thumb_set_frame_pointer (offsets);
27480 /* If we are profiling, make sure no instructions are scheduled before
27481 the call to mcount. Similarly if the user has requested no
27482 scheduling in the prolog. Similarly if we want non-call exceptions
27483 using the EABI unwinder, to prevent faulting instructions from being
27484 swapped with a stack adjustment. */
27485 if (crtl->profile || !TARGET_SCHED_PROLOG
27486 || (arm_except_unwind_info (&global_options) == UI_TARGET
27487 && cfun->can_throw_non_call_exceptions))
27488 emit_insn (gen_blockage ());
27490 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27491 if (live_regs_mask & 0xff)
27492 cfun->machine->lr_save_eliminated = 0;
27495 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27496 POP instruction can be generated. LR should be replaced by PC. All
27497 the checks required are already done by USE_RETURN_INSN (). Hence,
27498 all we really need to check here is if single register is to be
27499 returned, or multiple register return. */
27500 void
27501 thumb2_expand_return (bool simple_return)
27503 int i, num_regs;
27504 unsigned long saved_regs_mask;
27505 arm_stack_offsets *offsets;
27507 offsets = arm_get_frame_offsets ();
27508 saved_regs_mask = offsets->saved_regs_mask;
27510 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27511 if (saved_regs_mask & (1 << i))
27512 num_regs++;
27514 if (!simple_return && saved_regs_mask)
27516 if (num_regs == 1)
27518 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27519 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27520 rtx addr = gen_rtx_MEM (SImode,
27521 gen_rtx_POST_INC (SImode,
27522 stack_pointer_rtx));
27523 set_mem_alias_set (addr, get_frame_alias_set ());
27524 XVECEXP (par, 0, 0) = ret_rtx;
27525 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27526 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27527 emit_jump_insn (par);
27529 else
27531 saved_regs_mask &= ~ (1 << LR_REGNUM);
27532 saved_regs_mask |= (1 << PC_REGNUM);
27533 arm_emit_multi_reg_pop (saved_regs_mask);
27536 else
27538 emit_jump_insn (simple_return_rtx);
27542 void
27543 thumb1_expand_epilogue (void)
27545 HOST_WIDE_INT amount;
27546 arm_stack_offsets *offsets;
27547 int regno;
27549 /* Naked functions don't have prologues. */
27550 if (IS_NAKED (arm_current_func_type ()))
27551 return;
27553 offsets = arm_get_frame_offsets ();
27554 amount = offsets->outgoing_args - offsets->saved_regs;
27556 if (frame_pointer_needed)
27558 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27559 amount = offsets->locals_base - offsets->saved_regs;
27561 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27563 gcc_assert (amount >= 0);
27564 if (amount)
27566 emit_insn (gen_blockage ());
27568 if (amount < 512)
27569 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27570 GEN_INT (amount)));
27571 else
27573 /* r3 is always free in the epilogue. */
27574 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27576 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27577 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27581 /* Emit a USE (stack_pointer_rtx), so that
27582 the stack adjustment will not be deleted. */
27583 emit_insn (gen_force_register_use (stack_pointer_rtx));
27585 if (crtl->profile || !TARGET_SCHED_PROLOG)
27586 emit_insn (gen_blockage ());
27588 /* Emit a clobber for each insn that will be restored in the epilogue,
27589 so that flow2 will get register lifetimes correct. */
27590 for (regno = 0; regno < 13; regno++)
27591 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27592 emit_clobber (gen_rtx_REG (SImode, regno));
27594 if (! df_regs_ever_live_p (LR_REGNUM))
27595 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27598 /* Epilogue code for APCS frame. */
27599 static void
27600 arm_expand_epilogue_apcs_frame (bool really_return)
27602 unsigned long func_type;
27603 unsigned long saved_regs_mask;
27604 int num_regs = 0;
27605 int i;
27606 int floats_from_frame = 0;
27607 arm_stack_offsets *offsets;
27609 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27610 func_type = arm_current_func_type ();
27612 /* Get frame offsets for ARM. */
27613 offsets = arm_get_frame_offsets ();
27614 saved_regs_mask = offsets->saved_regs_mask;
27616 /* Find the offset of the floating-point save area in the frame. */
27617 floats_from_frame
27618 = (offsets->saved_args
27619 + arm_compute_static_chain_stack_bytes ()
27620 - offsets->frame);
27622 /* Compute how many core registers saved and how far away the floats are. */
27623 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27624 if (saved_regs_mask & (1 << i))
27626 num_regs++;
27627 floats_from_frame += 4;
27630 if (TARGET_HARD_FLOAT && TARGET_VFP)
27632 int start_reg;
27633 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27635 /* The offset is from IP_REGNUM. */
27636 int saved_size = arm_get_vfp_saved_size ();
27637 if (saved_size > 0)
27639 rtx_insn *insn;
27640 floats_from_frame += saved_size;
27641 insn = emit_insn (gen_addsi3 (ip_rtx,
27642 hard_frame_pointer_rtx,
27643 GEN_INT (-floats_from_frame)));
27644 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27645 ip_rtx, hard_frame_pointer_rtx);
27648 /* Generate VFP register multi-pop. */
27649 start_reg = FIRST_VFP_REGNUM;
27651 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27652 /* Look for a case where a reg does not need restoring. */
27653 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27654 && (!df_regs_ever_live_p (i + 1)
27655 || call_used_regs[i + 1]))
27657 if (start_reg != i)
27658 arm_emit_vfp_multi_reg_pop (start_reg,
27659 (i - start_reg) / 2,
27660 gen_rtx_REG (SImode,
27661 IP_REGNUM));
27662 start_reg = i + 2;
27665 /* Restore the remaining regs that we have discovered (or possibly
27666 even all of them, if the conditional in the for loop never
27667 fired). */
27668 if (start_reg != i)
27669 arm_emit_vfp_multi_reg_pop (start_reg,
27670 (i - start_reg) / 2,
27671 gen_rtx_REG (SImode, IP_REGNUM));
27674 if (TARGET_IWMMXT)
27676 /* The frame pointer is guaranteed to be non-double-word aligned, as
27677 it is set to double-word-aligned old_stack_pointer - 4. */
27678 rtx_insn *insn;
27679 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27681 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27682 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27684 rtx addr = gen_frame_mem (V2SImode,
27685 plus_constant (Pmode, hard_frame_pointer_rtx,
27686 - lrm_count * 4));
27687 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27688 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27689 gen_rtx_REG (V2SImode, i),
27690 NULL_RTX);
27691 lrm_count += 2;
27695 /* saved_regs_mask should contain IP, which holds the old stack pointer
27696 saved when the frame was created. Since SP and IP are adjacent registers,
27697 we can restore the value directly into SP. */
27698 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27699 saved_regs_mask &= ~(1 << IP_REGNUM);
27700 saved_regs_mask |= (1 << SP_REGNUM);
27702 /* There are two registers left in saved_regs_mask - LR and PC. We
27703 only need to restore LR (the return address), but to
27704 save time we can load it directly into PC, unless we need a
27705 special function exit sequence, or we are not really returning. */
27706 if (really_return
27707 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27708 && !crtl->calls_eh_return)
27709 /* Delete LR from the register mask, so that LR on
27710 the stack is loaded into the PC in the register mask. */
27711 saved_regs_mask &= ~(1 << LR_REGNUM);
27712 else
27713 saved_regs_mask &= ~(1 << PC_REGNUM);
27715 num_regs = bit_count (saved_regs_mask);
27716 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27718 rtx_insn *insn;
27719 emit_insn (gen_blockage ());
27720 /* Unwind the stack to just below the saved registers. */
27721 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27722 hard_frame_pointer_rtx,
27723 GEN_INT (- 4 * num_regs)));
27725 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27726 stack_pointer_rtx, hard_frame_pointer_rtx);
27729 arm_emit_multi_reg_pop (saved_regs_mask);
27731 if (IS_INTERRUPT (func_type))
27733 /* Interrupt handlers will have pushed the
27734 IP onto the stack, so restore it now. */
27735 rtx_insn *insn;
27736 rtx addr = gen_rtx_MEM (SImode,
27737 gen_rtx_POST_INC (SImode,
27738 stack_pointer_rtx));
27739 set_mem_alias_set (addr, get_frame_alias_set ());
27740 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27741 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27742 gen_rtx_REG (SImode, IP_REGNUM),
27743 NULL_RTX);
27746 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27747 return;
27749 if (crtl->calls_eh_return)
27750 emit_insn (gen_addsi3 (stack_pointer_rtx,
27751 stack_pointer_rtx,
27752 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27754 if (IS_STACKALIGN (func_type))
27755 /* Restore the original stack pointer. Before prologue, the stack was
27756 realigned and the original stack pointer saved in r0. For details,
27757 see comment in arm_expand_prologue. */
27758 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27760 emit_jump_insn (simple_return_rtx);
27763 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27764 function is not a sibcall. */
27765 void
27766 arm_expand_epilogue (bool really_return)
27768 unsigned long func_type;
27769 unsigned long saved_regs_mask;
27770 int num_regs = 0;
27771 int i;
27772 int amount;
27773 arm_stack_offsets *offsets;
27775 func_type = arm_current_func_type ();
27777 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27778 let output_return_instruction take care of instruction emission if any. */
27779 if (IS_NAKED (func_type)
27780 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27782 if (really_return)
27783 emit_jump_insn (simple_return_rtx);
27784 return;
27787 /* If we are throwing an exception, then we really must be doing a
27788 return, so we can't tail-call. */
27789 gcc_assert (!crtl->calls_eh_return || really_return);
27791 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27793 arm_expand_epilogue_apcs_frame (really_return);
27794 return;
27797 /* Get frame offsets for ARM. */
27798 offsets = arm_get_frame_offsets ();
27799 saved_regs_mask = offsets->saved_regs_mask;
27800 num_regs = bit_count (saved_regs_mask);
27802 if (frame_pointer_needed)
27804 rtx_insn *insn;
27805 /* Restore stack pointer if necessary. */
27806 if (TARGET_ARM)
27808 /* In ARM mode, frame pointer points to first saved register.
27809 Restore stack pointer to last saved register. */
27810 amount = offsets->frame - offsets->saved_regs;
27812 /* Force out any pending memory operations that reference stacked data
27813 before stack de-allocation occurs. */
27814 emit_insn (gen_blockage ());
27815 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27816 hard_frame_pointer_rtx,
27817 GEN_INT (amount)));
27818 arm_add_cfa_adjust_cfa_note (insn, amount,
27819 stack_pointer_rtx,
27820 hard_frame_pointer_rtx);
27822 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27823 deleted. */
27824 emit_insn (gen_force_register_use (stack_pointer_rtx));
27826 else
27828 /* In Thumb-2 mode, the frame pointer points to the last saved
27829 register. */
27830 amount = offsets->locals_base - offsets->saved_regs;
27831 if (amount)
27833 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27834 hard_frame_pointer_rtx,
27835 GEN_INT (amount)));
27836 arm_add_cfa_adjust_cfa_note (insn, amount,
27837 hard_frame_pointer_rtx,
27838 hard_frame_pointer_rtx);
27841 /* Force out any pending memory operations that reference stacked data
27842 before stack de-allocation occurs. */
27843 emit_insn (gen_blockage ());
27844 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27845 hard_frame_pointer_rtx));
27846 arm_add_cfa_adjust_cfa_note (insn, 0,
27847 stack_pointer_rtx,
27848 hard_frame_pointer_rtx);
27849 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27850 deleted. */
27851 emit_insn (gen_force_register_use (stack_pointer_rtx));
27854 else
27856 /* Pop off outgoing args and local frame to adjust stack pointer to
27857 last saved register. */
27858 amount = offsets->outgoing_args - offsets->saved_regs;
27859 if (amount)
27861 rtx_insn *tmp;
27862 /* Force out any pending memory operations that reference stacked data
27863 before stack de-allocation occurs. */
27864 emit_insn (gen_blockage ());
27865 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27866 stack_pointer_rtx,
27867 GEN_INT (amount)));
27868 arm_add_cfa_adjust_cfa_note (tmp, amount,
27869 stack_pointer_rtx, stack_pointer_rtx);
27870 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27871 not deleted. */
27872 emit_insn (gen_force_register_use (stack_pointer_rtx));
27876 if (TARGET_HARD_FLOAT && TARGET_VFP)
27878 /* Generate VFP register multi-pop. */
27879 int end_reg = LAST_VFP_REGNUM + 1;
27881 /* Scan the registers in reverse order. We need to match
27882 any groupings made in the prologue and generate matching
27883 vldm operations.  Grouping matters because, unlike pop,
27884 vldm can only restore consecutive registers.
27885 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27886 /* Look for a case where a reg does not need restoring. */
27887 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27888 && (!df_regs_ever_live_p (i + 1)
27889 || call_used_regs[i + 1]))
27891 /* Restore the regs discovered so far (from reg+2 to
27892 end_reg). */
27893 if (end_reg > i + 2)
27894 arm_emit_vfp_multi_reg_pop (i + 2,
27895 (end_reg - (i + 2)) / 2,
27896 stack_pointer_rtx);
27897 end_reg = i;
27900 /* Restore the remaining regs that we have discovered (or possibly
27901 even all of them, if the conditional in the for loop never
27902 fired). */
27903 if (end_reg > i + 2)
27904 arm_emit_vfp_multi_reg_pop (i + 2,
27905 (end_reg - (i + 2)) / 2,
27906 stack_pointer_rtx);
27909 if (TARGET_IWMMXT)
27910 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27911 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27913 rtx_insn *insn;
27914 rtx addr = gen_rtx_MEM (V2SImode,
27915 gen_rtx_POST_INC (SImode,
27916 stack_pointer_rtx));
27917 set_mem_alias_set (addr, get_frame_alias_set ());
27918 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27919 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27920 gen_rtx_REG (V2SImode, i),
27921 NULL_RTX);
27922 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27923 stack_pointer_rtx, stack_pointer_rtx);
27926 if (saved_regs_mask)
27928 rtx insn;
27929 bool return_in_pc = false;
27931 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27932 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27933 && !IS_STACKALIGN (func_type)
27934 && really_return
27935 && crtl->args.pretend_args_size == 0
27936 && saved_regs_mask & (1 << LR_REGNUM)
27937 && !crtl->calls_eh_return)
27939 saved_regs_mask &= ~(1 << LR_REGNUM);
27940 saved_regs_mask |= (1 << PC_REGNUM);
27941 return_in_pc = true;
27944 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27946 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27947 if (saved_regs_mask & (1 << i))
27949 rtx addr = gen_rtx_MEM (SImode,
27950 gen_rtx_POST_INC (SImode,
27951 stack_pointer_rtx));
27952 set_mem_alias_set (addr, get_frame_alias_set ());
27954 if (i == PC_REGNUM)
27956 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27957 XVECEXP (insn, 0, 0) = ret_rtx;
27958 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27959 gen_rtx_REG (SImode, i),
27960 addr);
27961 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27962 insn = emit_jump_insn (insn);
27964 else
27966 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27967 addr));
27968 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27969 gen_rtx_REG (SImode, i),
27970 NULL_RTX);
27971 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27972 stack_pointer_rtx,
27973 stack_pointer_rtx);
27977 else
27979 if (TARGET_LDRD
27980 && current_tune->prefer_ldrd_strd
27981 && !optimize_function_for_size_p (cfun))
27983 if (TARGET_THUMB2)
27984 thumb2_emit_ldrd_pop (saved_regs_mask);
27985 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27986 arm_emit_ldrd_pop (saved_regs_mask);
27987 else
27988 arm_emit_multi_reg_pop (saved_regs_mask);
27990 else
27991 arm_emit_multi_reg_pop (saved_regs_mask);
27994 if (return_in_pc == true)
27995 return;
27998 if (crtl->args.pretend_args_size)
28000 int i, j;
28001 rtx dwarf = NULL_RTX;
28002 rtx_insn *tmp =
28003 emit_insn (gen_addsi3 (stack_pointer_rtx,
28004 stack_pointer_rtx,
28005 GEN_INT (crtl->args.pretend_args_size)));
28007 RTX_FRAME_RELATED_P (tmp) = 1;
28009 if (cfun->machine->uses_anonymous_args)
28011 /* Restore pretend args.  See arm_expand_prologue for how the
28012 pretend args are saved on the stack. */
28013 int num_regs = crtl->args.pretend_args_size / 4;
28014 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28015 for (j = 0, i = 0; j < num_regs; i++)
28016 if (saved_regs_mask & (1 << i))
28018 rtx reg = gen_rtx_REG (SImode, i);
28019 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28020 j++;
28022 REG_NOTES (tmp) = dwarf;
28024 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
28025 stack_pointer_rtx, stack_pointer_rtx);
28028 if (!really_return)
28029 return;
28031 if (crtl->calls_eh_return)
28032 emit_insn (gen_addsi3 (stack_pointer_rtx,
28033 stack_pointer_rtx,
28034 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28036 if (IS_STACKALIGN (func_type))
28037 /* Restore the original stack pointer. Before prologue, the stack was
28038 realigned and the original stack pointer saved in r0. For details,
28039 see comment in arm_expand_prologue. */
28040 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
28042 emit_jump_insn (simple_return_rtx);
28045 /* Implementation of insn prologue_thumb1_interwork. This is the first
28046 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28048 const char *
28049 thumb1_output_interwork (void)
28051 const char * name;
28052 FILE *f = asm_out_file;
28054 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28055 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28056 == SYMBOL_REF);
28057 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28059 /* Generate code sequence to switch us into Thumb mode. */
28060 /* The .code 32 directive has already been emitted by
28061 ASM_DECLARE_FUNCTION_NAME. */
28062 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28063 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28065 /* Generate a label, so that the debugger will notice the
28066 change in instruction sets. This label is also used by
28067 the assembler to bypass the ARM code when this function
28068 is called from a Thumb encoded function elsewhere in the
28069 same file. Hence the definition of STUB_NAME here must
28070 agree with the definition in gas/config/tc-arm.c. */
28072 #define STUB_NAME ".real_start_of"
28074 fprintf (f, "\t.code\t16\n");
28075 #ifdef ARM_PE
28076 if (arm_dllexport_name_p (name))
28077 name = arm_strip_name_encoding (name);
28078 #endif
28079 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28080 fprintf (f, "\t.thumb_func\n");
28081 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28083 return "";
28086 /* Handle the case of a double word load into a low register from
28087 a computed memory address. The computed address may involve a
28088 register which is overwritten by the load. */
28089 const char *
28090 thumb_load_double_from_address (rtx *operands)
28092 rtx addr;
28093 rtx base;
28094 rtx offset;
28095 rtx arg1;
28096 rtx arg2;
28098 gcc_assert (REG_P (operands[0]));
28099 gcc_assert (MEM_P (operands[1]));
28101 /* Get the memory address. */
28102 addr = XEXP (operands[1], 0);
28104 /* Work out how the memory address is computed. */
28105 switch (GET_CODE (addr))
28107 case REG:
28108 operands[2] = adjust_address (operands[1], SImode, 4);
28110 if (REGNO (operands[0]) == REGNO (addr))
28112 output_asm_insn ("ldr\t%H0, %2", operands);
28113 output_asm_insn ("ldr\t%0, %1", operands);
28115 else
28117 output_asm_insn ("ldr\t%0, %1", operands);
28118 output_asm_insn ("ldr\t%H0, %2", operands);
28120 break;
28122 case CONST:
28123 /* Compute <address> + 4 for the high order load. */
28124 operands[2] = adjust_address (operands[1], SImode, 4);
28126 output_asm_insn ("ldr\t%0, %1", operands);
28127 output_asm_insn ("ldr\t%H0, %2", operands);
28128 break;
28130 case PLUS:
28131 arg1 = XEXP (addr, 0);
28132 arg2 = XEXP (addr, 1);
28134 if (CONSTANT_P (arg1))
28135 base = arg2, offset = arg1;
28136 else
28137 base = arg1, offset = arg2;
28139 gcc_assert (REG_P (base));
28141 /* Catch the case of <address> = <reg> + <reg> */
28142 if (REG_P (offset))
28144 int reg_offset = REGNO (offset);
28145 int reg_base = REGNO (base);
28146 int reg_dest = REGNO (operands[0]);
28148 /* Add the base and offset registers together into the
28149 higher destination register. */
28150 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28151 reg_dest + 1, reg_base, reg_offset);
28153 /* Load the lower destination register from the address in
28154 the higher destination register. */
28155 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28156 reg_dest, reg_dest + 1);
28158 /* Load the higher destination register from its own address
28159 plus 4. */
28160 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28161 reg_dest + 1, reg_dest + 1);
28163 else
28165 /* Compute <address> + 4 for the high order load. */
28166 operands[2] = adjust_address (operands[1], SImode, 4);
28168 /* If the computed address is held in the low order register
28169 then load the high order register first, otherwise always
28170 load the low order register first. */
28171 if (REGNO (operands[0]) == REGNO (base))
28173 output_asm_insn ("ldr\t%H0, %2", operands);
28174 output_asm_insn ("ldr\t%0, %1", operands);
28176 else
28178 output_asm_insn ("ldr\t%0, %1", operands);
28179 output_asm_insn ("ldr\t%H0, %2", operands);
28182 break;
28184 case LABEL_REF:
28185 /* With no registers to worry about we can just load the value
28186 directly. */
28187 operands[2] = adjust_address (operands[1], SImode, 4);
28189 output_asm_insn ("ldr\t%H0, %2", operands);
28190 output_asm_insn ("ldr\t%0, %1", operands);
28191 break;
28193 default:
28194 gcc_unreachable ();
28197 return "";
28200 const char *
28201 thumb_output_move_mem_multiple (int n, rtx *operands)
28203 rtx tmp;
28205 switch (n)
28207 case 2:
28208 if (REGNO (operands[4]) > REGNO (operands[5]))
28210 tmp = operands[4];
28211 operands[4] = operands[5];
28212 operands[5] = tmp;
28214 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28215 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28216 break;
28218 case 3:
28219 if (REGNO (operands[4]) > REGNO (operands[5]))
28221 tmp = operands[4];
28222 operands[4] = operands[5];
28223 operands[5] = tmp;
28225 if (REGNO (operands[5]) > REGNO (operands[6]))
28227 tmp = operands[5];
28228 operands[5] = operands[6];
28229 operands[6] = tmp;
28231 if (REGNO (operands[4]) > REGNO (operands[5]))
28233 tmp = operands[4];
28234 operands[4] = operands[5];
28235 operands[5] = tmp;
28238 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28239 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28240 break;
28242 default:
28243 gcc_unreachable ();
28246 return "";
28249 /* Output a call-via instruction for thumb state. */
28250 const char *
28251 thumb_call_via_reg (rtx reg)
28253 int regno = REGNO (reg);
28254 rtx *labelp;
28256 gcc_assert (regno < LR_REGNUM);
28258 /* If we are in the normal text section we can use a single instance
28259 per compilation unit. If we are doing function sections, then we need
28260 an entry per section, since we can't rely on reachability. */
28261 if (in_section == text_section)
28263 thumb_call_reg_needed = 1;
28265 if (thumb_call_via_label[regno] == NULL)
28266 thumb_call_via_label[regno] = gen_label_rtx ();
28267 labelp = thumb_call_via_label + regno;
28269 else
28271 if (cfun->machine->call_via[regno] == NULL)
28272 cfun->machine->call_via[regno] = gen_label_rtx ();
28273 labelp = cfun->machine->call_via + regno;
28276 output_asm_insn ("bl\t%a0", labelp);
28277 return "";
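/* Illustrative output (the label name is made up; real labels come
   from gen_label_rtx): at the call site this routine emits
       bl    .L42
   and a matching veneer
       .L42: bx    r3
   is emitted later, either per compilation unit by arm_file_end below
   or per section via cfun->machine->call_via, giving an indirect call
   through r3 on cores intended to work without BLX to a register.  */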
28280 /* Routines for generating rtl. */
28281 void
28282 thumb_expand_movmemqi (rtx *operands)
28284 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28285 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28286 HOST_WIDE_INT len = INTVAL (operands[2]);
28287 HOST_WIDE_INT offset = 0;
28289 while (len >= 12)
28291 emit_insn (gen_movmem12b (out, in, out, in));
28292 len -= 12;
28295 if (len >= 8)
28297 emit_insn (gen_movmem8b (out, in, out, in));
28298 len -= 8;
28301 if (len >= 4)
28303 rtx reg = gen_reg_rtx (SImode);
28304 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28305 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28306 len -= 4;
28307 offset += 4;
28310 if (len >= 2)
28312 rtx reg = gen_reg_rtx (HImode);
28313 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28314 plus_constant (Pmode, in,
28315 offset))));
28316 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28317 offset)),
28318 reg));
28319 len -= 2;
28320 offset += 2;
28323 if (len)
28325 rtx reg = gen_reg_rtx (QImode);
28326 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28327 plus_constant (Pmode, in,
28328 offset))));
28329 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28330 offset)),
28331 reg));
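/* Worked example (illustrative): a 23-byte copy is emitted as
   12 + 8 + 2 + 1 -- one movmem12b and one movmem8b (both of which
   post-increment the pointer registers), then a halfword copy at
   offset 0 and a byte copy at offset 2 relative to the updated
   pointers.  */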
28335 void
28336 thumb_reload_out_hi (rtx *operands)
28338 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28341 /* Handle reading a half-word from memory during reload. */
28342 void
28343 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28345 gcc_unreachable ();
28348 /* Return the length of a function name prefix
28349 that starts with the character 'c'. */
28350 static int
28351 arm_get_strip_length (int c)
28353 switch (c)
28355 ARM_NAME_ENCODING_LENGTHS
28356 default: return 0;
28360 /* Return a pointer to a function's name with any
28361 and all prefix encodings stripped from it. */
28362 const char *
28363 arm_strip_name_encoding (const char *name)
28365 int skip;
28367 while ((skip = arm_get_strip_length (* name)))
28368 name += skip;
28370 return name;
28373 /* If there is a '*' anywhere in the name's prefix, then
28374 emit the stripped name verbatim, otherwise prepend an
28375 underscore if leading underscores are being used. */
28376 void
28377 arm_asm_output_labelref (FILE *stream, const char *name)
28379 int skip;
28380 int verbatim = 0;
28382 while ((skip = arm_get_strip_length (* name)))
28384 verbatim |= (*name == '*');
28385 name += skip;
28388 if (verbatim)
28389 fputs (name, stream);
28390 else
28391 asm_fprintf (stream, "%U%s", name);
28394 /* This function is used to emit an EABI tag and its associated value.
28395 We emit the numerical value of the tag in case the assembler does not
28396 support textual tags (e.g. gas prior to 2.20). If requested we include
28397 the tag name in a comment so that anyone reading the assembler output
28398 will know which tag is being set.
28400 This function is not static because arm-c.c needs it too. */
28402 void
28403 arm_emit_eabi_attribute (const char *name, int num, int val)
28405 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28406 if (flag_verbose_asm || flag_debug_asm)
28407 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28408 asm_fprintf (asm_out_file, "\n");
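/* For example, with -O2 the Tag_ABI_optimization_goals call in
   arm_file_start below produces
       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
   where the trailing '@' comment appears only under -fverbose-asm or
   -dA.  */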
28411 static void
28412 arm_file_start (void)
28414 int val;
28416 if (TARGET_UNIFIED_ASM)
28417 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28419 if (TARGET_BPABI)
28421 const char *fpu_name;
28422 if (arm_selected_arch)
28424 /* armv7ve doesn't support any extensions. */
28425 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28427 /* Keep backward compatibility for assemblers
28428 which don't support armv7ve. */
28429 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28430 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28431 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28432 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28433 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28435 else
28437 const char* pos = strchr (arm_selected_arch->name, '+');
28438 if (pos)
28440 char buf[15];
28441 gcc_assert (strlen (arm_selected_arch->name)
28442 <= sizeof (buf) / sizeof (*pos));
28443 strncpy (buf, arm_selected_arch->name,
28444 (pos - arm_selected_arch->name) * sizeof (*pos));
28445 buf[pos - arm_selected_arch->name] = '\0';
28446 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28447 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28449 else
28450 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28453 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28454 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28455 else
28457 const char* truncated_name
28458 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28459 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28462 if (TARGET_SOFT_FLOAT)
28464 fpu_name = "softvfp";
28466 else
28468 fpu_name = arm_fpu_desc->name;
28469 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28471 if (TARGET_HARD_FLOAT)
28472 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28473 if (TARGET_HARD_FLOAT_ABI)
28474 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28477 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28479 /* Some of these attributes only apply when the corresponding features
28480 are used. However we don't have any easy way of figuring this out.
28481 Conservatively record the setting that would have been used. */
28483 if (flag_rounding_math)
28484 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28486 if (!flag_unsafe_math_optimizations)
28488 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28489 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28491 if (flag_signaling_nans)
28492 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28494 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28495 flag_finite_math_only ? 1 : 3);
28497 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28498 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28499 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28500 flag_short_enums ? 1 : 2);
28502 /* Tag_ABI_optimization_goals. */
28503 if (optimize_size)
28504 val = 4;
28505 else if (optimize >= 2)
28506 val = 2;
28507 else if (optimize)
28508 val = 1;
28509 else
28510 val = 6;
28511 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28513 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28514 unaligned_access);
28516 if (arm_fp16_format)
28517 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28518 (int) arm_fp16_format);
28520 if (arm_lang_output_object_attributes_hook)
28521 arm_lang_output_object_attributes_hook();
28524 default_file_start ();
28527 static void
28528 arm_file_end (void)
28530 int regno;
28532 if (NEED_INDICATE_EXEC_STACK)
28533 /* Add .note.GNU-stack. */
28534 file_end_indicate_exec_stack ();
28536 if (! thumb_call_reg_needed)
28537 return;
28539 switch_to_section (text_section);
28540 asm_fprintf (asm_out_file, "\t.code 16\n");
28541 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28543 for (regno = 0; regno < LR_REGNUM; regno++)
28545 rtx label = thumb_call_via_label[regno];
28547 if (label != 0)
28549 targetm.asm_out.internal_label (asm_out_file, "L",
28550 CODE_LABEL_NUMBER (label));
28551 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28556 #ifndef ARM_PE
28557 /* Symbols in the text segment can be accessed without indirecting via the
28558 constant pool; it may take an extra binary operation, but this is still
28559 faster than indirecting via memory. Don't do this when not optimizing,
28560 since we won't be calculating all of the offsets necessary to do this
28561 simplification. */
28563 static void
28564 arm_encode_section_info (tree decl, rtx rtl, int first)
28566 if (optimize > 0 && TREE_CONSTANT (decl))
28567 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28569 default_encode_section_info (decl, rtl, first);
28571 #endif /* !ARM_PE */
28573 static void
28574 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28576 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28577 && !strcmp (prefix, "L"))
28579 arm_ccfsm_state = 0;
28580 arm_target_insn = NULL;
28582 default_internal_label (stream, prefix, labelno);
28585 /* Output code to add DELTA to the first argument, and then jump
28586 to FUNCTION. Used for C++ multiple inheritance. */
28587 static void
28588 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28589 HOST_WIDE_INT delta,
28590 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28591 tree function)
28593 static int thunk_label = 0;
28594 char label[256];
28595 char labelpc[256];
28596 int mi_delta = delta;
28597 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28598 int shift = 0;
28599 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28600 ? 1 : 0);
28601 if (mi_delta < 0)
28602 mi_delta = - mi_delta;
28604 final_start_function (emit_barrier (), file, 1);
28606 if (TARGET_THUMB1)
28608 int labelno = thunk_label++;
28609 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28610 /* Thunks are entered in ARM mode when available. */
28611 if (TARGET_THUMB1_ONLY)
28613 /* push r3 so we can use it as a temporary. */
28614 /* TODO: Omit this save if r3 is not used. */
28615 fputs ("\tpush {r3}\n", file);
28616 fputs ("\tldr\tr3, ", file);
28618 else
28620 fputs ("\tldr\tr12, ", file);
28622 assemble_name (file, label);
28623 fputc ('\n', file);
28624 if (flag_pic)
28626 /* If we are generating PIC, the ldr instruction below loads
28627 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28628 the address of the add + 8, so we have:
28630 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28631 = target + 1.
28633 Note that we have "+ 1" because some versions of GNU ld
28634 don't set the low bit of the result for R_ARM_REL32
28635 relocations against thumb function symbols.
28636 On ARMv6M this is +4, not +8. */
28637 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28638 assemble_name (file, labelpc);
28639 fputs (":\n", file);
28640 if (TARGET_THUMB1_ONLY)
28642 /* This is 2 insns after the start of the thunk, so we know it
28643 is 4-byte aligned. */
28644 fputs ("\tadd\tr3, pc, r3\n", file);
28645 fputs ("\tmov r12, r3\n", file);
28647 else
28648 fputs ("\tadd\tr12, pc, r12\n", file);
28650 else if (TARGET_THUMB1_ONLY)
28651 fputs ("\tmov r12, r3\n", file);
28653 if (TARGET_THUMB1_ONLY)
28655 if (mi_delta > 255)
28657 fputs ("\tldr\tr3, ", file);
28658 assemble_name (file, label);
28659 fputs ("+4\n", file);
28660 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28661 mi_op, this_regno, this_regno);
28663 else if (mi_delta != 0)
28665 /* Thumb1 unified syntax requires s suffix in instruction name when
28666 one of the operands is immediate. */
28667 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28668 mi_op, this_regno, this_regno,
28669 mi_delta);
28672 else
28674 /* TODO: Use movw/movt for large constants when available. */
28675 while (mi_delta != 0)
28677 if ((mi_delta & (3 << shift)) == 0)
28678 shift += 2;
28679 else
28681 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28682 mi_op, this_regno, this_regno,
28683 mi_delta & (0xff << shift));
28684 mi_delta &= ~(0xff << shift);
28685 shift += 8;
28689 if (TARGET_THUMB1)
28691 if (TARGET_THUMB1_ONLY)
28692 fputs ("\tpop\t{r3}\n", file);
28694 fprintf (file, "\tbx\tr12\n");
28695 ASM_OUTPUT_ALIGN (file, 2);
28696 assemble_name (file, label);
28697 fputs (":\n", file);
28698 if (flag_pic)
28700 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28701 rtx tem = XEXP (DECL_RTL (function), 0);
28702 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28703 pipeline offset is four rather than eight. Adjust the offset
28704 accordingly. */
28705 tem = plus_constant (GET_MODE (tem), tem,
28706 TARGET_THUMB1_ONLY ? -3 : -7);
28707 tem = gen_rtx_MINUS (GET_MODE (tem),
28708 tem,
28709 gen_rtx_SYMBOL_REF (Pmode,
28710 ggc_strdup (labelpc)));
28711 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28713 else
28714 /* Output ".word .LTHUNKn". */
28715 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28717 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28718 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28720 else
28722 fputs ("\tb\t", file);
28723 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28724 if (NEED_PLT_RELOC)
28725 fputs ("(PLT)", file);
28726 fputc ('\n', file);
28729 final_end_function ();
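/* Illustrative sketch (annotation only, not part of the upstream sources):
   for a simple ARM-mode thunk with a this-pointer adjustment of 4 and no
   vcall offset, the code above emits roughly

       add     r0, r0, #4
       b       <target>

   (with a "(PLT)" suffix on the branch when PLT relocations are needed);
   the delta is folded into r0, or into r1 when the function returns an
   aggregate in memory, and control falls through to the real method.  */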
28733 arm_emit_vector_const (FILE *file, rtx x)
28735 int i;
28736 const char * pattern;
28738 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28740 switch (GET_MODE (x))
28742 case V2SImode: pattern = "%08x"; break;
28743 case V4HImode: pattern = "%04x"; break;
28744 case V8QImode: pattern = "%02x"; break;
28745 default: gcc_unreachable ();
28748 fprintf (file, "0x");
28749 for (i = CONST_VECTOR_NUNITS (x); i--;)
28751 rtx element;
28753 element = CONST_VECTOR_ELT (x, i);
28754 fprintf (file, pattern, INTVAL (element));
28757 return 1;
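/* Worked example (annotation only): a V4HImode constant vector {1, 2, 3, 4}
   is printed from the highest-numbered element down, producing the literal
   "0x0004000300020001", so element 0 ends up in the least significant
   halfword of the emitted 64-bit value.  */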
28760 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28761 HFmode constant pool entries are actually loaded with ldr. */
28762 void
28763 arm_emit_fp16_const (rtx c)
28765 REAL_VALUE_TYPE r;
28766 long bits;
28768 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28769 bits = real_to_target (NULL, &r, HFmode);
28770 if (WORDS_BIG_ENDIAN)
28771 assemble_zeros (2);
28772 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28773 if (!WORDS_BIG_ENDIAN)
28774 assemble_zeros (2);
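/* Example (annotation only): the HFmode constant 1.0 has the IEEE
   half-precision encoding 0x3c00, so on a little-endian target this emits
   the two value bytes followed by two bytes of zero padding, allowing the
   constant-pool entry to be fetched with a word-sized ldr.  */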
28777 const char *
28778 arm_output_load_gr (rtx *operands)
28780 rtx reg;
28781 rtx offset;
28782 rtx wcgr;
28783 rtx sum;
28785 if (!MEM_P (operands [1])
28786 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28787 || !REG_P (reg = XEXP (sum, 0))
28788 || !CONST_INT_P (offset = XEXP (sum, 1))
28789 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28790 return "wldrw%?\t%0, %1";
28792 /* Fix up an out-of-range load of a GR register. */
28793 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28794 wcgr = operands[0];
28795 operands[0] = reg;
28796 output_asm_insn ("ldr%?\t%0, %1", operands);
28798 operands[0] = wcgr;
28799 operands[1] = reg;
28800 output_asm_insn ("tmcr%?\t%0, %1", operands);
28801 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28803 return "";
28806 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28808 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28809 named arg and all anonymous args onto the stack.
28810 XXX I know the prologue shouldn't be pushing registers, but it is faster
28811 that way. */
28813 static void
28814 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28815 machine_mode mode,
28816 tree type,
28817 int *pretend_size,
28818 int second_time ATTRIBUTE_UNUSED)
28820 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28821 int nregs;
28823 cfun->machine->uses_anonymous_args = 1;
28824 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28826 nregs = pcum->aapcs_ncrn;
28827 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28828 nregs++;
28830 else
28831 nregs = pcum->nregs;
28833 if (nregs < NUM_ARG_REGS)
28834 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
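/* Worked example (annotation only, AAPCS): for a variadic function such as

       int f (int a, ...);

   the named argument occupies r0, so aapcs_ncrn is 1 and *pretend_size
   becomes (4 - 1) * 4 = 12 bytes, telling the prologue to push r1-r3 so
   that va_arg can walk all anonymous arguments on the stack.  */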
28837 /* We can't rely on the caller doing the proper promotion when
28838 using APCS or ATPCS. */
28840 static bool
28841 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28843 return !TARGET_AAPCS_BASED;
28846 static machine_mode
28847 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28848 machine_mode mode,
28849 int *punsignedp ATTRIBUTE_UNUSED,
28850 const_tree fntype ATTRIBUTE_UNUSED,
28851 int for_return ATTRIBUTE_UNUSED)
28853 if (GET_MODE_CLASS (mode) == MODE_INT
28854 && GET_MODE_SIZE (mode) < 4)
28855 return SImode;
28857 return mode;
28860 /* AAPCS based ABIs use short enums by default. */
28862 static bool
28863 arm_default_short_enums (void)
28865 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28869 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28871 static bool
28872 arm_align_anon_bitfield (void)
28874 return TARGET_AAPCS_BASED;
28878 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28880 static tree
28881 arm_cxx_guard_type (void)
28883 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28887 /* The EABI says test the least significant bit of a guard variable. */
28889 static bool
28890 arm_cxx_guard_mask_bit (void)
28892 return TARGET_AAPCS_BASED;
28896 /* The EABI specifies that all array cookies are 8 bytes long. */
28898 static tree
28899 arm_get_cookie_size (tree type)
28901 tree size;
28903 if (!TARGET_AAPCS_BASED)
28904 return default_cxx_get_cookie_size (type);
28906 size = build_int_cst (sizetype, 8);
28907 return size;
28911 /* The EABI says that array cookies should also contain the element size. */
28913 static bool
28914 arm_cookie_has_size (void)
28916 return TARGET_AAPCS_BASED;
28920 /* The EABI says constructors and destructors should return a pointer to
28921 the object constructed/destroyed. */
28923 static bool
28924 arm_cxx_cdtor_returns_this (void)
28926 return TARGET_AAPCS_BASED;
28929 /* The EABI says that an inline function may never be the key
28930 method. */
28932 static bool
28933 arm_cxx_key_method_may_be_inline (void)
28935 return !TARGET_AAPCS_BASED;
28938 static void
28939 arm_cxx_determine_class_data_visibility (tree decl)
28941 if (!TARGET_AAPCS_BASED
28942 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28943 return;
28945 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28946 is exported. However, on systems without dynamic vague linkage,
28947 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28948 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28949 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28950 else
28951 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28952 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28955 static bool
28956 arm_cxx_class_data_always_comdat (void)
28958 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28959 vague linkage if the class has no key function. */
28960 return !TARGET_AAPCS_BASED;
28964 /* The EABI says __aeabi_atexit should be used to register static
28965 destructors. */
28967 static bool
28968 arm_cxx_use_aeabi_atexit (void)
28970 return TARGET_AAPCS_BASED;
28974 void
28975 arm_set_return_address (rtx source, rtx scratch)
28977 arm_stack_offsets *offsets;
28978 HOST_WIDE_INT delta;
28979 rtx addr;
28980 unsigned long saved_regs;
28982 offsets = arm_get_frame_offsets ();
28983 saved_regs = offsets->saved_regs_mask;
28985 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28986 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28987 else
28989 if (frame_pointer_needed)
28990 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28991 else
28993 /* LR will be the first saved register. */
28994 delta = offsets->outgoing_args - (offsets->frame + 4);
28997 if (delta >= 4096)
28999 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29000 GEN_INT (delta & ~4095)));
29001 addr = scratch;
29002 delta &= 4095;
29004 else
29005 addr = stack_pointer_rtx;
29007 addr = plus_constant (Pmode, addr, delta);
29009 /* The store needs to be marked as frame related in order to prevent
29010 DSE from deleting it as dead if it is based on fp. */
29011 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29012 RTX_FRAME_RELATED_P (insn) = 1;
29013 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29018 void
29019 thumb_set_return_address (rtx source, rtx scratch)
29021 arm_stack_offsets *offsets;
29022 HOST_WIDE_INT delta;
29023 HOST_WIDE_INT limit;
29024 int reg;
29025 rtx addr;
29026 unsigned long mask;
29028 emit_use (source);
29030 offsets = arm_get_frame_offsets ();
29031 mask = offsets->saved_regs_mask;
29032 if (mask & (1 << LR_REGNUM))
29034 limit = 1024;
29035 /* Find the saved regs. */
29036 if (frame_pointer_needed)
29038 delta = offsets->soft_frame - offsets->saved_args;
29039 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29040 if (TARGET_THUMB1)
29041 limit = 128;
29043 else
29045 delta = offsets->outgoing_args - offsets->saved_args;
29046 reg = SP_REGNUM;
29048 /* Allow for the stack frame. */
29049 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29050 delta -= 16;
29051 /* The link register is always the first saved register. */
29052 delta -= 4;
29054 /* Construct the address. */
29055 addr = gen_rtx_REG (SImode, reg);
29056 if (delta > limit)
29058 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29059 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29060 addr = scratch;
29062 else
29063 addr = plus_constant (Pmode, addr, delta);
29065 /* The store needs to be marked as frame related in order to prevent
29066 DSE from deleting it as dead if it is based on fp. */
29067 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29068 RTX_FRAME_RELATED_P (insn) = 1;
29069 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29071 else
29072 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29075 /* Implements target hook vector_mode_supported_p. */
29076 bool
29077 arm_vector_mode_supported_p (machine_mode mode)
29079 /* Neon also supports V2SImode, etc. listed in the clause below. */
29080 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29081 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29082 return true;
29084 if ((TARGET_NEON || TARGET_IWMMXT)
29085 && ((mode == V2SImode)
29086 || (mode == V4HImode)
29087 || (mode == V8QImode)))
29088 return true;
29090 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29091 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29092 || mode == V2HAmode))
29093 return true;
29095 return false;
29098 /* Implements target hook array_mode_supported_p. */
29100 static bool
29101 arm_array_mode_supported_p (machine_mode mode,
29102 unsigned HOST_WIDE_INT nelems)
29104 if (TARGET_NEON
29105 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29106 && (nelems >= 2 && nelems <= 4))
29107 return true;
29109 return false;
29112 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29113 registers when autovectorizing for Neon, at least until multiple vector
29114 widths are supported properly by the middle-end. */
29116 static machine_mode
29117 arm_preferred_simd_mode (machine_mode mode)
29119 if (TARGET_NEON)
29120 switch (mode)
29122 case SFmode:
29123 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29124 case SImode:
29125 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29126 case HImode:
29127 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29128 case QImode:
29129 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29130 case DImode:
29131 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29132 return V2DImode;
29133 break;
29135 default:;
29138 if (TARGET_REALLY_IWMMXT)
29139 switch (mode)
29141 case SImode:
29142 return V2SImode;
29143 case HImode:
29144 return V4HImode;
29145 case QImode:
29146 return V8QImode;
29148 default:;
29151 return word_mode;
29154 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29156 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29157 using r0-r4 for function arguments, r7 for the stack frame, and not have
29158 enough left over to do doubleword arithmetic. For Thumb-2 all the
29159 potentially problematic instructions accept high registers so this is not
29160 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29161 that require many low registers. */
29162 static bool
29163 arm_class_likely_spilled_p (reg_class_t rclass)
29165 if ((TARGET_THUMB1 && rclass == LO_REGS)
29166 || rclass == CC_REG)
29167 return true;
29169 return false;
29172 /* Implements target hook small_register_classes_for_mode_p. */
29173 bool
29174 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29176 return TARGET_THUMB1;
29179 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29180 ARM insns and therefore guarantee that the shift count is modulo 256.
29181 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29182 guarantee no particular behavior for out-of-range counts. */
29184 static unsigned HOST_WIDE_INT
29185 arm_shift_truncation_mask (machine_mode mode)
29187 return mode == SImode ? 255 : 0;
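/* Example (annotation only): for SImode the mask is 255, reflecting the
   fact that ARM register-specified shifts use only the bottom byte of the
   count register, so a shift count of 257 behaves like a shift by 1.  For
   DImode the mask is 0 and the middle-end must not assume any particular
   truncation of out-of-range counts.  */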
29191 /* Map internal gcc register numbers to DWARF2 register numbers. */
29193 unsigned int
29194 arm_dbx_register_number (unsigned int regno)
29196 if (regno < 16)
29197 return regno;
29199 if (IS_VFP_REGNUM (regno))
29201 /* See comment in arm_dwarf_register_span. */
29202 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29203 return 64 + regno - FIRST_VFP_REGNUM;
29204 else
29205 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29208 if (IS_IWMMXT_GR_REGNUM (regno))
29209 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29211 if (IS_IWMMXT_REGNUM (regno))
29212 return 112 + regno - FIRST_IWMMXT_REGNUM;
29214 gcc_unreachable ();
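/* Sample mappings produced by the function above (annotation only):
   r0-r15 map to DWARF numbers 0-15, s0 maps to 64, d16 maps to
   256 + 32/2 = 272, wCGR0 maps to 104 and wR0 maps to 112.  */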
29217 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29218 GCC models them as 64 32-bit registers, so we need to describe this to
29219 the DWARF generation code. Other registers can use the default. */
29220 static rtx
29221 arm_dwarf_register_span (rtx rtl)
29223 machine_mode mode;
29224 unsigned regno;
29225 rtx parts[16];
29226 int nregs;
29227 int i;
29229 regno = REGNO (rtl);
29230 if (!IS_VFP_REGNUM (regno))
29231 return NULL_RTX;
29233 /* XXX FIXME: The EABI defines two VFP register ranges:
29234 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29235 256-287: D0-D31
29236 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29237 corresponding D register. Until GDB supports this, we shall use the
29238 legacy encodings. We also use these encodings for D0-D15 for
29239 compatibility with older debuggers. */
29240 mode = GET_MODE (rtl);
29241 if (GET_MODE_SIZE (mode) < 8)
29242 return NULL_RTX;
29244 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29246 nregs = GET_MODE_SIZE (mode) / 4;
29247 for (i = 0; i < nregs; i += 2)
29248 if (TARGET_BIG_END)
29250 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29251 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29253 else
29255 parts[i] = gen_rtx_REG (SImode, regno + i);
29256 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29259 else
29261 nregs = GET_MODE_SIZE (mode) / 8;
29262 for (i = 0; i < nregs; i++)
29263 parts[i] = gen_rtx_REG (DImode, regno + i);
29266 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29269 #if ARM_UNWIND_INFO
29270 /* Emit unwind directives for a store-multiple instruction or stack pointer
29271 push during alignment.
29272 These should only ever be generated by the function prologue code, so
29273 expect them to have a particular form.
29274 The store-multiple instruction sometimes pushes pc as the last register,
29275 although it should not be tracked into unwind information, or for -Os
29276 sometimes pushes some dummy registers before the first register that needs
29277 to be tracked in unwind information; such dummy registers are there just
29278 to avoid separate stack adjustment, and will not be restored in the
29279 epilogue. */
29281 static void
29282 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29284 int i;
29285 HOST_WIDE_INT offset;
29286 HOST_WIDE_INT nregs;
29287 int reg_size;
29288 unsigned reg;
29289 unsigned lastreg;
29290 unsigned padfirst = 0, padlast = 0;
29291 rtx e;
29293 e = XVECEXP (p, 0, 0);
29294 gcc_assert (GET_CODE (e) == SET);
29296 /* First insn will adjust the stack pointer. */
29297 gcc_assert (GET_CODE (e) == SET
29298 && REG_P (SET_DEST (e))
29299 && REGNO (SET_DEST (e)) == SP_REGNUM
29300 && GET_CODE (SET_SRC (e)) == PLUS);
29302 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29303 nregs = XVECLEN (p, 0) - 1;
29304 gcc_assert (nregs);
29306 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29307 if (reg < 16)
29309 /* For -Os dummy registers can be pushed at the beginning to
29310 avoid separate stack pointer adjustment. */
29311 e = XVECEXP (p, 0, 1);
29312 e = XEXP (SET_DEST (e), 0);
29313 if (GET_CODE (e) == PLUS)
29314 padfirst = INTVAL (XEXP (e, 1));
29315 gcc_assert (padfirst == 0 || optimize_size);
29316 /* The function prologue may also push pc, but not annotate it as it is
29317 never restored. We turn this into a stack pointer adjustment. */
29318 e = XVECEXP (p, 0, nregs);
29319 e = XEXP (SET_DEST (e), 0);
29320 if (GET_CODE (e) == PLUS)
29321 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29322 else
29323 padlast = offset - 4;
29324 gcc_assert (padlast == 0 || padlast == 4);
29325 if (padlast == 4)
29326 fprintf (asm_out_file, "\t.pad #4\n");
29327 reg_size = 4;
29328 fprintf (asm_out_file, "\t.save {");
29330 else if (IS_VFP_REGNUM (reg))
29332 reg_size = 8;
29333 fprintf (asm_out_file, "\t.vsave {");
29335 else
29336 /* Unknown register type. */
29337 gcc_unreachable ();
29339 /* If the stack increment doesn't match the size of the saved registers,
29340 something has gone horribly wrong. */
29341 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29343 offset = padfirst;
29344 lastreg = 0;
29345 /* The remaining insns will describe the stores. */
29346 for (i = 1; i <= nregs; i++)
29348 /* Expect (set (mem <addr>) (reg)).
29349 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29350 e = XVECEXP (p, 0, i);
29351 gcc_assert (GET_CODE (e) == SET
29352 && MEM_P (SET_DEST (e))
29353 && REG_P (SET_SRC (e)));
29355 reg = REGNO (SET_SRC (e));
29356 gcc_assert (reg >= lastreg);
29358 if (i != 1)
29359 fprintf (asm_out_file, ", ");
29360 /* We can't use %r for vfp because we need to use the
29361 double precision register names. */
29362 if (IS_VFP_REGNUM (reg))
29363 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29364 else
29365 asm_fprintf (asm_out_file, "%r", reg);
29367 #ifdef ENABLE_CHECKING
29368 /* Check that the addresses are consecutive. */
29369 e = XEXP (SET_DEST (e), 0);
29370 if (GET_CODE (e) == PLUS)
29371 gcc_assert (REG_P (XEXP (e, 0))
29372 && REGNO (XEXP (e, 0)) == SP_REGNUM
29373 && CONST_INT_P (XEXP (e, 1))
29374 && offset == INTVAL (XEXP (e, 1)));
29375 else
29376 gcc_assert (i == 1
29377 && REG_P (e)
29378 && REGNO (e) == SP_REGNUM);
29379 offset += reg_size;
29380 #endif
29382 fprintf (asm_out_file, "}\n");
29383 if (padfirst)
29384 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
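/* Illustrative output (annotation only): a prologue store-multiple such as
   "push {r4, r5, lr}" results in the directive

       .save {r4, r5, lr}

   while a VFP save of d8-d9 produces ".vsave {d8, d9}"; when the prologue
   also pushed pc purely as padding, a ".pad #4" directive is emitted ahead
   of the .save instead of listing pc.  */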
29387 /* Emit unwind directives for a SET. */
29389 static void
29390 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29392 rtx e0;
29393 rtx e1;
29394 unsigned reg;
29396 e0 = XEXP (p, 0);
29397 e1 = XEXP (p, 1);
29398 switch (GET_CODE (e0))
29400 case MEM:
29401 /* Pushing a single register. */
29402 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29403 || !REG_P (XEXP (XEXP (e0, 0), 0))
29404 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29405 abort ();
29407 asm_fprintf (asm_out_file, "\t.save ");
29408 if (IS_VFP_REGNUM (REGNO (e1)))
29409 asm_fprintf(asm_out_file, "{d%d}\n",
29410 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29411 else
29412 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29413 break;
29415 case REG:
29416 if (REGNO (e0) == SP_REGNUM)
29418 /* A stack increment. */
29419 if (GET_CODE (e1) != PLUS
29420 || !REG_P (XEXP (e1, 0))
29421 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29422 || !CONST_INT_P (XEXP (e1, 1)))
29423 abort ();
29425 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29426 -INTVAL (XEXP (e1, 1)));
29428 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29430 HOST_WIDE_INT offset;
29432 if (GET_CODE (e1) == PLUS)
29434 if (!REG_P (XEXP (e1, 0))
29435 || !CONST_INT_P (XEXP (e1, 1)))
29436 abort ();
29437 reg = REGNO (XEXP (e1, 0));
29438 offset = INTVAL (XEXP (e1, 1));
29439 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29440 HARD_FRAME_POINTER_REGNUM, reg,
29441 offset);
29443 else if (REG_P (e1))
29445 reg = REGNO (e1);
29446 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29447 HARD_FRAME_POINTER_REGNUM, reg);
29449 else
29450 abort ();
29452 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29454 /* Move from sp to reg. */
29455 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29457 else if (GET_CODE (e1) == PLUS
29458 && REG_P (XEXP (e1, 0))
29459 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29460 && CONST_INT_P (XEXP (e1, 1)))
29462 /* Set reg to offset from sp. */
29463 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29464 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29466 else
29467 abort ();
29468 break;
29470 default:
29471 abort ();
29476 /* Emit unwind directives for the given insn. */
29478 static void
29479 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29481 rtx note, pat;
29482 bool handled_one = false;
29484 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29485 return;
29487 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29488 && (TREE_NOTHROW (current_function_decl)
29489 || crtl->all_throwers_are_sibcalls))
29490 return;
29492 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29493 return;
29495 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29497 switch (REG_NOTE_KIND (note))
29499 case REG_FRAME_RELATED_EXPR:
29500 pat = XEXP (note, 0);
29501 goto found;
29503 case REG_CFA_REGISTER:
29504 pat = XEXP (note, 0);
29505 if (pat == NULL)
29507 pat = PATTERN (insn);
29508 if (GET_CODE (pat) == PARALLEL)
29509 pat = XVECEXP (pat, 0, 0);
29512 /* Only emitted for IS_STACKALIGN re-alignment. */
29514 rtx dest, src;
29515 unsigned reg;
29517 src = SET_SRC (pat);
29518 dest = SET_DEST (pat);
29520 gcc_assert (src == stack_pointer_rtx);
29521 reg = REGNO (dest);
29522 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29523 reg + 0x90, reg);
29525 handled_one = true;
29526 break;
29528 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29529 to get correct DWARF information for shrink-wrapping. We should not
29530 emit unwind information for it because these notes are used either for
29531 pretend arguments or to adjust sp and restore registers from the
29532 stack. */
29533 case REG_CFA_DEF_CFA:
29534 case REG_CFA_ADJUST_CFA:
29535 case REG_CFA_RESTORE:
29536 return;
29538 case REG_CFA_EXPRESSION:
29539 case REG_CFA_OFFSET:
29540 /* ??? Only handling here what we actually emit. */
29541 gcc_unreachable ();
29543 default:
29544 break;
29547 if (handled_one)
29548 return;
29549 pat = PATTERN (insn);
29550 found:
29552 switch (GET_CODE (pat))
29554 case SET:
29555 arm_unwind_emit_set (asm_out_file, pat);
29556 break;
29558 case SEQUENCE:
29559 /* Store multiple. */
29560 arm_unwind_emit_sequence (asm_out_file, pat);
29561 break;
29563 default:
29564 abort();
29569 /* Output a reference from a function exception table to the type_info
29570 object X. The EABI specifies that the symbol should be relocated by
29571 an R_ARM_TARGET2 relocation. */
29573 static bool
29574 arm_output_ttype (rtx x)
29576 fputs ("\t.word\t", asm_out_file);
29577 output_addr_const (asm_out_file, x);
29578 /* Use special relocations for symbol references. */
29579 if (!CONST_INT_P (x))
29580 fputs ("(TARGET2)", asm_out_file);
29581 fputc ('\n', asm_out_file);
29583 return TRUE;
29586 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29588 static void
29589 arm_asm_emit_except_personality (rtx personality)
29591 fputs ("\t.personality\t", asm_out_file);
29592 output_addr_const (asm_out_file, personality);
29593 fputc ('\n', asm_out_file);
29596 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29598 static void
29599 arm_asm_init_sections (void)
29601 exception_section = get_unnamed_section (0, output_section_asm_op,
29602 "\t.handlerdata");
29604 #endif /* ARM_UNWIND_INFO */
29606 /* Output unwind directives for the start/end of a function. */
29608 void
29609 arm_output_fn_unwind (FILE * f, bool prologue)
29611 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29612 return;
29614 if (prologue)
29615 fputs ("\t.fnstart\n", f);
29616 else
29618 /* If this function will never be unwound, then mark it as such.
29619 The same condition is used in arm_unwind_emit to suppress
29620 the frame annotations. */
29621 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29622 && (TREE_NOTHROW (current_function_decl)
29623 || crtl->all_throwers_are_sibcalls))
29624 fputs("\t.cantunwind\n", f);
29626 fputs ("\t.fnend\n", f);
29630 static bool
29631 arm_emit_tls_decoration (FILE *fp, rtx x)
29633 enum tls_reloc reloc;
29634 rtx val;
29636 val = XVECEXP (x, 0, 0);
29637 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29639 output_addr_const (fp, val);
29641 switch (reloc)
29643 case TLS_GD32:
29644 fputs ("(tlsgd)", fp);
29645 break;
29646 case TLS_LDM32:
29647 fputs ("(tlsldm)", fp);
29648 break;
29649 case TLS_LDO32:
29650 fputs ("(tlsldo)", fp);
29651 break;
29652 case TLS_IE32:
29653 fputs ("(gottpoff)", fp);
29654 break;
29655 case TLS_LE32:
29656 fputs ("(tpoff)", fp);
29657 break;
29658 case TLS_DESCSEQ:
29659 fputs ("(tlsdesc)", fp);
29660 break;
29661 default:
29662 gcc_unreachable ();
29665 switch (reloc)
29667 case TLS_GD32:
29668 case TLS_LDM32:
29669 case TLS_IE32:
29670 case TLS_DESCSEQ:
29671 fputs (" + (. - ", fp);
29672 output_addr_const (fp, XVECEXP (x, 0, 2));
29673 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29674 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29675 output_addr_const (fp, XVECEXP (x, 0, 3));
29676 fputc (')', fp);
29677 break;
29678 default:
29679 break;
29682 return TRUE;
29685 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29687 static void
29688 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29690 gcc_assert (size == 4);
29691 fputs ("\t.word\t", file);
29692 output_addr_const (file, x);
29693 fputs ("(tlsldo)", file);
29696 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29698 static bool
29699 arm_output_addr_const_extra (FILE *fp, rtx x)
29701 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29702 return arm_emit_tls_decoration (fp, x);
29703 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29705 char label[256];
29706 int labelno = INTVAL (XVECEXP (x, 0, 0));
29708 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29709 assemble_name_raw (fp, label);
29711 return TRUE;
29713 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29715 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29716 if (GOT_PCREL)
29717 fputs ("+.", fp);
29718 fputs ("-(", fp);
29719 output_addr_const (fp, XVECEXP (x, 0, 0));
29720 fputc (')', fp);
29721 return TRUE;
29723 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29725 output_addr_const (fp, XVECEXP (x, 0, 0));
29726 if (GOT_PCREL)
29727 fputs ("+.", fp);
29728 fputs ("-(", fp);
29729 output_addr_const (fp, XVECEXP (x, 0, 1));
29730 fputc (')', fp);
29731 return TRUE;
29733 else if (GET_CODE (x) == CONST_VECTOR)
29734 return arm_emit_vector_const (fp, x);
29736 return FALSE;
29739 /* Output assembly for a shift instruction.
29740 SET_FLAGS determines how the instruction modifies the condition codes.
29741 0 - Do not set condition codes.
29742 1 - Set condition codes.
29743 2 - Use smallest instruction. */
29744 const char *
29745 arm_output_shift(rtx * operands, int set_flags)
29747 char pattern[100];
29748 static const char flag_chars[3] = {'?', '.', '!'};
29749 const char *shift;
29750 HOST_WIDE_INT val;
29751 char c;
29753 c = flag_chars[set_flags];
29754 if (TARGET_UNIFIED_ASM)
29756 shift = shift_op(operands[3], &val);
29757 if (shift)
29759 if (val != -1)
29760 operands[2] = GEN_INT(val);
29761 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29763 else
29764 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29766 else
29767 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29768 output_asm_insn (pattern, operands);
29769 return "";
29772 /* Output assembly for a WMMX immediate shift instruction. */
29773 const char *
29774 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29776 int shift = INTVAL (operands[2]);
29777 char templ[50];
29778 machine_mode opmode = GET_MODE (operands[0]);
29780 gcc_assert (shift >= 0);
29782 /* Handle shift values that the register versions cannot encode: > 63 for
29783 the D qualifier, 31 for the W qualifier, or 15 for the H qualifier. */
29784 if (((opmode == V4HImode) && (shift > 15))
29785 || ((opmode == V2SImode) && (shift > 31))
29786 || ((opmode == DImode) && (shift > 63)))
29788 if (wror_or_wsra)
29790 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29791 output_asm_insn (templ, operands);
29792 if (opmode == DImode)
29794 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29795 output_asm_insn (templ, operands);
29798 else
29800 /* The destination register will contain all zeros. */
29801 sprintf (templ, "wzero\t%%0");
29802 output_asm_insn (templ, operands);
29804 return "";
29807 if ((opmode == DImode) && (shift > 32))
29809 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29810 output_asm_insn (templ, operands);
29811 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29812 output_asm_insn (templ, operands);
29814 else
29816 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29817 output_asm_insn (templ, operands);
29819 return "";
29822 /* Output assembly for a WMMX tinsr instruction. */
29823 const char *
29824 arm_output_iwmmxt_tinsr (rtx *operands)
29826 int mask = INTVAL (operands[3]);
29827 int i;
29828 char templ[50];
29829 int units = mode_nunits[GET_MODE (operands[0])];
29830 gcc_assert ((mask & (mask - 1)) == 0);
29831 for (i = 0; i < units; ++i)
29833 if ((mask & 0x01) == 1)
29835 break;
29837 mask >>= 1;
29839 gcc_assert (i < units);
29841 switch (GET_MODE (operands[0]))
29843 case V8QImode:
29844 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29845 break;
29846 case V4HImode:
29847 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29848 break;
29849 case V2SImode:
29850 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29851 break;
29852 default:
29853 gcc_unreachable ();
29854 break;
29856 output_asm_insn (templ, operands);
29858 return "";
29861 /* Output a Thumb-1 casesi dispatch sequence. */
29862 const char *
29863 thumb1_output_casesi (rtx *operands)
29865 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29867 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29869 switch (GET_MODE(diff_vec))
29871 case QImode:
29872 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29873 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29874 case HImode:
29875 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29876 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29877 case SImode:
29878 return "bl\t%___gnu_thumb1_case_si";
29879 default:
29880 gcc_unreachable ();
29884 /* Output a Thumb-2 casesi instruction. */
29885 const char *
29886 thumb2_output_casesi (rtx *operands)
29888 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29890 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29892 output_asm_insn ("cmp\t%0, %1", operands);
29893 output_asm_insn ("bhi\t%l3", operands);
29894 switch (GET_MODE(diff_vec))
29896 case QImode:
29897 return "tbb\t[%|pc, %0]";
29898 case HImode:
29899 return "tbh\t[%|pc, %0, lsl #1]";
29900 case SImode:
29901 if (flag_pic)
29903 output_asm_insn ("adr\t%4, %l2", operands);
29904 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29905 output_asm_insn ("add\t%4, %4, %5", operands);
29906 return "bx\t%4";
29908 else
29910 output_asm_insn ("adr\t%4, %l2", operands);
29911 return "ldr\t%|pc, [%4, %0, lsl #2]";
29913 default:
29914 gcc_unreachable ();
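/* Illustrative expansion (annotation only): for a byte-sized dispatch table
   with the index in r0, the Thumb-2 sequence above comes out as roughly

       cmp   r0, <bound>
       bhi   .Ldefault
       tbb   [pc, r0]

   with tbh and ldr-based variants used for halfword and word tables.  */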
29918 /* Most ARM cores are single issue, but some newer ones can issue more
29919 than one instruction per cycle. The scheduler descriptions rely on this being correct. */
29920 static int
29921 arm_issue_rate (void)
29923 switch (arm_tune)
29925 case cortexa15:
29926 case cortexa57:
29927 return 3;
29929 case cortexr4:
29930 case cortexr4f:
29931 case cortexr5:
29932 case genericv7a:
29933 case cortexa5:
29934 case cortexa7:
29935 case cortexa8:
29936 case cortexa9:
29937 case cortexa12:
29938 case cortexa53:
29939 case fa726te:
29940 case marvell_pj4:
29941 return 2;
29943 default:
29944 return 1;
29948 /* A table and a function to perform ARM-specific name mangling for
29949 NEON vector types in order to conform to the AAPCS (see "Procedure
29950 Call Standard for the ARM Architecture", Appendix A). To qualify
29951 for emission with the mangled names defined in that document, a
29952 vector type must not only be of the correct mode but also be
29953 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29954 typedef struct
29956 machine_mode mode;
29957 const char *element_type_name;
29958 const char *aapcs_name;
29959 } arm_mangle_map_entry;
29961 static arm_mangle_map_entry arm_mangle_map[] = {
29962 /* 64-bit containerized types. */
29963 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29964 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29965 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29966 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29967 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29968 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29969 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29970 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29971 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29972 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29974 /* 128-bit containerized types. */
29975 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29976 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29977 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29978 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29979 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29980 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29981 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29982 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29983 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29984 { VOIDmode, NULL, NULL }
29987 const char *
29988 arm_mangle_type (const_tree type)
29990 arm_mangle_map_entry *pos = arm_mangle_map;
29992 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29993 has to be mangled as if it were in the "std" namespace. */
29994 if (TARGET_AAPCS_BASED
29995 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29996 return "St9__va_list";
29998 /* Half-precision float. */
29999 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30000 return "Dh";
30002 if (TREE_CODE (type) != VECTOR_TYPE)
30003 return NULL;
30005 /* Check the mode of the vector type, and the name of the vector
30006 element type, against the table. */
30007 while (pos->mode != VOIDmode)
30009 tree elt_type = TREE_TYPE (type);
30011 if (pos->mode == TYPE_MODE (type)
30012 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
30013 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
30014 pos->element_type_name))
30015 return pos->aapcs_name;
30017 pos++;
30020 /* Use the default mangling for unrecognized (possibly user-defined)
30021 vector types. */
30022 return NULL;
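/* Examples (annotation only): under the AAPCS, "__va_list" mangles as
   "St9__va_list", a scalar __fp16 as "Dh", and the Neon type int8x8_t
   (a V8QImode vector of __builtin_neon_qi elements) as the table entry
   "15__simd64_int8_t".  */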
30025 /* Order of allocation of core registers for Thumb: this allocation is
30026 written over the corresponding initial entries of the array
30027 initialized with REG_ALLOC_ORDER. We allocate all low registers
30028 first. Saving and restoring a low register is usually cheaper than
30029 using a call-clobbered high register. */
30031 static const int thumb_core_reg_alloc_order[] =
30033 3, 2, 1, 0, 4, 5, 6, 7,
30034 14, 12, 8, 9, 10, 11
30037 /* Adjust register allocation order when compiling for Thumb. */
30039 void
30040 arm_order_regs_for_local_alloc (void)
30042 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30043 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30044 if (TARGET_THUMB)
30045 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30046 sizeof (thumb_core_reg_alloc_order));
30049 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30051 bool
30052 arm_frame_pointer_required (void)
30054 return (cfun->has_nonlocal_label
30055 || SUBTARGET_FRAME_POINTER_REQUIRED
30056 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30059 /* Only Thumb-1 lacks support for conditional execution, so return true
30060 if the target is not Thumb-1. */
30061 static bool
30062 arm_have_conditional_execution (void)
30064 return !TARGET_THUMB1;
30067 tree
30068 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30070 machine_mode in_mode, out_mode;
30071 int in_n, out_n;
30072 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30074 if (TREE_CODE (type_out) != VECTOR_TYPE
30075 || TREE_CODE (type_in) != VECTOR_TYPE)
30076 return NULL_TREE;
30078 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30079 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30080 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30081 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30083 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30084 decl of the vectorized builtin for the appropriate vector mode.
30085 NULL_TREE is returned if no such builtin is available. */
30086 #undef ARM_CHECK_BUILTIN_MODE
30087 #define ARM_CHECK_BUILTIN_MODE(C) \
30088 (TARGET_NEON && TARGET_FPU_ARMV8 \
30089 && flag_unsafe_math_optimizations \
30090 && ARM_CHECK_BUILTIN_MODE_1 (C))
30092 #undef ARM_CHECK_BUILTIN_MODE_1
30093 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30094 (out_mode == SFmode && out_n == C \
30095 && in_mode == SFmode && in_n == C)
30097 #undef ARM_FIND_VRINT_VARIANT
30098 #define ARM_FIND_VRINT_VARIANT(N) \
30099 (ARM_CHECK_BUILTIN_MODE (2) \
30100 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30101 : (ARM_CHECK_BUILTIN_MODE (4) \
30102 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30103 : NULL_TREE))
30105 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30107 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30108 switch (fn)
30110 case BUILT_IN_FLOORF:
30111 return ARM_FIND_VRINT_VARIANT (vrintm);
30112 case BUILT_IN_CEILF:
30113 return ARM_FIND_VRINT_VARIANT (vrintp);
30114 case BUILT_IN_TRUNCF:
30115 return ARM_FIND_VRINT_VARIANT (vrintz);
30116 case BUILT_IN_ROUNDF:
30117 return ARM_FIND_VRINT_VARIANT (vrinta);
30118 #undef ARM_CHECK_BUILTIN_MODE_1
30119 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30120 (out_mode == SImode && out_n == C \
30121 && in_mode == SFmode && in_n == C)
30123 #define ARM_FIND_VCVT_VARIANT(N) \
30124 (ARM_CHECK_BUILTIN_MODE (2) \
30125 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30126 : (ARM_CHECK_BUILTIN_MODE (4) \
30127 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30128 : NULL_TREE))
30130 #define ARM_FIND_VCVTU_VARIANT(N) \
30131 (ARM_CHECK_BUILTIN_MODE (2) \
30132 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30133 : (ARM_CHECK_BUILTIN_MODE (4) \
30134 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30135 : NULL_TREE))
30136 case BUILT_IN_LROUNDF:
30137 return out_unsigned_p
30138 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30139 : ARM_FIND_VCVT_VARIANT (vcvta);
30140 case BUILT_IN_LCEILF:
30141 return out_unsigned_p
30142 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30143 : ARM_FIND_VCVT_VARIANT (vcvtp);
30144 case BUILT_IN_LFLOORF:
30145 return out_unsigned_p
30146 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30147 : ARM_FIND_VCVT_VARIANT (vcvtm);
30148 #undef ARM_CHECK_BUILTIN_MODE
30149 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30150 (out_mode == N##mode && out_n == C \
30151 && in_mode == N##mode && in_n == C)
30152 case BUILT_IN_BSWAP16:
30153 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30154 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30155 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30156 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30157 else
30158 return NULL_TREE;
30159 case BUILT_IN_BSWAP32:
30160 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30161 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30162 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30163 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30164 else
30165 return NULL_TREE;
30166 case BUILT_IN_BSWAP64:
30167 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30168 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30169 else
30170 return NULL_TREE;
30171 case BUILT_IN_COPYSIGNF:
30172 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30173 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30174 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30175 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30176 else
30177 return NULL_TREE;
30179 default:
30180 return NULL_TREE;
30183 return NULL_TREE;
30185 #undef ARM_FIND_VCVT_VARIANT
30186 #undef ARM_FIND_VCVTU_VARIANT
30187 #undef ARM_CHECK_BUILTIN_MODE
30188 #undef ARM_FIND_VRINT_VARIANT
30191 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30192 static HOST_WIDE_INT
30193 arm_vector_alignment (const_tree type)
30195 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30197 if (TARGET_AAPCS_BASED)
30198 align = MIN (align, 64);
30200 return align;
30203 static unsigned int
30204 arm_autovectorize_vector_sizes (void)
30206 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30209 static bool
30210 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30212 /* Vectors which aren't in packed structures will not be less aligned than
30213 the natural alignment of their element type, so this is safe. */
30214 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30215 return !is_packed;
30217 return default_builtin_vector_alignment_reachable (type, is_packed);
30220 static bool
30221 arm_builtin_support_vector_misalignment (machine_mode mode,
30222 const_tree type, int misalignment,
30223 bool is_packed)
30225 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30227 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30229 if (is_packed)
30230 return align == 1;
30232 /* If the misalignment is unknown, we should be able to handle the access
30233 so long as it is not to a member of a packed data structure. */
30234 if (misalignment == -1)
30235 return true;
30237 /* Return true if the misalignment is a multiple of the natural alignment
30238 of the vector's element type. This is probably always going to be
30239 true in practice, since we've already established that this isn't a
30240 packed access. */
30241 return ((misalignment % align) == 0);
30244 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30245 is_packed);
30248 static void
30249 arm_conditional_register_usage (void)
30251 int regno;
30253 if (TARGET_THUMB1 && optimize_size)
30255 /* When optimizing for size on Thumb-1, it's better not
30256 to use the HI regs, because of the overhead of
30257 stacking them. */
30258 for (regno = FIRST_HI_REGNUM;
30259 regno <= LAST_HI_REGNUM; ++regno)
30260 fixed_regs[regno] = call_used_regs[regno] = 1;
30263 /* The link register can be clobbered by any branch insn,
30264 but we have no way to track that at present, so mark
30265 it as unavailable. */
30266 if (TARGET_THUMB1)
30267 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30269 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30271 /* VFPv3 registers are disabled when earlier VFP
30272 versions are selected due to the definition of
30273 LAST_VFP_REGNUM. */
30274 for (regno = FIRST_VFP_REGNUM;
30275 regno <= LAST_VFP_REGNUM; ++ regno)
30277 fixed_regs[regno] = 0;
30278 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30279 || regno >= FIRST_VFP_REGNUM + 32;
30283 if (TARGET_REALLY_IWMMXT)
30285 regno = FIRST_IWMMXT_GR_REGNUM;
30286 /* The 2002/10/09 revision of the XScale ABI has wCG0
30287 and wCG1 as call-preserved registers. The 2002/11/21
30288 revision changed this so that all wCG registers are
30289 scratch registers. */
30290 for (regno = FIRST_IWMMXT_GR_REGNUM;
30291 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30292 fixed_regs[regno] = 0;
30293 /* The XScale ABI has wR0 - wR9 as scratch registers,
30294 the rest as call-preserved registers. */
30295 for (regno = FIRST_IWMMXT_REGNUM;
30296 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30298 fixed_regs[regno] = 0;
30299 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30303 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30305 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30306 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30308 else if (TARGET_APCS_STACK)
30310 fixed_regs[10] = 1;
30311 call_used_regs[10] = 1;
30313 /* -mcaller-super-interworking reserves r11 for calls to
30314 _interwork_r11_call_via_rN(). Making the register global
30315 is an easy way of ensuring that it remains valid for all
30316 calls. */
30317 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30318 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30320 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30321 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30322 if (TARGET_CALLER_INTERWORKING)
30323 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30325 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30328 static reg_class_t
30329 arm_preferred_rename_class (reg_class_t rclass)
30331 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30332 using GENERAL_REGS. During the register rename pass we therefore prefer
30333 LO_REGS, which can reduce code size. */
30334 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30335 return LO_REGS;
30336 else
30337 return NO_REGS;
30340 /* Compute the attribute "length" of insn "*push_multi".
30341 So this function MUST be kept in sync with that insn pattern. */
30343 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30345 int i, regno, hi_reg;
30346 int num_saves = XVECLEN (parallel_op, 0);
30348 /* ARM mode. */
30349 if (TARGET_ARM)
30350 return 4;
30351 /* Thumb1 mode. */
30352 if (TARGET_THUMB1)
30353 return 2;
30355 /* Thumb2 mode. */
30356 regno = REGNO (first_op);
30357 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30358 for (i = 1; i < num_saves && !hi_reg; i++)
30360 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30361 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30364 if (!hi_reg)
30365 return 2;
30366 return 4;
30369 /* Compute the number of instructions emitted by output_move_double. */
30371 arm_count_output_move_double_insns (rtx *operands)
30373 int count;
30374 rtx ops[2];
30375 /* output_move_double may modify the operands array, so call it
30376 here on a copy of the array. */
30377 ops[0] = operands[0];
30378 ops[1] = operands[1];
30379 output_move_double (ops, false, &count);
30380 return count;
30384 vfp3_const_double_for_fract_bits (rtx operand)
30386 REAL_VALUE_TYPE r0;
30388 if (!CONST_DOUBLE_P (operand))
30389 return 0;
30391 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30392 if (exact_real_inverse (DFmode, &r0))
30394 if (exact_real_truncate (DFmode, &r0))
30396 HOST_WIDE_INT value = real_to_integer (&r0);
30397 value = value & 0xffffffff;
30398 if ((value != 0) && ( (value & (value - 1)) == 0))
30399 return int_log2 (value);
30402 return 0;
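/* Worked example (annotation only): for the constant 1.0/65536.0 the exact
   inverse is 65536.0 = 2^16, so the function returns 16; the VFP
   fixed-point conversion patterns can then use #16 as the fbits operand
   of vcvt.  */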
30406 vfp3_const_double_for_bits (rtx operand)
30408 REAL_VALUE_TYPE r0;
30410 if (!CONST_DOUBLE_P (operand))
30411 return 0;
30413 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30414 if (exact_real_truncate (DFmode, &r0))
30416 HOST_WIDE_INT value = real_to_integer (&r0);
30417 value = value & 0xffffffff;
30418 if ((value != 0) && ( (value & (value - 1)) == 0))
30419 return int_log2 (value);
30422 return 0;
30425 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30427 static void
30428 arm_pre_atomic_barrier (enum memmodel model)
30430 if (need_atomic_barrier_p (model, true))
30431 emit_insn (gen_memory_barrier ());
30434 static void
30435 arm_post_atomic_barrier (enum memmodel model)
30437 if (need_atomic_barrier_p (model, false))
30438 emit_insn (gen_memory_barrier ());
30441 /* Emit the load-exclusive and store-exclusive instructions.
30442 Use acquire and release versions if necessary. */
30444 static void
30445 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30447 rtx (*gen) (rtx, rtx);
30449 if (acq)
30451 switch (mode)
30453 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30454 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30455 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30456 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30457 default:
30458 gcc_unreachable ();
30461 else
30463 switch (mode)
30465 case QImode: gen = gen_arm_load_exclusiveqi; break;
30466 case HImode: gen = gen_arm_load_exclusivehi; break;
30467 case SImode: gen = gen_arm_load_exclusivesi; break;
30468 case DImode: gen = gen_arm_load_exclusivedi; break;
30469 default:
30470 gcc_unreachable ();
30474 emit_insn (gen (rval, mem));
30477 static void
30478 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30479 rtx mem, bool rel)
30481 rtx (*gen) (rtx, rtx, rtx);
30483 if (rel)
30485 switch (mode)
30487 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30488 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30489 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30490 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30491 default:
30492 gcc_unreachable ();
30495 else
30497 switch (mode)
30499 case QImode: gen = gen_arm_store_exclusiveqi; break;
30500 case HImode: gen = gen_arm_store_exclusivehi; break;
30501 case SImode: gen = gen_arm_store_exclusivesi; break;
30502 case DImode: gen = gen_arm_store_exclusivedi; break;
30503 default:
30504 gcc_unreachable ();
30508 emit_insn (gen (bval, rval, mem));
30511 /* Mark the previous jump instruction as unlikely. */
30513 static void
30514 emit_unlikely_jump (rtx insn)
30516 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30518 insn = emit_jump_insn (insn);
30519 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30522 /* Expand a compare and swap pattern. */
30524 void
30525 arm_expand_compare_and_swap (rtx operands[])
30527 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30528 machine_mode mode;
30529 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30531 bval = operands[0];
30532 rval = operands[1];
30533 mem = operands[2];
30534 oldval = operands[3];
30535 newval = operands[4];
30536 is_weak = operands[5];
30537 mod_s = operands[6];
30538 mod_f = operands[7];
30539 mode = GET_MODE (mem);
30541 /* Normally the succ memory model must be stronger than fail, but in the
30542 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30543 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30545 if (TARGET_HAVE_LDACQ
30546 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30547 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30548 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30550 switch (mode)
30552 case QImode:
30553 case HImode:
30554 /* For narrow modes, we're going to perform the comparison in SImode,
30555 so do the zero-extension now. */
30556 rval = gen_reg_rtx (SImode);
30557 oldval = convert_modes (SImode, mode, oldval, true);
30558 /* FALLTHRU */
30560 case SImode:
30561 /* Force the value into a register if needed. We waited until after
30562 the zero-extension above to do this properly. */
30563 if (!arm_add_operand (oldval, SImode))
30564 oldval = force_reg (SImode, oldval);
30565 break;
30567 case DImode:
30568 if (!cmpdi_operand (oldval, mode))
30569 oldval = force_reg (mode, oldval);
30570 break;
30572 default:
30573 gcc_unreachable ();
30576 switch (mode)
30578 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30579 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30580 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30581 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30582 default:
30583 gcc_unreachable ();
30586 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30588 if (mode == QImode || mode == HImode)
30589 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30591 /* In all cases, we arrange for success to be signaled by Z set.
30592 This arrangement allows for the boolean result to be used directly
30593 in a subsequent branch, post optimization. */
30594 x = gen_rtx_REG (CCmode, CC_REGNUM);
30595 x = gen_rtx_EQ (SImode, x, const0_rtx);
30596 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
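/* Example of the memory-model adjustment above (annotation only): a call
   such as

       __atomic_compare_exchange_n (p, &expected, desired, 0,
                                    __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);

   on a target with ldaex/stlex would otherwise lose the acquire semantics
   of the failure path, so the success model is promoted to ACQ_REL before
   the pattern is emitted.  */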
30599 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30600 another memory store between the load-exclusive and store-exclusive can
30601 reset the monitor from Exclusive to Open state. This means we must wait
30602 until after reload to split the pattern, lest we get a register spill in
30603 the middle of the atomic sequence. */
30605 void
30606 arm_split_compare_and_swap (rtx operands[])
30608 rtx rval, mem, oldval, newval, scratch;
30609 machine_mode mode;
30610 enum memmodel mod_s, mod_f;
30611 bool is_weak;
30612 rtx_code_label *label1, *label2;
30613 rtx x, cond;
30615 rval = operands[0];
30616 mem = operands[1];
30617 oldval = operands[2];
30618 newval = operands[3];
30619 is_weak = (operands[4] != const0_rtx);
30620 mod_s = (enum memmodel) INTVAL (operands[5]);
30621 mod_f = (enum memmodel) INTVAL (operands[6]);
30622 scratch = operands[7];
30623 mode = GET_MODE (mem);
30625 bool use_acquire = TARGET_HAVE_LDACQ
30626 && !(mod_s == MEMMODEL_RELAXED
30627 || mod_s == MEMMODEL_CONSUME
30628 || mod_s == MEMMODEL_RELEASE);
30630 bool use_release = TARGET_HAVE_LDACQ
30631 && !(mod_s == MEMMODEL_RELAXED
30632 || mod_s == MEMMODEL_CONSUME
30633 || mod_s == MEMMODEL_ACQUIRE);
30635 /* Checks whether a barrier is needed and emits one accordingly. */
30636 if (!(use_acquire || use_release))
30637 arm_pre_atomic_barrier (mod_s);
30639 label1 = NULL;
30640 if (!is_weak)
30642 label1 = gen_label_rtx ();
30643 emit_label (label1);
30645 label2 = gen_label_rtx ();
30647 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30649 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30650 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30651 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30652 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30653 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30655 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30657 /* Weak or strong, we want EQ to be true for success, so that we
30658 match the flags that we got from the compare above. */
30659 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30660 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30661 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30663 if (!is_weak)
30665 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30666 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30667 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30668 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30671 if (mod_f != MEMMODEL_RELAXED)
30672 emit_label (label2);
30674 /* Checks whether a barrier is needed and emits one accordingly. */
30675 if (!(use_acquire || use_release))
30676 arm_post_atomic_barrier (mod_s);
30678 if (mod_f == MEMMODEL_RELAXED)
30679 emit_label (label2);
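/* Illustrative sketch of the split sequence (annotation only): for a strong
   SImode compare-and-swap with a seq-cst model and no ldaex/stlex, the
   emitted insns correspond roughly to

       dmb
   1:  ldrex   rval, [mem]
       cmp     rval, oldval
       bne     2f
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     1b
   2:  dmb

   with the barriers dropped when acquire/release instructions are used
   instead.  */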
30682 void
30683 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30684 rtx value, rtx model_rtx, rtx cond)
30686 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30687 machine_mode mode = GET_MODE (mem);
30688 machine_mode wmode = (mode == DImode ? DImode : SImode);
30689 rtx_code_label *label;
30690 rtx x;
30692 bool use_acquire = TARGET_HAVE_LDACQ
30693 && !(model == MEMMODEL_RELAXED
30694 || model == MEMMODEL_CONSUME
30695 || model == MEMMODEL_RELEASE);
30697 bool use_release = TARGET_HAVE_LDACQ
30698 && !(model == MEMMODEL_RELAXED
30699 || model == MEMMODEL_CONSUME
30700 || model == MEMMODEL_ACQUIRE);
30702 /* Checks whether a barrier is needed and emits one accordingly. */
30703 if (!(use_acquire || use_release))
30704 arm_pre_atomic_barrier (model);
30706 label = gen_label_rtx ();
30707 emit_label (label);
30709 if (new_out)
30710 new_out = gen_lowpart (wmode, new_out);
30711 if (old_out)
30712 old_out = gen_lowpart (wmode, old_out);
30713 else
30714 old_out = new_out;
30715 value = simplify_gen_subreg (wmode, value, mode, 0);
30717 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30719 switch (code)
30721 case SET:
30722 new_out = value;
30723 break;
30725 case NOT:
30726 x = gen_rtx_AND (wmode, old_out, value);
30727 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30728 x = gen_rtx_NOT (wmode, new_out);
30729 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30730 break;
30732 case MINUS:
30733 if (CONST_INT_P (value))
30735 value = GEN_INT (-INTVAL (value));
30736 code = PLUS;
30738 /* FALLTHRU */
30740 case PLUS:
30741 if (mode == DImode)
30743 /* DImode plus/minus need to clobber flags. */
30744 /* The adddi3 and subdi3 patterns are incorrectly written so that
30745 they require matching operands, even when we could easily support
30746 three operands. Thankfully, this can be fixed up post-splitting,
30747 as the individual add+adc patterns do accept three operands and
30748 post-reload cprop can make these moves go away. */
30749 emit_move_insn (new_out, old_out);
30750 if (code == PLUS)
30751 x = gen_adddi3 (new_out, new_out, value);
30752 else
30753 x = gen_subdi3 (new_out, new_out, value);
30754 emit_insn (x);
30755 break;
30757 /* FALLTHRU */
30759 default:
30760 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30761 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30762 break;
30765 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30766 use_release);
30768 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30769 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30771 /* Checks whether a barrier is needed and emits one accordingly. */
30772 if (!(use_acquire || use_release))
30773 arm_post_atomic_barrier (model);
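/* As an informal illustration of the loop emitted above: an SImode
   __atomic_fetch_add with __ATOMIC_SEQ_CST on a target without LDAEX/STLEX
   corresponds roughly to (register names are placeholders):

        dmb                     @ arm_pre_atomic_barrier
     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b              @ retry until the exclusive store succeeds
        dmb                     @ arm_post_atomic_barrier

   For the NOT (nand) case the ADD becomes an AND followed by an MVN, and
   DImode operations use the doubleword exclusives with an add/adc pair
   instead.  */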
30776 #define MAX_VECT_LEN 16
30778 struct expand_vec_perm_d
30780 rtx target, op0, op1;
30781 unsigned char perm[MAX_VECT_LEN];
30782 machine_mode vmode;
30783 unsigned char nelt;
30784 bool one_vector_p;
30785 bool testing_p;
30788 /* Generate a variable permutation. */
30790 static void
30791 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30793 machine_mode vmode = GET_MODE (target);
30794 bool one_vector_p = rtx_equal_p (op0, op1);
30796 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30797 gcc_checking_assert (GET_MODE (op0) == vmode);
30798 gcc_checking_assert (GET_MODE (op1) == vmode);
30799 gcc_checking_assert (GET_MODE (sel) == vmode);
30800 gcc_checking_assert (TARGET_NEON);
30802 if (one_vector_p)
30804 if (vmode == V8QImode)
30805 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30806 else
30807 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30809 else
30811 rtx pair;
30813 if (vmode == V8QImode)
30815 pair = gen_reg_rtx (V16QImode);
30816 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30817 pair = gen_lowpart (TImode, pair);
30818 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30820 else
30822 pair = gen_reg_rtx (OImode);
30823 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30824 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30829 void
30830 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30832 machine_mode vmode = GET_MODE (target);
30833 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30834 bool one_vector_p = rtx_equal_p (op0, op1);
30835 rtx rmask[MAX_VECT_LEN], mask;
30837 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30838 numbering of elements for big-endian, we must reverse the order. */
30839 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30841 /* The VTBL instruction does not use a modulo index, so we must take care
30842 of that ourselves. */
30843 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30844 for (i = 0; i < nelt; ++i)
30845 rmask[i] = mask;
30846 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30847 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30849 arm_expand_vec_perm_1 (target, op0, op1, sel);
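/* For example: with a single V8QImode input the mask built above is 7, so a
   variable selector element of 9 is reduced to 1 before the VTBL is emitted.
   This provides the modulo indexing VEC_PERM_EXPR requires; without the AND,
   VTBL would write zero for any out-of-range index.  */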
30852 /* Generate or test for an insn that supports a constant permutation. */
30854 /* Recognize patterns for the VUZP insns. */
30856 static bool
30857 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30859 unsigned int i, odd, mask, nelt = d->nelt;
30860 rtx out0, out1, in0, in1, x;
30861 rtx (*gen)(rtx, rtx, rtx, rtx);
30863 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30864 return false;
30866 /* Note that these are little-endian tests. Adjust for big-endian later. */
30867 if (d->perm[0] == 0)
30868 odd = 0;
30869 else if (d->perm[0] == 1)
30870 odd = 1;
30871 else
30872 return false;
30873 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30875 for (i = 0; i < nelt; i++)
30877 unsigned elt = (i * 2 + odd) & mask;
30878 if (d->perm[i] != elt)
30879 return false;
30882 /* Success! */
30883 if (d->testing_p)
30884 return true;
30886 switch (d->vmode)
30888 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30889 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30890 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30891 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30892 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30893 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30894 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30895 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30896 default:
30897 gcc_unreachable ();
30900 in0 = d->op0;
30901 in1 = d->op1;
30902 if (BYTES_BIG_ENDIAN)
30904 x = in0, in0 = in1, in1 = x;
30905 odd = !odd;
30908 out0 = d->target;
30909 out1 = gen_reg_rtx (d->vmode);
30910 if (odd)
30911 x = out0, out0 = out1, out1 = x;
30913 emit_insn (gen (out0, in0, in1, out1));
30914 return true;
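/* For example: with V4HImode and two distinct operands (nelt == 4), the
   selector {0, 2, 4, 6} passes the test above with odd == 0 and becomes the
   first output of VUZP.16, i.e. the even-numbered elements of op0:op1,
   while {1, 3, 5, 7} picks the odd-numbered elements via the second
   output.  */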
30917 /* Recognize patterns for the VZIP insns. */
30919 static bool
30920 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30922 unsigned int i, high, mask, nelt = d->nelt;
30923 rtx out0, out1, in0, in1, x;
30924 rtx (*gen)(rtx, rtx, rtx, rtx);
30926 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30927 return false;
30929 /* Note that these are little-endian tests. Adjust for big-endian later. */
30930 high = nelt / 2;
30931 if (d->perm[0] == high)
30933 else if (d->perm[0] == 0)
30934 high = 0;
30935 else
30936 return false;
30937 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30939 for (i = 0; i < nelt / 2; i++)
30941 unsigned elt = (i + high) & mask;
30942 if (d->perm[i * 2] != elt)
30943 return false;
30944 elt = (elt + nelt) & mask;
30945 if (d->perm[i * 2 + 1] != elt)
30946 return false;
30949 /* Success! */
30950 if (d->testing_p)
30951 return true;
30953 switch (d->vmode)
30955 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30956 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30957 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30958 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30959 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30960 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30961 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30962 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30963 default:
30964 gcc_unreachable ();
30967 in0 = d->op0;
30968 in1 = d->op1;
30969 if (BYTES_BIG_ENDIAN)
30971 x = in0, in0 = in1, in1 = x;
30972 high = !high;
30975 out0 = d->target;
30976 out1 = gen_reg_rtx (d->vmode);
30977 if (high)
30978 x = out0, out0 = out1, out1 = x;
30980 emit_insn (gen (out0, in0, in1, out1));
30981 return true;
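/* For example: with V4HImode and two distinct operands (nelt == 4), the
   selector {0, 4, 1, 5} passes the test above with high == 0 and becomes
   the first output of VZIP.16 (the interleaved low halves of op0 and op1),
   while {2, 6, 3, 7} interleaves the high halves via the second output.  */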
30984 /* Recognize patterns for the VREV insns. */
30986 static bool
30987 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30989 unsigned int i, j, diff, nelt = d->nelt;
30990 rtx (*gen)(rtx, rtx, rtx);
30992 if (!d->one_vector_p)
30993 return false;
30995 diff = d->perm[0];
30996 switch (diff)
30998 case 7:
30999 switch (d->vmode)
31001 case V16QImode: gen = gen_neon_vrev64v16qi; break;
31002 case V8QImode: gen = gen_neon_vrev64v8qi; break;
31003 default:
31004 return false;
31006 break;
31007 case 3:
31008 switch (d->vmode)
31010 case V16QImode: gen = gen_neon_vrev32v16qi; break;
31011 case V8QImode: gen = gen_neon_vrev32v8qi; break;
31012 case V8HImode: gen = gen_neon_vrev64v8hi; break;
31013 case V4HImode: gen = gen_neon_vrev64v4hi; break;
31014 default:
31015 return false;
31017 break;
31018 case 1:
31019 switch (d->vmode)
31021 case V16QImode: gen = gen_neon_vrev16v16qi; break;
31022 case V8QImode: gen = gen_neon_vrev16v8qi; break;
31023 case V8HImode: gen = gen_neon_vrev32v8hi; break;
31024 case V4HImode: gen = gen_neon_vrev32v4hi; break;
31025 case V4SImode: gen = gen_neon_vrev64v4si; break;
31026 case V2SImode: gen = gen_neon_vrev64v2si; break;
31027 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
31028 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
31029 default:
31030 return false;
31032 break;
31033 default:
31034 return false;
31037 for (i = 0; i < nelt ; i += diff + 1)
31038 for (j = 0; j <= diff; j += 1)
31040 /* This is guaranteed to be true because the value of diff
31041 is 7, 3 or 1, and we have enough elements in the
31042 queue to cover it. A vector mask with a value of diff
31043 other than these implies that something has gone
31044 wrong by the time we get here. */
31045 gcc_assert (i + j < nelt);
31046 if (d->perm[i + j] != i + diff - j)
31047 return false;
31050 /* Success! */
31051 if (d->testing_p)
31052 return true;
31054 /* ??? The third operand is an artifact of the builtin infrastructure
31055 and is ignored by the actual instruction. */
31056 emit_insn (gen (d->target, d->op0, const0_rtx));
31057 return true;
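/* For example: with a single V8QImode operand, the selector
   {3, 2, 1, 0, 7, 6, 5, 4} has diff == 3 and reverses the bytes within each
   32-bit group, so it is emitted as VREV32.8, while {7, 6, 5, 4, 3, 2, 1, 0}
   has diff == 7 and is emitted as VREV64.8.  */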
31060 /* Recognize patterns for the VTRN insns. */
31062 static bool
31063 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31065 unsigned int i, odd, mask, nelt = d->nelt;
31066 rtx out0, out1, in0, in1, x;
31067 rtx (*gen)(rtx, rtx, rtx, rtx);
31069 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31070 return false;
31072 /* Note that these are little-endian tests. Adjust for big-endian later. */
31073 if (d->perm[0] == 0)
31074 odd = 0;
31075 else if (d->perm[0] == 1)
31076 odd = 1;
31077 else
31078 return false;
31079 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31081 for (i = 0; i < nelt; i += 2)
31083 if (d->perm[i] != i + odd)
31084 return false;
31085 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31086 return false;
31089 /* Success! */
31090 if (d->testing_p)
31091 return true;
31093 switch (d->vmode)
31095 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31096 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31097 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31098 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31099 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31100 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31101 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31102 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31103 default:
31104 gcc_unreachable ();
31107 in0 = d->op0;
31108 in1 = d->op1;
31109 if (BYTES_BIG_ENDIAN)
31111 x = in0, in0 = in1, in1 = x;
31112 odd = !odd;
31115 out0 = d->target;
31116 out1 = gen_reg_rtx (d->vmode);
31117 if (odd)
31118 x = out0, out0 = out1, out1 = x;
31120 emit_insn (gen (out0, in0, in1, out1));
31121 return true;
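/* For example: with V4SImode and two distinct operands (nelt == 4), the
   selector {0, 4, 2, 6} passes the test above with odd == 0 and becomes the
   first output of VTRN.32, i.e. {op0[0], op1[0], op0[2], op1[2]}, while
   {1, 5, 3, 7} selects the odd rows via the second output.  */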
31124 /* Recognize patterns for the VEXT insns. */
31126 static bool
31127 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31129 unsigned int i, nelt = d->nelt;
31130 rtx (*gen) (rtx, rtx, rtx, rtx);
31131 rtx offset;
31133 unsigned int location;
31135 unsigned int next = d->perm[0] + 1;
31137 /* TODO: Handle GCC's numbering of elements for big-endian. */
31138 if (BYTES_BIG_ENDIAN)
31139 return false;
31141 /* Check if the extracted indexes are increasing by one. */
31142 for (i = 1; i < nelt; next++, i++)
31144 /* If we hit the most significant element of the 2nd vector in
31145 the previous iteration, no need to test further. */
31146 if (next == 2 * nelt)
31147 return false;
31149 /* If we are operating on only one vector: it could be a
31150 rotation. If there are only two elements of size < 64, let
31151 arm_evpc_neon_vrev catch it. */
31152 if (d->one_vector_p && (next == nelt))
31154 if ((nelt == 2) && (d->vmode != V2DImode))
31155 return false;
31156 else
31157 next = 0;
31160 if (d->perm[i] != next)
31161 return false;
31164 location = d->perm[0];
31166 switch (d->vmode)
31168 case V16QImode: gen = gen_neon_vextv16qi; break;
31169 case V8QImode: gen = gen_neon_vextv8qi; break;
31170 case V4HImode: gen = gen_neon_vextv4hi; break;
31171 case V8HImode: gen = gen_neon_vextv8hi; break;
31172 case V2SImode: gen = gen_neon_vextv2si; break;
31173 case V4SImode: gen = gen_neon_vextv4si; break;
31174 case V2SFmode: gen = gen_neon_vextv2sf; break;
31175 case V4SFmode: gen = gen_neon_vextv4sf; break;
31176 case V2DImode: gen = gen_neon_vextv2di; break;
31177 default:
31178 return false;
31181 /* Success! */
31182 if (d->testing_p)
31183 return true;
31185 offset = GEN_INT (location);
31186 emit_insn (gen (d->target, d->op0, d->op1, offset));
31187 return true;
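/* For example: with V4SImode (nelt == 4), the selector {1, 2, 3, 4} is a
   run of consecutive indexes starting at 1 and is emitted as VEXT.32 with
   an offset of 1, taking the top three elements of op0 followed by the
   first element of op1.  */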
31190 /* The NEON VTBL instruction is a fully variable permutation that's even
31191 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31192 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31193 can do slightly better by expanding this as a constant where we don't
31194 have to apply a mask. */
31196 static bool
31197 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31199 rtx rperm[MAX_VECT_LEN], sel;
31200 machine_mode vmode = d->vmode;
31201 unsigned int i, nelt = d->nelt;
31203 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31204 numbering of elements for big-endian, we must reverse the order. */
31205 if (BYTES_BIG_ENDIAN)
31206 return false;
31208 if (d->testing_p)
31209 return true;
31211 /* Generic code will try constant permutation twice. Once with the
31212 original mode and again with the elements lowered to QImode.
31213 So wait and don't do the selector expansion ourselves. */
31214 if (vmode != V8QImode && vmode != V16QImode)
31215 return false;
31217 for (i = 0; i < nelt; ++i)
31218 rperm[i] = GEN_INT (d->perm[i]);
31219 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31220 sel = force_reg (vmode, sel);
31222 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31223 return true;
31226 static bool
31227 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31229 /* Check if the input mask matches vext before reordering the
31230 operands. */
31231 if (TARGET_NEON)
31232 if (arm_evpc_neon_vext (d))
31233 return true;
31235 /* The pattern matching functions above are written to look for a small
31236 number to begin the sequence (0, 1, N/2). If we begin with an index
31237 from the second operand, we can swap the operands. */
31238 if (d->perm[0] >= d->nelt)
31240 unsigned i, nelt = d->nelt;
31241 rtx x;
31243 for (i = 0; i < nelt; ++i)
31244 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31246 x = d->op0;
31247 d->op0 = d->op1;
31248 d->op1 = x;
31251 if (TARGET_NEON)
31253 if (arm_evpc_neon_vuzp (d))
31254 return true;
31255 if (arm_evpc_neon_vzip (d))
31256 return true;
31257 if (arm_evpc_neon_vrev (d))
31258 return true;
31259 if (arm_evpc_neon_vtrn (d))
31260 return true;
31261 return arm_evpc_neon_vtbl (d);
31263 return false;
31266 /* Expand a vec_perm_const pattern. */
31268 bool
31269 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31271 struct expand_vec_perm_d d;
31272 int i, nelt, which;
31274 d.target = target;
31275 d.op0 = op0;
31276 d.op1 = op1;
31278 d.vmode = GET_MODE (target);
31279 gcc_assert (VECTOR_MODE_P (d.vmode));
31280 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31281 d.testing_p = false;
31283 for (i = which = 0; i < nelt; ++i)
31285 rtx e = XVECEXP (sel, 0, i);
31286 int ei = INTVAL (e) & (2 * nelt - 1);
31287 which |= (ei < nelt ? 1 : 2);
31288 d.perm[i] = ei;
31291 switch (which)
31293 default:
31294 gcc_unreachable();
31296 case 3:
31297 d.one_vector_p = false;
31298 if (!rtx_equal_p (op0, op1))
31299 break;
31301 /* The elements of PERM do not suggest that only the first operand
31302 is used, but both operands are identical. Allow easier matching
31303 of the permutation by folding the permutation into the single
31304 input vector. */
31305 /* FALLTHRU */
31306 case 2:
31307 for (i = 0; i < nelt; ++i)
31308 d.perm[i] &= nelt - 1;
31309 d.op0 = op1;
31310 d.one_vector_p = true;
31311 break;
31313 case 1:
31314 d.op1 = op0;
31315 d.one_vector_p = true;
31316 break;
31319 return arm_expand_vec_perm_const_1 (&d);
31322 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31324 static bool
31325 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31326 const unsigned char *sel)
31328 struct expand_vec_perm_d d;
31329 unsigned int i, nelt, which;
31330 bool ret;
31332 d.vmode = vmode;
31333 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31334 d.testing_p = true;
31335 memcpy (d.perm, sel, nelt);
31337 /* Categorize the set of elements in the selector. */
31338 for (i = which = 0; i < nelt; ++i)
31340 unsigned char e = d.perm[i];
31341 gcc_assert (e < 2 * nelt);
31342 which |= (e < nelt ? 1 : 2);
31345 /* For all elements from second vector, fold the elements to first. */
31346 if (which == 2)
31347 for (i = 0; i < nelt; ++i)
31348 d.perm[i] -= nelt;
31350 /* Check whether the mask can be applied to the vector type. */
31351 d.one_vector_p = (which != 3);
31353 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31354 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31355 if (!d.one_vector_p)
31356 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31358 start_sequence ();
31359 ret = arm_expand_vec_perm_const_1 (&d);
31360 end_sequence ();
31362 return ret;
31365 bool
31366 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31368 /* If we are soft float and either have ldrd or the mode fits
31369 in a word, then all auto increment forms are ok. */
31370 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31371 return true;
31373 switch (code)
31375 /* Post increment and Pre Decrement are supported for all
31376 instruction forms except for vector forms. */
31377 case ARM_POST_INC:
31378 case ARM_PRE_DEC:
31379 if (VECTOR_MODE_P (mode))
31381 if (code != ARM_PRE_DEC)
31382 return true;
31383 else
31384 return false;
31387 return true;
31389 case ARM_POST_DEC:
31390 case ARM_PRE_INC:
31391 /* Without LDRD and mode size greater than
31392 word size, there is no point in auto-incrementing
31393 because ldm and stm will not have these forms. */
31394 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31395 return false;
31397 /* Vector and floating point modes do not support
31398 these auto increment forms. */
31399 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31400 return false;
31402 return true;
31404 default:
31405 return false;
31409 return false;
31412 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31413 on ARM, since we know that shifts by negative amounts are no-ops.
31414 Additionally, the default expansion code is not available or suitable
31415 for post-reload insn splits (this can occur when the register allocator
31416 chooses not to do a shift in NEON).
31418 This function is used in both initial expand and post-reload splits, and
31419 handles all kinds of 64-bit shifts.
31421 Input requirements:
31422 - It is safe for the input and output to be the same register, but
31423 early-clobber rules apply for the shift amount and scratch registers.
31424 - Shift by register requires both scratch registers. In all other cases
31425 the scratch registers may be NULL.
31426 - Ashiftrt by a register also clobbers the CC register. */
31427 void
31428 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31429 rtx amount, rtx scratch1, rtx scratch2)
31431 rtx out_high = gen_highpart (SImode, out);
31432 rtx out_low = gen_lowpart (SImode, out);
31433 rtx in_high = gen_highpart (SImode, in);
31434 rtx in_low = gen_lowpart (SImode, in);
31436 /* Terminology:
31437 in = the register pair containing the input value.
31438 out = the destination register pair.
31439 up = the high- or low-part of each pair.
31440 down = the opposite part to "up".
31441 In a shift, we can consider bits to shift from "up"-stream to
31442 "down"-stream, so in a left-shift "up" is the low-part and "down"
31443 is the high-part of each register pair. */
31445 rtx out_up = code == ASHIFT ? out_low : out_high;
31446 rtx out_down = code == ASHIFT ? out_high : out_low;
31447 rtx in_up = code == ASHIFT ? in_low : in_high;
31448 rtx in_down = code == ASHIFT ? in_high : in_low;
31450 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31451 gcc_assert (out
31452 && (REG_P (out) || GET_CODE (out) == SUBREG)
31453 && GET_MODE (out) == DImode);
31454 gcc_assert (in
31455 && (REG_P (in) || GET_CODE (in) == SUBREG)
31456 && GET_MODE (in) == DImode);
31457 gcc_assert (amount
31458 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31459 && GET_MODE (amount) == SImode)
31460 || CONST_INT_P (amount)));
31461 gcc_assert (scratch1 == NULL
31462 || (GET_CODE (scratch1) == SCRATCH)
31463 || (GET_MODE (scratch1) == SImode
31464 && REG_P (scratch1)));
31465 gcc_assert (scratch2 == NULL
31466 || (GET_CODE (scratch2) == SCRATCH)
31467 || (GET_MODE (scratch2) == SImode
31468 && REG_P (scratch2)));
31469 gcc_assert (!REG_P (out) || !REG_P (amount)
31470 || !HARD_REGISTER_P (out)
31471 || (REGNO (out) != REGNO (amount)
31472 && REGNO (out) + 1 != REGNO (amount)));
31474 /* Macros to make following code more readable. */
31475 #define SUB_32(DEST,SRC) \
31476 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31477 #define RSB_32(DEST,SRC) \
31478 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31479 #define SUB_S_32(DEST,SRC) \
31480 gen_addsi3_compare0 ((DEST), (SRC), \
31481 GEN_INT (-32))
31482 #define SET(DEST,SRC) \
31483 gen_rtx_SET (SImode, (DEST), (SRC))
31484 #define SHIFT(CODE,SRC,AMOUNT) \
31485 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31486 #define LSHIFT(CODE,SRC,AMOUNT) \
31487 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31488 SImode, (SRC), (AMOUNT))
31489 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31490 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31491 SImode, (SRC), (AMOUNT))
31492 #define ORR(A,B) \
31493 gen_rtx_IOR (SImode, (A), (B))
31494 #define BRANCH(COND,LABEL) \
31495 gen_arm_cond_branch ((LABEL), \
31496 gen_rtx_ ## COND (CCmode, cc_reg, \
31497 const0_rtx), \
31498 cc_reg)
31500 /* Shifts by register and shifts by constant are handled separately. */
31501 if (CONST_INT_P (amount))
31503 /* We have a shift-by-constant. */
31505 /* First, handle out-of-range shift amounts.
31506 In both cases we try to match the result an ARM instruction in a
31507 shift-by-register would give. This helps reduce execution
31508 differences between optimization levels, but it won't stop other
31509 parts of the compiler doing different things. This is "undefined
31510 behaviour", in any case. */
31511 if (INTVAL (amount) <= 0)
31512 emit_insn (gen_movdi (out, in));
31513 else if (INTVAL (amount) >= 64)
31515 if (code == ASHIFTRT)
31517 rtx const31_rtx = GEN_INT (31);
31518 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31519 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31521 else
31522 emit_insn (gen_movdi (out, const0_rtx));
31525 /* Now handle valid shifts. */
31526 else if (INTVAL (amount) < 32)
31528 /* Shifts by a constant less than 32. */
31529 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31531 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31532 emit_insn (SET (out_down,
31533 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31534 out_down)));
31535 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31537 else
31539 /* Shifts by a constant greater than 31. */
31540 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31542 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31543 if (code == ASHIFTRT)
31544 emit_insn (gen_ashrsi3 (out_up, in_up,
31545 GEN_INT (31)));
31546 else
31547 emit_insn (SET (out_up, const0_rtx));
31550 else
31552 /* We have a shift-by-register. */
31553 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31555 /* This alternative requires the scratch registers. */
31556 gcc_assert (scratch1 && REG_P (scratch1));
31557 gcc_assert (scratch2 && REG_P (scratch2));
31559 /* We will need the values "amount-32" and "32-amount" later.
31560 Swapping them around now allows the later code to be more general. */
31561 switch (code)
31563 case ASHIFT:
31564 emit_insn (SUB_32 (scratch1, amount));
31565 emit_insn (RSB_32 (scratch2, amount));
31566 break;
31567 case ASHIFTRT:
31568 emit_insn (RSB_32 (scratch1, amount));
31569 /* Also set CC = amount > 32. */
31570 emit_insn (SUB_S_32 (scratch2, amount));
31571 break;
31572 case LSHIFTRT:
31573 emit_insn (RSB_32 (scratch1, amount));
31574 emit_insn (SUB_32 (scratch2, amount));
31575 break;
31576 default:
31577 gcc_unreachable ();
31580 /* Emit code like this:
31582 arithmetic-left:
31583 out_down = in_down << amount;
31584 out_down = (in_up << (amount - 32)) | out_down;
31585 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31586 out_up = in_up << amount;
31588 arithmetic-right:
31589 out_down = in_down >> amount;
31590 out_down = (in_up << (32 - amount)) | out_down;
31591 if (amount < 32)
31592 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31593 out_up = in_up << amount;
31595 logical-right:
31596 out_down = in_down >> amount;
31597 out_down = (in_up << (32 - amount)) | out_down;
31598 if (amount < 32)
31599 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31600 out_up = in_up << amount;
31602 The ARM and Thumb2 variants are the same but implemented slightly
31603 differently. If this were only called during expand we could just
31604 use the Thumb2 case and let combine do the right thing, but this
31605 can also be called from post-reload splitters. */
31607 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31609 if (!TARGET_THUMB2)
31611 /* Emit code for ARM mode. */
31612 emit_insn (SET (out_down,
31613 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31614 if (code == ASHIFTRT)
31616 rtx_code_label *done_label = gen_label_rtx ();
31617 emit_jump_insn (BRANCH (LT, done_label));
31618 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31619 out_down)));
31620 emit_label (done_label);
31622 else
31623 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31624 out_down)));
31626 else
31628 /* Emit code for Thumb2 mode.
31629 Thumb2 can't do shift and or in one insn. */
31630 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31631 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31633 if (code == ASHIFTRT)
31635 rtx_code_label *done_label = gen_label_rtx ();
31636 emit_jump_insn (BRANCH (LT, done_label));
31637 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31638 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31639 emit_label (done_label);
31641 else
31643 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31644 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31648 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31651 #undef SUB_32
31652 #undef RSB_32
31653 #undef SUB_S_32
31654 #undef SET
31655 #undef SHIFT
31656 #undef LSHIFT
31657 #undef REV_LSHIFT
31658 #undef ORR
31659 #undef BRANCH
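/* Worked example of the constant paths above: a DImode left shift by 5
   emits insns equivalent to

       out_high = in_high << 5;
       out_high |= (unsigned) in_low >> 27;    (the 32 - 5 bits that cross)
       out_low  = in_low << 5;

   while a logical right shift by 40 takes the "greater than 31" path:

       out_low  = (unsigned) in_high >> 8;     (40 - 32)
       out_high = 0;  */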
31663 /* Return true if COMPARISON is a valid comparison operation, and
31664 force its operands into a form that is valid for it. */
31665 bool
31666 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31668 enum rtx_code code = GET_CODE (*comparison);
31669 int code_int;
31670 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31671 ? GET_MODE (*op2) : GET_MODE (*op1);
31673 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31675 if (code == UNEQ || code == LTGT)
31676 return false;
31678 code_int = (int)code;
31679 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31680 PUT_CODE (*comparison, (enum rtx_code)code_int);
31682 switch (mode)
31684 case SImode:
31685 if (!arm_add_operand (*op1, mode))
31686 *op1 = force_reg (mode, *op1);
31687 if (!arm_add_operand (*op2, mode))
31688 *op2 = force_reg (mode, *op2);
31689 return true;
31691 case DImode:
31692 if (!cmpdi_operand (*op1, mode))
31693 *op1 = force_reg (mode, *op1);
31694 if (!cmpdi_operand (*op2, mode))
31695 *op2 = force_reg (mode, *op2);
31696 return true;
31698 case SFmode:
31699 case DFmode:
31700 if (!arm_float_compare_operand (*op1, mode))
31701 *op1 = force_reg (mode, *op1);
31702 if (!arm_float_compare_operand (*op2, mode))
31703 *op2 = force_reg (mode, *op2);
31704 return true;
31705 default:
31706 break;
31709 return false;
31713 /* Maximum number of instructions to set block of memory. */
31714 static int
31715 arm_block_set_max_insns (void)
31717 if (optimize_function_for_size_p (cfun))
31718 return 4;
31719 else
31720 return current_tune->max_insns_inline_memset;
31723 /* Return TRUE if it's profitable to set block of memory for
31724 non-vectorized case. VAL is the value to set the memory
31725 with. LENGTH is the number of bytes to set. ALIGN is the
31726 alignment of the destination memory in bytes. UNALIGNED_P
31727 is TRUE if we can only set the memory with instructions
31728 meeting alignment requirements. USE_STRD_P is TRUE if we
31729 can use strd to set the memory. */
31730 static bool
31731 arm_block_set_non_vect_profit_p (rtx val,
31732 unsigned HOST_WIDE_INT length,
31733 unsigned HOST_WIDE_INT align,
31734 bool unaligned_p, bool use_strd_p)
31736 int num = 0;
31737 /* For a leftover of 0-7 bytes, we can set the memory block using
31738 strb/strh/str with the minimum number of instructions. */
31739 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31741 if (unaligned_p)
31743 num = arm_const_inline_cost (SET, val);
31744 num += length / align + length % align;
31746 else if (use_strd_p)
31748 num = arm_const_double_inline_cost (val);
31749 num += (length >> 3) + leftover[length & 7];
31751 else
31753 num = arm_const_inline_cost (SET, val);
31754 num += (length >> 2) + leftover[length & 3];
31757 /* We may be able to combine last pair STRH/STRB into a single STR
31758 by shifting one byte back. */
31759 if (unaligned_access && length > 3 && (length & 3) == 3)
31760 num--;
31762 return (num <= arm_block_set_max_insns ());
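/* Worked example: setting 14 bytes with plain word stores (UNALIGNED_P and
   USE_STRD_P both false) needs (14 >> 2) = 3 STRs plus leftover[14 & 3] = 1
   STRH, on top of whatever arm_const_inline_cost charges for building the
   value (typically a single insn for a simple immediate).  That total of
   about 5 exceeds the -Os budget of 4 from arm_block_set_max_insns, so the
   inline expansion is rejected when optimizing for size.  */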
31765 /* Return TRUE if it's profitable to set block of memory for
31766 vectorized case. LENGTH is the number of bytes to set.
31767 ALIGN is the alignment of destination memory in bytes.
31768 MODE is the vector mode used to set the memory. */
31769 static bool
31770 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31771 unsigned HOST_WIDE_INT align,
31772 machine_mode mode)
31774 int num;
31775 bool unaligned_p = ((align & 3) != 0);
31776 unsigned int nelt = GET_MODE_NUNITS (mode);
31778 /* Instruction loading constant value. */
31779 num = 1;
31780 /* Instructions storing the memory. */
31781 num += (length + nelt - 1) / nelt;
31782 /* Instructions adjusting the address expression. We only need to
31783 adjust the address expression if it's 4-byte aligned and the
31784 leftover bytes can only be stored by a misaligned store instruction. */
31785 if (!unaligned_p && (length & 3) != 0)
31786 num++;
31788 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31789 if (!unaligned_p && mode == V16QImode)
31790 num--;
31792 return (num <= arm_block_set_max_insns ());
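/* Worked example: setting 25 bytes through a 4-byte-aligned pointer with
   V16QImode stores (assuming unaligned access is available) costs 1 insn to
   load the constant vector, (25 + 15) / 16 = 2 vector stores, plus 1
   address adjustment because 25 is not a multiple of 4, minus 1 as noted
   above for the vst1:v16qi first store: 3 insns in total, which fits even
   the -Os budget of 4.  */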
31795 /* Set a block of memory using vectorization instructions for the
31796 unaligned case. We fill the first LENGTH bytes of the memory
31797 area starting from DSTBASE with byte constant VALUE. ALIGN is
31798 the alignment requirement of memory. Return TRUE if succeeded. */
31799 static bool
31800 arm_block_set_unaligned_vect (rtx dstbase,
31801 unsigned HOST_WIDE_INT length,
31802 unsigned HOST_WIDE_INT value,
31803 unsigned HOST_WIDE_INT align)
31805 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31806 rtx dst, mem;
31807 rtx val_elt, val_vec, reg;
31808 rtx rval[MAX_VECT_LEN];
31809 rtx (*gen_func) (rtx, rtx);
31810 machine_mode mode;
31811 unsigned HOST_WIDE_INT v = value;
31813 gcc_assert ((align & 0x3) != 0);
31814 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31815 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31816 if (length >= nelt_v16)
31818 mode = V16QImode;
31819 gen_func = gen_movmisalignv16qi;
31821 else
31823 mode = V8QImode;
31824 gen_func = gen_movmisalignv8qi;
31826 nelt_mode = GET_MODE_NUNITS (mode);
31827 gcc_assert (length >= nelt_mode);
31828 /* Skip if it isn't profitable. */
31829 if (!arm_block_set_vect_profit_p (length, align, mode))
31830 return false;
31832 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31833 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31835 v = sext_hwi (v, BITS_PER_WORD);
31836 val_elt = GEN_INT (v);
31837 for (j = 0; j < nelt_mode; j++)
31838 rval[j] = val_elt;
31840 reg = gen_reg_rtx (mode);
31841 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31842 /* Emit instruction loading the constant value. */
31843 emit_move_insn (reg, val_vec);
31845 /* Handle nelt_mode bytes in a vector. */
31846 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31848 emit_insn ((*gen_func) (mem, reg));
31849 if (i + 2 * nelt_mode <= length)
31850 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31853 /* If at least nelt_v8 bytes are left over, we must be in
31854 V16QImode. */
31855 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31857 /* Handle (8, 16) bytes leftover. */
31858 if (i + nelt_v8 < length)
31860 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31861 /* We are shifting bytes back, set the alignment accordingly. */
31862 if ((length & 1) != 0 && align >= 2)
31863 set_mem_align (mem, BITS_PER_UNIT);
31865 emit_insn (gen_movmisalignv16qi (mem, reg));
31867 /* Handle (0, 8] bytes leftover. */
31868 else if (i < length && i + nelt_v8 >= length)
31870 if (mode == V16QImode)
31872 reg = gen_lowpart (V8QImode, reg);
31873 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31875 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31876 + (nelt_mode - nelt_v8))));
31877 /* We are shifting bytes back, set the alignment accordingly. */
31878 if ((length & 1) != 0 && align >= 2)
31879 set_mem_align (mem, BITS_PER_UNIT);
31881 emit_insn (gen_movmisalignv8qi (mem, reg));
31884 return true;
31887 /* Set a block of memory using vectorization instructions for the
31888 aligned case. We fill the first LENGTH bytes of the memory area
31889 starting from DSTBASE with byte constant VALUE. ALIGN is the
31890 alignment requirement of memory. Return TRUE if succeeded. */
31891 static bool
31892 arm_block_set_aligned_vect (rtx dstbase,
31893 unsigned HOST_WIDE_INT length,
31894 unsigned HOST_WIDE_INT value,
31895 unsigned HOST_WIDE_INT align)
31897 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31898 rtx dst, addr, mem;
31899 rtx val_elt, val_vec, reg;
31900 rtx rval[MAX_VECT_LEN];
31901 machine_mode mode;
31902 unsigned HOST_WIDE_INT v = value;
31904 gcc_assert ((align & 0x3) == 0);
31905 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31906 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31907 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31908 mode = V16QImode;
31909 else
31910 mode = V8QImode;
31912 nelt_mode = GET_MODE_NUNITS (mode);
31913 gcc_assert (length >= nelt_mode);
31914 /* Skip if it isn't profitable. */
31915 if (!arm_block_set_vect_profit_p (length, align, mode))
31916 return false;
31918 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31920 v = sext_hwi (v, BITS_PER_WORD);
31921 val_elt = GEN_INT (v);
31922 for (j = 0; j < nelt_mode; j++)
31923 rval[j] = val_elt;
31925 reg = gen_reg_rtx (mode);
31926 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31927 /* Emit instruction loading the constant value. */
31928 emit_move_insn (reg, val_vec);
31930 i = 0;
31931 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31932 if (mode == V16QImode)
31934 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31935 emit_insn (gen_movmisalignv16qi (mem, reg));
31936 i += nelt_mode;
31937 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31938 if (i + nelt_v8 < length && i + nelt_v16 > length)
31940 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31941 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31942 /* We are shifting bytes back, set the alignment accordingly. */
31943 if ((length & 0x3) == 0)
31944 set_mem_align (mem, BITS_PER_UNIT * 4);
31945 else if ((length & 0x1) == 0)
31946 set_mem_align (mem, BITS_PER_UNIT * 2);
31947 else
31948 set_mem_align (mem, BITS_PER_UNIT);
31950 emit_insn (gen_movmisalignv16qi (mem, reg));
31951 return true;
31953 /* Fall through for bytes leftover. */
31954 mode = V8QImode;
31955 nelt_mode = GET_MODE_NUNITS (mode);
31956 reg = gen_lowpart (V8QImode, reg);
31959 /* Handle 8 bytes in a vector. */
31960 for (; (i + nelt_mode <= length); i += nelt_mode)
31962 addr = plus_constant (Pmode, dst, i);
31963 mem = adjust_automodify_address (dstbase, mode, addr, i);
31964 emit_move_insn (mem, reg);
31967 /* Handle single word leftover by shifting 4 bytes back. We can
31968 use aligned access for this case. */
31969 if (i + UNITS_PER_WORD == length)
31971 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31972 mem = adjust_automodify_address (dstbase, mode,
31973 addr, i - UNITS_PER_WORD);
31974 /* We are shifting 4 bytes back, set the alignment accordingly. */
31975 if (align > UNITS_PER_WORD)
31976 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31978 emit_move_insn (mem, reg);
31980 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31981 We have to use unaligned access for this case. */
31982 else if (i < length)
31984 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31985 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31986 /* We are shifting bytes back, set the alignment accordingly. */
31987 if ((length & 1) == 0)
31988 set_mem_align (mem, BITS_PER_UNIT * 2);
31989 else
31990 set_mem_align (mem, BITS_PER_UNIT);
31992 emit_insn (gen_movmisalignv8qi (mem, reg));
31995 return true;
31998 /* Set a block of memory using plain strh/strb instructions, only
31999 using instructions allowed by ALIGN on the processor. We fill the
32000 first LENGTH bytes of the memory area starting from DSTBASE
32001 with byte constant VALUE. ALIGN is the alignment requirement
32002 of memory. */
32003 static bool
32004 arm_block_set_unaligned_non_vect (rtx dstbase,
32005 unsigned HOST_WIDE_INT length,
32006 unsigned HOST_WIDE_INT value,
32007 unsigned HOST_WIDE_INT align)
32009 unsigned int i;
32010 rtx dst, addr, mem;
32011 rtx val_exp, val_reg, reg;
32012 machine_mode mode;
32013 HOST_WIDE_INT v = value;
32015 gcc_assert (align == 1 || align == 2);
32017 if (align == 2)
32018 v |= (value << BITS_PER_UNIT);
32020 v = sext_hwi (v, BITS_PER_WORD);
32021 val_exp = GEN_INT (v);
32022 /* Skip if it isn't profitable. */
32023 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32024 align, true, false))
32025 return false;
32027 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32028 mode = (align == 2 ? HImode : QImode);
32029 val_reg = force_reg (SImode, val_exp);
32030 reg = gen_lowpart (mode, val_reg);
32032 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32034 addr = plus_constant (Pmode, dst, i);
32035 mem = adjust_automodify_address (dstbase, mode, addr, i);
32036 emit_move_insn (mem, reg);
32039 /* Handle single byte leftover. */
32040 if (i + 1 == length)
32042 reg = gen_lowpart (QImode, val_reg);
32043 addr = plus_constant (Pmode, dst, i);
32044 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32045 emit_move_insn (mem, reg);
32046 i++;
32049 gcc_assert (i == length);
32050 return true;
32053 /* Set a block of memory using plain strd/str/strh/strb instructions,
32054 to permit unaligned copies on processors which support unaligned
32055 semantics for those instructions. We fill the first LENGTH bytes
32056 of the memory area starting from DSTBASE with byte constant VALUE.
32057 ALIGN is the alignment requirement of memory. */
32058 static bool
32059 arm_block_set_aligned_non_vect (rtx dstbase,
32060 unsigned HOST_WIDE_INT length,
32061 unsigned HOST_WIDE_INT value,
32062 unsigned HOST_WIDE_INT align)
32064 unsigned int i;
32065 rtx dst, addr, mem;
32066 rtx val_exp, val_reg, reg;
32067 unsigned HOST_WIDE_INT v;
32068 bool use_strd_p;
32070 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32071 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32073 v = (value | (value << 8) | (value << 16) | (value << 24));
32074 if (length < UNITS_PER_WORD)
32075 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32077 if (use_strd_p)
32078 v |= (v << BITS_PER_WORD);
32079 else
32080 v = sext_hwi (v, BITS_PER_WORD);
32082 val_exp = GEN_INT (v);
32083 /* Skip if it isn't profitable. */
32084 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32085 align, false, use_strd_p))
32087 if (!use_strd_p)
32088 return false;
32090 /* Try without strd. */
32091 v = (v >> BITS_PER_WORD);
32092 v = sext_hwi (v, BITS_PER_WORD);
32093 val_exp = GEN_INT (v);
32094 use_strd_p = false;
32095 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32096 align, false, use_strd_p))
32097 return false;
32100 i = 0;
32101 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32102 /* Handle double words using strd if possible. */
32103 if (use_strd_p)
32105 val_reg = force_reg (DImode, val_exp);
32106 reg = val_reg;
32107 for (; (i + 8 <= length); i += 8)
32109 addr = plus_constant (Pmode, dst, i);
32110 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32111 emit_move_insn (mem, reg);
32114 else
32115 val_reg = force_reg (SImode, val_exp);
32117 /* Handle words. */
32118 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32119 for (; (i + 4 <= length); i += 4)
32121 addr = plus_constant (Pmode, dst, i);
32122 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32123 if ((align & 3) == 0)
32124 emit_move_insn (mem, reg);
32125 else
32126 emit_insn (gen_unaligned_storesi (mem, reg));
32129 /* Merge last pair of STRH and STRB into a STR if possible. */
32130 if (unaligned_access && i > 0 && (i + 3) == length)
32132 addr = plus_constant (Pmode, dst, i - 1);
32133 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32134 /* We are shifting one byte back, set the alignment accordingly. */
32135 if ((align & 1) == 0)
32136 set_mem_align (mem, BITS_PER_UNIT);
32138 /* Most likely this is an unaligned access, and we can't tell at
32139 compilation time. */
32140 emit_insn (gen_unaligned_storesi (mem, reg));
32141 return true;
32144 /* Handle half word leftover. */
32145 if (i + 2 <= length)
32147 reg = gen_lowpart (HImode, val_reg);
32148 addr = plus_constant (Pmode, dst, i);
32149 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32150 if ((align & 1) == 0)
32151 emit_move_insn (mem, reg);
32152 else
32153 emit_insn (gen_unaligned_storehi (mem, reg));
32155 i += 2;
32158 /* Handle single byte leftover. */
32159 if (i + 1 == length)
32161 reg = gen_lowpart (QImode, val_reg);
32162 addr = plus_constant (Pmode, dst, i);
32163 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32164 emit_move_insn (mem, reg);
32167 return true;
32170 /* Set a block of memory using vectorization instructions for both
32171 aligned and unaligned cases. We fill the first LENGTH bytes of
32172 the memory area starting from DSTBASE with byte constant VALUE.
32173 ALIGN is the alignment requirement of memory. */
32174 static bool
32175 arm_block_set_vect (rtx dstbase,
32176 unsigned HOST_WIDE_INT length,
32177 unsigned HOST_WIDE_INT value,
32178 unsigned HOST_WIDE_INT align)
32180 /* Check whether we need to use unaligned store instruction. */
32181 if (((align & 3) != 0 || (length & 3) != 0)
32182 /* Check whether unaligned store instruction is available. */
32183 && (!unaligned_access || BYTES_BIG_ENDIAN))
32184 return false;
32186 if ((align & 3) == 0)
32187 return arm_block_set_aligned_vect (dstbase, length, value, align);
32188 else
32189 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32192 /* Expand a string store (memset) operation. First we try to do it
32193 using vectorization instructions, then with ARM unaligned access and
32194 double-word stores if profitable. OPERANDS[0] is the destination,
32195 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32196 initialize the memory with, OPERANDS[3] is the known alignment of the
32197 destination. */
32198 bool
32199 arm_gen_setmem (rtx *operands)
32201 rtx dstbase = operands[0];
32202 unsigned HOST_WIDE_INT length;
32203 unsigned HOST_WIDE_INT value;
32204 unsigned HOST_WIDE_INT align;
32206 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32207 return false;
32209 length = UINTVAL (operands[1]);
32210 if (length > 64)
32211 return false;
32213 value = (UINTVAL (operands[2]) & 0xFF);
32214 align = UINTVAL (operands[3]);
32215 if (TARGET_NEON && length >= 8
32216 && current_tune->string_ops_prefer_neon
32217 && arm_block_set_vect (dstbase, length, value, align))
32218 return true;
32220 if (!unaligned_access && (align & 3) != 0)
32221 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32223 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
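/* For example: a memset of 15 bytes to a 4-byte-aligned destination first
   tries arm_block_set_vect when NEON is enabled and the tuning prefers it
   for string operations; otherwise, with unaligned access available, it
   falls back to arm_block_set_aligned_non_vect, which stores three words
   and then finishes the last 3 bytes with a single unaligned STR shifted
   back by one byte.  */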
32226 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32228 static unsigned HOST_WIDE_INT
32229 arm_asan_shadow_offset (void)
32231 return (unsigned HOST_WIDE_INT) 1 << 29;
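/* With the usual ASan shadow scale of 3, this means shadow addresses are
   computed as (address >> 3) + 0x20000000 on 32-bit ARM targets.  */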
32235 /* This is a temporary fix for PR60655. Ideally we need
32236 to handle most of these cases in the generic part but
32237 currently we reject minus (..) (sym_ref). We try to
32238 ameliorate the case with minus (sym_ref1) (sym_ref2)
32239 where they are in the same section. */
32241 static bool
32242 arm_const_not_ok_for_debug_p (rtx p)
32244 tree decl_op0 = NULL;
32245 tree decl_op1 = NULL;
32247 if (GET_CODE (p) == MINUS)
32249 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32251 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32252 if (decl_op1
32253 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32254 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32256 if ((TREE_CODE (decl_op1) == VAR_DECL
32257 || TREE_CODE (decl_op1) == CONST_DECL)
32258 && (TREE_CODE (decl_op0) == VAR_DECL
32259 || TREE_CODE (decl_op0) == CONST_DECL))
32260 return (get_variable_section (decl_op1, false)
32261 != get_variable_section (decl_op0, false));
32263 if (TREE_CODE (decl_op1) == LABEL_DECL
32264 && TREE_CODE (decl_op0) == LABEL_DECL)
32265 return (DECL_CONTEXT (decl_op1)
32266 != DECL_CONTEXT (decl_op0));
32269 return true;
32273 return false;
32276 static void
32277 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32279 const unsigned ARM_FE_INVALID = 1;
32280 const unsigned ARM_FE_DIVBYZERO = 2;
32281 const unsigned ARM_FE_OVERFLOW = 4;
32282 const unsigned ARM_FE_UNDERFLOW = 8;
32283 const unsigned ARM_FE_INEXACT = 16;
32284 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32285 | ARM_FE_DIVBYZERO
32286 | ARM_FE_OVERFLOW
32287 | ARM_FE_UNDERFLOW
32288 | ARM_FE_INEXACT);
32289 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
32290 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32291 tree new_fenv_var, reload_fenv, restore_fnenv;
32292 tree update_call, atomic_feraiseexcept, hold_fnclex;
32294 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32295 return;
32297 /* Generate the equivalent of :
32298 unsigned int fenv_var;
32299 fenv_var = __builtin_arm_get_fpscr ();
32301 unsigned int masked_fenv;
32302 masked_fenv = fenv_var & mask;
32304 __builtin_arm_set_fpscr (masked_fenv); */
32306 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32307 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32308 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32309 mask = build_int_cst (unsigned_type_node,
32310 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32311 | ARM_FE_ALL_EXCEPT));
32312 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32313 fenv_var, build_call_expr (get_fpscr, 0));
32314 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32315 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32316 *hold = build2 (COMPOUND_EXPR, void_type_node,
32317 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32318 hold_fnclex);
32320 /* Store the value of masked_fenv to clear the exceptions:
32321 __builtin_arm_set_fpscr (masked_fenv); */
32323 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32325 /* Generate the equivalent of :
32326 unsigned int new_fenv_var;
32327 new_fenv_var = __builtin_arm_get_fpscr ();
32329 __builtin_arm_set_fpscr (fenv_var);
32331 __atomic_feraiseexcept (new_fenv_var); */
32333 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32334 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32335 build_call_expr (get_fpscr, 0));
32336 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32337 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32338 update_call = build_call_expr (atomic_feraiseexcept, 1,
32339 fold_convert (integer_type_node, new_fenv_var));
32340 *update = build2 (COMPOUND_EXPR, void_type_node,
32341 build2 (COMPOUND_EXPR, void_type_node,
32342 reload_fenv, restore_fnenv), update_call);
32345 /* Return TRUE if X is a reference to a value in a constant pool. */
32346 extern bool
32347 arm_is_constant_pool_ref (rtx x)
32349 return (MEM_P (x)
32350 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32351 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32354 #include "gt-arm.h"