[official-gcc.git] / gcc / config / arm / arm.c
blob 3f2ddd4adb3bbf7c26bbdc6eb06c9ecf015398be
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
94 {
95 int i[4];
96 };
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx safe_vector_operand (rtx, machine_mode);
199 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
200 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
201 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
202 static tree arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond, rtx pattern);
204 static rtx_insn *emit_set_insn (rtx, rtx);
205 static rtx emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
207 tree, bool);
208 static rtx arm_function_arg (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
213 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
214 const_tree);
215 static rtx aapcs_libcall_value (machine_mode);
216 static int aapcs_select_return_coproc (const_tree, const_tree);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
220 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
221 #endif
222 #ifndef ARM_PE
223 static void arm_encode_section_info (tree, rtx, int);
224 #endif
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
230 tree, int *, int);
231 static bool arm_pass_by_reference (cumulative_args_t,
232 machine_mode, const_tree, bool);
233 static bool arm_promote_prototypes (const_tree);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree);
237 static bool arm_must_pass_in_stack (machine_mode, const_tree);
238 static bool arm_return_in_memory (const_tree, const_tree);
239 #if ARM_UNWIND_INFO
240 static void arm_unwind_emit (FILE *, rtx_insn *);
241 static bool arm_output_ttype (rtx);
242 static void arm_asm_emit_except_personality (rtx);
243 static void arm_asm_init_sections (void);
244 #endif
245 static rtx arm_dwarf_register_span (rtx);
247 static tree arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree arm_get_cookie_size (tree);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree, rtx);
259 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static const char *arm_invalid_parameter_type (const_tree t);
269 static const char *arm_invalid_return_type (const_tree t);
270 static tree arm_promoted_type (const_tree t);
271 static tree arm_convert_to_type (tree type, tree expr);
272 static bool arm_scalar_mode_supported_p (machine_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (machine_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table[] =
317 {
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
321 it may lie outside of the 26 bit addressing range of a normal function
322 call. */
323 { "long_call", 0, 0, false, true, true, NULL, false },
324 /* Whereas these functions are always known to reside within the 26 bit
325 addressing range. */
326 { "short_call", 0, 0, false, true, true, NULL, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
329 false },
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
336 false },
337 #ifdef ARM_PE
338 /* ARM/PE has three new attributes:
339 interfacearm - ?
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
343 Microsoft allows multiple declspecs in one __declspec, separating
344 them with spaces. We do NOT support this. Instead, use __declspec
345 multiple times.
346 */
347 { "dllimport", 0, 0, true, false, false, NULL, false },
348 { "dllexport", 0, 0, true, false, false, NULL, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
355 false },
356 #endif
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
369 #undef TARGET_LRA_P
370 #define TARGET_LRA_P arm_lra_p
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
401 #undef TARGET_OPTION_OVERRIDE
402 #define TARGET_OPTION_OVERRIDE arm_option_override
404 #undef TARGET_COMP_TYPE_ATTRIBUTES
405 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
407 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
408 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
410 #undef TARGET_SCHED_ADJUST_COST
411 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
413 #undef TARGET_SCHED_REORDER
414 #define TARGET_SCHED_REORDER arm_sched_reorder
416 #undef TARGET_REGISTER_MOVE_COST
417 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
419 #undef TARGET_MEMORY_MOVE_COST
420 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
422 #undef TARGET_ENCODE_SECTION_INFO
423 #ifdef ARM_PE
424 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
425 #else
426 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
427 #endif
429 #undef TARGET_STRIP_NAME_ENCODING
430 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
432 #undef TARGET_ASM_INTERNAL_LABEL
433 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
438 #undef TARGET_FUNCTION_VALUE
439 #define TARGET_FUNCTION_VALUE arm_function_value
441 #undef TARGET_LIBCALL_VALUE
442 #define TARGET_LIBCALL_VALUE arm_libcall_value
444 #undef TARGET_FUNCTION_VALUE_REGNO_P
445 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
447 #undef TARGET_ASM_OUTPUT_MI_THUNK
448 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
452 #undef TARGET_RTX_COSTS
453 #define TARGET_RTX_COSTS arm_rtx_costs
454 #undef TARGET_ADDRESS_COST
455 #define TARGET_ADDRESS_COST arm_address_cost
457 #undef TARGET_SHIFT_TRUNCATION_MASK
458 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
459 #undef TARGET_VECTOR_MODE_SUPPORTED_P
460 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
461 #undef TARGET_ARRAY_MODE_SUPPORTED_P
462 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
463 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
464 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
465 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
466 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
467 arm_autovectorize_vector_sizes
469 #undef TARGET_MACHINE_DEPENDENT_REORG
470 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS arm_init_builtins
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
476 #undef TARGET_BUILTIN_DECL
477 #define TARGET_BUILTIN_DECL arm_builtin_decl
479 #undef TARGET_INIT_LIBFUNCS
480 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
482 #undef TARGET_PROMOTE_FUNCTION_MODE
483 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
486 #undef TARGET_PASS_BY_REFERENCE
487 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
488 #undef TARGET_ARG_PARTIAL_BYTES
489 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
490 #undef TARGET_FUNCTION_ARG
491 #define TARGET_FUNCTION_ARG arm_function_arg
492 #undef TARGET_FUNCTION_ARG_ADVANCE
493 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
494 #undef TARGET_FUNCTION_ARG_BOUNDARY
495 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
497 #undef TARGET_SETUP_INCOMING_VARARGS
498 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
500 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
501 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
503 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
504 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
505 #undef TARGET_TRAMPOLINE_INIT
506 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
507 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
508 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
510 #undef TARGET_WARN_FUNC_RETURN
511 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
513 #undef TARGET_DEFAULT_SHORT_ENUMS
514 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
516 #undef TARGET_ALIGN_ANON_BITFIELD
517 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
519 #undef TARGET_NARROW_VOLATILE_BITFIELD
520 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
522 #undef TARGET_CXX_GUARD_TYPE
523 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
525 #undef TARGET_CXX_GUARD_MASK_BIT
526 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
528 #undef TARGET_CXX_GET_COOKIE_SIZE
529 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
531 #undef TARGET_CXX_COOKIE_HAS_SIZE
532 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
534 #undef TARGET_CXX_CDTOR_RETURNS_THIS
535 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
537 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
538 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
540 #undef TARGET_CXX_USE_AEABI_ATEXIT
541 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
543 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
544 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
545 arm_cxx_determine_class_data_visibility
547 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
548 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
550 #undef TARGET_RETURN_IN_MSB
551 #define TARGET_RETURN_IN_MSB arm_return_in_msb
553 #undef TARGET_RETURN_IN_MEMORY
554 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
556 #undef TARGET_MUST_PASS_IN_STACK
557 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
559 #if ARM_UNWIND_INFO
560 #undef TARGET_ASM_UNWIND_EMIT
561 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
563 /* EABI unwinding tables use a different format for the typeinfo tables. */
564 #undef TARGET_ASM_TTYPE
565 #define TARGET_ASM_TTYPE arm_output_ttype
567 #undef TARGET_ARM_EABI_UNWINDER
568 #define TARGET_ARM_EABI_UNWINDER true
570 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
571 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
573 #undef TARGET_ASM_INIT_SECTIONS
574 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
575 #endif /* ARM_UNWIND_INFO */
577 #undef TARGET_DWARF_REGISTER_SPAN
578 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
580 #undef TARGET_CANNOT_COPY_INSN_P
581 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
583 #ifdef HAVE_AS_TLS
584 #undef TARGET_HAVE_TLS
585 #define TARGET_HAVE_TLS true
586 #endif
588 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
589 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
591 #undef TARGET_LEGITIMATE_CONSTANT_P
592 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
594 #undef TARGET_CANNOT_FORCE_CONST_MEM
595 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
597 #undef TARGET_MAX_ANCHOR_OFFSET
598 #define TARGET_MAX_ANCHOR_OFFSET 4095
600 /* The minimum is set such that the total size of the block
601 for a particular anchor is -4088 + 1 + 4095 bytes, which is
602 divisible by eight, ensuring natural spacing of anchors. */
603 #undef TARGET_MIN_ANCHOR_OFFSET
604 #define TARGET_MIN_ANCHOR_OFFSET -4088
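/* As a worked check of that arithmetic, using only the two offsets
   defined above: 4095 - (-4088) + 1 = 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, so the span covered by a single anchor is
   indeed a multiple of eight.  */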
606 #undef TARGET_SCHED_ISSUE_RATE
607 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE arm_mangle_type
612 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
613 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
615 #undef TARGET_BUILD_BUILTIN_VA_LIST
616 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
617 #undef TARGET_EXPAND_BUILTIN_VA_START
618 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
619 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
620 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
622 #ifdef HAVE_AS_TLS
623 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
624 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
625 #endif
627 #undef TARGET_LEGITIMATE_ADDRESS_P
628 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
633 #undef TARGET_INVALID_PARAMETER_TYPE
634 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
636 #undef TARGET_INVALID_RETURN_TYPE
637 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
639 #undef TARGET_PROMOTED_TYPE
640 #define TARGET_PROMOTED_TYPE arm_promoted_type
642 #undef TARGET_CONVERT_TO_TYPE
643 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
645 #undef TARGET_SCALAR_MODE_SUPPORTED_P
646 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
648 #undef TARGET_FRAME_POINTER_REQUIRED
649 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
651 #undef TARGET_CAN_ELIMINATE
652 #define TARGET_CAN_ELIMINATE arm_can_eliminate
654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
655 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
657 #undef TARGET_CLASS_LIKELY_SPILLED_P
658 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
660 #undef TARGET_VECTORIZE_BUILTINS
661 #define TARGET_VECTORIZE_BUILTINS
663 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
664 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
665 arm_builtin_vectorized_function
667 #undef TARGET_VECTOR_ALIGNMENT
668 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
670 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
671 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
672 arm_vector_alignment_reachable
674 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
675 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
676 arm_builtin_support_vector_misalignment
678 #undef TARGET_PREFERRED_RENAME_CLASS
679 #define TARGET_PREFERRED_RENAME_CLASS \
680 arm_preferred_rename_class
682 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
683 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
684 arm_vectorize_vec_perm_const_ok
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
688 arm_builtin_vectorization_cost
689 #undef TARGET_VECTORIZE_ADD_STMT_COST
690 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
692 #undef TARGET_CANONICALIZE_COMPARISON
693 #define TARGET_CANONICALIZE_COMPARISON \
694 arm_canonicalize_comparison
696 #undef TARGET_ASAN_SHADOW_OFFSET
697 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
699 #undef MAX_INSN_PER_IT_BLOCK
700 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
702 #undef TARGET_CAN_USE_DOLOOP_P
703 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
705 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
706 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
708 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
709 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
711 struct gcc_target targetm = TARGET_INITIALIZER;
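/* The long run of #undef/#define pairs above follows the usual GCC
   target-hook idiom: target-def.h supplies a default definition for
   every TARGET_* hook macro, each pair here replaces one of those
   defaults, and the single TARGET_INITIALIZER expansion immediately
   above collects the final values into the targetm structure.  A
   minimal sketch of the idiom (TARGET_SOME_HOOK and arm_some_hook are
   hypothetical names, used purely for illustration):

     #undef  TARGET_SOME_HOOK
     #define TARGET_SOME_HOOK arm_some_hook
     ...
     struct gcc_target targetm = TARGET_INITIALIZER;

   Hooks that are not overridden keep their defaults from target-def.h.  */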
713 /* Obstack for minipool constant handling. */
714 static struct obstack minipool_obstack;
715 static char * minipool_startobj;
717 /* The maximum number of insns skipped which
718 will be conditionalised if possible. */
719 static int max_insns_skipped = 5;
721 extern FILE * asm_out_file;
723 /* True if we are currently building a constant table. */
724 int making_const_table;
726 /* The processor for which instructions should be scheduled. */
727 enum processor_type arm_tune = arm_none;
729 /* The current tuning set. */
730 const struct tune_params *current_tune;
732 /* Which floating point hardware to schedule for. */
733 int arm_fpu_attr;
735 /* Which floating point hardware to use. */
736 const struct arm_fpu_desc *arm_fpu_desc;
738 /* Used for Thumb call_via trampolines. */
739 rtx thumb_call_via_label[14];
740 static int thumb_call_reg_needed;
742 /* Bit values used to identify processor capabilities. */
743 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
744 #define FL_ARCH3M (1 << 1) /* Extended multiply */
745 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
746 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
747 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
748 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
749 #define FL_THUMB (1 << 6) /* Thumb aware */
750 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
751 #define FL_STRONG (1 << 8) /* StrongARM */
752 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
753 #define FL_XSCALE (1 << 10) /* XScale */
754 /* spare (1 << 11) */
755 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
756 media instructions. */
757 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
758 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
759 Note: ARM6 & 7 derivatives only. */
760 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
761 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
762 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
763 profile. */
764 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
765 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
766 #define FL_NEON (1 << 20) /* Neon instructions. */
767 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
768 architecture. */
769 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
770 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
771 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
772 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
774 #define FL_SMALLMUL (1 << 26) /* Small multiply supported. */
776 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
777 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
779 /* Flags that only affect tuning, not available instructions. */
780 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
781 | FL_CO_PROC)
783 #define FL_FOR_ARCH2 FL_NOTM
784 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
785 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
786 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
787 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
788 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
789 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
790 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
791 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
792 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
793 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
794 #define FL_FOR_ARCH6J FL_FOR_ARCH6
795 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
796 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
797 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
798 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
799 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
800 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
801 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
802 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
803 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
804 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
805 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
806 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
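/* Because each FL_FOR_ARCH* value is built from the one before it, the
   flag set for an architecture can be read off by expanding the chain.
   For example, expanding the definitions above gives

     FL_FOR_ARCH4T == (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB)

   while the M-profile entries (FL_FOR_ARCH6M, FL_FOR_ARCH7) strip
   FL_NOTM back out with "& ~FL_NOTM" before adding their own bits.  */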
808 /* The bits in this mask specify which
809 instructions we are allowed to generate. */
810 static unsigned long insn_flags = 0;
812 /* The bits in this mask specify which instruction scheduling options should
813 be used. */
814 static unsigned long tune_flags = 0;
816 /* The highest ARM architecture version supported by the
817 target. */
818 enum base_architecture arm_base_arch = BASE_ARCH_0;
820 /* The following are used in the arm.md file as equivalents to bits
821 in the above two flag variables. */
823 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
824 int arm_arch3m = 0;
826 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
827 int arm_arch4 = 0;
829 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
830 int arm_arch4t = 0;
832 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
833 int arm_arch5 = 0;
835 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
836 int arm_arch5e = 0;
838 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
839 int arm_arch6 = 0;
841 /* Nonzero if this chip supports the ARM 6K extensions. */
842 int arm_arch6k = 0;
844 /* Nonzero if instructions present in ARMv6-M can be used. */
845 int arm_arch6m = 0;
847 /* Nonzero if this chip supports the ARM 7 extensions. */
848 int arm_arch7 = 0;
850 /* Nonzero if instructions not present in the 'M' profile can be used. */
851 int arm_arch_notm = 0;
853 /* Nonzero if instructions present in ARMv7E-M can be used. */
854 int arm_arch7em = 0;
856 /* Nonzero if instructions present in ARMv8 can be used. */
857 int arm_arch8 = 0;
859 /* Nonzero if this chip can benefit from load scheduling. */
860 int arm_ld_sched = 0;
862 /* Nonzero if this chip is a StrongARM. */
863 int arm_tune_strongarm = 0;
865 /* Nonzero if this chip supports Intel Wireless MMX technology. */
866 int arm_arch_iwmmxt = 0;
868 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
869 int arm_arch_iwmmxt2 = 0;
871 /* Nonzero if this chip is an XScale. */
872 int arm_arch_xscale = 0;
874 /* Nonzero if tuning for XScale */
875 int arm_tune_xscale = 0;
877 /* Nonzero if we want to tune for stores that access the write-buffer.
878 This typically means an ARM6 or ARM7 with MMU or MPU. */
879 int arm_tune_wbuf = 0;
881 /* Nonzero if tuning for Cortex-A9. */
882 int arm_tune_cortex_a9 = 0;
884 /* Nonzero if generating Thumb instructions. */
885 int thumb_code = 0;
887 /* Nonzero if generating Thumb-1 instructions. */
888 int thumb1_code = 0;
890 /* Nonzero if we should define __THUMB_INTERWORK__ in the
891 preprocessor.
892 XXX This is a bit of a hack, it's intended to help work around
893 problems in GLD which doesn't understand that armv5t code is
894 interworking clean. */
895 int arm_cpp_interwork = 0;
897 /* Nonzero if chip supports Thumb 2. */
898 int arm_arch_thumb2;
900 /* Nonzero if chip supports integer division instruction. */
901 int arm_arch_arm_hwdiv;
902 int arm_arch_thumb_hwdiv;
904 /* Nonzero if we should use Neon to handle 64-bit operations rather
905 than core registers. */
906 int prefer_neon_for_64bits = 0;
908 /* Nonzero if we shouldn't use literal pools. */
909 bool arm_disable_literal_pool = false;
911 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
912 we must report the mode of the memory reference from
913 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
914 machine_mode output_memory_reference_mode;
916 /* The register number to be used for the PIC offset register. */
917 unsigned arm_pic_register = INVALID_REGNUM;
919 enum arm_pcs arm_pcs_default;
921 /* For an explanation of these variables, see final_prescan_insn below. */
922 int arm_ccfsm_state;
923 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
924 enum arm_cond_code arm_current_cc;
926 rtx arm_target_insn;
927 int arm_target_label;
928 /* The number of conditionally executed insns, including the current insn. */
929 int arm_condexec_count = 0;
930 /* A bitmask specifying the patterns for the IT block.
931 Zero means do not output an IT block before this insn. */
932 int arm_condexec_mask = 0;
933 /* The number of bits used in arm_condexec_mask. */
934 int arm_condexec_masklen = 0;
936 /* Nonzero if chip supports the ARMv8 CRC instructions. */
937 int arm_arch_crc = 0;
939 /* Nonzero if the core has a very small, high-latency, multiply unit. */
940 int arm_m_profile_small_mul = 0;
942 /* The condition codes of the ARM, and the inverse function. */
943 static const char * const arm_condition_codes[] =
944 {
945 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
946 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
947 };
949 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
950 int arm_regs_in_sequence[] =
951 {
952 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
953 };
955 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
956 #define streq(string1, string2) (strcmp (string1, string2) == 0)
958 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
959 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
960 | (1 << PIC_OFFSET_TABLE_REGNUM)))
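/* A rough reading of THUMB2_WORK_REGS: 0xff selects the low registers
   r0-r7, and the mask then clears the Thumb hard frame pointer, the
   stack pointer, the program counter and the PIC register.  Since
   SP_REGNUM and PC_REGNUM lie above bit 7, only the frame-pointer and
   PIC bits can actually fall inside 0xff.  An illustrative scan for a
   free work register (not a copy of any caller in this file) might be:

     for (int reg = 0; reg < 8; reg++)
       if (THUMB2_WORK_REGS & (1 << reg))
         break;   (reg is then usable as a Thumb-2 work register)
*/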
962 /* Initialization code. */
964 struct processors
965 {
966 const char *const name;
967 enum processor_type core;
968 const char *arch;
969 enum base_architecture base_arch;
970 const unsigned long flags;
971 const struct tune_params *const tune;
972 };
975 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
976 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
977 prefetch_slots, \
978 l1_size, \
979 l1_line_size
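/* Both macros simply expand to the three comma-separated prefetch
   tuning values (prefetch_slots, l1_size, l1_line_size) that a
   tune_params initializer expects in sequence.  For example, from the
   definitions above,

     ARM_PREFETCH_NOT_BENEFICIAL          expands to   0, -1, -1
     ARM_PREFETCH_BENEFICIAL (4, 32, 64)  expands to   4, 32, 64

   where the numbers in the second line are purely illustrative.  */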
981 /* arm generic vectorizer costs. */
982 static const
983 struct cpu_vec_costs arm_default_vec_cost = {
984 1, /* scalar_stmt_cost. */
985 1, /* scalar load_cost. */
986 1, /* scalar_store_cost. */
987 1, /* vec_stmt_cost. */
988 1, /* vec_to_scalar_cost. */
989 1, /* scalar_to_vec_cost. */
990 1, /* vec_align_load_cost. */
991 1, /* vec_unalign_load_cost. */
992 1, /* vec_unalign_store_cost. */
993 1, /* vec_store_cost. */
994 3, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
998 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
999 #include "aarch-cost-tables.h"
1003 const struct cpu_cost_table cortexa9_extra_costs =
1005 /* ALU */
1007 0, /* arith. */
1008 0, /* logical. */
1009 0, /* shift. */
1010 COSTS_N_INSNS (1), /* shift_reg. */
1011 COSTS_N_INSNS (1), /* arith_shift. */
1012 COSTS_N_INSNS (2), /* arith_shift_reg. */
1013 0, /* log_shift. */
1014 COSTS_N_INSNS (1), /* log_shift_reg. */
1015 COSTS_N_INSNS (1), /* extend. */
1016 COSTS_N_INSNS (2), /* extend_arith. */
1017 COSTS_N_INSNS (1), /* bfi. */
1018 COSTS_N_INSNS (1), /* bfx. */
1019 0, /* clz. */
1020 0, /* rev. */
1021 0, /* non_exec. */
1022 true /* non_exec_costs_exec. */
1025 /* MULT SImode */
1027 COSTS_N_INSNS (3), /* simple. */
1028 COSTS_N_INSNS (3), /* flag_setting. */
1029 COSTS_N_INSNS (2), /* extend. */
1030 COSTS_N_INSNS (3), /* add. */
1031 COSTS_N_INSNS (2), /* extend_add. */
1032 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1034 /* MULT DImode */
1036 0, /* simple (N/A). */
1037 0, /* flag_setting (N/A). */
1038 COSTS_N_INSNS (4), /* extend. */
1039 0, /* add (N/A). */
1040 COSTS_N_INSNS (4), /* extend_add. */
1041 0 /* idiv (N/A). */
1044 /* LD/ST */
1046 COSTS_N_INSNS (2), /* load. */
1047 COSTS_N_INSNS (2), /* load_sign_extend. */
1048 COSTS_N_INSNS (2), /* ldrd. */
1049 COSTS_N_INSNS (2), /* ldm_1st. */
1050 1, /* ldm_regs_per_insn_1st. */
1051 2, /* ldm_regs_per_insn_subsequent. */
1052 COSTS_N_INSNS (5), /* loadf. */
1053 COSTS_N_INSNS (5), /* loadd. */
1054 COSTS_N_INSNS (1), /* load_unaligned. */
1055 COSTS_N_INSNS (2), /* store. */
1056 COSTS_N_INSNS (2), /* strd. */
1057 COSTS_N_INSNS (2), /* stm_1st. */
1058 1, /* stm_regs_per_insn_1st. */
1059 2, /* stm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (1), /* storef. */
1061 COSTS_N_INSNS (1), /* stored. */
1062 COSTS_N_INSNS (1) /* store_unaligned. */
1065 /* FP SFmode */
1067 COSTS_N_INSNS (14), /* div. */
1068 COSTS_N_INSNS (4), /* mult. */
1069 COSTS_N_INSNS (7), /* mult_addsub. */
1070 COSTS_N_INSNS (30), /* fma. */
1071 COSTS_N_INSNS (3), /* addsub. */
1072 COSTS_N_INSNS (1), /* fpconst. */
1073 COSTS_N_INSNS (1), /* neg. */
1074 COSTS_N_INSNS (3), /* compare. */
1075 COSTS_N_INSNS (3), /* widen. */
1076 COSTS_N_INSNS (3), /* narrow. */
1077 COSTS_N_INSNS (3), /* toint. */
1078 COSTS_N_INSNS (3), /* fromint. */
1079 COSTS_N_INSNS (3) /* roundint. */
1081 /* FP DFmode */
1083 COSTS_N_INSNS (24), /* div. */
1084 COSTS_N_INSNS (5), /* mult. */
1085 COSTS_N_INSNS (8), /* mult_addsub. */
1086 COSTS_N_INSNS (30), /* fma. */
1087 COSTS_N_INSNS (3), /* addsub. */
1088 COSTS_N_INSNS (1), /* fpconst. */
1089 COSTS_N_INSNS (1), /* neg. */
1090 COSTS_N_INSNS (3), /* compare. */
1091 COSTS_N_INSNS (3), /* widen. */
1092 COSTS_N_INSNS (3), /* narrow. */
1093 COSTS_N_INSNS (3), /* toint. */
1094 COSTS_N_INSNS (3), /* fromint. */
1095 COSTS_N_INSNS (3) /* roundint. */
1098 /* Vector */
1100 COSTS_N_INSNS (1) /* alu. */
1104 const struct cpu_cost_table cortexa8_extra_costs =
1106 /* ALU */
1108 0, /* arith. */
1109 0, /* logical. */
1110 COSTS_N_INSNS (1), /* shift. */
1111 0, /* shift_reg. */
1112 COSTS_N_INSNS (1), /* arith_shift. */
1113 0, /* arith_shift_reg. */
1114 COSTS_N_INSNS (1), /* log_shift. */
1115 0, /* log_shift_reg. */
1116 0, /* extend. */
1117 0, /* extend_arith. */
1118 0, /* bfi. */
1119 0, /* bfx. */
1120 0, /* clz. */
1121 0, /* rev. */
1122 0, /* non_exec. */
1123 true /* non_exec_costs_exec. */
1126 /* MULT SImode */
1128 COSTS_N_INSNS (1), /* simple. */
1129 COSTS_N_INSNS (1), /* flag_setting. */
1130 COSTS_N_INSNS (1), /* extend. */
1131 COSTS_N_INSNS (1), /* add. */
1132 COSTS_N_INSNS (1), /* extend_add. */
1133 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1135 /* MULT DImode */
1137 0, /* simple (N/A). */
1138 0, /* flag_setting (N/A). */
1139 COSTS_N_INSNS (2), /* extend. */
1140 0, /* add (N/A). */
1141 COSTS_N_INSNS (2), /* extend_add. */
1142 0 /* idiv (N/A). */
1145 /* LD/ST */
1147 COSTS_N_INSNS (1), /* load. */
1148 COSTS_N_INSNS (1), /* load_sign_extend. */
1149 COSTS_N_INSNS (1), /* ldrd. */
1150 COSTS_N_INSNS (1), /* ldm_1st. */
1151 1, /* ldm_regs_per_insn_1st. */
1152 2, /* ldm_regs_per_insn_subsequent. */
1153 COSTS_N_INSNS (1), /* loadf. */
1154 COSTS_N_INSNS (1), /* loadd. */
1155 COSTS_N_INSNS (1), /* load_unaligned. */
1156 COSTS_N_INSNS (1), /* store. */
1157 COSTS_N_INSNS (1), /* strd. */
1158 COSTS_N_INSNS (1), /* stm_1st. */
1159 1, /* stm_regs_per_insn_1st. */
1160 2, /* stm_regs_per_insn_subsequent. */
1161 COSTS_N_INSNS (1), /* storef. */
1162 COSTS_N_INSNS (1), /* stored. */
1163 COSTS_N_INSNS (1) /* store_unaligned. */
1166 /* FP SFmode */
1168 COSTS_N_INSNS (36), /* div. */
1169 COSTS_N_INSNS (11), /* mult. */
1170 COSTS_N_INSNS (20), /* mult_addsub. */
1171 COSTS_N_INSNS (30), /* fma. */
1172 COSTS_N_INSNS (9), /* addsub. */
1173 COSTS_N_INSNS (3), /* fpconst. */
1174 COSTS_N_INSNS (3), /* neg. */
1175 COSTS_N_INSNS (6), /* compare. */
1176 COSTS_N_INSNS (4), /* widen. */
1177 COSTS_N_INSNS (4), /* narrow. */
1178 COSTS_N_INSNS (8), /* toint. */
1179 COSTS_N_INSNS (8), /* fromint. */
1180 COSTS_N_INSNS (8) /* roundint. */
1182 /* FP DFmode */
1184 COSTS_N_INSNS (64), /* div. */
1185 COSTS_N_INSNS (16), /* mult. */
1186 COSTS_N_INSNS (25), /* mult_addsub. */
1187 COSTS_N_INSNS (30), /* fma. */
1188 COSTS_N_INSNS (9), /* addsub. */
1189 COSTS_N_INSNS (3), /* fpconst. */
1190 COSTS_N_INSNS (3), /* neg. */
1191 COSTS_N_INSNS (6), /* compare. */
1192 COSTS_N_INSNS (6), /* widen. */
1193 COSTS_N_INSNS (6), /* narrow. */
1194 COSTS_N_INSNS (8), /* toint. */
1195 COSTS_N_INSNS (8), /* fromint. */
1196 COSTS_N_INSNS (8) /* roundint. */
1199 /* Vector */
1201 COSTS_N_INSNS (1) /* alu. */
1205 const struct cpu_cost_table cortexa5_extra_costs =
1207 /* ALU */
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 COSTS_N_INSNS (1), /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 COSTS_N_INSNS (1), /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 COSTS_N_INSNS (1), /* log_shift_reg. */
1217 COSTS_N_INSNS (1), /* extend. */
1218 COSTS_N_INSNS (1), /* extend_arith. */
1219 COSTS_N_INSNS (1), /* bfi. */
1220 COSTS_N_INSNS (1), /* bfx. */
1221 COSTS_N_INSNS (1), /* clz. */
1222 COSTS_N_INSNS (1), /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1228 /* MULT SImode */
1230 0, /* simple. */
1231 COSTS_N_INSNS (1), /* flag_setting. */
1232 COSTS_N_INSNS (1), /* extend. */
1233 COSTS_N_INSNS (1), /* add. */
1234 COSTS_N_INSNS (1), /* extend_add. */
1235 COSTS_N_INSNS (7) /* idiv. */
1237 /* MULT DImode */
1239 0, /* simple (N/A). */
1240 0, /* flag_setting (N/A). */
1241 COSTS_N_INSNS (1), /* extend. */
1242 0, /* add. */
1243 COSTS_N_INSNS (2), /* extend_add. */
1244 0 /* idiv (N/A). */
1247 /* LD/ST */
1249 COSTS_N_INSNS (1), /* load. */
1250 COSTS_N_INSNS (1), /* load_sign_extend. */
1251 COSTS_N_INSNS (6), /* ldrd. */
1252 COSTS_N_INSNS (1), /* ldm_1st. */
1253 1, /* ldm_regs_per_insn_1st. */
1254 2, /* ldm_regs_per_insn_subsequent. */
1255 COSTS_N_INSNS (2), /* loadf. */
1256 COSTS_N_INSNS (4), /* loadd. */
1257 COSTS_N_INSNS (1), /* load_unaligned. */
1258 COSTS_N_INSNS (1), /* store. */
1259 COSTS_N_INSNS (3), /* strd. */
1260 COSTS_N_INSNS (1), /* stm_1st. */
1261 1, /* stm_regs_per_insn_1st. */
1262 2, /* stm_regs_per_insn_subsequent. */
1263 COSTS_N_INSNS (2), /* storef. */
1264 COSTS_N_INSNS (2), /* stored. */
1265 COSTS_N_INSNS (1) /* store_unaligned. */
1268 /* FP SFmode */
1270 COSTS_N_INSNS (15), /* div. */
1271 COSTS_N_INSNS (3), /* mult. */
1272 COSTS_N_INSNS (7), /* mult_addsub. */
1273 COSTS_N_INSNS (7), /* fma. */
1274 COSTS_N_INSNS (3), /* addsub. */
1275 COSTS_N_INSNS (3), /* fpconst. */
1276 COSTS_N_INSNS (3), /* neg. */
1277 COSTS_N_INSNS (3), /* compare. */
1278 COSTS_N_INSNS (3), /* widen. */
1279 COSTS_N_INSNS (3), /* narrow. */
1280 COSTS_N_INSNS (3), /* toint. */
1281 COSTS_N_INSNS (3), /* fromint. */
1282 COSTS_N_INSNS (3) /* roundint. */
1284 /* FP DFmode */
1286 COSTS_N_INSNS (30), /* div. */
1287 COSTS_N_INSNS (6), /* mult. */
1288 COSTS_N_INSNS (10), /* mult_addsub. */
1289 COSTS_N_INSNS (7), /* fma. */
1290 COSTS_N_INSNS (3), /* addsub. */
1291 COSTS_N_INSNS (3), /* fpconst. */
1292 COSTS_N_INSNS (3), /* neg. */
1293 COSTS_N_INSNS (3), /* compare. */
1294 COSTS_N_INSNS (3), /* widen. */
1295 COSTS_N_INSNS (3), /* narrow. */
1296 COSTS_N_INSNS (3), /* toint. */
1297 COSTS_N_INSNS (3), /* fromint. */
1298 COSTS_N_INSNS (3) /* roundint. */
1301 /* Vector */
1303 COSTS_N_INSNS (1) /* alu. */
1308 const struct cpu_cost_table cortexa7_extra_costs =
1310 /* ALU */
1312 0, /* arith. */
1313 0, /* logical. */
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1326 0, /* non_exec. */
1327 true /* non_exec_costs_exec. */
1331 /* MULT SImode */
1333 0, /* simple. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1340 /* MULT DImode */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1345 0, /* add. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1347 0 /* idiv (N/A). */
1350 /* LD/ST */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (3), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (2), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1) /* store_unaligned. */
1371 /* FP SFmode */
1373 COSTS_N_INSNS (15), /* div. */
1374 COSTS_N_INSNS (3), /* mult. */
1375 COSTS_N_INSNS (7), /* mult_addsub. */
1376 COSTS_N_INSNS (7), /* fma. */
1377 COSTS_N_INSNS (3), /* addsub. */
1378 COSTS_N_INSNS (3), /* fpconst. */
1379 COSTS_N_INSNS (3), /* neg. */
1380 COSTS_N_INSNS (3), /* compare. */
1381 COSTS_N_INSNS (3), /* widen. */
1382 COSTS_N_INSNS (3), /* narrow. */
1383 COSTS_N_INSNS (3), /* toint. */
1384 COSTS_N_INSNS (3), /* fromint. */
1385 COSTS_N_INSNS (3) /* roundint. */
1387 /* FP DFmode */
1389 COSTS_N_INSNS (30), /* div. */
1390 COSTS_N_INSNS (6), /* mult. */
1391 COSTS_N_INSNS (10), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1404 /* Vector */
1406 COSTS_N_INSNS (1) /* alu. */
1410 const struct cpu_cost_table cortexa12_extra_costs =
1412 /* ALU */
1414 0, /* arith. */
1415 0, /* logical. */
1416 0, /* shift. */
1417 COSTS_N_INSNS (1), /* shift_reg. */
1418 COSTS_N_INSNS (1), /* arith_shift. */
1419 COSTS_N_INSNS (1), /* arith_shift_reg. */
1420 COSTS_N_INSNS (1), /* log_shift. */
1421 COSTS_N_INSNS (1), /* log_shift_reg. */
1422 0, /* extend. */
1423 COSTS_N_INSNS (1), /* extend_arith. */
1424 0, /* bfi. */
1425 COSTS_N_INSNS (1), /* bfx. */
1426 COSTS_N_INSNS (1), /* clz. */
1427 COSTS_N_INSNS (1), /* rev. */
1428 0, /* non_exec. */
1429 true /* non_exec_costs_exec. */
1431 /* MULT SImode */
1434 COSTS_N_INSNS (2), /* simple. */
1435 COSTS_N_INSNS (3), /* flag_setting. */
1436 COSTS_N_INSNS (2), /* extend. */
1437 COSTS_N_INSNS (3), /* add. */
1438 COSTS_N_INSNS (2), /* extend_add. */
1439 COSTS_N_INSNS (18) /* idiv. */
1441 /* MULT DImode */
1443 0, /* simple (N/A). */
1444 0, /* flag_setting (N/A). */
1445 COSTS_N_INSNS (3), /* extend. */
1446 0, /* add (N/A). */
1447 COSTS_N_INSNS (3), /* extend_add. */
1448 0 /* idiv (N/A). */
1451 /* LD/ST */
1453 COSTS_N_INSNS (3), /* load. */
1454 COSTS_N_INSNS (3), /* load_sign_extend. */
1455 COSTS_N_INSNS (3), /* ldrd. */
1456 COSTS_N_INSNS (3), /* ldm_1st. */
1457 1, /* ldm_regs_per_insn_1st. */
1458 2, /* ldm_regs_per_insn_subsequent. */
1459 COSTS_N_INSNS (3), /* loadf. */
1460 COSTS_N_INSNS (3), /* loadd. */
1461 0, /* load_unaligned. */
1462 0, /* store. */
1463 0, /* strd. */
1464 0, /* stm_1st. */
1465 1, /* stm_regs_per_insn_1st. */
1466 2, /* stm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* storef. */
1468 COSTS_N_INSNS (2), /* stored. */
1469 0 /* store_unaligned. */
1472 /* FP SFmode */
1474 COSTS_N_INSNS (17), /* div. */
1475 COSTS_N_INSNS (4), /* mult. */
1476 COSTS_N_INSNS (8), /* mult_addsub. */
1477 COSTS_N_INSNS (8), /* fma. */
1478 COSTS_N_INSNS (4), /* addsub. */
1479 COSTS_N_INSNS (2), /* fpconst. */
1480 COSTS_N_INSNS (2), /* neg. */
1481 COSTS_N_INSNS (2), /* compare. */
1482 COSTS_N_INSNS (4), /* widen. */
1483 COSTS_N_INSNS (4), /* narrow. */
1484 COSTS_N_INSNS (4), /* toint. */
1485 COSTS_N_INSNS (4), /* fromint. */
1486 COSTS_N_INSNS (4) /* roundint. */
1488 /* FP DFmode */
1490 COSTS_N_INSNS (31), /* div. */
1491 COSTS_N_INSNS (4), /* mult. */
1492 COSTS_N_INSNS (8), /* mult_addsub. */
1493 COSTS_N_INSNS (8), /* fma. */
1494 COSTS_N_INSNS (4), /* addsub. */
1495 COSTS_N_INSNS (2), /* fpconst. */
1496 COSTS_N_INSNS (2), /* neg. */
1497 COSTS_N_INSNS (2), /* compare. */
1498 COSTS_N_INSNS (4), /* widen. */
1499 COSTS_N_INSNS (4), /* narrow. */
1500 COSTS_N_INSNS (4), /* toint. */
1501 COSTS_N_INSNS (4), /* fromint. */
1502 COSTS_N_INSNS (4) /* roundint. */
1505 /* Vector */
1507 COSTS_N_INSNS (1) /* alu. */
1511 const struct cpu_cost_table cortexa15_extra_costs =
1513 /* ALU */
1515 0, /* arith. */
1516 0, /* logical. */
1517 0, /* shift. */
1518 0, /* shift_reg. */
1519 COSTS_N_INSNS (1), /* arith_shift. */
1520 COSTS_N_INSNS (1), /* arith_shift_reg. */
1521 COSTS_N_INSNS (1), /* log_shift. */
1522 COSTS_N_INSNS (1), /* log_shift_reg. */
1523 0, /* extend. */
1524 COSTS_N_INSNS (1), /* extend_arith. */
1525 COSTS_N_INSNS (1), /* bfi. */
1526 0, /* bfx. */
1527 0, /* clz. */
1528 0, /* rev. */
1529 0, /* non_exec. */
1530 true /* non_exec_costs_exec. */
1532 /* MULT SImode */
1535 COSTS_N_INSNS (2), /* simple. */
1536 COSTS_N_INSNS (3), /* flag_setting. */
1537 COSTS_N_INSNS (2), /* extend. */
1538 COSTS_N_INSNS (2), /* add. */
1539 COSTS_N_INSNS (2), /* extend_add. */
1540 COSTS_N_INSNS (18) /* idiv. */
1542 /* MULT DImode */
1544 0, /* simple (N/A). */
1545 0, /* flag_setting (N/A). */
1546 COSTS_N_INSNS (3), /* extend. */
1547 0, /* add (N/A). */
1548 COSTS_N_INSNS (3), /* extend_add. */
1549 0 /* idiv (N/A). */
1552 /* LD/ST */
1554 COSTS_N_INSNS (3), /* load. */
1555 COSTS_N_INSNS (3), /* load_sign_extend. */
1556 COSTS_N_INSNS (3), /* ldrd. */
1557 COSTS_N_INSNS (4), /* ldm_1st. */
1558 1, /* ldm_regs_per_insn_1st. */
1559 2, /* ldm_regs_per_insn_subsequent. */
1560 COSTS_N_INSNS (4), /* loadf. */
1561 COSTS_N_INSNS (4), /* loadd. */
1562 0, /* load_unaligned. */
1563 0, /* store. */
1564 0, /* strd. */
1565 COSTS_N_INSNS (1), /* stm_1st. */
1566 1, /* stm_regs_per_insn_1st. */
1567 2, /* stm_regs_per_insn_subsequent. */
1568 0, /* storef. */
1569 0, /* stored. */
1570 0 /* store_unaligned. */
1573 /* FP SFmode */
1575 COSTS_N_INSNS (17), /* div. */
1576 COSTS_N_INSNS (4), /* mult. */
1577 COSTS_N_INSNS (8), /* mult_addsub. */
1578 COSTS_N_INSNS (8), /* fma. */
1579 COSTS_N_INSNS (4), /* addsub. */
1580 COSTS_N_INSNS (2), /* fpconst. */
1581 COSTS_N_INSNS (2), /* neg. */
1582 COSTS_N_INSNS (5), /* compare. */
1583 COSTS_N_INSNS (4), /* widen. */
1584 COSTS_N_INSNS (4), /* narrow. */
1585 COSTS_N_INSNS (4), /* toint. */
1586 COSTS_N_INSNS (4), /* fromint. */
1587 COSTS_N_INSNS (4) /* roundint. */
1589 /* FP DFmode */
1591 COSTS_N_INSNS (31), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1606 /* Vector */
1608 COSTS_N_INSNS (1) /* alu. */
1612 const struct cpu_cost_table v7m_extra_costs =
1614 /* ALU */
1616 0, /* arith. */
1617 0, /* logical. */
1618 0, /* shift. */
1619 0, /* shift_reg. */
1620 0, /* arith_shift. */
1621 COSTS_N_INSNS (1), /* arith_shift_reg. */
1622 0, /* log_shift. */
1623 COSTS_N_INSNS (1), /* log_shift_reg. */
1624 0, /* extend. */
1625 COSTS_N_INSNS (1), /* extend_arith. */
1626 0, /* bfi. */
1627 0, /* bfx. */
1628 0, /* clz. */
1629 0, /* rev. */
1630 COSTS_N_INSNS (1), /* non_exec. */
1631 false /* non_exec_costs_exec. */
1634 /* MULT SImode */
1636 COSTS_N_INSNS (1), /* simple. */
1637 COSTS_N_INSNS (1), /* flag_setting. */
1638 COSTS_N_INSNS (2), /* extend. */
1639 COSTS_N_INSNS (1), /* add. */
1640 COSTS_N_INSNS (3), /* extend_add. */
1641 COSTS_N_INSNS (8) /* idiv. */
1643 /* MULT DImode */
1645 0, /* simple (N/A). */
1646 0, /* flag_setting (N/A). */
1647 COSTS_N_INSNS (2), /* extend. */
1648 0, /* add (N/A). */
1649 COSTS_N_INSNS (3), /* extend_add. */
1650 0 /* idiv (N/A). */
1653 /* LD/ST */
1655 COSTS_N_INSNS (2), /* load. */
1656 0, /* load_sign_extend. */
1657 COSTS_N_INSNS (3), /* ldrd. */
1658 COSTS_N_INSNS (2), /* ldm_1st. */
1659 1, /* ldm_regs_per_insn_1st. */
1660 1, /* ldm_regs_per_insn_subsequent. */
1661 COSTS_N_INSNS (2), /* loadf. */
1662 COSTS_N_INSNS (3), /* loadd. */
1663 COSTS_N_INSNS (1), /* load_unaligned. */
1664 COSTS_N_INSNS (2), /* store. */
1665 COSTS_N_INSNS (3), /* strd. */
1666 COSTS_N_INSNS (2), /* stm_1st. */
1667 1, /* stm_regs_per_insn_1st. */
1668 1, /* stm_regs_per_insn_subsequent. */
1669 COSTS_N_INSNS (2), /* storef. */
1670 COSTS_N_INSNS (3), /* stored. */
1671 COSTS_N_INSNS (1) /* store_unaligned. */
1674 /* FP SFmode */
1676 COSTS_N_INSNS (7), /* div. */
1677 COSTS_N_INSNS (2), /* mult. */
1678 COSTS_N_INSNS (5), /* mult_addsub. */
1679 COSTS_N_INSNS (3), /* fma. */
1680 COSTS_N_INSNS (1), /* addsub. */
1681 0, /* fpconst. */
1682 0, /* neg. */
1683 0, /* compare. */
1684 0, /* widen. */
1685 0, /* narrow. */
1686 0, /* toint. */
1687 0, /* fromint. */
1688 0 /* roundint. */
1690 /* FP DFmode */
1692 COSTS_N_INSNS (15), /* div. */
1693 COSTS_N_INSNS (5), /* mult. */
1694 COSTS_N_INSNS (7), /* mult_addsub. */
1695 COSTS_N_INSNS (7), /* fma. */
1696 COSTS_N_INSNS (3), /* addsub. */
1697 0, /* fpconst. */
1698 0, /* neg. */
1699 0, /* compare. */
1700 0, /* widen. */
1701 0, /* narrow. */
1702 0, /* toint. */
1703 0, /* fromint. */
1704 0 /* roundint. */
1707 /* Vector */
1709 COSTS_N_INSNS (1) /* alu. */
1713 const struct tune_params arm_slowmul_tune =
1715 arm_slowmul_rtx_costs,
1716 NULL,
1717 NULL, /* Sched adj cost. */
1718 3, /* Constant limit. */
1719 5, /* Max cond insns. */
1720 ARM_PREFETCH_NOT_BENEFICIAL,
1721 true, /* Prefer constant pool. */
1722 arm_default_branch_cost,
1723 false, /* Prefer LDRD/STRD. */
1724 {true, true}, /* Prefer non short circuit. */
1725 &arm_default_vec_cost, /* Vectorizer costs. */
1726 false, /* Prefer Neon for 64-bits bitops. */
1727 false, false, /* Prefer 32-bit encodings. */
1728 false, /* Prefer Neon for stringops. */
1729 8 /* Maximum insns to inline memset. */
1732 const struct tune_params arm_fastmul_tune =
1734 arm_fastmul_rtx_costs,
1735 NULL,
1736 NULL, /* Sched adj cost. */
1737 1, /* Constant limit. */
1738 5, /* Max cond insns. */
1739 ARM_PREFETCH_NOT_BENEFICIAL,
1740 true, /* Prefer constant pool. */
1741 arm_default_branch_cost,
1742 false, /* Prefer LDRD/STRD. */
1743 {true, true}, /* Prefer non short circuit. */
1744 &arm_default_vec_cost, /* Vectorizer costs. */
1745 false, /* Prefer Neon for 64-bits bitops. */
1746 false, false, /* Prefer 32-bit encodings. */
1747 false, /* Prefer Neon for stringops. */
1748 8 /* Maximum insns to inline memset. */
1751 /* StrongARM has early execution of branches, so a sequence that is worth
1752 skipping is shorter. Set max_insns_skipped to a lower value. */
1754 const struct tune_params arm_strongarm_tune =
1756 arm_fastmul_rtx_costs,
1757 NULL,
1758 NULL, /* Sched adj cost. */
1759 1, /* Constant limit. */
1760 3, /* Max cond insns. */
1761 ARM_PREFETCH_NOT_BENEFICIAL,
1762 true, /* Prefer constant pool. */
1763 arm_default_branch_cost,
1764 false, /* Prefer LDRD/STRD. */
1765 {true, true}, /* Prefer non short circuit. */
1766 &arm_default_vec_cost, /* Vectorizer costs. */
1767 false, /* Prefer Neon for 64-bits bitops. */
1768 false, false, /* Prefer 32-bit encodings. */
1769 false, /* Prefer Neon for stringops. */
1770 8 /* Maximum insns to inline memset. */
1773 const struct tune_params arm_xscale_tune =
1775 arm_xscale_rtx_costs,
1776 NULL,
1777 xscale_sched_adjust_cost,
1778 2, /* Constant limit. */
1779 3, /* Max cond insns. */
1780 ARM_PREFETCH_NOT_BENEFICIAL,
1781 true, /* Prefer constant pool. */
1782 arm_default_branch_cost,
1783 false, /* Prefer LDRD/STRD. */
1784 {true, true}, /* Prefer non short circuit. */
1785 &arm_default_vec_cost, /* Vectorizer costs. */
1786 false, /* Prefer Neon for 64-bits bitops. */
1787 false, false, /* Prefer 32-bit encodings. */
1788 false, /* Prefer Neon for stringops. */
1789 8 /* Maximum insns to inline memset. */
1792 const struct tune_params arm_9e_tune =
1794 arm_9e_rtx_costs,
1795 NULL,
1796 NULL, /* Sched adj cost. */
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 true, /* Prefer constant pool. */
1801 arm_default_branch_cost,
1802 false, /* Prefer LDRD/STRD. */
1803 {true, true}, /* Prefer non short circuit. */
1804 &arm_default_vec_cost, /* Vectorizer costs. */
1805 false, /* Prefer Neon for 64-bits bitops. */
1806 false, false, /* Prefer 32-bit encodings. */
1807 false, /* Prefer Neon for stringops. */
1808 8 /* Maximum insns to inline memset. */
1811 const struct tune_params arm_v6t2_tune =
1813 arm_9e_rtx_costs,
1814 NULL,
1815 NULL, /* Sched adj cost. */
1816 1, /* Constant limit. */
1817 5, /* Max cond insns. */
1818 ARM_PREFETCH_NOT_BENEFICIAL,
1819 false, /* Prefer constant pool. */
1820 arm_default_branch_cost,
1821 false, /* Prefer LDRD/STRD. */
1822 {true, true}, /* Prefer non short circuit. */
1823 &arm_default_vec_cost, /* Vectorizer costs. */
1824 false, /* Prefer Neon for 64-bits bitops. */
1825 false, false, /* Prefer 32-bit encodings. */
1826 false, /* Prefer Neon for stringops. */
1827 8 /* Maximum insns to inline memset. */
1830 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1831 const struct tune_params arm_cortex_tune =
1833 arm_9e_rtx_costs,
1834 &generic_extra_costs,
1835 NULL, /* Sched adj cost. */
1836 1, /* Constant limit. */
1837 5, /* Max cond insns. */
1838 ARM_PREFETCH_NOT_BENEFICIAL,
1839 false, /* Prefer constant pool. */
1840 arm_default_branch_cost,
1841 false, /* Prefer LDRD/STRD. */
1842 {true, true}, /* Prefer non short circuit. */
1843 &arm_default_vec_cost, /* Vectorizer costs. */
1844 false, /* Prefer Neon for 64-bits bitops. */
1845 false, false, /* Prefer 32-bit encodings. */
1846 false, /* Prefer Neon for stringops. */
1847 8 /* Maximum insns to inline memset. */
1850 const struct tune_params arm_cortex_a8_tune =
1852 arm_9e_rtx_costs,
1853 &cortexa8_extra_costs,
1854 NULL, /* Sched adj cost. */
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 false, /* Prefer constant pool. */
1859 arm_default_branch_cost,
1860 false, /* Prefer LDRD/STRD. */
1861 {true, true}, /* Prefer non short circuit. */
1862 &arm_default_vec_cost, /* Vectorizer costs. */
1863 false, /* Prefer Neon for 64-bits bitops. */
1864 false, false, /* Prefer 32-bit encodings. */
1865 true, /* Prefer Neon for stringops. */
1866 8 /* Maximum insns to inline memset. */
1869 const struct tune_params arm_cortex_a7_tune =
1871 arm_9e_rtx_costs,
1872 &cortexa7_extra_costs,
1873    NULL,                                        /* Sched adj cost.  */
1874 1, /* Constant limit. */
1875 5, /* Max cond insns. */
1876 ARM_PREFETCH_NOT_BENEFICIAL,
1877 false, /* Prefer constant pool. */
1878 arm_default_branch_cost,
1879 false, /* Prefer LDRD/STRD. */
1880 {true, true}, /* Prefer non short circuit. */
1881 &arm_default_vec_cost, /* Vectorizer costs. */
1882 false, /* Prefer Neon for 64-bits bitops. */
1883 false, false, /* Prefer 32-bit encodings. */
1884 true, /* Prefer Neon for stringops. */
1885 8 /* Maximum insns to inline memset. */
1888 const struct tune_params arm_cortex_a15_tune =
1890 arm_9e_rtx_costs,
1891 &cortexa15_extra_costs,
1892 NULL, /* Sched adj cost. */
1893 1, /* Constant limit. */
1894 2, /* Max cond insns. */
1895 ARM_PREFETCH_NOT_BENEFICIAL,
1896 false, /* Prefer constant pool. */
1897 arm_default_branch_cost,
1898 true, /* Prefer LDRD/STRD. */
1899 {true, true}, /* Prefer non short circuit. */
1900 &arm_default_vec_cost, /* Vectorizer costs. */
1901 false, /* Prefer Neon for 64-bits bitops. */
1902 true, true, /* Prefer 32-bit encodings. */
1903 true, /* Prefer Neon for stringops. */
1904 8 /* Maximum insns to inline memset. */
1907 const struct tune_params arm_cortex_a53_tune =
1909 arm_9e_rtx_costs,
1910 &cortexa53_extra_costs,
1911 NULL, /* Scheduler cost adjustment. */
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 ARM_PREFETCH_NOT_BENEFICIAL,
1915 false, /* Prefer constant pool. */
1916 arm_default_branch_cost,
1917 false, /* Prefer LDRD/STRD. */
1918 {true, true}, /* Prefer non short circuit. */
1919 &arm_default_vec_cost, /* Vectorizer costs. */
1920 false, /* Prefer Neon for 64-bits bitops. */
1921 false, false, /* Prefer 32-bit encodings. */
1922 false, /* Prefer Neon for stringops. */
1923 8 /* Maximum insns to inline memset. */
1926 const struct tune_params arm_cortex_a57_tune =
1928 arm_9e_rtx_costs,
1929 &cortexa57_extra_costs,
1930 NULL, /* Scheduler cost adjustment. */
1931 1, /* Constant limit. */
1932 2, /* Max cond insns. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 false, /* Prefer constant pool. */
1935 arm_default_branch_cost,
1936 true, /* Prefer LDRD/STRD. */
1937 {true, true}, /* Prefer non short circuit. */
1938 &arm_default_vec_cost, /* Vectorizer costs. */
1939 false, /* Prefer Neon for 64-bits bitops. */
1940 true, true, /* Prefer 32-bit encodings. */
1941 false, /* Prefer Neon for stringops. */
1942 8 /* Maximum insns to inline memset. */
1945 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1946 less appealing. Set max_insns_skipped to a low value. */
1948 const struct tune_params arm_cortex_a5_tune =
1950 arm_9e_rtx_costs,
1951 &cortexa5_extra_costs,
1952 NULL, /* Sched adj cost. */
1953 1, /* Constant limit. */
1954 1, /* Max cond insns. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 false, /* Prefer constant pool. */
1957 arm_cortex_a5_branch_cost,
1958 false, /* Prefer LDRD/STRD. */
1959 {false, false}, /* Prefer non short circuit. */
1960 &arm_default_vec_cost, /* Vectorizer costs. */
1961 false, /* Prefer Neon for 64-bits bitops. */
1962 false, false, /* Prefer 32-bit encodings. */
1963 true, /* Prefer Neon for stringops. */
1964 8 /* Maximum insns to inline memset. */
1967 const struct tune_params arm_cortex_a9_tune =
1969 arm_9e_rtx_costs,
1970 &cortexa9_extra_costs,
1971 cortex_a9_sched_adjust_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 ARM_PREFETCH_BENEFICIAL(4,32,32),
1975 false, /* Prefer constant pool. */
1976 arm_default_branch_cost,
1977 false, /* Prefer LDRD/STRD. */
1978 {true, true}, /* Prefer non short circuit. */
1979 &arm_default_vec_cost, /* Vectorizer costs. */
1980 false, /* Prefer Neon for 64-bits bitops. */
1981 false, false, /* Prefer 32-bit encodings. */
1982 false, /* Prefer Neon for stringops. */
1983 8 /* Maximum insns to inline memset. */
1986 const struct tune_params arm_cortex_a12_tune =
1988 arm_9e_rtx_costs,
1989 &cortexa12_extra_costs,
1990    NULL,                                        /* Sched adj cost.  */
1991 1, /* Constant limit. */
1992 5, /* Max cond insns. */
1993 ARM_PREFETCH_BENEFICIAL(4,32,32),
1994 false, /* Prefer constant pool. */
1995 arm_default_branch_cost,
1996 true, /* Prefer LDRD/STRD. */
1997 {true, true}, /* Prefer non short circuit. */
1998 &arm_default_vec_cost, /* Vectorizer costs. */
1999 false, /* Prefer Neon for 64-bits bitops. */
2000 false, false, /* Prefer 32-bit encodings. */
2001 true, /* Prefer Neon for stringops. */
2002 8 /* Maximum insns to inline memset. */
2005 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a
2006    single cycle (two for the pair).  An LDR from the constant pool also takes two cycles
2007 to execute, but mildly increases pipelining opportunity (consecutive
2008 loads/stores can be pipelined together, saving one cycle), and may also
2009 improve icache utilisation. Hence we prefer the constant pool for such
2010 processors. */
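/* Illustrative sketch only (the symbol name and label are hypothetical, and
   exact timings depend on the core): loading the address of a symbol `x'
   with an immediate pair versus a literal-pool load looks like

       movw  r0, #:lower16:x    @ 1 cycle
       movt  r0, #:upper16:x    @ 1 cycle
   vs.
       ldr   r0, .LC0           @ 2 cycles, but can pipeline with a
                                @ neighbouring load/store
       ...
     .LC0:
       .word x
 */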
2012 const struct tune_params arm_v7m_tune =
2014 arm_9e_rtx_costs,
2015 &v7m_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 true, /* Prefer constant pool. */
2021 arm_cortex_m_branch_cost,
2022 false, /* Prefer LDRD/STRD. */
2023 {false, false}, /* Prefer non short circuit. */
2024 &arm_default_vec_cost, /* Vectorizer costs. */
2025 false, /* Prefer Neon for 64-bits bitops. */
2026 false, false, /* Prefer 32-bit encodings. */
2027 false, /* Prefer Neon for stringops. */
2028 8 /* Maximum insns to inline memset. */
2031 /* Cortex-M7 tuning. */
2033 const struct tune_params arm_cortex_m7_tune =
2035 arm_9e_rtx_costs,
2036 &v7m_extra_costs,
2037 NULL, /* Sched adj cost. */
2038 0, /* Constant limit. */
2039 0, /* Max cond insns. */
2040 ARM_PREFETCH_NOT_BENEFICIAL,
2041 true, /* Prefer constant pool. */
2042 arm_cortex_m_branch_cost,
2043 false, /* Prefer LDRD/STRD. */
2044 {true, true}, /* Prefer non short circuit. */
2045 &arm_default_vec_cost, /* Vectorizer costs. */
2046 false, /* Prefer Neon for 64-bits bitops. */
2047 false, false, /* Prefer 32-bit encodings. */
2048 false, /* Prefer Neon for stringops. */
2049 8 /* Maximum insns to inline memset. */
2052 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2053 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2054 const struct tune_params arm_v6m_tune =
2056 arm_9e_rtx_costs,
2057 NULL,
2058 NULL, /* Sched adj cost. */
2059 1, /* Constant limit. */
2060 5, /* Max cond insns. */
2061 ARM_PREFETCH_NOT_BENEFICIAL,
2062 false, /* Prefer constant pool. */
2063 arm_default_branch_cost,
2064 false, /* Prefer LDRD/STRD. */
2065 {false, false}, /* Prefer non short circuit. */
2066 &arm_default_vec_cost, /* Vectorizer costs. */
2067 false, /* Prefer Neon for 64-bits bitops. */
2068 false, false, /* Prefer 32-bit encodings. */
2069 false, /* Prefer Neon for stringops. */
2070 8 /* Maximum insns to inline memset. */
2073 const struct tune_params arm_fa726te_tune =
2075 arm_9e_rtx_costs,
2076 NULL,
2077 fa726te_sched_adjust_cost,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 true, /* Prefer constant pool. */
2082 arm_default_branch_cost,
2083 false, /* Prefer LDRD/STRD. */
2084 {true, true}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8 /* Maximum insns to inline memset. */
2093 /* Not all of these give usefully different compilation alternatives,
2094 but there is no simple way of generalizing them. */
2095 static const struct processors all_cores[] =
2097 /* ARM Cores */
2098 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2099 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2100 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2101 #include "arm-cores.def"
2102 #undef ARM_CORE
2103 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2106 static const struct processors all_architectures[] =
2108 /* ARM Architectures */
2109 /* We don't specify tuning costs here as it will be figured out
2110 from the core. */
2112 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2113 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2114 #include "arm-arches.def"
2115 #undef ARM_ARCH
2116 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2120 /* These are populated as command-line arguments are processed, or NULL
2121 if not specified. */
2122 static const struct processors *arm_selected_arch;
2123 static const struct processors *arm_selected_cpu;
2124 static const struct processors *arm_selected_tune;
2126 /* The name of the preprocessor macro to define for this architecture. */
2128 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2130 /* Available values for -mfpu=. */
2132 static const struct arm_fpu_desc all_fpus[] =
2134 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2135 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2136 #include "arm-fpus.def"
2137 #undef ARM_FPU
2141 /* Supported TLS relocations. */
2143 enum tls_reloc {
2144 TLS_GD32,
2145 TLS_LDM32,
2146 TLS_LDO32,
2147 TLS_IE32,
2148 TLS_LE32,
2149   TLS_DESCSEQ	/* GNU scheme */
2150 };
2152 /* The maximum number of insns to be used when loading a constant. */
2153 inline static int
2154 arm_constant_limit (bool size_p)
2156 return size_p ? 1 : current_tune->constant_limit;
2159 /* Emit an insn that's a simple single-set. Both the operands must be known
2160 to be valid. */
2161 inline static rtx_insn *
2162 emit_set_insn (rtx x, rtx y)
2164 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2167 /* Return the number of bits set in VALUE. */
2168 static unsigned
2169 bit_count (unsigned long value)
2171 unsigned long count = 0;
2173 while (value)
2175 count++;
2176 value &= value - 1; /* Clear the least-significant set bit. */
2179 return count;
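/* Worked example (illustration only): for value = 0xb (binary 1011) the
   loop above sees 1011 -> 1010 -> 1000 -> 0, incrementing COUNT three
   times, so bit_count (0xb) == 3.  */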
2182 typedef struct
2183 {
2184 machine_mode mode;
2185 const char *name;
2186 } arm_fixed_mode_set;
2188 /* A small helper for setting fixed-point library libfuncs. */
2190 static void
2191 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2192 const char *funcname, const char *modename,
2193 int num_suffix)
2195 char buffer[50];
2197 if (num_suffix == 0)
2198 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2199 else
2200 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2202 set_optab_libfunc (optable, mode, buffer);
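/* For illustration, a call such as

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);

   builds the name "__gnu_addqq3" and registers it as the QQmode addition
   libcall; this matches what the initialization loop in arm_init_libfuncs
   below does for QQmode.  */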
2205 static void
2206 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2207 machine_mode from, const char *funcname,
2208 const char *toname, const char *fromname)
2210 char buffer[50];
2211 const char *maybe_suffix_2 = "";
2213 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2214 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2215 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2216 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2217 maybe_suffix_2 = "2";
2219 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2220 maybe_suffix_2);
2222 set_conv_libfunc (optable, to, from, buffer);
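/* For illustration: a signed-fract to signed-fract conversion such as
   SQmode -> DQmode satisfies the test above and gains the "2" suffix,
   giving "__gnu_fractsqdq2", whereas a fixed-point to integer conversion
   such as SQmode -> SImode does not, giving "__gnu_fractsqsi".  */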
2225 /* Set up library functions unique to ARM. */
2227 static void
2228 arm_init_libfuncs (void)
2230 /* For Linux, we have access to kernel support for atomic operations. */
2231 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2232 init_sync_libfuncs (2 * UNITS_PER_WORD);
2234 /* There are no special library functions unless we are using the
2235 ARM BPABI. */
2236 if (!TARGET_BPABI)
2237 return;
2239 /* The functions below are described in Section 4 of the "Run-Time
2240 ABI for the ARM architecture", Version 1.0. */
2242 /* Double-precision floating-point arithmetic. Table 2. */
2243 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2244 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2245 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2246 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2247 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2249 /* Double-precision comparisons. Table 3. */
2250 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2251 set_optab_libfunc (ne_optab, DFmode, NULL);
2252 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2253 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2254 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2255 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2256 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2258 /* Single-precision floating-point arithmetic. Table 4. */
2259 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2260 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2261 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2262 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2263 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2265 /* Single-precision comparisons. Table 5. */
2266 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2267 set_optab_libfunc (ne_optab, SFmode, NULL);
2268 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2269 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2270 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2271 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2272 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2274 /* Floating-point to integer conversions. Table 6. */
2275 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2276 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2277 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2278 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2279 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2280 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2281 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2282 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2284 /* Conversions between floating types. Table 7. */
2285 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2286 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2288 /* Integer to floating-point conversions. Table 8. */
2289 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2290 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2291 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2292 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2293 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2294 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2295 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2296 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2298 /* Long long. Table 9. */
2299 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2300 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2301 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2302 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2303 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2304 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2305 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2306 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2308 /* Integer (32/32->32) division. \S 4.3.1. */
2309 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2310 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2312 /* The divmod functions are designed so that they can be used for
2313 plain division, even though they return both the quotient and the
2314 remainder. The quotient is returned in the usual location (i.e.,
2315 r0 for SImode, {r0, r1} for DImode), just as would be expected
2316 for an ordinary division routine. Because the AAPCS calling
2317 conventions specify that all of { r0, r1, r2, r3 } are
2318    call-clobbered registers, there is no need to tell the compiler
2319 explicitly that those registers are clobbered by these
2320 routines. */
2321 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2322 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
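/* A sketch of the convention described above, using the C-level signature
   assumed by the run-time ABI (shown for illustration only):

     typedef struct { int quot; int rem; } idiv_return;
     idiv_return __aeabi_idivmod (int numerator, int denominator);

   The quotient comes back in r0 and the remainder in r1, so a caller that
   only wants plain division simply ignores r1.  */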
2324 /* For SImode division the ABI provides div-without-mod routines,
2325 which are faster. */
2326 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2327 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2329 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2330 divmod libcalls instead. */
2331 set_optab_libfunc (smod_optab, DImode, NULL);
2332 set_optab_libfunc (umod_optab, DImode, NULL);
2333 set_optab_libfunc (smod_optab, SImode, NULL);
2334 set_optab_libfunc (umod_optab, SImode, NULL);
2336 /* Half-precision float operations. The compiler handles all operations
2337      with NULL libfuncs by converting to SFmode.  */
2338 switch (arm_fp16_format)
2340 case ARM_FP16_FORMAT_IEEE:
2341 case ARM_FP16_FORMAT_ALTERNATIVE:
2343 /* Conversions. */
2344 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2345 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2346 ? "__gnu_f2h_ieee"
2347 : "__gnu_f2h_alternative"));
2348 set_conv_libfunc (sext_optab, SFmode, HFmode,
2349 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2350 ? "__gnu_h2f_ieee"
2351 : "__gnu_h2f_alternative"));
2353 /* Arithmetic. */
2354 set_optab_libfunc (add_optab, HFmode, NULL);
2355 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2356 set_optab_libfunc (smul_optab, HFmode, NULL);
2357 set_optab_libfunc (neg_optab, HFmode, NULL);
2358 set_optab_libfunc (sub_optab, HFmode, NULL);
2360 /* Comparisons. */
2361 set_optab_libfunc (eq_optab, HFmode, NULL);
2362 set_optab_libfunc (ne_optab, HFmode, NULL);
2363 set_optab_libfunc (lt_optab, HFmode, NULL);
2364 set_optab_libfunc (le_optab, HFmode, NULL);
2365 set_optab_libfunc (ge_optab, HFmode, NULL);
2366 set_optab_libfunc (gt_optab, HFmode, NULL);
2367 set_optab_libfunc (unord_optab, HFmode, NULL);
2368 break;
2370 default:
2371 break;
2374 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2376 const arm_fixed_mode_set fixed_arith_modes[] =
2378 { QQmode, "qq" },
2379 { UQQmode, "uqq" },
2380 { HQmode, "hq" },
2381 { UHQmode, "uhq" },
2382 { SQmode, "sq" },
2383 { USQmode, "usq" },
2384 { DQmode, "dq" },
2385 { UDQmode, "udq" },
2386 { TQmode, "tq" },
2387 { UTQmode, "utq" },
2388 { HAmode, "ha" },
2389 { UHAmode, "uha" },
2390 { SAmode, "sa" },
2391 { USAmode, "usa" },
2392 { DAmode, "da" },
2393 { UDAmode, "uda" },
2394 { TAmode, "ta" },
2395 { UTAmode, "uta" }
2397 const arm_fixed_mode_set fixed_conv_modes[] =
2399 { QQmode, "qq" },
2400 { UQQmode, "uqq" },
2401 { HQmode, "hq" },
2402 { UHQmode, "uhq" },
2403 { SQmode, "sq" },
2404 { USQmode, "usq" },
2405 { DQmode, "dq" },
2406 { UDQmode, "udq" },
2407 { TQmode, "tq" },
2408 { UTQmode, "utq" },
2409 { HAmode, "ha" },
2410 { UHAmode, "uha" },
2411 { SAmode, "sa" },
2412 { USAmode, "usa" },
2413 { DAmode, "da" },
2414 { UDAmode, "uda" },
2415 { TAmode, "ta" },
2416 { UTAmode, "uta" },
2417 { QImode, "qi" },
2418 { HImode, "hi" },
2419 { SImode, "si" },
2420 { DImode, "di" },
2421 { TImode, "ti" },
2422 { SFmode, "sf" },
2423 { DFmode, "df" }
2425 unsigned int i, j;
2427 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2429 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2430 "add", fixed_arith_modes[i].name, 3);
2431 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2432 "ssadd", fixed_arith_modes[i].name, 3);
2433 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2434 "usadd", fixed_arith_modes[i].name, 3);
2435 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2436 "sub", fixed_arith_modes[i].name, 3);
2437 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2438 "sssub", fixed_arith_modes[i].name, 3);
2439 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2440 "ussub", fixed_arith_modes[i].name, 3);
2441 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2442 "mul", fixed_arith_modes[i].name, 3);
2443 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2444 "ssmul", fixed_arith_modes[i].name, 3);
2445 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2446 "usmul", fixed_arith_modes[i].name, 3);
2447 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2448 "div", fixed_arith_modes[i].name, 3);
2449 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2450 "udiv", fixed_arith_modes[i].name, 3);
2451 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2452 "ssdiv", fixed_arith_modes[i].name, 3);
2453 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2454 "usdiv", fixed_arith_modes[i].name, 3);
2455 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2456 "neg", fixed_arith_modes[i].name, 2);
2457 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2458 "ssneg", fixed_arith_modes[i].name, 2);
2459 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2460 "usneg", fixed_arith_modes[i].name, 2);
2461 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2462 "ashl", fixed_arith_modes[i].name, 3);
2463 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2464 "ashr", fixed_arith_modes[i].name, 3);
2465 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2466 "lshr", fixed_arith_modes[i].name, 3);
2467 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2468 "ssashl", fixed_arith_modes[i].name, 3);
2469 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2470 "usashl", fixed_arith_modes[i].name, 3);
2471 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2472 "cmp", fixed_arith_modes[i].name, 2);
2475 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2476 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2478 if (i == j
2479 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2480 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2481 continue;
2483 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2484 fixed_conv_modes[j].mode, "fract",
2485 fixed_conv_modes[i].name,
2486 fixed_conv_modes[j].name);
2487 arm_set_fixed_conv_libfunc (satfract_optab,
2488 fixed_conv_modes[i].mode,
2489 fixed_conv_modes[j].mode, "satfract",
2490 fixed_conv_modes[i].name,
2491 fixed_conv_modes[j].name);
2492 arm_set_fixed_conv_libfunc (fractuns_optab,
2493 fixed_conv_modes[i].mode,
2494 fixed_conv_modes[j].mode, "fractuns",
2495 fixed_conv_modes[i].name,
2496 fixed_conv_modes[j].name);
2497 arm_set_fixed_conv_libfunc (satfractuns_optab,
2498 fixed_conv_modes[i].mode,
2499 fixed_conv_modes[j].mode, "satfractuns",
2500 fixed_conv_modes[i].name,
2501 fixed_conv_modes[j].name);
2505 if (TARGET_AAPCS_BASED)
2506 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2509 /* On AAPCS systems, this is the "struct __va_list". */
2510 static GTY(()) tree va_list_type;
2512 /* Return the type to use as __builtin_va_list. */
2513 static tree
2514 arm_build_builtin_va_list (void)
2516 tree va_list_name;
2517 tree ap_field;
2519 if (!TARGET_AAPCS_BASED)
2520 return std_build_builtin_va_list ();
2522 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2523 defined as:
2525        struct __va_list
2526        {
2527          void *__ap;
2528        };
2530 The C Library ABI further reinforces this definition in \S
2531 4.1.
2533 We must follow this definition exactly. The structure tag
2534 name is visible in C++ mangled names, and thus forms a part
2535 of the ABI. The field name may be used by people who
2536 #include <stdarg.h>. */
2537 /* Create the type. */
2538 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2539 /* Give it the required name. */
2540 va_list_name = build_decl (BUILTINS_LOCATION,
2541 TYPE_DECL,
2542 get_identifier ("__va_list"),
2543 va_list_type);
2544 DECL_ARTIFICIAL (va_list_name) = 1;
2545 TYPE_NAME (va_list_type) = va_list_name;
2546 TYPE_STUB_DECL (va_list_type) = va_list_name;
2547 /* Create the __ap field. */
2548 ap_field = build_decl (BUILTINS_LOCATION,
2549 FIELD_DECL,
2550 get_identifier ("__ap"),
2551 ptr_type_node);
2552 DECL_ARTIFICIAL (ap_field) = 1;
2553 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2554 TYPE_FIELDS (va_list_type) = ap_field;
2555 /* Compute its layout. */
2556 layout_type (va_list_type);
2558 return va_list_type;
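/* As a C-level sketch (the real type is built through the tree machinery
   above), the effect is equivalent to declaring:

     struct __va_list { void *__ap; };

   with <stdarg.h> exposing it to users through the usual va_list typedef.
   The layout is simply that of a single pointer.  */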
2561 /* Return an expression of type "void *" pointing to the next
2562 available argument in a variable-argument list. VALIST is the
2563 user-level va_list object, of type __builtin_va_list. */
2564 static tree
2565 arm_extract_valist_ptr (tree valist)
2567 if (TREE_TYPE (valist) == error_mark_node)
2568 return error_mark_node;
2570 /* On an AAPCS target, the pointer is stored within "struct
2571 va_list". */
2572 if (TARGET_AAPCS_BASED)
2574 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2575 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2576 valist, ap_field, NULL_TREE);
2579 return valist;
2582 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2583 static void
2584 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2586 valist = arm_extract_valist_ptr (valist);
2587 std_expand_builtin_va_start (valist, nextarg);
2590 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2591 static tree
2592 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2593 gimple_seq *post_p)
2595 valist = arm_extract_valist_ptr (valist);
2596 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2599 /* Fix up any incompatible options that the user has specified. */
2600 static void
2601 arm_option_override (void)
2603 if (global_options_set.x_arm_arch_option)
2604 arm_selected_arch = &all_architectures[arm_arch_option];
2606 if (global_options_set.x_arm_cpu_option)
2608 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2609 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2612 if (global_options_set.x_arm_tune_option)
2613 arm_selected_tune = &all_cores[(int) arm_tune_option];
2615 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2616 SUBTARGET_OVERRIDE_OPTIONS;
2617 #endif
2619 if (arm_selected_arch)
2621 if (arm_selected_cpu)
2623 /* Check for conflict between mcpu and march. */
2624 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2626 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2627 arm_selected_cpu->name, arm_selected_arch->name);
2628 /* -march wins for code generation.
2629 -mcpu wins for default tuning. */
2630 if (!arm_selected_tune)
2631 arm_selected_tune = arm_selected_cpu;
2633 arm_selected_cpu = arm_selected_arch;
2635 else
2636 /* -mcpu wins. */
2637 arm_selected_arch = NULL;
2639 else
2640 /* Pick a CPU based on the architecture. */
2641 arm_selected_cpu = arm_selected_arch;
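/* A worked example of the resolution above (illustrative option
   combination): with "-mcpu=arm926ej-s -march=armv7-a" the flag sets
   differ, so we warn, generate code for armv7-a (-march wins), and, unless
   -mtune was also given, tune for the arm926ej-s (-mcpu wins for tuning).  */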
2644 /* If the user did not specify a processor, choose one for them. */
2645 if (!arm_selected_cpu)
2647 const struct processors * sel;
2648 unsigned int sought;
2650 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2651 if (!arm_selected_cpu->name)
2653 #ifdef SUBTARGET_CPU_DEFAULT
2654 /* Use the subtarget default CPU if none was specified by
2655 configure. */
2656 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2657 #endif
2658 /* Default to ARM6. */
2659 if (!arm_selected_cpu->name)
2660 arm_selected_cpu = &all_cores[arm6];
2663 sel = arm_selected_cpu;
2664 insn_flags = sel->flags;
2666 /* Now check to see if the user has specified some command line
2667      switches that require certain abilities from the CPU.  */
2668 sought = 0;
2670 if (TARGET_INTERWORK || TARGET_THUMB)
2672 sought |= (FL_THUMB | FL_MODE32);
2674 /* There are no ARM processors that support both APCS-26 and
2675 interworking. Therefore we force FL_MODE26 to be removed
2676 from insn_flags here (if it was set), so that the search
2677 below will always be able to find a compatible processor. */
2678 insn_flags &= ~FL_MODE26;
2681 if (sought != 0 && ((sought & insn_flags) != sought))
2683 /* Try to locate a CPU type that supports all of the abilities
2684 of the default CPU, plus the extra abilities requested by
2685 the user. */
2686 for (sel = all_cores; sel->name != NULL; sel++)
2687 	    if ((sel->flags & (sought | insn_flags)) == (sought | insn_flags))
2688 break;
2690 if (sel->name == NULL)
2692 unsigned current_bit_count = 0;
2693 const struct processors * best_fit = NULL;
2695 /* Ideally we would like to issue an error message here
2696 saying that it was not possible to find a CPU compatible
2697 with the default CPU, but which also supports the command
2698 line options specified by the programmer, and so they
2699 ought to use the -mcpu=<name> command line option to
2700 override the default CPU type.
2702 	     If we cannot find a CPU that has both the
2703 	     characteristics of the default CPU and the given
2704 	     command line options, we scan the array again looking
2705 	     for a best match.  */
2706 for (sel = all_cores; sel->name != NULL; sel++)
2707 if ((sel->flags & sought) == sought)
2709 unsigned count;
2711 count = bit_count (sel->flags & insn_flags);
2713 if (count >= current_bit_count)
2715 best_fit = sel;
2716 current_bit_count = count;
2720 gcc_assert (best_fit);
2721 sel = best_fit;
2724 arm_selected_cpu = sel;
2728 gcc_assert (arm_selected_cpu);
2729 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2730 if (!arm_selected_tune)
2731 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2733 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2734 insn_flags = arm_selected_cpu->flags;
2735 arm_base_arch = arm_selected_cpu->base_arch;
2737 arm_tune = arm_selected_tune->core;
2738 tune_flags = arm_selected_tune->flags;
2739 current_tune = arm_selected_tune->tune;
2741 /* Make sure that the processor choice does not conflict with any of the
2742 other command line choices. */
2743 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2744 error ("target CPU does not support ARM mode");
2746 /* BPABI targets use linker tricks to allow interworking on cores
2747 without thumb support. */
2748 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2750 warning (0, "target CPU does not support interworking" );
2751 target_flags &= ~MASK_INTERWORK;
2754 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2756 warning (0, "target CPU does not support THUMB instructions");
2757 target_flags &= ~MASK_THUMB;
2760 if (TARGET_APCS_FRAME && TARGET_THUMB)
2762 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2763 target_flags &= ~MASK_APCS_FRAME;
2766 /* Callee super interworking implies thumb interworking. Adding
2767 this to the flags here simplifies the logic elsewhere. */
2768 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2769 target_flags |= MASK_INTERWORK;
2771 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2772      from here, where no function is currently being compiled.  */
2773 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2774 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2776 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2777 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2779 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2781 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2782 target_flags |= MASK_APCS_FRAME;
2785 if (TARGET_POKE_FUNCTION_NAME)
2786 target_flags |= MASK_APCS_FRAME;
2788 if (TARGET_APCS_REENT && flag_pic)
2789 error ("-fpic and -mapcs-reent are incompatible");
2791 if (TARGET_APCS_REENT)
2792 warning (0, "APCS reentrant code not supported. Ignored");
2794 /* If this target is normally configured to use APCS frames, warn if they
2795 are turned off and debugging is turned on. */
2796 if (TARGET_ARM
2797 && write_symbols != NO_DEBUG
2798 && !TARGET_APCS_FRAME
2799 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2800 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2802 if (TARGET_APCS_FLOAT)
2803 warning (0, "passing floating point arguments in fp regs not yet supported");
2805 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2806 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2807 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2808 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2809 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2810 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2811 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2812 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2813 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2814 arm_arch6m = arm_arch6 && !arm_arch_notm;
2815 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2816 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2817 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2818 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2819 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2821 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2822 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2823 thumb_code = TARGET_ARM == 0;
2824 thumb1_code = TARGET_THUMB1 != 0;
2825 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2826 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2827 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2828 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2829 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2830 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2831 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2832 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2833 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2834 if (arm_restrict_it == 2)
2835 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2837 if (!TARGET_THUMB2)
2838 arm_restrict_it = 0;
2840 /* If we are not using the default (ARM mode) section anchor offset
2841 ranges, then set the correct ranges now. */
2842 if (TARGET_THUMB1)
2844 /* Thumb-1 LDR instructions cannot have negative offsets.
2845 Permissible positive offset ranges are 5-bit (for byte loads),
2846 6-bit (for halfword loads), or 7-bit (for word loads).
2847 Empirical results suggest a 7-bit anchor range gives the best
2848 overall code size. */
2849 targetm.min_anchor_offset = 0;
2850 targetm.max_anchor_offset = 127;
2852 else if (TARGET_THUMB2)
2854 /* The minimum is set such that the total size of the block
2855 for a particular anchor is 248 + 1 + 4095 bytes, which is
2856 divisible by eight, ensuring natural spacing of anchors. */
2857 targetm.min_anchor_offset = -248;
2858 targetm.max_anchor_offset = 4095;
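/* Checking the arithmetic in the comment above: 248 + 1 + 4095 = 4344
   bytes, and 4344 / 8 = 543, so the block size is indeed a multiple of
   eight.  */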
2861 /* V5 code we generate is completely interworking capable, so we turn off
2862 TARGET_INTERWORK here to avoid many tests later on. */
2864 /* XXX However, we must pass the right pre-processor defines to CPP
2865 or GLD can get confused. This is a hack. */
2866 if (TARGET_INTERWORK)
2867 arm_cpp_interwork = 1;
2869 if (arm_arch5)
2870 target_flags &= ~MASK_INTERWORK;
2872 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2873 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2875 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2876 error ("iwmmxt abi requires an iwmmxt capable cpu");
2878 if (!global_options_set.x_arm_fpu_index)
2880 const char *target_fpu_name;
2881 bool ok;
2883 #ifdef FPUTYPE_DEFAULT
2884 target_fpu_name = FPUTYPE_DEFAULT;
2885 #else
2886 target_fpu_name = "vfp";
2887 #endif
2889 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2890 CL_TARGET);
2891 gcc_assert (ok);
2894 arm_fpu_desc = &all_fpus[arm_fpu_index];
2896 if (TARGET_NEON && !arm_arch7)
2897 error ("target CPU does not support NEON");
2899 switch (arm_fpu_desc->model)
2901 case ARM_FP_MODEL_VFP:
2902 arm_fpu_attr = FPU_VFP;
2903 break;
2905 default:
2906 gcc_unreachable();
2909 if (TARGET_AAPCS_BASED)
2911 if (TARGET_CALLER_INTERWORKING)
2912 error ("AAPCS does not support -mcaller-super-interworking");
2913 else
2914 if (TARGET_CALLEE_INTERWORKING)
2915 error ("AAPCS does not support -mcallee-super-interworking");
2918 /* iWMMXt and NEON are incompatible. */
2919 if (TARGET_IWMMXT && TARGET_NEON)
2920 error ("iWMMXt and NEON are incompatible");
2922 /* iWMMXt unsupported under Thumb mode. */
2923 if (TARGET_THUMB && TARGET_IWMMXT)
2924 error ("iWMMXt unsupported under Thumb mode");
2926 /* __fp16 support currently assumes the core has ldrh. */
2927 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2928 sorry ("__fp16 and no ldrh");
2930 /* If soft-float is specified then don't use FPU. */
2931 if (TARGET_SOFT_FLOAT)
2932 arm_fpu_attr = FPU_NONE;
2934 if (TARGET_AAPCS_BASED)
2936 if (arm_abi == ARM_ABI_IWMMXT)
2937 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2938 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2939 && TARGET_HARD_FLOAT
2940 && TARGET_VFP)
2941 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2942 else
2943 arm_pcs_default = ARM_PCS_AAPCS;
2945 else
2947 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2948 sorry ("-mfloat-abi=hard and VFP");
2950 if (arm_abi == ARM_ABI_APCS)
2951 arm_pcs_default = ARM_PCS_APCS;
2952 else
2953 arm_pcs_default = ARM_PCS_ATPCS;
2956 /* For arm2/3 there is no need to do any scheduling if we are doing
2957 software floating-point. */
2958 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2959 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2961 /* Use the cp15 method if it is available. */
2962 if (target_thread_pointer == TP_AUTO)
2964 if (arm_arch6k && !TARGET_THUMB1)
2965 target_thread_pointer = TP_CP15;
2966 else
2967 target_thread_pointer = TP_SOFT;
2970 if (TARGET_HARD_TP && TARGET_THUMB1)
2971 error ("can not use -mtp=cp15 with 16-bit Thumb");
2973 /* Override the default structure alignment for AAPCS ABI. */
2974 if (!global_options_set.x_arm_structure_size_boundary)
2976 if (TARGET_AAPCS_BASED)
2977 arm_structure_size_boundary = 8;
2979 else
2981 if (arm_structure_size_boundary != 8
2982 && arm_structure_size_boundary != 32
2983 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2985 if (ARM_DOUBLEWORD_ALIGN)
2986 warning (0,
2987 "structure size boundary can only be set to 8, 32 or 64");
2988 else
2989 warning (0, "structure size boundary can only be set to 8 or 32");
2990 arm_structure_size_boundary
2991 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2995 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2997 error ("RTP PIC is incompatible with Thumb");
2998 flag_pic = 0;
3001 /* If stack checking is disabled, we can use r10 as the PIC register,
3002 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3003 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3005 if (TARGET_VXWORKS_RTP)
3006 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3007 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3010 if (flag_pic && TARGET_VXWORKS_RTP)
3011 arm_pic_register = 9;
3013 if (arm_pic_register_string != NULL)
3015 int pic_register = decode_reg_name (arm_pic_register_string);
3017 if (!flag_pic)
3018 warning (0, "-mpic-register= is useless without -fpic");
3020 /* Prevent the user from choosing an obviously stupid PIC register. */
3021 else if (pic_register < 0 || call_used_regs[pic_register]
3022 || pic_register == HARD_FRAME_POINTER_REGNUM
3023 || pic_register == STACK_POINTER_REGNUM
3024 || pic_register >= PC_REGNUM
3025 || (TARGET_VXWORKS_RTP
3026 && (unsigned int) pic_register != arm_pic_register))
3027 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3028 else
3029 arm_pic_register = pic_register;
3032 if (TARGET_VXWORKS_RTP
3033 && !global_options_set.x_arm_pic_data_is_text_relative)
3034 arm_pic_data_is_text_relative = 0;
3036 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3037 if (fix_cm3_ldrd == 2)
3039 if (arm_selected_cpu->core == cortexm3)
3040 fix_cm3_ldrd = 1;
3041 else
3042 fix_cm3_ldrd = 0;
3045 /* Enable -munaligned-access by default for
3046 - all ARMv6 architecture-based processors
3047 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3048      - ARMv8 architecture-based processors.
3050 Disable -munaligned-access by default for
3051 - all pre-ARMv6 architecture-based processors
3052 - ARMv6-M architecture-based processors. */
3054 if (unaligned_access == 2)
3056 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3057 unaligned_access = 1;
3058 else
3059 unaligned_access = 0;
3061 else if (unaligned_access == 1
3062 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3064 warning (0, "target CPU does not support unaligned accesses");
3065 unaligned_access = 0;
3068 if (TARGET_THUMB1 && flag_schedule_insns)
3070 /* Don't warn since it's on by default in -O2. */
3071 flag_schedule_insns = 0;
3074 if (optimize_size)
3076 /* If optimizing for size, bump the number of instructions that we
3077 are prepared to conditionally execute (even on a StrongARM). */
3078 max_insns_skipped = 6;
3080 /* For THUMB2, we limit the conditional sequence to one IT block. */
3081 if (TARGET_THUMB2)
3082 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3084 else
3085 max_insns_skipped = current_tune->max_insns_skipped;
3087 /* Hot/Cold partitioning is not currently supported, since we can't
3088 handle literal pool placement in that case. */
3089 if (flag_reorder_blocks_and_partition)
3091 inform (input_location,
3092 "-freorder-blocks-and-partition not supported on this architecture");
3093 flag_reorder_blocks_and_partition = 0;
3094 flag_reorder_blocks = 1;
3097 if (flag_pic)
3098 /* Hoisting PIC address calculations more aggressively provides a small,
3099 but measurable, size reduction for PIC code. Therefore, we decrease
3100 the bar for unrestricted expression hoisting to the cost of PIC address
3101 calculation, which is 2 instructions. */
3102 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3103 global_options.x_param_values,
3104 global_options_set.x_param_values);
3106 /* ARM EABI defaults to strict volatile bitfields. */
3107 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3108 && abi_version_at_least(2))
3109 flag_strict_volatile_bitfields = 1;
3111   /* Enable software prefetching at -O3 for CPUs that have prefetch, when we have
3112      deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
3113 if (flag_prefetch_loop_arrays < 0
3114 && HAVE_prefetch
3115 && optimize >= 3
3116 && current_tune->num_prefetch_slots > 0)
3117 flag_prefetch_loop_arrays = 1;
3119   /* Set up parameters to be used in the prefetching algorithm.  Do not override
3120      the defaults unless we are tuning for a core for which we have researched values.  */
3121 if (current_tune->num_prefetch_slots > 0)
3122 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3123 current_tune->num_prefetch_slots,
3124 global_options.x_param_values,
3125 global_options_set.x_param_values);
3126 if (current_tune->l1_cache_line_size >= 0)
3127 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3128 current_tune->l1_cache_line_size,
3129 global_options.x_param_values,
3130 global_options_set.x_param_values);
3131 if (current_tune->l1_cache_size >= 0)
3132 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3133 current_tune->l1_cache_size,
3134 global_options.x_param_values,
3135 global_options_set.x_param_values);
3137   /* Use Neon rather than core registers to perform 64-bit
3138      operations.  */
3139 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3140 if (use_neon_for_64bits == 1)
3141 prefer_neon_for_64bits = true;
3143 /* Use the alternative scheduling-pressure algorithm by default. */
3144 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3145 global_options.x_param_values,
3146 global_options_set.x_param_values);
3148 /* Disable shrink-wrap when optimizing function for size, since it tends to
3149 generate additional returns. */
3150 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3151 flag_shrink_wrap = false;
3152 /* TBD: Dwarf info for apcs frame is not handled yet. */
3153 if (TARGET_APCS_FRAME)
3154 flag_shrink_wrap = false;
3156 /* We only support -mslow-flash-data on armv7-m targets. */
3157 if (target_slow_flash_data
3158 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3159 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3160 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3162 /* Currently, for slow flash data, we just disable literal pools. */
3163 if (target_slow_flash_data)
3164 arm_disable_literal_pool = true;
3166 /* Thumb2 inline assembly code should always use unified syntax.
3167 This will apply to ARM and Thumb1 eventually. */
3168 if (TARGET_THUMB2)
3169 inline_asm_unified = 1;
3171 /* Register global variables with the garbage collector. */
3172 arm_add_gc_roots ();
3175 static void
3176 arm_add_gc_roots (void)
3178 gcc_obstack_init(&minipool_obstack);
3179 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3182 /* A table of known ARM exception types.
3183 For use with the interrupt function attribute. */
3185 typedef struct
3186 {
3187   const char *const arg;
3188   const unsigned long return_value;
3189 }
3190 isr_attribute_arg;
3192 static const isr_attribute_arg isr_attribute_args [] =
3194 { "IRQ", ARM_FT_ISR },
3195 { "irq", ARM_FT_ISR },
3196 { "FIQ", ARM_FT_FIQ },
3197 { "fiq", ARM_FT_FIQ },
3198 { "ABORT", ARM_FT_ISR },
3199 { "abort", ARM_FT_ISR },
3200 { "ABORT", ARM_FT_ISR },
3201 { "abort", ARM_FT_ISR },
3202 { "UNDEF", ARM_FT_EXCEPTION },
3203 { "undef", ARM_FT_EXCEPTION },
3204 { "SWI", ARM_FT_EXCEPTION },
3205 { "swi", ARM_FT_EXCEPTION },
3206 { NULL, ARM_FT_NORMAL }
3209 /* Returns the (interrupt) function type of the current
3210 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3212 static unsigned long
3213 arm_isr_value (tree argument)
3215 const isr_attribute_arg * ptr;
3216 const char * arg;
3218 if (!arm_arch_notm)
3219 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3221 /* No argument - default to IRQ. */
3222 if (argument == NULL_TREE)
3223 return ARM_FT_ISR;
3225 /* Get the value of the argument. */
3226 if (TREE_VALUE (argument) == NULL_TREE
3227 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3228 return ARM_FT_UNKNOWN;
3230 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3232 /* Check it against the list of known arguments. */
3233 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3234 if (streq (arg, ptr->arg))
3235 return ptr->return_value;
3237 /* An unrecognized interrupt type. */
3238 return ARM_FT_UNKNOWN;
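/* For example (illustrative user code, not part of this file):

     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   reaches this function with the string "FIQ" and maps to ARM_FT_FIQ,
   while a bare __attribute__ ((interrupt)) takes the NULL_TREE path above
   and defaults to ARM_FT_ISR.  */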
3241 /* Computes the type of the current function. */
3243 static unsigned long
3244 arm_compute_func_type (void)
3246 unsigned long type = ARM_FT_UNKNOWN;
3247 tree a;
3248 tree attr;
3250 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3252 /* Decide if the current function is volatile. Such functions
3253 never return, and many memory cycles can be saved by not storing
3254 register values that will never be needed again. This optimization
3255 was added to speed up context switching in a kernel application. */
3256 if (optimize > 0
3257 && (TREE_NOTHROW (current_function_decl)
3258 || !(flag_unwind_tables
3259 || (flag_exceptions
3260 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3261 && TREE_THIS_VOLATILE (current_function_decl))
3262 type |= ARM_FT_VOLATILE;
3264 if (cfun->static_chain_decl != NULL)
3265 type |= ARM_FT_NESTED;
3267 attr = DECL_ATTRIBUTES (current_function_decl);
3269 a = lookup_attribute ("naked", attr);
3270 if (a != NULL_TREE)
3271 type |= ARM_FT_NAKED;
3273 a = lookup_attribute ("isr", attr);
3274 if (a == NULL_TREE)
3275 a = lookup_attribute ("interrupt", attr);
3277 if (a == NULL_TREE)
3278 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3279 else
3280 type |= arm_isr_value (TREE_VALUE (a));
3282 return type;
3285 /* Returns the type of the current function. */
3287 unsigned long
3288 arm_current_func_type (void)
3290 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3291 cfun->machine->func_type = arm_compute_func_type ();
3293 return cfun->machine->func_type;
3296 bool
3297 arm_allocate_stack_slots_for_args (void)
3299 /* Naked functions should not allocate stack slots for arguments. */
3300 return !IS_NAKED (arm_current_func_type ());
3303 static bool
3304 arm_warn_func_return (tree decl)
3306 /* Naked functions are implemented entirely in assembly, including the
3307 return sequence, so suppress warnings about this. */
3308 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3312 /* Output assembler code for a block containing the constant parts
3313 of a trampoline, leaving space for the variable parts.
3315 On the ARM, (if r8 is the static chain regnum, and remembering that
3316 referencing pc adds an offset of 8) the trampoline looks like:
3317 ldr r8, [pc, #0]
3318 ldr pc, [pc]
3319 .word static chain value
3320 .word function's address
3321 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3323 static void
3324 arm_asm_trampoline_template (FILE *f)
3326 if (TARGET_ARM)
3328 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3329 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3331 else if (TARGET_THUMB2)
3333 /* The Thumb-2 trampoline is similar to the arm implementation.
3334 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3335 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3336 STATIC_CHAIN_REGNUM, PC_REGNUM);
3337 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3339 else
3341 ASM_OUTPUT_ALIGN (f, 2);
3342 fprintf (f, "\t.code\t16\n");
3343 fprintf (f, ".Ltrampoline_start:\n");
3344 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3345 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3346 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3347 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3348 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3349 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3351 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3352 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3355 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3357 static void
3358 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3360 rtx fnaddr, mem, a_tramp;
3362 emit_block_move (m_tramp, assemble_trampoline_template (),
3363 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3365 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3366 emit_move_insn (mem, chain_value);
3368 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3369 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3370 emit_move_insn (mem, fnaddr);
3372 a_tramp = XEXP (m_tramp, 0);
3373 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3374 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3375 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
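/* As an illustrative sketch (assuming TARGET_32BIT, so the variable words
   sit at offsets 8 and 12), the initialized trampoline ends up as:

       offset  0:  ldr  <static chain reg>, [pc, #0]
       offset  4:  ldr  pc, [pc]
       offset  8:  .word <static chain value>   (first store above)
       offset 12:  .word <address of FNDECL>    (second store above)

   On Thumb-1 the stub is longer, so the two words live at offsets 12 and 16
   instead.  The __clear_cache call then flushes the whole block so the newly
   written instructions are visible to instruction fetch.  */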
3378 /* Thumb trampolines should be entered in thumb mode, so set
3379 the bottom bit of the address. */
3381 static rtx
3382 arm_trampoline_adjust_address (rtx addr)
3384 if (TARGET_THUMB)
3385 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3386 NULL, 0, OPTAB_LIB_WIDEN);
3387 return addr;
3390 /* Return 1 if it is possible to return using a single instruction.
3391 If SIBLING is non-null, this is a test for a return before a sibling
3392 call. SIBLING is the call insn, so we can examine its register usage. */
3395 use_return_insn (int iscond, rtx sibling)
3397 int regno;
3398 unsigned int func_type;
3399 unsigned long saved_int_regs;
3400 unsigned HOST_WIDE_INT stack_adjust;
3401 arm_stack_offsets *offsets;
3403 /* Never use a return instruction before reload has run. */
3404 if (!reload_completed)
3405 return 0;
3407 func_type = arm_current_func_type ();
3409 /* Naked, volatile and stack alignment functions need special
3410 consideration. */
3411 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3412 return 0;
3414 /* So do interrupt functions that use the frame pointer and Thumb
3415 interrupt functions. */
3416 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3417 return 0;
3419 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3420 && !optimize_function_for_size_p (cfun))
3421 return 0;
3423 offsets = arm_get_frame_offsets ();
3424 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3426 /* As do variadic functions. */
3427 if (crtl->args.pretend_args_size
3428 || cfun->machine->uses_anonymous_args
3429 /* Or if the function calls __builtin_eh_return () */
3430 || crtl->calls_eh_return
3431 /* Or if the function calls alloca */
3432 || cfun->calls_alloca
3433 /* Or if there is a stack adjustment. However, if the stack pointer
3434 is saved on the stack, we can use a pre-incrementing stack load. */
3435 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3436 && stack_adjust == 4)))
3437 return 0;
3439 saved_int_regs = offsets->saved_regs_mask;
3441 /* Unfortunately, the insn
3443 ldmib sp, {..., sp, ...}
3445 triggers a bug on most SA-110 based devices, such that the stack
3446 pointer won't be correctly restored if the instruction takes a
3447 page fault. We work around this problem by popping r3 along with
3448 the other registers, since that is never slower than executing
3449 another instruction.
3451 We test for !arm_arch5 here, because code for any architecture
3452 less than this could potentially be run on one of the buggy
3453 chips. */
3454 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3456 /* Validate that r3 is a call-clobbered register (always true in
3457 the default abi) ... */
3458 if (!call_used_regs[3])
3459 return 0;
3461 /* ... that it isn't being used for a return value ... */
3462 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3463 return 0;
3465 /* ... or for a tail-call argument ... */
3466 if (sibling)
3468 gcc_assert (CALL_P (sibling));
3470 if (find_regno_fusage (sibling, USE, 3))
3471 return 0;
3474 /* ... and that there are no call-saved registers in r0-r2
3475 (always true in the default ABI). */
3476 if (saved_int_regs & 0x7)
3477 return 0;
3480 /* Can't be done if interworking with Thumb, and any registers have been
3481 stacked. */
3482 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3483 return 0;
3485 /* On StrongARM, conditional returns are expensive if they aren't
3486 taken and multiple registers have been stacked. */
3487 if (iscond && arm_tune_strongarm)
3489 /* Conditional return when just the LR is stored is a simple
3490 conditional-load instruction, which is not expensive. */
3491 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3492 return 0;
3494 if (flag_pic
3495 && arm_pic_register != INVALID_REGNUM
3496 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3497 return 0;
3500 /* If there are saved registers but the LR isn't saved, then we need
3501 two instructions for the return. */
3502 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3503 return 0;
3505 /* Can't be done if any of the VFP regs are pushed,
3506 since this also requires an insn. */
3507 if (TARGET_HARD_FLOAT && TARGET_VFP)
3508 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3509 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3510 return 0;
3512 if (TARGET_REALLY_IWMMXT)
3513 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3514 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3515 return 0;
3517 return 1;
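/* Illustrative case (assuming the default ABI and no interworking
   complications): a function whose prologue only pushed {r4, lr} and that
   needs no stack adjustment passes all of the tests above, so its epilogue
   can be the single instruction

       ldmfd  sp!, {r4, pc}

   whereas a function that calls alloca, or that must also restore VFP
   registers, fails one of the checks and needs a multi-insn epilogue.  */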
3520 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3521 shrink-wrapping if possible. This is the case if we need to emit a
3522 prologue, which we can test by looking at the offsets. */
3523 bool
3524 use_simple_return_p (void)
3526 arm_stack_offsets *offsets;
3528 offsets = arm_get_frame_offsets ();
3529 return offsets->outgoing_args != 0;
3532 /* Return TRUE if int I is a valid immediate ARM constant. */
3535 const_ok_for_arm (HOST_WIDE_INT i)
3537 int lowbit;
3539 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3540 be all zero, or all one. */
3541 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3542 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3543 != ((~(unsigned HOST_WIDE_INT) 0)
3544 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3545 return FALSE;
3547 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3549 /* Fast return for 0 and small values. We must do this for zero, since
3550 the code below can't handle that one case. */
3551 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3552 return TRUE;
3554 /* Get the number of trailing zeros. */
3555 lowbit = ffs((int) i) - 1;
3557 /* Only even shifts are allowed in ARM mode so round down to the
3558 nearest even number. */
3559 if (TARGET_ARM)
3560 lowbit &= ~1;
3562 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3563 return TRUE;
3565 if (TARGET_ARM)
3567 /* Allow rotated constants in ARM mode. */
3568 if (lowbit <= 4
3569 && ((i & ~0xc000003f) == 0
3570 || (i & ~0xf000000f) == 0
3571 || (i & ~0xfc000003) == 0))
3572 return TRUE;
3574 else
3576 HOST_WIDE_INT v;
3578 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3579 v = i & 0xff;
3580 v |= v << 16;
3581 if (i == v || i == (v | (v << 8)))
3582 return TRUE;
3584 /* Allow repeated pattern 0xXY00XY00. */
3585 v = i & 0xff00;
3586 v |= v << 16;
3587 if (i == v)
3588 return TRUE;
3591 return FALSE;
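/* A few illustrative data points (not exhaustive):

     0x000000ff  valid: fits directly in 8 bits.
     0x0003fc00  valid: 0xff shifted into bits 10..17; the shift amount is
                 even, so the ARM rotated encoding can reach it too.
     0x00ff00ff  valid only for Thumb-2, via the replicated 0x00XY00XY form.
     0x00000101  invalid everywhere: the set bits span more than 8 bits and
                 match none of the replicated patterns, so synthesizing it
                 takes two instructions (see arm_gen_constant below).  */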
3594 /* Return true if I is a valid constant for the operation CODE. */
3596 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3598 if (const_ok_for_arm (i))
3599 return 1;
3601 switch (code)
3603 case SET:
3604 /* See if we can use movw. */
3605 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3606 return 1;
3607 else
3608 /* Otherwise, try mvn. */
3609 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3611 case PLUS:
3612 /* See if we can use addw or subw. */
3613 if (TARGET_THUMB2
3614 && ((i & 0xfffff000) == 0
3615 || ((-i) & 0xfffff000) == 0))
3616 return 1;
3617 /* else fall through. */
3619 case COMPARE:
3620 case EQ:
3621 case NE:
3622 case GT:
3623 case LE:
3624 case LT:
3625 case GE:
3626 case GEU:
3627 case LTU:
3628 case GTU:
3629 case LEU:
3630 case UNORDERED:
3631 case ORDERED:
3632 case UNEQ:
3633 case UNGE:
3634 case UNLT:
3635 case UNGT:
3636 case UNLE:
3637 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3639 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3640 case XOR:
3641 return 0;
3643 case IOR:
3644 if (TARGET_THUMB2)
3645 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3646 return 0;
3648 case AND:
3649 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3651 default:
3652 gcc_unreachable ();
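/* For instance (illustrative only): a SET of 0xffff0fff is not itself a
   valid immediate, but its complement 0x0000f000 is (0xf0 shifted by 8),
   so the SET case above succeeds and the value can be loaded with a single
   mvn.  Similarly, PLUS of 0x00000fff succeeds on Thumb-2 via addw even
   though the value cannot be encoded as a rotated 8-bit immediate.  */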
3656 /* Return true if I is a valid di mode constant for the operation CODE. */
3658 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3660 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3661 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3662 rtx hi = GEN_INT (hi_val);
3663 rtx lo = GEN_INT (lo_val);
3665 if (TARGET_THUMB1)
3666 return 0;
3668 switch (code)
3670 case AND:
3671 case IOR:
3672 case XOR:
3673 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3674 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3675 case PLUS:
3676 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3678 default:
3679 return 0;
3683 /* Emit a sequence of insns to handle a large constant.
3684 CODE is the code of the operation required, it can be any of SET, PLUS,
3685 IOR, AND, XOR, MINUS;
3686 MODE is the mode in which the operation is being performed;
3687 VAL is the integer to operate on;
3688 SOURCE is the other operand (a register, or a null-pointer for SET);
3689 SUBTARGETS means it is safe to create scratch registers if that will
3690 either produce a simpler sequence, or we will want to cse the values.
3691 Return value is the number of insns emitted. */
3693 /* ??? Tweak this for thumb2. */
3695 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3696 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3698 rtx cond;
3700 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3701 cond = COND_EXEC_TEST (PATTERN (insn));
3702 else
3703 cond = NULL_RTX;
3705 if (subtargets || code == SET
3706 || (REG_P (target) && REG_P (source)
3707 && REGNO (target) != REGNO (source)))
3709 /* After arm_reorg has been called, we can't fix up expensive
3710 constants by pushing them into memory so we must synthesize
3711 them in-line, regardless of the cost. This is only likely to
3712 be more costly on chips that have load delay slots and we are
3713 compiling without running the scheduler (so no splitting
3714 occurred before the final instruction emission).
3716 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3718 if (!cfun->machine->after_arm_reorg
3719 && !cond
3720 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3721 1, 0)
3722 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3723 + (code != SET))))
3725 if (code == SET)
3727 /* Currently SET is the only monadic value for CODE, all
3728 the rest are dyadic. */
3729 if (TARGET_USE_MOVT)
3730 arm_emit_movpair (target, GEN_INT (val));
3731 else
3732 emit_set_insn (target, GEN_INT (val));
3734 return 1;
3736 else
3738 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3740 if (TARGET_USE_MOVT)
3741 arm_emit_movpair (temp, GEN_INT (val));
3742 else
3743 emit_set_insn (temp, GEN_INT (val));
3745 /* For MINUS, the value is subtracted from, since we never
3746 have subtraction of a constant. */
3747 if (code == MINUS)
3748 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3749 else
3750 emit_set_insn (target,
3751 gen_rtx_fmt_ee (code, mode, source, temp));
3752 return 2;
3757 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3761 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3762 ARM/THUMB2 immediates, and add up to VAL.
3763 The function's return value gives the number of insns required. */
3764 static int
3765 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3766 struct four_ints *return_sequence)
3768 int best_consecutive_zeros = 0;
3769 int i;
3770 int best_start = 0;
3771 int insns1, insns2;
3772 struct four_ints tmp_sequence;
3774 /* If we aren't targeting ARM, the best place to start is always at
3775 the bottom, otherwise look more closely. */
3776 if (TARGET_ARM)
3778 for (i = 0; i < 32; i += 2)
3780 int consecutive_zeros = 0;
3782 if (!(val & (3 << i)))
3784 while ((i < 32) && !(val & (3 << i)))
3786 consecutive_zeros += 2;
3787 i += 2;
3789 if (consecutive_zeros > best_consecutive_zeros)
3791 best_consecutive_zeros = consecutive_zeros;
3792 best_start = i - consecutive_zeros;
3794 i -= 2;
3799 /* So long as it won't require any more insns to do so, it's
3800 desirable to emit a small constant (in bits 0...9) in the last
3801 insn. This way there is more chance that it can be combined with
3802 a later addressing insn to form a pre-indexed load or store
3803 operation. Consider:
3805 *((volatile int *)0xe0000100) = 1;
3806 *((volatile int *)0xe0000110) = 2;
3808 We want this to wind up as:
3810 mov rA, #0xe0000000
3811 mov rB, #1
3812 str rB, [rA, #0x100]
3813 mov rB, #2
3814 str rB, [rA, #0x110]
3816 rather than having to synthesize both large constants from scratch.
3818 Therefore, we calculate how many insns would be required to emit
3819 the constant starting from `best_start', and also starting from
3820 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3821 yield a shorter sequence, we may as well use zero. */
3822 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3823 if (best_start != 0
3824 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3826 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3827 if (insns2 <= insns1)
3829 *return_sequence = tmp_sequence;
3830 insns1 = insns2;
3834 return insns1;
3837 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3838 static int
3839 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3840 struct four_ints *return_sequence, int i)
3842 int remainder = val & 0xffffffff;
3843 int insns = 0;
3845 /* Try and find a way of doing the job in either two or three
3846 instructions.
3848 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3849 location. We start at position I. This may be the MSB, or
3850 optimal_immediate_sequence may have positioned it at the largest block
3851 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3852 wrapping around to the top of the word when we drop off the bottom.
3853 In the worst case this code should produce no more than four insns.
3855 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3856 constants, shifted to any arbitrary location. We should always start
3857 at the MSB. */
3860 int end;
3861 unsigned int b1, b2, b3, b4;
3862 unsigned HOST_WIDE_INT result;
3863 int loc;
3865 gcc_assert (insns < 4);
3867 if (i <= 0)
3868 i += 32;
3870 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3871 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3873 loc = i;
3874 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3875 /* We can use addw/subw for the last 12 bits. */
3876 result = remainder;
3877 else
3879 /* Use an 8-bit shifted/rotated immediate. */
3880 end = i - 8;
3881 if (end < 0)
3882 end += 32;
3883 result = remainder & ((0x0ff << end)
3884 | ((i < end) ? (0xff >> (32 - end))
3885 : 0));
3886 i -= 8;
3889 else
3891 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3892 arbitrary shifts. */
3893 i -= TARGET_ARM ? 2 : 1;
3894 continue;
3897 /* Next, see if we can do a better job with a thumb2 replicated
3898 constant.
3900 We do it this way around to catch the cases like 0x01F001E0 where
3901 two 8-bit immediates would work, but a replicated constant would
3902 make it worse.
3904 TODO: 16-bit constants that don't clear all the bits, but still win.
3905 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3906 if (TARGET_THUMB2)
3908 b1 = (remainder & 0xff000000) >> 24;
3909 b2 = (remainder & 0x00ff0000) >> 16;
3910 b3 = (remainder & 0x0000ff00) >> 8;
3911 b4 = remainder & 0xff;
3913 if (loc > 24)
3915 /* The 8-bit immediate already found clears b1 (and maybe b2),
3916 but must leave b3 and b4 alone. */
3918 /* First try to find a 32-bit replicated constant that clears
3919 almost everything. We can assume that we can't do it in one,
3920 or else we wouldn't be here. */
3921 unsigned int tmp = b1 & b2 & b3 & b4;
3922 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3923 + (tmp << 24);
3924 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3925 + (tmp == b3) + (tmp == b4);
3926 if (tmp
3927 && (matching_bytes >= 3
3928 || (matching_bytes == 2
3929 && const_ok_for_op (remainder & ~tmp2, code))))
3931 /* At least 3 of the bytes match, and the fourth has at
3932 least as many bits set, or two of the bytes match
3933 and it will only require one more insn to finish. */
3934 result = tmp2;
3935 i = tmp != b1 ? 32
3936 : tmp != b2 ? 24
3937 : tmp != b3 ? 16
3938 : 8;
3941 /* Second, try to find a 16-bit replicated constant that can
3942 leave three of the bytes clear. If b2 or b4 is already
3943 zero, then we can. If the 8-bit from above would not
3944 clear b2 anyway, then we still win. */
3945 else if (b1 == b3 && (!b2 || !b4
3946 || (remainder & 0x00ff0000 & ~result)))
3948 result = remainder & 0xff00ff00;
3949 i = 24;
3952 else if (loc > 16)
3954 /* The 8-bit immediate already found clears b2 (and maybe b3)
3955 and we don't get here unless b1 is already clear, but it will
3956 leave b4 unchanged. */
3958 /* If we can clear b2 and b4 at once, then we win, since the
3959 8-bits couldn't possibly reach that far. */
3960 if (b2 == b4)
3962 result = remainder & 0x00ff00ff;
3963 i = 16;
3968 return_sequence->i[insns++] = result;
3969 remainder &= ~result;
3971 if (code == SET || code == MINUS)
3972 code = PLUS;
3974 while (remainder);
3976 return insns;
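/* Worked example (ARM mode, illustrative): VAL == 0xf100004c splits into the
   two rotatable 8-bit chunks 0xf1000000 (0xf1 rotated right by 8) and
   0x0000004c, so RETURN_SEQUENCE ends up with two elements and the caller
   learns that two data-processing instructions are enough.  */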
3979 /* Emit an instruction with the indicated PATTERN. If COND is
3980 non-NULL, conditionalize the execution of the instruction on COND
3981 being true. */
3983 static void
3984 emit_constant_insn (rtx cond, rtx pattern)
3986 if (cond)
3987 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3988 emit_insn (pattern);
3991 /* As above, but extra parameter GENERATE which, if clear, suppresses
3992 RTL generation. */
3994 static int
3995 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3996 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3997 int generate)
3999 int can_invert = 0;
4000 int can_negate = 0;
4001 int final_invert = 0;
4002 int i;
4003 int set_sign_bit_copies = 0;
4004 int clear_sign_bit_copies = 0;
4005 int clear_zero_bit_copies = 0;
4006 int set_zero_bit_copies = 0;
4007 int insns = 0, neg_insns, inv_insns;
4008 unsigned HOST_WIDE_INT temp1, temp2;
4009 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4010 struct four_ints *immediates;
4011 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4013 /* Find out which operations are safe for a given CODE. Also do a quick
4014 check for degenerate cases; these can occur when DImode operations
4015 are split. */
4016 switch (code)
4018 case SET:
4019 can_invert = 1;
4020 break;
4022 case PLUS:
4023 can_negate = 1;
4024 break;
4026 case IOR:
4027 if (remainder == 0xffffffff)
4029 if (generate)
4030 emit_constant_insn (cond,
4031 gen_rtx_SET (VOIDmode, target,
4032 GEN_INT (ARM_SIGN_EXTEND (val))));
4033 return 1;
4036 if (remainder == 0)
4038 if (reload_completed && rtx_equal_p (target, source))
4039 return 0;
4041 if (generate)
4042 emit_constant_insn (cond,
4043 gen_rtx_SET (VOIDmode, target, source));
4044 return 1;
4046 break;
4048 case AND:
4049 if (remainder == 0)
4051 if (generate)
4052 emit_constant_insn (cond,
4053 gen_rtx_SET (VOIDmode, target, const0_rtx));
4054 return 1;
4056 if (remainder == 0xffffffff)
4058 if (reload_completed && rtx_equal_p (target, source))
4059 return 0;
4060 if (generate)
4061 emit_constant_insn (cond,
4062 gen_rtx_SET (VOIDmode, target, source));
4063 return 1;
4065 can_invert = 1;
4066 break;
4068 case XOR:
4069 if (remainder == 0)
4071 if (reload_completed && rtx_equal_p (target, source))
4072 return 0;
4073 if (generate)
4074 emit_constant_insn (cond,
4075 gen_rtx_SET (VOIDmode, target, source));
4076 return 1;
4079 if (remainder == 0xffffffff)
4081 if (generate)
4082 emit_constant_insn (cond,
4083 gen_rtx_SET (VOIDmode, target,
4084 gen_rtx_NOT (mode, source)));
4085 return 1;
4087 final_invert = 1;
4088 break;
4090 case MINUS:
4091 /* We treat MINUS as (val - source), since (source - val) is always
4092 passed as (source + (-val)). */
4093 if (remainder == 0)
4095 if (generate)
4096 emit_constant_insn (cond,
4097 gen_rtx_SET (VOIDmode, target,
4098 gen_rtx_NEG (mode, source)));
4099 return 1;
4101 if (const_ok_for_arm (val))
4103 if (generate)
4104 emit_constant_insn (cond,
4105 gen_rtx_SET (VOIDmode, target,
4106 gen_rtx_MINUS (mode, GEN_INT (val),
4107 source)));
4108 return 1;
4111 break;
4113 default:
4114 gcc_unreachable ();
4117 /* If we can do it in one insn get out quickly. */
4118 if (const_ok_for_op (val, code))
4120 if (generate)
4121 emit_constant_insn (cond,
4122 gen_rtx_SET (VOIDmode, target,
4123 (source
4124 ? gen_rtx_fmt_ee (code, mode, source,
4125 GEN_INT (val))
4126 : GEN_INT (val))));
4127 return 1;
4130 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4131 insn. */
4132 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4133 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4135 if (generate)
4137 if (mode == SImode && i == 16)
4138 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4139 smaller insn. */
4140 emit_constant_insn (cond,
4141 gen_zero_extendhisi2
4142 (target, gen_lowpart (HImode, source)));
4143 else
4144 /* Extz only supports SImode, but we can coerce the operands
4145 into that mode. */
4146 emit_constant_insn (cond,
4147 gen_extzv_t2 (gen_lowpart (SImode, target),
4148 gen_lowpart (SImode, source),
4149 GEN_INT (i), const0_rtx));
4152 return 1;
4155 /* Calculate a few attributes that may be useful for specific
4156 optimizations. */
4157 /* Count number of leading zeros. */
4158 for (i = 31; i >= 0; i--)
4160 if ((remainder & (1 << i)) == 0)
4161 clear_sign_bit_copies++;
4162 else
4163 break;
4166 /* Count number of leading 1's. */
4167 for (i = 31; i >= 0; i--)
4169 if ((remainder & (1 << i)) != 0)
4170 set_sign_bit_copies++;
4171 else
4172 break;
4175 /* Count number of trailing zeros. */
4176 for (i = 0; i <= 31; i++)
4178 if ((remainder & (1 << i)) == 0)
4179 clear_zero_bit_copies++;
4180 else
4181 break;
4184 /* Count number of trailing 1's. */
4185 for (i = 0; i <= 31; i++)
4187 if ((remainder & (1 << i)) != 0)
4188 set_zero_bit_copies++;
4189 else
4190 break;
4193 switch (code)
4195 case SET:
4196 /* See if we can do this by sign_extending a constant that is known
4197 to be negative. This is a good way of doing it, since the shift
4198 may well merge into a subsequent insn. */
4199 if (set_sign_bit_copies > 1)
4201 if (const_ok_for_arm
4202 (temp1 = ARM_SIGN_EXTEND (remainder
4203 << (set_sign_bit_copies - 1))))
4205 if (generate)
4207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (VOIDmode, new_src,
4210 GEN_INT (temp1)));
4211 emit_constant_insn (cond,
4212 gen_ashrsi3 (target, new_src,
4213 GEN_INT (set_sign_bit_copies - 1)));
4215 return 2;
4217 /* For an inverted constant, we will need to set the low bits,
4218 these will be shifted out of harm's way. */
4219 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4220 if (const_ok_for_arm (~temp1))
4222 if (generate)
4224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (VOIDmode, new_src,
4227 GEN_INT (temp1)));
4228 emit_constant_insn (cond,
4229 gen_ashrsi3 (target, new_src,
4230 GEN_INT (set_sign_bit_copies - 1)));
4232 return 2;
4236 /* See if we can calculate the value as the difference between two
4237 valid immediates. */
4238 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4240 int topshift = clear_sign_bit_copies & ~1;
4242 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4243 & (0xff000000 >> topshift));
4245 /* If temp1 is zero, then that means the 9 most significant
4246 bits of remainder were 1 and we've caused it to overflow.
4247 When topshift is 0 we don't need to do anything since we
4248 can borrow from 'bit 32'. */
4249 if (temp1 == 0 && topshift != 0)
4250 temp1 = 0x80000000 >> (topshift - 1);
4252 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4254 if (const_ok_for_arm (temp2))
4256 if (generate)
4258 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4259 emit_constant_insn (cond,
4260 gen_rtx_SET (VOIDmode, new_src,
4261 GEN_INT (temp1)));
4262 emit_constant_insn (cond,
4263 gen_addsi3 (target, new_src,
4264 GEN_INT (-temp2)));
4267 return 2;
4271 /* See if we can generate this by setting the bottom (or the top)
4272 16 bits, and then shifting these into the other half of the
4273 word. We only look for the simplest cases, to do more would cost
4274 too much. Be careful, however, not to generate this when the
4275 alternative would take fewer insns. */
4276 if (val & 0xffff0000)
4278 temp1 = remainder & 0xffff0000;
4279 temp2 = remainder & 0x0000ffff;
4281 /* Overlaps outside this range are best done using other methods. */
4282 for (i = 9; i < 24; i++)
4284 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4285 && !const_ok_for_arm (temp2))
4287 rtx new_src = (subtargets
4288 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4289 : target);
4290 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4291 source, subtargets, generate);
4292 source = new_src;
4293 if (generate)
4294 emit_constant_insn
4295 (cond,
4296 gen_rtx_SET
4297 (VOIDmode, target,
4298 gen_rtx_IOR (mode,
4299 gen_rtx_ASHIFT (mode, source,
4300 GEN_INT (i)),
4301 source)));
4302 return insns + 1;
4306 /* Don't duplicate cases already considered. */
4307 for (i = 17; i < 24; i++)
4309 if (((temp1 | (temp1 >> i)) == remainder)
4310 && !const_ok_for_arm (temp1))
4312 rtx new_src = (subtargets
4313 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4314 : target);
4315 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4316 source, subtargets, generate);
4317 source = new_src;
4318 if (generate)
4319 emit_constant_insn
4320 (cond,
4321 gen_rtx_SET (VOIDmode, target,
4322 gen_rtx_IOR
4323 (mode,
4324 gen_rtx_LSHIFTRT (mode, source,
4325 GEN_INT (i)),
4326 source)));
4327 return insns + 1;
4331 break;
4333 case IOR:
4334 case XOR:
4335 /* If we have IOR or XOR, and the constant can be loaded in a
4336 single instruction, and we can find a temporary to put it in,
4337 then this can be done in two instructions instead of 3-4. */
4338 if (subtargets
4339 /* TARGET can't be NULL if SUBTARGETS is 0 */
4340 || (reload_completed && !reg_mentioned_p (target, source)))
4342 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4344 if (generate)
4346 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4348 emit_constant_insn (cond,
4349 gen_rtx_SET (VOIDmode, sub,
4350 GEN_INT (val)));
4351 emit_constant_insn (cond,
4352 gen_rtx_SET (VOIDmode, target,
4353 gen_rtx_fmt_ee (code, mode,
4354 source, sub)));
4356 return 2;
4360 if (code == XOR)
4361 break;
4363 /* Convert:
4364 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4365 and the remainder 0s for e.g. 0xfff00000)
4366 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4368 This can be done in 2 instructions by using shifts with mov or mvn.
4369 e.g. for
4370 x = x | 0xfff00000;
4371 we generate.
4372 mvn r0, r0, asl #12
4373 mvn r0, r0, lsr #12 */
4374 if (set_sign_bit_copies > 8
4375 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4377 if (generate)
4379 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4380 rtx shift = GEN_INT (set_sign_bit_copies);
4382 emit_constant_insn
4383 (cond,
4384 gen_rtx_SET (VOIDmode, sub,
4385 gen_rtx_NOT (mode,
4386 gen_rtx_ASHIFT (mode,
4387 source,
4388 shift))));
4389 emit_constant_insn
4390 (cond,
4391 gen_rtx_SET (VOIDmode, target,
4392 gen_rtx_NOT (mode,
4393 gen_rtx_LSHIFTRT (mode, sub,
4394 shift))));
4396 return 2;
4399 /* Convert
4400 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4402 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4404 For eg. r0 = r0 | 0xfff
4405 mvn r0, r0, lsr #12
4406 mvn r0, r0, asl #12
4409 if (set_zero_bit_copies > 8
4410 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4412 if (generate)
4414 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4415 rtx shift = GEN_INT (set_zero_bit_copies);
4417 emit_constant_insn
4418 (cond,
4419 gen_rtx_SET (VOIDmode, sub,
4420 gen_rtx_NOT (mode,
4421 gen_rtx_LSHIFTRT (mode,
4422 source,
4423 shift))));
4424 emit_constant_insn
4425 (cond,
4426 gen_rtx_SET (VOIDmode, target,
4427 gen_rtx_NOT (mode,
4428 gen_rtx_ASHIFT (mode, sub,
4429 shift))));
4431 return 2;
4434 /* This will never be reached for Thumb2 because orn is a valid
4435 instruction. This is for Thumb1 and the ARM 32 bit cases.
4437 x = y | constant (such that ~constant is a valid constant)
4438 Transform this to
4439 x = ~(~y & ~constant).
4441 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4443 if (generate)
4445 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4446 emit_constant_insn (cond,
4447 gen_rtx_SET (VOIDmode, sub,
4448 gen_rtx_NOT (mode, source)));
4449 source = sub;
4450 if (subtargets)
4451 sub = gen_reg_rtx (mode);
4452 emit_constant_insn (cond,
4453 gen_rtx_SET (VOIDmode, sub,
4454 gen_rtx_AND (mode, source,
4455 GEN_INT (temp1))));
4456 emit_constant_insn (cond,
4457 gen_rtx_SET (VOIDmode, target,
4458 gen_rtx_NOT (mode, sub)));
4460 return 3;
4462 break;
4464 case AND:
4465 /* See if two shifts will do 2 or more insn's worth of work. */
4466 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4468 HOST_WIDE_INT shift_mask = ((0xffffffff
4469 << (32 - clear_sign_bit_copies))
4470 & 0xffffffff);
4472 if ((remainder | shift_mask) != 0xffffffff)
4474 if (generate)
4476 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4477 insns = arm_gen_constant (AND, mode, cond,
4478 remainder | shift_mask,
4479 new_src, source, subtargets, 1);
4480 source = new_src;
4482 else
4484 rtx targ = subtargets ? NULL_RTX : target;
4485 insns = arm_gen_constant (AND, mode, cond,
4486 remainder | shift_mask,
4487 targ, source, subtargets, 0);
4491 if (generate)
4493 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4494 rtx shift = GEN_INT (clear_sign_bit_copies);
4496 emit_insn (gen_ashlsi3 (new_src, source, shift));
4497 emit_insn (gen_lshrsi3 (target, new_src, shift));
4500 return insns + 2;
4503 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4505 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4507 if ((remainder | shift_mask) != 0xffffffff)
4509 if (generate)
4511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4513 insns = arm_gen_constant (AND, mode, cond,
4514 remainder | shift_mask,
4515 new_src, source, subtargets, 1);
4516 source = new_src;
4518 else
4520 rtx targ = subtargets ? NULL_RTX : target;
4522 insns = arm_gen_constant (AND, mode, cond,
4523 remainder | shift_mask,
4524 targ, source, subtargets, 0);
4528 if (generate)
4530 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4531 rtx shift = GEN_INT (clear_zero_bit_copies);
4533 emit_insn (gen_lshrsi3 (new_src, source, shift));
4534 emit_insn (gen_ashlsi3 (target, new_src, shift));
4537 return insns + 2;
4540 break;
4542 default:
4543 break;
4546 /* Calculate what the instruction sequences would be if we generated it
4547 normally, negated, or inverted. */
4548 if (code == AND)
4549 /* AND cannot be split into multiple insns, so invert and use BIC. */
4550 insns = 99;
4551 else
4552 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4554 if (can_negate)
4555 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4556 &neg_immediates);
4557 else
4558 neg_insns = 99;
4560 if (can_invert || final_invert)
4561 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4562 &inv_immediates);
4563 else
4564 inv_insns = 99;
4566 immediates = &pos_immediates;
4568 /* Is the negated immediate sequence more efficient? */
4569 if (neg_insns < insns && neg_insns <= inv_insns)
4571 insns = neg_insns;
4572 immediates = &neg_immediates;
4574 else
4575 can_negate = 0;
4577 /* Is the inverted immediate sequence more efficient?
4578 We must allow for an extra NOT instruction for XOR operations, although
4579 there is some chance that the final 'mvn' will get optimized later. */
4580 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4582 insns = inv_insns;
4583 immediates = &inv_immediates;
4585 else
4587 can_invert = 0;
4588 final_invert = 0;
4591 /* Now output the chosen sequence as instructions. */
4592 if (generate)
4594 for (i = 0; i < insns; i++)
4596 rtx new_src, temp1_rtx;
4598 temp1 = immediates->i[i];
4600 if (code == SET || code == MINUS)
4601 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4602 else if ((final_invert || i < (insns - 1)) && subtargets)
4603 new_src = gen_reg_rtx (mode);
4604 else
4605 new_src = target;
4607 if (can_invert)
4608 temp1 = ~temp1;
4609 else if (can_negate)
4610 temp1 = -temp1;
4612 temp1 = trunc_int_for_mode (temp1, mode);
4613 temp1_rtx = GEN_INT (temp1);
4615 if (code == SET)
4617 else if (code == MINUS)
4618 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4619 else
4620 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (VOIDmode, new_src,
4624 temp1_rtx));
4625 source = new_src;
4627 if (code == SET)
4629 can_negate = can_invert;
4630 can_invert = 0;
4631 code = PLUS;
4633 else if (code == MINUS)
4634 code = PLUS;
4638 if (final_invert)
4640 if (generate)
4641 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4642 gen_rtx_NOT (mode, source)));
4643 insns++;
4646 return insns;
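/* Concrete illustration (not from the original sources): an AND with
   0xfff0f00f cannot be done as a single BIC, so the inverted sequence wins:
   ~0xfff0f00f == 0x000f0ff0 splits into the immediates 0x000f0000 and
   0x00000ff0, and each chunk is emitted as an AND with its complement,
   which the backend matches as a BIC instruction, two insns in total.  */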
4649 /* Canonicalize a comparison so that we are more likely to recognize it.
4650 This can be done for a few constant compares, where we can make the
4651 immediate value easier to load. */
4653 static void
4654 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4655 bool op0_preserve_value)
4657 machine_mode mode;
4658 unsigned HOST_WIDE_INT i, maxval;
4660 mode = GET_MODE (*op0);
4661 if (mode == VOIDmode)
4662 mode = GET_MODE (*op1);
4664 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4666 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4667 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4668 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4669 for GTU/LEU in Thumb mode. */
4670 if (mode == DImode)
4672 rtx tem;
4674 if (*code == GT || *code == LE
4675 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4677 /* Missing comparison. First try to use an available
4678 comparison. */
4679 if (CONST_INT_P (*op1))
4681 i = INTVAL (*op1);
4682 switch (*code)
4684 case GT:
4685 case LE:
4686 if (i != maxval
4687 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4689 *op1 = GEN_INT (i + 1);
4690 *code = *code == GT ? GE : LT;
4691 return;
4693 break;
4694 case GTU:
4695 case LEU:
4696 if (i != ~((unsigned HOST_WIDE_INT) 0)
4697 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4699 *op1 = GEN_INT (i + 1);
4700 *code = *code == GTU ? GEU : LTU;
4701 return;
4703 break;
4704 default:
4705 gcc_unreachable ();
4709 /* If that did not work, reverse the condition. */
4710 if (!op0_preserve_value)
4712 tem = *op0;
4713 *op0 = *op1;
4714 *op1 = tem;
4715 *code = (int)swap_condition ((enum rtx_code)*code);
4718 return;
4721 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4722 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4723 to facilitate possible combining with a cmp into 'ands'. */
4724 if (mode == SImode
4725 && GET_CODE (*op0) == ZERO_EXTEND
4726 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4727 && GET_MODE (XEXP (*op0, 0)) == QImode
4728 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4729 && subreg_lowpart_p (XEXP (*op0, 0))
4730 && *op1 == const0_rtx)
4731 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4732 GEN_INT (255));
4734 /* Comparisons smaller than DImode. Only adjust comparisons against
4735 an out-of-range constant. */
4736 if (!CONST_INT_P (*op1)
4737 || const_ok_for_arm (INTVAL (*op1))
4738 || const_ok_for_arm (- INTVAL (*op1)))
4739 return;
4741 i = INTVAL (*op1);
4743 switch (*code)
4745 case EQ:
4746 case NE:
4747 return;
4749 case GT:
4750 case LE:
4751 if (i != maxval
4752 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4754 *op1 = GEN_INT (i + 1);
4755 *code = *code == GT ? GE : LT;
4756 return;
4758 break;
4760 case GE:
4761 case LT:
4762 if (i != ~maxval
4763 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4765 *op1 = GEN_INT (i - 1);
4766 *code = *code == GE ? GT : LE;
4767 return;
4769 break;
4771 case GTU:
4772 case LEU:
4773 if (i != ~((unsigned HOST_WIDE_INT) 0)
4774 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4776 *op1 = GEN_INT (i + 1);
4777 *code = *code == GTU ? GEU : LTU;
4778 return;
4780 break;
4782 case GEU:
4783 case LTU:
4784 if (i != 0
4785 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4787 *op1 = GEN_INT (i - 1);
4788 *code = *code == GEU ? GTU : LEU;
4789 return;
4791 break;
4793 default:
4794 gcc_unreachable ();
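/* Example of the adjustment above (illustrative): for a 32-bit signed
   comparison "x > 0x00ffffff" neither the constant nor its negation is a
   valid ARM immediate, but 0x01000000 is, so the comparison is rewritten as
   "x >= 0x01000000" (GT becomes GE with OP1 incremented) and no extra
   constant-loading instruction is needed.  */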
4799 /* Define how to find the value returned by a function. */
4801 static rtx
4802 arm_function_value(const_tree type, const_tree func,
4803 bool outgoing ATTRIBUTE_UNUSED)
4805 machine_mode mode;
4806 int unsignedp ATTRIBUTE_UNUSED;
4807 rtx r ATTRIBUTE_UNUSED;
4809 mode = TYPE_MODE (type);
4811 if (TARGET_AAPCS_BASED)
4812 return aapcs_allocate_return_reg (mode, type, func);
4814 /* Promote integer types. */
4815 if (INTEGRAL_TYPE_P (type))
4816 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4818 /* Promotes small structs returned in a register to full-word size
4819 for big-endian AAPCS. */
4820 if (arm_return_in_msb (type))
4822 HOST_WIDE_INT size = int_size_in_bytes (type);
4823 if (size % UNITS_PER_WORD != 0)
4825 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4826 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4830 return arm_libcall_value_1 (mode);
4833 /* libcall hashtable helpers. */
4835 struct libcall_hasher : typed_noop_remove <rtx_def>
4837 typedef rtx_def value_type;
4838 typedef rtx_def compare_type;
4839 static inline hashval_t hash (const value_type *);
4840 static inline bool equal (const value_type *, const compare_type *);
4841 static inline void remove (value_type *);
4844 inline bool
4845 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4847 return rtx_equal_p (p1, p2);
4850 inline hashval_t
4851 libcall_hasher::hash (const value_type *p1)
4853 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4856 typedef hash_table<libcall_hasher> libcall_table_type;
4858 static void
4859 add_libcall (libcall_table_type *htab, rtx libcall)
4861 *htab->find_slot (libcall, INSERT) = libcall;
4864 static bool
4865 arm_libcall_uses_aapcs_base (const_rtx libcall)
4867 static bool init_done = false;
4868 static libcall_table_type *libcall_htab = NULL;
4870 if (!init_done)
4872 init_done = true;
4874 libcall_htab = new libcall_table_type (31);
4875 add_libcall (libcall_htab,
4876 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4877 add_libcall (libcall_htab,
4878 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4879 add_libcall (libcall_htab,
4880 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4881 add_libcall (libcall_htab,
4882 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4884 add_libcall (libcall_htab,
4885 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4886 add_libcall (libcall_htab,
4887 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4888 add_libcall (libcall_htab,
4889 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4890 add_libcall (libcall_htab,
4891 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4893 add_libcall (libcall_htab,
4894 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4895 add_libcall (libcall_htab,
4896 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4897 add_libcall (libcall_htab,
4898 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4899 add_libcall (libcall_htab,
4900 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4901 add_libcall (libcall_htab,
4902 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4903 add_libcall (libcall_htab,
4904 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4905 add_libcall (libcall_htab,
4906 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4907 add_libcall (libcall_htab,
4908 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4910 /* Values from double-precision helper functions are returned in core
4911 registers if the selected core only supports single-precision
4912 arithmetic, even if we are using the hard-float ABI. The same is
4913 true for single-precision helpers, but we will never be using the
4914 hard-float ABI on a CPU which doesn't support single-precision
4915 operations in hardware. */
4916 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4917 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4918 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4919 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4920 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4921 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4922 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4923 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4924 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4925 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4926 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4927 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4928 SFmode));
4929 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4930 DFmode));
4933 return libcall && libcall_htab->find (libcall) != NULL;
4936 static rtx
4937 arm_libcall_value_1 (machine_mode mode)
4939 if (TARGET_AAPCS_BASED)
4940 return aapcs_libcall_value (mode);
4941 else if (TARGET_IWMMXT_ABI
4942 && arm_vector_mode_supported_p (mode))
4943 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4944 else
4945 return gen_rtx_REG (mode, ARG_REGISTER (1));
4948 /* Define how to find the value returned by a library function
4949 assuming the value has mode MODE. */
4951 static rtx
4952 arm_libcall_value (machine_mode mode, const_rtx libcall)
4954 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4955 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4957 /* The following libcalls return their result in integer registers,
4958 even though they return a floating point value. */
4959 if (arm_libcall_uses_aapcs_base (libcall))
4960 return gen_rtx_REG (mode, ARG_REGISTER(1));
4964 return arm_libcall_value_1 (mode);
4967 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4969 static bool
4970 arm_function_value_regno_p (const unsigned int regno)
4972 if (regno == ARG_REGISTER (1)
4973 || (TARGET_32BIT
4974 && TARGET_AAPCS_BASED
4975 && TARGET_VFP
4976 && TARGET_HARD_FLOAT
4977 && regno == FIRST_VFP_REGNUM)
4978 || (TARGET_IWMMXT_ABI
4979 && regno == FIRST_IWMMXT_REGNUM))
4980 return true;
4982 return false;
4985 /* Determine the amount of memory needed to store the possible return
4986 registers of an untyped call. */
4988 arm_apply_result_size (void)
4990 int size = 16;
4992 if (TARGET_32BIT)
4994 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4995 size += 32;
4996 if (TARGET_IWMMXT_ABI)
4997 size += 8;
5000 return size;
5003 /* Decide whether TYPE should be returned in memory (true)
5004 or in a register (false). FNTYPE is the type of the function making
5005 the call. */
5006 static bool
5007 arm_return_in_memory (const_tree type, const_tree fntype)
5009 HOST_WIDE_INT size;
5011 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5013 if (TARGET_AAPCS_BASED)
5015 /* Simple, non-aggregate types (i.e. not including vectors and
5016 complex) are always returned in a register (or registers).
5017 We don't care about which register here, so we can short-cut
5018 some of the detail. */
5019 if (!AGGREGATE_TYPE_P (type)
5020 && TREE_CODE (type) != VECTOR_TYPE
5021 && TREE_CODE (type) != COMPLEX_TYPE)
5022 return false;
5024 /* Any return value that is no larger than one word can be
5025 returned in r0. */
5026 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5027 return false;
5029 /* Check any available co-processors to see if they accept the
5030 type as a register candidate (VFP, for example, can return
5031 some aggregates in consecutive registers). These aren't
5032 available if the call is variadic. */
5033 if (aapcs_select_return_coproc (type, fntype) >= 0)
5034 return false;
5036 /* Vector values should be returned using ARM registers, not
5037 memory (unless they're over 16 bytes, which will break since
5038 we only have four call-clobbered registers to play with). */
5039 if (TREE_CODE (type) == VECTOR_TYPE)
5040 return (size < 0 || size > (4 * UNITS_PER_WORD));
5042 /* The rest go in memory. */
5043 return true;
5046 if (TREE_CODE (type) == VECTOR_TYPE)
5047 return (size < 0 || size > (4 * UNITS_PER_WORD));
5049 if (!AGGREGATE_TYPE_P (type) &&
5050 (TREE_CODE (type) != VECTOR_TYPE))
5051 /* All simple types are returned in registers. */
5052 return false;
5054 if (arm_abi != ARM_ABI_APCS)
5056 /* ATPCS and later return aggregate types in memory only if they are
5057 larger than a word (or are variable size). */
5058 return (size < 0 || size > UNITS_PER_WORD);
5061 /* For the arm-wince targets we choose to be compatible with Microsoft's
5062 ARM and Thumb compilers, which always return aggregates in memory. */
5063 #ifndef ARM_WINCE
5064 /* All structures/unions bigger than one word are returned in memory.
5065 Also catch the case where int_size_in_bytes returns -1. In this case
5066 the aggregate is either huge or of variable size, and in either case
5067 we will want to return it via memory and not in a register. */
5068 if (size < 0 || size > UNITS_PER_WORD)
5069 return true;
5071 if (TREE_CODE (type) == RECORD_TYPE)
5073 tree field;
5075 /* For a struct the APCS says that we only return in a register
5076 if the type is 'integer like' and every addressable element
5077 has an offset of zero. For practical purposes this means
5078 that the structure can have at most one non bit-field element
5079 and that this element must be the first one in the structure. */
5081 /* Find the first field, ignoring non FIELD_DECL things which will
5082 have been created by C++. */
5083 for (field = TYPE_FIELDS (type);
5084 field && TREE_CODE (field) != FIELD_DECL;
5085 field = DECL_CHAIN (field))
5086 continue;
5088 if (field == NULL)
5089 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5091 /* Check that the first field is valid for returning in a register. */
5093 /* ... Floats are not allowed */
5094 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5095 return true;
5097 /* ... Aggregates that are not themselves valid for returning in
5098 a register are not allowed. */
5099 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5100 return true;
5102 /* Now check the remaining fields, if any. Only bitfields are allowed,
5103 since they are not addressable. */
5104 for (field = DECL_CHAIN (field);
5105 field;
5106 field = DECL_CHAIN (field))
5108 if (TREE_CODE (field) != FIELD_DECL)
5109 continue;
5111 if (!DECL_BIT_FIELD_TYPE (field))
5112 return true;
5115 return false;
5118 if (TREE_CODE (type) == UNION_TYPE)
5120 tree field;
5122 /* Unions can be returned in registers if every element is
5123 integral, or can be returned in an integer register. */
5124 for (field = TYPE_FIELDS (type);
5125 field;
5126 field = DECL_CHAIN (field))
5128 if (TREE_CODE (field) != FIELD_DECL)
5129 continue;
5131 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5132 return true;
5134 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5135 return true;
5138 return false;
5140 #endif /* not ARM_WINCE */
5142 /* Return all other types in memory. */
5143 return true;
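/* Illustrative outcomes under AAPCS (assuming a 32-bit word): a plain int,
   or a struct wrapping a single int, is returned in r0; a 12-byte struct of
   three ints is larger than a word and, unless a co-processor claims it
   (e.g. a VFP homogeneous aggregate under the hard-float variant), it is
   returned in memory through a pointer supplied by the caller.  */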
5146 const struct pcs_attribute_arg
5148 const char *arg;
5149 enum arm_pcs value;
5150 } pcs_attribute_args[] =
5152 {"aapcs", ARM_PCS_AAPCS},
5153 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5154 #if 0
5155 /* We could recognize these, but changes would be needed elsewhere
5156 * to implement them. */
5157 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5158 {"atpcs", ARM_PCS_ATPCS},
5159 {"apcs", ARM_PCS_APCS},
5160 #endif
5161 {NULL, ARM_PCS_UNKNOWN}
5164 static enum arm_pcs
5165 arm_pcs_from_attribute (tree attr)
5167 const struct pcs_attribute_arg *ptr;
5168 const char *arg;
5170 /* Get the value of the argument. */
5171 if (TREE_VALUE (attr) == NULL_TREE
5172 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5173 return ARM_PCS_UNKNOWN;
5175 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5177 /* Check it against the list of known arguments. */
5178 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5179 if (streq (arg, ptr->arg))
5180 return ptr->value;
5182 /* An unrecognized PCS variant. */
5183 return ARM_PCS_UNKNOWN;
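/* At the source level this corresponds to the "pcs" function attribute.
   An illustrative declaration (the function name is made up):

       double dot (const float *a, const float *b, int n)
           __attribute__ ((pcs ("aapcs-vfp")));

   Only the strings listed in pcs_attribute_args above are recognized;
   anything else falls through to ARM_PCS_UNKNOWN.  */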
5186 /* Get the PCS variant to use for this call. TYPE is the function's type
5187 specification, DECL is the specific declaration. DECL may be null if
5188 the call could be indirect or if this is a library call. */
5189 static enum arm_pcs
5190 arm_get_pcs_model (const_tree type, const_tree decl)
5192 bool user_convention = false;
5193 enum arm_pcs user_pcs = arm_pcs_default;
5194 tree attr;
5196 gcc_assert (type);
5198 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5199 if (attr)
5201 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5202 user_convention = true;
5205 if (TARGET_AAPCS_BASED)
5207 /* Detect varargs functions. These always use the base rules
5208 (no argument is ever a candidate for a co-processor
5209 register). */
5210 bool base_rules = stdarg_p (type);
5212 if (user_convention)
5214 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5215 sorry ("non-AAPCS derived PCS variant");
5216 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5217 error ("variadic functions must use the base AAPCS variant");
5220 if (base_rules)
5221 return ARM_PCS_AAPCS;
5222 else if (user_convention)
5223 return user_pcs;
5224 else if (decl && flag_unit_at_a_time)
5226 /* Local functions never leak outside this compilation unit,
5227 so we are free to use whatever conventions are
5228 appropriate. */
5229 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5230 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5231 if (i && i->local)
5232 return ARM_PCS_AAPCS_LOCAL;
5235 else if (user_convention && user_pcs != arm_pcs_default)
5236 sorry ("PCS variant");
5238 /* For everything else we use the target's default. */
5239 return arm_pcs_default;
5243 static void
5244 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5245 const_tree fntype ATTRIBUTE_UNUSED,
5246 rtx libcall ATTRIBUTE_UNUSED,
5247 const_tree fndecl ATTRIBUTE_UNUSED)
5249 /* Record the unallocated VFP registers. */
5250 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5251 pcum->aapcs_vfp_reg_alloc = 0;
5254 /* Walk down the type tree of TYPE counting consecutive base elements.
5255 If *MODEP is VOIDmode, then set it to the first valid floating point
5256 type. If a non-floating point type is found, or if a floating point
5257 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5258 otherwise return the count in the sub-tree. */
5259 static int
5260 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5262 machine_mode mode;
5263 HOST_WIDE_INT size;
5265 switch (TREE_CODE (type))
5267 case REAL_TYPE:
5268 mode = TYPE_MODE (type);
5269 if (mode != DFmode && mode != SFmode)
5270 return -1;
5272 if (*modep == VOIDmode)
5273 *modep = mode;
5275 if (*modep == mode)
5276 return 1;
5278 break;
5280 case COMPLEX_TYPE:
5281 mode = TYPE_MODE (TREE_TYPE (type));
5282 if (mode != DFmode && mode != SFmode)
5283 return -1;
5285 if (*modep == VOIDmode)
5286 *modep = mode;
5288 if (*modep == mode)
5289 return 2;
5291 break;
5293 case VECTOR_TYPE:
5294 /* Use V2SImode and V4SImode as representatives of all 64-bit
5295 and 128-bit vector types, whether or not those modes are
5296 supported with the present options. */
5297 size = int_size_in_bytes (type);
5298 switch (size)
5300 case 8:
5301 mode = V2SImode;
5302 break;
5303 case 16:
5304 mode = V4SImode;
5305 break;
5306 default:
5307 return -1;
5310 if (*modep == VOIDmode)
5311 *modep = mode;
5313 /* Vector modes are considered to be opaque: two vectors are
5314 equivalent for the purposes of being homogeneous aggregates
5315 if they are the same size. */
5316 if (*modep == mode)
5317 return 1;
5319 break;
5321 case ARRAY_TYPE:
5323 int count;
5324 tree index = TYPE_DOMAIN (type);
5326 /* Can't handle incomplete types nor sizes that are not
5327 fixed. */
5328 if (!COMPLETE_TYPE_P (type)
5329 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5330 return -1;
5332 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5333 if (count == -1
5334 || !index
5335 || !TYPE_MAX_VALUE (index)
5336 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5337 || !TYPE_MIN_VALUE (index)
5338 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5339 || count < 0)
5340 return -1;
5342 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5343 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5345 /* There must be no padding. */
5346 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5347 return -1;
5349 return count;
5352 case RECORD_TYPE:
5354 int count = 0;
5355 int sub_count;
5356 tree field;
5358 /* Can't handle incomplete types nor sizes that are not
5359 fixed. */
5360 if (!COMPLETE_TYPE_P (type)
5361 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5362 return -1;
5364 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5366 if (TREE_CODE (field) != FIELD_DECL)
5367 continue;
5369 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5370 if (sub_count < 0)
5371 return -1;
5372 count += sub_count;
5375 /* There must be no padding. */
5376 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5377 return -1;
5379 return count;
5382 case UNION_TYPE:
5383 case QUAL_UNION_TYPE:
5385 /* These aren't very interesting except in a degenerate case. */
5386 int count = 0;
5387 int sub_count;
5388 tree field;
5390 /* Can't handle incomplete types nor sizes that are not
5391 fixed. */
5392 if (!COMPLETE_TYPE_P (type)
5393 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5394 return -1;
5396 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5398 if (TREE_CODE (field) != FIELD_DECL)
5399 continue;
5401 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5402 if (sub_count < 0)
5403 return -1;
5404 count = count > sub_count ? count : sub_count;
5407 /* There must be no padding. */
5408 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5409 return -1;
5411 return count;
5414 default:
5415 break;
5418 return -1;
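/* Illustrative results: for "struct { double x, y; }" this returns 2 with
   *MODEP == DFmode (a homogeneous aggregate of two doubles); for
   "float v[4]" it returns 4 with *MODEP == SFmode; for
   "struct { float f; double d; }" the element modes disagree, so it
   returns -1 and the argument is not a VFP candidate.  */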
5421 /* Return true if PCS_VARIANT should use VFP registers. */
5422 static bool
5423 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5425 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5427 static bool seen_thumb1_vfp = false;
5429 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5431 sorry ("Thumb-1 hard-float VFP ABI");
5432 /* sorry() is not immediately fatal, so only display this once. */
5433 seen_thumb1_vfp = true;
5436 return true;
5439 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5440 return false;
5442 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5443 (TARGET_VFP_DOUBLE || !is_double));
5446 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5447 suitable for passing or returning in VFP registers for the PCS
5448 variant selected. If it is, then *BASE_MODE is updated to contain
5449 a machine mode describing each element of the argument's type and
5450 *COUNT to hold the number of such elements. */
5451 static bool
5452 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5453 machine_mode mode, const_tree type,
5454 machine_mode *base_mode, int *count)
5456 machine_mode new_mode = VOIDmode;
5458 /* If we have the type information, prefer that to working things
5459 out from the mode. */
5460 if (type)
5462 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5464 if (ag_count > 0 && ag_count <= 4)
5465 *count = ag_count;
5466 else
5467 return false;
5469 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5470 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5471 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5473 *count = 1;
5474 new_mode = mode;
5476 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5478 *count = 2;
5479 new_mode = (mode == DCmode ? DFmode : SFmode);
5481 else
5482 return false;
5485 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5486 return false;
5488 *base_mode = new_mode;
5489 return true;
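/* As a rough illustration of the candidate test above: a type such as
   struct { float x, y, z; } is a homogeneous aggregate of three SFmode
   elements, so *count becomes 3 and *base_mode SFmode, and the argument is
   eligible for VFP registers; struct { double d; float f; } mixes element
   modes and struct { float a[5]; } has more than four elements, so both are
   rejected here and fall back to the core-register/stack rules.  */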
5492 static bool
5493 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5494 machine_mode mode, const_tree type)
5496 int count ATTRIBUTE_UNUSED;
5497 machine_mode ag_mode ATTRIBUTE_UNUSED;
5499 if (!use_vfp_abi (pcs_variant, false))
5500 return false;
5501 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5502 &ag_mode, &count);
5505 static bool
5506 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5507 const_tree type)
5509 if (!use_vfp_abi (pcum->pcs_variant, false))
5510 return false;
5512 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5513 &pcum->aapcs_vfp_rmode,
5514 &pcum->aapcs_vfp_rcount);
5517 static bool
5518 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5519 const_tree type ATTRIBUTE_UNUSED)
5521 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5522 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5523 int regno;
5525 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5526 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5528 pcum->aapcs_vfp_reg_alloc = mask << regno;
5529 if (mode == BLKmode
5530 || (mode == TImode && ! TARGET_NEON)
5531 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5533 int i;
5534 int rcount = pcum->aapcs_vfp_rcount;
5535 int rshift = shift;
5536 machine_mode rmode = pcum->aapcs_vfp_rmode;
5537 rtx par;
5538 if (!TARGET_NEON)
5540 /* Avoid using unsupported vector modes. */
5541 if (rmode == V2SImode)
5542 rmode = DImode;
5543 else if (rmode == V4SImode)
5545 rmode = DImode;
5546 rcount *= 2;
5547 rshift /= 2;
5550 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5551 for (i = 0; i < rcount; i++)
5553 rtx tmp = gen_rtx_REG (rmode,
5554 FIRST_VFP_REGNUM + regno + i * rshift);
5555 tmp = gen_rtx_EXPR_LIST
5556 (VOIDmode, tmp,
5557 GEN_INT (i * GET_MODE_SIZE (rmode)));
5558 XVECEXP (par, 0, i) = tmp;
5561 pcum->aapcs_reg = par;
5563 else
5564 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5565 return true;
5567 return false;
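/* Illustrative numbers for the allocation above, assuming the argument is a
   homogeneous aggregate of two doubles: aapcs_vfp_rmode is DFmode, so shift
   is 8 / 4 = 2, rcount is 2 and the mask covers four consecutive
   single-precision registers; the loop then scans regno = 0, 2, 4, ...
   looking for a block of four free S registers starting at an even S
   register number.  */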
5570 static rtx
5571 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5572 machine_mode mode,
5573 const_tree type ATTRIBUTE_UNUSED)
5575 if (!use_vfp_abi (pcs_variant, false))
5576 return NULL;
5578 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5580 int count;
5581 machine_mode ag_mode;
5582 int i;
5583 rtx par;
5584 int shift;
5586 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5587 &ag_mode, &count);
5589 if (!TARGET_NEON)
5591 if (ag_mode == V2SImode)
5592 ag_mode = DImode;
5593 else if (ag_mode == V4SImode)
5595 ag_mode = DImode;
5596 count *= 2;
5599 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5601 for (i = 0; i < count; i++)
5603 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5605 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5606 XVECEXP (par, 0, i) = tmp;
5609 return par;
5612 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5615 static void
5616 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5617 machine_mode mode ATTRIBUTE_UNUSED,
5618 const_tree type ATTRIBUTE_UNUSED)
5620 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5621 pcum->aapcs_vfp_reg_alloc = 0;
5622 return;
5625 #define AAPCS_CP(X) \
5627 aapcs_ ## X ## _cum_init, \
5628 aapcs_ ## X ## _is_call_candidate, \
5629 aapcs_ ## X ## _allocate, \
5630 aapcs_ ## X ## _is_return_candidate, \
5631 aapcs_ ## X ## _allocate_return_reg, \
5632 aapcs_ ## X ## _advance \
5635 /* Table of co-processors that can be used to pass arguments in
5636 registers. Ideally no argument should be a candidate for more than
5637 one co-processor table entry, but the table is processed in order
5638 and stops after the first match. If that entry then fails to put
5639 the argument into a co-processor register, the argument will go on
5640 the stack. */
5641 static struct
5643 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5644 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5646 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5647 BLKmode) is a candidate for this co-processor's registers; this
5648 function should ignore any position-dependent state in
5649 CUMULATIVE_ARGS and only use call-type dependent information. */
5650 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5652 /* Return true if the argument does get a co-processor register; it
5653 should set aapcs_reg to an RTX of the register allocated as is
5654 required for a return from FUNCTION_ARG. */
5655 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5657 /* Return true if a result of mode MODE (or type TYPE if MODE is
5658 BLKmode) can be returned in this co-processor's registers. */
5659 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5661 /* Allocate and return an RTX element to hold the return value of a
5662 call; this routine must not fail and will only be called if
5663 is_return_candidate returned true with the same parameters. */
5664 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5666 /* Finish processing this argument and prepare to start processing
5667 the next one. */
5668 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5669 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5671 AAPCS_CP(vfp)
5674 #undef AAPCS_CP
5676 static int
5677 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5678 const_tree type)
5680 int i;
5682 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5683 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5684 return i;
5686 return -1;
5689 static int
5690 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5692 /* We aren't passed a decl, so we can't check that a call is local.
5693 However, it isn't clear that that would be a win anyway, since it
5694 might limit some tail-calling opportunities. */
5695 enum arm_pcs pcs_variant;
5697 if (fntype)
5699 const_tree fndecl = NULL_TREE;
5701 if (TREE_CODE (fntype) == FUNCTION_DECL)
5703 fndecl = fntype;
5704 fntype = TREE_TYPE (fntype);
5707 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5709 else
5710 pcs_variant = arm_pcs_default;
5712 if (pcs_variant != ARM_PCS_AAPCS)
5714 int i;
5716 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5717 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5718 TYPE_MODE (type),
5719 type))
5720 return i;
5722 return -1;
5725 static rtx
5726 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5727 const_tree fntype)
5729 /* We aren't passed a decl, so we can't check that a call is local.
5730 However, it isn't clear that that would be a win anyway, since it
5731 might limit some tail-calling opportunities. */
5732 enum arm_pcs pcs_variant;
5733 int unsignedp ATTRIBUTE_UNUSED;
5735 if (fntype)
5737 const_tree fndecl = NULL_TREE;
5739 if (TREE_CODE (fntype) == FUNCTION_DECL)
5741 fndecl = fntype;
5742 fntype = TREE_TYPE (fntype);
5745 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5747 else
5748 pcs_variant = arm_pcs_default;
5750 /* Promote integer types. */
5751 if (type && INTEGRAL_TYPE_P (type))
5752 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5754 if (pcs_variant != ARM_PCS_AAPCS)
5756 int i;
5758 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5759 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5760 type))
5761 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5762 mode, type);
5765 /* Promotes small structs returned in a register to full-word size
5766 for big-endian AAPCS. */
5767 if (type && arm_return_in_msb (type))
5769 HOST_WIDE_INT size = int_size_in_bytes (type);
5770 if (size % UNITS_PER_WORD != 0)
5772 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5773 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5777 return gen_rtx_REG (mode, R0_REGNUM);
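/* For example, on a big-endian AAPCS target a 3-byte structure result
   satisfies arm_return_in_msb; its size is rounded up to UNITS_PER_WORD and
   the mode widened to SImode, so the value is returned occupying the whole
   of r0 with the data in the most significant bytes.  */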
5780 static rtx
5781 aapcs_libcall_value (machine_mode mode)
5783 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5784 && GET_MODE_SIZE (mode) <= 4)
5785 mode = SImode;
5787 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5790 /* Lay out a function argument using the AAPCS rules. The rule
5791 numbers referred to here are those in the AAPCS. */
5792 static void
5793 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5794 const_tree type, bool named)
5796 int nregs, nregs2;
5797 int ncrn;
5799 /* We only need to do this once per argument. */
5800 if (pcum->aapcs_arg_processed)
5801 return;
5803 pcum->aapcs_arg_processed = true;
5805 /* Special case: if named is false then we are handling an incoming
5806 anonymous argument which is on the stack. */
5807 if (!named)
5808 return;
5810 /* Is this a potential co-processor register candidate? */
5811 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5813 int slot = aapcs_select_call_coproc (pcum, mode, type);
5814 pcum->aapcs_cprc_slot = slot;
5816 /* We don't have to apply any of the rules from part B of the
5817 preparation phase, these are handled elsewhere in the
5818 compiler. */
5820 if (slot >= 0)
5822 /* A Co-processor register candidate goes either in its own
5823 class of registers or on the stack. */
5824 if (!pcum->aapcs_cprc_failed[slot])
5826 /* C1.cp - Try to allocate the argument to co-processor
5827 registers. */
5828 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5829 return;
5831 /* C2.cp - Put the argument on the stack and note that we
5832 can't assign any more candidates in this slot. We also
5833 need to note that we have allocated stack space, so that
5834 we won't later try to split a non-cprc candidate between
5835 core registers and the stack. */
5836 pcum->aapcs_cprc_failed[slot] = true;
5837 pcum->can_split = false;
5840 /* We didn't get a register, so this argument goes on the
5841 stack. */
5842 gcc_assert (pcum->can_split == false);
5843 return;
5847 /* C3 - For double-word aligned arguments, round the NCRN up to the
5848 next even number. */
5849 ncrn = pcum->aapcs_ncrn;
5850 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5851 ncrn++;
5853 nregs = ARM_NUM_REGS2(mode, type);
5855 /* Sigh, this test should really assert that nregs > 0, but a GCC
5856 extension allows empty structs and then gives them empty size; it
5857 then allows such a structure to be passed by value. For some of
5858 the code below we have to pretend that such an argument has
5859 non-zero size so that we 'locate' it correctly either in
5860 registers or on the stack. */
5861 gcc_assert (nregs >= 0);
5863 nregs2 = nregs ? nregs : 1;
5865 /* C4 - Argument fits entirely in core registers. */
5866 if (ncrn + nregs2 <= NUM_ARG_REGS)
5868 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5869 pcum->aapcs_next_ncrn = ncrn + nregs;
5870 return;
5873 /* C5 - Some core registers left and there are no arguments already
5874 on the stack: split this argument between the remaining core
5875 registers and the stack. */
5876 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5878 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5879 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5880 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5881 return;
5884 /* C6 - NCRN is set to 4. */
5885 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5887 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5888 return;
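/* A worked example of the rules above, assuming the base (soft-float) PCS so
   that the double is not a co-processor candidate: for f (int, double, int)
   the first int takes r0 and NCRN becomes 1; the double needs doubleword
   alignment, so C3 rounds NCRN up to 2 and C4 assigns r2-r3; the final int
   then fails C4 and C5 (no core registers left), so C6 pins NCRN at
   NUM_ARG_REGS and the argument goes on the stack under C7/C8.  */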
5891 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5892 for a call to a function whose data type is FNTYPE.
5893 For a library call, FNTYPE is NULL. */
5894 void
5895 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5896 rtx libname,
5897 tree fndecl ATTRIBUTE_UNUSED)
5899 /* Determine the PCS variant to use for this call. */
5900 if (fntype)
5901 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5902 else
5903 pcum->pcs_variant = arm_pcs_default;
5905 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5907 if (arm_libcall_uses_aapcs_base (libname))
5908 pcum->pcs_variant = ARM_PCS_AAPCS;
5910 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5911 pcum->aapcs_reg = NULL_RTX;
5912 pcum->aapcs_partial = 0;
5913 pcum->aapcs_arg_processed = false;
5914 pcum->aapcs_cprc_slot = -1;
5915 pcum->can_split = true;
5917 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5919 int i;
5921 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5923 pcum->aapcs_cprc_failed[i] = false;
5924 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5927 return;
5930 /* Legacy ABIs */
5932 /* On the ARM, the offset starts at 0. */
5933 pcum->nregs = 0;
5934 pcum->iwmmxt_nregs = 0;
5935 pcum->can_split = true;
5937 /* Varargs vectors are treated the same as long long.
5938 named_count avoids having to change the way arm handles 'named' */
5939 pcum->named_count = 0;
5940 pcum->nargs = 0;
5942 if (TARGET_REALLY_IWMMXT && fntype)
5944 tree fn_arg;
5946 for (fn_arg = TYPE_ARG_TYPES (fntype);
5947 fn_arg;
5948 fn_arg = TREE_CHAIN (fn_arg))
5949 pcum->named_count += 1;
5951 if (! pcum->named_count)
5952 pcum->named_count = INT_MAX;
5956 /* Return true if we use LRA instead of reload pass. */
5957 static bool
5958 arm_lra_p (void)
5960 return arm_lra_flag;
5963 /* Return true if mode/type need doubleword alignment. */
5964 static bool
5965 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5967 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5968 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
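/* For instance, DImode and DFmode both have 64-bit alignment, which exceeds
   PARM_BOUNDARY (32 bits), so arguments of those modes answer true here and
   are placed in an even-numbered core register pair or at a
   doubleword-aligned stack slot.  */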
5972 /* Determine where to put an argument to a function.
5973 Value is zero to push the argument on the stack,
5974 or a hard register in which to store the argument.
5976 MODE is the argument's machine mode.
5977 TYPE is the data type of the argument (as a tree).
5978 This is null for libcalls where that information may
5979 not be available.
5980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5981 the preceding args and about the function being called.
5982 NAMED is nonzero if this argument is a named parameter
5983 (otherwise it is an extra parameter matching an ellipsis).
5985 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5986 other arguments are passed on the stack. If (NAMED == 0) (which happens
5987 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5988 defined), say it is passed on the stack (function_prologue will
5989 indeed make it pass on the stack if necessary). */
5991 static rtx
5992 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5993 const_tree type, bool named)
5995 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5996 int nregs;
5998 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5999 a call insn (op3 of a call_value insn). */
6000 if (mode == VOIDmode)
6001 return const0_rtx;
6003 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6005 aapcs_layout_arg (pcum, mode, type, named);
6006 return pcum->aapcs_reg;
6009 /* Varargs vectors are treated the same as long long.
6010 named_count avoids having to change the way arm handles 'named'. */
6011 if (TARGET_IWMMXT_ABI
6012 && arm_vector_mode_supported_p (mode)
6013 && pcum->named_count > pcum->nargs + 1)
6015 if (pcum->iwmmxt_nregs <= 9)
6016 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6017 else
6019 pcum->can_split = false;
6020 return NULL_RTX;
6024 /* Put doubleword aligned quantities in even register pairs. */
6025 if (pcum->nregs & 1
6026 && ARM_DOUBLEWORD_ALIGN
6027 && arm_needs_doubleword_align (mode, type))
6028 pcum->nregs++;
6030 /* Only allow splitting an arg between regs and memory if all preceding
6031 args were allocated to regs. For args passed by reference we only count
6032 the reference pointer. */
6033 if (pcum->can_split)
6034 nregs = 1;
6035 else
6036 nregs = ARM_NUM_REGS2 (mode, type);
6038 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6039 return NULL_RTX;
6041 return gen_rtx_REG (mode, pcum->nregs);
6044 static unsigned int
6045 arm_function_arg_boundary (machine_mode mode, const_tree type)
6047 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6048 ? DOUBLEWORD_ALIGNMENT
6049 : PARM_BOUNDARY);
6052 static int
6053 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6054 tree type, bool named)
6056 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6057 int nregs = pcum->nregs;
6059 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6061 aapcs_layout_arg (pcum, mode, type, named);
6062 return pcum->aapcs_partial;
6065 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6066 return 0;
6068 if (NUM_ARG_REGS > nregs
6069 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6070 && pcum->can_split)
6071 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6073 return 0;
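/* Example of the split case on the legacy-ABI path, assuming can_split is
   still true: with three argument registers already used (nregs == 3), a
   DImode argument needs two words; NUM_ARG_REGS (4) is greater than nregs
   but smaller than nregs + 2, so the function reports
   (4 - 3) * UNITS_PER_WORD = 4 bytes passed in r3 and the remaining word of
   the argument goes on the stack.  */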
6076 /* Update the data in PCUM to advance over an argument
6077 of mode MODE and data type TYPE.
6078 (TYPE is null for libcalls where that information may not be available.) */
6080 static void
6081 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6082 const_tree type, bool named)
6084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6086 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6088 aapcs_layout_arg (pcum, mode, type, named);
6090 if (pcum->aapcs_cprc_slot >= 0)
6092 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6093 type);
6094 pcum->aapcs_cprc_slot = -1;
6097 /* Generic stuff. */
6098 pcum->aapcs_arg_processed = false;
6099 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6100 pcum->aapcs_reg = NULL_RTX;
6101 pcum->aapcs_partial = 0;
6103 else
6105 pcum->nargs += 1;
6106 if (arm_vector_mode_supported_p (mode)
6107 && pcum->named_count > pcum->nargs
6108 && TARGET_IWMMXT_ABI)
6109 pcum->iwmmxt_nregs += 1;
6110 else
6111 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6115 /* Variable sized types are passed by reference. This is a GCC
6116 extension to the ARM ABI. */
6118 static bool
6119 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6120 machine_mode mode ATTRIBUTE_UNUSED,
6121 const_tree type, bool named ATTRIBUTE_UNUSED)
6123 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6126 /* Encode the current state of the #pragma [no_]long_calls. */
6127 typedef enum
6129 OFF, /* No #pragma [no_]long_calls is in effect. */
6130 LONG, /* #pragma long_calls is in effect. */
6131 SHORT /* #pragma no_long_calls is in effect. */
6132 } arm_pragma_enum;
6134 static arm_pragma_enum arm_pragma_long_calls = OFF;
6136 void
6137 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6139 arm_pragma_long_calls = LONG;
6142 void
6143 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6145 arm_pragma_long_calls = SHORT;
6148 void
6149 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6151 arm_pragma_long_calls = OFF;
6154 /* Handle an attribute requiring a FUNCTION_DECL;
6155 arguments as in struct attribute_spec.handler. */
6156 static tree
6157 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6158 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6160 if (TREE_CODE (*node) != FUNCTION_DECL)
6162 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6163 name);
6164 *no_add_attrs = true;
6167 return NULL_TREE;
6170 /* Handle an "interrupt" or "isr" attribute;
6171 arguments as in struct attribute_spec.handler. */
6172 static tree
6173 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6174 bool *no_add_attrs)
6176 if (DECL_P (*node))
6178 if (TREE_CODE (*node) != FUNCTION_DECL)
6180 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6181 name);
6182 *no_add_attrs = true;
6184 /* FIXME: the argument if any is checked for type attributes;
6185 should it be checked for decl ones? */
6187 else
6189 if (TREE_CODE (*node) == FUNCTION_TYPE
6190 || TREE_CODE (*node) == METHOD_TYPE)
6192 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6194 warning (OPT_Wattributes, "%qE attribute ignored",
6195 name);
6196 *no_add_attrs = true;
6199 else if (TREE_CODE (*node) == POINTER_TYPE
6200 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6201 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6202 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6204 *node = build_variant_type_copy (*node);
6205 TREE_TYPE (*node) = build_type_attribute_variant
6206 (TREE_TYPE (*node),
6207 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6208 *no_add_attrs = true;
6210 else
6212 /* Possibly pass this attribute on from the type to a decl. */
6213 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6214 | (int) ATTR_FLAG_FUNCTION_NEXT
6215 | (int) ATTR_FLAG_ARRAY_NEXT))
6217 *no_add_attrs = true;
6218 return tree_cons (name, args, NULL_TREE);
6220 else
6222 warning (OPT_Wattributes, "%qE attribute ignored",
6223 name);
6228 return NULL_TREE;
6231 /* Handle a "pcs" attribute; arguments as in struct
6232 attribute_spec.handler. */
6233 static tree
6234 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6235 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6237 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6239 warning (OPT_Wattributes, "%qE attribute ignored", name);
6240 *no_add_attrs = true;
6242 return NULL_TREE;
6245 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6246 /* Handle the "notshared" attribute. This attribute is another way of
6247 requesting hidden visibility. ARM's compiler supports
6248 "__declspec(notshared)"; we support the same thing via an
6249 attribute. */
6251 static tree
6252 arm_handle_notshared_attribute (tree *node,
6253 tree name ATTRIBUTE_UNUSED,
6254 tree args ATTRIBUTE_UNUSED,
6255 int flags ATTRIBUTE_UNUSED,
6256 bool *no_add_attrs)
6258 tree decl = TYPE_NAME (*node);
6260 if (decl)
6262 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6263 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6264 *no_add_attrs = false;
6266 return NULL_TREE;
6268 #endif
6270 /* Return 0 if the attributes for two types are incompatible, 1 if they
6271 are compatible, and 2 if they are nearly compatible (which causes a
6272 warning to be generated). */
6273 static int
6274 arm_comp_type_attributes (const_tree type1, const_tree type2)
6276 int l1, l2, s1, s2;
6278 /* Check for mismatch of non-default calling convention. */
6279 if (TREE_CODE (type1) != FUNCTION_TYPE)
6280 return 1;
6282 /* Check for mismatched call attributes. */
6283 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6284 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6285 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6286 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6288 /* Only bother to check if an attribute is defined. */
6289 if (l1 | l2 | s1 | s2)
6291 /* If one type has an attribute, the other must have the same attribute. */
6292 if ((l1 != l2) || (s1 != s2))
6293 return 0;
6295 /* Disallow mixed attributes. */
6296 if ((l1 & s2) || (l2 & s1))
6297 return 0;
6300 /* Check for mismatched ISR attribute. */
6301 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6302 if (! l1)
6303 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6304 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6305 if (! l2)
6306 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6307 if (l1 != l2)
6308 return 0;
6310 return 1;
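/* For instance, if one function type carries long_call and the other carries
   short_call (or only one of them has an isr/interrupt attribute), the
   attribute sets are incompatible and 0 is returned; identical attribute
   sets yield 1.  */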
6313 /* Assigns default attributes to newly defined type. This is used to
6314 set short_call/long_call attributes for function types of
6315 functions defined inside corresponding #pragma scopes. */
6316 static void
6317 arm_set_default_type_attributes (tree type)
6319 /* Add __attribute__ ((long_call)) to all functions when inside
6320 #pragma long_calls, or __attribute__ ((short_call)) when inside
6321 #pragma no_long_calls. */
6322 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6324 tree type_attr_list, attr_name;
6325 type_attr_list = TYPE_ATTRIBUTES (type);
6327 if (arm_pragma_long_calls == LONG)
6328 attr_name = get_identifier ("long_call");
6329 else if (arm_pragma_long_calls == SHORT)
6330 attr_name = get_identifier ("short_call");
6331 else
6332 return;
6334 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6335 TYPE_ATTRIBUTES (type) = type_attr_list;
6339 /* Return true if DECL is known to be linked into section SECTION. */
6341 static bool
6342 arm_function_in_section_p (tree decl, section *section)
6344 /* We can only be certain about functions defined in the same
6345 compilation unit. */
6346 if (!TREE_STATIC (decl))
6347 return false;
6349 /* Make sure that SYMBOL always binds to the definition in this
6350 compilation unit. */
6351 if (!targetm.binds_local_p (decl))
6352 return false;
6354 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6355 if (!DECL_SECTION_NAME (decl))
6357 /* Make sure that we will not create a unique section for DECL. */
6358 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6359 return false;
6362 return function_section (decl) == section;
6365 /* Return nonzero if a 32-bit "long_call" should be generated for
6366 a call from the current function to DECL. We generate a long_call
6367 if the function:
6369 a. has an __attribute__ ((long_call))
6370 or b. is within the scope of a #pragma long_calls
6371 or c. the -mlong-calls command line switch has been specified
6373 However we do not generate a long call if the function:
6375 d. has an __attribute__ ((short_call))
6376 or e. is inside the scope of a #pragma no_long_calls
6377 or f. is defined in the same section as the current function. */
6379 bool
6380 arm_is_long_call_p (tree decl)
6382 tree attrs;
6384 if (!decl)
6385 return TARGET_LONG_CALLS;
6387 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6388 if (lookup_attribute ("short_call", attrs))
6389 return false;
6391 /* For "f", be conservative, and only cater for cases in which the
6392 whole of the current function is placed in the same section. */
6393 if (!flag_reorder_blocks_and_partition
6394 && TREE_CODE (decl) == FUNCTION_DECL
6395 && arm_function_in_section_p (decl, current_function_section ()))
6396 return false;
6398 if (lookup_attribute ("long_call", attrs))
6399 return true;
6401 return TARGET_LONG_CALLS;
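/* For example, a declaration along the lines of
   void far_func (void) __attribute__ ((long_call));
   forces calls to far_func to use the long-call sequence even without
   -mlong-calls, while short_call (or #pragma no_long_calls) suppresses it;
   far_func is just an illustrative name.  */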
6404 /* Return nonzero if it is ok to make a tail-call to DECL. */
6405 static bool
6406 arm_function_ok_for_sibcall (tree decl, tree exp)
6408 unsigned long func_type;
6410 if (cfun->machine->sibcall_blocked)
6411 return false;
6413 /* Never tailcall something if we are generating code for Thumb-1. */
6414 if (TARGET_THUMB1)
6415 return false;
6417 /* The PIC register is live on entry to VxWorks PLT entries, so we
6418 must make the call before restoring the PIC register. */
6419 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6420 return false;
6422 /* If we are interworking and the function is not declared static
6423 then we can't tail-call it unless we know that it exists in this
6424 compilation unit (since it might be a Thumb routine). */
6425 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6426 && !TREE_ASM_WRITTEN (decl))
6427 return false;
6429 func_type = arm_current_func_type ();
6430 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6431 if (IS_INTERRUPT (func_type))
6432 return false;
6434 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6436 /* Check that the return value locations are the same. For
6437 example that we aren't returning a value from the sibling in
6438 a VFP register but then need to transfer it to a core
6439 register. */
6440 rtx a, b;
6442 a = arm_function_value (TREE_TYPE (exp), decl, false);
6443 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6444 cfun->decl, false);
6445 if (!rtx_equal_p (a, b))
6446 return false;
6449 /* Never tailcall if function may be called with a misaligned SP. */
6450 if (IS_STACKALIGN (func_type))
6451 return false;
6453 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6454 references should become a NOP. Don't convert such calls into
6455 sibling calls. */
6456 if (TARGET_AAPCS_BASED
6457 && arm_abi == ARM_ABI_AAPCS
6458 && decl
6459 && DECL_WEAK (decl))
6460 return false;
6462 /* Everything else is ok. */
6463 return true;
6467 /* Addressing mode support functions. */
6469 /* Return nonzero if X is a legitimate immediate operand when compiling
6470 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6471 int
6472 legitimate_pic_operand_p (rtx x)
6474 if (GET_CODE (x) == SYMBOL_REF
6475 || (GET_CODE (x) == CONST
6476 && GET_CODE (XEXP (x, 0)) == PLUS
6477 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6478 return 0;
6480 return 1;
6483 /* Record that the current function needs a PIC register. Initialize
6484 cfun->machine->pic_reg if we have not already done so. */
6486 static void
6487 require_pic_register (void)
6489 /* A lot of the logic here is made obscure by the fact that this
6490 routine gets called as part of the rtx cost estimation process.
6491 We don't want those calls to affect any assumptions about the real
6492 function; and further, we can't call entry_of_function() until we
6493 start the real expansion process. */
6494 if (!crtl->uses_pic_offset_table)
6496 gcc_assert (can_create_pseudo_p ());
6497 if (arm_pic_register != INVALID_REGNUM
6498 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6500 if (!cfun->machine->pic_reg)
6501 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6503 /* Play games to avoid marking the function as needing pic
6504 if we are being called as part of the cost-estimation
6505 process. */
6506 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6507 crtl->uses_pic_offset_table = 1;
6509 else
6511 rtx_insn *seq, *insn;
6513 if (!cfun->machine->pic_reg)
6514 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6516 /* Play games to avoid marking the function as needing pic
6517 if we are being called as part of the cost-estimation
6518 process. */
6519 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6521 crtl->uses_pic_offset_table = 1;
6522 start_sequence ();
6524 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6525 && arm_pic_register > LAST_LO_REGNUM)
6526 emit_move_insn (cfun->machine->pic_reg,
6527 gen_rtx_REG (Pmode, arm_pic_register));
6528 else
6529 arm_load_pic_register (0UL);
6531 seq = get_insns ();
6532 end_sequence ();
6534 for (insn = seq; insn; insn = NEXT_INSN (insn))
6535 if (INSN_P (insn))
6536 INSN_LOCATION (insn) = prologue_location;
6538 /* We can be called during expansion of PHI nodes, where
6539 we can't yet emit instructions directly in the final
6540 insn stream. Queue the insns on the entry edge, they will
6541 be committed after everything else is expanded. */
6542 insert_insn_on_edge (seq,
6543 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6549 rtx
6550 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6552 if (GET_CODE (orig) == SYMBOL_REF
6553 || GET_CODE (orig) == LABEL_REF)
6555 rtx insn;
6557 if (reg == 0)
6559 gcc_assert (can_create_pseudo_p ());
6560 reg = gen_reg_rtx (Pmode);
6563 /* VxWorks does not impose a fixed gap between segments; the run-time
6564 gap can be different from the object-file gap. We therefore can't
6565 use GOTOFF unless we are absolutely sure that the symbol is in the
6566 same segment as the GOT. Unfortunately, the flexibility of linker
6567 scripts means that we can't be sure of that in general, so assume
6568 that GOTOFF is never valid on VxWorks. */
6569 if ((GET_CODE (orig) == LABEL_REF
6570 || (GET_CODE (orig) == SYMBOL_REF &&
6571 SYMBOL_REF_LOCAL_P (orig)))
6572 && NEED_GOT_RELOC
6573 && arm_pic_data_is_text_relative)
6574 insn = arm_pic_static_addr (orig, reg);
6575 else
6577 rtx pat;
6578 rtx mem;
6580 /* If this function doesn't have a pic register, create one now. */
6581 require_pic_register ();
6583 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6585 /* Make the MEM as close to a constant as possible. */
6586 mem = SET_SRC (pat);
6587 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6588 MEM_READONLY_P (mem) = 1;
6589 MEM_NOTRAP_P (mem) = 1;
6591 insn = emit_insn (pat);
6594 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6595 by loop. */
6596 set_unique_reg_note (insn, REG_EQUAL, orig);
6598 return reg;
6600 else if (GET_CODE (orig) == CONST)
6602 rtx base, offset;
6604 if (GET_CODE (XEXP (orig, 0)) == PLUS
6605 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6606 return orig;
6608 /* Handle the case where we have: const (UNSPEC_TLS). */
6609 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6610 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6611 return orig;
6613 /* Handle the case where we have:
6614 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6615 CONST_INT. */
6616 if (GET_CODE (XEXP (orig, 0)) == PLUS
6617 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6618 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6620 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6621 return orig;
6624 if (reg == 0)
6626 gcc_assert (can_create_pseudo_p ());
6627 reg = gen_reg_rtx (Pmode);
6630 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6632 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6633 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6634 base == reg ? 0 : reg);
6636 if (CONST_INT_P (offset))
6638 /* The base register doesn't really matter, we only want to
6639 test the index for the appropriate mode. */
6640 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6642 gcc_assert (can_create_pseudo_p ());
6643 offset = force_reg (Pmode, offset);
6646 if (CONST_INT_P (offset))
6647 return plus_constant (Pmode, base, INTVAL (offset));
6650 if (GET_MODE_SIZE (mode) > 4
6651 && (GET_MODE_CLASS (mode) == MODE_INT
6652 || TARGET_SOFT_FLOAT))
6654 emit_insn (gen_addsi3 (reg, base, offset));
6655 return reg;
6658 return gen_rtx_PLUS (Pmode, base, offset);
6661 return orig;
6665 /* Find a spare register to use during the prolog of a function. */
6667 static int
6668 thumb_find_work_register (unsigned long pushed_regs_mask)
6670 int reg;
6672 /* Check the argument registers first as these are call-used. The
6673 register allocation order means that sometimes r3 might be used
6674 but earlier argument registers might not, so check them all. */
6675 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6676 if (!df_regs_ever_live_p (reg))
6677 return reg;
6679 /* Before going on to check the call-saved registers we can try a couple
6680 more ways of deducing that r3 is available. The first is when we are
6681 pushing anonymous arguments onto the stack and we have less than 4
6682 registers worth of fixed arguments(*). In this case r3 will be part of
6683 the variable argument list and so we can be sure that it will be
6684 pushed right at the start of the function. Hence it will be available
6685 for the rest of the prologue.
6686 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6687 if (cfun->machine->uses_anonymous_args
6688 && crtl->args.pretend_args_size > 0)
6689 return LAST_ARG_REGNUM;
6691 /* The other case is when we have fixed arguments but less than 4 registers
6692 worth. In this case r3 might be used in the body of the function, but
6693 it is not being used to convey an argument into the function. In theory
6694 we could just check crtl->args.size to see how many bytes are
6695 being passed in argument registers, but it seems that it is unreliable.
6696 Sometimes it will have the value 0 when in fact arguments are being
6697 passed. (See testcase execute/20021111-1.c for an example). So we also
6698 check the args_info.nregs field as well. The problem with this field is
6699 that it makes no allowances for arguments that are passed to the
6700 function but which are not used. Hence we could miss an opportunity
6701 when a function has an unused argument in r3. But it is better to be
6702 safe than to be sorry. */
6703 if (! cfun->machine->uses_anonymous_args
6704 && crtl->args.size >= 0
6705 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6706 && (TARGET_AAPCS_BASED
6707 ? crtl->args.info.aapcs_ncrn < 4
6708 : crtl->args.info.nregs < 4))
6709 return LAST_ARG_REGNUM;
6711 /* Otherwise look for a call-saved register that is going to be pushed. */
6712 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6713 if (pushed_regs_mask & (1 << reg))
6714 return reg;
6716 if (TARGET_THUMB2)
6718 /* Thumb-2 can use high regs. */
6719 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6720 if (pushed_regs_mask & (1 << reg))
6721 return reg;
6723 /* Something went wrong - thumb_compute_save_reg_mask()
6724 should have arranged for a suitable register to be pushed. */
6725 gcc_unreachable ();
6728 static GTY(()) int pic_labelno;
6730 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6731 low register. */
6733 void
6734 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6736 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6738 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6739 return;
6741 gcc_assert (flag_pic);
6743 pic_reg = cfun->machine->pic_reg;
6744 if (TARGET_VXWORKS_RTP)
6746 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6747 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6748 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6750 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6752 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6753 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6755 else
6757 /* We use an UNSPEC rather than a LABEL_REF because this label
6758 never appears in the code stream. */
6760 labelno = GEN_INT (pic_labelno++);
6761 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6762 l1 = gen_rtx_CONST (VOIDmode, l1);
6764 /* On the ARM the PC register contains 'dot + 8' at the time of the
6765 addition; on the Thumb it is 'dot + 4'. */
6766 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6767 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6768 UNSPEC_GOTSYM_OFF);
6769 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6771 if (TARGET_32BIT)
6773 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6775 else /* TARGET_THUMB1 */
6777 if (arm_pic_register != INVALID_REGNUM
6778 && REGNO (pic_reg) > LAST_LO_REGNUM)
6780 /* We will have pushed the pic register, so we should always be
6781 able to find a work register. */
6782 pic_tmp = gen_rtx_REG (SImode,
6783 thumb_find_work_register (saved_regs));
6784 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6785 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6786 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6788 else if (arm_pic_register != INVALID_REGNUM
6789 && arm_pic_register > LAST_LO_REGNUM
6790 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6792 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6793 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6794 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6796 else
6797 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6801 /* Need to emit this whether or not we obey regdecls,
6802 since setjmp/longjmp can cause life info to screw up. */
6803 emit_use (pic_reg);
6806 /* Generate code to load the address of a static var when flag_pic is set. */
6807 static rtx
6808 arm_pic_static_addr (rtx orig, rtx reg)
6810 rtx l1, labelno, offset_rtx, insn;
6812 gcc_assert (flag_pic);
6814 /* We use an UNSPEC rather than a LABEL_REF because this label
6815 never appears in the code stream. */
6816 labelno = GEN_INT (pic_labelno++);
6817 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6818 l1 = gen_rtx_CONST (VOIDmode, l1);
6820 /* On the ARM the PC register contains 'dot + 8' at the time of the
6821 addition; on the Thumb it is 'dot + 4'. */
6822 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6823 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6824 UNSPEC_SYMBOL_OFFSET);
6825 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6827 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6828 return insn;
6831 /* Return nonzero if X is valid as an ARM state addressing register. */
6832 static int
6833 arm_address_register_rtx_p (rtx x, int strict_p)
6835 int regno;
6837 if (!REG_P (x))
6838 return 0;
6840 regno = REGNO (x);
6842 if (strict_p)
6843 return ARM_REGNO_OK_FOR_BASE_P (regno);
6845 return (regno <= LAST_ARM_REGNUM
6846 || regno >= FIRST_PSEUDO_REGISTER
6847 || regno == FRAME_POINTER_REGNUM
6848 || regno == ARG_POINTER_REGNUM);
6851 /* Return TRUE if this rtx is the difference of a symbol and a label,
6852 and will reduce to a PC-relative relocation in the object file.
6853 Expressions like this can be left alone when generating PIC, rather
6854 than forced through the GOT. */
6855 static int
6856 pcrel_constant_p (rtx x)
6858 if (GET_CODE (x) == MINUS)
6859 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6861 return FALSE;
6864 /* Return true if X will surely end up in an index register after next
6865 splitting pass. */
6866 static bool
6867 will_be_in_index_register (const_rtx x)
6869 /* arm.md: calculate_pic_address will split this into a register. */
6870 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6873 /* Return nonzero if X is a valid ARM state address operand. */
6874 int
6875 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6876 int strict_p)
6878 bool use_ldrd;
6879 enum rtx_code code = GET_CODE (x);
6881 if (arm_address_register_rtx_p (x, strict_p))
6882 return 1;
6884 use_ldrd = (TARGET_LDRD
6885 && (mode == DImode
6886 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6888 if (code == POST_INC || code == PRE_DEC
6889 || ((code == PRE_INC || code == POST_DEC)
6890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6895 && GET_CODE (XEXP (x, 1)) == PLUS
6896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6898 rtx addend = XEXP (XEXP (x, 1), 1);
6900 /* Don't allow ldrd post-increment by register because it's hard
6901 to fix up invalid register choices. */
6902 if (use_ldrd
6903 && GET_CODE (x) == POST_MODIFY
6904 && REG_P (addend))
6905 return 0;
6907 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6908 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6911 /* After reload constants split into minipools will have addresses
6912 from a LABEL_REF. */
6913 else if (reload_completed
6914 && (code == LABEL_REF
6915 || (code == CONST
6916 && GET_CODE (XEXP (x, 0)) == PLUS
6917 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6918 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6919 return 1;
6921 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6922 return 0;
6924 else if (code == PLUS)
6926 rtx xop0 = XEXP (x, 0);
6927 rtx xop1 = XEXP (x, 1);
6929 return ((arm_address_register_rtx_p (xop0, strict_p)
6930 && ((CONST_INT_P (xop1)
6931 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6932 || (!strict_p && will_be_in_index_register (xop1))))
6933 || (arm_address_register_rtx_p (xop1, strict_p)
6934 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6937 #if 0
6938 /* Reload currently can't handle MINUS, so disable this for now */
6939 else if (GET_CODE (x) == MINUS)
6941 rtx xop0 = XEXP (x, 0);
6942 rtx xop1 = XEXP (x, 1);
6944 return (arm_address_register_rtx_p (xop0, strict_p)
6945 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6947 #endif
6949 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6950 && code == SYMBOL_REF
6951 && CONSTANT_POOL_ADDRESS_P (x)
6952 && ! (flag_pic
6953 && symbol_mentioned_p (get_pool_constant (x))
6954 && ! pcrel_constant_p (get_pool_constant (x))))
6955 return 1;
6957 return 0;
6960 /* Return nonzero if X is a valid Thumb-2 address operand. */
6961 static int
6962 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6964 bool use_ldrd;
6965 enum rtx_code code = GET_CODE (x);
6967 if (arm_address_register_rtx_p (x, strict_p))
6968 return 1;
6970 use_ldrd = (TARGET_LDRD
6971 && (mode == DImode
6972 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6974 if (code == POST_INC || code == PRE_DEC
6975 || ((code == PRE_INC || code == POST_DEC)
6976 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6977 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6979 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6980 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6981 && GET_CODE (XEXP (x, 1)) == PLUS
6982 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6984 /* Thumb-2 only has autoincrement by constant. */
6985 rtx addend = XEXP (XEXP (x, 1), 1);
6986 HOST_WIDE_INT offset;
6988 if (!CONST_INT_P (addend))
6989 return 0;
6991 offset = INTVAL(addend);
6992 if (GET_MODE_SIZE (mode) <= 4)
6993 return (offset > -256 && offset < 256);
6995 return (use_ldrd && offset > -1024 && offset < 1024
6996 && (offset & 3) == 0);
6999 /* After reload constants split into minipools will have addresses
7000 from a LABEL_REF. */
7001 else if (reload_completed
7002 && (code == LABEL_REF
7003 || (code == CONST
7004 && GET_CODE (XEXP (x, 0)) == PLUS
7005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7006 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7007 return 1;
7009 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7010 return 0;
7012 else if (code == PLUS)
7014 rtx xop0 = XEXP (x, 0);
7015 rtx xop1 = XEXP (x, 1);
7017 return ((arm_address_register_rtx_p (xop0, strict_p)
7018 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7019 || (!strict_p && will_be_in_index_register (xop1))))
7020 || (arm_address_register_rtx_p (xop1, strict_p)
7021 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7024 /* Normally we can assign constant values to target registers without
7025 the help of the constant pool. But there are cases where we have to
7026 use the constant pool, such as:
7027 1) assigning a label to a register;
7028 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7030 A constant pool access of the form:
7031 (set (reg r0) (mem (symbol_ref (".LC0"))))
7032 will cause the use of a literal pool (later, in arm_reorg).
7033 So here we mark such a form as invalid; the compiler will then
7034 adjust it into:
7035 (set (reg r0) (symbol_ref (".LC0")))
7036 (set (reg r0) (mem (reg r0))).
7037 No extra register is required, and (mem (reg r0)) won't cause the use
7038 of literal pools. */
7039 else if (arm_disable_literal_pool && code == SYMBOL_REF
7040 && CONSTANT_POOL_ADDRESS_P (x))
7041 return 0;
7043 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7044 && code == SYMBOL_REF
7045 && CONSTANT_POOL_ADDRESS_P (x)
7046 && ! (flag_pic
7047 && symbol_mentioned_p (get_pool_constant (x))
7048 && ! pcrel_constant_p (get_pool_constant (x))))
7049 return 1;
7051 return 0;
7054 /* Return nonzero if INDEX is valid for an address index operand in
7055 ARM state. */
7056 static int
7057 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7058 int strict_p)
7060 HOST_WIDE_INT range;
7061 enum rtx_code code = GET_CODE (index);
7063 /* Standard coprocessor addressing modes. */
7064 if (TARGET_HARD_FLOAT
7065 && TARGET_VFP
7066 && (mode == SFmode || mode == DFmode))
7067 return (code == CONST_INT && INTVAL (index) < 1024
7068 && INTVAL (index) > -1024
7069 && (INTVAL (index) & 3) == 0);
7071 /* For quad modes, we restrict the constant offset to be slightly less
7072 than what the instruction format permits. We do this because for
7073 quad mode moves, we will actually decompose them into two separate
7074 double-mode reads or writes. INDEX must therefore be a valid
7075 (double-mode) offset and so should INDEX+8. */
7076 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7077 return (code == CONST_INT
7078 && INTVAL (index) < 1016
7079 && INTVAL (index) > -1024
7080 && (INTVAL (index) & 3) == 0);
7082 /* We have no such constraint on double mode offsets, so we permit the
7083 full range of the instruction format. */
7084 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7085 return (code == CONST_INT
7086 && INTVAL (index) < 1024
7087 && INTVAL (index) > -1024
7088 && (INTVAL (index) & 3) == 0);
7090 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7091 return (code == CONST_INT
7092 && INTVAL (index) < 1024
7093 && INTVAL (index) > -1024
7094 && (INTVAL (index) & 3) == 0);
7096 if (arm_address_register_rtx_p (index, strict_p)
7097 && (GET_MODE_SIZE (mode) <= 4))
7098 return 1;
7100 if (mode == DImode || mode == DFmode)
7102 if (code == CONST_INT)
7104 HOST_WIDE_INT val = INTVAL (index);
7106 if (TARGET_LDRD)
7107 return val > -256 && val < 256;
7108 else
7109 return val > -4096 && val < 4092;
7112 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7115 if (GET_MODE_SIZE (mode) <= 4
7116 && ! (arm_arch4
7117 && (mode == HImode
7118 || mode == HFmode
7119 || (mode == QImode && outer == SIGN_EXTEND))))
7121 if (code == MULT)
7123 rtx xiop0 = XEXP (index, 0);
7124 rtx xiop1 = XEXP (index, 1);
7126 return ((arm_address_register_rtx_p (xiop0, strict_p)
7127 && power_of_two_operand (xiop1, SImode))
7128 || (arm_address_register_rtx_p (xiop1, strict_p)
7129 && power_of_two_operand (xiop0, SImode)));
7131 else if (code == LSHIFTRT || code == ASHIFTRT
7132 || code == ASHIFT || code == ROTATERT)
7134 rtx op = XEXP (index, 1);
7136 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7137 && CONST_INT_P (op)
7138 && INTVAL (op) > 0
7139 && INTVAL (op) <= 31);
7143 /* For ARM v4 we may be doing a sign-extend operation during the
7144 load. */
7145 if (arm_arch4)
7147 if (mode == HImode
7148 || mode == HFmode
7149 || (outer == SIGN_EXTEND && mode == QImode))
7150 range = 256;
7151 else
7152 range = 4096;
7154 else
7155 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7157 return (code == CONST_INT
7158 && INTVAL (index) < range
7159 && INTVAL (index) > -range);
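/* Roughly, for a word-sized load on ARMv4 or later this accepts immediate
   offsets strictly between -4096 and 4096, e.g. [r1, #4092], as well as a
   register index scaled by a power of two such as [r1, r2, lsl #2]; for
   SFmode/DFmode with VFP the offset must instead be a multiple of 4 between
   -1020 and 1020.  */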
7162 /* Return true if OP is a valid index scaling factor for a Thumb-2
7163 address index operand, i.e. 1, 2, 4 or 8. */
7164 static bool
7165 thumb2_index_mul_operand (rtx op)
7167 HOST_WIDE_INT val;
7169 if (!CONST_INT_P (op))
7170 return false;
7172 val = INTVAL(op);
7173 return (val == 1 || val == 2 || val == 4 || val == 8);
7176 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7177 static int
7178 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7180 enum rtx_code code = GET_CODE (index);
7182 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7183 /* Standard coprocessor addressing modes. */
7184 if (TARGET_HARD_FLOAT
7185 && TARGET_VFP
7186 && (mode == SFmode || mode == DFmode))
7187 return (code == CONST_INT && INTVAL (index) < 1024
7188 /* Thumb-2 allows only > -256 index range for its core register
7189 loads/stores. Since we allow SF/DF values in core registers, we
7190 have to use the intersection of the -256..4096 (core) and
7191 -1024..1024 (coprocessor) ranges. */
7192 && INTVAL (index) > -256
7193 && (INTVAL (index) & 3) == 0);
7195 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7197 /* For DImode assume values will usually live in core regs
7198 and only allow LDRD addressing modes. */
7199 if (!TARGET_LDRD || mode != DImode)
7200 return (code == CONST_INT
7201 && INTVAL (index) < 1024
7202 && INTVAL (index) > -1024
7203 && (INTVAL (index) & 3) == 0);
7206 /* For quad modes, we restrict the constant offset to be slightly less
7207 than what the instruction format permits. We do this because for
7208 quad mode moves, we will actually decompose them into two separate
7209 double-mode reads or writes. INDEX must therefore be a valid
7210 (double-mode) offset and so should INDEX+8. */
7211 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7212 return (code == CONST_INT
7213 && INTVAL (index) < 1016
7214 && INTVAL (index) > -1024
7215 && (INTVAL (index) & 3) == 0);
7217 /* We have no such constraint on double mode offsets, so we permit the
7218 full range of the instruction format. */
7219 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7220 return (code == CONST_INT
7221 && INTVAL (index) < 1024
7222 && INTVAL (index) > -1024
7223 && (INTVAL (index) & 3) == 0);
7225 if (arm_address_register_rtx_p (index, strict_p)
7226 && (GET_MODE_SIZE (mode) <= 4))
7227 return 1;
7229 if (mode == DImode || mode == DFmode)
7231 if (code == CONST_INT)
7233 HOST_WIDE_INT val = INTVAL (index);
7234 /* ??? Can we assume ldrd for thumb2? */
7235 /* Thumb-2 ldrd only has reg+const addressing modes. */
7236 /* ldrd supports offsets of +-1020.
7237 However the ldr fallback does not. */
7238 return val > -256 && val < 256 && (val & 3) == 0;
7240 else
7241 return 0;
7244 if (code == MULT)
7246 rtx xiop0 = XEXP (index, 0);
7247 rtx xiop1 = XEXP (index, 1);
7249 return ((arm_address_register_rtx_p (xiop0, strict_p)
7250 && thumb2_index_mul_operand (xiop1))
7251 || (arm_address_register_rtx_p (xiop1, strict_p)
7252 && thumb2_index_mul_operand (xiop0)));
7254 else if (code == ASHIFT)
7256 rtx op = XEXP (index, 1);
7258 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7259 && CONST_INT_P (op)
7260 && INTVAL (op) > 0
7261 && INTVAL (op) <= 3);
7264 return (code == CONST_INT
7265 && INTVAL (index) < 4096
7266 && INTVAL (index) > -256);
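/* Roughly, the final test above gives Thumb-2 core accesses an immediate
   offset range of -255..4095 for word-sized and smaller modes, while the
   earlier DImode/DFmode case is capped at word-aligned offsets within
   (-256, 256) because the plain LDR fallback cannot reach the full +/-1020
   LDRD range.  */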
7269 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7270 static int
7271 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7273 int regno;
7275 if (!REG_P (x))
7276 return 0;
7278 regno = REGNO (x);
7280 if (strict_p)
7281 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7283 return (regno <= LAST_LO_REGNUM
7284 || regno > LAST_VIRTUAL_REGISTER
7285 || regno == FRAME_POINTER_REGNUM
7286 || (GET_MODE_SIZE (mode) >= 4
7287 && (regno == STACK_POINTER_REGNUM
7288 || regno >= FIRST_PSEUDO_REGISTER
7289 || x == hard_frame_pointer_rtx
7290 || x == arg_pointer_rtx)));
7293 /* Return nonzero if x is a legitimate index register. This is the case
7294 for any base register that can access a QImode object. */
7295 inline static int
7296 thumb1_index_register_rtx_p (rtx x, int strict_p)
7298 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7301 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7303 The AP may be eliminated to either the SP or the FP, so we use the
7304 least common denominator, e.g. SImode, and offsets from 0 to 64.
7306 ??? Verify whether the above is the right approach.
7308 ??? Also, the FP may be eliminated to the SP, so perhaps that
7309 needs special handling also.
7311 ??? Look at how the mips16 port solves this problem. It probably uses
7312 better ways to solve some of these problems.
7314 Although it is not incorrect, we don't accept QImode and HImode
7315 addresses based on the frame pointer or arg pointer until the
7316 reload pass starts. This is so that eliminating such addresses
7317 into stack based ones won't produce impossible code. */
7318 int
7319 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7321 /* ??? Not clear if this is right. Experiment. */
7322 if (GET_MODE_SIZE (mode) < 4
7323 && !(reload_in_progress || reload_completed)
7324 && (reg_mentioned_p (frame_pointer_rtx, x)
7325 || reg_mentioned_p (arg_pointer_rtx, x)
7326 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7327 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7328 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7329 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7330 return 0;
7332 /* Accept any base register. SP only in SImode or larger. */
7333 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7334 return 1;
7336 /* This is PC relative data before arm_reorg runs. */
7337 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7338 && GET_CODE (x) == SYMBOL_REF
7339 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7340 return 1;
7342 /* This is PC relative data after arm_reorg runs. */
7343 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7344 && reload_completed
7345 && (GET_CODE (x) == LABEL_REF
7346 || (GET_CODE (x) == CONST
7347 && GET_CODE (XEXP (x, 0)) == PLUS
7348 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7349 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7350 return 1;
7352 /* Post-inc indexing only supported for SImode and larger. */
7353 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7354 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7355 return 1;
7357 else if (GET_CODE (x) == PLUS)
7359 /* REG+REG address can be any two index registers. */
7360 /* We disallow FRAME+REG addressing since we know that FRAME
7361 will be replaced with STACK, and SP relative addressing only
7362 permits SP+OFFSET. */
7363 if (GET_MODE_SIZE (mode) <= 4
7364 && XEXP (x, 0) != frame_pointer_rtx
7365 && XEXP (x, 1) != frame_pointer_rtx
7366 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7367 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7368 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7369 return 1;
7371 /* REG+const has 5-7 bit offset for non-SP registers. */
7372 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7373 || XEXP (x, 0) == arg_pointer_rtx)
7374 && CONST_INT_P (XEXP (x, 1))
7375 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7376 return 1;
7378 /* REG+const has 10-bit offset for SP, but only SImode and
7379 larger is supported. */
7380 /* ??? Should probably check for DI/DFmode overflow here
7381 just like GO_IF_LEGITIMATE_OFFSET does. */
7382 else if (REG_P (XEXP (x, 0))
7383 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7384 && GET_MODE_SIZE (mode) >= 4
7385 && CONST_INT_P (XEXP (x, 1))
7386 && INTVAL (XEXP (x, 1)) >= 0
7387 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7388 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7389 return 1;
7391 else if (REG_P (XEXP (x, 0))
7392 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7393 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7394 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7395 && REGNO (XEXP (x, 0))
7396 <= LAST_VIRTUAL_POINTER_REGISTER))
7397 && GET_MODE_SIZE (mode) >= 4
7398 && CONST_INT_P (XEXP (x, 1))
7399 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7400 return 1;
7403 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7404 && GET_MODE_SIZE (mode) == 4
7405 && GET_CODE (x) == SYMBOL_REF
7406 && CONSTANT_POOL_ADDRESS_P (x)
7407 && ! (flag_pic
7408 && symbol_mentioned_p (get_pool_constant (x))
7409 && ! pcrel_constant_p (get_pool_constant (x))))
7410 return 1;
7412 return 0;
7415 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7416 instruction of mode MODE. */
7417 int
7418 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7420 switch (GET_MODE_SIZE (mode))
7422 case 1:
7423 return val >= 0 && val < 32;
7425 case 2:
7426 return val >= 0 && val < 64 && (val & 1) == 0;
7428 default:
7429 return (val >= 0
7430 && (val + GET_MODE_SIZE (mode)) <= 128
7431 && (val & 3) == 0);
7435 bool
7436 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7438 if (TARGET_ARM)
7439 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7440 else if (TARGET_THUMB2)
7441 return thumb2_legitimate_address_p (mode, x, strict_p);
7442 else /* if (TARGET_THUMB1) */
7443 return thumb1_legitimate_address_p (mode, x, strict_p);
7446 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7448 Given an rtx X being reloaded into a reg required to be
7449 in class CLASS, return the class of reg to actually use.
7450 In general this is just CLASS, but for the Thumb core registers and
7451 immediate constants we prefer a LO_REGS class or a subset. */
7453 static reg_class_t
7454 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7456 if (TARGET_32BIT)
7457 return rclass;
7458 else
7460 if (rclass == GENERAL_REGS)
7461 return LO_REGS;
7462 else
7463 return rclass;
7467 /* Build the SYMBOL_REF for __tls_get_addr. */
7469 static GTY(()) rtx tls_get_addr_libfunc;
7471 static rtx
7472 get_tls_get_addr (void)
7474 if (!tls_get_addr_libfunc)
7475 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7476 return tls_get_addr_libfunc;
7479 rtx
7480 arm_load_tp (rtx target)
7482 if (!target)
7483 target = gen_reg_rtx (SImode);
7485 if (TARGET_HARD_TP)
7487 /* Can return in any reg. */
7488 emit_insn (gen_load_tp_hard (target));
7490 else
7492 /* Always returned in r0. Immediately copy the result into a pseudo,
7493 otherwise other uses of r0 (e.g. setting up function arguments) may
7494 clobber the value. */
7496 rtx tmp;
7498 emit_insn (gen_load_tp_soft ());
7500 tmp = gen_rtx_REG (SImode, 0);
7501 emit_move_insn (target, tmp);
7503 return target;
7506 static rtx
7507 load_tls_operand (rtx x, rtx reg)
7509 rtx tmp;
7511 if (reg == NULL_RTX)
7512 reg = gen_reg_rtx (SImode);
7514 tmp = gen_rtx_CONST (SImode, x);
7516 emit_move_insn (reg, tmp);
7518 return reg;
7521 static rtx
7522 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7524 rtx insns, label, labelno, sum;
7526 gcc_assert (reloc != TLS_DESCSEQ);
7527 start_sequence ();
7529 labelno = GEN_INT (pic_labelno++);
7530 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7531 label = gen_rtx_CONST (VOIDmode, label);
7533 sum = gen_rtx_UNSPEC (Pmode,
7534 gen_rtvec (4, x, GEN_INT (reloc), label,
7535 GEN_INT (TARGET_ARM ? 8 : 4)),
7536 UNSPEC_TLS);
7537 reg = load_tls_operand (sum, reg);
7539 if (TARGET_ARM)
7540 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7541 else
7542 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7544 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7545 LCT_PURE, /* LCT_CONST? */
7546 Pmode, 1, reg, Pmode);
7548 insns = get_insns ();
7549 end_sequence ();
7551 return insns;
7554 static rtx
7555 arm_tls_descseq_addr (rtx x, rtx reg)
7557 rtx labelno = GEN_INT (pic_labelno++);
7558 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7559 rtx sum = gen_rtx_UNSPEC (Pmode,
7560 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7561 gen_rtx_CONST (VOIDmode, label),
7562 GEN_INT (!TARGET_ARM)),
7563 UNSPEC_TLS);
7564 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7566 emit_insn (gen_tlscall (x, labelno));
7567 if (!reg)
7568 reg = gen_reg_rtx (SImode);
7569 else
7570 gcc_assert (REGNO (reg) != 0);
7572 emit_move_insn (reg, reg0);
7574 return reg;
7577 rtx
7578 legitimize_tls_address (rtx x, rtx reg)
7580 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7581 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7583 switch (model)
7585 case TLS_MODEL_GLOBAL_DYNAMIC:
7586 if (TARGET_GNU2_TLS)
7588 reg = arm_tls_descseq_addr (x, reg);
7590 tp = arm_load_tp (NULL_RTX);
7592 dest = gen_rtx_PLUS (Pmode, tp, reg);
7594 else
7596 /* Original scheme */
7597 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7598 dest = gen_reg_rtx (Pmode);
7599 emit_libcall_block (insns, dest, ret, x);
7601 return dest;
7603 case TLS_MODEL_LOCAL_DYNAMIC:
7604 if (TARGET_GNU2_TLS)
7606 reg = arm_tls_descseq_addr (x, reg);
7608 tp = arm_load_tp (NULL_RTX);
7610 dest = gen_rtx_PLUS (Pmode, tp, reg);
7612 else
7614 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7616 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7617 share the LDM result with other LD model accesses. */
7618 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7619 UNSPEC_TLS);
7620 dest = gen_reg_rtx (Pmode);
7621 emit_libcall_block (insns, dest, ret, eqv);
7623 /* Load the addend. */
7624 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7625 GEN_INT (TLS_LDO32)),
7626 UNSPEC_TLS);
7627 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7628 dest = gen_rtx_PLUS (Pmode, dest, addend);
7630 return dest;
7632 case TLS_MODEL_INITIAL_EXEC:
7633 labelno = GEN_INT (pic_labelno++);
7634 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7635 label = gen_rtx_CONST (VOIDmode, label);
7636 sum = gen_rtx_UNSPEC (Pmode,
7637 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7638 GEN_INT (TARGET_ARM ? 8 : 4)),
7639 UNSPEC_TLS);
7640 reg = load_tls_operand (sum, reg);
7642 if (TARGET_ARM)
7643 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7644 else if (TARGET_THUMB2)
7645 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7646 else
7648 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7649 emit_move_insn (reg, gen_const_mem (SImode, reg));
7652 tp = arm_load_tp (NULL_RTX);
7654 return gen_rtx_PLUS (Pmode, tp, reg);
7656 case TLS_MODEL_LOCAL_EXEC:
7657 tp = arm_load_tp (NULL_RTX);
7659 reg = gen_rtx_UNSPEC (Pmode,
7660 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7661 UNSPEC_TLS);
7662 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7664 return gen_rtx_PLUS (Pmode, tp, reg);
7666 default:
7667 abort ();
7671 /* Try machine-dependent ways of modifying an illegitimate address
7672 to be legitimate. If we find one, return the new, valid address. */
7673 rtx
7674 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7676 if (arm_tls_referenced_p (x))
7678 rtx addend = NULL;
7680 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7682 addend = XEXP (XEXP (x, 0), 1);
7683 x = XEXP (XEXP (x, 0), 0);
7686 if (GET_CODE (x) != SYMBOL_REF)
7687 return x;
7689 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7691 x = legitimize_tls_address (x, NULL_RTX);
7693 if (addend)
7695 x = gen_rtx_PLUS (SImode, x, addend);
7696 orig_x = x;
7698 else
7699 return x;
7702 if (!TARGET_ARM)
7704 /* TODO: legitimize_address for Thumb2. */
7705 if (TARGET_THUMB2)
7706 return x;
7707 return thumb_legitimize_address (x, orig_x, mode);
7710 if (GET_CODE (x) == PLUS)
7712 rtx xop0 = XEXP (x, 0);
7713 rtx xop1 = XEXP (x, 1);
7715 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7716 xop0 = force_reg (SImode, xop0);
7718 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7719 && !symbol_mentioned_p (xop1))
7720 xop1 = force_reg (SImode, xop1);
7722 if (ARM_BASE_REGISTER_RTX_P (xop0)
7723 && CONST_INT_P (xop1))
7725 HOST_WIDE_INT n, low_n;
7726 rtx base_reg, val;
7727 n = INTVAL (xop1);
7729 /* VFP addressing modes actually allow greater offsets, but for
7730 now we just stick with the lowest common denominator. */
7731 if (mode == DImode
7732 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7734 low_n = n & 0x0f;
7735 n &= ~0x0f;
7736 if (low_n > 4)
7738 n += 16;
7739 low_n -= 16;
7742 else
7744 low_n = ((mode) == TImode ? 0
7745 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7746 n -= low_n;
7749 base_reg = gen_reg_rtx (SImode);
7750 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7751 emit_move_insn (base_reg, val);
7752 x = plus_constant (Pmode, base_reg, low_n);
7754 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7755 x = gen_rtx_PLUS (SImode, xop0, xop1);
7758 /* XXX We don't allow MINUS any more -- see comment in
7759 arm_legitimate_address_outer_p (). */
7760 else if (GET_CODE (x) == MINUS)
7762 rtx xop0 = XEXP (x, 0);
7763 rtx xop1 = XEXP (x, 1);
7765 if (CONSTANT_P (xop0))
7766 xop0 = force_reg (SImode, xop0);
7768 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7769 xop1 = force_reg (SImode, xop1);
7771 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7772 x = gen_rtx_MINUS (SImode, xop0, xop1);
7775 /* Make sure to take full advantage of the pre-indexed addressing mode
7776 with absolute addresses, which often allows the base register to
7777 be factored out across multiple adjacent memory references, and might
7778 even allow the minipool to be avoided entirely. */
7779 else if (CONST_INT_P (x) && optimize > 0)
7781 unsigned int bits;
7782 HOST_WIDE_INT mask, base, index;
7783 rtx base_reg;
7785 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7786 use an 8-bit index. So let's use a 12-bit index for SImode only and
7787 hope that arm_gen_constant will enable ldrb to use more bits. */
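/* Worked example, added here purely for illustration (the constants are
   not from the original sources): for an SImode access to the absolute
   address 0x00012345 we get bits == 12, mask == 0xfff, base == 0x12000
   and index == 0x345.  bit_count (0x12000) is 2, which is not above
   (32 - 12)/2 == 10, so the base is kept as-is and the address becomes
   (reg <- 0x12000) + 0x345, leaving a 12-bit index for the ldr itself.  */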
7788 bits = (mode == SImode) ? 12 : 8;
7789 mask = (1 << bits) - 1;
7790 base = INTVAL (x) & ~mask;
7791 index = INTVAL (x) & mask;
7792 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7794 /* It'll most probably be more efficient to generate the base
7795 with more bits set and use a negative index instead. */
7796 base |= mask;
7797 index -= mask;
7799 base_reg = force_reg (SImode, GEN_INT (base));
7800 x = plus_constant (Pmode, base_reg, index);
7803 if (flag_pic)
7805 /* We need to find and carefully transform any SYMBOL and LABEL
7806 references; so go back to the original address expression. */
7807 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7809 if (new_x != orig_x)
7810 x = new_x;
7813 return x;
7817 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7818 to be legitimate. If we find one, return the new, valid address. */
7819 rtx
7820 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7822 if (GET_CODE (x) == PLUS
7823 && CONST_INT_P (XEXP (x, 1))
7824 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7825 || INTVAL (XEXP (x, 1)) < 0))
7827 rtx xop0 = XEXP (x, 0);
7828 rtx xop1 = XEXP (x, 1);
7829 HOST_WIDE_INT offset = INTVAL (xop1);
7831 /* Try to fold the offset into a biasing of the base register and
7832 then offsetting that. Don't do this when optimizing for space
7833 since it can cause too many CSEs. */
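/* Worked example, added for illustration only: with SImode
   (GET_MODE_SIZE == 4), optimize_size set and offset == 260, the test
   offset >= 256 holds, so delta = 260 - (256 - 4) = 8 and the address
   is rewritten as (base + 252) + 8; both constants are small enough to
   be handled cheaply by the Thumb-1 patterns.  */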
7834 if (optimize_size && offset >= 0
7835 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7837 HOST_WIDE_INT delta;
7839 if (offset >= 256)
7840 delta = offset - (256 - GET_MODE_SIZE (mode));
7841 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7842 delta = 31 * GET_MODE_SIZE (mode);
7843 else
7844 delta = offset & (~31 * GET_MODE_SIZE (mode));
7846 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7847 NULL_RTX);
7848 x = plus_constant (Pmode, xop0, delta);
7850 else if (offset < 0 && offset > -256)
7851 /* Small negative offsets are best done with a subtract before the
7852 dereference; forcing these into a register normally takes two
7853 instructions. */
7854 x = force_operand (x, NULL_RTX);
7855 else
7857 /* For the remaining cases, force the constant into a register. */
7858 xop1 = force_reg (SImode, xop1);
7859 x = gen_rtx_PLUS (SImode, xop0, xop1);
7862 else if (GET_CODE (x) == PLUS
7863 && s_register_operand (XEXP (x, 1), SImode)
7864 && !s_register_operand (XEXP (x, 0), SImode))
7866 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7868 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7871 if (flag_pic)
7873 /* We need to find and carefully transform any SYMBOL and LABEL
7874 references; so go back to the original address expression. */
7875 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7877 if (new_x != orig_x)
7878 x = new_x;
7881 return x;
7884 bool
7885 arm_legitimize_reload_address (rtx *p,
7886 machine_mode mode,
7887 int opnum, int type,
7888 int ind_levels ATTRIBUTE_UNUSED)
7890 /* We must recognize output that we have already generated ourselves. */
7891 if (GET_CODE (*p) == PLUS
7892 && GET_CODE (XEXP (*p, 0)) == PLUS
7893 && REG_P (XEXP (XEXP (*p, 0), 0))
7894 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7895 && CONST_INT_P (XEXP (*p, 1)))
7897 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7898 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7899 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7900 return true;
7903 if (GET_CODE (*p) == PLUS
7904 && REG_P (XEXP (*p, 0))
7905 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7906 /* If the base register is equivalent to a constant, let the generic
7907 code handle it. Otherwise we will run into problems if a future
7908 reload pass decides to rematerialize the constant. */
7909 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7910 && CONST_INT_P (XEXP (*p, 1)))
7912 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7913 HOST_WIDE_INT low, high;
7915 /* Detect coprocessor load/stores. */
7916 bool coproc_p = ((TARGET_HARD_FLOAT
7917 && TARGET_VFP
7918 && (mode == SFmode || mode == DFmode))
7919 || (TARGET_REALLY_IWMMXT
7920 && VALID_IWMMXT_REG_MODE (mode))
7921 || (TARGET_NEON
7922 && (VALID_NEON_DREG_MODE (mode)
7923 || VALID_NEON_QREG_MODE (mode))));
7925 /* For some conditions, bail out when the lower two bits are nonzero (unaligned). */
7926 if ((val & 0x3) != 0
7927 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7928 && (coproc_p
7929 /* For DI, and DF under soft-float: */
7930 || ((mode == DImode || mode == DFmode)
7931 /* Without ldrd, we use stm/ldm, which does not
7932 fare well with unaligned bits. */
7933 && (! TARGET_LDRD
7934 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7935 || TARGET_THUMB2))))
7936 return false;
7938 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7939 where the (reg+high) part gets turned into a reload add insn,
7940 we try to decompose the index into high/low values that can often
7941 also lead to better reload CSE.
7942 For example:
7943 ldr r0, [r2, #4100] // Offset too large
7944 ldr r1, [r2, #4104] // Offset too large
7946 is best reloaded as:
7947 add t1, r2, #4096
7948 ldr r0, [t1, #4]
7949 add t2, r2, #4096
7950 ldr r1, [t2, #8]
7952 which post-reload CSE can simplify in most cases to eliminate the
7953 second add instruction:
7954 add t1, r2, #4096
7955 ldr r0, [t1, #4]
7956 ldr r1, [t1, #8]
7958 The idea here is that we want to split out the bits of the constant
7959 as a mask, rather than as subtracting the maximum offset that the
7960 respective type of load/store used can handle.
7962 When encountering negative offsets, we can still make use of them even if
7963 the overall offset is positive; sometimes this may lead to an immediate
7964 that can be constructed with fewer instructions.
7965 For example:
7966 ldr r0, [r2, #0x3FFFFC]
7968 This is best reloaded as:
7969 add t1, r2, #0x400000
7970 ldr r0, [t1, #-4]
7972 The trick for spotting this for a load insn with N bits of offset
7973 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7974 negative offset that is going to make bit N and all the bits below
7975 it become zero in the remainder part.
7977 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7978 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7979 used in most cases of ARM load/store instructions. */
7981 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7982 (((VAL) & ((1 << (N)) - 1)) \
7983 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7984 : 0)
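/* Worked example (illustrative, matching the 0x3FFFFC case above):
   SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10) sees nonzero low bits, takes the
   low 11 bits (0x7FC), clears bit 10 with the XOR (giving 0x3FC) and
   subtracts 0x400, returning -4.  The high part then becomes
   0x3FFFFC - (-4) = 0x400000, i.e. exactly the
   add t1, r2, #0x400000 / ldr r0, [t1, #-4] sequence shown earlier.  */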
7986 if (coproc_p)
7988 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7990 /* NEON quad-word load/stores are made of two double-word accesses,
7991 so the valid index range is reduced by 8. Treat as 9-bit range if
7992 we go over it. */
7993 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7994 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7996 else if (GET_MODE_SIZE (mode) == 8)
7998 if (TARGET_LDRD)
7999 low = (TARGET_THUMB2
8000 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8001 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8002 else
8003 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8004 to access doublewords. The supported load/store offsets are
8005 -8, -4, and 4, which we try to produce here. */
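/* Illustrative example (values made up, not from the original sources):
   the expression below sign-extends the low nibble of VAL, so for
   val == 0x10C we get low == (0xC ^ 0x8) - 0x8 == -4 and
   high == 0x110, producing one of the supported -8/-4/4 offsets.  */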
8006 low = ((val & 0xf) ^ 0x8) - 0x8;
8008 else if (GET_MODE_SIZE (mode) < 8)
8010 /* NEON element load/stores do not have an offset. */
8011 if (TARGET_NEON_FP16 && mode == HFmode)
8012 return false;
8014 if (TARGET_THUMB2)
8016 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8017 Try the wider 12-bit range first, and re-try if the result
8018 is out of range. */
8019 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8020 if (low < -255)
8021 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8023 else
8025 if (mode == HImode || mode == HFmode)
8027 if (arm_arch4)
8028 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8029 else
8031 /* The storehi/movhi_bytes fallbacks can use only
8032 [-4094,+4094] of the full ldrb/strb index range. */
8033 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8034 if (low == 4095 || low == -4095)
8035 return false;
8038 else
8039 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8042 else
8043 return false;
8045 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8046 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8047 - (unsigned HOST_WIDE_INT) 0x80000000);
8048 /* Check for overflow or zero */
8049 if (low == 0 || high == 0 || (high + low != val))
8050 return false;
8052 /* Reload the high part into a base reg; leave the low part
8053 in the mem.
8054 Note that replacing this gen_rtx_PLUS with plus_constant is
8055 wrong in this case because we rely on the
8056 (plus (plus reg c1) c2) structure being preserved so that
8057 XEXP (*p, 0) in push_reload below uses the correct term. */
8058 *p = gen_rtx_PLUS (GET_MODE (*p),
8059 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8060 GEN_INT (high)),
8061 GEN_INT (low));
8062 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8063 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8064 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8065 return true;
8068 return false;
8071 rtx
8072 thumb_legitimize_reload_address (rtx *x_p,
8073 machine_mode mode,
8074 int opnum, int type,
8075 int ind_levels ATTRIBUTE_UNUSED)
8077 rtx x = *x_p;
8079 if (GET_CODE (x) == PLUS
8080 && GET_MODE_SIZE (mode) < 4
8081 && REG_P (XEXP (x, 0))
8082 && XEXP (x, 0) == stack_pointer_rtx
8083 && CONST_INT_P (XEXP (x, 1))
8084 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8086 rtx orig_x = x;
8088 x = copy_rtx (x);
8089 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8090 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8091 return x;
8094 /* If both registers are hi-regs, then it's better to reload the
8095 entire expression rather than each register individually. That
8096 only requires one reload register rather than two. */
8097 if (GET_CODE (x) == PLUS
8098 && REG_P (XEXP (x, 0))
8099 && REG_P (XEXP (x, 1))
8100 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8101 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8103 rtx orig_x = x;
8105 x = copy_rtx (x);
8106 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8107 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8108 return x;
8111 return NULL;
8114 /* Return TRUE if X contains any TLS symbol references. */
8116 bool
8117 arm_tls_referenced_p (rtx x)
8119 if (! TARGET_HAVE_TLS)
8120 return false;
8122 subrtx_iterator::array_type array;
8123 FOR_EACH_SUBRTX (iter, array, x, ALL)
8125 const_rtx x = *iter;
8126 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8127 return true;
8129 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8130 TLS offsets, not real symbol references. */
8131 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8132 iter.skip_subrtxes ();
8134 return false;
8137 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8139 On the ARM, allow any integer (invalid ones are removed later by insn
8140 patterns), nice doubles and symbol_refs which refer to the function's
8141 constant pool XXX.
8143 When generating pic allow anything. */
8145 static bool
8146 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8148 /* At present, we have no support for Neon structure constants, so forbid
8149 them here. It might be possible to handle simple cases like 0 and -1
8150 in future. */
8151 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8152 return false;
8154 return flag_pic || !label_mentioned_p (x);
8157 static bool
8158 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8160 return (CONST_INT_P (x)
8161 || CONST_DOUBLE_P (x)
8162 || CONSTANT_ADDRESS_P (x)
8163 || flag_pic);
8166 static bool
8167 arm_legitimate_constant_p (machine_mode mode, rtx x)
8169 return (!arm_cannot_force_const_mem (mode, x)
8170 && (TARGET_32BIT
8171 ? arm_legitimate_constant_p_1 (mode, x)
8172 : thumb_legitimate_constant_p (mode, x)));
8175 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8177 static bool
8178 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8180 rtx base, offset;
8182 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8184 split_const (x, &base, &offset);
8185 if (GET_CODE (base) == SYMBOL_REF
8186 && !offset_within_block_p (base, INTVAL (offset)))
8187 return true;
8189 return arm_tls_referenced_p (x);
8192 #define REG_OR_SUBREG_REG(X) \
8193 (REG_P (X) \
8194 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8196 #define REG_OR_SUBREG_RTX(X) \
8197 (REG_P (X) ? (X) : SUBREG_REG (X))
8199 static inline int
8200 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8202 machine_mode mode = GET_MODE (x);
8203 int total, words;
8205 switch (code)
8207 case ASHIFT:
8208 case ASHIFTRT:
8209 case LSHIFTRT:
8210 case ROTATERT:
8211 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8213 case PLUS:
8214 case MINUS:
8215 case COMPARE:
8216 case NEG:
8217 case NOT:
8218 return COSTS_N_INSNS (1);
8220 case MULT:
8221 if (CONST_INT_P (XEXP (x, 1)))
8223 int cycles = 0;
8224 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
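/* Rough cycle estimate, comment added for clarity: the loop below counts
   roughly one iteration per two bits of the constant; e.g. for
   INTVAL == 100 it runs four times (100 -> 25 -> 6 -> 1 -> 0), so the
   cost returned is COSTS_N_INSNS (2) + 4.  */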
8226 while (i)
8228 i >>= 2;
8229 cycles++;
8231 return COSTS_N_INSNS (2) + cycles;
8233 return COSTS_N_INSNS (1) + 16;
8235 case SET:
8236 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8237 the mode. */
8238 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8239 return (COSTS_N_INSNS (words)
8240 + 4 * ((MEM_P (SET_SRC (x)))
8241 + MEM_P (SET_DEST (x))));
8243 case CONST_INT:
8244 if (outer == SET)
8246 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8247 return 0;
8248 if (thumb_shiftable_const (INTVAL (x)))
8249 return COSTS_N_INSNS (2);
8250 return COSTS_N_INSNS (3);
8252 else if ((outer == PLUS || outer == COMPARE)
8253 && INTVAL (x) < 256 && INTVAL (x) > -256)
8254 return 0;
8255 else if ((outer == IOR || outer == XOR || outer == AND)
8256 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8257 return COSTS_N_INSNS (1);
8258 else if (outer == AND)
8260 int i;
8261 /* This duplicates the tests in the andsi3 expander. */
8262 for (i = 9; i <= 31; i++)
8263 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8264 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8265 return COSTS_N_INSNS (2);
8267 else if (outer == ASHIFT || outer == ASHIFTRT
8268 || outer == LSHIFTRT)
8269 return 0;
8270 return COSTS_N_INSNS (2);
8272 case CONST:
8273 case CONST_DOUBLE:
8274 case LABEL_REF:
8275 case SYMBOL_REF:
8276 return COSTS_N_INSNS (3);
8278 case UDIV:
8279 case UMOD:
8280 case DIV:
8281 case MOD:
8282 return 100;
8284 case TRUNCATE:
8285 return 99;
8287 case AND:
8288 case XOR:
8289 case IOR:
8290 /* XXX guess. */
8291 return 8;
8293 case MEM:
8294 /* XXX another guess. */
8295 /* Memory costs quite a lot for the first word, but subsequent words
8296 load at the equivalent of a single insn each. */
8297 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8298 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8299 ? 4 : 0));
8301 case IF_THEN_ELSE:
8302 /* XXX a guess. */
8303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8304 return 14;
8305 return 2;
8307 case SIGN_EXTEND:
8308 case ZERO_EXTEND:
8309 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8310 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8312 if (mode == SImode)
8313 return total;
8315 if (arm_arch6)
8316 return total + COSTS_N_INSNS (1);
8318 /* Assume a two-shift sequence. Increase the cost slightly so
8319 we prefer actual shifts over an extend operation. */
8320 return total + 1 + COSTS_N_INSNS (2);
8322 default:
8323 return 99;
8327 static inline bool
8328 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8330 machine_mode mode = GET_MODE (x);
8331 enum rtx_code subcode;
8332 rtx operand;
8333 enum rtx_code code = GET_CODE (x);
8334 *total = 0;
8336 switch (code)
8338 case MEM:
8339 /* Memory costs quite a lot for the first word, but subsequent words
8340 load at the equivalent of a single insn each. */
8341 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8342 return true;
8344 case DIV:
8345 case MOD:
8346 case UDIV:
8347 case UMOD:
8348 if (TARGET_HARD_FLOAT && mode == SFmode)
8349 *total = COSTS_N_INSNS (2);
8350 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8351 *total = COSTS_N_INSNS (4);
8352 else
8353 *total = COSTS_N_INSNS (20);
8354 return false;
8356 case ROTATE:
8357 if (REG_P (XEXP (x, 1)))
8358 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8359 else if (!CONST_INT_P (XEXP (x, 1)))
8360 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8362 /* Fall through */
8363 case ROTATERT:
8364 if (mode != SImode)
8366 *total += COSTS_N_INSNS (4);
8367 return true;
8370 /* Fall through */
8371 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8372 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8373 if (mode == DImode)
8375 *total += COSTS_N_INSNS (3);
8376 return true;
8379 *total += COSTS_N_INSNS (1);
8380 /* Increase the cost of complex shifts because they aren't any faster,
8381 and reduce dual issue opportunities. */
8382 if (arm_tune_cortex_a9
8383 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8384 ++*total;
8386 return true;
8388 case MINUS:
8389 if (mode == DImode)
8391 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8392 if (CONST_INT_P (XEXP (x, 0))
8393 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8395 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8396 return true;
8399 if (CONST_INT_P (XEXP (x, 1))
8400 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8402 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8403 return true;
8406 return false;
8409 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8411 if (TARGET_HARD_FLOAT
8412 && (mode == SFmode
8413 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8415 *total = COSTS_N_INSNS (1);
8416 if (CONST_DOUBLE_P (XEXP (x, 0))
8417 && arm_const_double_rtx (XEXP (x, 0)))
8419 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8420 return true;
8423 if (CONST_DOUBLE_P (XEXP (x, 1))
8424 && arm_const_double_rtx (XEXP (x, 1)))
8426 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8427 return true;
8430 return false;
8432 *total = COSTS_N_INSNS (20);
8433 return false;
8436 *total = COSTS_N_INSNS (1);
8437 if (CONST_INT_P (XEXP (x, 0))
8438 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8440 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8441 return true;
8444 subcode = GET_CODE (XEXP (x, 1));
8445 if (subcode == ASHIFT || subcode == ASHIFTRT
8446 || subcode == LSHIFTRT
8447 || subcode == ROTATE || subcode == ROTATERT)
8449 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8450 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8451 return true;
8454 /* A shift as a part of RSB costs no more than RSB itself. */
8455 if (GET_CODE (XEXP (x, 0)) == MULT
8456 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8458 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8459 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8460 return true;
8463 if (subcode == MULT
8464 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8466 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8467 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8468 return true;
8471 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8472 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8474 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8475 if (REG_P (XEXP (XEXP (x, 1), 0))
8476 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8477 *total += COSTS_N_INSNS (1);
8479 return true;
8482 /* Fall through */
8484 case PLUS:
8485 if (code == PLUS && arm_arch6 && mode == SImode
8486 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8487 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8489 *total = COSTS_N_INSNS (1);
8490 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8491 0, speed);
8492 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8493 return true;
8496 /* MLA: All arguments must be registers. We filter out
8497 multiplication by a power of two, so that we fall through to
8498 the code below. */
8499 if (GET_CODE (XEXP (x, 0)) == MULT
8500 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8502 /* The cost comes from the cost of the multiply. */
8503 return false;
8506 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8508 if (TARGET_HARD_FLOAT
8509 && (mode == SFmode
8510 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8512 *total = COSTS_N_INSNS (1);
8513 if (CONST_DOUBLE_P (XEXP (x, 1))
8514 && arm_const_double_rtx (XEXP (x, 1)))
8516 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8517 return true;
8520 return false;
8523 *total = COSTS_N_INSNS (20);
8524 return false;
8527 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8528 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8530 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8531 if (REG_P (XEXP (XEXP (x, 0), 0))
8532 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8533 *total += COSTS_N_INSNS (1);
8534 return true;
8537 /* Fall through */
8539 case AND: case XOR: case IOR:
8541 /* Normally the frame registers will be split into reg+const during
8542 reload, so it is a bad idea to combine them with other instructions,
8543 since then they might not be moved outside of loops. As a compromise
8544 we allow integration with ops that have a constant as their second
8545 operand. */
8546 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8547 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8548 && !CONST_INT_P (XEXP (x, 1)))
8549 *total = COSTS_N_INSNS (1);
8551 if (mode == DImode)
8553 *total += COSTS_N_INSNS (2);
8554 if (CONST_INT_P (XEXP (x, 1))
8555 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8557 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8558 return true;
8561 return false;
8564 *total += COSTS_N_INSNS (1);
8565 if (CONST_INT_P (XEXP (x, 1))
8566 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8568 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8569 return true;
8571 subcode = GET_CODE (XEXP (x, 0));
8572 if (subcode == ASHIFT || subcode == ASHIFTRT
8573 || subcode == LSHIFTRT
8574 || subcode == ROTATE || subcode == ROTATERT)
8576 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8577 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8578 return true;
8581 if (subcode == MULT
8582 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8584 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8586 return true;
8589 if (subcode == UMIN || subcode == UMAX
8590 || subcode == SMIN || subcode == SMAX)
8592 *total = COSTS_N_INSNS (3);
8593 return true;
8596 return false;
8598 case MULT:
8599 /* This should have been handled by the CPU specific routines. */
8600 gcc_unreachable ();
8602 case TRUNCATE:
8603 if (arm_arch3m && mode == SImode
8604 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8605 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8606 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8607 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8608 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8609 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8611 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8612 return true;
8614 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8615 return false;
8617 case NEG:
8618 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8620 if (TARGET_HARD_FLOAT
8621 && (mode == SFmode
8622 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8624 *total = COSTS_N_INSNS (1);
8625 return false;
8627 *total = COSTS_N_INSNS (2);
8628 return false;
8631 /* Fall through */
8632 case NOT:
8633 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8634 if (mode == SImode && code == NOT)
8636 subcode = GET_CODE (XEXP (x, 0));
8637 if (subcode == ASHIFT || subcode == ASHIFTRT
8638 || subcode == LSHIFTRT
8639 || subcode == ROTATE || subcode == ROTATERT
8640 || (subcode == MULT
8641 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8644 /* Register shifts cost an extra cycle. */
8645 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8646 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8647 subcode, 1, speed);
8648 return true;
8652 return false;
8654 case IF_THEN_ELSE:
8655 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8657 *total = COSTS_N_INSNS (4);
8658 return true;
8661 operand = XEXP (x, 0);
8663 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8664 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8665 && REG_P (XEXP (operand, 0))
8666 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8667 *total += COSTS_N_INSNS (1);
8668 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8669 + rtx_cost (XEXP (x, 2), code, 2, speed));
8670 return true;
8672 case NE:
8673 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8675 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8676 return true;
8678 goto scc_insn;
8680 case GE:
8681 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8682 && mode == SImode && XEXP (x, 1) == const0_rtx)
8684 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8685 return true;
8687 goto scc_insn;
8689 case LT:
8690 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8691 && mode == SImode && XEXP (x, 1) == const0_rtx)
8693 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8694 return true;
8696 goto scc_insn;
8698 case EQ:
8699 case GT:
8700 case LE:
8701 case GEU:
8702 case LTU:
8703 case GTU:
8704 case LEU:
8705 case UNORDERED:
8706 case ORDERED:
8707 case UNEQ:
8708 case UNGE:
8709 case UNLT:
8710 case UNGT:
8711 case UNLE:
8712 scc_insn:
8713 /* SCC insns. When the comparison has already been
8714 performed, they cost 2 instructions. Otherwise they need
8715 an additional comparison before them. */
8716 *total = COSTS_N_INSNS (2);
8717 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8719 return true;
8722 /* Fall through */
8723 case COMPARE:
8724 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8726 *total = 0;
8727 return true;
8730 *total += COSTS_N_INSNS (1);
8731 if (CONST_INT_P (XEXP (x, 1))
8732 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8734 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8735 return true;
8738 subcode = GET_CODE (XEXP (x, 0));
8739 if (subcode == ASHIFT || subcode == ASHIFTRT
8740 || subcode == LSHIFTRT
8741 || subcode == ROTATE || subcode == ROTATERT)
8743 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8744 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8745 return true;
8748 if (subcode == MULT
8749 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8751 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8752 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8753 return true;
8756 return false;
8758 case UMIN:
8759 case UMAX:
8760 case SMIN:
8761 case SMAX:
8762 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8763 if (!CONST_INT_P (XEXP (x, 1))
8764 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8765 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8766 return true;
8768 case ABS:
8769 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8771 if (TARGET_HARD_FLOAT
8772 && (mode == SFmode
8773 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8775 *total = COSTS_N_INSNS (1);
8776 return false;
8778 *total = COSTS_N_INSNS (20);
8779 return false;
8781 *total = COSTS_N_INSNS (1);
8782 if (mode == DImode)
8783 *total += COSTS_N_INSNS (3);
8784 return false;
8786 case SIGN_EXTEND:
8787 case ZERO_EXTEND:
8788 *total = 0;
8789 if (GET_MODE_CLASS (mode) == MODE_INT)
8791 rtx op = XEXP (x, 0);
8792 machine_mode opmode = GET_MODE (op);
8794 if (mode == DImode)
8795 *total += COSTS_N_INSNS (1);
8797 if (opmode != SImode)
8799 if (MEM_P (op))
8801 /* If !arm_arch4, we use one of the extendhisi2_mem
8802 or movhi_bytes patterns for HImode. For a QImode
8803 sign extension, we first zero-extend from memory
8804 and then perform a shift sequence. */
8805 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8806 *total += COSTS_N_INSNS (2);
8808 else if (arm_arch6)
8809 *total += COSTS_N_INSNS (1);
8811 /* We don't have the necessary insn, so we need to perform some
8812 other operation. */
8813 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8814 /* An AND with constant 255. */
8815 *total += COSTS_N_INSNS (1);
8816 else
8817 /* A shift sequence. Increase costs slightly to avoid
8818 combining two shifts into an extend operation. */
8819 *total += COSTS_N_INSNS (2) + 1;
8822 return false;
8825 switch (GET_MODE (XEXP (x, 0)))
8827 case V8QImode:
8828 case V4HImode:
8829 case V2SImode:
8830 case V4QImode:
8831 case V2HImode:
8832 *total = COSTS_N_INSNS (1);
8833 return false;
8835 default:
8836 gcc_unreachable ();
8838 gcc_unreachable ();
8840 case ZERO_EXTRACT:
8841 case SIGN_EXTRACT:
8842 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8843 return true;
8845 case CONST_INT:
8846 if (const_ok_for_arm (INTVAL (x))
8847 || const_ok_for_arm (~INTVAL (x)))
8848 *total = COSTS_N_INSNS (1);
8849 else
8850 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8851 INTVAL (x), NULL_RTX,
8852 NULL_RTX, 0, 0));
8853 return true;
8855 case CONST:
8856 case LABEL_REF:
8857 case SYMBOL_REF:
8858 *total = COSTS_N_INSNS (3);
8859 return true;
8861 case HIGH:
8862 *total = COSTS_N_INSNS (1);
8863 return true;
8865 case LO_SUM:
8866 *total = COSTS_N_INSNS (1);
8867 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8868 return true;
8870 case CONST_DOUBLE:
8871 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8872 && (mode == SFmode || !TARGET_VFP_SINGLE))
8873 *total = COSTS_N_INSNS (1);
8874 else
8875 *total = COSTS_N_INSNS (4);
8876 return true;
8878 case SET:
8879 /* The vec_extract patterns accept memory operands that require an
8880 address reload. Account for the cost of that reload to give the
8881 auto-inc-dec pass an incentive to try to replace them. */
8882 if (TARGET_NEON && MEM_P (SET_DEST (x))
8883 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8885 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8886 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8887 *total += COSTS_N_INSNS (1);
8888 return true;
8890 /* Likewise for the vec_set patterns. */
8891 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8892 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8893 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8895 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8896 *total = rtx_cost (mem, code, 0, speed);
8897 if (!neon_vector_mem_operand (mem, 2, true))
8898 *total += COSTS_N_INSNS (1);
8899 return true;
8901 return false;
8903 case UNSPEC:
8904 /* We cost this as high as our memory costs to allow this to
8905 be hoisted from loops. */
8906 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8908 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8910 return true;
8912 case CONST_VECTOR:
8913 if (TARGET_NEON
8914 && TARGET_HARD_FLOAT
8915 && outer == SET
8916 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8917 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8918 *total = COSTS_N_INSNS (1);
8919 else
8920 *total = COSTS_N_INSNS (4);
8921 return true;
8923 default:
8924 *total = COSTS_N_INSNS (4);
8925 return false;
8929 /* Estimates the size cost of thumb1 instructions.
8930 For now most of the code is copied from thumb1_rtx_costs. We need
8931 finer-grained tuning when we have more related test cases. */
8932 static inline int
8933 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8935 machine_mode mode = GET_MODE (x);
8936 int words;
8938 switch (code)
8940 case ASHIFT:
8941 case ASHIFTRT:
8942 case LSHIFTRT:
8943 case ROTATERT:
8944 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8946 case PLUS:
8947 case MINUS:
8948 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8949 defined by RTL expansion, especially for the expansion of
8950 multiplication. */
8951 if ((GET_CODE (XEXP (x, 0)) == MULT
8952 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8953 || (GET_CODE (XEXP (x, 1)) == MULT
8954 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8955 return COSTS_N_INSNS (2);
8956 /* Deliberately fall through for normal RTX. */
8957 case COMPARE:
8958 case NEG:
8959 case NOT:
8960 return COSTS_N_INSNS (1);
8962 case MULT:
8963 if (CONST_INT_P (XEXP (x, 1)))
8965 /* The Thumb-1 mul instruction can't operate on a constant; we must
8966 load it into a register first. */
8967 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8968 /* For the targets which have a very small and high-latency multiply
8969 unit, we prefer to synthesize the mult with up to 5 instructions,
8970 giving a good balance between size and performance. */
8971 if (arm_arch6m && arm_m_profile_small_mul)
8972 return COSTS_N_INSNS (5);
8973 else
8974 return COSTS_N_INSNS (1) + const_size;
8976 return COSTS_N_INSNS (1);
8978 case SET:
8979 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8980 the mode. */
8981 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8982 return COSTS_N_INSNS (words)
8983 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8984 || satisfies_constraint_K (SET_SRC (x))
8985 /* thumb1_movdi_insn. */
8986 || ((words > 1) && MEM_P (SET_SRC (x))));
8988 case CONST_INT:
8989 if (outer == SET)
8991 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8992 return COSTS_N_INSNS (1);
8993 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8994 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8995 return COSTS_N_INSNS (2);
8996 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8997 if (thumb_shiftable_const (INTVAL (x)))
8998 return COSTS_N_INSNS (2);
8999 return COSTS_N_INSNS (3);
9001 else if ((outer == PLUS || outer == COMPARE)
9002 && INTVAL (x) < 256 && INTVAL (x) > -256)
9003 return 0;
9004 else if ((outer == IOR || outer == XOR || outer == AND)
9005 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9006 return COSTS_N_INSNS (1);
9007 else if (outer == AND)
9009 int i;
9010 /* This duplicates the tests in the andsi3 expander. */
9011 for (i = 9; i <= 31; i++)
9012 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9013 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9014 return COSTS_N_INSNS (2);
9016 else if (outer == ASHIFT || outer == ASHIFTRT
9017 || outer == LSHIFTRT)
9018 return 0;
9019 return COSTS_N_INSNS (2);
9021 case CONST:
9022 case CONST_DOUBLE:
9023 case LABEL_REF:
9024 case SYMBOL_REF:
9025 return COSTS_N_INSNS (3);
9027 case UDIV:
9028 case UMOD:
9029 case DIV:
9030 case MOD:
9031 return 100;
9033 case TRUNCATE:
9034 return 99;
9036 case AND:
9037 case XOR:
9038 case IOR:
9039 return COSTS_N_INSNS (1);
9041 case MEM:
9042 return (COSTS_N_INSNS (1)
9043 + COSTS_N_INSNS (1)
9044 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9045 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9046 ? COSTS_N_INSNS (1) : 0));
9048 case IF_THEN_ELSE:
9049 /* XXX a guess. */
9050 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9051 return 14;
9052 return 2;
9054 case ZERO_EXTEND:
9055 /* XXX still guessing. */
9056 switch (GET_MODE (XEXP (x, 0)))
9058 case QImode:
9059 return (1 + (mode == DImode ? 4 : 0)
9060 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9062 case HImode:
9063 return (4 + (mode == DImode ? 4 : 0)
9064 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9066 case SImode:
9067 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9069 default:
9070 return 99;
9073 default:
9074 return 99;
9078 /* RTX costs when optimizing for size. */
9079 static bool
9080 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9081 int *total)
9083 machine_mode mode = GET_MODE (x);
9084 if (TARGET_THUMB1)
9086 *total = thumb1_size_rtx_costs (x, code, outer_code);
9087 return true;
9090 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9091 switch (code)
9093 case MEM:
9094 /* A memory access costs 1 insn if the mode is small or the address is
9095 a single register; otherwise it costs one insn per word. */
9096 if (REG_P (XEXP (x, 0)))
9097 *total = COSTS_N_INSNS (1);
9098 else if (flag_pic
9099 && GET_CODE (XEXP (x, 0)) == PLUS
9100 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9101 /* This will be split into two instructions.
9102 See arm.md:calculate_pic_address. */
9103 *total = COSTS_N_INSNS (2);
9104 else
9105 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9106 return true;
9108 case DIV:
9109 case MOD:
9110 case UDIV:
9111 case UMOD:
9112 /* Needs a libcall, so it costs about this. */
9113 *total = COSTS_N_INSNS (2);
9114 return false;
9116 case ROTATE:
9117 if (mode == SImode && REG_P (XEXP (x, 1)))
9119 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9120 return true;
9122 /* Fall through */
9123 case ROTATERT:
9124 case ASHIFT:
9125 case LSHIFTRT:
9126 case ASHIFTRT:
9127 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9129 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9130 return true;
9132 else if (mode == SImode)
9134 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9135 /* Slightly disparage register shifts, but not by much. */
9136 if (!CONST_INT_P (XEXP (x, 1)))
9137 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9138 return true;
9141 /* Needs a libcall. */
9142 *total = COSTS_N_INSNS (2);
9143 return false;
9145 case MINUS:
9146 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9147 && (mode == SFmode || !TARGET_VFP_SINGLE))
9149 *total = COSTS_N_INSNS (1);
9150 return false;
9153 if (mode == SImode)
9155 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9156 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9158 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9159 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9160 || subcode1 == ROTATE || subcode1 == ROTATERT
9161 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9162 || subcode1 == ASHIFTRT)
9164 /* It's just the cost of the two operands. */
9165 *total = 0;
9166 return false;
9169 *total = COSTS_N_INSNS (1);
9170 return false;
9173 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9174 return false;
9176 case PLUS:
9177 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9178 && (mode == SFmode || !TARGET_VFP_SINGLE))
9180 *total = COSTS_N_INSNS (1);
9181 return false;
9184 /* A shift as a part of ADD costs nothing. */
9185 if (GET_CODE (XEXP (x, 0)) == MULT
9186 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9188 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9189 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9190 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9191 return true;
9194 /* Fall through */
9195 case AND: case XOR: case IOR:
9196 if (mode == SImode)
9198 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9200 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9201 || subcode == LSHIFTRT || subcode == ASHIFTRT
9202 || (code == AND && subcode == NOT))
9204 /* It's just the cost of the two operands. */
9205 *total = 0;
9206 return false;
9210 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9211 return false;
9213 case MULT:
9214 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9215 return false;
9217 case NEG:
9218 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9219 && (mode == SFmode || !TARGET_VFP_SINGLE))
9221 *total = COSTS_N_INSNS (1);
9222 return false;
9225 /* Fall through */
9226 case NOT:
9227 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9229 return false;
9231 case IF_THEN_ELSE:
9232 *total = 0;
9233 return false;
9235 case COMPARE:
9236 if (cc_register (XEXP (x, 0), VOIDmode))
9237 * total = 0;
9238 else
9239 *total = COSTS_N_INSNS (1);
9240 return false;
9242 case ABS:
9243 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9244 && (mode == SFmode || !TARGET_VFP_SINGLE))
9245 *total = COSTS_N_INSNS (1);
9246 else
9247 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9248 return false;
9250 case SIGN_EXTEND:
9251 case ZERO_EXTEND:
9252 return arm_rtx_costs_1 (x, outer_code, total, 0);
9254 case CONST_INT:
9255 if (const_ok_for_arm (INTVAL (x)))
9256 /* A multiplication by a constant requires another instruction
9257 to load the constant to a register. */
9258 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9259 ? 1 : 0);
9260 else if (const_ok_for_arm (~INTVAL (x)))
9261 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9262 else if (const_ok_for_arm (-INTVAL (x)))
9264 if (outer_code == COMPARE || outer_code == PLUS
9265 || outer_code == MINUS)
9266 *total = 0;
9267 else
9268 *total = COSTS_N_INSNS (1);
9270 else
9271 *total = COSTS_N_INSNS (2);
9272 return true;
9274 case CONST:
9275 case LABEL_REF:
9276 case SYMBOL_REF:
9277 *total = COSTS_N_INSNS (2);
9278 return true;
9280 case CONST_DOUBLE:
9281 *total = COSTS_N_INSNS (4);
9282 return true;
9284 case CONST_VECTOR:
9285 if (TARGET_NEON
9286 && TARGET_HARD_FLOAT
9287 && outer_code == SET
9288 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9289 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9290 *total = COSTS_N_INSNS (1);
9291 else
9292 *total = COSTS_N_INSNS (4);
9293 return true;
9295 case HIGH:
9296 case LO_SUM:
9297 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9298 cost of these slightly. */
9299 *total = COSTS_N_INSNS (1) + 1;
9300 return true;
9302 case SET:
9303 return false;
9305 default:
9306 if (mode != VOIDmode)
9307 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9308 else
9309 *total = COSTS_N_INSNS (4); /* Who knows? */
9310 return false;
9314 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9315 operand, then return the operand that is being shifted. If the shift
9316 is not by a constant, then set SHIFT_REG to point to the operand.
9317 Return NULL if OP is not a shifter operand. */
9318 static rtx
9319 shifter_op_p (rtx op, rtx *shift_reg)
9321 enum rtx_code code = GET_CODE (op);
9323 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9324 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9325 return XEXP (op, 0);
9326 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9327 return XEXP (op, 0);
9328 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9329 || code == ASHIFTRT)
9331 if (!CONST_INT_P (XEXP (op, 1)))
9332 *shift_reg = XEXP (op, 1);
9333 return XEXP (op, 0);
9336 return NULL;
9339 static bool
9340 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9342 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9343 gcc_assert (GET_CODE (x) == UNSPEC);
9345 switch (XINT (x, 1))
9347 case UNSPEC_UNALIGNED_LOAD:
9348 /* We can only do unaligned loads into the integer unit, and we can't
9349 use LDM or LDRD. */
9350 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9351 if (speed_p)
9352 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9353 + extra_cost->ldst.load_unaligned);
9355 #ifdef NOT_YET
9356 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9357 ADDR_SPACE_GENERIC, speed_p);
9358 #endif
9359 return true;
9361 case UNSPEC_UNALIGNED_STORE:
9362 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9363 if (speed_p)
9364 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9365 + extra_cost->ldst.store_unaligned);
9367 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9368 #ifdef NOT_YET
9369 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9370 ADDR_SPACE_GENERIC, speed_p);
9371 #endif
9372 return true;
9374 case UNSPEC_VRINTZ:
9375 case UNSPEC_VRINTP:
9376 case UNSPEC_VRINTM:
9377 case UNSPEC_VRINTR:
9378 case UNSPEC_VRINTX:
9379 case UNSPEC_VRINTA:
9380 *cost = COSTS_N_INSNS (1);
9381 if (speed_p)
9382 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9384 return true;
9385 default:
9386 *cost = COSTS_N_INSNS (2);
9387 break;
9389 return false;
9392 /* Cost of a libcall. We assume one insn per argument, an amount for the
9393 call (one insn for -Os) and then one for processing the result. */
9394 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9396 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9397 do \
9399 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9400 if (shift_op != NULL \
9401 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9403 if (shift_reg) \
9405 if (speed_p) \
9406 *cost += extra_cost->alu.arith_shift_reg; \
9407 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9409 else if (speed_p) \
9410 *cost += extra_cost->alu.arith_shift; \
9412 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9413 + rtx_cost (XEXP (x, 1 - IDX), \
9414 OP, 1, speed_p)); \
9415 return true; \
9418 while (0);
9420 /* RTX costs. Make an estimate of the cost of executing the operation
9421 X, which is contained within an operation with code OUTER_CODE.
9422 SPEED_P indicates whether the cost desired is the performance cost,
9423 or the size cost. The estimate is stored in COST and the return
9424 value is TRUE if the cost calculation is final, or FALSE if the
9425 caller should recurse through the operands of X to add additional
9426 costs.
9428 We currently make no attempt to model the size savings of Thumb-2
9429 16-bit instructions. At the normal points in compilation where
9430 this code is called we have no measure of whether the condition
9431 flags are live or not, and thus no realistic way to determine what
9432 the size will eventually be. */
9433 static bool
9434 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9435 const struct cpu_cost_table *extra_cost,
9436 int *cost, bool speed_p)
9438 machine_mode mode = GET_MODE (x);
9440 if (TARGET_THUMB1)
9442 if (speed_p)
9443 *cost = thumb1_rtx_costs (x, code, outer_code);
9444 else
9445 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9446 return true;
9449 switch (code)
9451 case SET:
9452 *cost = 0;
9453 /* SET RTXs don't have a mode so we get it from the destination. */
9454 mode = GET_MODE (SET_DEST (x));
9456 if (REG_P (SET_SRC (x))
9457 && REG_P (SET_DEST (x)))
9459 /* Assume that most copies can be done with a single insn,
9460 unless we don't have HW FP, in which case everything
9461 larger than word mode will require two insns. */
9462 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9463 && GET_MODE_SIZE (mode) > 4)
9464 || mode == DImode)
9465 ? 2 : 1);
9466 /* Conditional register moves can be encoded
9467 in 16 bits in Thumb mode. */
9468 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9469 *cost >>= 1;
9471 return true;
9474 if (CONST_INT_P (SET_SRC (x)))
9476 /* Handle CONST_INT here, since the value doesn't have a mode
9477 and we would otherwise be unable to work out the true cost. */
9478 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9479 outer_code = SET;
9480 /* Slightly lower the cost of setting a core reg to a constant.
9481 This helps break up chains and allows for better scheduling. */
9482 if (REG_P (SET_DEST (x))
9483 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9484 *cost -= 1;
9485 x = SET_SRC (x);
9486 /* Immediate moves with an immediate in the range [0, 255] can be
9487 encoded in 16 bits in Thumb mode. */
9488 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9489 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9490 *cost >>= 1;
9491 goto const_int_cost;
9494 return false;
9496 case MEM:
9497 /* A memory access costs 1 insn if the mode is small or the address is
9498 a single register; otherwise it costs one insn per word. */
9499 if (REG_P (XEXP (x, 0)))
9500 *cost = COSTS_N_INSNS (1);
9501 else if (flag_pic
9502 && GET_CODE (XEXP (x, 0)) == PLUS
9503 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9504 /* This will be split into two instructions.
9505 See arm.md:calculate_pic_address. */
9506 *cost = COSTS_N_INSNS (2);
9507 else
9508 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9510 /* For speed optimizations, add the costs of the address and
9511 accessing memory. */
9512 if (speed_p)
9513 #ifdef NOT_YET
9514 *cost += (extra_cost->ldst.load
9515 + arm_address_cost (XEXP (x, 0), mode,
9516 ADDR_SPACE_GENERIC, speed_p));
9517 #else
9518 *cost += extra_cost->ldst.load;
9519 #endif
9520 return true;
9522 case PARALLEL:
9524 /* Calculations of LDM costs are complex. We assume an initial cost
9525 (ldm_1st) which will load the number of registers mentioned in
9526 ldm_regs_per_insn_1st registers; then each additional
9527 ldm_regs_per_insn_subsequent registers cost one more insn. The
9528 formula for N regs is thus:
9530 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9531 + ldm_regs_per_insn_subsequent - 1)
9532 / ldm_regs_per_insn_subsequent).
9534 Additional costs may also be added for addressing. A similar
9535 formula is used for STM. */
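/* Illustrative example (the per-core numbers are hypothetical): with
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   loading 5 registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */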
9537 bool is_ldm = load_multiple_operation (x, SImode);
9538 bool is_stm = store_multiple_operation (x, SImode);
9540 *cost = COSTS_N_INSNS (1);
9542 if (is_ldm || is_stm)
9544 if (speed_p)
9546 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9547 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9548 ? extra_cost->ldst.ldm_regs_per_insn_1st
9549 : extra_cost->ldst.stm_regs_per_insn_1st;
9550 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9551 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9552 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9554 *cost += regs_per_insn_1st
9555 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9556 + regs_per_insn_sub - 1)
9557 / regs_per_insn_sub);
9558 return true;
9562 return false;
9564 case DIV:
9565 case UDIV:
9566 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9567 && (mode == SFmode || !TARGET_VFP_SINGLE))
9568 *cost = COSTS_N_INSNS (speed_p
9569 ? extra_cost->fp[mode != SFmode].div : 1);
9570 else if (mode == SImode && TARGET_IDIV)
9571 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9572 else
9573 *cost = LIBCALL_COST (2);
9574 return false; /* All arguments must be in registers. */
9576 case MOD:
9577 case UMOD:
9578 *cost = LIBCALL_COST (2);
9579 return false; /* All arguments must be in registers. */
9581 case ROTATE:
9582 if (mode == SImode && REG_P (XEXP (x, 1)))
9584 *cost = (COSTS_N_INSNS (2)
9585 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9586 if (speed_p)
9587 *cost += extra_cost->alu.shift_reg;
9588 return true;
9590 /* Fall through */
9591 case ROTATERT:
9592 case ASHIFT:
9593 case LSHIFTRT:
9594 case ASHIFTRT:
9595 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9597 *cost = (COSTS_N_INSNS (3)
9598 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9599 if (speed_p)
9600 *cost += 2 * extra_cost->alu.shift;
9601 return true;
9603 else if (mode == SImode)
9605 *cost = (COSTS_N_INSNS (1)
9606 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9607 /* Slightly disparage register shifts at -Os, but not by much. */
9608 if (!CONST_INT_P (XEXP (x, 1)))
9609 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9610 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9611 return true;
9613 else if (GET_MODE_CLASS (mode) == MODE_INT
9614 && GET_MODE_SIZE (mode) < 4)
9616 if (code == ASHIFT)
9618 *cost = (COSTS_N_INSNS (1)
9619 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9620 /* Slightly disparage register shifts at -Os, but not by
9621 much. */
9622 if (!CONST_INT_P (XEXP (x, 1)))
9623 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9624 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9626 else if (code == LSHIFTRT || code == ASHIFTRT)
9628 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9630 /* Can use SBFX/UBFX. */
9631 *cost = COSTS_N_INSNS (1);
9632 if (speed_p)
9633 *cost += extra_cost->alu.bfx;
9634 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9636 else
9638 *cost = COSTS_N_INSNS (2);
9639 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9640 if (speed_p)
9642 if (CONST_INT_P (XEXP (x, 1)))
9643 *cost += 2 * extra_cost->alu.shift;
9644 else
9645 *cost += (extra_cost->alu.shift
9646 + extra_cost->alu.shift_reg);
9648 else
9649 /* Slightly disparage register shifts. */
9650 *cost += !CONST_INT_P (XEXP (x, 1));
9653 else /* Rotates. */
9655 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9656 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9657 if (speed_p)
9659 if (CONST_INT_P (XEXP (x, 1)))
9660 *cost += (2 * extra_cost->alu.shift
9661 + extra_cost->alu.log_shift);
9662 else
9663 *cost += (extra_cost->alu.shift
9664 + extra_cost->alu.shift_reg
9665 + extra_cost->alu.log_shift_reg);
9668 return true;
9671 *cost = LIBCALL_COST (2);
9672 return false;
9674 case BSWAP:
9675 if (arm_arch6)
9677 if (mode == SImode)
9679 *cost = COSTS_N_INSNS (1);
9680 if (speed_p)
9681 *cost += extra_cost->alu.rev;
9683 return false;
9686 else
9688 /* No rev instruction available. Look at arm_legacy_rev
9689 and thumb_legacy_rev for the form of RTL used then. */
9690 if (TARGET_THUMB)
9692 *cost = COSTS_N_INSNS (10);
9694 if (speed_p)
9696 *cost += 6 * extra_cost->alu.shift;
9697 *cost += 3 * extra_cost->alu.logical;
9700 else
9702 *cost = COSTS_N_INSNS (5);
9704 if (speed_p)
9706 *cost += 2 * extra_cost->alu.shift;
9707 *cost += extra_cost->alu.arith_shift;
9708 *cost += 2 * extra_cost->alu.logical;
9711 return true;
9713 return false;
9715 case MINUS:
9716 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9717 && (mode == SFmode || !TARGET_VFP_SINGLE))
9719 *cost = COSTS_N_INSNS (1);
9720 if (GET_CODE (XEXP (x, 0)) == MULT
9721 || GET_CODE (XEXP (x, 1)) == MULT)
9723 rtx mul_op0, mul_op1, sub_op;
9725 if (speed_p)
9726 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9728 if (GET_CODE (XEXP (x, 0)) == MULT)
9730 mul_op0 = XEXP (XEXP (x, 0), 0);
9731 mul_op1 = XEXP (XEXP (x, 0), 1);
9732 sub_op = XEXP (x, 1);
9734 else
9736 mul_op0 = XEXP (XEXP (x, 1), 0);
9737 mul_op1 = XEXP (XEXP (x, 1), 1);
9738 sub_op = XEXP (x, 0);
9741 /* The first operand of the multiply may be optionally
9742 negated. */
9743 if (GET_CODE (mul_op0) == NEG)
9744 mul_op0 = XEXP (mul_op0, 0);
9746 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9747 + rtx_cost (mul_op1, code, 0, speed_p)
9748 + rtx_cost (sub_op, code, 0, speed_p));
9750 return true;
9753 if (speed_p)
9754 *cost += extra_cost->fp[mode != SFmode].addsub;
9755 return false;
9758 if (mode == SImode)
9760 rtx shift_by_reg = NULL;
9761 rtx shift_op;
9762 rtx non_shift_op;
9764 *cost = COSTS_N_INSNS (1);
9766 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9767 if (shift_op == NULL)
9769 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9770 non_shift_op = XEXP (x, 0);
9772 else
9773 non_shift_op = XEXP (x, 1);
9775 if (shift_op != NULL)
9777 if (shift_by_reg != NULL)
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith_shift_reg;
9781 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9783 else if (speed_p)
9784 *cost += extra_cost->alu.arith_shift;
9786 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9787 + rtx_cost (non_shift_op, code, 0, speed_p));
9788 return true;
9791 if (arm_arch_thumb2
9792 && GET_CODE (XEXP (x, 1)) == MULT)
9794 /* MLS. */
9795 if (speed_p)
9796 *cost += extra_cost->mult[0].add;
9797 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9798 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9799 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9800 return true;
9803 if (CONST_INT_P (XEXP (x, 0)))
9805 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9806 INTVAL (XEXP (x, 0)), NULL_RTX,
9807 NULL_RTX, 1, 0);
9808 *cost = COSTS_N_INSNS (insns);
9809 if (speed_p)
9810 *cost += insns * extra_cost->alu.arith;
9811 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9812 return true;
9815 return false;
9818 if (GET_MODE_CLASS (mode) == MODE_INT
9819 && GET_MODE_SIZE (mode) < 4)
9821 rtx shift_op, shift_reg;
9822 shift_reg = NULL;
9824 /* We check both sides of the MINUS for shifter operands since,
9825 unlike PLUS, it's not commutative. */
9827 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9828 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9830 /* Slightly disparage, as we might need to widen the result. */
9831 *cost = 1 + COSTS_N_INSNS (1);
9832 if (speed_p)
9833 *cost += extra_cost->alu.arith;
9835 if (CONST_INT_P (XEXP (x, 0)))
9837 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9838 return true;
9841 return false;
9844 if (mode == DImode)
9846 *cost = COSTS_N_INSNS (2);
9848 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9850 rtx op1 = XEXP (x, 1);
9852 if (speed_p)
9853 *cost += 2 * extra_cost->alu.arith;
9855 if (GET_CODE (op1) == ZERO_EXTEND)
9856 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9857 else
9858 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9859 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9860 0, speed_p);
9861 return true;
9863 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9865 if (speed_p)
9866 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9867 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9868 0, speed_p)
9869 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9870 return true;
9872 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9873 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9875 if (speed_p)
9876 *cost += (extra_cost->alu.arith
9877 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9878 ? extra_cost->alu.arith
9879 : extra_cost->alu.arith_shift));
9880 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9881 + rtx_cost (XEXP (XEXP (x, 1), 0),
9882 GET_CODE (XEXP (x, 1)), 0, speed_p));
9883 return true;
9886 if (speed_p)
9887 *cost += 2 * extra_cost->alu.arith;
9888 return false;
9891 /* Vector mode? */
9893 *cost = LIBCALL_COST (2);
9894 return false;
9896 case PLUS:
9897 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9898 && (mode == SFmode || !TARGET_VFP_SINGLE))
9900 *cost = COSTS_N_INSNS (1);
9901 if (GET_CODE (XEXP (x, 0)) == MULT)
9903 rtx mul_op0, mul_op1, add_op;
9905 if (speed_p)
9906 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9908 mul_op0 = XEXP (XEXP (x, 0), 0);
9909 mul_op1 = XEXP (XEXP (x, 0), 1);
9910 add_op = XEXP (x, 1);
9912 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9913 + rtx_cost (mul_op1, code, 0, speed_p)
9914 + rtx_cost (add_op, code, 0, speed_p));
9916 return true;
9919 if (speed_p)
9920 *cost += extra_cost->fp[mode != SFmode].addsub;
9921 return false;
9923 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9925 *cost = LIBCALL_COST (2);
9926 return false;
9929 /* Narrow modes can be synthesized in SImode, but the range
9930 of useful sub-operations is limited. Check for shift operations
9931 on one of the operands. Only left shifts can be used in the
9932 narrow modes. */
9933 if (GET_MODE_CLASS (mode) == MODE_INT
9934 && GET_MODE_SIZE (mode) < 4)
9936 rtx shift_op, shift_reg;
9937 shift_reg = NULL;
9939 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9941 if (CONST_INT_P (XEXP (x, 1)))
9943 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9944 INTVAL (XEXP (x, 1)), NULL_RTX,
9945 NULL_RTX, 1, 0);
9946 *cost = COSTS_N_INSNS (insns);
9947 if (speed_p)
9948 *cost += insns * extra_cost->alu.arith;
9949 /* Slightly penalize a narrow operation as the result may
9950 need widening. */
9951 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9952 return true;
9955 /* Slightly penalize a narrow operation as the result may
9956 need widening. */
9957 *cost = 1 + COSTS_N_INSNS (1);
9958 if (speed_p)
9959 *cost += extra_cost->alu.arith;
9961 return false;
9964 if (mode == SImode)
9966 rtx shift_op, shift_reg;
9968 *cost = COSTS_N_INSNS (1);
9969 if (TARGET_INT_SIMD
9970 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9971 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9973 /* UXTA[BH] or SXTA[BH]. */
9974 if (speed_p)
9975 *cost += extra_cost->alu.extend_arith;
9976 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9977 speed_p)
9978 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9979 return true;
9982 shift_reg = NULL;
9983 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9984 if (shift_op != NULL)
9986 if (shift_reg)
9988 if (speed_p)
9989 *cost += extra_cost->alu.arith_shift_reg;
9990 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9992 else if (speed_p)
9993 *cost += extra_cost->alu.arith_shift;
9995 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9996 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9997 return true;
9999 if (GET_CODE (XEXP (x, 0)) == MULT)
10001 rtx mul_op = XEXP (x, 0);
10003 *cost = COSTS_N_INSNS (1);
10005 if (TARGET_DSP_MULTIPLY
10006 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10007 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10008 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10009 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10010 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10011 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10012 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10013 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10014 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10015 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10016 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10017 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10018 == 16))))))
10020 /* SMLA[BT][BT]. */
10021 if (speed_p)
10022 *cost += extra_cost->mult[0].extend_add;
10023 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10024 SIGN_EXTEND, 0, speed_p)
10025 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10026 SIGN_EXTEND, 0, speed_p)
10027 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10028 return true;
10031 if (speed_p)
10032 *cost += extra_cost->mult[0].add;
10033 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10034 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10038 if (CONST_INT_P (XEXP (x, 1)))
10040 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10041 INTVAL (XEXP (x, 1)), NULL_RTX,
10042 NULL_RTX, 1, 0);
10043 *cost = COSTS_N_INSNS (insns);
10044 if (speed_p)
10045 *cost += insns * extra_cost->alu.arith;
10046 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10047 return true;
10049 return false;
10052 if (mode == DImode)
10054 if (arm_arch3m
10055 && GET_CODE (XEXP (x, 0)) == MULT
10056 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10058 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10059 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10061 *cost = COSTS_N_INSNS (1);
10062 if (speed_p)
10063 *cost += extra_cost->mult[1].extend_add;
10064 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10065 ZERO_EXTEND, 0, speed_p)
10066 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10067 ZERO_EXTEND, 0, speed_p)
10068 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10069 return true;
10072 *cost = COSTS_N_INSNS (2);
10074 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10075 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10077 if (speed_p)
10078 *cost += (extra_cost->alu.arith
10079 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10080 ? extra_cost->alu.arith
10081 : extra_cost->alu.arith_shift));
10083 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10084 speed_p)
10085 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10086 return true;
10089 if (speed_p)
10090 *cost += 2 * extra_cost->alu.arith;
10091 return false;
10094 /* Vector mode? */
10095 *cost = LIBCALL_COST (2);
10096 return false;
10097 case IOR:
10098 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10100 *cost = COSTS_N_INSNS (1);
10101 if (speed_p)
10102 *cost += extra_cost->alu.rev;
10104 return true;
10106 /* Fall through. */
10107 case AND: case XOR:
10108 if (mode == SImode)
10110 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10111 rtx op0 = XEXP (x, 0);
10112 rtx shift_op, shift_reg;
10114 *cost = COSTS_N_INSNS (1);
10116 if (subcode == NOT
10117 && (code == AND
10118 || (code == IOR && TARGET_THUMB2)))
10119 op0 = XEXP (op0, 0);
10121 shift_reg = NULL;
10122 shift_op = shifter_op_p (op0, &shift_reg);
10123 if (shift_op != NULL)
10125 if (shift_reg)
10127 if (speed_p)
10128 *cost += extra_cost->alu.log_shift_reg;
10129 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10131 else if (speed_p)
10132 *cost += extra_cost->alu.log_shift;
10134 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10135 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10136 return true;
10139 if (CONST_INT_P (XEXP (x, 1)))
10141 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10142 INTVAL (XEXP (x, 1)), NULL_RTX,
10143 NULL_RTX, 1, 0);
10145 *cost = COSTS_N_INSNS (insns);
10146 if (speed_p)
10147 *cost += insns * extra_cost->alu.logical;
10148 *cost += rtx_cost (op0, code, 0, speed_p);
10149 return true;
10152 if (speed_p)
10153 *cost += extra_cost->alu.logical;
10154 *cost += (rtx_cost (op0, code, 0, speed_p)
10155 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10156 return true;
10159 if (mode == DImode)
10161 rtx op0 = XEXP (x, 0);
10162 enum rtx_code subcode = GET_CODE (op0);
10164 *cost = COSTS_N_INSNS (2);
10166 if (subcode == NOT
10167 && (code == AND
10168 || (code == IOR && TARGET_THUMB2)))
10169 op0 = XEXP (op0, 0);
10171 if (GET_CODE (op0) == ZERO_EXTEND)
10173 if (speed_p)
10174 *cost += 2 * extra_cost->alu.logical;
10176 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10177 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10178 return true;
10180 else if (GET_CODE (op0) == SIGN_EXTEND)
10182 if (speed_p)
10183 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10185 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10186 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10187 return true;
10190 if (speed_p)
10191 *cost += 2 * extra_cost->alu.logical;
10193 return true;
10195 /* Vector mode? */
10197 *cost = LIBCALL_COST (2);
10198 return false;
10200 case MULT:
10201 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10202 && (mode == SFmode || !TARGET_VFP_SINGLE))
10204 rtx op0 = XEXP (x, 0);
10206 *cost = COSTS_N_INSNS (1);
10208 if (GET_CODE (op0) == NEG)
10209 op0 = XEXP (op0, 0);
10211 if (speed_p)
10212 *cost += extra_cost->fp[mode != SFmode].mult;
10214 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10215 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10216 return true;
10218 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10220 *cost = LIBCALL_COST (2);
10221 return false;
10224 if (mode == SImode)
10226 *cost = COSTS_N_INSNS (1);
10227 if (TARGET_DSP_MULTIPLY
10228 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10229 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10230 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10231 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10232 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10233 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10234 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10235 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10236 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10237 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10238 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10239 && (INTVAL (XEXP (XEXP (x, 1), 1))
10240 == 16))))))
10242 /* SMUL[TB][TB]. */
10243 if (speed_p)
10244 *cost += extra_cost->mult[0].extend;
10245 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10246 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10247 return true;
10249 if (speed_p)
10250 *cost += extra_cost->mult[0].simple;
10251 return false;
10254 if (mode == DImode)
10256 if (arm_arch3m
10257 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10258 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10259 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10260 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10262 *cost = COSTS_N_INSNS (1);
10263 if (speed_p)
10264 *cost += extra_cost->mult[1].extend;
10265 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10266 ZERO_EXTEND, 0, speed_p)
10267 + rtx_cost (XEXP (XEXP (x, 1), 0),
10268 ZERO_EXTEND, 0, speed_p));
10269 return true;
10272 *cost = LIBCALL_COST (2);
10273 return false;
10276 /* Vector mode? */
10277 *cost = LIBCALL_COST (2);
10278 return false;
10280 case NEG:
10281 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10282 && (mode == SFmode || !TARGET_VFP_SINGLE))
10284 *cost = COSTS_N_INSNS (1);
10285 if (speed_p)
10286 *cost += extra_cost->fp[mode != SFmode].neg;
10288 return false;
10290 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10292 *cost = LIBCALL_COST (1);
10293 return false;
10296 if (mode == SImode)
10298 if (GET_CODE (XEXP (x, 0)) == ABS)
10300 *cost = COSTS_N_INSNS (2);
10301 /* Assume the non-flag-changing variant. */
10302 if (speed_p)
10303 *cost += (extra_cost->alu.log_shift
10304 + extra_cost->alu.arith_shift);
10305 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10306 return true;
10309 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10310 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10312 *cost = COSTS_N_INSNS (2);
10313 /* No extra cost for MOV imm and MVN imm. */
10314 /* If the comparison op is using the flags, there's no further
10315 cost; otherwise we need to add the cost of the comparison. */
10316 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10317 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10318 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10320 *cost += (COSTS_N_INSNS (1)
10321 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10322 speed_p)
10323 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10324 speed_p));
10325 if (speed_p)
10326 *cost += extra_cost->alu.arith;
10328 return true;
10330 *cost = COSTS_N_INSNS (1);
10331 if (speed_p)
10332 *cost += extra_cost->alu.arith;
10333 return false;
10336 if (GET_MODE_CLASS (mode) == MODE_INT
10337 && GET_MODE_SIZE (mode) < 4)
10339 /* Slightly disparage, as we might need an extend operation. */
10340 *cost = 1 + COSTS_N_INSNS (1);
10341 if (speed_p)
10342 *cost += extra_cost->alu.arith;
10343 return false;
10346 if (mode == DImode)
10348 *cost = COSTS_N_INSNS (2);
10349 if (speed_p)
10350 *cost += 2 * extra_cost->alu.arith;
10351 return false;
10354 /* Vector mode? */
10355 *cost = LIBCALL_COST (1);
10356 return false;
10358 case NOT:
10359 if (mode == SImode)
10361 rtx shift_op;
10362 rtx shift_reg = NULL;
10364 *cost = COSTS_N_INSNS (1);
10365 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10367 if (shift_op)
10369 if (shift_reg != NULL)
10371 if (speed_p)
10372 *cost += extra_cost->alu.log_shift_reg;
10373 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10375 else if (speed_p)
10376 *cost += extra_cost->alu.log_shift;
10377 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10378 return true;
10381 if (speed_p)
10382 *cost += extra_cost->alu.logical;
10383 return false;
10385 if (mode == DImode)
10387 *cost = COSTS_N_INSNS (2);
10388 return false;
10391 /* Vector mode? */
10393 *cost += LIBCALL_COST (1);
10394 return false;
10396 case IF_THEN_ELSE:
10398 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10400 *cost = COSTS_N_INSNS (4);
10401 return true;
10403 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10404 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10406 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10407 /* Assume that if one arm of the if_then_else is a register,
10408 it will be tied with the result, eliminating the
10409 conditional insn. */
10410 if (REG_P (XEXP (x, 1)))
10411 *cost += op2cost;
10412 else if (REG_P (XEXP (x, 2)))
10413 *cost += op1cost;
10414 else
10416 if (speed_p)
10418 if (extra_cost->alu.non_exec_costs_exec)
10419 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10420 else
10421 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10423 else
10424 *cost += op1cost + op2cost;
10427 return true;
10429 case COMPARE:
10430 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10431 *cost = 0;
10432 else
10434 machine_mode op0mode;
10435 /* We'll mostly assume that the cost of a compare is the cost of the
10436 LHS. However, there are some notable exceptions. */
10438 /* Floating point compares are never done as side-effects. */
10439 op0mode = GET_MODE (XEXP (x, 0));
10440 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10441 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10443 *cost = COSTS_N_INSNS (1);
10444 if (speed_p)
10445 *cost += extra_cost->fp[op0mode != SFmode].compare;
10447 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10449 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10450 return true;
10453 return false;
10455 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10457 *cost = LIBCALL_COST (2);
10458 return false;
10461 /* DImode compares normally take two insns. */
10462 if (op0mode == DImode)
10464 *cost = COSTS_N_INSNS (2);
10465 if (speed_p)
10466 *cost += 2 * extra_cost->alu.arith;
10467 return false;
10470 if (op0mode == SImode)
10472 rtx shift_op;
10473 rtx shift_reg;
10475 if (XEXP (x, 1) == const0_rtx
10476 && !(REG_P (XEXP (x, 0))
10477 || (GET_CODE (XEXP (x, 0)) == SUBREG
10478 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10480 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10482 /* Multiply operations that set the flags are often
10483 significantly more expensive. */
10484 if (speed_p
10485 && GET_CODE (XEXP (x, 0)) == MULT
10486 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10487 *cost += extra_cost->mult[0].flag_setting;
10489 if (speed_p
10490 && GET_CODE (XEXP (x, 0)) == PLUS
10491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10492 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10493 0), 1), mode))
10494 *cost += extra_cost->mult[0].flag_setting;
10495 return true;
10498 shift_reg = NULL;
10499 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10500 if (shift_op != NULL)
10502 *cost = COSTS_N_INSNS (1);
10503 if (shift_reg != NULL)
10505 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.arith_shift_reg;
10509 else if (speed_p)
10510 *cost += extra_cost->alu.arith_shift;
10511 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10512 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10513 return true;
10516 *cost = COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.arith;
10519 if (CONST_INT_P (XEXP (x, 1))
10520 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10522 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10523 return true;
10525 return false;
10528 /* Vector mode? */
10530 *cost = LIBCALL_COST (2);
10531 return false;
10533 return true;
10535 case EQ:
10536 case NE:
10537 case LT:
10538 case LE:
10539 case GT:
10540 case GE:
10541 case LTU:
10542 case LEU:
10543 case GEU:
10544 case GTU:
10545 case ORDERED:
10546 case UNORDERED:
10547 case UNEQ:
10548 case UNLE:
10549 case UNLT:
10550 case UNGE:
10551 case UNGT:
10552 case LTGT:
10553 if (outer_code == SET)
10555 /* Is it a store-flag operation? */
10556 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10557 && XEXP (x, 1) == const0_rtx)
10559 /* Thumb also needs an IT insn. */
10560 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10561 return true;
10563 if (XEXP (x, 1) == const0_rtx)
10565 switch (code)
10567 case LT:
10568 /* LSR Rd, Rn, #31. */
10569 *cost = COSTS_N_INSNS (1);
10570 if (speed_p)
10571 *cost += extra_cost->alu.shift;
10572 break;
10574 case EQ:
10575 /* RSBS T1, Rn, #0
10576 ADC Rd, Rn, T1. */
10578 case NE:
10579 /* SUBS T1, Rn, #1
10580 SBC Rd, Rn, T1. */
10581 *cost = COSTS_N_INSNS (2);
10582 break;
10584 case LE:
10585 /* RSBS T1, Rn, Rn, LSR #31
10586 ADC Rd, Rn, T1. */
10587 *cost = COSTS_N_INSNS (2);
10588 if (speed_p)
10589 *cost += extra_cost->alu.arith_shift;
10590 break;
10592 case GT:
10593 /* RSB Rd, Rn, Rn, ASR #1
10594 LSR Rd, Rd, #31. */
10595 *cost = COSTS_N_INSNS (2);
10596 if (speed_p)
10597 *cost += (extra_cost->alu.arith_shift
10598 + extra_cost->alu.shift);
10599 break;
10601 case GE:
10602 /* ASR Rd, Rn, #31
10603 ADD Rd, Rn, #1. */
10604 *cost = COSTS_N_INSNS (2);
10605 if (speed_p)
10606 *cost += extra_cost->alu.shift;
10607 break;
10609 default:
10610 /* Remaining cases are either meaningless or would take
10611 three insns anyway. */
10612 *cost = COSTS_N_INSNS (3);
10613 break;
10615 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10616 return true;
10618 else
10620 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10621 if (CONST_INT_P (XEXP (x, 1))
10622 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10624 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10625 return true;
10628 return false;
10631 /* Not directly inside a set. If it involves the condition code
10632 register it must be the condition for a branch, cond_exec or
10633 I_T_E operation. Since the comparison is performed elsewhere
10634 this is just the control part which has no additional
10635 cost. */
10636 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10637 && XEXP (x, 1) == const0_rtx)
10639 *cost = 0;
10640 return true;
10642 return false;
10644 case ABS:
10645 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10646 && (mode == SFmode || !TARGET_VFP_SINGLE))
10648 *cost = COSTS_N_INSNS (1);
10649 if (speed_p)
10650 *cost += extra_cost->fp[mode != SFmode].neg;
10652 return false;
10654 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10656 *cost = LIBCALL_COST (1);
10657 return false;
10660 if (mode == SImode)
10662 *cost = COSTS_N_INSNS (1);
10663 if (speed_p)
10664 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10665 return false;
10667 /* Vector mode? */
10668 *cost = LIBCALL_COST (1);
10669 return false;
10671 case SIGN_EXTEND:
10672 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10673 && MEM_P (XEXP (x, 0)))
10675 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10677 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1);
10680 if (!speed_p)
10681 return true;
10683 if (GET_MODE (XEXP (x, 0)) == SImode)
10684 *cost += extra_cost->ldst.load;
10685 else
10686 *cost += extra_cost->ldst.load_sign_extend;
10688 if (mode == DImode)
10689 *cost += extra_cost->alu.shift;
10691 return true;
10694 /* Widening from less than 32 bits requires an extend operation. */
10695 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10697 /* We have SXTB/SXTH. */
10698 *cost = COSTS_N_INSNS (1);
10699 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10705 /* Needs two shifts. */
10706 *cost = COSTS_N_INSNS (2);
10707 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10708 if (speed_p)
10709 *cost += 2 * extra_cost->alu.shift;
10712 /* Widening beyond 32 bits requires one more insn. */
10713 if (mode == DImode)
10715 *cost += COSTS_N_INSNS (1);
10716 if (speed_p)
10717 *cost += extra_cost->alu.shift;
10720 return true;
10722 case ZERO_EXTEND:
10723 if ((arm_arch4
10724 || GET_MODE (XEXP (x, 0)) == SImode
10725 || GET_MODE (XEXP (x, 0)) == QImode)
10726 && MEM_P (XEXP (x, 0)))
10728 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10730 if (mode == DImode)
10731 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10733 return true;
10736 /* Widening from less than 32 bits requires an extend operation. */
10737 if (GET_MODE (XEXP (x, 0)) == QImode)
10739 /* UXTB can be a shorter instruction in Thumb2, but it might
10740 be slower than the AND Rd, Rn, #255 alternative. When
10741 optimizing for speed it should never be slower to use
10742 AND, and we don't really model 16-bit vs 32-bit insns
10743 here. */
10744 *cost = COSTS_N_INSNS (1);
10745 if (speed_p)
10746 *cost += extra_cost->alu.logical;
10748 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10750 /* We have UXTB/UXTH. */
10751 *cost = COSTS_N_INSNS (1);
10752 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10753 if (speed_p)
10754 *cost += extra_cost->alu.extend;
10756 else if (GET_MODE (XEXP (x, 0)) != SImode)
10758 /* Needs two shifts. It's marginally preferable to use
10759 shifts rather than two BIC instructions as the second
10760 shift may merge with a subsequent insn as a shifter
10761 op. */
10762 *cost = COSTS_N_INSNS (2);
10763 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10764 if (speed_p)
10765 *cost += 2 * extra_cost->alu.shift;
10767 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10768 *cost = COSTS_N_INSNS (1);
10770 /* Widening beyond 32 bits requires one more insn. */
10771 if (mode == DImode)
10773 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10776 return true;
10778 case CONST_INT:
10779 *cost = 0;
10780 /* CONST_INT has no mode, so we cannot tell for sure how many
10781 insns are really going to be needed. The best we can do is
10782 look at the value passed. If it fits in SImode, then assume
10783 that's the mode it will be used for. Otherwise assume it
10784 will be used in DImode. */
10785 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10786 mode = SImode;
10787 else
10788 mode = DImode;
10790 /* Avoid blowing up in arm_gen_constant (). */
10791 if (!(outer_code == PLUS
10792 || outer_code == AND
10793 || outer_code == IOR
10794 || outer_code == XOR
10795 || outer_code == MINUS))
10796 outer_code = SET;
10798 const_int_cost:
10799 if (mode == SImode)
10801 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10802 INTVAL (x), NULL, NULL,
10803 0, 0));
10804 /* Extra costs? */
10806 else
10808 *cost += COSTS_N_INSNS (arm_gen_constant
10809 (outer_code, SImode, NULL,
10810 trunc_int_for_mode (INTVAL (x), SImode),
10811 NULL, NULL, 0, 0)
10812 + arm_gen_constant (outer_code, SImode, NULL,
10813 INTVAL (x) >> 32, NULL,
10814 NULL, 0, 0));
10815 /* Extra costs? */
10818 return true;
10820 case CONST:
10821 case LABEL_REF:
10822 case SYMBOL_REF:
10823 if (speed_p)
10825 if (arm_arch_thumb2 && !flag_pic)
10826 *cost = COSTS_N_INSNS (2);
10827 else
10828 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10830 else
10831 *cost = COSTS_N_INSNS (2);
10833 if (flag_pic)
10835 *cost += COSTS_N_INSNS (1);
10836 if (speed_p)
10837 *cost += extra_cost->alu.arith;
10840 return true;
10842 case CONST_FIXED:
10843 *cost = COSTS_N_INSNS (4);
10844 /* Fixme. */
10845 return true;
10847 case CONST_DOUBLE:
10848 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10849 && (mode == SFmode || !TARGET_VFP_SINGLE))
10851 if (vfp3_const_double_rtx (x))
10853 *cost = COSTS_N_INSNS (1);
10854 if (speed_p)
10855 *cost += extra_cost->fp[mode == DFmode].fpconst;
10856 return true;
10859 if (speed_p)
10861 *cost = COSTS_N_INSNS (1);
10862 if (mode == DFmode)
10863 *cost += extra_cost->ldst.loadd;
10864 else
10865 *cost += extra_cost->ldst.loadf;
10867 else
10868 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10870 return true;
10872 *cost = COSTS_N_INSNS (4);
10873 return true;
10875 case CONST_VECTOR:
10876 /* Fixme. */
10877 if (TARGET_NEON
10878 && TARGET_HARD_FLOAT
10879 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10880 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10881 *cost = COSTS_N_INSNS (1);
10882 else
10883 *cost = COSTS_N_INSNS (4);
10884 return true;
10886 case HIGH:
10887 case LO_SUM:
10888 *cost = COSTS_N_INSNS (1);
10889 /* When optimizing for size, we prefer constant pool entries to
10890 MOVW/MOVT pairs, so bump the cost of these slightly. */
10891 if (!speed_p)
10892 *cost += 1;
10893 return true;
10895 case CLZ:
10896 *cost = COSTS_N_INSNS (1);
10897 if (speed_p)
10898 *cost += extra_cost->alu.clz;
10899 return false;
10901 case SMIN:
10902 if (XEXP (x, 1) == const0_rtx)
10904 *cost = COSTS_N_INSNS (1);
10905 if (speed_p)
10906 *cost += extra_cost->alu.log_shift;
10907 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10908 return true;
10910 /* Fall through. */
10911 case SMAX:
10912 case UMIN:
10913 case UMAX:
10914 *cost = COSTS_N_INSNS (2);
10915 return false;
10917 case TRUNCATE:
10918 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10919 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10920 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10922 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10923 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10924 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10925 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10926 == ZERO_EXTEND))))
10928 *cost = COSTS_N_INSNS (1);
10929 if (speed_p)
10930 *cost += extra_cost->mult[1].extend;
10931 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10932 speed_p)
10933 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10934 0, speed_p));
10935 return true;
10937 *cost = LIBCALL_COST (1);
10938 return false;
10940 case UNSPEC:
10941 return arm_unspec_cost (x, outer_code, speed_p, cost);
10943 case PC:
10944 /* Reading the PC is like reading any other register. Writing it
10945 is more expensive, but we take that into account elsewhere. */
10946 *cost = 0;
10947 return true;
10949 case ZERO_EXTRACT:
10950 /* TODO: Simple zero_extract of bottom bits using AND. */
10951 /* Fall through. */
10952 case SIGN_EXTRACT:
10953 if (arm_arch6
10954 && mode == SImode
10955 && CONST_INT_P (XEXP (x, 1))
10956 && CONST_INT_P (XEXP (x, 2)))
10958 *cost = COSTS_N_INSNS (1);
10959 if (speed_p)
10960 *cost += extra_cost->alu.bfx;
10961 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10962 return true;
10964 /* Without UBFX/SBFX, need to resort to shift operations. */
10965 *cost = COSTS_N_INSNS (2);
10966 if (speed_p)
10967 *cost += 2 * extra_cost->alu.shift;
10968 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10969 return true;
10971 case FLOAT_EXTEND:
10972 if (TARGET_HARD_FLOAT)
10974 *cost = COSTS_N_INSNS (1);
10975 if (speed_p)
10976 *cost += extra_cost->fp[mode == DFmode].widen;
10977 if (!TARGET_FPU_ARMV8
10978 && GET_MODE (XEXP (x, 0)) == HFmode)
10980 /* Pre v8, widening HF->DF is a two-step process, first
10981 widening to SFmode. */
10982 *cost += COSTS_N_INSNS (1);
10983 if (speed_p)
10984 *cost += extra_cost->fp[0].widen;
10986 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10987 return true;
10990 *cost = LIBCALL_COST (1);
10991 return false;
10993 case FLOAT_TRUNCATE:
10994 if (TARGET_HARD_FLOAT)
10996 *cost = COSTS_N_INSNS (1);
10997 if (speed_p)
10998 *cost += extra_cost->fp[mode == DFmode].narrow;
10999 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11000 return true;
11001 /* Vector modes? */
11003 *cost = LIBCALL_COST (1);
11004 return false;
11006 case FMA:
11007 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11009 rtx op0 = XEXP (x, 0);
11010 rtx op1 = XEXP (x, 1);
11011 rtx op2 = XEXP (x, 2);
11013 *cost = COSTS_N_INSNS (1);
11015 /* vfms or vfnma. */
11016 if (GET_CODE (op0) == NEG)
11017 op0 = XEXP (op0, 0);
11019 /* vfnms or vfnma. */
11020 if (GET_CODE (op2) == NEG)
11021 op2 = XEXP (op2, 0);
11023 *cost += rtx_cost (op0, FMA, 0, speed_p);
11024 *cost += rtx_cost (op1, FMA, 1, speed_p);
11025 *cost += rtx_cost (op2, FMA, 2, speed_p);
11027 if (speed_p)
11028 *cost += extra_cost->fp[mode == DFmode].fma;
11030 return true;
11033 *cost = LIBCALL_COST (3);
11034 return false;
11036 case FIX:
11037 case UNSIGNED_FIX:
11038 if (TARGET_HARD_FLOAT)
11040 if (GET_MODE_CLASS (mode) == MODE_INT)
11042 *cost = COSTS_N_INSNS (1);
11043 if (speed_p)
11044 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11045 /* Strip off the 'cost' of rounding towards zero. */
11046 if (GET_CODE (XEXP (x, 0)) == FIX)
11047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11048 else
11049 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11050 /* ??? Increase the cost to deal with transferring from
11051 FP -> CORE registers? */
11052 return true;
11054 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11055 && TARGET_FPU_ARMV8)
11057 *cost = COSTS_N_INSNS (1);
11058 if (speed_p)
11059 *cost += extra_cost->fp[mode == DFmode].roundint;
11060 return false;
11062 /* Vector costs? */
11064 *cost = LIBCALL_COST (1);
11065 return false;
11067 case FLOAT:
11068 case UNSIGNED_FLOAT:
11069 if (TARGET_HARD_FLOAT)
11071 /* ??? Increase the cost to deal with transferring from CORE
11072 -> FP registers? */
11073 *cost = COSTS_N_INSNS (1);
11074 if (speed_p)
11075 *cost += extra_cost->fp[mode == DFmode].fromint;
11076 return false;
11078 *cost = LIBCALL_COST (1);
11079 return false;
11081 case CALL:
11082 *cost = COSTS_N_INSNS (1);
11083 return true;
11085 case ASM_OPERANDS:
11087 /* Just a guess. Estimate the number of instructions in the asm string
11088 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11089 though (see PR60663). */
11090 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11091 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11093 *cost = COSTS_N_INSNS (asm_length + num_operands);
11094 return true;
11096 default:
11097 if (mode != VOIDmode)
11098 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11099 else
11100 *cost = COSTS_N_INSNS (4); /* Who knows? */
11101 return false;
11105 #undef HANDLE_NARROW_SHIFT_ARITH
11107 /* RTX costs when optimizing for size. */
11108 static bool
11109 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11110 int *total, bool speed)
11112 bool result;
11114 if (TARGET_OLD_RTX_COSTS
11115 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11117 /* Old way. (Deprecated.) */
11118 if (!speed)
11119 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11120 (enum rtx_code) outer_code, total);
11121 else
11122 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11123 (enum rtx_code) outer_code, total,
11124 speed);
11126 else
11128 /* New way. */
11129 if (current_tune->insn_extra_cost)
11130 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11131 (enum rtx_code) outer_code,
11132 current_tune->insn_extra_cost,
11133 total, speed);
11134 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11135 && current_tune->insn_extra_cost == NULL */
11136 else
11137 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11138 (enum rtx_code) outer_code,
11139 &generic_extra_costs, total, speed);
11142 if (dump_file && (dump_flags & TDF_DETAILS))
11144 print_rtl_single (dump_file, x);
11145 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11146 *total, result ? "final" : "partial");
11148 return result;
11151 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11152 supported on any "slowmul" cores, so it can be ignored. */
11154 static bool
11155 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11156 int *total, bool speed)
11158 machine_mode mode = GET_MODE (x);
11160 if (TARGET_THUMB)
11162 *total = thumb1_rtx_costs (x, code, outer_code);
11163 return true;
11166 switch (code)
11168 case MULT:
11169 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11170 || mode == DImode)
11172 *total = COSTS_N_INSNS (20);
11173 return false;
11176 if (CONST_INT_P (XEXP (x, 1)))
11178 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11179 & (unsigned HOST_WIDE_INT) 0xffffffff);
11180 int cost, const_ok = const_ok_for_arm (i);
11181 int j, booth_unit_size;
11183 /* Tune as appropriate. */
11184 cost = const_ok ? 4 : 8;
11185 booth_unit_size = 2;
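/* Worked example (illustrative): multiplying by 21 (0x15), which is a
   valid ARM immediate, starts from cost 4; the loop below shifts
   0x15 -> 0x5 -> 0x1 -> 0 in three two-bit steps, giving cost 7, so
   *total becomes COSTS_N_INSNS (7) plus the cost of operand 0.  */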
11186 for (j = 0; i && j < 32; j += booth_unit_size)
11188 i >>= booth_unit_size;
11189 cost++;
11192 *total = COSTS_N_INSNS (cost);
11193 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11194 return true;
11197 *total = COSTS_N_INSNS (20);
11198 return false;
11200 default:
11201 return arm_rtx_costs_1 (x, outer_code, total, speed);
11206 /* RTX cost for cores with a fast multiply unit (M variants). */
11208 static bool
11209 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11210 int *total, bool speed)
11212 machine_mode mode = GET_MODE (x);
11214 if (TARGET_THUMB1)
11216 *total = thumb1_rtx_costs (x, code, outer_code);
11217 return true;
11220 /* ??? Should Thumb-2 use different costs? */
11221 switch (code)
11223 case MULT:
11224 /* There is no point basing this on the tuning, since it is always the
11225 fast variant if it exists at all. */
11226 if (mode == DImode
11227 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11228 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11229 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11231 *total = COSTS_N_INSNS (2);
11232 return false;
11236 if (mode == DImode)
11238 *total = COSTS_N_INSNS (5);
11239 return false;
11242 if (CONST_INT_P (XEXP (x, 1)))
11244 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11245 & (unsigned HOST_WIDE_INT) 0xffffffff);
11246 int cost, const_ok = const_ok_for_arm (i);
11247 int j, booth_unit_size;
11249 /* Tune as appropriate. */
11250 cost = const_ok ? 4 : 8;
11251 booth_unit_size = 8;
11252 for (j = 0; i && j < 32; j += booth_unit_size)
11254 i >>= booth_unit_size;
11255 cost++;
11258 *total = COSTS_N_INSNS (cost);
11259 return false;
11262 if (mode == SImode)
11264 *total = COSTS_N_INSNS (4);
11265 return false;
11268 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11270 if (TARGET_HARD_FLOAT
11271 && (mode == SFmode
11272 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11274 *total = COSTS_N_INSNS (1);
11275 return false;
11279 /* Requires a lib call */
11280 *total = COSTS_N_INSNS (20);
11281 return false;
11283 default:
11284 return arm_rtx_costs_1 (x, outer_code, total, speed);
11289 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11290 so it can be ignored. */
11292 static bool
11293 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11294 int *total, bool speed)
11296 machine_mode mode = GET_MODE (x);
11298 if (TARGET_THUMB)
11300 *total = thumb1_rtx_costs (x, code, outer_code);
11301 return true;
11304 switch (code)
11306 case COMPARE:
11307 if (GET_CODE (XEXP (x, 0)) != MULT)
11308 return arm_rtx_costs_1 (x, outer_code, total, speed);
11310 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11311 will stall until the multiplication is complete. */
11312 *total = COSTS_N_INSNS (3);
11313 return false;
11315 case MULT:
11316 /* There is no point basing this on the tuning, since it is always the
11317 fast variant if it exists at all. */
11318 if (mode == DImode
11319 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11320 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11321 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11323 *total = COSTS_N_INSNS (2);
11324 return false;
11328 if (mode == DImode)
11330 *total = COSTS_N_INSNS (5);
11331 return false;
11334 if (CONST_INT_P (XEXP (x, 1)))
11336 /* If operand 1 is a constant we can more accurately
11337 calculate the cost of the multiply. The multiplier can
11338 retire 15 bits on the first cycle and a further 12 on the
11339 second. We do, of course, have to load the constant into
11340 a register first. */
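/* Worked example (illustrative): for a multiplier of 0x12345 the sign bit
   is clear, so no inversion happens below; 0x12345 & 0xffff8000 is nonzero,
   so the cost rises to 2, while 0x12345 & 0xf8000000 is zero, giving
   *total = COSTS_N_INSNS (2).  */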
11341 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11342 /* There's a general overhead of one cycle. */
11343 int cost = 1;
11344 unsigned HOST_WIDE_INT masked_const;
11346 if (i & 0x80000000)
11347 i = ~i;
11349 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11351 masked_const = i & 0xffff8000;
11352 if (masked_const != 0)
11354 cost++;
11355 masked_const = i & 0xf8000000;
11356 if (masked_const != 0)
11357 cost++;
11359 *total = COSTS_N_INSNS (cost);
11360 return false;
11363 if (mode == SImode)
11365 *total = COSTS_N_INSNS (3);
11366 return false;
11369 /* Requires a lib call */
11370 *total = COSTS_N_INSNS (20);
11371 return false;
11373 default:
11374 return arm_rtx_costs_1 (x, outer_code, total, speed);
11379 /* RTX costs for 9e (and later) cores. */
11381 static bool
11382 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11383 int *total, bool speed)
11385 machine_mode mode = GET_MODE (x);
11387 if (TARGET_THUMB1)
11389 switch (code)
11391 case MULT:
11392 /* Small multiply: 32 cycles for an integer multiply inst. */
11393 if (arm_arch6m && arm_m_profile_small_mul)
11394 *total = COSTS_N_INSNS (32);
11395 else
11396 *total = COSTS_N_INSNS (3);
11397 return true;
11399 default:
11400 *total = thumb1_rtx_costs (x, code, outer_code);
11401 return true;
11405 switch (code)
11407 case MULT:
11408 /* There is no point basing this on the tuning, since it is always the
11409 fast variant if it exists at all. */
11410 if (mode == DImode
11411 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11412 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11413 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11415 *total = COSTS_N_INSNS (2);
11416 return false;
11420 if (mode == DImode)
11422 *total = COSTS_N_INSNS (5);
11423 return false;
11426 if (mode == SImode)
11428 *total = COSTS_N_INSNS (2);
11429 return false;
11432 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11434 if (TARGET_HARD_FLOAT
11435 && (mode == SFmode
11436 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11438 *total = COSTS_N_INSNS (1);
11439 return false;
11443 *total = COSTS_N_INSNS (20);
11444 return false;
11446 default:
11447 return arm_rtx_costs_1 (x, outer_code, total, speed);
11450 /* All address computations that can be done are free, but rtx cost returns
11451 the same for practically all of them. So we weight the different types
11452 of address here in the order (most pref first):
11453 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
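/* For example, the function below returns 0 for a PRE_INC address, 2 for
   (plus (reg) (const_int)), 3 or 4 for a sum involving another arithmetic
   term, 6 for a plain REG, and 10 for a MEM, LABEL_REF or SYMBOL_REF
   address.  */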
11454 static inline int
11455 arm_arm_address_cost (rtx x)
11457 enum rtx_code c = GET_CODE (x);
11459 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11460 return 0;
11461 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11462 return 10;
11464 if (c == PLUS)
11466 if (CONST_INT_P (XEXP (x, 1)))
11467 return 2;
11469 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11470 return 3;
11472 return 4;
11475 return 6;
11478 static inline int
11479 arm_thumb_address_cost (rtx x)
11481 enum rtx_code c = GET_CODE (x);
11483 if (c == REG)
11484 return 1;
11485 if (c == PLUS
11486 && REG_P (XEXP (x, 0))
11487 && CONST_INT_P (XEXP (x, 1)))
11488 return 1;
11490 return 2;
11493 static int
11494 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11495 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11497 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11500 /* Adjust cost hook for XScale. */
11501 static bool
11502 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11504 /* Some true dependencies can have a higher cost depending
11505 on precisely how certain input operands are used. */
11506 if (REG_NOTE_KIND (link) == 0
11507 && recog_memoized (insn) >= 0
11508 && recog_memoized (dep) >= 0)
11510 int shift_opnum = get_attr_shift (insn);
11511 enum attr_type attr_type = get_attr_type (dep);
11513 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11514 operand for INSN. If we have a shifted input operand and the
11515 instruction we depend on is another ALU instruction, then we may
11516 have to account for an additional stall. */
11517 if (shift_opnum != 0
11518 && (attr_type == TYPE_ALU_SHIFT_IMM
11519 || attr_type == TYPE_ALUS_SHIFT_IMM
11520 || attr_type == TYPE_LOGIC_SHIFT_IMM
11521 || attr_type == TYPE_LOGICS_SHIFT_IMM
11522 || attr_type == TYPE_ALU_SHIFT_REG
11523 || attr_type == TYPE_ALUS_SHIFT_REG
11524 || attr_type == TYPE_LOGIC_SHIFT_REG
11525 || attr_type == TYPE_LOGICS_SHIFT_REG
11526 || attr_type == TYPE_MOV_SHIFT
11527 || attr_type == TYPE_MVN_SHIFT
11528 || attr_type == TYPE_MOV_SHIFT_REG
11529 || attr_type == TYPE_MVN_SHIFT_REG))
11531 rtx shifted_operand;
11532 int opno;
11534 /* Get the shifted operand. */
11535 extract_insn (insn);
11536 shifted_operand = recog_data.operand[shift_opnum];
11538 /* Iterate over all the operands in DEP. If we write an operand
11539 that overlaps with SHIFTED_OPERAND, then we have to increase the
11540 cost of this dependency. */
11541 extract_insn (dep);
11542 preprocess_constraints (dep);
11543 for (opno = 0; opno < recog_data.n_operands; opno++)
11545 /* We can ignore strict inputs. */
11546 if (recog_data.operand_type[opno] == OP_IN)
11547 continue;
11549 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11550 shifted_operand))
11552 *cost = 2;
11553 return false;
11558 return true;
11561 /* Adjust cost hook for Cortex A9. */
11562 static bool
11563 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11565 switch (REG_NOTE_KIND (link))
11567 case REG_DEP_ANTI:
11568 *cost = 0;
11569 return false;
11571 case REG_DEP_TRUE:
11572 case REG_DEP_OUTPUT:
11573 if (recog_memoized (insn) >= 0
11574 && recog_memoized (dep) >= 0)
11576 if (GET_CODE (PATTERN (insn)) == SET)
11578 if (GET_MODE_CLASS
11579 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11580 || GET_MODE_CLASS
11581 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11583 enum attr_type attr_type_insn = get_attr_type (insn);
11584 enum attr_type attr_type_dep = get_attr_type (dep);
11586 /* By default all dependencies of the form
11587 s0 = s0 <op> s1
11588 s0 = s0 <op> s2
11589 have an extra latency of 1 cycle because
11590 of the input and output dependency in this
11591 case. However, this gets modeled as a true
11592 dependency and hence all these checks. */
11593 if (REG_P (SET_DEST (PATTERN (insn)))
11594 && REG_P (SET_DEST (PATTERN (dep)))
11595 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11596 SET_DEST (PATTERN (dep))))
11598 /* FMACS is a special case where the dependent
11599 instruction can be issued 3 cycles before
11600 the normal latency in case of an output
11601 dependency. */
11602 if ((attr_type_insn == TYPE_FMACS
11603 || attr_type_insn == TYPE_FMACD)
11604 && (attr_type_dep == TYPE_FMACS
11605 || attr_type_dep == TYPE_FMACD))
11607 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11608 *cost = insn_default_latency (dep) - 3;
11609 else
11610 *cost = insn_default_latency (dep);
11611 return false;
11613 else
11615 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11616 *cost = insn_default_latency (dep) + 1;
11617 else
11618 *cost = insn_default_latency (dep);
11620 return false;
11625 break;
11627 default:
11628 gcc_unreachable ();
11631 return true;
11634 /* Adjust cost hook for FA726TE. */
11635 static bool
11636 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11638 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11639 has a penalty of 3. */
11640 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11641 && recog_memoized (insn) >= 0
11642 && recog_memoized (dep) >= 0
11643 && get_attr_conds (dep) == CONDS_SET)
11645 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11646 if (get_attr_conds (insn) == CONDS_USE
11647 && get_attr_type (insn) != TYPE_BRANCH)
11649 *cost = 3;
11650 return false;
11653 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11654 || get_attr_conds (insn) == CONDS_USE)
11656 *cost = 0;
11657 return false;
11661 return true;
11664 /* Implement TARGET_REGISTER_MOVE_COST.
11666 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11667 such a move is typically more expensive than a single memory access. We set
11668 the cost to less than two memory accesses so that floating
11669 point to integer conversion does not go through memory. */
11672 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11673 reg_class_t from, reg_class_t to)
11675 if (TARGET_32BIT)
11677 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11678 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11679 return 15;
11680 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11681 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11682 return 4;
11683 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11684 return 20;
11685 else
11686 return 2;
11688 else
11690 if (from == HI_REGS || to == HI_REGS)
11691 return 4;
11692 else
11693 return 2;
11697 /* Implement TARGET_MEMORY_MOVE_COST. */
11700 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11701 bool in ATTRIBUTE_UNUSED)
11703 if (TARGET_32BIT)
11704 return 10;
11705 else
11707 if (GET_MODE_SIZE (mode) < 4)
11708 return 8;
11709 else
11710 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
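/* Standalone sketch, not part of the port: a simplified restatement of the
   two cost tables above, showing why a VFP<->core move (15) is kept below
   the cost of going through memory twice (2 * 10 = 20) on 32-bit targets.
   The class names below are stand-ins for the enum values used above.  */
#include <stdio.h>

enum cls { GENERAL, VFP, IWMMXT, IWMMXT_GR };

static int
move_cost_32bit (enum cls from, enum cls to)
{
  if ((from == VFP) != (to == VFP))
    return 15;                    /* single insn, but dearer than one load */
  if ((from == IWMMXT) != (to == IWMMXT))
    return 4;
  if (from == IWMMXT_GR || to == IWMMXT_GR)
    return 20;
  return 2;
}

int
main (void)
{
  int mem = 10;                              /* 32-bit memory move cost */
  printf ("vfp<->core: %d, via memory: %d\n",
          move_cost_32bit (VFP, GENERAL), 2 * mem);   /* 15 vs 20 */
  return 0;
}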
11714 /* Vectorizer cost model implementation. */
11716 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11717 static int
11718 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11719 tree vectype,
11720 int misalign ATTRIBUTE_UNUSED)
11722 unsigned elements;
11724 switch (type_of_cost)
11726 case scalar_stmt:
11727 return current_tune->vec_costs->scalar_stmt_cost;
11729 case scalar_load:
11730 return current_tune->vec_costs->scalar_load_cost;
11732 case scalar_store:
11733 return current_tune->vec_costs->scalar_store_cost;
11735 case vector_stmt:
11736 return current_tune->vec_costs->vec_stmt_cost;
11738 case vector_load:
11739 return current_tune->vec_costs->vec_align_load_cost;
11741 case vector_store:
11742 return current_tune->vec_costs->vec_store_cost;
11744 case vec_to_scalar:
11745 return current_tune->vec_costs->vec_to_scalar_cost;
11747 case scalar_to_vec:
11748 return current_tune->vec_costs->scalar_to_vec_cost;
11750 case unaligned_load:
11751 return current_tune->vec_costs->vec_unalign_load_cost;
11753 case unaligned_store:
11754 return current_tune->vec_costs->vec_unalign_store_cost;
11756 case cond_branch_taken:
11757 return current_tune->vec_costs->cond_taken_branch_cost;
11759 case cond_branch_not_taken:
11760 return current_tune->vec_costs->cond_not_taken_branch_cost;
11762 case vec_perm:
11763 case vec_promote_demote:
11764 return current_tune->vec_costs->vec_stmt_cost;
11766 case vec_construct:
11767 elements = TYPE_VECTOR_SUBPARTS (vectype);
11768 return elements / 2 + 1;
11770 default:
11771 gcc_unreachable ();
11775 /* Implement targetm.vectorize.add_stmt_cost. */
11777 static unsigned
11778 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11779 struct _stmt_vec_info *stmt_info, int misalign,
11780 enum vect_cost_model_location where)
11782 unsigned *cost = (unsigned *) data;
11783 unsigned retval = 0;
11785 if (flag_vect_cost_model)
11787 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11788 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11790 /* Statements in an inner loop relative to the loop being
11791 vectorized are weighted more heavily. The value here is
11792 arbitrary and could potentially be improved with analysis. */
11793 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11794 count *= 50; /* FIXME. */
11796 retval = (unsigned) (count * stmt_cost);
11797 cost[where] += retval;
11800 return retval;
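/* Standalone sketch, not part of the port: the cost accumulation performed
   by arm_add_stmt_cost.  The per-statement cost is multiplied by COUNT,
   weighted by the same arbitrary factor of 50 for statements in an inner
   loop, and added to the bucket selected by WHERE.  The statement costs
   used below are made-up examples, not values from any tuning table.  */
#include <stdio.h>

enum where { PROLOGUE, BODY, EPILOGUE, N_WHERE };

static unsigned
add_stmt_cost_model (unsigned *buckets, int count, int stmt_cost,
                     enum where where, int in_inner_loop)
{
  if (in_inner_loop)
    count *= 50;
  unsigned retval = (unsigned) (count * stmt_cost);
  buckets[where] += retval;
  return retval;
}

int
main (void)
{
  unsigned buckets[N_WHERE] = { 0, 0, 0 };
  add_stmt_cost_model (buckets, 2, 1, BODY, 0);     /* two ordinary stmts */
  add_stmt_cost_model (buckets, 1, 1, BODY, 1);     /* one inner-loop stmt */
  printf ("loop body cost = %u\n", buckets[BODY]);  /* 2 + 50 = 52 */
  return 0;
}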
11803 /* Return true if and only if this insn can dual-issue only as older. */
11804 static bool
11805 cortexa7_older_only (rtx_insn *insn)
11807 if (recog_memoized (insn) < 0)
11808 return false;
11810 switch (get_attr_type (insn))
11812 case TYPE_ALU_DSP_REG:
11813 case TYPE_ALU_SREG:
11814 case TYPE_ALUS_SREG:
11815 case TYPE_LOGIC_REG:
11816 case TYPE_LOGICS_REG:
11817 case TYPE_ADC_REG:
11818 case TYPE_ADCS_REG:
11819 case TYPE_ADR:
11820 case TYPE_BFM:
11821 case TYPE_REV:
11822 case TYPE_MVN_REG:
11823 case TYPE_SHIFT_IMM:
11824 case TYPE_SHIFT_REG:
11825 case TYPE_LOAD_BYTE:
11826 case TYPE_LOAD1:
11827 case TYPE_STORE1:
11828 case TYPE_FFARITHS:
11829 case TYPE_FADDS:
11830 case TYPE_FFARITHD:
11831 case TYPE_FADDD:
11832 case TYPE_FMOV:
11833 case TYPE_F_CVT:
11834 case TYPE_FCMPS:
11835 case TYPE_FCMPD:
11836 case TYPE_FCONSTS:
11837 case TYPE_FCONSTD:
11838 case TYPE_FMULS:
11839 case TYPE_FMACS:
11840 case TYPE_FMULD:
11841 case TYPE_FMACD:
11842 case TYPE_FDIVS:
11843 case TYPE_FDIVD:
11844 case TYPE_F_MRC:
11845 case TYPE_F_MRRC:
11846 case TYPE_F_FLAG:
11847 case TYPE_F_LOADS:
11848 case TYPE_F_STORES:
11849 return true;
11850 default:
11851 return false;
11855 /* Return true if and only if this insn can dual-issue as younger. */
11856 static bool
11857 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11859 if (recog_memoized (insn) < 0)
11861 if (verbose > 5)
11862 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11863 return false;
11866 switch (get_attr_type (insn))
11868 case TYPE_ALU_IMM:
11869 case TYPE_ALUS_IMM:
11870 case TYPE_LOGIC_IMM:
11871 case TYPE_LOGICS_IMM:
11872 case TYPE_EXTEND:
11873 case TYPE_MVN_IMM:
11874 case TYPE_MOV_IMM:
11875 case TYPE_MOV_REG:
11876 case TYPE_MOV_SHIFT:
11877 case TYPE_MOV_SHIFT_REG:
11878 case TYPE_BRANCH:
11879 case TYPE_CALL:
11880 return true;
11881 default:
11882 return false;
11887 /* Look for an instruction that can dual issue only as an older
11888 instruction, and move it in front of any instructions that can
11889 dual-issue as younger, while preserving the relative order of all
11890 other instructions in the ready list. This is a heuristic to help
11891 dual-issue in later cycles, by postponing issue of more flexible
11892 instructions. This heuristic may affect dual issue opportunities
11893 in the current cycle. */
11894 static void
11895 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11896 int *n_readyp, int clock)
11898 int i;
11899 int first_older_only = -1, first_younger = -1;
11901 if (verbose > 5)
11902 fprintf (file,
11903 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11904 clock,
11905 *n_readyp);
11907 /* Traverse the ready list from the head (the instruction to issue
11908 first), looking for the first instruction that can issue as
11909 younger and the first instruction that can dual-issue only as
11910 older. */
11911 for (i = *n_readyp - 1; i >= 0; i--)
11913 rtx_insn *insn = ready[i];
11914 if (cortexa7_older_only (insn))
11916 first_older_only = i;
11917 if (verbose > 5)
11918 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11919 break;
11921 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11922 first_younger = i;
11925 /* Nothing to reorder because either no younger insn found or insn
11926 that can dual-issue only as older appears before any insn that
11927 can dual-issue as younger. */
11928 if (first_younger == -1)
11930 if (verbose > 5)
11931 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11932 return;
11935 /* Nothing to reorder because no older-only insn in the ready list. */
11936 if (first_older_only == -1)
11938 if (verbose > 5)
11939 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11940 return;
11943 /* Move first_older_only insn before first_younger. */
11944 if (verbose > 5)
11945 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11946 INSN_UID(ready [first_older_only]),
11947 INSN_UID(ready [first_younger]));
11948 rtx_insn *first_older_only_insn = ready [first_older_only];
11949 for (i = first_older_only; i < first_younger; i++)
11951 ready[i] = ready[i+1];
11954 ready[i] = first_older_only_insn;
11955 return;
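/* Standalone sketch, not part of the port: the reordering heuristic above
   on a plain array.  The ready list keeps the next insn to issue at the
   highest index, so the scan walks from the end.  The uid values and the
   older_only/younger classification below are made up for illustration.  */
#include <stdio.h>

struct fake_insn { int uid; int older_only; int younger; };

static void
reorder_ready (struct fake_insn *ready, int n_ready)
{
  int i, first_older_only = -1, first_younger = -1;

  /* Find the first older-only insn and the first younger insn, starting
     from the head of the list (highest index).  */
  for (i = n_ready - 1; i >= 0; i--)
    {
      if (ready[i].older_only)
        {
          first_older_only = i;
          break;
        }
      else if (ready[i].younger && first_younger == -1)
        first_younger = i;
    }

  if (first_younger == -1 || first_older_only == -1)
    return;

  /* Move the older-only insn just in front of the first younger insn,
     shifting everything in between down by one slot.  */
  struct fake_insn older = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = older;
}

int
main (void)
{
  struct fake_insn ready[] = {
    { 1, 0, 0 }, { 2, 0, 1 }, { 3, 1, 0 }, { 4, 0, 1 } };
  reorder_ready (ready, 4);
  for (int i = 3; i >= 0; i--)     /* print in issue order */
    printf ("%d ", ready[i].uid);
  printf ("\n");                   /* older-only insn 3 now issues before 4 */
  return 0;
}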
11958 /* Implement TARGET_SCHED_REORDER. */
11959 static int
11960 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11961 int clock)
11963 switch (arm_tune)
11965 case cortexa7:
11966 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11967 break;
11968 default:
11969 /* Do nothing for other cores. */
11970 break;
11973 return arm_issue_rate ();
11976 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11977 It corrects the value of COST based on the relationship between
11978 INSN and DEP through the dependence LINK. It returns the new
11979 value. There is a per-core adjust_cost hook to adjust scheduler costs
11980 and the per-core hook can choose to completely override the generic
11981 adjust_cost function. Only put bits of code into arm_adjust_cost that
11982 are common across all cores. */
11983 static int
11984 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11986 rtx i_pat, d_pat;
11988 /* When generating Thumb-1 code, we want to place flag-setting operations
11989 close to a conditional branch which depends on them, so that we can
11990 omit the comparison. */
11991 if (TARGET_THUMB1
11992 && REG_NOTE_KIND (link) == 0
11993 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11994 && recog_memoized (dep) >= 0
11995 && get_attr_conds (dep) == CONDS_SET)
11996 return 0;
11998 if (current_tune->sched_adjust_cost != NULL)
12000 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12001 return cost;
12004 /* XXX Is this strictly true? */
12005 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12006 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12007 return 0;
12009 /* Call insns don't incur a stall, even if they follow a load. */
12010 if (REG_NOTE_KIND (link) == 0
12011 && CALL_P (insn))
12012 return 1;
12014 if ((i_pat = single_set (insn)) != NULL
12015 && MEM_P (SET_SRC (i_pat))
12016 && (d_pat = single_set (dep)) != NULL
12017 && MEM_P (SET_DEST (d_pat)))
12019 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12020 /* This is a load after a store; there is no conflict if the load reads
12021 from a cached area. Assume that loads from the stack, and from the
12022 constant pool are cached, and that others will miss. This is a
12023 hack. */
12025 if ((GET_CODE (src_mem) == SYMBOL_REF
12026 && CONSTANT_POOL_ADDRESS_P (src_mem))
12027 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12028 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12029 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12030 return 1;
12033 return cost;
12037 arm_max_conditional_execute (void)
12039 return max_insns_skipped;
12042 static int
12043 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12045 if (TARGET_32BIT)
12046 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12047 else
12048 return (optimize > 0) ? 2 : 0;
12051 static int
12052 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12054 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12057 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12058 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12059 sequences of non-executed instructions in IT blocks probably take the same
12060 amount of time as executed instructions (and the IT instruction itself takes
12061 space in icache). This function was experimentally determined to give good
12062 results on a popular embedded benchmark. */
12064 static int
12065 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12067 return (TARGET_32BIT && speed_p) ? 1
12068 : arm_default_branch_cost (speed_p, predictable_p);
12071 static bool fp_consts_inited = false;
12073 static REAL_VALUE_TYPE value_fp0;
12075 static void
12076 init_fp_table (void)
12078 REAL_VALUE_TYPE r;
12080 r = REAL_VALUE_ATOF ("0", DFmode);
12081 value_fp0 = r;
12082 fp_consts_inited = true;
12085 /* Return TRUE if rtx X is a valid immediate FP constant. */
12087 arm_const_double_rtx (rtx x)
12089 REAL_VALUE_TYPE r;
12091 if (!fp_consts_inited)
12092 init_fp_table ();
12094 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12095 if (REAL_VALUE_MINUS_ZERO (r))
12096 return 0;
12098 if (REAL_VALUES_EQUAL (r, value_fp0))
12099 return 1;
12101 return 0;
12104 /* VFPv3 has a fairly wide range of representable immediates, formed from
12105 "quarter-precision" floating-point values. These can be evaluated using this
12106 formula (with ^ for exponentiation):
12108 (-1)^s * n * 2^-r
12110 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12111 16 <= n <= 31 and 0 <= r <= 7.
12113 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12115 - A (most-significant) is the sign bit.
12116 - BCD are the exponent (encoded as r XOR 3).
12117 - EFGH are the mantissa (encoded as n - 16).
12120 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12121 fconst[sd] instruction, or -1 if X isn't suitable. */
12122 static int
12123 vfp3_const_double_index (rtx x)
12125 REAL_VALUE_TYPE r, m;
12126 int sign, exponent;
12127 unsigned HOST_WIDE_INT mantissa, mant_hi;
12128 unsigned HOST_WIDE_INT mask;
12129 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12130 bool fail;
12132 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12133 return -1;
12135 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12137 /* We can't represent these things, so detect them first. */
12138 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12139 return -1;
12141 /* Extract sign, exponent and mantissa. */
12142 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12143 r = real_value_abs (&r);
12144 exponent = REAL_EXP (&r);
12145 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12146 highest (sign) bit, with a fixed binary point at bit point_pos.
12147 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12148 bits for the mantissa, this may fail (low bits would be lost). */
12149 real_ldexp (&m, &r, point_pos - exponent);
12150 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12151 mantissa = w.elt (0);
12152 mant_hi = w.elt (1);
12154 /* If there are bits set in the low part of the mantissa, we can't
12155 represent this value. */
12156 if (mantissa != 0)
12157 return -1;
12159 /* Now make it so that mantissa contains the most-significant bits, and move
12160 the point_pos to indicate that the least-significant bits have been
12161 discarded. */
12162 point_pos -= HOST_BITS_PER_WIDE_INT;
12163 mantissa = mant_hi;
12165 /* We can permit four significant bits of mantissa only, plus a high bit
12166 which is always 1. */
12167 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12168 if ((mantissa & mask) != 0)
12169 return -1;
12171 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12172 mantissa >>= point_pos - 5;
12174 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12175 floating-point immediate zero with Neon using an integer-zero load, but
12176 that case is handled elsewhere.) */
12177 if (mantissa == 0)
12178 return -1;
12180 gcc_assert (mantissa >= 16 && mantissa <= 31);
12182 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12183 normalized significands are in the range [1, 2). (Our mantissa is shifted
12184 left 4 places at this point relative to normalized IEEE754 values). GCC
12185 internally uses [0.5, 1) (see real.c), so the exponent returned from
12186 REAL_EXP must be altered. */
12187 exponent = 5 - exponent;
12189 if (exponent < 0 || exponent > 7)
12190 return -1;
12192 /* Sign, mantissa and exponent are now in the correct form to plug into the
12193 formula described in the comment above. */
12194 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
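/* Standalone sketch, not part of the port: decoding the index produced
   above back into its value, following the ABCDEFGH layout described
   before vfp3_const_double_index.  E.g. index 0x70 (s=0, r=4, n=16)
   decodes to 16 * 2^-4 = 1.0, and index 0x00 (s=0, r=3, n=16) to 2.0.  */
#include <stdio.h>
#include <math.h>

static double
vfp3_index_to_value (int index)
{
  int sign = (index >> 7) & 1;        /* A: sign bit */
  int r = ((index >> 4) & 7) ^ 3;     /* BCD: exponent, stored as r XOR 3 */
  int n = 16 + (index & 15);          /* EFGH: mantissa, stored as n - 16 */
  return (sign ? -1.0 : 1.0) * n * ldexp (1.0, -r);
}

int
main (void)
{
  printf ("0x70 -> %g\n", vfp3_index_to_value (0x70));   /* 1.0 */
  printf ("0x00 -> %g\n", vfp3_index_to_value (0x00));   /* 2.0 */
  printf ("0x4f -> %g\n", vfp3_index_to_value (0x4f));   /* 31/128 */
  return 0;
}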
12197 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12199 vfp3_const_double_rtx (rtx x)
12201 if (!TARGET_VFP3)
12202 return 0;
12204 return vfp3_const_double_index (x) != -1;
12207 /* Recognize immediates which can be used in various Neon instructions. Legal
12208 immediates are described by the following table (for VMVN variants, the
12209 bitwise inverse of the constant shown is recognized. In either case, VMOV
12210 is output and the correct instruction to use for a given constant is chosen
12211 by the assembler). The constant shown is replicated across all elements of
12212 the destination vector.
12214 insn elems variant constant (binary)
12215 ---- ----- ------- -----------------
12216 vmov i32 0 00000000 00000000 00000000 abcdefgh
12217 vmov i32 1 00000000 00000000 abcdefgh 00000000
12218 vmov i32 2 00000000 abcdefgh 00000000 00000000
12219 vmov i32 3 abcdefgh 00000000 00000000 00000000
12220 vmov i16 4 00000000 abcdefgh
12221 vmov i16 5 abcdefgh 00000000
12222 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12223 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12224 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12225 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12226 vmvn i16 10 00000000 abcdefgh
12227 vmvn i16 11 abcdefgh 00000000
12228 vmov i32 12 00000000 00000000 abcdefgh 11111111
12229 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12230 vmov i32 14 00000000 abcdefgh 11111111 11111111
12231 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12232 vmov i8 16 abcdefgh
12233 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12234 eeeeeeee ffffffff gggggggg hhhhhhhh
12235 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12236 vmov f32 19 00000000 00000000 00000000 00000000
12238 For case 18, B = !b. Representable values are exactly those accepted by
12239 vfp3_const_double_index, but are output as floating-point numbers rather
12240 than indices.
12242 For case 19, we will change it to vmov.i32 when assembling.
12244 Variants 0-5 (inclusive) may also be used as immediates for the second
12245 operand of VORR/VBIC instructions.
12247 The INVERSE argument causes the bitwise inverse of the given operand to be
12248 recognized instead (used for recognizing legal immediates for the VAND/VORN
12249 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12250 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12251 output, rather than the real insns vbic/vorr).
12253 INVERSE makes no difference to the recognition of float vectors.
12255 The return value is the variant of immediate as shown in the above table, or
12256 -1 if the given value doesn't match any of the listed patterns.
12258 static int
12259 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12260 rtx *modconst, int *elementwidth)
12262 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12263 matches = 1; \
12264 for (i = 0; i < idx; i += (STRIDE)) \
12265 if (!(TEST)) \
12266 matches = 0; \
12267 if (matches) \
12269 immtype = (CLASS); \
12270 elsize = (ELSIZE); \
12271 break; \
12274 unsigned int i, elsize = 0, idx = 0, n_elts;
12275 unsigned int innersize;
12276 unsigned char bytes[16];
12277 int immtype = -1, matches;
12278 unsigned int invmask = inverse ? 0xff : 0;
12279 bool vector = GET_CODE (op) == CONST_VECTOR;
12281 if (vector)
12283 n_elts = CONST_VECTOR_NUNITS (op);
12284 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12286 else
12288 n_elts = 1;
12289 if (mode == VOIDmode)
12290 mode = DImode;
12291 innersize = GET_MODE_SIZE (mode);
12294 /* Vectors of float constants. */
12295 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12297 rtx el0 = CONST_VECTOR_ELT (op, 0);
12298 REAL_VALUE_TYPE r0;
12300 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12301 return -1;
12303 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12305 for (i = 1; i < n_elts; i++)
12307 rtx elt = CONST_VECTOR_ELT (op, i);
12308 REAL_VALUE_TYPE re;
12310 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12312 if (!REAL_VALUES_EQUAL (r0, re))
12313 return -1;
12316 if (modconst)
12317 *modconst = CONST_VECTOR_ELT (op, 0);
12319 if (elementwidth)
12320 *elementwidth = 0;
12322 if (el0 == CONST0_RTX (GET_MODE (el0)))
12323 return 19;
12324 else
12325 return 18;
12328 /* Splat vector constant out into a byte vector. */
12329 for (i = 0; i < n_elts; i++)
12331 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12332 unsigned HOST_WIDE_INT elpart;
12333 unsigned int part, parts;
12335 if (CONST_INT_P (el))
12337 elpart = INTVAL (el);
12338 parts = 1;
12340 else if (CONST_DOUBLE_P (el))
12342 elpart = CONST_DOUBLE_LOW (el);
12343 parts = 2;
12345 else
12346 gcc_unreachable ();
12348 for (part = 0; part < parts; part++)
12350 unsigned int byte;
12351 for (byte = 0; byte < innersize; byte++)
12353 bytes[idx++] = (elpart & 0xff) ^ invmask;
12354 elpart >>= BITS_PER_UNIT;
12356 if (CONST_DOUBLE_P (el))
12357 elpart = CONST_DOUBLE_HIGH (el);
12361 /* Sanity check. */
12362 gcc_assert (idx == GET_MODE_SIZE (mode));
12366 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12367 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12369 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12370 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12372 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12373 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12375 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12376 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12378 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12380 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12382 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12383 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12385 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12386 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12388 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12389 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12391 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12392 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12394 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12396 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12398 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12399 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12401 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12402 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12404 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12405 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12407 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12408 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12410 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12412 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12413 && bytes[i] == bytes[(i + 8) % idx]);
12415 while (0);
12417 if (immtype == -1)
12418 return -1;
12420 if (elementwidth)
12421 *elementwidth = elsize;
12423 if (modconst)
12425 unsigned HOST_WIDE_INT imm = 0;
12427 /* Un-invert bytes of recognized vector, if necessary. */
12428 if (invmask != 0)
12429 for (i = 0; i < idx; i++)
12430 bytes[i] ^= invmask;
12432 if (immtype == 17)
12434 /* FIXME: Broken on 32-bit H_W_I hosts. */
12435 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12437 for (i = 0; i < 8; i++)
12438 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12439 << (i * BITS_PER_UNIT);
12441 *modconst = GEN_INT (imm);
12443 else
12445 unsigned HOST_WIDE_INT imm = 0;
12447 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12448 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12450 *modconst = GEN_INT (imm);
12454 return immtype;
12455 #undef CHECK
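/* Standalone sketch, not part of the port: the byte-splat plus pattern
   check performed above, reduced to variant 0 of the table (a 32-bit
   element whose three high bytes are zero, replicated across the vector).
   As in the loop above, each element is split low byte first.  */
#include <stdio.h>
#include <stdint.h>

/* Split N_ELTS 32-bit elements into a byte vector.  */
static void
splat_bytes (const uint32_t *elts, int n_elts, unsigned char *bytes)
{
  for (int i = 0; i < n_elts; i++)
    for (int b = 0; b < 4; b++)
      bytes[i * 4 + b] = (elts[i] >> (8 * b)) & 0xff;
}

/* Variant 0: 00000000 00000000 00000000 abcdefgh in every element.  */
static int
is_variant_0 (const unsigned char *bytes, int nbytes)
{
  for (int i = 0; i < nbytes; i += 4)
    if (bytes[i] != bytes[0] || bytes[i + 1] || bytes[i + 2] || bytes[i + 3])
      return 0;
  return 1;
}

int
main (void)
{
  uint32_t v2si[2] = { 0x2a, 0x2a };          /* vmov.i32 dN, #42 */
  unsigned char bytes[8];
  splat_bytes (v2si, 2, bytes);
  printf ("variant 0 match: %d\n", is_variant_0 (bytes, 8));   /* 1 */
  return 0;
}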
12458 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12459 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12460 float elements), and a modified constant (whatever should be output for a
12461 VMOV) in *MODCONST. */
12464 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12465 rtx *modconst, int *elementwidth)
12467 rtx tmpconst;
12468 int tmpwidth;
12469 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12471 if (retval == -1)
12472 return 0;
12474 if (modconst)
12475 *modconst = tmpconst;
12477 if (elementwidth)
12478 *elementwidth = tmpwidth;
12480 return 1;
12483 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12484 the immediate is valid, write a constant suitable for using as an operand
12485 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12486 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12489 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12490 rtx *modconst, int *elementwidth)
12492 rtx tmpconst;
12493 int tmpwidth;
12494 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12496 if (retval < 0 || retval > 5)
12497 return 0;
12499 if (modconst)
12500 *modconst = tmpconst;
12502 if (elementwidth)
12503 *elementwidth = tmpwidth;
12505 return 1;
12508 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12509 the immediate is valid, write a constant suitable for using as an operand
12510 to VSHR/VSHL to *MODCONST and the corresponding element width to
12511 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12512 because they have different limitations. */
12515 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12516 rtx *modconst, int *elementwidth,
12517 bool isleftshift)
12519 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12520 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12521 unsigned HOST_WIDE_INT last_elt = 0;
12522 unsigned HOST_WIDE_INT maxshift;
12524 /* Split vector constant out into a byte vector. */
12525 for (i = 0; i < n_elts; i++)
12527 rtx el = CONST_VECTOR_ELT (op, i);
12528 unsigned HOST_WIDE_INT elpart;
12530 if (CONST_INT_P (el))
12531 elpart = INTVAL (el);
12532 else if (CONST_DOUBLE_P (el))
12533 return 0;
12534 else
12535 gcc_unreachable ();
12537 if (i != 0 && elpart != last_elt)
12538 return 0;
12540 last_elt = elpart;
12543 /* Shift less than element size. */
12544 maxshift = innersize * 8;
12546 if (isleftshift)
12548 /* Left shift immediate value can be from 0 to <size>-1. */
12549 if (last_elt >= maxshift)
12550 return 0;
12552 else
12554 /* Right shift immediate value can be from 1 to <size>. */
12555 if (last_elt == 0 || last_elt > maxshift)
12556 return 0;
12559 if (elementwidth)
12560 *elementwidth = innersize * 8;
12562 if (modconst)
12563 *modconst = CONST_VECTOR_ELT (op, 0);
12565 return 1;
12568 /* Return a string suitable for output of Neon immediate logic operation
12569 MNEM. */
12571 char *
12572 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12573 int inverse, int quad)
12575 int width, is_valid;
12576 static char templ[40];
12578 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12580 gcc_assert (is_valid != 0);
12582 if (quad)
12583 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12584 else
12585 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12587 return templ;
12590 /* Return a string suitable for output of Neon immediate shift operation
12591 (VSHR or VSHL) MNEM. */
12593 char *
12594 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12595 machine_mode mode, int quad,
12596 bool isleftshift)
12598 int width, is_valid;
12599 static char templ[40];
12601 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12602 gcc_assert (is_valid != 0);
12604 if (quad)
12605 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12606 else
12607 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12609 return templ;
12612 /* Output a sequence of pairwise operations to implement a reduction.
12613 NOTE: We do "too much work" here, because pairwise operations work on two
12614 registers-worth of operands in one go. Unfortunately, I don't think we can
12615 exploit those extra calculations to do the full operation in fewer steps.
12616 Although all vector elements of the result but the first are ignored, we
12617 actually calculate the same result in each of the elements. An alternative
12618 such as initially loading a vector with zero to use as each of the second
12619 operands would use up an additional register and take an extra instruction,
12620 for no particular gain. */
12622 void
12623 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12624 rtx (*reduc) (rtx, rtx, rtx))
12626 machine_mode inner = GET_MODE_INNER (mode);
12627 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12628 rtx tmpsum = op1;
12630 for (i = parts / 2; i >= 1; i /= 2)
12632 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12633 emit_insn (reduc (dest, tmpsum, tmpsum));
12634 tmpsum = dest;
12638 /* If VALS is a vector constant that can be loaded into a register
12639 using VDUP, generate instructions to do so and return an RTX to
12640 assign to the register. Otherwise return NULL_RTX. */
12642 static rtx
12643 neon_vdup_constant (rtx vals)
12645 machine_mode mode = GET_MODE (vals);
12646 machine_mode inner_mode = GET_MODE_INNER (mode);
12647 int n_elts = GET_MODE_NUNITS (mode);
12648 bool all_same = true;
12649 rtx x;
12650 int i;
12652 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12653 return NULL_RTX;
12655 for (i = 0; i < n_elts; ++i)
12657 x = XVECEXP (vals, 0, i);
12658 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12659 all_same = false;
12662 if (!all_same)
12663 /* The elements are not all the same. We could handle repeating
12664 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12665 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12666 vdup.i16). */
12667 return NULL_RTX;
12669 /* We can load this constant by using VDUP and a constant in a
12670 single ARM register. This will be cheaper than a vector
12671 load. */
12673 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12674 return gen_rtx_VEC_DUPLICATE (mode, x);
12677 /* Generate code to load VALS, which is a PARALLEL containing only
12678 constants (for vec_init) or CONST_VECTOR, efficiently into a
12679 register. Returns an RTX to copy into the register, or NULL_RTX
12680 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12683 neon_make_constant (rtx vals)
12685 machine_mode mode = GET_MODE (vals);
12686 rtx target;
12687 rtx const_vec = NULL_RTX;
12688 int n_elts = GET_MODE_NUNITS (mode);
12689 int n_const = 0;
12690 int i;
12692 if (GET_CODE (vals) == CONST_VECTOR)
12693 const_vec = vals;
12694 else if (GET_CODE (vals) == PARALLEL)
12696 /* A CONST_VECTOR must contain only CONST_INTs and
12697 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12698 Only store valid constants in a CONST_VECTOR. */
12699 for (i = 0; i < n_elts; ++i)
12701 rtx x = XVECEXP (vals, 0, i);
12702 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12703 n_const++;
12705 if (n_const == n_elts)
12706 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12708 else
12709 gcc_unreachable ();
12711 if (const_vec != NULL
12712 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12713 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12714 return const_vec;
12715 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12716 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12717 pipeline cycle; creating the constant takes one or two ARM
12718 pipeline cycles. */
12719 return target;
12720 else if (const_vec != NULL_RTX)
12721 /* Load from constant pool. On Cortex-A8 this takes two cycles
12722 (for either double or quad vectors). We can not take advantage
12723 of single-cycle VLD1 because we need a PC-relative addressing
12724 mode. */
12725 return const_vec;
12726 else
12727 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12728 We can not construct an initializer. */
12729 return NULL_RTX;
12732 /* Initialize vector TARGET to VALS. */
12734 void
12735 neon_expand_vector_init (rtx target, rtx vals)
12737 machine_mode mode = GET_MODE (target);
12738 machine_mode inner_mode = GET_MODE_INNER (mode);
12739 int n_elts = GET_MODE_NUNITS (mode);
12740 int n_var = 0, one_var = -1;
12741 bool all_same = true;
12742 rtx x, mem;
12743 int i;
12745 for (i = 0; i < n_elts; ++i)
12747 x = XVECEXP (vals, 0, i);
12748 if (!CONSTANT_P (x))
12749 ++n_var, one_var = i;
12751 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12752 all_same = false;
12755 if (n_var == 0)
12757 rtx constant = neon_make_constant (vals);
12758 if (constant != NULL_RTX)
12760 emit_move_insn (target, constant);
12761 return;
12765 /* Splat a single non-constant element if we can. */
12766 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12768 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12769 emit_insn (gen_rtx_SET (VOIDmode, target,
12770 gen_rtx_VEC_DUPLICATE (mode, x)));
12771 return;
12774 /* One field is non-constant. Load constant then overwrite varying
12775 field. This is more efficient than using the stack. */
12776 if (n_var == 1)
12778 rtx copy = copy_rtx (vals);
12779 rtx index = GEN_INT (one_var);
12781 /* Load constant part of vector, substitute neighboring value for
12782 varying element. */
12783 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12784 neon_expand_vector_init (target, copy);
12786 /* Insert variable. */
12787 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12788 switch (mode)
12790 case V8QImode:
12791 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12792 break;
12793 case V16QImode:
12794 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12795 break;
12796 case V4HImode:
12797 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12798 break;
12799 case V8HImode:
12800 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12801 break;
12802 case V2SImode:
12803 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12804 break;
12805 case V4SImode:
12806 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12807 break;
12808 case V2SFmode:
12809 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12810 break;
12811 case V4SFmode:
12812 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12813 break;
12814 case V2DImode:
12815 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12816 break;
12817 default:
12818 gcc_unreachable ();
12820 return;
12823 /* Construct the vector in memory one field at a time
12824 and load the whole vector. */
12825 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12826 for (i = 0; i < n_elts; i++)
12827 emit_move_insn (adjust_address_nv (mem, inner_mode,
12828 i * GET_MODE_SIZE (inner_mode)),
12829 XVECEXP (vals, 0, i));
12830 emit_move_insn (target, mem);
12833 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12834 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12835 reported source locations are bogus. */
12837 static void
12838 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12839 const char *err)
12841 HOST_WIDE_INT lane;
12843 gcc_assert (CONST_INT_P (operand));
12845 lane = INTVAL (operand);
12847 if (lane < low || lane >= high)
12848 error (err);
12851 /* Bounds-check lanes. */
12853 void
12854 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12856 bounds_check (operand, low, high, "lane out of range");
12859 /* Bounds-check constants. */
12861 void
12862 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12864 bounds_check (operand, low, high, "constant out of range");
12867 HOST_WIDE_INT
12868 neon_element_bits (machine_mode mode)
12870 if (mode == DImode)
12871 return GET_MODE_BITSIZE (mode);
12872 else
12873 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12877 /* Predicates for `match_operand' and `match_operator'. */
12879 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12880 WB is true if full writeback address modes are allowed and is false
12881 if limited writeback address modes (POST_INC and PRE_DEC) are
12882 allowed. */
12885 arm_coproc_mem_operand (rtx op, bool wb)
12887 rtx ind;
12889 /* Reject eliminable registers. */
12890 if (! (reload_in_progress || reload_completed || lra_in_progress)
12891 && ( reg_mentioned_p (frame_pointer_rtx, op)
12892 || reg_mentioned_p (arg_pointer_rtx, op)
12893 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12894 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12895 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12896 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12897 return FALSE;
12899 /* Constants are converted into offsets from labels. */
12900 if (!MEM_P (op))
12901 return FALSE;
12903 ind = XEXP (op, 0);
12905 if (reload_completed
12906 && (GET_CODE (ind) == LABEL_REF
12907 || (GET_CODE (ind) == CONST
12908 && GET_CODE (XEXP (ind, 0)) == PLUS
12909 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12910 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12911 return TRUE;
12913 /* Match: (mem (reg)). */
12914 if (REG_P (ind))
12915 return arm_address_register_rtx_p (ind, 0);
12917 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12918 acceptable in any case (subject to verification by
12919 arm_address_register_rtx_p). We need WB to be true to accept
12920 PRE_INC and POST_DEC. */
12921 if (GET_CODE (ind) == POST_INC
12922 || GET_CODE (ind) == PRE_DEC
12923 || (wb
12924 && (GET_CODE (ind) == PRE_INC
12925 || GET_CODE (ind) == POST_DEC)))
12926 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12928 if (wb
12929 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12930 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12931 && GET_CODE (XEXP (ind, 1)) == PLUS
12932 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12933 ind = XEXP (ind, 1);
12935 /* Match:
12936 (plus (reg)
12937 (const)). */
12938 if (GET_CODE (ind) == PLUS
12939 && REG_P (XEXP (ind, 0))
12940 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12941 && CONST_INT_P (XEXP (ind, 1))
12942 && INTVAL (XEXP (ind, 1)) > -1024
12943 && INTVAL (XEXP (ind, 1)) < 1024
12944 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12945 return TRUE;
12947 return FALSE;
12950 /* Return TRUE if OP is a memory operand which we can load or store a vector
12951 to/from. TYPE is one of the following values:
12952 0 - Vector load/store (vldr)
12953 1 - Core registers (ldm)
12954 2 - Element/structure loads (vld1)
12957 neon_vector_mem_operand (rtx op, int type, bool strict)
12959 rtx ind;
12961 /* Reject eliminable registers. */
12962 if (! (reload_in_progress || reload_completed)
12963 && ( reg_mentioned_p (frame_pointer_rtx, op)
12964 || reg_mentioned_p (arg_pointer_rtx, op)
12965 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12966 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12967 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12968 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12969 return !strict;
12971 /* Constants are converted into offsets from labels. */
12972 if (!MEM_P (op))
12973 return FALSE;
12975 ind = XEXP (op, 0);
12977 if (reload_completed
12978 && (GET_CODE (ind) == LABEL_REF
12979 || (GET_CODE (ind) == CONST
12980 && GET_CODE (XEXP (ind, 0)) == PLUS
12981 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12982 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12983 return TRUE;
12985 /* Match: (mem (reg)). */
12986 if (REG_P (ind))
12987 return arm_address_register_rtx_p (ind, 0);
12989 /* Allow post-increment with Neon registers. */
12990 if ((type != 1 && GET_CODE (ind) == POST_INC)
12991 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12992 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12994 /* Allow post-increment by register for VLDn */
12995 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12996 && GET_CODE (XEXP (ind, 1)) == PLUS
12997 && REG_P (XEXP (XEXP (ind, 1), 1)))
12998 return true;
13000 /* Match:
13001 (plus (reg)
13002 (const)). */
13003 if (type == 0
13004 && GET_CODE (ind) == PLUS
13005 && REG_P (XEXP (ind, 0))
13006 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13007 && CONST_INT_P (XEXP (ind, 1))
13008 && INTVAL (XEXP (ind, 1)) > -1024
13009 /* For quad modes, we restrict the constant offset to be slightly less
13010 than what the instruction format permits. We have no such constraint
13011 on double mode offsets. (This must match arm_legitimate_index_p.) */
13012 && (INTVAL (XEXP (ind, 1))
13013 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13014 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13015 return TRUE;
13017 return FALSE;
13020 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13021 type. */
13023 neon_struct_mem_operand (rtx op)
13025 rtx ind;
13027 /* Reject eliminable registers. */
13028 if (! (reload_in_progress || reload_completed)
13029 && ( reg_mentioned_p (frame_pointer_rtx, op)
13030 || reg_mentioned_p (arg_pointer_rtx, op)
13031 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13032 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13033 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13034 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13035 return FALSE;
13037 /* Constants are converted into offsets from labels. */
13038 if (!MEM_P (op))
13039 return FALSE;
13041 ind = XEXP (op, 0);
13043 if (reload_completed
13044 && (GET_CODE (ind) == LABEL_REF
13045 || (GET_CODE (ind) == CONST
13046 && GET_CODE (XEXP (ind, 0)) == PLUS
13047 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13048 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13049 return TRUE;
13051 /* Match: (mem (reg)). */
13052 if (REG_P (ind))
13053 return arm_address_register_rtx_p (ind, 0);
13055 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13056 if (GET_CODE (ind) == POST_INC
13057 || GET_CODE (ind) == PRE_DEC)
13058 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13060 return FALSE;
13063 /* Return true if X is a register that will be eliminated later on. */
13065 arm_eliminable_register (rtx x)
13067 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13068 || REGNO (x) == ARG_POINTER_REGNUM
13069 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13070 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13073 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13074 coprocessor registers. Otherwise return NO_REGS. */
13076 enum reg_class
13077 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13079 if (mode == HFmode)
13081 if (!TARGET_NEON_FP16)
13082 return GENERAL_REGS;
13083 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13084 return NO_REGS;
13085 return GENERAL_REGS;
13088 /* The neon move patterns handle all legitimate vector and struct
13089 addresses. */
13090 if (TARGET_NEON
13091 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13092 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13093 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13094 || VALID_NEON_STRUCT_MODE (mode)))
13095 return NO_REGS;
13097 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13098 return NO_REGS;
13100 return GENERAL_REGS;
13103 /* Values which must be returned in the most-significant end of the return
13104 register. */
13106 static bool
13107 arm_return_in_msb (const_tree valtype)
13109 return (TARGET_AAPCS_BASED
13110 && BYTES_BIG_ENDIAN
13111 && (AGGREGATE_TYPE_P (valtype)
13112 || TREE_CODE (valtype) == COMPLEX_TYPE
13113 || FIXED_POINT_TYPE_P (valtype)));
13116 /* Return TRUE if X references a SYMBOL_REF. */
13118 symbol_mentioned_p (rtx x)
13120 const char * fmt;
13121 int i;
13123 if (GET_CODE (x) == SYMBOL_REF)
13124 return 1;
13126 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13127 are constant offsets, not symbols. */
13128 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13129 return 0;
13131 fmt = GET_RTX_FORMAT (GET_CODE (x));
13133 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13135 if (fmt[i] == 'E')
13137 int j;
13139 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13140 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13141 return 1;
13143 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13144 return 1;
13147 return 0;
13150 /* Return TRUE if X references a LABEL_REF. */
13152 label_mentioned_p (rtx x)
13154 const char * fmt;
13155 int i;
13157 if (GET_CODE (x) == LABEL_REF)
13158 return 1;
13160 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13161 instruction, but they are constant offsets, not symbols. */
13162 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13163 return 0;
13165 fmt = GET_RTX_FORMAT (GET_CODE (x));
13166 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13168 if (fmt[i] == 'E')
13170 int j;
13172 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13173 if (label_mentioned_p (XVECEXP (x, i, j)))
13174 return 1;
13176 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13177 return 1;
13180 return 0;
13184 tls_mentioned_p (rtx x)
13186 switch (GET_CODE (x))
13188 case CONST:
13189 return tls_mentioned_p (XEXP (x, 0));
13191 case UNSPEC:
13192 if (XINT (x, 1) == UNSPEC_TLS)
13193 return 1;
13195 default:
13196 return 0;
13200 /* Must not copy any rtx that uses a pc-relative address. */
13202 static bool
13203 arm_cannot_copy_insn_p (rtx_insn *insn)
13205 /* The tls call insn cannot be copied, as it is paired with a data
13206 word. */
13207 if (recog_memoized (insn) == CODE_FOR_tlscall)
13208 return true;
13210 subrtx_iterator::array_type array;
13211 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13213 const_rtx x = *iter;
13214 if (GET_CODE (x) == UNSPEC
13215 && (XINT (x, 1) == UNSPEC_PIC_BASE
13216 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13217 return true;
13219 return false;
13222 enum rtx_code
13223 minmax_code (rtx x)
13225 enum rtx_code code = GET_CODE (x);
13227 switch (code)
13229 case SMAX:
13230 return GE;
13231 case SMIN:
13232 return LE;
13233 case UMIN:
13234 return LEU;
13235 case UMAX:
13236 return GEU;
13237 default:
13238 gcc_unreachable ();
13242 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13244 bool
13245 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13246 int *mask, bool *signed_sat)
13248 /* The high bound must be a power of two minus one. */
13249 int log = exact_log2 (INTVAL (hi_bound) + 1);
13250 if (log == -1)
13251 return false;
13253 /* The low bound is either zero (for usat) or one less than the
13254 negation of the high bound (for ssat). */
13255 if (INTVAL (lo_bound) == 0)
13257 if (mask)
13258 *mask = log;
13259 if (signed_sat)
13260 *signed_sat = false;
13262 return true;
13265 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13267 if (mask)
13268 *mask = log + 1;
13269 if (signed_sat)
13270 *signed_sat = true;
13272 return true;
13275 return false;
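/* Standalone sketch, not part of the port: the bound-to-saturation-width
   mapping implemented above.  A high bound of 2^n - 1 with a low bound of
   0 matches usat #n; the same high bound with a low bound of -2^n matches
   ssat #(n+1).  */
#include <stdio.h>

/* Return the saturation width, or 0 if the bounds do not match usat/ssat.
   *IS_SIGNED is set accordingly.  */
static int
sat_width (long lo, long hi, int *is_signed)
{
  int log = 0;
  while ((1L << log) - 1 < hi)
    log++;
  if ((1L << log) - 1 != hi)
    return 0;                                /* hi is not 2^n - 1 */
  if (lo == 0)
    {
      *is_signed = 0;
      return log;                            /* usat #log */
    }
  if (lo == -hi - 1)
    {
      *is_signed = 1;
      return log + 1;                        /* ssat #(log + 1) */
    }
  return 0;
}

int
main (void)
{
  int s;
  int w = sat_width (0, 255, &s);
  printf ("[0, 255]    -> width %d, signed %d\n", w, s);    /* usat #8 */
  w = sat_width (-128, 127, &s);
  printf ("[-128, 127] -> width %d, signed %d\n", w, s);    /* ssat #8 */
  return 0;
}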
13278 /* Return 1 if memory locations are adjacent. */
13280 adjacent_mem_locations (rtx a, rtx b)
13282 /* We don't guarantee to preserve the order of these memory refs. */
13283 if (volatile_refs_p (a) || volatile_refs_p (b))
13284 return 0;
13286 if ((REG_P (XEXP (a, 0))
13287 || (GET_CODE (XEXP (a, 0)) == PLUS
13288 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13289 && (REG_P (XEXP (b, 0))
13290 || (GET_CODE (XEXP (b, 0)) == PLUS
13291 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13293 HOST_WIDE_INT val0 = 0, val1 = 0;
13294 rtx reg0, reg1;
13295 int val_diff;
13297 if (GET_CODE (XEXP (a, 0)) == PLUS)
13299 reg0 = XEXP (XEXP (a, 0), 0);
13300 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13302 else
13303 reg0 = XEXP (a, 0);
13305 if (GET_CODE (XEXP (b, 0)) == PLUS)
13307 reg1 = XEXP (XEXP (b, 0), 0);
13308 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13310 else
13311 reg1 = XEXP (b, 0);
13313 /* Don't accept any offset that will require multiple
13314 instructions to handle, since this would cause the
13315 arith_adjacentmem pattern to output an overlong sequence. */
13316 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13317 return 0;
13319 /* Don't allow an eliminable register: register elimination can make
13320 the offset too large. */
13321 if (arm_eliminable_register (reg0))
13322 return 0;
13324 val_diff = val1 - val0;
13326 if (arm_ld_sched)
13328 /* If the target has load delay slots, then there's no benefit
13329 to using an ldm instruction unless the offset is zero and
13330 we are optimizing for size. */
13331 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13332 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13333 && (val_diff == 4 || val_diff == -4));
13336 return ((REGNO (reg0) == REGNO (reg1))
13337 && (val_diff == 4 || val_diff == -4));
13340 return 0;
13343 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13344 for load operations, false for store operations. CONSECUTIVE is true
13345 if the register numbers in the operation must be consecutive in the register
13346 bank. RETURN_PC is true if a value is to be loaded into the PC.
13347 The pattern we are trying to match for load is:
13348 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13349 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13352 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13354 where
13355 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13356 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13357 3. If consecutive is TRUE, then for kth register being loaded,
13358 REGNO (R_dk) = REGNO (R_d0) + k.
13359 The pattern for store is similar. */
13360 bool
13361 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13362 bool consecutive, bool return_pc)
13364 HOST_WIDE_INT count = XVECLEN (op, 0);
13365 rtx reg, mem, addr;
13366 unsigned regno;
13367 unsigned first_regno;
13368 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13369 rtx elt;
13370 bool addr_reg_in_reglist = false;
13371 bool update = false;
13372 int reg_increment;
13373 int offset_adj;
13374 int regs_per_val;
13376 /* If not in SImode, then registers must be consecutive
13377 (e.g., VLDM instructions for DFmode). */
13378 gcc_assert ((mode == SImode) || consecutive);
13379 /* Setting return_pc for stores is illegal. */
13380 gcc_assert (!return_pc || load);
13382 /* Set up the increments and the regs per val based on the mode. */
13383 reg_increment = GET_MODE_SIZE (mode);
13384 regs_per_val = reg_increment / 4;
13385 offset_adj = return_pc ? 1 : 0;
13387 if (count <= 1
13388 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13389 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13390 return false;
13392 /* Check if this is a write-back. */
13393 elt = XVECEXP (op, 0, offset_adj);
13394 if (GET_CODE (SET_SRC (elt)) == PLUS)
13396 i++;
13397 base = 1;
13398 update = true;
13400 /* The offset adjustment must be the number of registers being
13401 popped times the size of a single register. */
13402 if (!REG_P (SET_DEST (elt))
13403 || !REG_P (XEXP (SET_SRC (elt), 0))
13404 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13405 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13406 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13407 ((count - 1 - offset_adj) * reg_increment))
13408 return false;
13411 i = i + offset_adj;
13412 base = base + offset_adj;
13413 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13414 success depends on the type: VLDM can do just one reg,
13415 LDM must do at least two. */
13416 if ((count <= i) && (mode == SImode))
13417 return false;
13419 elt = XVECEXP (op, 0, i - 1);
13420 if (GET_CODE (elt) != SET)
13421 return false;
13423 if (load)
13425 reg = SET_DEST (elt);
13426 mem = SET_SRC (elt);
13428 else
13430 reg = SET_SRC (elt);
13431 mem = SET_DEST (elt);
13434 if (!REG_P (reg) || !MEM_P (mem))
13435 return false;
13437 regno = REGNO (reg);
13438 first_regno = regno;
13439 addr = XEXP (mem, 0);
13440 if (GET_CODE (addr) == PLUS)
13442 if (!CONST_INT_P (XEXP (addr, 1)))
13443 return false;
13445 offset = INTVAL (XEXP (addr, 1));
13446 addr = XEXP (addr, 0);
13449 if (!REG_P (addr))
13450 return false;
13452 /* Don't allow SP to be loaded unless it is also the base register. It
13453 guarantees that SP is reset correctly when an LDM instruction
13454 is interrupted. Otherwise, we might end up with a corrupt stack. */
13455 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13456 return false;
13458 for (; i < count; i++)
13460 elt = XVECEXP (op, 0, i);
13461 if (GET_CODE (elt) != SET)
13462 return false;
13464 if (load)
13466 reg = SET_DEST (elt);
13467 mem = SET_SRC (elt);
13469 else
13471 reg = SET_SRC (elt);
13472 mem = SET_DEST (elt);
13475 if (!REG_P (reg)
13476 || GET_MODE (reg) != mode
13477 || REGNO (reg) <= regno
13478 || (consecutive
13479 && (REGNO (reg) !=
13480 (unsigned int) (first_regno + regs_per_val * (i - base))))
13481 /* Don't allow SP to be loaded unless it is also the base register. It
13482 guarantees that SP is reset correctly when an LDM instruction
13483 is interrupted. Otherwise, we might end up with a corrupt stack. */
13484 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13485 || !MEM_P (mem)
13486 || GET_MODE (mem) != mode
13487 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13488 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13489 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13490 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13491 offset + (i - base) * reg_increment))
13492 && (!REG_P (XEXP (mem, 0))
13493 || offset + (i - base) * reg_increment != 0)))
13494 return false;
13496 regno = REGNO (reg);
13497 if (regno == REGNO (addr))
13498 addr_reg_in_reglist = true;
13501 if (load)
13503 if (update && addr_reg_in_reglist)
13504 return false;
13506 /* For Thumb-1, the address register is always modified - either by write-back
13507 or by explicit load. If the pattern does not describe an update,
13508 then the address register must be in the list of loaded registers. */
13509 if (TARGET_THUMB1)
13510 return update || addr_reg_in_reglist;
13513 return true;
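/* Standalone sketch, not part of the port: the three numbered conditions
   from the comment before ldm_stm_operation_p, checked on plain arrays of
   register numbers and memory offsets instead of RTL.  REG_INCREMENT
   corresponds to GET_MODE_SIZE (mode).  */
#include <stdio.h>

static int
ldm_shape_ok (const int *regno, const long *offset, int n,
              long first_offset, int reg_increment, int consecutive)
{
  for (int k = 0; k < n; k++)
    {
      /* Offsets must step by exactly one register-width per element.  */
      if (offset[k] != first_offset + (long) k * reg_increment)
        return 0;
      /* Register numbers must be strictly ascending (rule 2), and
         consecutive from the first one if CONSECUTIVE (rule 3).  */
      if (k > 0 && regno[k] <= regno[k - 1])
        return 0;
      if (consecutive && regno[k] != regno[0] + k)
        return 0;
    }
  return 1;
}

int
main (void)
{
  int regs[] = { 4, 5, 6, 7 };
  long offs[] = { 0, 4, 8, 12 };
  printf ("ldmia-style match: %d\n",
          ldm_shape_ok (regs, offs, 4, 0, 4, 1));      /* 1 */
  int bad_regs[] = { 4, 3, 6, 7 };                     /* not ascending */
  printf ("descending regs:   %d\n",
          ldm_shape_ok (bad_regs, offs, 4, 0, 4, 0));  /* 0 */
  return 0;
}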
13516 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13517 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13518 instruction. ADD_OFFSET is nonzero if the base address register needs
13519 to be modified with an add instruction before we can use it. */
13521 static bool
13522 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13523 int nops, HOST_WIDE_INT add_offset)
13525 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13526 if the offset isn't small enough. The reason 2 ldrs are faster
13527 is because these ARMs are able to do more than one cache access
13528 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13529 whilst the ARM8 has a double bandwidth cache. This means that
13530 these cores can do both an instruction fetch and a data fetch in
13531 a single cycle, so the trick of calculating the address into a
13532 scratch register (one of the result regs) and then doing a load
13533 multiple actually becomes slower (and no smaller in code size).
13534 That is the transformation
13536 ldr rd1, [rbase + offset]
13537 ldr rd2, [rbase + offset + 4]
13541 add rd1, rbase, offset
13542 ldmia rd1, {rd1, rd2}
13544 produces worse code -- '3 cycles + any stalls on rd2' instead of
13545 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13546 access per cycle, the first sequence could never complete in less
13547 than 6 cycles, whereas the ldm sequence would only take 5 and
13548 would make better use of sequential accesses if not hitting the
13549 cache.
13551 We cheat here and test 'arm_ld_sched' which we currently know to
13552 only be true for the ARM8, ARM9 and StrongARM. If this ever
13553 changes, then the test below needs to be reworked. */
13554 if (nops == 2 && arm_ld_sched && add_offset != 0)
13555 return false;
13557 /* XScale has load-store double instructions, but they have stricter
13558 alignment requirements than load-store multiple, so we cannot
13559 use them.
13561 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13562 the pipeline until completion.
13564 		NREGS		CYCLES
		  1		  3
		  2		  4
		  3		  5
		  4		  6
13570 An ldr instruction takes 1-3 cycles, but does not block the
13571 pipeline.
13573 NREGS CYCLES
13574 1 1-3
13575 2 2-6
13576 3 3-9
13577 4 4-12
13579 Best case ldr will always win. However, the more ldr instructions
13580 we issue, the less likely we are to be able to schedule them well.
13581 Using ldr instructions also increases code size.
13583 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13584 for counts of 3 or 4 regs. */
13585 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13586 return false;
13587 return true;
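/* Standalone sketch, not part of the port: the XScale cycle counts quoted
   in the comment above.  ldm takes 2 + NREGS cycles and blocks the
   pipeline; each ldr takes 1-3 cycles and does not block, so the best and
   worst cases for a sequence of ldrs are NREGS and 3 * NREGS cycles.  */
#include <stdio.h>

int
main (void)
{
  for (int nregs = 1; nregs <= 4; nregs++)
    printf ("nregs %d: ldm %d cycles, ldr best %d / worst %d cycles\n",
            nregs, 2 + nregs, nregs, 3 * nregs);
  return 0;
}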
13590 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13591 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13592 an array ORDER which describes the sequence to use when accessing the
13593 offsets that produces an ascending order. In this sequence, each
13594 offset must be larger by exactly 4 than the previous one. ORDER[0]
13595 must have been filled in with the lowest offset by the caller.
13596 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13597 we use to verify that ORDER produces an ascending order of registers.
13598 Return true if it was possible to construct such an order, false if
13599 not. */
13601 static bool
13602 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13603 int *unsorted_regs)
13605 int i;
13606 for (i = 1; i < nops; i++)
13608 int j;
13610 order[i] = order[i - 1];
13611 for (j = 0; j < nops; j++)
13612 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13614 /* We must find exactly one offset that is higher than the
13615 previous one by 4. */
13616 if (order[i] != order[i - 1])
13617 return false;
13618 order[i] = j;
13620 if (order[i] == order[i - 1])
13621 return false;
13622 /* The register numbers must be ascending. */
13623 if (unsorted_regs != NULL
13624 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13625 return false;
13627 return true;
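/* Worked example (values are illustrative only): with NOPS == 4 and
   UNSORTED_OFFSETS == {8, 4, 12, 0}, the caller seeds ORDER[0] = 3 (the
   index of offset 0) and this function fills ORDER = {3, 1, 0, 2}, i.e.
   offsets 0, 4, 8, 12.  Offsets such as {0, 4, 16, 20} fail, because no
   offset is exactly 4 greater than 4.  */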
13630 /* Used to determine in a peephole whether a sequence of load
13631 instructions can be changed into a load-multiple instruction.
13632 NOPS is the number of separate load instructions we are examining. The
13633 first NOPS entries in OPERANDS are the destination registers, the
13634 next NOPS entries are memory operands. If this function is
13635 successful, *BASE is set to the common base register of the memory
13636 accesses; *LOAD_OFFSET is set to the first memory location's offset
13637 from that base register.
13638 REGS is an array filled in with the destination register numbers.
13639 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13640 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13641 the sequence of registers in REGS matches the loads from ascending memory
13642 locations, and the function verifies that the register numbers are
13643 themselves ascending. If CHECK_REGS is false, the register numbers
13644 are stored in the order they are found in the operands. */
13645 static int
13646 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13647 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13649 int unsorted_regs[MAX_LDM_STM_OPS];
13650 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13651 int order[MAX_LDM_STM_OPS];
13652 rtx base_reg_rtx = NULL;
13653 int base_reg = -1;
13654 int i, ldm_case;
13656 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13657 easily extended if required. */
13658 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13660 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13662 /* Loop over the operands and check that the memory references are
13663 suitable (i.e. immediate offsets from the same base register). At
13664 the same time, extract the target register, and the memory
13665 offsets. */
13666 for (i = 0; i < nops; i++)
13668 rtx reg;
13669 rtx offset;
13671 /* Convert a subreg of a mem into the mem itself. */
13672 if (GET_CODE (operands[nops + i]) == SUBREG)
13673 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13675 gcc_assert (MEM_P (operands[nops + i]));
13677 /* Don't reorder volatile memory references; it doesn't seem worth
13678 looking for the case where the order is ok anyway. */
13679 if (MEM_VOLATILE_P (operands[nops + i]))
13680 return 0;
13682 offset = const0_rtx;
13684 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13685 || (GET_CODE (reg) == SUBREG
13686 && REG_P (reg = SUBREG_REG (reg))))
13687 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13688 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13689 || (GET_CODE (reg) == SUBREG
13690 && REG_P (reg = SUBREG_REG (reg))))
13691 && (CONST_INT_P (offset
13692 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13694 if (i == 0)
13696 base_reg = REGNO (reg);
13697 base_reg_rtx = reg;
13698 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13699 return 0;
13701 else if (base_reg != (int) REGNO (reg))
13702 /* Not addressed from the same base register. */
13703 return 0;
13705 unsorted_regs[i] = (REG_P (operands[i])
13706 ? REGNO (operands[i])
13707 : REGNO (SUBREG_REG (operands[i])));
13709 /* If it isn't an integer register, or if it overwrites the
13710 base register but isn't the last insn in the list, then
13711 we can't do this. */
13712 if (unsorted_regs[i] < 0
13713 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13714 || unsorted_regs[i] > 14
13715 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13716 return 0;
13718 /* Don't allow SP to be loaded unless it is also the base
13719 register. This guarantees that SP is reset correctly when
13720 an LDM instruction is interrupted. Otherwise, we might
13721 end up with a corrupt stack. */
13722 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13723 return 0;
13725 unsorted_offsets[i] = INTVAL (offset);
13726 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13727 order[0] = i;
13729 else
13730 /* Not a suitable memory address. */
13731 return 0;
13734 /* All the useful information has now been extracted from the
13735 operands into unsorted_regs and unsorted_offsets; additionally,
13736 order[0] has been set to the lowest offset in the list. Sort
13737 the offsets into order, verifying that they are adjacent, and
13738 check that the register numbers are ascending. */
13739 if (!compute_offset_order (nops, unsorted_offsets, order,
13740 check_regs ? unsorted_regs : NULL))
13741 return 0;
13743 if (saved_order)
13744 memcpy (saved_order, order, sizeof order);
13746 if (base)
13748 *base = base_reg;
13750 for (i = 0; i < nops; i++)
13751 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13753 *load_offset = unsorted_offsets[order[0]];
13756 if (TARGET_THUMB1
13757 && !peep2_reg_dead_p (nops, base_reg_rtx))
13758 return 0;
13760 if (unsorted_offsets[order[0]] == 0)
13761 ldm_case = 1; /* ldmia */
13762 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13763 ldm_case = 2; /* ldmib */
13764 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13765 ldm_case = 3; /* ldmda */
13766 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13767 ldm_case = 4; /* ldmdb */
13768 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13769 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13770 ldm_case = 5;
13771 else
13772 return 0;
13774 if (!multiple_operation_profitable_p (false, nops,
13775 ldm_case == 5
13776 ? unsorted_offsets[order[0]] : 0))
13777 return 0;
13779 return ldm_case;
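/* Illustrative summary (not itself part of the transformation) of what the
   caller will emit for each return value, for base register rb and an
   ascending register list:

	case 1:	ldmia	rb, {...}	lowest offset == 0
	case 2:	ldmib	rb, {...}	lowest offset == 4 (ARM only)
	case 3:	ldmda	rb, {...}	highest offset == 0 (ARM only)
	case 4:	ldmdb	rb, {...}	highest offset == -4
	case 5:	add rt, rb, #offset followed by ldmia rt, {...}
		(see gen_ldm_seq below).  */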
13782 /* Used to determine in a peephole whether a sequence of store instructions can
13783 be changed into a store-multiple instruction.
13784 NOPS is the number of separate store instructions we are examining.
13785 NOPS_TOTAL is the total number of instructions recognized by the peephole
13786 pattern.
13787 The first NOPS entries in OPERANDS are the source registers, the next
13788 NOPS entries are memory operands. If this function is successful, *BASE is
13789 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13790 to the first memory location's offset from that base register. REGS is an
13791 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13792 likewise filled with the corresponding rtx's.
13793 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13794 numbers to an ascending order of stores.
13795 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13796 from ascending memory locations, and the function verifies that the register
13797 numbers are themselves ascending. If CHECK_REGS is false, the register
13798 numbers are stored in the order they are found in the operands. */
13799 static int
13800 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13801 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13802 HOST_WIDE_INT *load_offset, bool check_regs)
13804 int unsorted_regs[MAX_LDM_STM_OPS];
13805 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13806 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13807 int order[MAX_LDM_STM_OPS];
13808 int base_reg = -1;
13809 rtx base_reg_rtx = NULL;
13810 int i, stm_case;
13812 /* Write-back of the base register is currently only supported for Thumb-1. */
13813 int base_writeback = TARGET_THUMB1;
13815 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13816 easily extended if required. */
13817 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13819 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13821 /* Loop over the operands and check that the memory references are
13822 suitable (i.e. immediate offsets from the same base register). At
13823 the same time, extract the target register, and the memory
13824 offsets. */
13825 for (i = 0; i < nops; i++)
13827 rtx reg;
13828 rtx offset;
13830 /* Convert a subreg of a mem into the mem itself. */
13831 if (GET_CODE (operands[nops + i]) == SUBREG)
13832 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13834 gcc_assert (MEM_P (operands[nops + i]));
13836 /* Don't reorder volatile memory references; it doesn't seem worth
13837 looking for the case where the order is ok anyway. */
13838 if (MEM_VOLATILE_P (operands[nops + i]))
13839 return 0;
13841 offset = const0_rtx;
13843 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13844 || (GET_CODE (reg) == SUBREG
13845 && REG_P (reg = SUBREG_REG (reg))))
13846 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13847 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13848 || (GET_CODE (reg) == SUBREG
13849 && REG_P (reg = SUBREG_REG (reg))))
13850 && (CONST_INT_P (offset
13851 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13853 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13854 ? operands[i] : SUBREG_REG (operands[i]));
13855 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13857 if (i == 0)
13859 base_reg = REGNO (reg);
13860 base_reg_rtx = reg;
13861 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13862 return 0;
13864 else if (base_reg != (int) REGNO (reg))
13865 /* Not addressed from the same base register. */
13866 return 0;
13868 /* If it isn't an integer register, then we can't do this. */
13869 if (unsorted_regs[i] < 0
13870 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13871 /* The effects are unpredictable if the base register is
13872 both updated and stored. */
13873 || (base_writeback && unsorted_regs[i] == base_reg)
13874 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13875 || unsorted_regs[i] > 14)
13876 return 0;
13878 unsorted_offsets[i] = INTVAL (offset);
13879 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13880 order[0] = i;
13882 else
13883 /* Not a suitable memory address. */
13884 return 0;
13887 /* All the useful information has now been extracted from the
13888 operands into unsorted_regs and unsorted_offsets; additionally,
13889 order[0] has been set to the lowest offset in the list. Sort
13890 the offsets into order, verifying that they are adjacent, and
13891 check that the register numbers are ascending. */
13892 if (!compute_offset_order (nops, unsorted_offsets, order,
13893 check_regs ? unsorted_regs : NULL))
13894 return 0;
13896 if (saved_order)
13897 memcpy (saved_order, order, sizeof order);
13899 if (base)
13901 *base = base_reg;
13903 for (i = 0; i < nops; i++)
13905 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13906 if (reg_rtxs)
13907 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13910 *load_offset = unsorted_offsets[order[0]];
13913 if (TARGET_THUMB1
13914 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13915 return 0;
13917 if (unsorted_offsets[order[0]] == 0)
13918 stm_case = 1; /* stmia */
13919 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13920 stm_case = 2; /* stmib */
13921 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13922 stm_case = 3; /* stmda */
13923 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13924 stm_case = 4; /* stmdb */
13925 else
13926 return 0;
13928 if (!multiple_operation_profitable_p (false, nops, 0))
13929 return 0;
13931 return stm_case;
13934 /* Routines for use in generating RTL. */
13936 /* Generate a load-multiple instruction. COUNT is the number of loads in
13937 the instruction; REGS and MEMS are arrays containing the operands.
13938 BASEREG is the base register to be used in addressing the memory operands.
13939 WBACK_OFFSET is nonzero if the instruction should update the base
13940 register. */
13942 static rtx
13943 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13944 HOST_WIDE_INT wback_offset)
13946 int i = 0, j;
13947 rtx result;
13949 if (!multiple_operation_profitable_p (false, count, 0))
13951 rtx seq;
13953 start_sequence ();
13955 for (i = 0; i < count; i++)
13956 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13958 if (wback_offset != 0)
13959 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13961 seq = get_insns ();
13962 end_sequence ();
13964 return seq;
13967 result = gen_rtx_PARALLEL (VOIDmode,
13968 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13969 if (wback_offset != 0)
13971 XVECEXP (result, 0, 0)
13972 = gen_rtx_SET (VOIDmode, basereg,
13973 plus_constant (Pmode, basereg, wback_offset));
13974 i = 1;
13975 count++;
13978 for (j = 0; i < count; i++, j++)
13979 XVECEXP (result, 0, i)
13980 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13982 return result;
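/* For illustration (register numbers are arbitrary): COUNT == 3,
   REGS == {4, 5, 6}, base register r0 and WBACK_OFFSET == 12 build
   roughly

	(parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 12)))
		   (set (reg:SI r4) (mem:SI ...))
		   (set (reg:SI r5) (mem:SI ...))
		   (set (reg:SI r6) (mem:SI ...))])

   which the ldm patterns recognize as an ldmia with write-back.  */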
13985 /* Generate a store-multiple instruction. COUNT is the number of stores in
13986 the instruction; REGS and MEMS are arrays containing the operands.
13987 BASEREG is the base register to be used in addressing the memory operands.
13988 WBACK_OFFSET is nonzero if the instruction should update the base
13989 register. */
13991 static rtx
13992 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13993 HOST_WIDE_INT wback_offset)
13995 int i = 0, j;
13996 rtx result;
13998 if (GET_CODE (basereg) == PLUS)
13999 basereg = XEXP (basereg, 0);
14001 if (!multiple_operation_profitable_p (false, count, 0))
14003 rtx seq;
14005 start_sequence ();
14007 for (i = 0; i < count; i++)
14008 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14010 if (wback_offset != 0)
14011 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14013 seq = get_insns ();
14014 end_sequence ();
14016 return seq;
14019 result = gen_rtx_PARALLEL (VOIDmode,
14020 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14021 if (wback_offset != 0)
14023 XVECEXP (result, 0, 0)
14024 = gen_rtx_SET (VOIDmode, basereg,
14025 plus_constant (Pmode, basereg, wback_offset));
14026 i = 1;
14027 count++;
14030 for (j = 0; i < count; i++, j++)
14031 XVECEXP (result, 0, i)
14032 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14034 return result;
14037 /* Generate either a load-multiple or a store-multiple instruction. This
14038 function can be used in situations where we can start with a single MEM
14039 rtx and adjust its address upwards.
14040 COUNT is the number of operations in the instruction, not counting a
14041 possible update of the base register. REGS is an array containing the
14042 register operands.
14043 BASEREG is the base register to be used in addressing the memory operands,
14044 which are constructed from BASEMEM.
14045 WRITE_BACK specifies whether the generated instruction should include an
14046 update of the base register.
14047 OFFSETP is used to pass an offset to and from this function; this offset
14048 is not used when constructing the address (instead BASEMEM should have an
14049 appropriate offset in its address); it is used only for setting
14050 MEM_OFFSET. It is updated only if WRITE_BACK is true.  */
14052 static rtx
14053 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14054 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14056 rtx mems[MAX_LDM_STM_OPS];
14057 HOST_WIDE_INT offset = *offsetp;
14058 int i;
14060 gcc_assert (count <= MAX_LDM_STM_OPS);
14062 if (GET_CODE (basereg) == PLUS)
14063 basereg = XEXP (basereg, 0);
14065 for (i = 0; i < count; i++)
14067 rtx addr = plus_constant (Pmode, basereg, i * 4);
14068 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14069 offset += 4;
14072 if (write_back)
14073 *offsetp = offset;
14075 if (is_load)
14076 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14077 write_back ? 4 * count : 0);
14078 else
14079 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14080 write_back ? 4 * count : 0);
14083 rtx
14084 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14085 rtx basemem, HOST_WIDE_INT *offsetp)
14087 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14088 offsetp);
14091 rtx
14092 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14093 rtx basemem, HOST_WIDE_INT *offsetp)
14095 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14096 offsetp);
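/* A minimal usage sketch (hypothetical caller; SRC, DST, SRCBASE and DSTBASE
   stand for the usual address registers and mem rtxs): copy four words and
   let write-back advance both base registers,

	HOST_WIDE_INT srcoff = 0, dstoff = 0;
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoff));
	emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					   TRUE, dstbase, &dstoff));

   arm_gen_movmemqi below follows exactly this shape.  */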
14099 /* Called from a peephole2 expander to turn a sequence of loads into an
14100 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14101 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14102 is true if we can reorder the registers because they are subsequently
14103 used commutatively.
14104 Returns true iff we could generate a new instruction. */
14106 bool
14107 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14109 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14110 rtx mems[MAX_LDM_STM_OPS];
14111 int i, j, base_reg;
14112 rtx base_reg_rtx;
14113 HOST_WIDE_INT offset;
14114 int write_back = FALSE;
14115 int ldm_case;
14116 rtx addr;
14118 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14119 &base_reg, &offset, !sort_regs);
14121 if (ldm_case == 0)
14122 return false;
14124 if (sort_regs)
14125 for (i = 0; i < nops - 1; i++)
14126 for (j = i + 1; j < nops; j++)
14127 if (regs[i] > regs[j])
14129 int t = regs[i];
14130 regs[i] = regs[j];
14131 regs[j] = t;
14133 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14135 if (TARGET_THUMB1)
14137 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14138 gcc_assert (ldm_case == 1 || ldm_case == 5);
14139 write_back = TRUE;
14142 if (ldm_case == 5)
14144 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14145 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14146 offset = 0;
14147 if (!TARGET_THUMB1)
14149 base_reg = regs[0];
14150 base_reg_rtx = newbase;
14154 for (i = 0; i < nops; i++)
14156 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14157 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14158 SImode, addr, 0);
14160 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14161 write_back ? offset + i * 4 : 0));
14162 return true;
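/* Illustrative before/after for this peephole (registers arbitrary):

	ldr	r2, [r0, #4]
	ldr	r1, [r0]
	ldr	r3, [r0, #8]	=>	ldmia	r0, {r1, r2, r3}

   provided the destination registers ascend with the memory order (or
   SORT_REGS allows them to be permuted into such an order).  */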
14165 /* Called from a peephole2 expander to turn a sequence of stores into an
14166 STM instruction. OPERANDS are the operands found by the peephole matcher;
14167 NOPS indicates how many separate stores we are trying to combine.
14168 Returns true iff we could generate a new instruction. */
14170 bool
14171 gen_stm_seq (rtx *operands, int nops)
14173 int i;
14174 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14175 rtx mems[MAX_LDM_STM_OPS];
14176 int base_reg;
14177 rtx base_reg_rtx;
14178 HOST_WIDE_INT offset;
14179 int write_back = FALSE;
14180 int stm_case;
14181 rtx addr;
14182 bool base_reg_dies;
14184 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14185 mem_order, &base_reg, &offset, true);
14187 if (stm_case == 0)
14188 return false;
14190 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14192 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14193 if (TARGET_THUMB1)
14195 gcc_assert (base_reg_dies);
14196 write_back = TRUE;
14199 if (stm_case == 5)
14201 gcc_assert (base_reg_dies);
14202 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14203 offset = 0;
14206 addr = plus_constant (Pmode, base_reg_rtx, offset);
14208 for (i = 0; i < nops; i++)
14210 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14211 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14212 SImode, addr, 0);
14214 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14215 write_back ? offset + i * 4 : 0));
14216 return true;
14219 /* Called from a peephole2 expander to turn a sequence of stores that are
14220 preceded by constant loads into an STM instruction. OPERANDS are the
14221 operands found by the peephole matcher; NOPS indicates how many
14222 separate stores we are trying to combine; there are 2 * NOPS
14223 instructions in the peephole.
14224 Returns true iff we could generate a new instruction. */
14226 bool
14227 gen_const_stm_seq (rtx *operands, int nops)
14229 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14230 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14231 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14232 rtx mems[MAX_LDM_STM_OPS];
14233 int base_reg;
14234 rtx base_reg_rtx;
14235 HOST_WIDE_INT offset;
14236 int write_back = FALSE;
14237 int stm_case;
14238 rtx addr;
14239 bool base_reg_dies;
14240 int i, j;
14241 HARD_REG_SET allocated;
14243 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14244 mem_order, &base_reg, &offset, false);
14246 if (stm_case == 0)
14247 return false;
14249 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14251 /* If the same register is used more than once, try to find a free
14252 register. */
14253 CLEAR_HARD_REG_SET (allocated);
14254 for (i = 0; i < nops; i++)
14256 for (j = i + 1; j < nops; j++)
14257 if (regs[i] == regs[j])
14259 rtx t = peep2_find_free_register (0, nops * 2,
14260 TARGET_THUMB1 ? "l" : "r",
14261 SImode, &allocated);
14262 if (t == NULL_RTX)
14263 return false;
14264 reg_rtxs[i] = t;
14265 regs[i] = REGNO (t);
14269 /* Compute an ordering that maps the register numbers to an ascending
14270 sequence. */
14271 reg_order[0] = 0;
14272 for (i = 0; i < nops; i++)
14273 if (regs[i] < regs[reg_order[0]])
14274 reg_order[0] = i;
14276 for (i = 1; i < nops; i++)
14278 int this_order = reg_order[i - 1];
14279 for (j = 0; j < nops; j++)
14280 if (regs[j] > regs[reg_order[i - 1]]
14281 && (this_order == reg_order[i - 1]
14282 || regs[j] < regs[this_order]))
14283 this_order = j;
14284 reg_order[i] = this_order;
14287 /* Ensure that registers that must be live after the instruction end
14288 up with the correct value. */
14289 for (i = 0; i < nops; i++)
14291 int this_order = reg_order[i];
14292 if ((this_order != mem_order[i]
14293 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14294 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14295 return false;
14298 /* Load the constants. */
14299 for (i = 0; i < nops; i++)
14301 rtx op = operands[2 * nops + mem_order[i]];
14302 sorted_regs[i] = regs[reg_order[i]];
14303 emit_move_insn (reg_rtxs[reg_order[i]], op);
14306 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14308 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14309 if (TARGET_THUMB1)
14311 gcc_assert (base_reg_dies);
14312 write_back = TRUE;
14315 if (stm_case == 5)
14317 gcc_assert (base_reg_dies);
14318 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14319 offset = 0;
14322 addr = plus_constant (Pmode, base_reg_rtx, offset);
14324 for (i = 0; i < nops; i++)
14326 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14327 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14328 SImode, addr, 0);
14330 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14331 write_back ? offset + i * 4 : 0));
14332 return true;
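/* Illustrative before/after (constants and registers arbitrary):

	mov	r2, #10			mov	r2, #10
	mov	r3, #20			mov	r3, #20
	str	r2, [r0]	=>	stmia	r0, {r2, r3}
	str	r3, [r0, #4]

   with the constant loads re-emitted so that the stored registers ascend
   in memory order.  */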
14335 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14336 unaligned copies on processors which support unaligned semantics for those
14337 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14338 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14339 An interleave factor of 1 (the minimum) will perform no interleaving.
14340 Load/store multiple are used for aligned addresses where possible. */
14342 static void
14343 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14344 HOST_WIDE_INT length,
14345 unsigned int interleave_factor)
14347 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14348 int *regnos = XALLOCAVEC (int, interleave_factor);
14349 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14350 HOST_WIDE_INT i, j;
14351 HOST_WIDE_INT remaining = length, words;
14352 rtx halfword_tmp = NULL, byte_tmp = NULL;
14353 rtx dst, src;
14354 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14355 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14356 HOST_WIDE_INT srcoffset, dstoffset;
14357 HOST_WIDE_INT src_autoinc, dst_autoinc;
14358 rtx mem, addr;
14360 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14362 /* Use hard registers if we have aligned source or destination so we can use
14363 load/store multiple with contiguous registers. */
14364 if (dst_aligned || src_aligned)
14365 for (i = 0; i < interleave_factor; i++)
14366 regs[i] = gen_rtx_REG (SImode, i);
14367 else
14368 for (i = 0; i < interleave_factor; i++)
14369 regs[i] = gen_reg_rtx (SImode);
14371 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14372 src = copy_addr_to_reg (XEXP (srcbase, 0));
14374 srcoffset = dstoffset = 0;
14376 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14377 For copying the last bytes we want to subtract this offset again. */
14378 src_autoinc = dst_autoinc = 0;
14380 for (i = 0; i < interleave_factor; i++)
14381 regnos[i] = i;
14383 /* Copy BLOCK_SIZE_BYTES chunks. */
14385 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14387 /* Load words. */
14388 if (src_aligned && interleave_factor > 1)
14390 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14391 TRUE, srcbase, &srcoffset));
14392 src_autoinc += UNITS_PER_WORD * interleave_factor;
14394 else
14396 for (j = 0; j < interleave_factor; j++)
14398 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14399 - src_autoinc));
14400 mem = adjust_automodify_address (srcbase, SImode, addr,
14401 srcoffset + j * UNITS_PER_WORD);
14402 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14404 srcoffset += block_size_bytes;
14407 /* Store words. */
14408 if (dst_aligned && interleave_factor > 1)
14410 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14411 TRUE, dstbase, &dstoffset));
14412 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14414 else
14416 for (j = 0; j < interleave_factor; j++)
14418 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14419 - dst_autoinc));
14420 mem = adjust_automodify_address (dstbase, SImode, addr,
14421 dstoffset + j * UNITS_PER_WORD);
14422 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14424 dstoffset += block_size_bytes;
14427 remaining -= block_size_bytes;
14430 /* Copy any whole words left (note these aren't interleaved with any
14431 subsequent halfword/byte load/stores in the interests of simplicity). */
14433 words = remaining / UNITS_PER_WORD;
14435 gcc_assert (words < interleave_factor);
14437 if (src_aligned && words > 1)
14439 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14440 &srcoffset));
14441 src_autoinc += UNITS_PER_WORD * words;
14443 else
14445 for (j = 0; j < words; j++)
14447 addr = plus_constant (Pmode, src,
14448 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14449 mem = adjust_automodify_address (srcbase, SImode, addr,
14450 srcoffset + j * UNITS_PER_WORD);
14451 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14453 srcoffset += words * UNITS_PER_WORD;
14456 if (dst_aligned && words > 1)
14458 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14459 &dstoffset));
14460 dst_autoinc += words * UNITS_PER_WORD;
14462 else
14464 for (j = 0; j < words; j++)
14466 addr = plus_constant (Pmode, dst,
14467 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14468 mem = adjust_automodify_address (dstbase, SImode, addr,
14469 dstoffset + j * UNITS_PER_WORD);
14470 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14472 dstoffset += words * UNITS_PER_WORD;
14475 remaining -= words * UNITS_PER_WORD;
14477 gcc_assert (remaining < 4);
14479 /* Copy a halfword if necessary. */
14481 if (remaining >= 2)
14483 halfword_tmp = gen_reg_rtx (SImode);
14485 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14486 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14487 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14489 /* Either write out immediately, or delay until we've loaded the last
14490 byte, depending on interleave factor. */
14491 if (interleave_factor == 1)
14493 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14494 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14495 emit_insn (gen_unaligned_storehi (mem,
14496 gen_lowpart (HImode, halfword_tmp)));
14497 halfword_tmp = NULL;
14498 dstoffset += 2;
14501 remaining -= 2;
14502 srcoffset += 2;
14505 gcc_assert (remaining < 2);
14507 /* Copy last byte. */
14509 if ((remaining & 1) != 0)
14511 byte_tmp = gen_reg_rtx (SImode);
14513 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14514 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14515 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14517 if (interleave_factor == 1)
14519 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14520 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14521 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14522 byte_tmp = NULL;
14523 dstoffset++;
14526 remaining--;
14527 srcoffset++;
14530 /* Store last halfword if we haven't done so already. */
14532 if (halfword_tmp)
14534 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14535 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14536 emit_insn (gen_unaligned_storehi (mem,
14537 gen_lowpart (HImode, halfword_tmp)));
14538 dstoffset += 2;
14541 /* Likewise for last byte. */
14543 if (byte_tmp)
14545 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14546 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14547 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14548 dstoffset++;
14551 gcc_assert (remaining == 0 && srcoffset == dstoffset);
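/* Worked example (sizes are illustrative): LENGTH == 14 with
   INTERLEAVE_FACTOR == 2 copies one 8-byte block (word loads/stores, or
   ldm/stm when the respective side is aligned), then one leftover word,
   then a trailing halfword: 8 + 4 + 2 bytes.  */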
14554 /* From mips_adjust_block_mem:
14556 Helper function for doing a loop-based block operation on memory
14557 reference MEM. Each iteration of the loop will operate on LENGTH
14558 bytes of MEM.
14560 Create a new base register for use within the loop and point it to
14561 the start of MEM. Create a new memory reference that uses this
14562 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14564 static void
14565 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14566 rtx *loop_mem)
14568 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14570 /* Although the new mem does not refer to a known location,
14571 it does keep up to LENGTH bytes of alignment. */
14572 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14573 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14576 /* From mips_block_move_loop:
14578 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14579 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14580 the memory regions do not overlap. */
14582 static void
14583 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14584 unsigned int interleave_factor,
14585 HOST_WIDE_INT bytes_per_iter)
14587 rtx src_reg, dest_reg, final_src, test;
14588 HOST_WIDE_INT leftover;
14590 leftover = length % bytes_per_iter;
14591 length -= leftover;
14593 /* Create registers and memory references for use within the loop. */
14594 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14595 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14597 /* Calculate the value that SRC_REG should have after the last iteration of
14598 the loop. */
14599 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14600 0, 0, OPTAB_WIDEN);
14602 /* Emit the start of the loop. */
14603 rtx_code_label *label = gen_label_rtx ();
14604 emit_label (label);
14606 /* Emit the loop body. */
14607 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14608 interleave_factor);
14610 /* Move on to the next block. */
14611 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14612 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14614 /* Emit the loop condition. */
14615 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14616 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14618 /* Mop up any left-over bytes. */
14619 if (leftover)
14620 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
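/* Worked example (sizes are illustrative): LENGTH == 100 with
   BYTES_PER_ITER == 16 runs the loop above for 96 bytes (6 iterations)
   and then straight-line copies the 4 left-over bytes.  */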
14623 /* Emit a block move when either the source or destination is unaligned (not
14624 aligned to a four-byte boundary). This may need further tuning depending on
14625 core type, optimize_size setting, etc. */
14627 static int
14628 arm_movmemqi_unaligned (rtx *operands)
14630 HOST_WIDE_INT length = INTVAL (operands[2]);
14632 if (optimize_size)
14634 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14635 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14636 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14637 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14638 or dst_aligned though: allow more interleaving in those cases since the
14639 resulting code can be smaller. */
14640 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14641 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14643 if (length > 12)
14644 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14645 interleave_factor, bytes_per_iter);
14646 else
14647 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14648 interleave_factor);
14650 else
14652 /* Note that the loop created by arm_block_move_unaligned_loop may be
14653 subject to loop unrolling, which makes tuning this condition a little
14654 redundant. */
14655 if (length > 32)
14656 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14657 else
14658 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14661 return 1;
14664 int
14665 arm_gen_movmemqi (rtx *operands)
14667 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14668 HOST_WIDE_INT srcoffset, dstoffset;
14669 int i;
14670 rtx src, dst, srcbase, dstbase;
14671 rtx part_bytes_reg = NULL;
14672 rtx mem;
14674 if (!CONST_INT_P (operands[2])
14675 || !CONST_INT_P (operands[3])
14676 || INTVAL (operands[2]) > 64)
14677 return 0;
14679 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14680 return arm_movmemqi_unaligned (operands);
14682 if (INTVAL (operands[3]) & 3)
14683 return 0;
14685 dstbase = operands[0];
14686 srcbase = operands[1];
14688 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14689 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14691 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14692 out_words_to_go = INTVAL (operands[2]) / 4;
14693 last_bytes = INTVAL (operands[2]) & 3;
14694 dstoffset = srcoffset = 0;
14696 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14697 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14699 for (i = 0; in_words_to_go >= 2; i+=4)
14701 if (in_words_to_go > 4)
14702 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14703 TRUE, srcbase, &srcoffset));
14704 else
14705 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14706 src, FALSE, srcbase,
14707 &srcoffset));
14709 if (out_words_to_go)
14711 if (out_words_to_go > 4)
14712 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14713 TRUE, dstbase, &dstoffset));
14714 else if (out_words_to_go != 1)
14715 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14716 out_words_to_go, dst,
14717 (last_bytes == 0
14718 ? FALSE : TRUE),
14719 dstbase, &dstoffset));
14720 else
14722 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14723 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14724 if (last_bytes != 0)
14726 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14727 dstoffset += 4;
14732 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14733 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14736 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14737 if (out_words_to_go)
14739 rtx sreg;
14741 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14742 sreg = copy_to_reg (mem);
14744 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14745 emit_move_insn (mem, sreg);
14746 in_words_to_go--;
14748 gcc_assert (!in_words_to_go); /* Sanity check */
14751 if (in_words_to_go)
14753 gcc_assert (in_words_to_go > 0);
14755 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14756 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14759 gcc_assert (!last_bytes || part_bytes_reg);
14761 if (BYTES_BIG_ENDIAN && last_bytes)
14763 rtx tmp = gen_reg_rtx (SImode);
14765 /* The bytes we want are in the top end of the word. */
14766 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14767 GEN_INT (8 * (4 - last_bytes))));
14768 part_bytes_reg = tmp;
14770 while (last_bytes)
14772 mem = adjust_automodify_address (dstbase, QImode,
14773 plus_constant (Pmode, dst,
14774 last_bytes - 1),
14775 dstoffset + last_bytes - 1);
14776 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14778 if (--last_bytes)
14780 tmp = gen_reg_rtx (SImode);
14781 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14782 part_bytes_reg = tmp;
14787 else
14789 if (last_bytes > 1)
14791 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14792 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14793 last_bytes -= 2;
14794 if (last_bytes)
14796 rtx tmp = gen_reg_rtx (SImode);
14797 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14798 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14799 part_bytes_reg = tmp;
14800 dstoffset += 2;
14804 if (last_bytes)
14806 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14807 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14811 return 1;
14814 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14815 by mode size. */
14816 inline static rtx
14817 next_consecutive_mem (rtx mem)
14819 machine_mode mode = GET_MODE (mem);
14820 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14821 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14823 return adjust_automodify_address (mem, mode, addr, offset);
14826 /* Copy using LDRD/STRD instructions whenever possible.
14827 Returns true upon success. */
14828 bool
14829 gen_movmem_ldrd_strd (rtx *operands)
14831 unsigned HOST_WIDE_INT len;
14832 HOST_WIDE_INT align;
14833 rtx src, dst, base;
14834 rtx reg0;
14835 bool src_aligned, dst_aligned;
14836 bool src_volatile, dst_volatile;
14838 gcc_assert (CONST_INT_P (operands[2]));
14839 gcc_assert (CONST_INT_P (operands[3]));
14841 len = UINTVAL (operands[2]);
14842 if (len > 64)
14843 return false;
14845 /* Maximum alignment we can assume for both src and dst buffers. */
14846 align = INTVAL (operands[3]);
14848 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14849 return false;
14851 /* Place src and dst addresses in registers
14852 and update the corresponding mem rtx. */
14853 dst = operands[0];
14854 dst_volatile = MEM_VOLATILE_P (dst);
14855 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14856 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14857 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14859 src = operands[1];
14860 src_volatile = MEM_VOLATILE_P (src);
14861 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14862 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14863 src = adjust_automodify_address (src, VOIDmode, base, 0);
14865 if (!unaligned_access && !(src_aligned && dst_aligned))
14866 return false;
14868 if (src_volatile || dst_volatile)
14869 return false;
14871 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14872 if (!(dst_aligned || src_aligned))
14873 return arm_gen_movmemqi (operands);
14875 src = adjust_address (src, DImode, 0);
14876 dst = adjust_address (dst, DImode, 0);
14877 while (len >= 8)
14879 len -= 8;
14880 reg0 = gen_reg_rtx (DImode);
14881 if (src_aligned)
14882 emit_move_insn (reg0, src);
14883 else
14884 emit_insn (gen_unaligned_loaddi (reg0, src));
14886 if (dst_aligned)
14887 emit_move_insn (dst, reg0);
14888 else
14889 emit_insn (gen_unaligned_storedi (dst, reg0));
14891 src = next_consecutive_mem (src);
14892 dst = next_consecutive_mem (dst);
14895 gcc_assert (len < 8);
14896 if (len >= 4)
14898 /* At least a word but less than a double-word remains to copy. Copy a word. */
14899 reg0 = gen_reg_rtx (SImode);
14900 src = adjust_address (src, SImode, 0);
14901 dst = adjust_address (dst, SImode, 0);
14902 if (src_aligned)
14903 emit_move_insn (reg0, src);
14904 else
14905 emit_insn (gen_unaligned_loadsi (reg0, src));
14907 if (dst_aligned)
14908 emit_move_insn (dst, reg0);
14909 else
14910 emit_insn (gen_unaligned_storesi (dst, reg0));
14912 src = next_consecutive_mem (src);
14913 dst = next_consecutive_mem (dst);
14914 len -= 4;
14917 if (len == 0)
14918 return true;
14920 /* Copy the remaining bytes. */
14921 if (len >= 2)
14923 dst = adjust_address (dst, HImode, 0);
14924 src = adjust_address (src, HImode, 0);
14925 reg0 = gen_reg_rtx (SImode);
14926 if (src_aligned)
14927 emit_insn (gen_zero_extendhisi2 (reg0, src));
14928 else
14929 emit_insn (gen_unaligned_loadhiu (reg0, src));
14931 if (dst_aligned)
14932 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14933 else
14934 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14936 src = next_consecutive_mem (src);
14937 dst = next_consecutive_mem (dst);
14938 if (len == 2)
14939 return true;
14942 dst = adjust_address (dst, QImode, 0);
14943 src = adjust_address (src, QImode, 0);
14944 reg0 = gen_reg_rtx (QImode);
14945 emit_move_insn (reg0, src);
14946 emit_move_insn (dst, reg0);
14947 return true;
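/* Illustrative expansion (sizes arbitrary): a 14-byte copy with both
   buffers word-aligned becomes one DImode move (typically an ldrd/strd
   pair), one SImode move and one HImode move: 8 + 4 + 2 bytes.  */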
14950 /* Select a dominance comparison mode if possible for a test of the general
14951 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14952 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14953 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14954 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14955 In all cases OP will be either EQ or NE, but we don't need to know which
14956 here. If we are unable to support a dominance comparison we return
14957 CCmode. This will then fail to match for the RTL expressions that
14958 generate this call. */
14959 machine_mode
14960 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14962 enum rtx_code cond1, cond2;
14963 int swapped = 0;
14965 /* Currently we will probably get the wrong result if the individual
14966 comparisons are not simple. This also ensures that it is safe to
14967 reverse a comparison if necessary. */
14968 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14969 != CCmode)
14970 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14971 != CCmode))
14972 return CCmode;
14974 /* The if_then_else variant of this tests the second condition if the
14975 first passes, but is true if the first fails. Reverse the first
14976 condition to get a true "inclusive-or" expression. */
14977 if (cond_or == DOM_CC_NX_OR_Y)
14978 cond1 = reverse_condition (cond1);
14980 /* If the comparisons are not equal, and one doesn't dominate the other,
14981 then we can't do this. */
14982 if (cond1 != cond2
14983 && !comparison_dominates_p (cond1, cond2)
14984 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14985 return CCmode;
14987 if (swapped)
14989 enum rtx_code temp = cond1;
14990 cond1 = cond2;
14991 cond2 = temp;
14994 switch (cond1)
14996 case EQ:
14997 if (cond_or == DOM_CC_X_AND_Y)
14998 return CC_DEQmode;
15000 switch (cond2)
15002 case EQ: return CC_DEQmode;
15003 case LE: return CC_DLEmode;
15004 case LEU: return CC_DLEUmode;
15005 case GE: return CC_DGEmode;
15006 case GEU: return CC_DGEUmode;
15007 default: gcc_unreachable ();
15010 case LT:
15011 if (cond_or == DOM_CC_X_AND_Y)
15012 return CC_DLTmode;
15014 switch (cond2)
15016 case LT:
15017 return CC_DLTmode;
15018 case LE:
15019 return CC_DLEmode;
15020 case NE:
15021 return CC_DNEmode;
15022 default:
15023 gcc_unreachable ();
15026 case GT:
15027 if (cond_or == DOM_CC_X_AND_Y)
15028 return CC_DGTmode;
15030 switch (cond2)
15032 case GT:
15033 return CC_DGTmode;
15034 case GE:
15035 return CC_DGEmode;
15036 case NE:
15037 return CC_DNEmode;
15038 default:
15039 gcc_unreachable ();
15042 case LTU:
15043 if (cond_or == DOM_CC_X_AND_Y)
15044 return CC_DLTUmode;
15046 switch (cond2)
15048 case LTU:
15049 return CC_DLTUmode;
15050 case LEU:
15051 return CC_DLEUmode;
15052 case NE:
15053 return CC_DNEmode;
15054 default:
15055 gcc_unreachable ();
15058 case GTU:
15059 if (cond_or == DOM_CC_X_AND_Y)
15060 return CC_DGTUmode;
15062 switch (cond2)
15064 case GTU:
15065 return CC_DGTUmode;
15066 case GEU:
15067 return CC_DGEUmode;
15068 case NE:
15069 return CC_DNEmode;
15070 default:
15071 gcc_unreachable ();
15074 /* The remaining cases only occur when both comparisons are the
15075 same. */
15076 case NE:
15077 gcc_assert (cond1 == cond2);
15078 return CC_DNEmode;
15080 case LE:
15081 gcc_assert (cond1 == cond2);
15082 return CC_DLEmode;
15084 case GE:
15085 gcc_assert (cond1 == cond2);
15086 return CC_DGEmode;
15088 case LEU:
15089 gcc_assert (cond1 == cond2);
15090 return CC_DLEUmode;
15092 case GEU:
15093 gcc_assert (cond1 == cond2);
15094 return CC_DGEUmode;
15096 default:
15097 gcc_unreachable ();
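/* Example (illustrative): for a source condition such as
   (a == 0 && b == 0), combine presents (and (eq a 0) (eq b 0)) with
   COND_OR == DOM_CC_X_AND_Y; both conditions are EQ, so CC_DEQmode is
   returned and the pair can be evaluated as a cmp followed by a
   conditional cmpeq.  */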
15101 machine_mode
15102 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15104 /* All floating point compares return CCFP if it is an equality
15105 comparison, and CCFPE otherwise. */
15106 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15108 switch (op)
15110 case EQ:
15111 case NE:
15112 case UNORDERED:
15113 case ORDERED:
15114 case UNLT:
15115 case UNLE:
15116 case UNGT:
15117 case UNGE:
15118 case UNEQ:
15119 case LTGT:
15120 return CCFPmode;
15122 case LT:
15123 case LE:
15124 case GT:
15125 case GE:
15126 return CCFPEmode;
15128 default:
15129 gcc_unreachable ();
15133 /* A compare with a shifted operand. Because of canonicalization, the
15134 comparison will have to be swapped when we emit the assembler. */
15135 if (GET_MODE (y) == SImode
15136 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15137 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15138 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15139 || GET_CODE (x) == ROTATERT))
15140 return CC_SWPmode;
15142 /* This operation is performed swapped, but since we only rely on the Z
15143 flag we don't need an additional mode. */
15144 if (GET_MODE (y) == SImode
15145 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15146 && GET_CODE (x) == NEG
15147 && (op == EQ || op == NE))
15148 return CC_Zmode;
15150 /* This is a special case that is used by combine to allow a
15151 comparison of a shifted byte load to be split into a zero-extend
15152 followed by a comparison of the shifted integer (only valid for
15153 equalities and unsigned inequalities). */
15154 if (GET_MODE (x) == SImode
15155 && GET_CODE (x) == ASHIFT
15156 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15157 && GET_CODE (XEXP (x, 0)) == SUBREG
15158 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15159 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15160 && (op == EQ || op == NE
15161 || op == GEU || op == GTU || op == LTU || op == LEU)
15162 && CONST_INT_P (y))
15163 return CC_Zmode;
15165 /* A construct for a conditional compare: if the false arm contains
15166 0, then both conditions must be true; otherwise either condition
15167 must be true. Not all conditions are possible, so CCmode is
15168 returned if it can't be done. */
15169 if (GET_CODE (x) == IF_THEN_ELSE
15170 && (XEXP (x, 2) == const0_rtx
15171 || XEXP (x, 2) == const1_rtx)
15172 && COMPARISON_P (XEXP (x, 0))
15173 && COMPARISON_P (XEXP (x, 1)))
15174 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15175 INTVAL (XEXP (x, 2)));
15177 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15178 if (GET_CODE (x) == AND
15179 && (op == EQ || op == NE)
15180 && COMPARISON_P (XEXP (x, 0))
15181 && COMPARISON_P (XEXP (x, 1)))
15182 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15183 DOM_CC_X_AND_Y);
15185 if (GET_CODE (x) == IOR
15186 && (op == EQ || op == NE)
15187 && COMPARISON_P (XEXP (x, 0))
15188 && COMPARISON_P (XEXP (x, 1)))
15189 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15190 DOM_CC_X_OR_Y);
15192 /* An operation (on Thumb) where we want to test for a single bit.
15193 This is done by shifting that bit up into the top bit of a
15194 scratch register; we can then branch on the sign bit. */
15195 if (TARGET_THUMB1
15196 && GET_MODE (x) == SImode
15197 && (op == EQ || op == NE)
15198 && GET_CODE (x) == ZERO_EXTRACT
15199 && XEXP (x, 1) == const1_rtx)
15200 return CC_Nmode;
15202 /* For an operation that sets the condition codes as a side-effect, the
15203 V flag is not set correctly, so we can only use comparisons where
15204 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15205 instead.) */
15206 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15207 if (GET_MODE (x) == SImode
15208 && y == const0_rtx
15209 && (op == EQ || op == NE || op == LT || op == GE)
15210 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15211 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15212 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15213 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15214 || GET_CODE (x) == LSHIFTRT
15215 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15216 || GET_CODE (x) == ROTATERT
15217 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15218 return CC_NOOVmode;
15220 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15221 return CC_Zmode;
15223 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15224 && GET_CODE (x) == PLUS
15225 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15226 return CC_Cmode;
15228 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15230 switch (op)
15232 case EQ:
15233 case NE:
15234 /* A DImode comparison against zero can be implemented by
15235 or'ing the two halves together. */
15236 if (y == const0_rtx)
15237 return CC_Zmode;
15239 /* We can do an equality test in three Thumb instructions. */
15240 if (!TARGET_32BIT)
15241 return CC_Zmode;
15243 /* FALLTHROUGH */
15245 case LTU:
15246 case LEU:
15247 case GTU:
15248 case GEU:
15249 /* DImode unsigned comparisons can be implemented by cmp +
15250 cmpeq without a scratch register. Not worth doing in
15251 Thumb-2. */
15252 if (TARGET_32BIT)
15253 return CC_CZmode;
15255 /* FALLTHROUGH */
15257 case LT:
15258 case LE:
15259 case GT:
15260 case GE:
15261 /* DImode signed and unsigned comparisons can be implemented
15262 by cmp + sbcs with a scratch register, but that does not
15263 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15264 gcc_assert (op != EQ && op != NE);
15265 return CC_NCVmode;
15267 default:
15268 gcc_unreachable ();
15272 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15273 return GET_MODE (x);
15275 return CCmode;
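/* A few illustrative selections (operands assumed to be SImode registers
   unless written otherwise):

	(eq (plus a b) (const_int 0))		-> CC_NOOVmode
	(ne (neg a) b)				-> CC_Zmode
	(gt (ashift a (const_int 3)) b)		-> CC_SWPmode

   anything needing the full flag set falls through to CCmode.  */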
15278 /* X and Y are two things to compare using CODE. Emit the compare insn and
15279 return the rtx for the CC register in the proper mode. SCRATCH is used,
15280 if needed, for DImode comparisons that require a scratch register. */
15281 rtx
15282 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15284 machine_mode mode;
15285 rtx cc_reg;
15286 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15288 /* We might have X as a constant, Y as a register because of the predicates
15289 used for cmpdi. If so, force X to a register here. */
15290 if (dimode_comparison && !REG_P (x))
15291 x = force_reg (DImode, x);
15293 mode = SELECT_CC_MODE (code, x, y);
15294 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15296 if (dimode_comparison
15297 && mode != CC_CZmode)
15299 rtx clobber, set;
15301 /* To compare two non-zero values for equality, XOR them and
15302 then compare against zero. Not used for ARM mode; there
15303 CC_CZmode is cheaper. */
15304 if (mode == CC_Zmode && y != const0_rtx)
15306 gcc_assert (!reload_completed);
15307 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15308 y = const0_rtx;
15311 /* A scratch register is required. */
15312 if (reload_completed)
15313 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15314 else
15315 scratch = gen_rtx_SCRATCH (SImode);
15317 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15318 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15319 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15321 else
15322 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15324 return cc_reg;
15327 /* Generate a sequence of insns that will generate the correct return
15328 address mask depending on the physical architecture that the program
15329 is running on. */
15330 rtx
15331 arm_gen_return_addr_mask (void)
15333 rtx reg = gen_reg_rtx (Pmode);
15335 emit_insn (gen_return_addr_mask (reg));
15336 return reg;
15339 void
15340 arm_reload_in_hi (rtx *operands)
15342 rtx ref = operands[1];
15343 rtx base, scratch;
15344 HOST_WIDE_INT offset = 0;
15346 if (GET_CODE (ref) == SUBREG)
15348 offset = SUBREG_BYTE (ref);
15349 ref = SUBREG_REG (ref);
15352 if (REG_P (ref))
15354 /* We have a pseudo which has been spilt onto the stack; there
15355 are two cases here: the first where there is a simple
15356 stack-slot replacement and a second where the stack-slot is
15357 out of range, or is used as a subreg. */
15358 if (reg_equiv_mem (REGNO (ref)))
15360 ref = reg_equiv_mem (REGNO (ref));
15361 base = find_replacement (&XEXP (ref, 0));
15363 else
15364 /* The slot is out of range, or was dressed up in a SUBREG. */
15365 base = reg_equiv_address (REGNO (ref));
15367 else
15368 base = find_replacement (&XEXP (ref, 0));
15370 /* Handle the case where the address is too complex to be offset by 1. */
15371 if (GET_CODE (base) == MINUS
15372 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15374 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15376 emit_set_insn (base_plus, base);
15377 base = base_plus;
15379 else if (GET_CODE (base) == PLUS)
15381 /* The addend must be CONST_INT, or we would have dealt with it above. */
15382 HOST_WIDE_INT hi, lo;
15384 offset += INTVAL (XEXP (base, 1));
15385 base = XEXP (base, 0);
15387 /* Rework the address into a legal sequence of insns. */
15388 /* Valid range for lo is -4095 -> 4095 */
15389 lo = (offset >= 0
15390 ? (offset & 0xfff)
15391 : -((-offset) & 0xfff));
15393 /* Corner case: if lo is the max offset then we would be out of range
15394 once we have added the additional 1 below, so bump the msb into the
15395 pre-loading insn(s). */
15396 if (lo == 4095)
15397 lo &= 0x7ff;
15399 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15400 ^ (HOST_WIDE_INT) 0x80000000)
15401 - (HOST_WIDE_INT) 0x80000000);
15403 gcc_assert (hi + lo == offset);
15405 if (hi != 0)
15407 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15409 /* Get the base address; addsi3 knows how to handle constants
15410 that require more than one insn. */
15411 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15412 base = base_plus;
15413 offset = lo;
15417 /* Operands[2] may overlap operands[0] (though it won't overlap
15418 operands[1]); that's why we asked for a DImode reg -- so we can
15419 use the bit that does not overlap. */
15420 if (REGNO (operands[2]) == REGNO (operands[0]))
15421 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15422 else
15423 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15425 emit_insn (gen_zero_extendqisi2 (scratch,
15426 gen_rtx_MEM (QImode,
15427 plus_constant (Pmode, base,
15428 offset))));
15429 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15430 gen_rtx_MEM (QImode,
15431 plus_constant (Pmode, base,
15432 offset + 1))));
15433 if (!BYTES_BIG_ENDIAN)
15434 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15435 gen_rtx_IOR (SImode,
15436 gen_rtx_ASHIFT
15437 (SImode,
15438 gen_rtx_SUBREG (SImode, operands[0], 0),
15439 GEN_INT (8)),
15440 scratch));
15441 else
15442 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15443 gen_rtx_IOR (SImode,
15444 gen_rtx_ASHIFT (SImode, scratch,
15445 GEN_INT (8)),
15446 gen_rtx_SUBREG (SImode, operands[0], 0)));
15449 /* Handle storing a half-word to memory during reload by synthesizing as two
15450 byte stores. Take care not to clobber the input values until after we
15451 have moved them somewhere safe. This code assumes that if the DImode
15452 scratch in operands[2] overlaps either the input value or output address
15453 in some way, then that value must die in this insn (we absolutely need
15454 two scratch registers for some corner cases). */
15455 void
15456 arm_reload_out_hi (rtx *operands)
15458 rtx ref = operands[0];
15459 rtx outval = operands[1];
15460 rtx base, scratch;
15461 HOST_WIDE_INT offset = 0;
15463 if (GET_CODE (ref) == SUBREG)
15465 offset = SUBREG_BYTE (ref);
15466 ref = SUBREG_REG (ref);
15469 if (REG_P (ref))
15471 /* We have a pseudo which has been spilt onto the stack; there
15472 are two cases here: the first where there is a simple
15473 stack-slot replacement and a second where the stack-slot is
15474 out of range, or is used as a subreg. */
15475 if (reg_equiv_mem (REGNO (ref)))
15477 ref = reg_equiv_mem (REGNO (ref));
15478 base = find_replacement (&XEXP (ref, 0));
15480 else
15481 /* The slot is out of range, or was dressed up in a SUBREG. */
15482 base = reg_equiv_address (REGNO (ref));
15484 else
15485 base = find_replacement (&XEXP (ref, 0));
15487 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15489 /* Handle the case where the address is too complex to be offset by 1. */
15490 if (GET_CODE (base) == MINUS
15491 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15493 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15495 /* Be careful not to destroy OUTVAL. */
15496 if (reg_overlap_mentioned_p (base_plus, outval))
15498 /* Updating base_plus might destroy outval, see if we can
15499 swap the scratch and base_plus. */
15500 if (!reg_overlap_mentioned_p (scratch, outval))
15502 rtx tmp = scratch;
15503 scratch = base_plus;
15504 base_plus = tmp;
15506 else
15508 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15510 /* Be conservative and copy OUTVAL into the scratch now,
15511 this should only be necessary if outval is a subreg
15512 of something larger than a word. */
15513 /* XXX Might this clobber base? I can't see how it can,
15514 since scratch is known to overlap with OUTVAL, and
15515 must be wider than a word. */
15516 emit_insn (gen_movhi (scratch_hi, outval));
15517 outval = scratch_hi;
15521 emit_set_insn (base_plus, base);
15522 base = base_plus;
15524 else if (GET_CODE (base) == PLUS)
15526 /* The addend must be CONST_INT, or we would have dealt with it above. */
15527 HOST_WIDE_INT hi, lo;
15529 offset += INTVAL (XEXP (base, 1));
15530 base = XEXP (base, 0);
15532 /* Rework the address into a legal sequence of insns. */
15533 /* Valid range for lo is -4095 -> 4095 */
15534 lo = (offset >= 0
15535 ? (offset & 0xfff)
15536 : -((-offset) & 0xfff));
15538 /* Corner case, if lo is the max offset then we would be out of range
15539 once we have added the additional 1 below, so bump the msb into the
15540 pre-loading insn(s). */
15541 if (lo == 4095)
15542 lo &= 0x7ff;
15544 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15545 ^ (HOST_WIDE_INT) 0x80000000)
15546 - (HOST_WIDE_INT) 0x80000000);
15548 gcc_assert (hi + lo == offset);
15550 if (hi != 0)
15552 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15554 /* Be careful not to destroy OUTVAL. */
15555 if (reg_overlap_mentioned_p (base_plus, outval))
15557 /* Updating base_plus might destroy outval, see if we
15558 can swap the scratch and base_plus. */
15559 if (!reg_overlap_mentioned_p (scratch, outval))
15561 rtx tmp = scratch;
15562 scratch = base_plus;
15563 base_plus = tmp;
15565 else
15567 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15569 /* Be conservative and copy outval into scratch now,
15570 this should only be necessary if outval is a
15571 subreg of something larger than a word. */
15572 /* XXX Might this clobber base? I can't see how it
15573 can, since scratch is known to overlap with
15574 outval. */
15575 emit_insn (gen_movhi (scratch_hi, outval));
15576 outval = scratch_hi;
15580 /* Get the base address; addsi3 knows how to handle constants
15581 that require more than one insn. */
15582 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15583 base = base_plus;
15584 offset = lo;
15588 if (BYTES_BIG_ENDIAN)
15590 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15591 plus_constant (Pmode, base,
15592 offset + 1)),
15593 gen_lowpart (QImode, outval)));
15594 emit_insn (gen_lshrsi3 (scratch,
15595 gen_rtx_SUBREG (SImode, outval, 0),
15596 GEN_INT (8)));
15597 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15598 offset)),
15599 gen_lowpart (QImode, scratch)));
15601 else
15603 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15604 offset)),
15605 gen_lowpart (QImode, outval)));
15606 emit_insn (gen_lshrsi3 (scratch,
15607 gen_rtx_SUBREG (SImode, outval, 0),
15608 GEN_INT (8)));
15609 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15610 plus_constant (Pmode, base,
15611 offset + 1)),
15612 gen_lowpart (QImode, scratch)));
15616 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15617 (padded to the size of a word) should be passed in a register. */
15619 static bool
15620 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15622 if (TARGET_AAPCS_BASED)
15623 return must_pass_in_stack_var_size (mode, type);
15624 else
15625 return must_pass_in_stack_var_size_or_pad (mode, type);
15629 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15630 Return true if an argument passed on the stack should be padded upwards,
15631 i.e. if the least-significant byte has useful data.
15632 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15633 aggregate types are placed in the lowest memory address. */
15635 bool
15636 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15638 if (!TARGET_AAPCS_BASED)
15639 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15641 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15642 return false;
15644 return true;
15648 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15649 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15650 register has useful data, and return the opposite if the most
15651 significant byte does. */
15653 bool
15654 arm_pad_reg_upward (machine_mode mode,
15655 tree type, int first ATTRIBUTE_UNUSED)
15657 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15659 /* For AAPCS, small aggregates, small fixed-point types,
15660 and small complex types are always padded upwards. */
15661 if (type)
15663 if ((AGGREGATE_TYPE_P (type)
15664 || TREE_CODE (type) == COMPLEX_TYPE
15665 || FIXED_POINT_TYPE_P (type))
15666 && int_size_in_bytes (type) <= 4)
15667 return true;
15669 else
15671 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15672 && GET_MODE_SIZE (mode) <= 4)
15673 return true;
15677 /* Otherwise, use default padding. */
15678 return !BYTES_BIG_ENDIAN;
15681 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15682 assuming that the address in the base register is word aligned. */
15683 bool
15684 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15686 HOST_WIDE_INT max_offset;
15688 /* Offset must be a multiple of 4 in Thumb mode. */
15689 if (TARGET_THUMB2 && ((offset & 3) != 0))
15690 return false;
15692 if (TARGET_THUMB2)
15693 max_offset = 1020;
15694 else if (TARGET_ARM)
15695 max_offset = 255;
15696 else
15697 return false;
15699 return ((offset <= max_offset) && (offset >= -max_offset));
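/* Illustrative examples of the check above: an offset of 1020 is valid
   in Thumb-2 but not in ARM state (limit 255); 250 is valid in ARM
   state but not in Thumb-2 (not a multiple of 4); 1024 is rejected in
   both.  */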
15702 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15703 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15704 Assumes that the address in the base register RN is word aligned. Pattern
15705 guarantees that both memory accesses use the same base register,
15706 the offsets are constants within the range, and the gap between the offsets is 4.
15707 If reload is complete then check that the registers are legal. WBACK indicates whether
15708 address is updated. LOAD indicates whether memory access is load or store. */
15709 bool
15710 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15711 bool wback, bool load)
15713 unsigned int t, t2, n;
15715 if (!reload_completed)
15716 return true;
15718 if (!offset_ok_for_ldrd_strd (offset))
15719 return false;
15721 t = REGNO (rt);
15722 t2 = REGNO (rt2);
15723 n = REGNO (rn);
15725 if ((TARGET_THUMB2)
15726 && ((wback && (n == t || n == t2))
15727 || (t == SP_REGNUM)
15728 || (t == PC_REGNUM)
15729 || (t2 == SP_REGNUM)
15730 || (t2 == PC_REGNUM)
15731 || (!load && (n == PC_REGNUM))
15732 || (load && (t == t2))
15733 /* Triggers Cortex-M3 LDRD errata. */
15734 || (!wback && load && fix_cm3_ldrd && (n == t))))
15735 return false;
15737 if ((TARGET_ARM)
15738 && ((wback && (n == t || n == t2))
15739 || (t2 == PC_REGNUM)
15740 || (t % 2 != 0) /* First destination register is not even. */
15741 || (t2 != t + 1)
15742 /* PC can be used as base register (for offset addressing only),
15743 but it is deprecated.  */
15744 || (n == PC_REGNUM)))
15745 return false;
15747 return true;
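/* Illustrative examples of the rules above: in ARM state
   "ldrd r0, r1, [r4]" is accepted (even first register, consecutive
   pair), while "ldrd r1, r2, [r4]" is rejected because the first
   destination register is odd.  In Thumb-2 state the register pair is
   almost unconstrained, but e.g. "ldrd r2, r2, [r4]" is rejected
   because both destinations are the same register.  */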
15750 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15751 operand MEM's address contains an immediate offset from the base
15752 register and has no side effects, in which case it sets BASE and
15753 OFFSET accordingly. */
15754 static bool
15755 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15757 rtx addr;
15759 gcc_assert (base != NULL && offset != NULL);
15761 /* TODO: Handle more general memory operand patterns, such as
15762 PRE_DEC and PRE_INC. */
15764 if (side_effects_p (mem))
15765 return false;
15767 /* Can't deal with subregs. */
15768 if (GET_CODE (mem) == SUBREG)
15769 return false;
15771 gcc_assert (MEM_P (mem));
15773 *offset = const0_rtx;
15775 addr = XEXP (mem, 0);
15777 /* If addr isn't valid for DImode, then we can't handle it. */
15778 if (!arm_legitimate_address_p (DImode, addr,
15779 reload_in_progress || reload_completed))
15780 return false;
15782 if (REG_P (addr))
15784 *base = addr;
15785 return true;
15787 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15789 *base = XEXP (addr, 0);
15790 *offset = XEXP (addr, 1);
15791 return (REG_P (*base) && CONST_INT_P (*offset));
15794 return false;
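/* Example (illustrative): an address of the form
   (plus (reg r4) (const_int 8)) yields *base = r4, *offset = 8; a bare
   (reg r4) yields *base = r4, *offset = 0; auto-modify addresses such
   as (post_inc ...) are rejected by the side_effects_p check above.  */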
15797 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15799 /* Called from a peephole2 to replace two word-size accesses with a
15800 single LDRD/STRD instruction. Returns true iff we can generate a
15801 new instruction sequence. That is, both accesses use the same base
15802 register and the gap between constant offsets is 4. This function
15803 may reorder its operands to match ldrd/strd RTL templates.
15804 OPERANDS are the operands found by the peephole matcher;
15805 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15806 corresponding memory operands. LOAD indicates whether the access
15807 is a load or a store. CONST_STORE indicates a store of constant
15808 integer values held in OPERANDS[4,5], and assumes that the pattern
15809 is 4 insns long, for the purpose of checking dead registers.
15810 COMMUTE indicates that register operands may be reordered. */
15811 bool
15812 gen_operands_ldrd_strd (rtx *operands, bool load,
15813 bool const_store, bool commute)
15815 int nops = 2;
15816 HOST_WIDE_INT offsets[2], offset;
15817 rtx base = NULL_RTX;
15818 rtx cur_base, cur_offset, tmp;
15819 int i, gap;
15820 HARD_REG_SET regset;
15822 gcc_assert (!const_store || !load);
15823 /* Check that the memory references are immediate offsets from the
15824 same base register. Extract the base register, the destination
15825 registers, and the corresponding memory offsets. */
15826 for (i = 0; i < nops; i++)
15828 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15829 return false;
15831 if (i == 0)
15832 base = cur_base;
15833 else if (REGNO (base) != REGNO (cur_base))
15834 return false;
15836 offsets[i] = INTVAL (cur_offset);
15837 if (GET_CODE (operands[i]) == SUBREG)
15839 tmp = SUBREG_REG (operands[i]);
15840 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15841 operands[i] = tmp;
15845 /* Make sure there is no dependency between the individual loads. */
15846 if (load && REGNO (operands[0]) == REGNO (base))
15847 return false; /* RAW */
15849 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15850 return false; /* WAW */
15852 /* If the same input register is used in both stores
15853 when storing different constants, try to find a free register.
15854 For example, the code
15855 mov r0, 0
15856 str r0, [r2]
15857 mov r0, 1
15858 str r0, [r2, #4]
15859 can be transformed into
15860 mov r1, 0
15861 strd r1, r0, [r2]
15862 in Thumb mode assuming that r1 is free. */
15863 if (const_store
15864 && REGNO (operands[0]) == REGNO (operands[1])
15865 && INTVAL (operands[4]) != INTVAL (operands[5]))
15867 if (TARGET_THUMB2)
15869 CLEAR_HARD_REG_SET (regset);
15870 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15871 if (tmp == NULL_RTX)
15872 return false;
15874 /* Use the new register in the first load to ensure that
15875 if the original input register is not dead after peephole,
15876 then it will have the correct constant value. */
15877 operands[0] = tmp;
15879 else if (TARGET_ARM)
15881 return false;
15882 int regno = REGNO (operands[0]);
15883 if (!peep2_reg_dead_p (4, operands[0]))
15885 /* When the input register is even and is not dead after the
15886 pattern, it has to hold the second constant but we cannot
15887 form a legal STRD in ARM mode with this register as the second
15888 register. */
15889 if (regno % 2 == 0)
15890 return false;
15892 /* Is regno-1 free? */
15893 SET_HARD_REG_SET (regset);
15894 CLEAR_HARD_REG_BIT(regset, regno - 1);
15895 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15896 if (tmp == NULL_RTX)
15897 return false;
15899 operands[0] = tmp;
15901 else
15903 /* Find a DImode register. */
15904 CLEAR_HARD_REG_SET (regset);
15905 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15906 if (tmp != NULL_RTX)
15908 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15909 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15911 else
15913 /* Can we use the input register to form a DI register? */
15914 SET_HARD_REG_SET (regset);
15915 CLEAR_HARD_REG_BIT(regset,
15916 regno % 2 == 0 ? regno + 1 : regno - 1);
15917 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15918 if (tmp == NULL_RTX)
15919 return false;
15920 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15924 gcc_assert (operands[0] != NULL_RTX);
15925 gcc_assert (operands[1] != NULL_RTX);
15926 gcc_assert (REGNO (operands[0]) % 2 == 0);
15927 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15931 /* Make sure the instructions are ordered with lower memory access first. */
15932 if (offsets[0] > offsets[1])
15934 gap = offsets[0] - offsets[1];
15935 offset = offsets[1];
15937 /* Swap the instructions such that lower memory is accessed first. */
15938 SWAP_RTX (operands[0], operands[1]);
15939 SWAP_RTX (operands[2], operands[3]);
15940 if (const_store)
15941 SWAP_RTX (operands[4], operands[5]);
15943 else
15945 gap = offsets[1] - offsets[0];
15946 offset = offsets[0];
15949 /* Make sure accesses are to consecutive memory locations. */
15950 if (gap != 4)
15951 return false;
15953 /* Make sure we generate legal instructions. */
15954 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15955 false, load))
15956 return true;
15958 /* In Thumb state the register operands are almost unconstrained, so if
15959 the check above failed there is little hope of fixing it by reordering. */
15960 if (TARGET_THUMB2)
15961 return false;
15963 if (load && commute)
15965 /* Try reordering registers. */
15966 SWAP_RTX (operands[0], operands[1]);
15967 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15968 false, load))
15969 return true;
15972 if (const_store)
15974 /* If input registers are dead after this pattern, they can be
15975 reordered or replaced by other registers that are free in the
15976 current pattern. */
15977 if (!peep2_reg_dead_p (4, operands[0])
15978 || !peep2_reg_dead_p (4, operands[1]))
15979 return false;
15981 /* Try to reorder the input registers. */
15982 /* For example, the code
15983 mov r0, 0
15984 mov r1, 1
15985 str r1, [r2]
15986 str r0, [r2, #4]
15987 can be transformed into
15988 mov r1, 0
15989 mov r0, 1
15990 strd r0, [r2]
15992 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15993 false, false))
15995 SWAP_RTX (operands[0], operands[1]);
15996 return true;
15999 /* Try to find a free DI register. */
16000 CLEAR_HARD_REG_SET (regset);
16001 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16002 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16003 while (true)
16005 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16006 if (tmp == NULL_RTX)
16007 return false;
16009 /* DREG must be an even-numbered register in DImode.
16010 Split it into SI registers. */
16011 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16012 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16013 gcc_assert (operands[0] != NULL_RTX);
16014 gcc_assert (operands[1] != NULL_RTX);
16015 gcc_assert (REGNO (operands[0]) % 2 == 0);
16016 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16018 return (operands_ok_ldrd_strd (operands[0], operands[1],
16019 base, offset,
16020 false, load));
16024 return false;
16026 #undef SWAP_RTX
16031 /* Print a symbolic form of X to the debug file, F. */
16032 static void
16033 arm_print_value (FILE *f, rtx x)
16035 switch (GET_CODE (x))
16037 case CONST_INT:
16038 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16039 return;
16041 case CONST_DOUBLE:
16042 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16043 return;
16045 case CONST_VECTOR:
16047 int i;
16049 fprintf (f, "<");
16050 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16052 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16053 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16054 fputc (',', f);
16056 fprintf (f, ">");
16058 return;
16060 case CONST_STRING:
16061 fprintf (f, "\"%s\"", XSTR (x, 0));
16062 return;
16064 case SYMBOL_REF:
16065 fprintf (f, "`%s'", XSTR (x, 0));
16066 return;
16068 case LABEL_REF:
16069 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16070 return;
16072 case CONST:
16073 arm_print_value (f, XEXP (x, 0));
16074 return;
16076 case PLUS:
16077 arm_print_value (f, XEXP (x, 0));
16078 fprintf (f, "+");
16079 arm_print_value (f, XEXP (x, 1));
16080 return;
16082 case PC:
16083 fprintf (f, "pc");
16084 return;
16086 default:
16087 fprintf (f, "????");
16088 return;
16092 /* Routines for manipulation of the constant pool. */
16094 /* Arm instructions cannot load a large constant directly into a
16095 register; they have to come from a pc relative load. The constant
16096 must therefore be placed in the addressable range of the pc
16097 relative load. Depending on the precise pc relative load
16098 instruction the range is somewhere between 256 bytes and 4k. This
16099 means that we often have to dump a constant inside a function, and
16100 generate code to branch around it.
16102 It is important to minimize this, since the branches will slow
16103 things down and make the code larger.
16105 Normally we can hide the table after an existing unconditional
16106 branch so that there is no interruption of the flow, but in the
16107 worst case the code looks like this:
16109 ldr rn, L1
16111 b L2
16112 align
16113 L1: .long value
16117 ldr rn, L3
16119 b L4
16120 align
16121 L3: .long value
16125 We fix this by performing a scan after scheduling, which notices
16126 which instructions need to have their operands fetched from the
16127 constant table and builds the table.
16129 The algorithm starts by building a table of all the constants that
16130 need fixing up and all the natural barriers in the function (places
16131 where a constant table can be dropped without breaking the flow).
16132 For each fixup we note how far the pc-relative replacement will be
16133 able to reach and the offset of the instruction into the function.
16135 Having built the table we then group the fixes together to form
16136 tables that are as large as possible (subject to addressing
16137 constraints) and emit each table of constants after the last
16138 barrier that is within range of all the instructions in the group.
16139 If a group does not contain a barrier, then we forcibly create one
16140 by inserting a jump instruction into the flow. Once the table has
16141 been inserted, the insns are then modified to reference the
16142 relevant entry in the pool.
16144 Possible enhancements to the algorithm (not implemented) are:
16146 1) For some processors and object formats, there may be benefit in
16147 aligning the pools to the start of cache lines; this alignment
16148 would need to be taken into account when calculating addressability
16149 of a pool. */
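/* Rough figures only (the authoritative values are the pool_range and
   neg_pool_range insn attributes in the machine description): an
   ARM-state word load can reach on the order of 4K bytes, while other
   forms such as Thumb-1, halfword, and coprocessor loads have shorter
   ranges, down to a few hundred bytes; these per-insn ranges are what
   become the "forwards" and "backwards" fields of each fix below.  */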
16151 /* These typedefs are located at the start of this file, so that
16152 they can be used in the prototypes there. This comment is to
16153 remind readers of that fact so that the following structures
16154 can be understood more easily.
16156 typedef struct minipool_node Mnode;
16157 typedef struct minipool_fixup Mfix; */
16159 struct minipool_node
16161 /* Doubly linked chain of entries. */
16162 Mnode * next;
16163 Mnode * prev;
16164 /* The maximum offset into the code at which this entry can be placed. While
16165 pushing fixes for forward references, all entries are sorted in order
16166 of increasing max_address. */
16167 HOST_WIDE_INT max_address;
16168 /* Similarly for an entry inserted for a backwards ref. */
16169 HOST_WIDE_INT min_address;
16170 /* The number of fixes referencing this entry. This can become zero
16171 if we "unpush" an entry. In this case we ignore the entry when we
16172 come to emit the code. */
16173 int refcount;
16174 /* The offset from the start of the minipool. */
16175 HOST_WIDE_INT offset;
16176 /* The value in the table.  */
16177 rtx value;
16178 /* The mode of value. */
16179 machine_mode mode;
16180 /* The size of the value. With iWMMXt enabled
16181 sizes > 4 also imply an alignment of 8 bytes.  */
16182 int fix_size;
16185 struct minipool_fixup
16187 Mfix * next;
16188 rtx_insn * insn;
16189 HOST_WIDE_INT address;
16190 rtx * loc;
16191 machine_mode mode;
16192 int fix_size;
16193 rtx value;
16194 Mnode * minipool;
16195 HOST_WIDE_INT forwards;
16196 HOST_WIDE_INT backwards;
16199 /* Fixes less than a word need padding out to a word boundary. */
16200 #define MINIPOOL_FIX_SIZE(mode) \
16201 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
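/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte value padded
   out to a word), while MINIPOOL_FIX_SIZE (DFmode) is 8 and, with
   iWMMXt, such an entry also requires 8-byte alignment (see fix_size
   in struct minipool_node above).  */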
16203 static Mnode * minipool_vector_head;
16204 static Mnode * minipool_vector_tail;
16205 static rtx_code_label *minipool_vector_label;
16206 static int minipool_pad;
16208 /* The linked list of all minipool fixes required for this function. */
16209 Mfix * minipool_fix_head;
16210 Mfix * minipool_fix_tail;
16211 /* The fix entry for the current minipool, once it has been placed. */
16212 Mfix * minipool_barrier;
16214 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16215 #define JUMP_TABLES_IN_TEXT_SECTION 0
16216 #endif
16218 static HOST_WIDE_INT
16219 get_jump_table_size (rtx_jump_table_data *insn)
16221 /* ADDR_VECs only take room if read-only data goes into the text
16222 section. */
16223 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16225 rtx body = PATTERN (insn);
16226 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16227 HOST_WIDE_INT size;
16228 HOST_WIDE_INT modesize;
16230 modesize = GET_MODE_SIZE (GET_MODE (body));
16231 size = modesize * XVECLEN (body, elt);
16232 switch (modesize)
16234 case 1:
16235 /* Round up size of TBB table to a halfword boundary. */
16236 size = (size + 1) & ~(HOST_WIDE_INT)1;
16237 break;
16238 case 2:
16239 /* No padding necessary for TBH. */
16240 break;
16241 case 4:
16242 /* Add two bytes for alignment on Thumb. */
16243 if (TARGET_THUMB)
16244 size += 2;
16245 break;
16246 default:
16247 gcc_unreachable ();
16249 return size;
16252 return 0;
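/* Example (illustrative), assuming jump tables are placed in the text
   section: a QImode ADDR_DIFF_VEC with 5 entries (a TBB table) has raw
   size 5, rounded up to 6 above to keep the following insn halfword
   aligned; an SImode vector with 5 entries occupies 20 bytes, plus 2
   bytes of alignment on Thumb.  */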
16255 /* Return the maximum amount of padding that will be inserted before
16256 label LABEL. */
16258 static HOST_WIDE_INT
16259 get_label_padding (rtx label)
16261 HOST_WIDE_INT align, min_insn_size;
16263 align = 1 << label_to_alignment (label);
16264 min_insn_size = TARGET_THUMB ? 2 : 4;
16265 return align > min_insn_size ? align - min_insn_size : 0;
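/* Example (illustrative): a label aligned to 8 bytes
   (label_to_alignment == 3) can be preceded by at most 8 - 2 = 6 bytes
   of padding on Thumb (smallest insn is 2 bytes) and 8 - 4 = 4 bytes
   on ARM.  */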
16268 /* Move a minipool fix MP from its current location to before MAX_MP.
16269 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16270 constraints may need updating. */
16271 static Mnode *
16272 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16273 HOST_WIDE_INT max_address)
16275 /* The code below assumes these are different. */
16276 gcc_assert (mp != max_mp);
16278 if (max_mp == NULL)
16280 if (max_address < mp->max_address)
16281 mp->max_address = max_address;
16283 else
16285 if (max_address > max_mp->max_address - mp->fix_size)
16286 mp->max_address = max_mp->max_address - mp->fix_size;
16287 else
16288 mp->max_address = max_address;
16290 /* Unlink MP from its current position. Since max_mp is non-null,
16291 mp->prev must be non-null. */
16292 mp->prev->next = mp->next;
16293 if (mp->next != NULL)
16294 mp->next->prev = mp->prev;
16295 else
16296 minipool_vector_tail = mp->prev;
16298 /* Re-insert it before MAX_MP. */
16299 mp->next = max_mp;
16300 mp->prev = max_mp->prev;
16301 max_mp->prev = mp;
16303 if (mp->prev != NULL)
16304 mp->prev->next = mp;
16305 else
16306 minipool_vector_head = mp;
16309 /* Save the new entry. */
16310 max_mp = mp;
16312 /* Scan over the preceding entries and adjust their addresses as
16313 required. */
16314 while (mp->prev != NULL
16315 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16317 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16318 mp = mp->prev;
16321 return max_mp;
16324 /* Add a constant to the minipool for a forward reference. Returns the
16325 node added or NULL if the constant will not fit in this pool. */
16326 static Mnode *
16327 add_minipool_forward_ref (Mfix *fix)
16329 /* If set, max_mp is the first pool_entry that has a lower
16330 constraint than the one we are trying to add. */
16331 Mnode * max_mp = NULL;
16332 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16333 Mnode * mp;
16335 /* If the minipool starts before the end of FIX->INSN then this FIX
15336 cannot be placed into the current pool. Furthermore, adding the
16337 new constant pool entry may cause the pool to start FIX_SIZE bytes
16338 earlier. */
16339 if (minipool_vector_head &&
16340 (fix->address + get_attr_length (fix->insn)
16341 >= minipool_vector_head->max_address - fix->fix_size))
16342 return NULL;
16344 /* Scan the pool to see if a constant with the same value has
16345 already been added. While we are doing this, also note the
16346 location where we must insert the constant if it doesn't already
16347 exist. */
16348 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16350 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16351 && fix->mode == mp->mode
16352 && (!LABEL_P (fix->value)
16353 || (CODE_LABEL_NUMBER (fix->value)
16354 == CODE_LABEL_NUMBER (mp->value)))
16355 && rtx_equal_p (fix->value, mp->value))
16357 /* More than one fix references this entry. */
16358 mp->refcount++;
16359 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16362 /* Note the insertion point if necessary. */
16363 if (max_mp == NULL
16364 && mp->max_address > max_address)
16365 max_mp = mp;
16367 /* If we are inserting an 8-byte aligned quantity and
16368 we have not already found an insertion point, then
16369 make sure that all such 8-byte aligned quantities are
16370 placed at the start of the pool. */
16371 if (ARM_DOUBLEWORD_ALIGN
16372 && max_mp == NULL
16373 && fix->fix_size >= 8
16374 && mp->fix_size < 8)
16376 max_mp = mp;
16377 max_address = mp->max_address;
16381 /* The value is not currently in the minipool, so we need to create
16382 a new entry for it. If MAX_MP is NULL, the entry will be put on
16383 the end of the list since the placement is less constrained than
16384 any existing entry. Otherwise, we insert the new fix before
16385 MAX_MP and, if necessary, adjust the constraints on the other
16386 entries. */
16387 mp = XNEW (Mnode);
16388 mp->fix_size = fix->fix_size;
16389 mp->mode = fix->mode;
16390 mp->value = fix->value;
16391 mp->refcount = 1;
16392 /* Not yet required for a backwards ref. */
16393 mp->min_address = -65536;
16395 if (max_mp == NULL)
16397 mp->max_address = max_address;
16398 mp->next = NULL;
16399 mp->prev = minipool_vector_tail;
16401 if (mp->prev == NULL)
16403 minipool_vector_head = mp;
16404 minipool_vector_label = gen_label_rtx ();
16406 else
16407 mp->prev->next = mp;
16409 minipool_vector_tail = mp;
16411 else
16413 if (max_address > max_mp->max_address - mp->fix_size)
16414 mp->max_address = max_mp->max_address - mp->fix_size;
16415 else
16416 mp->max_address = max_address;
16418 mp->next = max_mp;
16419 mp->prev = max_mp->prev;
16420 max_mp->prev = mp;
16421 if (mp->prev != NULL)
16422 mp->prev->next = mp;
16423 else
16424 minipool_vector_head = mp;
16427 /* Save the new entry. */
16428 max_mp = mp;
16430 /* Scan over the preceding entries and adjust their addresses as
16431 required. */
16432 while (mp->prev != NULL
16433 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16435 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16436 mp = mp->prev;
16439 return max_mp;
16442 static Mnode *
16443 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16444 HOST_WIDE_INT min_address)
16446 HOST_WIDE_INT offset;
16448 /* The code below assumes these are different. */
16449 gcc_assert (mp != min_mp);
16451 if (min_mp == NULL)
16453 if (min_address > mp->min_address)
16454 mp->min_address = min_address;
16456 else
16458 /* We will adjust this below if it is too loose. */
16459 mp->min_address = min_address;
16461 /* Unlink MP from its current position. Since min_mp is non-null,
16462 mp->next must be non-null. */
16463 mp->next->prev = mp->prev;
16464 if (mp->prev != NULL)
16465 mp->prev->next = mp->next;
16466 else
16467 minipool_vector_head = mp->next;
16469 /* Reinsert it after MIN_MP. */
16470 mp->prev = min_mp;
16471 mp->next = min_mp->next;
16472 min_mp->next = mp;
16473 if (mp->next != NULL)
16474 mp->next->prev = mp;
16475 else
16476 minipool_vector_tail = mp;
16479 min_mp = mp;
16481 offset = 0;
16482 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16484 mp->offset = offset;
16485 if (mp->refcount > 0)
16486 offset += mp->fix_size;
16488 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16489 mp->next->min_address = mp->min_address + mp->fix_size;
16492 return min_mp;
16495 /* Add a constant to the minipool for a backward reference. Returns the
16496 node added or NULL if the constant will not fit in this pool.
16498 Note that the code for insertion for a backwards reference can be
16499 somewhat confusing because the calculated offsets for each fix do
16500 not take into account the size of the pool (which is still under
16501 construction).  */
16502 static Mnode *
16503 add_minipool_backward_ref (Mfix *fix)
16505 /* If set, min_mp is the last pool_entry that has a lower constraint
16506 than the one we are trying to add. */
16507 Mnode *min_mp = NULL;
16508 /* This can be negative, since it is only a constraint. */
16509 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16510 Mnode *mp;
16512 /* If we can't reach the current pool from this insn, or if we can't
16513 insert this entry at the end of the pool without pushing other
16514 fixes out of range, then we don't try. This ensures that we
16515 can't fail later on. */
16516 if (min_address >= minipool_barrier->address
16517 || (minipool_vector_tail->min_address + fix->fix_size
16518 >= minipool_barrier->address))
16519 return NULL;
16521 /* Scan the pool to see if a constant with the same value has
16522 already been added. While we are doing this, also note the
16523 location where we must insert the constant if it doesn't already
16524 exist. */
16525 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16527 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16528 && fix->mode == mp->mode
16529 && (!LABEL_P (fix->value)
16530 || (CODE_LABEL_NUMBER (fix->value)
16531 == CODE_LABEL_NUMBER (mp->value)))
16532 && rtx_equal_p (fix->value, mp->value)
16533 /* Check that there is enough slack to move this entry to the
16534 end of the table (this is conservative). */
16535 && (mp->max_address
16536 > (minipool_barrier->address
16537 + minipool_vector_tail->offset
16538 + minipool_vector_tail->fix_size)))
16540 mp->refcount++;
16541 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16544 if (min_mp != NULL)
16545 mp->min_address += fix->fix_size;
16546 else
16548 /* Note the insertion point if necessary. */
16549 if (mp->min_address < min_address)
16551 /* For now, we do not allow the insertion of nodes that require
16552 8-byte alignment anywhere but at the start of the pool. */
16553 if (ARM_DOUBLEWORD_ALIGN
16554 && fix->fix_size >= 8 && mp->fix_size < 8)
16555 return NULL;
16556 else
16557 min_mp = mp;
16559 else if (mp->max_address
16560 < minipool_barrier->address + mp->offset + fix->fix_size)
16562 /* Inserting before this entry would push the fix beyond
16563 its maximum address (which can happen if we have
16564 re-located a forwards fix); force the new fix to come
16565 after it. */
16566 if (ARM_DOUBLEWORD_ALIGN
16567 && fix->fix_size >= 8 && mp->fix_size < 8)
16568 return NULL;
16569 else
16571 min_mp = mp;
16572 min_address = mp->min_address + fix->fix_size;
16575 /* Do not insert a non-8-byte aligned quantity before 8-byte
16576 aligned quantities. */
16577 else if (ARM_DOUBLEWORD_ALIGN
16578 && fix->fix_size < 8
16579 && mp->fix_size >= 8)
16581 min_mp = mp;
16582 min_address = mp->min_address + fix->fix_size;
16587 /* We need to create a new entry. */
16588 mp = XNEW (Mnode);
16589 mp->fix_size = fix->fix_size;
16590 mp->mode = fix->mode;
16591 mp->value = fix->value;
16592 mp->refcount = 1;
16593 mp->max_address = minipool_barrier->address + 65536;
16595 mp->min_address = min_address;
16597 if (min_mp == NULL)
16599 mp->prev = NULL;
16600 mp->next = minipool_vector_head;
16602 if (mp->next == NULL)
16604 minipool_vector_tail = mp;
16605 minipool_vector_label = gen_label_rtx ();
16607 else
16608 mp->next->prev = mp;
16610 minipool_vector_head = mp;
16612 else
16614 mp->next = min_mp->next;
16615 mp->prev = min_mp;
16616 min_mp->next = mp;
16618 if (mp->next != NULL)
16619 mp->next->prev = mp;
16620 else
16621 minipool_vector_tail = mp;
16624 /* Save the new entry. */
16625 min_mp = mp;
16627 if (mp->prev)
16628 mp = mp->prev;
16629 else
16630 mp->offset = 0;
16632 /* Scan over the following entries and adjust their offsets. */
16633 while (mp->next != NULL)
16635 if (mp->next->min_address < mp->min_address + mp->fix_size)
16636 mp->next->min_address = mp->min_address + mp->fix_size;
16638 if (mp->refcount)
16639 mp->next->offset = mp->offset + mp->fix_size;
16640 else
16641 mp->next->offset = mp->offset;
16643 mp = mp->next;
16646 return min_mp;
16649 static void
16650 assign_minipool_offsets (Mfix *barrier)
16652 HOST_WIDE_INT offset = 0;
16653 Mnode *mp;
16655 minipool_barrier = barrier;
16657 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16659 mp->offset = offset;
16661 if (mp->refcount > 0)
16662 offset += mp->fix_size;
16666 /* Output the literal table */
16667 static void
16668 dump_minipool (rtx_insn *scan)
16670 Mnode * mp;
16671 Mnode * nmp;
16672 int align64 = 0;
16674 if (ARM_DOUBLEWORD_ALIGN)
16675 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16676 if (mp->refcount > 0 && mp->fix_size >= 8)
16678 align64 = 1;
16679 break;
16682 if (dump_file)
16683 fprintf (dump_file,
16684 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16685 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16687 scan = emit_label_after (gen_label_rtx (), scan);
16688 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16689 scan = emit_label_after (minipool_vector_label, scan);
16691 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16693 if (mp->refcount > 0)
16695 if (dump_file)
16697 fprintf (dump_file,
16698 ";; Offset %u, min %ld, max %ld ",
16699 (unsigned) mp->offset, (unsigned long) mp->min_address,
16700 (unsigned long) mp->max_address);
16701 arm_print_value (dump_file, mp->value);
16702 fputc ('\n', dump_file);
16705 switch (mp->fix_size)
16707 #ifdef HAVE_consttable_1
16708 case 1:
16709 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16710 break;
16712 #endif
16713 #ifdef HAVE_consttable_2
16714 case 2:
16715 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16716 break;
16718 #endif
16719 #ifdef HAVE_consttable_4
16720 case 4:
16721 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16722 break;
16724 #endif
16725 #ifdef HAVE_consttable_8
16726 case 8:
16727 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16728 break;
16730 #endif
16731 #ifdef HAVE_consttable_16
16732 case 16:
16733 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16734 break;
16736 #endif
16737 default:
16738 gcc_unreachable ();
16742 nmp = mp->next;
16743 free (mp);
16746 minipool_vector_head = minipool_vector_tail = NULL;
16747 scan = emit_insn_after (gen_consttable_end (), scan);
16748 scan = emit_barrier_after (scan);
16751 /* Return the cost of forcibly inserting a barrier after INSN. */
16752 static int
16753 arm_barrier_cost (rtx insn)
16755 /* Basing the location of the pool on the loop depth is preferable,
16756 but at the moment, the basic block information seems to be
16757 corrupted by this stage of the compilation.  */
16758 int base_cost = 50;
16759 rtx next = next_nonnote_insn (insn);
16761 if (next != NULL && LABEL_P (next))
16762 base_cost -= 20;
16764 switch (GET_CODE (insn))
16766 case CODE_LABEL:
16767 /* It will always be better to place the table before the label, rather
16768 than after it. */
16769 return 50;
16771 case INSN:
16772 case CALL_INSN:
16773 return base_cost;
16775 case JUMP_INSN:
16776 return base_cost - 10;
16778 default:
16779 return base_cost + 10;
16783 /* Find the best place in the insn stream in the range
16784 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16785 Create the barrier by inserting a jump and add a new fix entry for
16786 it. */
16787 static Mfix *
16788 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16790 HOST_WIDE_INT count = 0;
16791 rtx_barrier *barrier;
16792 rtx_insn *from = fix->insn;
16793 /* The instruction after which we will insert the jump. */
16794 rtx_insn *selected = NULL;
16795 int selected_cost;
16796 /* The address at which the jump instruction will be placed. */
16797 HOST_WIDE_INT selected_address;
16798 Mfix * new_fix;
16799 HOST_WIDE_INT max_count = max_address - fix->address;
16800 rtx_code_label *label = gen_label_rtx ();
16802 selected_cost = arm_barrier_cost (from);
16803 selected_address = fix->address;
16805 while (from && count < max_count)
16807 rtx_jump_table_data *tmp;
16808 int new_cost;
16810 /* This code shouldn't have been called if there was a natural barrier
16811 within range. */
16812 gcc_assert (!BARRIER_P (from));
16814 /* Count the length of this insn. This must stay in sync with the
16815 code that pushes minipool fixes. */
16816 if (LABEL_P (from))
16817 count += get_label_padding (from);
16818 else
16819 count += get_attr_length (from);
16821 /* If there is a jump table, add its length. */
16822 if (tablejump_p (from, NULL, &tmp))
16824 count += get_jump_table_size (tmp);
16826 /* Jump tables aren't in a basic block, so base the cost on
16827 the dispatch insn. If we select this location, we will
16828 still put the pool after the table. */
16829 new_cost = arm_barrier_cost (from);
16831 if (count < max_count
16832 && (!selected || new_cost <= selected_cost))
16834 selected = tmp;
16835 selected_cost = new_cost;
16836 selected_address = fix->address + count;
16839 /* Continue after the dispatch table. */
16840 from = NEXT_INSN (tmp);
16841 continue;
16844 new_cost = arm_barrier_cost (from);
16846 if (count < max_count
16847 && (!selected || new_cost <= selected_cost))
16849 selected = from;
16850 selected_cost = new_cost;
16851 selected_address = fix->address + count;
16854 from = NEXT_INSN (from);
16857 /* Make sure that we found a place to insert the jump. */
16858 gcc_assert (selected);
16860 /* Make sure we do not split a call and its corresponding
16861 CALL_ARG_LOCATION note. */
16862 if (CALL_P (selected))
16864 rtx_insn *next = NEXT_INSN (selected);
16865 if (next && NOTE_P (next)
16866 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16867 selected = next;
16870 /* Create a new JUMP_INSN that branches around a barrier. */
16871 from = emit_jump_insn_after (gen_jump (label), selected);
16872 JUMP_LABEL (from) = label;
16873 barrier = emit_barrier_after (from);
16874 emit_label_after (label, barrier);
16876 /* Create a minipool barrier entry for the new barrier. */
16877 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16878 new_fix->insn = barrier;
16879 new_fix->address = selected_address;
16880 new_fix->next = fix->next;
16881 fix->next = new_fix;
16883 return new_fix;
16886 /* Record that there is a natural barrier in the insn stream at
16887 ADDRESS. */
16888 static void
16889 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16891 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16893 fix->insn = insn;
16894 fix->address = address;
16896 fix->next = NULL;
16897 if (minipool_fix_head != NULL)
16898 minipool_fix_tail->next = fix;
16899 else
16900 minipool_fix_head = fix;
16902 minipool_fix_tail = fix;
16905 /* Record INSN, which will need fixing up to load a value from the
16906 minipool. ADDRESS is the offset of the insn from the start of the
16907 function; LOC is a pointer to the part of the insn which requires
16908 fixing; VALUE is the constant that must be loaded, which is of type
16909 MODE. */
16910 static void
16911 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16912 machine_mode mode, rtx value)
16914 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16916 fix->insn = insn;
16917 fix->address = address;
16918 fix->loc = loc;
16919 fix->mode = mode;
16920 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16921 fix->value = value;
16922 fix->forwards = get_attr_pool_range (insn);
16923 fix->backwards = get_attr_neg_pool_range (insn);
16924 fix->minipool = NULL;
16926 /* If an insn doesn't have a range defined for it, then it isn't
16927 expecting to be reworked by this code. Better to stop now than
16928 to generate duff assembly code. */
16929 gcc_assert (fix->forwards || fix->backwards);
16931 /* If an entry requires 8-byte alignment then assume all constant pools
16932 require 4 bytes of padding. Trying to do this later on a per-pool
16933 basis is awkward because existing pool entries have to be modified. */
16934 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16935 minipool_pad = 4;
16937 if (dump_file)
16939 fprintf (dump_file,
16940 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16941 GET_MODE_NAME (mode),
16942 INSN_UID (insn), (unsigned long) address,
16943 -1 * (long)fix->backwards, (long)fix->forwards);
16944 arm_print_value (dump_file, fix->value);
16945 fprintf (dump_file, "\n");
16948 /* Add it to the chain of fixes. */
16949 fix->next = NULL;
16951 if (minipool_fix_head != NULL)
16952 minipool_fix_tail->next = fix;
16953 else
16954 minipool_fix_head = fix;
16956 minipool_fix_tail = fix;
16959 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16960 Returns the number of insns needed, or 99 if we always want to synthesize
16961 the value. */
16962 int
16963 arm_max_const_double_inline_cost ()
16965 /* Let the value get synthesized to avoid the use of literal pools. */
16966 if (arm_disable_literal_pool)
16967 return 99;
16969 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16972 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16973 Returns the number of insns needed, or 99 if we don't know how to
16974 do it. */
16975 int
16976 arm_const_double_inline_cost (rtx val)
16978 rtx lowpart, highpart;
16979 machine_mode mode;
16981 mode = GET_MODE (val);
16983 if (mode == VOIDmode)
16984 mode = DImode;
16986 gcc_assert (GET_MODE_SIZE (mode) == 8);
16988 lowpart = gen_lowpart (SImode, val);
16989 highpart = gen_highpart_mode (SImode, mode, val);
16991 gcc_assert (CONST_INT_P (lowpart));
16992 gcc_assert (CONST_INT_P (highpart));
16994 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16995 NULL_RTX, NULL_RTX, 0, 0)
16996 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16997 NULL_RTX, NULL_RTX, 0, 0));
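/* Example (illustrative): for the DImode constant 0x100000001 both the
   low and high parts are 1, each costing a single insn, so the total
   cost is 2 and the constant would normally be synthesized inline
   rather than placed in a literal pool (compare with
   arm_max_const_double_inline_cost above).  */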
17000 /* Cost of loading a SImode constant. */
17001 static inline int
17002 arm_const_inline_cost (enum rtx_code code, rtx val)
17004 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17005 NULL_RTX, NULL_RTX, 1, 0);
17008 /* Return true if it is worthwhile to split a 64-bit constant into two
17009 32-bit operations. This is the case if optimizing for size, or
17010 if we have load delay slots, or if one 32-bit part can be done with
17011 a single data operation. */
17012 bool
17013 arm_const_double_by_parts (rtx val)
17015 machine_mode mode = GET_MODE (val);
17016 rtx part;
17018 if (optimize_size || arm_ld_sched)
17019 return true;
17021 if (mode == VOIDmode)
17022 mode = DImode;
17024 part = gen_highpart_mode (SImode, mode, val);
17026 gcc_assert (CONST_INT_P (part));
17028 if (const_ok_for_arm (INTVAL (part))
17029 || const_ok_for_arm (~INTVAL (part)))
17030 return true;
17032 part = gen_lowpart (SImode, val);
17034 gcc_assert (CONST_INT_P (part));
17036 if (const_ok_for_arm (INTVAL (part))
17037 || const_ok_for_arm (~INTVAL (part)))
17038 return true;
17040 return false;
17043 /* Return true if it is possible to inline both the high and low parts
17044 of a 64-bit constant into 32-bit data processing instructions. */
17045 bool
17046 arm_const_double_by_immediates (rtx val)
17048 machine_mode mode = GET_MODE (val);
17049 rtx part;
17051 if (mode == VOIDmode)
17052 mode = DImode;
17054 part = gen_highpart_mode (SImode, mode, val);
17056 gcc_assert (CONST_INT_P (part));
17058 if (!const_ok_for_arm (INTVAL (part)))
17059 return false;
17061 part = gen_lowpart (SImode, val);
17063 gcc_assert (CONST_INT_P (part));
17065 if (!const_ok_for_arm (INTVAL (part)))
17066 return false;
17068 return true;
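/* Example (illustrative): 0x000000ff000000ff passes this test because
   0xff is a valid data-processing immediate for both halves, whereas a
   half such as 0x12345678 fails const_ok_for_arm and forces a different
   strategy (splitting, movw/movt, or a literal pool entry).  */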
17071 /* Scan INSN and note any of its operands that need fixing.
17072 If DO_PUSHES is false we do not actually push any of the fixups
17073 needed. */
17074 static void
17075 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17077 int opno;
17079 extract_constrain_insn (insn);
17081 if (recog_data.n_alternatives == 0)
17082 return;
17084 /* Fill in recog_op_alt with information about the constraints of
17085 this insn. */
17086 preprocess_constraints (insn);
17088 const operand_alternative *op_alt = which_op_alt ();
17089 for (opno = 0; opno < recog_data.n_operands; opno++)
17091 /* Things we need to fix can only occur in inputs. */
17092 if (recog_data.operand_type[opno] != OP_IN)
17093 continue;
17095 /* If this alternative is a memory reference, then any mention
17096 of constants in this alternative is really to fool reload
17097 into allowing us to accept one there. We need to fix them up
17098 now so that we output the right code. */
17099 if (op_alt[opno].memory_ok)
17101 rtx op = recog_data.operand[opno];
17103 if (CONSTANT_P (op))
17105 if (do_pushes)
17106 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17107 recog_data.operand_mode[opno], op);
17109 else if (MEM_P (op)
17110 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17111 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17113 if (do_pushes)
17115 rtx cop = avoid_constant_pool_reference (op);
17117 /* Casting the address of something to a mode narrower
17118 than a word can cause avoid_constant_pool_reference()
17119 to return the pool reference itself. That's no good to
17120 us here. Let's just hope that we can use the
17121 constant pool value directly. */
17122 if (op == cop)
17123 cop = get_pool_constant (XEXP (op, 0));
17125 push_minipool_fix (insn, address,
17126 recog_data.operand_loc[opno],
17127 recog_data.operand_mode[opno], cop);
17134 return;
17137 /* Rewrite move insn into subtract of 0 if the condition codes will
17138 be useful in next conditional jump insn. */
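/* Rough illustration (not from the sources): given
     mov   r3, r2
     ...
     cmp   r3, #0
     bne   .L2
   the move is rewritten below as "subs r3, r2, #0", whose flag-setting
   side effect is then visible to later passes, so the explicit compare
   against zero can be dropped, provided nothing in between sets the
   condition codes or clobbers the compared register.  */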
17140 static void
17141 thumb1_reorg (void)
17143 basic_block bb;
17145 FOR_EACH_BB_FN (bb, cfun)
17147 rtx dest, src;
17148 rtx pat, op0, set = NULL;
17149 rtx_insn *prev, *insn = BB_END (bb);
17150 bool insn_clobbered = false;
17152 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17153 insn = PREV_INSN (insn);
17155 /* Find the last cbranchsi4_insn in basic block BB. */
17156 if (insn == BB_HEAD (bb)
17157 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17158 continue;
17160 /* Get the register with which we are comparing. */
17161 pat = PATTERN (insn);
17162 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17164 /* Find the first flag setting insn before INSN in basic block BB. */
17165 gcc_assert (insn != BB_HEAD (bb));
17166 for (prev = PREV_INSN (insn);
17167 (!insn_clobbered
17168 && prev != BB_HEAD (bb)
17169 && (NOTE_P (prev)
17170 || DEBUG_INSN_P (prev)
17171 || ((set = single_set (prev)) != NULL
17172 && get_attr_conds (prev) == CONDS_NOCOND)));
17173 prev = PREV_INSN (prev))
17175 if (reg_set_p (op0, prev))
17176 insn_clobbered = true;
17179 /* Skip if op0 is clobbered by insn other than prev. */
17180 if (insn_clobbered)
17181 continue;
17183 if (!set)
17184 continue;
17186 dest = SET_DEST (set);
17187 src = SET_SRC (set);
17188 if (!low_register_operand (dest, SImode)
17189 || !low_register_operand (src, SImode))
17190 continue;
17192 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17193 in INSN. Both src and dest of the move insn are checked. */
17194 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17196 dest = copy_rtx (dest);
17197 src = copy_rtx (src);
17198 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17199 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17200 INSN_CODE (prev) = -1;
17201 /* Set test register in INSN to dest. */
17202 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17203 INSN_CODE (insn) = -1;
17208 /* Convert instructions to their cc-clobbering variant if possible, since
17209 that allows us to use smaller encodings. */
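/* For instance (illustrative), a three-register addition
   "add r0, r1, r2" has no flag-preserving 16-bit encoding; when the
   condition codes are dead at that point, the SET is wrapped below in a
   PARALLEL with a CC clobber so that the 16-bit "adds r0, r1, r2" form
   can be used instead of the 32-bit one.  */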
17211 static void
17212 thumb2_reorg (void)
17214 basic_block bb;
17215 regset_head live;
17217 INIT_REG_SET (&live);
17219 /* We are freeing block_for_insn in the toplev to keep compatibility
17220 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17221 compute_bb_for_insn ();
17222 df_analyze ();
17224 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17226 FOR_EACH_BB_FN (bb, cfun)
17228 if (current_tune->disparage_flag_setting_t16_encodings
17229 && optimize_bb_for_speed_p (bb))
17230 continue;
17232 rtx_insn *insn;
17233 Convert_Action action = SKIP;
17234 Convert_Action action_for_partial_flag_setting
17235 = (current_tune->disparage_partial_flag_setting_t16_encodings
17236 && optimize_bb_for_speed_p (bb))
17237 ? SKIP : CONV;
17239 COPY_REG_SET (&live, DF_LR_OUT (bb));
17240 df_simulate_initialize_backwards (bb, &live);
17241 FOR_BB_INSNS_REVERSE (bb, insn)
17243 if (NONJUMP_INSN_P (insn)
17244 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17245 && GET_CODE (PATTERN (insn)) == SET)
17247 action = SKIP;
17248 rtx pat = PATTERN (insn);
17249 rtx dst = XEXP (pat, 0);
17250 rtx src = XEXP (pat, 1);
17251 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17253 if (!OBJECT_P (src))
17254 op0 = XEXP (src, 0);
17256 if (BINARY_P (src))
17257 op1 = XEXP (src, 1);
17259 if (low_register_operand (dst, SImode))
17261 switch (GET_CODE (src))
17263 case PLUS:
17264 /* Adding two registers and storing the result
17265 in the first source is already a 16-bit
17266 operation. */
17267 if (rtx_equal_p (dst, op0)
17268 && register_operand (op1, SImode))
17269 break;
17271 if (low_register_operand (op0, SImode))
17273 /* ADDS <Rd>,<Rn>,<Rm> */
17274 if (low_register_operand (op1, SImode))
17275 action = CONV;
17276 /* ADDS <Rdn>,#<imm8> */
17277 /* SUBS <Rdn>,#<imm8> */
17278 else if (rtx_equal_p (dst, op0)
17279 && CONST_INT_P (op1)
17280 && IN_RANGE (INTVAL (op1), -255, 255))
17281 action = CONV;
17282 /* ADDS <Rd>,<Rn>,#<imm3> */
17283 /* SUBS <Rd>,<Rn>,#<imm3> */
17284 else if (CONST_INT_P (op1)
17285 && IN_RANGE (INTVAL (op1), -7, 7))
17286 action = CONV;
17288 /* ADCS <Rd>, <Rn> */
17289 else if (GET_CODE (XEXP (src, 0)) == PLUS
17290 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17291 && low_register_operand (XEXP (XEXP (src, 0), 1),
17292 SImode)
17293 && COMPARISON_P (op1)
17294 && cc_register (XEXP (op1, 0), VOIDmode)
17295 && maybe_get_arm_condition_code (op1) == ARM_CS
17296 && XEXP (op1, 1) == const0_rtx)
17297 action = CONV;
17298 break;
17300 case MINUS:
17301 /* RSBS <Rd>,<Rn>,#0
17302 Not handled here: see NEG below. */
17303 /* SUBS <Rd>,<Rn>,#<imm3>
17304 SUBS <Rdn>,#<imm8>
17305 Not handled here: see PLUS above. */
17306 /* SUBS <Rd>,<Rn>,<Rm> */
17307 if (low_register_operand (op0, SImode)
17308 && low_register_operand (op1, SImode))
17309 action = CONV;
17310 break;
17312 case MULT:
17313 /* MULS <Rdm>,<Rn>,<Rdm>
17314 As an exception to the rule, this is only used
17315 when optimizing for size since MULS is slow on all
17316 known implementations. We do not even want to use
17317 MULS in cold code, if optimizing for speed, so we
17318 test the global flag here. */
17319 if (!optimize_size)
17320 break;
17321 /* else fall through. */
17322 case AND:
17323 case IOR:
17324 case XOR:
17325 /* ANDS <Rdn>,<Rm> */
17326 if (rtx_equal_p (dst, op0)
17327 && low_register_operand (op1, SImode))
17328 action = action_for_partial_flag_setting;
17329 else if (rtx_equal_p (dst, op1)
17330 && low_register_operand (op0, SImode))
17331 action = action_for_partial_flag_setting == SKIP
17332 ? SKIP : SWAP_CONV;
17333 break;
17335 case ASHIFTRT:
17336 case ASHIFT:
17337 case LSHIFTRT:
17338 /* ASRS <Rdn>,<Rm> */
17339 /* LSRS <Rdn>,<Rm> */
17340 /* LSLS <Rdn>,<Rm> */
17341 if (rtx_equal_p (dst, op0)
17342 && low_register_operand (op1, SImode))
17343 action = action_for_partial_flag_setting;
17344 /* ASRS <Rd>,<Rm>,#<imm5> */
17345 /* LSRS <Rd>,<Rm>,#<imm5> */
17346 /* LSLS <Rd>,<Rm>,#<imm5> */
17347 else if (low_register_operand (op0, SImode)
17348 && CONST_INT_P (op1)
17349 && IN_RANGE (INTVAL (op1), 0, 31))
17350 action = action_for_partial_flag_setting;
17351 break;
17353 case ROTATERT:
17354 /* RORS <Rdn>,<Rm> */
17355 if (rtx_equal_p (dst, op0)
17356 && low_register_operand (op1, SImode))
17357 action = action_for_partial_flag_setting;
17358 break;
17360 case NOT:
17361 /* MVNS <Rd>,<Rm> */
17362 if (low_register_operand (op0, SImode))
17363 action = action_for_partial_flag_setting;
17364 break;
17366 case NEG:
17367 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17368 if (low_register_operand (op0, SImode))
17369 action = CONV;
17370 break;
17372 case CONST_INT:
17373 /* MOVS <Rd>,#<imm8> */
17374 if (CONST_INT_P (src)
17375 && IN_RANGE (INTVAL (src), 0, 255))
17376 action = action_for_partial_flag_setting;
17377 break;
17379 case REG:
17380 /* MOVS and MOV<c> with registers have different
17381 encodings, so are not relevant here. */
17382 break;
17384 default:
17385 break;
17389 if (action != SKIP)
17391 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17392 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17393 rtvec vec;
17395 if (action == SWAP_CONV)
17397 src = copy_rtx (src);
17398 XEXP (src, 0) = op1;
17399 XEXP (src, 1) = op0;
17400 pat = gen_rtx_SET (VOIDmode, dst, src);
17401 vec = gen_rtvec (2, pat, clobber);
17403 else /* action == CONV */
17404 vec = gen_rtvec (2, pat, clobber);
17406 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17407 INSN_CODE (insn) = -1;
17411 if (NONDEBUG_INSN_P (insn))
17412 df_simulate_one_insn_backwards (bb, insn, &live);
17416 CLEAR_REG_SET (&live);
17419 /* Gcc puts the pool in the wrong place for ARM, since we can only
17420 load addresses a limited distance around the pc. We do some
17421 special munging to move the constant pool values to the correct
17422 point in the code. */
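/* Illustrative sketch (not emitted verbatim by this code; labels and the
   constant are invented): after arm_reorg has run, a constant that cannot
   be encoded as an immediate is typically loaded from a nearby minipool,
   roughly like

       ldr     r0, .LCP0        @ pc-relative load from the minipool
       ...
       b       .Lafter          @ branch around the dumped pool
   .LCP0:
       .word   0x12345678       @ minipool entry created for the fixup
   .Lafter:

   The fixups recorded below track each such load so that the pool can be
   dumped within the addressing range of the ldr instruction.  */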
17423 static void
17424 arm_reorg (void)
17426 rtx_insn *insn;
17427 HOST_WIDE_INT address = 0;
17428 Mfix * fix;
17430 if (TARGET_THUMB1)
17431 thumb1_reorg ();
17432 else if (TARGET_THUMB2)
17433 thumb2_reorg ();
17435 /* Ensure all insns that must be split have been split at this point.
17436 Otherwise, the pool placement code below may compute incorrect
17437 insn lengths. Note that when optimizing, all insns have already
17438 been split at this point. */
17439 if (!optimize)
17440 split_all_insns_noflow ();
17442 minipool_fix_head = minipool_fix_tail = NULL;
17444 /* The first insn must always be a note, or the code below won't
17445 scan it properly. */
17446 insn = get_insns ();
17447 gcc_assert (NOTE_P (insn));
17448 minipool_pad = 0;
17450 /* Scan all the insns and record the operands that will need fixing. */
17451 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17453 if (BARRIER_P (insn))
17454 push_minipool_barrier (insn, address);
17455 else if (INSN_P (insn))
17457 rtx_jump_table_data *table;
17459 note_invalid_constants (insn, address, true);
17460 address += get_attr_length (insn);
17462 /* If the insn is a vector jump, add the size of the table
17463 and skip the table. */
17464 if (tablejump_p (insn, NULL, &table))
17466 address += get_jump_table_size (table);
17467 insn = table;
17470 else if (LABEL_P (insn))
17471 /* Add the worst-case padding due to alignment. We don't add
17472 the _current_ padding because the minipool insertions
17473 themselves might change it. */
17474 address += get_label_padding (insn);
17477 fix = minipool_fix_head;
17479 /* Now scan the fixups and perform the required changes. */
17480 while (fix)
17482 Mfix * ftmp;
17483 Mfix * fdel;
17484 Mfix * last_added_fix;
17485 Mfix * last_barrier = NULL;
17486 Mfix * this_fix;
17488 /* Skip any further barriers before the next fix. */
17489 while (fix && BARRIER_P (fix->insn))
17490 fix = fix->next;
17492 /* No more fixes. */
17493 if (fix == NULL)
17494 break;
17496 last_added_fix = NULL;
17498 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17500 if (BARRIER_P (ftmp->insn))
17502 if (ftmp->address >= minipool_vector_head->max_address)
17503 break;
17505 last_barrier = ftmp;
17507 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17508 break;
17510 last_added_fix = ftmp; /* Keep track of the last fix added. */
17513 /* If we found a barrier, drop back to that; any fixes that we
17514 could have reached but come after the barrier will now go in
17515 the next mini-pool. */
17516 if (last_barrier != NULL)
17518 /* Reduce the refcount for those fixes that won't go into this
17519 pool after all. */
17520 for (fdel = last_barrier->next;
17521 fdel && fdel != ftmp;
17522 fdel = fdel->next)
17524 fdel->minipool->refcount--;
17525 fdel->minipool = NULL;
17528 ftmp = last_barrier;
17530 else
17532 /* ftmp is the first fix that we can't fit into this pool and
17533 there are no natural barriers that we could use. Insert a
17534 new barrier in the code somewhere between the previous
17535 fix and this one, and arrange to jump around it. */
17536 HOST_WIDE_INT max_address;
17538 /* The last item on the list of fixes must be a barrier, so
17539 we can never run off the end of the list of fixes without
17540 last_barrier being set. */
17541 gcc_assert (ftmp);
17543 max_address = minipool_vector_head->max_address;
17544 /* Check that there isn't another fix that is in range that
17545 we couldn't fit into this pool because the pool was
17546 already too large: we need to put the pool before such an
17547 instruction. The pool itself may come just after the
17548 fix because create_fix_barrier also allows space for a
17549 jump instruction. */
17550 if (ftmp->address < max_address)
17551 max_address = ftmp->address + 1;
17553 last_barrier = create_fix_barrier (last_added_fix, max_address);
17556 assign_minipool_offsets (last_barrier);
17558 while (ftmp)
17560 if (!BARRIER_P (ftmp->insn)
17561 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17562 == NULL))
17563 break;
17565 ftmp = ftmp->next;
17568 /* Scan over the fixes we have identified for this pool, fixing them
17569 up and adding the constants to the pool itself. */
17570 for (this_fix = fix; this_fix && ftmp != this_fix;
17571 this_fix = this_fix->next)
17572 if (!BARRIER_P (this_fix->insn))
17574 rtx addr
17575 = plus_constant (Pmode,
17576 gen_rtx_LABEL_REF (VOIDmode,
17577 minipool_vector_label),
17578 this_fix->minipool->offset);
17579 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17582 dump_minipool (last_barrier->insn);
17583 fix = ftmp;
17586 /* From now on we must synthesize any constants that we can't handle
17587 directly. This can happen if the RTL gets split during final
17588 instruction generation. */
17589 cfun->machine->after_arm_reorg = 1;
17591 /* Free the minipool memory. */
17592 obstack_free (&minipool_obstack, minipool_startobj);
17595 /* Routines to output assembly language. */
17597 /* Return string representation of passed in real value. */
17598 static const char *
17599 fp_const_from_val (REAL_VALUE_TYPE *r)
17601 if (!fp_consts_inited)
17602 init_fp_table ();
17604 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17605 return "0";
17608 /* OPERANDS[0] is the entire list of insns that constitute pop,
17609 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17610 is in the list, UPDATE is true iff the list contains explicit
17611 update of base register. */
17612 void
17613 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17614 bool update)
17616 int i;
17617 char pattern[100];
17618 int offset;
17619 const char *conditional;
17620 int num_saves = XVECLEN (operands[0], 0);
17621 unsigned int regno;
17622 unsigned int regno_base = REGNO (operands[1]);
17624 offset = 0;
17625 offset += update ? 1 : 0;
17626 offset += return_pc ? 1 : 0;
17628 /* Is the base register in the list? */
17629 for (i = offset; i < num_saves; i++)
17631 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17632 /* If SP is in the list, then the base register must be SP. */
17633 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17634 /* If base register is in the list, there must be no explicit update. */
17635 if (regno == regno_base)
17636 gcc_assert (!update);
17639 conditional = reverse ? "%?%D0" : "%?%d0";
17640 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17642 /* Output pop (not ldmfd) because it has a shorter encoding. */
17643 gcc_assert (update);
17644 sprintf (pattern, "pop%s\t{", conditional);
17646 else
17648 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17649 It's just a convention; their semantics are identical. */
17650 if (regno_base == SP_REGNUM)
17651 sprintf (pattern, "ldm%sfd\t", conditional);
17652 else if (TARGET_UNIFIED_ASM)
17653 sprintf (pattern, "ldmia%s\t", conditional);
17654 else
17655 sprintf (pattern, "ldm%sia\t", conditional);
17657 strcat (pattern, reg_names[regno_base]);
17658 if (update)
17659 strcat (pattern, "!, {");
17660 else
17661 strcat (pattern, ", {");
17664 /* Output the first destination register. */
17665 strcat (pattern,
17666 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17668 /* Output the rest of the destination registers. */
17669 for (i = offset + 1; i < num_saves; i++)
17671 strcat (pattern, ", ");
17672 strcat (pattern,
17673 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17676 strcat (pattern, "}");
17678 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17679 strcat (pattern, "^");
17681 output_asm_insn (pattern, &cond);
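/* Illustrative examples of the strings assembled above (register choice
   invented, condition markers omitted): popping r4, r5 and pc with SP as
   the base and writeback gives "pop {r4, r5, pc}" under unified syntax,
   while the same list loaded through r7 without writeback gives
   "ldmia r7, {r4, r5, pc}".  */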
17685 /* Output the assembly for a store multiple. */
17687 const char *
17688 vfp_output_vstmd (rtx * operands)
17690 char pattern[100];
17691 int p;
17692 int base;
17693 int i;
17694 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17695 ? XEXP (operands[0], 0)
17696 : XEXP (XEXP (operands[0], 0), 0);
17697 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17699 if (push_p)
17700 strcpy (pattern, "vpush%?.64\t{%P1");
17701 else
17702 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17704 p = strlen (pattern);
17706 gcc_assert (REG_P (operands[1]));
17708 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17709 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17711 p += sprintf (&pattern[p], ", d%d", base + i);
17713 strcpy (&pattern[p], "}");
17715 output_asm_insn (pattern, operands);
17716 return "";
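/* Assumed example (register choice invented): storing three D registers
   starting at d8 through the stack pointer produces
   "vpush.64 {d8, d9, d10}", while the same store through r4 produces
   "vstmdb.64 r4!, {d8, d9, d10}".  */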
17720 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17721 number of bytes pushed. */
17723 static int
17724 vfp_emit_fstmd (int base_reg, int count)
17726 rtx par;
17727 rtx dwarf;
17728 rtx tmp, reg;
17729 int i;
17731 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17732 register pairs are stored by a store multiple insn. We avoid this
17733 by pushing an extra pair. */
17734 if (count == 2 && !arm_arch6)
17736 if (base_reg == LAST_VFP_REGNUM - 3)
17737 base_reg -= 2;
17738 count++;
17741 /* FSTMD may not store more than 16 doubleword registers at once. Split
17742 larger stores into multiple parts (up to a maximum of two, in
17743 practice). */
17744 if (count > 16)
17746 int saved;
17747 /* NOTE: base_reg is an internal register number, so each D register
17748 counts as 2. */
17749 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17750 saved += vfp_emit_fstmd (base_reg, 16);
17751 return saved;
17754 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17755 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17757 reg = gen_rtx_REG (DFmode, base_reg);
17758 base_reg += 2;
17760 XVECEXP (par, 0, 0)
17761 = gen_rtx_SET (VOIDmode,
17762 gen_frame_mem
17763 (BLKmode,
17764 gen_rtx_PRE_MODIFY (Pmode,
17765 stack_pointer_rtx,
17766 plus_constant
17767 (Pmode, stack_pointer_rtx,
17768 - (count * 8)))
17770 gen_rtx_UNSPEC (BLKmode,
17771 gen_rtvec (1, reg),
17772 UNSPEC_PUSH_MULT));
17774 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17775 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17776 RTX_FRAME_RELATED_P (tmp) = 1;
17777 XVECEXP (dwarf, 0, 0) = tmp;
17779 tmp = gen_rtx_SET (VOIDmode,
17780 gen_frame_mem (DFmode, stack_pointer_rtx),
17781 reg);
17782 RTX_FRAME_RELATED_P (tmp) = 1;
17783 XVECEXP (dwarf, 0, 1) = tmp;
17785 for (i = 1; i < count; i++)
17787 reg = gen_rtx_REG (DFmode, base_reg);
17788 base_reg += 2;
17789 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17791 tmp = gen_rtx_SET (VOIDmode,
17792 gen_frame_mem (DFmode,
17793 plus_constant (Pmode,
17794 stack_pointer_rtx,
17795 i * 8)),
17796 reg);
17797 RTX_FRAME_RELATED_P (tmp) = 1;
17798 XVECEXP (dwarf, 0, i + 1) = tmp;
17801 par = emit_insn (par);
17802 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17803 RTX_FRAME_RELATED_P (par) = 1;
17805 return count * 8;
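/* Worked example (register numbers assumed): a request to push d8-d9
   (count == 2) on a pre-ARMv6 core is widened to d8-d10 (count == 3) to
   dodge the VFPr1 erratum, so 24 rather than 16 bytes are pushed; a
   request for 20 register pairs is split into a push of the top 4 pairs
   followed by a push of the remaining 16.  */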
17808 /* Emit a call instruction with pattern PAT. ADDR is the address of
17809 the call target. */
17811 void
17812 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17814 rtx insn;
17816 insn = emit_call_insn (pat);
17818 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17819 If the call might use such an entry, add a use of the PIC register
17820 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17821 if (TARGET_VXWORKS_RTP
17822 && flag_pic
17823 && !sibcall
17824 && GET_CODE (addr) == SYMBOL_REF
17825 && (SYMBOL_REF_DECL (addr)
17826 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17827 : !SYMBOL_REF_LOCAL_P (addr)))
17829 require_pic_register ();
17830 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17833 if (TARGET_AAPCS_BASED)
17835 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17836 linker. We need to add an IP clobber to allow setting
17837 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17838 is not needed since it's a fixed register. */
17839 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17840 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17844 /* Output a 'call' insn. */
17845 const char *
17846 output_call (rtx *operands)
17848 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17850 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17851 if (REGNO (operands[0]) == LR_REGNUM)
17853 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17854 output_asm_insn ("mov%?\t%0, %|lr", operands);
17857 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17859 if (TARGET_INTERWORK || arm_arch4t)
17860 output_asm_insn ("bx%?\t%0", operands);
17861 else
17862 output_asm_insn ("mov%?\t%|pc, %0", operands);
17864 return "";
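/* Assumed example (register invented): calling through r2 on a pre-v5
   core emits "mov lr, pc" followed by "bx r2" (or "mov pc, r2" when
   neither interworking nor ARMv4T is available), since blx is not
   available to do both steps in one instruction.  */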
17867 /* Output a 'call' insn that is a reference in memory. This is
17868 disabled for ARMv5 and later, where we prefer blx instead because otherwise
17869 there's a significant performance overhead. */
17870 const char *
17871 output_call_mem (rtx *operands)
17873 gcc_assert (!arm_arch5);
17874 if (TARGET_INTERWORK)
17876 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17877 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17878 output_asm_insn ("bx%?\t%|ip", operands);
17880 else if (regno_use_in (LR_REGNUM, operands[0]))
17882 /* LR is used in the memory address. We load the address in the
17883 first instruction. It's safe to use IP as the target of the
17884 load since the call will kill it anyway. */
17885 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17886 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17887 if (arm_arch4t)
17888 output_asm_insn ("bx%?\t%|ip", operands);
17889 else
17890 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17892 else
17894 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17895 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17898 return "";
17902 /* Output a move from arm registers to arm registers of a long double
17903 OPERANDS[0] is the destination.
17904 OPERANDS[1] is the source. */
17905 const char *
17906 output_mov_long_double_arm_from_arm (rtx *operands)
17908 /* We have to be careful here because the two might overlap. */
17909 int dest_start = REGNO (operands[0]);
17910 int src_start = REGNO (operands[1]);
17911 rtx ops[2];
17912 int i;
17914 if (dest_start < src_start)
17916 for (i = 0; i < 3; i++)
17918 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17919 ops[1] = gen_rtx_REG (SImode, src_start + i);
17920 output_asm_insn ("mov%?\t%0, %1", ops);
17923 else
17925 for (i = 2; i >= 0; i--)
17927 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17928 ops[1] = gen_rtx_REG (SImode, src_start + i);
17929 output_asm_insn ("mov%?\t%0, %1", ops);
17933 return "";
17936 void
17937 arm_emit_movpair (rtx dest, rtx src)
17939 /* If the src is an immediate, simplify it. */
17940 if (CONST_INT_P (src))
17942 HOST_WIDE_INT val = INTVAL (src);
17943 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17944 if ((val >> 16) & 0x0000ffff)
17945 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17946 GEN_INT (16)),
17947 GEN_INT ((val >> 16) & 0x0000ffff));
17948 return;
17950 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17951 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
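/* Sketch of the constant path above (value invented): for
   src == 0x12345678 the first set writes the low half and the
   zero_extract set writes the high half, which typically match the
   movw/movt patterns ("movw rd, #0x5678" then "movt rd, #0x1234");
   for src == 0x00005678 the second set is skipped because the high
   16 bits are zero.  */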
17954 /* Output a move between double words. It must be REG<-MEM
17955 or MEM<-REG. */
17956 const char *
17957 output_move_double (rtx *operands, bool emit, int *count)
17959 enum rtx_code code0 = GET_CODE (operands[0]);
17960 enum rtx_code code1 = GET_CODE (operands[1]);
17961 rtx otherops[3];
17962 if (count)
17963 *count = 1;
17965 /* The only case when this might happen is when
17966 you are looking at the length of a DImode instruction
17967 that has an invalid constant in it. */
17968 if (code0 == REG && code1 != MEM)
17970 gcc_assert (!emit);
17971 *count = 2;
17972 return "";
17975 if (code0 == REG)
17977 unsigned int reg0 = REGNO (operands[0]);
17979 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17981 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17983 switch (GET_CODE (XEXP (operands[1], 0)))
17985 case REG:
17987 if (emit)
17989 if (TARGET_LDRD
17990 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17991 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17992 else
17993 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17995 break;
17997 case PRE_INC:
17998 gcc_assert (TARGET_LDRD);
17999 if (emit)
18000 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18001 break;
18003 case PRE_DEC:
18004 if (emit)
18006 if (TARGET_LDRD)
18007 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18008 else
18009 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18011 break;
18013 case POST_INC:
18014 if (emit)
18016 if (TARGET_LDRD)
18017 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18018 else
18019 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18021 break;
18023 case POST_DEC:
18024 gcc_assert (TARGET_LDRD);
18025 if (emit)
18026 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18027 break;
18029 case PRE_MODIFY:
18030 case POST_MODIFY:
18031 /* Autoincrement addressing modes should never have overlapping
18032 base and destination registers, and overlapping index registers
18033 are already prohibited, so this doesn't need to worry about
18034 fix_cm3_ldrd. */
18035 otherops[0] = operands[0];
18036 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18037 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18039 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18041 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18043 /* Registers overlap so split out the increment. */
18044 if (emit)
18046 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18047 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18049 if (count)
18050 *count = 2;
18052 else
18054 /* Use a single insn if we can.
18055 FIXME: IWMMXT allows offsets larger than ldrd can
18056 handle, fix these up with a pair of ldr. */
18057 if (TARGET_THUMB2
18058 || !CONST_INT_P (otherops[2])
18059 || (INTVAL (otherops[2]) > -256
18060 && INTVAL (otherops[2]) < 256))
18062 if (emit)
18063 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18065 else
18067 if (emit)
18069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18072 if (count)
18073 *count = 2;
18078 else
18080 /* Use a single insn if we can.
18081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18082 fix these up with a pair of ldr. */
18083 if (TARGET_THUMB2
18084 || !CONST_INT_P (otherops[2])
18085 || (INTVAL (otherops[2]) > -256
18086 && INTVAL (otherops[2]) < 256))
18088 if (emit)
18089 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18091 else
18093 if (emit)
18095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18098 if (count)
18099 *count = 2;
18102 break;
18104 case LABEL_REF:
18105 case CONST:
18106 /* We might be able to use ldrd %0, %1 here. However, the range is
18107 different to ldr/adr, and it is broken on some ARMv7-M
18108 implementations. */
18109 /* Use the second register of the pair to avoid problematic
18110 overlap. */
18111 otherops[1] = operands[1];
18112 if (emit)
18113 output_asm_insn ("adr%?\t%0, %1", otherops);
18114 operands[1] = otherops[0];
18115 if (emit)
18117 if (TARGET_LDRD)
18118 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18119 else
18120 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18123 if (count)
18124 *count = 2;
18125 break;
18127 /* ??? This needs checking for thumb2. */
18128 default:
18129 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18130 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18132 otherops[0] = operands[0];
18133 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18134 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18136 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18138 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18140 switch ((int) INTVAL (otherops[2]))
18142 case -8:
18143 if (emit)
18144 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18145 return "";
18146 case -4:
18147 if (TARGET_THUMB2)
18148 break;
18149 if (emit)
18150 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18151 return "";
18152 case 4:
18153 if (TARGET_THUMB2)
18154 break;
18155 if (emit)
18156 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18157 return "";
18160 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18161 operands[1] = otherops[0];
18162 if (TARGET_LDRD
18163 && (REG_P (otherops[2])
18164 || TARGET_THUMB2
18165 || (CONST_INT_P (otherops[2])
18166 && INTVAL (otherops[2]) > -256
18167 && INTVAL (otherops[2]) < 256)))
18169 if (reg_overlap_mentioned_p (operands[0],
18170 otherops[2]))
18172 rtx tmp;
18173 /* Swap base and index registers over to
18174 avoid a conflict. */
18175 tmp = otherops[1];
18176 otherops[1] = otherops[2];
18177 otherops[2] = tmp;
18179 /* If both registers conflict, it will usually
18180 have been fixed by a splitter. */
18181 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18182 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18184 if (emit)
18186 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18187 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18189 if (count)
18190 *count = 2;
18192 else
18194 otherops[0] = operands[0];
18195 if (emit)
18196 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18198 return "";
18201 if (CONST_INT_P (otherops[2]))
18203 if (emit)
18205 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18206 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18207 else
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18211 else
18213 if (emit)
18214 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18217 else
18219 if (emit)
18220 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18223 if (count)
18224 *count = 2;
18226 if (TARGET_LDRD)
18227 return "ldr%(d%)\t%0, [%1]";
18229 return "ldm%(ia%)\t%1, %M0";
18231 else
18233 otherops[1] = adjust_address (operands[1], SImode, 4);
18234 /* Take care of overlapping base/data reg. */
18235 if (reg_mentioned_p (operands[0], operands[1]))
18237 if (emit)
18239 output_asm_insn ("ldr%?\t%0, %1", otherops);
18240 output_asm_insn ("ldr%?\t%0, %1", operands);
18242 if (count)
18243 *count = 2;
18246 else
18248 if (emit)
18250 output_asm_insn ("ldr%?\t%0, %1", operands);
18251 output_asm_insn ("ldr%?\t%0, %1", otherops);
18253 if (count)
18254 *count = 2;
18259 else
18261 /* Constraints should ensure this. */
18262 gcc_assert (code0 == MEM && code1 == REG);
18263 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18264 || (TARGET_ARM && TARGET_LDRD));
18266 switch (GET_CODE (XEXP (operands[0], 0)))
18268 case REG:
18269 if (emit)
18271 if (TARGET_LDRD)
18272 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18273 else
18274 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18276 break;
18278 case PRE_INC:
18279 gcc_assert (TARGET_LDRD);
18280 if (emit)
18281 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18282 break;
18284 case PRE_DEC:
18285 if (emit)
18287 if (TARGET_LDRD)
18288 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18289 else
18290 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18292 break;
18294 case POST_INC:
18295 if (emit)
18297 if (TARGET_LDRD)
18298 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18299 else
18300 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18302 break;
18304 case POST_DEC:
18305 gcc_assert (TARGET_LDRD);
18306 if (emit)
18307 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18308 break;
18310 case PRE_MODIFY:
18311 case POST_MODIFY:
18312 otherops[0] = operands[1];
18313 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18314 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18316 /* IWMMXT allows offsets larger than ldrd can handle,
18317 fix these up with a pair of ldr. */
18318 if (!TARGET_THUMB2
18319 && CONST_INT_P (otherops[2])
18320 && (INTVAL(otherops[2]) <= -256
18321 || INTVAL(otherops[2]) >= 256))
18323 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18325 if (emit)
18327 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18328 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18330 if (count)
18331 *count = 2;
18333 else
18335 if (emit)
18337 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18338 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18340 if (count)
18341 *count = 2;
18344 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18346 if (emit)
18347 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18349 else
18351 if (emit)
18352 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18354 break;
18356 case PLUS:
18357 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18358 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18360 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18362 case -8:
18363 if (emit)
18364 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18365 return "";
18367 case -4:
18368 if (TARGET_THUMB2)
18369 break;
18370 if (emit)
18371 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18372 return "";
18374 case 4:
18375 if (TARGET_THUMB2)
18376 break;
18377 if (emit)
18378 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18379 return "";
18382 if (TARGET_LDRD
18383 && (REG_P (otherops[2])
18384 || TARGET_THUMB2
18385 || (CONST_INT_P (otherops[2])
18386 && INTVAL (otherops[2]) > -256
18387 && INTVAL (otherops[2]) < 256)))
18389 otherops[0] = operands[1];
18390 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18391 if (emit)
18392 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18393 return "";
18395 /* Fall through */
18397 default:
18398 otherops[0] = adjust_address (operands[0], SImode, 4);
18399 otherops[1] = operands[1];
18400 if (emit)
18402 output_asm_insn ("str%?\t%1, %0", operands);
18403 output_asm_insn ("str%?\t%H1, %0", otherops);
18405 if (count)
18406 *count = 2;
18410 return "";
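/* Illustrative example (register allocation invented): loading a DImode
   value into r4:r5 from [r6] prefers "ldrd r4, [r6]" when TARGET_LDRD is
   set and the Cortex-M3 ldrd erratum workaround does not apply, and falls
   back to "ldmia r6, {r4, r5}" otherwise.  */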
18413 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18414 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18416 const char *
18417 output_move_quad (rtx *operands)
18419 if (REG_P (operands[0]))
18421 /* Load, or reg->reg move. */
18423 if (MEM_P (operands[1]))
18425 switch (GET_CODE (XEXP (operands[1], 0)))
18427 case REG:
18428 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18429 break;
18431 case LABEL_REF:
18432 case CONST:
18433 output_asm_insn ("adr%?\t%0, %1", operands);
18434 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18435 break;
18437 default:
18438 gcc_unreachable ();
18441 else
18443 rtx ops[2];
18444 int dest, src, i;
18446 gcc_assert (REG_P (operands[1]));
18448 dest = REGNO (operands[0]);
18449 src = REGNO (operands[1]);
18451 /* This seems pretty dumb, but hopefully GCC won't try to do it
18452 very often. */
18453 if (dest < src)
18454 for (i = 0; i < 4; i++)
18456 ops[0] = gen_rtx_REG (SImode, dest + i);
18457 ops[1] = gen_rtx_REG (SImode, src + i);
18458 output_asm_insn ("mov%?\t%0, %1", ops);
18460 else
18461 for (i = 3; i >= 0; i--)
18463 ops[0] = gen_rtx_REG (SImode, dest + i);
18464 ops[1] = gen_rtx_REG (SImode, src + i);
18465 output_asm_insn ("mov%?\t%0, %1", ops);
18469 else
18471 gcc_assert (MEM_P (operands[0]));
18472 gcc_assert (REG_P (operands[1]));
18473 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18475 switch (GET_CODE (XEXP (operands[0], 0)))
18477 case REG:
18478 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18479 break;
18481 default:
18482 gcc_unreachable ();
18486 return "";
18489 /* Output a VFP load or store instruction. */
18491 const char *
18492 output_move_vfp (rtx *operands)
18494 rtx reg, mem, addr, ops[2];
18495 int load = REG_P (operands[0]);
18496 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18497 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18498 const char *templ;
18499 char buff[50];
18500 machine_mode mode;
18502 reg = operands[!load];
18503 mem = operands[load];
18505 mode = GET_MODE (reg);
18507 gcc_assert (REG_P (reg));
18508 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18509 gcc_assert (mode == SFmode
18510 || mode == DFmode
18511 || mode == SImode
18512 || mode == DImode
18513 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18514 gcc_assert (MEM_P (mem));
18516 addr = XEXP (mem, 0);
18518 switch (GET_CODE (addr))
18520 case PRE_DEC:
18521 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18522 ops[0] = XEXP (addr, 0);
18523 ops[1] = reg;
18524 break;
18526 case POST_INC:
18527 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18528 ops[0] = XEXP (addr, 0);
18529 ops[1] = reg;
18530 break;
18532 default:
18533 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18534 ops[0] = reg;
18535 ops[1] = mem;
18536 break;
18539 sprintf (buff, templ,
18540 load ? "ld" : "st",
18541 dp ? "64" : "32",
18542 dp ? "P" : "",
18543 integer_p ? "\t%@ int" : "");
18544 output_asm_insn (buff, ops);
18546 return "";
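/* Assumed example (operands invented): a DFmode load from [r4] prints
   "vldr.64 d8, [r4]", while the post-increment form prints
   "vldmia.64 r4!, {d8}" because vldr has no writeback addressing mode.  */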
18549 /* Output a Neon double-word or quad-word load or store, or a load
18550 or store for larger structure modes.
18552 WARNING: The ordering of elements is weird in big-endian mode,
18553 because the EABI requires that vectors stored in memory appear
18554 as though they were stored by a VSTM instruction.
18555 GCC RTL defines element ordering based on in-memory order.
18556 This can be different from the architectural ordering of elements
18557 within a NEON register. The intrinsics defined in arm_neon.h use the
18558 NEON register element ordering, not the GCC RTL element ordering.
18560 For example, the in-memory ordering of a big-endian quadword
18561 vector with 16-bit elements when stored from register pair {d0,d1}
18562 will be (lowest address first, d0[N] is NEON register element N):
18564 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18566 When necessary, quadword registers (dN, dN+1) are moved to ARM
18567 registers from rN in the order:
18569 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18571 So that STM/LDM can be used on vectors in ARM registers, and the
18572 same memory layout will result as if VSTM/VLDM were used.
18574 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18575 possible, which allows use of appropriate alignment tags.
18576 Note that the choice of "64" is independent of the actual vector
18577 element size; this size simply ensures that the behavior is
18578 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18580 Due to limitations of those instructions, use of VST1.64/VLD1.64
18581 is not possible if:
18582 - the address contains PRE_DEC, or
18583 - the mode refers to more than 4 double-word registers
18585 In those cases, it would be possible to replace VSTM/VLDM by a
18586 sequence of instructions; this is not currently implemented since
18587 this is not certain to actually improve performance. */
18589 const char *
18590 output_move_neon (rtx *operands)
18592 rtx reg, mem, addr, ops[2];
18593 int regno, nregs, load = REG_P (operands[0]);
18594 const char *templ;
18595 char buff[50];
18596 machine_mode mode;
18598 reg = operands[!load];
18599 mem = operands[load];
18601 mode = GET_MODE (reg);
18603 gcc_assert (REG_P (reg));
18604 regno = REGNO (reg);
18605 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18606 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18607 || NEON_REGNO_OK_FOR_QUAD (regno));
18608 gcc_assert (VALID_NEON_DREG_MODE (mode)
18609 || VALID_NEON_QREG_MODE (mode)
18610 || VALID_NEON_STRUCT_MODE (mode));
18611 gcc_assert (MEM_P (mem));
18613 addr = XEXP (mem, 0);
18615 /* Strip off const from addresses like (const (plus (...))). */
18616 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18617 addr = XEXP (addr, 0);
18619 switch (GET_CODE (addr))
18621 case POST_INC:
18622 /* We have to use vldm / vstm for too-large modes. */
18623 if (nregs > 4)
18625 templ = "v%smia%%?\t%%0!, %%h1";
18626 ops[0] = XEXP (addr, 0);
18628 else
18630 templ = "v%s1.64\t%%h1, %%A0";
18631 ops[0] = mem;
18633 ops[1] = reg;
18634 break;
18636 case PRE_DEC:
18637 /* We have to use vldm / vstm in this case, since there is no
18638 pre-decrement form of the vld1 / vst1 instructions. */
18639 templ = "v%smdb%%?\t%%0!, %%h1";
18640 ops[0] = XEXP (addr, 0);
18641 ops[1] = reg;
18642 break;
18644 case POST_MODIFY:
18645 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18646 gcc_unreachable ();
18648 case REG:
18649 /* We have to use vldm / vstm for too-large modes. */
18650 if (nregs > 1)
18652 if (nregs > 4)
18653 templ = "v%smia%%?\t%%m0, %%h1";
18654 else
18655 templ = "v%s1.64\t%%h1, %%A0";
18657 ops[0] = mem;
18658 ops[1] = reg;
18659 break;
18661 /* Fall through. */
18662 case LABEL_REF:
18663 case PLUS:
18665 int i;
18666 int overlap = -1;
18667 for (i = 0; i < nregs; i++)
18669 /* We're only using DImode here because it's a convenient size. */
18670 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18671 ops[1] = adjust_address (mem, DImode, 8 * i);
18672 if (reg_overlap_mentioned_p (ops[0], mem))
18674 gcc_assert (overlap == -1);
18675 overlap = i;
18677 else
18679 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18680 output_asm_insn (buff, ops);
18683 if (overlap != -1)
18685 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18686 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18687 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18688 output_asm_insn (buff, ops);
18691 return "";
18694 default:
18695 gcc_unreachable ();
18698 sprintf (buff, templ, load ? "ld" : "st");
18699 output_asm_insn (buff, ops);
18701 return "";
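/* Assumed example (modes and registers invented): a quad-word (16-byte)
   load from a plain register address uses the vld1.64 form so that an
   alignment hint can be attached, whereas a structure mode spanning more
   than four D registers, or any pre-decrement address, falls back to
   vldmia / vldmdb as described above.  */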
18704 /* Compute and return the length of neon_mov<mode>, where <mode> is
18705 one of VSTRUCT modes: EI, OI, CI or XI. */
18707 arm_attr_length_move_neon (rtx_insn *insn)
18709 rtx reg, mem, addr;
18710 int load;
18711 machine_mode mode;
18713 extract_insn_cached (insn);
18715 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18717 mode = GET_MODE (recog_data.operand[0]);
18718 switch (mode)
18720 case EImode:
18721 case OImode:
18722 return 8;
18723 case CImode:
18724 return 12;
18725 case XImode:
18726 return 16;
18727 default:
18728 gcc_unreachable ();
18732 load = REG_P (recog_data.operand[0]);
18733 reg = recog_data.operand[!load];
18734 mem = recog_data.operand[load];
18736 gcc_assert (MEM_P (mem));
18738 mode = GET_MODE (reg);
18739 addr = XEXP (mem, 0);
18741 /* Strip off const from addresses like (const (plus (...))). */
18742 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18743 addr = XEXP (addr, 0);
18745 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18747 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18748 return insns * 4;
18750 else
18751 return 4;
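/* Assumed example: an OImode register-to-register move is two
   instructions, so 8 bytes are reported; an OImode load from a
   reg-plus-offset address covers four D registers and is reported as
   4 insns * 4 bytes = 16 bytes.  */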
18754 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18755 return zero. */
18758 arm_address_offset_is_imm (rtx_insn *insn)
18760 rtx mem, addr;
18762 extract_insn_cached (insn);
18764 if (REG_P (recog_data.operand[0]))
18765 return 0;
18767 mem = recog_data.operand[0];
18769 gcc_assert (MEM_P (mem));
18771 addr = XEXP (mem, 0);
18773 if (REG_P (addr)
18774 || (GET_CODE (addr) == PLUS
18775 && REG_P (XEXP (addr, 0))
18776 && CONST_INT_P (XEXP (addr, 1))))
18777 return 1;
18778 else
18779 return 0;
18782 /* Output an ADD r, s, #n where n may be too big for one instruction.
18783 If adding zero and the source and destination are the same register, output nothing. */
18784 const char *
18785 output_add_immediate (rtx *operands)
18787 HOST_WIDE_INT n = INTVAL (operands[2]);
18789 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18791 if (n < 0)
18792 output_multi_immediate (operands,
18793 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18794 -n);
18795 else
18796 output_multi_immediate (operands,
18797 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18801 return "";
18804 /* Output a multiple immediate operation.
18805 OPERANDS is the vector of operands referred to in the output patterns.
18806 INSTR1 is the output pattern to use for the first constant.
18807 INSTR2 is the output pattern to use for subsequent constants.
18808 IMMED_OP is the index of the constant slot in OPERANDS.
18809 N is the constant value. */
18810 static const char *
18811 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18812 int immed_op, HOST_WIDE_INT n)
18814 #if HOST_BITS_PER_WIDE_INT > 32
18815 n &= 0xffffffff;
18816 #endif
18818 if (n == 0)
18820 /* Quick and easy output. */
18821 operands[immed_op] = const0_rtx;
18822 output_asm_insn (instr1, operands);
18824 else
18826 int i;
18827 const char * instr = instr1;
18829 /* Note that n is never zero here (which would give no output). */
18830 for (i = 0; i < 32; i += 2)
18832 if (n & (3 << i))
18834 operands[immed_op] = GEN_INT (n & (255 << i));
18835 output_asm_insn (instr, operands);
18836 instr = instr2;
18837 i += 6;
18842 return "";
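/* Worked example (value invented): for N == 0x00012f00 the loop above
   first finds set bits at i == 8 and emits the INSTR1 pattern with
   immediate #0x2f00 (n & (255 << 8)), skips past that byte, and then
   emits the INSTR2 pattern with #0x10000, so the full addition is done
   with two ARM-encodable immediates.  */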
18845 /* Return the name of a shifter operation. */
18846 static const char *
18847 arm_shift_nmem(enum rtx_code code)
18849 switch (code)
18851 case ASHIFT:
18852 return ARM_LSL_NAME;
18854 case ASHIFTRT:
18855 return "asr";
18857 case LSHIFTRT:
18858 return "lsr";
18860 case ROTATERT:
18861 return "ror";
18863 default:
18864 abort();
18868 /* Return the appropriate ARM instruction for the operation code.
18869 The returned result should not be overwritten. OP is the rtx of the
18870 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18871 was shifted. */
18872 const char *
18873 arithmetic_instr (rtx op, int shift_first_arg)
18875 switch (GET_CODE (op))
18877 case PLUS:
18878 return "add";
18880 case MINUS:
18881 return shift_first_arg ? "rsb" : "sub";
18883 case IOR:
18884 return "orr";
18886 case XOR:
18887 return "eor";
18889 case AND:
18890 return "and";
18892 case ASHIFT:
18893 case ASHIFTRT:
18894 case LSHIFTRT:
18895 case ROTATERT:
18896 return arm_shift_nmem(GET_CODE(op));
18898 default:
18899 gcc_unreachable ();
18903 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18904 for the operation code. The returned result should not be overwritten.
18905 OP is the rtx of the shift.
18906 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18907 constant shift amount otherwise. */
18908 static const char *
18909 shift_op (rtx op, HOST_WIDE_INT *amountp)
18911 const char * mnem;
18912 enum rtx_code code = GET_CODE (op);
18914 switch (code)
18916 case ROTATE:
18917 if (!CONST_INT_P (XEXP (op, 1)))
18919 output_operand_lossage ("invalid shift operand");
18920 return NULL;
18923 code = ROTATERT;
18924 *amountp = 32 - INTVAL (XEXP (op, 1));
18925 mnem = "ror";
18926 break;
18928 case ASHIFT:
18929 case ASHIFTRT:
18930 case LSHIFTRT:
18931 case ROTATERT:
18932 mnem = arm_shift_nmem(code);
18933 if (CONST_INT_P (XEXP (op, 1)))
18935 *amountp = INTVAL (XEXP (op, 1));
18937 else if (REG_P (XEXP (op, 1)))
18939 *amountp = -1;
18940 return mnem;
18942 else
18944 output_operand_lossage ("invalid shift operand");
18945 return NULL;
18947 break;
18949 case MULT:
18950 /* We never have to worry about the amount being other than a
18951 power of 2, since this case can never be reloaded from a reg. */
18952 if (!CONST_INT_P (XEXP (op, 1)))
18954 output_operand_lossage ("invalid shift operand");
18955 return NULL;
18958 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18960 /* Amount must be a power of two. */
18961 if (*amountp & (*amountp - 1))
18963 output_operand_lossage ("invalid shift operand");
18964 return NULL;
18967 *amountp = int_log2 (*amountp);
18968 return ARM_LSL_NAME;
18970 default:
18971 output_operand_lossage ("invalid shift operand");
18972 return NULL;
18975 /* This is not 100% correct, but follows from the desire to merge
18976 multiplication by a power of 2 with the recognizer for a
18977 shift. >=32 is not a valid shift for "lsl", so we must try to
18978 output a shift that produces the correct arithmetical result.
18979 Using lsr #32 is identical except for the fact that the carry bit
18980 is not set correctly if we set the flags; but we never use the
18981 carry bit from such an operation, so we can ignore that. */
18982 if (code == ROTATERT)
18983 /* Rotate is just modulo 32. */
18984 *amountp &= 31;
18985 else if (*amountp != (*amountp & 31))
18987 if (code == ASHIFT)
18988 mnem = "lsr";
18989 *amountp = 32;
18992 /* Shifts of 0 are no-ops. */
18993 if (*amountp == 0)
18994 return NULL;
18996 return mnem;
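/* Assumed examples of the conversions above (operand values invented):
   a MULT by 8 is printed as "lsl" with *AMOUNTP set to int_log2 (8),
   i.e. 3; an ASHIFT by 32, which cannot be encoded as lsl #32, is
   rewritten as "lsr" with amount 32 so the arithmetic result (zero) is
   still correct; a shift amount of 0 returns NULL and no shift is
   printed.  */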
18999 /* Obtain the shift count from the power of two. */
19001 static HOST_WIDE_INT
19002 int_log2 (HOST_WIDE_INT power)
19004 HOST_WIDE_INT shift = 0;
19006 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19008 gcc_assert (shift <= 31);
19009 shift++;
19012 return shift;
19015 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19016 because /bin/as is horribly restrictive. The judgement about
19017 whether or not each character is 'printable' (and can be output as
19018 is) or not (and must be printed with an octal escape) must be made
19019 with reference to the *host* character set -- the situation is
19020 similar to that discussed in the comments above pp_c_char in
19021 c-pretty-print.c. */
19023 #define MAX_ASCII_LEN 51
19025 void
19026 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19028 int i;
19029 int len_so_far = 0;
19031 fputs ("\t.ascii\t\"", stream);
19033 for (i = 0; i < len; i++)
19035 int c = p[i];
19037 if (len_so_far >= MAX_ASCII_LEN)
19039 fputs ("\"\n\t.ascii\t\"", stream);
19040 len_so_far = 0;
19043 if (ISPRINT (c))
19045 if (c == '\\' || c == '\"')
19047 putc ('\\', stream);
19048 len_so_far++;
19050 putc (c, stream);
19051 len_so_far++;
19053 else
19055 fprintf (stream, "\\%03o", c);
19056 len_so_far += 4;
19060 fputs ("\"\n", stream);
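/* Example of the emitted directive (input bytes invented): for the bytes
   'a', '"' and '\n' this prints

       .ascii "a\"\012"

   with the quote escaped and the unprintable newline written as a
   three-digit octal escape; a fresh ".ascii" line is started roughly
   every MAX_ASCII_LEN characters.  */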
19063 /* Compute the register save mask for registers 0 through 12
19064 inclusive. This code is used by arm_compute_save_reg_mask. */
19066 static unsigned long
19067 arm_compute_save_reg0_reg12_mask (void)
19069 unsigned long func_type = arm_current_func_type ();
19070 unsigned long save_reg_mask = 0;
19071 unsigned int reg;
19073 if (IS_INTERRUPT (func_type))
19075 unsigned int max_reg;
19076 /* Interrupt functions must not corrupt any registers,
19077 even call clobbered ones. If this is a leaf function
19078 we can just examine the registers used by the RTL, but
19079 otherwise we have to assume that whatever function is
19080 called might clobber anything, and so we have to save
19081 all the call-clobbered registers as well. */
19082 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19083 /* FIQ handlers have registers r8 - r12 banked, so
19084 we only need to check r0 - r7. Normal ISRs only
19085 bank r14 and r15, so we must check up to r12.
19086 r13 is the stack pointer which is always preserved,
19087 so we do not need to consider it here. */
19088 max_reg = 7;
19089 else
19090 max_reg = 12;
19092 for (reg = 0; reg <= max_reg; reg++)
19093 if (df_regs_ever_live_p (reg)
19094 || (! crtl->is_leaf && call_used_regs[reg]))
19095 save_reg_mask |= (1 << reg);
19097 /* Also save the pic base register if necessary. */
19098 if (flag_pic
19099 && !TARGET_SINGLE_PIC_BASE
19100 && arm_pic_register != INVALID_REGNUM
19101 && crtl->uses_pic_offset_table)
19102 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19104 else if (IS_VOLATILE(func_type))
19106 /* For noreturn functions we historically omitted register saves
19107 altogether. However, this really messes up debugging. As a
19108 compromise, save just the frame pointers. Combined with the link
19109 register saved elsewhere this should be sufficient to get
19110 a backtrace. */
19111 if (frame_pointer_needed)
19112 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19113 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19114 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19115 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19116 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19118 else
19120 /* In the normal case we only need to save those registers
19121 which are call saved and which are used by this function. */
19122 for (reg = 0; reg <= 11; reg++)
19123 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19124 save_reg_mask |= (1 << reg);
19126 /* Handle the frame pointer as a special case. */
19127 if (frame_pointer_needed)
19128 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19130 /* If we aren't loading the PIC register,
19131 don't stack it even though it may be live. */
19132 if (flag_pic
19133 && !TARGET_SINGLE_PIC_BASE
19134 && arm_pic_register != INVALID_REGNUM
19135 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19136 || crtl->uses_pic_offset_table))
19137 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19139 /* The prologue will copy SP into R0, so save it. */
19140 if (IS_STACKALIGN (func_type))
19141 save_reg_mask |= 1;
19144 /* Save registers so the exception handler can modify them. */
19145 if (crtl->calls_eh_return)
19147 unsigned int i;
19149 for (i = 0; ; i++)
19151 reg = EH_RETURN_DATA_REGNO (i);
19152 if (reg == INVALID_REGNUM)
19153 break;
19154 save_reg_mask |= 1 << reg;
19158 return save_reg_mask;
19161 /* Return true if r3 is live at the start of the function. */
19163 static bool
19164 arm_r3_live_at_start_p (void)
19166 /* Just look at cfg info, which is still close enough to correct at this
19167 point. This gives false positives for broken functions that might use
19168 uninitialized data that happens to be allocated in r3, but who cares? */
19169 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19172 /* Compute the number of bytes used to store the static chain register on the
19173 stack, above the stack frame. We need to know this accurately to get the
19174 alignment of the rest of the stack frame correct. */
19176 static int
19177 arm_compute_static_chain_stack_bytes (void)
19179 /* See the defining assertion in arm_expand_prologue. */
19180 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19181 && IS_NESTED (arm_current_func_type ())
19182 && arm_r3_live_at_start_p ()
19183 && crtl->args.pretend_args_size == 0)
19184 return 4;
19186 return 0;
19189 /* Compute a bit mask of which registers need to be
19190 saved on the stack for the current function.
19191 This is used by arm_get_frame_offsets, which may add extra registers. */
19193 static unsigned long
19194 arm_compute_save_reg_mask (void)
19196 unsigned int save_reg_mask = 0;
19197 unsigned long func_type = arm_current_func_type ();
19198 unsigned int reg;
19200 if (IS_NAKED (func_type))
19201 /* This should never really happen. */
19202 return 0;
19204 /* If we are creating a stack frame, then we must save the frame pointer,
19205 IP (which will hold the old stack pointer), LR and the PC. */
19206 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19207 save_reg_mask |=
19208 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19209 | (1 << IP_REGNUM)
19210 | (1 << LR_REGNUM)
19211 | (1 << PC_REGNUM);
19213 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19215 /* Decide if we need to save the link register.
19216 Interrupt routines have their own banked link register,
19217 so they never need to save it.
19218 Otherwise if we do not use the link register we do not need to save
19219 it. If we are pushing other registers onto the stack however, we
19220 can save an instruction in the epilogue by pushing the link register
19221 now and then popping it back into the PC. This incurs extra memory
19222 accesses though, so we only do it when optimizing for size, and only
19223 if we know that we will not need a fancy return sequence. */
19224 if (df_regs_ever_live_p (LR_REGNUM)
19225 || (save_reg_mask
19226 && optimize_size
19227 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19228 && !crtl->calls_eh_return))
19229 save_reg_mask |= 1 << LR_REGNUM;
19231 if (cfun->machine->lr_save_eliminated)
19232 save_reg_mask &= ~ (1 << LR_REGNUM);
19234 if (TARGET_REALLY_IWMMXT
19235 && ((bit_count (save_reg_mask)
19236 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19237 arm_compute_static_chain_stack_bytes())
19238 ) % 2) != 0)
19240 /* The total number of registers that are going to be pushed
19241 onto the stack is odd. We need to ensure that the stack
19242 is 64-bit aligned before we start to save iWMMXt registers,
19243 and also before we start to create locals. (A local variable
19244 might be a double or long long which we will load/store using
19245 an iWMMXt instruction). Therefore we need to push another
19246 ARM register, so that the stack will be 64-bit aligned. We
19247 try to avoid using the arg registers (r0 - r3) as they might be
19248 used to pass values in a tail call. */
19249 for (reg = 4; reg <= 12; reg++)
19250 if ((save_reg_mask & (1 << reg)) == 0)
19251 break;
19253 if (reg <= 12)
19254 save_reg_mask |= (1 << reg);
19255 else
19257 cfun->machine->sibcall_blocked = 1;
19258 save_reg_mask |= (1 << 3);
19262 /* We may need to push an additional register for use in initializing the
19263 PIC base register. */
19264 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19265 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19267 reg = thumb_find_work_register (1 << 4);
19268 if (!call_used_regs[reg])
19269 save_reg_mask |= (1 << reg);
19272 return save_reg_mask;
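/* Assumed example: a normal ARM function that uses r4 and makes calls
   ends up with bits 4 and 14 (r4 and lr) set in the returned mask; when
   an APCS frame is being built, fp, ip, lr and pc are forced into the
   mask as well.  */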
19276 /* Compute a bit mask of which registers need to be
19277 saved on the stack for the current function. */
19278 static unsigned long
19279 thumb1_compute_save_reg_mask (void)
19281 unsigned long mask;
19282 unsigned reg;
19284 mask = 0;
19285 for (reg = 0; reg < 12; reg ++)
19286 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19287 mask |= 1 << reg;
19289 if (flag_pic
19290 && !TARGET_SINGLE_PIC_BASE
19291 && arm_pic_register != INVALID_REGNUM
19292 && crtl->uses_pic_offset_table)
19293 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19295 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19296 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19297 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19299 /* LR will also be pushed if any lo regs are pushed. */
19300 if (mask & 0xff || thumb_force_lr_save ())
19301 mask |= (1 << LR_REGNUM);
19303 /* Make sure we have a low work register if we need one.
19304 We will need one if we are going to push a high register,
19305 but we are not currently intending to push a low register. */
19306 if ((mask & 0xff) == 0
19307 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19309 /* Use thumb_find_work_register to choose which register
19310 we will use. If the register is live then we will
19311 have to push it. Use LAST_LO_REGNUM as our fallback
19312 choice for the register to select. */
19313 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19314 /* Make sure the register returned by thumb_find_work_register is
19315 not part of the return value. */
19316 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19317 reg = LAST_LO_REGNUM;
19319 if (! call_used_regs[reg])
19320 mask |= 1 << reg;
19323 /* The 504 below is 8 bytes less than 512 because there are two possible
19324 alignment words. We can't tell here if they will be present or not, so we
19325 have to play it safe and assume that they are. */
19326 if ((CALLER_INTERWORKING_SLOT_SIZE +
19327 ROUND_UP_WORD (get_frame_size ()) +
19328 crtl->outgoing_args_size) >= 504)
19330 /* This is the same as the code in thumb1_expand_prologue() which
19331 determines which register to use for stack decrement. */
19332 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19333 if (mask & (1 << reg))
19334 break;
19336 if (reg > LAST_LO_REGNUM)
19338 /* Make sure we have a register available for stack decrement. */
19339 mask |= 1 << LAST_LO_REGNUM;
19343 return mask;
19347 /* Return the number of bytes required to save VFP registers. */
19348 static int
19349 arm_get_vfp_saved_size (void)
19351 unsigned int regno;
19352 int count;
19353 int saved;
19355 saved = 0;
19356 /* Space for saved VFP registers. */
19357 if (TARGET_HARD_FLOAT && TARGET_VFP)
19359 count = 0;
19360 for (regno = FIRST_VFP_REGNUM;
19361 regno < LAST_VFP_REGNUM;
19362 regno += 2)
19364 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19365 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19367 if (count > 0)
19369 /* Work around the ARM10 VFPr1 bug. */
19370 if (count == 2 && !arm_arch6)
19371 count++;
19372 saved += count * 8;
19374 count = 0;
19376 else
19377 count++;
19379 if (count > 0)
19381 if (count == 2 && !arm_arch6)
19382 count++;
19383 saved += count * 8;
19386 return saved;
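/* Worked example (live-register pattern assumed): if d8-d9 are the only
   call-saved VFP registers live, COUNT reaches 2 and the ARM10 VFPr1
   workaround above rounds it up to 3, so 24 bytes are reported; on an
   ARMv6 or later core the same pattern reports 16 bytes.  */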
19390 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19391 everything bar the final return instruction. If simple_return is true,
19392 then do not output epilogue, because it has already been emitted in RTL. */
19393 const char *
19394 output_return_instruction (rtx operand, bool really_return, bool reverse,
19395 bool simple_return)
19397 char conditional[10];
19398 char instr[100];
19399 unsigned reg;
19400 unsigned long live_regs_mask;
19401 unsigned long func_type;
19402 arm_stack_offsets *offsets;
19404 func_type = arm_current_func_type ();
19406 if (IS_NAKED (func_type))
19407 return "";
19409 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19411 /* If this function was declared non-returning, and we have
19412 found a tail call, then we have to trust that the called
19413 function won't return. */
19414 if (really_return)
19416 rtx ops[2];
19418 /* Otherwise, trap an attempted return by aborting. */
19419 ops[0] = operand;
19420 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19421 : "abort");
19422 assemble_external_libcall (ops[1]);
19423 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19426 return "";
19429 gcc_assert (!cfun->calls_alloca || really_return);
19431 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19433 cfun->machine->return_used_this_function = 1;
19435 offsets = arm_get_frame_offsets ();
19436 live_regs_mask = offsets->saved_regs_mask;
19438 if (!simple_return && live_regs_mask)
19440 const char * return_reg;
19442 /* If we do not have any special requirements for function exit
19443 (e.g. interworking) then we can load the return address
19444 directly into the PC. Otherwise we must load it into LR. */
19445 if (really_return
19446 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19447 return_reg = reg_names[PC_REGNUM];
19448 else
19449 return_reg = reg_names[LR_REGNUM];
19451 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19453 /* There are three possible reasons for the IP register
19454 being saved. 1) a stack frame was created, in which case
19455 IP contains the old stack pointer, or 2) an ISR routine
19456 corrupted it, or 3) it was saved to align the stack on
19457 iWMMXt. In case 1, restore IP into SP, otherwise just
19458 restore IP. */
19459 if (frame_pointer_needed)
19461 live_regs_mask &= ~ (1 << IP_REGNUM);
19462 live_regs_mask |= (1 << SP_REGNUM);
19464 else
19465 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19468 /* On some ARM architectures it is faster to use LDR rather than
19469 LDM to load a single register. On other architectures, the
19470 cost is the same. In 26-bit mode, or for exception handlers,
19471 we have to use LDM to load the PC so that the CPSR is also
19472 restored. */
19473 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19474 if (live_regs_mask == (1U << reg))
19475 break;
19477 if (reg <= LAST_ARM_REGNUM
19478 && (reg != LR_REGNUM
19479 || ! really_return
19480 || ! IS_INTERRUPT (func_type)))
19482 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19483 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19485 else
19487 char *p;
19488 int first = 1;
19490 /* Generate the load multiple instruction to restore the
19491 registers. Note we can get here, even if
19492 frame_pointer_needed is true, but only if sp already
19493 points to the base of the saved core registers. */
19494 if (live_regs_mask & (1 << SP_REGNUM))
19496 unsigned HOST_WIDE_INT stack_adjust;
19498 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19499 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19501 if (stack_adjust && arm_arch5 && TARGET_ARM)
19502 if (TARGET_UNIFIED_ASM)
19503 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19504 else
19505 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19506 else
19508 /* If we can't use ldmib (SA110 bug),
19509 then try to pop r3 instead. */
19510 if (stack_adjust)
19511 live_regs_mask |= 1 << 3;
19513 if (TARGET_UNIFIED_ASM)
19514 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19515 else
19516 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19519 else
19520 if (TARGET_UNIFIED_ASM)
19521 sprintf (instr, "pop%s\t{", conditional);
19522 else
19523 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19525 p = instr + strlen (instr);
19527 for (reg = 0; reg <= SP_REGNUM; reg++)
19528 if (live_regs_mask & (1 << reg))
19530 int l = strlen (reg_names[reg]);
19532 if (first)
19533 first = 0;
19534 else
19536 memcpy (p, ", ", 2);
19537 p += 2;
19540 memcpy (p, "%|", 2);
19541 memcpy (p + 2, reg_names[reg], l);
19542 p += l + 2;
19545 if (live_regs_mask & (1 << LR_REGNUM))
19547 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19548 /* If returning from an interrupt, restore the CPSR. */
19549 if (IS_INTERRUPT (func_type))
19550 strcat (p, "^");
19552 else
19553 strcpy (p, "}");
19556 output_asm_insn (instr, & operand);
19558 /* See if we need to generate an extra instruction to
19559 perform the actual function return. */
19560 if (really_return
19561 && func_type != ARM_FT_INTERWORKED
19562 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19564 /* The return has already been handled
19565 by loading the LR into the PC. */
19566 return "";
19570 if (really_return)
19572 switch ((int) ARM_FUNC_TYPE (func_type))
19574 case ARM_FT_ISR:
19575 case ARM_FT_FIQ:
19576 /* ??? This is wrong for unified assembly syntax. */
19577 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19578 break;
19580 case ARM_FT_INTERWORKED:
19581 sprintf (instr, "bx%s\t%%|lr", conditional);
19582 break;
19584 case ARM_FT_EXCEPTION:
19585 /* ??? This is wrong for unified assembly syntax. */
19586 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19587 break;
19589 default:
19590 /* Use bx if it's available. */
19591 if (arm_arch5 || arm_arch4t)
19592 sprintf (instr, "bx%s\t%%|lr", conditional);
19593 else
19594 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19595 break;
19598 output_asm_insn (instr, & operand);
19601 return "";
19604 /* Write the function name into the code section, directly preceding
19605 the function prologue.
19607 Code will be output similar to this:
19609 .ascii "arm_poke_function_name", 0
19610 .align
19612 .word 0xff000000 + (t1 - t0)
19613 arm_poke_function_name
19614 mov ip, sp
19615 stmfd sp!, {fp, ip, lr, pc}
19616 sub fp, ip, #4
19618 When performing a stack backtrace, code can inspect the value
19619 of 'pc' stored at 'fp' + 0. If the trace function then looks
19620 at location pc - 12 and the top 8 bits are set, then we know
19621 that there is a function name embedded immediately preceding this
19622 location, whose padded length is given by ((pc[-3]) & 0x00ffffff).
19624 We assume that pc is declared as a pointer to an unsigned long.
19626 It is of no benefit to output the function name if we are assembling
19627 a leaf function. These function types will not contain a stack
19628 backtrace structure, therefore it is not possible to determine the
19629 function name. */
19630 void
19631 arm_poke_function_name (FILE *stream, const char *name)
19633 unsigned long alignlength;
19634 unsigned long length;
19635 rtx x;
19637 length = strlen (name) + 1;
19638 alignlength = ROUND_UP_WORD (length);
19640 ASM_OUTPUT_ASCII (stream, name, length);
19641 ASM_OUTPUT_ALIGN (stream, 2);
19642 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19643 assemble_aligned_integer (UNITS_PER_WORD, x);
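/* Illustrative standalone sketch, not part of this file's build: a minimal
   consumer of the layout described above.  It assumes the marker word
   (0xff000000 + padded name length) sits at pc - 12 and that the padded,
   NUL-terminated name immediately precedes it.  uint32_t is used instead of
   unsigned long so the sketch is host-portable; the buffer in main () is a
   made-up stand-in for real code memory.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static const char *
poked_name (const uint32_t *pc)
{
  uint32_t marker = pc[-3];                 /* The word at pc - 12.  */
  if ((marker & 0xff000000u) != 0xff000000u)
    return NULL;                            /* No embedded name.  */
  uint32_t padded_len = marker & 0x00ffffffu;
  /* The padded name ends exactly where the marker word begins.  */
  return (const char *) (pc - 3) - padded_len;
}

int
main (void)
{
  /* Fake layout: "foo" plus NUL (4 bytes, already word-aligned), the marker
     word, then three words of "code"; PC points just past those words.  */
  uint32_t buf[5] = { 0 };
  memcpy (buf, "foo", 4);
  buf[1] = 0xff000000u + 4;
  const uint32_t *pc = &buf[1] + 3;
  printf ("%s\n", poked_name (pc));         /* Prints "foo".  */
  return 0;
}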
19646 /* Place some comments into the assembler stream
19647 describing the current function. */
19648 static void
19649 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19651 unsigned long func_type;
19653 /* ??? Do we want to print some of the below anyway? */
19654 if (TARGET_THUMB1)
19655 return;
19657 /* Sanity check. */
19658 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19660 func_type = arm_current_func_type ();
19662 switch ((int) ARM_FUNC_TYPE (func_type))
19664 default:
19665 case ARM_FT_NORMAL:
19666 break;
19667 case ARM_FT_INTERWORKED:
19668 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19669 break;
19670 case ARM_FT_ISR:
19671 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19672 break;
19673 case ARM_FT_FIQ:
19674 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19675 break;
19676 case ARM_FT_EXCEPTION:
19677 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19678 break;
19681 if (IS_NAKED (func_type))
19682 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19684 if (IS_VOLATILE (func_type))
19685 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19687 if (IS_NESTED (func_type))
19688 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19689 if (IS_STACKALIGN (func_type))
19690 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19692 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19693 crtl->args.size,
19694 crtl->args.pretend_args_size, frame_size);
19696 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19697 frame_pointer_needed,
19698 cfun->machine->uses_anonymous_args);
19700 if (cfun->machine->lr_save_eliminated)
19701 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19703 if (crtl->calls_eh_return)
19704 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19708 static void
19709 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19710 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19712 arm_stack_offsets *offsets;
19714 if (TARGET_THUMB1)
19716 int regno;
19718 /* Emit any call-via-reg trampolines that are needed for v4t support
19719 of call_reg and call_value_reg type insns. */
19720 for (regno = 0; regno < LR_REGNUM; regno++)
19722 rtx label = cfun->machine->call_via[regno];
19724 if (label != NULL)
19726 switch_to_section (function_section (current_function_decl));
19727 targetm.asm_out.internal_label (asm_out_file, "L",
19728 CODE_LABEL_NUMBER (label));
19729 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19733 /* ??? Probably not safe to set this here, since it assumes that a
19734 function will be emitted as assembly immediately after we generate
19735 RTL for it. This does not happen for inline functions. */
19736 cfun->machine->return_used_this_function = 0;
19738 else /* TARGET_32BIT */
19740 /* We need to take into account any stack-frame rounding. */
19741 offsets = arm_get_frame_offsets ();
19743 gcc_assert (!use_return_insn (FALSE, NULL)
19744 || (cfun->machine->return_used_this_function != 0)
19745 || offsets->saved_regs == offsets->outgoing_args
19746 || frame_pointer_needed);
19750 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19751 STR and STRD. If an even number of registers are being pushed, one
19752 or more STRD patterns are created for each register pair. If an
19753 odd number of registers are pushed, emit an initial STR followed by
19754 as many STRD instructions as are needed. This works best when the
19755 stack is initially 64-bit aligned (the normal case), since it
19756 ensures that each STRD is also 64-bit aligned. */
19757 static void
19758 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19760 int num_regs = 0;
19761 int i;
19762 int regno;
19763 rtx par = NULL_RTX;
19764 rtx dwarf = NULL_RTX;
19765 rtx tmp;
19766 bool first = true;
19768 num_regs = bit_count (saved_regs_mask);
19770 /* Must be at least one register to save, and can't save SP or PC. */
19771 gcc_assert (num_regs > 0 && num_regs <= 14);
19772 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19773 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19775 /* Create sequence for DWARF info. All the frame-related data for
19776 debugging is held in this wrapper. */
19777 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19779 /* Describe the stack adjustment. */
19780 tmp = gen_rtx_SET (VOIDmode,
19781 stack_pointer_rtx,
19782 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19783 RTX_FRAME_RELATED_P (tmp) = 1;
19784 XVECEXP (dwarf, 0, 0) = tmp;
19786 /* Find the first register. */
19787 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19790 i = 0;
19792 /* If there's an odd number of registers to push, start off by
19793 pushing a single register. This ensures that subsequent strd
19794 operations are dword aligned (assuming that SP was originally
19795 64-bit aligned). */
19796 if ((num_regs & 1) != 0)
19798 rtx reg, mem, insn;
19800 reg = gen_rtx_REG (SImode, regno);
19801 if (num_regs == 1)
19802 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19803 stack_pointer_rtx));
19804 else
19805 mem = gen_frame_mem (Pmode,
19806 gen_rtx_PRE_MODIFY
19807 (Pmode, stack_pointer_rtx,
19808 plus_constant (Pmode, stack_pointer_rtx,
19809 -4 * num_regs)));
19811 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19812 RTX_FRAME_RELATED_P (tmp) = 1;
19813 insn = emit_insn (tmp);
19814 RTX_FRAME_RELATED_P (insn) = 1;
19815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19816 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19817 reg);
19818 RTX_FRAME_RELATED_P (tmp) = 1;
19819 i++;
19820 regno++;
19821 XVECEXP (dwarf, 0, i) = tmp;
19822 first = false;
19825 while (i < num_regs)
19826 if (saved_regs_mask & (1 << regno))
19828 rtx reg1, reg2, mem1, mem2;
19829 rtx tmp0, tmp1, tmp2;
19830 int regno2;
19832 /* Find the register to pair with this one. */
19833 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19834 regno2++)
19837 reg1 = gen_rtx_REG (SImode, regno);
19838 reg2 = gen_rtx_REG (SImode, regno2);
19840 if (first)
19842 rtx insn;
19844 first = false;
19845 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19846 stack_pointer_rtx,
19847 -4 * num_regs));
19848 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19849 stack_pointer_rtx,
19850 -4 * (num_regs - 1)));
19851 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19852 plus_constant (Pmode, stack_pointer_rtx,
19853 -4 * (num_regs)));
19854 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19855 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19856 RTX_FRAME_RELATED_P (tmp0) = 1;
19857 RTX_FRAME_RELATED_P (tmp1) = 1;
19858 RTX_FRAME_RELATED_P (tmp2) = 1;
19859 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19860 XVECEXP (par, 0, 0) = tmp0;
19861 XVECEXP (par, 0, 1) = tmp1;
19862 XVECEXP (par, 0, 2) = tmp2;
19863 insn = emit_insn (par);
19864 RTX_FRAME_RELATED_P (insn) = 1;
19865 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19867 else
19869 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19870 stack_pointer_rtx,
19871 4 * i));
19872 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19873 stack_pointer_rtx,
19874 4 * (i + 1)));
19875 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19876 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19877 RTX_FRAME_RELATED_P (tmp1) = 1;
19878 RTX_FRAME_RELATED_P (tmp2) = 1;
19879 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19880 XVECEXP (par, 0, 0) = tmp1;
19881 XVECEXP (par, 0, 1) = tmp2;
19882 emit_insn (par);
19885 /* Create unwind information. This is an approximation. */
19886 tmp1 = gen_rtx_SET (VOIDmode,
19887 gen_frame_mem (Pmode,
19888 plus_constant (Pmode,
19889 stack_pointer_rtx,
19890 4 * i)),
19891 reg1);
19892 tmp2 = gen_rtx_SET (VOIDmode,
19893 gen_frame_mem (Pmode,
19894 plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 4 * (i + 1))),
19897 reg2);
19899 RTX_FRAME_RELATED_P (tmp1) = 1;
19900 RTX_FRAME_RELATED_P (tmp2) = 1;
19901 XVECEXP (dwarf, 0, i + 1) = tmp1;
19902 XVECEXP (dwarf, 0, i + 2) = tmp2;
19903 i += 2;
19904 regno = regno2 + 1;
19906 else
19907 regno++;
19909 return;
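/* Standalone sketch (not compiler code) of the pairing strategy above: with
   an odd number of saved registers, one single-word store goes first so that
   every following STRD pair stays doubleword aligned.  The helper names and
   the register mask are made up for the example; only register numbers are
   printed, not real instructions.  */
#include <stdio.h>

static int
next_set_bit (unsigned long mask, int from)
{
  while (from < 32 && (mask & (1ul << from)) == 0)
    from++;
  return from;
}

static void
plan_thumb2_strd_push (unsigned long mask, int num_regs)
{
  int regno = next_set_bit (mask, 0);
  int done = 0;

  if (num_regs & 1)                       /* Odd count: single STR first.  */
    {
      printf ("str  r%d\n", regno);
      regno = next_set_bit (mask, regno + 1);
      done = 1;
    }
  while (done < num_regs)                 /* Then one STRD per pair.  */
    {
      int regno2 = next_set_bit (mask, regno + 1);
      printf ("strd r%d, r%d\n", regno, regno2);
      done += 2;
      if (done < num_regs)
        regno = next_set_bit (mask, regno2 + 1);
    }
}

int
main (void)
{
  /* {r4, r5, r6, r7, r8} -> str r4; strd r5, r6; strd r7, r8.  */
  unsigned long mask = (1ul << 4) | (1ul << 5) | (1ul << 6)
                       | (1ul << 7) | (1ul << 8);
  plan_thumb2_strd_push (mask, 5);
  return 0;
}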
19912 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19913 whenever possible, otherwise it emits single-word stores. The first store
19914 also allocates stack space for all saved registers, using writeback with
19915 pre-indexed addressing. All other stores use offset addressing. If no STRD
19916 can be emitted, this function emits a sequence of single-word stores rather
19917 than an STM as before, because single-word stores give the scheduler more
19918 freedom and can be turned into an STM by peephole optimizations. */
19919 static void
19920 arm_emit_strd_push (unsigned long saved_regs_mask)
19922 int num_regs = 0;
19923 int i, j, dwarf_index = 0;
19924 int offset = 0;
19925 rtx dwarf = NULL_RTX;
19926 rtx insn = NULL_RTX;
19927 rtx tmp, mem;
19929 /* TODO: More efficient code can be emitted by changing the
19930 layout, e.g., first push all the pairs that can use STRD to keep the
19931 stack aligned, and then push all the other registers. */
19932 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19933 if (saved_regs_mask & (1 << i))
19934 num_regs++;
19936 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19937 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19938 gcc_assert (num_regs > 0);
19940 /* Create sequence for DWARF info. */
19941 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19943 /* For dwarf info, we generate explicit stack update. */
19944 tmp = gen_rtx_SET (VOIDmode,
19945 stack_pointer_rtx,
19946 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19947 RTX_FRAME_RELATED_P (tmp) = 1;
19948 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19950 /* Save registers. */
19951 offset = - 4 * num_regs;
19952 j = 0;
19953 while (j <= LAST_ARM_REGNUM)
19954 if (saved_regs_mask & (1 << j))
19956 if ((j % 2 == 0)
19957 && (saved_regs_mask & (1 << (j + 1))))
19959 /* The current register and the next register form a register pair
19960 for which STRD can be generated. */
19961 if (offset < 0)
19963 /* Allocate stack space for all saved registers. */
19964 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19965 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19966 mem = gen_frame_mem (DImode, tmp);
19967 offset = 0;
19969 else if (offset > 0)
19970 mem = gen_frame_mem (DImode,
19971 plus_constant (Pmode,
19972 stack_pointer_rtx,
19973 offset));
19974 else
19975 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19977 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19978 RTX_FRAME_RELATED_P (tmp) = 1;
19979 tmp = emit_insn (tmp);
19981 /* Record the first store insn. */
19982 if (dwarf_index == 1)
19983 insn = tmp;
19985 /* Generate dwarf info. */
19986 mem = gen_frame_mem (SImode,
19987 plus_constant (Pmode,
19988 stack_pointer_rtx,
19989 offset));
19990 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19991 RTX_FRAME_RELATED_P (tmp) = 1;
19992 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19994 mem = gen_frame_mem (SImode,
19995 plus_constant (Pmode,
19996 stack_pointer_rtx,
19997 offset + 4));
19998 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19999 RTX_FRAME_RELATED_P (tmp) = 1;
20000 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20002 offset += 8;
20003 j += 2;
20005 else
20007 /* Emit a single word store. */
20008 if (offset < 0)
20010 /* Allocate stack space for all saved registers. */
20011 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20012 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20013 mem = gen_frame_mem (SImode, tmp);
20014 offset = 0;
20016 else if (offset > 0)
20017 mem = gen_frame_mem (SImode,
20018 plus_constant (Pmode,
20019 stack_pointer_rtx,
20020 offset));
20021 else
20022 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20024 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20025 RTX_FRAME_RELATED_P (tmp) = 1;
20026 tmp = emit_insn (tmp);
20028 /* Record the first store insn. */
20029 if (dwarf_index == 1)
20030 insn = tmp;
20032 /* Generate dwarf info. */
20033 mem = gen_frame_mem (SImode,
20034 plus_constant(Pmode,
20035 stack_pointer_rtx,
20036 offset));
20037 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20038 RTX_FRAME_RELATED_P (tmp) = 1;
20039 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20041 offset += 4;
20042 j += 1;
20045 else
20046 j++;
20048 /* Attach dwarf info to the first insn we generate. */
20049 gcc_assert (insn != NULL_RTX);
20050 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20051 RTX_FRAME_RELATED_P (insn) = 1;
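/* Standalone sketch (not compiler code) of the ARM-mode pairing rule used
   above: STRD needs an even-numbered register paired with the next one, so a
   pair is formed only when register j is even and register j + 1 is also
   being saved; anything else gets a single-word store.  The mask is a made-up
   example.  */
#include <stdio.h>

int
main (void)
{
  /* Example mask: {r4, r5, r7, r8, r9}.  */
  unsigned long mask = (1ul << 4) | (1ul << 5) | (1ul << 7)
                       | (1ul << 8) | (1ul << 9);

  for (int j = 0; j <= 15; )
    if (!(mask & (1ul << j)))
      j++;
    else if ((j % 2) == 0 && (mask & (1ul << (j + 1))))
      {
        printf ("strd r%d, r%d\n", j, j + 1);   /* Consecutive even/odd pair. */
        j += 2;
      }
    else
      {
        printf ("str  r%d\n", j);               /* No partner: single store.  */
        j += 1;
      }
  return 0;
}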
20054 /* Generate and emit an insn that we will recognize as a push_multi.
20055 Unfortunately, since this insn does not reflect very well the actual
20056 semantics of the operation, we need to annotate the insn for the benefit
20057 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20058 MASK for registers that should be annotated for DWARF2 frame unwind
20059 information. */
20060 static rtx
20061 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20063 int num_regs = 0;
20064 int num_dwarf_regs = 0;
20065 int i, j;
20066 rtx par;
20067 rtx dwarf;
20068 int dwarf_par_index;
20069 rtx tmp, reg;
20071 /* We don't record the PC in the dwarf frame information. */
20072 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20074 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20076 if (mask & (1 << i))
20077 num_regs++;
20078 if (dwarf_regs_mask & (1 << i))
20079 num_dwarf_regs++;
20082 gcc_assert (num_regs && num_regs <= 16);
20083 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20085 /* For the body of the insn we are going to generate an UNSPEC in
20086 parallel with several USEs. This allows the insn to be recognized
20087 by the push_multi pattern in the arm.md file.
20089 The body of the insn looks something like this:
20091 (parallel [
20092 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20093 (const_int:SI <num>)))
20094 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20095 (use (reg:SI XX))
20096 (use (reg:SI YY))
20100 For the frame note however, we try to be more explicit and actually
20101 show each register being stored into the stack frame, plus a (single)
20102 decrement of the stack pointer. We do it this way in order to be
20103 friendly to the stack unwinding code, which only wants to see a single
20104 stack decrement per instruction. The RTL we generate for the note looks
20105 something like this:
20107 (sequence [
20108 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20109 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20110 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20111 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20115 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20116 instead we'd have a parallel expression detailing all
20117 the stores to the various memory addresses so that debug
20118 information is more up-to-date. Remember however while writing
20119 this to take care of the constraints with the push instruction.
20121 Note also that this has to be taken care of for the VFP registers.
20123 For more see PR43399. */
20125 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20126 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20127 dwarf_par_index = 1;
20129 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20131 if (mask & (1 << i))
20133 reg = gen_rtx_REG (SImode, i);
20135 XVECEXP (par, 0, 0)
20136 = gen_rtx_SET (VOIDmode,
20137 gen_frame_mem
20138 (BLKmode,
20139 gen_rtx_PRE_MODIFY (Pmode,
20140 stack_pointer_rtx,
20141 plus_constant
20142 (Pmode, stack_pointer_rtx,
20143 -4 * num_regs))
20145 gen_rtx_UNSPEC (BLKmode,
20146 gen_rtvec (1, reg),
20147 UNSPEC_PUSH_MULT));
20149 if (dwarf_regs_mask & (1 << i))
20151 tmp = gen_rtx_SET (VOIDmode,
20152 gen_frame_mem (SImode, stack_pointer_rtx),
20153 reg);
20154 RTX_FRAME_RELATED_P (tmp) = 1;
20155 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20158 break;
20162 for (j = 1, i++; j < num_regs; i++)
20164 if (mask & (1 << i))
20166 reg = gen_rtx_REG (SImode, i);
20168 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20170 if (dwarf_regs_mask & (1 << i))
20173 = gen_rtx_SET (VOIDmode,
20174 gen_frame_mem
20175 (SImode,
20176 plus_constant (Pmode, stack_pointer_rtx,
20177 4 * j)),
20178 reg);
20179 RTX_FRAME_RELATED_P (tmp) = 1;
20180 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20183 j++;
20187 par = emit_insn (par);
20189 tmp = gen_rtx_SET (VOIDmode,
20190 stack_pointer_rtx,
20191 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20192 RTX_FRAME_RELATED_P (tmp) = 1;
20193 XVECEXP (dwarf, 0, 0) = tmp;
20195 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20197 return par;
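/* Minimal sketch (outside the compiler) of the bookkeeping above: given a
   push mask and the DWARF subset mask, count the registers and derive the
   single stack decrement that the frame note describes.  The masks are
   made-up example values.  */
#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1ul << 4) | (1ul << 5) | (1ul << 14);   /* r4, r5, lr */
  unsigned long dwarf_mask = mask & ~(1ul << 15);               /* never PC   */
  int num_regs = 0, num_dwarf_regs = 0;

  for (int i = 0; i <= 15; i++)
    {
      if (mask & (1ul << i))
        num_regs++;
      if (dwarf_mask & (1ul << i))
        num_dwarf_regs++;
    }
  /* The note carries one SP decrement of 4 bytes per pushed register,
     followed by one SET per annotated register.  */
  printf ("push %d regs, sp adjust = %d, dwarf sets = %d\n",
          num_regs, -4 * num_regs, num_dwarf_regs);
  return 0;
}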
20200 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20201 SIZE is the offset to be adjusted.
20202 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20203 static void
20204 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20206 rtx dwarf;
20208 RTX_FRAME_RELATED_P (insn) = 1;
20209 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20210 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20213 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20214 SAVED_REGS_MASK shows which registers need to be restored.
20216 Unfortunately, since this insn does not reflect very well the actual
20217 semantics of the operation, we need to annotate the insn for the benefit
20218 of DWARF2 frame unwind information. */
20219 static void
20220 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20222 int num_regs = 0;
20223 int i, j;
20224 rtx par;
20225 rtx dwarf = NULL_RTX;
20226 rtx tmp, reg;
20227 bool return_in_pc;
20228 int offset_adj;
20229 int emit_update;
20231 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20232 offset_adj = return_in_pc ? 1 : 0;
20233 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20234 if (saved_regs_mask & (1 << i))
20235 num_regs++;
20237 gcc_assert (num_regs && num_regs <= 16);
20239 /* If SP is in the reglist, then we don't emit an SP update insn. */
20240 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20242 /* The parallel needs to hold num_regs SETs
20243 and one SET for the stack update. */
20244 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20246 if (return_in_pc)
20248 tmp = ret_rtx;
20249 XVECEXP (par, 0, 0) = tmp;
20252 if (emit_update)
20254 /* Increment the stack pointer, based on there being
20255 num_regs 4-byte registers to restore. */
20256 tmp = gen_rtx_SET (VOIDmode,
20257 stack_pointer_rtx,
20258 plus_constant (Pmode,
20259 stack_pointer_rtx,
20260 4 * num_regs));
20261 RTX_FRAME_RELATED_P (tmp) = 1;
20262 XVECEXP (par, 0, offset_adj) = tmp;
20265 /* Now restore every reg, which may include PC. */
20266 for (j = 0, i = 0; j < num_regs; i++)
20267 if (saved_regs_mask & (1 << i))
20269 reg = gen_rtx_REG (SImode, i);
20270 if ((num_regs == 1) && emit_update && !return_in_pc)
20272 /* Emit single load with writeback. */
20273 tmp = gen_frame_mem (SImode,
20274 gen_rtx_POST_INC (Pmode,
20275 stack_pointer_rtx));
20276 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20277 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20278 return;
20281 tmp = gen_rtx_SET (VOIDmode,
20282 reg,
20283 gen_frame_mem
20284 (SImode,
20285 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20286 RTX_FRAME_RELATED_P (tmp) = 1;
20287 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20289 /* We need to maintain a sequence for DWARF info too. As dwarf info
20290 should not have PC, skip PC. */
20291 if (i != PC_REGNUM)
20292 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20294 j++;
20297 if (return_in_pc)
20298 par = emit_jump_insn (par);
20299 else
20300 par = emit_insn (par);
20302 REG_NOTES (par) = dwarf;
20303 if (!return_in_pc)
20304 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20305 stack_pointer_rtx, stack_pointer_rtx);
20308 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20309 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20311 Unfortunately, since this insn does not reflect very well the actual
20312 semantics of the operation, we need to annotate the insn for the benefit
20313 of DWARF2 frame unwind information. */
20314 static void
20315 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20317 int i, j;
20318 rtx par;
20319 rtx dwarf = NULL_RTX;
20320 rtx tmp, reg;
20322 gcc_assert (num_regs && num_regs <= 32);
20324 /* Workaround ARM10 VFPr1 bug. */
20325 if (num_regs == 2 && !arm_arch6)
20327 if (first_reg == 15)
20328 first_reg--;
20330 num_regs++;
20333 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20334 there could be up to 32 D-registers to restore.
20335 If there are more than 16 D-registers, make two recursive calls,
20336 each of which emits one pop_multi instruction. */
20337 if (num_regs > 16)
20339 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20340 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20341 return;
20344 /* The parallel needs to hold num_regs SETs
20345 and one SET for the stack update. */
20346 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20348 /* Increment the stack pointer, based on there being
20349 num_regs 8-byte registers to restore. */
20350 tmp = gen_rtx_SET (VOIDmode,
20351 base_reg,
20352 plus_constant (Pmode, base_reg, 8 * num_regs));
20353 RTX_FRAME_RELATED_P (tmp) = 1;
20354 XVECEXP (par, 0, 0) = tmp;
20356 /* Now show every reg that will be restored, using a SET for each. */
20357 for (j = 0, i=first_reg; j < num_regs; i += 2)
20359 reg = gen_rtx_REG (DFmode, i);
20361 tmp = gen_rtx_SET (VOIDmode,
20362 reg,
20363 gen_frame_mem
20364 (DFmode,
20365 plus_constant (Pmode, base_reg, 8 * j)));
20366 RTX_FRAME_RELATED_P (tmp) = 1;
20367 XVECEXP (par, 0, j + 1) = tmp;
20369 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20371 j++;
20374 par = emit_insn (par);
20375 REG_NOTES (par) = dwarf;
20377 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20378 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20380 RTX_FRAME_RELATED_P (par) = 1;
20381 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20383 else
20384 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20385 base_reg, base_reg);
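/* Standalone sketch (not compiler code) of the splitting rule above: a pop
   of more than 16 D-registers becomes two pop_multi groups of at most 16
   each.  Register numbering is simplified here to plain D-register indices;
   the real code works with VFP register numbers.  */
#include <stdio.h>

static void
plan_vfp_pop (int first_d_reg, int num_regs)
{
  if (num_regs > 16)
    {
      plan_vfp_pop (first_d_reg, 16);
      plan_vfp_pop (first_d_reg + 16, num_regs - 16);
      return;
    }
  printf ("vldm {d%d-d%d}, base advances by %d bytes\n",
          first_d_reg, first_d_reg + num_regs - 1, 8 * num_regs);
}

int
main (void)
{
  plan_vfp_pop (0, 20);   /* -> d0-d15, then d16-d19.  */
  return 0;
}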
20388 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20389 an even number of registers is being popped, multiple LDRD patterns are
20390 created for all register pairs. If an odd number of registers is popped,
20391 the last register is loaded using an LDR pattern. */
20392 static void
20393 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20395 int num_regs = 0;
20396 int i, j;
20397 rtx par = NULL_RTX;
20398 rtx dwarf = NULL_RTX;
20399 rtx tmp, reg, tmp1;
20400 bool return_in_pc;
20402 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20403 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20404 if (saved_regs_mask & (1 << i))
20405 num_regs++;
20407 gcc_assert (num_regs && num_regs <= 16);
20409 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20410 to be popped. So, if num_regs is even, now it will become odd,
20411 and we can generate pop with PC. If num_regs is odd, it will be
20412 even now, and ldr with return can be generated for PC. */
20413 if (return_in_pc)
20414 num_regs--;
20416 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20418 /* Var j iterates over all the registers to gather the registers in
20419 saved_regs_mask. Var i gives the index of each saved register in the stack
20420 frame. A PARALLEL RTX holding a register pair is created here, so that the
20421 pattern for LDRD can be matched. As PC is always the last register to be
20422 popped, and we have already decremented num_regs if PC is among them, we
20423 don't have to worry about PC in this loop. */
20424 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20425 if (saved_regs_mask & (1 << j))
20427 /* Create RTX for memory load. */
20428 reg = gen_rtx_REG (SImode, j);
20429 tmp = gen_rtx_SET (SImode,
20430 reg,
20431 gen_frame_mem (SImode,
20432 plus_constant (Pmode,
20433 stack_pointer_rtx, 4 * i)));
20434 RTX_FRAME_RELATED_P (tmp) = 1;
20436 if (i % 2 == 0)
20438 /* When saved-register index (i) is even, the RTX to be emitted is
20439 yet to be created. Hence create it first. The LDRD pattern we
20440 are generating is :
20441 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20442 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20443 where target registers need not be consecutive. */
20444 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20445 dwarf = NULL_RTX;
20448 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20449 added as 0th element and if i is odd, reg_i is added as 1st element
20450 of LDRD pattern shown above. */
20451 XVECEXP (par, 0, (i % 2)) = tmp;
20452 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20454 if ((i % 2) == 1)
20456 /* When saved-register index (i) is odd, RTXs for both the registers
20457 to be loaded are generated in above given LDRD pattern, and the
20458 pattern can be emitted now. */
20459 par = emit_insn (par);
20460 REG_NOTES (par) = dwarf;
20461 RTX_FRAME_RELATED_P (par) = 1;
20464 i++;
20467 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20468 number of registers is even AND return_in_pc is true, the last register is
20469 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20470 then use LDR with post-increment. */
20472 /* Increment the stack pointer, based on there being
20473 num_regs 4-byte registers to restore. */
20474 tmp = gen_rtx_SET (VOIDmode,
20475 stack_pointer_rtx,
20476 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20477 RTX_FRAME_RELATED_P (tmp) = 1;
20478 tmp = emit_insn (tmp);
20479 if (!return_in_pc)
20481 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20482 stack_pointer_rtx, stack_pointer_rtx);
20485 dwarf = NULL_RTX;
20487 if (((num_regs % 2) == 1 && !return_in_pc)
20488 || ((num_regs % 2) == 0 && return_in_pc))
20490 /* Scan for the single register to be popped. Skip until the saved
20491 register is found. */
20492 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20494 /* Gen LDR with post increment here. */
20495 tmp1 = gen_rtx_MEM (SImode,
20496 gen_rtx_POST_INC (SImode,
20497 stack_pointer_rtx));
20498 set_mem_alias_set (tmp1, get_frame_alias_set ());
20500 reg = gen_rtx_REG (SImode, j);
20501 tmp = gen_rtx_SET (SImode, reg, tmp1);
20502 RTX_FRAME_RELATED_P (tmp) = 1;
20503 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20505 if (return_in_pc)
20507 /* If return_in_pc, j must be PC_REGNUM. */
20508 gcc_assert (j == PC_REGNUM);
20509 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20510 XVECEXP (par, 0, 0) = ret_rtx;
20511 XVECEXP (par, 0, 1) = tmp;
20512 par = emit_jump_insn (par);
20514 else
20516 par = emit_insn (tmp);
20517 REG_NOTES (par) = dwarf;
20518 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20519 stack_pointer_rtx, stack_pointer_rtx);
20523 else if ((num_regs % 2) == 1 && return_in_pc)
20525 /* There are 2 registers to be popped. So, generate the pattern
20526 pop_multiple_with_stack_update_and_return to pop in PC. */
20527 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20530 return;
20533 /* LDRD in ARM mode needs consecutive registers as operands. This function
20534 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20535 offset addressing and then generates one separate stack update. This provides
20536 more scheduling freedom, compared to writeback on every load. However,
20537 if the function returns using load into PC directly
20538 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20539 before the last load. TODO: Add a peephole optimization to recognize
20540 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20541 peephole optimization to merge the load at stack-offset zero
20542 with the stack update instruction using load with writeback
20543 in post-index addressing mode. */
20544 static void
20545 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20547 int j = 0;
20548 int offset = 0;
20549 rtx par = NULL_RTX;
20550 rtx dwarf = NULL_RTX;
20551 rtx tmp, mem;
20553 /* Restore saved registers. */
20554 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20555 j = 0;
20556 while (j <= LAST_ARM_REGNUM)
20557 if (saved_regs_mask & (1 << j))
20559 if ((j % 2) == 0
20560 && (saved_regs_mask & (1 << (j + 1)))
20561 && (j + 1) != PC_REGNUM)
20563 /* Current register and next register form register pair for which
20564 LDRD can be generated. PC is always the last register popped, and
20565 we handle it separately. */
20566 if (offset > 0)
20567 mem = gen_frame_mem (DImode,
20568 plus_constant (Pmode,
20569 stack_pointer_rtx,
20570 offset));
20571 else
20572 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20574 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20575 tmp = emit_insn (tmp);
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20578 /* Generate dwarf info. */
20580 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20581 gen_rtx_REG (SImode, j),
20582 NULL_RTX);
20583 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20584 gen_rtx_REG (SImode, j + 1),
20585 dwarf);
20587 REG_NOTES (tmp) = dwarf;
20589 offset += 8;
20590 j += 2;
20592 else if (j != PC_REGNUM)
20594 /* Emit a single word load. */
20595 if (offset > 0)
20596 mem = gen_frame_mem (SImode,
20597 plus_constant (Pmode,
20598 stack_pointer_rtx,
20599 offset));
20600 else
20601 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20603 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20604 tmp = emit_insn (tmp);
20605 RTX_FRAME_RELATED_P (tmp) = 1;
20607 /* Generate dwarf info. */
20608 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20609 gen_rtx_REG (SImode, j),
20610 NULL_RTX);
20612 offset += 4;
20613 j += 1;
20615 else /* j == PC_REGNUM */
20616 j++;
20618 else
20619 j++;
20621 /* Update the stack. */
20622 if (offset > 0)
20624 tmp = gen_rtx_SET (Pmode,
20625 stack_pointer_rtx,
20626 plus_constant (Pmode,
20627 stack_pointer_rtx,
20628 offset));
20629 tmp = emit_insn (tmp);
20630 arm_add_cfa_adjust_cfa_note (tmp, offset,
20631 stack_pointer_rtx, stack_pointer_rtx);
20632 offset = 0;
20635 if (saved_regs_mask & (1 << PC_REGNUM))
20637 /* Only PC is to be popped. */
20638 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20639 XVECEXP (par, 0, 0) = ret_rtx;
20640 tmp = gen_rtx_SET (SImode,
20641 gen_rtx_REG (SImode, PC_REGNUM),
20642 gen_frame_mem (SImode,
20643 gen_rtx_POST_INC (SImode,
20644 stack_pointer_rtx)));
20645 RTX_FRAME_RELATED_P (tmp) = 1;
20646 XVECEXP (par, 0, 1) = tmp;
20647 par = emit_jump_insn (par);
20649 /* Generate dwarf info. */
20650 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20651 gen_rtx_REG (SImode, PC_REGNUM),
20652 NULL_RTX);
20653 REG_NOTES (par) = dwarf;
20654 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20655 stack_pointer_rtx, stack_pointer_rtx);
20659 /* Calculate the size of the return value that is passed in registers. */
20660 static unsigned
20661 arm_size_return_regs (void)
20663 machine_mode mode;
20665 if (crtl->return_rtx != 0)
20666 mode = GET_MODE (crtl->return_rtx);
20667 else
20668 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20670 return GET_MODE_SIZE (mode);
20673 /* Return true if the current function needs to save/restore LR. */
20674 static bool
20675 thumb_force_lr_save (void)
20677 return !cfun->machine->lr_save_eliminated
20678 && (!leaf_function_p ()
20679 || thumb_far_jump_used_p ()
20680 || df_regs_ever_live_p (LR_REGNUM));
20683 /* We may not know whether r3 will be available, because
20684 an indirect tail call may be happening in this
20685 particular case. */
20686 static bool
20687 is_indirect_tailcall_p (rtx call)
20689 rtx pat = PATTERN (call);
20691 /* Indirect tail call. */
20692 pat = XVECEXP (pat, 0, 0);
20693 if (GET_CODE (pat) == SET)
20694 pat = SET_SRC (pat);
20696 pat = XEXP (XEXP (pat, 0), 0);
20697 return REG_P (pat);
20700 /* Return true if r3 is used by any of the tail call insns in the
20701 current function. */
20702 static bool
20703 any_sibcall_could_use_r3 (void)
20705 edge_iterator ei;
20706 edge e;
20708 if (!crtl->tail_call_emit)
20709 return false;
20710 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20711 if (e->flags & EDGE_SIBCALL)
20713 rtx call = BB_END (e->src);
20714 if (!CALL_P (call))
20715 call = prev_nonnote_nondebug_insn (call);
20716 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20717 if (find_regno_fusage (call, USE, 3)
20718 || is_indirect_tailcall_p (call))
20719 return true;
20721 return false;
20725 /* Compute the distance from register FROM to register TO.
20726 These can be the arg pointer (26), the soft frame pointer (25),
20727 the stack pointer (13) or the hard frame pointer (11).
20728 In thumb mode r7 is used as the soft frame pointer, if needed.
20729 Typical stack layout looks like this:
20731 old stack pointer -> | |
20732 ----
20733 | | \
20734 | | saved arguments for
20735 | | vararg functions
20736 | | /
20738 hard FP & arg pointer -> | | \
20739 | | stack
20740 | | frame
20741 | | /
20743 | | \
20744 | | call saved
20745 | | registers
20746 soft frame pointer -> | | /
20748 | | \
20749 | | local
20750 | | variables
20751 locals base pointer -> | | /
20753 | | \
20754 | | outgoing
20755 | | arguments
20756 current stack pointer -> | | /
20759 For a given function some or all of these stack components
20760 may not be needed, giving rise to the possibility of
20761 eliminating some of the registers.
20763 The values returned by this function must reflect the behavior
20764 of arm_expand_prologue() and arm_compute_save_reg_mask().
20766 The sign of the number returned reflects the direction of stack
20767 growth, so the values are positive for all eliminations except
20768 from the soft frame pointer to the hard frame pointer.
20770 SFP may point just inside the local variables block to ensure correct
20771 alignment. */
20774 /* Calculate stack offsets. These are used to calculate register elimination
20775 offsets and in prologue/epilogue code. Also calculates which registers
20776 should be saved. */
20778 static arm_stack_offsets *
20779 arm_get_frame_offsets (void)
20781 struct arm_stack_offsets *offsets;
20782 unsigned long func_type;
20783 int leaf;
20784 int saved;
20785 int core_saved;
20786 HOST_WIDE_INT frame_size;
20787 int i;
20789 offsets = &cfun->machine->stack_offsets;
20791 /* We need to know if we are a leaf function. Unfortunately, it
20792 is possible to be called after start_sequence has been called,
20793 which causes get_insns to return the insns for the sequence,
20794 not the function, which will cause leaf_function_p to return
20795 the incorrect result.
20797 However, we only need exact information once reload has completed, and the
20798 frame size cannot be changed after that time, so we can safely
20799 use the cached value. */
20801 if (reload_completed)
20802 return offsets;
20804 /* Initially this is the size of the local variables. It will be translated
20805 into an offset once we have determined the size of the preceding data. */
20806 frame_size = ROUND_UP_WORD (get_frame_size ());
20808 leaf = leaf_function_p ();
20810 /* Space for variadic functions. */
20811 offsets->saved_args = crtl->args.pretend_args_size;
20813 /* In Thumb mode this is incorrect, but never used. */
20814 offsets->frame
20815 = (offsets->saved_args
20816 + arm_compute_static_chain_stack_bytes ()
20817 + (frame_pointer_needed ? 4 : 0));
20819 if (TARGET_32BIT)
20821 unsigned int regno;
20823 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20824 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20825 saved = core_saved;
20827 /* We know that SP will be doubleword aligned on entry, and we must
20828 preserve that condition at any subroutine call. We also require the
20829 soft frame pointer to be doubleword aligned. */
20831 if (TARGET_REALLY_IWMMXT)
20833 /* Check for the call-saved iWMMXt registers. */
20834 for (regno = FIRST_IWMMXT_REGNUM;
20835 regno <= LAST_IWMMXT_REGNUM;
20836 regno++)
20837 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20838 saved += 8;
20841 func_type = arm_current_func_type ();
20842 /* Space for saved VFP registers. */
20843 if (! IS_VOLATILE (func_type)
20844 && TARGET_HARD_FLOAT && TARGET_VFP)
20845 saved += arm_get_vfp_saved_size ();
20847 else /* TARGET_THUMB1 */
20849 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20850 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20851 saved = core_saved;
20852 if (TARGET_BACKTRACE)
20853 saved += 16;
20856 /* Saved registers include the stack frame. */
20857 offsets->saved_regs
20858 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20859 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20861 /* A leaf function does not need any stack alignment if it has nothing
20862 on the stack. */
20863 if (leaf && frame_size == 0
20864 /* However if it calls alloca(), we have a dynamically allocated
20865 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20866 && ! cfun->calls_alloca)
20868 offsets->outgoing_args = offsets->soft_frame;
20869 offsets->locals_base = offsets->soft_frame;
20870 return offsets;
20873 /* Ensure SFP has the correct alignment. */
20874 if (ARM_DOUBLEWORD_ALIGN
20875 && (offsets->soft_frame & 7))
20877 offsets->soft_frame += 4;
20878 /* Try to align stack by pushing an extra reg. Don't bother doing this
20879 when there is a stack frame as the alignment will be rolled into
20880 the normal stack adjustment. */
20881 if (frame_size + crtl->outgoing_args_size == 0)
20883 int reg = -1;
20885 /* Register r3 is caller-saved. Normally it does not need to be
20886 saved on entry by the prologue. However if we choose to save
20887 it for padding then we may confuse the compiler into thinking
20888 a prologue sequence is required when in fact it is not. This
20889 will occur when shrink-wrapping if r3 is used as a scratch
20890 register and there are no other callee-saved writes.
20892 This situation can be avoided when other callee-saved registers
20893 are available and r3 is not mandatory if we choose a callee-saved
20894 register for padding. */
20895 bool prefer_callee_reg_p = false;
20897 /* If it is safe to use r3, then do so. This sometimes
20898 generates better code on Thumb-2 by avoiding the need to
20899 use 32-bit push/pop instructions. */
20900 if (! any_sibcall_could_use_r3 ()
20901 && arm_size_return_regs () <= 12
20902 && (offsets->saved_regs_mask & (1 << 3)) == 0
20903 && (TARGET_THUMB2
20904 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20906 reg = 3;
20907 if (!TARGET_THUMB2)
20908 prefer_callee_reg_p = true;
20910 if (reg == -1
20911 || prefer_callee_reg_p)
20913 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20915 /* Avoid fixed registers; they may be changed at
20916 arbitrary times so it's unsafe to restore them
20917 during the epilogue. */
20918 if (!fixed_regs[i]
20919 && (offsets->saved_regs_mask & (1 << i)) == 0)
20921 reg = i;
20922 break;
20927 if (reg != -1)
20929 offsets->saved_regs += 4;
20930 offsets->saved_regs_mask |= (1 << reg);
20935 offsets->locals_base = offsets->soft_frame + frame_size;
20936 offsets->outgoing_args = (offsets->locals_base
20937 + crtl->outgoing_args_size);
20939 if (ARM_DOUBLEWORD_ALIGN)
20941 /* Ensure SP remains doubleword aligned. */
20942 if (offsets->outgoing_args & 7)
20943 offsets->outgoing_args += 4;
20944 gcc_assert (!(offsets->outgoing_args & 7));
20947 return offsets;
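/* Standalone sketch of how the offsets above stack up for one simple case;
   all the numbers are assumed example values, and several details handled by
   the real code (static chain bytes, iWMMXt and VFP saves, the extra padding
   register push, CALLER_INTERWORKING_SLOT_SIZE) are taken to be zero or
   skipped here.  */
#include <stdio.h>

#define ROUND_UP_WORD(x)  (((x) + 3) & ~3ul)

int
main (void)
{
  unsigned long pretend_args = 0;                 /* crtl->args.pretend_args_size */
  unsigned long frame_size = ROUND_UP_WORD (10);  /* locals: 10 -> 12 bytes       */
  unsigned long core_saved = 3 * 4;               /* say {r4, r5, lr} are saved   */
  unsigned long outgoing = 8;                     /* crtl->outgoing_args_size     */

  unsigned long saved_args = pretend_args;
  unsigned long saved_regs = saved_args + core_saved;
  unsigned long soft_frame = saved_regs;          /* + interworking slot (0 here) */
  if (soft_frame & 7)                             /* keep soft FP 8-byte aligned  */
    soft_frame += 4;
  unsigned long locals_base = soft_frame + frame_size;
  unsigned long outgoing_args = locals_base + outgoing;
  if (outgoing_args & 7)                          /* keep SP 8-byte aligned       */
    outgoing_args += 4;

  printf ("saved_args=%lu saved_regs=%lu soft_frame=%lu "
          "locals_base=%lu outgoing_args=%lu\n",
          saved_args, saved_regs, soft_frame, locals_base, outgoing_args);
  return 0;
}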
20951 /* Calculate the relative offsets for the different stack pointers. Positive
20952 offsets are in the direction of stack growth. */
20954 HOST_WIDE_INT
20955 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20957 arm_stack_offsets *offsets;
20959 offsets = arm_get_frame_offsets ();
20961 /* OK, now we have enough information to compute the distances.
20962 There must be an entry in these switch tables for each pair
20963 of registers in ELIMINABLE_REGS, even if some of the entries
20964 seem to be redundant or useless. */
20965 switch (from)
20967 case ARG_POINTER_REGNUM:
20968 switch (to)
20970 case THUMB_HARD_FRAME_POINTER_REGNUM:
20971 return 0;
20973 case FRAME_POINTER_REGNUM:
20974 /* This is the reverse of the soft frame pointer
20975 to hard frame pointer elimination below. */
20976 return offsets->soft_frame - offsets->saved_args;
20978 case ARM_HARD_FRAME_POINTER_REGNUM:
20979 /* This is only non-zero in the case where the static chain register
20980 is stored above the frame. */
20981 return offsets->frame - offsets->saved_args - 4;
20983 case STACK_POINTER_REGNUM:
20984 /* If nothing has been pushed on the stack at all
20985 then this will return -4. This *is* correct! */
20986 return offsets->outgoing_args - (offsets->saved_args + 4);
20988 default:
20989 gcc_unreachable ();
20991 gcc_unreachable ();
20993 case FRAME_POINTER_REGNUM:
20994 switch (to)
20996 case THUMB_HARD_FRAME_POINTER_REGNUM:
20997 return 0;
20999 case ARM_HARD_FRAME_POINTER_REGNUM:
21000 /* The hard frame pointer points to the top entry in the
21001 stack frame. The soft frame pointer to the bottom entry
21002 in the stack frame. If there is no stack frame at all,
21003 then they are identical. */
21005 return offsets->frame - offsets->soft_frame;
21007 case STACK_POINTER_REGNUM:
21008 return offsets->outgoing_args - offsets->soft_frame;
21010 default:
21011 gcc_unreachable ();
21013 gcc_unreachable ();
21015 default:
21016 /* You cannot eliminate from the stack pointer.
21017 In theory you could eliminate from the hard frame
21018 pointer to the stack pointer, but this will never
21019 happen, since if a stack frame is not needed the
21020 hard frame pointer will never be used. */
21021 gcc_unreachable ();
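/* Sketch only: using example offsets (assumed values, in the spirit of the
   layout sketch after arm_get_frame_offsets above: saved_args = 0, frame = 4,
   soft_frame = 16, outgoing_args = 40), the distances returned by the switch
   above would come out as printed below.  */
#include <stdio.h>

int
main (void)
{
  long saved_args = 0, frame = 4, soft_frame = 16, outgoing_args = 40;

  printf ("ARG   -> FRAME : %ld\n", soft_frame - saved_args);          /*  16 */
  printf ("ARG   -> HFP   : %ld\n", frame - saved_args - 4);           /*   0 */
  printf ("ARG   -> SP    : %ld\n", outgoing_args - (saved_args + 4)); /*  36 */
  printf ("FRAME -> HFP   : %ld\n", frame - soft_frame);               /* -12 */
  printf ("FRAME -> SP    : %ld\n", outgoing_args - soft_frame);       /*  24 */
  return 0;
}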
21025 /* Given FROM and TO register numbers, say whether this elimination is
21026 allowed. Frame pointer elimination is automatically handled.
21028 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21029 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21030 pointer, we must eliminate FRAME_POINTER_REGNUM into
21031 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21032 ARG_POINTER_REGNUM. */
21034 bool
21035 arm_can_eliminate (const int from, const int to)
21037 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21038 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21039 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21040 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21041 true);
21044 /* Emit RTL to save coprocessor registers on function entry. Returns the
21045 number of bytes pushed. */
21047 static int
21048 arm_save_coproc_regs(void)
21050 int saved_size = 0;
21051 unsigned reg;
21052 unsigned start_reg;
21053 rtx insn;
21055 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21056 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21058 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21059 insn = gen_rtx_MEM (V2SImode, insn);
21060 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21061 RTX_FRAME_RELATED_P (insn) = 1;
21062 saved_size += 8;
21065 if (TARGET_HARD_FLOAT && TARGET_VFP)
21067 start_reg = FIRST_VFP_REGNUM;
21069 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21071 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21072 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21074 if (start_reg != reg)
21075 saved_size += vfp_emit_fstmd (start_reg,
21076 (reg - start_reg) / 2);
21077 start_reg = reg + 2;
21080 if (start_reg != reg)
21081 saved_size += vfp_emit_fstmd (start_reg,
21082 (reg - start_reg) / 2);
21084 return saved_size;
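/* Standalone sketch (not compiler code) of the grouping loop above: runs of
   consecutive live register pairs are flushed as one store-multiple each when
   a dead pair (or the end) is reached.  The real loop walks VFP register
   numbers in steps of two and checks df liveness and call_used_regs; here
   liveness is a made-up bitmap indexed by D-register number.  */
#include <stdio.h>

#define NUM_D_REGS 16

int
main (void)
{
  /* Pretend D8-D11 and D14-D15 need saving.  */
  unsigned live = (0xfu << 8) | (0x3u << 14);
  int start = 0;

  for (int reg = 0; reg <= NUM_D_REGS; reg++)
    if (reg == NUM_D_REGS || !(live & (1u << reg)))
      {
        if (start != reg)
          printf ("fstmd d%d-d%d (%d regs)\n", start, reg - 1, reg - start);
        start = reg + 1;
      }
  return 0;
}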
21088 /* Set the Thumb frame pointer from the stack pointer. */
21090 static void
21091 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21093 HOST_WIDE_INT amount;
21094 rtx insn, dwarf;
21096 amount = offsets->outgoing_args - offsets->locals_base;
21097 if (amount < 1024)
21098 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21099 stack_pointer_rtx, GEN_INT (amount)));
21100 else
21102 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21103 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21104 expects the first two operands to be the same. */
21105 if (TARGET_THUMB2)
21107 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21108 stack_pointer_rtx,
21109 hard_frame_pointer_rtx));
21111 else
21113 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21114 hard_frame_pointer_rtx,
21115 stack_pointer_rtx));
21117 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21118 plus_constant (Pmode, stack_pointer_rtx, amount));
21119 RTX_FRAME_RELATED_P (dwarf) = 1;
21120 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21123 RTX_FRAME_RELATED_P (insn) = 1;
21126 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21127 function. */
21128 void
21129 arm_expand_prologue (void)
21131 rtx amount;
21132 rtx insn;
21133 rtx ip_rtx;
21134 unsigned long live_regs_mask;
21135 unsigned long func_type;
21136 int fp_offset = 0;
21137 int saved_pretend_args = 0;
21138 int saved_regs = 0;
21139 unsigned HOST_WIDE_INT args_to_push;
21140 arm_stack_offsets *offsets;
21142 func_type = arm_current_func_type ();
21144 /* Naked functions don't have prologues. */
21145 if (IS_NAKED (func_type))
21146 return;
21148 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21149 args_to_push = crtl->args.pretend_args_size;
21151 /* Compute which register we will have to save onto the stack. */
21152 offsets = arm_get_frame_offsets ();
21153 live_regs_mask = offsets->saved_regs_mask;
21155 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21157 if (IS_STACKALIGN (func_type))
21159 rtx r0, r1;
21161 /* Handle a word-aligned stack pointer. We generate the following:
21163 mov r0, sp
21164 bic r1, r0, #7
21165 mov sp, r1
21166 <save and restore r0 in normal prologue/epilogue>
21167 mov sp, r0
21168 bx lr
21170 The unwinder doesn't need to know about the stack realignment.
21171 Just tell it we saved SP in r0. */
21172 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21174 r0 = gen_rtx_REG (SImode, 0);
21175 r1 = gen_rtx_REG (SImode, 1);
21177 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21178 RTX_FRAME_RELATED_P (insn) = 1;
21179 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21181 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21183 /* ??? The CFA changes here, which may cause GDB to conclude that it
21184 has entered a different function. That said, the unwind info is
21185 correct, individually, before and after this instruction because
21186 we've described the save of SP, which will override the default
21187 handling of SP as restoring from the CFA. */
21188 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21191 /* For APCS frames, if the IP register is clobbered
21192 when creating the frame, save that register in a special
21193 way. */
21194 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21196 if (IS_INTERRUPT (func_type))
21198 /* Interrupt functions must not corrupt any registers.
21199 Creating a frame pointer however, corrupts the IP
21200 register, so we must push it first. */
21201 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21203 /* Do not set RTX_FRAME_RELATED_P on this insn.
21204 The dwarf stack unwinding code only wants to see one
21205 stack decrement per function, and this is not it. If
21206 this instruction is labeled as being part of the frame
21207 creation sequence then dwarf2out_frame_debug_expr will
21208 die when it encounters the assignment of IP to FP
21209 later on, since the use of SP here establishes SP as
21210 the CFA register and not IP.
21212 Anyway this instruction is not really part of the stack
21213 frame creation although it is part of the prologue. */
21215 else if (IS_NESTED (func_type))
21217 /* The static chain register is the same as the IP register
21218 used as a scratch register during stack frame creation.
21219 To get around this we need to find somewhere to store IP
21220 whilst the frame is being created. We try the following
21221 places in order:
21223 1. The last argument register r3 if it is available.
21224 2. A slot on the stack above the frame if there are no
21225 arguments to push onto the stack.
21226 3. Register r3 again, after pushing the argument registers
21227 onto the stack, if this is a varargs function.
21228 4. The last slot on the stack created for the arguments to
21229 push, if this isn't a varargs function.
21231 Note - we only need to tell the dwarf2 backend about the SP
21232 adjustment in the second variant; the static chain register
21233 doesn't need to be unwound, as it doesn't contain a value
21234 inherited from the caller. */
21236 if (!arm_r3_live_at_start_p ())
21237 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21238 else if (args_to_push == 0)
21240 rtx addr, dwarf;
21242 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21243 saved_regs += 4;
21245 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21246 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21247 fp_offset = 4;
21249 /* Just tell the dwarf backend that we adjusted SP. */
21250 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21251 plus_constant (Pmode, stack_pointer_rtx,
21252 -fp_offset));
21253 RTX_FRAME_RELATED_P (insn) = 1;
21254 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21256 else
21258 /* Store the args on the stack. */
21259 if (cfun->machine->uses_anonymous_args)
21261 insn
21262 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21263 (0xf0 >> (args_to_push / 4)) & 0xf);
21264 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21265 saved_pretend_args = 1;
21267 else
21269 rtx addr, dwarf;
21271 if (args_to_push == 4)
21272 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21273 else
21274 addr
21275 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21276 plus_constant (Pmode,
21277 stack_pointer_rtx,
21278 -args_to_push));
21280 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21282 /* Just tell the dwarf backend that we adjusted SP. */
21283 dwarf
21284 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21285 plus_constant (Pmode, stack_pointer_rtx,
21286 -args_to_push));
21287 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21290 RTX_FRAME_RELATED_P (insn) = 1;
21291 fp_offset = args_to_push;
21292 args_to_push = 0;
21296 insn = emit_set_insn (ip_rtx,
21297 plus_constant (Pmode, stack_pointer_rtx,
21298 fp_offset));
21299 RTX_FRAME_RELATED_P (insn) = 1;
21302 if (args_to_push)
21304 /* Push the argument registers, or reserve space for them. */
21305 if (cfun->machine->uses_anonymous_args)
21306 insn = emit_multi_reg_push
21307 ((0xf0 >> (args_to_push / 4)) & 0xf,
21308 (0xf0 >> (args_to_push / 4)) & 0xf);
21309 else
21310 insn = emit_insn
21311 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21312 GEN_INT (- args_to_push)));
21313 RTX_FRAME_RELATED_P (insn) = 1;
21316 /* If this is an interrupt service routine, and the link register
21317 is going to be pushed, and we're not generating an extra
21318 push of IP (needed when a frame is needed and the frame layout is APCS),
21319 then subtracting four from LR now will mean that the function return
21320 can be done with a single instruction. */
21321 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21322 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21323 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21324 && TARGET_ARM)
21326 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21328 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21331 if (live_regs_mask)
21333 unsigned long dwarf_regs_mask = live_regs_mask;
21335 saved_regs += bit_count (live_regs_mask) * 4;
21336 if (optimize_size && !frame_pointer_needed
21337 && saved_regs == offsets->saved_regs - offsets->saved_args)
21339 /* If no coprocessor registers are being pushed and we don't have
21340 to worry about a frame pointer then push extra registers to
21341 create the stack frame. This is done in a way that does not
21342 alter the frame layout, so is independent of the epilogue. */
21343 int n;
21344 int frame;
21345 n = 0;
21346 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21347 n++;
21348 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21349 if (frame && n * 4 >= frame)
21351 n = frame / 4;
21352 live_regs_mask |= (1 << n) - 1;
21353 saved_regs += frame;
21357 if (TARGET_LDRD
21358 && current_tune->prefer_ldrd_strd
21359 && !optimize_function_for_size_p (cfun))
21361 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21362 if (TARGET_THUMB2)
21363 thumb2_emit_strd_push (live_regs_mask);
21364 else if (TARGET_ARM
21365 && !TARGET_APCS_FRAME
21366 && !IS_INTERRUPT (func_type))
21367 arm_emit_strd_push (live_regs_mask);
21368 else
21370 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21371 RTX_FRAME_RELATED_P (insn) = 1;
21374 else
21376 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21377 RTX_FRAME_RELATED_P (insn) = 1;
21381 if (! IS_VOLATILE (func_type))
21382 saved_regs += arm_save_coproc_regs ();
21384 if (frame_pointer_needed && TARGET_ARM)
21386 /* Create the new frame pointer. */
21387 if (TARGET_APCS_FRAME)
21389 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21390 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21391 RTX_FRAME_RELATED_P (insn) = 1;
21393 if (IS_NESTED (func_type))
21395 /* Recover the static chain register. */
21396 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21397 insn = gen_rtx_REG (SImode, 3);
21398 else
21400 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21401 insn = gen_frame_mem (SImode, insn);
21403 emit_set_insn (ip_rtx, insn);
21404 /* Add a USE to stop propagate_one_insn() from barfing. */
21405 emit_insn (gen_force_register_use (ip_rtx));
21408 else
21410 insn = GEN_INT (saved_regs - 4);
21411 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21412 stack_pointer_rtx, insn));
21413 RTX_FRAME_RELATED_P (insn) = 1;
21417 if (flag_stack_usage_info)
21418 current_function_static_stack_size
21419 = offsets->outgoing_args - offsets->saved_args;
21421 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21423 /* This add can produce multiple insns for a large constant, so we
21424 need to get tricky. */
21425 rtx_insn *last = get_last_insn ();
21427 amount = GEN_INT (offsets->saved_args + saved_regs
21428 - offsets->outgoing_args);
21430 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21431 amount));
21434 last = last ? NEXT_INSN (last) : get_insns ();
21435 RTX_FRAME_RELATED_P (last) = 1;
21437 while (last != insn);
21439 /* If the frame pointer is needed, emit a special barrier that
21440 will prevent the scheduler from moving stores to the frame
21441 before the stack adjustment. */
21442 if (frame_pointer_needed)
21443 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21444 hard_frame_pointer_rtx));
21448 if (frame_pointer_needed && TARGET_THUMB2)
21449 thumb_set_frame_pointer (offsets);
21451 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21453 unsigned long mask;
21455 mask = live_regs_mask;
21456 mask &= THUMB2_WORK_REGS;
21457 if (!IS_NESTED (func_type))
21458 mask |= (1 << IP_REGNUM);
21459 arm_load_pic_register (mask);
21462 /* If we are profiling, make sure no instructions are scheduled before
21463 the call to mcount. Similarly if the user has requested no
21464 scheduling in the prolog. Similarly if we want non-call exceptions
21465 using the EABI unwinder, to prevent faulting instructions from being
21466 swapped with a stack adjustment. */
21467 if (crtl->profile || !TARGET_SCHED_PROLOG
21468 || (arm_except_unwind_info (&global_options) == UI_TARGET
21469 && cfun->can_throw_non_call_exceptions))
21470 emit_insn (gen_blockage ());
21472 /* If the link register is being kept alive, with the return address in it,
21473 then make sure that it does not get reused by the ce2 pass. */
21474 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21475 cfun->machine->lr_save_eliminated = 1;
21478 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21479 static void
21480 arm_print_condition (FILE *stream)
21482 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21484 /* Branch conversion is not implemented for Thumb-2. */
21485 if (TARGET_THUMB)
21487 output_operand_lossage ("predicated Thumb instruction");
21488 return;
21490 if (current_insn_predicate != NULL)
21492 output_operand_lossage
21493 ("predicated instruction in conditional sequence");
21494 return;
21497 fputs (arm_condition_codes[arm_current_cc], stream);
21499 else if (current_insn_predicate)
21501 enum arm_cond_code code;
21503 if (TARGET_THUMB1)
21505 output_operand_lossage ("predicated Thumb instruction");
21506 return;
21509 code = get_arm_condition_code (current_insn_predicate);
21510 fputs (arm_condition_codes[code], stream);
21515 /* Globally reserved letters: acln
21516 Punctuation letters currently used: @_|?().!#
21517 Lower case letters currently used: bcdefhimpqtvwxyz
21518 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21519 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21521 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21523 If CODE is 'd', then the X is a condition operand and the instruction
21524 should only be executed if the condition is true.
21525 If CODE is 'D', then the X is a condition operand and the instruction
21526 should only be executed if the condition is false: however, if the mode
21527 of the comparison is CCFPEmode, then always execute the instruction -- we
21528 do this because in these circumstances !GE does not necessarily imply LT;
21529 in these cases the instruction pattern will take care to make sure that
21530 an instruction containing %d will follow, thereby undoing the effects of
21531 doing this instruction unconditionally.
21532 If CODE is 'N' then X is a floating point operand that must be negated
21533 before output.
21534 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21535 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
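/* Illustrative examples (a sketch added here, not part of the original
   sources): assuming a CONST_INT operand with value 5 and a 32-bit
   HOST_WIDE_INT, the cases below would print
     %B  ->  -6        (decimal value of ~5, sign-extended)
     %x  ->  #0x5      (hexadecimal, with a leading '#')
     %L  ->  5         (low 16 bits, printed in decimal)
     %b  ->  (error)   (5 is not a power of two, so the operand is rejected)
   These follow directly from the corresponding cases in
   arm_print_operand.  */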
21536 static void
21537 arm_print_operand (FILE *stream, rtx x, int code)
21539 switch (code)
21541 case '@':
21542 fputs (ASM_COMMENT_START, stream);
21543 return;
21545 case '_':
21546 fputs (user_label_prefix, stream);
21547 return;
21549 case '|':
21550 fputs (REGISTER_PREFIX, stream);
21551 return;
21553 case '?':
21554 arm_print_condition (stream);
21555 return;
21557 case '(':
21558 /* Nothing in unified syntax, otherwise the current condition code. */
21559 if (!TARGET_UNIFIED_ASM)
21560 arm_print_condition (stream);
21561 break;
21563 case ')':
21564 /* The current condition code in unified syntax, otherwise nothing. */
21565 if (TARGET_UNIFIED_ASM)
21566 arm_print_condition (stream);
21567 break;
21569 case '.':
21570 /* The current condition code for a condition code setting instruction.
21571 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21572 if (TARGET_UNIFIED_ASM)
21574 fputc('s', stream);
21575 arm_print_condition (stream);
21577 else
21579 arm_print_condition (stream);
21580 fputc('s', stream);
21582 return;
21584 case '!':
21585 /* If the instruction is conditionally executed then print
21586 the current condition code, otherwise print 's'. */
21587 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21588 if (current_insn_predicate)
21589 arm_print_condition (stream);
21590 else
21591 fputc('s', stream);
21592 break;
21594 /* %# is a "break" sequence. It doesn't output anything, but is used to
21595 separate e.g. operand numbers from following text, if that text consists
21596 of further digits which we don't want to be part of the operand
21597 number. */
21598 case '#':
21599 return;
21601 case 'N':
21603 REAL_VALUE_TYPE r;
21604 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21605 r = real_value_negate (&r);
21606 fprintf (stream, "%s", fp_const_from_val (&r));
21608 return;
21610 /* An integer or symbol address without a preceding # sign. */
21611 case 'c':
21612 switch (GET_CODE (x))
21614 case CONST_INT:
21615 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21616 break;
21618 case SYMBOL_REF:
21619 output_addr_const (stream, x);
21620 break;
21622 case CONST:
21623 if (GET_CODE (XEXP (x, 0)) == PLUS
21624 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21626 output_addr_const (stream, x);
21627 break;
21629 /* Fall through. */
21631 default:
21632 output_operand_lossage ("Unsupported operand for code '%c'", code);
21634 return;
21636 /* An integer that we want to print in HEX. */
21637 case 'x':
21638 switch (GET_CODE (x))
21640 case CONST_INT:
21641 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21642 break;
21644 default:
21645 output_operand_lossage ("Unsupported operand for code '%c'", code);
21647 return;
21649 case 'B':
21650 if (CONST_INT_P (x))
21652 HOST_WIDE_INT val;
21653 val = ARM_SIGN_EXTEND (~INTVAL (x));
21654 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21656 else
21658 putc ('~', stream);
21659 output_addr_const (stream, x);
21661 return;
21663 case 'b':
21664 /* Print the log2 of a CONST_INT. */
21666 HOST_WIDE_INT val;
21668 if (!CONST_INT_P (x)
21669 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21670 output_operand_lossage ("Unsupported operand for code '%c'", code);
21671 else
21672 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21674 return;
21676 case 'L':
21677 /* The low 16 bits of an immediate constant. */
21678 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21679 return;
21681 case 'i':
21682 fprintf (stream, "%s", arithmetic_instr (x, 1));
21683 return;
21685 case 'I':
21686 fprintf (stream, "%s", arithmetic_instr (x, 0));
21687 return;
21689 case 'S':
21691 HOST_WIDE_INT val;
21692 const char *shift;
21694 shift = shift_op (x, &val);
21696 if (shift)
21698 fprintf (stream, ", %s ", shift);
21699 if (val == -1)
21700 arm_print_operand (stream, XEXP (x, 1), 0);
21701 else
21702 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21705 return;
21707 /* An explanation of the 'Q', 'R' and 'H' register operands:
21709 In a pair of registers containing a DI or DF value the 'Q'
21710 operand returns the register number of the register containing
21711 the least significant part of the value. The 'R' operand returns
21712 the register number of the register containing the most
21713 significant part of the value.
21715 The 'H' operand returns the higher of the two register numbers.
21716 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21717 same as the 'Q' operand, since the most significant part of the
21718 value is held in the lower number register. The reverse is true
21719 on systems where WORDS_BIG_ENDIAN is false.
21721 The purpose of these operands is to distinguish between cases
21722 where the endian-ness of the values is important (for example
21723 when they are added together), and cases where the endian-ness
21724 is irrelevant, but the order of register operations is important.
21725 For example when loading a value from memory into a register
21726 pair, the endian-ness does not matter. Provided that the value
21727 from the lower memory address is put into the lower numbered
21728 register, and the value from the higher address is put into the
21729 higher numbered register, the load will work regardless of whether
21730 the value being loaded is big-wordian or little-wordian. The
21731 order of the two register loads can matter however, if the address
21732 of the memory location is actually held in one of the registers
21733 being overwritten by the load.
21735 The 'Q' and 'R' constraints are also available for 64-bit
21736 constants. */
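/* Illustrative sketch (not quoted from arm.md): a 64-bit add pattern
   could use these codes in its output template as
     "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2"
   so that the carry flows from the least significant word to the most
   significant word, whichever register of each pair actually holds it.  */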
21737 case 'Q':
21738 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21740 rtx part = gen_lowpart (SImode, x);
21741 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21742 return;
21745 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21747 output_operand_lossage ("invalid operand for code '%c'", code);
21748 return;
21751 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21752 return;
21754 case 'R':
21755 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21757 machine_mode mode = GET_MODE (x);
21758 rtx part;
21760 if (mode == VOIDmode)
21761 mode = DImode;
21762 part = gen_highpart_mode (SImode, mode, x);
21763 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21764 return;
21767 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21769 output_operand_lossage ("invalid operand for code '%c'", code);
21770 return;
21773 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21774 return;
21776 case 'H':
21777 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21779 output_operand_lossage ("invalid operand for code '%c'", code);
21780 return;
21783 asm_fprintf (stream, "%r", REGNO (x) + 1);
21784 return;
21786 case 'J':
21787 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21789 output_operand_lossage ("invalid operand for code '%c'", code);
21790 return;
21793 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21794 return;
21796 case 'K':
21797 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21799 output_operand_lossage ("invalid operand for code '%c'", code);
21800 return;
21803 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21804 return;
21806 case 'm':
21807 asm_fprintf (stream, "%r",
21808 REG_P (XEXP (x, 0))
21809 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21810 return;
21812 case 'M':
21813 asm_fprintf (stream, "{%r-%r}",
21814 REGNO (x),
21815 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21816 return;
21818 /* Like 'M', but writing doubleword vector registers, for use by Neon
21819 insns. */
21820 case 'h':
21822 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21823 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21824 if (numregs == 1)
21825 asm_fprintf (stream, "{d%d}", regno);
21826 else
21827 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21829 return;
21831 case 'd':
21832 /* CONST_TRUE_RTX means always -- that's the default. */
21833 if (x == const_true_rtx)
21834 return;
21836 if (!COMPARISON_P (x))
21838 output_operand_lossage ("invalid operand for code '%c'", code);
21839 return;
21842 fputs (arm_condition_codes[get_arm_condition_code (x)],
21843 stream);
21844 return;
21846 case 'D':
21847 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21848 want to do that. */
21849 if (x == const_true_rtx)
21851 output_operand_lossage ("instruction never executed");
21852 return;
21854 if (!COMPARISON_P (x))
21856 output_operand_lossage ("invalid operand for code '%c'", code);
21857 return;
21860 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21861 (get_arm_condition_code (x))],
21862 stream);
21863 return;
21865 case 's':
21866 case 'V':
21867 case 'W':
21868 case 'X':
21869 case 'Y':
21870 case 'Z':
21871 /* Former Maverick support, removed after GCC-4.7. */
21872 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21873 return;
21875 case 'U':
21876 if (!REG_P (x)
21877 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21878 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21879 /* Bad value for wCG register number. */
21881 output_operand_lossage ("invalid operand for code '%c'", code);
21882 return;
21885 else
21886 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21887 return;
21889 /* Print an iWMMXt control register name. */
21890 case 'w':
21891 if (!CONST_INT_P (x)
21892 || INTVAL (x) < 0
21893 || INTVAL (x) >= 16)
21894 /* Bad value for wC register number. */
21896 output_operand_lossage ("invalid operand for code '%c'", code);
21897 return;
21900 else
21902 static const char * wc_reg_names [16] =
21904 "wCID", "wCon", "wCSSF", "wCASF",
21905 "wC4", "wC5", "wC6", "wC7",
21906 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21907 "wC12", "wC13", "wC14", "wC15"
21910 fputs (wc_reg_names [INTVAL (x)], stream);
21912 return;
21914 /* Print the high single-precision register of a VFP double-precision
21915 register. */
21916 case 'p':
21918 machine_mode mode = GET_MODE (x);
21919 int regno;
21921 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21923 output_operand_lossage ("invalid operand for code '%c'", code);
21924 return;
21927 regno = REGNO (x);
21928 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21930 output_operand_lossage ("invalid operand for code '%c'", code);
21931 return;
21934 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21936 return;
21938 /* Print a VFP/Neon double precision or quad precision register name. */
21939 case 'P':
21940 case 'q':
21942 machine_mode mode = GET_MODE (x);
21943 int is_quad = (code == 'q');
21944 int regno;
21946 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21948 output_operand_lossage ("invalid operand for code '%c'", code);
21949 return;
21952 if (!REG_P (x)
21953 || !IS_VFP_REGNUM (REGNO (x)))
21955 output_operand_lossage ("invalid operand for code '%c'", code);
21956 return;
21959 regno = REGNO (x);
21960 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21961 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21963 output_operand_lossage ("invalid operand for code '%c'", code);
21964 return;
21967 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21968 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21970 return;
21972 /* These two codes print the low/high doubleword register of a Neon quad
21973 register, respectively. For pair-structure types, can also print
21974 low/high quadword registers. */
21975 case 'e':
21976 case 'f':
21978 machine_mode mode = GET_MODE (x);
21979 int regno;
21981 if ((GET_MODE_SIZE (mode) != 16
21982 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21984 output_operand_lossage ("invalid operand for code '%c'", code);
21985 return;
21988 regno = REGNO (x);
21989 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21991 output_operand_lossage ("invalid operand for code '%c'", code);
21992 return;
21995 if (GET_MODE_SIZE (mode) == 16)
21996 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21997 + (code == 'f' ? 1 : 0));
21998 else
21999 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22000 + (code == 'f' ? 1 : 0));
22002 return;
22004 /* Print a VFPv3 floating-point constant, represented as an integer
22005 index. */
22006 case 'G':
22008 int index = vfp3_const_double_index (x);
22009 gcc_assert (index != -1);
22010 fprintf (stream, "%d", index);
22012 return;
22014 /* Print bits representing opcode features for Neon.
22016 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22017 and polynomials as unsigned.
22019 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22021 Bit 2 is 1 for rounding functions, 0 otherwise. */
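/* Illustrative example (not part of the original sources): for an
   assumed bits value of 7 (signed, float/poly, rounding), 'T', 'F' and
   't' all print 'f' and 'O' prints "r"; for a bits value of 2
   (polynomial, no rounding), 'T' prints 'p' and 'O' prints nothing.  */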
22023 /* Identify the type as 's', 'u', 'p' or 'f'. */
22024 case 'T':
22026 HOST_WIDE_INT bits = INTVAL (x);
22027 fputc ("uspf"[bits & 3], stream);
22029 return;
22031 /* Likewise, but signed and unsigned integers are both 'i'. */
22032 case 'F':
22034 HOST_WIDE_INT bits = INTVAL (x);
22035 fputc ("iipf"[bits & 3], stream);
22037 return;
22039 /* As for 'T', but emit 'u' instead of 'p'. */
22040 case 't':
22042 HOST_WIDE_INT bits = INTVAL (x);
22043 fputc ("usuf"[bits & 3], stream);
22045 return;
22047 /* Bit 2: rounding (vs none). */
22048 case 'O':
22050 HOST_WIDE_INT bits = INTVAL (x);
22051 fputs ((bits & 4) != 0 ? "r" : "", stream);
22053 return;
22055 /* Memory operand for vld1/vst1 instruction. */
22056 case 'A':
22058 rtx addr;
22059 bool postinc = FALSE;
22060 rtx postinc_reg = NULL;
22061 unsigned align, memsize, align_bits;
22063 gcc_assert (MEM_P (x));
22064 addr = XEXP (x, 0);
22065 if (GET_CODE (addr) == POST_INC)
22067 postinc = 1;
22068 addr = XEXP (addr, 0);
22070 if (GET_CODE (addr) == POST_MODIFY)
22072 postinc_reg = XEXP( XEXP (addr, 1), 1);
22073 addr = XEXP (addr, 0);
22075 asm_fprintf (stream, "[%r", REGNO (addr));
22077 /* We know the alignment of this access, so we can emit a hint in the
22078 instruction (for some alignments) as an aid to the memory subsystem
22079 of the target. */
22080 align = MEM_ALIGN (x) >> 3;
22081 memsize = MEM_SIZE (x);
22083 /* Only certain alignment specifiers are supported by the hardware. */
22084 if (memsize == 32 && (align % 32) == 0)
22085 align_bits = 256;
22086 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22087 align_bits = 128;
22088 else if (memsize >= 8 && (align % 8) == 0)
22089 align_bits = 64;
22090 else
22091 align_bits = 0;
22093 if (align_bits != 0)
22094 asm_fprintf (stream, ":%d", align_bits);
22096 asm_fprintf (stream, "]");
22098 if (postinc)
22099 fputs("!", stream);
22100 if (postinc_reg)
22101 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22103 return;
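/* Illustrative example (not part of the original sources): for an
   assumed 16-byte access through r0 with 128-bit alignment and a
   post-increment address, the case above would print "[r0:128]!".  */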
22105 case 'C':
22107 rtx addr;
22109 gcc_assert (MEM_P (x));
22110 addr = XEXP (x, 0);
22111 gcc_assert (REG_P (addr));
22112 asm_fprintf (stream, "[%r]", REGNO (addr));
22114 return;
22116 /* Translate an S register number into a D register number and element index. */
22117 case 'y':
22119 machine_mode mode = GET_MODE (x);
22120 int regno;
22122 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22124 output_operand_lossage ("invalid operand for code '%c'", code);
22125 return;
22128 regno = REGNO (x);
22129 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22131 output_operand_lossage ("invalid operand for code '%c'", code);
22132 return;
22135 regno = regno - FIRST_VFP_REGNUM;
22136 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22138 return;
22140 case 'v':
22141 gcc_assert (CONST_DOUBLE_P (x));
22142 int result;
22143 result = vfp3_const_double_for_fract_bits (x);
22144 if (result == 0)
22145 result = vfp3_const_double_for_bits (x);
22146 fprintf (stream, "#%d", result);
22147 return;
22149 /* Register specifier for vld1.16/vst1.16. Translate the S register
22150 number into a D register number and element index. */
22151 case 'z':
22153 machine_mode mode = GET_MODE (x);
22154 int regno;
22156 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22158 output_operand_lossage ("invalid operand for code '%c'", code);
22159 return;
22162 regno = REGNO (x);
22163 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22165 output_operand_lossage ("invalid operand for code '%c'", code);
22166 return;
22169 regno = regno - FIRST_VFP_REGNUM;
22170 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22172 return;
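/* Illustrative example (not part of the original sources): assuming the
   operand is the single-precision register s1, code 'y' would print
   "d0[1]" (word lane), while 'z' above would print "d0[2]" (halfword
   lane, as needed by vld1.16/vst1.16).  */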
22174 default:
22175 if (x == 0)
22177 output_operand_lossage ("missing operand");
22178 return;
22181 switch (GET_CODE (x))
22183 case REG:
22184 asm_fprintf (stream, "%r", REGNO (x));
22185 break;
22187 case MEM:
22188 output_memory_reference_mode = GET_MODE (x);
22189 output_address (XEXP (x, 0));
22190 break;
22192 case CONST_DOUBLE:
22194 char fpstr[20];
22195 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22196 sizeof (fpstr), 0, 1);
22197 fprintf (stream, "#%s", fpstr);
22199 break;
22201 default:
22202 gcc_assert (GET_CODE (x) != NEG);
22203 fputc ('#', stream);
22204 if (GET_CODE (x) == HIGH)
22206 fputs (":lower16:", stream);
22207 x = XEXP (x, 0);
22210 output_addr_const (stream, x);
22211 break;
22216 /* Target hook for printing a memory address. */
22217 static void
22218 arm_print_operand_address (FILE *stream, rtx x)
22220 if (TARGET_32BIT)
22222 int is_minus = GET_CODE (x) == MINUS;
22224 if (REG_P (x))
22225 asm_fprintf (stream, "[%r]", REGNO (x));
22226 else if (GET_CODE (x) == PLUS || is_minus)
22228 rtx base = XEXP (x, 0);
22229 rtx index = XEXP (x, 1);
22230 HOST_WIDE_INT offset = 0;
22231 if (!REG_P (base)
22232 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22234 /* Ensure that BASE is a register. */
22235 /* (one of them must be). */
22236 /* Also ensure the SP is not used as an index register. */
22237 rtx temp = base;
22238 base = index;
22239 index = temp;
22241 switch (GET_CODE (index))
22243 case CONST_INT:
22244 offset = INTVAL (index);
22245 if (is_minus)
22246 offset = -offset;
22247 asm_fprintf (stream, "[%r, #%wd]",
22248 REGNO (base), offset);
22249 break;
22251 case REG:
22252 asm_fprintf (stream, "[%r, %s%r]",
22253 REGNO (base), is_minus ? "-" : "",
22254 REGNO (index));
22255 break;
22257 case MULT:
22258 case ASHIFTRT:
22259 case LSHIFTRT:
22260 case ASHIFT:
22261 case ROTATERT:
22263 asm_fprintf (stream, "[%r, %s%r",
22264 REGNO (base), is_minus ? "-" : "",
22265 REGNO (XEXP (index, 0)));
22266 arm_print_operand (stream, index, 'S');
22267 fputs ("]", stream);
22268 break;
22271 default:
22272 gcc_unreachable ();
22275 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22276 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22278 extern machine_mode output_memory_reference_mode;
22280 gcc_assert (REG_P (XEXP (x, 0)));
22282 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22283 asm_fprintf (stream, "[%r, #%s%d]!",
22284 REGNO (XEXP (x, 0)),
22285 GET_CODE (x) == PRE_DEC ? "-" : "",
22286 GET_MODE_SIZE (output_memory_reference_mode));
22287 else
22288 asm_fprintf (stream, "[%r], #%s%d",
22289 REGNO (XEXP (x, 0)),
22290 GET_CODE (x) == POST_DEC ? "-" : "",
22291 GET_MODE_SIZE (output_memory_reference_mode));
22293 else if (GET_CODE (x) == PRE_MODIFY)
22295 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22296 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22297 asm_fprintf (stream, "#%wd]!",
22298 INTVAL (XEXP (XEXP (x, 1), 1)));
22299 else
22300 asm_fprintf (stream, "%r]!",
22301 REGNO (XEXP (XEXP (x, 1), 1)));
22303 else if (GET_CODE (x) == POST_MODIFY)
22305 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22306 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22307 asm_fprintf (stream, "#%wd",
22308 INTVAL (XEXP (XEXP (x, 1), 1)));
22309 else
22310 asm_fprintf (stream, "%r",
22311 REGNO (XEXP (XEXP (x, 1), 1)));
22313 else output_addr_const (stream, x);
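/* Illustrative examples (not part of the original sources): assuming a
   word-sized access, the addressing forms above would print, e.g.,
   "[r1, #8]" for (plus r1 8), "[r2], #4" for a post-increment and
   "[r2, #-4]!" for a pre-decrement.  */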
22315 else
22317 if (REG_P (x))
22318 asm_fprintf (stream, "[%r]", REGNO (x));
22319 else if (GET_CODE (x) == POST_INC)
22320 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22321 else if (GET_CODE (x) == PLUS)
22323 gcc_assert (REG_P (XEXP (x, 0)));
22324 if (CONST_INT_P (XEXP (x, 1)))
22325 asm_fprintf (stream, "[%r, #%wd]",
22326 REGNO (XEXP (x, 0)),
22327 INTVAL (XEXP (x, 1)));
22328 else
22329 asm_fprintf (stream, "[%r, %r]",
22330 REGNO (XEXP (x, 0)),
22331 REGNO (XEXP (x, 1)));
22333 else
22334 output_addr_const (stream, x);
22338 /* Target hook for indicating whether a punctuation character for
22339 TARGET_PRINT_OPERAND is valid. */
22340 static bool
22341 arm_print_operand_punct_valid_p (unsigned char code)
22343 return (code == '@' || code == '|' || code == '.'
22344 || code == '(' || code == ')' || code == '#'
22345 || (TARGET_32BIT && (code == '?'))
22346 || (TARGET_THUMB2 && (code == '!'))
22347 || (TARGET_THUMB && (code == '_')));
22350 /* Target hook for assembling integer objects. The ARM version needs to
22351 handle word-sized values specially. */
22352 static bool
22353 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22355 machine_mode mode;
22357 if (size == UNITS_PER_WORD && aligned_p)
22359 fputs ("\t.word\t", asm_out_file);
22360 output_addr_const (asm_out_file, x);
22362 /* Mark symbols as position independent. We only do this in the
22363 .text segment, not in the .data segment. */
22364 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22365 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22367 /* See legitimize_pic_address for an explanation of the
22368 TARGET_VXWORKS_RTP check. */
22369 if (!arm_pic_data_is_text_relative
22370 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22371 fputs ("(GOT)", asm_out_file);
22372 else
22373 fputs ("(GOTOFF)", asm_out_file);
22375 fputc ('\n', asm_out_file);
22376 return true;
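/* Illustrative example (not part of the original sources): for an
   assumed non-locally-binding symbol "foo" emitted into a PIC constant
   table, the branch above would produce "\t.word\tfoo(GOT)"; a
   text-relative local symbol would get "(GOTOFF)" instead.  */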
22379 mode = GET_MODE (x);
22381 if (arm_vector_mode_supported_p (mode))
22383 int i, units;
22385 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22387 units = CONST_VECTOR_NUNITS (x);
22388 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22390 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22391 for (i = 0; i < units; i++)
22393 rtx elt = CONST_VECTOR_ELT (x, i);
22394 assemble_integer
22395 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22397 else
22398 for (i = 0; i < units; i++)
22400 rtx elt = CONST_VECTOR_ELT (x, i);
22401 REAL_VALUE_TYPE rval;
22403 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22405 assemble_real
22406 (rval, GET_MODE_INNER (mode),
22407 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22410 return true;
22413 return default_assemble_integer (x, size, aligned_p);
22416 static void
22417 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22419 section *s;
22421 if (!TARGET_AAPCS_BASED)
22423 (is_ctor ?
22424 default_named_section_asm_out_constructor
22425 : default_named_section_asm_out_destructor) (symbol, priority);
22426 return;
22429 /* Put these in the .init_array section, using a special relocation. */
22430 if (priority != DEFAULT_INIT_PRIORITY)
22432 char buf[18];
22433 sprintf (buf, "%s.%.5u",
22434 is_ctor ? ".init_array" : ".fini_array",
22435 priority);
22436 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22438 else if (is_ctor)
22439 s = ctors_section;
22440 else
22441 s = dtors_section;
22443 switch_to_section (s);
22444 assemble_align (POINTER_SIZE);
22445 fputs ("\t.word\t", asm_out_file);
22446 output_addr_const (asm_out_file, symbol);
22447 fputs ("(target1)\n", asm_out_file);
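/* Illustrative example (not part of the original sources): for an
   assumed constructor with priority 101, the code above selects the
   section ".init_array.00101" and emits "\t.word\t<symbol>(target1)".  */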
22450 /* Add a function to the list of static constructors. */
22452 static void
22453 arm_elf_asm_constructor (rtx symbol, int priority)
22455 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22458 /* Add a function to the list of static destructors. */
22460 static void
22461 arm_elf_asm_destructor (rtx symbol, int priority)
22463 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22466 /* A finite state machine takes care of noticing whether or not instructions
22467 can be conditionally executed, and thus decrease execution time and code
22468 size by deleting branch instructions. The fsm is controlled by
22469 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22471 /* The states of the fsm controlling condition codes are:
22472 0: normal, do nothing special
22473 1: make ASM_OUTPUT_OPCODE not output this instruction
22474 2: make ASM_OUTPUT_OPCODE not output this instruction
22475 3: make instructions conditional
22476 4: make instructions conditional
22478 State transitions (state->state by whom under condition):
22479 0 -> 1 final_prescan_insn if the `target' is a label
22480 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22481 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22482 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22483 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22484 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22485 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22486 (the target insn is arm_target_insn).
22488 If the jump clobbers the conditions then we use states 2 and 4.
22490 A similar thing can be done with conditional return insns.
22492 XXX In case the `target' is an unconditional branch, this conditionalising
22493 of the instructions always reduces code size, but not always execution
22494 time. But then, I want to reduce the code size to somewhere near what
22495 /bin/cc produces. */
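/* Illustrative sketch (not part of the original sources) of the kind of
   transformation the fsm enables, assuming a short branch over a single
   instruction:

        cmp   r0, #0                    cmp   r0, #0
        beq   .L1            ==>        movne r1, #1
        mov   r1, #1
     .L1:

   The branch is deleted and the skipped instruction is executed under
   the inverse condition instead.  */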
22497 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22498 instructions. When a COND_EXEC instruction is seen the subsequent
22499 instructions are scanned so that multiple conditional instructions can be
22500 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22501 specify the length and true/false mask for the IT block. These will be
22502 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
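/* Illustrative example (not part of the original sources): assuming
   arm_current_cc is ARM_EQ, arm_condexec_masklen is 3 and
   arm_condexec_mask is 0b011, thumb2_asm_output_opcode below would emit
   "itte eq", i.e. the first two instructions execute if EQ and the
   third if NE.  */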
22504 /* Returns the index of the ARM condition code string in
22505 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22506 COMPARISON should be an rtx like `(eq (...) (...))'. */
22508 enum arm_cond_code
22509 maybe_get_arm_condition_code (rtx comparison)
22511 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22512 enum arm_cond_code code;
22513 enum rtx_code comp_code = GET_CODE (comparison);
22515 if (GET_MODE_CLASS (mode) != MODE_CC)
22516 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22517 XEXP (comparison, 1));
22519 switch (mode)
22521 case CC_DNEmode: code = ARM_NE; goto dominance;
22522 case CC_DEQmode: code = ARM_EQ; goto dominance;
22523 case CC_DGEmode: code = ARM_GE; goto dominance;
22524 case CC_DGTmode: code = ARM_GT; goto dominance;
22525 case CC_DLEmode: code = ARM_LE; goto dominance;
22526 case CC_DLTmode: code = ARM_LT; goto dominance;
22527 case CC_DGEUmode: code = ARM_CS; goto dominance;
22528 case CC_DGTUmode: code = ARM_HI; goto dominance;
22529 case CC_DLEUmode: code = ARM_LS; goto dominance;
22530 case CC_DLTUmode: code = ARM_CC;
22532 dominance:
22533 if (comp_code == EQ)
22534 return ARM_INVERSE_CONDITION_CODE (code);
22535 if (comp_code == NE)
22536 return code;
22537 return ARM_NV;
22539 case CC_NOOVmode:
22540 switch (comp_code)
22542 case NE: return ARM_NE;
22543 case EQ: return ARM_EQ;
22544 case GE: return ARM_PL;
22545 case LT: return ARM_MI;
22546 default: return ARM_NV;
22549 case CC_Zmode:
22550 switch (comp_code)
22552 case NE: return ARM_NE;
22553 case EQ: return ARM_EQ;
22554 default: return ARM_NV;
22557 case CC_Nmode:
22558 switch (comp_code)
22560 case NE: return ARM_MI;
22561 case EQ: return ARM_PL;
22562 default: return ARM_NV;
22565 case CCFPEmode:
22566 case CCFPmode:
22567 /* We can handle all cases except UNEQ and LTGT. */
22568 switch (comp_code)
22570 case GE: return ARM_GE;
22571 case GT: return ARM_GT;
22572 case LE: return ARM_LS;
22573 case LT: return ARM_MI;
22574 case NE: return ARM_NE;
22575 case EQ: return ARM_EQ;
22576 case ORDERED: return ARM_VC;
22577 case UNORDERED: return ARM_VS;
22578 case UNLT: return ARM_LT;
22579 case UNLE: return ARM_LE;
22580 case UNGT: return ARM_HI;
22581 case UNGE: return ARM_PL;
22582 /* UNEQ and LTGT do not have a representation. */
22583 case UNEQ: /* Fall through. */
22584 case LTGT: /* Fall through. */
22585 default: return ARM_NV;
22588 case CC_SWPmode:
22589 switch (comp_code)
22591 case NE: return ARM_NE;
22592 case EQ: return ARM_EQ;
22593 case GE: return ARM_LE;
22594 case GT: return ARM_LT;
22595 case LE: return ARM_GE;
22596 case LT: return ARM_GT;
22597 case GEU: return ARM_LS;
22598 case GTU: return ARM_CC;
22599 case LEU: return ARM_CS;
22600 case LTU: return ARM_HI;
22601 default: return ARM_NV;
22604 case CC_Cmode:
22605 switch (comp_code)
22607 case LTU: return ARM_CS;
22608 case GEU: return ARM_CC;
22609 default: return ARM_NV;
22612 case CC_CZmode:
22613 switch (comp_code)
22615 case NE: return ARM_NE;
22616 case EQ: return ARM_EQ;
22617 case GEU: return ARM_CS;
22618 case GTU: return ARM_HI;
22619 case LEU: return ARM_LS;
22620 case LTU: return ARM_CC;
22621 default: return ARM_NV;
22624 case CC_NCVmode:
22625 switch (comp_code)
22627 case GE: return ARM_GE;
22628 case LT: return ARM_LT;
22629 case GEU: return ARM_CS;
22630 case LTU: return ARM_CC;
22631 default: return ARM_NV;
22634 case CCmode:
22635 switch (comp_code)
22637 case NE: return ARM_NE;
22638 case EQ: return ARM_EQ;
22639 case GE: return ARM_GE;
22640 case GT: return ARM_GT;
22641 case LE: return ARM_LE;
22642 case LT: return ARM_LT;
22643 case GEU: return ARM_CS;
22644 case GTU: return ARM_HI;
22645 case LEU: return ARM_LS;
22646 case LTU: return ARM_CC;
22647 default: return ARM_NV;
22650 default: gcc_unreachable ();
22654 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22655 static enum arm_cond_code
22656 get_arm_condition_code (rtx comparison)
22658 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22659 gcc_assert (code != ARM_NV);
22660 return code;
22663 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22664 instructions. */
22665 void
22666 thumb2_final_prescan_insn (rtx_insn *insn)
22668 rtx_insn *first_insn = insn;
22669 rtx body = PATTERN (insn);
22670 rtx predicate;
22671 enum arm_cond_code code;
22672 int n;
22673 int mask;
22674 int max;
22676 /* max_insns_skipped in the tune was already taken into account in the
22677 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22678 just emit the IT blocks as best we can. It does not make sense to split
22679 the IT blocks. */
22680 max = MAX_INSN_PER_IT_BLOCK;
22682 /* Remove the previous insn from the count of insns to be output. */
22683 if (arm_condexec_count)
22684 arm_condexec_count--;
22686 /* Nothing to do if we are already inside a conditional block. */
22687 if (arm_condexec_count)
22688 return;
22690 if (GET_CODE (body) != COND_EXEC)
22691 return;
22693 /* Conditional jumps are implemented directly. */
22694 if (JUMP_P (insn))
22695 return;
22697 predicate = COND_EXEC_TEST (body);
22698 arm_current_cc = get_arm_condition_code (predicate);
22700 n = get_attr_ce_count (insn);
22701 arm_condexec_count = 1;
22702 arm_condexec_mask = (1 << n) - 1;
22703 arm_condexec_masklen = n;
22704 /* See if subsequent instructions can be combined into the same block. */
22705 for (;;)
22707 insn = next_nonnote_insn (insn);
22709 /* Jumping into the middle of an IT block is illegal, so a label or
22710 barrier terminates the block. */
22711 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22712 break;
22714 body = PATTERN (insn);
22715 /* USE and CLOBBER aren't really insns, so just skip them. */
22716 if (GET_CODE (body) == USE
22717 || GET_CODE (body) == CLOBBER)
22718 continue;
22720 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22721 if (GET_CODE (body) != COND_EXEC)
22722 break;
22723 /* Maximum number of conditionally executed instructions in a block. */
22724 n = get_attr_ce_count (insn);
22725 if (arm_condexec_masklen + n > max)
22726 break;
22728 predicate = COND_EXEC_TEST (body);
22729 code = get_arm_condition_code (predicate);
22730 mask = (1 << n) - 1;
22731 if (arm_current_cc == code)
22732 arm_condexec_mask |= (mask << arm_condexec_masklen);
22733 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22734 break;
22736 arm_condexec_count++;
22737 arm_condexec_masklen += n;
22739 /* A jump must be the last instruction in a conditional block. */
22740 if (JUMP_P (insn))
22741 break;
22743 /* Restore recog_data (getting the attributes of other insns can
22744 destroy this array, but final.c assumes that it remains intact
22745 across this call). */
22746 extract_constrain_insn_cached (first_insn);
22749 void
22750 arm_final_prescan_insn (rtx_insn *insn)
22752 /* BODY will hold the body of INSN. */
22753 rtx body = PATTERN (insn);
22755 /* This will be 1 if trying to repeat the trick, and things need to be
22756 reversed if it appears to fail. */
22757 int reverse = 0;
22759 /* If we start with a return insn, we only succeed if we find another one. */
22760 int seeking_return = 0;
22761 enum rtx_code return_code = UNKNOWN;
22763 /* START_INSN will hold the insn from where we start looking. This is the
22764 first insn after the following code_label if REVERSE is true. */
22765 rtx_insn *start_insn = insn;
22767 /* If in state 4, check if the target branch is reached, in order to
22768 change back to state 0. */
22769 if (arm_ccfsm_state == 4)
22771 if (insn == arm_target_insn)
22773 arm_target_insn = NULL;
22774 arm_ccfsm_state = 0;
22776 return;
22779 /* If in state 3, it is possible to repeat the trick, if this insn is an
22780 unconditional branch to a label, and immediately following this branch
22781 is the previous target label which is only used once, and the label this
22782 branch jumps to is not too far off. */
22783 if (arm_ccfsm_state == 3)
22785 if (simplejump_p (insn))
22787 start_insn = next_nonnote_insn (start_insn);
22788 if (BARRIER_P (start_insn))
22790 /* XXX Isn't this always a barrier? */
22791 start_insn = next_nonnote_insn (start_insn);
22793 if (LABEL_P (start_insn)
22794 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22795 && LABEL_NUSES (start_insn) == 1)
22796 reverse = TRUE;
22797 else
22798 return;
22800 else if (ANY_RETURN_P (body))
22802 start_insn = next_nonnote_insn (start_insn);
22803 if (BARRIER_P (start_insn))
22804 start_insn = next_nonnote_insn (start_insn);
22805 if (LABEL_P (start_insn)
22806 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22807 && LABEL_NUSES (start_insn) == 1)
22809 reverse = TRUE;
22810 seeking_return = 1;
22811 return_code = GET_CODE (body);
22813 else
22814 return;
22816 else
22817 return;
22820 gcc_assert (!arm_ccfsm_state || reverse);
22821 if (!JUMP_P (insn))
22822 return;
22824 /* This jump might be paralleled with a clobber of the condition codes;
22825 the jump should always come first. */
22826 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22827 body = XVECEXP (body, 0, 0);
22829 if (reverse
22830 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22831 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22833 int insns_skipped;
22834 int fail = FALSE, succeed = FALSE;
22835 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22836 int then_not_else = TRUE;
22837 rtx_insn *this_insn = start_insn;
22838 rtx label = 0;
22840 /* Register the insn jumped to. */
22841 if (reverse)
22843 if (!seeking_return)
22844 label = XEXP (SET_SRC (body), 0);
22846 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22847 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22848 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22850 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22851 then_not_else = FALSE;
22853 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22855 seeking_return = 1;
22856 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22858 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22860 seeking_return = 1;
22861 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22862 then_not_else = FALSE;
22864 else
22865 gcc_unreachable ();
22867 /* See how many insns this branch skips, and what kind of insns. If all
22868 insns are okay, and the label or unconditional branch to the same
22869 label is not too far away, succeed. */
22870 for (insns_skipped = 0;
22871 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22873 rtx scanbody;
22875 this_insn = next_nonnote_insn (this_insn);
22876 if (!this_insn)
22877 break;
22879 switch (GET_CODE (this_insn))
22881 case CODE_LABEL:
22882 /* Succeed if it is the target label, otherwise fail since
22883 control falls in from somewhere else. */
22884 if (this_insn == label)
22886 arm_ccfsm_state = 1;
22887 succeed = TRUE;
22889 else
22890 fail = TRUE;
22891 break;
22893 case BARRIER:
22894 /* Succeed if the following insn is the target label.
22895 Otherwise fail.
22896 If return insns are used then the last insn in a function
22897 will be a barrier. */
22898 this_insn = next_nonnote_insn (this_insn);
22899 if (this_insn && this_insn == label)
22901 arm_ccfsm_state = 1;
22902 succeed = TRUE;
22904 else
22905 fail = TRUE;
22906 break;
22908 case CALL_INSN:
22909 /* The AAPCS says that conditional calls should not be
22910 used since they make interworking inefficient (the
22911 linker can't transform BL<cond> into BLX). That's
22912 only a problem if the machine has BLX. */
22913 if (arm_arch5)
22915 fail = TRUE;
22916 break;
22919 /* Succeed if the following insn is the target label, or
22920 if the following two insns are a barrier and the
22921 target label. */
22922 this_insn = next_nonnote_insn (this_insn);
22923 if (this_insn && BARRIER_P (this_insn))
22924 this_insn = next_nonnote_insn (this_insn);
22926 if (this_insn && this_insn == label
22927 && insns_skipped < max_insns_skipped)
22929 arm_ccfsm_state = 1;
22930 succeed = TRUE;
22932 else
22933 fail = TRUE;
22934 break;
22936 case JUMP_INSN:
22937 /* If this is an unconditional branch to the same label, succeed.
22938 If it is to another label, do nothing. If it is conditional,
22939 fail. */
22940 /* XXX Probably, the tests for SET and the PC are
22941 unnecessary. */
22943 scanbody = PATTERN (this_insn);
22944 if (GET_CODE (scanbody) == SET
22945 && GET_CODE (SET_DEST (scanbody)) == PC)
22947 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22948 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22950 arm_ccfsm_state = 2;
22951 succeed = TRUE;
22953 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22954 fail = TRUE;
22956 /* Fail if a conditional return is undesirable (e.g. on a
22957 StrongARM), but still allow this if optimizing for size. */
22958 else if (GET_CODE (scanbody) == return_code
22959 && !use_return_insn (TRUE, NULL)
22960 && !optimize_size)
22961 fail = TRUE;
22962 else if (GET_CODE (scanbody) == return_code)
22964 arm_ccfsm_state = 2;
22965 succeed = TRUE;
22967 else if (GET_CODE (scanbody) == PARALLEL)
22969 switch (get_attr_conds (this_insn))
22971 case CONDS_NOCOND:
22972 break;
22973 default:
22974 fail = TRUE;
22975 break;
22978 else
22979 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22981 break;
22983 case INSN:
22984 /* Instructions using or affecting the condition codes make it
22985 fail. */
22986 scanbody = PATTERN (this_insn);
22987 if (!(GET_CODE (scanbody) == SET
22988 || GET_CODE (scanbody) == PARALLEL)
22989 || get_attr_conds (this_insn) != CONDS_NOCOND)
22990 fail = TRUE;
22991 break;
22993 default:
22994 break;
22997 if (succeed)
22999 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23000 arm_target_label = CODE_LABEL_NUMBER (label);
23001 else
23003 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23005 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23007 this_insn = next_nonnote_insn (this_insn);
23008 gcc_assert (!this_insn
23009 || (!BARRIER_P (this_insn)
23010 && !LABEL_P (this_insn)));
23012 if (!this_insn)
23014 /* Oh, dear! We ran off the end... give up. */
23015 extract_constrain_insn_cached (insn);
23016 arm_ccfsm_state = 0;
23017 arm_target_insn = NULL;
23018 return;
23020 arm_target_insn = this_insn;
23023 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23024 what it was. */
23025 if (!reverse)
23026 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23028 if (reverse || then_not_else)
23029 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23032 /* Restore recog_data (getting the attributes of other insns can
23033 destroy this array, but final.c assumes that it remains intact
23034 across this call). */
23035 extract_constrain_insn_cached (insn);
23039 /* Output IT instructions. */
23040 void
23041 thumb2_asm_output_opcode (FILE * stream)
23043 char buff[5];
23044 int n;
23046 if (arm_condexec_mask)
23048 for (n = 0; n < arm_condexec_masklen; n++)
23049 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23050 buff[n] = 0;
23051 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23052 arm_condition_codes[arm_current_cc]);
23053 arm_condexec_mask = 0;
23057 /* Returns true if REGNO is a valid register
23058 for holding a quantity of type MODE. */
23060 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23062 if (GET_MODE_CLASS (mode) == MODE_CC)
23063 return (regno == CC_REGNUM
23064 || (TARGET_HARD_FLOAT && TARGET_VFP
23065 && regno == VFPCC_REGNUM));
23067 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23068 return false;
23070 if (TARGET_THUMB1)
23071 /* For the Thumb we only allow values bigger than SImode in
23072 registers 0 - 6, so that there is always a second low
23073 register available to hold the upper part of the value.
23074 We probably ought to ensure that the register is the
23075 start of an even numbered register pair. */
23076 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23078 if (TARGET_HARD_FLOAT && TARGET_VFP
23079 && IS_VFP_REGNUM (regno))
23081 if (mode == SFmode || mode == SImode)
23082 return VFP_REGNO_OK_FOR_SINGLE (regno);
23084 if (mode == DFmode)
23085 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23087 /* VFP registers can hold HFmode values, but there is no point in
23088 putting them there unless we have hardware conversion insns. */
23089 if (mode == HFmode)
23090 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23092 if (TARGET_NEON)
23093 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23094 || (VALID_NEON_QREG_MODE (mode)
23095 && NEON_REGNO_OK_FOR_QUAD (regno))
23096 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23097 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23098 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23099 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23100 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23102 return FALSE;
23105 if (TARGET_REALLY_IWMMXT)
23107 if (IS_IWMMXT_GR_REGNUM (regno))
23108 return mode == SImode;
23110 if (IS_IWMMXT_REGNUM (regno))
23111 return VALID_IWMMXT_REG_MODE (mode);
23114 /* We allow almost any value to be stored in the general registers.
23115 Restrict doubleword quantities to even register pairs in ARM state
23116 so that we can use ldrd. Do not allow very large Neon structure
23117 opaque modes in general registers; they would use too many. */
23118 if (regno <= LAST_ARM_REGNUM)
23120 if (ARM_NUM_REGS (mode) > 4)
23121 return FALSE;
23123 if (TARGET_THUMB2)
23124 return TRUE;
23126 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23129 if (regno == FRAME_POINTER_REGNUM
23130 || regno == ARG_POINTER_REGNUM)
23131 /* We only allow integers in the fake hard registers. */
23132 return GET_MODE_CLASS (mode) == MODE_INT;
23134 return FALSE;
23137 /* Implement MODES_TIEABLE_P. */
23139 bool
23140 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23142 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23143 return true;
23145 /* We specifically want to allow elements of "structure" modes to
23146 be tieable to the structure. This more general condition allows
23147 other rarer situations too. */
23148 if (TARGET_NEON
23149 && (VALID_NEON_DREG_MODE (mode1)
23150 || VALID_NEON_QREG_MODE (mode1)
23151 || VALID_NEON_STRUCT_MODE (mode1))
23152 && (VALID_NEON_DREG_MODE (mode2)
23153 || VALID_NEON_QREG_MODE (mode2)
23154 || VALID_NEON_STRUCT_MODE (mode2)))
23155 return true;
23157 return false;
23160 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23161 not used in arm mode. */
23163 enum reg_class
23164 arm_regno_class (int regno)
23166 if (regno == PC_REGNUM)
23167 return NO_REGS;
23169 if (TARGET_THUMB1)
23171 if (regno == STACK_POINTER_REGNUM)
23172 return STACK_REG;
23173 if (regno == CC_REGNUM)
23174 return CC_REG;
23175 if (regno < 8)
23176 return LO_REGS;
23177 return HI_REGS;
23180 if (TARGET_THUMB2 && regno < 8)
23181 return LO_REGS;
23183 if ( regno <= LAST_ARM_REGNUM
23184 || regno == FRAME_POINTER_REGNUM
23185 || regno == ARG_POINTER_REGNUM)
23186 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23188 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23189 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23191 if (IS_VFP_REGNUM (regno))
23193 if (regno <= D7_VFP_REGNUM)
23194 return VFP_D0_D7_REGS;
23195 else if (regno <= LAST_LO_VFP_REGNUM)
23196 return VFP_LO_REGS;
23197 else
23198 return VFP_HI_REGS;
23201 if (IS_IWMMXT_REGNUM (regno))
23202 return IWMMXT_REGS;
23204 if (IS_IWMMXT_GR_REGNUM (regno))
23205 return IWMMXT_GR_REGS;
23207 return NO_REGS;
23210 /* Handle a special case when computing the offset
23211 of an argument from the frame pointer. */
23213 arm_debugger_arg_offset (int value, rtx addr)
23215 rtx_insn *insn;
23217 /* We are only interested if dbxout_parms() failed to compute the offset. */
23218 if (value != 0)
23219 return 0;
23221 /* We can only cope with the case where the address is held in a register. */
23222 if (!REG_P (addr))
23223 return 0;
23225 /* If we are using the frame pointer to point at the argument, then
23226 an offset of 0 is correct. */
23227 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23228 return 0;
23230 /* If we are using the stack pointer to point at the
23231 argument, then an offset of 0 is correct. */
23232 /* ??? Check this is consistent with thumb2 frame layout. */
23233 if ((TARGET_THUMB || !frame_pointer_needed)
23234 && REGNO (addr) == SP_REGNUM)
23235 return 0;
23237 /* Oh dear. The argument is pointed to by a register rather
23238 than being held in a register, or being stored at a known
23239 offset from the frame pointer. Since GDB only understands
23240 those two kinds of argument we must translate the address
23241 held in the register into an offset from the frame pointer.
23242 We do this by searching through the insns for the function
23243 looking to see where this register gets its value. If the
23244 register is initialized from the frame pointer plus an offset
23245 then we are in luck and we can continue, otherwise we give up.
23247 This code is exercised by producing debugging information
23248 for a function with arguments like this:
23250 double func (double a, double b, int c, double d) {return d;}
23252 Without this code the stab for parameter 'd' will be set to
23253 an offset of 0 from the frame pointer, rather than 8. */
23255 /* The if() statement says:
23257 If the insn is a normal instruction
23258 and if the insn is setting the value in a register
23259 and if the register being set is the register holding the address of the argument
23260 and if the address is computed by an addition
23261 that involves adding to a register
23262 which is the frame pointer
23263 a constant integer
23265 then... */
23267 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23269 if ( NONJUMP_INSN_P (insn)
23270 && GET_CODE (PATTERN (insn)) == SET
23271 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23272 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23273 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23274 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23275 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23278 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23280 break;
23284 if (value == 0)
23286 debug_rtx (addr);
23287 warning (0, "unable to compute real location of stacked parameter");
23288 value = 8; /* XXX magic hack */
23291 return value;
23294 typedef enum {
23295 T_V8QI,
23296 T_V4HI,
23297 T_V4HF,
23298 T_V2SI,
23299 T_V2SF,
23300 T_DI,
23301 T_V16QI,
23302 T_V8HI,
23303 T_V4SI,
23304 T_V4SF,
23305 T_V2DI,
23306 T_TI,
23307 T_EI,
23308 T_OI,
23309 T_MAX /* Size of enum. Keep last. */
23310 } neon_builtin_type_mode;
23312 #define TYPE_MODE_BIT(X) (1 << (X))
23314 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23315 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23316 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23317 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23318 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23319 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23321 #define v8qi_UP T_V8QI
23322 #define v4hi_UP T_V4HI
23323 #define v4hf_UP T_V4HF
23324 #define v2si_UP T_V2SI
23325 #define v2sf_UP T_V2SF
23326 #define di_UP T_DI
23327 #define v16qi_UP T_V16QI
23328 #define v8hi_UP T_V8HI
23329 #define v4si_UP T_V4SI
23330 #define v4sf_UP T_V4SF
23331 #define v2di_UP T_V2DI
23332 #define ti_UP T_TI
23333 #define ei_UP T_EI
23334 #define oi_UP T_OI
23336 #define UP(X) X##_UP
23338 typedef enum {
23339 NEON_BINOP,
23340 NEON_TERNOP,
23341 NEON_UNOP,
23342 NEON_BSWAP,
23343 NEON_GETLANE,
23344 NEON_SETLANE,
23345 NEON_CREATE,
23346 NEON_RINT,
23347 NEON_COPYSIGNF,
23348 NEON_DUP,
23349 NEON_DUPLANE,
23350 NEON_COMBINE,
23351 NEON_SPLIT,
23352 NEON_LANEMUL,
23353 NEON_LANEMULL,
23354 NEON_LANEMULH,
23355 NEON_LANEMAC,
23356 NEON_SCALARMUL,
23357 NEON_SCALARMULL,
23358 NEON_SCALARMULH,
23359 NEON_SCALARMAC,
23360 NEON_CONVERT,
23361 NEON_FLOAT_WIDEN,
23362 NEON_FLOAT_NARROW,
23363 NEON_FIXCONV,
23364 NEON_SELECT,
23365 NEON_REINTERP,
23366 NEON_VTBL,
23367 NEON_VTBX,
23368 NEON_LOAD1,
23369 NEON_LOAD1LANE,
23370 NEON_STORE1,
23371 NEON_STORE1LANE,
23372 NEON_LOADSTRUCT,
23373 NEON_LOADSTRUCTLANE,
23374 NEON_STORESTRUCT,
23375 NEON_STORESTRUCTLANE,
23376 NEON_LOGICBINOP,
23377 NEON_SHIFTINSERT,
23378 NEON_SHIFTIMM,
23379 NEON_SHIFTACC
23380 } neon_itype;
23382 typedef struct {
23383 const char *name;
23384 const neon_itype itype;
23385 const neon_builtin_type_mode mode;
23386 const enum insn_code code;
23387 unsigned int fcode;
23388 } neon_builtin_datum;
23390 #define CF(N,X) CODE_FOR_neon_##N##X
23392 #define VAR1(T, N, A) \
23393 {#N, NEON_##T, UP (A), CF (N, A), 0}
23394 #define VAR2(T, N, A, B) \
23395 VAR1 (T, N, A), \
23396 {#N, NEON_##T, UP (B), CF (N, B), 0}
23397 #define VAR3(T, N, A, B, C) \
23398 VAR2 (T, N, A, B), \
23399 {#N, NEON_##T, UP (C), CF (N, C), 0}
23400 #define VAR4(T, N, A, B, C, D) \
23401 VAR3 (T, N, A, B, C), \
23402 {#N, NEON_##T, UP (D), CF (N, D), 0}
23403 #define VAR5(T, N, A, B, C, D, E) \
23404 VAR4 (T, N, A, B, C, D), \
23405 {#N, NEON_##T, UP (E), CF (N, E), 0}
23406 #define VAR6(T, N, A, B, C, D, E, F) \
23407 VAR5 (T, N, A, B, C, D, E), \
23408 {#N, NEON_##T, UP (F), CF (N, F), 0}
23409 #define VAR7(T, N, A, B, C, D, E, F, G) \
23410 VAR6 (T, N, A, B, C, D, E, F), \
23411 {#N, NEON_##T, UP (G), CF (N, G), 0}
23412 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23413 VAR7 (T, N, A, B, C, D, E, F, G), \
23414 {#N, NEON_##T, UP (H), CF (N, H), 0}
23415 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23416 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23417 {#N, NEON_##T, UP (I), CF (N, I), 0}
23418 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23419 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23420 {#N, NEON_##T, UP (J), CF (N, J), 0}
23422 /* The NEON builtin data can be found in arm_neon_builtins.def.
23423 The mode entries in the following table correspond to the "key" type of the
23424 instruction variant, i.e. equivalent to that which would be specified after
23425 the assembler mnemonic, which usually refers to the last vector operand.
23426 (Signed/unsigned/polynomial types are not differentiated between though, and
23427 are all mapped onto the same mode for a given element size.) The modes
23428 listed per instruction should be the same as those defined for that
23429 instruction's pattern in neon.md. */
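/* Illustrative example (not part of the original sources): a
   hypothetical entry "VAR2 (BINOP, vfoo, v8qi, v16qi)" in
   arm_neon_builtins.def would expand, via the VARn, UP and CF macros
   above, to the two table entries
     {"vfoo", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vfoov8qi,  0},
     {"vfoo", NEON_BINOP, T_V16QI, CODE_FOR_neon_vfoov16qi, 0}.  */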
23431 static neon_builtin_datum neon_builtin_data[] =
23433 #include "arm_neon_builtins.def"
23436 #undef CF
23437 #undef VAR1
23438 #undef VAR2
23439 #undef VAR3
23440 #undef VAR4
23441 #undef VAR5
23442 #undef VAR6
23443 #undef VAR7
23444 #undef VAR8
23445 #undef VAR9
23446 #undef VAR10
23448 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23449 #define VAR1(T, N, A) \
23450 CF (N, A)
23451 #define VAR2(T, N, A, B) \
23452 VAR1 (T, N, A), \
23453 CF (N, B)
23454 #define VAR3(T, N, A, B, C) \
23455 VAR2 (T, N, A, B), \
23456 CF (N, C)
23457 #define VAR4(T, N, A, B, C, D) \
23458 VAR3 (T, N, A, B, C), \
23459 CF (N, D)
23460 #define VAR5(T, N, A, B, C, D, E) \
23461 VAR4 (T, N, A, B, C, D), \
23462 CF (N, E)
23463 #define VAR6(T, N, A, B, C, D, E, F) \
23464 VAR5 (T, N, A, B, C, D, E), \
23465 CF (N, F)
23466 #define VAR7(T, N, A, B, C, D, E, F, G) \
23467 VAR6 (T, N, A, B, C, D, E, F), \
23468 CF (N, G)
23469 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23470 VAR7 (T, N, A, B, C, D, E, F, G), \
23471 CF (N, H)
23472 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23473 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23474 CF (N, I)
23475 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23476 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23477 CF (N, J)
23478 enum arm_builtins
23480 ARM_BUILTIN_GETWCGR0,
23481 ARM_BUILTIN_GETWCGR1,
23482 ARM_BUILTIN_GETWCGR2,
23483 ARM_BUILTIN_GETWCGR3,
23485 ARM_BUILTIN_SETWCGR0,
23486 ARM_BUILTIN_SETWCGR1,
23487 ARM_BUILTIN_SETWCGR2,
23488 ARM_BUILTIN_SETWCGR3,
23490 ARM_BUILTIN_WZERO,
23492 ARM_BUILTIN_WAVG2BR,
23493 ARM_BUILTIN_WAVG2HR,
23494 ARM_BUILTIN_WAVG2B,
23495 ARM_BUILTIN_WAVG2H,
23497 ARM_BUILTIN_WACCB,
23498 ARM_BUILTIN_WACCH,
23499 ARM_BUILTIN_WACCW,
23501 ARM_BUILTIN_WMACS,
23502 ARM_BUILTIN_WMACSZ,
23503 ARM_BUILTIN_WMACU,
23504 ARM_BUILTIN_WMACUZ,
23506 ARM_BUILTIN_WSADB,
23507 ARM_BUILTIN_WSADBZ,
23508 ARM_BUILTIN_WSADH,
23509 ARM_BUILTIN_WSADHZ,
23511 ARM_BUILTIN_WALIGNI,
23512 ARM_BUILTIN_WALIGNR0,
23513 ARM_BUILTIN_WALIGNR1,
23514 ARM_BUILTIN_WALIGNR2,
23515 ARM_BUILTIN_WALIGNR3,
23517 ARM_BUILTIN_TMIA,
23518 ARM_BUILTIN_TMIAPH,
23519 ARM_BUILTIN_TMIABB,
23520 ARM_BUILTIN_TMIABT,
23521 ARM_BUILTIN_TMIATB,
23522 ARM_BUILTIN_TMIATT,
23524 ARM_BUILTIN_TMOVMSKB,
23525 ARM_BUILTIN_TMOVMSKH,
23526 ARM_BUILTIN_TMOVMSKW,
23528 ARM_BUILTIN_TBCSTB,
23529 ARM_BUILTIN_TBCSTH,
23530 ARM_BUILTIN_TBCSTW,
23532 ARM_BUILTIN_WMADDS,
23533 ARM_BUILTIN_WMADDU,
23535 ARM_BUILTIN_WPACKHSS,
23536 ARM_BUILTIN_WPACKWSS,
23537 ARM_BUILTIN_WPACKDSS,
23538 ARM_BUILTIN_WPACKHUS,
23539 ARM_BUILTIN_WPACKWUS,
23540 ARM_BUILTIN_WPACKDUS,
23542 ARM_BUILTIN_WADDB,
23543 ARM_BUILTIN_WADDH,
23544 ARM_BUILTIN_WADDW,
23545 ARM_BUILTIN_WADDSSB,
23546 ARM_BUILTIN_WADDSSH,
23547 ARM_BUILTIN_WADDSSW,
23548 ARM_BUILTIN_WADDUSB,
23549 ARM_BUILTIN_WADDUSH,
23550 ARM_BUILTIN_WADDUSW,
23551 ARM_BUILTIN_WSUBB,
23552 ARM_BUILTIN_WSUBH,
23553 ARM_BUILTIN_WSUBW,
23554 ARM_BUILTIN_WSUBSSB,
23555 ARM_BUILTIN_WSUBSSH,
23556 ARM_BUILTIN_WSUBSSW,
23557 ARM_BUILTIN_WSUBUSB,
23558 ARM_BUILTIN_WSUBUSH,
23559 ARM_BUILTIN_WSUBUSW,
23561 ARM_BUILTIN_WAND,
23562 ARM_BUILTIN_WANDN,
23563 ARM_BUILTIN_WOR,
23564 ARM_BUILTIN_WXOR,
23566 ARM_BUILTIN_WCMPEQB,
23567 ARM_BUILTIN_WCMPEQH,
23568 ARM_BUILTIN_WCMPEQW,
23569 ARM_BUILTIN_WCMPGTUB,
23570 ARM_BUILTIN_WCMPGTUH,
23571 ARM_BUILTIN_WCMPGTUW,
23572 ARM_BUILTIN_WCMPGTSB,
23573 ARM_BUILTIN_WCMPGTSH,
23574 ARM_BUILTIN_WCMPGTSW,
23576 ARM_BUILTIN_TEXTRMSB,
23577 ARM_BUILTIN_TEXTRMSH,
23578 ARM_BUILTIN_TEXTRMSW,
23579 ARM_BUILTIN_TEXTRMUB,
23580 ARM_BUILTIN_TEXTRMUH,
23581 ARM_BUILTIN_TEXTRMUW,
23582 ARM_BUILTIN_TINSRB,
23583 ARM_BUILTIN_TINSRH,
23584 ARM_BUILTIN_TINSRW,
23586 ARM_BUILTIN_WMAXSW,
23587 ARM_BUILTIN_WMAXSH,
23588 ARM_BUILTIN_WMAXSB,
23589 ARM_BUILTIN_WMAXUW,
23590 ARM_BUILTIN_WMAXUH,
23591 ARM_BUILTIN_WMAXUB,
23592 ARM_BUILTIN_WMINSW,
23593 ARM_BUILTIN_WMINSH,
23594 ARM_BUILTIN_WMINSB,
23595 ARM_BUILTIN_WMINUW,
23596 ARM_BUILTIN_WMINUH,
23597 ARM_BUILTIN_WMINUB,
23599 ARM_BUILTIN_WMULUM,
23600 ARM_BUILTIN_WMULSM,
23601 ARM_BUILTIN_WMULUL,
23603 ARM_BUILTIN_PSADBH,
23604 ARM_BUILTIN_WSHUFH,
23606 ARM_BUILTIN_WSLLH,
23607 ARM_BUILTIN_WSLLW,
23608 ARM_BUILTIN_WSLLD,
23609 ARM_BUILTIN_WSRAH,
23610 ARM_BUILTIN_WSRAW,
23611 ARM_BUILTIN_WSRAD,
23612 ARM_BUILTIN_WSRLH,
23613 ARM_BUILTIN_WSRLW,
23614 ARM_BUILTIN_WSRLD,
23615 ARM_BUILTIN_WRORH,
23616 ARM_BUILTIN_WRORW,
23617 ARM_BUILTIN_WRORD,
23618 ARM_BUILTIN_WSLLHI,
23619 ARM_BUILTIN_WSLLWI,
23620 ARM_BUILTIN_WSLLDI,
23621 ARM_BUILTIN_WSRAHI,
23622 ARM_BUILTIN_WSRAWI,
23623 ARM_BUILTIN_WSRADI,
23624 ARM_BUILTIN_WSRLHI,
23625 ARM_BUILTIN_WSRLWI,
23626 ARM_BUILTIN_WSRLDI,
23627 ARM_BUILTIN_WRORHI,
23628 ARM_BUILTIN_WRORWI,
23629 ARM_BUILTIN_WRORDI,
23631 ARM_BUILTIN_WUNPCKIHB,
23632 ARM_BUILTIN_WUNPCKIHH,
23633 ARM_BUILTIN_WUNPCKIHW,
23634 ARM_BUILTIN_WUNPCKILB,
23635 ARM_BUILTIN_WUNPCKILH,
23636 ARM_BUILTIN_WUNPCKILW,
23638 ARM_BUILTIN_WUNPCKEHSB,
23639 ARM_BUILTIN_WUNPCKEHSH,
23640 ARM_BUILTIN_WUNPCKEHSW,
23641 ARM_BUILTIN_WUNPCKEHUB,
23642 ARM_BUILTIN_WUNPCKEHUH,
23643 ARM_BUILTIN_WUNPCKEHUW,
23644 ARM_BUILTIN_WUNPCKELSB,
23645 ARM_BUILTIN_WUNPCKELSH,
23646 ARM_BUILTIN_WUNPCKELSW,
23647 ARM_BUILTIN_WUNPCKELUB,
23648 ARM_BUILTIN_WUNPCKELUH,
23649 ARM_BUILTIN_WUNPCKELUW,
23651 ARM_BUILTIN_WABSB,
23652 ARM_BUILTIN_WABSH,
23653 ARM_BUILTIN_WABSW,
23655 ARM_BUILTIN_WADDSUBHX,
23656 ARM_BUILTIN_WSUBADDHX,
23658 ARM_BUILTIN_WABSDIFFB,
23659 ARM_BUILTIN_WABSDIFFH,
23660 ARM_BUILTIN_WABSDIFFW,
23662 ARM_BUILTIN_WADDCH,
23663 ARM_BUILTIN_WADDCW,
23665 ARM_BUILTIN_WAVG4,
23666 ARM_BUILTIN_WAVG4R,
23668 ARM_BUILTIN_WMADDSX,
23669 ARM_BUILTIN_WMADDUX,
23671 ARM_BUILTIN_WMADDSN,
23672 ARM_BUILTIN_WMADDUN,
23674 ARM_BUILTIN_WMULWSM,
23675 ARM_BUILTIN_WMULWUM,
23677 ARM_BUILTIN_WMULWSMR,
23678 ARM_BUILTIN_WMULWUMR,
23680 ARM_BUILTIN_WMULWL,
23682 ARM_BUILTIN_WMULSMR,
23683 ARM_BUILTIN_WMULUMR,
23685 ARM_BUILTIN_WQMULM,
23686 ARM_BUILTIN_WQMULMR,
23688 ARM_BUILTIN_WQMULWM,
23689 ARM_BUILTIN_WQMULWMR,
23691 ARM_BUILTIN_WADDBHUSM,
23692 ARM_BUILTIN_WADDBHUSL,
23694 ARM_BUILTIN_WQMIABB,
23695 ARM_BUILTIN_WQMIABT,
23696 ARM_BUILTIN_WQMIATB,
23697 ARM_BUILTIN_WQMIATT,
23699 ARM_BUILTIN_WQMIABBN,
23700 ARM_BUILTIN_WQMIABTN,
23701 ARM_BUILTIN_WQMIATBN,
23702 ARM_BUILTIN_WQMIATTN,
23704 ARM_BUILTIN_WMIABB,
23705 ARM_BUILTIN_WMIABT,
23706 ARM_BUILTIN_WMIATB,
23707 ARM_BUILTIN_WMIATT,
23709 ARM_BUILTIN_WMIABBN,
23710 ARM_BUILTIN_WMIABTN,
23711 ARM_BUILTIN_WMIATBN,
23712 ARM_BUILTIN_WMIATTN,
23714 ARM_BUILTIN_WMIAWBB,
23715 ARM_BUILTIN_WMIAWBT,
23716 ARM_BUILTIN_WMIAWTB,
23717 ARM_BUILTIN_WMIAWTT,
23719 ARM_BUILTIN_WMIAWBBN,
23720 ARM_BUILTIN_WMIAWBTN,
23721 ARM_BUILTIN_WMIAWTBN,
23722 ARM_BUILTIN_WMIAWTTN,
23724 ARM_BUILTIN_WMERGE,
23726 ARM_BUILTIN_CRC32B,
23727 ARM_BUILTIN_CRC32H,
23728 ARM_BUILTIN_CRC32W,
23729 ARM_BUILTIN_CRC32CB,
23730 ARM_BUILTIN_CRC32CH,
23731 ARM_BUILTIN_CRC32CW,
23733 ARM_BUILTIN_GET_FPSCR,
23734 ARM_BUILTIN_SET_FPSCR,
23736 #undef CRYPTO1
23737 #undef CRYPTO2
23738 #undef CRYPTO3
23740 #define CRYPTO1(L, U, M1, M2) \
23741 ARM_BUILTIN_CRYPTO_##U,
23742 #define CRYPTO2(L, U, M1, M2, M3) \
23743 ARM_BUILTIN_CRYPTO_##U,
23744 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23745 ARM_BUILTIN_CRYPTO_##U,
23747 #include "crypto.def"
23749 #undef CRYPTO1
23750 #undef CRYPTO2
23751 #undef CRYPTO3
23753 #include "arm_neon_builtins.def"
23755 ,ARM_BUILTIN_MAX
23758 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23760 #undef CF
23761 #undef VAR1
23762 #undef VAR2
23763 #undef VAR3
23764 #undef VAR4
23765 #undef VAR5
23766 #undef VAR6
23767 #undef VAR7
23768 #undef VAR8
23769 #undef VAR9
23770 #undef VAR10
23772 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23774 #define NUM_DREG_TYPES 5
23775 #define NUM_QREG_TYPES 6
23777 static void
23778 arm_init_neon_builtins (void)
23780 unsigned int i, fcode;
23781 tree decl;
23783 tree neon_intQI_type_node;
23784 tree neon_intHI_type_node;
23785 tree neon_floatHF_type_node;
23786 tree neon_polyQI_type_node;
23787 tree neon_polyHI_type_node;
23788 tree neon_intSI_type_node;
23789 tree neon_intDI_type_node;
23790 tree neon_intUTI_type_node;
23791 tree neon_float_type_node;
23793 tree intQI_pointer_node;
23794 tree intHI_pointer_node;
23795 tree intSI_pointer_node;
23796 tree intDI_pointer_node;
23797 tree float_pointer_node;
23799 tree const_intQI_node;
23800 tree const_intHI_node;
23801 tree const_intSI_node;
23802 tree const_intDI_node;
23803 tree const_float_node;
23805 tree const_intQI_pointer_node;
23806 tree const_intHI_pointer_node;
23807 tree const_intSI_pointer_node;
23808 tree const_intDI_pointer_node;
23809 tree const_float_pointer_node;
23811 tree V8QI_type_node;
23812 tree V4HI_type_node;
23813 tree V4UHI_type_node;
23814 tree V4HF_type_node;
23815 tree V2SI_type_node;
23816 tree V2USI_type_node;
23817 tree V2SF_type_node;
23818 tree V16QI_type_node;
23819 tree V8HI_type_node;
23820 tree V8UHI_type_node;
23821 tree V4SI_type_node;
23822 tree V4USI_type_node;
23823 tree V4SF_type_node;
23824 tree V2DI_type_node;
23825 tree V2UDI_type_node;
23827 tree intUQI_type_node;
23828 tree intUHI_type_node;
23829 tree intUSI_type_node;
23830 tree intUDI_type_node;
23832 tree intEI_type_node;
23833 tree intOI_type_node;
23834 tree intCI_type_node;
23835 tree intXI_type_node;
23837 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23838 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23839 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23841 /* Create distinguished type nodes for NEON vector element types,
23842 and pointers to values of such types, so we can detect them later. */
23843 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23844 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23845 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23846 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23847 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23848 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23849 neon_float_type_node = make_node (REAL_TYPE);
23850 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23851 layout_type (neon_float_type_node);
23852 neon_floatHF_type_node = make_node (REAL_TYPE);
23853 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23854 layout_type (neon_floatHF_type_node);
23856 /* Define typedefs which exactly correspond to the modes we are basing vector
23857 types on. If you change these names you'll need to change
23858 the table used by arm_mangle_type too. */
23859 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23860 "__builtin_neon_qi");
23861 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23862 "__builtin_neon_hi");
23863 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23864 "__builtin_neon_hf");
23865 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23866 "__builtin_neon_si");
23867 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23868 "__builtin_neon_sf");
23869 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23870 "__builtin_neon_di");
23871 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23872 "__builtin_neon_poly8");
23873 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23874 "__builtin_neon_poly16");
23876 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23877 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23878 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23879 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23880 float_pointer_node = build_pointer_type (neon_float_type_node);
23882 /* Next create constant-qualified versions of the above types. */
23883 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23884 TYPE_QUAL_CONST);
23885 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23886 TYPE_QUAL_CONST);
23887 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23888 TYPE_QUAL_CONST);
23889 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23890 TYPE_QUAL_CONST);
23891 const_float_node = build_qualified_type (neon_float_type_node,
23892 TYPE_QUAL_CONST);
23894 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23895 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23896 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23897 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23898 const_float_pointer_node = build_pointer_type (const_float_node);
23900 /* Unsigned integer types for various mode sizes. */
23901 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23902 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23903 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23904 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23905 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23906 /* Now create vector types based on our NEON element types. */
23907 /* 64-bit vectors. */
23908 V8QI_type_node =
23909 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23910 V4HI_type_node =
23911 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23912 V4UHI_type_node =
23913 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23914 V4HF_type_node =
23915 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23916 V2SI_type_node =
23917 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23918 V2USI_type_node =
23919 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23920 V2SF_type_node =
23921 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23922 /* 128-bit vectors. */
23923 V16QI_type_node =
23924 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23925 V8HI_type_node =
23926 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23927 V8UHI_type_node =
23928 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23929 V4SI_type_node =
23930 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23931 V4USI_type_node =
23932 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23933 V4SF_type_node =
23934 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23935 V2DI_type_node =
23936 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23937 V2UDI_type_node =
23938 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23941 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23942 "__builtin_neon_uqi");
23943 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23944 "__builtin_neon_uhi");
23945 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23946 "__builtin_neon_usi");
23947 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23948 "__builtin_neon_udi");
23949 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23950 "__builtin_neon_poly64");
23951 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23952 "__builtin_neon_poly128");
23954 /* Opaque integer types for structures of vectors. */
23955 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23956 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23957 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23958 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23960 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23961 "__builtin_neon_ti");
23962 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23963 "__builtin_neon_ei");
23964 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23965 "__builtin_neon_oi");
23966 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23967 "__builtin_neon_ci");
23968 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23969 "__builtin_neon_xi");
23971 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23974 tree V16UQI_type_node =
23975 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23977 tree v16uqi_ftype_v16uqi
23978 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23980 tree v16uqi_ftype_v16uqi_v16uqi
23981 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23982 V16UQI_type_node, NULL_TREE);
23984 tree v4usi_ftype_v4usi
23985 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23987 tree v4usi_ftype_v4usi_v4usi
23988 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23989 V4USI_type_node, NULL_TREE);
23991 tree v4usi_ftype_v4usi_v4usi_v4usi
23992 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23993 V4USI_type_node, V4USI_type_node, NULL_TREE);
23995 tree uti_ftype_udi_udi
23996 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23997 intUDI_type_node, NULL_TREE);
23999 #undef CRYPTO1
24000 #undef CRYPTO2
24001 #undef CRYPTO3
24002 #undef C
24003 #undef N
24004 #undef CF
24005 #undef FT1
24006 #undef FT2
24007 #undef FT3
24009 #define C(U) \
24010 ARM_BUILTIN_CRYPTO_##U
24011 #define N(L) \
24012 "__builtin_arm_crypto_"#L
24013 #define FT1(R, A) \
24014 R##_ftype_##A
24015 #define FT2(R, A1, A2) \
24016 R##_ftype_##A1##_##A2
24017 #define FT3(R, A1, A2, A3) \
24018 R##_ftype_##A1##_##A2##_##A3
24019 #define CRYPTO1(L, U, R, A) \
24020 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
24021 C (U), BUILT_IN_MD, \
24022 NULL, NULL_TREE);
24023 #define CRYPTO2(L, U, R, A1, A2) \
24024 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
24025 C (U), BUILT_IN_MD, \
24026 NULL, NULL_TREE);
24028 #define CRYPTO3(L, U, R, A1, A2, A3) \
24029 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
24030 C (U), BUILT_IN_MD, \
24031 NULL, NULL_TREE);
24032 #include "crypto.def"
24034 #undef CRYPTO1
24035 #undef CRYPTO2
24036 #undef CRYPTO3
24037 #undef C
24038 #undef N
24039 #undef FT1
24040 #undef FT2
24041 #undef FT3
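/* Illustrative sketch only: a crypto.def entry along the lines of

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   (the exact entries live in crypto.def) would expand, via the C/N/FT2
   and CRYPTO2 macros above, into

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);

   i.e. each entry registers one "__builtin_arm_crypto_*" function whose
   type is one of the _ftype_ nodes built just above.  */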
24043 dreg_types[0] = V8QI_type_node;
24044 dreg_types[1] = V4HI_type_node;
24045 dreg_types[2] = V2SI_type_node;
24046 dreg_types[3] = V2SF_type_node;
24047 dreg_types[4] = neon_intDI_type_node;
24049 qreg_types[0] = V16QI_type_node;
24050 qreg_types[1] = V8HI_type_node;
24051 qreg_types[2] = V4SI_type_node;
24052 qreg_types[3] = V4SF_type_node;
24053 qreg_types[4] = V2DI_type_node;
24054 qreg_types[5] = neon_intUTI_type_node;
24056 for (i = 0; i < NUM_QREG_TYPES; i++)
24058 int j;
24059 for (j = 0; j < NUM_QREG_TYPES; j++)
24061 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24062 reinterp_ftype_dreg[i][j]
24063 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24065 reinterp_ftype_qreg[i][j]
24066 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24070 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24071 i < ARRAY_SIZE (neon_builtin_data);
24072 i++, fcode++)
24074 neon_builtin_datum *d = &neon_builtin_data[i];
24076 const char* const modenames[] = {
24077 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24078 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24079 "ti", "ei", "oi"
24081 char namebuf[60];
24082 tree ftype = NULL;
24083 int is_load = 0, is_store = 0;
24085 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24087 d->fcode = fcode;
24089 switch (d->itype)
24091 case NEON_LOAD1:
24092 case NEON_LOAD1LANE:
24093 case NEON_LOADSTRUCT:
24094 case NEON_LOADSTRUCTLANE:
24095 is_load = 1;
24096 /* Fall through. */
24097 case NEON_STORE1:
24098 case NEON_STORE1LANE:
24099 case NEON_STORESTRUCT:
24100 case NEON_STORESTRUCTLANE:
24101 if (!is_load)
24102 is_store = 1;
24103 /* Fall through. */
24104 case NEON_UNOP:
24105 case NEON_RINT:
24106 case NEON_BINOP:
24107 case NEON_LOGICBINOP:
24108 case NEON_SHIFTINSERT:
24109 case NEON_TERNOP:
24110 case NEON_GETLANE:
24111 case NEON_SETLANE:
24112 case NEON_CREATE:
24113 case NEON_DUP:
24114 case NEON_DUPLANE:
24115 case NEON_SHIFTIMM:
24116 case NEON_SHIFTACC:
24117 case NEON_COMBINE:
24118 case NEON_SPLIT:
24119 case NEON_CONVERT:
24120 case NEON_FIXCONV:
24121 case NEON_LANEMUL:
24122 case NEON_LANEMULL:
24123 case NEON_LANEMULH:
24124 case NEON_LANEMAC:
24125 case NEON_SCALARMUL:
24126 case NEON_SCALARMULL:
24127 case NEON_SCALARMULH:
24128 case NEON_SCALARMAC:
24129 case NEON_SELECT:
24130 case NEON_VTBL:
24131 case NEON_VTBX:
24133 int k;
24134 tree return_type = void_type_node, args = void_list_node;
24136 /* Build a function type directly from the insn_data for
24137 this builtin. The build_function_type() function takes
24138 care of removing duplicates for us. */
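/* As an illustration (assuming a plain binary op such as a vadd builtin
   whose insn pattern has three V8QImode operands): the loop below walks
   the operands from last to first, so it first conses the two V8QImode
   argument types onto ARGS and finally records operand 0 as the return
   type, yielding a function type equivalent to

     V8QI_type_node (*) (V8QI_type_node, V8QI_type_node)

   for that builtin.  */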
24139 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24141 tree eltype;
24143 if (is_load && k == 1)
24145 /* Neon load patterns always have the memory
24146 operand in the operand 1 position. */
24147 gcc_assert (insn_data[d->code].operand[k].predicate
24148 == neon_struct_operand);
24150 switch (d->mode)
24152 case T_V8QI:
24153 case T_V16QI:
24154 eltype = const_intQI_pointer_node;
24155 break;
24157 case T_V4HI:
24158 case T_V8HI:
24159 eltype = const_intHI_pointer_node;
24160 break;
24162 case T_V2SI:
24163 case T_V4SI:
24164 eltype = const_intSI_pointer_node;
24165 break;
24167 case T_V2SF:
24168 case T_V4SF:
24169 eltype = const_float_pointer_node;
24170 break;
24172 case T_DI:
24173 case T_V2DI:
24174 eltype = const_intDI_pointer_node;
24175 break;
24177 default: gcc_unreachable ();
24180 else if (is_store && k == 0)
24182 /* Similarly, Neon store patterns use operand 0 as
24183 the memory location to store to. */
24184 gcc_assert (insn_data[d->code].operand[k].predicate
24185 == neon_struct_operand);
24187 switch (d->mode)
24189 case T_V8QI:
24190 case T_V16QI:
24191 eltype = intQI_pointer_node;
24192 break;
24194 case T_V4HI:
24195 case T_V8HI:
24196 eltype = intHI_pointer_node;
24197 break;
24199 case T_V2SI:
24200 case T_V4SI:
24201 eltype = intSI_pointer_node;
24202 break;
24204 case T_V2SF:
24205 case T_V4SF:
24206 eltype = float_pointer_node;
24207 break;
24209 case T_DI:
24210 case T_V2DI:
24211 eltype = intDI_pointer_node;
24212 break;
24214 default: gcc_unreachable ();
24217 else
24219 switch (insn_data[d->code].operand[k].mode)
24221 case VOIDmode: eltype = void_type_node; break;
24222 /* Scalars. */
24223 case QImode: eltype = neon_intQI_type_node; break;
24224 case HImode: eltype = neon_intHI_type_node; break;
24225 case SImode: eltype = neon_intSI_type_node; break;
24226 case SFmode: eltype = neon_float_type_node; break;
24227 case DImode: eltype = neon_intDI_type_node; break;
24228 case TImode: eltype = intTI_type_node; break;
24229 case EImode: eltype = intEI_type_node; break;
24230 case OImode: eltype = intOI_type_node; break;
24231 case CImode: eltype = intCI_type_node; break;
24232 case XImode: eltype = intXI_type_node; break;
24233 /* 64-bit vectors. */
24234 case V8QImode: eltype = V8QI_type_node; break;
24235 case V4HImode: eltype = V4HI_type_node; break;
24236 case V2SImode: eltype = V2SI_type_node; break;
24237 case V2SFmode: eltype = V2SF_type_node; break;
24238 /* 128-bit vectors. */
24239 case V16QImode: eltype = V16QI_type_node; break;
24240 case V8HImode: eltype = V8HI_type_node; break;
24241 case V4SImode: eltype = V4SI_type_node; break;
24242 case V4SFmode: eltype = V4SF_type_node; break;
24243 case V2DImode: eltype = V2DI_type_node; break;
24244 default: gcc_unreachable ();
24248 if (k == 0 && !is_store)
24249 return_type = eltype;
24250 else
24251 args = tree_cons (NULL_TREE, eltype, args);
24254 ftype = build_function_type (return_type, args);
24256 break;
24258 case NEON_REINTERP:
24260 /* We iterate over NUM_DREG_TYPES doubleword types,
24261 then NUM_QREG_TYPES quadword types.
24262 V4HF is not a type used in reinterpret, so we translate
24263 d->mode to the correct index in reinterp_ftype_dreg. */
24264 bool qreg_p
24265 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24266 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24267 % NUM_QREG_TYPES;
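/* Worked example of the index computation above: the neon_builtin_type_mode
   values follow the order of the modenames[] table used later in this
   function (T_V8QI = 0, T_V4HI = 1, T_V4HF = 2, T_V2SI = 3, T_V2SF = 4,
   T_DI = 5, T_V16QI = 6, ...).  For a doubleword reinterpret whose argument
   mode is T_V2SF we skip the unused T_V4HF slot, giving
   rhs = (4 - 1) % 6 = 3, which selects V2SF in dreg_types[].  For a
   quadword reinterpret from T_TI, rhs = 11 % 6 = 5, which selects the
   128-bit integer type in qreg_types[].  */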
24268 switch (insn_data[d->code].operand[0].mode)
24270 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24271 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24272 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24273 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24274 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24275 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24276 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24277 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24278 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24279 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24280 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24281 default: gcc_unreachable ();
24284 break;
24285 case NEON_FLOAT_WIDEN:
24287 tree eltype = NULL_TREE;
24288 tree return_type = NULL_TREE;
24290 switch (insn_data[d->code].operand[1].mode)
24292 case V4HFmode:
24293 eltype = V4HF_type_node;
24294 return_type = V4SF_type_node;
24295 break;
24296 default: gcc_unreachable ();
24298 ftype = build_function_type_list (return_type, eltype, NULL);
24299 break;
24301 case NEON_FLOAT_NARROW:
24303 tree eltype = NULL_TREE;
24304 tree return_type = NULL_TREE;
24306 switch (insn_data[d->code].operand[1].mode)
24308 case V4SFmode:
24309 eltype = V4SF_type_node;
24310 return_type = V4HF_type_node;
24311 break;
24312 default: gcc_unreachable ();
24314 ftype = build_function_type_list (return_type, eltype, NULL);
24315 break;
24317 case NEON_BSWAP:
24319 tree eltype = NULL_TREE;
24320 switch (insn_data[d->code].operand[1].mode)
24322 case V4HImode:
24323 eltype = V4UHI_type_node;
24324 break;
24325 case V8HImode:
24326 eltype = V8UHI_type_node;
24327 break;
24328 case V2SImode:
24329 eltype = V2USI_type_node;
24330 break;
24331 case V4SImode:
24332 eltype = V4USI_type_node;
24333 break;
24334 case V2DImode:
24335 eltype = V2UDI_type_node;
24336 break;
24337 default: gcc_unreachable ();
24339 ftype = build_function_type_list (eltype, eltype, NULL);
24340 break;
24342 case NEON_COPYSIGNF:
24344 tree eltype = NULL_TREE;
24345 switch (insn_data[d->code].operand[1].mode)
24347 case V2SFmode:
24348 eltype = V2SF_type_node;
24349 break;
24350 case V4SFmode:
24351 eltype = V4SF_type_node;
24352 break;
24353 default: gcc_unreachable ();
24355 ftype = build_function_type_list (eltype, eltype, NULL);
24356 break;
24358 default:
24359 gcc_unreachable ();
24362 gcc_assert (ftype != NULL);
24364 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24366 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24367 NULL_TREE);
24368 arm_builtin_decls[fcode] = decl;
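/* For example, a "vadd" entry with key mode T_V8QI ends up registered
   (via the sprintf above) as "__builtin_neon_vaddv8qi", which is the
   spelling the arm_neon.h intrinsics expand to.  (Name shown for
   illustration only; the real set of entries comes from
   arm_neon_builtins.def.)  */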
24372 #undef NUM_DREG_TYPES
24373 #undef NUM_QREG_TYPES
24375 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24376 do \
24378 if ((MASK) & insn_flags) \
24380 tree bdecl; \
24381 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24382 BUILT_IN_MD, NULL, NULL_TREE); \
24383 arm_builtin_decls[CODE] = bdecl; \
24386 while (0)
24388 struct builtin_description
24390 const unsigned int mask;
24391 const enum insn_code icode;
24392 const char * const name;
24393 const enum arm_builtins code;
24394 const enum rtx_code comparison;
24395 const unsigned int flag;
24398 static const struct builtin_description bdesc_2arg[] =
24400 #define IWMMXT_BUILTIN(code, string, builtin) \
24401 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24402 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24404 #define IWMMXT2_BUILTIN(code, string, builtin) \
24405 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24406 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24408 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24409 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24410 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24411 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24412 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24413 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24414 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24415 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24416 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24417 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24418 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24419 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24420 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24421 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24422 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24423 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24424 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24425 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24426 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24427 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24428 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24429 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24430 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24431 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24432 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24433 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24434 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24435 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24436 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24437 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24438 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24439 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24440 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24441 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24442 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24443 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24444 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24445 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24446 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24447 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24448 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24449 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24450 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24451 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24452 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24453 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24454 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24455 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24456 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24457 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24458 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24459 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24460 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24461 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24462 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24463 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24464 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24465 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24466 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24467 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24468 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24469 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24470 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24471 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24472 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24473 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24474 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24475 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24476 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24477 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24478 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24479 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24480 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24481 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24482 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24483 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24484 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24485 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24487 #define IWMMXT_BUILTIN2(code, builtin) \
24488 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24490 #define IWMMXT2_BUILTIN2(code, builtin) \
24491 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24493 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24494 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24495 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24496 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24497 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24498 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24499 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24500 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24501 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24502 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24505 #define FP_BUILTIN(L, U) \
24506 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24507 UNKNOWN, 0},
24509 FP_BUILTIN (get_fpscr, GET_FPSCR)
24510 FP_BUILTIN (set_fpscr, SET_FPSCR)
24511 #undef FP_BUILTIN
24513 #define CRC32_BUILTIN(L, U) \
24514 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24515 UNKNOWN, 0},
24516 CRC32_BUILTIN (crc32b, CRC32B)
24517 CRC32_BUILTIN (crc32h, CRC32H)
24518 CRC32_BUILTIN (crc32w, CRC32W)
24519 CRC32_BUILTIN (crc32cb, CRC32CB)
24520 CRC32_BUILTIN (crc32ch, CRC32CH)
24521 CRC32_BUILTIN (crc32cw, CRC32CW)
24522 #undef CRC32_BUILTIN
24525 #define CRYPTO_BUILTIN(L, U) \
24526 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24527 UNKNOWN, 0},
24528 #undef CRYPTO1
24529 #undef CRYPTO2
24530 #undef CRYPTO3
24531 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24532 #define CRYPTO1(L, U, R, A)
24533 #define CRYPTO3(L, U, R, A1, A2, A3)
24534 #include "crypto.def"
24535 #undef CRYPTO1
24536 #undef CRYPTO2
24537 #undef CRYPTO3
24541 static const struct builtin_description bdesc_1arg[] =
24543 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24544 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24545 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24546 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24547 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24548 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24549 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24550 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24551 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24552 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24553 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24554 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24555 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24556 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24557 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24558 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24559 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24560 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24561 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24562 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24563 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24564 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24565 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24566 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24568 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24569 #define CRYPTO2(L, U, R, A1, A2)
24570 #define CRYPTO3(L, U, R, A1, A2, A3)
24571 #include "crypto.def"
24572 #undef CRYPTO1
24573 #undef CRYPTO2
24574 #undef CRYPTO3
24577 static const struct builtin_description bdesc_3arg[] =
24579 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24580 #define CRYPTO1(L, U, R, A)
24581 #define CRYPTO2(L, U, R, A1, A2)
24582 #include "crypto.def"
24583 #undef CRYPTO1
24584 #undef CRYPTO2
24585 #undef CRYPTO3
24587 #undef CRYPTO_BUILTIN
24589 /* Set up all the iWMMXt builtins. This is not called if
24590 TARGET_IWMMXT is zero. */
24592 static void
24593 arm_init_iwmmxt_builtins (void)
24595 const struct builtin_description * d;
24596 size_t i;
24598 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24599 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24600 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24602 tree v8qi_ftype_v8qi_v8qi_int
24603 = build_function_type_list (V8QI_type_node,
24604 V8QI_type_node, V8QI_type_node,
24605 integer_type_node, NULL_TREE);
24606 tree v4hi_ftype_v4hi_int
24607 = build_function_type_list (V4HI_type_node,
24608 V4HI_type_node, integer_type_node, NULL_TREE);
24609 tree v2si_ftype_v2si_int
24610 = build_function_type_list (V2SI_type_node,
24611 V2SI_type_node, integer_type_node, NULL_TREE);
24612 tree v2si_ftype_di_di
24613 = build_function_type_list (V2SI_type_node,
24614 long_long_integer_type_node,
24615 long_long_integer_type_node,
24616 NULL_TREE);
24617 tree di_ftype_di_int
24618 = build_function_type_list (long_long_integer_type_node,
24619 long_long_integer_type_node,
24620 integer_type_node, NULL_TREE);
24621 tree di_ftype_di_int_int
24622 = build_function_type_list (long_long_integer_type_node,
24623 long_long_integer_type_node,
24624 integer_type_node,
24625 integer_type_node, NULL_TREE);
24626 tree int_ftype_v8qi
24627 = build_function_type_list (integer_type_node,
24628 V8QI_type_node, NULL_TREE);
24629 tree int_ftype_v4hi
24630 = build_function_type_list (integer_type_node,
24631 V4HI_type_node, NULL_TREE);
24632 tree int_ftype_v2si
24633 = build_function_type_list (integer_type_node,
24634 V2SI_type_node, NULL_TREE);
24635 tree int_ftype_v8qi_int
24636 = build_function_type_list (integer_type_node,
24637 V8QI_type_node, integer_type_node, NULL_TREE);
24638 tree int_ftype_v4hi_int
24639 = build_function_type_list (integer_type_node,
24640 V4HI_type_node, integer_type_node, NULL_TREE);
24641 tree int_ftype_v2si_int
24642 = build_function_type_list (integer_type_node,
24643 V2SI_type_node, integer_type_node, NULL_TREE);
24644 tree v8qi_ftype_v8qi_int_int
24645 = build_function_type_list (V8QI_type_node,
24646 V8QI_type_node, integer_type_node,
24647 integer_type_node, NULL_TREE);
24648 tree v4hi_ftype_v4hi_int_int
24649 = build_function_type_list (V4HI_type_node,
24650 V4HI_type_node, integer_type_node,
24651 integer_type_node, NULL_TREE);
24652 tree v2si_ftype_v2si_int_int
24653 = build_function_type_list (V2SI_type_node,
24654 V2SI_type_node, integer_type_node,
24655 integer_type_node, NULL_TREE);
24656 /* Miscellaneous. */
24657 tree v8qi_ftype_v4hi_v4hi
24658 = build_function_type_list (V8QI_type_node,
24659 V4HI_type_node, V4HI_type_node, NULL_TREE);
24660 tree v4hi_ftype_v2si_v2si
24661 = build_function_type_list (V4HI_type_node,
24662 V2SI_type_node, V2SI_type_node, NULL_TREE);
24663 tree v8qi_ftype_v4hi_v8qi
24664 = build_function_type_list (V8QI_type_node,
24665 V4HI_type_node, V8QI_type_node, NULL_TREE);
24666 tree v2si_ftype_v4hi_v4hi
24667 = build_function_type_list (V2SI_type_node,
24668 V4HI_type_node, V4HI_type_node, NULL_TREE);
24669 tree v2si_ftype_v8qi_v8qi
24670 = build_function_type_list (V2SI_type_node,
24671 V8QI_type_node, V8QI_type_node, NULL_TREE);
24672 tree v4hi_ftype_v4hi_di
24673 = build_function_type_list (V4HI_type_node,
24674 V4HI_type_node, long_long_integer_type_node,
24675 NULL_TREE);
24676 tree v2si_ftype_v2si_di
24677 = build_function_type_list (V2SI_type_node,
24678 V2SI_type_node, long_long_integer_type_node,
24679 NULL_TREE);
24680 tree di_ftype_void
24681 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24682 tree int_ftype_void
24683 = build_function_type_list (integer_type_node, NULL_TREE);
24684 tree di_ftype_v8qi
24685 = build_function_type_list (long_long_integer_type_node,
24686 V8QI_type_node, NULL_TREE);
24687 tree di_ftype_v4hi
24688 = build_function_type_list (long_long_integer_type_node,
24689 V4HI_type_node, NULL_TREE);
24690 tree di_ftype_v2si
24691 = build_function_type_list (long_long_integer_type_node,
24692 V2SI_type_node, NULL_TREE);
24693 tree v2si_ftype_v4hi
24694 = build_function_type_list (V2SI_type_node,
24695 V4HI_type_node, NULL_TREE);
24696 tree v4hi_ftype_v8qi
24697 = build_function_type_list (V4HI_type_node,
24698 V8QI_type_node, NULL_TREE);
24699 tree v8qi_ftype_v8qi
24700 = build_function_type_list (V8QI_type_node,
24701 V8QI_type_node, NULL_TREE);
24702 tree v4hi_ftype_v4hi
24703 = build_function_type_list (V4HI_type_node,
24704 V4HI_type_node, NULL_TREE);
24705 tree v2si_ftype_v2si
24706 = build_function_type_list (V2SI_type_node,
24707 V2SI_type_node, NULL_TREE);
24709 tree di_ftype_di_v4hi_v4hi
24710 = build_function_type_list (long_long_unsigned_type_node,
24711 long_long_unsigned_type_node,
24712 V4HI_type_node, V4HI_type_node,
24713 NULL_TREE);
24715 tree di_ftype_v4hi_v4hi
24716 = build_function_type_list (long_long_unsigned_type_node,
24717 V4HI_type_node,V4HI_type_node,
24718 NULL_TREE);
24720 tree v2si_ftype_v2si_v4hi_v4hi
24721 = build_function_type_list (V2SI_type_node,
24722 V2SI_type_node, V4HI_type_node,
24723 V4HI_type_node, NULL_TREE);
24725 tree v2si_ftype_v2si_v8qi_v8qi
24726 = build_function_type_list (V2SI_type_node,
24727 V2SI_type_node, V8QI_type_node,
24728 V8QI_type_node, NULL_TREE);
24730 tree di_ftype_di_v2si_v2si
24731 = build_function_type_list (long_long_unsigned_type_node,
24732 long_long_unsigned_type_node,
24733 V2SI_type_node, V2SI_type_node,
24734 NULL_TREE);
24736 tree di_ftype_di_di_int
24737 = build_function_type_list (long_long_unsigned_type_node,
24738 long_long_unsigned_type_node,
24739 long_long_unsigned_type_node,
24740 integer_type_node, NULL_TREE);
24742 tree void_ftype_int
24743 = build_function_type_list (void_type_node,
24744 integer_type_node, NULL_TREE);
24746 tree v8qi_ftype_char
24747 = build_function_type_list (V8QI_type_node,
24748 signed_char_type_node, NULL_TREE);
24750 tree v4hi_ftype_short
24751 = build_function_type_list (V4HI_type_node,
24752 short_integer_type_node, NULL_TREE);
24754 tree v2si_ftype_int
24755 = build_function_type_list (V2SI_type_node,
24756 integer_type_node, NULL_TREE);
24758 /* Normal vector binops. */
24759 tree v8qi_ftype_v8qi_v8qi
24760 = build_function_type_list (V8QI_type_node,
24761 V8QI_type_node, V8QI_type_node, NULL_TREE);
24762 tree v4hi_ftype_v4hi_v4hi
24763 = build_function_type_list (V4HI_type_node,
24764 V4HI_type_node,V4HI_type_node, NULL_TREE);
24765 tree v2si_ftype_v2si_v2si
24766 = build_function_type_list (V2SI_type_node,
24767 V2SI_type_node, V2SI_type_node, NULL_TREE);
24768 tree di_ftype_di_di
24769 = build_function_type_list (long_long_unsigned_type_node,
24770 long_long_unsigned_type_node,
24771 long_long_unsigned_type_node,
24772 NULL_TREE);
24774 /* Add all builtins that are more or less simple operations on two
24775 operands. */
24776 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24778 /* Use one of the operands; the target can have a different mode for
24779 mask-generating compares. */
24780 machine_mode mode;
24781 tree type;
24783 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24784 continue;
24786 mode = insn_data[d->icode].operand[1].mode;
24788 switch (mode)
24790 case V8QImode:
24791 type = v8qi_ftype_v8qi_v8qi;
24792 break;
24793 case V4HImode:
24794 type = v4hi_ftype_v4hi_v4hi;
24795 break;
24796 case V2SImode:
24797 type = v2si_ftype_v2si_v2si;
24798 break;
24799 case DImode:
24800 type = di_ftype_di_di;
24801 break;
24803 default:
24804 gcc_unreachable ();
24807 def_mbuiltin (d->mask, d->name, type, d->code);
24810   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
24811 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24812 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24813 ARM_BUILTIN_ ## CODE)
24815 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24816 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24817 ARM_BUILTIN_ ## CODE)
24819 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24820 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24821 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24822 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24823 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24824 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24825 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24826 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24827 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24829 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24830 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24831 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24832 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24833 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24834 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24836 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24837 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24838 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24839 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24840 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24841 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24843 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24844 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24845 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24846 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24847 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24848 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24850 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24851 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24852 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24853 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24854 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24855 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24857 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24859 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24860 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24861 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24862 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24863 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24864 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24865 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24866 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24867 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24868 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24870 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24871 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24872 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24873 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24874 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24875 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24876 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24877 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24878 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24880 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24881 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24882 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24884 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24885 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24886 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24888 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24889 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24891 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24892 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24893 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24894 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24895 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24896 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24898 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24899 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24900 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24901 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24902 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24903 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24904 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24905 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24906 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24907 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24908 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24909 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24911 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24912 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24913 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24914 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24916 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24917 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24918 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24919 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24920 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24921 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24922 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24924 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24925 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24926 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24928 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24929 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24930 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24931 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24933 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24934 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24935 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24936 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24938 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24939 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24940 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24941 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24943 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24944 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24945 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24946 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24948 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24949 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24950 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24951 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24953 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24954 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24955 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24956 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24958 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24960 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24961 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24962 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24964 #undef iwmmx_mbuiltin
24965 #undef iwmmx2_mbuiltin
24968 static void
24969 arm_init_fp16_builtins (void)
24971 tree fp16_type = make_node (REAL_TYPE);
24972 TYPE_PRECISION (fp16_type) = 16;
24973 layout_type (fp16_type);
24974 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24977 static void
24978 arm_init_crc32_builtins ()
24980 tree si_ftype_si_qi
24981 = build_function_type_list (unsigned_intSI_type_node,
24982 unsigned_intSI_type_node,
24983 unsigned_intQI_type_node, NULL_TREE);
24984 tree si_ftype_si_hi
24985 = build_function_type_list (unsigned_intSI_type_node,
24986 unsigned_intSI_type_node,
24987 unsigned_intHI_type_node, NULL_TREE);
24988 tree si_ftype_si_si
24989 = build_function_type_list (unsigned_intSI_type_node,
24990 unsigned_intSI_type_node,
24991 unsigned_intSI_type_node, NULL_TREE);
24993 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24994 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24995 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24996 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24997 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24998 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24999 arm_builtin_decls[ARM_BUILTIN_CRC32W]
25000 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
25001 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
25002 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
25003 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
25004 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
25005 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
25006 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
25007 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
25008 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
25009 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
25010 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
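/* Usage sketch (illustrative only, not part of this file): once these
   builtins are registered, code compiled for a CRC-capable target
   (e.g. -march=armv8-a+crc) can drive the CRC32 instructions directly:

     unsigned int
     crc32_bytes (unsigned int crc, const unsigned char *buf, unsigned int len)
     {
       while (len--)
         crc = __builtin_arm_crc32b (crc, *buf++);
       return crc;
     }

   __builtin_arm_crc32h and __builtin_arm_crc32w take 16- and 32-bit data
   chunks with the same accumulator-first signature, and the crc32c
   variants use the CRC-32C polynomial.  */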
25013 static void
25014 arm_init_builtins (void)
25016 if (TARGET_REALLY_IWMMXT)
25017 arm_init_iwmmxt_builtins ();
25019 if (TARGET_NEON)
25020 arm_init_neon_builtins ();
25022 if (arm_fp16_format)
25023 arm_init_fp16_builtins ();
25025 if (TARGET_CRC32)
25026 arm_init_crc32_builtins ();
25028 if (TARGET_VFP && TARGET_HARD_FLOAT)
25030 tree ftype_set_fpscr
25031 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
25032 tree ftype_get_fpscr
25033 = build_function_type_list (unsigned_type_node, NULL);
25035 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
25036 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
25037 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25038 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
25039 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
25040 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
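/* Usage sketch (illustrative only): the two builtins registered above give
   user code raw access to the FPSCR on VFP targets, e.g. to set the
   flush-to-zero bit (bit 24 of the FPSCR on VFP/NEON; see the ARM ARM for
   the authoritative layout):

     unsigned int fpscr = __builtin_arm_ldfscr ();
     __builtin_arm_stfscr (fpscr | (1u << 24));
*/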
25044 /* Return the ARM builtin for CODE. */
25046 static tree
25047 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25049 if (code >= ARM_BUILTIN_MAX)
25050 return error_mark_node;
25052 return arm_builtin_decls[code];
25055 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25057 static const char *
25058 arm_invalid_parameter_type (const_tree t)
25060 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25061 return N_("function parameters cannot have __fp16 type");
25062 return NULL;
25065 /* Implement TARGET_INVALID_RETURN_TYPE. */
25067 static const char *
25068 arm_invalid_return_type (const_tree t)
25070 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25071 return N_("functions cannot return __fp16 type");
25072 return NULL;
25075 /* Implement TARGET_PROMOTED_TYPE. */
25077 static tree
25078 arm_promoted_type (const_tree t)
25080 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25081 return float_type_node;
25082 return NULL_TREE;
25085 /* Implement TARGET_CONVERT_TO_TYPE.
25086    Specifically, this hook implements the peculiarity of the ARM
25087    half-precision floating-point C semantics that requires conversions
25088    between __fp16 and double to go through an intermediate conversion to float. */
25090 static tree
25091 arm_convert_to_type (tree type, tree expr)
25093 tree fromtype = TREE_TYPE (expr);
25094 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25095 return NULL_TREE;
25096 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25097 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25098 return convert (type, convert (float_type_node, expr));
25099 return NULL_TREE;
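/* Illustrative sketch of the semantics this hook implements (assuming
   __fp16 support is enabled, e.g. with -mfp16-format=ieee):

     double d = 1.0;
     __fp16 h;

     h = d;    handled as  h = (__fp16) (float) d;
     d = h;    handled as  d = (double) (float) h;

   The widening direction is exact, since float can represent every __fp16
   value; only the narrowing conversion can be affected by the intermediate
   rounding to float.  */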
25102 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25103 This simply adds HFmode as a supported mode; even though we don't
25104 implement arithmetic on this type directly, it's supported by
25105 optabs conversions, much the way the double-word arithmetic is
25106 special-cased in the default hook. */
25108 static bool
25109 arm_scalar_mode_supported_p (machine_mode mode)
25111 if (mode == HFmode)
25112 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25113 else if (ALL_FIXED_POINT_MODE_P (mode))
25114 return true;
25115 else
25116 return default_scalar_mode_supported_p (mode);
25119 /* Errors in the source file can cause expand_expr to return const0_rtx
25120 where we expect a vector. To avoid crashing, use one of the vector
25121 clear instructions. */
25123 static rtx
25124 safe_vector_operand (rtx x, machine_mode mode)
25126 if (x != const0_rtx)
25127 return x;
25128 x = gen_reg_rtx (mode);
25130 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25131 : gen_rtx_SUBREG (DImode, x, 0)));
25132 return x;
25135 /* Function to expand ternary builtins. */
25136 static rtx
25137 arm_expand_ternop_builtin (enum insn_code icode,
25138 tree exp, rtx target)
25140 rtx pat;
25141 tree arg0 = CALL_EXPR_ARG (exp, 0);
25142 tree arg1 = CALL_EXPR_ARG (exp, 1);
25143 tree arg2 = CALL_EXPR_ARG (exp, 2);
25145 rtx op0 = expand_normal (arg0);
25146 rtx op1 = expand_normal (arg1);
25147 rtx op2 = expand_normal (arg2);
25148 rtx op3 = NULL_RTX;
25150 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25151 lane operand depending on endianness. */
25152 bool builtin_sha1cpm_p = false;
25154 if (insn_data[icode].n_operands == 5)
25156 gcc_assert (icode == CODE_FOR_crypto_sha1c
25157 || icode == CODE_FOR_crypto_sha1p
25158 || icode == CODE_FOR_crypto_sha1m);
25159 builtin_sha1cpm_p = true;
25161 machine_mode tmode = insn_data[icode].operand[0].mode;
25162 machine_mode mode0 = insn_data[icode].operand[1].mode;
25163 machine_mode mode1 = insn_data[icode].operand[2].mode;
25164 machine_mode mode2 = insn_data[icode].operand[3].mode;
25167 if (VECTOR_MODE_P (mode0))
25168 op0 = safe_vector_operand (op0, mode0);
25169 if (VECTOR_MODE_P (mode1))
25170 op1 = safe_vector_operand (op1, mode1);
25171 if (VECTOR_MODE_P (mode2))
25172 op2 = safe_vector_operand (op2, mode2);
25174 if (! target
25175 || GET_MODE (target) != tmode
25176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25177 target = gen_reg_rtx (tmode);
25179 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25180 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25181 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25183 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25184 op0 = copy_to_mode_reg (mode0, op0);
25185 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25186 op1 = copy_to_mode_reg (mode1, op1);
25187 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25188 op2 = copy_to_mode_reg (mode2, op2);
25189 if (builtin_sha1cpm_p)
25190 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25192 if (builtin_sha1cpm_p)
25193 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25194 else
25195 pat = GEN_FCN (icode) (target, op0, op1, op2);
25196 if (! pat)
25197 return 0;
25198 emit_insn (pat);
25199 return target;
25202 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25204 static rtx
25205 arm_expand_binop_builtin (enum insn_code icode,
25206 tree exp, rtx target)
25208 rtx pat;
25209 tree arg0 = CALL_EXPR_ARG (exp, 0);
25210 tree arg1 = CALL_EXPR_ARG (exp, 1);
25211 rtx op0 = expand_normal (arg0);
25212 rtx op1 = expand_normal (arg1);
25213 machine_mode tmode = insn_data[icode].operand[0].mode;
25214 machine_mode mode0 = insn_data[icode].operand[1].mode;
25215 machine_mode mode1 = insn_data[icode].operand[2].mode;
25217 if (VECTOR_MODE_P (mode0))
25218 op0 = safe_vector_operand (op0, mode0);
25219 if (VECTOR_MODE_P (mode1))
25220 op1 = safe_vector_operand (op1, mode1);
25222 if (! target
25223 || GET_MODE (target) != tmode
25224 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25225 target = gen_reg_rtx (tmode);
25227 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25228 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25230 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25231 op0 = copy_to_mode_reg (mode0, op0);
25232 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25233 op1 = copy_to_mode_reg (mode1, op1);
25235 pat = GEN_FCN (icode) (target, op0, op1);
25236 if (! pat)
25237 return 0;
25238 emit_insn (pat);
25239 return target;
25242 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25244 static rtx
25245 arm_expand_unop_builtin (enum insn_code icode,
25246 tree exp, rtx target, int do_load)
25248 rtx pat;
25249 tree arg0 = CALL_EXPR_ARG (exp, 0);
25250 rtx op0 = expand_normal (arg0);
25251 rtx op1 = NULL_RTX;
25252 machine_mode tmode = insn_data[icode].operand[0].mode;
25253 machine_mode mode0 = insn_data[icode].operand[1].mode;
25254 bool builtin_sha1h_p = false;
25256 if (insn_data[icode].n_operands == 3)
25258 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25259 builtin_sha1h_p = true;
25262 if (! target
25263 || GET_MODE (target) != tmode
25264 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25265 target = gen_reg_rtx (tmode);
25266 if (do_load)
25267 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25268 else
25270 if (VECTOR_MODE_P (mode0))
25271 op0 = safe_vector_operand (op0, mode0);
25273 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25274 op0 = copy_to_mode_reg (mode0, op0);
25276 if (builtin_sha1h_p)
25277 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25279 if (builtin_sha1h_p)
25280 pat = GEN_FCN (icode) (target, op0, op1);
25281 else
25282 pat = GEN_FCN (icode) (target, op0);
25283 if (! pat)
25284 return 0;
25285 emit_insn (pat);
25286 return target;
25289 typedef enum {
25290 NEON_ARG_COPY_TO_REG,
25291 NEON_ARG_CONSTANT,
25292 NEON_ARG_MEMORY,
25293 NEON_ARG_STOP
25294 } builtin_arg;
25296 #define NEON_MAX_BUILTIN_ARGS 5
25298 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25299 and return an expression for the accessed memory.
25301 The intrinsic function operates on a block of registers that has
25302 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25303 function references the memory at EXP of type TYPE and in mode
25304 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25305 available. */
25307 static tree
25308 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25309 machine_mode reg_mode,
25310 neon_builtin_type_mode type_mode)
25312 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25313 tree elem_type, upper_bound, array_type;
25315 /* Work out the size of the register block in bytes. */
25316 reg_size = GET_MODE_SIZE (reg_mode);
25318 /* Work out the size of each vector in bytes. */
25319 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25320 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25322 /* Work out how many vectors there are. */
25323 gcc_assert (reg_size % vector_size == 0);
25324 nvectors = reg_size / vector_size;
25326 /* Work out the type of each element. */
25327 gcc_assert (POINTER_TYPE_P (type));
25328 elem_type = TREE_TYPE (type);
25330 /* Work out how many elements are being loaded or stored.
25331 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25332 and memory elements; anything else implies a lane load or store. */
25333 if (mem_mode == reg_mode)
25334 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25335 else
25336 nelems = nvectors;
25338 /* Create a type that describes the full access. */
25339 upper_bound = build_int_cst (size_type_node, nelems - 1);
25340 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25342 /* Dereference EXP using that type. */
25343 return fold_build2 (MEM_REF, array_type, exp,
25344 build_int_cst (build_pointer_type (array_type), 0));
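/* Worked example (editorial sketch): for a vld2q-style load of 32-bit
   elements the register block has a 32-byte mode, each vector is a
   16-byte Q register, so nvectors == 2.  With MEM_MODE == REG_MODE the
   access covers 32 / 4 == 8 elements and the MEM_REF built above has an
   8-element array type; for a lane load (MEM_MODE != REG_MODE) only
   nvectors == 2 elements are dereferenced.  */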
25347 /* Expand a Neon builtin, processing its arguments as described by the trailing NEON_ARG_* codes. */
25348 static rtx
25349 arm_expand_neon_args (rtx target, int icode, int have_retval,
25350 neon_builtin_type_mode type_mode,
25351 tree exp, int fcode, ...)
25353 va_list ap;
25354 rtx pat;
25355 tree arg[NEON_MAX_BUILTIN_ARGS];
25356 rtx op[NEON_MAX_BUILTIN_ARGS];
25357 tree arg_type;
25358 tree formals;
25359 machine_mode tmode = insn_data[icode].operand[0].mode;
25360 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25361 machine_mode other_mode;
25362 int argc = 0;
25363 int opno;
25365 if (have_retval
25366 && (!target
25367 || GET_MODE (target) != tmode
25368 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25369 target = gen_reg_rtx (tmode);
25371 va_start (ap, fcode);
25373 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25375 for (;;)
25377 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25379 if (thisarg == NEON_ARG_STOP)
25380 break;
25381 else
25383 opno = argc + have_retval;
25384 mode[argc] = insn_data[icode].operand[opno].mode;
25385 arg[argc] = CALL_EXPR_ARG (exp, argc);
25386 arg_type = TREE_VALUE (formals);
25387 if (thisarg == NEON_ARG_MEMORY)
25389 other_mode = insn_data[icode].operand[1 - opno].mode;
25390 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25391 mode[argc], other_mode,
25392 type_mode);
25395 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM rtx
25396 is returned. */
25397 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25398 (thisarg == NEON_ARG_MEMORY
25399 ? EXPAND_MEMORY : EXPAND_NORMAL));
25401 switch (thisarg)
25403 case NEON_ARG_COPY_TO_REG:
25404 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25405 if (!(*insn_data[icode].operand[opno].predicate)
25406 (op[argc], mode[argc]))
25407 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25408 break;
25410 case NEON_ARG_CONSTANT:
25411 /* FIXME: This error message is somewhat unhelpful. */
25412 if (!(*insn_data[icode].operand[opno].predicate)
25413 (op[argc], mode[argc]))
25414 error ("argument must be a constant");
25415 break;
25417 case NEON_ARG_MEMORY:
25418 /* Check if expand failed. */
25419 if (op[argc] == const0_rtx)
25420 return 0;
25421 gcc_assert (MEM_P (op[argc]));
25422 PUT_MODE (op[argc], mode[argc]);
25423 /* ??? arm_neon.h uses the same built-in functions for signed
25424 and unsigned accesses, casting where necessary. This isn't
25425 alias safe. */
25426 set_mem_alias_set (op[argc], 0);
25427 if (!(*insn_data[icode].operand[opno].predicate)
25428 (op[argc], mode[argc]))
25429 op[argc] = (replace_equiv_address
25430 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25431 break;
25433 case NEON_ARG_STOP:
25434 gcc_unreachable ();
25437 argc++;
25438 formals = TREE_CHAIN (formals);
25442 va_end (ap);
25444 if (have_retval)
25445 switch (argc)
25447 case 1:
25448 pat = GEN_FCN (icode) (target, op[0]);
25449 break;
25451 case 2:
25452 pat = GEN_FCN (icode) (target, op[0], op[1]);
25453 break;
25455 case 3:
25456 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25457 break;
25459 case 4:
25460 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25461 break;
25463 case 5:
25464 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25465 break;
25467 default:
25468 gcc_unreachable ();
25470 else
25471 switch (argc)
25473 case 1:
25474 pat = GEN_FCN (icode) (op[0]);
25475 break;
25477 case 2:
25478 pat = GEN_FCN (icode) (op[0], op[1]);
25479 break;
25481 case 3:
25482 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25483 break;
25485 case 4:
25486 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25487 break;
25489 case 5:
25490 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25491 break;
25493 default:
25494 gcc_unreachable ();
25497 if (!pat)
25498 return 0;
25500 emit_insn (pat);
25502 return target;
25505 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25506 constants defined per-instruction or per instruction-variant. Instead, the
25507 required info is looked up in the table neon_builtin_data. */
25508 static rtx
25509 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25511 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25512 neon_itype itype = d->itype;
25513 enum insn_code icode = d->code;
25514 neon_builtin_type_mode type_mode = d->mode;
25516 switch (itype)
25518 case NEON_UNOP:
25519 case NEON_CONVERT:
25520 case NEON_DUPLANE:
25521 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25522 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25524 case NEON_BINOP:
25525 case NEON_SETLANE:
25526 case NEON_SCALARMUL:
25527 case NEON_SCALARMULL:
25528 case NEON_SCALARMULH:
25529 case NEON_SHIFTINSERT:
25530 case NEON_LOGICBINOP:
25531 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25532 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25533 NEON_ARG_STOP);
25535 case NEON_TERNOP:
25536 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25537 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25538 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25540 case NEON_GETLANE:
25541 case NEON_FIXCONV:
25542 case NEON_SHIFTIMM:
25543 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25544 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25545 NEON_ARG_STOP);
25547 case NEON_CREATE:
25548 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25549 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25551 case NEON_DUP:
25552 case NEON_RINT:
25553 case NEON_SPLIT:
25554 case NEON_FLOAT_WIDEN:
25555 case NEON_FLOAT_NARROW:
25556 case NEON_BSWAP:
25557 case NEON_REINTERP:
25558 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25559 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25561 case NEON_COPYSIGNF:
25562 case NEON_COMBINE:
25563 case NEON_VTBL:
25564 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25565 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25567 case NEON_LANEMUL:
25568 case NEON_LANEMULL:
25569 case NEON_LANEMULH:
25570 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25571 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25572 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25574 case NEON_LANEMAC:
25575 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25576 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25577 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25579 case NEON_SHIFTACC:
25580 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25581 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25582 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25584 case NEON_SCALARMAC:
25585 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25586 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25587 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25589 case NEON_SELECT:
25590 case NEON_VTBX:
25591 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25592 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25593 NEON_ARG_STOP);
25595 case NEON_LOAD1:
25596 case NEON_LOADSTRUCT:
25597 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25598 NEON_ARG_MEMORY, NEON_ARG_STOP);
25600 case NEON_LOAD1LANE:
25601 case NEON_LOADSTRUCTLANE:
25602 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25603 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25604 NEON_ARG_STOP);
25606 case NEON_STORE1:
25607 case NEON_STORESTRUCT:
25608 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25609 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25611 case NEON_STORE1LANE:
25612 case NEON_STORESTRUCTLANE:
25613 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25614 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25615 NEON_ARG_STOP);
25618 gcc_unreachable ();
25621 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25622 void
25623 neon_reinterpret (rtx dest, rtx src)
25625 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25628 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25629 not to early-clobber SRC registers in the process.
25631 We assume that the operands described by SRC and DEST represent a
25632 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25633 number of components into which the copy has been decomposed. */
25634 void
25635 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25637 unsigned int i;
25639 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25640 || REGNO (operands[0]) < REGNO (operands[1]))
25642 for (i = 0; i < count; i++)
25644 operands[2 * i] = dest[i];
25645 operands[2 * i + 1] = src[i];
25648 else
25650 for (i = 0; i < count; i++)
25652 operands[2 * i] = dest[count - i - 1];
25653 operands[2 * i + 1] = src[count - i - 1];
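/* Worked example (editorial): copying {d0, d1} <- {d1, d2} overlaps but
   has REGNO (dest) < REGNO (src), so the forward order d0 <- d1 followed
   by d1 <- d2 is safe.  Copying {d2, d3} <- {d1, d2} instead takes the
   reversed order, d3 <- d2 first and then d2 <- d1, so that d2 is read
   before it is overwritten.  */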
25658 /* Split a vcombine of op[1] and op[2] into op[0] into moves of the two halves. */
25660 void
25661 neon_split_vcombine (rtx operands[3])
25663 unsigned int dest = REGNO (operands[0]);
25664 unsigned int src1 = REGNO (operands[1]);
25665 unsigned int src2 = REGNO (operands[2]);
25666 machine_mode halfmode = GET_MODE (operands[1]);
25667 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25668 rtx destlo, desthi;
25670 if (src1 == dest && src2 == dest + halfregs)
25672 /* No-op move. Can't split to nothing; emit something. */
25673 emit_note (NOTE_INSN_DELETED);
25674 return;
25677 /* Preserve register attributes for variable tracking. */
25678 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25679 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25680 GET_MODE_SIZE (halfmode));
25682 /* Special case of reversed high/low parts. Use VSWP. */
25683 if (src2 == dest && src1 == dest + halfregs)
25685 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25686 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25687 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25688 return;
25691 if (!reg_overlap_mentioned_p (operands[2], destlo))
25693 /* Try to avoid unnecessary moves if part of the result
25694 is in the right place already. */
25695 if (src1 != dest)
25696 emit_move_insn (destlo, operands[1]);
25697 if (src2 != dest + halfregs)
25698 emit_move_insn (desthi, operands[2]);
25700 else
25702 if (src2 != dest + halfregs)
25703 emit_move_insn (desthi, operands[2]);
25704 if (src1 != dest)
25705 emit_move_insn (destlo, operands[1]);
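/* Worked example (editorial): for q0 = vcombine (d0, d1) both halves are
   already in place (src1 == dest, src2 == dest + halfregs), so only a
   deleted-insn note is emitted.  For q0 = vcombine (d1, d0) the halves
   are exactly swapped and the single parallel built above lets a vswp
   pattern match, avoiding a scratch register.  */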
25709 /* Expand an expression EXP that calls a built-in function,
25710 with result going to TARGET if that's convenient
25711 (and in mode MODE if that's convenient).
25712 SUBTARGET may be used as the target for computing one of EXP's operands.
25713 IGNORE is nonzero if the value is to be ignored. */
25715 static rtx
25716 arm_expand_builtin (tree exp,
25717 rtx target,
25718 rtx subtarget ATTRIBUTE_UNUSED,
25719 machine_mode mode ATTRIBUTE_UNUSED,
25720 int ignore ATTRIBUTE_UNUSED)
25722 const struct builtin_description * d;
25723 enum insn_code icode;
25724 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25725 tree arg0;
25726 tree arg1;
25727 tree arg2;
25728 rtx op0;
25729 rtx op1;
25730 rtx op2;
25731 rtx pat;
25732 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25733 size_t i;
25734 machine_mode tmode;
25735 machine_mode mode0;
25736 machine_mode mode1;
25737 machine_mode mode2;
25738 int opint;
25739 int selector;
25740 int mask;
25741 int imm;
25743 if (fcode >= ARM_BUILTIN_NEON_BASE)
25744 return arm_expand_neon_builtin (fcode, exp, target);
25746 switch (fcode)
25748 case ARM_BUILTIN_GET_FPSCR:
25749 case ARM_BUILTIN_SET_FPSCR:
25750 if (fcode == ARM_BUILTIN_GET_FPSCR)
25752 icode = CODE_FOR_get_fpscr;
25753 target = gen_reg_rtx (SImode);
25754 pat = GEN_FCN (icode) (target);
25756 else
25758 target = NULL_RTX;
25759 icode = CODE_FOR_set_fpscr;
25760 arg0 = CALL_EXPR_ARG (exp, 0);
25761 op0 = expand_normal (arg0);
25762 pat = GEN_FCN (icode) (op0);
25764 emit_insn (pat);
25765 return target;
25767 case ARM_BUILTIN_TEXTRMSB:
25768 case ARM_BUILTIN_TEXTRMUB:
25769 case ARM_BUILTIN_TEXTRMSH:
25770 case ARM_BUILTIN_TEXTRMUH:
25771 case ARM_BUILTIN_TEXTRMSW:
25772 case ARM_BUILTIN_TEXTRMUW:
25773 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25774 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25775 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25776 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25777 : CODE_FOR_iwmmxt_textrmw);
25779 arg0 = CALL_EXPR_ARG (exp, 0);
25780 arg1 = CALL_EXPR_ARG (exp, 1);
25781 op0 = expand_normal (arg0);
25782 op1 = expand_normal (arg1);
25783 tmode = insn_data[icode].operand[0].mode;
25784 mode0 = insn_data[icode].operand[1].mode;
25785 mode1 = insn_data[icode].operand[2].mode;
25787 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25788 op0 = copy_to_mode_reg (mode0, op0);
25789 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25791 /* @@@ better error message */
25792 error ("selector must be an immediate");
25793 return gen_reg_rtx (tmode);
25796 opint = INTVAL (op1);
25797 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25799 if (opint > 7 || opint < 0)
25800 error ("the range of selector should be in 0 to 7");
25802 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25804 if (opint > 3 || opint < 0)
25805 error ("the range of selector should be in 0 to 3");
25807 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25809 if (opint > 1 || opint < 0)
25810 error ("the range of selector should be in 0 to 1");
25813 if (target == 0
25814 || GET_MODE (target) != tmode
25815 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25816 target = gen_reg_rtx (tmode);
25817 pat = GEN_FCN (icode) (target, op0, op1);
25818 if (! pat)
25819 return 0;
25820 emit_insn (pat);
25821 return target;
25823 case ARM_BUILTIN_WALIGNI:
25824 /* If op2 is an immediate, use waligni, otherwise use walignr. */
25825 arg0 = CALL_EXPR_ARG (exp, 0);
25826 arg1 = CALL_EXPR_ARG (exp, 1);
25827 arg2 = CALL_EXPR_ARG (exp, 2);
25828 op0 = expand_normal (arg0);
25829 op1 = expand_normal (arg1);
25830 op2 = expand_normal (arg2);
25831 if (CONST_INT_P (op2))
25833 icode = CODE_FOR_iwmmxt_waligni;
25834 tmode = insn_data[icode].operand[0].mode;
25835 mode0 = insn_data[icode].operand[1].mode;
25836 mode1 = insn_data[icode].operand[2].mode;
25837 mode2 = insn_data[icode].operand[3].mode;
25838 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25839 op0 = copy_to_mode_reg (mode0, op0);
25840 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25841 op1 = copy_to_mode_reg (mode1, op1);
25842 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25843 selector = INTVAL (op2);
25844 if (selector > 7 || selector < 0)
25845 error ("the range of selector should be in 0 to 7");
25847 else
25849 icode = CODE_FOR_iwmmxt_walignr;
25850 tmode = insn_data[icode].operand[0].mode;
25851 mode0 = insn_data[icode].operand[1].mode;
25852 mode1 = insn_data[icode].operand[2].mode;
25853 mode2 = insn_data[icode].operand[3].mode;
25854 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25855 op0 = copy_to_mode_reg (mode0, op0);
25856 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25857 op1 = copy_to_mode_reg (mode1, op1);
25858 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25859 op2 = copy_to_mode_reg (mode2, op2);
25861 if (target == 0
25862 || GET_MODE (target) != tmode
25863 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25864 target = gen_reg_rtx (tmode);
25865 pat = GEN_FCN (icode) (target, op0, op1, op2);
25866 if (!pat)
25867 return 0;
25868 emit_insn (pat);
25869 return target;
25871 case ARM_BUILTIN_TINSRB:
25872 case ARM_BUILTIN_TINSRH:
25873 case ARM_BUILTIN_TINSRW:
25874 case ARM_BUILTIN_WMERGE:
25875 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25876 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25877 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25878 : CODE_FOR_iwmmxt_tinsrw);
25879 arg0 = CALL_EXPR_ARG (exp, 0);
25880 arg1 = CALL_EXPR_ARG (exp, 1);
25881 arg2 = CALL_EXPR_ARG (exp, 2);
25882 op0 = expand_normal (arg0);
25883 op1 = expand_normal (arg1);
25884 op2 = expand_normal (arg2);
25885 tmode = insn_data[icode].operand[0].mode;
25886 mode0 = insn_data[icode].operand[1].mode;
25887 mode1 = insn_data[icode].operand[2].mode;
25888 mode2 = insn_data[icode].operand[3].mode;
25890 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25891 op0 = copy_to_mode_reg (mode0, op0);
25892 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25893 op1 = copy_to_mode_reg (mode1, op1);
25894 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25896 error ("selector must be an immediate");
25897 return const0_rtx;
25899 if (icode == CODE_FOR_iwmmxt_wmerge)
25901 selector = INTVAL (op2);
25902 if (selector > 7 || selector < 0)
25903 error ("the range of selector should be in 0 to 7");
25905 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25906 || (icode == CODE_FOR_iwmmxt_tinsrh)
25907 || (icode == CODE_FOR_iwmmxt_tinsrw))
25909 mask = 0x01;
25910 selector = INTVAL (op2);
25911 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25912 error ("the range of selector should be in 0 to 7");
25913 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25914 error ("the range of selector should be in 0 to 3");
25915 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25916 error ("the range of selector should be in 0 to 1");
25917 mask <<= selector;
25918 op2 = GEN_INT (mask);
25920 if (target == 0
25921 || GET_MODE (target) != tmode
25922 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25923 target = gen_reg_rtx (tmode);
25924 pat = GEN_FCN (icode) (target, op0, op1, op2);
25925 if (! pat)
25926 return 0;
25927 emit_insn (pat);
25928 return target;
25930 case ARM_BUILTIN_SETWCGR0:
25931 case ARM_BUILTIN_SETWCGR1:
25932 case ARM_BUILTIN_SETWCGR2:
25933 case ARM_BUILTIN_SETWCGR3:
25934 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25935 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25936 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25937 : CODE_FOR_iwmmxt_setwcgr3);
25938 arg0 = CALL_EXPR_ARG (exp, 0);
25939 op0 = expand_normal (arg0);
25940 mode0 = insn_data[icode].operand[0].mode;
25941 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25942 op0 = copy_to_mode_reg (mode0, op0);
25943 pat = GEN_FCN (icode) (op0);
25944 if (!pat)
25945 return 0;
25946 emit_insn (pat);
25947 return 0;
25949 case ARM_BUILTIN_GETWCGR0:
25950 case ARM_BUILTIN_GETWCGR1:
25951 case ARM_BUILTIN_GETWCGR2:
25952 case ARM_BUILTIN_GETWCGR3:
25953 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25954 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25955 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25956 : CODE_FOR_iwmmxt_getwcgr3);
25957 tmode = insn_data[icode].operand[0].mode;
25958 if (target == 0
25959 || GET_MODE (target) != tmode
25960 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25961 target = gen_reg_rtx (tmode);
25962 pat = GEN_FCN (icode) (target);
25963 if (!pat)
25964 return 0;
25965 emit_insn (pat);
25966 return target;
25968 case ARM_BUILTIN_WSHUFH:
25969 icode = CODE_FOR_iwmmxt_wshufh;
25970 arg0 = CALL_EXPR_ARG (exp, 0);
25971 arg1 = CALL_EXPR_ARG (exp, 1);
25972 op0 = expand_normal (arg0);
25973 op1 = expand_normal (arg1);
25974 tmode = insn_data[icode].operand[0].mode;
25975 mode1 = insn_data[icode].operand[1].mode;
25976 mode2 = insn_data[icode].operand[2].mode;
25978 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25979 op0 = copy_to_mode_reg (mode1, op0);
25980 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25982 error ("mask must be an immediate");
25983 return const0_rtx;
25985 selector = INTVAL (op1);
25986 if (selector < 0 || selector > 255)
25987 error ("the range of mask should be in 0 to 255");
25988 if (target == 0
25989 || GET_MODE (target) != tmode
25990 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25991 target = gen_reg_rtx (tmode);
25992 pat = GEN_FCN (icode) (target, op0, op1);
25993 if (! pat)
25994 return 0;
25995 emit_insn (pat);
25996 return target;
25998 case ARM_BUILTIN_WMADDS:
25999 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
26000 case ARM_BUILTIN_WMADDSX:
26001 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
26002 case ARM_BUILTIN_WMADDSN:
26003 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
26004 case ARM_BUILTIN_WMADDU:
26005 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
26006 case ARM_BUILTIN_WMADDUX:
26007 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
26008 case ARM_BUILTIN_WMADDUN:
26009 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
26010 case ARM_BUILTIN_WSADBZ:
26011 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
26012 case ARM_BUILTIN_WSADHZ:
26013 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
26015 /* Several three-argument builtins. */
26016 case ARM_BUILTIN_WMACS:
26017 case ARM_BUILTIN_WMACU:
26018 case ARM_BUILTIN_TMIA:
26019 case ARM_BUILTIN_TMIAPH:
26020 case ARM_BUILTIN_TMIATT:
26021 case ARM_BUILTIN_TMIATB:
26022 case ARM_BUILTIN_TMIABT:
26023 case ARM_BUILTIN_TMIABB:
26024 case ARM_BUILTIN_WQMIABB:
26025 case ARM_BUILTIN_WQMIABT:
26026 case ARM_BUILTIN_WQMIATB:
26027 case ARM_BUILTIN_WQMIATT:
26028 case ARM_BUILTIN_WQMIABBN:
26029 case ARM_BUILTIN_WQMIABTN:
26030 case ARM_BUILTIN_WQMIATBN:
26031 case ARM_BUILTIN_WQMIATTN:
26032 case ARM_BUILTIN_WMIABB:
26033 case ARM_BUILTIN_WMIABT:
26034 case ARM_BUILTIN_WMIATB:
26035 case ARM_BUILTIN_WMIATT:
26036 case ARM_BUILTIN_WMIABBN:
26037 case ARM_BUILTIN_WMIABTN:
26038 case ARM_BUILTIN_WMIATBN:
26039 case ARM_BUILTIN_WMIATTN:
26040 case ARM_BUILTIN_WMIAWBB:
26041 case ARM_BUILTIN_WMIAWBT:
26042 case ARM_BUILTIN_WMIAWTB:
26043 case ARM_BUILTIN_WMIAWTT:
26044 case ARM_BUILTIN_WMIAWBBN:
26045 case ARM_BUILTIN_WMIAWBTN:
26046 case ARM_BUILTIN_WMIAWTBN:
26047 case ARM_BUILTIN_WMIAWTTN:
26048 case ARM_BUILTIN_WSADB:
26049 case ARM_BUILTIN_WSADH:
26050 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26051 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26052 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26053 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26054 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26055 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26056 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26057 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26058 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26059 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26060 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26061 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26062 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26063 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26064 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26065 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26066 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26067 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26068 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26069 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26070 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26071 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26072 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26073 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26074 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26075 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26076 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26077 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26078 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26079 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26080 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26081 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26082 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26083 : CODE_FOR_iwmmxt_wsadh);
26084 arg0 = CALL_EXPR_ARG (exp, 0);
26085 arg1 = CALL_EXPR_ARG (exp, 1);
26086 arg2 = CALL_EXPR_ARG (exp, 2);
26087 op0 = expand_normal (arg0);
26088 op1 = expand_normal (arg1);
26089 op2 = expand_normal (arg2);
26090 tmode = insn_data[icode].operand[0].mode;
26091 mode0 = insn_data[icode].operand[1].mode;
26092 mode1 = insn_data[icode].operand[2].mode;
26093 mode2 = insn_data[icode].operand[3].mode;
26095 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26096 op0 = copy_to_mode_reg (mode0, op0);
26097 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26098 op1 = copy_to_mode_reg (mode1, op1);
26099 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26100 op2 = copy_to_mode_reg (mode2, op2);
26101 if (target == 0
26102 || GET_MODE (target) != tmode
26103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26104 target = gen_reg_rtx (tmode);
26105 pat = GEN_FCN (icode) (target, op0, op1, op2);
26106 if (! pat)
26107 return 0;
26108 emit_insn (pat);
26109 return target;
26111 case ARM_BUILTIN_WZERO:
26112 target = gen_reg_rtx (DImode);
26113 emit_insn (gen_iwmmxt_clrdi (target));
26114 return target;
26116 case ARM_BUILTIN_WSRLHI:
26117 case ARM_BUILTIN_WSRLWI:
26118 case ARM_BUILTIN_WSRLDI:
26119 case ARM_BUILTIN_WSLLHI:
26120 case ARM_BUILTIN_WSLLWI:
26121 case ARM_BUILTIN_WSLLDI:
26122 case ARM_BUILTIN_WSRAHI:
26123 case ARM_BUILTIN_WSRAWI:
26124 case ARM_BUILTIN_WSRADI:
26125 case ARM_BUILTIN_WRORHI:
26126 case ARM_BUILTIN_WRORWI:
26127 case ARM_BUILTIN_WRORDI:
26128 case ARM_BUILTIN_WSRLH:
26129 case ARM_BUILTIN_WSRLW:
26130 case ARM_BUILTIN_WSRLD:
26131 case ARM_BUILTIN_WSLLH:
26132 case ARM_BUILTIN_WSLLW:
26133 case ARM_BUILTIN_WSLLD:
26134 case ARM_BUILTIN_WSRAH:
26135 case ARM_BUILTIN_WSRAW:
26136 case ARM_BUILTIN_WSRAD:
26137 case ARM_BUILTIN_WRORH:
26138 case ARM_BUILTIN_WRORW:
26139 case ARM_BUILTIN_WRORD:
26140 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26141 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26142 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26143 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26144 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26145 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26146 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26147 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26148 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26149 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26150 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26151 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26152 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26153 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26154 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26155 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26156 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26157 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26158 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26159 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26160 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26161 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26162 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26163 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26164 : CODE_FOR_nothing);
26165 arg1 = CALL_EXPR_ARG (exp, 1);
26166 op1 = expand_normal (arg1);
26167 if (GET_MODE (op1) == VOIDmode)
26169 imm = INTVAL (op1);
26170 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26171 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26172 && (imm < 0 || imm > 32))
26174 if (fcode == ARM_BUILTIN_WRORHI)
26175 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26176 else if (fcode == ARM_BUILTIN_WRORWI)
26177 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26178 else if (fcode == ARM_BUILTIN_WRORH)
26179 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26180 else
26181 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26183 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26184 && (imm < 0 || imm > 64))
26186 if (fcode == ARM_BUILTIN_WRORDI)
26187 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26188 else
26189 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26191 else if (imm < 0)
26193 if (fcode == ARM_BUILTIN_WSRLHI)
26194 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26195 else if (fcode == ARM_BUILTIN_WSRLWI)
26196 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26197 else if (fcode == ARM_BUILTIN_WSRLDI)
26198 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26199 else if (fcode == ARM_BUILTIN_WSLLHI)
26200 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26201 else if (fcode == ARM_BUILTIN_WSLLWI)
26202 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26203 else if (fcode == ARM_BUILTIN_WSLLDI)
26204 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26205 else if (fcode == ARM_BUILTIN_WSRAHI)
26206 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26207 else if (fcode == ARM_BUILTIN_WSRAWI)
26208 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26209 else if (fcode == ARM_BUILTIN_WSRADI)
26210 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26211 else if (fcode == ARM_BUILTIN_WSRLH)
26212 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26213 else if (fcode == ARM_BUILTIN_WSRLW)
26214 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26215 else if (fcode == ARM_BUILTIN_WSRLD)
26216 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26217 else if (fcode == ARM_BUILTIN_WSLLH)
26218 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26219 else if (fcode == ARM_BUILTIN_WSLLW)
26220 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26221 else if (fcode == ARM_BUILTIN_WSLLD)
26222 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26223 else if (fcode == ARM_BUILTIN_WSRAH)
26224 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26225 else if (fcode == ARM_BUILTIN_WSRAW)
26226 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26227 else
26228 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26231 return arm_expand_binop_builtin (icode, exp, target);
26233 default:
26234 break;
26237 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26238 if (d->code == (const enum arm_builtins) fcode)
26239 return arm_expand_binop_builtin (d->icode, exp, target);
26241 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26242 if (d->code == (const enum arm_builtins) fcode)
26243 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26245 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26246 if (d->code == (const enum arm_builtins) fcode)
26247 return arm_expand_ternop_builtin (d->icode, exp, target);
26249 /* @@@ Should really do something sensible here. */
26250 return NULL_RTX;
26253 /* Return the number (counting from 0) of
26254 the least significant set bit in MASK. */
26256 inline static int
26257 number_of_first_bit_set (unsigned mask)
26259 return ctz_hwi (mask);
26262 /* Like emit_multi_reg_push, but allowing for a different set of
26263 registers to be described as saved. MASK is the set of registers
26264 to be saved; REAL_REGS is the set of registers to be described as
26265 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26267 static rtx_insn *
26268 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26270 unsigned long regno;
26271 rtx par[10], tmp, reg;
26272 rtx_insn *insn;
26273 int i, j;
26275 /* Build the parallel of the registers actually being stored. */
26276 for (i = 0; mask; ++i, mask &= mask - 1)
26278 regno = ctz_hwi (mask);
26279 reg = gen_rtx_REG (SImode, regno);
26281 if (i == 0)
26282 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26283 else
26284 tmp = gen_rtx_USE (VOIDmode, reg);
26286 par[i] = tmp;
26289 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26290 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26291 tmp = gen_frame_mem (BLKmode, tmp);
26292 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26293 par[0] = tmp;
26295 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26296 insn = emit_insn (tmp);
26298 /* Always build the stack adjustment note for unwind info. */
26299 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26300 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26301 par[0] = tmp;
26303 /* Build the parallel of the registers recorded as saved for unwind. */
26304 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26306 regno = ctz_hwi (real_regs);
26307 reg = gen_rtx_REG (SImode, regno);
26309 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26310 tmp = gen_frame_mem (SImode, tmp);
26311 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26312 RTX_FRAME_RELATED_P (tmp) = 1;
26313 par[j + 1] = tmp;
26316 if (j == 0)
26317 tmp = par[0];
26318 else
26320 RTX_FRAME_RELATED_P (par[0]) = 1;
26321 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26324 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26326 return insn;
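/* Illustrative sketch (editorial): for MASK covering r4, r5 and lr the
   insn emitted above has roughly the form

     (parallel
       [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) (const_int -12))))
             (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
        (use (reg r5))
        (use (reg lr))])

   while the attached REG_FRAME_RELATED_EXPR note re-describes the store
   as a plain stack-pointer decrement plus one (set (mem ...) (reg ...))
   per register in REAL_REGS, which is what the unwind machinery reads.  */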
26329 /* Emit code to push or pop registers to or from the stack. F is the
26330 assembly file. MASK is the registers to pop. */
26331 static void
26332 thumb_pop (FILE *f, unsigned long mask)
26334 int regno;
26335 int lo_mask = mask & 0xFF;
26336 int pushed_words = 0;
26338 gcc_assert (mask);
26340 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26342 /* Special case.  Do not generate a POP PC statement here; do it in
26343 thumb_exit ().  */
26344 thumb_exit (f, -1);
26345 return;
26348 fprintf (f, "\tpop\t{");
26350 /* Look at the low registers first. */
26351 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26353 if (lo_mask & 1)
26355 asm_fprintf (f, "%r", regno);
26357 if ((lo_mask & ~1) != 0)
26358 fprintf (f, ", ");
26360 pushed_words++;
26364 if (mask & (1 << PC_REGNUM))
26366 /* Catch popping the PC. */
26367 if (TARGET_INTERWORK || TARGET_BACKTRACE
26368 || crtl->calls_eh_return)
26370 /* The PC is never popped directly; instead
26371 it is popped into r3 and then BX is used. */
26372 fprintf (f, "}\n");
26374 thumb_exit (f, -1);
26376 return;
26378 else
26380 if (mask & 0xFF)
26381 fprintf (f, ", ");
26383 asm_fprintf (f, "%r", PC_REGNUM);
26387 fprintf (f, "}\n");
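/* Illustrative output (editorial): a MASK of r4 and r5 prints
   "pop {r4, r5}"; if the mask also includes PC_REGNUM and neither
   interworking, backtrace nor an EH return is involved, the PC is
   appended and the result is "pop {r4, r5, pc}".  Otherwise the PC is
   handled through thumb_exit as described above.  */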
26390 /* Generate code to return from a thumb function.
26391 If 'reg_containing_return_addr' is -1, then the return address is
26392 actually on the stack, at the stack pointer. */
26393 static void
26394 thumb_exit (FILE *f, int reg_containing_return_addr)
26396 unsigned regs_available_for_popping;
26397 unsigned regs_to_pop;
26398 int pops_needed;
26399 unsigned available;
26400 unsigned required;
26401 machine_mode mode;
26402 int size;
26403 int restore_a4 = FALSE;
26405 /* Compute the registers we need to pop. */
26406 regs_to_pop = 0;
26407 pops_needed = 0;
26409 if (reg_containing_return_addr == -1)
26411 regs_to_pop |= 1 << LR_REGNUM;
26412 ++pops_needed;
26415 if (TARGET_BACKTRACE)
26417 /* Restore the (ARM) frame pointer and stack pointer. */
26418 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26419 pops_needed += 2;
26422 /* If there is nothing to pop then just emit the BX instruction and
26423 return. */
26424 if (pops_needed == 0)
26426 if (crtl->calls_eh_return)
26427 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26429 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26430 return;
26432 /* Otherwise if we are not supporting interworking and we have not created
26433 a backtrace structure and the function was not entered in ARM mode then
26434 just pop the return address straight into the PC. */
26435 else if (!TARGET_INTERWORK
26436 && !TARGET_BACKTRACE
26437 && !is_called_in_ARM_mode (current_function_decl)
26438 && !crtl->calls_eh_return)
26440 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26441 return;
26444 /* Find out how many of the (return) argument registers we can corrupt. */
26445 regs_available_for_popping = 0;
26447 /* If returning via __builtin_eh_return, the bottom three registers
26448 all contain information needed for the return. */
26449 if (crtl->calls_eh_return)
26450 size = 12;
26451 else
26453 /* Deduce the registers used from the function's
26454 return value.  This is more reliable than examining
26455 df_regs_ever_live_p () because that will be set if the register is
26456 ever used in the function, not just if the register is used
26457 to hold a return value. */
26459 if (crtl->return_rtx != 0)
26460 mode = GET_MODE (crtl->return_rtx);
26461 else
26462 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26464 size = GET_MODE_SIZE (mode);
26466 if (size == 0)
26468 /* In a void function we can use any argument register.
26469 In a function that returns a structure on the stack
26470 we can use the second and third argument registers. */
26471 if (mode == VOIDmode)
26472 regs_available_for_popping =
26473 (1 << ARG_REGISTER (1))
26474 | (1 << ARG_REGISTER (2))
26475 | (1 << ARG_REGISTER (3));
26476 else
26477 regs_available_for_popping =
26478 (1 << ARG_REGISTER (2))
26479 | (1 << ARG_REGISTER (3));
26481 else if (size <= 4)
26482 regs_available_for_popping =
26483 (1 << ARG_REGISTER (2))
26484 | (1 << ARG_REGISTER (3));
26485 else if (size <= 8)
26486 regs_available_for_popping =
26487 (1 << ARG_REGISTER (3));
26490 /* Match registers to be popped with registers into which we pop them. */
26491 for (available = regs_available_for_popping,
26492 required = regs_to_pop;
26493 required != 0 && available != 0;
26494 available &= ~(available & - available),
26495 required &= ~(required & - required))
26496 -- pops_needed;
26498 /* If we have any popping registers left over, remove them. */
26499 if (available > 0)
26500 regs_available_for_popping &= ~available;
26502 /* Otherwise if we need another popping register we can use
26503 the fourth argument register. */
26504 else if (pops_needed)
26506 /* If we have not found any free argument registers and
26507 reg a4 contains the return address, we must move it. */
26508 if (regs_available_for_popping == 0
26509 && reg_containing_return_addr == LAST_ARG_REGNUM)
26511 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26512 reg_containing_return_addr = LR_REGNUM;
26514 else if (size > 12)
26516 /* Register a4 is being used to hold part of the return value,
26517 but we have dire need of a free, low register. */
26518 restore_a4 = TRUE;
26520 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26523 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26525 /* The fourth argument register is available. */
26526 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26528 --pops_needed;
26532 /* Pop as many registers as we can. */
26533 thumb_pop (f, regs_available_for_popping);
26535 /* Process the registers we popped. */
26536 if (reg_containing_return_addr == -1)
26538 /* The return address was popped into the lowest numbered register. */
26539 regs_to_pop &= ~(1 << LR_REGNUM);
26541 reg_containing_return_addr =
26542 number_of_first_bit_set (regs_available_for_popping);
26544 /* Remove this register from the mask of available registers, so that
26545 the return address will not be corrupted by further pops. */
26546 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26549 /* If we popped other registers then handle them here. */
26550 if (regs_available_for_popping)
26552 int frame_pointer;
26554 /* Work out which register currently contains the frame pointer. */
26555 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26557 /* Move it into the correct place. */
26558 asm_fprintf (f, "\tmov\t%r, %r\n",
26559 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26561 /* (Temporarily) remove it from the mask of popped registers. */
26562 regs_available_for_popping &= ~(1 << frame_pointer);
26563 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26565 if (regs_available_for_popping)
26567 int stack_pointer;
26569 /* We popped the stack pointer as well;
26570 find the register that contains it. */
26571 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26573 /* Move it into the stack register. */
26574 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26576 /* At this point we have popped all necessary registers, so
26577 do not worry about restoring regs_available_for_popping
26578 to its correct value:
26580 assert (pops_needed == 0)
26581 assert (regs_available_for_popping == (1 << frame_pointer))
26582 assert (regs_to_pop == (1 << STACK_POINTER)) */
26584 else
26586 /* Since we have just moved the popped value into the frame
26587 pointer, the popping register is available for reuse, and
26588 we know that we still have the stack pointer left to pop. */
26589 regs_available_for_popping |= (1 << frame_pointer);
26593 /* If we still have registers left on the stack, but we no longer have
26594 any registers into which we can pop them, then we must move the return
26595 address into the link register and make available the register that
26596 contained it. */
26597 if (regs_available_for_popping == 0 && pops_needed > 0)
26599 regs_available_for_popping |= 1 << reg_containing_return_addr;
26601 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26602 reg_containing_return_addr);
26604 reg_containing_return_addr = LR_REGNUM;
26607 /* If we have registers left on the stack then pop some more.
26608 We know that at most we will want to pop FP and SP. */
26609 if (pops_needed > 0)
26611 int popped_into;
26612 int move_to;
26614 thumb_pop (f, regs_available_for_popping);
26616 /* We have popped either FP or SP.
26617 Move whichever one it is into the correct register. */
26618 popped_into = number_of_first_bit_set (regs_available_for_popping);
26619 move_to = number_of_first_bit_set (regs_to_pop);
26621 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26623 regs_to_pop &= ~(1 << move_to);
26625 --pops_needed;
26628 /* If we still have not popped everything then we must have only
26629 had one register available to us and we are now popping the SP. */
26630 if (pops_needed > 0)
26632 int popped_into;
26634 thumb_pop (f, regs_available_for_popping);
26636 popped_into = number_of_first_bit_set (regs_available_for_popping);
26638 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26640 assert (regs_to_pop == (1 << STACK_POINTER))
26641 assert (pops_needed == 1)
26645 /* If necessary restore the a4 register. */
26646 if (restore_a4)
26648 if (reg_containing_return_addr != LR_REGNUM)
26650 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26651 reg_containing_return_addr = LR_REGNUM;
26654 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26657 if (crtl->calls_eh_return)
26658 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26660 /* Return to caller. */
26661 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
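/* Illustrative output (editorial): for an interworking function that
   returns void, with the return address still on the stack, the code
   above pops the address into the lowest free argument register and
   branches through it:

     pop  {r0}
     bx   r0

   Without interworking or a backtrace structure the earlier short-cut
   emits a single "pop {pc}" instead.  */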
26664 /* Scan INSN just before assembler is output for it.
26665 For Thumb-1, we track the status of the condition codes; this
26666 information is used in the cbranchsi4_insn pattern. */
26667 void
26668 thumb1_final_prescan_insn (rtx_insn *insn)
26670 if (flag_print_asm_name)
26671 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26672 INSN_ADDRESSES (INSN_UID (insn)));
26673 /* Don't overwrite the previous setter when we get to a cbranch. */
26674 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26676 enum attr_conds conds;
26678 if (cfun->machine->thumb1_cc_insn)
26680 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26681 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26682 CC_STATUS_INIT;
26684 conds = get_attr_conds (insn);
26685 if (conds == CONDS_SET)
26687 rtx set = single_set (insn);
26688 cfun->machine->thumb1_cc_insn = insn;
26689 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26690 cfun->machine->thumb1_cc_op1 = const0_rtx;
26691 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26692 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26694 rtx src1 = XEXP (SET_SRC (set), 1);
26695 if (src1 == const0_rtx)
26696 cfun->machine->thumb1_cc_mode = CCmode;
26698 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26700 /* Record the src register operand instead of dest because
26701 cprop_hardreg pass propagates src. */
26702 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26705 else if (conds != CONDS_NOCOND)
26706 cfun->machine->thumb1_cc_insn = NULL_RTX;
26709 /* Check if an unexpected far jump is used. */
26710 if (cfun->machine->lr_save_eliminated
26711 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26712 internal_error ("unexpected thumb1 far jump");
/* Return 1 if VAL can be obtained by shifting an 8-bit value left by some amount.  */
26715 int
26716 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26718 unsigned HOST_WIDE_INT mask = 0xff;
26719 int i;
26721 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26722 if (val == 0) /* XXX */
26723 return 0;
26725 for (i = 0; i < 25; i++)
26726 if ((val & (mask << i)) == val)
26727 return 1;
26729 return 0;
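/* Worked example (editorial): 0x0003FC00 is 0xFF << 10, so the loop
   above matches at i == 10 and the function returns 1; 0x00010001 has
   set bits 16 positions apart, cannot be covered by an 8-bit mask at
   any single shift, and yields 0.  */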
26732 /* Returns nonzero if the current function contains,
26733 or might contain a far jump. */
26734 static int
26735 thumb_far_jump_used_p (void)
26737 rtx_insn *insn;
26738 bool far_jump = false;
26739 unsigned int func_size = 0;
26741 /* This test is only important for leaf functions. */
26742 /* assert (!leaf_function_p ()); */
26744 /* If we have already decided that far jumps may be used,
26745 do not bother checking again, and always return true even if
26746 it turns out that they are not being used. Once we have made
26747 the decision that far jumps are present (and that hence the link
26748 register will be pushed onto the stack) we cannot go back on it. */
26749 if (cfun->machine->far_jump_used)
26750 return 1;
26752 /* If this function is not being called from the prologue/epilogue
26753 generation code then it must be being called from the
26754 INITIAL_ELIMINATION_OFFSET macro. */
26755 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26757 /* In this case we know that we are being asked about the elimination
26758 of the arg pointer register. If that register is not being used,
26759 then there are no arguments on the stack, and we do not have to
26760 worry that a far jump might force the prologue to push the link
26761 register, changing the stack offsets. In this case we can just
26762 return false, since the presence of far jumps in the function will
26763 not affect stack offsets.
26765 If the arg pointer is live (or if it was live, but has now been
26766 eliminated and so set to dead) then we do have to test to see if
26767 the function might contain a far jump. This test can lead to some
26768 false negatives, since before reload is completed, the length of
26769 branch instructions is not known, so gcc defaults to returning their
26770 longest length, which in turn sets the far jump attribute to true.
26772 A false negative will not result in bad code being generated, but it
26773 will result in a needless push and pop of the link register. We
26774 hope that this does not occur too often.
26776 If we need doubleword stack alignment this could affect the other
26777 elimination offsets so we can't risk getting it wrong. */
26778 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26779 cfun->machine->arg_pointer_live = 1;
26780 else if (!cfun->machine->arg_pointer_live)
26781 return 0;
26784 /* We should not change far_jump_used during or after reload, as there is
26785 no chance to change stack frame layout. */
26786 if (reload_in_progress || reload_completed)
26787 return 0;
26789 /* Check to see if the function contains a branch
26790 insn with the far jump attribute set. */
26791 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26793 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26795 far_jump = true;
26797 func_size += get_attr_length (insn);
26800 /* Attribute far_jump will always be true for thumb1 before the
26801 shorten_branch pass.  So checking the far_jump attribute before
26802 shorten_branch isn't very useful.
26804 The following heuristic tries to estimate more accurately whether a far jump
26805 may finally be used.  The heuristic is very conservative, as there is
26806 no chance to roll back a decision not to use far jumps.
26808 Thumb1 long branch offset is -2048 to 2046.  The worst case is that each
26809 2-byte insn is associated with a 4-byte constant pool entry.  Using
26810 function size 2048/3 as the threshold is conservative enough. */
26811 if (far_jump)
26813 if ((func_size * 3) >= 2048)
26815 /* Record the fact that we have decided that
26816 the function does use far jumps. */
26817 cfun->machine->far_jump_used = 1;
26818 return 1;
26822 return 0;
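/* Editorial sketch, not part of the original arm.c; the helper name below is
   illustrative only.  It restates the conservative threshold used above:
   func_size counts only instruction bytes, and in the worst case every
   2-byte Thumb-1 insn drags a 4-byte constant-pool entry along with it, so
   code plus pools may span up to func_size * 3 bytes.  A far jump is assumed
   as soon as that pessimistic span could exceed the ~2K branch range.  */

static int
might_need_far_jump (unsigned int func_size)
{
  /* 682 instruction bytes * 3 == 2046 still fits the -2048..2046 range;
     683 * 3 == 2049 does not, so the heuristic fires at func_size >= 683.  */
  return (func_size * 3) >= 2048;
}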
26825 /* Return nonzero if FUNC must be entered in ARM mode. */
26826 int
26827 is_called_in_ARM_mode (tree func)
26829 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26831 /* Ignore the problem about functions whose address is taken. */
26832 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26833 return TRUE;
26835 #ifdef ARM_PE
26836 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26837 #else
26838 return FALSE;
26839 #endif
26842 /* Given the stack offsets and register mask in OFFSETS, decide how
26843 many additional registers to push instead of subtracting a constant
26844 from SP. For epilogues the principle is the same except we use pop.
26845 FOR_PROLOGUE indicates which we're generating. */
26846 static int
26847 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26849 HOST_WIDE_INT amount;
26850 unsigned long live_regs_mask = offsets->saved_regs_mask;
26851 /* Extract a mask of the ones we can give to the Thumb's push/pop
26852 instruction. */
26853 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26854 /* Then count how many other high registers will need to be pushed. */
26855 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26856 int n_free, reg_base, size;
26858 if (!for_prologue && frame_pointer_needed)
26859 amount = offsets->locals_base - offsets->saved_regs;
26860 else
26861 amount = offsets->outgoing_args - offsets->saved_regs;
26863 /* If the stack frame size is 512 exactly, we can save one load
26864 instruction, which should make this a win even when optimizing
26865 for speed. */
26866 if (!optimize_size && amount != 512)
26867 return 0;
26869 /* Can't do this if there are high registers to push. */
26870 if (high_regs_pushed != 0)
26871 return 0;
26873 /* Shouldn't do it in the prologue if no registers would normally
26874 be pushed at all. In the epilogue, also allow it if we'll have
26875 a pop insn for the PC. */
26876 if (l_mask == 0
26877 && (for_prologue
26878 || TARGET_BACKTRACE
26879 || (live_regs_mask & 1 << LR_REGNUM) == 0
26880 || TARGET_INTERWORK
26881 || crtl->args.pretend_args_size != 0))
26882 return 0;
26884 /* Don't do this if thumb_expand_prologue wants to emit instructions
26885 between the push and the stack frame allocation. */
26886 if (for_prologue
26887 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26888 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26889 return 0;
26891 reg_base = 0;
26892 n_free = 0;
26893 if (!for_prologue)
26895 size = arm_size_return_regs ();
26896 reg_base = ARM_NUM_INTS (size);
26897 live_regs_mask >>= reg_base;
26900 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26901 && (for_prologue || call_used_regs[reg_base + n_free]))
26903 live_regs_mask >>= 1;
26904 n_free++;
26907 if (n_free == 0)
26908 return 0;
26909 gcc_assert (amount / 4 * 4 == amount);
26911 if (amount >= 512 && (amount - n_free * 4) < 512)
26912 return (amount - 508) / 4;
26913 if (amount <= n_free * 4)
26914 return amount / 4;
26915 return 0;
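/* Editorial sketch, not part of the original arm.c; names are illustrative.
   It isolates the arithmetic at the end of thumb1_extra_regs_pushed: each
   extra register pushed (or popped) replaces 4 bytes of explicit stack
   adjustment, so pushing a few of the N_FREE unused registers can either
   bring an adjustment of 512 or more back under the single-instruction
   limit, or absorb a small adjustment entirely.  */

static int
extra_regs_for_adjustment (int amount, int n_free)
{
  /* e.g. amount == 516, n_free == 3: return (516 - 508) / 4 == 2 extra
     registers, leaving 508 bytes for a single add/sub instruction.  */
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;

  /* e.g. amount == 8, n_free == 3: two extra pushes cover it all.  */
  if (amount <= n_free * 4)
    return amount / 4;

  return 0;
}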
26918 /* The bits which aren't usefully expanded as rtl. */
26919 const char *
26920 thumb1_unexpanded_epilogue (void)
26922 arm_stack_offsets *offsets;
26923 int regno;
26924 unsigned long live_regs_mask = 0;
26925 int high_regs_pushed = 0;
26926 int extra_pop;
26927 int had_to_push_lr;
26928 int size;
26930 if (cfun->machine->return_used_this_function != 0)
26931 return "";
26933 if (IS_NAKED (arm_current_func_type ()))
26934 return "";
26936 offsets = arm_get_frame_offsets ();
26937 live_regs_mask = offsets->saved_regs_mask;
26938 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26940 /* We can deduce the registers used for the return value from the
26941 function's return type. This is more reliable than examining
26942 df_regs_ever_live_p () because that will be set if the register is
26943 ever used in the function, not just if it is used to hold a return value. */
26944 size = arm_size_return_regs ();
26946 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26947 if (extra_pop > 0)
26949 unsigned long extra_mask = (1 << extra_pop) - 1;
26950 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26953 /* The prolog may have pushed some high registers to use as
26954 work registers. e.g. the testsuite file:
26955 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26956 compiles to produce:
26957 push {r4, r5, r6, r7, lr}
26958 mov r7, r9
26959 mov r6, r8
26960 push {r6, r7}
26961 as part of the prolog. We have to undo that pushing here. */
26963 if (high_regs_pushed)
26965 unsigned long mask = live_regs_mask & 0xff;
26966 int next_hi_reg;
26968 /* The available low registers depend on the size of the value we are
26969 returning. */
26970 if (size <= 12)
26971 mask |= 1 << 3;
26972 if (size <= 8)
26973 mask |= 1 << 2;
26975 if (mask == 0)
26976 /* Oh dear! We have no low registers into which we can pop
26977 high registers! */
26978 internal_error
26979 ("no low registers available for popping high registers");
26981 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26982 if (live_regs_mask & (1 << next_hi_reg))
26983 break;
26985 while (high_regs_pushed)
26987 /* Find lo register(s) into which the high register(s) can
26988 be popped. */
26989 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26991 if (mask & (1 << regno))
26992 high_regs_pushed--;
26993 if (high_regs_pushed == 0)
26994 break;
26997 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26999 /* Pop the values into the low register(s). */
27000 thumb_pop (asm_out_file, mask);
27002 /* Move the value(s) into the high registers. */
27003 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
27005 if (mask & (1 << regno))
27007 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
27008 regno);
27010 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
27011 if (live_regs_mask & (1 << next_hi_reg))
27012 break;
27016 live_regs_mask &= ~0x0f00;
27019 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
27020 live_regs_mask &= 0xff;
27022 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
27024 /* Pop the return address into the PC. */
27025 if (had_to_push_lr)
27026 live_regs_mask |= 1 << PC_REGNUM;
27028 /* Either no argument registers were pushed or a backtrace
27029 structure was created which includes an adjusted stack
27030 pointer, so just pop everything. */
27031 if (live_regs_mask)
27032 thumb_pop (asm_out_file, live_regs_mask);
27034 /* We have either just popped the return address into the
27035 PC or it was kept in LR for the entire function.
27036 Note that thumb_pop has already called thumb_exit if the
27037 PC was in the list. */
27038 if (!had_to_push_lr)
27039 thumb_exit (asm_out_file, LR_REGNUM);
27041 else
27043 /* Pop everything but the return address. */
27044 if (live_regs_mask)
27045 thumb_pop (asm_out_file, live_regs_mask);
27047 if (had_to_push_lr)
27049 if (size > 12)
27051 /* We have no free low regs, so save one. */
27052 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27053 LAST_ARG_REGNUM);
27056 /* Get the return address into a temporary register. */
27057 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27059 if (size > 12)
27061 /* Move the return address to lr. */
27062 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27063 LAST_ARG_REGNUM);
27064 /* Restore the low register. */
27065 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27066 IP_REGNUM);
27067 regno = LR_REGNUM;
27069 else
27070 regno = LAST_ARG_REGNUM;
27072 else
27073 regno = LR_REGNUM;
27075 /* Remove the argument registers that were pushed onto the stack. */
27076 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27077 SP_REGNUM, SP_REGNUM,
27078 crtl->args.pretend_args_size);
27080 thumb_exit (asm_out_file, regno);
27083 return "";
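/* Editorial sketch, not part of the original arm.c; names are illustrative.
   It shows which low registers the epilogue above may use as scratch while
   popping high registers: the return value occupies r0 upwards, so r3 is
   free once the value fits in 12 bytes and r2 is also free once it fits in
   8 bytes, and any saved low register can be clobbered too because its real
   value is popped later in the epilogue.  */

static unsigned long
scratch_lo_mask_for_epilogue (unsigned long live_regs_mask, int return_size)
{
  unsigned long mask = live_regs_mask & 0xff;  /* saved low registers  */

  if (return_size <= 12)
    mask |= 1ul << 3;                          /* r3 not needed for the value  */
  if (return_size <= 8)
    mask |= 1ul << 2;                          /* r2 not needed either  */

  /* e.g. return_size == 4 with r4-r7 saved: r2, r3 and r4-r7 are usable.  */
  return mask;
}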
27086 /* Functions to save and restore machine-specific function data. */
27087 static struct machine_function *
27088 arm_init_machine_status (void)
27090 struct machine_function *machine;
27091 machine = ggc_cleared_alloc<machine_function> ();
27093 #if ARM_FT_UNKNOWN != 0
27094 machine->func_type = ARM_FT_UNKNOWN;
27095 #endif
27096 return machine;
27099 /* Return an RTX indicating where the return address to the
27100 calling function can be found. */
27101 rtx
27102 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27104 if (count != 0)
27105 return NULL_RTX;
27107 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27110 /* Do anything needed before RTL is emitted for each function. */
27111 void
27112 arm_init_expanders (void)
27114 /* Arrange to initialize and mark the machine per-function status. */
27115 init_machine_status = arm_init_machine_status;
27117 /* This is to stop the combine pass optimizing away the alignment
27118 adjustment of va_arg. */
27119 /* ??? It is claimed that this should not be necessary. */
27120 if (cfun)
27121 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27125 /* Like arm_compute_initial_elimination_offset. Simpler because there
27126 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27127 to point at the base of the local variables after static stack
27128 space for a function has been allocated. */
27130 HOST_WIDE_INT
27131 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27133 arm_stack_offsets *offsets;
27135 offsets = arm_get_frame_offsets ();
27137 switch (from)
27139 case ARG_POINTER_REGNUM:
27140 switch (to)
27142 case STACK_POINTER_REGNUM:
27143 return offsets->outgoing_args - offsets->saved_args;
27145 case FRAME_POINTER_REGNUM:
27146 return offsets->soft_frame - offsets->saved_args;
27148 case ARM_HARD_FRAME_POINTER_REGNUM:
27149 return offsets->saved_regs - offsets->saved_args;
27151 case THUMB_HARD_FRAME_POINTER_REGNUM:
27152 return offsets->locals_base - offsets->saved_args;
27154 default:
27155 gcc_unreachable ();
27157 break;
27159 case FRAME_POINTER_REGNUM:
27160 switch (to)
27162 case STACK_POINTER_REGNUM:
27163 return offsets->outgoing_args - offsets->soft_frame;
27165 case ARM_HARD_FRAME_POINTER_REGNUM:
27166 return offsets->saved_regs - offsets->soft_frame;
27168 case THUMB_HARD_FRAME_POINTER_REGNUM:
27169 return offsets->locals_base - offsets->soft_frame;
27171 default:
27172 gcc_unreachable ();
27174 break;
27176 default:
27177 gcc_unreachable ();
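/* Editorial sketch, not part of the original arm.c; the structure and values
   are hypothetical.  Every case above reduces to a difference between two of
   the layout markers in arm_stack_offsets, so a concrete (made-up) layout
   makes the pattern easy to follow.  */

struct frame_layout_example
{
  long saved_args;      /* e.g.  0: just below the incoming arguments  */
  long saved_regs;      /* e.g.  8: below the pushed registers         */
  long soft_frame;      /* e.g. 24: the Thumb soft frame pointer       */
  long locals_base;     /* e.g. 24: base of the local variables        */
  long outgoing_args;   /* e.g. 40: below the outgoing arguments       */
};

static long
arg_pointer_to_stack_pointer_offset (const struct frame_layout_example *o)
{
  /* ARG_POINTER -> STACK_POINTER elimination: 40 - 0 == 40 in this layout.  */
  return o->outgoing_args - o->saved_args;
}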
27181 /* Generate the function's prologue. */
27183 void
27184 thumb1_expand_prologue (void)
27186 rtx_insn *insn;
27188 HOST_WIDE_INT amount;
27189 arm_stack_offsets *offsets;
27190 unsigned long func_type;
27191 int regno;
27192 unsigned long live_regs_mask;
27193 unsigned long l_mask;
27194 unsigned high_regs_pushed = 0;
27196 func_type = arm_current_func_type ();
27198 /* Naked functions don't have prologues. */
27199 if (IS_NAKED (func_type))
27200 return;
27202 if (IS_INTERRUPT (func_type))
27204 error ("interrupt Service Routines cannot be coded in Thumb mode");
27205 return;
27208 if (is_called_in_ARM_mode (current_function_decl))
27209 emit_insn (gen_prologue_thumb1_interwork ());
27211 offsets = arm_get_frame_offsets ();
27212 live_regs_mask = offsets->saved_regs_mask;
27214 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27215 l_mask = live_regs_mask & 0x40ff;
27216 /* Then count how many other high registers will need to be pushed. */
27217 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27219 if (crtl->args.pretend_args_size)
27221 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27223 if (cfun->machine->uses_anonymous_args)
27225 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27226 unsigned long mask;
27228 mask = 1ul << (LAST_ARG_REGNUM + 1);
27229 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27231 insn = thumb1_emit_multi_reg_push (mask, 0);
27233 else
27235 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27236 stack_pointer_rtx, x));
27238 RTX_FRAME_RELATED_P (insn) = 1;
27241 if (TARGET_BACKTRACE)
27243 HOST_WIDE_INT offset = 0;
27244 unsigned work_register;
27245 rtx work_reg, x, arm_hfp_rtx;
27247 /* We have been asked to create a stack backtrace structure.
27248 The code looks like this:
27250 0 .align 2
27251 0 func:
27252 0 sub SP, #16 Reserve space for 4 registers.
27253 2 push {R7} Push low registers.
27254 4 add R7, SP, #20 Get the stack pointer before the push.
27255 6 str R7, [SP, #8] Store the stack pointer
27256 (before reserving the space).
27257 8 mov R7, PC Get hold of the start of this code + 12.
27258 10 str R7, [SP, #16] Store it.
27259 12 mov R7, FP Get hold of the current frame pointer.
27260 14 str R7, [SP, #4] Store it.
27261 16 mov R7, LR Get hold of the current return address.
27262 18 str R7, [SP, #12] Store it.
27263 20 add R7, SP, #16 Point at the start of the
27264 backtrace structure.
27265 22 mov FP, R7 Put this value into the frame pointer. */
27267 work_register = thumb_find_work_register (live_regs_mask);
27268 work_reg = gen_rtx_REG (SImode, work_register);
27269 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27271 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27272 stack_pointer_rtx, GEN_INT (-16)));
27273 RTX_FRAME_RELATED_P (insn) = 1;
27275 if (l_mask)
27277 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27278 RTX_FRAME_RELATED_P (insn) = 1;
27280 offset = bit_count (l_mask) * UNITS_PER_WORD;
27283 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27284 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27286 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27287 x = gen_frame_mem (SImode, x);
27288 emit_move_insn (x, work_reg);
27290 /* Make sure that the instruction fetching the PC is in the right place
27291 to calculate "start of backtrace creation code + 12". */
27292 /* ??? The stores using the common WORK_REG ought to be enough to
27293 prevent the scheduler from doing anything weird. Failing that
27294 we could always move all of the following into an UNSPEC_VOLATILE. */
27295 if (l_mask)
27297 x = gen_rtx_REG (SImode, PC_REGNUM);
27298 emit_move_insn (work_reg, x);
27300 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27301 x = gen_frame_mem (SImode, x);
27302 emit_move_insn (x, work_reg);
27304 emit_move_insn (work_reg, arm_hfp_rtx);
27306 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27307 x = gen_frame_mem (SImode, x);
27308 emit_move_insn (x, work_reg);
27310 else
27312 emit_move_insn (work_reg, arm_hfp_rtx);
27314 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27315 x = gen_frame_mem (SImode, x);
27316 emit_move_insn (x, work_reg);
27318 x = gen_rtx_REG (SImode, PC_REGNUM);
27319 emit_move_insn (work_reg, x);
27321 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27322 x = gen_frame_mem (SImode, x);
27323 emit_move_insn (x, work_reg);
27326 x = gen_rtx_REG (SImode, LR_REGNUM);
27327 emit_move_insn (work_reg, x);
27329 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27330 x = gen_frame_mem (SImode, x);
27331 emit_move_insn (x, work_reg);
27333 x = GEN_INT (offset + 12);
27334 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27336 emit_move_insn (arm_hfp_rtx, work_reg);
27338 /* Optimization: If we are not pushing any low registers but we are going
27339 to push some high registers then delay our first push. This will just
27340 be a push of LR and we can combine it with the push of the first high
27341 register. */
27342 else if ((l_mask & 0xff) != 0
27343 || (high_regs_pushed == 0 && l_mask))
27345 unsigned long mask = l_mask;
27346 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27347 insn = thumb1_emit_multi_reg_push (mask, mask);
27348 RTX_FRAME_RELATED_P (insn) = 1;
27351 if (high_regs_pushed)
27353 unsigned pushable_regs;
27354 unsigned next_hi_reg;
27355 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27356 : crtl->args.info.nregs;
27357 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27359 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27360 if (live_regs_mask & (1 << next_hi_reg))
27361 break;
27363 /* Here we need to mask out registers used for passing arguments,
27364 even if they could otherwise be pushed. This avoids using them to
27365 stash the high registers, which would clobber the argument values. */
27366 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27368 if (pushable_regs == 0)
27369 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27371 while (high_regs_pushed > 0)
27373 unsigned long real_regs_mask = 0;
27375 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27377 if (pushable_regs & (1 << regno))
27379 emit_move_insn (gen_rtx_REG (SImode, regno),
27380 gen_rtx_REG (SImode, next_hi_reg));
27382 high_regs_pushed --;
27383 real_regs_mask |= (1 << next_hi_reg);
27385 if (high_regs_pushed)
27387 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27388 next_hi_reg --)
27389 if (live_regs_mask & (1 << next_hi_reg))
27390 break;
27392 else
27394 pushable_regs &= ~((1 << regno) - 1);
27395 break;
27400 /* If we had to find a work register and we have not yet
27401 saved the LR then add it to the list of regs to push. */
27402 if (l_mask == (1 << LR_REGNUM))
27404 pushable_regs |= l_mask;
27405 real_regs_mask |= l_mask;
27406 l_mask = 0;
27409 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27410 RTX_FRAME_RELATED_P (insn) = 1;
27414 /* Load the pic register before setting the frame pointer,
27415 so we can use r7 as a temporary work register. */
27416 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27417 arm_load_pic_register (live_regs_mask);
27419 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27420 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27421 stack_pointer_rtx);
27423 if (flag_stack_usage_info)
27424 current_function_static_stack_size
27425 = offsets->outgoing_args - offsets->saved_args;
27427 amount = offsets->outgoing_args - offsets->saved_regs;
27428 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27429 if (amount)
27431 if (amount < 512)
27433 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27434 GEN_INT (- amount)));
27435 RTX_FRAME_RELATED_P (insn) = 1;
27437 else
27439 rtx reg, dwarf;
27441 /* The stack decrement is too big for an immediate value in a single
27442 insn. In theory we could issue multiple subtracts, but after
27443 three of them it becomes more space efficient to place the full
27444 value in the constant pool and load into a register. (Also the
27445 ARM debugger really likes to see only one stack decrement per
27446 function). So instead we look for a scratch register into which
27447 we can load the decrement, and then we subtract this from the
27448 stack pointer. Unfortunately on the thumb the only available
27449 scratch registers are the argument registers, and we cannot use
27450 these as they may hold arguments to the function. Instead we
27451 attempt to locate a call preserved register which is used by this
27452 function. If we can find one, then we know that it will have
27453 been pushed at the start of the prologue and so we can corrupt
27454 it now. */
27455 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27456 if (live_regs_mask & (1 << regno))
27457 break;
27459 gcc_assert(regno <= LAST_LO_REGNUM);
27461 reg = gen_rtx_REG (SImode, regno);
27463 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27465 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27466 stack_pointer_rtx, reg));
27468 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27469 plus_constant (Pmode, stack_pointer_rtx,
27470 -amount));
27471 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27472 RTX_FRAME_RELATED_P (insn) = 1;
27476 if (frame_pointer_needed)
27477 thumb_set_frame_pointer (offsets);
27479 /* If we are profiling, make sure no instructions are scheduled before
27480 the call to mcount. Similarly if the user has requested no
27481 scheduling in the prolog. Similarly if we want non-call exceptions
27482 using the EABI unwinder, to prevent faulting instructions from being
27483 swapped with a stack adjustment. */
27484 if (crtl->profile || !TARGET_SCHED_PROLOG
27485 || (arm_except_unwind_info (&global_options) == UI_TARGET
27486 && cfun->can_throw_non_call_exceptions))
27487 emit_insn (gen_blockage ());
27489 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27490 if (live_regs_mask & 0xff)
27491 cfun->machine->lr_save_eliminated = 0;
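/* Editorial sketch, not part of the original arm.c; names are illustrative.
   It summarises how the prologue above allocates the frame: an amount that
   fits the Thumb-1 immediate range (word-aligned, below 512) is handled by a
   single "sub sp" style instruction, while anything larger is first loaded
   into a call-saved low register that the prologue has already pushed, and
   which is therefore safe to clobber.  */

enum frame_alloc_kind { ALLOC_IMMEDIATE, ALLOC_VIA_SCRATCH_REG };

static enum frame_alloc_kind
thumb1_frame_alloc_strategy (long amount)
{
  /* e.g. 496 -> one instruction; 1024 -> materialise 1024 in a scratch
     register and subtract that register from sp.  */
  return (amount < 512) ? ALLOC_IMMEDIATE : ALLOC_VIA_SCRATCH_REG;
}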
27494 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27495 single POP instruction can be generated. LR should be replaced by PC. All
27496 the checks required are already done by USE_RETURN_INSN (). Hence,
27497 all we really need to check here is whether a single register or
27498 multiple registers are to be returned. */
27499 void
27500 thumb2_expand_return (bool simple_return)
27502 int i, num_regs;
27503 unsigned long saved_regs_mask;
27504 arm_stack_offsets *offsets;
27506 offsets = arm_get_frame_offsets ();
27507 saved_regs_mask = offsets->saved_regs_mask;
27509 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27510 if (saved_regs_mask & (1 << i))
27511 num_regs++;
27513 if (!simple_return && saved_regs_mask)
27515 if (num_regs == 1)
27517 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27518 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27519 rtx addr = gen_rtx_MEM (SImode,
27520 gen_rtx_POST_INC (SImode,
27521 stack_pointer_rtx));
27522 set_mem_alias_set (addr, get_frame_alias_set ());
27523 XVECEXP (par, 0, 0) = ret_rtx;
27524 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27525 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27526 emit_jump_insn (par);
27528 else
27530 saved_regs_mask &= ~ (1 << LR_REGNUM);
27531 saved_regs_mask |= (1 << PC_REGNUM);
27532 arm_emit_multi_reg_pop (saved_regs_mask);
27535 else
27537 emit_jump_insn (simple_return_rtx);
27541 void
27542 thumb1_expand_epilogue (void)
27544 HOST_WIDE_INT amount;
27545 arm_stack_offsets *offsets;
27546 int regno;
27548 /* Naked functions don't have epilogues. */
27549 if (IS_NAKED (arm_current_func_type ()))
27550 return;
27552 offsets = arm_get_frame_offsets ();
27553 amount = offsets->outgoing_args - offsets->saved_regs;
27555 if (frame_pointer_needed)
27557 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27558 amount = offsets->locals_base - offsets->saved_regs;
27560 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27562 gcc_assert (amount >= 0);
27563 if (amount)
27565 emit_insn (gen_blockage ());
27567 if (amount < 512)
27568 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27569 GEN_INT (amount)));
27570 else
27572 /* r3 is always free in the epilogue. */
27573 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27575 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27576 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27580 /* Emit a USE (stack_pointer_rtx), so that
27581 the stack adjustment will not be deleted. */
27582 emit_insn (gen_force_register_use (stack_pointer_rtx));
27584 if (crtl->profile || !TARGET_SCHED_PROLOG)
27585 emit_insn (gen_blockage ());
27587 /* Emit a clobber for each insn that will be restored in the epilogue,
27588 so that flow2 will get register lifetimes correct. */
27589 for (regno = 0; regno < 13; regno++)
27590 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27591 emit_clobber (gen_rtx_REG (SImode, regno));
27593 if (! df_regs_ever_live_p (LR_REGNUM))
27594 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27597 /* Epilogue code for APCS frame. */
27598 static void
27599 arm_expand_epilogue_apcs_frame (bool really_return)
27601 unsigned long func_type;
27602 unsigned long saved_regs_mask;
27603 int num_regs = 0;
27604 int i;
27605 int floats_from_frame = 0;
27606 arm_stack_offsets *offsets;
27608 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27609 func_type = arm_current_func_type ();
27611 /* Get frame offsets for ARM. */
27612 offsets = arm_get_frame_offsets ();
27613 saved_regs_mask = offsets->saved_regs_mask;
27615 /* Find the offset of the floating-point save area in the frame. */
27616 floats_from_frame
27617 = (offsets->saved_args
27618 + arm_compute_static_chain_stack_bytes ()
27619 - offsets->frame);
27621 /* Compute how many core registers saved and how far away the floats are. */
27622 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27623 if (saved_regs_mask & (1 << i))
27625 num_regs++;
27626 floats_from_frame += 4;
27629 if (TARGET_HARD_FLOAT && TARGET_VFP)
27631 int start_reg;
27632 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27634 /* The offset is from IP_REGNUM. */
27635 int saved_size = arm_get_vfp_saved_size ();
27636 if (saved_size > 0)
27638 rtx_insn *insn;
27639 floats_from_frame += saved_size;
27640 insn = emit_insn (gen_addsi3 (ip_rtx,
27641 hard_frame_pointer_rtx,
27642 GEN_INT (-floats_from_frame)));
27643 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27644 ip_rtx, hard_frame_pointer_rtx);
27647 /* Generate VFP register multi-pop. */
27648 start_reg = FIRST_VFP_REGNUM;
27650 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27651 /* Look for a case where a reg does not need restoring. */
27652 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27653 && (!df_regs_ever_live_p (i + 1)
27654 || call_used_regs[i + 1]))
27656 if (start_reg != i)
27657 arm_emit_vfp_multi_reg_pop (start_reg,
27658 (i - start_reg) / 2,
27659 gen_rtx_REG (SImode,
27660 IP_REGNUM));
27661 start_reg = i + 2;
27664 /* Restore the remaining regs that we have discovered (or possibly
27665 even all of them, if the conditional in the for loop never
27666 fired). */
27667 if (start_reg != i)
27668 arm_emit_vfp_multi_reg_pop (start_reg,
27669 (i - start_reg) / 2,
27670 gen_rtx_REG (SImode, IP_REGNUM));
27673 if (TARGET_IWMMXT)
27675 /* The frame pointer is guaranteed to be non-double-word aligned, as
27676 it is set to double-word-aligned old_stack_pointer - 4. */
27677 rtx_insn *insn;
27678 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27680 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27681 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27683 rtx addr = gen_frame_mem (V2SImode,
27684 plus_constant (Pmode, hard_frame_pointer_rtx,
27685 - lrm_count * 4));
27686 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27687 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27688 gen_rtx_REG (V2SImode, i),
27689 NULL_RTX);
27690 lrm_count += 2;
27694 /* saved_regs_mask should contain IP, which holds the old stack pointer
27695 saved when the activation record was created. Since SP and IP are
27696 adjacent registers, we can restore the value directly into SP. */
27697 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27698 saved_regs_mask &= ~(1 << IP_REGNUM);
27699 saved_regs_mask |= (1 << SP_REGNUM);
27701 /* There are two registers left in saved_regs_mask - LR and PC. We
27702 only need to restore LR (the return address), but to
27703 save time we can load it directly into PC, unless we need a
27704 special function exit sequence, or we are not really returning. */
27705 if (really_return
27706 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27707 && !crtl->calls_eh_return)
27708 /* Delete LR from the register mask, so that LR on
27709 the stack is loaded into the PC in the register mask. */
27710 saved_regs_mask &= ~(1 << LR_REGNUM);
27711 else
27712 saved_regs_mask &= ~(1 << PC_REGNUM);
27714 num_regs = bit_count (saved_regs_mask);
27715 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27717 rtx_insn *insn;
27718 emit_insn (gen_blockage ());
27719 /* Unwind the stack to just below the saved registers. */
27720 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27721 hard_frame_pointer_rtx,
27722 GEN_INT (- 4 * num_regs)));
27724 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27725 stack_pointer_rtx, hard_frame_pointer_rtx);
27728 arm_emit_multi_reg_pop (saved_regs_mask);
27730 if (IS_INTERRUPT (func_type))
27732 /* Interrupt handlers will have pushed the
27733 IP onto the stack, so restore it now. */
27734 rtx_insn *insn;
27735 rtx addr = gen_rtx_MEM (SImode,
27736 gen_rtx_POST_INC (SImode,
27737 stack_pointer_rtx));
27738 set_mem_alias_set (addr, get_frame_alias_set ());
27739 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27740 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27741 gen_rtx_REG (SImode, IP_REGNUM),
27742 NULL_RTX);
27745 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27746 return;
27748 if (crtl->calls_eh_return)
27749 emit_insn (gen_addsi3 (stack_pointer_rtx,
27750 stack_pointer_rtx,
27751 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27753 if (IS_STACKALIGN (func_type))
27754 /* Restore the original stack pointer. Before prologue, the stack was
27755 realigned and the original stack pointer saved in r0. For details,
27756 see comment in arm_expand_prologue. */
27757 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27759 emit_jump_insn (simple_return_rtx);
27762 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27763 function is not a sibcall. */
27764 void
27765 arm_expand_epilogue (bool really_return)
27767 unsigned long func_type;
27768 unsigned long saved_regs_mask;
27769 int num_regs = 0;
27770 int i;
27771 int amount;
27772 arm_stack_offsets *offsets;
27774 func_type = arm_current_func_type ();
27776 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27777 let output_return_instruction take care of any instruction emission. */
27778 if (IS_NAKED (func_type)
27779 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27781 if (really_return)
27782 emit_jump_insn (simple_return_rtx);
27783 return;
27786 /* If we are throwing an exception, then we really must be doing a
27787 return, so we can't tail-call. */
27788 gcc_assert (!crtl->calls_eh_return || really_return);
27790 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27792 arm_expand_epilogue_apcs_frame (really_return);
27793 return;
27796 /* Get frame offsets for ARM. */
27797 offsets = arm_get_frame_offsets ();
27798 saved_regs_mask = offsets->saved_regs_mask;
27799 num_regs = bit_count (saved_regs_mask);
27801 if (frame_pointer_needed)
27803 rtx_insn *insn;
27804 /* Restore stack pointer if necessary. */
27805 if (TARGET_ARM)
27807 /* In ARM mode, frame pointer points to first saved register.
27808 Restore stack pointer to last saved register. */
27809 amount = offsets->frame - offsets->saved_regs;
27811 /* Force out any pending memory operations that reference stacked data
27812 before stack de-allocation occurs. */
27813 emit_insn (gen_blockage ());
27814 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27815 hard_frame_pointer_rtx,
27816 GEN_INT (amount)));
27817 arm_add_cfa_adjust_cfa_note (insn, amount,
27818 stack_pointer_rtx,
27819 hard_frame_pointer_rtx);
27821 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27822 deleted. */
27823 emit_insn (gen_force_register_use (stack_pointer_rtx));
27825 else
27827 /* In Thumb-2 mode, the frame pointer points to the last saved
27828 register. */
27829 amount = offsets->locals_base - offsets->saved_regs;
27830 if (amount)
27832 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27833 hard_frame_pointer_rtx,
27834 GEN_INT (amount)));
27835 arm_add_cfa_adjust_cfa_note (insn, amount,
27836 hard_frame_pointer_rtx,
27837 hard_frame_pointer_rtx);
27840 /* Force out any pending memory operations that reference stacked data
27841 before stack de-allocation occurs. */
27842 emit_insn (gen_blockage ());
27843 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27844 hard_frame_pointer_rtx));
27845 arm_add_cfa_adjust_cfa_note (insn, 0,
27846 stack_pointer_rtx,
27847 hard_frame_pointer_rtx);
27848 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27849 deleted. */
27850 emit_insn (gen_force_register_use (stack_pointer_rtx));
27853 else
27855 /* Pop off outgoing args and local frame to adjust stack pointer to
27856 last saved register. */
27857 amount = offsets->outgoing_args - offsets->saved_regs;
27858 if (amount)
27860 rtx_insn *tmp;
27861 /* Force out any pending memory operations that reference stacked data
27862 before stack de-allocation occurs. */
27863 emit_insn (gen_blockage ());
27864 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27865 stack_pointer_rtx,
27866 GEN_INT (amount)));
27867 arm_add_cfa_adjust_cfa_note (tmp, amount,
27868 stack_pointer_rtx, stack_pointer_rtx);
27869 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27870 not deleted. */
27871 emit_insn (gen_force_register_use (stack_pointer_rtx));
27875 if (TARGET_HARD_FLOAT && TARGET_VFP)
27877 /* Generate VFP register multi-pop. */
27878 int end_reg = LAST_VFP_REGNUM + 1;
27880 /* Scan the registers in reverse order. We need to match
27881 any groupings made in the prologue and generate matching
27882 vldm operations. Groups must match because, unlike pop,
27883 vldm can only restore consecutive registers. */
27884 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27885 /* Look for a case where a reg does not need restoring. */
27886 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27887 && (!df_regs_ever_live_p (i + 1)
27888 || call_used_regs[i + 1]))
27890 /* Restore the regs discovered so far (from reg+2 to
27891 end_reg). */
27892 if (end_reg > i + 2)
27893 arm_emit_vfp_multi_reg_pop (i + 2,
27894 (end_reg - (i + 2)) / 2,
27895 stack_pointer_rtx);
27896 end_reg = i;
27899 /* Restore the remaining regs that we have discovered (or possibly
27900 even all of them, if the conditional in the for loop never
27901 fired). */
27902 if (end_reg > i + 2)
27903 arm_emit_vfp_multi_reg_pop (i + 2,
27904 (end_reg - (i + 2)) / 2,
27905 stack_pointer_rtx);
27908 if (TARGET_IWMMXT)
27909 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27910 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27912 rtx_insn *insn;
27913 rtx addr = gen_rtx_MEM (V2SImode,
27914 gen_rtx_POST_INC (SImode,
27915 stack_pointer_rtx));
27916 set_mem_alias_set (addr, get_frame_alias_set ());
27917 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27918 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27919 gen_rtx_REG (V2SImode, i),
27920 NULL_RTX);
27921 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27922 stack_pointer_rtx, stack_pointer_rtx);
27925 if (saved_regs_mask)
27927 rtx insn;
27928 bool return_in_pc = false;
27930 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27931 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27932 && !IS_STACKALIGN (func_type)
27933 && really_return
27934 && crtl->args.pretend_args_size == 0
27935 && saved_regs_mask & (1 << LR_REGNUM)
27936 && !crtl->calls_eh_return)
27938 saved_regs_mask &= ~(1 << LR_REGNUM);
27939 saved_regs_mask |= (1 << PC_REGNUM);
27940 return_in_pc = true;
27943 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27945 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27946 if (saved_regs_mask & (1 << i))
27948 rtx addr = gen_rtx_MEM (SImode,
27949 gen_rtx_POST_INC (SImode,
27950 stack_pointer_rtx));
27951 set_mem_alias_set (addr, get_frame_alias_set ());
27953 if (i == PC_REGNUM)
27955 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27956 XVECEXP (insn, 0, 0) = ret_rtx;
27957 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27958 gen_rtx_REG (SImode, i),
27959 addr);
27960 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27961 insn = emit_jump_insn (insn);
27963 else
27965 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27966 addr));
27967 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27968 gen_rtx_REG (SImode, i),
27969 NULL_RTX);
27970 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27971 stack_pointer_rtx,
27972 stack_pointer_rtx);
27976 else
27978 if (TARGET_LDRD
27979 && current_tune->prefer_ldrd_strd
27980 && !optimize_function_for_size_p (cfun))
27982 if (TARGET_THUMB2)
27983 thumb2_emit_ldrd_pop (saved_regs_mask);
27984 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27985 arm_emit_ldrd_pop (saved_regs_mask);
27986 else
27987 arm_emit_multi_reg_pop (saved_regs_mask);
27989 else
27990 arm_emit_multi_reg_pop (saved_regs_mask);
27993 if (return_in_pc == true)
27994 return;
27997 if (crtl->args.pretend_args_size)
27999 int i, j;
28000 rtx dwarf = NULL_RTX;
28001 rtx_insn *tmp =
28002 emit_insn (gen_addsi3 (stack_pointer_rtx,
28003 stack_pointer_rtx,
28004 GEN_INT (crtl->args.pretend_args_size)));
28006 RTX_FRAME_RELATED_P (tmp) = 1;
28008 if (cfun->machine->uses_anonymous_args)
28010 /* Restore the pretend args. Refer to arm_expand_prologue for how the
28011 pretend args are saved on the stack. */
28012 int num_regs = crtl->args.pretend_args_size / 4;
28013 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28014 for (j = 0, i = 0; j < num_regs; i++)
28015 if (saved_regs_mask & (1 << i))
28017 rtx reg = gen_rtx_REG (SImode, i);
28018 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28019 j++;
28021 REG_NOTES (tmp) = dwarf;
28023 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
28024 stack_pointer_rtx, stack_pointer_rtx);
28027 if (!really_return)
28028 return;
28030 if (crtl->calls_eh_return)
28031 emit_insn (gen_addsi3 (stack_pointer_rtx,
28032 stack_pointer_rtx,
28033 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28035 if (IS_STACKALIGN (func_type))
28036 /* Restore the original stack pointer. Before prologue, the stack was
28037 realigned and the original stack pointer saved in r0. For details,
28038 see comment in arm_expand_prologue. */
28039 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
28041 emit_jump_insn (simple_return_rtx);
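/* Editorial sketch, not part of the original arm.c; names are illustrative.
   The VFP restore loops above partition the saved D registers into maximal
   runs of consecutive live registers, because vldm, unlike pop, can only
   load a consecutive block.  A standalone version of that grouping over a
   hypothetical live[] array:  */

static int
count_vldm_groups (const int *live, int n_dregs)
{
  int groups = 0;
  int i = 0;

  while (i < n_dregs)
    {
      if (!live[i])
        {
          i++;
          continue;
        }
      groups++;                 /* start of a consecutive run -> one vldm  */
      while (i < n_dregs && live[i])
        i++;
    }

  /* e.g. d8, d9 and d11 live (d10 dead) needs two vldm groups.  */
  return groups;
}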
28044 /* Implementation of insn prologue_thumb1_interwork. This is the first
28045 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28047 const char *
28048 thumb1_output_interwork (void)
28050 const char * name;
28051 FILE *f = asm_out_file;
28053 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28054 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28055 == SYMBOL_REF);
28056 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28058 /* Generate code sequence to switch us into Thumb mode. */
28059 /* The .code 32 directive has already been emitted by
28060 ASM_DECLARE_FUNCTION_NAME. */
28061 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28062 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28064 /* Generate a label, so that the debugger will notice the
28065 change in instruction sets. This label is also used by
28066 the assembler to bypass the ARM code when this function
28067 is called from a Thumb encoded function elsewhere in the
28068 same file. Hence the definition of STUB_NAME here must
28069 agree with the definition in gas/config/tc-arm.c. */
28071 #define STUB_NAME ".real_start_of"
28073 fprintf (f, "\t.code\t16\n");
28074 #ifdef ARM_PE
28075 if (arm_dllexport_name_p (name))
28076 name = arm_strip_name_encoding (name);
28077 #endif
28078 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28079 fprintf (f, "\t.thumb_func\n");
28080 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28082 return "";
28085 /* Handle the case of a double word load into a low register from
28086 a computed memory address. The computed address may involve a
28087 register which is overwritten by the load. */
28088 const char *
28089 thumb_load_double_from_address (rtx *operands)
28091 rtx addr;
28092 rtx base;
28093 rtx offset;
28094 rtx arg1;
28095 rtx arg2;
28097 gcc_assert (REG_P (operands[0]));
28098 gcc_assert (MEM_P (operands[1]));
28100 /* Get the memory address. */
28101 addr = XEXP (operands[1], 0);
28103 /* Work out how the memory address is computed. */
28104 switch (GET_CODE (addr))
28106 case REG:
28107 operands[2] = adjust_address (operands[1], SImode, 4);
28109 if (REGNO (operands[0]) == REGNO (addr))
28111 output_asm_insn ("ldr\t%H0, %2", operands);
28112 output_asm_insn ("ldr\t%0, %1", operands);
28114 else
28116 output_asm_insn ("ldr\t%0, %1", operands);
28117 output_asm_insn ("ldr\t%H0, %2", operands);
28119 break;
28121 case CONST:
28122 /* Compute <address> + 4 for the high order load. */
28123 operands[2] = adjust_address (operands[1], SImode, 4);
28125 output_asm_insn ("ldr\t%0, %1", operands);
28126 output_asm_insn ("ldr\t%H0, %2", operands);
28127 break;
28129 case PLUS:
28130 arg1 = XEXP (addr, 0);
28131 arg2 = XEXP (addr, 1);
28133 if (CONSTANT_P (arg1))
28134 base = arg2, offset = arg1;
28135 else
28136 base = arg1, offset = arg2;
28138 gcc_assert (REG_P (base));
28140 /* Catch the case of <address> = <reg> + <reg> */
28141 if (REG_P (offset))
28143 int reg_offset = REGNO (offset);
28144 int reg_base = REGNO (base);
28145 int reg_dest = REGNO (operands[0]);
28147 /* Add the base and offset registers together into the
28148 higher destination register. */
28149 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28150 reg_dest + 1, reg_base, reg_offset);
28152 /* Load the lower destination register from the address in
28153 the higher destination register. */
28154 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28155 reg_dest, reg_dest + 1);
28157 /* Load the higher destination register from its own address
28158 plus 4. */
28159 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28160 reg_dest + 1, reg_dest + 1);
28162 else
28164 /* Compute <address> + 4 for the high order load. */
28165 operands[2] = adjust_address (operands[1], SImode, 4);
28167 /* If the computed address is held in the low order register
28168 then load the high order register first, otherwise always
28169 load the low order register first. */
28170 if (REGNO (operands[0]) == REGNO (base))
28172 output_asm_insn ("ldr\t%H0, %2", operands);
28173 output_asm_insn ("ldr\t%0, %1", operands);
28175 else
28177 output_asm_insn ("ldr\t%0, %1", operands);
28178 output_asm_insn ("ldr\t%H0, %2", operands);
28181 break;
28183 case LABEL_REF:
28184 /* With no registers to worry about we can just load the value
28185 directly. */
28186 operands[2] = adjust_address (operands[1], SImode, 4);
28188 output_asm_insn ("ldr\t%H0, %2", operands);
28189 output_asm_insn ("ldr\t%0, %1", operands);
28190 break;
28192 default:
28193 gcc_unreachable ();
28196 return "";
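/* Editorial sketch, not part of the original arm.c; names are illustrative.
   It captures the ordering rule used above: the 64-bit load is split into
   two 32-bit loads, and if the low destination register is also the address
   register, loading the low half first would destroy the base before the
   second load, so the high half must be loaded first in that case.  */

static int
load_high_half_first (int dest_lo_regno, int base_regno)
{
  /* e.g. dest r2/r3 with base r2: load r3 from [r2, #4] first, then r2
     from [r2].  With base r5 the low half can safely be loaded first.  */
  return dest_lo_regno == base_regno;
}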
28199 const char *
28200 thumb_output_move_mem_multiple (int n, rtx *operands)
28202 rtx tmp;
28204 switch (n)
28206 case 2:
28207 if (REGNO (operands[4]) > REGNO (operands[5]))
28209 tmp = operands[4];
28210 operands[4] = operands[5];
28211 operands[5] = tmp;
28213 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28214 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28215 break;
28217 case 3:
28218 if (REGNO (operands[4]) > REGNO (operands[5]))
28220 tmp = operands[4];
28221 operands[4] = operands[5];
28222 operands[5] = tmp;
28224 if (REGNO (operands[5]) > REGNO (operands[6]))
28226 tmp = operands[5];
28227 operands[5] = operands[6];
28228 operands[6] = tmp;
28230 if (REGNO (operands[4]) > REGNO (operands[5]))
28232 tmp = operands[4];
28233 operands[4] = operands[5];
28234 operands[5] = tmp;
28237 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28238 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28239 break;
28241 default:
28242 gcc_unreachable ();
28245 return "";
28248 /* Output a call-via instruction for thumb state. */
28249 const char *
28250 thumb_call_via_reg (rtx reg)
28252 int regno = REGNO (reg);
28253 rtx *labelp;
28255 gcc_assert (regno < LR_REGNUM);
28257 /* If we are in the normal text section we can use a single instance
28258 per compilation unit. If we are doing function sections, then we need
28259 an entry per section, since we can't rely on reachability. */
28260 if (in_section == text_section)
28262 thumb_call_reg_needed = 1;
28264 if (thumb_call_via_label[regno] == NULL)
28265 thumb_call_via_label[regno] = gen_label_rtx ();
28266 labelp = thumb_call_via_label + regno;
28268 else
28270 if (cfun->machine->call_via[regno] == NULL)
28271 cfun->machine->call_via[regno] = gen_label_rtx ();
28272 labelp = cfun->machine->call_via + regno;
28275 output_asm_insn ("bl\t%a0", labelp);
28276 return "";
28279 /* Routines for generating rtl. */
28280 void
28281 thumb_expand_movmemqi (rtx *operands)
28283 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28284 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28285 HOST_WIDE_INT len = INTVAL (operands[2]);
28286 HOST_WIDE_INT offset = 0;
28288 while (len >= 12)
28290 emit_insn (gen_movmem12b (out, in, out, in));
28291 len -= 12;
28294 if (len >= 8)
28296 emit_insn (gen_movmem8b (out, in, out, in));
28297 len -= 8;
28300 if (len >= 4)
28302 rtx reg = gen_reg_rtx (SImode);
28303 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28304 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28305 len -= 4;
28306 offset += 4;
28309 if (len >= 2)
28311 rtx reg = gen_reg_rtx (HImode);
28312 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28313 plus_constant (Pmode, in,
28314 offset))));
28315 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28316 offset)),
28317 reg));
28318 len -= 2;
28319 offset += 2;
28322 if (len)
28324 rtx reg = gen_reg_rtx (QImode);
28325 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28326 plus_constant (Pmode, in,
28327 offset))));
28328 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28329 offset)),
28330 reg));
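/* Editorial sketch, not part of the original arm.c; names are illustrative.
   It restates the copy strategy above: the length is consumed greedily by
   12- and 8-byte ldmia/stmia bursts, then by single word, half-word and
   byte moves for the tail.  */

static void
movmem_chunks (long len, int out[5])
{
  out[0] = 0;   /* number of 12-byte bursts  */
  out[1] = 0;   /* one optional 8-byte burst  */
  out[2] = 0;   /* one optional word move     */
  out[3] = 0;   /* one optional half-word     */
  out[4] = 0;   /* one optional byte          */

  while (len >= 12) { out[0]++; len -= 12; }
  if (len >= 8) { out[1] = 1; len -= 8; }
  if (len >= 4) { out[2] = 1; len -= 4; }
  if (len >= 2) { out[3] = 1; len -= 2; }
  if (len >= 1) { out[4] = 1; len -= 1; }

  /* e.g. len == 31 -> two 12-byte bursts, then 4 + 2 + 1 bytes.  */
}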
28334 void
28335 thumb_reload_out_hi (rtx *operands)
28337 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28340 /* Handle reading a half-word from memory during reload. */
28341 void
28342 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28344 gcc_unreachable ();
28347 /* Return the length of a function name prefix
28348 that starts with the character 'c'. */
28349 static int
28350 arm_get_strip_length (int c)
28352 switch (c)
28354 ARM_NAME_ENCODING_LENGTHS
28355 default: return 0;
28359 /* Return a pointer to a function's name with any
28360 and all prefix encodings stripped from it. */
28361 const char *
28362 arm_strip_name_encoding (const char *name)
28364 int skip;
28366 while ((skip = arm_get_strip_length (* name)))
28367 name += skip;
28369 return name;
28372 /* If there is a '*' anywhere in the name's prefix, then
28373 emit the stripped name verbatim, otherwise prepend an
28374 underscore if leading underscores are being used. */
28375 void
28376 arm_asm_output_labelref (FILE *stream, const char *name)
28378 int skip;
28379 int verbatim = 0;
28381 while ((skip = arm_get_strip_length (* name)))
28383 verbatim |= (*name == '*');
28384 name += skip;
28387 if (verbatim)
28388 fputs (name, stream);
28389 else
28390 asm_fprintf (stream, "%U%s", name);
28393 /* This function is used to emit an EABI tag and its associated value.
28394 We emit the numerical value of the tag in case the assembler does not
28395 support textual tags. (Eg gas prior to 2.20). If requested we include
28396 the tag name in a comment so that anyone reading the assembler output
28397 will know which tag is being set.
28399 This function is not static because arm-c.c needs it too. */
28401 void
28402 arm_emit_eabi_attribute (const char *name, int num, int val)
28404 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28405 if (flag_verbose_asm || flag_debug_asm)
28406 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28407 asm_fprintf (asm_out_file, "\n");
28410 static void
28411 arm_file_start (void)
28413 int val;
28415 if (TARGET_UNIFIED_ASM)
28416 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28418 if (TARGET_BPABI)
28420 const char *fpu_name;
28421 if (arm_selected_arch)
28423 /* armv7ve doesn't support any extensions. */
28424 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28426 /* Keep backward compatibility for assemblers
28427 which don't support armv7ve. */
28428 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28429 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28430 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28431 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28432 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28434 else
28436 const char* pos = strchr (arm_selected_arch->name, '+');
28437 if (pos)
28439 char buf[15];
28440 gcc_assert (strlen (arm_selected_arch->name)
28441 <= sizeof (buf) / sizeof (*pos));
28442 strncpy (buf, arm_selected_arch->name,
28443 (pos - arm_selected_arch->name) * sizeof (*pos));
28444 buf[pos - arm_selected_arch->name] = '\0';
28445 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28446 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28448 else
28449 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28452 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28453 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28454 else
28456 const char* truncated_name
28457 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28458 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28461 if (TARGET_SOFT_FLOAT)
28463 fpu_name = "softvfp";
28465 else
28467 fpu_name = arm_fpu_desc->name;
28468 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28470 if (TARGET_HARD_FLOAT)
28471 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28472 if (TARGET_HARD_FLOAT_ABI)
28473 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28476 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28478 /* Some of these attributes only apply when the corresponding features
28479 are used. However we don't have any easy way of figuring this out.
28480 Conservatively record the setting that would have been used. */
28482 if (flag_rounding_math)
28483 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28485 if (!flag_unsafe_math_optimizations)
28487 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28488 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28490 if (flag_signaling_nans)
28491 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28493 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28494 flag_finite_math_only ? 1 : 3);
28496 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28497 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28498 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28499 flag_short_enums ? 1 : 2);
28501 /* Tag_ABI_optimization_goals. */
28502 if (optimize_size)
28503 val = 4;
28504 else if (optimize >= 2)
28505 val = 2;
28506 else if (optimize)
28507 val = 1;
28508 else
28509 val = 6;
28510 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28512 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28513 unaligned_access);
28515 if (arm_fp16_format)
28516 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28517 (int) arm_fp16_format);
28519 if (arm_lang_output_object_attributes_hook)
28520 arm_lang_output_object_attributes_hook();
28523 default_file_start ();
28526 static void
28527 arm_file_end (void)
28529 int regno;
28531 if (NEED_INDICATE_EXEC_STACK)
28532 /* Add .note.GNU-stack. */
28533 file_end_indicate_exec_stack ();
28535 if (! thumb_call_reg_needed)
28536 return;
28538 switch_to_section (text_section);
28539 asm_fprintf (asm_out_file, "\t.code 16\n");
28540 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28542 for (regno = 0; regno < LR_REGNUM; regno++)
28544 rtx label = thumb_call_via_label[regno];
28546 if (label != 0)
28548 targetm.asm_out.internal_label (asm_out_file, "L",
28549 CODE_LABEL_NUMBER (label));
28550 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28555 #ifndef ARM_PE
28556 /* Symbols in the text segment can be accessed without indirecting via the
28557 constant pool; it may take an extra binary operation, but this is still
28558 faster than indirecting via memory. Don't do this when not optimizing,
28559 since we won't be calculating all of the offsets necessary to do this
28560 simplification. */
28562 static void
28563 arm_encode_section_info (tree decl, rtx rtl, int first)
28565 if (optimize > 0 && TREE_CONSTANT (decl))
28566 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28568 default_encode_section_info (decl, rtl, first);
28570 #endif /* !ARM_PE */
28572 static void
28573 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28575 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28576 && !strcmp (prefix, "L"))
28578 arm_ccfsm_state = 0;
28579 arm_target_insn = NULL;
28581 default_internal_label (stream, prefix, labelno);
28584 /* Output code to add DELTA to the first argument, and then jump
28585 to FUNCTION. Used for C++ multiple inheritance. */
28586 static void
28587 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28588 HOST_WIDE_INT delta,
28589 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28590 tree function)
28592 static int thunk_label = 0;
28593 char label[256];
28594 char labelpc[256];
28595 int mi_delta = delta;
28596 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28597 int shift = 0;
28598 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28599 ? 1 : 0);
28600 if (mi_delta < 0)
28601 mi_delta = - mi_delta;
28603 final_start_function (emit_barrier (), file, 1);
28605 if (TARGET_THUMB1)
28607 int labelno = thunk_label++;
28608 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28609 /* Thunks are entered in ARM mode when available. */
28610 if (TARGET_THUMB1_ONLY)
28612 /* push r3 so we can use it as a temporary. */
28613 /* TODO: Omit this save if r3 is not used. */
28614 fputs ("\tpush {r3}\n", file);
28615 fputs ("\tldr\tr3, ", file);
28617 else
28619 fputs ("\tldr\tr12, ", file);
28621 assemble_name (file, label);
28622 fputc ('\n', file);
28623 if (flag_pic)
28625 /* If we are generating PIC, the ldr instruction below loads
28626 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28627 the address of the add + 8, so we have:
28629 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28630 = target + 1.
28632 Note that we have "+ 1" because some versions of GNU ld
28633 don't set the low bit of the result for R_ARM_REL32
28634 relocations against thumb function symbols.
28635 On ARMv6M this is +4, not +8. */
28636 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28637 assemble_name (file, labelpc);
28638 fputs (":\n", file);
28639 if (TARGET_THUMB1_ONLY)
28641 /* This is 2 insns after the start of the thunk, so we know it
28642 is 4-byte aligned. */
28643 fputs ("\tadd\tr3, pc, r3\n", file);
28644 fputs ("\tmov r12, r3\n", file);
28646 else
28647 fputs ("\tadd\tr12, pc, r12\n", file);
28649 else if (TARGET_THUMB1_ONLY)
28650 fputs ("\tmov r12, r3\n", file);
28652 if (TARGET_THUMB1_ONLY)
28654 if (mi_delta > 255)
28656 fputs ("\tldr\tr3, ", file);
28657 assemble_name (file, label);
28658 fputs ("+4\n", file);
28659 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28660 mi_op, this_regno, this_regno);
28662 else if (mi_delta != 0)
28664 /* Thumb1 unified syntax requires s suffix in instruction name when
28665 one of the operands is immediate. */
28666 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28667 mi_op, this_regno, this_regno,
28668 mi_delta);
28671 else
28673 /* TODO: Use movw/movt for large constants when available. */
28674 while (mi_delta != 0)
28676 if ((mi_delta & (3 << shift)) == 0)
28677 shift += 2;
28678 else
28680 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28681 mi_op, this_regno, this_regno,
28682 mi_delta & (0xff << shift));
28683 mi_delta &= ~(0xff << shift);
28684 shift += 8;
28688 if (TARGET_THUMB1)
28690 if (TARGET_THUMB1_ONLY)
28691 fputs ("\tpop\t{r3}\n", file);
28693 fprintf (file, "\tbx\tr12\n");
28694 ASM_OUTPUT_ALIGN (file, 2);
28695 assemble_name (file, label);
28696 fputs (":\n", file);
28697 if (flag_pic)
28699 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28700 rtx tem = XEXP (DECL_RTL (function), 0);
28701 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28702 pipeline offset is four rather than eight. Adjust the offset
28703 accordingly. */
28704 tem = plus_constant (GET_MODE (tem), tem,
28705 TARGET_THUMB1_ONLY ? -3 : -7);
28706 tem = gen_rtx_MINUS (GET_MODE (tem),
28707 tem,
28708 gen_rtx_SYMBOL_REF (Pmode,
28709 ggc_strdup (labelpc)));
28710 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28712 else
28713 /* Output ".word .LTHUNKn". */
28714 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28716 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28717 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28719 else
28721 fputs ("\tb\t", file);
28722 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28723 if (NEED_PLT_RELOC)
28724 fputs ("(PLT)", file);
28725 fputc ('\n', file);
28728 final_end_function ();
28732 arm_emit_vector_const (FILE *file, rtx x)
28734 int i;
28735 const char * pattern;
28737 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28739 switch (GET_MODE (x))
28741 case V2SImode: pattern = "%08x"; break;
28742 case V4HImode: pattern = "%04x"; break;
28743 case V8QImode: pattern = "%02x"; break;
28744 default: gcc_unreachable ();
28747 fprintf (file, "0x");
28748 for (i = CONST_VECTOR_NUNITS (x); i--;)
28750 rtx element;
28752 element = CONST_VECTOR_ELT (x, i);
28753 fprintf (file, pattern, INTVAL (element));
28756 return 1;
28759 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
28760 HFmode constant pool entries are actually loaded with ldr. */
28761 void
28762 arm_emit_fp16_const (rtx c)
28764 REAL_VALUE_TYPE r;
28765 long bits;
28767 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28768 bits = real_to_target (NULL, &r, HFmode);
28769 if (WORDS_BIG_ENDIAN)
28770 assemble_zeros (2);
28771 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28772 if (!WORDS_BIG_ENDIAN)
28773 assemble_zeros (2);
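/* For example (illustrative), on a little-endian target the two bytes
   of the HFmode constant are emitted first and followed by two bytes
   of zero padding, so an SImode ldr of the pool entry leaves the fp16
   bits in the low half of the loaded register; with WORDS_BIG_ENDIAN
   the padding is emitted first instead.  */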
28776 const char *
28777 arm_output_load_gr (rtx *operands)
28779 rtx reg;
28780 rtx offset;
28781 rtx wcgr;
28782 rtx sum;
28784 if (!MEM_P (operands [1])
28785 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28786 || !REG_P (reg = XEXP (sum, 0))
28787 || !CONST_INT_P (offset = XEXP (sum, 1))
28788 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28789 return "wldrw%?\t%0, %1";
28791 /* Fix up an out-of-range load of a GR register. */
28792 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28793 wcgr = operands[0];
28794 operands[0] = reg;
28795 output_asm_insn ("ldr%?\t%0, %1", operands);
28797 operands[0] = wcgr;
28798 operands[1] = reg;
28799 output_asm_insn ("tmcr%?\t%0, %1", operands);
28800 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28802 return "";
28805 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28807 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28808 named arg and all anonymous args onto the stack.
28809 XXX I know the prologue shouldn't be pushing registers, but it is faster
28810 that way. */
28812 static void
28813 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28814 machine_mode mode,
28815 tree type,
28816 int *pretend_size,
28817 int second_time ATTRIBUTE_UNUSED)
28819 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28820 int nregs;
28822 cfun->machine->uses_anonymous_args = 1;
28823 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28825 nregs = pcum->aapcs_ncrn;
28826 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28827 nregs++;
28829 else
28830 nregs = pcum->nregs;
28832 if (nregs < NUM_ARG_REGS)
28833 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28836 /* We can't rely on the caller doing the proper promotion when
28837 using APCS or ATPCS. */
28839 static bool
28840 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28842 return !TARGET_AAPCS_BASED;
28845 static machine_mode
28846 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28847 machine_mode mode,
28848 int *punsignedp ATTRIBUTE_UNUSED,
28849 const_tree fntype ATTRIBUTE_UNUSED,
28850 int for_return ATTRIBUTE_UNUSED)
28852 if (GET_MODE_CLASS (mode) == MODE_INT
28853 && GET_MODE_SIZE (mode) < 4)
28854 return SImode;
28856 return mode;
28859 /* AAPCS based ABIs use short enums by default. */
28861 static bool
28862 arm_default_short_enums (void)
28864 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28868 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28870 static bool
28871 arm_align_anon_bitfield (void)
28873 return TARGET_AAPCS_BASED;
28877 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28879 static tree
28880 arm_cxx_guard_type (void)
28882 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28886 /* The EABI says test the least significant bit of a guard variable. */
28888 static bool
28889 arm_cxx_guard_mask_bit (void)
28891 return TARGET_AAPCS_BASED;
28895 /* The EABI specifies that all array cookies are 8 bytes long. */
28897 static tree
28898 arm_get_cookie_size (tree type)
28900 tree size;
28902 if (!TARGET_AAPCS_BASED)
28903 return default_cxx_get_cookie_size (type);
28905 size = build_int_cst (sizetype, 8);
28906 return size;
28910 /* The EABI says that array cookies should also contain the element size. */
28912 static bool
28913 arm_cookie_has_size (void)
28915 return TARGET_AAPCS_BASED;
28919 /* The EABI says constructors and destructors should return a pointer to
28920 the object constructed/destroyed. */
28922 static bool
28923 arm_cxx_cdtor_returns_this (void)
28925 return TARGET_AAPCS_BASED;
28928 /* The EABI says that an inline function may never be the key
28929 method. */
28931 static bool
28932 arm_cxx_key_method_may_be_inline (void)
28934 return !TARGET_AAPCS_BASED;
28937 static void
28938 arm_cxx_determine_class_data_visibility (tree decl)
28940 if (!TARGET_AAPCS_BASED
28941 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28942 return;
28944 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28945 is exported. However, on systems without dynamic vague linkage,
28946 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28947 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28948 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28949 else
28950 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28951 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28954 static bool
28955 arm_cxx_class_data_always_comdat (void)
28957 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28958 vague linkage if the class has no key function. */
28959 return !TARGET_AAPCS_BASED;
28963 /* The EABI says __aeabi_atexit should be used to register static
28964 destructors. */
28966 static bool
28967 arm_cxx_use_aeabi_atexit (void)
28969 return TARGET_AAPCS_BASED;
28973 void
28974 arm_set_return_address (rtx source, rtx scratch)
28976 arm_stack_offsets *offsets;
28977 HOST_WIDE_INT delta;
28978 rtx addr;
28979 unsigned long saved_regs;
28981 offsets = arm_get_frame_offsets ();
28982 saved_regs = offsets->saved_regs_mask;
28984 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28985 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28986 else
28988 if (frame_pointer_needed)
28989 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28990 else
28992 /* LR will be the first saved register. */
28993 delta = offsets->outgoing_args - (offsets->frame + 4);
28996 if (delta >= 4096)
28998 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28999 GEN_INT (delta & ~4095)));
29000 addr = scratch;
29001 delta &= 4095;
29003 else
29004 addr = stack_pointer_rtx;
29006 addr = plus_constant (Pmode, addr, delta);
29008 /* The store needs to be marked as frame related in order to prevent
29009 DSE from deleting it as dead if it is based on fp. */
29010 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29011 RTX_FRAME_RELATED_P (insn) = 1;
29012 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29017 void
29018 thumb_set_return_address (rtx source, rtx scratch)
29020 arm_stack_offsets *offsets;
29021 HOST_WIDE_INT delta;
29022 HOST_WIDE_INT limit;
29023 int reg;
29024 rtx addr;
29025 unsigned long mask;
29027 emit_use (source);
29029 offsets = arm_get_frame_offsets ();
29030 mask = offsets->saved_regs_mask;
29031 if (mask & (1 << LR_REGNUM))
29033 limit = 1024;
29034 /* Find the saved regs. */
29035 if (frame_pointer_needed)
29037 delta = offsets->soft_frame - offsets->saved_args;
29038 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29039 if (TARGET_THUMB1)
29040 limit = 128;
29042 else
29044 delta = offsets->outgoing_args - offsets->saved_args;
29045 reg = SP_REGNUM;
29047 /* Allow for the stack frame. */
29048 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29049 delta -= 16;
29050 /* The link register is always the first saved register. */
29051 delta -= 4;
29053 /* Construct the address. */
29054 addr = gen_rtx_REG (SImode, reg);
29055 if (delta > limit)
29057 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29058 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29059 addr = scratch;
29061 else
29062 addr = plus_constant (Pmode, addr, delta);
29064 /* The store needs to be marked as frame related in order to prevent
29065 DSE from deleting it as dead if it is based on fp. */
29066 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29067 RTX_FRAME_RELATED_P (insn) = 1;
29068 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29070 else
29071 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29074 /* Implements target hook vector_mode_supported_p. */
29075 bool
29076 arm_vector_mode_supported_p (machine_mode mode)
29078 /* Neon also supports V2SImode, etc. listed in the clause below. */
29079 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29080 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29081 return true;
29083 if ((TARGET_NEON || TARGET_IWMMXT)
29084 && ((mode == V2SImode)
29085 || (mode == V4HImode)
29086 || (mode == V8QImode)))
29087 return true;
29089 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29090 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29091 || mode == V2HAmode))
29092 return true;
29094 return false;
29097 /* Implements target hook array_mode_supported_p. */
29099 static bool
29100 arm_array_mode_supported_p (machine_mode mode,
29101 unsigned HOST_WIDE_INT nelems)
29103 if (TARGET_NEON
29104 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29105 && (nelems >= 2 && nelems <= 4))
29106 return true;
29108 return false;
29111 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29112 registers when autovectorizing for Neon, at least until multiple vector
29113 widths are supported properly by the middle-end. */
29115 static machine_mode
29116 arm_preferred_simd_mode (machine_mode mode)
29118 if (TARGET_NEON)
29119 switch (mode)
29121 case SFmode:
29122 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29123 case SImode:
29124 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29125 case HImode:
29126 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29127 case QImode:
29128 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29129 case DImode:
29130 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29131 return V2DImode;
29132 break;
29134 default:;
29137 if (TARGET_REALLY_IWMMXT)
29138 switch (mode)
29140 case SImode:
29141 return V2SImode;
29142 case HImode:
29143 return V4HImode;
29144 case QImode:
29145 return V8QImode;
29147 default:;
29150 return word_mode;
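/* As an illustrative example: with NEON enabled, loops over 'float'
   elements are autovectorized with V4SFmode (quad-word) vectors by
   default and with V2SFmode (double-word) vectors under
   -mvectorize-with-neon-double, while DImode elements are only
   vectorized in the quad-word case.  */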
29153 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29155 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29156 using r0-r4 for function arguments, r7 for the stack frame and don't have
29157 enough left over to do doubleword arithmetic. For Thumb-2 all the
29158 potentially problematic instructions accept high registers so this is not
29159 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29160 that require many low registers. */
29161 static bool
29162 arm_class_likely_spilled_p (reg_class_t rclass)
29164 if ((TARGET_THUMB1 && rclass == LO_REGS)
29165 || rclass == CC_REG)
29166 return true;
29168 return false;
29171 /* Implements target hook small_register_classes_for_mode_p. */
29172 bool
29173 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29175 return TARGET_THUMB1;
29178 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29179 ARM insns and therefore guarantee that the shift count is modulo 256.
29180 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29181 guarantee no particular behavior for out-of-range counts. */
29183 static unsigned HOST_WIDE_INT
29184 arm_shift_truncation_mask (machine_mode mode)
29186 return mode == SImode ? 255 : 0;
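/* Illustrative consequence: because the mask for SImode is 255, the
   middle-end may drop an explicit masking of the count, treating
   "x << (n & 255)" the same as "x << n"; the zero mask for DImode
   forbids any such assumption about out-of-range counts.  */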
29190 /* Map internal gcc register numbers to DWARF2 register numbers. */
29192 unsigned int
29193 arm_dbx_register_number (unsigned int regno)
29195 if (regno < 16)
29196 return regno;
29198 if (IS_VFP_REGNUM (regno))
29200 /* See comment in arm_dwarf_register_span. */
29201 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29202 return 64 + regno - FIRST_VFP_REGNUM;
29203 else
29204 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29207 if (IS_IWMMXT_GR_REGNUM (regno))
29208 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29210 if (IS_IWMMXT_REGNUM (regno))
29211 return 112 + regno - FIRST_IWMMXT_REGNUM;
29213 gcc_unreachable ();
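/* Some example mappings implied by the code above (illustrative):
   r7 -> 7, s5 -> 64 + 5 = 69, d16 -> 256 + 16 = 272,
   wCGR0 -> 104, wR0 -> 112.  */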
29216 /* DWARF models VFPv3 registers as 32 64-bit registers.
29217 GCC models them as 64 32-bit registers, so we need to describe this to
29218 the DWARF generation code. Other registers can use the default. */
29219 static rtx
29220 arm_dwarf_register_span (rtx rtl)
29222 machine_mode mode;
29223 unsigned regno;
29224 rtx parts[16];
29225 int nregs;
29226 int i;
29228 regno = REGNO (rtl);
29229 if (!IS_VFP_REGNUM (regno))
29230 return NULL_RTX;
29232 /* XXX FIXME: The EABI defines two VFP register ranges:
29233 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29234 256-287: D0-D31
29235 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29236 corresponding D register. Until GDB supports this, we shall use the
29237 legacy encodings. We also use these encodings for D0-D15 for
29238 compatibility with older debuggers. */
29239 mode = GET_MODE (rtl);
29240 if (GET_MODE_SIZE (mode) < 8)
29241 return NULL_RTX;
29243 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29245 nregs = GET_MODE_SIZE (mode) / 4;
29246 for (i = 0; i < nregs; i += 2)
29247 if (TARGET_BIG_END)
29249 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29250 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29252 else
29254 parts[i] = gen_rtx_REG (SImode, regno + i);
29255 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29258 else
29260 nregs = GET_MODE_SIZE (mode) / 8;
29261 for (i = 0; i < nregs; i++)
29262 parts[i] = gen_rtx_REG (DImode, regno + i);
29265 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
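/* For example (sketch): a DFmode value in d5, which overlaps s10/s11,
   is described as a PARALLEL of the SImode registers s10 and s11, with
   the pair swapped for TARGET_BIG_END so the pieces match the memory
   layout; a DFmode value in d16, which has no single-precision
   aliases, is described as a PARALLEL containing just the DImode
   register itself.  */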
29268 #if ARM_UNWIND_INFO
29269 /* Emit unwind directives for a store-multiple instruction or stack pointer
29270 push during alignment.
29271 These should only ever be generated by the function prologue code, so
29272 expect them to have a particular form.
29273 The store-multiple instruction sometimes pushes pc as the last register,
29274 although it should not be tracked in the unwind information; for -Os it
29275 sometimes pushes dummy registers before the first register that needs
29276 to be tracked in the unwind information. Such dummy registers exist only
29277 to avoid a separate stack adjustment, and will not be restored in the
29278 epilogue. */
29280 static void
29281 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29283 int i;
29284 HOST_WIDE_INT offset;
29285 HOST_WIDE_INT nregs;
29286 int reg_size;
29287 unsigned reg;
29288 unsigned lastreg;
29289 unsigned padfirst = 0, padlast = 0;
29290 rtx e;
29292 e = XVECEXP (p, 0, 0);
29293 gcc_assert (GET_CODE (e) == SET);
29295 /* First insn will adjust the stack pointer. */
29296 gcc_assert (GET_CODE (e) == SET
29297 && REG_P (SET_DEST (e))
29298 && REGNO (SET_DEST (e)) == SP_REGNUM
29299 && GET_CODE (SET_SRC (e)) == PLUS);
29301 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29302 nregs = XVECLEN (p, 0) - 1;
29303 gcc_assert (nregs);
29305 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29306 if (reg < 16)
29308 /* For -Os dummy registers can be pushed at the beginning to
29309 avoid separate stack pointer adjustment. */
29310 e = XVECEXP (p, 0, 1);
29311 e = XEXP (SET_DEST (e), 0);
29312 if (GET_CODE (e) == PLUS)
29313 padfirst = INTVAL (XEXP (e, 1));
29314 gcc_assert (padfirst == 0 || optimize_size);
29315 /* The function prologue may also push pc, but not annotate it as it is
29316 never restored. We turn this into a stack pointer adjustment. */
29317 e = XVECEXP (p, 0, nregs);
29318 e = XEXP (SET_DEST (e), 0);
29319 if (GET_CODE (e) == PLUS)
29320 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29321 else
29322 padlast = offset - 4;
29323 gcc_assert (padlast == 0 || padlast == 4);
29324 if (padlast == 4)
29325 fprintf (asm_out_file, "\t.pad #4\n");
29326 reg_size = 4;
29327 fprintf (asm_out_file, "\t.save {");
29329 else if (IS_VFP_REGNUM (reg))
29331 reg_size = 8;
29332 fprintf (asm_out_file, "\t.vsave {");
29334 else
29335 /* Unknown register type. */
29336 gcc_unreachable ();
29338 /* If the stack increment doesn't match the size of the saved registers,
29339 something has gone horribly wrong. */
29340 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29342 offset = padfirst;
29343 lastreg = 0;
29344 /* The remaining insns will describe the stores. */
29345 for (i = 1; i <= nregs; i++)
29347 /* Expect (set (mem <addr>) (reg)).
29348 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29349 e = XVECEXP (p, 0, i);
29350 gcc_assert (GET_CODE (e) == SET
29351 && MEM_P (SET_DEST (e))
29352 && REG_P (SET_SRC (e)));
29354 reg = REGNO (SET_SRC (e));
29355 gcc_assert (reg >= lastreg);
29357 if (i != 1)
29358 fprintf (asm_out_file, ", ");
29359 /* We can't use %r for vfp because we need to use the
29360 double precision register names. */
29361 if (IS_VFP_REGNUM (reg))
29362 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29363 else
29364 asm_fprintf (asm_out_file, "%r", reg);
29366 #ifdef ENABLE_CHECKING
29367 /* Check that the addresses are consecutive. */
29368 e = XEXP (SET_DEST (e), 0);
29369 if (GET_CODE (e) == PLUS)
29370 gcc_assert (REG_P (XEXP (e, 0))
29371 && REGNO (XEXP (e, 0)) == SP_REGNUM
29372 && CONST_INT_P (XEXP (e, 1))
29373 && offset == INTVAL (XEXP (e, 1)));
29374 else
29375 gcc_assert (i == 1
29376 && REG_P (e)
29377 && REGNO (e) == SP_REGNUM);
29378 offset += reg_size;
29379 #endif
29381 fprintf (asm_out_file, "}\n");
29382 if (padfirst)
29383 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29386 /* Emit unwind directives for a SET. */
29388 static void
29389 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29391 rtx e0;
29392 rtx e1;
29393 unsigned reg;
29395 e0 = XEXP (p, 0);
29396 e1 = XEXP (p, 1);
29397 switch (GET_CODE (e0))
29399 case MEM:
29400 /* Pushing a single register. */
29401 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29402 || !REG_P (XEXP (XEXP (e0, 0), 0))
29403 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29404 abort ();
29406 asm_fprintf (asm_out_file, "\t.save ");
29407 if (IS_VFP_REGNUM (REGNO (e1)))
29408 asm_fprintf(asm_out_file, "{d%d}\n",
29409 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29410 else
29411 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29412 break;
29414 case REG:
29415 if (REGNO (e0) == SP_REGNUM)
29417 /* A stack increment. */
29418 if (GET_CODE (e1) != PLUS
29419 || !REG_P (XEXP (e1, 0))
29420 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29421 || !CONST_INT_P (XEXP (e1, 1)))
29422 abort ();
29424 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29425 -INTVAL (XEXP (e1, 1)));
29427 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29429 HOST_WIDE_INT offset;
29431 if (GET_CODE (e1) == PLUS)
29433 if (!REG_P (XEXP (e1, 0))
29434 || !CONST_INT_P (XEXP (e1, 1)))
29435 abort ();
29436 reg = REGNO (XEXP (e1, 0));
29437 offset = INTVAL (XEXP (e1, 1));
29438 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29439 HARD_FRAME_POINTER_REGNUM, reg,
29440 offset);
29442 else if (REG_P (e1))
29444 reg = REGNO (e1);
29445 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29446 HARD_FRAME_POINTER_REGNUM, reg);
29448 else
29449 abort ();
29451 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29453 /* Move from sp to reg. */
29454 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29456 else if (GET_CODE (e1) == PLUS
29457 && REG_P (XEXP (e1, 0))
29458 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29459 && CONST_INT_P (XEXP (e1, 1)))
29461 /* Set reg to offset from sp. */
29462 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29463 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29465 else
29466 abort ();
29467 break;
29469 default:
29470 abort ();
29475 /* Emit unwind directives for the given insn. */
29477 static void
29478 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29480 rtx note, pat;
29481 bool handled_one = false;
29483 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29484 return;
29486 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29487 && (TREE_NOTHROW (current_function_decl)
29488 || crtl->all_throwers_are_sibcalls))
29489 return;
29491 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29492 return;
29494 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29496 switch (REG_NOTE_KIND (note))
29498 case REG_FRAME_RELATED_EXPR:
29499 pat = XEXP (note, 0);
29500 goto found;
29502 case REG_CFA_REGISTER:
29503 pat = XEXP (note, 0);
29504 if (pat == NULL)
29506 pat = PATTERN (insn);
29507 if (GET_CODE (pat) == PARALLEL)
29508 pat = XVECEXP (pat, 0, 0);
29511 /* Only emitted for IS_STACKALIGN re-alignment. */
29513 rtx dest, src;
29514 unsigned reg;
29516 src = SET_SRC (pat);
29517 dest = SET_DEST (pat);
29519 gcc_assert (src == stack_pointer_rtx);
29520 reg = REGNO (dest);
29521 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29522 reg + 0x90, reg);
29524 handled_one = true;
29525 break;
29527 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29528 to get correct DWARF information for shrink-wrapping. We should not
29529 emit unwind information for it because these notes are used either for
29530 pretend arguments or to adjust sp and restore registers from the
29531 stack. */
29532 case REG_CFA_DEF_CFA:
29533 case REG_CFA_ADJUST_CFA:
29534 case REG_CFA_RESTORE:
29535 return;
29537 case REG_CFA_EXPRESSION:
29538 case REG_CFA_OFFSET:
29539 /* ??? Only handling here what we actually emit. */
29540 gcc_unreachable ();
29542 default:
29543 break;
29546 if (handled_one)
29547 return;
29548 pat = PATTERN (insn);
29549 found:
29551 switch (GET_CODE (pat))
29553 case SET:
29554 arm_unwind_emit_set (asm_out_file, pat);
29555 break;
29557 case SEQUENCE:
29558 /* Store multiple. */
29559 arm_unwind_emit_sequence (asm_out_file, pat);
29560 break;
29562 default:
29563 abort();
29568 /* Output a reference from a function exception table to the type_info
29569 object X. The EABI specifies that the symbol should be relocated by
29570 an R_ARM_TARGET2 relocation. */
29572 static bool
29573 arm_output_ttype (rtx x)
29575 fputs ("\t.word\t", asm_out_file);
29576 output_addr_const (asm_out_file, x);
29577 /* Use special relocations for symbol references. */
29578 if (!CONST_INT_P (x))
29579 fputs ("(TARGET2)", asm_out_file);
29580 fputc ('\n', asm_out_file);
29582 return TRUE;
29585 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29587 static void
29588 arm_asm_emit_except_personality (rtx personality)
29590 fputs ("\t.personality\t", asm_out_file);
29591 output_addr_const (asm_out_file, personality);
29592 fputc ('\n', asm_out_file);
29595 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29597 static void
29598 arm_asm_init_sections (void)
29600 exception_section = get_unnamed_section (0, output_section_asm_op,
29601 "\t.handlerdata");
29603 #endif /* ARM_UNWIND_INFO */
29605 /* Output unwind directives for the start/end of a function. */
29607 void
29608 arm_output_fn_unwind (FILE * f, bool prologue)
29610 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29611 return;
29613 if (prologue)
29614 fputs ("\t.fnstart\n", f);
29615 else
29617 /* If this function will never be unwound, then mark it as such.
29618 The same condition is used in arm_unwind_emit to suppress
29619 the frame annotations. */
29620 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29621 && (TREE_NOTHROW (current_function_decl)
29622 || crtl->all_throwers_are_sibcalls))
29623 fputs("\t.cantunwind\n", f);
29625 fputs ("\t.fnend\n", f);
29629 static bool
29630 arm_emit_tls_decoration (FILE *fp, rtx x)
29632 enum tls_reloc reloc;
29633 rtx val;
29635 val = XVECEXP (x, 0, 0);
29636 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29638 output_addr_const (fp, val);
29640 switch (reloc)
29642 case TLS_GD32:
29643 fputs ("(tlsgd)", fp);
29644 break;
29645 case TLS_LDM32:
29646 fputs ("(tlsldm)", fp);
29647 break;
29648 case TLS_LDO32:
29649 fputs ("(tlsldo)", fp);
29650 break;
29651 case TLS_IE32:
29652 fputs ("(gottpoff)", fp);
29653 break;
29654 case TLS_LE32:
29655 fputs ("(tpoff)", fp);
29656 break;
29657 case TLS_DESCSEQ:
29658 fputs ("(tlsdesc)", fp);
29659 break;
29660 default:
29661 gcc_unreachable ();
29664 switch (reloc)
29666 case TLS_GD32:
29667 case TLS_LDM32:
29668 case TLS_IE32:
29669 case TLS_DESCSEQ:
29670 fputs (" + (. - ", fp);
29671 output_addr_const (fp, XVECEXP (x, 0, 2));
29672 /* For DESCSEQ the third operand encodes thumbness, and is added. */
29673 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29674 output_addr_const (fp, XVECEXP (x, 0, 3));
29675 fputc (')', fp);
29676 break;
29677 default:
29678 break;
29681 return TRUE;
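/* Roughly, for a global-dynamic access this prints something of the
   form "sym(tlsgd) + (. - .LPIC0 - 8)", where the PIC label and the
   small pipeline adjustment come from operands 2 and 3 of the UNSPEC;
   the exact label name here is illustrative.  */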
29684 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29686 static void
29687 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29689 gcc_assert (size == 4);
29690 fputs ("\t.word\t", file);
29691 output_addr_const (file, x);
29692 fputs ("(tlsldo)", file);
29695 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29697 static bool
29698 arm_output_addr_const_extra (FILE *fp, rtx x)
29700 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29701 return arm_emit_tls_decoration (fp, x);
29702 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29704 char label[256];
29705 int labelno = INTVAL (XVECEXP (x, 0, 0));
29707 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29708 assemble_name_raw (fp, label);
29710 return TRUE;
29712 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29714 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29715 if (GOT_PCREL)
29716 fputs ("+.", fp);
29717 fputs ("-(", fp);
29718 output_addr_const (fp, XVECEXP (x, 0, 0));
29719 fputc (')', fp);
29720 return TRUE;
29722 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29724 output_addr_const (fp, XVECEXP (x, 0, 0));
29725 if (GOT_PCREL)
29726 fputs ("+.", fp);
29727 fputs ("-(", fp);
29728 output_addr_const (fp, XVECEXP (x, 0, 1));
29729 fputc (')', fp);
29730 return TRUE;
29732 else if (GET_CODE (x) == CONST_VECTOR)
29733 return arm_emit_vector_const (fp, x);
29735 return FALSE;
29738 /* Output assembly for a shift instruction.
29739 SET_FLAGS determines how the instruction modifies the condition codes.
29740 0 - Do not set condition codes.
29741 1 - Set condition codes.
29742 2 - Use smallest instruction. */
29743 const char *
29744 arm_output_shift(rtx * operands, int set_flags)
29746 char pattern[100];
29747 static const char flag_chars[3] = {'?', '.', '!'};
29748 const char *shift;
29749 HOST_WIDE_INT val;
29750 char c;
29752 c = flag_chars[set_flags];
29753 if (TARGET_UNIFIED_ASM)
29755 shift = shift_op(operands[3], &val);
29756 if (shift)
29758 if (val != -1)
29759 operands[2] = GEN_INT(val);
29760 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29762 else
29763 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29765 else
29766 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29767 output_asm_insn (pattern, operands);
29768 return "";
29771 /* Output assembly for a WMMX immediate shift instruction. */
29772 const char *
29773 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29775 int shift = INTVAL (operands[2]);
29776 char templ[50];
29777 machine_mode opmode = GET_MODE (operands[0]);
29779 gcc_assert (shift >= 0);
29781 /* Handle a shift value that exceeds the maximum for the mode: > 63 (for the
29782 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
29783 if (((opmode == V4HImode) && (shift > 15))
29784 || ((opmode == V2SImode) && (shift > 31))
29785 || ((opmode == DImode) && (shift > 63)))
29787 if (wror_or_wsra)
29789 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29790 output_asm_insn (templ, operands);
29791 if (opmode == DImode)
29793 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29794 output_asm_insn (templ, operands);
29797 else
29799 /* The destination register will contain all zeros. */
29800 sprintf (templ, "wzero\t%%0");
29801 output_asm_insn (templ, operands);
29803 return "";
29806 if ((opmode == DImode) && (shift > 32))
29808 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29809 output_asm_insn (templ, operands);
29810 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29811 output_asm_insn (templ, operands);
29813 else
29815 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29816 output_asm_insn (templ, operands);
29818 return "";
29821 /* Output assembly for a WMMX tinsr instruction. */
29822 const char *
29823 arm_output_iwmmxt_tinsr (rtx *operands)
29825 int mask = INTVAL (operands[3]);
29826 int i;
29827 char templ[50];
29828 int units = mode_nunits[GET_MODE (operands[0])];
29829 gcc_assert ((mask & (mask - 1)) == 0);
29830 for (i = 0; i < units; ++i)
29832 if ((mask & 0x01) == 1)
29834 break;
29836 mask >>= 1;
29838 gcc_assert (i < units);
29840 switch (GET_MODE (operands[0]))
29842 case V8QImode:
29843 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29844 break;
29845 case V4HImode:
29846 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29847 break;
29848 case V2SImode:
29849 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29850 break;
29851 default:
29852 gcc_unreachable ();
29853 break;
29855 output_asm_insn (templ, operands);
29857 return "";
29860 /* Output a Thumb-1 casesi dispatch sequence. */
29861 const char *
29862 thumb1_output_casesi (rtx *operands)
29864 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29866 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29868 switch (GET_MODE(diff_vec))
29870 case QImode:
29871 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29872 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29873 case HImode:
29874 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29875 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29876 case SImode:
29877 return "bl\t%___gnu_thumb1_case_si";
29878 default:
29879 gcc_unreachable ();
29883 /* Output a Thumb-2 casesi instruction. */
29884 const char *
29885 thumb2_output_casesi (rtx *operands)
29887 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29889 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29891 output_asm_insn ("cmp\t%0, %1", operands);
29892 output_asm_insn ("bhi\t%l3", operands);
29893 switch (GET_MODE(diff_vec))
29895 case QImode:
29896 return "tbb\t[%|pc, %0]";
29897 case HImode:
29898 return "tbh\t[%|pc, %0, lsl #1]";
29899 case SImode:
29900 if (flag_pic)
29902 output_asm_insn ("adr\t%4, %l2", operands);
29903 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29904 output_asm_insn ("add\t%4, %4, %5", operands);
29905 return "bx\t%4";
29907 else
29909 output_asm_insn ("adr\t%4, %l2", operands);
29910 return "ldr\t%|pc, [%4, %0, lsl #2]";
29912 default:
29913 gcc_unreachable ();
29917 /* Most ARM cores are single issue, but some newer ones can dual issue.
29918 The scheduler descriptions rely on this being correct. */
29919 static int
29920 arm_issue_rate (void)
29922 switch (arm_tune)
29924 case cortexa15:
29925 case cortexa57:
29926 return 3;
29928 case cortexr4:
29929 case cortexr4f:
29930 case cortexr5:
29931 case genericv7a:
29932 case cortexa5:
29933 case cortexa7:
29934 case cortexa8:
29935 case cortexa9:
29936 case cortexa12:
29937 case cortexa53:
29938 case fa726te:
29939 case marvell_pj4:
29940 return 2;
29942 default:
29943 return 1;
29947 /* A table and a function to perform ARM-specific name mangling for
29948 NEON vector types in order to conform to the AAPCS (see "Procedure
29949 Call Standard for the ARM Architecture", Appendix A). To qualify
29950 for emission with the mangled names defined in that document, a
29951 vector type must not only be of the correct mode but also be
29952 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29953 typedef struct
29955 machine_mode mode;
29956 const char *element_type_name;
29957 const char *aapcs_name;
29958 } arm_mangle_map_entry;
29960 static arm_mangle_map_entry arm_mangle_map[] = {
29961 /* 64-bit containerized types. */
29962 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29963 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29964 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29965 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29966 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29967 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29968 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29969 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29970 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29971 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29973 /* 128-bit containerized types. */
29974 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29975 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29976 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29977 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29978 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29979 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29980 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29981 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29982 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29983 { VOIDmode, NULL, NULL }
29986 const char *
29987 arm_mangle_type (const_tree type)
29989 arm_mangle_map_entry *pos = arm_mangle_map;
29991 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29992 has to be mangled as if it is in the "std" namespace. */
29993 if (TARGET_AAPCS_BASED
29994 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29995 return "St9__va_list";
29997 /* Half-precision float. */
29998 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29999 return "Dh";
30001 if (TREE_CODE (type) != VECTOR_TYPE)
30002 return NULL;
30004 /* Check the mode of the vector type, and the name of the vector
30005 element type, against the table. */
30006 while (pos->mode != VOIDmode)
30008 tree elt_type = TREE_TYPE (type);
30010 if (pos->mode == TYPE_MODE (type)
30011 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
30012 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
30013 pos->element_type_name))
30014 return pos->aapcs_name;
30016 pos++;
30019 /* Use the default mangling for unrecognized (possibly user-defined)
30020 vector types. */
30021 return NULL;
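/* For illustration only (the full C++ symbol depends on the rest of
   the signature): a V4SFmode vector whose element type is
   __builtin_neon_sf -- the type behind float32x4_t -- maps to
   "19__simd128_float32_t", so a function foo taking one such argument
   would mangle roughly as _Z3foo19__simd128_float32_t.  */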
30024 /* Order of allocation of core registers for Thumb: this allocation is
30025 written over the corresponding initial entries of the array
30026 initialized with REG_ALLOC_ORDER. We allocate all low registers
30027 first. Saving and restoring a low register is usually cheaper than
30028 using a call-clobbered high register. */
30030 static const int thumb_core_reg_alloc_order[] =
30032 3, 2, 1, 0, 4, 5, 6, 7,
30033 14, 12, 8, 9, 10, 11
30036 /* Adjust register allocation order when compiling for Thumb. */
30038 void
30039 arm_order_regs_for_local_alloc (void)
30041 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30042 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30043 if (TARGET_THUMB)
30044 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30045 sizeof (thumb_core_reg_alloc_order));
30048 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30050 bool
30051 arm_frame_pointer_required (void)
30053 return (cfun->has_nonlocal_label
30054 || SUBTARGET_FRAME_POINTER_REQUIRED
30055 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30058 /* Only Thumb-1 lacks support for conditional execution, so return true if
30059 the target is not Thumb-1. */
30060 static bool
30061 arm_have_conditional_execution (void)
30063 return !TARGET_THUMB1;
30066 tree
30067 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30069 machine_mode in_mode, out_mode;
30070 int in_n, out_n;
30071 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30073 if (TREE_CODE (type_out) != VECTOR_TYPE
30074 || TREE_CODE (type_in) != VECTOR_TYPE)
30075 return NULL_TREE;
30077 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30078 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30079 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30080 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30082 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30083 decl of the vectorized builtin for the appropriate vector mode.
30084 NULL_TREE is returned if no such builtin is available. */
30085 #undef ARM_CHECK_BUILTIN_MODE
30086 #define ARM_CHECK_BUILTIN_MODE(C) \
30087 (TARGET_NEON && TARGET_FPU_ARMV8 \
30088 && flag_unsafe_math_optimizations \
30089 && ARM_CHECK_BUILTIN_MODE_1 (C))
30091 #undef ARM_CHECK_BUILTIN_MODE_1
30092 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30093 (out_mode == SFmode && out_n == C \
30094 && in_mode == SFmode && in_n == C)
30096 #undef ARM_FIND_VRINT_VARIANT
30097 #define ARM_FIND_VRINT_VARIANT(N) \
30098 (ARM_CHECK_BUILTIN_MODE (2) \
30099 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30100 : (ARM_CHECK_BUILTIN_MODE (4) \
30101 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30102 : NULL_TREE))
30104 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30106 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30107 switch (fn)
30109 case BUILT_IN_FLOORF:
30110 return ARM_FIND_VRINT_VARIANT (vrintm);
30111 case BUILT_IN_CEILF:
30112 return ARM_FIND_VRINT_VARIANT (vrintp);
30113 case BUILT_IN_TRUNCF:
30114 return ARM_FIND_VRINT_VARIANT (vrintz);
30115 case BUILT_IN_ROUNDF:
30116 return ARM_FIND_VRINT_VARIANT (vrinta);
30117 #undef ARM_CHECK_BUILTIN_MODE_1
30118 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30119 (out_mode == SImode && out_n == C \
30120 && in_mode == SFmode && in_n == C)
30122 #define ARM_FIND_VCVT_VARIANT(N) \
30123 (ARM_CHECK_BUILTIN_MODE (2) \
30124 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30125 : (ARM_CHECK_BUILTIN_MODE (4) \
30126 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30127 : NULL_TREE))
30129 #define ARM_FIND_VCVTU_VARIANT(N) \
30130 (ARM_CHECK_BUILTIN_MODE (2) \
30131 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30132 : (ARM_CHECK_BUILTIN_MODE (4) \
30133 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30134 : NULL_TREE))
30135 case BUILT_IN_LROUNDF:
30136 return out_unsigned_p
30137 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30138 : ARM_FIND_VCVT_VARIANT (vcvta);
30139 case BUILT_IN_LCEILF:
30140 return out_unsigned_p
30141 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30142 : ARM_FIND_VCVT_VARIANT (vcvtp);
30143 case BUILT_IN_LFLOORF:
30144 return out_unsigned_p
30145 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30146 : ARM_FIND_VCVT_VARIANT (vcvtm);
30147 #undef ARM_CHECK_BUILTIN_MODE
30148 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30149 (out_mode == N##mode && out_n == C \
30150 && in_mode == N##mode && in_n == C)
30151 case BUILT_IN_BSWAP16:
30152 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30153 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30154 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30155 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30156 else
30157 return NULL_TREE;
30158 case BUILT_IN_BSWAP32:
30159 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30160 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30161 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30162 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30163 else
30164 return NULL_TREE;
30165 case BUILT_IN_BSWAP64:
30166 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30167 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30168 else
30169 return NULL_TREE;
30170 case BUILT_IN_COPYSIGNF:
30171 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30172 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30173 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30174 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30175 else
30176 return NULL_TREE;
30178 default:
30179 return NULL_TREE;
30182 return NULL_TREE;
30184 #undef ARM_FIND_VCVT_VARIANT
30185 #undef ARM_FIND_VCVTU_VARIANT
30186 #undef ARM_CHECK_BUILTIN_MODE
30187 #undef ARM_FIND_VRINT_VARIANT
30190 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30191 static HOST_WIDE_INT
30192 arm_vector_alignment (const_tree type)
30194 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30196 if (TARGET_AAPCS_BASED)
30197 align = MIN (align, 64);
30199 return align;
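/* For instance, a 128-bit NEON vector type (TYPE_SIZE of 128) gets
   only 64-bit alignment under an AAPCS-based ABI, while a 64-bit
   vector keeps its natural 64-bit alignment.  */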
30202 static unsigned int
30203 arm_autovectorize_vector_sizes (void)
30205 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30208 static bool
30209 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30211 /* Vectors which aren't in packed structures will not be less aligned than
30212 the natural alignment of their element type, so this is safe. */
30213 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30214 return !is_packed;
30216 return default_builtin_vector_alignment_reachable (type, is_packed);
30219 static bool
30220 arm_builtin_support_vector_misalignment (machine_mode mode,
30221 const_tree type, int misalignment,
30222 bool is_packed)
30224 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30226 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30228 if (is_packed)
30229 return align == 1;
30231 /* If the misalignment is unknown, we should be able to handle the access
30232 so long as it is not to a member of a packed data structure. */
30233 if (misalignment == -1)
30234 return true;
30236 /* Return true if the misalignment is a multiple of the natural alignment
30237 of the vector's element type. This is probably always going to be
30238 true in practice, since we've already established that this isn't a
30239 packed access. */
30240 return ((misalignment % align) == 0);
30243 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30244 is_packed);
30247 static void
30248 arm_conditional_register_usage (void)
30250 int regno;
30252 if (TARGET_THUMB1 && optimize_size)
30254 /* When optimizing for size on Thumb-1, it's better not
30255 to use the HI regs, because of the overhead of
30256 stacking them. */
30257 for (regno = FIRST_HI_REGNUM;
30258 regno <= LAST_HI_REGNUM; ++regno)
30259 fixed_regs[regno] = call_used_regs[regno] = 1;
30262 /* The link register can be clobbered by any branch insn,
30263 but we have no way to track that at present, so mark
30264 it as unavailable. */
30265 if (TARGET_THUMB1)
30266 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30268 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30270 /* VFPv3 registers are disabled when earlier VFP
30271 versions are selected due to the definition of
30272 LAST_VFP_REGNUM. */
30273 for (regno = FIRST_VFP_REGNUM;
30274 regno <= LAST_VFP_REGNUM; ++ regno)
30276 fixed_regs[regno] = 0;
30277 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30278 || regno >= FIRST_VFP_REGNUM + 32;
30282 if (TARGET_REALLY_IWMMXT)
30284 regno = FIRST_IWMMXT_GR_REGNUM;
30285 /* The 2002/10/09 revision of the XScale ABI has wCG0
30286 and wCG1 as call-preserved registers. The 2002/11/21
30287 revision changed this so that all wCG registers are
30288 scratch registers. */
30289 for (regno = FIRST_IWMMXT_GR_REGNUM;
30290 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30291 fixed_regs[regno] = 0;
30292 /* The XScale ABI has wR0 - wR9 as scratch registers,
30293 the rest as call-preserved registers. */
30294 for (regno = FIRST_IWMMXT_REGNUM;
30295 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30297 fixed_regs[regno] = 0;
30298 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30302 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30304 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30305 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30307 else if (TARGET_APCS_STACK)
30309 fixed_regs[10] = 1;
30310 call_used_regs[10] = 1;
30312 /* -mcaller-super-interworking reserves r11 for calls to
30313 _interwork_r11_call_via_rN(). Making the register global
30314 is an easy way of ensuring that it remains valid for all
30315 calls. */
30316 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30317 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30319 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30320 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30321 if (TARGET_CALLER_INTERWORKING)
30322 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30324 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30327 static reg_class_t
30328 arm_preferred_rename_class (reg_class_t rclass)
30330 /* Thumb-2 instructions using LO_REGS may be smaller than the same
30331 instructions using GENERAL_REGS. Preferring LO_REGS during the register
30332 rename pass can therefore reduce code size. */
30333 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30334 return LO_REGS;
30335 else
30336 return NO_REGS;
30339 /* Compute the attribute "length" of insn "*push_multi".
30340 So this function MUST be kept in sync with that insn pattern. */
30342 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30344 int i, regno, hi_reg;
30345 int num_saves = XVECLEN (parallel_op, 0);
30347 /* ARM mode. */
30348 if (TARGET_ARM)
30349 return 4;
30350 /* Thumb1 mode. */
30351 if (TARGET_THUMB1)
30352 return 2;
30354 /* Thumb2 mode. */
30355 regno = REGNO (first_op);
30356 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30357 for (i = 1; i < num_saves && !hi_reg; i++)
30359 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30360 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30363 if (!hi_reg)
30364 return 2;
30365 return 4;
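/* Illustrative examples for Thumb-2: "push {r4, r5, lr}" involves only
   low registers plus lr, so the 16-bit encoding applies and the length
   is 2; a push that includes any other high register, e.g.
   "push {r4, r8}", needs the 32-bit encoding and the length is 4.  */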
30368 /* Compute the number of instructions emitted by output_move_double. */
30370 arm_count_output_move_double_insns (rtx *operands)
30372 int count;
30373 rtx ops[2];
30374 /* output_move_double may modify the operands array, so call it
30375 here on a copy of the array. */
30376 ops[0] = operands[0];
30377 ops[1] = operands[1];
30378 output_move_double (ops, false, &count);
30379 return count;
30383 vfp3_const_double_for_fract_bits (rtx operand)
30385 REAL_VALUE_TYPE r0;
30387 if (!CONST_DOUBLE_P (operand))
30388 return 0;
30390 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30391 if (exact_real_inverse (DFmode, &r0))
30393 if (exact_real_truncate (DFmode, &r0))
30395 HOST_WIDE_INT value = real_to_integer (&r0);
30396 value = value & 0xffffffff;
30397 if ((value != 0) && ( (value & (value - 1)) == 0))
30398 return int_log2 (value);
30401 return 0;
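/* Example: for the constant 0.125 the exact inverse is 8.0 == 2^3, so
   this returns 3 (usable as the fractional-bits operand of a
   fixed-point conversion); for a constant such as 0.3, whose inverse
   is not a power of two, it returns 0.  */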
30405 vfp3_const_double_for_bits (rtx operand)
30407 REAL_VALUE_TYPE r0;
30409 if (!CONST_DOUBLE_P (operand))
30410 return 0;
30412 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30413 if (exact_real_truncate (DFmode, &r0))
30415 HOST_WIDE_INT value = real_to_integer (&r0);
30416 value = value & 0xffffffff;
30417 if ((value != 0) && ( (value & (value - 1)) == 0))
30418 return int_log2 (value);
30421 return 0;
30424 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30426 static void
30427 arm_pre_atomic_barrier (enum memmodel model)
30429 if (need_atomic_barrier_p (model, true))
30430 emit_insn (gen_memory_barrier ());
30433 static void
30434 arm_post_atomic_barrier (enum memmodel model)
30436 if (need_atomic_barrier_p (model, false))
30437 emit_insn (gen_memory_barrier ());
30440 /* Emit the load-exclusive and store-exclusive instructions.
30441 Use acquire and release versions if necessary. */
30443 static void
30444 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30446 rtx (*gen) (rtx, rtx);
30448 if (acq)
30450 switch (mode)
30452 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30453 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30454 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30455 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30456 default:
30457 gcc_unreachable ();
30460 else
30462 switch (mode)
30464 case QImode: gen = gen_arm_load_exclusiveqi; break;
30465 case HImode: gen = gen_arm_load_exclusivehi; break;
30466 case SImode: gen = gen_arm_load_exclusivesi; break;
30467 case DImode: gen = gen_arm_load_exclusivedi; break;
30468 default:
30469 gcc_unreachable ();
30473 emit_insn (gen (rval, mem));
30476 static void
30477 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30478 rtx mem, bool rel)
30480 rtx (*gen) (rtx, rtx, rtx);
30482 if (rel)
30484 switch (mode)
30486 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30487 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30488 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30489 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30490 default:
30491 gcc_unreachable ();
30494 else
30496 switch (mode)
30498 case QImode: gen = gen_arm_store_exclusiveqi; break;
30499 case HImode: gen = gen_arm_store_exclusivehi; break;
30500 case SImode: gen = gen_arm_store_exclusivesi; break;
30501 case DImode: gen = gen_arm_store_exclusivedi; break;
30502 default:
30503 gcc_unreachable ();
30507 emit_insn (gen (bval, rval, mem));
30510 /* Mark the previous jump instruction as unlikely. */
30512 static void
30513 emit_unlikely_jump (rtx insn)
30515 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30517 insn = emit_jump_insn (insn);
30518 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30521 /* Expand a compare and swap pattern. */
30523 void
30524 arm_expand_compare_and_swap (rtx operands[])
30526 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30527 machine_mode mode;
30528 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30530 bval = operands[0];
30531 rval = operands[1];
30532 mem = operands[2];
30533 oldval = operands[3];
30534 newval = operands[4];
30535 is_weak = operands[5];
30536 mod_s = operands[6];
30537 mod_f = operands[7];
30538 mode = GET_MODE (mem);
30540 /* Normally the succ memory model must be stronger than fail, but in the
30541 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30542 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30544 if (TARGET_HAVE_LDACQ
30545 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30546 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30547 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30549 switch (mode)
30551 case QImode:
30552 case HImode:
30553 /* For narrow modes, we're going to perform the comparison in SImode,
30554 so do the zero-extension now. */
30555 rval = gen_reg_rtx (SImode);
30556 oldval = convert_modes (SImode, mode, oldval, true);
30557 /* FALLTHRU */
30559 case SImode:
30560 /* Force the value into a register if needed. We waited until after
30561 the zero-extension above to do this properly. */
30562 if (!arm_add_operand (oldval, SImode))
30563 oldval = force_reg (SImode, oldval);
30564 break;
30566 case DImode:
30567 if (!cmpdi_operand (oldval, mode))
30568 oldval = force_reg (mode, oldval);
30569 break;
30571 default:
30572 gcc_unreachable ();
30575 switch (mode)
30577 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30578 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30579 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30580 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30581 default:
30582 gcc_unreachable ();
30585 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30587 if (mode == QImode || mode == HImode)
30588 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30590 /* In all cases, we arrange for success to be signaled by Z set.
30591 This arrangement allows for the boolean result to be used directly
30592 in a subsequent branch, post optimization. */
30593 x = gen_rtx_REG (CCmode, CC_REGNUM);
30594 x = gen_rtx_EQ (SImode, x, const0_rtx);
30595 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30598 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30599 another memory store between the load-exclusive and store-exclusive can
30600 reset the monitor from Exclusive to Open state. This means we must wait
30601 until after reload to split the pattern, lest we get a register spill in
30602 the middle of the atomic sequence. */
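/* The split roughly produces a loop of the following shape for a
   strong SImode compare-and-swap (register names, labels and the use
   of plain ldrex/strex rather than the acquire/release forms are
   illustrative):

   .Lretry:
	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	.Ldone
	strex	scratch, newval, [mem]
	cmp	scratch, #0
	bne	.Lretry
   .Ldone:

   with explicit barriers emitted around the sequence when the
   acquire/release instructions are not available.  */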
30604 void
30605 arm_split_compare_and_swap (rtx operands[])
30607 rtx rval, mem, oldval, newval, scratch;
30608 machine_mode mode;
30609 enum memmodel mod_s, mod_f;
30610 bool is_weak;
30611 rtx_code_label *label1, *label2;
30612 rtx x, cond;
30614 rval = operands[0];
30615 mem = operands[1];
30616 oldval = operands[2];
30617 newval = operands[3];
30618 is_weak = (operands[4] != const0_rtx);
30619 mod_s = (enum memmodel) INTVAL (operands[5]);
30620 mod_f = (enum memmodel) INTVAL (operands[6]);
30621 scratch = operands[7];
30622 mode = GET_MODE (mem);
30624 bool use_acquire = TARGET_HAVE_LDACQ
30625 && !(mod_s == MEMMODEL_RELAXED
30626 || mod_s == MEMMODEL_CONSUME
30627 || mod_s == MEMMODEL_RELEASE);
30629 bool use_release = TARGET_HAVE_LDACQ
30630 && !(mod_s == MEMMODEL_RELAXED
30631 || mod_s == MEMMODEL_CONSUME
30632 || mod_s == MEMMODEL_ACQUIRE);
30634 /* Checks whether a barrier is needed and emits one accordingly. */
30635 if (!(use_acquire || use_release))
30636 arm_pre_atomic_barrier (mod_s);
30638 label1 = NULL;
30639 if (!is_weak)
30641 label1 = gen_label_rtx ();
30642 emit_label (label1);
30644 label2 = gen_label_rtx ();
30646 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30648 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30649 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30650 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30651 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30652 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30654 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30656 /* Weak or strong, we want EQ to be true for success, so that we
30657 match the flags that we got from the compare above. */
30658 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30659 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30660 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30662 if (!is_weak)
30664 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30665 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30666 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30667 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30670 if (mod_f != MEMMODEL_RELAXED)
30671 emit_label (label2);
30673 /* Checks whether a barrier is needed and emits one accordingly. */
30674 if (!(use_acquire || use_release))
30675 arm_post_atomic_barrier (mod_s);
30677 if (mod_f == MEMMODEL_RELAXED)
30678 emit_label (label2);
30681 void
30682 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30683 rtx value, rtx model_rtx, rtx cond)
30685 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30686 machine_mode mode = GET_MODE (mem);
30687 machine_mode wmode = (mode == DImode ? DImode : SImode);
30688 rtx_code_label *label;
30689 rtx x;
30691 bool use_acquire = TARGET_HAVE_LDACQ
30692 && !(model == MEMMODEL_RELAXED
30693 || model == MEMMODEL_CONSUME
30694 || model == MEMMODEL_RELEASE);
30696 bool use_release = TARGET_HAVE_LDACQ
30697 && !(model == MEMMODEL_RELAXED
30698 || model == MEMMODEL_CONSUME
30699 || model == MEMMODEL_ACQUIRE);
30701 /* Checks whether a barrier is needed and emits one accordingly. */
30702 if (!(use_acquire || use_release))
30703 arm_pre_atomic_barrier (model);
30705 label = gen_label_rtx ();
30706 emit_label (label);
30708 if (new_out)
30709 new_out = gen_lowpart (wmode, new_out);
30710 if (old_out)
30711 old_out = gen_lowpart (wmode, old_out);
30712 else
30713 old_out = new_out;
30714 value = simplify_gen_subreg (wmode, value, mode, 0);
30716 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30718 switch (code)
30720 case SET:
30721 new_out = value;
30722 break;
30724 case NOT:
30725 x = gen_rtx_AND (wmode, old_out, value);
30726 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30727 x = gen_rtx_NOT (wmode, new_out);
30728 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30729 break;
30731 case MINUS:
30732 if (CONST_INT_P (value))
30734 value = GEN_INT (-INTVAL (value));
30735 code = PLUS;
30737 /* FALLTHRU */
30739 case PLUS:
30740 if (mode == DImode)
30742 /* DImode plus/minus need to clobber flags. */
30743 /* The adddi3 and subdi3 patterns are incorrectly written so that
30744 they require matching operands, even when we could easily support
30745 three operands. Thankfully, this can be fixed up post-splitting,
30746 as the individual add+adc patterns do accept three operands and
30747 post-reload cprop can make these moves go away. */
30748 emit_move_insn (new_out, old_out);
30749 if (code == PLUS)
30750 x = gen_adddi3 (new_out, new_out, value);
30751 else
30752 x = gen_subdi3 (new_out, new_out, value);
30753 emit_insn (x);
30754 break;
30756 /* FALLTHRU */
30758 default:
30759 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30760 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30761 break;
30764 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30765 use_release);
30767 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30768 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30770 /* Checks whether a barrier is needed and emits one accordingly. */
30771 if (!(use_acquire || use_release))
30772 arm_post_atomic_barrier (model);
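/* Editor's note (illustrative, not part of arm.c): a scalar model of how the
   two splitters above choose between acquire/release exclusives (available
   when TARGET_HAVE_LDACQ) and explicit barriers around the LL/SC loop.  When
   neither acquire nor release forms are used, the pre/post barrier helpers
   are called and themselves decide whether the model actually needs a
   barrier.  Struct and function names are hypothetical.  */

struct example_sync_plan
{
  int use_acquire;   /* load-exclusive carries acquire semantics  */
  int use_release;   /* store-exclusive carries release semantics */
  int use_barriers;  /* otherwise, bracket the loop with barriers */
};

static struct example_sync_plan
example_plan_for_model (int model, int have_ldacq)
{
  struct example_sync_plan p;
  p.use_acquire = have_ldacq
                  && !(model == __ATOMIC_RELAXED
                       || model == __ATOMIC_CONSUME
                       || model == __ATOMIC_RELEASE);
  p.use_release = have_ldacq
                  && !(model == __ATOMIC_RELAXED
                       || model == __ATOMIC_CONSUME
                       || model == __ATOMIC_ACQUIRE);
  p.use_barriers = !(p.use_acquire || p.use_release);
  return p;
}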
30775 #define MAX_VECT_LEN 16
30777 struct expand_vec_perm_d
30779 rtx target, op0, op1;
30780 unsigned char perm[MAX_VECT_LEN];
30781 machine_mode vmode;
30782 unsigned char nelt;
30783 bool one_vector_p;
30784 bool testing_p;
30787 /* Generate a variable permutation. */
30789 static void
30790 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30792 machine_mode vmode = GET_MODE (target);
30793 bool one_vector_p = rtx_equal_p (op0, op1);
30795 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30796 gcc_checking_assert (GET_MODE (op0) == vmode);
30797 gcc_checking_assert (GET_MODE (op1) == vmode);
30798 gcc_checking_assert (GET_MODE (sel) == vmode);
30799 gcc_checking_assert (TARGET_NEON);
30801 if (one_vector_p)
30803 if (vmode == V8QImode)
30804 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30805 else
30806 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30808 else
30810 rtx pair;
30812 if (vmode == V8QImode)
30814 pair = gen_reg_rtx (V16QImode);
30815 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30816 pair = gen_lowpart (TImode, pair);
30817 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30819 else
30821 pair = gen_reg_rtx (OImode);
30822 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30823 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30828 void
30829 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30831 machine_mode vmode = GET_MODE (target);
30832 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30833 bool one_vector_p = rtx_equal_p (op0, op1);
30834 rtx rmask[MAX_VECT_LEN], mask;
30836 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30837 numbering of elements for big-endian, we must reverse the order. */
30838 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30840 /* The VTBL instruction does not use a modulo index, so we must take care
30841 of that ourselves. */
30842 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30843 for (i = 0; i < nelt; ++i)
30844 rmask[i] = mask;
30845 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30846 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30848 arm_expand_vec_perm_1 (target, op0, op1, sel);
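/* Editor's note (illustrative, not part of arm.c): VTBL reads its byte
   indices without any modulo reduction (out-of-range indices produce zero
   bytes), whereas VEC_PERM_EXPR requires indices to wrap.  The function
   above therefore ANDs the variable selector with nelt-1 (one input) or
   2*nelt-1 (two inputs); for constant selectors, arm_evpc_neon_vtbl below
   folds the mask into the constant instead.  A scalar model with a
   hypothetical name:  */

static void
example_mask_selector (unsigned char *sel, unsigned nelt, int one_vector_p)
{
  unsigned i, mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (i = 0; i < nelt; i++)
    sel[i] &= mask;
}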
30851 /* Generate or test for an insn that supports a constant permutation. */
30853 /* Recognize patterns for the VUZP insns. */
30855 static bool
30856 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30858 unsigned int i, odd, mask, nelt = d->nelt;
30859 rtx out0, out1, in0, in1, x;
30860 rtx (*gen)(rtx, rtx, rtx, rtx);
30862 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30863 return false;
30865 /* Note that these are little-endian tests. Adjust for big-endian later. */
30866 if (d->perm[0] == 0)
30867 odd = 0;
30868 else if (d->perm[0] == 1)
30869 odd = 1;
30870 else
30871 return false;
30872 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30874 for (i = 0; i < nelt; i++)
30876 unsigned elt = (i * 2 + odd) & mask;
30877 if (d->perm[i] != elt)
30878 return false;
30881 /* Success! */
30882 if (d->testing_p)
30883 return true;
30885 switch (d->vmode)
30887 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30888 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30889 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30890 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30891 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30892 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30893 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30894 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30895 default:
30896 gcc_unreachable ();
30899 in0 = d->op0;
30900 in1 = d->op1;
30901 if (BYTES_BIG_ENDIAN)
30903 x = in0, in0 = in1, in1 = x;
30904 odd = !odd;
30907 out0 = d->target;
30908 out1 = gen_reg_rtx (d->vmode);
30909 if (odd)
30910 x = out0, out0 = out1, out1 = x;
30912 emit_insn (gen (out0, in0, in1, out1));
30913 return true;
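/* Editor's note (illustrative, not part of arm.c): the little-endian index
   pattern the VUZP recogniser above accepts is "every second element of the
   concatenated inputs", i.e. perm[i] == (2*i + odd) masked to the selector
   range.  The VZIP and VTRN recognisers below have the same structure with
   different index formulas.  A scalar re-statement of the check, with a
   hypothetical name:  */

static int
example_is_vuzp_perm (const unsigned char *perm, unsigned nelt,
                      unsigned odd, int one_vector_p)
{
  unsigned i, mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (i = 0; i < nelt; i++)
    if (perm[i] != ((2 * i + odd) & mask))
      return 0;
  return 1;
}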
30916 /* Recognize patterns for the VZIP insns. */
30918 static bool
30919 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30921 unsigned int i, high, mask, nelt = d->nelt;
30922 rtx out0, out1, in0, in1, x;
30923 rtx (*gen)(rtx, rtx, rtx, rtx);
30925 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30926 return false;
30928 /* Note that these are little-endian tests. Adjust for big-endian later. */
30929 high = nelt / 2;
30930 if (d->perm[0] == high)
30931 ;
30932 else if (d->perm[0] == 0)
30933 high = 0;
30934 else
30935 return false;
30936 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30938 for (i = 0; i < nelt / 2; i++)
30940 unsigned elt = (i + high) & mask;
30941 if (d->perm[i * 2] != elt)
30942 return false;
30943 elt = (elt + nelt) & mask;
30944 if (d->perm[i * 2 + 1] != elt)
30945 return false;
30948 /* Success! */
30949 if (d->testing_p)
30950 return true;
30952 switch (d->vmode)
30954 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30955 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30956 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30957 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30958 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30959 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30960 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30961 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30962 default:
30963 gcc_unreachable ();
30966 in0 = d->op0;
30967 in1 = d->op1;
30968 if (BYTES_BIG_ENDIAN)
30970 x = in0, in0 = in1, in1 = x;
30971 high = !high;
30974 out0 = d->target;
30975 out1 = gen_reg_rtx (d->vmode);
30976 if (high)
30977 x = out0, out0 = out1, out1 = x;
30979 emit_insn (gen (out0, in0, in1, out1));
30980 return true;
30983 /* Recognize patterns for the VREV insns. */
30985 static bool
30986 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30988 unsigned int i, j, diff, nelt = d->nelt;
30989 rtx (*gen)(rtx, rtx, rtx);
30991 if (!d->one_vector_p)
30992 return false;
30994 diff = d->perm[0];
30995 switch (diff)
30997 case 7:
30998 switch (d->vmode)
31000 case V16QImode: gen = gen_neon_vrev64v16qi; break;
31001 case V8QImode: gen = gen_neon_vrev64v8qi; break;
31002 default:
31003 return false;
31005 break;
31006 case 3:
31007 switch (d->vmode)
31009 case V16QImode: gen = gen_neon_vrev32v16qi; break;
31010 case V8QImode: gen = gen_neon_vrev32v8qi; break;
31011 case V8HImode: gen = gen_neon_vrev64v8hi; break;
31012 case V4HImode: gen = gen_neon_vrev64v4hi; break;
31013 default:
31014 return false;
31016 break;
31017 case 1:
31018 switch (d->vmode)
31020 case V16QImode: gen = gen_neon_vrev16v16qi; break;
31021 case V8QImode: gen = gen_neon_vrev16v8qi; break;
31022 case V8HImode: gen = gen_neon_vrev32v8hi; break;
31023 case V4HImode: gen = gen_neon_vrev32v4hi; break;
31024 case V4SImode: gen = gen_neon_vrev64v4si; break;
31025 case V2SImode: gen = gen_neon_vrev64v2si; break;
31026 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
31027 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
31028 default:
31029 return false;
31031 break;
31032 default:
31033 return false;
31036 for (i = 0; i < nelt ; i += diff + 1)
31037 for (j = 0; j <= diff; j += 1)
31039 /* This is guaranteed to be true as the value of diff
31040 is 7, 3 or 1, and we should have enough elements in the
31041 queue to generate this. Getting a vector mask with a
31042 value of diff other than these values implies that
31043 something is wrong by the time we get here. */
31044 gcc_assert (i + j < nelt);
31045 if (d->perm[i + j] != i + diff - j)
31046 return false;
31049 /* Success! */
31050 if (d->testing_p)
31051 return true;
31053 /* ??? The third operand is an artifact of the builtin infrastructure
31054 and is ignored by the actual instruction. */
31055 emit_insn (gen (d->target, d->op0, const0_rtx));
31056 return true;
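/* Editor's note (illustrative, not part of arm.c): VREV reverses elements
   within fixed-size groups, so the recogniser above accepts exactly the
   permutations with perm[i + j] == i + diff - j inside each group of
   diff + 1 elements, where diff is 7, 3 or 1 depending on the element size
   and the VREV64/VREV32/VREV16 variant.  A scalar re-statement, hypothetical
   name:  */

static int
example_is_vrev_perm (const unsigned char *perm, unsigned nelt, unsigned diff)
{
  unsigned i, j;
  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j++)
      if (perm[i + j] != i + diff - j)
        return 0;
  return 1;
}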
31059 /* Recognize patterns for the VTRN insns. */
31061 static bool
31062 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31064 unsigned int i, odd, mask, nelt = d->nelt;
31065 rtx out0, out1, in0, in1, x;
31066 rtx (*gen)(rtx, rtx, rtx, rtx);
31068 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31069 return false;
31071 /* Note that these are little-endian tests. Adjust for big-endian later. */
31072 if (d->perm[0] == 0)
31073 odd = 0;
31074 else if (d->perm[0] == 1)
31075 odd = 1;
31076 else
31077 return false;
31078 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31080 for (i = 0; i < nelt; i += 2)
31082 if (d->perm[i] != i + odd)
31083 return false;
31084 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31085 return false;
31088 /* Success! */
31089 if (d->testing_p)
31090 return true;
31092 switch (d->vmode)
31094 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31095 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31096 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31097 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31098 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31099 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31100 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31101 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31102 default:
31103 gcc_unreachable ();
31106 in0 = d->op0;
31107 in1 = d->op1;
31108 if (BYTES_BIG_ENDIAN)
31110 x = in0, in0 = in1, in1 = x;
31111 odd = !odd;
31114 out0 = d->target;
31115 out1 = gen_reg_rtx (d->vmode);
31116 if (odd)
31117 x = out0, out0 = out1, out1 = x;
31119 emit_insn (gen (out0, in0, in1, out1));
31120 return true;
31123 /* Recognize patterns for the VEXT insns. */
31125 static bool
31126 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31128 unsigned int i, nelt = d->nelt;
31129 rtx (*gen) (rtx, rtx, rtx, rtx);
31130 rtx offset;
31132 unsigned int location;
31134 unsigned int next = d->perm[0] + 1;
31136 /* TODO: Handle GCC's numbering of elements for big-endian. */
31137 if (BYTES_BIG_ENDIAN)
31138 return false;
31140 /* Check if the extracted indexes are increasing by one. */
31141 for (i = 1; i < nelt; next++, i++)
31143 /* If we hit the most significant element of the 2nd vector in
31144 the previous iteration, no need to test further. */
31145 if (next == 2 * nelt)
31146 return false;
31148 /* If we are operating on only one vector: it could be a
31149 rotation. If there are only two elements of size < 64, let
31150 arm_evpc_neon_vrev catch it. */
31151 if (d->one_vector_p && (next == nelt))
31153 if ((nelt == 2) && (d->vmode != V2DImode))
31154 return false;
31155 else
31156 next = 0;
31159 if (d->perm[i] != next)
31160 return false;
31163 location = d->perm[0];
31165 switch (d->vmode)
31167 case V16QImode: gen = gen_neon_vextv16qi; break;
31168 case V8QImode: gen = gen_neon_vextv8qi; break;
31169 case V4HImode: gen = gen_neon_vextv4hi; break;
31170 case V8HImode: gen = gen_neon_vextv8hi; break;
31171 case V2SImode: gen = gen_neon_vextv2si; break;
31172 case V4SImode: gen = gen_neon_vextv4si; break;
31173 case V2SFmode: gen = gen_neon_vextv2sf; break;
31174 case V4SFmode: gen = gen_neon_vextv4sf; break;
31175 case V2DImode: gen = gen_neon_vextv2di; break;
31176 default:
31177 return false;
31180 /* Success! */
31181 if (d->testing_p)
31182 return true;
31184 offset = GEN_INT (location);
31185 emit_insn (gen (d->target, d->op0, d->op1, offset));
31186 return true;
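/* Editor's note (illustrative, not part of arm.c): VEXT extracts nelt
   consecutive elements, starting at "location", from the concatenation of
   the two inputs, so the indices must increase by one from perm[0]; with a
   single input the sequence may wrap around, which is the rotation case the
   code above either accepts or leaves to the VREV recogniser.  A simplified
   scalar check with a hypothetical name:  */

static int
example_is_vext_perm (const unsigned char *perm, unsigned nelt,
                      int one_vector_p)
{
  unsigned i, next = perm[0] + 1;
  for (i = 1; i < nelt; i++, next++)
    {
      if (next == 2 * nelt)
        return 0;               /* Ran off the end of the second input.  */
      if (one_vector_p && next == nelt)
        next = 0;               /* Rotation within a single input.  */
      if (perm[i] != next)
        return 0;
    }
  return 1;
}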
31189 /* The NEON VTBL instruction is a fully variable permutation that's even
31190 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31191 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31192 can do slightly better by expanding this as a constant where we don't
31193 have to apply a mask. */
31195 static bool
31196 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31198 rtx rperm[MAX_VECT_LEN], sel;
31199 machine_mode vmode = d->vmode;
31200 unsigned int i, nelt = d->nelt;
31202 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31203 numbering of elements for big-endian, we must reverse the order. */
31204 if (BYTES_BIG_ENDIAN)
31205 return false;
31207 if (d->testing_p)
31208 return true;
31210 /* Generic code will try constant permutation twice. Once with the
31211 original mode and again with the elements lowered to QImode.
31212 So wait and don't do the selector expansion ourselves. */
31213 if (vmode != V8QImode && vmode != V16QImode)
31214 return false;
31216 for (i = 0; i < nelt; ++i)
31217 rperm[i] = GEN_INT (d->perm[i]);
31218 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31219 sel = force_reg (vmode, sel);
31221 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31222 return true;
31225 static bool
31226 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31228 /* Check if the input mask matches vext before reordering the
31229 operands. */
31230 if (TARGET_NEON)
31231 if (arm_evpc_neon_vext (d))
31232 return true;
31234 /* The pattern matching functions above are written to look for a small
31235 number to begin the sequence (0, 1, N/2). If we begin with an index
31236 from the second operand, we can swap the operands. */
31237 if (d->perm[0] >= d->nelt)
31239 unsigned i, nelt = d->nelt;
31240 rtx x;
31242 for (i = 0; i < nelt; ++i)
31243 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31245 x = d->op0;
31246 d->op0 = d->op1;
31247 d->op1 = x;
31250 if (TARGET_NEON)
31252 if (arm_evpc_neon_vuzp (d))
31253 return true;
31254 if (arm_evpc_neon_vzip (d))
31255 return true;
31256 if (arm_evpc_neon_vrev (d))
31257 return true;
31258 if (arm_evpc_neon_vtrn (d))
31259 return true;
31260 return arm_evpc_neon_vtbl (d);
31262 return false;
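/* Editor's note (illustrative, not part of arm.c): after the position-
   sensitive VEXT check has run, the remaining recognisers all expect perm[0]
   to index the first operand, so when the permutation starts in the second
   operand the driver above re-biases every index by nelt (mod 2*nelt) and
   swaps the operands.  A hypothetical scalar model of that step:  */

static void
example_canonicalize_perm (unsigned char *perm, unsigned nelt,
                           const void **op0, const void **op1)
{
  unsigned i;
  const void *tmp;

  if (perm[0] < nelt)
    return;                     /* Already starts in the first operand.  */
  for (i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
  tmp = *op0, *op0 = *op1, *op1 = tmp;
}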
31265 /* Expand a vec_perm_const pattern. */
31267 bool
31268 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31270 struct expand_vec_perm_d d;
31271 int i, nelt, which;
31273 d.target = target;
31274 d.op0 = op0;
31275 d.op1 = op1;
31277 d.vmode = GET_MODE (target);
31278 gcc_assert (VECTOR_MODE_P (d.vmode));
31279 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31280 d.testing_p = false;
31282 for (i = which = 0; i < nelt; ++i)
31284 rtx e = XVECEXP (sel, 0, i);
31285 int ei = INTVAL (e) & (2 * nelt - 1);
31286 which |= (ei < nelt ? 1 : 2);
31287 d.perm[i] = ei;
31290 switch (which)
31292 default:
31293 gcc_unreachable();
31295 case 3:
31296 d.one_vector_p = false;
31297 if (!rtx_equal_p (op0, op1))
31298 break;
31300 /* The elements of PERM do not suggest that only the first operand
31301 is used, but both operands are identical. Allow easier matching
31302 of the permutation by folding the permutation into the single
31303 input vector. */
31304 /* FALLTHRU */
31305 case 2:
31306 for (i = 0; i < nelt; ++i)
31307 d.perm[i] &= nelt - 1;
31308 d.op0 = op1;
31309 d.one_vector_p = true;
31310 break;
31312 case 1:
31313 d.op1 = op0;
31314 d.one_vector_p = true;
31315 break;
31318 return arm_expand_vec_perm_const_1 (&d);
31321 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31323 static bool
31324 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31325 const unsigned char *sel)
31327 struct expand_vec_perm_d d;
31328 unsigned int i, nelt, which;
31329 bool ret;
31331 d.vmode = vmode;
31332 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31333 d.testing_p = true;
31334 memcpy (d.perm, sel, nelt);
31336 /* Categorize the set of elements in the selector. */
31337 for (i = which = 0; i < nelt; ++i)
31339 unsigned char e = d.perm[i];
31340 gcc_assert (e < 2 * nelt);
31341 which |= (e < nelt ? 1 : 2);
31344 /* If all elements are from the second vector, re-index them to refer to the first. */
31345 if (which == 2)
31346 for (i = 0; i < nelt; ++i)
31347 d.perm[i] -= nelt;
31349 /* Check whether the mask can be applied to the vector type. */
31350 d.one_vector_p = (which != 3);
31352 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31353 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31354 if (!d.one_vector_p)
31355 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31357 start_sequence ();
31358 ret = arm_expand_vec_perm_const_1 (&d);
31359 end_sequence ();
31361 return ret;
31364 bool
31365 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31367 /* If we are soft float and either have LDRD or the mode is no
31368 wider than a word, then all auto-increment forms are OK. */
31369 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31370 return true;
31372 switch (code)
31374 /* Post-increment and pre-decrement are supported for all
31375 instruction forms except for vector forms. */
31376 case ARM_POST_INC:
31377 case ARM_PRE_DEC:
31378 if (VECTOR_MODE_P (mode))
31380 if (code != ARM_PRE_DEC)
31381 return true;
31382 else
31383 return false;
31386 return true;
31388 case ARM_POST_DEC:
31389 case ARM_PRE_INC:
31390 /* Without LDRD, when the mode size is greater than the
31391 word size there is no point in auto-incrementing,
31392 because ldm and stm will not have these forms. */
31393 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31394 return false;
31396 /* Vector and floating point modes do not support
31397 these auto increment forms. */
31398 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31399 return false;
31401 return true;
31403 default:
31404 return false;
31408 return false;
31411 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31412 on ARM, since we know that shifts by negative amounts are no-ops.
31413 Additionally, the default expansion code is not available or suitable
31414 for post-reload insn splits (this can occur when the register allocator
31415 chooses not to do a shift in NEON).
31417 This function is used in both initial expand and post-reload splits, and
31418 handles all kinds of 64-bit shifts.
31420 Input requirements:
31421 - It is safe for the input and output to be the same register, but
31422 early-clobber rules apply for the shift amount and scratch registers.
31423 - Shift by register requires both scratch registers. In all other cases
31424 the scratch registers may be NULL.
31425 - Ashiftrt by a register also clobbers the CC register. */
31426 void
31427 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31428 rtx amount, rtx scratch1, rtx scratch2)
31430 rtx out_high = gen_highpart (SImode, out);
31431 rtx out_low = gen_lowpart (SImode, out);
31432 rtx in_high = gen_highpart (SImode, in);
31433 rtx in_low = gen_lowpart (SImode, in);
31435 /* Terminology:
31436 in = the register pair containing the input value.
31437 out = the destination register pair.
31438 up = the high- or low-part of each pair.
31439 down = the opposite part to "up".
31440 In a shift, we can consider bits to shift from "up"-stream to
31441 "down"-stream, so in a left-shift "up" is the low-part and "down"
31442 is the high-part of each register pair. */
31444 rtx out_up = code == ASHIFT ? out_low : out_high;
31445 rtx out_down = code == ASHIFT ? out_high : out_low;
31446 rtx in_up = code == ASHIFT ? in_low : in_high;
31447 rtx in_down = code == ASHIFT ? in_high : in_low;
31449 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31450 gcc_assert (out
31451 && (REG_P (out) || GET_CODE (out) == SUBREG)
31452 && GET_MODE (out) == DImode);
31453 gcc_assert (in
31454 && (REG_P (in) || GET_CODE (in) == SUBREG)
31455 && GET_MODE (in) == DImode);
31456 gcc_assert (amount
31457 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31458 && GET_MODE (amount) == SImode)
31459 || CONST_INT_P (amount)));
31460 gcc_assert (scratch1 == NULL
31461 || (GET_CODE (scratch1) == SCRATCH)
31462 || (GET_MODE (scratch1) == SImode
31463 && REG_P (scratch1)));
31464 gcc_assert (scratch2 == NULL
31465 || (GET_CODE (scratch2) == SCRATCH)
31466 || (GET_MODE (scratch2) == SImode
31467 && REG_P (scratch2)));
31468 gcc_assert (!REG_P (out) || !REG_P (amount)
31469 || !HARD_REGISTER_P (out)
31470 || (REGNO (out) != REGNO (amount)
31471 && REGNO (out) + 1 != REGNO (amount)));
31473 /* Macros to make following code more readable. */
31474 #define SUB_32(DEST,SRC) \
31475 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31476 #define RSB_32(DEST,SRC) \
31477 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31478 #define SUB_S_32(DEST,SRC) \
31479 gen_addsi3_compare0 ((DEST), (SRC), \
31480 GEN_INT (-32))
31481 #define SET(DEST,SRC) \
31482 gen_rtx_SET (SImode, (DEST), (SRC))
31483 #define SHIFT(CODE,SRC,AMOUNT) \
31484 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31485 #define LSHIFT(CODE,SRC,AMOUNT) \
31486 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31487 SImode, (SRC), (AMOUNT))
31488 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31489 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31490 SImode, (SRC), (AMOUNT))
31491 #define ORR(A,B) \
31492 gen_rtx_IOR (SImode, (A), (B))
31493 #define BRANCH(COND,LABEL) \
31494 gen_arm_cond_branch ((LABEL), \
31495 gen_rtx_ ## COND (CCmode, cc_reg, \
31496 const0_rtx), \
31497 cc_reg)
31499 /* Shifts by register and shifts by constant are handled separately. */
31500 if (CONST_INT_P (amount))
31502 /* We have a shift-by-constant. */
31504 /* First, handle out-of-range shift amounts.
31505 In both cases we try to match the result an ARM instruction in a
31506 shift-by-register would give. This helps reduce execution
31507 differences between optimization levels, but it won't stop other
31508 parts of the compiler doing different things. This is "undefined"
31509 behaviour, in any case. */
31510 if (INTVAL (amount) <= 0)
31511 emit_insn (gen_movdi (out, in));
31512 else if (INTVAL (amount) >= 64)
31514 if (code == ASHIFTRT)
31516 rtx const31_rtx = GEN_INT (31);
31517 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31518 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31520 else
31521 emit_insn (gen_movdi (out, const0_rtx));
31524 /* Now handle valid shifts. */
31525 else if (INTVAL (amount) < 32)
31527 /* Shifts by a constant less than 32. */
31528 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31530 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31531 emit_insn (SET (out_down,
31532 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31533 out_down)));
31534 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31536 else
31538 /* Shifts by a constant greater than 31. */
31539 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31541 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31542 if (code == ASHIFTRT)
31543 emit_insn (gen_ashrsi3 (out_up, in_up,
31544 GEN_INT (31)));
31545 else
31546 emit_insn (SET (out_up, const0_rtx));
31549 else
31551 /* We have a shift-by-register. */
31552 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31554 /* This alternative requires the scratch registers. */
31555 gcc_assert (scratch1 && REG_P (scratch1));
31556 gcc_assert (scratch2 && REG_P (scratch2));
31558 /* We will need the values "amount-32" and "32-amount" later.
31559 Swapping them around now allows the later code to be more general. */
31560 switch (code)
31562 case ASHIFT:
31563 emit_insn (SUB_32 (scratch1, amount));
31564 emit_insn (RSB_32 (scratch2, amount));
31565 break;
31566 case ASHIFTRT:
31567 emit_insn (RSB_32 (scratch1, amount));
31568 /* Also set CC = amount > 32. */
31569 emit_insn (SUB_S_32 (scratch2, amount));
31570 break;
31571 case LSHIFTRT:
31572 emit_insn (RSB_32 (scratch1, amount));
31573 emit_insn (SUB_32 (scratch2, amount));
31574 break;
31575 default:
31576 gcc_unreachable ();
31579 /* Emit code like this:
31581 arithmetic-left:
31582 out_down = in_down << amount;
31583 out_down = (in_up << (amount - 32)) | out_down;
31584 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31585 out_up = in_up << amount;
31587 arithmetic-right:
31588 out_down = in_down >> amount;
31589 out_down = (in_up << (32 - amount)) | out_down;
31590 if (amount >= 32)
31591 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31592 out_up = (signed)in_up >> amount;
31594 logical-right:
31595 out_down = in_down >> amount;
31596 out_down = (in_up << (32 - amount)) | out_down;
31597 if (amount >= 32)
31598 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31599 out_up = (unsigned)in_up >> amount;
31601 The ARM and Thumb2 variants are the same but implemented slightly
31602 differently. If this were only called during expand we could just
31603 use the Thumb2 case and let combine do the right thing, but this
31604 can also be called from post-reload splitters. */
31606 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31608 if (!TARGET_THUMB2)
31610 /* Emit code for ARM mode. */
31611 emit_insn (SET (out_down,
31612 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31613 if (code == ASHIFTRT)
31615 rtx_code_label *done_label = gen_label_rtx ();
31616 emit_jump_insn (BRANCH (LT, done_label));
31617 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31618 out_down)));
31619 emit_label (done_label);
31621 else
31622 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31623 out_down)));
31625 else
31627 /* Emit code for Thumb2 mode.
31628 Thumb2 can't do shift and or in one insn. */
31629 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31630 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31632 if (code == ASHIFTRT)
31634 rtx_code_label *done_label = gen_label_rtx ();
31635 emit_jump_insn (BRANCH (LT, done_label));
31636 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31637 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31638 emit_label (done_label);
31640 else
31642 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31643 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31647 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31650 #undef SUB_32
31651 #undef RSB_32
31652 #undef SUB_S_32
31653 #undef SET
31654 #undef SHIFT
31655 #undef LSHIFT
31656 #undef REV_LSHIFT
31657 #undef ORR
31658 #undef BRANCH
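/* Editor's note (illustrative, not part of arm.c): a scalar model of the
   shift-by-constant cases above, for a logical right shift of a 64-bit value
   held as two 32-bit halves.  Amounts below 32 combine both input words into
   the low result word, amounts of 32..63 move bits from the high word only,
   and out-of-range amounts mimic the shift-by-register behaviour described
   in the comment.  The helper name is hypothetical.  */

#include <stdint.h>

static void
example_lshrdi_by_constant (uint32_t in_low, uint32_t in_high, int amount,
                            uint32_t *out_low, uint32_t *out_high)
{
  if (amount <= 0)
    {
      *out_low = in_low;
      *out_high = in_high;
    }
  else if (amount >= 64)
    *out_low = *out_high = 0;
  else if (amount < 32)
    {
      *out_low = (in_low >> amount) | (in_high << (32 - amount));
      *out_high = in_high >> amount;
    }
  else
    {
      *out_low = in_high >> (amount - 32);
      *out_high = 0;
    }
}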
31662 /* Return TRUE if *COMPARISON is a comparison operation that is valid
31663 for the target, and put the operands into a form the patterns accept. */
31664 bool
31665 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31667 enum rtx_code code = GET_CODE (*comparison);
31668 int code_int;
31669 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31670 ? GET_MODE (*op2) : GET_MODE (*op1);
31672 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31674 if (code == UNEQ || code == LTGT)
31675 return false;
31677 code_int = (int)code;
31678 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31679 PUT_CODE (*comparison, (enum rtx_code)code_int);
31681 switch (mode)
31683 case SImode:
31684 if (!arm_add_operand (*op1, mode))
31685 *op1 = force_reg (mode, *op1);
31686 if (!arm_add_operand (*op2, mode))
31687 *op2 = force_reg (mode, *op2);
31688 return true;
31690 case DImode:
31691 if (!cmpdi_operand (*op1, mode))
31692 *op1 = force_reg (mode, *op1);
31693 if (!cmpdi_operand (*op2, mode))
31694 *op2 = force_reg (mode, *op2);
31695 return true;
31697 case SFmode:
31698 case DFmode:
31699 if (!arm_float_compare_operand (*op1, mode))
31700 *op1 = force_reg (mode, *op1);
31701 if (!arm_float_compare_operand (*op2, mode))
31702 *op2 = force_reg (mode, *op2);
31703 return true;
31704 default:
31705 break;
31708 return false;
31712 /* Maximum number of instructions to set a block of memory. */
31713 static int
31714 arm_block_set_max_insns (void)
31716 if (optimize_function_for_size_p (cfun))
31717 return 4;
31718 else
31719 return current_tune->max_insns_inline_memset;
31722 /* Return TRUE if it's profitable to set block of memory for
31723 non-vectorized case. VAL is the value to set the memory
31724 with. LENGTH is the number of bytes to set. ALIGN is the
31725 alignment of the destination memory in bytes. UNALIGNED_P
31726 is TRUE if we can only set the memory with instructions
31727 meeting alignment requirements. USE_STRD_P is TRUE if we
31728 can use strd to set the memory. */
31729 static bool
31730 arm_block_set_non_vect_profit_p (rtx val,
31731 unsigned HOST_WIDE_INT length,
31732 unsigned HOST_WIDE_INT align,
31733 bool unaligned_p, bool use_strd_p)
31735 int num = 0;
31736 /* For a leftover of 0-7 bytes, we can set the memory block using
31737 strb/strh/str with the minimum number of instructions. */
31738 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31740 if (unaligned_p)
31742 num = arm_const_inline_cost (SET, val);
31743 num += length / align + length % align;
31745 else if (use_strd_p)
31747 num = arm_const_double_inline_cost (val);
31748 num += (length >> 3) + leftover[length & 7];
31750 else
31752 num = arm_const_inline_cost (SET, val);
31753 num += (length >> 2) + leftover[length & 3];
31756 /* We may be able to combine the last STRH/STRB pair into a single STR
31757 by shifting it one byte back. */
31758 if (unaligned_access && length > 3 && (length & 3) == 3)
31759 num--;
31761 return (num <= arm_block_set_max_insns ());
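/* Editor's note (illustrative, not part of arm.c): the leftover[] table above
   is just the number of str/strh/strb instructions needed for 0-7 trailing
   bytes, i.e. the population count of the low three bits of the length.  A
   hypothetical helper that recomputes the same values:  */

static int
example_tail_store_count (unsigned leftover)  /* 0..7 trailing bytes.  */
{
  int num = 0;
  if (leftover & 4) num++;   /* One str.   */
  if (leftover & 2) num++;   /* One strh.  */
  if (leftover & 1) num++;   /* One strb.  */
  return num;
}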
31764 /* Return TRUE if it's profitable to set block of memory for
31765 vectorized case. LENGTH is the number of bytes to set.
31766 ALIGN is the alignment of destination memory in bytes.
31767 MODE is the vector mode used to set the memory. */
31768 static bool
31769 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31770 unsigned HOST_WIDE_INT align,
31771 machine_mode mode)
31773 int num;
31774 bool unaligned_p = ((align & 3) != 0);
31775 unsigned int nelt = GET_MODE_NUNITS (mode);
31777 /* Instruction loading constant value. */
31778 num = 1;
31779 /* Instructions storing the memory. */
31780 num += (length + nelt - 1) / nelt;
31781 /* Instructions adjusting the address expression. The address only
31782 needs adjusting if the block is 4-byte aligned and the leftover
31783 bytes can only be stored by a misaligned store instruction. */
31784 if (!unaligned_p && (length & 3) != 0)
31785 num++;
31787 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31788 if (!unaligned_p && mode == V16QImode)
31789 num--;
31791 return (num <= arm_block_set_max_insns ());
31794 /* Set a block of memory using vectorization instructions for the
31795 unaligned case. We fill the first LENGTH bytes of the memory
31796 area starting from DSTBASE with byte constant VALUE. ALIGN is
31797 the alignment requirement of memory. Return TRUE if succeeded. */
31798 static bool
31799 arm_block_set_unaligned_vect (rtx dstbase,
31800 unsigned HOST_WIDE_INT length,
31801 unsigned HOST_WIDE_INT value,
31802 unsigned HOST_WIDE_INT align)
31804 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31805 rtx dst, mem;
31806 rtx val_elt, val_vec, reg;
31807 rtx rval[MAX_VECT_LEN];
31808 rtx (*gen_func) (rtx, rtx);
31809 machine_mode mode;
31810 unsigned HOST_WIDE_INT v = value;
31812 gcc_assert ((align & 0x3) != 0);
31813 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31814 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31815 if (length >= nelt_v16)
31817 mode = V16QImode;
31818 gen_func = gen_movmisalignv16qi;
31820 else
31822 mode = V8QImode;
31823 gen_func = gen_movmisalignv8qi;
31825 nelt_mode = GET_MODE_NUNITS (mode);
31826 gcc_assert (length >= nelt_mode);
31827 /* Skip if it isn't profitable. */
31828 if (!arm_block_set_vect_profit_p (length, align, mode))
31829 return false;
31831 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31832 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31834 v = sext_hwi (v, BITS_PER_WORD);
31835 val_elt = GEN_INT (v);
31836 for (j = 0; j < nelt_mode; j++)
31837 rval[j] = val_elt;
31839 reg = gen_reg_rtx (mode);
31840 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31841 /* Emit instruction loading the constant value. */
31842 emit_move_insn (reg, val_vec);
31844 /* Handle nelt_mode bytes in a vector. */
31845 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31847 emit_insn ((*gen_func) (mem, reg));
31848 if (i + 2 * nelt_mode <= length)
31849 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31852 /* If at least nelt_v8 bytes are left over, we must be in
31853 V16QImode. */
31854 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31856 /* Handle (8, 16) bytes leftover. */
31857 if (i + nelt_v8 < length)
31859 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31860 /* We are shifting bytes back, set the alignment accordingly. */
31861 if ((length & 1) != 0 && align >= 2)
31862 set_mem_align (mem, BITS_PER_UNIT);
31864 emit_insn (gen_movmisalignv16qi (mem, reg));
31866 /* Handle (0, 8] bytes leftover. */
31867 else if (i < length && i + nelt_v8 >= length)
31869 if (mode == V16QImode)
31871 reg = gen_lowpart (V8QImode, reg);
31872 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31874 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31875 + (nelt_mode - nelt_v8))));
31876 /* We are shifting bytes back, set the alignment accordingly. */
31877 if ((length & 1) != 0 && align >= 2)
31878 set_mem_align (mem, BITS_PER_UNIT);
31880 emit_insn (gen_movmisalignv8qi (mem, reg));
31883 return true;
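/* Editor's note (illustrative, not part of arm.c): in the unaligned vector
   memset above, a tail that does not fill a whole vector is written with one
   final misaligned store pulled back so it ends exactly at the end of the
   block, overlapping bytes that already hold the fill value.  A hypothetical
   model of the store offsets chosen when V16QImode is selected (length of at
   least 16 bytes):  */

#include <stdio.h>

static void
example_unaligned_vect_stores (unsigned length)
{
  unsigned i;
  for (i = 0; i + 16 <= length; i += 16)
    printf ("16-byte store at offset %u\n", i);
  if (i < length)
    {
      if (length - i > 8)
        printf ("overlapping 16-byte store at offset %u\n", length - 16);
      else
        printf ("overlapping 8-byte store at offset %u\n", length - 8);
    }
}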
31886 /* Set a block of memory using vectorization instructions for the
31887 aligned case. We fill the first LENGTH bytes of the memory area
31888 starting from DSTBASE with byte constant VALUE. ALIGN is the
31889 alignment requirement of memory. Return TRUE if succeeded. */
31890 static bool
31891 arm_block_set_aligned_vect (rtx dstbase,
31892 unsigned HOST_WIDE_INT length,
31893 unsigned HOST_WIDE_INT value,
31894 unsigned HOST_WIDE_INT align)
31896 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31897 rtx dst, addr, mem;
31898 rtx val_elt, val_vec, reg;
31899 rtx rval[MAX_VECT_LEN];
31900 machine_mode mode;
31901 unsigned HOST_WIDE_INT v = value;
31903 gcc_assert ((align & 0x3) == 0);
31904 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31905 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31906 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31907 mode = V16QImode;
31908 else
31909 mode = V8QImode;
31911 nelt_mode = GET_MODE_NUNITS (mode);
31912 gcc_assert (length >= nelt_mode);
31913 /* Skip if it isn't profitable. */
31914 if (!arm_block_set_vect_profit_p (length, align, mode))
31915 return false;
31917 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31919 v = sext_hwi (v, BITS_PER_WORD);
31920 val_elt = GEN_INT (v);
31921 for (j = 0; j < nelt_mode; j++)
31922 rval[j] = val_elt;
31924 reg = gen_reg_rtx (mode);
31925 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31926 /* Emit instruction loading the constant value. */
31927 emit_move_insn (reg, val_vec);
31929 i = 0;
31930 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31931 if (mode == V16QImode)
31933 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31934 emit_insn (gen_movmisalignv16qi (mem, reg));
31935 i += nelt_mode;
31936 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31937 if (i + nelt_v8 < length && i + nelt_v16 > length)
31939 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31940 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31941 /* We are shifting bytes back, set the alignment accordingly. */
31942 if ((length & 0x3) == 0)
31943 set_mem_align (mem, BITS_PER_UNIT * 4);
31944 else if ((length & 0x1) == 0)
31945 set_mem_align (mem, BITS_PER_UNIT * 2);
31946 else
31947 set_mem_align (mem, BITS_PER_UNIT);
31949 emit_insn (gen_movmisalignv16qi (mem, reg));
31950 return true;
31952 /* Fall through for bytes leftover. */
31953 mode = V8QImode;
31954 nelt_mode = GET_MODE_NUNITS (mode);
31955 reg = gen_lowpart (V8QImode, reg);
31958 /* Handle 8 bytes in a vector. */
31959 for (; (i + nelt_mode <= length); i += nelt_mode)
31961 addr = plus_constant (Pmode, dst, i);
31962 mem = adjust_automodify_address (dstbase, mode, addr, i);
31963 emit_move_insn (mem, reg);
31966 /* Handle single word leftover by shifting 4 bytes back. We can
31967 use aligned access for this case. */
31968 if (i + UNITS_PER_WORD == length)
31970 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31971 mem = adjust_automodify_address (dstbase, mode,
31972 addr, i - UNITS_PER_WORD);
31973 /* We are shifting 4 bytes back, set the alignment accordingly. */
31974 if (align > UNITS_PER_WORD)
31975 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31977 emit_move_insn (mem, reg);
31979 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31980 We have to use unaligned access for this case. */
31981 else if (i < length)
31983 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31984 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31985 /* We are shifting bytes back, set the alignment accordingly. */
31986 if ((length & 1) == 0)
31987 set_mem_align (mem, BITS_PER_UNIT * 2);
31988 else
31989 set_mem_align (mem, BITS_PER_UNIT);
31991 emit_insn (gen_movmisalignv8qi (mem, reg));
31994 return true;
31997 /* Set a block of memory using plain strh/strb instructions, only
31998 using instructions allowed by ALIGN on the processor. We fill the
31999 first LENGTH bytes of the memory area starting from DSTBASE
32000 with byte constant VALUE. ALIGN is the alignment requirement
32001 of memory. */
32002 static bool
32003 arm_block_set_unaligned_non_vect (rtx dstbase,
32004 unsigned HOST_WIDE_INT length,
32005 unsigned HOST_WIDE_INT value,
32006 unsigned HOST_WIDE_INT align)
32008 unsigned int i;
32009 rtx dst, addr, mem;
32010 rtx val_exp, val_reg, reg;
32011 machine_mode mode;
32012 HOST_WIDE_INT v = value;
32014 gcc_assert (align == 1 || align == 2);
32016 if (align == 2)
32017 v |= (value << BITS_PER_UNIT);
32019 v = sext_hwi (v, BITS_PER_WORD);
32020 val_exp = GEN_INT (v);
32021 /* Skip if it isn't profitable. */
32022 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32023 align, true, false))
32024 return false;
32026 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32027 mode = (align == 2 ? HImode : QImode);
32028 val_reg = force_reg (SImode, val_exp);
32029 reg = gen_lowpart (mode, val_reg);
32031 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32033 addr = plus_constant (Pmode, dst, i);
32034 mem = adjust_automodify_address (dstbase, mode, addr, i);
32035 emit_move_insn (mem, reg);
32038 /* Handle single byte leftover. */
32039 if (i + 1 == length)
32041 reg = gen_lowpart (QImode, val_reg);
32042 addr = plus_constant (Pmode, dst, i);
32043 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32044 emit_move_insn (mem, reg);
32045 i++;
32048 gcc_assert (i == length);
32049 return true;
32052 /* Set a block of memory using plain strd/str/strh/strb instructions,
32053 to permit unaligned copies on processors which support unaligned
32054 semantics for those instructions. We fill the first LENGTH bytes
32055 of the memory area starting from DSTBASE with byte constant VALUE.
32056 ALIGN is the alignment requirement of memory. */
32057 static bool
32058 arm_block_set_aligned_non_vect (rtx dstbase,
32059 unsigned HOST_WIDE_INT length,
32060 unsigned HOST_WIDE_INT value,
32061 unsigned HOST_WIDE_INT align)
32063 unsigned int i;
32064 rtx dst, addr, mem;
32065 rtx val_exp, val_reg, reg;
32066 unsigned HOST_WIDE_INT v;
32067 bool use_strd_p;
32069 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32070 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32072 v = (value | (value << 8) | (value << 16) | (value << 24));
32073 if (length < UNITS_PER_WORD)
32074 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32076 if (use_strd_p)
32077 v |= (v << BITS_PER_WORD);
32078 else
32079 v = sext_hwi (v, BITS_PER_WORD);
32081 val_exp = GEN_INT (v);
32082 /* Skip if it isn't profitable. */
32083 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32084 align, false, use_strd_p))
32086 if (!use_strd_p)
32087 return false;
32089 /* Try without strd. */
32090 v = (v >> BITS_PER_WORD);
32091 v = sext_hwi (v, BITS_PER_WORD);
32092 val_exp = GEN_INT (v);
32093 use_strd_p = false;
32094 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32095 align, false, use_strd_p))
32096 return false;
32099 i = 0;
32100 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32101 /* Handle double words using strd if possible. */
32102 if (use_strd_p)
32104 val_reg = force_reg (DImode, val_exp);
32105 reg = val_reg;
32106 for (; (i + 8 <= length); i += 8)
32108 addr = plus_constant (Pmode, dst, i);
32109 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32110 emit_move_insn (mem, reg);
32113 else
32114 val_reg = force_reg (SImode, val_exp);
32116 /* Handle words. */
32117 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32118 for (; (i + 4 <= length); i += 4)
32120 addr = plus_constant (Pmode, dst, i);
32121 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32122 if ((align & 3) == 0)
32123 emit_move_insn (mem, reg);
32124 else
32125 emit_insn (gen_unaligned_storesi (mem, reg));
32128 /* Merge last pair of STRH and STRB into a STR if possible. */
32129 if (unaligned_access && i > 0 && (i + 3) == length)
32131 addr = plus_constant (Pmode, dst, i - 1);
32132 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32133 /* We are shifting one byte back, set the alignment accordingly. */
32134 if ((align & 1) == 0)
32135 set_mem_align (mem, BITS_PER_UNIT);
32137 /* Most likely this is an unaligned access, and we can't tell at
32138 compilation time. */
32139 emit_insn (gen_unaligned_storesi (mem, reg));
32140 return true;
32143 /* Handle half word leftover. */
32144 if (i + 2 <= length)
32146 reg = gen_lowpart (HImode, val_reg);
32147 addr = plus_constant (Pmode, dst, i);
32148 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32149 if ((align & 1) == 0)
32150 emit_move_insn (mem, reg);
32151 else
32152 emit_insn (gen_unaligned_storehi (mem, reg));
32154 i += 2;
32157 /* Handle single byte leftover. */
32158 if (i + 1 == length)
32160 reg = gen_lowpart (QImode, val_reg);
32161 addr = plus_constant (Pmode, dst, i);
32162 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32163 emit_move_insn (mem, reg);
32166 return true;
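/* Editor's note (illustrative, not part of arm.c): when unaligned word stores
   are available and exactly three bytes remain after the word loop, the code
   above replaces the trailing strh+strb pair with a single unaligned str
   that starts one byte earlier and rewrites a byte that already holds the
   fill value (all bytes of the word are equal).  A memcpy-based model with
   hypothetical names:  */

#include <stdint.h>
#include <string.h>

static void
example_merge_strh_strb (unsigned char *dst, unsigned length, uint32_t fill)
{
  unsigned i = length & ~3u;          /* Bytes already covered by str.      */
  if (i > 0 && length - i == 3)
    memcpy (dst + i - 1, &fill, 4);   /* One overlapping unaligned str.     */
}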
32169 /* Set a block of memory using vectorization instructions for both
32170 aligned and unaligned cases. We fill the first LENGTH bytes of
32171 the memory area starting from DSTBASE with byte constant VALUE.
32172 ALIGN is the alignment requirement of memory. */
32173 static bool
32174 arm_block_set_vect (rtx dstbase,
32175 unsigned HOST_WIDE_INT length,
32176 unsigned HOST_WIDE_INT value,
32177 unsigned HOST_WIDE_INT align)
32179 /* Check whether we need to use unaligned store instruction. */
32180 if (((align & 3) != 0 || (length & 3) != 0)
32181 /* Check whether unaligned store instruction is available. */
32182 && (!unaligned_access || BYTES_BIG_ENDIAN))
32183 return false;
32185 if ((align & 3) == 0)
32186 return arm_block_set_aligned_vect (dstbase, length, value, align);
32187 else
32188 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32191 /* Expand a string store operation. First we try to do it using
32192 vectorization instructions, then with ARM unaligned access and
32193 double-word stores if profitable. OPERANDS[0] is the destination,
32194 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32195 initialize the memory with, OPERANDS[3] is the known alignment of the
32196 destination. */
32197 bool
32198 arm_gen_setmem (rtx *operands)
32200 rtx dstbase = operands[0];
32201 unsigned HOST_WIDE_INT length;
32202 unsigned HOST_WIDE_INT value;
32203 unsigned HOST_WIDE_INT align;
32205 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32206 return false;
32208 length = UINTVAL (operands[1]);
32209 if (length > 64)
32210 return false;
32212 value = (UINTVAL (operands[2]) & 0xFF);
32213 align = UINTVAL (operands[3]);
32214 if (TARGET_NEON && length >= 8
32215 && current_tune->string_ops_prefer_neon
32216 && arm_block_set_vect (dstbase, length, value, align))
32217 return true;
32219 if (!unaligned_access && (align & 3) != 0)
32220 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32222 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
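/* Editor's note (illustrative, not part of arm.c): the order of attempts made
   by arm_gen_setmem for a constant-length memset of at most 64 bytes, as a
   hypothetical scalar model; the *_ok flags stand in for the target checks
   made above.  */

static int
example_setmem_dispatch (unsigned length, unsigned align,
                         int neon_profitable, int unaligned_ok)
{
  if (length > 64)
    return 0;   /* Expansion refused; fall back to the library call.  */
  if (neon_profitable && length >= 8)
    return 1;   /* arm_block_set_vect.  */
  if (!unaligned_ok && (align & 3) != 0)
    return 2;   /* arm_block_set_unaligned_non_vect.  */
  return 3;     /* arm_block_set_aligned_non_vect.  */
}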
32225 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32227 static unsigned HOST_WIDE_INT
32228 arm_asan_shadow_offset (void)
32230 return (unsigned HOST_WIDE_INT) 1 << 29;
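/* Editor's note (illustrative, not part of arm.c): with AddressSanitizer's
   usual shadow mapping, shadow_addr = (addr >> 3) + shadow_offset, so this
   hook places the ARM shadow region at 0x20000000.  Hypothetical helper:  */

#include <stdint.h>

static uintptr_t
example_asan_shadow_address (uintptr_t addr)
{
  return (addr >> 3) + ((uintptr_t) 1 << 29);
}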
32234 /* This is a temporary fix for PR60655. Ideally we need
32235 to handle most of these cases in the generic part but
32236 currently we reject minus (..) (sym_ref). We try to
32237 ameliorate the case with minus (sym_ref1) (sym_ref2)
32238 where they are in the same section. */
32240 static bool
32241 arm_const_not_ok_for_debug_p (rtx p)
32243 tree decl_op0 = NULL;
32244 tree decl_op1 = NULL;
32246 if (GET_CODE (p) == MINUS)
32248 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32250 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32251 if (decl_op1
32252 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32253 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32255 if ((TREE_CODE (decl_op1) == VAR_DECL
32256 || TREE_CODE (decl_op1) == CONST_DECL)
32257 && (TREE_CODE (decl_op0) == VAR_DECL
32258 || TREE_CODE (decl_op0) == CONST_DECL))
32259 return (get_variable_section (decl_op1, false)
32260 != get_variable_section (decl_op0, false));
32262 if (TREE_CODE (decl_op1) == LABEL_DECL
32263 && TREE_CODE (decl_op0) == LABEL_DECL)
32264 return (DECL_CONTEXT (decl_op1)
32265 != DECL_CONTEXT (decl_op0));
32268 return true;
32272 return false;
32275 static void
32276 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32278 const unsigned ARM_FE_INVALID = 1;
32279 const unsigned ARM_FE_DIVBYZERO = 2;
32280 const unsigned ARM_FE_OVERFLOW = 4;
32281 const unsigned ARM_FE_UNDERFLOW = 8;
32282 const unsigned ARM_FE_INEXACT = 16;
32283 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32284 | ARM_FE_DIVBYZERO
32285 | ARM_FE_OVERFLOW
32286 | ARM_FE_UNDERFLOW
32287 | ARM_FE_INEXACT);
32288 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
32289 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32290 tree new_fenv_var, reload_fenv, restore_fnenv;
32291 tree update_call, atomic_feraiseexcept, hold_fnclex;
32293 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32294 return;
32296 /* Generate the equivalent of :
32297 unsigned int fenv_var;
32298 fenv_var = __builtin_arm_get_fpscr ();
32300 unsigned int masked_fenv;
32301 masked_fenv = fenv_var & mask;
32303 __builtin_arm_set_fpscr (masked_fenv); */
32305 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32306 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32307 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32308 mask = build_int_cst (unsigned_type_node,
32309 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32310 | ARM_FE_ALL_EXCEPT));
32311 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32312 fenv_var, build_call_expr (get_fpscr, 0));
32313 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32314 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32315 *hold = build2 (COMPOUND_EXPR, void_type_node,
32316 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32317 hold_fnclex);
32319 /* Store the value of masked_fenv to clear the exceptions:
32320 __builtin_arm_set_fpscr (masked_fenv); */
32322 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32324 /* Generate the equivalent of :
32325 unsigned int new_fenv_var;
32326 new_fenv_var = __builtin_arm_get_fpscr ();
32328 __builtin_arm_set_fpscr (fenv_var);
32330 __atomic_feraiseexcept (new_fenv_var); */
32332 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32333 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32334 build_call_expr (get_fpscr, 0));
32335 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32336 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32337 update_call = build_call_expr (atomic_feraiseexcept, 1,
32338 fold_convert (integer_type_node, new_fenv_var));
32339 *update = build2 (COMPOUND_EXPR, void_type_node,
32340 build2 (COMPOUND_EXPR, void_type_node,
32341 reload_fenv, restore_fnenv), update_call);
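/* Editor's note (illustrative, not part of arm.c): at the source level the
   trees built above behave roughly like the sequence below, where MASK
   clears the FPSCR exception-enable and exception-status bits as computed
   above.  Sketch only, kept under #if 0.  */
#if 0
  unsigned fenv_var = __builtin_arm_get_fpscr ();       /* HOLD   */
  unsigned masked_fenv = fenv_var & MASK;
  __builtin_arm_set_fpscr (masked_fenv);

  __builtin_arm_set_fpscr (masked_fenv);                 /* CLEAR  */

  unsigned new_fenv_var = __builtin_arm_get_fpscr ();    /* UPDATE */
  __builtin_arm_set_fpscr (fenv_var);
  __atomic_feraiseexcept ((int) new_fenv_var);
#endif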
32344 /* Return TRUE if X is a reference to a value in a constant pool. */
32345 extern bool
32346 arm_is_constant_pool_ref (rtx x)
32348 return (MEM_P (x)
32349 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32350 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32353 #include "gt-arm.h"