[ARM Refactor Builtins: 1/8] Remove arm_neon.h's "Magic Words"
[official-gcc.git] / gcc / config / arm / arm.c
blob 36ce3877326156101bde809eda8e84debef0cb8c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
94 {
95 int i[4];
96 };
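/* Used by optimal_immediate_sequence below: it returns, via this struct,
   the up-to-four immediate operands of the best instruction sequence found
   for synthesising a constant.  */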
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx safe_vector_operand (rtx, machine_mode);
199 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
200 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
201 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
202 static tree arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond, rtx pattern);
204 static rtx_insn *emit_set_insn (rtx, rtx);
205 static rtx emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
207 tree, bool);
208 static rtx arm_function_arg (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
213 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
214 const_tree);
215 static rtx aapcs_libcall_value (machine_mode);
216 static int aapcs_select_return_coproc (const_tree, const_tree);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
220 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
221 #endif
222 #ifndef ARM_PE
223 static void arm_encode_section_info (tree, rtx, int);
224 #endif
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
230 tree, int *, int);
231 static bool arm_pass_by_reference (cumulative_args_t,
232 machine_mode, const_tree, bool);
233 static bool arm_promote_prototypes (const_tree);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree);
237 static bool arm_must_pass_in_stack (machine_mode, const_tree);
238 static bool arm_return_in_memory (const_tree, const_tree);
239 #if ARM_UNWIND_INFO
240 static void arm_unwind_emit (FILE *, rtx_insn *);
241 static bool arm_output_ttype (rtx);
242 static void arm_asm_emit_except_personality (rtx);
243 static void arm_asm_init_sections (void);
244 #endif
245 static rtx arm_dwarf_register_span (rtx);
247 static tree arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree arm_get_cookie_size (tree);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree, rtx);
259 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static const char *arm_invalid_parameter_type (const_tree t);
269 static const char *arm_invalid_return_type (const_tree t);
270 static tree arm_promoted_type (const_tree t);
271 static tree arm_convert_to_type (tree type, tree expr);
272 static bool arm_scalar_mode_supported_p (machine_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (machine_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
317 /* Table of machine attributes. */
318 static const struct attribute_spec arm_attribute_table[] =
319 {
320 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
321 affects_type_identity } */
322 /* Function calls made to this symbol must be done indirectly, because
323 it may lie outside of the 26 bit addressing range of a normal function
324 call. */
325 { "long_call", 0, 0, false, true, true, NULL, false },
326 /* Whereas these functions are always known to reside within the 26 bit
327 addressing range. */
328 { "short_call", 0, 0, false, true, true, NULL, false },
329 /* Specify the procedure call conventions for a function. */
330 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
331 false },
332 /* Interrupt Service Routines have special prologue and epilogue requirements. */
333 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
336 false },
337 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
338 false },
339 #ifdef ARM_PE
340 /* ARM/PE has three new attributes:
341 interfacearm - ?
342 dllexport - for exporting a function/variable that will live in a dll
343 dllimport - for importing a function/variable from a dll
345 Microsoft allows multiple declspecs in one __declspec, separating
346 them with spaces. We do NOT support this. Instead, use __declspec
347 multiple times.
348 */
349 { "dllimport", 0, 0, true, false, false, NULL, false },
350 { "dllexport", 0, 0, true, false, false, NULL, false },
351 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
352 false },
353 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
354 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
355 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
356 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
357 false },
358 #endif
359 { NULL, 0, 0, false, false, false, NULL, false }
360 };
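/* For reference, user code applies these attributes along the lines of
   (a sketch, not taken from this file):

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     int far_away (int) __attribute__ ((long_call));
     double vfp_abi (double) __attribute__ ((pcs ("aapcs-vfp")));

   The handler functions named above validate the arguments and attach the
   attribute to the decl or type.  */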
362 /* Initialize the GCC target structure. */
363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
364 #undef TARGET_MERGE_DECL_ATTRIBUTES
365 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
366 #endif
368 #undef TARGET_LEGITIMIZE_ADDRESS
369 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371 #undef TARGET_LRA_P
372 #define TARGET_LRA_P arm_lra_p
374 #undef TARGET_ATTRIBUTE_TABLE
375 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
377 #undef TARGET_ASM_FILE_START
378 #define TARGET_ASM_FILE_START arm_file_start
379 #undef TARGET_ASM_FILE_END
380 #define TARGET_ASM_FILE_END arm_file_end
382 #undef TARGET_ASM_ALIGNED_SI_OP
383 #define TARGET_ASM_ALIGNED_SI_OP NULL
384 #undef TARGET_ASM_INTEGER
385 #define TARGET_ASM_INTEGER arm_assemble_integer
387 #undef TARGET_PRINT_OPERAND
388 #define TARGET_PRINT_OPERAND arm_print_operand
389 #undef TARGET_PRINT_OPERAND_ADDRESS
390 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
391 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
392 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400 #undef TARGET_ASM_FUNCTION_EPILOGUE
401 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403 #undef TARGET_OPTION_OVERRIDE
404 #define TARGET_OPTION_OVERRIDE arm_option_override
406 #undef TARGET_COMP_TYPE_ATTRIBUTES
407 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
409 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
410 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
412 #undef TARGET_SCHED_ADJUST_COST
413 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
415 #undef TARGET_SCHED_REORDER
416 #define TARGET_SCHED_REORDER arm_sched_reorder
418 #undef TARGET_REGISTER_MOVE_COST
419 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
421 #undef TARGET_MEMORY_MOVE_COST
422 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
424 #undef TARGET_ENCODE_SECTION_INFO
425 #ifdef ARM_PE
426 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
427 #else
428 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
429 #endif
431 #undef TARGET_STRIP_NAME_ENCODING
432 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
434 #undef TARGET_ASM_INTERNAL_LABEL
435 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
437 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
438 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
440 #undef TARGET_FUNCTION_VALUE
441 #define TARGET_FUNCTION_VALUE arm_function_value
443 #undef TARGET_LIBCALL_VALUE
444 #define TARGET_LIBCALL_VALUE arm_libcall_value
446 #undef TARGET_FUNCTION_VALUE_REGNO_P
447 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
449 #undef TARGET_ASM_OUTPUT_MI_THUNK
450 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
451 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
452 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
454 #undef TARGET_RTX_COSTS
455 #define TARGET_RTX_COSTS arm_rtx_costs
456 #undef TARGET_ADDRESS_COST
457 #define TARGET_ADDRESS_COST arm_address_cost
459 #undef TARGET_SHIFT_TRUNCATION_MASK
460 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
461 #undef TARGET_VECTOR_MODE_SUPPORTED_P
462 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
463 #undef TARGET_ARRAY_MODE_SUPPORTED_P
464 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
465 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
466 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
467 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
468 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
469 arm_autovectorize_vector_sizes
471 #undef TARGET_MACHINE_DEPENDENT_REORG
472 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
474 #undef TARGET_INIT_BUILTINS
475 #define TARGET_INIT_BUILTINS arm_init_builtins
476 #undef TARGET_EXPAND_BUILTIN
477 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
478 #undef TARGET_BUILTIN_DECL
479 #define TARGET_BUILTIN_DECL arm_builtin_decl
481 #undef TARGET_INIT_LIBFUNCS
482 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
484 #undef TARGET_PROMOTE_FUNCTION_MODE
485 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
486 #undef TARGET_PROMOTE_PROTOTYPES
487 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
488 #undef TARGET_PASS_BY_REFERENCE
489 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
490 #undef TARGET_ARG_PARTIAL_BYTES
491 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
492 #undef TARGET_FUNCTION_ARG
493 #define TARGET_FUNCTION_ARG arm_function_arg
494 #undef TARGET_FUNCTION_ARG_ADVANCE
495 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
496 #undef TARGET_FUNCTION_ARG_BOUNDARY
497 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
499 #undef TARGET_SETUP_INCOMING_VARARGS
500 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
502 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
503 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
505 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
506 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
507 #undef TARGET_TRAMPOLINE_INIT
508 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
509 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
510 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
512 #undef TARGET_WARN_FUNC_RETURN
513 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
515 #undef TARGET_DEFAULT_SHORT_ENUMS
516 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
518 #undef TARGET_ALIGN_ANON_BITFIELD
519 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
521 #undef TARGET_NARROW_VOLATILE_BITFIELD
522 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
524 #undef TARGET_CXX_GUARD_TYPE
525 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
527 #undef TARGET_CXX_GUARD_MASK_BIT
528 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
530 #undef TARGET_CXX_GET_COOKIE_SIZE
531 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
533 #undef TARGET_CXX_COOKIE_HAS_SIZE
534 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
536 #undef TARGET_CXX_CDTOR_RETURNS_THIS
537 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
539 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
540 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
542 #undef TARGET_CXX_USE_AEABI_ATEXIT
543 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
545 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
546 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
547 arm_cxx_determine_class_data_visibility
549 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
550 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
552 #undef TARGET_RETURN_IN_MSB
553 #define TARGET_RETURN_IN_MSB arm_return_in_msb
555 #undef TARGET_RETURN_IN_MEMORY
556 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
558 #undef TARGET_MUST_PASS_IN_STACK
559 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
561 #if ARM_UNWIND_INFO
562 #undef TARGET_ASM_UNWIND_EMIT
563 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
565 /* EABI unwinding tables use a different format for the typeinfo tables. */
566 #undef TARGET_ASM_TTYPE
567 #define TARGET_ASM_TTYPE arm_output_ttype
569 #undef TARGET_ARM_EABI_UNWINDER
570 #define TARGET_ARM_EABI_UNWINDER true
572 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
573 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
575 #undef TARGET_ASM_INIT_SECTIONS
576 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
577 #endif /* ARM_UNWIND_INFO */
579 #undef TARGET_DWARF_REGISTER_SPAN
580 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
582 #undef TARGET_CANNOT_COPY_INSN_P
583 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
585 #ifdef HAVE_AS_TLS
586 #undef TARGET_HAVE_TLS
587 #define TARGET_HAVE_TLS true
588 #endif
590 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
591 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
593 #undef TARGET_LEGITIMATE_CONSTANT_P
594 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
596 #undef TARGET_CANNOT_FORCE_CONST_MEM
597 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
599 #undef TARGET_MAX_ANCHOR_OFFSET
600 #define TARGET_MAX_ANCHOR_OFFSET 4095
602 /* The minimum is set such that the total size of the block
603 for a particular anchor is -4088 + 1 + 4095 bytes, which is
604 divisible by eight, ensuring natural spacing of anchors. */
605 #undef TARGET_MIN_ANCHOR_OFFSET
606 #define TARGET_MIN_ANCHOR_OFFSET -4088
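/* Concretely: the range spans 4095 - (-4088) + 1 = 8184 bytes, and
   8184 = 8 * 1023, which is the divisibility-by-eight property referred
   to above.  */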
608 #undef TARGET_SCHED_ISSUE_RATE
609 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
611 #undef TARGET_MANGLE_TYPE
612 #define TARGET_MANGLE_TYPE arm_mangle_type
614 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
615 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
617 #undef TARGET_BUILD_BUILTIN_VA_LIST
618 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
619 #undef TARGET_EXPAND_BUILTIN_VA_START
620 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
621 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
622 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
624 #ifdef HAVE_AS_TLS
625 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
626 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
627 #endif
629 #undef TARGET_LEGITIMATE_ADDRESS_P
630 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
632 #undef TARGET_PREFERRED_RELOAD_CLASS
633 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
635 #undef TARGET_INVALID_PARAMETER_TYPE
636 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
638 #undef TARGET_INVALID_RETURN_TYPE
639 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
641 #undef TARGET_PROMOTED_TYPE
642 #define TARGET_PROMOTED_TYPE arm_promoted_type
644 #undef TARGET_CONVERT_TO_TYPE
645 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
647 #undef TARGET_SCALAR_MODE_SUPPORTED_P
648 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
650 #undef TARGET_FRAME_POINTER_REQUIRED
651 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
653 #undef TARGET_CAN_ELIMINATE
654 #define TARGET_CAN_ELIMINATE arm_can_eliminate
656 #undef TARGET_CONDITIONAL_REGISTER_USAGE
657 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
659 #undef TARGET_CLASS_LIKELY_SPILLED_P
660 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
662 #undef TARGET_VECTORIZE_BUILTINS
663 #define TARGET_VECTORIZE_BUILTINS
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
667 arm_builtin_vectorized_function
669 #undef TARGET_VECTOR_ALIGNMENT
670 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
672 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
673 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
674 arm_vector_alignment_reachable
676 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
677 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
678 arm_builtin_support_vector_misalignment
680 #undef TARGET_PREFERRED_RENAME_CLASS
681 #define TARGET_PREFERRED_RENAME_CLASS \
682 arm_preferred_rename_class
684 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
685 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
686 arm_vectorize_vec_perm_const_ok
688 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
689 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
690 arm_builtin_vectorization_cost
691 #undef TARGET_VECTORIZE_ADD_STMT_COST
692 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
694 #undef TARGET_CANONICALIZE_COMPARISON
695 #define TARGET_CANONICALIZE_COMPARISON \
696 arm_canonicalize_comparison
698 #undef TARGET_ASAN_SHADOW_OFFSET
699 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
701 #undef MAX_INSN_PER_IT_BLOCK
702 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
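/* arm_restrict_it is set by the -mrestrict-it option (and by default for
   ARMv8-A Thumb-2 code); it limits an IT block to a single conditional
   instruction, which the definition above reflects.  */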
704 #undef TARGET_CAN_USE_DOLOOP_P
705 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
707 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
708 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
710 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
711 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
713 #undef TARGET_SCHED_FUSION_PRIORITY
714 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
716 struct gcc_target targetm = TARGET_INITIALIZER;
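/* Generic code reaches all of the hooks defined above through this
   structure.  For example, an RTX cost query arrives roughly as
   (a sketch):

     int total = 0;
     bool speed = optimize_insn_for_speed_p ();
     targetm.rtx_costs (x, GET_CODE (x), outer_code, opno, &total, speed);

   which dispatches to arm_rtx_costs via TARGET_RTX_COSTS above.  */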
718 /* Obstack for minipool constant handling. */
719 static struct obstack minipool_obstack;
720 static char * minipool_startobj;
722 /* The maximum number of insns skipped which
723 will be conditionalised if possible. */
724 static int max_insns_skipped = 5;
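/* arm_option_override later replaces this default with the value from the
   selected tune_params (see, for instance, the StrongARM note further down)
   and adjusts it when optimizing for size.  */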
726 extern FILE * asm_out_file;
728 /* True if we are currently building a constant table. */
729 int making_const_table;
731 /* The processor for which instructions should be scheduled. */
732 enum processor_type arm_tune = arm_none;
734 /* The current tuning set. */
735 const struct tune_params *current_tune;
737 /* Which floating point hardware to schedule for. */
738 int arm_fpu_attr;
740 /* Which floating point hardware to use. */
741 const struct arm_fpu_desc *arm_fpu_desc;
743 /* Used for Thumb call_via trampolines. */
744 rtx thumb_call_via_label[14];
745 static int thumb_call_reg_needed;
747 /* Bit values used to identify processor capabilities. */
748 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
749 #define FL_ARCH3M (1 << 1) /* Extended multiply */
750 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
751 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
752 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
753 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
754 #define FL_THUMB (1 << 6) /* Thumb aware */
755 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
756 #define FL_STRONG (1 << 8) /* StrongARM */
757 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
758 #define FL_XSCALE (1 << 10) /* XScale */
759 /* spare (1 << 11) */
760 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
761 media instructions. */
762 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
763 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
764 Note: ARM6 & 7 derivatives only. */
765 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
766 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
767 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
768 profile. */
769 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
770 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
771 #define FL_NEON (1 << 20) /* Neon instructions. */
772 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
773 architecture. */
774 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
775 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
776 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
777 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
779 #define FL_SMALLMUL (1 << 26) /* Small multiply supported. */
781 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
782 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
784 /* Flags that only affect tuning, not available instructions. */
785 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
786 | FL_CO_PROC)
788 #define FL_FOR_ARCH2 FL_NOTM
789 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
790 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
791 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
792 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
793 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
794 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
795 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
796 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
797 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
798 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
799 #define FL_FOR_ARCH6J FL_FOR_ARCH6
800 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
801 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
802 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
803 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
804 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
805 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
806 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
807 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
808 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
809 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
810 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
811 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
813 /* The bits in this mask specify which
814 instructions we are allowed to generate. */
815 static unsigned long insn_flags = 0;
817 /* The bits in this mask specify which instruction scheduling options should
818 be used. */
819 static unsigned long tune_flags = 0;
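/* Both masks are filled in by arm_option_override from the entry selected
   with -mcpu/-march/-mtune; the arm_arch* and arm_tune_* globals below are
   then derived from them with tests such as (a sketch):

     arm_arch4 = (insn_flags & FL_ARCH4) != 0;
     arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;  */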
821 /* The highest ARM architecture version supported by the
822 target. */
823 enum base_architecture arm_base_arch = BASE_ARCH_0;
825 /* The following are used in the arm.md file as equivalents to bits
826 in the above two flag variables. */
828 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
829 int arm_arch3m = 0;
831 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
832 int arm_arch4 = 0;
834 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
835 int arm_arch4t = 0;
837 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
838 int arm_arch5 = 0;
840 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
841 int arm_arch5e = 0;
843 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
844 int arm_arch6 = 0;
846 /* Nonzero if this chip supports the ARM 6K extensions. */
847 int arm_arch6k = 0;
849 /* Nonzero if instructions present in ARMv6-M can be used. */
850 int arm_arch6m = 0;
852 /* Nonzero if this chip supports the ARM 7 extensions. */
853 int arm_arch7 = 0;
855 /* Nonzero if instructions not present in the 'M' profile can be used. */
856 int arm_arch_notm = 0;
858 /* Nonzero if instructions present in ARMv7E-M can be used. */
859 int arm_arch7em = 0;
861 /* Nonzero if instructions present in ARMv8 can be used. */
862 int arm_arch8 = 0;
864 /* Nonzero if this chip can benefit from load scheduling. */
865 int arm_ld_sched = 0;
867 /* Nonzero if this chip is a StrongARM. */
868 int arm_tune_strongarm = 0;
870 /* Nonzero if this chip supports Intel Wireless MMX technology. */
871 int arm_arch_iwmmxt = 0;
873 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
874 int arm_arch_iwmmxt2 = 0;
876 /* Nonzero if this chip is an XScale. */
877 int arm_arch_xscale = 0;
879 /* Nonzero if tuning for XScale */
880 int arm_tune_xscale = 0;
882 /* Nonzero if we want to tune for stores that access the write-buffer.
883 This typically means an ARM6 or ARM7 with MMU or MPU. */
884 int arm_tune_wbuf = 0;
886 /* Nonzero if tuning for Cortex-A9. */
887 int arm_tune_cortex_a9 = 0;
889 /* Nonzero if generating Thumb instructions. */
890 int thumb_code = 0;
892 /* Nonzero if generating Thumb-1 instructions. */
893 int thumb1_code = 0;
895 /* Nonzero if we should define __THUMB_INTERWORK__ in the
896 preprocessor.
897 XXX This is a bit of a hack; it's intended to help work around
898 problems in GLD, which doesn't understand that armv5t code is
899 interworking clean. */
900 int arm_cpp_interwork = 0;
902 /* Nonzero if chip supports Thumb 2. */
903 int arm_arch_thumb2;
905 /* Nonzero if chip supports integer division instruction. */
906 int arm_arch_arm_hwdiv;
907 int arm_arch_thumb_hwdiv;
909 /* Nonzero if we should use Neon to handle 64-bit operations rather
910 than core registers. */
911 int prefer_neon_for_64bits = 0;
913 /* Nonzero if we shouldn't use literal pools. */
914 bool arm_disable_literal_pool = false;
916 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
917 we must report the mode of the memory reference from
918 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
919 machine_mode output_memory_reference_mode;
921 /* The register number to be used for the PIC offset register. */
922 unsigned arm_pic_register = INVALID_REGNUM;
924 enum arm_pcs arm_pcs_default;
926 /* For an explanation of these variables, see final_prescan_insn below. */
927 int arm_ccfsm_state;
928 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
929 enum arm_cond_code arm_current_cc;
931 rtx arm_target_insn;
932 int arm_target_label;
933 /* The number of conditionally executed insns, including the current insn. */
934 int arm_condexec_count = 0;
935 /* A bitmask specifying the patterns for the IT block.
936 Zero means do not output an IT block before this insn. */
937 int arm_condexec_mask = 0;
938 /* The number of bits used in arm_condexec_mask. */
939 int arm_condexec_masklen = 0;
941 /* Nonzero if chip supports the ARMv8 CRC instructions. */
942 int arm_arch_crc = 0;
944 /* Nonzero if the core has a very small, high-latency, multiply unit. */
945 int arm_m_profile_small_mul = 0;
947 /* The condition codes of the ARM, and the inverse function. */
948 static const char * const arm_condition_codes[] =
949 {
950 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
951 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
952 };
954 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
955 int arm_regs_in_sequence[] =
956 {
957 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
958 };
960 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
961 #define streq(string1, string2) (strcmp (string1, string2) == 0)
963 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
964 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
965 | (1 << PIC_OFFSET_TABLE_REGNUM)))
967 /* Initialization code. */
969 struct processors
970 {
971 const char *const name;
972 enum processor_type core;
973 const char *arch;
974 enum base_architecture base_arch;
975 const unsigned long flags;
976 const struct tune_params *const tune;
977 };
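/* Tables of this type (all_cores and all_architectures, further down in
   this file) are populated by expanding the ARM_CORE and ARM_ARCH entries
   of arm-cores.def and arm-arches.def; each entry supplies the name, core
   enum, architecture string, base architecture, FL_* flags and tuning
   structure described by the fields above.  */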
980 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
981 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
982 prefetch_slots, \
983 l1_size, \
984 l1_line_size
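/* A tune_params initializer uses these macros to fill its three
   prefetch-related fields in one go: ARM_PREFETCH_NOT_BENEFICIAL for cores
   where software prefetching does not help (see the tables below), or,
   with purely illustrative numbers, something like
   ARM_PREFETCH_BENEFICIAL (4, 32, 32) where it does.  */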
986 /* arm generic vectorizer costs. */
987 static const
988 struct cpu_vec_costs arm_default_vec_cost = {
989 1, /* scalar_stmt_cost. */
990 1, /* scalar_load_cost. */
991 1, /* scalar_store_cost. */
992 1, /* vec_stmt_cost. */
993 1, /* vec_to_scalar_cost. */
994 1, /* scalar_to_vec_cost. */
995 1, /* vec_align_load_cost. */
996 1, /* vec_unalign_load_cost. */
997 1, /* vec_unalign_store_cost. */
998 1, /* vec_store_cost. */
999 3, /* cond_taken_branch_cost. */
1000 1, /* cond_not_taken_branch_cost. */
1001 };
1003 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1004 #include "aarch-cost-tables.h"
1008 const struct cpu_cost_table cortexa9_extra_costs =
1010 /* ALU */
1012 0, /* arith. */
1013 0, /* logical. */
1014 0, /* shift. */
1015 COSTS_N_INSNS (1), /* shift_reg. */
1016 COSTS_N_INSNS (1), /* arith_shift. */
1017 COSTS_N_INSNS (2), /* arith_shift_reg. */
1018 0, /* log_shift. */
1019 COSTS_N_INSNS (1), /* log_shift_reg. */
1020 COSTS_N_INSNS (1), /* extend. */
1021 COSTS_N_INSNS (2), /* extend_arith. */
1022 COSTS_N_INSNS (1), /* bfi. */
1023 COSTS_N_INSNS (1), /* bfx. */
1024 0, /* clz. */
1025 0, /* rev. */
1026 0, /* non_exec. */
1027 true /* non_exec_costs_exec. */
1030 /* MULT SImode */
1032 COSTS_N_INSNS (3), /* simple. */
1033 COSTS_N_INSNS (3), /* flag_setting. */
1034 COSTS_N_INSNS (2), /* extend. */
1035 COSTS_N_INSNS (3), /* add. */
1036 COSTS_N_INSNS (2), /* extend_add. */
1037 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1039 /* MULT DImode */
1041 0, /* simple (N/A). */
1042 0, /* flag_setting (N/A). */
1043 COSTS_N_INSNS (4), /* extend. */
1044 0, /* add (N/A). */
1045 COSTS_N_INSNS (4), /* extend_add. */
1046 0 /* idiv (N/A). */
1049 /* LD/ST */
1051 COSTS_N_INSNS (2), /* load. */
1052 COSTS_N_INSNS (2), /* load_sign_extend. */
1053 COSTS_N_INSNS (2), /* ldrd. */
1054 COSTS_N_INSNS (2), /* ldm_1st. */
1055 1, /* ldm_regs_per_insn_1st. */
1056 2, /* ldm_regs_per_insn_subsequent. */
1057 COSTS_N_INSNS (5), /* loadf. */
1058 COSTS_N_INSNS (5), /* loadd. */
1059 COSTS_N_INSNS (1), /* load_unaligned. */
1060 COSTS_N_INSNS (2), /* store. */
1061 COSTS_N_INSNS (2), /* strd. */
1062 COSTS_N_INSNS (2), /* stm_1st. */
1063 1, /* stm_regs_per_insn_1st. */
1064 2, /* stm_regs_per_insn_subsequent. */
1065 COSTS_N_INSNS (1), /* storef. */
1066 COSTS_N_INSNS (1), /* stored. */
1067 COSTS_N_INSNS (1) /* store_unaligned. */
1070 /* FP SFmode */
1072 COSTS_N_INSNS (14), /* div. */
1073 COSTS_N_INSNS (4), /* mult. */
1074 COSTS_N_INSNS (7), /* mult_addsub. */
1075 COSTS_N_INSNS (30), /* fma. */
1076 COSTS_N_INSNS (3), /* addsub. */
1077 COSTS_N_INSNS (1), /* fpconst. */
1078 COSTS_N_INSNS (1), /* neg. */
1079 COSTS_N_INSNS (3), /* compare. */
1080 COSTS_N_INSNS (3), /* widen. */
1081 COSTS_N_INSNS (3), /* narrow. */
1082 COSTS_N_INSNS (3), /* toint. */
1083 COSTS_N_INSNS (3), /* fromint. */
1084 COSTS_N_INSNS (3) /* roundint. */
1086 /* FP DFmode */
1088 COSTS_N_INSNS (24), /* div. */
1089 COSTS_N_INSNS (5), /* mult. */
1090 COSTS_N_INSNS (8), /* mult_addsub. */
1091 COSTS_N_INSNS (30), /* fma. */
1092 COSTS_N_INSNS (3), /* addsub. */
1093 COSTS_N_INSNS (1), /* fpconst. */
1094 COSTS_N_INSNS (1), /* neg. */
1095 COSTS_N_INSNS (3), /* compare. */
1096 COSTS_N_INSNS (3), /* widen. */
1097 COSTS_N_INSNS (3), /* narrow. */
1098 COSTS_N_INSNS (3), /* toint. */
1099 COSTS_N_INSNS (3), /* fromint. */
1100 COSTS_N_INSNS (3) /* roundint. */
1103 /* Vector */
1105 COSTS_N_INSNS (1) /* alu. */
1109 const struct cpu_cost_table cortexa8_extra_costs =
1111 /* ALU */
1113 0, /* arith. */
1114 0, /* logical. */
1115 COSTS_N_INSNS (1), /* shift. */
1116 0, /* shift_reg. */
1117 COSTS_N_INSNS (1), /* arith_shift. */
1118 0, /* arith_shift_reg. */
1119 COSTS_N_INSNS (1), /* log_shift. */
1120 0, /* log_shift_reg. */
1121 0, /* extend. */
1122 0, /* extend_arith. */
1123 0, /* bfi. */
1124 0, /* bfx. */
1125 0, /* clz. */
1126 0, /* rev. */
1127 0, /* non_exec. */
1128 true /* non_exec_costs_exec. */
1131 /* MULT SImode */
1133 COSTS_N_INSNS (1), /* simple. */
1134 COSTS_N_INSNS (1), /* flag_setting. */
1135 COSTS_N_INSNS (1), /* extend. */
1136 COSTS_N_INSNS (1), /* add. */
1137 COSTS_N_INSNS (1), /* extend_add. */
1138 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1140 /* MULT DImode */
1142 0, /* simple (N/A). */
1143 0, /* flag_setting (N/A). */
1144 COSTS_N_INSNS (2), /* extend. */
1145 0, /* add (N/A). */
1146 COSTS_N_INSNS (2), /* extend_add. */
1147 0 /* idiv (N/A). */
1150 /* LD/ST */
1152 COSTS_N_INSNS (1), /* load. */
1153 COSTS_N_INSNS (1), /* load_sign_extend. */
1154 COSTS_N_INSNS (1), /* ldrd. */
1155 COSTS_N_INSNS (1), /* ldm_1st. */
1156 1, /* ldm_regs_per_insn_1st. */
1157 2, /* ldm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* loadf. */
1159 COSTS_N_INSNS (1), /* loadd. */
1160 COSTS_N_INSNS (1), /* load_unaligned. */
1161 COSTS_N_INSNS (1), /* store. */
1162 COSTS_N_INSNS (1), /* strd. */
1163 COSTS_N_INSNS (1), /* stm_1st. */
1164 1, /* stm_regs_per_insn_1st. */
1165 2, /* stm_regs_per_insn_subsequent. */
1166 COSTS_N_INSNS (1), /* storef. */
1167 COSTS_N_INSNS (1), /* stored. */
1168 COSTS_N_INSNS (1) /* store_unaligned. */
1171 /* FP SFmode */
1173 COSTS_N_INSNS (36), /* div. */
1174 COSTS_N_INSNS (11), /* mult. */
1175 COSTS_N_INSNS (20), /* mult_addsub. */
1176 COSTS_N_INSNS (30), /* fma. */
1177 COSTS_N_INSNS (9), /* addsub. */
1178 COSTS_N_INSNS (3), /* fpconst. */
1179 COSTS_N_INSNS (3), /* neg. */
1180 COSTS_N_INSNS (6), /* compare. */
1181 COSTS_N_INSNS (4), /* widen. */
1182 COSTS_N_INSNS (4), /* narrow. */
1183 COSTS_N_INSNS (8), /* toint. */
1184 COSTS_N_INSNS (8), /* fromint. */
1185 COSTS_N_INSNS (8) /* roundint. */
1187 /* FP DFmode */
1189 COSTS_N_INSNS (64), /* div. */
1190 COSTS_N_INSNS (16), /* mult. */
1191 COSTS_N_INSNS (25), /* mult_addsub. */
1192 COSTS_N_INSNS (30), /* fma. */
1193 COSTS_N_INSNS (9), /* addsub. */
1194 COSTS_N_INSNS (3), /* fpconst. */
1195 COSTS_N_INSNS (3), /* neg. */
1196 COSTS_N_INSNS (6), /* compare. */
1197 COSTS_N_INSNS (6), /* widen. */
1198 COSTS_N_INSNS (6), /* narrow. */
1199 COSTS_N_INSNS (8), /* toint. */
1200 COSTS_N_INSNS (8), /* fromint. */
1201 COSTS_N_INSNS (8) /* roundint. */
1204 /* Vector */
1206 COSTS_N_INSNS (1) /* alu. */
1210 const struct cpu_cost_table cortexa5_extra_costs =
1212 /* ALU */
1214 0, /* arith. */
1215 0, /* logical. */
1216 COSTS_N_INSNS (1), /* shift. */
1217 COSTS_N_INSNS (1), /* shift_reg. */
1218 COSTS_N_INSNS (1), /* arith_shift. */
1219 COSTS_N_INSNS (1), /* arith_shift_reg. */
1220 COSTS_N_INSNS (1), /* log_shift. */
1221 COSTS_N_INSNS (1), /* log_shift_reg. */
1222 COSTS_N_INSNS (1), /* extend. */
1223 COSTS_N_INSNS (1), /* extend_arith. */
1224 COSTS_N_INSNS (1), /* bfi. */
1225 COSTS_N_INSNS (1), /* bfx. */
1226 COSTS_N_INSNS (1), /* clz. */
1227 COSTS_N_INSNS (1), /* rev. */
1228 0, /* non_exec. */
1229 true /* non_exec_costs_exec. */
1233 /* MULT SImode */
1235 0, /* simple. */
1236 COSTS_N_INSNS (1), /* flag_setting. */
1237 COSTS_N_INSNS (1), /* extend. */
1238 COSTS_N_INSNS (1), /* add. */
1239 COSTS_N_INSNS (1), /* extend_add. */
1240 COSTS_N_INSNS (7) /* idiv. */
1242 /* MULT DImode */
1244 0, /* simple (N/A). */
1245 0, /* flag_setting (N/A). */
1246 COSTS_N_INSNS (1), /* extend. */
1247 0, /* add. */
1248 COSTS_N_INSNS (2), /* extend_add. */
1249 0 /* idiv (N/A). */
1252 /* LD/ST */
1254 COSTS_N_INSNS (1), /* load. */
1255 COSTS_N_INSNS (1), /* load_sign_extend. */
1256 COSTS_N_INSNS (6), /* ldrd. */
1257 COSTS_N_INSNS (1), /* ldm_1st. */
1258 1, /* ldm_regs_per_insn_1st. */
1259 2, /* ldm_regs_per_insn_subsequent. */
1260 COSTS_N_INSNS (2), /* loadf. */
1261 COSTS_N_INSNS (4), /* loadd. */
1262 COSTS_N_INSNS (1), /* load_unaligned. */
1263 COSTS_N_INSNS (1), /* store. */
1264 COSTS_N_INSNS (3), /* strd. */
1265 COSTS_N_INSNS (1), /* stm_1st. */
1266 1, /* stm_regs_per_insn_1st. */
1267 2, /* stm_regs_per_insn_subsequent. */
1268 COSTS_N_INSNS (2), /* storef. */
1269 COSTS_N_INSNS (2), /* stored. */
1270 COSTS_N_INSNS (1) /* store_unaligned. */
1273 /* FP SFmode */
1275 COSTS_N_INSNS (15), /* div. */
1276 COSTS_N_INSNS (3), /* mult. */
1277 COSTS_N_INSNS (7), /* mult_addsub. */
1278 COSTS_N_INSNS (7), /* fma. */
1279 COSTS_N_INSNS (3), /* addsub. */
1280 COSTS_N_INSNS (3), /* fpconst. */
1281 COSTS_N_INSNS (3), /* neg. */
1282 COSTS_N_INSNS (3), /* compare. */
1283 COSTS_N_INSNS (3), /* widen. */
1284 COSTS_N_INSNS (3), /* narrow. */
1285 COSTS_N_INSNS (3), /* toint. */
1286 COSTS_N_INSNS (3), /* fromint. */
1287 COSTS_N_INSNS (3) /* roundint. */
1289 /* FP DFmode */
1291 COSTS_N_INSNS (30), /* div. */
1292 COSTS_N_INSNS (6), /* mult. */
1293 COSTS_N_INSNS (10), /* mult_addsub. */
1294 COSTS_N_INSNS (7), /* fma. */
1295 COSTS_N_INSNS (3), /* addsub. */
1296 COSTS_N_INSNS (3), /* fpconst. */
1297 COSTS_N_INSNS (3), /* neg. */
1298 COSTS_N_INSNS (3), /* compare. */
1299 COSTS_N_INSNS (3), /* widen. */
1300 COSTS_N_INSNS (3), /* narrow. */
1301 COSTS_N_INSNS (3), /* toint. */
1302 COSTS_N_INSNS (3), /* fromint. */
1303 COSTS_N_INSNS (3) /* roundint. */
1306 /* Vector */
1308 COSTS_N_INSNS (1) /* alu. */
1313 const struct cpu_cost_table cortexa7_extra_costs =
1315 /* ALU */
1317 0, /* arith. */
1318 0, /* logical. */
1319 COSTS_N_INSNS (1), /* shift. */
1320 COSTS_N_INSNS (1), /* shift_reg. */
1321 COSTS_N_INSNS (1), /* arith_shift. */
1322 COSTS_N_INSNS (1), /* arith_shift_reg. */
1323 COSTS_N_INSNS (1), /* log_shift. */
1324 COSTS_N_INSNS (1), /* log_shift_reg. */
1325 COSTS_N_INSNS (1), /* extend. */
1326 COSTS_N_INSNS (1), /* extend_arith. */
1327 COSTS_N_INSNS (1), /* bfi. */
1328 COSTS_N_INSNS (1), /* bfx. */
1329 COSTS_N_INSNS (1), /* clz. */
1330 COSTS_N_INSNS (1), /* rev. */
1331 0, /* non_exec. */
1332 true /* non_exec_costs_exec. */
1336 /* MULT SImode */
1338 0, /* simple. */
1339 COSTS_N_INSNS (1), /* flag_setting. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* add. */
1342 COSTS_N_INSNS (1), /* extend_add. */
1343 COSTS_N_INSNS (7) /* idiv. */
1345 /* MULT DImode */
1347 0, /* simple (N/A). */
1348 0, /* flag_setting (N/A). */
1349 COSTS_N_INSNS (1), /* extend. */
1350 0, /* add. */
1351 COSTS_N_INSNS (2), /* extend_add. */
1352 0 /* idiv (N/A). */
1355 /* LD/ST */
1357 COSTS_N_INSNS (1), /* load. */
1358 COSTS_N_INSNS (1), /* load_sign_extend. */
1359 COSTS_N_INSNS (3), /* ldrd. */
1360 COSTS_N_INSNS (1), /* ldm_1st. */
1361 1, /* ldm_regs_per_insn_1st. */
1362 2, /* ldm_regs_per_insn_subsequent. */
1363 COSTS_N_INSNS (2), /* loadf. */
1364 COSTS_N_INSNS (2), /* loadd. */
1365 COSTS_N_INSNS (1), /* load_unaligned. */
1366 COSTS_N_INSNS (1), /* store. */
1367 COSTS_N_INSNS (3), /* strd. */
1368 COSTS_N_INSNS (1), /* stm_1st. */
1369 1, /* stm_regs_per_insn_1st. */
1370 2, /* stm_regs_per_insn_subsequent. */
1371 COSTS_N_INSNS (2), /* storef. */
1372 COSTS_N_INSNS (2), /* stored. */
1373 COSTS_N_INSNS (1) /* store_unaligned. */
1376 /* FP SFmode */
1378 COSTS_N_INSNS (15), /* div. */
1379 COSTS_N_INSNS (3), /* mult. */
1380 COSTS_N_INSNS (7), /* mult_addsub. */
1381 COSTS_N_INSNS (7), /* fma. */
1382 COSTS_N_INSNS (3), /* addsub. */
1383 COSTS_N_INSNS (3), /* fpconst. */
1384 COSTS_N_INSNS (3), /* neg. */
1385 COSTS_N_INSNS (3), /* compare. */
1386 COSTS_N_INSNS (3), /* widen. */
1387 COSTS_N_INSNS (3), /* narrow. */
1388 COSTS_N_INSNS (3), /* toint. */
1389 COSTS_N_INSNS (3), /* fromint. */
1390 COSTS_N_INSNS (3) /* roundint. */
1392 /* FP DFmode */
1394 COSTS_N_INSNS (30), /* div. */
1395 COSTS_N_INSNS (6), /* mult. */
1396 COSTS_N_INSNS (10), /* mult_addsub. */
1397 COSTS_N_INSNS (7), /* fma. */
1398 COSTS_N_INSNS (3), /* addsub. */
1399 COSTS_N_INSNS (3), /* fpconst. */
1400 COSTS_N_INSNS (3), /* neg. */
1401 COSTS_N_INSNS (3), /* compare. */
1402 COSTS_N_INSNS (3), /* widen. */
1403 COSTS_N_INSNS (3), /* narrow. */
1404 COSTS_N_INSNS (3), /* toint. */
1405 COSTS_N_INSNS (3), /* fromint. */
1406 COSTS_N_INSNS (3) /* roundint. */
1409 /* Vector */
1411 COSTS_N_INSNS (1) /* alu. */
1415 const struct cpu_cost_table cortexa12_extra_costs =
1417 /* ALU */
1419 0, /* arith. */
1420 0, /* logical. */
1421 0, /* shift. */
1422 COSTS_N_INSNS (1), /* shift_reg. */
1423 COSTS_N_INSNS (1), /* arith_shift. */
1424 COSTS_N_INSNS (1), /* arith_shift_reg. */
1425 COSTS_N_INSNS (1), /* log_shift. */
1426 COSTS_N_INSNS (1), /* log_shift_reg. */
1427 0, /* extend. */
1428 COSTS_N_INSNS (1), /* extend_arith. */
1429 0, /* bfi. */
1430 COSTS_N_INSNS (1), /* bfx. */
1431 COSTS_N_INSNS (1), /* clz. */
1432 COSTS_N_INSNS (1), /* rev. */
1433 0, /* non_exec. */
1434 true /* non_exec_costs_exec. */
1436 /* MULT SImode */
1439 COSTS_N_INSNS (2), /* simple. */
1440 COSTS_N_INSNS (3), /* flag_setting. */
1441 COSTS_N_INSNS (2), /* extend. */
1442 COSTS_N_INSNS (3), /* add. */
1443 COSTS_N_INSNS (2), /* extend_add. */
1444 COSTS_N_INSNS (18) /* idiv. */
1446 /* MULT DImode */
1448 0, /* simple (N/A). */
1449 0, /* flag_setting (N/A). */
1450 COSTS_N_INSNS (3), /* extend. */
1451 0, /* add (N/A). */
1452 COSTS_N_INSNS (3), /* extend_add. */
1453 0 /* idiv (N/A). */
1456 /* LD/ST */
1458 COSTS_N_INSNS (3), /* load. */
1459 COSTS_N_INSNS (3), /* load_sign_extend. */
1460 COSTS_N_INSNS (3), /* ldrd. */
1461 COSTS_N_INSNS (3), /* ldm_1st. */
1462 1, /* ldm_regs_per_insn_1st. */
1463 2, /* ldm_regs_per_insn_subsequent. */
1464 COSTS_N_INSNS (3), /* loadf. */
1465 COSTS_N_INSNS (3), /* loadd. */
1466 0, /* load_unaligned. */
1467 0, /* store. */
1468 0, /* strd. */
1469 0, /* stm_1st. */
1470 1, /* stm_regs_per_insn_1st. */
1471 2, /* stm_regs_per_insn_subsequent. */
1472 COSTS_N_INSNS (2), /* storef. */
1473 COSTS_N_INSNS (2), /* stored. */
1474 0 /* store_unaligned. */
1477 /* FP SFmode */
1479 COSTS_N_INSNS (17), /* div. */
1480 COSTS_N_INSNS (4), /* mult. */
1481 COSTS_N_INSNS (8), /* mult_addsub. */
1482 COSTS_N_INSNS (8), /* fma. */
1483 COSTS_N_INSNS (4), /* addsub. */
1484 COSTS_N_INSNS (2), /* fpconst. */
1485 COSTS_N_INSNS (2), /* neg. */
1486 COSTS_N_INSNS (2), /* compare. */
1487 COSTS_N_INSNS (4), /* widen. */
1488 COSTS_N_INSNS (4), /* narrow. */
1489 COSTS_N_INSNS (4), /* toint. */
1490 COSTS_N_INSNS (4), /* fromint. */
1491 COSTS_N_INSNS (4) /* roundint. */
1493 /* FP DFmode */
1495 COSTS_N_INSNS (31), /* div. */
1496 COSTS_N_INSNS (4), /* mult. */
1497 COSTS_N_INSNS (8), /* mult_addsub. */
1498 COSTS_N_INSNS (8), /* fma. */
1499 COSTS_N_INSNS (4), /* addsub. */
1500 COSTS_N_INSNS (2), /* fpconst. */
1501 COSTS_N_INSNS (2), /* neg. */
1502 COSTS_N_INSNS (2), /* compare. */
1503 COSTS_N_INSNS (4), /* widen. */
1504 COSTS_N_INSNS (4), /* narrow. */
1505 COSTS_N_INSNS (4), /* toint. */
1506 COSTS_N_INSNS (4), /* fromint. */
1507 COSTS_N_INSNS (4) /* roundint. */
1510 /* Vector */
1512 COSTS_N_INSNS (1) /* alu. */
1516 const struct cpu_cost_table cortexa15_extra_costs =
1518 /* ALU */
1520 0, /* arith. */
1521 0, /* logical. */
1522 0, /* shift. */
1523 0, /* shift_reg. */
1524 COSTS_N_INSNS (1), /* arith_shift. */
1525 COSTS_N_INSNS (1), /* arith_shift_reg. */
1526 COSTS_N_INSNS (1), /* log_shift. */
1527 COSTS_N_INSNS (1), /* log_shift_reg. */
1528 0, /* extend. */
1529 COSTS_N_INSNS (1), /* extend_arith. */
1530 COSTS_N_INSNS (1), /* bfi. */
1531 0, /* bfx. */
1532 0, /* clz. */
1533 0, /* rev. */
1534 0, /* non_exec. */
1535 true /* non_exec_costs_exec. */
1537 /* MULT SImode */
1540 COSTS_N_INSNS (2), /* simple. */
1541 COSTS_N_INSNS (3), /* flag_setting. */
1542 COSTS_N_INSNS (2), /* extend. */
1543 COSTS_N_INSNS (2), /* add. */
1544 COSTS_N_INSNS (2), /* extend_add. */
1545 COSTS_N_INSNS (18) /* idiv. */
1547 /* MULT DImode */
1549 0, /* simple (N/A). */
1550 0, /* flag_setting (N/A). */
1551 COSTS_N_INSNS (3), /* extend. */
1552 0, /* add (N/A). */
1553 COSTS_N_INSNS (3), /* extend_add. */
1554 0 /* idiv (N/A). */
1557 /* LD/ST */
1559 COSTS_N_INSNS (3), /* load. */
1560 COSTS_N_INSNS (3), /* load_sign_extend. */
1561 COSTS_N_INSNS (3), /* ldrd. */
1562 COSTS_N_INSNS (4), /* ldm_1st. */
1563 1, /* ldm_regs_per_insn_1st. */
1564 2, /* ldm_regs_per_insn_subsequent. */
1565 COSTS_N_INSNS (4), /* loadf. */
1566 COSTS_N_INSNS (4), /* loadd. */
1567 0, /* load_unaligned. */
1568 0, /* store. */
1569 0, /* strd. */
1570 COSTS_N_INSNS (1), /* stm_1st. */
1571 1, /* stm_regs_per_insn_1st. */
1572 2, /* stm_regs_per_insn_subsequent. */
1573 0, /* storef. */
1574 0, /* stored. */
1575 0 /* store_unaligned. */
1578 /* FP SFmode */
1580 COSTS_N_INSNS (17), /* div. */
1581 COSTS_N_INSNS (4), /* mult. */
1582 COSTS_N_INSNS (8), /* mult_addsub. */
1583 COSTS_N_INSNS (8), /* fma. */
1584 COSTS_N_INSNS (4), /* addsub. */
1585 COSTS_N_INSNS (2), /* fpconst. */
1586 COSTS_N_INSNS (2), /* neg. */
1587 COSTS_N_INSNS (5), /* compare. */
1588 COSTS_N_INSNS (4), /* widen. */
1589 COSTS_N_INSNS (4), /* narrow. */
1590 COSTS_N_INSNS (4), /* toint. */
1591 COSTS_N_INSNS (4), /* fromint. */
1592 COSTS_N_INSNS (4) /* roundint. */
1594 /* FP DFmode */
1596 COSTS_N_INSNS (31), /* div. */
1597 COSTS_N_INSNS (4), /* mult. */
1598 COSTS_N_INSNS (8), /* mult_addsub. */
1599 COSTS_N_INSNS (8), /* fma. */
1600 COSTS_N_INSNS (4), /* addsub. */
1601 COSTS_N_INSNS (2), /* fpconst. */
1602 COSTS_N_INSNS (2), /* neg. */
1603 COSTS_N_INSNS (2), /* compare. */
1604 COSTS_N_INSNS (4), /* widen. */
1605 COSTS_N_INSNS (4), /* narrow. */
1606 COSTS_N_INSNS (4), /* toint. */
1607 COSTS_N_INSNS (4), /* fromint. */
1608 COSTS_N_INSNS (4) /* roundint. */
1611 /* Vector */
1613 COSTS_N_INSNS (1) /* alu. */
1617 const struct cpu_cost_table v7m_extra_costs =
1619 /* ALU */
1621 0, /* arith. */
1622 0, /* logical. */
1623 0, /* shift. */
1624 0, /* shift_reg. */
1625 0, /* arith_shift. */
1626 COSTS_N_INSNS (1), /* arith_shift_reg. */
1627 0, /* log_shift. */
1628 COSTS_N_INSNS (1), /* log_shift_reg. */
1629 0, /* extend. */
1630 COSTS_N_INSNS (1), /* extend_arith. */
1631 0, /* bfi. */
1632 0, /* bfx. */
1633 0, /* clz. */
1634 0, /* rev. */
1635 COSTS_N_INSNS (1), /* non_exec. */
1636 false /* non_exec_costs_exec. */
1639 /* MULT SImode */
1641 COSTS_N_INSNS (1), /* simple. */
1642 COSTS_N_INSNS (1), /* flag_setting. */
1643 COSTS_N_INSNS (2), /* extend. */
1644 COSTS_N_INSNS (1), /* add. */
1645 COSTS_N_INSNS (3), /* extend_add. */
1646 COSTS_N_INSNS (8) /* idiv. */
1648 /* MULT DImode */
1650 0, /* simple (N/A). */
1651 0, /* flag_setting (N/A). */
1652 COSTS_N_INSNS (2), /* extend. */
1653 0, /* add (N/A). */
1654 COSTS_N_INSNS (3), /* extend_add. */
1655 0 /* idiv (N/A). */
1658 /* LD/ST */
1660 COSTS_N_INSNS (2), /* load. */
1661 0, /* load_sign_extend. */
1662 COSTS_N_INSNS (3), /* ldrd. */
1663 COSTS_N_INSNS (2), /* ldm_1st. */
1664 1, /* ldm_regs_per_insn_1st. */
1665 1, /* ldm_regs_per_insn_subsequent. */
1666 COSTS_N_INSNS (2), /* loadf. */
1667 COSTS_N_INSNS (3), /* loadd. */
1668 COSTS_N_INSNS (1), /* load_unaligned. */
1669 COSTS_N_INSNS (2), /* store. */
1670 COSTS_N_INSNS (3), /* strd. */
1671 COSTS_N_INSNS (2), /* stm_1st. */
1672 1, /* stm_regs_per_insn_1st. */
1673 1, /* stm_regs_per_insn_subsequent. */
1674 COSTS_N_INSNS (2), /* storef. */
1675 COSTS_N_INSNS (3), /* stored. */
1676 COSTS_N_INSNS (1) /* store_unaligned. */
1679 /* FP SFmode */
1681 COSTS_N_INSNS (7), /* div. */
1682 COSTS_N_INSNS (2), /* mult. */
1683 COSTS_N_INSNS (5), /* mult_addsub. */
1684 COSTS_N_INSNS (3), /* fma. */
1685 COSTS_N_INSNS (1), /* addsub. */
1686 0, /* fpconst. */
1687 0, /* neg. */
1688 0, /* compare. */
1689 0, /* widen. */
1690 0, /* narrow. */
1691 0, /* toint. */
1692 0, /* fromint. */
1693 0 /* roundint. */
1695 /* FP DFmode */
1697 COSTS_N_INSNS (15), /* div. */
1698 COSTS_N_INSNS (5), /* mult. */
1699 COSTS_N_INSNS (7), /* mult_addsub. */
1700 COSTS_N_INSNS (7), /* fma. */
1701 COSTS_N_INSNS (3), /* addsub. */
1702 0, /* fpconst. */
1703 0, /* neg. */
1704 0, /* compare. */
1705 0, /* widen. */
1706 0, /* narrow. */
1707 0, /* toint. */
1708 0, /* fromint. */
1709 0 /* roundint. */
1712 /* Vector */
1714 COSTS_N_INSNS (1) /* alu. */
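/* A brief gloss on the units in the cost tables above: entries are rtx-cost
   values, where COSTS_N_INSNS (N) (from rtl.h) scales an instruction count
   onto that scale.  A 0 entry therefore means "no cost beyond the base
   instruction", COSTS_N_INSNS (1) means roughly one extra instruction, and
   an entry such as the Cortex-A15 SImode COSTS_N_INSNS (18) for idiv models
   an 18-instruction-equivalent latency.  */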
1718 const struct tune_params arm_slowmul_tune =
1720 arm_slowmul_rtx_costs,
1721 NULL,
1722 NULL, /* Sched adj cost. */
1723 3, /* Constant limit. */
1724 5, /* Max cond insns. */
1725 ARM_PREFETCH_NOT_BENEFICIAL,
1726 true, /* Prefer constant pool. */
1727 arm_default_branch_cost,
1728 false, /* Prefer LDRD/STRD. */
1729 {true, true}, /* Prefer non short circuit. */
1730 &arm_default_vec_cost, /* Vectorizer costs. */
1731 false, /* Prefer Neon for 64-bits bitops. */
1732 false, false, /* Prefer 32-bit encodings. */
1733 false, /* Prefer Neon for stringops. */
1734 8 /* Maximum insns to inline memset. */
1737 const struct tune_params arm_fastmul_tune =
1739 arm_fastmul_rtx_costs,
1740 NULL,
1741 NULL, /* Sched adj cost. */
1742 1, /* Constant limit. */
1743 5, /* Max cond insns. */
1744 ARM_PREFETCH_NOT_BENEFICIAL,
1745 true, /* Prefer constant pool. */
1746 arm_default_branch_cost,
1747 false, /* Prefer LDRD/STRD. */
1748 {true, true}, /* Prefer non short circuit. */
1749 &arm_default_vec_cost, /* Vectorizer costs. */
1750 false, /* Prefer Neon for 64-bits bitops. */
1751 false, false, /* Prefer 32-bit encodings. */
1752 false, /* Prefer Neon for stringops. */
1753 8 /* Maximum insns to inline memset. */
1756 /* StrongARM has early execution of branches, so a sequence that is worth
1757 skipping is shorter. Set max_insns_skipped to a lower value. */
1759 const struct tune_params arm_strongarm_tune =
1761 arm_fastmul_rtx_costs,
1762 NULL,
1763 NULL, /* Sched adj cost. */
1764 1, /* Constant limit. */
1765 3, /* Max cond insns. */
1766 ARM_PREFETCH_NOT_BENEFICIAL,
1767 true, /* Prefer constant pool. */
1768 arm_default_branch_cost,
1769 false, /* Prefer LDRD/STRD. */
1770 {true, true}, /* Prefer non short circuit. */
1771 &arm_default_vec_cost, /* Vectorizer costs. */
1772 false, /* Prefer Neon for 64-bits bitops. */
1773 false, false, /* Prefer 32-bit encodings. */
1774 false, /* Prefer Neon for stringops. */
1775 8 /* Maximum insns to inline memset. */
1778 const struct tune_params arm_xscale_tune =
1780 arm_xscale_rtx_costs,
1781 NULL,
1782 xscale_sched_adjust_cost,
1783 2, /* Constant limit. */
1784 3, /* Max cond insns. */
1785 ARM_PREFETCH_NOT_BENEFICIAL,
1786 true, /* Prefer constant pool. */
1787 arm_default_branch_cost,
1788 false, /* Prefer LDRD/STRD. */
1789 {true, true}, /* Prefer non short circuit. */
1790 &arm_default_vec_cost, /* Vectorizer costs. */
1791 false, /* Prefer Neon for 64-bits bitops. */
1792 false, false, /* Prefer 32-bit encodings. */
1793 false, /* Prefer Neon for stringops. */
1794 8 /* Maximum insns to inline memset. */
1797 const struct tune_params arm_9e_tune =
1799 arm_9e_rtx_costs,
1800 NULL,
1801 NULL, /* Sched adj cost. */
1802 1, /* Constant limit. */
1803 5, /* Max cond insns. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 true, /* Prefer constant pool. */
1806 arm_default_branch_cost,
1807 false, /* Prefer LDRD/STRD. */
1808 {true, true}, /* Prefer non short circuit. */
1809 &arm_default_vec_cost, /* Vectorizer costs. */
1810 false, /* Prefer Neon for 64-bits bitops. */
1811 false, false, /* Prefer 32-bit encodings. */
1812 false, /* Prefer Neon for stringops. */
1813 8 /* Maximum insns to inline memset. */
1816 const struct tune_params arm_v6t2_tune =
1818 arm_9e_rtx_costs,
1819 NULL,
1820 NULL, /* Sched adj cost. */
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 false, /* Prefer constant pool. */
1825 arm_default_branch_cost,
1826 false, /* Prefer LDRD/STRD. */
1827 {true, true}, /* Prefer non short circuit. */
1828 &arm_default_vec_cost, /* Vectorizer costs. */
1829 false, /* Prefer Neon for 64-bits bitops. */
1830 false, false, /* Prefer 32-bit encodings. */
1831 false, /* Prefer Neon for stringops. */
1832 8 /* Maximum insns to inline memset. */
1835 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1836 const struct tune_params arm_cortex_tune =
1838 arm_9e_rtx_costs,
1839 &generic_extra_costs,
1840 NULL, /* Sched adj cost. */
1841 1, /* Constant limit. */
1842 5, /* Max cond insns. */
1843 ARM_PREFETCH_NOT_BENEFICIAL,
1844 false, /* Prefer constant pool. */
1845 arm_default_branch_cost,
1846 false, /* Prefer LDRD/STRD. */
1847 {true, true}, /* Prefer non short circuit. */
1848 &arm_default_vec_cost, /* Vectorizer costs. */
1849 false, /* Prefer Neon for 64-bits bitops. */
1850 false, false, /* Prefer 32-bit encodings. */
1851 false, /* Prefer Neon for stringops. */
1852 8 /* Maximum insns to inline memset. */
1855 const struct tune_params arm_cortex_a8_tune =
1857 arm_9e_rtx_costs,
1858 &cortexa8_extra_costs,
1859 NULL, /* Sched adj cost. */
1860 1, /* Constant limit. */
1861 5, /* Max cond insns. */
1862 ARM_PREFETCH_NOT_BENEFICIAL,
1863 false, /* Prefer constant pool. */
1864 arm_default_branch_cost,
1865 false, /* Prefer LDRD/STRD. */
1866 {true, true}, /* Prefer non short circuit. */
1867 &arm_default_vec_cost, /* Vectorizer costs. */
1868 false, /* Prefer Neon for 64-bits bitops. */
1869 false, false, /* Prefer 32-bit encodings. */
1870 true, /* Prefer Neon for stringops. */
1871 8 /* Maximum insns to inline memset. */
1874 const struct tune_params arm_cortex_a7_tune =
1876 arm_9e_rtx_costs,
1877 &cortexa7_extra_costs,
1878 NULL,
1879 1, /* Constant limit. */
1880 5, /* Max cond insns. */
1881 ARM_PREFETCH_NOT_BENEFICIAL,
1882 false, /* Prefer constant pool. */
1883 arm_default_branch_cost,
1884 false, /* Prefer LDRD/STRD. */
1885 {true, true}, /* Prefer non short circuit. */
1886 &arm_default_vec_cost, /* Vectorizer costs. */
1887 false, /* Prefer Neon for 64-bits bitops. */
1888 false, false, /* Prefer 32-bit encodings. */
1889 true, /* Prefer Neon for stringops. */
1890 8 /* Maximum insns to inline memset. */
1893 const struct tune_params arm_cortex_a15_tune =
1895 arm_9e_rtx_costs,
1896 &cortexa15_extra_costs,
1897 NULL, /* Sched adj cost. */
1898 1, /* Constant limit. */
1899 2, /* Max cond insns. */
1900 ARM_PREFETCH_NOT_BENEFICIAL,
1901 false, /* Prefer constant pool. */
1902 arm_default_branch_cost,
1903 true, /* Prefer LDRD/STRD. */
1904 {true, true}, /* Prefer non short circuit. */
1905 &arm_default_vec_cost, /* Vectorizer costs. */
1906 false, /* Prefer Neon for 64-bits bitops. */
1907 true, true, /* Prefer 32-bit encodings. */
1908 true, /* Prefer Neon for stringops. */
1909 8 /* Maximum insns to inline memset. */
1912 const struct tune_params arm_cortex_a53_tune =
1914 arm_9e_rtx_costs,
1915 &cortexa53_extra_costs,
1916 NULL, /* Scheduler cost adjustment. */
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 ARM_PREFETCH_NOT_BENEFICIAL,
1920 false, /* Prefer constant pool. */
1921 arm_default_branch_cost,
1922 false, /* Prefer LDRD/STRD. */
1923 {true, true}, /* Prefer non short circuit. */
1924 &arm_default_vec_cost, /* Vectorizer costs. */
1925 false, /* Prefer Neon for 64-bits bitops. */
1926 false, false, /* Prefer 32-bit encodings. */
1927 false, /* Prefer Neon for stringops. */
1928 8 /* Maximum insns to inline memset. */
1931 const struct tune_params arm_cortex_a57_tune =
1933 arm_9e_rtx_costs,
1934 &cortexa57_extra_costs,
1935 NULL, /* Scheduler cost adjustment. */
1936 1, /* Constant limit. */
1937 2, /* Max cond insns. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 false, /* Prefer constant pool. */
1940 arm_default_branch_cost,
1941 true, /* Prefer LDRD/STRD. */
1942 {true, true}, /* Prefer non short circuit. */
1943 &arm_default_vec_cost, /* Vectorizer costs. */
1944 false, /* Prefer Neon for 64-bits bitops. */
1945 true, true, /* Prefer 32-bit encodings. */
1946 false, /* Prefer Neon for stringops. */
1947 8 /* Maximum insns to inline memset. */
1950 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1951 less appealing. Set max_insns_skipped to a low value. */
1953 const struct tune_params arm_cortex_a5_tune =
1955 arm_9e_rtx_costs,
1956 &cortexa5_extra_costs,
1957 NULL, /* Sched adj cost. */
1958 1, /* Constant limit. */
1959 1, /* Max cond insns. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 false, /* Prefer constant pool. */
1962 arm_cortex_a5_branch_cost,
1963 false, /* Prefer LDRD/STRD. */
1964 {false, false}, /* Prefer non short circuit. */
1965 &arm_default_vec_cost, /* Vectorizer costs. */
1966 false, /* Prefer Neon for 64-bits bitops. */
1967 false, false, /* Prefer 32-bit encodings. */
1968 true, /* Prefer Neon for stringops. */
1969 8 /* Maximum insns to inline memset. */
1972 const struct tune_params arm_cortex_a9_tune =
1974 arm_9e_rtx_costs,
1975 &cortexa9_extra_costs,
1976 cortex_a9_sched_adjust_cost,
1977 1, /* Constant limit. */
1978 5, /* Max cond insns. */
1979 ARM_PREFETCH_BENEFICIAL(4,32,32),
1980 false, /* Prefer constant pool. */
1981 arm_default_branch_cost,
1982 false, /* Prefer LDRD/STRD. */
1983 {true, true}, /* Prefer non short circuit. */
1984 &arm_default_vec_cost, /* Vectorizer costs. */
1985 false, /* Prefer Neon for 64-bits bitops. */
1986 false, false, /* Prefer 32-bit encodings. */
1987 false, /* Prefer Neon for stringops. */
1988 8 /* Maximum insns to inline memset. */
1991 const struct tune_params arm_cortex_a12_tune =
1993 arm_9e_rtx_costs,
1994 &cortexa12_extra_costs,
1995 NULL,
1996 1, /* Constant limit. */
1997 5, /* Max cond insns. */
1998 ARM_PREFETCH_BENEFICIAL(4,32,32),
1999 false, /* Prefer constant pool. */
2000 arm_default_branch_cost,
2001 true, /* Prefer LDRD/STRD. */
2002 {true, true}, /* Prefer non short circuit. */
2003 &arm_default_vec_cost, /* Vectorizer costs. */
2004 false, /* Prefer Neon for 64-bits bitops. */
2005 false, false, /* Prefer 32-bit encodings. */
2006 true, /* Prefer Neon for stringops. */
2007 8 /* Maximum insns to inline memset. */
2010 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2011 cycle to execute each. An LDR from the constant pool also takes two cycles
2012 to execute, but mildly increases pipelining opportunity (consecutive
2013 loads/stores can be pipelined together, saving one cycle), and may also
2014 improve icache utilisation. Hence we prefer the constant pool for such
2015 processors. */
2017 const struct tune_params arm_v7m_tune =
2019 arm_9e_rtx_costs,
2020 &v7m_extra_costs,
2021 NULL, /* Sched adj cost. */
2022 1, /* Constant limit. */
2023 2, /* Max cond insns. */
2024 ARM_PREFETCH_NOT_BENEFICIAL,
2025 true, /* Prefer constant pool. */
2026 arm_cortex_m_branch_cost,
2027 false, /* Prefer LDRD/STRD. */
2028 {false, false}, /* Prefer non short circuit. */
2029 &arm_default_vec_cost, /* Vectorizer costs. */
2030 false, /* Prefer Neon for 64-bits bitops. */
2031 false, false, /* Prefer 32-bit encodings. */
2032 false, /* Prefer Neon for stringops. */
2033 8 /* Maximum insns to inline memset. */
2036 /* Cortex-M7 tuning. */
2038 const struct tune_params arm_cortex_m7_tune =
2040 arm_9e_rtx_costs,
2041 &v7m_extra_costs,
2042 NULL, /* Sched adj cost. */
2043 0, /* Constant limit. */
2044 0, /* Max cond insns. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 true, /* Prefer constant pool. */
2047 arm_cortex_m_branch_cost,
2048 false, /* Prefer LDRD/STRD. */
2049 {true, true}, /* Prefer non short circuit. */
2050 &arm_default_vec_cost, /* Vectorizer costs. */
2051 false, /* Prefer Neon for 64-bits bitops. */
2052 false, false, /* Prefer 32-bit encodings. */
2053 false, /* Prefer Neon for stringops. */
2054 8 /* Maximum insns to inline memset. */
2057 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2058 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2059 const struct tune_params arm_v6m_tune =
2061 arm_9e_rtx_costs,
2062 NULL,
2063 NULL, /* Sched adj cost. */
2064 1, /* Constant limit. */
2065 5, /* Max cond insns. */
2066 ARM_PREFETCH_NOT_BENEFICIAL,
2067 false, /* Prefer constant pool. */
2068 arm_default_branch_cost,
2069 false, /* Prefer LDRD/STRD. */
2070 {false, false}, /* Prefer non short circuit. */
2071 &arm_default_vec_cost, /* Vectorizer costs. */
2072 false, /* Prefer Neon for 64-bits bitops. */
2073 false, false, /* Prefer 32-bit encodings. */
2074 false, /* Prefer Neon for stringops. */
2075 8 /* Maximum insns to inline memset. */
2078 const struct tune_params arm_fa726te_tune =
2080 arm_9e_rtx_costs,
2081 NULL,
2082 fa726te_sched_adjust_cost,
2083 1, /* Constant limit. */
2084 5, /* Max cond insns. */
2085 ARM_PREFETCH_NOT_BENEFICIAL,
2086 true, /* Prefer constant pool. */
2087 arm_default_branch_cost,
2088 false, /* Prefer LDRD/STRD. */
2089 {true, true}, /* Prefer non short circuit. */
2090 &arm_default_vec_cost, /* Vectorizer costs. */
2091 false, /* Prefer Neon for 64-bits bitops. */
2092 false, false, /* Prefer 32-bit encodings. */
2093 false, /* Prefer Neon for stringops. */
2094 8 /* Maximum insns to inline memset. */
2098 /* Not all of these give usefully different compilation alternatives,
2099 but there is no simple way of generalizing them. */
2100 static const struct processors all_cores[] =
2102 /* ARM Cores */
2103 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2104 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2105 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2106 #include "arm-cores.def"
2107 #undef ARM_CORE
2108 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
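/* For illustration only (the real entries live in arm-cores.def, and the
   flag set shown here is hypothetical): an entry such as

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)

   expands through the macro above to roughly

     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a8_tune},  */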
2111 static const struct processors all_architectures[] =
2113 /* ARM Architectures */
2114 /* We don't specify tuning costs here as they will be figured out
2115 from the core. */
2117 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2118 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2119 #include "arm-arches.def"
2120 #undef ARM_ARCH
2121 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2125 /* These are populated as commandline arguments are processed, or NULL
2126 if not specified. */
2127 static const struct processors *arm_selected_arch;
2128 static const struct processors *arm_selected_cpu;
2129 static const struct processors *arm_selected_tune;
2131 /* The name of the preprocessor macro to define for this architecture. */
2133 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2135 /* Available values for -mfpu=. */
2137 static const struct arm_fpu_desc all_fpus[] =
2139 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2140 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2141 #include "arm-fpus.def"
2142 #undef ARM_FPU
2146 /* Supported TLS relocations. */
2148 enum tls_reloc {
2149 TLS_GD32,
2150 TLS_LDM32,
2151 TLS_LDO32,
2152 TLS_IE32,
2153 TLS_LE32,
2154 TLS_DESCSEQ /* GNU scheme */
2157 /* The maximum number of insns to be used when loading a constant. */
2158 inline static int
2159 arm_constant_limit (bool size_p)
2161 return size_p ? 1 : current_tune->constant_limit;
2164 /* Emit an insn that's a simple single-set. Both the operands must be known
2165 to be valid. */
2166 inline static rtx_insn *
2167 emit_set_insn (rtx x, rtx y)
2169 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2172 /* Return the number of bits set in VALUE. */
2173 static unsigned
2174 bit_count (unsigned long value)
2176 unsigned long count = 0;
2178 while (value)
2180 count++;
2181 value &= value - 1; /* Clear the least-significant set bit. */
2184 return count;
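/* A worked example of the loop above: for value == 0x29 (binary 101001),
   "value &= value - 1" clears one set bit per iteration,
   0x29 -> 0x28 -> 0x20 -> 0x00, so bit_count returns 3.  */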
2187 typedef struct
2189 machine_mode mode;
2190 const char *name;
2191 } arm_fixed_mode_set;
2193 /* A small helper for setting fixed-point library libfuncs. */
2195 static void
2196 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2197 const char *funcname, const char *modename,
2198 int num_suffix)
2200 char buffer[50];
2202 if (num_suffix == 0)
2203 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2204 else
2205 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2207 set_optab_libfunc (optable, mode, buffer);
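/* For example, the calls made from arm_init_libfuncs below yield names of
   the form "__gnu_<op><mode>[<n>]" built by the sprintf above:

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
       registers "__gnu_addqq3", and
     arm_set_fixed_optab_libfunc (neg_optab, QQmode, "neg", "qq", 2)
       registers "__gnu_negqq2".  */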
2210 static void
2211 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2212 machine_mode from, const char *funcname,
2213 const char *toname, const char *fromname)
2215 char buffer[50];
2216 const char *maybe_suffix_2 = "";
2218 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2219 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2220 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2221 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2222 maybe_suffix_2 = "2";
2224 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2225 maybe_suffix_2);
2227 set_conv_libfunc (optable, to, from, buffer);
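/* Following the same scheme, a conversion between two fract modes of equal
   signedness picks up the "2" suffix chosen above, while a conversion
   involving a non-fixed-point mode does not:

     arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
                                 "fract", "hq", "qq")
       registers "__gnu_fractqqhq2"   (QQmode -> HQmode), whereas
     arm_set_fixed_conv_libfunc (fract_optab, QQmode, SFmode,
                                 "fract", "qq", "sf")
       registers "__gnu_fractsfqq"    (SFmode -> QQmode).  */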
2230 /* Set up library functions unique to ARM. */
2232 static void
2233 arm_init_libfuncs (void)
2235 /* For Linux, we have access to kernel support for atomic operations. */
2236 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2237 init_sync_libfuncs (2 * UNITS_PER_WORD);
2239 /* There are no special library functions unless we are using the
2240 ARM BPABI. */
2241 if (!TARGET_BPABI)
2242 return;
2244 /* The functions below are described in Section 4 of the "Run-Time
2245 ABI for the ARM architecture", Version 1.0. */
2247 /* Double-precision floating-point arithmetic. Table 2. */
2248 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2249 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2250 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2251 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2252 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2254 /* Double-precision comparisons. Table 3. */
2255 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2256 set_optab_libfunc (ne_optab, DFmode, NULL);
2257 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2258 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2259 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2260 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2261 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2263 /* Single-precision floating-point arithmetic. Table 4. */
2264 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2265 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2266 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2267 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2268 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2270 /* Single-precision comparisons. Table 5. */
2271 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2272 set_optab_libfunc (ne_optab, SFmode, NULL);
2273 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2274 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2275 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2276 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2277 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2279 /* Floating-point to integer conversions. Table 6. */
2280 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2281 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2282 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2283 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2284 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2285 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2286 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2287 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2289 /* Conversions between floating types. Table 7. */
2290 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2291 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2293 /* Integer to floating-point conversions. Table 8. */
2294 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2295 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2296 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2297 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2298 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2299 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2300 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2301 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2303 /* Long long. Table 9. */
2304 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2305 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2306 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2307 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2308 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2309 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2310 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2311 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2313 /* Integer (32/32->32) division. \S 4.3.1. */
2314 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2315 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2317 /* The divmod functions are designed so that they can be used for
2318 plain division, even though they return both the quotient and the
2319 remainder. The quotient is returned in the usual location (i.e.,
2320 r0 for SImode, {r0, r1} for DImode), just as would be expected
2321 for an ordinary division routine. Because the AAPCS calling
2322 conventions specify that all of { r0, r1, r2, r3 } are
2323 call-clobbered registers, there is no need to tell the compiler
2324 explicitly that those registers are clobbered by these
2325 routines. */
2326 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2327 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
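/* As an illustration of the comment above, for SImode a single call

     bl  __aeabi_idivmod     @ numerator in r0, denominator in r1

   returns the quotient in r0 and the remainder in r1, so the one routine
   serves both division and modulus without any clobbers beyond the
   normal call-clobbered registers.  */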
2329 /* For SImode division the ABI provides div-without-mod routines,
2330 which are faster. */
2331 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2332 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2334 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2335 divmod libcalls instead. */
2336 set_optab_libfunc (smod_optab, DImode, NULL);
2337 set_optab_libfunc (umod_optab, DImode, NULL);
2338 set_optab_libfunc (smod_optab, SImode, NULL);
2339 set_optab_libfunc (umod_optab, SImode, NULL);
2341 /* Half-precision float operations. The compiler handles all operations
2342 with NULL libfuncs by converting to SFmode. */
2343 switch (arm_fp16_format)
2345 case ARM_FP16_FORMAT_IEEE:
2346 case ARM_FP16_FORMAT_ALTERNATIVE:
2348 /* Conversions. */
2349 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2350 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2351 ? "__gnu_f2h_ieee"
2352 : "__gnu_f2h_alternative"));
2353 set_conv_libfunc (sext_optab, SFmode, HFmode,
2354 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2355 ? "__gnu_h2f_ieee"
2356 : "__gnu_h2f_alternative"));
2358 /* Arithmetic. */
2359 set_optab_libfunc (add_optab, HFmode, NULL);
2360 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2361 set_optab_libfunc (smul_optab, HFmode, NULL);
2362 set_optab_libfunc (neg_optab, HFmode, NULL);
2363 set_optab_libfunc (sub_optab, HFmode, NULL);
2365 /* Comparisons. */
2366 set_optab_libfunc (eq_optab, HFmode, NULL);
2367 set_optab_libfunc (ne_optab, HFmode, NULL);
2368 set_optab_libfunc (lt_optab, HFmode, NULL);
2369 set_optab_libfunc (le_optab, HFmode, NULL);
2370 set_optab_libfunc (ge_optab, HFmode, NULL);
2371 set_optab_libfunc (gt_optab, HFmode, NULL);
2372 set_optab_libfunc (unord_optab, HFmode, NULL);
2373 break;
2375 default:
2376 break;
2379 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2381 const arm_fixed_mode_set fixed_arith_modes[] =
2383 { QQmode, "qq" },
2384 { UQQmode, "uqq" },
2385 { HQmode, "hq" },
2386 { UHQmode, "uhq" },
2387 { SQmode, "sq" },
2388 { USQmode, "usq" },
2389 { DQmode, "dq" },
2390 { UDQmode, "udq" },
2391 { TQmode, "tq" },
2392 { UTQmode, "utq" },
2393 { HAmode, "ha" },
2394 { UHAmode, "uha" },
2395 { SAmode, "sa" },
2396 { USAmode, "usa" },
2397 { DAmode, "da" },
2398 { UDAmode, "uda" },
2399 { TAmode, "ta" },
2400 { UTAmode, "uta" }
2402 const arm_fixed_mode_set fixed_conv_modes[] =
2404 { QQmode, "qq" },
2405 { UQQmode, "uqq" },
2406 { HQmode, "hq" },
2407 { UHQmode, "uhq" },
2408 { SQmode, "sq" },
2409 { USQmode, "usq" },
2410 { DQmode, "dq" },
2411 { UDQmode, "udq" },
2412 { TQmode, "tq" },
2413 { UTQmode, "utq" },
2414 { HAmode, "ha" },
2415 { UHAmode, "uha" },
2416 { SAmode, "sa" },
2417 { USAmode, "usa" },
2418 { DAmode, "da" },
2419 { UDAmode, "uda" },
2420 { TAmode, "ta" },
2421 { UTAmode, "uta" },
2422 { QImode, "qi" },
2423 { HImode, "hi" },
2424 { SImode, "si" },
2425 { DImode, "di" },
2426 { TImode, "ti" },
2427 { SFmode, "sf" },
2428 { DFmode, "df" }
2430 unsigned int i, j;
2432 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2434 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2435 "add", fixed_arith_modes[i].name, 3);
2436 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2437 "ssadd", fixed_arith_modes[i].name, 3);
2438 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2439 "usadd", fixed_arith_modes[i].name, 3);
2440 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2441 "sub", fixed_arith_modes[i].name, 3);
2442 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2443 "sssub", fixed_arith_modes[i].name, 3);
2444 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2445 "ussub", fixed_arith_modes[i].name, 3);
2446 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2447 "mul", fixed_arith_modes[i].name, 3);
2448 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2449 "ssmul", fixed_arith_modes[i].name, 3);
2450 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2451 "usmul", fixed_arith_modes[i].name, 3);
2452 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2453 "div", fixed_arith_modes[i].name, 3);
2454 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2455 "udiv", fixed_arith_modes[i].name, 3);
2456 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2457 "ssdiv", fixed_arith_modes[i].name, 3);
2458 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2459 "usdiv", fixed_arith_modes[i].name, 3);
2460 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2461 "neg", fixed_arith_modes[i].name, 2);
2462 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2463 "ssneg", fixed_arith_modes[i].name, 2);
2464 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2465 "usneg", fixed_arith_modes[i].name, 2);
2466 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2467 "ashl", fixed_arith_modes[i].name, 3);
2468 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2469 "ashr", fixed_arith_modes[i].name, 3);
2470 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2471 "lshr", fixed_arith_modes[i].name, 3);
2472 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2473 "ssashl", fixed_arith_modes[i].name, 3);
2474 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2475 "usashl", fixed_arith_modes[i].name, 3);
2476 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2477 "cmp", fixed_arith_modes[i].name, 2);
2480 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2481 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2483 if (i == j
2484 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2485 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2486 continue;
2488 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2489 fixed_conv_modes[j].mode, "fract",
2490 fixed_conv_modes[i].name,
2491 fixed_conv_modes[j].name);
2492 arm_set_fixed_conv_libfunc (satfract_optab,
2493 fixed_conv_modes[i].mode,
2494 fixed_conv_modes[j].mode, "satfract",
2495 fixed_conv_modes[i].name,
2496 fixed_conv_modes[j].name);
2497 arm_set_fixed_conv_libfunc (fractuns_optab,
2498 fixed_conv_modes[i].mode,
2499 fixed_conv_modes[j].mode, "fractuns",
2500 fixed_conv_modes[i].name,
2501 fixed_conv_modes[j].name);
2502 arm_set_fixed_conv_libfunc (satfractuns_optab,
2503 fixed_conv_modes[i].mode,
2504 fixed_conv_modes[j].mode, "satfractuns",
2505 fixed_conv_modes[i].name,
2506 fixed_conv_modes[j].name);
2510 if (TARGET_AAPCS_BASED)
2511 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2514 /* On AAPCS systems, this is the "struct __va_list". */
2515 static GTY(()) tree va_list_type;
2517 /* Return the type to use as __builtin_va_list. */
2518 static tree
2519 arm_build_builtin_va_list (void)
2521 tree va_list_name;
2522 tree ap_field;
2524 if (!TARGET_AAPCS_BASED)
2525 return std_build_builtin_va_list ();
2527 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2528 defined as:
2530 struct __va_list
2532 void *__ap;
2535 The C Library ABI further reinforces this definition in \S
2536 4.1.
2538 We must follow this definition exactly. The structure tag
2539 name is visible in C++ mangled names, and thus forms a part
2540 of the ABI. The field name may be used by people who
2541 #include <stdarg.h>. */
2542 /* Create the type. */
2543 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2544 /* Give it the required name. */
2545 va_list_name = build_decl (BUILTINS_LOCATION,
2546 TYPE_DECL,
2547 get_identifier ("__va_list"),
2548 va_list_type);
2549 DECL_ARTIFICIAL (va_list_name) = 1;
2550 TYPE_NAME (va_list_type) = va_list_name;
2551 TYPE_STUB_DECL (va_list_type) = va_list_name;
2552 /* Create the __ap field. */
2553 ap_field = build_decl (BUILTINS_LOCATION,
2554 FIELD_DECL,
2555 get_identifier ("__ap"),
2556 ptr_type_node);
2557 DECL_ARTIFICIAL (ap_field) = 1;
2558 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2559 TYPE_FIELDS (va_list_type) = ap_field;
2560 /* Compute its layout. */
2561 layout_type (va_list_type);
2563 return va_list_type;
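/* In effect, on AAPCS targets the type built above is what user code
   would see from

     typedef struct __va_list { void *__ap; } va_list;

   (a layout sketch only; the real type is constructed through the tree
   machinery above so that the tag and field names match the AAPCS).  */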
2566 /* Return an expression of type "void *" pointing to the next
2567 available argument in a variable-argument list. VALIST is the
2568 user-level va_list object, of type __builtin_va_list. */
2569 static tree
2570 arm_extract_valist_ptr (tree valist)
2572 if (TREE_TYPE (valist) == error_mark_node)
2573 return error_mark_node;
2575 /* On an AAPCS target, the pointer is stored within "struct
2576 va_list". */
2577 if (TARGET_AAPCS_BASED)
2579 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2580 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2581 valist, ap_field, NULL_TREE);
2584 return valist;
2587 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2588 static void
2589 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2591 valist = arm_extract_valist_ptr (valist);
2592 std_expand_builtin_va_start (valist, nextarg);
2595 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2596 static tree
2597 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2598 gimple_seq *post_p)
2600 valist = arm_extract_valist_ptr (valist);
2601 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2604 /* Fix up any incompatible options that the user has specified. */
2605 static void
2606 arm_option_override (void)
2608 if (global_options_set.x_arm_arch_option)
2609 arm_selected_arch = &all_architectures[arm_arch_option];
2611 if (global_options_set.x_arm_cpu_option)
2613 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2614 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2617 if (global_options_set.x_arm_tune_option)
2618 arm_selected_tune = &all_cores[(int) arm_tune_option];
2620 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2621 SUBTARGET_OVERRIDE_OPTIONS;
2622 #endif
2624 if (arm_selected_arch)
2626 if (arm_selected_cpu)
2628 /* Check for conflict between mcpu and march. */
2629 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2631 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2632 arm_selected_cpu->name, arm_selected_arch->name);
2633 /* -march wins for code generation.
2634 -mcpu wins for default tuning. */
2635 if (!arm_selected_tune)
2636 arm_selected_tune = arm_selected_cpu;
2638 arm_selected_cpu = arm_selected_arch;
2640 else
2641 /* -mcpu wins. */
2642 arm_selected_arch = NULL;
2644 else
2645 /* Pick a CPU based on the architecture. */
2646 arm_selected_cpu = arm_selected_arch;
2649 /* If the user did not specify a processor, choose one for them. */
2650 if (!arm_selected_cpu)
2652 const struct processors * sel;
2653 unsigned int sought;
2655 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2656 if (!arm_selected_cpu->name)
2658 #ifdef SUBTARGET_CPU_DEFAULT
2659 /* Use the subtarget default CPU if none was specified by
2660 configure. */
2661 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2662 #endif
2663 /* Default to ARM6. */
2664 if (!arm_selected_cpu->name)
2665 arm_selected_cpu = &all_cores[arm6];
2668 sel = arm_selected_cpu;
2669 insn_flags = sel->flags;
2671 /* Now check to see if the user has specified some command line
2672 switches that require certain abilities from the cpu. */
2673 sought = 0;
2675 if (TARGET_INTERWORK || TARGET_THUMB)
2677 sought |= (FL_THUMB | FL_MODE32);
2679 /* There are no ARM processors that support both APCS-26 and
2680 interworking. Therefore we force FL_MODE26 to be removed
2681 from insn_flags here (if it was set), so that the search
2682 below will always be able to find a compatible processor. */
2683 insn_flags &= ~FL_MODE26;
2686 if (sought != 0 && ((sought & insn_flags) != sought))
2688 /* Try to locate a CPU type that supports all of the abilities
2689 of the default CPU, plus the extra abilities requested by
2690 the user. */
2691 for (sel = all_cores; sel->name != NULL; sel++)
2692 if ((sel->flags & sought) == (sought | insn_flags))
2693 break;
2695 if (sel->name == NULL)
2697 unsigned current_bit_count = 0;
2698 const struct processors * best_fit = NULL;
2700 /* Ideally we would like to issue an error message here
2701 saying that it was not possible to find a CPU compatible
2702 with the default CPU, but which also supports the command
2703 line options specified by the programmer, and so they
2704 ought to use the -mcpu=<name> command line option to
2705 override the default CPU type.
2707 If we cannot find a cpu that has both the
2708 characteristics of the default cpu and the given
2709 command line options we scan the array again looking
2710 for a best match. */
2711 for (sel = all_cores; sel->name != NULL; sel++)
2712 if ((sel->flags & sought) == sought)
2714 unsigned count;
2716 count = bit_count (sel->flags & insn_flags);
2718 if (count >= current_bit_count)
2720 best_fit = sel;
2721 current_bit_count = count;
2725 gcc_assert (best_fit);
2726 sel = best_fit;
2729 arm_selected_cpu = sel;
2733 gcc_assert (arm_selected_cpu);
2734 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2735 if (!arm_selected_tune)
2736 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2738 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2739 insn_flags = arm_selected_cpu->flags;
2740 arm_base_arch = arm_selected_cpu->base_arch;
2742 arm_tune = arm_selected_tune->core;
2743 tune_flags = arm_selected_tune->flags;
2744 current_tune = arm_selected_tune->tune;
2746 /* Make sure that the processor choice does not conflict with any of the
2747 other command line choices. */
2748 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2749 error ("target CPU does not support ARM mode");
2751 /* BPABI targets use linker tricks to allow interworking on cores
2752 without thumb support. */
2753 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2755 warning (0, "target CPU does not support interworking" );
2756 target_flags &= ~MASK_INTERWORK;
2759 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2761 warning (0, "target CPU does not support THUMB instructions");
2762 target_flags &= ~MASK_THUMB;
2765 if (TARGET_APCS_FRAME && TARGET_THUMB)
2767 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2768 target_flags &= ~MASK_APCS_FRAME;
2771 /* Callee super interworking implies thumb interworking. Adding
2772 this to the flags here simplifies the logic elsewhere. */
2773 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2774 target_flags |= MASK_INTERWORK;
2776 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2777 from here where no function is being compiled currently. */
2778 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2779 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2781 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2782 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2784 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2786 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2787 target_flags |= MASK_APCS_FRAME;
2790 if (TARGET_POKE_FUNCTION_NAME)
2791 target_flags |= MASK_APCS_FRAME;
2793 if (TARGET_APCS_REENT && flag_pic)
2794 error ("-fpic and -mapcs-reent are incompatible");
2796 if (TARGET_APCS_REENT)
2797 warning (0, "APCS reentrant code not supported. Ignored");
2799 /* If this target is normally configured to use APCS frames, warn if they
2800 are turned off and debugging is turned on. */
2801 if (TARGET_ARM
2802 && write_symbols != NO_DEBUG
2803 && !TARGET_APCS_FRAME
2804 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2805 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2807 if (TARGET_APCS_FLOAT)
2808 warning (0, "passing floating point arguments in fp regs not yet supported");
2810 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2811 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2812 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2813 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2814 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2815 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2816 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2817 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2818 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2819 arm_arch6m = arm_arch6 && !arm_arch_notm;
2820 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2821 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2822 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2823 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2824 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2826 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2827 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2828 thumb_code = TARGET_ARM == 0;
2829 thumb1_code = TARGET_THUMB1 != 0;
2830 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2831 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2832 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2833 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2834 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2835 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2836 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2837 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2838 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2839 if (arm_restrict_it == 2)
2840 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2842 if (!TARGET_THUMB2)
2843 arm_restrict_it = 0;
2845 /* If we are not using the default (ARM mode) section anchor offset
2846 ranges, then set the correct ranges now. */
2847 if (TARGET_THUMB1)
2849 /* Thumb-1 LDR instructions cannot have negative offsets.
2850 Permissible positive offset ranges are 5-bit (for byte loads),
2851 6-bit (for halfword loads), or 7-bit (for word loads).
2852 Empirical results suggest a 7-bit anchor range gives the best
2853 overall code size. */
2854 targetm.min_anchor_offset = 0;
2855 targetm.max_anchor_offset = 127;
2857 else if (TARGET_THUMB2)
2859 /* The minimum is set such that the total size of the block
2860 for a particular anchor is 248 + 1 + 4095 bytes, which is
2861 divisible by eight, ensuring natural spacing of anchors. */
2862 targetm.min_anchor_offset = -248;
2863 targetm.max_anchor_offset = 4095;
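/* Working through the numbers above: a Thumb-1 word load encodes a 5-bit
   immediate scaled by 4, giving byte offsets 0..124 (a 7-bit range), which
   is where the 127 limit comes from; for Thumb-2 the block covered by one
   anchor is 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, hence the
   "divisible by eight" remark.  */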
2866 /* V5 code we generate is completely interworking capable, so we turn off
2867 TARGET_INTERWORK here to avoid many tests later on. */
2869 /* XXX However, we must pass the right pre-processor defines to CPP
2870 or GLD can get confused. This is a hack. */
2871 if (TARGET_INTERWORK)
2872 arm_cpp_interwork = 1;
2874 if (arm_arch5)
2875 target_flags &= ~MASK_INTERWORK;
2877 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2878 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2880 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2881 error ("iwmmxt abi requires an iwmmxt capable cpu");
2883 if (!global_options_set.x_arm_fpu_index)
2885 const char *target_fpu_name;
2886 bool ok;
2888 #ifdef FPUTYPE_DEFAULT
2889 target_fpu_name = FPUTYPE_DEFAULT;
2890 #else
2891 target_fpu_name = "vfp";
2892 #endif
2894 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2895 CL_TARGET);
2896 gcc_assert (ok);
2899 arm_fpu_desc = &all_fpus[arm_fpu_index];
2901 if (TARGET_NEON && !arm_arch7)
2902 error ("target CPU does not support NEON");
2904 switch (arm_fpu_desc->model)
2906 case ARM_FP_MODEL_VFP:
2907 arm_fpu_attr = FPU_VFP;
2908 break;
2910 default:
2911 gcc_unreachable();
2914 if (TARGET_AAPCS_BASED)
2916 if (TARGET_CALLER_INTERWORKING)
2917 error ("AAPCS does not support -mcaller-super-interworking");
2918 else
2919 if (TARGET_CALLEE_INTERWORKING)
2920 error ("AAPCS does not support -mcallee-super-interworking");
2923 /* iWMMXt and NEON are incompatible. */
2924 if (TARGET_IWMMXT && TARGET_NEON)
2925 error ("iWMMXt and NEON are incompatible");
2927 /* iWMMXt unsupported under Thumb mode. */
2928 if (TARGET_THUMB && TARGET_IWMMXT)
2929 error ("iWMMXt unsupported under Thumb mode");
2931 /* __fp16 support currently assumes the core has ldrh. */
2932 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2933 sorry ("__fp16 and no ldrh");
2935 /* If soft-float is specified then don't use FPU. */
2936 if (TARGET_SOFT_FLOAT)
2937 arm_fpu_attr = FPU_NONE;
2939 if (TARGET_AAPCS_BASED)
2941 if (arm_abi == ARM_ABI_IWMMXT)
2942 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2943 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2944 && TARGET_HARD_FLOAT
2945 && TARGET_VFP)
2946 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2947 else
2948 arm_pcs_default = ARM_PCS_AAPCS;
2950 else
2952 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2953 sorry ("-mfloat-abi=hard and VFP");
2955 if (arm_abi == ARM_ABI_APCS)
2956 arm_pcs_default = ARM_PCS_APCS;
2957 else
2958 arm_pcs_default = ARM_PCS_ATPCS;
2961 /* For arm2/3 there is no need to do any scheduling if we are doing
2962 software floating-point. */
2963 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2964 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2966 /* Use the cp15 method if it is available. */
2967 if (target_thread_pointer == TP_AUTO)
2969 if (arm_arch6k && !TARGET_THUMB1)
2970 target_thread_pointer = TP_CP15;
2971 else
2972 target_thread_pointer = TP_SOFT;
2975 if (TARGET_HARD_TP && TARGET_THUMB1)
2976 error ("can not use -mtp=cp15 with 16-bit Thumb");
2978 /* Override the default structure alignment for AAPCS ABI. */
2979 if (!global_options_set.x_arm_structure_size_boundary)
2981 if (TARGET_AAPCS_BASED)
2982 arm_structure_size_boundary = 8;
2984 else
2986 if (arm_structure_size_boundary != 8
2987 && arm_structure_size_boundary != 32
2988 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2990 if (ARM_DOUBLEWORD_ALIGN)
2991 warning (0,
2992 "structure size boundary can only be set to 8, 32 or 64");
2993 else
2994 warning (0, "structure size boundary can only be set to 8 or 32");
2995 arm_structure_size_boundary
2996 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3000 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3002 error ("RTP PIC is incompatible with Thumb");
3003 flag_pic = 0;
3006 /* If stack checking is disabled, we can use r10 as the PIC register,
3007 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3008 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3010 if (TARGET_VXWORKS_RTP)
3011 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3012 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3015 if (flag_pic && TARGET_VXWORKS_RTP)
3016 arm_pic_register = 9;
3018 if (arm_pic_register_string != NULL)
3020 int pic_register = decode_reg_name (arm_pic_register_string);
3022 if (!flag_pic)
3023 warning (0, "-mpic-register= is useless without -fpic");
3025 /* Prevent the user from choosing an obviously stupid PIC register. */
3026 else if (pic_register < 0 || call_used_regs[pic_register]
3027 || pic_register == HARD_FRAME_POINTER_REGNUM
3028 || pic_register == STACK_POINTER_REGNUM
3029 || pic_register >= PC_REGNUM
3030 || (TARGET_VXWORKS_RTP
3031 && (unsigned int) pic_register != arm_pic_register))
3032 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3033 else
3034 arm_pic_register = pic_register;
3037 if (TARGET_VXWORKS_RTP
3038 && !global_options_set.x_arm_pic_data_is_text_relative)
3039 arm_pic_data_is_text_relative = 0;
3041 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3042 if (fix_cm3_ldrd == 2)
3044 if (arm_selected_cpu->core == cortexm3)
3045 fix_cm3_ldrd = 1;
3046 else
3047 fix_cm3_ldrd = 0;
3050 /* Enable -munaligned-access by default for
3051 - all ARMv6 architecture-based processors
3052 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3053 - ARMv8 architecture-based processors.
3055 Disable -munaligned-access by default for
3056 - all pre-ARMv6 architecture-based processors
3057 - ARMv6-M architecture-based processors. */
3059 if (unaligned_access == 2)
3061 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3062 unaligned_access = 1;
3063 else
3064 unaligned_access = 0;
3066 else if (unaligned_access == 1
3067 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3069 warning (0, "target CPU does not support unaligned accesses");
3070 unaligned_access = 0;
3073 if (TARGET_THUMB1 && flag_schedule_insns)
3075 /* Don't warn since it's on by default in -O2. */
3076 flag_schedule_insns = 0;
3079 if (optimize_size)
3081 /* If optimizing for size, bump the number of instructions that we
3082 are prepared to conditionally execute (even on a StrongARM). */
3083 max_insns_skipped = 6;
3085 /* For THUMB2, we limit the conditional sequence to one IT block. */
3086 if (TARGET_THUMB2)
3087 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3089 else
3090 max_insns_skipped = current_tune->max_insns_skipped;
3092 /* Hot/Cold partitioning is not currently supported, since we can't
3093 handle literal pool placement in that case. */
3094 if (flag_reorder_blocks_and_partition)
3096 inform (input_location,
3097 "-freorder-blocks-and-partition not supported on this architecture");
3098 flag_reorder_blocks_and_partition = 0;
3099 flag_reorder_blocks = 1;
3102 if (flag_pic)
3103 /* Hoisting PIC address calculations more aggressively provides a small,
3104 but measurable, size reduction for PIC code. Therefore, we decrease
3105 the bar for unrestricted expression hoisting to the cost of PIC address
3106 calculation, which is 2 instructions. */
3107 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3108 global_options.x_param_values,
3109 global_options_set.x_param_values);
3111 /* ARM EABI defaults to strict volatile bitfields. */
3112 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3113 && abi_version_at_least(2))
3114 flag_strict_volatile_bitfields = 1;
3116 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we have
3117 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3118 if (flag_prefetch_loop_arrays < 0
3119 && HAVE_prefetch
3120 && optimize >= 3
3121 && current_tune->num_prefetch_slots > 0)
3122 flag_prefetch_loop_arrays = 1;
3124 /* Set up the parameters to be used in the prefetching algorithm. Do not override
3125 the defaults unless we are tuning for a core whose values we have researched. */
3126 if (current_tune->num_prefetch_slots > 0)
3127 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3128 current_tune->num_prefetch_slots,
3129 global_options.x_param_values,
3130 global_options_set.x_param_values);
3131 if (current_tune->l1_cache_line_size >= 0)
3132 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3133 current_tune->l1_cache_line_size,
3134 global_options.x_param_values,
3135 global_options_set.x_param_values);
3136 if (current_tune->l1_cache_size >= 0)
3137 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3138 current_tune->l1_cache_size,
3139 global_options.x_param_values,
3140 global_options_set.x_param_values);
3142 /* Use Neon rather than core registers to perform 64-bit
3143 operations. */
3144 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3145 if (use_neon_for_64bits == 1)
3146 prefer_neon_for_64bits = true;
3148 /* Use the alternative scheduling-pressure algorithm by default. */
3149 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3150 global_options.x_param_values,
3151 global_options_set.x_param_values);
3153 /* Disable shrink-wrap when optimizing function for size, since it tends to
3154 generate additional returns. */
3155 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3156 flag_shrink_wrap = false;
3157 /* TBD: Dwarf info for apcs frame is not handled yet. */
3158 if (TARGET_APCS_FRAME)
3159 flag_shrink_wrap = false;
3161 /* We only support -mslow-flash-data on armv7-m targets. */
3162 if (target_slow_flash_data
3163 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3164 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3165 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3167 /* Currently, for slow flash data, we just disable literal pools. */
3168 if (target_slow_flash_data)
3169 arm_disable_literal_pool = true;
3171 /* Thumb2 inline assembly code should always use unified syntax.
3172 This will apply to ARM and Thumb1 eventually. */
3173 if (TARGET_THUMB2)
3174 inline_asm_unified = 1;
3176 /* Disable scheduling fusion by default if the target is not an armv7
3177 processor or does not prefer ldrd/strd. */
3178 if (flag_schedule_fusion == 2
3179 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3180 flag_schedule_fusion = 0;
3182 /* Register global variables with the garbage collector. */
3183 arm_add_gc_roots ();
3186 static void
3187 arm_add_gc_roots (void)
3189 gcc_obstack_init(&minipool_obstack);
3190 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3193 /* A table of known ARM exception types.
3194 For use with the interrupt function attribute. */
3196 typedef struct
3198 const char *const arg;
3199 const unsigned long return_value;
3201 isr_attribute_arg;
3203 static const isr_attribute_arg isr_attribute_args [] =
3205 { "IRQ", ARM_FT_ISR },
3206 { "irq", ARM_FT_ISR },
3207 { "FIQ", ARM_FT_FIQ },
3208 { "fiq", ARM_FT_FIQ },
3209 { "ABORT", ARM_FT_ISR },
3210 { "abort", ARM_FT_ISR },
3211 { "ABORT", ARM_FT_ISR },
3212 { "abort", ARM_FT_ISR },
3213 { "UNDEF", ARM_FT_EXCEPTION },
3214 { "undef", ARM_FT_EXCEPTION },
3215 { "SWI", ARM_FT_EXCEPTION },
3216 { "swi", ARM_FT_EXCEPTION },
3217 { NULL, ARM_FT_NORMAL }
3220 /* Returns the (interrupt) function type of the current
3221 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3223 static unsigned long
3224 arm_isr_value (tree argument)
3226 const isr_attribute_arg * ptr;
3227 const char * arg;
3229 if (!arm_arch_notm)
3230 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3232 /* No argument - default to IRQ. */
3233 if (argument == NULL_TREE)
3234 return ARM_FT_ISR;
3236 /* Get the value of the argument. */
3237 if (TREE_VALUE (argument) == NULL_TREE
3238 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3239 return ARM_FT_UNKNOWN;
3241 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3243 /* Check it against the list of known arguments. */
3244 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3245 if (streq (arg, ptr->arg))
3246 return ptr->return_value;
3248 /* An unrecognized interrupt type. */
3249 return ARM_FT_UNKNOWN;
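/* A sketch of the source-level usage these arguments correspond to, via
   the "isr"/"interrupt" function attributes looked up below:

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   With no argument at all, arm_isr_value above defaults to ARM_FT_ISR.  */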
3252 /* Computes the type of the current function. */
3254 static unsigned long
3255 arm_compute_func_type (void)
3257 unsigned long type = ARM_FT_UNKNOWN;
3258 tree a;
3259 tree attr;
3261 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3263 /* Decide if the current function is volatile. Such functions
3264 never return, and many memory cycles can be saved by not storing
3265 register values that will never be needed again. This optimization
3266 was added to speed up context switching in a kernel application. */
3267 if (optimize > 0
3268 && (TREE_NOTHROW (current_function_decl)
3269 || !(flag_unwind_tables
3270 || (flag_exceptions
3271 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3272 && TREE_THIS_VOLATILE (current_function_decl))
3273 type |= ARM_FT_VOLATILE;
3275 if (cfun->static_chain_decl != NULL)
3276 type |= ARM_FT_NESTED;
3278 attr = DECL_ATTRIBUTES (current_function_decl);
3280 a = lookup_attribute ("naked", attr);
3281 if (a != NULL_TREE)
3282 type |= ARM_FT_NAKED;
3284 a = lookup_attribute ("isr", attr);
3285 if (a == NULL_TREE)
3286 a = lookup_attribute ("interrupt", attr);
3288 if (a == NULL_TREE)
3289 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3290 else
3291 type |= arm_isr_value (TREE_VALUE (a));
3293 return type;
3296 /* Returns the type of the current function. */
3298 unsigned long
3299 arm_current_func_type (void)
3301 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3302 cfun->machine->func_type = arm_compute_func_type ();
3304 return cfun->machine->func_type;
3307 bool
3308 arm_allocate_stack_slots_for_args (void)
3310 /* Naked functions should not allocate stack slots for arguments. */
3311 return !IS_NAKED (arm_current_func_type ());
3314 static bool
3315 arm_warn_func_return (tree decl)
3317 /* Naked functions are implemented entirely in assembly, including the
3318 return sequence, so suppress warnings about this. */
3319 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3323 /* Output assembler code for a block containing the constant parts
3324 of a trampoline, leaving space for the variable parts.
3326 On the ARM, (if r8 is the static chain regnum, and remembering that
3327 referencing pc adds an offset of 8) the trampoline looks like:
3328 ldr r8, [pc, #0]
3329 ldr pc, [pc]
3330 .word static chain value
3331 .word function's address
3332 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3334 static void
3335 arm_asm_trampoline_template (FILE *f)
3337 if (TARGET_ARM)
3339 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3340 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3342 else if (TARGET_THUMB2)
3344 /* The Thumb-2 trampoline is similar to the arm implementation.
3345 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3346 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3347 STATIC_CHAIN_REGNUM, PC_REGNUM);
3348 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3350 else
3352 ASM_OUTPUT_ALIGN (f, 2);
3353 fprintf (f, "\t.code\t16\n");
3354 fprintf (f, ".Ltrampoline_start:\n");
3355 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3356 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3357 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3358 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3359 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3360 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3362 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3363 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3366 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3368 static void
3369 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3371 rtx fnaddr, mem, a_tramp;
3373 emit_block_move (m_tramp, assemble_trampoline_template (),
3374 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3376 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3377 emit_move_insn (mem, chain_value);
3379 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3380 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3381 emit_move_insn (mem, fnaddr);
3383 a_tramp = XEXP (m_tramp, 0);
3384 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3385 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3386 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
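/* Illustrative sketch (not part of GCC): on 32-bit targets the template and
   init code above produce a 16-byte trampoline which, once the two data
   words are filled in, can be pictured as the struct below.  The struct and
   field names are invented; the offsets mirror the adjust_address calls
   above, and the pc-relative loads work because reading the pc in ARM state
   yields the instruction's own address plus 8.  */

#include <stdint.h>

struct arm_trampoline_image
{
  uint32_t ldr_chain;   /* offset  0: ldr r8, [pc, #0]  -- loads CHAIN          */
  uint32_t ldr_target;  /* offset  4: ldr pc, [pc, #0]  -- jumps via TARGET     */
  uint32_t chain;       /* offset  8: static chain value, written at run time   */
  uint32_t target;      /* offset 12: address of the nested function's code     */
};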
3389 /* Thumb trampolines should be entered in thumb mode, so set
3390 the bottom bit of the address. */
3392 static rtx
3393 arm_trampoline_adjust_address (rtx addr)
3395 if (TARGET_THUMB)
3396 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3397 NULL, 0, OPTAB_LIB_WIDEN);
3398 return addr;
3401 /* Return 1 if it is possible to return using a single instruction.
3402 If SIBLING is non-null, this is a test for a return before a sibling
3403 call. SIBLING is the call insn, so we can examine its register usage. */
3406 use_return_insn (int iscond, rtx sibling)
3408 int regno;
3409 unsigned int func_type;
3410 unsigned long saved_int_regs;
3411 unsigned HOST_WIDE_INT stack_adjust;
3412 arm_stack_offsets *offsets;
3414 /* Never use a return instruction before reload has run. */
3415 if (!reload_completed)
3416 return 0;
3418 func_type = arm_current_func_type ();
3420 /* Naked, volatile and stack alignment functions need special
3421 consideration. */
3422 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3423 return 0;
3425 /* So do interrupt functions that use the frame pointer and Thumb
3426 interrupt functions. */
3427 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3428 return 0;
3430 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3431 && !optimize_function_for_size_p (cfun))
3432 return 0;
3434 offsets = arm_get_frame_offsets ();
3435 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3437 /* As do variadic functions. */
3438 if (crtl->args.pretend_args_size
3439 || cfun->machine->uses_anonymous_args
3440 /* Or if the function calls __builtin_eh_return () */
3441 || crtl->calls_eh_return
3442 /* Or if the function calls alloca */
3443 || cfun->calls_alloca
3444 /* Or if there is a stack adjustment. However, if the stack pointer
3445 is saved on the stack, we can use a pre-incrementing stack load. */
3446 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3447 && stack_adjust == 4)))
3448 return 0;
3450 saved_int_regs = offsets->saved_regs_mask;
3452 /* Unfortunately, the insn
3454 ldmib sp, {..., sp, ...}
3456 triggers a bug on most SA-110 based devices, such that the stack
3457 pointer won't be correctly restored if the instruction takes a
3458 page fault. We work around this problem by popping r3 along with
3459 the other registers, since that is never slower than executing
3460 another instruction.
3462 We test for !arm_arch5 here, because code for any architecture
3463 less than this could potentially be run on one of the buggy
3464 chips. */
3465 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3467 /* Validate that r3 is a call-clobbered register (always true in
3468 the default abi) ... */
3469 if (!call_used_regs[3])
3470 return 0;
3472 /* ... that it isn't being used for a return value ... */
3473 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3474 return 0;
3476 /* ... or for a tail-call argument ... */
3477 if (sibling)
3479 gcc_assert (CALL_P (sibling));
3481 if (find_regno_fusage (sibling, USE, 3))
3482 return 0;
3485 /* ... and that there are no call-saved registers in r0-r2
3486 (always true in the default ABI). */
3487 if (saved_int_regs & 0x7)
3488 return 0;
3491 /* Can't be done if interworking with Thumb, and any registers have been
3492 stacked. */
3493 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3494 return 0;
3496 /* On StrongARM, conditional returns are expensive if they aren't
3497 taken and multiple registers have been stacked. */
3498 if (iscond && arm_tune_strongarm)
3500 /* Conditional return when just the LR is stored is a simple
3501 conditional-load instruction, that's not expensive. */
3502 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3503 return 0;
3505 if (flag_pic
3506 && arm_pic_register != INVALID_REGNUM
3507 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3508 return 0;
3511 /* If there are saved registers but the LR isn't saved, then we need
3512 two instructions for the return. */
3513 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3514 return 0;
3516 /* Can't be done if any of the VFP regs are pushed,
3517 since this also requires an insn. */
3518 if (TARGET_HARD_FLOAT && TARGET_VFP)
3519 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3520 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3521 return 0;
3523 if (TARGET_REALLY_IWMMXT)
3524 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3525 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3526 return 0;
3528 return 1;
3531 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3532 shrink-wrapping if possible. This is the case if we need to emit a
3533 prologue, which we can test by looking at the offsets. */
3534 bool
3535 use_simple_return_p (void)
3537 arm_stack_offsets *offsets;
3539 offsets = arm_get_frame_offsets ();
3540 return offsets->outgoing_args != 0;
3543 /* Return TRUE if int I is a valid immediate ARM constant. */
3546 const_ok_for_arm (HOST_WIDE_INT i)
3548 int lowbit;
3550 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3551 be all zero, or all one. */
3552 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3553 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3554 != ((~(unsigned HOST_WIDE_INT) 0)
3555 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3556 return FALSE;
3558 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3560 /* Fast return for 0 and small values. We must do this for zero, since
3561 the code below can't handle that one case. */
3562 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3563 return TRUE;
3565 /* Get the number of trailing zeros. */
3566 lowbit = ffs((int) i) - 1;
3568 /* Only even shifts are allowed in ARM mode so round down to the
3569 nearest even number. */
3570 if (TARGET_ARM)
3571 lowbit &= ~1;
3573 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3574 return TRUE;
3576 if (TARGET_ARM)
3578 /* Allow rotated constants in ARM mode. */
3579 if (lowbit <= 4
3580 && ((i & ~0xc000003f) == 0
3581 || (i & ~0xf000000f) == 0
3582 || (i & ~0xfc000003) == 0))
3583 return TRUE;
3585 else
3587 HOST_WIDE_INT v;
3589 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3590 v = i & 0xff;
3591 v |= v << 16;
3592 if (i == v || i == (v | (v << 8)))
3593 return TRUE;
3595 /* Allow repeated pattern 0xXY00XY00. */
3596 v = i & 0xff00;
3597 v |= v << 16;
3598 if (i == v)
3599 return TRUE;
3602 return FALSE;
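/* Illustrative sketch (not part of GCC): a standalone restatement of the
   ARM-mode rule checked above -- a data-processing immediate is valid when
   it is an 8-bit value rotated right by an even amount.  Plain uint32_t is
   used instead of HOST_WIDE_INT and the function name is invented.  */

#include <stdint.h>

static int
arm_mode_immediate_p (uint32_t i)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating I left by ROT undoes a rotate-right by ROT; if the result
	 fits in 8 bits then I is encodable.  */
      uint32_t undone = rot ? (i << rot) | (i >> (32 - rot)) : i;
      if (undone <= 0xff)
	return 1;
    }
  return 0;
}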
3605 /* Return true if I is a valid constant for the operation CODE. */
3607 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3609 if (const_ok_for_arm (i))
3610 return 1;
3612 switch (code)
3614 case SET:
3615 /* See if we can use movw. */
3616 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3617 return 1;
3618 else
3619 /* Otherwise, try mvn. */
3620 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3622 case PLUS:
3623 /* See if we can use addw or subw. */
3624 if (TARGET_THUMB2
3625 && ((i & 0xfffff000) == 0
3626 || ((-i) & 0xfffff000) == 0))
3627 return 1;
3628 /* else fall through. */
3630 case COMPARE:
3631 case EQ:
3632 case NE:
3633 case GT:
3634 case LE:
3635 case LT:
3636 case GE:
3637 case GEU:
3638 case LTU:
3639 case GTU:
3640 case LEU:
3641 case UNORDERED:
3642 case ORDERED:
3643 case UNEQ:
3644 case UNGE:
3645 case UNLT:
3646 case UNGT:
3647 case UNLE:
3648 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3650 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3651 case XOR:
3652 return 0;
3654 case IOR:
3655 if (TARGET_THUMB2)
3656 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3657 return 0;
3659 case AND:
3660 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3662 default:
3663 gcc_unreachable ();
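/* Worked examples (illustrative): the per-operation fall-backs above let a
   constant that is not directly encodable still cost one instruction, e.g.
   in ARM mode:

	r0 = 0xffffff00;	SET:  ~0xffffff00 == 0xff is encodable	-> mvn r0, #0xff
	r0 = r1 & 0xffffff00;	AND:  same complement trick		-> bic r0, r1, #0xff
	r0 = r1 + 0xfffffffc;	PLUS: -0xfffffffc == 4 is encodable	-> sub r0, r1, #4   */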
3667 /* Return true if I is a valid di mode constant for the operation CODE. */
3669 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3671 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3672 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3673 rtx hi = GEN_INT (hi_val);
3674 rtx lo = GEN_INT (lo_val);
3676 if (TARGET_THUMB1)
3677 return 0;
3679 switch (code)
3681 case AND:
3682 case IOR:
3683 case XOR:
3684 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3685 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3686 case PLUS:
3687 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3689 default:
3690 return 0;
3694 /* Emit a sequence of insns to handle a large constant.
3695 CODE is the code of the operation required, it can be any of SET, PLUS,
3696 IOR, AND, XOR, MINUS;
3697 MODE is the mode in which the operation is being performed;
3698 VAL is the integer to operate on;
3699 SOURCE is the other operand (a register, or a null-pointer for SET);
3700 SUBTARGETS means it is safe to create scratch registers if that will
3701 either produce a simpler sequence, or we will want to cse the values.
3702 Return value is the number of insns emitted. */
3704 /* ??? Tweak this for thumb2. */
3706 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3707 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3709 rtx cond;
3711 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3712 cond = COND_EXEC_TEST (PATTERN (insn));
3713 else
3714 cond = NULL_RTX;
3716 if (subtargets || code == SET
3717 || (REG_P (target) && REG_P (source)
3718 && REGNO (target) != REGNO (source)))
3720 /* After arm_reorg has been called, we can't fix up expensive
3721 constants by pushing them into memory so we must synthesize
3722 them in-line, regardless of the cost. This is only likely to
3723 be more costly on chips that have load delay slots and we are
3724 compiling without running the scheduler (so no splitting
3725 occurred before the final instruction emission).
3727 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3729 if (!cfun->machine->after_arm_reorg
3730 && !cond
3731 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3732 1, 0)
3733 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3734 + (code != SET))))
3736 if (code == SET)
3738 /* Currently SET is the only monadic value for CODE, all
3739 the rest are dyadic. */
3740 if (TARGET_USE_MOVT)
3741 arm_emit_movpair (target, GEN_INT (val));
3742 else
3743 emit_set_insn (target, GEN_INT (val));
3745 return 1;
3747 else
3749 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3751 if (TARGET_USE_MOVT)
3752 arm_emit_movpair (temp, GEN_INT (val));
3753 else
3754 emit_set_insn (temp, GEN_INT (val));
3756 /* For MINUS, the value is subtracted from, since we never
3757 have subtraction of a constant. */
3758 if (code == MINUS)
3759 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3760 else
3761 emit_set_insn (target,
3762 gen_rtx_fmt_ee (code, mode, source, temp));
3763 return 2;
3768 return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 1);
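/* Worked example (illustrative): when MOVW/MOVT are available
   (TARGET_USE_MOVT, i.e. ARMv6T2 and later), any 32-bit SET costs at most
   two instructions, e.g. loading 0x12345678:

	movw	r0, #0x5678	@ r0 = 0x00005678
	movt	r0, #0x1234	@ r0 = 0x12345678

   Without MOVT the value is instead synthesized from 8-bit rotated
   immediates by arm_gen_constant below, or left for arm_reorg to place in a
   literal pool when that is expected to be cheaper.  */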
3772 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3773 ARM/THUMB2 immediates, and add up to VAL.
3774 The function return value gives the number of insns required. */
3775 static int
3776 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3777 struct four_ints *return_sequence)
3779 int best_consecutive_zeros = 0;
3780 int i;
3781 int best_start = 0;
3782 int insns1, insns2;
3783 struct four_ints tmp_sequence;
3785 /* If we aren't targeting ARM, the best place to start is always at
3786 the bottom, otherwise look more closely. */
3787 if (TARGET_ARM)
3789 for (i = 0; i < 32; i += 2)
3791 int consecutive_zeros = 0;
3793 if (!(val & (3 << i)))
3795 while ((i < 32) && !(val & (3 << i)))
3797 consecutive_zeros += 2;
3798 i += 2;
3800 if (consecutive_zeros > best_consecutive_zeros)
3802 best_consecutive_zeros = consecutive_zeros;
3803 best_start = i - consecutive_zeros;
3805 i -= 2;
3810 /* So long as it won't require any more insns to do so, it's
3811 desirable to emit a small constant (in bits 0...9) in the last
3812 insn. This way there is more chance that it can be combined with
3813 a later addressing insn to form a pre-indexed load or store
3814 operation. Consider:
3816 *((volatile int *)0xe0000100) = 1;
3817 *((volatile int *)0xe0000110) = 2;
3819 We want this to wind up as:
3821 mov rA, #0xe0000000
3822 mov rB, #1
3823 str rB, [rA, #0x100]
3824 mov rB, #2
3825 str rB, [rA, #0x110]
3827 rather than having to synthesize both large constants from scratch.
3829 Therefore, we calculate how many insns would be required to emit
3830 the constant starting from `best_start', and also starting from
3831 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3832 yield a shorter sequence, we may as well use zero. */
3833 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3834 if (best_start != 0
3835 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3837 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3838 if (insns2 <= insns1)
3840 *return_sequence = tmp_sequence;
3841 insns1 = insns2;
3845 return insns1;
3848 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3849 static int
3850 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3851 struct four_ints *return_sequence, int i)
3853 int remainder = val & 0xffffffff;
3854 int insns = 0;
3856 /* Try and find a way of doing the job in either two or three
3857 instructions.
3859 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3860 location. We start at position I. This may be the MSB, or
3861 optimal_immediate_sequence may have positioned it at the largest block
3862 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3863 wrapping around to the top of the word when we drop off the bottom.
3864 In the worst case this code should produce no more than four insns.
3866 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3867 constants, shifted to any arbitrary location. We should always start
3868 at the MSB. */
3871 int end;
3872 unsigned int b1, b2, b3, b4;
3873 unsigned HOST_WIDE_INT result;
3874 int loc;
3876 gcc_assert (insns < 4);
3878 if (i <= 0)
3879 i += 32;
3881 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3882 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3884 loc = i;
3885 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3886 /* We can use addw/subw for the last 12 bits. */
3887 result = remainder;
3888 else
3890 /* Use an 8-bit shifted/rotated immediate. */
3891 end = i - 8;
3892 if (end < 0)
3893 end += 32;
3894 result = remainder & ((0x0ff << end)
3895 | ((i < end) ? (0xff >> (32 - end))
3896 : 0));
3897 i -= 8;
3900 else
3902 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3903 arbitrary shifts. */
3904 i -= TARGET_ARM ? 2 : 1;
3905 continue;
3908 /* Next, see if we can do a better job with a thumb2 replicated
3909 constant.
3911 We do it this way around to catch the cases like 0x01F001E0 where
3912 two 8-bit immediates would work, but a replicated constant would
3913 make it worse.
3915 TODO: 16-bit constants that don't clear all the bits, but still win.
3916 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3917 if (TARGET_THUMB2)
3919 b1 = (remainder & 0xff000000) >> 24;
3920 b2 = (remainder & 0x00ff0000) >> 16;
3921 b3 = (remainder & 0x0000ff00) >> 8;
3922 b4 = remainder & 0xff;
3924 if (loc > 24)
3926 /* The 8-bit immediate already found clears b1 (and maybe b2),
3927 but must leave b3 and b4 alone. */
3929 /* First try to find a 32-bit replicated constant that clears
3930 almost everything. We can assume that we can't do it in one,
3931 or else we wouldn't be here. */
3932 unsigned int tmp = b1 & b2 & b3 & b4;
3933 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3934 + (tmp << 24);
3935 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3936 + (tmp == b3) + (tmp == b4);
3937 if (tmp
3938 && (matching_bytes >= 3
3939 || (matching_bytes == 2
3940 && const_ok_for_op (remainder & ~tmp2, code))))
3942 /* At least 3 of the bytes match, and the fourth has at
3943 least as many bits set, or two of the bytes match
3944 and it will only require one more insn to finish. */
3945 result = tmp2;
3946 i = tmp != b1 ? 32
3947 : tmp != b2 ? 24
3948 : tmp != b3 ? 16
3949 : 8;
3952 /* Second, try to find a 16-bit replicated constant that can
3953 leave three of the bytes clear. If b2 or b4 is already
3954 zero, then we can. If the 8-bit from above would not
3955 clear b2 anyway, then we still win. */
3956 else if (b1 == b3 && (!b2 || !b4
3957 || (remainder & 0x00ff0000 & ~result)))
3959 result = remainder & 0xff00ff00;
3960 i = 24;
3963 else if (loc > 16)
3965 /* The 8-bit immediate already found clears b2 (and maybe b3)
3966 and we don't get here unless b1 is already clear, but it will
3967 leave b4 unchanged. */
3969 /* If we can clear b2 and b4 at once, then we win, since the
3970 8-bits couldn't possibly reach that far. */
3971 if (b2 == b4)
3973 result = remainder & 0x00ff00ff;
3974 i = 16;
3979 return_sequence->i[insns++] = result;
3980 remainder &= ~result;
3982 if (code == SET || code == MINUS)
3983 code = PLUS;
3985 while (remainder);
3987 return insns;
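/* Worked example (illustrative): in ARM mode the loop above decomposes a
   value into 8-bit fields rotated to even bit positions, e.g. 0x00f000ff is
   covered by the two chunks 0x00f00000 and 0x000000ff, giving a
   two-instruction sequence along the lines of

	mov	r0, #0x00f00000
	add	r0, r0, #0xff

   (the second operation uses PLUS because the SET/MINUS case at the end of
   the loop rewrites CODE once the first chunk has been emitted).  */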
3990 /* Emit an instruction with the indicated PATTERN. If COND is
3991 non-NULL, conditionalize the execution of the instruction on COND
3992 being true. */
3994 static void
3995 emit_constant_insn (rtx cond, rtx pattern)
3997 if (cond)
3998 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3999 emit_insn (pattern);
4002 /* As above, but extra parameter GENERATE which, if clear, suppresses
4003 RTL generation. */
4005 static int
4006 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4007 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4008 int generate)
4010 int can_invert = 0;
4011 int can_negate = 0;
4012 int final_invert = 0;
4013 int i;
4014 int set_sign_bit_copies = 0;
4015 int clear_sign_bit_copies = 0;
4016 int clear_zero_bit_copies = 0;
4017 int set_zero_bit_copies = 0;
4018 int insns = 0, neg_insns, inv_insns;
4019 unsigned HOST_WIDE_INT temp1, temp2;
4020 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4021 struct four_ints *immediates;
4022 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4024 /* Find out which operations are safe for a given CODE. Also do a quick
4025 check for degenerate cases; these can occur when DImode operations
4026 are split. */
4027 switch (code)
4029 case SET:
4030 can_invert = 1;
4031 break;
4033 case PLUS:
4034 can_negate = 1;
4035 break;
4037 case IOR:
4038 if (remainder == 0xffffffff)
4040 if (generate)
4041 emit_constant_insn (cond,
4042 gen_rtx_SET (VOIDmode, target,
4043 GEN_INT (ARM_SIGN_EXTEND (val))));
4044 return 1;
4047 if (remainder == 0)
4049 if (reload_completed && rtx_equal_p (target, source))
4050 return 0;
4052 if (generate)
4053 emit_constant_insn (cond,
4054 gen_rtx_SET (VOIDmode, target, source));
4055 return 1;
4057 break;
4059 case AND:
4060 if (remainder == 0)
4062 if (generate)
4063 emit_constant_insn (cond,
4064 gen_rtx_SET (VOIDmode, target, const0_rtx));
4065 return 1;
4067 if (remainder == 0xffffffff)
4069 if (reload_completed && rtx_equal_p (target, source))
4070 return 0;
4071 if (generate)
4072 emit_constant_insn (cond,
4073 gen_rtx_SET (VOIDmode, target, source));
4074 return 1;
4076 can_invert = 1;
4077 break;
4079 case XOR:
4080 if (remainder == 0)
4082 if (reload_completed && rtx_equal_p (target, source))
4083 return 0;
4084 if (generate)
4085 emit_constant_insn (cond,
4086 gen_rtx_SET (VOIDmode, target, source));
4087 return 1;
4090 if (remainder == 0xffffffff)
4092 if (generate)
4093 emit_constant_insn (cond,
4094 gen_rtx_SET (VOIDmode, target,
4095 gen_rtx_NOT (mode, source)));
4096 return 1;
4098 final_invert = 1;
4099 break;
4101 case MINUS:
4102 /* We treat MINUS as (val - source), since (source - val) is always
4103 passed as (source + (-val)). */
4104 if (remainder == 0)
4106 if (generate)
4107 emit_constant_insn (cond,
4108 gen_rtx_SET (VOIDmode, target,
4109 gen_rtx_NEG (mode, source)));
4110 return 1;
4112 if (const_ok_for_arm (val))
4114 if (generate)
4115 emit_constant_insn (cond,
4116 gen_rtx_SET (VOIDmode, target,
4117 gen_rtx_MINUS (mode, GEN_INT (val),
4118 source)));
4119 return 1;
4122 break;
4124 default:
4125 gcc_unreachable ();
4128 /* If we can do it in one insn get out quickly. */
4129 if (const_ok_for_op (val, code))
4131 if (generate)
4132 emit_constant_insn (cond,
4133 gen_rtx_SET (VOIDmode, target,
4134 (source
4135 ? gen_rtx_fmt_ee (code, mode, source,
4136 GEN_INT (val))
4137 : GEN_INT (val))));
4138 return 1;
4141 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4142 insn. */
4143 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4144 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4146 if (generate)
4148 if (mode == SImode && i == 16)
4149 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4150 smaller insn. */
4151 emit_constant_insn (cond,
4152 gen_zero_extendhisi2
4153 (target, gen_lowpart (HImode, source)));
4154 else
4155 /* Extz only supports SImode, but we can coerce the operands
4156 into that mode. */
4157 emit_constant_insn (cond,
4158 gen_extzv_t2 (gen_lowpart (SImode, target),
4159 gen_lowpart (SImode, source),
4160 GEN_INT (i), const0_rtx));
4163 return 1;
4166 /* Calculate a few attributes that may be useful for specific
4167 optimizations. */
4168 /* Count number of leading zeros. */
4169 for (i = 31; i >= 0; i--)
4171 if ((remainder & (1 << i)) == 0)
4172 clear_sign_bit_copies++;
4173 else
4174 break;
4177 /* Count number of leading 1's. */
4178 for (i = 31; i >= 0; i--)
4180 if ((remainder & (1 << i)) != 0)
4181 set_sign_bit_copies++;
4182 else
4183 break;
4186 /* Count number of trailing zero's. */
4187 for (i = 0; i <= 31; i++)
4189 if ((remainder & (1 << i)) == 0)
4190 clear_zero_bit_copies++;
4191 else
4192 break;
4195 /* Count number of trailing 1's. */
4196 for (i = 0; i <= 31; i++)
4198 if ((remainder & (1 << i)) != 0)
4199 set_zero_bit_copies++;
4200 else
4201 break;
4204 switch (code)
4206 case SET:
4207 /* See if we can do this by sign_extending a constant that is known
4208 to be negative. This is a good way of doing it, since the shift
4209 may well merge into a subsequent insn. */
4210 if (set_sign_bit_copies > 1)
4212 if (const_ok_for_arm
4213 (temp1 = ARM_SIGN_EXTEND (remainder
4214 << (set_sign_bit_copies - 1))))
4216 if (generate)
4218 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4219 emit_constant_insn (cond,
4220 gen_rtx_SET (VOIDmode, new_src,
4221 GEN_INT (temp1)));
4222 emit_constant_insn (cond,
4223 gen_ashrsi3 (target, new_src,
4224 GEN_INT (set_sign_bit_copies - 1)));
4226 return 2;
4228 /* For an inverted constant, we will need to set the low bits,
4229 these will be shifted out of harm's way. */
4230 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4231 if (const_ok_for_arm (~temp1))
4233 if (generate)
4235 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4236 emit_constant_insn (cond,
4237 gen_rtx_SET (VOIDmode, new_src,
4238 GEN_INT (temp1)));
4239 emit_constant_insn (cond,
4240 gen_ashrsi3 (target, new_src,
4241 GEN_INT (set_sign_bit_copies - 1)));
4243 return 2;
4247 /* See if we can calculate the value as the difference between two
4248 valid immediates. */
4249 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4251 int topshift = clear_sign_bit_copies & ~1;
4253 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4254 & (0xff000000 >> topshift));
4256 /* If temp1 is zero, then that means the 9 most significant
4257 bits of remainder were 1 and we've caused it to overflow.
4258 When topshift is 0 we don't need to do anything since we
4259 can borrow from 'bit 32'. */
4260 if (temp1 == 0 && topshift != 0)
4261 temp1 = 0x80000000 >> (topshift - 1);
4263 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4265 if (const_ok_for_arm (temp2))
4267 if (generate)
4269 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4270 emit_constant_insn (cond,
4271 gen_rtx_SET (VOIDmode, new_src,
4272 GEN_INT (temp1)));
4273 emit_constant_insn (cond,
4274 gen_addsi3 (target, new_src,
4275 GEN_INT (-temp2)));
4278 return 2;
4282 /* See if we can generate this by setting the bottom (or the top)
4283 16 bits, and then shifting these into the other half of the
4284 word. We only look for the simplest cases, to do more would cost
4285 too much. Be careful, however, not to generate this when the
4286 alternative would take fewer insns. */
4287 if (val & 0xffff0000)
4289 temp1 = remainder & 0xffff0000;
4290 temp2 = remainder & 0x0000ffff;
4292 /* Overlaps outside this range are best done using other methods. */
4293 for (i = 9; i < 24; i++)
4295 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4296 && !const_ok_for_arm (temp2))
4298 rtx new_src = (subtargets
4299 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4300 : target);
4301 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4302 source, subtargets, generate);
4303 source = new_src;
4304 if (generate)
4305 emit_constant_insn
4306 (cond,
4307 gen_rtx_SET
4308 (VOIDmode, target,
4309 gen_rtx_IOR (mode,
4310 gen_rtx_ASHIFT (mode, source,
4311 GEN_INT (i)),
4312 source)));
4313 return insns + 1;
4317 /* Don't duplicate cases already considered. */
4318 for (i = 17; i < 24; i++)
4320 if (((temp1 | (temp1 >> i)) == remainder)
4321 && !const_ok_for_arm (temp1))
4323 rtx new_src = (subtargets
4324 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4325 : target);
4326 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4327 source, subtargets, generate);
4328 source = new_src;
4329 if (generate)
4330 emit_constant_insn
4331 (cond,
4332 gen_rtx_SET (VOIDmode, target,
4333 gen_rtx_IOR
4334 (mode,
4335 gen_rtx_LSHIFTRT (mode, source,
4336 GEN_INT (i)),
4337 source)));
4338 return insns + 1;
4342 break;
4344 case IOR:
4345 case XOR:
4346 /* If we have IOR or XOR, and the constant can be loaded in a
4347 single instruction, and we can find a temporary to put it in,
4348 then this can be done in two instructions instead of 3-4. */
4349 if (subtargets
4350 /* TARGET can't be NULL if SUBTARGETS is 0 */
4351 || (reload_completed && !reg_mentioned_p (target, source)))
4353 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4355 if (generate)
4357 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4359 emit_constant_insn (cond,
4360 gen_rtx_SET (VOIDmode, sub,
4361 GEN_INT (val)));
4362 emit_constant_insn (cond,
4363 gen_rtx_SET (VOIDmode, target,
4364 gen_rtx_fmt_ee (code, mode,
4365 source, sub)));
4367 return 2;
4371 if (code == XOR)
4372 break;
4374 /* Convert.
4375 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4376 followed by 0s, e.g. 0xfff00000)
4377 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4379 This can be done in 2 instructions by using shifts with mov or mvn.
4380 e.g. for
4381 x = x | 0xfff00000;
4382 we generate.
4383 mvn r0, r0, asl #12
4384 mvn r0, r0, lsr #12 */
4385 if (set_sign_bit_copies > 8
4386 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4388 if (generate)
4390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4391 rtx shift = GEN_INT (set_sign_bit_copies);
4393 emit_constant_insn
4394 (cond,
4395 gen_rtx_SET (VOIDmode, sub,
4396 gen_rtx_NOT (mode,
4397 gen_rtx_ASHIFT (mode,
4398 source,
4399 shift))));
4400 emit_constant_insn
4401 (cond,
4402 gen_rtx_SET (VOIDmode, target,
4403 gen_rtx_NOT (mode,
4404 gen_rtx_LSHIFTRT (mode, sub,
4405 shift))));
4407 return 2;
4410 /* Convert
4411 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4413 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4415 E.g. for r0 = r0 | 0xfff
4416 mvn r0, r0, lsr #12
4417 mvn r0, r0, asl #12
4420 if (set_zero_bit_copies > 8
4421 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4423 if (generate)
4425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4426 rtx shift = GEN_INT (set_zero_bit_copies);
4428 emit_constant_insn
4429 (cond,
4430 gen_rtx_SET (VOIDmode, sub,
4431 gen_rtx_NOT (mode,
4432 gen_rtx_LSHIFTRT (mode,
4433 source,
4434 shift))));
4435 emit_constant_insn
4436 (cond,
4437 gen_rtx_SET (VOIDmode, target,
4438 gen_rtx_NOT (mode,
4439 gen_rtx_ASHIFT (mode, sub,
4440 shift))));
4442 return 2;
4445 /* This will never be reached for Thumb2 because orn is a valid
4446 instruction. This is for Thumb1 and the ARM 32 bit cases.
4448 x = y | constant (such that ~constant is a valid constant)
4449 Transform this to
4450 x = ~(~y & ~constant).
4452 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4454 if (generate)
4456 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4457 emit_constant_insn (cond,
4458 gen_rtx_SET (VOIDmode, sub,
4459 gen_rtx_NOT (mode, source)));
4460 source = sub;
4461 if (subtargets)
4462 sub = gen_reg_rtx (mode);
4463 emit_constant_insn (cond,
4464 gen_rtx_SET (VOIDmode, sub,
4465 gen_rtx_AND (mode, source,
4466 GEN_INT (temp1))));
4467 emit_constant_insn (cond,
4468 gen_rtx_SET (VOIDmode, target,
4469 gen_rtx_NOT (mode, sub)));
4471 return 3;
4473 break;
4475 case AND:
4476 /* See if two shifts will do 2 or more insn's worth of work. */
4477 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4479 HOST_WIDE_INT shift_mask = ((0xffffffff
4480 << (32 - clear_sign_bit_copies))
4481 & 0xffffffff);
4483 if ((remainder | shift_mask) != 0xffffffff)
4485 if (generate)
4487 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4488 insns = arm_gen_constant (AND, mode, cond,
4489 remainder | shift_mask,
4490 new_src, source, subtargets, 1);
4491 source = new_src;
4493 else
4495 rtx targ = subtargets ? NULL_RTX : target;
4496 insns = arm_gen_constant (AND, mode, cond,
4497 remainder | shift_mask,
4498 targ, source, subtargets, 0);
4502 if (generate)
4504 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4505 rtx shift = GEN_INT (clear_sign_bit_copies);
4507 emit_insn (gen_ashlsi3 (new_src, source, shift));
4508 emit_insn (gen_lshrsi3 (target, new_src, shift));
4511 return insns + 2;
4514 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4516 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4518 if ((remainder | shift_mask) != 0xffffffff)
4520 if (generate)
4522 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4524 insns = arm_gen_constant (AND, mode, cond,
4525 remainder | shift_mask,
4526 new_src, source, subtargets, 1);
4527 source = new_src;
4529 else
4531 rtx targ = subtargets ? NULL_RTX : target;
4533 insns = arm_gen_constant (AND, mode, cond,
4534 remainder | shift_mask,
4535 targ, source, subtargets, 0);
4539 if (generate)
4541 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4542 rtx shift = GEN_INT (clear_zero_bit_copies);
4544 emit_insn (gen_lshrsi3 (new_src, source, shift));
4545 emit_insn (gen_ashlsi3 (target, new_src, shift));
4548 return insns + 2;
4551 break;
4553 default:
4554 break;
4557 /* Calculate what the instruction sequences would be if we generated it
4558 normally, negated, or inverted. */
4559 if (code == AND)
4560 /* AND cannot be split into multiple insns, so invert and use BIC. */
4561 insns = 99;
4562 else
4563 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4565 if (can_negate)
4566 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4567 &neg_immediates);
4568 else
4569 neg_insns = 99;
4571 if (can_invert || final_invert)
4572 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4573 &inv_immediates);
4574 else
4575 inv_insns = 99;
4577 immediates = &pos_immediates;
4579 /* Is the negated immediate sequence more efficient? */
4580 if (neg_insns < insns && neg_insns <= inv_insns)
4582 insns = neg_insns;
4583 immediates = &neg_immediates;
4585 else
4586 can_negate = 0;
4588 /* Is the inverted immediate sequence more efficient?
4589 We must allow for an extra NOT instruction for XOR operations, although
4590 there is some chance that the final 'mvn' will get optimized later. */
4591 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4593 insns = inv_insns;
4594 immediates = &inv_immediates;
4596 else
4598 can_invert = 0;
4599 final_invert = 0;
4602 /* Now output the chosen sequence as instructions. */
4603 if (generate)
4605 for (i = 0; i < insns; i++)
4607 rtx new_src, temp1_rtx;
4609 temp1 = immediates->i[i];
4611 if (code == SET || code == MINUS)
4612 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4613 else if ((final_invert || i < (insns - 1)) && subtargets)
4614 new_src = gen_reg_rtx (mode);
4615 else
4616 new_src = target;
4618 if (can_invert)
4619 temp1 = ~temp1;
4620 else if (can_negate)
4621 temp1 = -temp1;
4623 temp1 = trunc_int_for_mode (temp1, mode);
4624 temp1_rtx = GEN_INT (temp1);
4626 if (code == SET)
4628 else if (code == MINUS)
4629 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4630 else
4631 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4633 emit_constant_insn (cond,
4634 gen_rtx_SET (VOIDmode, new_src,
4635 temp1_rtx));
4636 source = new_src;
4638 if (code == SET)
4640 can_negate = can_invert;
4641 can_invert = 0;
4642 code = PLUS;
4644 else if (code == MINUS)
4645 code = PLUS;
4649 if (final_invert)
4651 if (generate)
4652 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4653 gen_rtx_NOT (mode, source)));
4654 insns++;
4657 return insns;
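/* Worked example (illustrative): the AND case above can replace a mask that
   is not an encodable immediate by a pair of shifts.  On a core without
   uxth/ubfx (pre-ARMv6), r0 = r1 & 0x0000ffff has 16 clear sign bits, so it
   becomes

	mov	r0, r1, asl #16
	mov	r0, r0, lsr #16

   and symmetrically r0 = r1 & 0xffff0000 (16 clear low bits) becomes an
   lsr #16 followed by an asl #16.  */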
4660 /* Canonicalize a comparison so that we are more likely to recognize it.
4661 This can be done for a few constant compares, where we can make the
4662 immediate value easier to load. */
4664 static void
4665 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4666 bool op0_preserve_value)
4668 machine_mode mode;
4669 unsigned HOST_WIDE_INT i, maxval;
4671 mode = GET_MODE (*op0);
4672 if (mode == VOIDmode)
4673 mode = GET_MODE (*op1);
4675 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4677 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4678 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4679 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4680 for GTU/LEU in Thumb mode. */
4681 if (mode == DImode)
4683 rtx tem;
4685 if (*code == GT || *code == LE
4686 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4688 /* Missing comparison. First try to use an available
4689 comparison. */
4690 if (CONST_INT_P (*op1))
4692 i = INTVAL (*op1);
4693 switch (*code)
4695 case GT:
4696 case LE:
4697 if (i != maxval
4698 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4700 *op1 = GEN_INT (i + 1);
4701 *code = *code == GT ? GE : LT;
4702 return;
4704 break;
4705 case GTU:
4706 case LEU:
4707 if (i != ~((unsigned HOST_WIDE_INT) 0)
4708 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4710 *op1 = GEN_INT (i + 1);
4711 *code = *code == GTU ? GEU : LTU;
4712 return;
4714 break;
4715 default:
4716 gcc_unreachable ();
4720 /* If that did not work, reverse the condition. */
4721 if (!op0_preserve_value)
4723 tem = *op0;
4724 *op0 = *op1;
4725 *op1 = tem;
4726 *code = (int)swap_condition ((enum rtx_code)*code);
4729 return;
4732 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4733 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4734 to facilitate possible combining with a cmp into 'ands'. */
4735 if (mode == SImode
4736 && GET_CODE (*op0) == ZERO_EXTEND
4737 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4738 && GET_MODE (XEXP (*op0, 0)) == QImode
4739 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4740 && subreg_lowpart_p (XEXP (*op0, 0))
4741 && *op1 == const0_rtx)
4742 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4743 GEN_INT (255));
4745 /* Comparisons smaller than DImode. Only adjust comparisons against
4746 an out-of-range constant. */
4747 if (!CONST_INT_P (*op1)
4748 || const_ok_for_arm (INTVAL (*op1))
4749 || const_ok_for_arm (- INTVAL (*op1)))
4750 return;
4752 i = INTVAL (*op1);
4754 switch (*code)
4756 case EQ:
4757 case NE:
4758 return;
4760 case GT:
4761 case LE:
4762 if (i != maxval
4763 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4765 *op1 = GEN_INT (i + 1);
4766 *code = *code == GT ? GE : LT;
4767 return;
4769 break;
4771 case GE:
4772 case LT:
4773 if (i != ~maxval
4774 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4776 *op1 = GEN_INT (i - 1);
4777 *code = *code == GE ? GT : LE;
4778 return;
4780 break;
4782 case GTU:
4783 case LEU:
4784 if (i != ~((unsigned HOST_WIDE_INT) 0)
4785 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4787 *op1 = GEN_INT (i + 1);
4788 *code = *code == GTU ? GEU : LTU;
4789 return;
4791 break;
4793 case GEU:
4794 case LTU:
4795 if (i != 0
4796 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4798 *op1 = GEN_INT (i - 1);
4799 *code = *code == GEU ? GTU : LEU;
4800 return;
4802 break;
4804 default:
4805 gcc_unreachable ();
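/* Worked example (illustrative): neither 0x0000ffff nor its negation is an
   encodable immediate, but 0x00010000 is, and for integer x we have
   (x > 0xffff) == (x >= 0x10000); so a GT comparison against 0xffff is
   rewritten above into GE against 0x10000 and a plain cmp suffices:

	cmp	r0, #0x10000
	bge	.Ltaken		@ label name invented   */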
4810 /* Define how to find the value returned by a function. */
4812 static rtx
4813 arm_function_value(const_tree type, const_tree func,
4814 bool outgoing ATTRIBUTE_UNUSED)
4816 machine_mode mode;
4817 int unsignedp ATTRIBUTE_UNUSED;
4818 rtx r ATTRIBUTE_UNUSED;
4820 mode = TYPE_MODE (type);
4822 if (TARGET_AAPCS_BASED)
4823 return aapcs_allocate_return_reg (mode, type, func);
4825 /* Promote integer types. */
4826 if (INTEGRAL_TYPE_P (type))
4827 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4829 /* Promote small structs returned in a register to full-word size
4830 for big-endian AAPCS. */
4831 if (arm_return_in_msb (type))
4833 HOST_WIDE_INT size = int_size_in_bytes (type);
4834 if (size % UNITS_PER_WORD != 0)
4836 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4837 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4841 return arm_libcall_value_1 (mode);
4844 /* libcall hashtable helpers. */
4846 struct libcall_hasher : typed_noop_remove <rtx_def>
4848 typedef rtx_def value_type;
4849 typedef rtx_def compare_type;
4850 static inline hashval_t hash (const value_type *);
4851 static inline bool equal (const value_type *, const compare_type *);
4852 static inline void remove (value_type *);
4855 inline bool
4856 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4858 return rtx_equal_p (p1, p2);
4861 inline hashval_t
4862 libcall_hasher::hash (const value_type *p1)
4864 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4867 typedef hash_table<libcall_hasher> libcall_table_type;
4869 static void
4870 add_libcall (libcall_table_type *htab, rtx libcall)
4872 *htab->find_slot (libcall, INSERT) = libcall;
4875 static bool
4876 arm_libcall_uses_aapcs_base (const_rtx libcall)
4878 static bool init_done = false;
4879 static libcall_table_type *libcall_htab = NULL;
4881 if (!init_done)
4883 init_done = true;
4885 libcall_htab = new libcall_table_type (31);
4886 add_libcall (libcall_htab,
4887 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4888 add_libcall (libcall_htab,
4889 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4890 add_libcall (libcall_htab,
4891 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4892 add_libcall (libcall_htab,
4893 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4895 add_libcall (libcall_htab,
4896 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4897 add_libcall (libcall_htab,
4898 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4899 add_libcall (libcall_htab,
4900 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4901 add_libcall (libcall_htab,
4902 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4904 add_libcall (libcall_htab,
4905 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4906 add_libcall (libcall_htab,
4907 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4908 add_libcall (libcall_htab,
4909 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4910 add_libcall (libcall_htab,
4911 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4912 add_libcall (libcall_htab,
4913 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4914 add_libcall (libcall_htab,
4915 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4916 add_libcall (libcall_htab,
4917 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4918 add_libcall (libcall_htab,
4919 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4921 /* Values from double-precision helper functions are returned in core
4922 registers if the selected core only supports single-precision
4923 arithmetic, even if we are using the hard-float ABI. The same is
4924 true for single-precision helpers, but we will never be using the
4925 hard-float ABI on a CPU which doesn't support single-precision
4926 operations in hardware. */
4927 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4928 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4929 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4930 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4931 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4932 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4933 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4934 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4935 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4936 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4937 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4938 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4939 SFmode));
4940 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4941 DFmode));
4944 return libcall && libcall_htab->find (libcall) != NULL;
4947 static rtx
4948 arm_libcall_value_1 (machine_mode mode)
4950 if (TARGET_AAPCS_BASED)
4951 return aapcs_libcall_value (mode);
4952 else if (TARGET_IWMMXT_ABI
4953 && arm_vector_mode_supported_p (mode))
4954 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4955 else
4956 return gen_rtx_REG (mode, ARG_REGISTER (1));
4959 /* Define how to find the value returned by a library function
4960 assuming the value has mode MODE. */
4962 static rtx
4963 arm_libcall_value (machine_mode mode, const_rtx libcall)
4965 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4966 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4968 /* The following libcalls return their result in integer registers,
4969 even though they return a floating point value. */
4970 if (arm_libcall_uses_aapcs_base (libcall))
4971 return gen_rtx_REG (mode, ARG_REGISTER(1));
4975 return arm_libcall_value_1 (mode);
4978 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4980 static bool
4981 arm_function_value_regno_p (const unsigned int regno)
4983 if (regno == ARG_REGISTER (1)
4984 || (TARGET_32BIT
4985 && TARGET_AAPCS_BASED
4986 && TARGET_VFP
4987 && TARGET_HARD_FLOAT
4988 && regno == FIRST_VFP_REGNUM)
4989 || (TARGET_IWMMXT_ABI
4990 && regno == FIRST_IWMMXT_REGNUM))
4991 return true;
4993 return false;
4996 /* Determine the amount of memory needed to store the possible return
4997 registers of an untyped call. */
4999 arm_apply_result_size (void)
5001 int size = 16;
5003 if (TARGET_32BIT)
5005 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5006 size += 32;
5007 if (TARGET_IWMMXT_ABI)
5008 size += 8;
5011 return size;
5014 /* Decide whether TYPE should be returned in memory (true)
5015 or in a register (false). FNTYPE is the type of the function making
5016 the call. */
5017 static bool
5018 arm_return_in_memory (const_tree type, const_tree fntype)
5020 HOST_WIDE_INT size;
5022 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5024 if (TARGET_AAPCS_BASED)
5026 /* Simple, non-aggregate types (ie not including vectors and
5027 complex) are always returned in a register (or registers).
5028 We don't care about which register here, so we can short-cut
5029 some of the detail. */
5030 if (!AGGREGATE_TYPE_P (type)
5031 && TREE_CODE (type) != VECTOR_TYPE
5032 && TREE_CODE (type) != COMPLEX_TYPE)
5033 return false;
5035 /* Any return value that is no larger than one word can be
5036 returned in r0. */
5037 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5038 return false;
5040 /* Check any available co-processors to see if they accept the
5041 type as a register candidate (VFP, for example, can return
5042 some aggregates in consecutive registers). These aren't
5043 available if the call is variadic. */
5044 if (aapcs_select_return_coproc (type, fntype) >= 0)
5045 return false;
5047 /* Vector values should be returned using ARM registers, not
5048 memory (unless they're over 16 bytes, which will break since
5049 we only have four call-clobbered registers to play with). */
5050 if (TREE_CODE (type) == VECTOR_TYPE)
5051 return (size < 0 || size > (4 * UNITS_PER_WORD));
5053 /* The rest go in memory. */
5054 return true;
5057 if (TREE_CODE (type) == VECTOR_TYPE)
5058 return (size < 0 || size > (4 * UNITS_PER_WORD));
5060 if (!AGGREGATE_TYPE_P (type) &&
5061 (TREE_CODE (type) != VECTOR_TYPE))
5062 /* All simple types are returned in registers. */
5063 return false;
5065 if (arm_abi != ARM_ABI_APCS)
5067 /* ATPCS and later return aggregate types in memory only if they are
5068 larger than a word (or are variable size). */
5069 return (size < 0 || size > UNITS_PER_WORD);
5072 /* For the arm-wince targets we choose to be compatible with Microsoft's
5073 ARM and Thumb compilers, which always return aggregates in memory. */
5074 #ifndef ARM_WINCE
5075 /* All structures/unions bigger than one word are returned in memory.
5076 Also catch the case where int_size_in_bytes returns -1. In this case
5077 the aggregate is either huge or of variable size, and in either case
5078 we will want to return it via memory and not in a register. */
5079 if (size < 0 || size > UNITS_PER_WORD)
5080 return true;
5082 if (TREE_CODE (type) == RECORD_TYPE)
5084 tree field;
5086 /* For a struct the APCS says that we only return in a register
5087 if the type is 'integer like' and every addressable element
5088 has an offset of zero. For practical purposes this means
5089 that the structure can have at most one non bit-field element
5090 and that this element must be the first one in the structure. */
5092 /* Find the first field, ignoring non FIELD_DECL things which will
5093 have been created by C++. */
5094 for (field = TYPE_FIELDS (type);
5095 field && TREE_CODE (field) != FIELD_DECL;
5096 field = DECL_CHAIN (field))
5097 continue;
5099 if (field == NULL)
5100 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5102 /* Check that the first field is valid for returning in a register. */
5104 /* ... Floats are not allowed */
5105 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5106 return true;
5108 /* ... Aggregates that are not themselves valid for returning in
5109 a register are not allowed. */
5110 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5111 return true;
5113 /* Now check the remaining fields, if any. Only bitfields are allowed,
5114 since they are not addressable. */
5115 for (field = DECL_CHAIN (field);
5116 field;
5117 field = DECL_CHAIN (field))
5119 if (TREE_CODE (field) != FIELD_DECL)
5120 continue;
5122 if (!DECL_BIT_FIELD_TYPE (field))
5123 return true;
5126 return false;
5129 if (TREE_CODE (type) == UNION_TYPE)
5131 tree field;
5133 /* Unions can be returned in registers if every element is
5134 integral, or can be returned in an integer register. */
5135 for (field = TYPE_FIELDS (type);
5136 field;
5137 field = DECL_CHAIN (field))
5139 if (TREE_CODE (field) != FIELD_DECL)
5140 continue;
5142 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5143 return true;
5145 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5146 return true;
5149 return false;
5151 #endif /* not ARM_WINCE */
5153 /* Return all other types in memory. */
5154 return true;
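/* Illustrative examples (not part of the sources): under the old APCS rules
   described above (arm_abi == ARM_ABI_APCS, non-WinCE targets, 4-byte
   words), the following C types behave as noted.  */

struct in_reg   { int x; };                   /* one word, "integer like"        -> returned in r0     */
struct in_mem_1 { float f; };                 /* first field is a float          -> returned in memory */
struct in_mem_2 { int a, b; };                /* larger than one word            -> returned in memory */
struct in_reg_2 { int x : 16; int y : 16; };  /* remaining fields are bit-fields -> returned in r0     */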
5157 const struct pcs_attribute_arg
5159 const char *arg;
5160 enum arm_pcs value;
5161 } pcs_attribute_args[] =
5163 {"aapcs", ARM_PCS_AAPCS},
5164 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5165 #if 0
5166 /* We could recognize these, but changes would be needed elsewhere
5167 * to implement them. */
5168 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5169 {"atpcs", ARM_PCS_ATPCS},
5170 {"apcs", ARM_PCS_APCS},
5171 #endif
5172 {NULL, ARM_PCS_UNKNOWN}
5175 static enum arm_pcs
5176 arm_pcs_from_attribute (tree attr)
5178 const struct pcs_attribute_arg *ptr;
5179 const char *arg;
5181 /* Get the value of the argument. */
5182 if (TREE_VALUE (attr) == NULL_TREE
5183 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5184 return ARM_PCS_UNKNOWN;
5186 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5188 /* Check it against the list of known arguments. */
5189 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5190 if (streq (arg, ptr->arg))
5191 return ptr->value;
5193 /* An unrecognized PCS variant name. */
5194 return ARM_PCS_UNKNOWN;
5197 /* Get the PCS variant to use for this call. TYPE is the function's type
5198 specification, DECL is the specific declaration. DECL may be null if
5199 the call could be indirect or if this is a library call. */
5200 static enum arm_pcs
5201 arm_get_pcs_model (const_tree type, const_tree decl)
5203 bool user_convention = false;
5204 enum arm_pcs user_pcs = arm_pcs_default;
5205 tree attr;
5207 gcc_assert (type);
5209 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5210 if (attr)
5212 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5213 user_convention = true;
5216 if (TARGET_AAPCS_BASED)
5218 /* Detect varargs functions. These always use the base rules
5219 (no argument is ever a candidate for a co-processor
5220 register). */
5221 bool base_rules = stdarg_p (type);
5223 if (user_convention)
5225 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5226 sorry ("non-AAPCS derived PCS variant");
5227 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5228 error ("variadic functions must use the base AAPCS variant");
5231 if (base_rules)
5232 return ARM_PCS_AAPCS;
5233 else if (user_convention)
5234 return user_pcs;
5235 else if (decl && flag_unit_at_a_time)
5237 /* Local functions never leak outside this compilation unit,
5238 so we are free to use whatever conventions are
5239 appropriate. */
5240 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5241 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5242 if (i && i->local)
5243 return ARM_PCS_AAPCS_LOCAL;
5246 else if (user_convention && user_pcs != arm_pcs_default)
5247 sorry ("PCS variant");
5249 /* For everything else we use the target's default. */
5250 return arm_pcs_default;
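/* Usage sketch (illustrative): the "pcs" type attribute consulted above is
   the documented GCC ARM attribute, e.g.

	double f2d (float) __attribute__ ((pcs ("aapcs")));

   which forces the base (core-register) calling convention for that
   function even when the default is aapcs-vfp; "aapcs" and "aapcs-vfp" are
   the two strings accepted by the table above.  */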
5254 static void
5255 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5256 const_tree fntype ATTRIBUTE_UNUSED,
5257 rtx libcall ATTRIBUTE_UNUSED,
5258 const_tree fndecl ATTRIBUTE_UNUSED)
5260 /* Record the unallocated VFP registers. */
5261 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5262 pcum->aapcs_vfp_reg_alloc = 0;
5265 /* Walk down the type tree of TYPE counting consecutive base elements.
5266 If *MODEP is VOIDmode, then set it to the first valid floating point
5267 type. If a non-floating point type is found, or if a floating point
5268 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5269 otherwise return the count in the sub-tree. */
5270 static int
5271 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5273 machine_mode mode;
5274 HOST_WIDE_INT size;
5276 switch (TREE_CODE (type))
5278 case REAL_TYPE:
5279 mode = TYPE_MODE (type);
5280 if (mode != DFmode && mode != SFmode)
5281 return -1;
5283 if (*modep == VOIDmode)
5284 *modep = mode;
5286 if (*modep == mode)
5287 return 1;
5289 break;
5291 case COMPLEX_TYPE:
5292 mode = TYPE_MODE (TREE_TYPE (type));
5293 if (mode != DFmode && mode != SFmode)
5294 return -1;
5296 if (*modep == VOIDmode)
5297 *modep = mode;
5299 if (*modep == mode)
5300 return 2;
5302 break;
5304 case VECTOR_TYPE:
5305 /* Use V2SImode and V4SImode as representatives of all 64-bit
5306 and 128-bit vector types, whether or not those modes are
5307 supported with the present options. */
5308 size = int_size_in_bytes (type);
5309 switch (size)
5311 case 8:
5312 mode = V2SImode;
5313 break;
5314 case 16:
5315 mode = V4SImode;
5316 break;
5317 default:
5318 return -1;
5321 if (*modep == VOIDmode)
5322 *modep = mode;
5324 /* Vector modes are considered to be opaque: two vectors are
5325 equivalent for the purposes of being homogeneous aggregates
5326 if they are the same size. */
5327 if (*modep == mode)
5328 return 1;
5330 break;
5332 case ARRAY_TYPE:
5334 int count;
5335 tree index = TYPE_DOMAIN (type);
5337 /* Can't handle incomplete types nor sizes that are not
5338 fixed. */
5339 if (!COMPLETE_TYPE_P (type)
5340 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5341 return -1;
5343 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5344 if (count == -1
5345 || !index
5346 || !TYPE_MAX_VALUE (index)
5347 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5348 || !TYPE_MIN_VALUE (index)
5349 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5350 || count < 0)
5351 return -1;
5353 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5354 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5356 /* There must be no padding. */
5357 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5358 return -1;
5360 return count;
5363 case RECORD_TYPE:
5365 int count = 0;
5366 int sub_count;
5367 tree field;
5369 /* Can't handle incomplete types nor sizes that are not
5370 fixed. */
5371 if (!COMPLETE_TYPE_P (type)
5372 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5373 return -1;
5375 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5377 if (TREE_CODE (field) != FIELD_DECL)
5378 continue;
5380 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5381 if (sub_count < 0)
5382 return -1;
5383 count += sub_count;
5386 /* There must be no padding. */
5387 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5388 return -1;
5390 return count;
5393 case UNION_TYPE:
5394 case QUAL_UNION_TYPE:
5396 /* These aren't very interesting except in a degenerate case. */
5397 int count = 0;
5398 int sub_count;
5399 tree field;
5401 /* Can't handle incomplete types nor sizes that are not
5402 fixed. */
5403 if (!COMPLETE_TYPE_P (type)
5404 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5405 return -1;
5407 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5409 if (TREE_CODE (field) != FIELD_DECL)
5410 continue;
5412 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5413 if (sub_count < 0)
5414 return -1;
5415 count = count > sub_count ? count : sub_count;
5418 /* There must be no padding. */
5419 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5420 return -1;
5422 return count;
5425 default:
5426 break;
5429 return -1;
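/* Illustrative examples (not part of the sources): for the C types below the
   walk above reports

	struct s1 { float x, y, z; };			-- 3 elements of SFmode
	struct s2 { double re, im; };			-- 2 elements of DFmode
	typedef float v4f __attribute__ ((vector_size (16)));	-- 1 opaque V4SImode element
	struct s3 { float f; double d; };		-- mixed base modes, returns -1

   and the caller then rejects any candidate with more than four elements.  */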
5432 /* Return true if PCS_VARIANT should use VFP registers. */
5433 static bool
5434 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5436 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5438 static bool seen_thumb1_vfp = false;
5440 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5442 sorry ("Thumb-1 hard-float VFP ABI");
5443 /* sorry() is not immediately fatal, so only display this once. */
5444 seen_thumb1_vfp = true;
5447 return true;
5450 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5451 return false;
5453 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5454 (TARGET_VFP_DOUBLE || !is_double));
5457 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5458 suitable for passing or returning in VFP registers for the PCS
5459 variant selected. If it is, then *BASE_MODE is updated to contain
5460 a machine mode describing each element of the argument's type and
5461 *COUNT to hold the number of such elements. */
5462 static bool
5463 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5464 machine_mode mode, const_tree type,
5465 machine_mode *base_mode, int *count)
5467 machine_mode new_mode = VOIDmode;
5469 /* If we have the type information, prefer that to working things
5470 out from the mode. */
5471 if (type)
5473 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5475 if (ag_count > 0 && ag_count <= 4)
5476 *count = ag_count;
5477 else
5478 return false;
5480 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5481 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5482 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5484 *count = 1;
5485 new_mode = mode;
5487 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5489 *count = 2;
5490 new_mode = (mode == DCmode ? DFmode : SFmode);
5492 else
5493 return false;
5496 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5497 return false;
5499 *base_mode = new_mode;
5500 return true;
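/* Editorial illustration, not part of the original source.  A homogeneous
   floating-point aggregate such as the hypothetical

     struct point { double x; double y; };

   is accepted here with *base_mode set to DFmode and *count set to 2.  A
   mixed aggregate such as

     struct mixed { double x; int y; };

   makes aapcs_vfp_sub_candidate return -1, so it is not a VFP candidate
   and falls back to core registers or the stack.  */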
5503 static bool
5504 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5505 machine_mode mode, const_tree type)
5507 int count ATTRIBUTE_UNUSED;
5508 machine_mode ag_mode ATTRIBUTE_UNUSED;
5510 if (!use_vfp_abi (pcs_variant, false))
5511 return false;
5512 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5513 &ag_mode, &count);
5516 static bool
5517 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5518 const_tree type)
5520 if (!use_vfp_abi (pcum->pcs_variant, false))
5521 return false;
5523 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5524 &pcum->aapcs_vfp_rmode,
5525 &pcum->aapcs_vfp_rcount);
5528 static bool
5529 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5530 const_tree type ATTRIBUTE_UNUSED)
5532 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5533 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5534 int regno;
5536 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5537 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5539 pcum->aapcs_vfp_reg_alloc = mask << regno;
5540 if (mode == BLKmode
5541 || (mode == TImode && ! TARGET_NEON)
5542 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5544 int i;
5545 int rcount = pcum->aapcs_vfp_rcount;
5546 int rshift = shift;
5547 machine_mode rmode = pcum->aapcs_vfp_rmode;
5548 rtx par;
5549 if (!TARGET_NEON)
5551 /* Avoid using unsupported vector modes. */
5552 if (rmode == V2SImode)
5553 rmode = DImode;
5554 else if (rmode == V4SImode)
5556 rmode = DImode;
5557 rcount *= 2;
5558 rshift /= 2;
5561 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5562 for (i = 0; i < rcount; i++)
5564 rtx tmp = gen_rtx_REG (rmode,
5565 FIRST_VFP_REGNUM + regno + i * rshift);
5566 tmp = gen_rtx_EXPR_LIST
5567 (VOIDmode, tmp,
5568 GEN_INT (i * GET_MODE_SIZE (rmode)));
5569 XVECEXP (par, 0, i) = tmp;
5572 pcum->aapcs_reg = par;
5574 else
5575 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5576 return true;
5578 return false;
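/* Editorial sketch, not part of the original source.  For an argument with
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2, shift is 2 (one
   DFmode value occupies two SFmode-sized registers), mask is 0xF, and the
   loop above searches for four consecutive free S registers starting at an
   even register number, e.g. s0-s3 (the D register pair d0-d1).  */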
5581 static rtx
5582 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5583 machine_mode mode,
5584 const_tree type ATTRIBUTE_UNUSED)
5586 if (!use_vfp_abi (pcs_variant, false))
5587 return NULL;
5589 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5591 int count;
5592 machine_mode ag_mode;
5593 int i;
5594 rtx par;
5595 int shift;
5597 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5598 &ag_mode, &count);
5600 if (!TARGET_NEON)
5602 if (ag_mode == V2SImode)
5603 ag_mode = DImode;
5604 else if (ag_mode == V4SImode)
5606 ag_mode = DImode;
5607 count *= 2;
5610 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5611 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5612 for (i = 0; i < count; i++)
5614 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5615 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5616 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5617 XVECEXP (par, 0, i) = tmp;
5620 return par;
5623 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5626 static void
5627 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5628 machine_mode mode ATTRIBUTE_UNUSED,
5629 const_tree type ATTRIBUTE_UNUSED)
5631 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5632 pcum->aapcs_vfp_reg_alloc = 0;
5633 return;
5636 #define AAPCS_CP(X) \
5638 aapcs_ ## X ## _cum_init, \
5639 aapcs_ ## X ## _is_call_candidate, \
5640 aapcs_ ## X ## _allocate, \
5641 aapcs_ ## X ## _is_return_candidate, \
5642 aapcs_ ## X ## _allocate_return_reg, \
5643 aapcs_ ## X ## _advance \
5646 /* Table of co-processors that can be used to pass arguments in
5647 registers. Ideally no argument should be a candidate for more than
5648 one co-processor table entry, but the table is processed in order
5649 and stops after the first match. If that entry then fails to put
5650 the argument into a co-processor register, the argument will go on
5651 the stack. */
5652 static struct
5654 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5655 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5657 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5658 BLKmode) is a candidate for this co-processor's registers; this
5659 function should ignore any position-dependent state in
5660 CUMULATIVE_ARGS and only use call-type dependent information. */
5661 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5663 /* Return true if the argument does get a co-processor register; it
5664 should set aapcs_reg to an RTX of the register allocated as is
5665 required for a return from FUNCTION_ARG. */
5666 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5668 /* Return true if a result of mode MODE (or type TYPE if MODE is
5669 BLKmode) can be returned in this co-processor's registers. */
5670 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5672 /* Allocate and return an RTX element to hold the return type of a
5673 call, this routine must not fail and will only be called if
5674 is_return_candidate returned true with the same parameters. */
5675 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5677 /* Finish processing this argument and prepare to start processing
5678 the next one. */
5679 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5680 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5682 AAPCS_CP(vfp)
5685 #undef AAPCS_CP
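/* Editorial illustration, not part of the original source.  The single
   AAPCS_CP(vfp) entry above expands, roughly, to:

     { aapcs_vfp_cum_init,
       aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance },

   i.e. one slot of function pointers wiring the VFP handlers above into
   the generic argument-layout code that follows.  */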
5687 static int
5688 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5689 const_tree type)
5691 int i;
5693 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5694 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5695 return i;
5697 return -1;
5700 static int
5701 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5703 /* We aren't passed a decl, so we can't check that a call is local.
5704 However, it isn't clear that that would be a win anyway, since it
5705 might limit some tail-calling opportunities. */
5706 enum arm_pcs pcs_variant;
5708 if (fntype)
5710 const_tree fndecl = NULL_TREE;
5712 if (TREE_CODE (fntype) == FUNCTION_DECL)
5714 fndecl = fntype;
5715 fntype = TREE_TYPE (fntype);
5718 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5720 else
5721 pcs_variant = arm_pcs_default;
5723 if (pcs_variant != ARM_PCS_AAPCS)
5725 int i;
5727 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5728 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5729 TYPE_MODE (type),
5730 type))
5731 return i;
5733 return -1;
5736 static rtx
5737 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5738 const_tree fntype)
5740 /* We aren't passed a decl, so we can't check that a call is local.
5741 However, it isn't clear that that would be a win anyway, since it
5742 might limit some tail-calling opportunities. */
5743 enum arm_pcs pcs_variant;
5744 int unsignedp ATTRIBUTE_UNUSED;
5746 if (fntype)
5748 const_tree fndecl = NULL_TREE;
5750 if (TREE_CODE (fntype) == FUNCTION_DECL)
5752 fndecl = fntype;
5753 fntype = TREE_TYPE (fntype);
5756 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5758 else
5759 pcs_variant = arm_pcs_default;
5761 /* Promote integer types. */
5762 if (type && INTEGRAL_TYPE_P (type))
5763 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5765 if (pcs_variant != ARM_PCS_AAPCS)
5767 int i;
5769 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5770 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5771 type))
5772 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5773 mode, type);
5776 /* Promote small structs returned in a register to full-word size
5777 for big-endian AAPCS. */
5778 if (type && arm_return_in_msb (type))
5780 HOST_WIDE_INT size = int_size_in_bytes (type);
5781 if (size % UNITS_PER_WORD != 0)
5783 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5784 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5788 return gen_rtx_REG (mode, R0_REGNUM);
5791 static rtx
5792 aapcs_libcall_value (machine_mode mode)
5794 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5795 && GET_MODE_SIZE (mode) <= 4)
5796 mode = SImode;
5798 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5801 /* Lay out a function argument using the AAPCS rules. The rule
5802 numbers referred to here are those in the AAPCS. */
5803 static void
5804 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5805 const_tree type, bool named)
5807 int nregs, nregs2;
5808 int ncrn;
5810 /* We only need to do this once per argument. */
5811 if (pcum->aapcs_arg_processed)
5812 return;
5814 pcum->aapcs_arg_processed = true;
5816 /* Special case: if named is false then we are handling an incoming
5817 anonymous argument which is on the stack. */
5818 if (!named)
5819 return;
5821 /* Is this a potential co-processor register candidate? */
5822 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5824 int slot = aapcs_select_call_coproc (pcum, mode, type);
5825 pcum->aapcs_cprc_slot = slot;
5827 /* We don't have to apply any of the rules from part B of the
5828 preparation phase, these are handled elsewhere in the
5829 compiler. */
5831 if (slot >= 0)
5833 /* A Co-processor register candidate goes either in its own
5834 class of registers or on the stack. */
5835 if (!pcum->aapcs_cprc_failed[slot])
5837 /* C1.cp - Try to allocate the argument to co-processor
5838 registers. */
5839 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5840 return;
5842 /* C2.cp - Put the argument on the stack and note that we
5843 can't assign any more candidates in this slot. We also
5844 need to note that we have allocated stack space, so that
5845 we won't later try to split a non-cprc candidate between
5846 core registers and the stack. */
5847 pcum->aapcs_cprc_failed[slot] = true;
5848 pcum->can_split = false;
5851 /* We didn't get a register, so this argument goes on the
5852 stack. */
5853 gcc_assert (pcum->can_split == false);
5854 return;
5858 /* C3 - For double-word aligned arguments, round the NCRN up to the
5859 next even number. */
5860 ncrn = pcum->aapcs_ncrn;
5861 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5862 ncrn++;
5864 nregs = ARM_NUM_REGS2(mode, type);
5866 /* Sigh, this test should really assert that nregs > 0, but a GCC
5867 extension allows empty structs and then gives them empty size; it
5868 then allows such a structure to be passed by value. For some of
5869 the code below we have to pretend that such an argument has
5870 non-zero size so that we 'locate' it correctly either in
5871 registers or on the stack. */
5872 gcc_assert (nregs >= 0);
5874 nregs2 = nregs ? nregs : 1;
5876 /* C4 - Argument fits entirely in core registers. */
5877 if (ncrn + nregs2 <= NUM_ARG_REGS)
5879 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5880 pcum->aapcs_next_ncrn = ncrn + nregs;
5881 return;
5884 /* C5 - Some core registers left and there are no arguments already
5885 on the stack: split this argument between the remaining core
5886 registers and the stack. */
5887 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5889 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5890 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5891 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5892 return;
5895 /* C6 - NCRN is set to 4. */
5896 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5898 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5899 return;
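/* Editorial worked example, not part of the original source, assuming the
   base (soft-float) AAPCS variant so that doubles travel in core
   registers.  For f (int a, double b): 'a' takes r0 and ncrn becomes 1;
   for 'b', rule C3 rounds ncrn up to 2 because DFmode needs doubleword
   alignment, and rule C4 then assigns r2-r3, leaving r1 unused.  A second
   double argument would find ncrn == 4, so rule C6 applies and rules
   C7/C8 place it entirely on the stack.  */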
5902 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5903 for a call to a function whose data type is FNTYPE.
5904 For a library call, FNTYPE is NULL. */
5905 void
5906 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5907 rtx libname,
5908 tree fndecl ATTRIBUTE_UNUSED)
5910 /* Long call handling. */
5911 if (fntype)
5912 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5913 else
5914 pcum->pcs_variant = arm_pcs_default;
5916 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5918 if (arm_libcall_uses_aapcs_base (libname))
5919 pcum->pcs_variant = ARM_PCS_AAPCS;
5921 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5922 pcum->aapcs_reg = NULL_RTX;
5923 pcum->aapcs_partial = 0;
5924 pcum->aapcs_arg_processed = false;
5925 pcum->aapcs_cprc_slot = -1;
5926 pcum->can_split = true;
5928 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5930 int i;
5932 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5934 pcum->aapcs_cprc_failed[i] = false;
5935 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5938 return;
5941 /* Legacy ABIs */
5943 /* On the ARM, the offset starts at 0. */
5944 pcum->nregs = 0;
5945 pcum->iwmmxt_nregs = 0;
5946 pcum->can_split = true;
5948 /* Varargs vectors are treated the same as long long.
5949 named_count avoids having to change the way arm handles 'named' */
5950 pcum->named_count = 0;
5951 pcum->nargs = 0;
5953 if (TARGET_REALLY_IWMMXT && fntype)
5955 tree fn_arg;
5957 for (fn_arg = TYPE_ARG_TYPES (fntype);
5958 fn_arg;
5959 fn_arg = TREE_CHAIN (fn_arg))
5960 pcum->named_count += 1;
5962 if (! pcum->named_count)
5963 pcum->named_count = INT_MAX;
5967 /* Return true if we use LRA instead of reload pass. */
5968 static bool
5969 arm_lra_p (void)
5971 return arm_lra_flag;
5974 /* Return true if mode/type need doubleword alignment. */
5975 static bool
5976 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5978 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5979 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
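/* Editorial example, not part of the original source.  With the usual
   32-bit PARM_BOUNDARY, 64-bit-aligned modes such as DImode and DFmode,
   and any type declared with __attribute__ ((aligned (8))), are reported
   as needing doubleword alignment, so the argument-layout code starts
   them in an even core register or a doubleword-aligned stack slot.  */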
5983 /* Determine where to put an argument to a function.
5984 Value is zero to push the argument on the stack,
5985 or a hard register in which to store the argument.
5987 MODE is the argument's machine mode.
5988 TYPE is the data type of the argument (as a tree).
5989 This is null for libcalls where that information may
5990 not be available.
5991 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5992 the preceding args and about the function being called.
5993 NAMED is nonzero if this argument is a named parameter
5994 (otherwise it is an extra parameter matching an ellipsis).
5996 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5997 other arguments are passed on the stack. If (NAMED == 0) (which happens
5998 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5999 defined), say it is passed in the stack (function_prologue will
6000 indeed make it pass in the stack if necessary). */
6002 static rtx
6003 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6004 const_tree type, bool named)
6006 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6007 int nregs;
6009 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6010 a call insn (op3 of a call_value insn). */
6011 if (mode == VOIDmode)
6012 return const0_rtx;
6014 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6016 aapcs_layout_arg (pcum, mode, type, named);
6017 return pcum->aapcs_reg;
6020 /* Varargs vectors are treated the same as long long.
6021 named_count avoids having to change the way arm handles 'named' */
6022 if (TARGET_IWMMXT_ABI
6023 && arm_vector_mode_supported_p (mode)
6024 && pcum->named_count > pcum->nargs + 1)
6026 if (pcum->iwmmxt_nregs <= 9)
6027 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6028 else
6030 pcum->can_split = false;
6031 return NULL_RTX;
6035 /* Put doubleword aligned quantities in even register pairs. */
6036 if (pcum->nregs & 1
6037 && ARM_DOUBLEWORD_ALIGN
6038 && arm_needs_doubleword_align (mode, type))
6039 pcum->nregs++;
6041 /* Only allow splitting an arg between regs and memory if all preceding
6042 args were allocated to regs. For args passed by reference we only count
6043 the reference pointer. */
6044 if (pcum->can_split)
6045 nregs = 1;
6046 else
6047 nregs = ARM_NUM_REGS2 (mode, type);
6049 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6050 return NULL_RTX;
6052 return gen_rtx_REG (mode, pcum->nregs);
6055 static unsigned int
6056 arm_function_arg_boundary (machine_mode mode, const_tree type)
6058 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6059 ? DOUBLEWORD_ALIGNMENT
6060 : PARM_BOUNDARY);
6063 static int
6064 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6065 tree type, bool named)
6067 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6068 int nregs = pcum->nregs;
6070 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6072 aapcs_layout_arg (pcum, mode, type, named);
6073 return pcum->aapcs_partial;
6076 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6077 return 0;
6079 if (NUM_ARG_REGS > nregs
6080 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6081 && pcum->can_split)
6082 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6084 return 0;
6087 /* Update the data in PCUM to advance over an argument
6088 of mode MODE and data type TYPE.
6089 (TYPE is null for libcalls where that information may not be available.) */
6091 static void
6092 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6093 const_tree type, bool named)
6095 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6097 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6099 aapcs_layout_arg (pcum, mode, type, named);
6101 if (pcum->aapcs_cprc_slot >= 0)
6103 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6104 type);
6105 pcum->aapcs_cprc_slot = -1;
6108 /* Generic stuff. */
6109 pcum->aapcs_arg_processed = false;
6110 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6111 pcum->aapcs_reg = NULL_RTX;
6112 pcum->aapcs_partial = 0;
6114 else
6116 pcum->nargs += 1;
6117 if (arm_vector_mode_supported_p (mode)
6118 && pcum->named_count > pcum->nargs
6119 && TARGET_IWMMXT_ABI)
6120 pcum->iwmmxt_nregs += 1;
6121 else
6122 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6126 /* Variable sized types are passed by reference. This is a GCC
6127 extension to the ARM ABI. */
6129 static bool
6130 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6131 machine_mode mode ATTRIBUTE_UNUSED,
6132 const_tree type, bool named ATTRIBUTE_UNUSED)
6134 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
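/* Editorial note, not part of the original source.  This fires for types
   whose size is not a compile-time constant, e.g. a struct whose last
   member is a variable-length array (a GCC extension); such an argument
   is passed as an address rather than copied by value into registers or
   stack slots.  */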
6137 /* Encode the current state of the #pragma [no_]long_calls. */
6138 typedef enum
6140 OFF, /* No #pragma [no_]long_calls is in effect. */
6141 LONG, /* #pragma long_calls is in effect. */
6142 SHORT /* #pragma no_long_calls is in effect. */
6143 } arm_pragma_enum;
6145 static arm_pragma_enum arm_pragma_long_calls = OFF;
6147 void
6148 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6150 arm_pragma_long_calls = LONG;
6153 void
6154 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6156 arm_pragma_long_calls = SHORT;
6159 void
6160 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6162 arm_pragma_long_calls = OFF;
6165 /* Handle an attribute requiring a FUNCTION_DECL;
6166 arguments as in struct attribute_spec.handler. */
6167 static tree
6168 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6169 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6171 if (TREE_CODE (*node) != FUNCTION_DECL)
6173 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6174 name);
6175 *no_add_attrs = true;
6178 return NULL_TREE;
6181 /* Handle an "interrupt" or "isr" attribute;
6182 arguments as in struct attribute_spec.handler. */
6183 static tree
6184 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6185 bool *no_add_attrs)
6187 if (DECL_P (*node))
6189 if (TREE_CODE (*node) != FUNCTION_DECL)
6191 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6192 name);
6193 *no_add_attrs = true;
6195 /* FIXME: the argument if any is checked for type attributes;
6196 should it be checked for decl ones? */
6198 else
6200 if (TREE_CODE (*node) == FUNCTION_TYPE
6201 || TREE_CODE (*node) == METHOD_TYPE)
6203 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6205 warning (OPT_Wattributes, "%qE attribute ignored",
6206 name);
6207 *no_add_attrs = true;
6210 else if (TREE_CODE (*node) == POINTER_TYPE
6211 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6212 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6213 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6215 *node = build_variant_type_copy (*node);
6216 TREE_TYPE (*node) = build_type_attribute_variant
6217 (TREE_TYPE (*node),
6218 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6219 *no_add_attrs = true;
6221 else
6223 /* Possibly pass this attribute on from the type to a decl. */
6224 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6225 | (int) ATTR_FLAG_FUNCTION_NEXT
6226 | (int) ATTR_FLAG_ARRAY_NEXT))
6228 *no_add_attrs = true;
6229 return tree_cons (name, args, NULL_TREE);
6231 else
6233 warning (OPT_Wattributes, "%qE attribute ignored",
6234 name);
6239 return NULL_TREE;
6242 /* Handle a "pcs" attribute; arguments as in struct
6243 attribute_spec.handler. */
6244 static tree
6245 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6246 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6248 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6250 warning (OPT_Wattributes, "%qE attribute ignored", name);
6251 *no_add_attrs = true;
6253 return NULL_TREE;
6256 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6257 /* Handle the "notshared" attribute. This attribute is another way of
6258 requesting hidden visibility. ARM's compiler supports
6259 "__declspec(notshared)"; we support the same thing via an
6260 attribute. */
6262 static tree
6263 arm_handle_notshared_attribute (tree *node,
6264 tree name ATTRIBUTE_UNUSED,
6265 tree args ATTRIBUTE_UNUSED,
6266 int flags ATTRIBUTE_UNUSED,
6267 bool *no_add_attrs)
6269 tree decl = TYPE_NAME (*node);
6271 if (decl)
6273 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6274 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6275 *no_add_attrs = false;
6277 return NULL_TREE;
6279 #endif
6281 /* Return 0 if the attributes for two types are incompatible, 1 if they
6282 are compatible, and 2 if they are nearly compatible (which causes a
6283 warning to be generated). */
6284 static int
6285 arm_comp_type_attributes (const_tree type1, const_tree type2)
6287 int l1, l2, s1, s2;
6289 /* Check for mismatch of non-default calling convention. */
6290 if (TREE_CODE (type1) != FUNCTION_TYPE)
6291 return 1;
6293 /* Check for mismatched call attributes. */
6294 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6295 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6296 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6297 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6299 /* Only bother to check if an attribute is defined. */
6300 if (l1 | l2 | s1 | s2)
6302 /* If one type has an attribute, the other must have the same attribute. */
6303 if ((l1 != l2) || (s1 != s2))
6304 return 0;
6306 /* Disallow mixed attributes. */
6307 if ((l1 & s2) || (l2 & s1))
6308 return 0;
6311 /* Check for mismatched ISR attribute. */
6312 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6313 if (! l1)
6314 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6315 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6316 if (! l2)
6317 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6318 if (l1 != l2)
6319 return 0;
6321 return 1;
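/* Editorial example, not part of the original source.  Two function types
   where one carries __attribute__ ((long_call)) and the other
   __attribute__ ((short_call)) compare as incompatible (0), as do types
   that disagree on the isr/interrupt attribute; two types with no call
   attributes at all compare as compatible (1).  */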
6324 /* Assigns default attributes to newly defined type. This is used to
6325 set short_call/long_call attributes for function types of
6326 functions defined inside corresponding #pragma scopes. */
6327 static void
6328 arm_set_default_type_attributes (tree type)
6330 /* Add __attribute__ ((long_call)) to all functions, when
6331 inside #pragma long_calls or __attribute__ ((short_call)),
6332 when inside #pragma no_long_calls. */
6333 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6335 tree type_attr_list, attr_name;
6336 type_attr_list = TYPE_ATTRIBUTES (type);
6338 if (arm_pragma_long_calls == LONG)
6339 attr_name = get_identifier ("long_call");
6340 else if (arm_pragma_long_calls == SHORT)
6341 attr_name = get_identifier ("short_call");
6342 else
6343 return;
6345 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6346 TYPE_ATTRIBUTES (type) = type_attr_list;
6350 /* Return true if DECL is known to be linked into section SECTION. */
6352 static bool
6353 arm_function_in_section_p (tree decl, section *section)
6355 /* We can only be certain about functions defined in the same
6356 compilation unit. */
6357 if (!TREE_STATIC (decl))
6358 return false;
6360 /* Make sure that SYMBOL always binds to the definition in this
6361 compilation unit. */
6362 if (!targetm.binds_local_p (decl))
6363 return false;
6365 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6366 if (!DECL_SECTION_NAME (decl))
6368 /* Make sure that we will not create a unique section for DECL. */
6369 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6370 return false;
6373 return function_section (decl) == section;
6376 /* Return nonzero if a 32-bit "long_call" should be generated for
6377 a call from the current function to DECL. We generate a long_call
6378 if the function:
6380 a. has an __attribute__ ((long_call))
6381 or b. is within the scope of a #pragma long_calls
6382 or c. the -mlong-calls command line switch has been specified
6384 However we do not generate a long call if the function:
6386 d. has an __attribute__ ((short_call))
6387 or e. is inside the scope of a #pragma no_long_calls
6388 or f. is defined in the same section as the current function. */
6390 bool
6391 arm_is_long_call_p (tree decl)
6393 tree attrs;
6395 if (!decl)
6396 return TARGET_LONG_CALLS;
6398 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6399 if (lookup_attribute ("short_call", attrs))
6400 return false;
6402 /* For "f", be conservative, and only cater for cases in which the
6403 whole of the current function is placed in the same section. */
6404 if (!flag_reorder_blocks_and_partition
6405 && TREE_CODE (decl) == FUNCTION_DECL
6406 && arm_function_in_section_p (decl, current_function_section ()))
6407 return false;
6409 if (lookup_attribute ("long_call", attrs))
6410 return true;
6412 return TARGET_LONG_CALLS;
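/* Editorial example, not part of the original source.  Even with
   -mlong-calls, a declaration such as the hypothetical

     extern void nearby (void) __attribute__ ((short_call));

   is still called with a normal direct call, because the short_call check
   above wins over TARGET_LONG_CALLS; conversely,
   __attribute__ ((long_call)) forces the long-call sequence even without
   the command-line option.  */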
6415 /* Return nonzero if it is ok to make a tail-call to DECL. */
6416 static bool
6417 arm_function_ok_for_sibcall (tree decl, tree exp)
6419 unsigned long func_type;
6421 if (cfun->machine->sibcall_blocked)
6422 return false;
6424 /* Never tailcall something if we are generating code for Thumb-1. */
6425 if (TARGET_THUMB1)
6426 return false;
6428 /* The PIC register is live on entry to VxWorks PLT entries, so we
6429 must make the call before restoring the PIC register. */
6430 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6431 return false;
6433 /* If we are interworking and the function is not declared static
6434 then we can't tail-call it unless we know that it exists in this
6435 compilation unit (since it might be a Thumb routine). */
6436 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6437 && !TREE_ASM_WRITTEN (decl))
6438 return false;
6440 func_type = arm_current_func_type ();
6441 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6442 if (IS_INTERRUPT (func_type))
6443 return false;
6445 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6447 /* Check that the return value locations are the same. For
6448 example that we aren't returning a value from the sibling in
6449 a VFP register but then need to transfer it to a core
6450 register. */
6451 rtx a, b;
6453 a = arm_function_value (TREE_TYPE (exp), decl, false);
6454 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6455 cfun->decl, false);
6456 if (!rtx_equal_p (a, b))
6457 return false;
6460 /* Never tailcall if function may be called with a misaligned SP. */
6461 if (IS_STACKALIGN (func_type))
6462 return false;
6464 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6465 references should become a NOP. Don't convert such calls into
6466 sibling calls. */
6467 if (TARGET_AAPCS_BASED
6468 && arm_abi == ARM_ABI_AAPCS
6469 && decl
6470 && DECL_WEAK (decl))
6471 return false;
6473 /* Everything else is ok. */
6474 return true;
6478 /* Addressing mode support functions. */
6480 /* Return nonzero if X is a legitimate immediate operand when compiling
6481 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6483 legitimate_pic_operand_p (rtx x)
6485 if (GET_CODE (x) == SYMBOL_REF
6486 || (GET_CODE (x) == CONST
6487 && GET_CODE (XEXP (x, 0)) == PLUS
6488 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6489 return 0;
6491 return 1;
6494 /* Record that the current function needs a PIC register. Initialize
6495 cfun->machine->pic_reg if we have not already done so. */
6497 static void
6498 require_pic_register (void)
6500 /* A lot of the logic here is made obscure by the fact that this
6501 routine gets called as part of the rtx cost estimation process.
6502 We don't want those calls to affect any assumptions about the real
6503 function; and further, we can't call entry_of_function() until we
6504 start the real expansion process. */
6505 if (!crtl->uses_pic_offset_table)
6507 gcc_assert (can_create_pseudo_p ());
6508 if (arm_pic_register != INVALID_REGNUM
6509 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6511 if (!cfun->machine->pic_reg)
6512 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6514 /* Play games to avoid marking the function as needing pic
6515 if we are being called as part of the cost-estimation
6516 process. */
6517 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6518 crtl->uses_pic_offset_table = 1;
6520 else
6522 rtx_insn *seq, *insn;
6524 if (!cfun->machine->pic_reg)
6525 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6527 /* Play games to avoid marking the function as needing pic
6528 if we are being called as part of the cost-estimation
6529 process. */
6530 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6532 crtl->uses_pic_offset_table = 1;
6533 start_sequence ();
6535 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6536 && arm_pic_register > LAST_LO_REGNUM)
6537 emit_move_insn (cfun->machine->pic_reg,
6538 gen_rtx_REG (Pmode, arm_pic_register));
6539 else
6540 arm_load_pic_register (0UL);
6542 seq = get_insns ();
6543 end_sequence ();
6545 for (insn = seq; insn; insn = NEXT_INSN (insn))
6546 if (INSN_P (insn))
6547 INSN_LOCATION (insn) = prologue_location;
6549 /* We can be called during expansion of PHI nodes, where
6550 we can't yet emit instructions directly in the final
6551 insn stream. Queue the insns on the entry edge, they will
6552 be committed after everything else is expanded. */
6553 insert_insn_on_edge (seq,
6554 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6561 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6563 if (GET_CODE (orig) == SYMBOL_REF
6564 || GET_CODE (orig) == LABEL_REF)
6566 rtx insn;
6568 if (reg == 0)
6570 gcc_assert (can_create_pseudo_p ());
6571 reg = gen_reg_rtx (Pmode);
6574 /* VxWorks does not impose a fixed gap between segments; the run-time
6575 gap can be different from the object-file gap. We therefore can't
6576 use GOTOFF unless we are absolutely sure that the symbol is in the
6577 same segment as the GOT. Unfortunately, the flexibility of linker
6578 scripts means that we can't be sure of that in general, so assume
6579 that GOTOFF is never valid on VxWorks. */
6580 if ((GET_CODE (orig) == LABEL_REF
6581 || (GET_CODE (orig) == SYMBOL_REF &&
6582 SYMBOL_REF_LOCAL_P (orig)))
6583 && NEED_GOT_RELOC
6584 && arm_pic_data_is_text_relative)
6585 insn = arm_pic_static_addr (orig, reg);
6586 else
6588 rtx pat;
6589 rtx mem;
6591 /* If this function doesn't have a pic register, create one now. */
6592 require_pic_register ();
6594 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6596 /* Make the MEM as close to a constant as possible. */
6597 mem = SET_SRC (pat);
6598 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6599 MEM_READONLY_P (mem) = 1;
6600 MEM_NOTRAP_P (mem) = 1;
6602 insn = emit_insn (pat);
6605 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6606 by loop. */
6607 set_unique_reg_note (insn, REG_EQUAL, orig);
6609 return reg;
6611 else if (GET_CODE (orig) == CONST)
6613 rtx base, offset;
6615 if (GET_CODE (XEXP (orig, 0)) == PLUS
6616 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6617 return orig;
6619 /* Handle the case where we have: const (UNSPEC_TLS). */
6620 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6621 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6622 return orig;
6624 /* Handle the case where we have:
6625 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6626 CONST_INT. */
6627 if (GET_CODE (XEXP (orig, 0)) == PLUS
6628 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6629 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6631 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6632 return orig;
6635 if (reg == 0)
6637 gcc_assert (can_create_pseudo_p ());
6638 reg = gen_reg_rtx (Pmode);
6641 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6643 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6644 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6645 base == reg ? 0 : reg);
6647 if (CONST_INT_P (offset))
6649 /* The base register doesn't really matter, we only want to
6650 test the index for the appropriate mode. */
6651 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6653 gcc_assert (can_create_pseudo_p ());
6654 offset = force_reg (Pmode, offset);
6657 if (CONST_INT_P (offset))
6658 return plus_constant (Pmode, base, INTVAL (offset));
6661 if (GET_MODE_SIZE (mode) > 4
6662 && (GET_MODE_CLASS (mode) == MODE_INT
6663 || TARGET_SOFT_FLOAT))
6665 emit_insn (gen_addsi3 (reg, base, offset));
6666 return reg;
6669 return gen_rtx_PLUS (Pmode, base, offset);
6672 return orig;
6676 /* Find a spare register to use during the prolog of a function. */
6678 static int
6679 thumb_find_work_register (unsigned long pushed_regs_mask)
6681 int reg;
6683 /* Check the argument registers first as these are call-used. The
6684 register allocation order means that sometimes r3 might be used
6685 but earlier argument registers might not, so check them all. */
6686 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6687 if (!df_regs_ever_live_p (reg))
6688 return reg;
6690 /* Before going on to check the call-saved registers we can try a couple
6691 more ways of deducing that r3 is available. The first is when we are
6692 pushing anonymous arguments onto the stack and we have less than 4
6693 registers worth of fixed arguments(*). In this case r3 will be part of
6694 the variable argument list and so we can be sure that it will be
6695 pushed right at the start of the function. Hence it will be available
6696 for the rest of the prologue.
6697 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6698 if (cfun->machine->uses_anonymous_args
6699 && crtl->args.pretend_args_size > 0)
6700 return LAST_ARG_REGNUM;
6702 /* The other case is when we have fixed arguments but less than 4 registers
6703 worth. In this case r3 might be used in the body of the function, but
6704 it is not being used to convey an argument into the function. In theory
6705 we could just check crtl->args.size to see how many bytes are
6706 being passed in argument registers, but it seems that it is unreliable.
6707 Sometimes it will have the value 0 when in fact arguments are being
6708 passed. (See testcase execute/20021111-1.c for an example). So we also
6709 check the args_info.nregs field as well. The problem with this field is
6710 that it makes no allowances for arguments that are passed to the
6711 function but which are not used. Hence we could miss an opportunity
6712 when a function has an unused argument in r3. But it is better to be
6713 safe than to be sorry. */
6714 if (! cfun->machine->uses_anonymous_args
6715 && crtl->args.size >= 0
6716 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6717 && (TARGET_AAPCS_BASED
6718 ? crtl->args.info.aapcs_ncrn < 4
6719 : crtl->args.info.nregs < 4))
6720 return LAST_ARG_REGNUM;
6722 /* Otherwise look for a call-saved register that is going to be pushed. */
6723 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6724 if (pushed_regs_mask & (1 << reg))
6725 return reg;
6727 if (TARGET_THUMB2)
6729 /* Thumb-2 can use high regs. */
6730 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6731 if (pushed_regs_mask & (1 << reg))
6732 return reg;
6734 /* Something went wrong - thumb_compute_save_reg_mask()
6735 should have arranged for a suitable register to be pushed. */
6736 gcc_unreachable ();
6739 static GTY(()) int pic_labelno;
6741 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6742 low register. */
6744 void
6745 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6747 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6749 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6750 return;
6752 gcc_assert (flag_pic);
6754 pic_reg = cfun->machine->pic_reg;
6755 if (TARGET_VXWORKS_RTP)
6757 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6758 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6759 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6761 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6763 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6764 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6766 else
6768 /* We use an UNSPEC rather than a LABEL_REF because this label
6769 never appears in the code stream. */
6771 labelno = GEN_INT (pic_labelno++);
6772 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6773 l1 = gen_rtx_CONST (VOIDmode, l1);
6775 /* On the ARM the PC register contains 'dot + 8' at the time of the
6776 addition, on the Thumb it is 'dot + 4'. */
6777 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6778 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6779 UNSPEC_GOTSYM_OFF);
6780 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6782 if (TARGET_32BIT)
6784 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6786 else /* TARGET_THUMB1 */
6788 if (arm_pic_register != INVALID_REGNUM
6789 && REGNO (pic_reg) > LAST_LO_REGNUM)
6791 /* We will have pushed the pic register, so we should always be
6792 able to find a work register. */
6793 pic_tmp = gen_rtx_REG (SImode,
6794 thumb_find_work_register (saved_regs));
6795 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6796 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6797 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6799 else if (arm_pic_register != INVALID_REGNUM
6800 && arm_pic_register > LAST_LO_REGNUM
6801 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6803 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6804 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6805 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6807 else
6808 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6812 /* Need to emit this whether or not we obey regdecls,
6813 since setjmp/longjmp can cause life info to screw up. */
6814 emit_use (pic_reg);
6817 /* Generate code to load the address of a static var when flag_pic is set. */
6818 static rtx
6819 arm_pic_static_addr (rtx orig, rtx reg)
6821 rtx l1, labelno, offset_rtx, insn;
6823 gcc_assert (flag_pic);
6825 /* We use an UNSPEC rather than a LABEL_REF because this label
6826 never appears in the code stream. */
6827 labelno = GEN_INT (pic_labelno++);
6828 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6829 l1 = gen_rtx_CONST (VOIDmode, l1);
6831 /* On the ARM the PC register contains 'dot + 8' at the time of the
6832 addition, on the Thumb it is 'dot + 4'. */
6833 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6834 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6835 UNSPEC_SYMBOL_OFFSET);
6836 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6838 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6839 return insn;
6842 /* Return nonzero if X is valid as an ARM state addressing register. */
6843 static int
6844 arm_address_register_rtx_p (rtx x, int strict_p)
6846 int regno;
6848 if (!REG_P (x))
6849 return 0;
6851 regno = REGNO (x);
6853 if (strict_p)
6854 return ARM_REGNO_OK_FOR_BASE_P (regno);
6856 return (regno <= LAST_ARM_REGNUM
6857 || regno >= FIRST_PSEUDO_REGISTER
6858 || regno == FRAME_POINTER_REGNUM
6859 || regno == ARG_POINTER_REGNUM);
6862 /* Return TRUE if this rtx is the difference of a symbol and a label,
6863 and will reduce to a PC-relative relocation in the object file.
6864 Expressions like this can be left alone when generating PIC, rather
6865 than forced through the GOT. */
6866 static int
6867 pcrel_constant_p (rtx x)
6869 if (GET_CODE (x) == MINUS)
6870 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6872 return FALSE;
6875 /* Return true if X will surely end up in an index register after next
6876 splitting pass. */
6877 static bool
6878 will_be_in_index_register (const_rtx x)
6880 /* arm.md: calculate_pic_address will split this into a register. */
6881 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6884 /* Return nonzero if X is a valid ARM state address operand. */
6886 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6887 int strict_p)
6889 bool use_ldrd;
6890 enum rtx_code code = GET_CODE (x);
6892 if (arm_address_register_rtx_p (x, strict_p))
6893 return 1;
6895 use_ldrd = (TARGET_LDRD
6896 && (mode == DImode
6897 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6899 if (code == POST_INC || code == PRE_DEC
6900 || ((code == PRE_INC || code == POST_DEC)
6901 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6902 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6904 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6905 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6906 && GET_CODE (XEXP (x, 1)) == PLUS
6907 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6909 rtx addend = XEXP (XEXP (x, 1), 1);
6911 /* Don't allow ldrd post increment by register because it's hard
6912 to fixup invalid register choices. */
6913 if (use_ldrd
6914 && GET_CODE (x) == POST_MODIFY
6915 && REG_P (addend))
6916 return 0;
6918 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6919 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6922 /* After reload constants split into minipools will have addresses
6923 from a LABEL_REF. */
6924 else if (reload_completed
6925 && (code == LABEL_REF
6926 || (code == CONST
6927 && GET_CODE (XEXP (x, 0)) == PLUS
6928 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6929 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6930 return 1;
6932 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6933 return 0;
6935 else if (code == PLUS)
6937 rtx xop0 = XEXP (x, 0);
6938 rtx xop1 = XEXP (x, 1);
6940 return ((arm_address_register_rtx_p (xop0, strict_p)
6941 && ((CONST_INT_P (xop1)
6942 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6943 || (!strict_p && will_be_in_index_register (xop1))))
6944 || (arm_address_register_rtx_p (xop1, strict_p)
6945 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6948 #if 0
6949 /* Reload currently can't handle MINUS, so disable this for now */
6950 else if (GET_CODE (x) == MINUS)
6952 rtx xop0 = XEXP (x, 0);
6953 rtx xop1 = XEXP (x, 1);
6955 return (arm_address_register_rtx_p (xop0, strict_p)
6956 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6958 #endif
6960 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6961 && code == SYMBOL_REF
6962 && CONSTANT_POOL_ADDRESS_P (x)
6963 && ! (flag_pic
6964 && symbol_mentioned_p (get_pool_constant (x))
6965 && ! pcrel_constant_p (get_pool_constant (x))))
6966 return 1;
6968 return 0;
6971 /* Return nonzero if X is a valid Thumb-2 address operand. */
6972 static int
6973 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6975 bool use_ldrd;
6976 enum rtx_code code = GET_CODE (x);
6978 if (arm_address_register_rtx_p (x, strict_p))
6979 return 1;
6981 use_ldrd = (TARGET_LDRD
6982 && (mode == DImode
6983 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6985 if (code == POST_INC || code == PRE_DEC
6986 || ((code == PRE_INC || code == POST_DEC)
6987 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6988 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6990 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6991 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6992 && GET_CODE (XEXP (x, 1)) == PLUS
6993 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6995 /* Thumb-2 only has autoincrement by constant. */
6996 rtx addend = XEXP (XEXP (x, 1), 1);
6997 HOST_WIDE_INT offset;
6999 if (!CONST_INT_P (addend))
7000 return 0;
7002 offset = INTVAL(addend);
7003 if (GET_MODE_SIZE (mode) <= 4)
7004 return (offset > -256 && offset < 256);
7006 return (use_ldrd && offset > -1024 && offset < 1024
7007 && (offset & 3) == 0);
7010 /* After reload constants split into minipools will have addresses
7011 from a LABEL_REF. */
7012 else if (reload_completed
7013 && (code == LABEL_REF
7014 || (code == CONST
7015 && GET_CODE (XEXP (x, 0)) == PLUS
7016 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7017 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7018 return 1;
7020 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7021 return 0;
7023 else if (code == PLUS)
7025 rtx xop0 = XEXP (x, 0);
7026 rtx xop1 = XEXP (x, 1);
7028 return ((arm_address_register_rtx_p (xop0, strict_p)
7029 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7030 || (!strict_p && will_be_in_index_register (xop1))))
7031 || (arm_address_register_rtx_p (xop1, strict_p)
7032 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7035 /* Normally we can assign constant values to target registers without
7036 the help of constant pool. But there are cases we have to use constant
7037 pool like:
7038 1) assign a label to register.
7039 2) sign-extend an 8-bit value to 32 bits and then assign to register.
7041 Constant pool access in format:
7042 (set (reg r0) (mem (symbol_ref (".LC0"))))
7043 will cause the use of literal pool (later in function arm_reorg).
7044 So here we mark such format as an invalid format, then the compiler
7045 will adjust it into:
7046 (set (reg r0) (symbol_ref (".LC0")))
7047 (set (reg r0) (mem (reg r0))).
7048 No extra register is required, and (mem (reg r0)) won't cause the use
7049 of literal pools. */
7050 else if (arm_disable_literal_pool && code == SYMBOL_REF
7051 && CONSTANT_POOL_ADDRESS_P (x))
7052 return 0;
7054 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7055 && code == SYMBOL_REF
7056 && CONSTANT_POOL_ADDRESS_P (x)
7057 && ! (flag_pic
7058 && symbol_mentioned_p (get_pool_constant (x))
7059 && ! pcrel_constant_p (get_pool_constant (x))))
7060 return 1;
7062 return 0;
7065 /* Return nonzero if INDEX is valid for an address index operand in
7066 ARM state. */
7067 static int
7068 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7069 int strict_p)
7071 HOST_WIDE_INT range;
7072 enum rtx_code code = GET_CODE (index);
7074 /* Standard coprocessor addressing modes. */
7075 if (TARGET_HARD_FLOAT
7076 && TARGET_VFP
7077 && (mode == SFmode || mode == DFmode))
7078 return (code == CONST_INT && INTVAL (index) < 1024
7079 && INTVAL (index) > -1024
7080 && (INTVAL (index) & 3) == 0);
7082 /* For quad modes, we restrict the constant offset to be slightly less
7083 than what the instruction format permits. We do this because for
7084 quad mode moves, we will actually decompose them into two separate
7085 double-mode reads or writes. INDEX must therefore be a valid
7086 (double-mode) offset and so should INDEX+8. */
7087 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7088 return (code == CONST_INT
7089 && INTVAL (index) < 1016
7090 && INTVAL (index) > -1024
7091 && (INTVAL (index) & 3) == 0);
7093 /* We have no such constraint on double mode offsets, so we permit the
7094 full range of the instruction format. */
7095 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7096 return (code == CONST_INT
7097 && INTVAL (index) < 1024
7098 && INTVAL (index) > -1024
7099 && (INTVAL (index) & 3) == 0);
7101 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7102 return (code == CONST_INT
7103 && INTVAL (index) < 1024
7104 && INTVAL (index) > -1024
7105 && (INTVAL (index) & 3) == 0);
7107 if (arm_address_register_rtx_p (index, strict_p)
7108 && (GET_MODE_SIZE (mode) <= 4))
7109 return 1;
7111 if (mode == DImode || mode == DFmode)
7113 if (code == CONST_INT)
7115 HOST_WIDE_INT val = INTVAL (index);
7117 if (TARGET_LDRD)
7118 return val > -256 && val < 256;
7119 else
7120 return val > -4096 && val < 4092;
7123 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7126 if (GET_MODE_SIZE (mode) <= 4
7127 && ! (arm_arch4
7128 && (mode == HImode
7129 || mode == HFmode
7130 || (mode == QImode && outer == SIGN_EXTEND))))
7132 if (code == MULT)
7134 rtx xiop0 = XEXP (index, 0);
7135 rtx xiop1 = XEXP (index, 1);
7137 return ((arm_address_register_rtx_p (xiop0, strict_p)
7138 && power_of_two_operand (xiop1, SImode))
7139 || (arm_address_register_rtx_p (xiop1, strict_p)
7140 && power_of_two_operand (xiop0, SImode)));
7142 else if (code == LSHIFTRT || code == ASHIFTRT
7143 || code == ASHIFT || code == ROTATERT)
7145 rtx op = XEXP (index, 1);
7147 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7148 && CONST_INT_P (op)
7149 && INTVAL (op) > 0
7150 && INTVAL (op) <= 31);
7154 /* For ARM v4 we may be doing a sign-extend operation during the
7155 load. */
7156 if (arm_arch4)
7158 if (mode == HImode
7159 || mode == HFmode
7160 || (outer == SIGN_EXTEND && mode == QImode))
7161 range = 256;
7162 else
7163 range = 4096;
7165 else
7166 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7168 return (code == CONST_INT
7169 && INTVAL (index) < range
7170 && INTVAL (index) > -range);
7173 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7174 index operand. i.e. 1, 2, 4 or 8. */
7175 static bool
7176 thumb2_index_mul_operand (rtx op)
7178 HOST_WIDE_INT val;
7180 if (!CONST_INT_P (op))
7181 return false;
7183 val = INTVAL(op);
7184 return (val == 1 || val == 2 || val == 4 || val == 8);
7187 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7188 static int
7189 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7191 enum rtx_code code = GET_CODE (index);
7193 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7194 /* Standard coprocessor addressing modes. */
7195 if (TARGET_HARD_FLOAT
7196 && TARGET_VFP
7197 && (mode == SFmode || mode == DFmode))
7198 return (code == CONST_INT && INTVAL (index) < 1024
7199 /* Thumb-2 allows only > -256 index range for its core register
7200 load/stores. Since we allow SF/DF in core registers, we have
7201 to use the intersection between -256~4096 (core) and -1024~1024
7202 (coprocessor). */
7203 && INTVAL (index) > -256
7204 && (INTVAL (index) & 3) == 0);
7206 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7208 /* For DImode assume values will usually live in core regs
7209 and only allow LDRD addressing modes. */
7210 if (!TARGET_LDRD || mode != DImode)
7211 return (code == CONST_INT
7212 && INTVAL (index) < 1024
7213 && INTVAL (index) > -1024
7214 && (INTVAL (index) & 3) == 0);
7217 /* For quad modes, we restrict the constant offset to be slightly less
7218 than what the instruction format permits. We do this because for
7219 quad mode moves, we will actually decompose them into two separate
7220 double-mode reads or writes. INDEX must therefore be a valid
7221 (double-mode) offset and so should INDEX+8. */
7222 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7223 return (code == CONST_INT
7224 && INTVAL (index) < 1016
7225 && INTVAL (index) > -1024
7226 && (INTVAL (index) & 3) == 0);
7228 /* We have no such constraint on double mode offsets, so we permit the
7229 full range of the instruction format. */
7230 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7231 return (code == CONST_INT
7232 && INTVAL (index) < 1024
7233 && INTVAL (index) > -1024
7234 && (INTVAL (index) & 3) == 0);
7236 if (arm_address_register_rtx_p (index, strict_p)
7237 && (GET_MODE_SIZE (mode) <= 4))
7238 return 1;
7240 if (mode == DImode || mode == DFmode)
7242 if (code == CONST_INT)
7244 HOST_WIDE_INT val = INTVAL (index);
7245 /* ??? Can we assume ldrd for thumb2? */
7246 /* Thumb-2 ldrd only has reg+const addressing modes. */
7247 /* ldrd supports offsets of +-1020.
7248 However the ldr fallback does not. */
7249 return val > -256 && val < 256 && (val & 3) == 0;
7251 else
7252 return 0;
7255 if (code == MULT)
7257 rtx xiop0 = XEXP (index, 0);
7258 rtx xiop1 = XEXP (index, 1);
7260 return ((arm_address_register_rtx_p (xiop0, strict_p)
7261 && thumb2_index_mul_operand (xiop1))
7262 || (arm_address_register_rtx_p (xiop1, strict_p)
7263 && thumb2_index_mul_operand (xiop0)));
7265 else if (code == ASHIFT)
7267 rtx op = XEXP (index, 1);
7269 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7270 && CONST_INT_P (op)
7271 && INTVAL (op) > 0
7272 && INTVAL (op) <= 3);
7275 return (code == CONST_INT
7276 && INTVAL (index) < 4096
7277 && INTVAL (index) > -256);
7280 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7281 static int
7282 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7284 int regno;
7286 if (!REG_P (x))
7287 return 0;
7289 regno = REGNO (x);
7291 if (strict_p)
7292 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7294 return (regno <= LAST_LO_REGNUM
7295 || regno > LAST_VIRTUAL_REGISTER
7296 || regno == FRAME_POINTER_REGNUM
7297 || (GET_MODE_SIZE (mode) >= 4
7298 && (regno == STACK_POINTER_REGNUM
7299 || regno >= FIRST_PSEUDO_REGISTER
7300 || x == hard_frame_pointer_rtx
7301 || x == arg_pointer_rtx)));
7304 /* Return nonzero if x is a legitimate index register. This is the case
7305 for any base register that can access a QImode object. */
7306 inline static int
7307 thumb1_index_register_rtx_p (rtx x, int strict_p)
7309 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7312 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7314 The AP may be eliminated to either the SP or the FP, so we use the
7315 least common denominator, e.g. SImode, and offsets from 0 to 64.
7317 ??? Verify whether the above is the right approach.
7319 ??? Also, the FP may be eliminated to the SP, so perhaps that
7320 needs special handling also.
7322 ??? Look at how the mips16 port solves this problem. It probably uses
7323 better ways to solve some of these problems.
7325 Although it is not incorrect, we don't accept QImode and HImode
7326 addresses based on the frame pointer or arg pointer until the
7327 reload pass starts. This is so that eliminating such addresses
7328 into stack based ones won't produce impossible code. */
7330 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7332 /* ??? Not clear if this is right. Experiment. */
7333 if (GET_MODE_SIZE (mode) < 4
7334 && !(reload_in_progress || reload_completed)
7335 && (reg_mentioned_p (frame_pointer_rtx, x)
7336 || reg_mentioned_p (arg_pointer_rtx, x)
7337 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7338 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7339 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7340 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7341 return 0;
7343 /* Accept any base register. SP only in SImode or larger. */
7344 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7345 return 1;
7347 /* This is PC relative data before arm_reorg runs. */
7348 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7349 && GET_CODE (x) == SYMBOL_REF
7350 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7351 return 1;
7353 /* This is PC relative data after arm_reorg runs. */
7354 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7355 && reload_completed
7356 && (GET_CODE (x) == LABEL_REF
7357 || (GET_CODE (x) == CONST
7358 && GET_CODE (XEXP (x, 0)) == PLUS
7359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7360 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7361 return 1;
7363 /* Post-inc indexing only supported for SImode and larger. */
7364 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7365 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7366 return 1;
7368 else if (GET_CODE (x) == PLUS)
7370 /* REG+REG address can be any two index registers. */
7371 /* We disallow FRAME+REG addressing since we know that FRAME
7372 will be replaced with STACK, and SP relative addressing only
7373 permits SP+OFFSET. */
7374 if (GET_MODE_SIZE (mode) <= 4
7375 && XEXP (x, 0) != frame_pointer_rtx
7376 && XEXP (x, 1) != frame_pointer_rtx
7377 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7378 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7379 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7380 return 1;
7382 /* REG+const has 5-7 bit offset for non-SP registers. */
7383 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7384 || XEXP (x, 0) == arg_pointer_rtx)
7385 && CONST_INT_P (XEXP (x, 1))
7386 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7387 return 1;
7389 /* REG+const has 10-bit offset for SP, but only SImode and
7390 larger are supported. */
7391 /* ??? Should probably check for DI/DFmode overflow here
7392 just like GO_IF_LEGITIMATE_OFFSET does. */
7393 else if (REG_P (XEXP (x, 0))
7394 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7395 && GET_MODE_SIZE (mode) >= 4
7396 && CONST_INT_P (XEXP (x, 1))
7397 && INTVAL (XEXP (x, 1)) >= 0
7398 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7399 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7400 return 1;
7402 else if (REG_P (XEXP (x, 0))
7403 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7404 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7405 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7406 && REGNO (XEXP (x, 0))
7407 <= LAST_VIRTUAL_POINTER_REGISTER))
7408 && GET_MODE_SIZE (mode) >= 4
7409 && CONST_INT_P (XEXP (x, 1))
7410 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7411 return 1;
7414 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7415 && GET_MODE_SIZE (mode) == 4
7416 && GET_CODE (x) == SYMBOL_REF
7417 && CONSTANT_POOL_ADDRESS_P (x)
7418 && ! (flag_pic
7419 && symbol_mentioned_p (get_pool_constant (x))
7420 && ! pcrel_constant_p (get_pool_constant (x))))
7421 return 1;
7423 return 0;
7426 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7427 instruction of mode MODE. */
7429 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7431 switch (GET_MODE_SIZE (mode))
7433 case 1:
7434 return val >= 0 && val < 32;
7436 case 2:
7437 return val >= 0 && val < 64 && (val & 1) == 0;
7439 default:
7440 return (val >= 0
7441 && (val + GET_MODE_SIZE (mode)) <= 128
7442 && (val & 3) == 0);
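/* Illustrative examples (not part of GCC) of the ranges accepted above:
   byte accesses take offsets 0..31, halfword accesses take even offsets
   0..62, and larger accesses take word-aligned offsets with VAL plus the
   mode size no greater than 128 (0..124 for SImode).  Hypothetical checks:

     thumb_legitimate_offset_p (QImode, 31)    -> nonzero
     thumb_legitimate_offset_p (QImode, 32)    -> 0
     thumb_legitimate_offset_p (HImode, 62)    -> nonzero
     thumb_legitimate_offset_p (HImode, 63)    -> 0
     thumb_legitimate_offset_p (SImode, 124)   -> nonzero
     thumb_legitimate_offset_p (SImode, 126)   -> 0  */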
7446 bool
7447 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7449 if (TARGET_ARM)
7450 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7451 else if (TARGET_THUMB2)
7452 return thumb2_legitimate_address_p (mode, x, strict_p);
7453 else /* if (TARGET_THUMB1) */
7454 return thumb1_legitimate_address_p (mode, x, strict_p);
7457 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7459 Given an rtx X being reloaded into a reg required to be
7460 in class CLASS, return the class of reg to actually use.
7461 In general this is just CLASS, but for the Thumb core registers and
7462 immediate constants we prefer a LO_REGS class or a subset. */
7464 static reg_class_t
7465 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7467 if (TARGET_32BIT)
7468 return rclass;
7469 else
7471 if (rclass == GENERAL_REGS)
7472 return LO_REGS;
7473 else
7474 return rclass;
7478 /* Build the SYMBOL_REF for __tls_get_addr. */
7480 static GTY(()) rtx tls_get_addr_libfunc;
7482 static rtx
7483 get_tls_get_addr (void)
7485 if (!tls_get_addr_libfunc)
7486 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7487 return tls_get_addr_libfunc;
7491 arm_load_tp (rtx target)
7493 if (!target)
7494 target = gen_reg_rtx (SImode);
7496 if (TARGET_HARD_TP)
7498 /* Can return in any reg. */
7499 emit_insn (gen_load_tp_hard (target));
7501 else
7503 /* Always returned in r0. Immediately copy the result into a pseudo,
7504 otherwise other uses of r0 (e.g. setting up function arguments) may
7505 clobber the value. */
7507 rtx tmp;
7509 emit_insn (gen_load_tp_soft ());
7511 tmp = gen_rtx_REG (SImode, 0);
7512 emit_move_insn (target, tmp);
7514 return target;
7517 static rtx
7518 load_tls_operand (rtx x, rtx reg)
7520 rtx tmp;
7522 if (reg == NULL_RTX)
7523 reg = gen_reg_rtx (SImode);
7525 tmp = gen_rtx_CONST (SImode, x);
7527 emit_move_insn (reg, tmp);
7529 return reg;
7532 static rtx
7533 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7535 rtx insns, label, labelno, sum;
7537 gcc_assert (reloc != TLS_DESCSEQ);
7538 start_sequence ();
7540 labelno = GEN_INT (pic_labelno++);
7541 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7542 label = gen_rtx_CONST (VOIDmode, label);
7544 sum = gen_rtx_UNSPEC (Pmode,
7545 gen_rtvec (4, x, GEN_INT (reloc), label,
7546 GEN_INT (TARGET_ARM ? 8 : 4)),
7547 UNSPEC_TLS);
7548 reg = load_tls_operand (sum, reg);
7550 if (TARGET_ARM)
7551 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7552 else
7553 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7555 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7556 LCT_PURE, /* LCT_CONST? */
7557 Pmode, 1, reg, Pmode);
7559 insns = get_insns ();
7560 end_sequence ();
7562 return insns;
7565 static rtx
7566 arm_tls_descseq_addr (rtx x, rtx reg)
7568 rtx labelno = GEN_INT (pic_labelno++);
7569 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7570 rtx sum = gen_rtx_UNSPEC (Pmode,
7571 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7572 gen_rtx_CONST (VOIDmode, label),
7573 GEN_INT (!TARGET_ARM)),
7574 UNSPEC_TLS);
7575 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7577 emit_insn (gen_tlscall (x, labelno));
7578 if (!reg)
7579 reg = gen_reg_rtx (SImode);
7580 else
7581 gcc_assert (REGNO (reg) != 0);
7583 emit_move_insn (reg, reg0);
7585 return reg;
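/* Overview of the TLS access models handled by legitimize_tls_address
   below: GLOBAL_DYNAMIC and LOCAL_DYNAMIC either use the GNU2 TLS
   descriptor sequence built by arm_tls_descseq_addr above and then add
   the thread pointer, or call __tls_get_addr via arm_call_tls_get_addr.
   For LOCAL_DYNAMIC the call computes the module base (TLS_LDM32), which
   can be shared between accesses, and a per-symbol TLS_LDO32 addend is
   added afterwards.  INITIAL_EXEC loads the symbol's thread-pointer
   offset via a TLS_IE32 reference and adds it to the thread pointer
   returned by arm_load_tp; LOCAL_EXEC adds a link-time TLS_LE32 offset
   to the thread pointer directly.  */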
7589 legitimize_tls_address (rtx x, rtx reg)
7591 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7592 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7594 switch (model)
7596 case TLS_MODEL_GLOBAL_DYNAMIC:
7597 if (TARGET_GNU2_TLS)
7599 reg = arm_tls_descseq_addr (x, reg);
7601 tp = arm_load_tp (NULL_RTX);
7603 dest = gen_rtx_PLUS (Pmode, tp, reg);
7605 else
7607 /* Original scheme */
7608 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7609 dest = gen_reg_rtx (Pmode);
7610 emit_libcall_block (insns, dest, ret, x);
7612 return dest;
7614 case TLS_MODEL_LOCAL_DYNAMIC:
7615 if (TARGET_GNU2_TLS)
7617 reg = arm_tls_descseq_addr (x, reg);
7619 tp = arm_load_tp (NULL_RTX);
7621 dest = gen_rtx_PLUS (Pmode, tp, reg);
7623 else
7625 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7627 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7628 share the LDM result with other LD model accesses. */
7629 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7630 UNSPEC_TLS);
7631 dest = gen_reg_rtx (Pmode);
7632 emit_libcall_block (insns, dest, ret, eqv);
7634 /* Load the addend. */
7635 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7636 GEN_INT (TLS_LDO32)),
7637 UNSPEC_TLS);
7638 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7639 dest = gen_rtx_PLUS (Pmode, dest, addend);
7641 return dest;
7643 case TLS_MODEL_INITIAL_EXEC:
7644 labelno = GEN_INT (pic_labelno++);
7645 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7646 label = gen_rtx_CONST (VOIDmode, label);
7647 sum = gen_rtx_UNSPEC (Pmode,
7648 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7649 GEN_INT (TARGET_ARM ? 8 : 4)),
7650 UNSPEC_TLS);
7651 reg = load_tls_operand (sum, reg);
7653 if (TARGET_ARM)
7654 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7655 else if (TARGET_THUMB2)
7656 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7657 else
7659 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7660 emit_move_insn (reg, gen_const_mem (SImode, reg));
7663 tp = arm_load_tp (NULL_RTX);
7665 return gen_rtx_PLUS (Pmode, tp, reg);
7667 case TLS_MODEL_LOCAL_EXEC:
7668 tp = arm_load_tp (NULL_RTX);
7670 reg = gen_rtx_UNSPEC (Pmode,
7671 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7672 UNSPEC_TLS);
7673 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7675 return gen_rtx_PLUS (Pmode, tp, reg);
7677 default:
7678 abort ();
7682 /* Try machine-dependent ways of modifying an illegitimate address
7683 to be legitimate. If we find one, return the new, valid address. */
7685 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7687 if (arm_tls_referenced_p (x))
7689 rtx addend = NULL;
7691 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7693 addend = XEXP (XEXP (x, 0), 1);
7694 x = XEXP (XEXP (x, 0), 0);
7697 if (GET_CODE (x) != SYMBOL_REF)
7698 return x;
7700 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7702 x = legitimize_tls_address (x, NULL_RTX);
7704 if (addend)
7706 x = gen_rtx_PLUS (SImode, x, addend);
7707 orig_x = x;
7709 else
7710 return x;
7713 if (!TARGET_ARM)
7715 /* TODO: legitimize_address for Thumb2. */
7716 if (TARGET_THUMB2)
7717 return x;
7718 return thumb_legitimize_address (x, orig_x, mode);
7721 if (GET_CODE (x) == PLUS)
7723 rtx xop0 = XEXP (x, 0);
7724 rtx xop1 = XEXP (x, 1);
7726 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7727 xop0 = force_reg (SImode, xop0);
7729 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7730 && !symbol_mentioned_p (xop1))
7731 xop1 = force_reg (SImode, xop1);
7733 if (ARM_BASE_REGISTER_RTX_P (xop0)
7734 && CONST_INT_P (xop1))
7736 HOST_WIDE_INT n, low_n;
7737 rtx base_reg, val;
7738 n = INTVAL (xop1);
7740 /* VFP addressing modes actually allow greater offsets, but for
7741 now we just stick with the lowest common denominator. */
7742 if (mode == DImode
7743 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7745 low_n = n & 0x0f;
7746 n &= ~0x0f;
7747 if (low_n > 4)
7749 n += 16;
7750 low_n -= 16;
7753 else
7755 low_n = ((mode) == TImode ? 0
7756 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7757 n -= low_n;
7760 base_reg = gen_reg_rtx (SImode);
7761 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7762 emit_move_insn (base_reg, val);
7763 x = plus_constant (Pmode, base_reg, low_n);
7765 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7766 x = gen_rtx_PLUS (SImode, xop0, xop1);
7769 /* XXX We don't allow MINUS any more -- see comment in
7770 arm_legitimate_address_outer_p (). */
7771 else if (GET_CODE (x) == MINUS)
7773 rtx xop0 = XEXP (x, 0);
7774 rtx xop1 = XEXP (x, 1);
7776 if (CONSTANT_P (xop0))
7777 xop0 = force_reg (SImode, xop0);
7779 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7780 xop1 = force_reg (SImode, xop1);
7782 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7783 x = gen_rtx_MINUS (SImode, xop0, xop1);
7786 /* Make sure to take full advantage of the pre-indexed addressing mode
7787 with absolute addresses which often allows for the base register to
7788 be factorized for multiple adjacent memory references, and it might
7789 even allow for the mini pool to be avoided entirely. */
7790 else if (CONST_INT_P (x) && optimize > 0)
7792 unsigned int bits;
7793 HOST_WIDE_INT mask, base, index;
7794 rtx base_reg;
7796 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7797 use an 8-bit index. So let's use a 12-bit index for SImode only and
7798 hope that arm_gen_constant will enable ldrb to use more bits. */
7799 bits = (mode == SImode) ? 12 : 8;
7800 mask = (1 << bits) - 1;
7801 base = INTVAL (x) & ~mask;
7802 index = INTVAL (x) & mask;
7803 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7805 /* It'll most probably be more efficient to generate the base
7806 with more bits set and use a negative index instead. */
7807 base |= mask;
7808 index -= mask;
7810 base_reg = force_reg (SImode, GEN_INT (base));
7811 x = plus_constant (Pmode, base_reg, index);
7814 if (flag_pic)
7816 /* We need to find and carefully transform any SYMBOL and LABEL
7817 references; so go back to the original address expression. */
7818 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7820 if (new_x != orig_x)
7821 x = new_x;
7824 return x;
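/* Worked example (illustrative) of the CONST_INT case above: for an SImode
   access to the hypothetical absolute address 0x12345678, BITS is 12 and
   MASK is 0xfff, giving BASE = 0x12345000 and INDEX = 0x678.  BASE has 7
   set bits, which does not exceed (32 - 12) / 2 = 10, so the address is
   rewritten as (plus BASE_REG #0x678).  Had BASE contained more set bits,
   BASE |= MASK and a negative INDEX would be used instead.  */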
7828 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7829 to be legitimate. If we find one, return the new, valid address. */
7831 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7833 if (GET_CODE (x) == PLUS
7834 && CONST_INT_P (XEXP (x, 1))
7835 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7836 || INTVAL (XEXP (x, 1)) < 0))
7838 rtx xop0 = XEXP (x, 0);
7839 rtx xop1 = XEXP (x, 1);
7840 HOST_WIDE_INT offset = INTVAL (xop1);
7842 /* Try and fold the offset into a biasing of the base register and
7843 then offsetting that. Don't do this when optimizing for space
7844 since it can cause too many CSEs. */
7845 if (optimize_size && offset >= 0
7846 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7848 HOST_WIDE_INT delta;
7850 if (offset >= 256)
7851 delta = offset - (256 - GET_MODE_SIZE (mode));
7852 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7853 delta = 31 * GET_MODE_SIZE (mode);
7854 else
7855 delta = offset & (~31 * GET_MODE_SIZE (mode));
7857 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7858 NULL_RTX);
7859 x = plus_constant (Pmode, xop0, delta);
7861 else if (offset < 0 && offset > -256)
7862 /* Small negative offsets are best done with a subtract before the
7863 dereference, since forcing these into a register normally takes two
7864 instructions. */
7865 x = force_operand (x, NULL_RTX);
7866 else
7868 /* For the remaining cases, force the constant into a register. */
7869 xop1 = force_reg (SImode, xop1);
7870 x = gen_rtx_PLUS (SImode, xop0, xop1);
7873 else if (GET_CODE (x) == PLUS
7874 && s_register_operand (XEXP (x, 1), SImode)
7875 && !s_register_operand (XEXP (x, 0), SImode))
7877 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7879 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7882 if (flag_pic)
7884 /* We need to find and carefully transform any SYMBOL and LABEL
7885 references; so go back to the original address expression. */
7886 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7888 if (new_x != orig_x)
7889 x = new_x;
7892 return x;
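/* Worked example (illustrative) of the offset folding above: for a QImode
   access to (plus REG 260) when optimizing for size, OFFSET >= 256 gives
   DELTA = 260 - 255 = 5, so the base is biased by 255 (a single Thumb add)
   and the address becomes (plus (REG + 255) 5), whose offset of 5 lies in
   the legal 0..31 byte-offset range.  */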
7895 bool
7896 arm_legitimize_reload_address (rtx *p,
7897 machine_mode mode,
7898 int opnum, int type,
7899 int ind_levels ATTRIBUTE_UNUSED)
7901 /* We must recognize output that we have already generated ourselves. */
7902 if (GET_CODE (*p) == PLUS
7903 && GET_CODE (XEXP (*p, 0)) == PLUS
7904 && REG_P (XEXP (XEXP (*p, 0), 0))
7905 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7906 && CONST_INT_P (XEXP (*p, 1)))
7908 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7909 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7910 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7911 return true;
7914 if (GET_CODE (*p) == PLUS
7915 && REG_P (XEXP (*p, 0))
7916 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7917 /* If the base register is equivalent to a constant, let the generic
7918 code handle it. Otherwise we will run into problems if a future
7919 reload pass decides to rematerialize the constant. */
7920 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7921 && CONST_INT_P (XEXP (*p, 1)))
7923 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7924 HOST_WIDE_INT low, high;
7926 /* Detect coprocessor load/stores. */
7927 bool coproc_p = ((TARGET_HARD_FLOAT
7928 && TARGET_VFP
7929 && (mode == SFmode || mode == DFmode))
7930 || (TARGET_REALLY_IWMMXT
7931 && VALID_IWMMXT_REG_MODE (mode))
7932 || (TARGET_NEON
7933 && (VALID_NEON_DREG_MODE (mode)
7934 || VALID_NEON_QREG_MODE (mode))));
7936 /* For some conditions, bail out when the lower two bits of the offset are set. */
7937 if ((val & 0x3) != 0
7938 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7939 && (coproc_p
7940 /* For DI, and DF under soft-float: */
7941 || ((mode == DImode || mode == DFmode)
7942 /* Without ldrd, we use stm/ldm, which does not
7943 fare well with unaligned bits. */
7944 && (! TARGET_LDRD
7945 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7946 || TARGET_THUMB2))))
7947 return false;
7949 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7950 where the (reg+high) part gets turned into a reload add insn,
7951 we try to decompose the index into high/low values that can often
7952 also lead to better reload CSE.
7953 For example:
7954 ldr r0, [r2, #4100] // Offset too large
7955 ldr r1, [r2, #4104] // Offset too large
7957 is best reloaded as:
7958 add t1, r2, #4096
7959 ldr r0, [t1, #4]
7960 add t2, r2, #4096
7961 ldr r1, [t2, #8]
7963 which post-reload CSE can simplify in most cases to eliminate the
7964 second add instruction:
7965 add t1, r2, #4096
7966 ldr r0, [t1, #4]
7967 ldr r1, [t1, #8]
7969 The idea here is that we want to split out the bits of the constant
7970 as a mask, rather than by subtracting the maximum offset that the
7971 respective type of load/store used can handle.
7973 When encountering negative offsets, we can still make use of them even if
7974 the overall offset is positive; sometimes this may lead to an immediate
7975 that can be constructed with fewer instructions.
7976 For example:
7977 ldr r0, [r2, #0x3FFFFC]
7979 This is best reloaded as:
7980 add t1, r2, #0x400000
7981 ldr r0, [t1, #-4]
7983 The trick for spotting this for a load insn with N bits of offset
7984 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7985 negative offset that is going to make bit N and all the bits below
7986 it become zero in the remainder part.
7988 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7989 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7990 used in most cases of ARM load/store instructions. */
7992 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7993 (((VAL) & ((1 << (N)) - 1)) \
7994 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7995 : 0)
7997 if (coproc_p)
7999 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
8001 /* NEON quad-word load/stores are made of two double-word accesses,
8002 so the valid index range is reduced by 8. Treat as 9-bit range if
8003 we go over it. */
8004 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
8005 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
8007 else if (GET_MODE_SIZE (mode) == 8)
8009 if (TARGET_LDRD)
8010 low = (TARGET_THUMB2
8011 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8012 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8013 else
8014 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8015 to access doublewords. The supported load/store offsets are
8016 -8, -4, and 4, which we try to produce here. */
8017 low = ((val & 0xf) ^ 0x8) - 0x8;
8019 else if (GET_MODE_SIZE (mode) < 8)
8021 /* NEON element load/stores do not have an offset. */
8022 if (TARGET_NEON_FP16 && mode == HFmode)
8023 return false;
8025 if (TARGET_THUMB2)
8027 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8028 Try the wider 12-bit range first, and re-try if the result
8029 is out of range. */
8030 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8031 if (low < -255)
8032 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8034 else
8036 if (mode == HImode || mode == HFmode)
8038 if (arm_arch4)
8039 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8040 else
8042 /* The storehi/movhi_bytes fallbacks can use only
8043 [-4094,+4094] of the full ldrb/strb index range. */
8044 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8045 if (low == 4095 || low == -4095)
8046 return false;
8049 else
8050 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8053 else
8054 return false;
8056 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8057 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8058 - (unsigned HOST_WIDE_INT) 0x80000000);
8059 /* Check for overflow or zero */
8060 if (low == 0 || high == 0 || (high + low != val))
8061 return false;
8063 /* Reload the high part into a base reg; leave the low part
8064 in the mem.
8065 Note that replacing this gen_rtx_PLUS with plus_constant is
8066 wrong in this case because we rely on the
8067 (plus (plus reg c1) c2) structure being preserved so that
8068 XEXP (*p, 0) in push_reload below uses the correct term. */
8069 *p = gen_rtx_PLUS (GET_MODE (*p),
8070 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8071 GEN_INT (high)),
8072 GEN_INT (low));
8073 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8074 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8075 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8076 return true;
8079 return false;
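/* Illustrative sketch (not part of GCC; the function name is hypothetical):
   the sign-magnitude decomposition performed above, applied to the
   0x3FFFFC example from the comment.  With a 12-bit offset field the low
   part comes out as -4 and the high part as 0x400000, matching the
   "add #0x400000 / ldr #-4" sequence shown there.  */
static void
sign_mag_split_example (void)
{
  HOST_WIDE_INT val = 0x3FFFFC;
  /* Same computation as SIGN_MAG_LOW_ADDR_BITS (val, 12); the low 12 bits
     of VAL are nonzero, so the non-trivial arm of the macro applies.  */
  HOST_WIDE_INT low = ((val & 0x1fff) ^ 0x1000) - 0x1000;
  HOST_WIDE_INT high = val - low;
  gcc_checking_assert (low == -4 && high == 0x400000);
}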
8083 thumb_legitimize_reload_address (rtx *x_p,
8084 machine_mode mode,
8085 int opnum, int type,
8086 int ind_levels ATTRIBUTE_UNUSED)
8088 rtx x = *x_p;
8090 if (GET_CODE (x) == PLUS
8091 && GET_MODE_SIZE (mode) < 4
8092 && REG_P (XEXP (x, 0))
8093 && XEXP (x, 0) == stack_pointer_rtx
8094 && CONST_INT_P (XEXP (x, 1))
8095 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8097 rtx orig_x = x;
8099 x = copy_rtx (x);
8100 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8101 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8102 return x;
8105 /* If both registers are hi-regs, then it's better to reload the
8106 entire expression rather than each register individually. That
8107 only requires one reload register rather than two. */
8108 if (GET_CODE (x) == PLUS
8109 && REG_P (XEXP (x, 0))
8110 && REG_P (XEXP (x, 1))
8111 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8112 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8114 rtx orig_x = x;
8116 x = copy_rtx (x);
8117 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8118 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8119 return x;
8122 return NULL;
8125 /* Return TRUE if X contains any TLS symbol references. */
8127 bool
8128 arm_tls_referenced_p (rtx x)
8130 if (! TARGET_HAVE_TLS)
8131 return false;
8133 subrtx_iterator::array_type array;
8134 FOR_EACH_SUBRTX (iter, array, x, ALL)
8136 const_rtx x = *iter;
8137 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8138 return true;
8140 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8141 TLS offsets, not real symbol references. */
8142 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8143 iter.skip_subrtxes ();
8145 return false;
8148 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8150 On the ARM, allow any integer (invalid ones are removed later by insn
8151 patterns), nice doubles and symbol_refs which refer to the function's
8152 constant pool XXX.
8154 When generating pic allow anything. */
8156 static bool
8157 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8159 /* At present, we have no support for Neon structure constants, so forbid
8160 them here. It might be possible to handle simple cases like 0 and -1
8161 in future. */
8162 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8163 return false;
8165 return flag_pic || !label_mentioned_p (x);
8168 static bool
8169 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8171 return (CONST_INT_P (x)
8172 || CONST_DOUBLE_P (x)
8173 || CONSTANT_ADDRESS_P (x)
8174 || flag_pic);
8177 static bool
8178 arm_legitimate_constant_p (machine_mode mode, rtx x)
8180 return (!arm_cannot_force_const_mem (mode, x)
8181 && (TARGET_32BIT
8182 ? arm_legitimate_constant_p_1 (mode, x)
8183 : thumb_legitimate_constant_p (mode, x)));
8186 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8188 static bool
8189 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8191 rtx base, offset;
8193 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8195 split_const (x, &base, &offset);
8196 if (GET_CODE (base) == SYMBOL_REF
8197 && !offset_within_block_p (base, INTVAL (offset)))
8198 return true;
8200 return arm_tls_referenced_p (x);
8203 #define REG_OR_SUBREG_REG(X) \
8204 (REG_P (X) \
8205 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8207 #define REG_OR_SUBREG_RTX(X) \
8208 (REG_P (X) ? (X) : SUBREG_REG (X))
8210 static inline int
8211 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8213 machine_mode mode = GET_MODE (x);
8214 int total, words;
8216 switch (code)
8218 case ASHIFT:
8219 case ASHIFTRT:
8220 case LSHIFTRT:
8221 case ROTATERT:
8222 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8224 case PLUS:
8225 case MINUS:
8226 case COMPARE:
8227 case NEG:
8228 case NOT:
8229 return COSTS_N_INSNS (1);
8231 case MULT:
8232 if (CONST_INT_P (XEXP (x, 1)))
8234 int cycles = 0;
8235 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8237 while (i)
8239 i >>= 2;
8240 cycles++;
8242 return COSTS_N_INSNS (2) + cycles;
8244 return COSTS_N_INSNS (1) + 16;
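/* Worked example (illustrative) for the MULT-by-constant case above:
   multiplying by 100 takes four iterations of the loop
   (100 -> 25 -> 6 -> 1 -> 0), i.e. the constant contributes two bits per
   iteration, so the returned cost is COSTS_N_INSNS (2) plus 4.  */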
8246 case SET:
8247 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8248 the mode. */
8249 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8250 return (COSTS_N_INSNS (words)
8251 + 4 * ((MEM_P (SET_SRC (x)))
8252 + MEM_P (SET_DEST (x))));
8254 case CONST_INT:
8255 if (outer == SET)
8257 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8258 return 0;
8259 if (thumb_shiftable_const (INTVAL (x)))
8260 return COSTS_N_INSNS (2);
8261 return COSTS_N_INSNS (3);
8263 else if ((outer == PLUS || outer == COMPARE)
8264 && INTVAL (x) < 256 && INTVAL (x) > -256)
8265 return 0;
8266 else if ((outer == IOR || outer == XOR || outer == AND)
8267 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8268 return COSTS_N_INSNS (1);
8269 else if (outer == AND)
8271 int i;
8272 /* This duplicates the tests in the andsi3 expander. */
8273 for (i = 9; i <= 31; i++)
8274 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8275 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8276 return COSTS_N_INSNS (2);
8278 else if (outer == ASHIFT || outer == ASHIFTRT
8279 || outer == LSHIFTRT)
8280 return 0;
8281 return COSTS_N_INSNS (2);
8283 case CONST:
8284 case CONST_DOUBLE:
8285 case LABEL_REF:
8286 case SYMBOL_REF:
8287 return COSTS_N_INSNS (3);
8289 case UDIV:
8290 case UMOD:
8291 case DIV:
8292 case MOD:
8293 return 100;
8295 case TRUNCATE:
8296 return 99;
8298 case AND:
8299 case XOR:
8300 case IOR:
8301 /* XXX guess. */
8302 return 8;
8304 case MEM:
8305 /* XXX another guess. */
8306 /* Memory costs quite a lot for the first word, but subsequent words
8307 load at the equivalent of a single insn each. */
8308 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8309 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8310 ? 4 : 0));
8312 case IF_THEN_ELSE:
8313 /* XXX a guess. */
8314 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8315 return 14;
8316 return 2;
8318 case SIGN_EXTEND:
8319 case ZERO_EXTEND:
8320 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8321 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8323 if (mode == SImode)
8324 return total;
8326 if (arm_arch6)
8327 return total + COSTS_N_INSNS (1);
8329 /* Assume a two-shift sequence. Increase the cost slightly so
8330 we prefer actual shifts over an extend operation. */
8331 return total + 1 + COSTS_N_INSNS (2);
8333 default:
8334 return 99;
8338 static inline bool
8339 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8341 machine_mode mode = GET_MODE (x);
8342 enum rtx_code subcode;
8343 rtx operand;
8344 enum rtx_code code = GET_CODE (x);
8345 *total = 0;
8347 switch (code)
8349 case MEM:
8350 /* Memory costs quite a lot for the first word, but subsequent words
8351 load at the equivalent of a single insn each. */
8352 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8353 return true;
8355 case DIV:
8356 case MOD:
8357 case UDIV:
8358 case UMOD:
8359 if (TARGET_HARD_FLOAT && mode == SFmode)
8360 *total = COSTS_N_INSNS (2);
8361 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8362 *total = COSTS_N_INSNS (4);
8363 else
8364 *total = COSTS_N_INSNS (20);
8365 return false;
8367 case ROTATE:
8368 if (REG_P (XEXP (x, 1)))
8369 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8370 else if (!CONST_INT_P (XEXP (x, 1)))
8371 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8373 /* Fall through */
8374 case ROTATERT:
8375 if (mode != SImode)
8377 *total += COSTS_N_INSNS (4);
8378 return true;
8381 /* Fall through */
8382 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8383 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8384 if (mode == DImode)
8386 *total += COSTS_N_INSNS (3);
8387 return true;
8390 *total += COSTS_N_INSNS (1);
8391 /* Increase the cost of complex shifts because they aren't any faster,
8392 and reduce dual issue opportunities. */
8393 if (arm_tune_cortex_a9
8394 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8395 ++*total;
8397 return true;
8399 case MINUS:
8400 if (mode == DImode)
8402 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8403 if (CONST_INT_P (XEXP (x, 0))
8404 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8406 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8407 return true;
8410 if (CONST_INT_P (XEXP (x, 1))
8411 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8413 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8414 return true;
8417 return false;
8420 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8422 if (TARGET_HARD_FLOAT
8423 && (mode == SFmode
8424 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8426 *total = COSTS_N_INSNS (1);
8427 if (CONST_DOUBLE_P (XEXP (x, 0))
8428 && arm_const_double_rtx (XEXP (x, 0)))
8430 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8431 return true;
8434 if (CONST_DOUBLE_P (XEXP (x, 1))
8435 && arm_const_double_rtx (XEXP (x, 1)))
8437 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8438 return true;
8441 return false;
8443 *total = COSTS_N_INSNS (20);
8444 return false;
8447 *total = COSTS_N_INSNS (1);
8448 if (CONST_INT_P (XEXP (x, 0))
8449 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8451 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8452 return true;
8455 subcode = GET_CODE (XEXP (x, 1));
8456 if (subcode == ASHIFT || subcode == ASHIFTRT
8457 || subcode == LSHIFTRT
8458 || subcode == ROTATE || subcode == ROTATERT)
8460 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8461 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8462 return true;
8465 /* A shift as a part of RSB costs no more than RSB itself. */
8466 if (GET_CODE (XEXP (x, 0)) == MULT
8467 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8469 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8470 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8471 return true;
8474 if (subcode == MULT
8475 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8477 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8478 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8479 return true;
8482 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8483 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8485 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8486 if (REG_P (XEXP (XEXP (x, 1), 0))
8487 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8488 *total += COSTS_N_INSNS (1);
8490 return true;
8493 /* Fall through */
8495 case PLUS:
8496 if (code == PLUS && arm_arch6 && mode == SImode
8497 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8498 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8500 *total = COSTS_N_INSNS (1);
8501 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8502 0, speed);
8503 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8504 return true;
8507 /* MLA: All arguments must be registers. We filter out
8508 multiplication by a power of two, so that we fall through to
8509 the code below. */
8510 if (GET_CODE (XEXP (x, 0)) == MULT
8511 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8513 /* The cost comes from the cost of the multiply. */
8514 return false;
8517 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8519 if (TARGET_HARD_FLOAT
8520 && (mode == SFmode
8521 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8523 *total = COSTS_N_INSNS (1);
8524 if (CONST_DOUBLE_P (XEXP (x, 1))
8525 && arm_const_double_rtx (XEXP (x, 1)))
8527 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8528 return true;
8531 return false;
8534 *total = COSTS_N_INSNS (20);
8535 return false;
8538 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8539 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8541 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8542 if (REG_P (XEXP (XEXP (x, 0), 0))
8543 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8544 *total += COSTS_N_INSNS (1);
8545 return true;
8548 /* Fall through */
8550 case AND: case XOR: case IOR:
8552 /* Normally the frame registers will be split into reg+const during
8553 reload, so it is a bad idea to combine them with other instructions,
8554 since then they might not be moved outside of loops. As a compromise
8555 we allow integration with ops that have a constant as their second
8556 operand. */
8557 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8558 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8559 && !CONST_INT_P (XEXP (x, 1)))
8560 *total = COSTS_N_INSNS (1);
8562 if (mode == DImode)
8564 *total += COSTS_N_INSNS (2);
8565 if (CONST_INT_P (XEXP (x, 1))
8566 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8568 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8569 return true;
8572 return false;
8575 *total += COSTS_N_INSNS (1);
8576 if (CONST_INT_P (XEXP (x, 1))
8577 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8579 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8580 return true;
8582 subcode = GET_CODE (XEXP (x, 0));
8583 if (subcode == ASHIFT || subcode == ASHIFTRT
8584 || subcode == LSHIFTRT
8585 || subcode == ROTATE || subcode == ROTATERT)
8587 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8588 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8589 return true;
8592 if (subcode == MULT
8593 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8595 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8596 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8597 return true;
8600 if (subcode == UMIN || subcode == UMAX
8601 || subcode == SMIN || subcode == SMAX)
8603 *total = COSTS_N_INSNS (3);
8604 return true;
8607 return false;
8609 case MULT:
8610 /* This should have been handled by the CPU specific routines. */
8611 gcc_unreachable ();
8613 case TRUNCATE:
8614 if (arm_arch3m && mode == SImode
8615 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8616 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8617 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8618 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8619 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8620 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8622 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8623 return true;
8625 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8626 return false;
8628 case NEG:
8629 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8631 if (TARGET_HARD_FLOAT
8632 && (mode == SFmode
8633 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8635 *total = COSTS_N_INSNS (1);
8636 return false;
8638 *total = COSTS_N_INSNS (2);
8639 return false;
8642 /* Fall through */
8643 case NOT:
8644 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8645 if (mode == SImode && code == NOT)
8647 subcode = GET_CODE (XEXP (x, 0));
8648 if (subcode == ASHIFT || subcode == ASHIFTRT
8649 || subcode == LSHIFTRT
8650 || subcode == ROTATE || subcode == ROTATERT
8651 || (subcode == MULT
8652 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8654 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8655 /* Register shifts cost an extra cycle. */
8656 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8657 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8658 subcode, 1, speed);
8659 return true;
8663 return false;
8665 case IF_THEN_ELSE:
8666 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8668 *total = COSTS_N_INSNS (4);
8669 return true;
8672 operand = XEXP (x, 0);
8674 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8675 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8676 && REG_P (XEXP (operand, 0))
8677 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8678 *total += COSTS_N_INSNS (1);
8679 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8680 + rtx_cost (XEXP (x, 2), code, 2, speed));
8681 return true;
8683 case NE:
8684 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8686 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8687 return true;
8689 goto scc_insn;
8691 case GE:
8692 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8693 && mode == SImode && XEXP (x, 1) == const0_rtx)
8695 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8696 return true;
8698 goto scc_insn;
8700 case LT:
8701 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8702 && mode == SImode && XEXP (x, 1) == const0_rtx)
8704 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8705 return true;
8707 goto scc_insn;
8709 case EQ:
8710 case GT:
8711 case LE:
8712 case GEU:
8713 case LTU:
8714 case GTU:
8715 case LEU:
8716 case UNORDERED:
8717 case ORDERED:
8718 case UNEQ:
8719 case UNGE:
8720 case UNLT:
8721 case UNGT:
8722 case UNLE:
8723 scc_insn:
8724 /* SCC insns. In the case where the comparison has already been
8725 performed, then they cost 2 instructions. Otherwise they need
8726 an additional comparison before them. */
8727 *total = COSTS_N_INSNS (2);
8728 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8730 return true;
8733 /* Fall through */
8734 case COMPARE:
8735 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8737 *total = 0;
8738 return true;
8741 *total += COSTS_N_INSNS (1);
8742 if (CONST_INT_P (XEXP (x, 1))
8743 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8745 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8746 return true;
8749 subcode = GET_CODE (XEXP (x, 0));
8750 if (subcode == ASHIFT || subcode == ASHIFTRT
8751 || subcode == LSHIFTRT
8752 || subcode == ROTATE || subcode == ROTATERT)
8754 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8755 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8756 return true;
8759 if (subcode == MULT
8760 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8762 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8763 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8764 return true;
8767 return false;
8769 case UMIN:
8770 case UMAX:
8771 case SMIN:
8772 case SMAX:
8773 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8774 if (!CONST_INT_P (XEXP (x, 1))
8775 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8776 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8777 return true;
8779 case ABS:
8780 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8782 if (TARGET_HARD_FLOAT
8783 && (mode == SFmode
8784 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8786 *total = COSTS_N_INSNS (1);
8787 return false;
8789 *total = COSTS_N_INSNS (20);
8790 return false;
8792 *total = COSTS_N_INSNS (1);
8793 if (mode == DImode)
8794 *total += COSTS_N_INSNS (3);
8795 return false;
8797 case SIGN_EXTEND:
8798 case ZERO_EXTEND:
8799 *total = 0;
8800 if (GET_MODE_CLASS (mode) == MODE_INT)
8802 rtx op = XEXP (x, 0);
8803 machine_mode opmode = GET_MODE (op);
8805 if (mode == DImode)
8806 *total += COSTS_N_INSNS (1);
8808 if (opmode != SImode)
8810 if (MEM_P (op))
8812 /* If !arm_arch4, we use one of the extendhisi2_mem
8813 or movhi_bytes patterns for HImode. For a QImode
8814 sign extension, we first zero-extend from memory
8815 and then perform a shift sequence. */
8816 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8817 *total += COSTS_N_INSNS (2);
8819 else if (arm_arch6)
8820 *total += COSTS_N_INSNS (1);
8822 /* We don't have the necessary insn, so we need to perform some
8823 other operation. */
8824 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8825 /* An and with constant 255. */
8826 *total += COSTS_N_INSNS (1);
8827 else
8828 /* A shift sequence. Increase costs slightly to avoid
8829 combining two shifts into an extend operation. */
8830 *total += COSTS_N_INSNS (2) + 1;
8833 return false;
8836 switch (GET_MODE (XEXP (x, 0)))
8838 case V8QImode:
8839 case V4HImode:
8840 case V2SImode:
8841 case V4QImode:
8842 case V2HImode:
8843 *total = COSTS_N_INSNS (1);
8844 return false;
8846 default:
8847 gcc_unreachable ();
8849 gcc_unreachable ();
8851 case ZERO_EXTRACT:
8852 case SIGN_EXTRACT:
8853 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8854 return true;
8856 case CONST_INT:
8857 if (const_ok_for_arm (INTVAL (x))
8858 || const_ok_for_arm (~INTVAL (x)))
8859 *total = COSTS_N_INSNS (1);
8860 else
8861 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8862 INTVAL (x), NULL_RTX,
8863 NULL_RTX, 0, 0));
8864 return true;
8866 case CONST:
8867 case LABEL_REF:
8868 case SYMBOL_REF:
8869 *total = COSTS_N_INSNS (3);
8870 return true;
8872 case HIGH:
8873 *total = COSTS_N_INSNS (1);
8874 return true;
8876 case LO_SUM:
8877 *total = COSTS_N_INSNS (1);
8878 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8879 return true;
8881 case CONST_DOUBLE:
8882 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8883 && (mode == SFmode || !TARGET_VFP_SINGLE))
8884 *total = COSTS_N_INSNS (1);
8885 else
8886 *total = COSTS_N_INSNS (4);
8887 return true;
8889 case SET:
8890 /* The vec_extract patterns accept memory operands that require an
8891 address reload. Account for the cost of that reload to give the
8892 auto-inc-dec pass an incentive to try to replace them. */
8893 if (TARGET_NEON && MEM_P (SET_DEST (x))
8894 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8896 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8897 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8898 *total += COSTS_N_INSNS (1);
8899 return true;
8901 /* Likewise for the vec_set patterns. */
8902 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8903 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8904 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8906 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8907 *total = rtx_cost (mem, code, 0, speed);
8908 if (!neon_vector_mem_operand (mem, 2, true))
8909 *total += COSTS_N_INSNS (1);
8910 return true;
8912 return false;
8914 case UNSPEC:
8915 /* We cost this as high as our memory costs to allow this to
8916 be hoisted from loops. */
8917 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8919 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8921 return true;
8923 case CONST_VECTOR:
8924 if (TARGET_NEON
8925 && TARGET_HARD_FLOAT
8926 && outer == SET
8927 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8928 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8929 *total = COSTS_N_INSNS (1);
8930 else
8931 *total = COSTS_N_INSNS (4);
8932 return true;
8934 default:
8935 *total = COSTS_N_INSNS (4);
8936 return false;
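/* Illustrative sketch (not part of GCC; the helper name is hypothetical):
   const_ok_for_arm, used throughout the cost code in this file, tests
   whether a value is encodable as an ARM-state data-processing immediate,
   i.e. an 8-bit constant rotated right by an even amount.  A minimal
   stand-alone version of that check could look like this; the real
   function also handles Thumb-2 modified immediates and related cases.  */
static int
arm_immediate_sketch_p (unsigned int val)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate VAL left by ROT (undoing a rotate-right by ROT) and see
	 whether the result fits in 8 bits.  */
      unsigned int v = (val << rot) | (val >> ((32 - rot) & 31));
      if (v < 256)
	return 1;
    }
  return 0;
}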
8940 /* Estimates the size cost of thumb1 instructions.
8941 For now most of the code is copied from thumb1_rtx_costs. We need more
8942 fine-grained tuning when we have more related test cases. */
8943 static inline int
8944 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8946 machine_mode mode = GET_MODE (x);
8947 int words;
8949 switch (code)
8951 case ASHIFT:
8952 case ASHIFTRT:
8953 case LSHIFTRT:
8954 case ROTATERT:
8955 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8957 case PLUS:
8958 case MINUS:
8959 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8960 patterns defined by RTL expansion, especially for the expansion of
8961 multiplication. */
8962 if ((GET_CODE (XEXP (x, 0)) == MULT
8963 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8964 || (GET_CODE (XEXP (x, 1)) == MULT
8965 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8966 return COSTS_N_INSNS (2);
8967 /* Fall through on purpose for a normal RTX. */
8968 case COMPARE:
8969 case NEG:
8970 case NOT:
8971 return COSTS_N_INSNS (1);
8973 case MULT:
8974 if (CONST_INT_P (XEXP (x, 1)))
8976 /* The Thumb1 mul instruction can't operate on a constant; we must load it
8977 into a register first. */
8978 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8979 /* For targets that have a very small and high-latency multiply
8980 unit, we prefer to synthesize the mult with up to 5 instructions,
8981 giving a good balance between size and performance. */
8982 if (arm_arch6m && arm_m_profile_small_mul)
8983 return COSTS_N_INSNS (5);
8984 else
8985 return COSTS_N_INSNS (1) + const_size;
8987 return COSTS_N_INSNS (1);
8989 case SET:
8990 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8991 the mode. */
8992 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8993 return COSTS_N_INSNS (words)
8994 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8995 || satisfies_constraint_K (SET_SRC (x))
8996 /* thumb1_movdi_insn. */
8997 || ((words > 1) && MEM_P (SET_SRC (x))));
8999 case CONST_INT:
9000 if (outer == SET)
9002 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9003 return COSTS_N_INSNS (1);
9004 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9005 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9006 return COSTS_N_INSNS (2);
9007 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9008 if (thumb_shiftable_const (INTVAL (x)))
9009 return COSTS_N_INSNS (2);
9010 return COSTS_N_INSNS (3);
9012 else if ((outer == PLUS || outer == COMPARE)
9013 && INTVAL (x) < 256 && INTVAL (x) > -256)
9014 return 0;
9015 else if ((outer == IOR || outer == XOR || outer == AND)
9016 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9017 return COSTS_N_INSNS (1);
9018 else if (outer == AND)
9020 int i;
9021 /* This duplicates the tests in the andsi3 expander. */
9022 for (i = 9; i <= 31; i++)
9023 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9024 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9025 return COSTS_N_INSNS (2);
9027 else if (outer == ASHIFT || outer == ASHIFTRT
9028 || outer == LSHIFTRT)
9029 return 0;
9030 return COSTS_N_INSNS (2);
9032 case CONST:
9033 case CONST_DOUBLE:
9034 case LABEL_REF:
9035 case SYMBOL_REF:
9036 return COSTS_N_INSNS (3);
9038 case UDIV:
9039 case UMOD:
9040 case DIV:
9041 case MOD:
9042 return 100;
9044 case TRUNCATE:
9045 return 99;
9047 case AND:
9048 case XOR:
9049 case IOR:
9050 return COSTS_N_INSNS (1);
9052 case MEM:
9053 return (COSTS_N_INSNS (1)
9054 + COSTS_N_INSNS (1)
9055 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9056 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9057 ? COSTS_N_INSNS (1) : 0));
9059 case IF_THEN_ELSE:
9060 /* XXX a guess. */
9061 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9062 return 14;
9063 return 2;
9065 case ZERO_EXTEND:
9066 /* XXX still guessing. */
9067 switch (GET_MODE (XEXP (x, 0)))
9069 case QImode:
9070 return (1 + (mode == DImode ? 4 : 0)
9071 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9073 case HImode:
9074 return (4 + (mode == DImode ? 4 : 0)
9075 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9077 case SImode:
9078 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9080 default:
9081 return 99;
9084 default:
9085 return 99;
9089 /* RTX costs when optimizing for size. */
9090 static bool
9091 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9092 int *total)
9094 machine_mode mode = GET_MODE (x);
9095 if (TARGET_THUMB1)
9097 *total = thumb1_size_rtx_costs (x, code, outer_code);
9098 return true;
9101 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9102 switch (code)
9104 case MEM:
9105 /* A memory access costs 1 insn if the mode is small, or the address is
9106 a single register, otherwise it costs one insn per word. */
9107 if (REG_P (XEXP (x, 0)))
9108 *total = COSTS_N_INSNS (1);
9109 else if (flag_pic
9110 && GET_CODE (XEXP (x, 0)) == PLUS
9111 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9112 /* This will be split into two instructions.
9113 See arm.md:calculate_pic_address. */
9114 *total = COSTS_N_INSNS (2);
9115 else
9116 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9117 return true;
9119 case DIV:
9120 case MOD:
9121 case UDIV:
9122 case UMOD:
9123 /* Needs a libcall, so it costs about this. */
9124 *total = COSTS_N_INSNS (2);
9125 return false;
9127 case ROTATE:
9128 if (mode == SImode && REG_P (XEXP (x, 1)))
9130 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9131 return true;
9133 /* Fall through */
9134 case ROTATERT:
9135 case ASHIFT:
9136 case LSHIFTRT:
9137 case ASHIFTRT:
9138 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9140 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9141 return true;
9143 else if (mode == SImode)
9145 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9146 /* Slightly disparage register shifts, but not by much. */
9147 if (!CONST_INT_P (XEXP (x, 1)))
9148 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9149 return true;
9152 /* Needs a libcall. */
9153 *total = COSTS_N_INSNS (2);
9154 return false;
9156 case MINUS:
9157 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9158 && (mode == SFmode || !TARGET_VFP_SINGLE))
9160 *total = COSTS_N_INSNS (1);
9161 return false;
9164 if (mode == SImode)
9166 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9167 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9169 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9170 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9171 || subcode1 == ROTATE || subcode1 == ROTATERT
9172 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9173 || subcode1 == ASHIFTRT)
9175 /* It's just the cost of the two operands. */
9176 *total = 0;
9177 return false;
9180 *total = COSTS_N_INSNS (1);
9181 return false;
9184 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9185 return false;
9187 case PLUS:
9188 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9189 && (mode == SFmode || !TARGET_VFP_SINGLE))
9191 *total = COSTS_N_INSNS (1);
9192 return false;
9195 /* A shift as a part of ADD costs nothing. */
9196 if (GET_CODE (XEXP (x, 0)) == MULT
9197 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9199 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9200 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9201 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9202 return true;
9205 /* Fall through */
9206 case AND: case XOR: case IOR:
9207 if (mode == SImode)
9209 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9211 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9212 || subcode == LSHIFTRT || subcode == ASHIFTRT
9213 || (code == AND && subcode == NOT))
9215 /* It's just the cost of the two operands. */
9216 *total = 0;
9217 return false;
9221 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9222 return false;
9224 case MULT:
9225 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9226 return false;
9228 case NEG:
9229 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9230 && (mode == SFmode || !TARGET_VFP_SINGLE))
9232 *total = COSTS_N_INSNS (1);
9233 return false;
9236 /* Fall through */
9237 case NOT:
9238 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9240 return false;
9242 case IF_THEN_ELSE:
9243 *total = 0;
9244 return false;
9246 case COMPARE:
9247 if (cc_register (XEXP (x, 0), VOIDmode))
9248 * total = 0;
9249 else
9250 *total = COSTS_N_INSNS (1);
9251 return false;
9253 case ABS:
9254 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9255 && (mode == SFmode || !TARGET_VFP_SINGLE))
9256 *total = COSTS_N_INSNS (1);
9257 else
9258 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9259 return false;
9261 case SIGN_EXTEND:
9262 case ZERO_EXTEND:
9263 return arm_rtx_costs_1 (x, outer_code, total, 0);
9265 case CONST_INT:
9266 if (const_ok_for_arm (INTVAL (x)))
9267 /* A multiplication by a constant requires another instruction
9268 to load the constant to a register. */
9269 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9270 ? 1 : 0);
9271 else if (const_ok_for_arm (~INTVAL (x)))
9272 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9273 else if (const_ok_for_arm (-INTVAL (x)))
9275 if (outer_code == COMPARE || outer_code == PLUS
9276 || outer_code == MINUS)
9277 *total = 0;
9278 else
9279 *total = COSTS_N_INSNS (1);
9281 else
9282 *total = COSTS_N_INSNS (2);
9283 return true;
9285 case CONST:
9286 case LABEL_REF:
9287 case SYMBOL_REF:
9288 *total = COSTS_N_INSNS (2);
9289 return true;
9291 case CONST_DOUBLE:
9292 *total = COSTS_N_INSNS (4);
9293 return true;
9295 case CONST_VECTOR:
9296 if (TARGET_NEON
9297 && TARGET_HARD_FLOAT
9298 && outer_code == SET
9299 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9300 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9301 *total = COSTS_N_INSNS (1);
9302 else
9303 *total = COSTS_N_INSNS (4);
9304 return true;
9306 case HIGH:
9307 case LO_SUM:
9308 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9309 cost of these slightly. */
9310 *total = COSTS_N_INSNS (1) + 1;
9311 return true;
9313 case SET:
9314 return false;
9316 default:
9317 if (mode != VOIDmode)
9318 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9319 else
9320 *total = COSTS_N_INSNS (4); /* Who knows? */
9321 return false;
9325 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9326 operand, then return the operand that is being shifted. If the shift
9327 is not by a constant, then set SHIFT_REG to point to the operand.
9328 Return NULL if OP is not a shifter operand. */
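/* For example (illustrative): given (mult REG (const_int 8)), which is a
   left shift by 3, the function returns REG and leaves *SHIFT_REG alone;
   given (ashiftrt REG1 REG2) it returns REG1 and sets *SHIFT_REG to REG2.  */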
9329 static rtx
9330 shifter_op_p (rtx op, rtx *shift_reg)
9332 enum rtx_code code = GET_CODE (op);
9334 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9335 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9336 return XEXP (op, 0);
9337 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9338 return XEXP (op, 0);
9339 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9340 || code == ASHIFTRT)
9342 if (!CONST_INT_P (XEXP (op, 1)))
9343 *shift_reg = XEXP (op, 1);
9344 return XEXP (op, 0);
9347 return NULL;
9350 static bool
9351 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9353 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9354 gcc_assert (GET_CODE (x) == UNSPEC);
9356 switch (XINT (x, 1))
9358 case UNSPEC_UNALIGNED_LOAD:
9359 /* We can only do unaligned loads into the integer unit, and we can't
9360 use LDM or LDRD. */
9361 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9362 if (speed_p)
9363 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9364 + extra_cost->ldst.load_unaligned);
9366 #ifdef NOT_YET
9367 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9368 ADDR_SPACE_GENERIC, speed_p);
9369 #endif
9370 return true;
9372 case UNSPEC_UNALIGNED_STORE:
9373 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9374 if (speed_p)
9375 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9376 + extra_cost->ldst.store_unaligned);
9378 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9379 #ifdef NOT_YET
9380 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9381 ADDR_SPACE_GENERIC, speed_p);
9382 #endif
9383 return true;
9385 case UNSPEC_VRINTZ:
9386 case UNSPEC_VRINTP:
9387 case UNSPEC_VRINTM:
9388 case UNSPEC_VRINTR:
9389 case UNSPEC_VRINTX:
9390 case UNSPEC_VRINTA:
9391 *cost = COSTS_N_INSNS (1);
9392 if (speed_p)
9393 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9395 return true;
9396 default:
9397 *cost = COSTS_N_INSNS (2);
9398 break;
9400 return false;
9403 /* Cost of a libcall. We assume one insn per argument, an amount for the
9404 call (one insn for -Os) and then one for processing the result. */
9405 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
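/* If operand IDX of X is a left-shift form recognized by shifter_op_p,
   add the cost of an arithmetic operation with a shifted operand (the
   register-shift variant when the shift amount is a register) plus the
   costs of the shifted value and of the other operand, then return true
   from the containing cost function.  Assumes the locals shift_op,
   shift_reg, x, cost, extra_cost and speed_p are in scope at the
   expansion site.  */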
9407 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9408 do \
9410 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9411 if (shift_op != NULL \
9412 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9414 if (shift_reg) \
9416 if (speed_p) \
9417 *cost += extra_cost->alu.arith_shift_reg; \
9418 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9420 else if (speed_p) \
9421 *cost += extra_cost->alu.arith_shift; \
9423 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9424 + rtx_cost (XEXP (x, 1 - IDX), \
9425 OP, 1, speed_p)); \
9426 return true; \
9429 while (0);
9431 /* RTX costs. Make an estimate of the cost of executing the operation
9432 X, which is contained with an operation with code OUTER_CODE.
9433 SPEED_P indicates whether the cost desired is the performance cost,
9434 or the size cost. The estimate is stored in COST and the return
9435 value is TRUE if the cost calculation is final, or FALSE if the
9436 caller should recurse through the operands of X to add additional
9437 costs.
9439 We currently make no attempt to model the size savings of Thumb-2
9440 16-bit instructions. At the normal points in compilation where
9441 this code is called we have no measure of whether the condition
9442 flags are live or not, and thus no realistic way to determine what
9443 the size will eventually be. */
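/* For instance, the SImode PLUS case below sets *cost to COSTS_N_INSNS (1)
   (plus any per-core arith cost) and returns false when neither operand is
   special, leaving the caller to recurse and add the operand costs itself;
   paths that already call rtx_cost on their operands return true instead. */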
9444 static bool
9445 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9446 const struct cpu_cost_table *extra_cost,
9447 int *cost, bool speed_p)
9449 machine_mode mode = GET_MODE (x);
9451 if (TARGET_THUMB1)
9453 if (speed_p)
9454 *cost = thumb1_rtx_costs (x, code, outer_code);
9455 else
9456 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9457 return true;
9460 switch (code)
9462 case SET:
9463 *cost = 0;
9464 /* SET RTXs don't have a mode so we get it from the destination. */
9465 mode = GET_MODE (SET_DEST (x));
9467 if (REG_P (SET_SRC (x))
9468 && REG_P (SET_DEST (x)))
9470 /* Assume that most copies can be done with a single insn,
9471 unless we don't have HW FP, in which case everything
9472 larger than word mode will require two insns. */
9473 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9474 && GET_MODE_SIZE (mode) > 4)
9475 || mode == DImode)
9476 ? 2 : 1);
9477 /* Conditional register moves can be encoded
9478 in 16 bits in Thumb mode. */
9479 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9480 *cost >>= 1;
9482 return true;
9485 if (CONST_INT_P (SET_SRC (x)))
9487 /* Handle CONST_INT here, since the value doesn't have a mode
9488 and we would otherwise be unable to work out the true cost. */
9489 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9490 outer_code = SET;
9491 /* Slightly lower the cost of setting a core reg to a constant.
9492 This helps break up chains and allows for better scheduling. */
9493 if (REG_P (SET_DEST (x))
9494 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9495 *cost -= 1;
9496 x = SET_SRC (x);
9497 /* Immediate moves with an immediate in the range [0, 255] can be
9498 encoded in 16 bits in Thumb mode. */
9499 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9500 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9501 *cost >>= 1;
9502 goto const_int_cost;
9505 return false;
9507 case MEM:
9508 /* A memory access costs one insn if the mode is small or the address is
9509 a single register; otherwise it costs one insn per word. */
9510 if (REG_P (XEXP (x, 0)))
9511 *cost = COSTS_N_INSNS (1);
9512 else if (flag_pic
9513 && GET_CODE (XEXP (x, 0)) == PLUS
9514 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9515 /* This will be split into two instructions.
9516 See arm.md:calculate_pic_address. */
9517 *cost = COSTS_N_INSNS (2);
9518 else
9519 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9521 /* For speed optimizations, add the costs of the address and
9522 accessing memory. */
9523 if (speed_p)
9524 #ifdef NOT_YET
9525 *cost += (extra_cost->ldst.load
9526 + arm_address_cost (XEXP (x, 0), mode,
9527 ADDR_SPACE_GENERIC, speed_p));
9528 #else
9529 *cost += extra_cost->ldst.load;
9530 #endif
9531 return true;
9533 case PARALLEL:
9535 /* Calculations of LDM costs are complex. We assume an initial cost
9536 (ldm_1st) which will load the number of registers mentioned in
9537 ldm_regs_per_insn_1st registers; then each additional
9538 ldm_regs_per_insn_subsequent registers cost one more insn. The
9539 formula for N regs is thus:
9541 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9542 + ldm_regs_per_insn_subsequent - 1)
9543 / ldm_regs_per_insn_subsequent).
9545 Additional costs may also be added for addressing. A similar
9546 formula is used for STM. */
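/* A worked example with hypothetical tuning values: if
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, then
   loading N == 6 registers costs
     ldm_1st + COSTS_N_INSNS ((MAX (6 - 2, 0) + 2 - 1) / 2)
     == ldm_1st + COSTS_N_INSNS (2). */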
9548 bool is_ldm = load_multiple_operation (x, SImode);
9549 bool is_stm = store_multiple_operation (x, SImode);
9551 *cost = COSTS_N_INSNS (1);
9553 if (is_ldm || is_stm)
9555 if (speed_p)
9557 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9558 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9559 ? extra_cost->ldst.ldm_regs_per_insn_1st
9560 : extra_cost->ldst.stm_regs_per_insn_1st;
9561 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9562 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9563 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9565 *cost += regs_per_insn_1st
9566 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9567 + regs_per_insn_sub - 1)
9568 / regs_per_insn_sub);
9569 return true;
9573 return false;
9575 case DIV:
9576 case UDIV:
9577 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9578 && (mode == SFmode || !TARGET_VFP_SINGLE))
9579 *cost = COSTS_N_INSNS (speed_p
9580 ? extra_cost->fp[mode != SFmode].div : 1);
9581 else if (mode == SImode && TARGET_IDIV)
9582 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9583 else
9584 *cost = LIBCALL_COST (2);
9585 return false; /* All arguments must be in registers. */
9587 case MOD:
9588 case UMOD:
9589 *cost = LIBCALL_COST (2);
9590 return false; /* All arguments must be in registers. */
9592 case ROTATE:
9593 if (mode == SImode && REG_P (XEXP (x, 1)))
9595 *cost = (COSTS_N_INSNS (2)
9596 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9597 if (speed_p)
9598 *cost += extra_cost->alu.shift_reg;
9599 return true;
9601 /* Fall through */
9602 case ROTATERT:
9603 case ASHIFT:
9604 case LSHIFTRT:
9605 case ASHIFTRT:
9606 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9608 *cost = (COSTS_N_INSNS (3)
9609 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9610 if (speed_p)
9611 *cost += 2 * extra_cost->alu.shift;
9612 return true;
9614 else if (mode == SImode)
9616 *cost = (COSTS_N_INSNS (1)
9617 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9618 /* Slightly disparage register shifts at -Os, but not by much. */
9619 if (!CONST_INT_P (XEXP (x, 1)))
9620 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9621 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9622 return true;
9624 else if (GET_MODE_CLASS (mode) == MODE_INT
9625 && GET_MODE_SIZE (mode) < 4)
9627 if (code == ASHIFT)
9629 *cost = (COSTS_N_INSNS (1)
9630 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9631 /* Slightly disparage register shifts at -Os, but not by
9632 much. */
9633 if (!CONST_INT_P (XEXP (x, 1)))
9634 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9635 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9637 else if (code == LSHIFTRT || code == ASHIFTRT)
9639 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9641 /* Can use SBFX/UBFX. */
9642 *cost = COSTS_N_INSNS (1);
9643 if (speed_p)
9644 *cost += extra_cost->alu.bfx;
9645 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9647 else
9649 *cost = COSTS_N_INSNS (2);
9650 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9651 if (speed_p)
9653 if (CONST_INT_P (XEXP (x, 1)))
9654 *cost += 2 * extra_cost->alu.shift;
9655 else
9656 *cost += (extra_cost->alu.shift
9657 + extra_cost->alu.shift_reg);
9659 else
9660 /* Slightly disparage register shifts. */
9661 *cost += !CONST_INT_P (XEXP (x, 1));
9664 else /* Rotates. */
9666 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9667 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9668 if (speed_p)
9670 if (CONST_INT_P (XEXP (x, 1)))
9671 *cost += (2 * extra_cost->alu.shift
9672 + extra_cost->alu.log_shift);
9673 else
9674 *cost += (extra_cost->alu.shift
9675 + extra_cost->alu.shift_reg
9676 + extra_cost->alu.log_shift_reg);
9679 return true;
9682 *cost = LIBCALL_COST (2);
9683 return false;
9685 case BSWAP:
9686 if (arm_arch6)
9688 if (mode == SImode)
9690 *cost = COSTS_N_INSNS (1);
9691 if (speed_p)
9692 *cost += extra_cost->alu.rev;
9694 return false;
9697 else
9699 /* No rev instruction available. Look at arm_legacy_rev
9700 and thumb_legacy_rev for the form of RTL used then. */
9701 if (TARGET_THUMB)
9703 *cost = COSTS_N_INSNS (10);
9705 if (speed_p)
9707 *cost += 6 * extra_cost->alu.shift;
9708 *cost += 3 * extra_cost->alu.logical;
9711 else
9713 *cost = COSTS_N_INSNS (5);
9715 if (speed_p)
9717 *cost += 2 * extra_cost->alu.shift;
9718 *cost += extra_cost->alu.arith_shift;
9719 *cost += 2 * extra_cost->alu.logical;
9722 return true;
9724 return false;
9726 case MINUS:
9727 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9728 && (mode == SFmode || !TARGET_VFP_SINGLE))
9730 *cost = COSTS_N_INSNS (1);
9731 if (GET_CODE (XEXP (x, 0)) == MULT
9732 || GET_CODE (XEXP (x, 1)) == MULT)
9734 rtx mul_op0, mul_op1, sub_op;
9736 if (speed_p)
9737 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9739 if (GET_CODE (XEXP (x, 0)) == MULT)
9741 mul_op0 = XEXP (XEXP (x, 0), 0);
9742 mul_op1 = XEXP (XEXP (x, 0), 1);
9743 sub_op = XEXP (x, 1);
9745 else
9747 mul_op0 = XEXP (XEXP (x, 1), 0);
9748 mul_op1 = XEXP (XEXP (x, 1), 1);
9749 sub_op = XEXP (x, 0);
9752 /* The first operand of the multiply may be optionally
9753 negated. */
9754 if (GET_CODE (mul_op0) == NEG)
9755 mul_op0 = XEXP (mul_op0, 0);
9757 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9758 + rtx_cost (mul_op1, code, 0, speed_p)
9759 + rtx_cost (sub_op, code, 0, speed_p));
9761 return true;
9764 if (speed_p)
9765 *cost += extra_cost->fp[mode != SFmode].addsub;
9766 return false;
9769 if (mode == SImode)
9771 rtx shift_by_reg = NULL;
9772 rtx shift_op;
9773 rtx non_shift_op;
9775 *cost = COSTS_N_INSNS (1);
9777 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9778 if (shift_op == NULL)
9780 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9781 non_shift_op = XEXP (x, 0);
9783 else
9784 non_shift_op = XEXP (x, 1);
9786 if (shift_op != NULL)
9788 if (shift_by_reg != NULL)
9790 if (speed_p)
9791 *cost += extra_cost->alu.arith_shift_reg;
9792 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9794 else if (speed_p)
9795 *cost += extra_cost->alu.arith_shift;
9797 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9798 + rtx_cost (non_shift_op, code, 0, speed_p));
9799 return true;
9802 if (arm_arch_thumb2
9803 && GET_CODE (XEXP (x, 1)) == MULT)
9805 /* MLS. */
9806 if (speed_p)
9807 *cost += extra_cost->mult[0].add;
9808 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9809 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9810 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9811 return true;
9814 if (CONST_INT_P (XEXP (x, 0)))
9816 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9817 INTVAL (XEXP (x, 0)), NULL_RTX,
9818 NULL_RTX, 1, 0);
9819 *cost = COSTS_N_INSNS (insns);
9820 if (speed_p)
9821 *cost += insns * extra_cost->alu.arith;
9822 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9823 return true;
9825 else if (speed_p)
9826 *cost += extra_cost->alu.arith;
9828 return false;
9831 if (GET_MODE_CLASS (mode) == MODE_INT
9832 && GET_MODE_SIZE (mode) < 4)
9834 rtx shift_op, shift_reg;
9835 shift_reg = NULL;
9837 /* We check both sides of the MINUS for shifter operands since,
9838 unlike PLUS, it's not commutative. */
9840 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9841 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9843 /* Slightly disparage, as we might need to widen the result. */
9844 *cost = 1 + COSTS_N_INSNS (1);
9845 if (speed_p)
9846 *cost += extra_cost->alu.arith;
9848 if (CONST_INT_P (XEXP (x, 0)))
9850 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9851 return true;
9854 return false;
9857 if (mode == DImode)
9859 *cost = COSTS_N_INSNS (2);
9861 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9863 rtx op1 = XEXP (x, 1);
9865 if (speed_p)
9866 *cost += 2 * extra_cost->alu.arith;
9868 if (GET_CODE (op1) == ZERO_EXTEND)
9869 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9870 else
9871 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9872 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9873 0, speed_p);
9874 return true;
9876 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9878 if (speed_p)
9879 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9880 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9881 0, speed_p)
9882 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9883 return true;
9885 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9886 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9888 if (speed_p)
9889 *cost += (extra_cost->alu.arith
9890 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9891 ? extra_cost->alu.arith
9892 : extra_cost->alu.arith_shift));
9893 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9894 + rtx_cost (XEXP (XEXP (x, 1), 0),
9895 GET_CODE (XEXP (x, 1)), 0, speed_p));
9896 return true;
9899 if (speed_p)
9900 *cost += 2 * extra_cost->alu.arith;
9901 return false;
9904 /* Vector mode? */
9906 *cost = LIBCALL_COST (2);
9907 return false;
9909 case PLUS:
9910 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9911 && (mode == SFmode || !TARGET_VFP_SINGLE))
9913 *cost = COSTS_N_INSNS (1);
9914 if (GET_CODE (XEXP (x, 0)) == MULT)
9916 rtx mul_op0, mul_op1, add_op;
9918 if (speed_p)
9919 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9921 mul_op0 = XEXP (XEXP (x, 0), 0);
9922 mul_op1 = XEXP (XEXP (x, 0), 1);
9923 add_op = XEXP (x, 1);
9925 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9926 + rtx_cost (mul_op1, code, 0, speed_p)
9927 + rtx_cost (add_op, code, 0, speed_p));
9929 return true;
9932 if (speed_p)
9933 *cost += extra_cost->fp[mode != SFmode].addsub;
9934 return false;
9936 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9938 *cost = LIBCALL_COST (2);
9939 return false;
9942 /* Narrow modes can be synthesized in SImode, but the range
9943 of useful sub-operations is limited. Check for shift operations
9944 on one of the operands. Only left shifts can be used in the
9945 narrow modes. */
9946 if (GET_MODE_CLASS (mode) == MODE_INT
9947 && GET_MODE_SIZE (mode) < 4)
9949 rtx shift_op, shift_reg;
9950 shift_reg = NULL;
9952 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9954 if (CONST_INT_P (XEXP (x, 1)))
9956 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9957 INTVAL (XEXP (x, 1)), NULL_RTX,
9958 NULL_RTX, 1, 0);
9959 *cost = COSTS_N_INSNS (insns);
9960 if (speed_p)
9961 *cost += insns * extra_cost->alu.arith;
9962 /* Slightly penalize a narrow operation as the result may
9963 need widening. */
9964 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9965 return true;
9968 /* Slightly penalize a narrow operation as the result may
9969 need widening. */
9970 *cost = 1 + COSTS_N_INSNS (1);
9971 if (speed_p)
9972 *cost += extra_cost->alu.arith;
9974 return false;
9977 if (mode == SImode)
9979 rtx shift_op, shift_reg;
9981 *cost = COSTS_N_INSNS (1);
9982 if (TARGET_INT_SIMD
9983 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9984 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9986 /* UXTA[BH] or SXTA[BH]. */
9987 if (speed_p)
9988 *cost += extra_cost->alu.extend_arith;
9989 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9990 speed_p)
9991 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9992 return true;
9995 shift_reg = NULL;
9996 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9997 if (shift_op != NULL)
9999 if (shift_reg)
10001 if (speed_p)
10002 *cost += extra_cost->alu.arith_shift_reg;
10003 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10005 else if (speed_p)
10006 *cost += extra_cost->alu.arith_shift;
10008 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10009 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10010 return true;
10012 if (GET_CODE (XEXP (x, 0)) == MULT)
10014 rtx mul_op = XEXP (x, 0);
10016 *cost = COSTS_N_INSNS (1);
10018 if (TARGET_DSP_MULTIPLY
10019 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10020 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10021 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10022 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10023 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10024 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10025 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10026 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10027 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10028 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10029 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10030 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10031 == 16))))))
10033 /* SMLA[BT][BT]. */
10034 if (speed_p)
10035 *cost += extra_cost->mult[0].extend_add;
10036 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10037 SIGN_EXTEND, 0, speed_p)
10038 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10039 SIGN_EXTEND, 0, speed_p)
10040 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10041 return true;
10044 if (speed_p)
10045 *cost += extra_cost->mult[0].add;
10046 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10047 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10048 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10049 return true;
10051 if (CONST_INT_P (XEXP (x, 1)))
10053 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10054 INTVAL (XEXP (x, 1)), NULL_RTX,
10055 NULL_RTX, 1, 0);
10056 *cost = COSTS_N_INSNS (insns);
10057 if (speed_p)
10058 *cost += insns * extra_cost->alu.arith;
10059 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10060 return true;
10062 else if (speed_p)
10063 *cost += extra_cost->alu.arith;
10065 return false;
10068 if (mode == DImode)
10070 if (arm_arch3m
10071 && GET_CODE (XEXP (x, 0)) == MULT
10072 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10073 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10074 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10075 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10077 *cost = COSTS_N_INSNS (1);
10078 if (speed_p)
10079 *cost += extra_cost->mult[1].extend_add;
10080 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10081 ZERO_EXTEND, 0, speed_p)
10082 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10083 ZERO_EXTEND, 0, speed_p)
10084 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10085 return true;
10088 *cost = COSTS_N_INSNS (2);
10090 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10091 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10093 if (speed_p)
10094 *cost += (extra_cost->alu.arith
10095 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10096 ? extra_cost->alu.arith
10097 : extra_cost->alu.arith_shift));
10099 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10100 speed_p)
10101 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10102 return true;
10105 if (speed_p)
10106 *cost += 2 * extra_cost->alu.arith;
10107 return false;
10110 /* Vector mode? */
10111 *cost = LIBCALL_COST (2);
10112 return false;
10113 case IOR:
10114 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10116 *cost = COSTS_N_INSNS (1);
10117 if (speed_p)
10118 *cost += extra_cost->alu.rev;
10120 return true;
10122 /* Fall through. */
10123 case AND: case XOR:
10124 if (mode == SImode)
10126 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10127 rtx op0 = XEXP (x, 0);
10128 rtx shift_op, shift_reg;
10130 *cost = COSTS_N_INSNS (1);
10132 if (subcode == NOT
10133 && (code == AND
10134 || (code == IOR && TARGET_THUMB2)))
10135 op0 = XEXP (op0, 0);
10137 shift_reg = NULL;
10138 shift_op = shifter_op_p (op0, &shift_reg);
10139 if (shift_op != NULL)
10141 if (shift_reg)
10143 if (speed_p)
10144 *cost += extra_cost->alu.log_shift_reg;
10145 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10147 else if (speed_p)
10148 *cost += extra_cost->alu.log_shift;
10150 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10151 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10152 return true;
10155 if (CONST_INT_P (XEXP (x, 1)))
10157 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10158 INTVAL (XEXP (x, 1)), NULL_RTX,
10159 NULL_RTX, 1, 0);
10161 *cost = COSTS_N_INSNS (insns);
10162 if (speed_p)
10163 *cost += insns * extra_cost->alu.logical;
10164 *cost += rtx_cost (op0, code, 0, speed_p);
10165 return true;
10168 if (speed_p)
10169 *cost += extra_cost->alu.logical;
10170 *cost += (rtx_cost (op0, code, 0, speed_p)
10171 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10172 return true;
10175 if (mode == DImode)
10177 rtx op0 = XEXP (x, 0);
10178 enum rtx_code subcode = GET_CODE (op0);
10180 *cost = COSTS_N_INSNS (2);
10182 if (subcode == NOT
10183 && (code == AND
10184 || (code == IOR && TARGET_THUMB2)))
10185 op0 = XEXP (op0, 0);
10187 if (GET_CODE (op0) == ZERO_EXTEND)
10189 if (speed_p)
10190 *cost += 2 * extra_cost->alu.logical;
10192 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10193 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10194 return true;
10196 else if (GET_CODE (op0) == SIGN_EXTEND)
10198 if (speed_p)
10199 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10201 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10202 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10203 return true;
10206 if (speed_p)
10207 *cost += 2 * extra_cost->alu.logical;
10209 return true;
10211 /* Vector mode? */
10213 *cost = LIBCALL_COST (2);
10214 return false;
10216 case MULT:
10217 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10218 && (mode == SFmode || !TARGET_VFP_SINGLE))
10220 rtx op0 = XEXP (x, 0);
10222 *cost = COSTS_N_INSNS (1);
10224 if (GET_CODE (op0) == NEG)
10225 op0 = XEXP (op0, 0);
10227 if (speed_p)
10228 *cost += extra_cost->fp[mode != SFmode].mult;
10230 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10231 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10232 return true;
10234 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10236 *cost = LIBCALL_COST (2);
10237 return false;
10240 if (mode == SImode)
10242 *cost = COSTS_N_INSNS (1);
10243 if (TARGET_DSP_MULTIPLY
10244 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10245 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10246 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10247 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10248 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10249 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10250 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10251 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10252 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10253 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10254 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10255 && (INTVAL (XEXP (XEXP (x, 1), 1))
10256 == 16))))))
10258 /* SMUL[TB][TB]. */
10259 if (speed_p)
10260 *cost += extra_cost->mult[0].extend;
10261 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10262 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10263 return true;
10265 if (speed_p)
10266 *cost += extra_cost->mult[0].simple;
10267 return false;
10270 if (mode == DImode)
10272 if (arm_arch3m
10273 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10274 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10275 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10276 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10278 *cost = COSTS_N_INSNS (1);
10279 if (speed_p)
10280 *cost += extra_cost->mult[1].extend;
10281 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10282 ZERO_EXTEND, 0, speed_p)
10283 + rtx_cost (XEXP (XEXP (x, 1), 0),
10284 ZERO_EXTEND, 0, speed_p));
10285 return true;
10288 *cost = LIBCALL_COST (2);
10289 return false;
10292 /* Vector mode? */
10293 *cost = LIBCALL_COST (2);
10294 return false;
10296 case NEG:
10297 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10298 && (mode == SFmode || !TARGET_VFP_SINGLE))
10300 *cost = COSTS_N_INSNS (1);
10301 if (speed_p)
10302 *cost += extra_cost->fp[mode != SFmode].neg;
10304 return false;
10306 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10308 *cost = LIBCALL_COST (1);
10309 return false;
10312 if (mode == SImode)
10314 if (GET_CODE (XEXP (x, 0)) == ABS)
10316 *cost = COSTS_N_INSNS (2);
10317 /* Assume the non-flag-changing variant. */
10318 if (speed_p)
10319 *cost += (extra_cost->alu.log_shift
10320 + extra_cost->alu.arith_shift);
10321 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10322 return true;
10325 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10326 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10328 *cost = COSTS_N_INSNS (2);
10329 /* No extra cost for MOV imm and MVN imm. */
10330 /* If the comparison op is using the flags, there's no further
10331 cost, otherwise we need to add the cost of the comparison. */
10332 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10333 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10334 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10336 *cost += (COSTS_N_INSNS (1)
10337 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10338 speed_p)
10339 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10340 speed_p));
10341 if (speed_p)
10342 *cost += extra_cost->alu.arith;
10344 return true;
10346 *cost = COSTS_N_INSNS (1);
10347 if (speed_p)
10348 *cost += extra_cost->alu.arith;
10349 return false;
10352 if (GET_MODE_CLASS (mode) == MODE_INT
10353 && GET_MODE_SIZE (mode) < 4)
10355 /* Slightly disparage, as we might need an extend operation. */
10356 *cost = 1 + COSTS_N_INSNS (1);
10357 if (speed_p)
10358 *cost += extra_cost->alu.arith;
10359 return false;
10362 if (mode == DImode)
10364 *cost = COSTS_N_INSNS (2);
10365 if (speed_p)
10366 *cost += 2 * extra_cost->alu.arith;
10367 return false;
10370 /* Vector mode? */
10371 *cost = LIBCALL_COST (1);
10372 return false;
10374 case NOT:
10375 if (mode == SImode)
10377 rtx shift_op;
10378 rtx shift_reg = NULL;
10380 *cost = COSTS_N_INSNS (1);
10381 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10383 if (shift_op)
10385 if (shift_reg != NULL)
10387 if (speed_p)
10388 *cost += extra_cost->alu.log_shift_reg;
10389 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10391 else if (speed_p)
10392 *cost += extra_cost->alu.log_shift;
10393 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10394 return true;
10397 if (speed_p)
10398 *cost += extra_cost->alu.logical;
10399 return false;
10401 if (mode == DImode)
10403 *cost = COSTS_N_INSNS (2);
10404 return false;
10407 /* Vector mode? */
10409 *cost += LIBCALL_COST (1);
10410 return false;
10412 case IF_THEN_ELSE:
10414 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10416 *cost = COSTS_N_INSNS (4);
10417 return true;
10419 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10420 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10422 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10423 /* Assume that if one arm of the if_then_else is a register,
10424 that it will be tied with the result and eliminate the
10425 conditional insn. */
10426 if (REG_P (XEXP (x, 1)))
10427 *cost += op2cost;
10428 else if (REG_P (XEXP (x, 2)))
10429 *cost += op1cost;
10430 else
10432 if (speed_p)
10434 if (extra_cost->alu.non_exec_costs_exec)
10435 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10436 else
10437 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10439 else
10440 *cost += op1cost + op2cost;
10443 return true;
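/* For example, for (if_then_else (cond) (reg) (const_int 0)) the register
   arm is assumed to tie to the result, so only the cost of the constant
   arm (op2cost) is added on top of the condition's cost. */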
10445 case COMPARE:
10446 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10447 *cost = 0;
10448 else
10450 machine_mode op0mode;
10451 /* We'll mostly assume that the cost of a compare is the cost of the
10452 LHS. However, there are some notable exceptions. */
10454 /* Floating point compares are never done as side-effects. */
10455 op0mode = GET_MODE (XEXP (x, 0));
10456 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10457 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10459 *cost = COSTS_N_INSNS (1);
10460 if (speed_p)
10461 *cost += extra_cost->fp[op0mode != SFmode].compare;
10463 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10465 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10466 return true;
10469 return false;
10471 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10473 *cost = LIBCALL_COST (2);
10474 return false;
10477 /* DImode compares normally take two insns. */
10478 if (op0mode == DImode)
10480 *cost = COSTS_N_INSNS (2);
10481 if (speed_p)
10482 *cost += 2 * extra_cost->alu.arith;
10483 return false;
10486 if (op0mode == SImode)
10488 rtx shift_op;
10489 rtx shift_reg;
10491 if (XEXP (x, 1) == const0_rtx
10492 && !(REG_P (XEXP (x, 0))
10493 || (GET_CODE (XEXP (x, 0)) == SUBREG
10494 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10496 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10498 /* Multiply operations that set the flags are often
10499 significantly more expensive. */
10500 if (speed_p
10501 && GET_CODE (XEXP (x, 0)) == MULT
10502 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10503 *cost += extra_cost->mult[0].flag_setting;
10505 if (speed_p
10506 && GET_CODE (XEXP (x, 0)) == PLUS
10507 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10508 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10509 0), 1), mode))
10510 *cost += extra_cost->mult[0].flag_setting;
10511 return true;
10514 shift_reg = NULL;
10515 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10516 if (shift_op != NULL)
10518 *cost = COSTS_N_INSNS (1);
10519 if (shift_reg != NULL)
10521 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10522 if (speed_p)
10523 *cost += extra_cost->alu.arith_shift_reg;
10525 else if (speed_p)
10526 *cost += extra_cost->alu.arith_shift;
10527 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10528 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10529 return true;
10532 *cost = COSTS_N_INSNS (1);
10533 if (speed_p)
10534 *cost += extra_cost->alu.arith;
10535 if (CONST_INT_P (XEXP (x, 1))
10536 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10538 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10539 return true;
10541 return false;
10544 /* Vector mode? */
10546 *cost = LIBCALL_COST (2);
10547 return false;
10549 return true;
10551 case EQ:
10552 case NE:
10553 case LT:
10554 case LE:
10555 case GT:
10556 case GE:
10557 case LTU:
10558 case LEU:
10559 case GEU:
10560 case GTU:
10561 case ORDERED:
10562 case UNORDERED:
10563 case UNEQ:
10564 case UNLE:
10565 case UNLT:
10566 case UNGE:
10567 case UNGT:
10568 case LTGT:
10569 if (outer_code == SET)
10571 /* Is it a store-flag operation? */
10572 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10573 && XEXP (x, 1) == const0_rtx)
10575 /* Thumb also needs an IT insn. */
10576 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10577 return true;
10579 if (XEXP (x, 1) == const0_rtx)
10581 switch (code)
10583 case LT:
10584 /* LSR Rd, Rn, #31. */
10585 *cost = COSTS_N_INSNS (1);
10586 if (speed_p)
10587 *cost += extra_cost->alu.shift;
10588 break;
10590 case EQ:
10591 /* RSBS T1, Rn, #0
10592 ADC Rd, Rn, T1. */
10594 case NE:
10595 /* SUBS T1, Rn, #1
10596 SBC Rd, Rn, T1. */
10597 *cost = COSTS_N_INSNS (2);
10598 break;
10600 case LE:
10601 /* RSBS T1, Rn, Rn, LSR #31
10602 ADC Rd, Rn, T1. */
10603 *cost = COSTS_N_INSNS (2);
10604 if (speed_p)
10605 *cost += extra_cost->alu.arith_shift;
10606 break;
10608 case GT:
10609 /* RSB Rd, Rn, Rn, ASR #1
10610 LSR Rd, Rd, #31. */
10611 *cost = COSTS_N_INSNS (2);
10612 if (speed_p)
10613 *cost += (extra_cost->alu.arith_shift
10614 + extra_cost->alu.shift);
10615 break;
10617 case GE:
10618 /* ASR Rd, Rn, #31
10619 ADD Rd, Rn, #1. */
10620 *cost = COSTS_N_INSNS (2);
10621 if (speed_p)
10622 *cost += extra_cost->alu.shift;
10623 break;
10625 default:
10626 /* Remaining cases are either meaningless or would take
10627 three insns anyway. */
10628 *cost = COSTS_N_INSNS (3);
10629 break;
10631 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10632 return true;
10634 else
10636 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10637 if (CONST_INT_P (XEXP (x, 1))
10638 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10640 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10641 return true;
10644 return false;
10647 /* Not directly inside a set. If it involves the condition code
10648 register it must be the condition for a branch, cond_exec or
10649 I_T_E operation. Since the comparison is performed elsewhere
10650 this is just the control part which has no additional
10651 cost. */
10652 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10653 && XEXP (x, 1) == const0_rtx)
10655 *cost = 0;
10656 return true;
10658 return false;
10660 case ABS:
10661 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10662 && (mode == SFmode || !TARGET_VFP_SINGLE))
10664 *cost = COSTS_N_INSNS (1);
10665 if (speed_p)
10666 *cost += extra_cost->fp[mode != SFmode].neg;
10668 return false;
10670 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10672 *cost = LIBCALL_COST (1);
10673 return false;
10676 if (mode == SImode)
10678 *cost = COSTS_N_INSNS (1);
10679 if (speed_p)
10680 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10681 return false;
10683 /* Vector mode? */
10684 *cost = LIBCALL_COST (1);
10685 return false;
10687 case SIGN_EXTEND:
10688 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10689 && MEM_P (XEXP (x, 0)))
10691 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10693 if (mode == DImode)
10694 *cost += COSTS_N_INSNS (1);
10696 if (!speed_p)
10697 return true;
10699 if (GET_MODE (XEXP (x, 0)) == SImode)
10700 *cost += extra_cost->ldst.load;
10701 else
10702 *cost += extra_cost->ldst.load_sign_extend;
10704 if (mode == DImode)
10705 *cost += extra_cost->alu.shift;
10707 return true;
10710 /* Widening from less than 32-bits requires an extend operation. */
10711 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10713 /* We have SXTB/SXTH. */
10714 *cost = COSTS_N_INSNS (1);
10715 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10716 if (speed_p)
10717 *cost += extra_cost->alu.extend;
10719 else if (GET_MODE (XEXP (x, 0)) != SImode)
10721 /* Needs two shifts. */
10722 *cost = COSTS_N_INSNS (2);
10723 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10724 if (speed_p)
10725 *cost += 2 * extra_cost->alu.shift;
10728 /* Widening beyond 32-bits requires one more insn. */
10729 if (mode == DImode)
10731 *cost += COSTS_N_INSNS (1);
10732 if (speed_p)
10733 *cost += extra_cost->alu.shift;
10736 return true;
10738 case ZERO_EXTEND:
10739 if ((arm_arch4
10740 || GET_MODE (XEXP (x, 0)) == SImode
10741 || GET_MODE (XEXP (x, 0)) == QImode)
10742 && MEM_P (XEXP (x, 0)))
10744 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10746 if (mode == DImode)
10747 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10749 return true;
10752 /* Widening from less than 32-bits requires an extend operation. */
10753 if (GET_MODE (XEXP (x, 0)) == QImode)
10755 /* UXTB can be a shorter instruction in Thumb2, but it might
10756 be slower than the AND Rd, Rn, #255 alternative. When
10757 optimizing for speed it should never be slower to use
10758 AND, and we don't really model 16-bit vs 32-bit insns
10759 here. */
10760 *cost = COSTS_N_INSNS (1);
10761 if (speed_p)
10762 *cost += extra_cost->alu.logical;
10764 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10766 /* We have UXTB/UXTH. */
10767 *cost = COSTS_N_INSNS (1);
10768 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10769 if (speed_p)
10770 *cost += extra_cost->alu.extend;
10772 else if (GET_MODE (XEXP (x, 0)) != SImode)
10774 /* Needs two shifts. It's marginally preferable to use
10775 shifts rather than two BIC instructions as the second
10776 shift may merge with a subsequent insn as a shifter
10777 op. */
10778 *cost = COSTS_N_INSNS (2);
10779 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10780 if (speed_p)
10781 *cost += 2 * extra_cost->alu.shift;
10783 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10784 *cost = COSTS_N_INSNS (1);
10786 /* Widening beyond 32-bits requires one more insn. */
10787 if (mode == DImode)
10789 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10792 return true;
10794 case CONST_INT:
10795 *cost = 0;
10796 /* CONST_INT has no mode, so we cannot tell for sure how many
10797 insns are really going to be needed. The best we can do is
10798 look at the value passed. If it fits in SImode, then assume
10799 that's the mode it will be used for. Otherwise assume it
10800 will be used in DImode. */
10801 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10802 mode = SImode;
10803 else
10804 mode = DImode;
10806 /* Avoid blowing up in arm_gen_constant (). */
10807 if (!(outer_code == PLUS
10808 || outer_code == AND
10809 || outer_code == IOR
10810 || outer_code == XOR
10811 || outer_code == MINUS))
10812 outer_code = SET;
10814 const_int_cost:
10815 if (mode == SImode)
10817 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10818 INTVAL (x), NULL, NULL,
10819 0, 0));
10820 /* Extra costs? */
10822 else
10824 *cost += COSTS_N_INSNS (arm_gen_constant
10825 (outer_code, SImode, NULL,
10826 trunc_int_for_mode (INTVAL (x), SImode),
10827 NULL, NULL, 0, 0)
10828 + arm_gen_constant (outer_code, SImode, NULL,
10829 INTVAL (x) >> 32, NULL,
10830 NULL, 0, 0));
10831 /* Extra costs? */
10834 return true;
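/* For instance, setting an SImode core register to 0x00ff00ff would
   typically be synthesized as MOV #0x00ff0000 followed by ORR #0xff, so
   arm_gen_constant returns 2 and COSTS_N_INSNS (2) is added here, on top
   of the destination cost computed in the SET case above. */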
10836 case CONST:
10837 case LABEL_REF:
10838 case SYMBOL_REF:
10839 if (speed_p)
10841 if (arm_arch_thumb2 && !flag_pic)
10842 *cost = COSTS_N_INSNS (2);
10843 else
10844 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10846 else
10847 *cost = COSTS_N_INSNS (2);
10849 if (flag_pic)
10851 *cost += COSTS_N_INSNS (1);
10852 if (speed_p)
10853 *cost += extra_cost->alu.arith;
10856 return true;
10858 case CONST_FIXED:
10859 *cost = COSTS_N_INSNS (4);
10860 /* Fixme. */
10861 return true;
10863 case CONST_DOUBLE:
10864 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10865 && (mode == SFmode || !TARGET_VFP_SINGLE))
10867 if (vfp3_const_double_rtx (x))
10869 *cost = COSTS_N_INSNS (1);
10870 if (speed_p)
10871 *cost += extra_cost->fp[mode == DFmode].fpconst;
10872 return true;
10875 if (speed_p)
10877 *cost = COSTS_N_INSNS (1);
10878 if (mode == DFmode)
10879 *cost += extra_cost->ldst.loadd;
10880 else
10881 *cost += extra_cost->ldst.loadf;
10883 else
10884 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10886 return true;
10888 *cost = COSTS_N_INSNS (4);
10889 return true;
10891 case CONST_VECTOR:
10892 /* Fixme. */
10893 if (TARGET_NEON
10894 && TARGET_HARD_FLOAT
10895 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10896 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10897 *cost = COSTS_N_INSNS (1);
10898 else
10899 *cost = COSTS_N_INSNS (4);
10900 return true;
10902 case HIGH:
10903 case LO_SUM:
10904 *cost = COSTS_N_INSNS (1);
10905 /* When optimizing for size, we prefer constant pool entries to
10906 MOVW/MOVT pairs, so bump the cost of these slightly. */
10907 if (!speed_p)
10908 *cost += 1;
10909 return true;
10911 case CLZ:
10912 *cost = COSTS_N_INSNS (1);
10913 if (speed_p)
10914 *cost += extra_cost->alu.clz;
10915 return false;
10917 case SMIN:
10918 if (XEXP (x, 1) == const0_rtx)
10920 *cost = COSTS_N_INSNS (1);
10921 if (speed_p)
10922 *cost += extra_cost->alu.log_shift;
10923 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10924 return true;
10926 /* Fall through. */
10927 case SMAX:
10928 case UMIN:
10929 case UMAX:
10930 *cost = COSTS_N_INSNS (2);
10931 return false;
10933 case TRUNCATE:
10934 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10935 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10936 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10937 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10938 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10939 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10940 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10941 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10942 == ZERO_EXTEND))))
10944 *cost = COSTS_N_INSNS (1);
10945 if (speed_p)
10946 *cost += extra_cost->mult[1].extend;
10947 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10948 speed_p)
10949 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10950 0, speed_p));
10951 return true;
10953 *cost = LIBCALL_COST (1);
10954 return false;
10956 case UNSPEC:
10957 return arm_unspec_cost (x, outer_code, speed_p, cost);
10959 case PC:
10960 /* Reading the PC is like reading any other register. Writing it
10961 is more expensive, but we take that into account elsewhere. */
10962 *cost = 0;
10963 return true;
10965 case ZERO_EXTRACT:
10966 /* TODO: Simple zero_extract of bottom bits using AND. */
10967 /* Fall through. */
10968 case SIGN_EXTRACT:
10969 if (arm_arch6
10970 && mode == SImode
10971 && CONST_INT_P (XEXP (x, 1))
10972 && CONST_INT_P (XEXP (x, 2)))
10974 *cost = COSTS_N_INSNS (1);
10975 if (speed_p)
10976 *cost += extra_cost->alu.bfx;
10977 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10978 return true;
10980 /* Without UBFX/SBFX, need to resort to shift operations. */
10981 *cost = COSTS_N_INSNS (2);
10982 if (speed_p)
10983 *cost += 2 * extra_cost->alu.shift;
10984 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10985 return true;
10987 case FLOAT_EXTEND:
10988 if (TARGET_HARD_FLOAT)
10990 *cost = COSTS_N_INSNS (1);
10991 if (speed_p)
10992 *cost += extra_cost->fp[mode == DFmode].widen;
10993 if (!TARGET_FPU_ARMV8
10994 && GET_MODE (XEXP (x, 0)) == HFmode)
10996 /* Pre v8, widening HF->DF is a two-step process, first
10997 widening to SFmode. */
10998 *cost += COSTS_N_INSNS (1);
10999 if (speed_p)
11000 *cost += extra_cost->fp[0].widen;
11002 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11003 return true;
11006 *cost = LIBCALL_COST (1);
11007 return false;
11009 case FLOAT_TRUNCATE:
11010 if (TARGET_HARD_FLOAT)
11012 *cost = COSTS_N_INSNS (1);
11013 if (speed_p)
11014 *cost += extra_cost->fp[mode == DFmode].narrow;
11015 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11016 return true;
11017 /* Vector modes? */
11019 *cost = LIBCALL_COST (1);
11020 return false;
11022 case FMA:
11023 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11025 rtx op0 = XEXP (x, 0);
11026 rtx op1 = XEXP (x, 1);
11027 rtx op2 = XEXP (x, 2);
11029 *cost = COSTS_N_INSNS (1);
11031 /* vfms or vfnma. */
11032 if (GET_CODE (op0) == NEG)
11033 op0 = XEXP (op0, 0);
11035 /* vfnms or vfnma. */
11036 if (GET_CODE (op2) == NEG)
11037 op2 = XEXP (op2, 0);
11039 *cost += rtx_cost (op0, FMA, 0, speed_p);
11040 *cost += rtx_cost (op1, FMA, 1, speed_p);
11041 *cost += rtx_cost (op2, FMA, 2, speed_p);
11043 if (speed_p)
11044 *cost += extra_cost->fp[mode == DFmode].fma;
11046 return true;
11049 *cost = LIBCALL_COST (3);
11050 return false;
11052 case FIX:
11053 case UNSIGNED_FIX:
11054 if (TARGET_HARD_FLOAT)
11056 if (GET_MODE_CLASS (mode) == MODE_INT)
11058 *cost = COSTS_N_INSNS (1);
11059 if (speed_p)
11060 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11061 /* Strip off the 'cost' of rounding towards zero. */
11062 if (GET_CODE (XEXP (x, 0)) == FIX)
11063 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11064 else
11065 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11066 /* ??? Increase the cost to deal with transferring from
11067 FP -> CORE registers? */
11068 return true;
11070 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11071 && TARGET_FPU_ARMV8)
11073 *cost = COSTS_N_INSNS (1);
11074 if (speed_p)
11075 *cost += extra_cost->fp[mode == DFmode].roundint;
11076 return false;
11078 /* Vector costs? */
11080 *cost = LIBCALL_COST (1);
11081 return false;
11083 case FLOAT:
11084 case UNSIGNED_FLOAT:
11085 if (TARGET_HARD_FLOAT)
11087 /* ??? Increase the cost to deal with transferring from CORE
11088 -> FP registers? */
11089 *cost = COSTS_N_INSNS (1);
11090 if (speed_p)
11091 *cost += extra_cost->fp[mode == DFmode].fromint;
11092 return false;
11094 *cost = LIBCALL_COST (1);
11095 return false;
11097 case CALL:
11098 *cost = COSTS_N_INSNS (1);
11099 return true;
11101 case ASM_OPERANDS:
11103 /* Just a guess. Guess number of instructions in the asm
11104 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11105 though (see PR60663). */
11106 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11107 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11109 *cost = COSTS_N_INSNS (asm_length + num_operands);
11110 return true;
11112 default:
11113 if (mode != VOIDmode)
11114 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11115 else
11116 *cost = COSTS_N_INSNS (4); /* Who knows? */
11117 return false;
11121 #undef HANDLE_NARROW_SHIFT_ARITH
11123 /* RTX costs when optimizing for size. */
11124 static bool
11125 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11126 int *total, bool speed)
11128 bool result;
11130 if (TARGET_OLD_RTX_COSTS
11131 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11133 /* Old way. (Deprecated.) */
11134 if (!speed)
11135 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11136 (enum rtx_code) outer_code, total);
11137 else
11138 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11139 (enum rtx_code) outer_code, total,
11140 speed);
11142 else
11144 /* New way. */
11145 if (current_tune->insn_extra_cost)
11146 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11147 (enum rtx_code) outer_code,
11148 current_tune->insn_extra_cost,
11149 total, speed);
11150 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS, but
11151 current_tune->insn_extra_cost == NULL, so fall back to the generic table. */
11152 else
11153 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11154 (enum rtx_code) outer_code,
11155 &generic_extra_costs, total, speed);
11158 if (dump_file && (dump_flags & TDF_DETAILS))
11160 print_rtl_single (dump_file, x);
11161 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11162 *total, result ? "final" : "partial");
11164 return result;
11167 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11168 supported on any "slowmul" cores, so it can be ignored. */
11170 static bool
11171 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11172 int *total, bool speed)
11174 machine_mode mode = GET_MODE (x);
11176 if (TARGET_THUMB)
11178 *total = thumb1_rtx_costs (x, code, outer_code);
11179 return true;
11182 switch (code)
11184 case MULT:
11185 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11186 || mode == DImode)
11188 *total = COSTS_N_INSNS (20);
11189 return false;
11192 if (CONST_INT_P (XEXP (x, 1)))
11194 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11195 & (unsigned HOST_WIDE_INT) 0xffffffff);
11196 int cost, const_ok = const_ok_for_arm (i);
11197 int j, booth_unit_size;
11199 /* Tune as appropriate. */
11200 cost = const_ok ? 4 : 8;
11201 booth_unit_size = 2;
11202 for (j = 0; i && j < 32; j += booth_unit_size)
11204 i >>= booth_unit_size;
11205 cost++;
11208 *total = COSTS_N_INSNS (cost);
11209 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11210 return true;
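/* A worked example: multiplying by 100 (0x64), which is const_ok_for_arm,
   starts at cost 4; the Booth loop then shifts the constant right two bits
   at a time (100 -> 25 -> 6 -> 1 -> 0), adding 1 on each of the four
   iterations, so *total becomes COSTS_N_INSNS (8) plus the cost of the
   other multiply operand. */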
11213 *total = COSTS_N_INSNS (20);
11214 return false;
11216 default:
11217 return arm_rtx_costs_1 (x, outer_code, total, speed);
11222 /* RTX cost for cores with a fast multiply unit (M variants). */
11224 static bool
11225 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11226 int *total, bool speed)
11228 machine_mode mode = GET_MODE (x);
11230 if (TARGET_THUMB1)
11232 *total = thumb1_rtx_costs (x, code, outer_code);
11233 return true;
11236 /* ??? Should Thumb-2 use different costs? */
11237 switch (code)
11239 case MULT:
11240 /* There is no point basing this on the tuning, since it is always the
11241 fast variant if it exists at all. */
11242 if (mode == DImode
11243 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11244 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11245 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11247 *total = COSTS_N_INSNS (2);
11248 return false;
11252 if (mode == DImode)
11254 *total = COSTS_N_INSNS (5);
11255 return false;
11258 if (CONST_INT_P (XEXP (x, 1)))
11260 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11261 & (unsigned HOST_WIDE_INT) 0xffffffff);
11262 int cost, const_ok = const_ok_for_arm (i);
11263 int j, booth_unit_size;
11265 /* Tune as appropriate. */
11266 cost = const_ok ? 4 : 8;
11267 booth_unit_size = 8;
11268 for (j = 0; i && j < 32; j += booth_unit_size)
11270 i >>= booth_unit_size;
11271 cost++;
11274 *total = COSTS_N_INSNS (cost);
11275 return false;
11278 if (mode == SImode)
11280 *total = COSTS_N_INSNS (4);
11281 return false;
11284 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11286 if (TARGET_HARD_FLOAT
11287 && (mode == SFmode
11288 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11290 *total = COSTS_N_INSNS (1);
11291 return false;
11295 /* Requires a lib call */
11296 *total = COSTS_N_INSNS (20);
11297 return false;
11299 default:
11300 return arm_rtx_costs_1 (x, outer_code, total, speed);
11305 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11306 so it can be ignored. */
11308 static bool
11309 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11310 int *total, bool speed)
11312 machine_mode mode = GET_MODE (x);
11314 if (TARGET_THUMB)
11316 *total = thumb1_rtx_costs (x, code, outer_code);
11317 return true;
11320 switch (code)
11322 case COMPARE:
11323 if (GET_CODE (XEXP (x, 0)) != MULT)
11324 return arm_rtx_costs_1 (x, outer_code, total, speed);
11326 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11327 will stall until the multiplication is complete. */
11328 *total = COSTS_N_INSNS (3);
11329 return false;
11331 case MULT:
11332 /* There is no point basing this on the tuning, since it is always the
11333 fast variant if it exists at all. */
11334 if (mode == DImode
11335 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11336 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11337 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11339 *total = COSTS_N_INSNS (2);
11340 return false;
11344 if (mode == DImode)
11346 *total = COSTS_N_INSNS (5);
11347 return false;
11350 if (CONST_INT_P (XEXP (x, 1)))
11352 /* If operand 1 is a constant we can more accurately
11353 calculate the cost of the multiply. The multiplier can
11354 retire 15 bits on the first cycle and a further 12 on the
11355 second. We do, of course, have to load the constant into
11356 a register first. */
11357 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11358 /* There's a general overhead of one cycle. */
11359 int cost = 1;
11360 unsigned HOST_WIDE_INT masked_const;
11362 if (i & 0x80000000)
11363 i = ~i;
11365 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11367 masked_const = i & 0xffff8000;
11368 if (masked_const != 0)
11370 cost++;
11371 masked_const = i & 0xf8000000;
11372 if (masked_const != 0)
11373 cost++;
11375 *total = COSTS_N_INSNS (cost);
11376 return false;
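/* A worked example: for a multiply by 0x12345678 the sign bit is clear, so
   the constant is not inverted; 0x12345678 & 0xffff8000 and
   0x12345678 & 0xf8000000 are both nonzero, so the cost grows from 1 to 3
   and *total becomes COSTS_N_INSNS (3). */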
11379 if (mode == SImode)
11381 *total = COSTS_N_INSNS (3);
11382 return false;
11385 /* Requires a lib call */
11386 *total = COSTS_N_INSNS (20);
11387 return false;
11389 default:
11390 return arm_rtx_costs_1 (x, outer_code, total, speed);
11395 /* RTX costs for 9e (and later) cores. */
11397 static bool
11398 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11399 int *total, bool speed)
11401 machine_mode mode = GET_MODE (x);
11403 if (TARGET_THUMB1)
11405 switch (code)
11407 case MULT:
11408 /* Small multiply: 32 cycles for an integer multiply inst. */
11409 if (arm_arch6m && arm_m_profile_small_mul)
11410 *total = COSTS_N_INSNS (32);
11411 else
11412 *total = COSTS_N_INSNS (3);
11413 return true;
11415 default:
11416 *total = thumb1_rtx_costs (x, code, outer_code);
11417 return true;
11421 switch (code)
11423 case MULT:
11424 /* There is no point basing this on the tuning, since it is always the
11425 fast variant if it exists at all. */
11426 if (mode == DImode
11427 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11428 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11429 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11431 *total = COSTS_N_INSNS (2);
11432 return false;
11436 if (mode == DImode)
11438 *total = COSTS_N_INSNS (5);
11439 return false;
11442 if (mode == SImode)
11444 *total = COSTS_N_INSNS (2);
11445 return false;
11448 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11450 if (TARGET_HARD_FLOAT
11451 && (mode == SFmode
11452 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11454 *total = COSTS_N_INSNS (1);
11455 return false;
11459 *total = COSTS_N_INSNS (20);
11460 return false;
11462 default:
11463 return arm_rtx_costs_1 (x, outer_code, total, speed);
11466 /* All address computations that can be done are free, but rtx cost returns
11467 the same for practically all of them. So we weight the different types
11468 of address here in the order (most pref first):
11469 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11470 static inline int
11471 arm_arm_address_cost (rtx x)
11473 enum rtx_code c = GET_CODE (x);
11475 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11476 return 0;
11477 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11478 return 10;
11480 if (c == PLUS)
11482 if (CONST_INT_P (XEXP (x, 1)))
11483 return 2;
11485 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11486 return 3;
11488 return 4;
11491 return 6;
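/* Illustrative values from the weighting above: (post_inc (reg)) costs 0,
   (plus (reg) (const_int 4)) costs 2, (plus (reg) (mult (reg) (const_int 4)))
   costs 3, a bare (reg) costs 6, and a (symbol_ref) costs 10. */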
11494 static inline int
11495 arm_thumb_address_cost (rtx x)
11497 enum rtx_code c = GET_CODE (x);
11499 if (c == REG)
11500 return 1;
11501 if (c == PLUS
11502 && REG_P (XEXP (x, 0))
11503 && CONST_INT_P (XEXP (x, 1)))
11504 return 1;
11506 return 2;
11509 static int
11510 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11511 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11513 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11516 /* Adjust cost hook for XScale. */
11517 static bool
11518 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11520 /* Some true dependencies can have a higher cost depending
11521 on precisely how certain input operands are used. */
11522 if (REG_NOTE_KIND (link) == 0
11523 && recog_memoized (insn) >= 0
11524 && recog_memoized (dep) >= 0)
11526 int shift_opnum = get_attr_shift (insn);
11527 enum attr_type attr_type = get_attr_type (dep);
11529 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11530 operand for INSN. If we have a shifted input operand and the
11531 instruction we depend on is another ALU instruction, then we may
11532 have to account for an additional stall. */
11533 if (shift_opnum != 0
11534 && (attr_type == TYPE_ALU_SHIFT_IMM
11535 || attr_type == TYPE_ALUS_SHIFT_IMM
11536 || attr_type == TYPE_LOGIC_SHIFT_IMM
11537 || attr_type == TYPE_LOGICS_SHIFT_IMM
11538 || attr_type == TYPE_ALU_SHIFT_REG
11539 || attr_type == TYPE_ALUS_SHIFT_REG
11540 || attr_type == TYPE_LOGIC_SHIFT_REG
11541 || attr_type == TYPE_LOGICS_SHIFT_REG
11542 || attr_type == TYPE_MOV_SHIFT
11543 || attr_type == TYPE_MVN_SHIFT
11544 || attr_type == TYPE_MOV_SHIFT_REG
11545 || attr_type == TYPE_MVN_SHIFT_REG))
11547 rtx shifted_operand;
11548 int opno;
11550 /* Get the shifted operand. */
11551 extract_insn (insn);
11552 shifted_operand = recog_data.operand[shift_opnum];
11554 /* Iterate over all the operands in DEP. If we write an operand
11555 that overlaps with SHIFTED_OPERAND, then we have to increase the
11556 cost of this dependency. */
11557 extract_insn (dep);
11558 preprocess_constraints (dep);
11559 for (opno = 0; opno < recog_data.n_operands; opno++)
11561 /* We can ignore strict inputs. */
11562 if (recog_data.operand_type[opno] == OP_IN)
11563 continue;
11565 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11566 shifted_operand))
11568 *cost = 2;
11569 return false;
11574 return true;
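/* A hypothetical example: for an insn such as "add r0, r1, r2, lsl #2" whose
   shifted operand r2 is written by the ALU/shift insn DEP it depends on, the
   dependency cost is forced to 2 to model the extra stall; otherwise the
   default latency is kept. */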
11577 /* Adjust cost hook for Cortex A9. */
11578 static bool
11579 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11581 switch (REG_NOTE_KIND (link))
11583 case REG_DEP_ANTI:
11584 *cost = 0;
11585 return false;
11587 case REG_DEP_TRUE:
11588 case REG_DEP_OUTPUT:
11589 if (recog_memoized (insn) >= 0
11590 && recog_memoized (dep) >= 0)
11592 if (GET_CODE (PATTERN (insn)) == SET)
11594 if (GET_MODE_CLASS
11595 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11596 || GET_MODE_CLASS
11597 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11599 enum attr_type attr_type_insn = get_attr_type (insn);
11600 enum attr_type attr_type_dep = get_attr_type (dep);
11602 /* By default all dependencies of the form
11603 s0 = s0 <op> s1
11604 s0 = s0 <op> s2
11605 have an extra latency of 1 cycle because
11606 of the input and output dependency in this
11607 case. However this gets modeled as a true
11608 dependency and hence all these checks. */
11609 if (REG_P (SET_DEST (PATTERN (insn)))
11610 && REG_P (SET_DEST (PATTERN (dep)))
11611 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11612 SET_DEST (PATTERN (dep))))
11614 /* FMACS is a special case where the dependent
11615 instruction can be issued 3 cycles before
11616 the normal latency in case of an output
11617 dependency. */
11618 if ((attr_type_insn == TYPE_FMACS
11619 || attr_type_insn == TYPE_FMACD)
11620 && (attr_type_dep == TYPE_FMACS
11621 || attr_type_dep == TYPE_FMACD))
11623 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11624 *cost = insn_default_latency (dep) - 3;
11625 else
11626 *cost = insn_default_latency (dep);
11627 return false;
11629 else
11631 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11632 *cost = insn_default_latency (dep) + 1;
11633 else
11634 *cost = insn_default_latency (dep);
11636 return false;
11641 break;
11643 default:
11644 gcc_unreachable ();
11647 return true;
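/* For example, with two FMACS insns writing the same destination register,
   an output dependency is charged insn_default_latency (dep) - 3, while any
   other overlapping floating-point output dependency is charged the default
   latency plus one cycle. */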
11650 /* Adjust cost hook for FA726TE. */
11651 static bool
11652 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11654 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11655 by a predicated one) has a penalty of 3. */
11656 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11657 && recog_memoized (insn) >= 0
11658 && recog_memoized (dep) >= 0
11659 && get_attr_conds (dep) == CONDS_SET)
11661 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11662 if (get_attr_conds (insn) == CONDS_USE
11663 && get_attr_type (insn) != TYPE_BRANCH)
11665 *cost = 3;
11666 return false;
11669 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11670 || get_attr_conds (insn) == CONDS_USE)
11672 *cost = 0;
11673 return false;
11677 return true;
11680 /* Implement TARGET_REGISTER_MOVE_COST.
11682 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11683 one that is typically more expensive than a single memory access. We set
11684 the cost to less than two memory accesses so that floating
11685 point to integer conversion does not go through memory. */
11688 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11689 reg_class_t from, reg_class_t to)
11691 if (TARGET_32BIT)
11693 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11694 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11695 return 15;
11696 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11697 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11698 return 4;
11699 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11700 return 20;
11701 else
11702 return 2;
11704 else
11706 if (from == HI_REGS || to == HI_REGS)
11707 return 4;
11708 else
11709 return 2;
11713 /* Implement TARGET_MEMORY_MOVE_COST. */
11716 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11717 bool in ATTRIBUTE_UNUSED)
11719 if (TARGET_32BIT)
11720 return 10;
11721 else
11723 if (GET_MODE_SIZE (mode) < 4)
11724 return 8;
11725 else
11726 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11730 /* Vectorizer cost model implementation. */
11732 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11733 static int
11734 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11735 tree vectype,
11736 int misalign ATTRIBUTE_UNUSED)
11738 unsigned elements;
11740 switch (type_of_cost)
11742 case scalar_stmt:
11743 return current_tune->vec_costs->scalar_stmt_cost;
11745 case scalar_load:
11746 return current_tune->vec_costs->scalar_load_cost;
11748 case scalar_store:
11749 return current_tune->vec_costs->scalar_store_cost;
11751 case vector_stmt:
11752 return current_tune->vec_costs->vec_stmt_cost;
11754 case vector_load:
11755 return current_tune->vec_costs->vec_align_load_cost;
11757 case vector_store:
11758 return current_tune->vec_costs->vec_store_cost;
11760 case vec_to_scalar:
11761 return current_tune->vec_costs->vec_to_scalar_cost;
11763 case scalar_to_vec:
11764 return current_tune->vec_costs->scalar_to_vec_cost;
11766 case unaligned_load:
11767 return current_tune->vec_costs->vec_unalign_load_cost;
11769 case unaligned_store:
11770 return current_tune->vec_costs->vec_unalign_store_cost;
11772 case cond_branch_taken:
11773 return current_tune->vec_costs->cond_taken_branch_cost;
11775 case cond_branch_not_taken:
11776 return current_tune->vec_costs->cond_not_taken_branch_cost;
11778 case vec_perm:
11779 case vec_promote_demote:
11780 return current_tune->vec_costs->vec_stmt_cost;
11782 case vec_construct:
11783 elements = TYPE_VECTOR_SUBPARTS (vectype);
11784 return elements / 2 + 1;
11786 default:
11787 gcc_unreachable ();
11791 /* Implement targetm.vectorize.add_stmt_cost. */
11793 static unsigned
11794 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11795 struct _stmt_vec_info *stmt_info, int misalign,
11796 enum vect_cost_model_location where)
11798 unsigned *cost = (unsigned *) data;
11799 unsigned retval = 0;
11801 if (flag_vect_cost_model)
11803 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11804 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11806 /* Statements in an inner loop relative to the loop being
11807 vectorized are weighted more heavily. The value here is
11808 arbitrary and could potentially be improved with analysis. */
11809 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11810 count *= 50; /* FIXME. */
11812 retval = (unsigned) (count * stmt_cost);
11813 cost[where] += retval;
11816 return retval;
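/* A worked example of the accounting above (the cost value is illustrative
   only; the real numbers come from the current_tune cost tables): for
   kind == vector_stmt with a vec_stmt_cost of 1, count == 2 and a statement
   inside an inner loop, count is scaled to 2 * 50 = 100, so retval is 100
   and cost[vect_body] grows by 100; outside an inner loop the same
   statement would add only 2.  */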
11819 /* Return true if and only if this insn can dual-issue only as older. */
11820 static bool
11821 cortexa7_older_only (rtx_insn *insn)
11823 if (recog_memoized (insn) < 0)
11824 return false;
11826 switch (get_attr_type (insn))
11828 case TYPE_ALU_DSP_REG:
11829 case TYPE_ALU_SREG:
11830 case TYPE_ALUS_SREG:
11831 case TYPE_LOGIC_REG:
11832 case TYPE_LOGICS_REG:
11833 case TYPE_ADC_REG:
11834 case TYPE_ADCS_REG:
11835 case TYPE_ADR:
11836 case TYPE_BFM:
11837 case TYPE_REV:
11838 case TYPE_MVN_REG:
11839 case TYPE_SHIFT_IMM:
11840 case TYPE_SHIFT_REG:
11841 case TYPE_LOAD_BYTE:
11842 case TYPE_LOAD1:
11843 case TYPE_STORE1:
11844 case TYPE_FFARITHS:
11845 case TYPE_FADDS:
11846 case TYPE_FFARITHD:
11847 case TYPE_FADDD:
11848 case TYPE_FMOV:
11849 case TYPE_F_CVT:
11850 case TYPE_FCMPS:
11851 case TYPE_FCMPD:
11852 case TYPE_FCONSTS:
11853 case TYPE_FCONSTD:
11854 case TYPE_FMULS:
11855 case TYPE_FMACS:
11856 case TYPE_FMULD:
11857 case TYPE_FMACD:
11858 case TYPE_FDIVS:
11859 case TYPE_FDIVD:
11860 case TYPE_F_MRC:
11861 case TYPE_F_MRRC:
11862 case TYPE_F_FLAG:
11863 case TYPE_F_LOADS:
11864 case TYPE_F_STORES:
11865 return true;
11866 default:
11867 return false;
11871 /* Return true if and only if this insn can dual-issue as younger. */
11872 static bool
11873 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11875 if (recog_memoized (insn) < 0)
11877 if (verbose > 5)
11878 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11879 return false;
11882 switch (get_attr_type (insn))
11884 case TYPE_ALU_IMM:
11885 case TYPE_ALUS_IMM:
11886 case TYPE_LOGIC_IMM:
11887 case TYPE_LOGICS_IMM:
11888 case TYPE_EXTEND:
11889 case TYPE_MVN_IMM:
11890 case TYPE_MOV_IMM:
11891 case TYPE_MOV_REG:
11892 case TYPE_MOV_SHIFT:
11893 case TYPE_MOV_SHIFT_REG:
11894 case TYPE_BRANCH:
11895 case TYPE_CALL:
11896 return true;
11897 default:
11898 return false;
11903 /* Look for an instruction that can dual issue only as an older
11904 instruction, and move it in front of any instructions that can
11905 dual-issue as younger, while preserving the relative order of all
11906 other instructions in the ready list. This is a heuristic to help
11907 dual-issue in later cycles, by postponing issue of more flexible
11908 instructions. This heuristic may affect dual issue opportunities
11909 in the current cycle. */
11910 static void
11911 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11912 int *n_readyp, int clock)
11914 int i;
11915 int first_older_only = -1, first_younger = -1;
11917 if (verbose > 5)
11918 fprintf (file,
11919 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11920 clock,
11921 *n_readyp);
11923 /* Traverse the ready list from the head (the instruction to issue
11924 first), looking for the first instruction that can issue as
11925 younger and the first instruction that can dual-issue only as
11926 older. */
11927 for (i = *n_readyp - 1; i >= 0; i--)
11929 rtx_insn *insn = ready[i];
11930 if (cortexa7_older_only (insn))
11932 first_older_only = i;
11933 if (verbose > 5)
11934 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11935 break;
11937 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11938 first_younger = i;
11941 /* Nothing to reorder because either no younger insn was found, or an insn
11942 that can dual-issue only as older appears before any insn that
11943 can dual-issue as younger. */
11944 if (first_younger == -1)
11946 if (verbose > 5)
11947 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11948 return;
11951 /* Nothing to reorder because no older-only insn in the ready list. */
11952 if (first_older_only == -1)
11954 if (verbose > 5)
11955 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11956 return;
11959 /* Move first_older_only insn before first_younger. */
11960 if (verbose > 5)
11961 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11962 INSN_UID(ready [first_older_only]),
11963 INSN_UID(ready [first_younger]));
11964 rtx_insn *first_older_only_insn = ready [first_older_only];
11965 for (i = first_older_only; i < first_younger; i++)
11967 ready[i] = ready[i+1];
11970 ready[i] = first_older_only_insn;
11971 return;
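/* A small example of the reordering above (insn names are purely
   illustrative): with *N_READYP == 3 and ready[0..2] = { Y2, OLDER, Y1 },
   where index 2 is the head (issued first), Y1 and Y2 can dual-issue as
   younger and OLDER only as older, the scan sets first_younger = 2 and
   first_older_only = 1, and the shuffle produces ready[0..2] =
   { Y2, Y1, OLDER }, so OLDER moves to the head and issues first.  */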
11974 /* Implement TARGET_SCHED_REORDER. */
11975 static int
11976 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11977 int clock)
11979 switch (arm_tune)
11981 case cortexa7:
11982 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11983 break;
11984 default:
11985 /* Do nothing for other cores. */
11986 break;
11989 return arm_issue_rate ();
11992 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11993 It corrects the value of COST based on the relationship between
11994 INSN and DEP through the dependence LINK. It returns the new
11995 value. There is a per-core adjust_cost hook to adjust scheduler costs
11996 and the per-core hook can choose to completely override the generic
11997 adjust_cost function. Only put bits of code into arm_adjust_cost that
11998 are common across all cores. */
11999 static int
12000 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12002 rtx i_pat, d_pat;
12004 /* When generating Thumb-1 code, we want to place flag-setting operations
12005 close to a conditional branch which depends on them, so that we can
12006 omit the comparison. */
12007 if (TARGET_THUMB1
12008 && REG_NOTE_KIND (link) == 0
12009 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12010 && recog_memoized (dep) >= 0
12011 && get_attr_conds (dep) == CONDS_SET)
12012 return 0;
12014 if (current_tune->sched_adjust_cost != NULL)
12016 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12017 return cost;
12020 /* XXX Is this strictly true? */
12021 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12022 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12023 return 0;
12025 /* Call insns don't incur a stall, even if they follow a load. */
12026 if (REG_NOTE_KIND (link) == 0
12027 && CALL_P (insn))
12028 return 1;
12030 if ((i_pat = single_set (insn)) != NULL
12031 && MEM_P (SET_SRC (i_pat))
12032 && (d_pat = single_set (dep)) != NULL
12033 && MEM_P (SET_DEST (d_pat)))
12035 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12036 /* This is a load after a store; there is no conflict if the load reads
12037 from a cached area. Assume that loads from the stack, and from the
12038 constant pool are cached, and that others will miss. This is a
12039 hack. */
12041 if ((GET_CODE (src_mem) == SYMBOL_REF
12042 && CONSTANT_POOL_ADDRESS_P (src_mem))
12043 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12044 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12045 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12046 return 1;
12049 return cost;
12053 arm_max_conditional_execute (void)
12055 return max_insns_skipped;
12058 static int
12059 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12061 if (TARGET_32BIT)
12062 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12063 else
12064 return (optimize > 0) ? 2 : 0;
12067 static int
12068 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12070 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12073 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12074 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12075 sequences of non-executed instructions in IT blocks probably take the same
12076 amount of time as executed instructions (and the IT instruction itself takes
12077 space in icache). This function was experimentally determined to give good
12078 results on a popular embedded benchmark. */
12080 static int
12081 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12083 return (TARGET_32BIT && speed_p) ? 1
12084 : arm_default_branch_cost (speed_p, predictable_p);
12087 static bool fp_consts_inited = false;
12089 static REAL_VALUE_TYPE value_fp0;
12091 static void
12092 init_fp_table (void)
12094 REAL_VALUE_TYPE r;
12096 r = REAL_VALUE_ATOF ("0", DFmode);
12097 value_fp0 = r;
12098 fp_consts_inited = true;
12101 /* Return TRUE if rtx X is a valid immediate FP constant. */
12103 arm_const_double_rtx (rtx x)
12105 REAL_VALUE_TYPE r;
12107 if (!fp_consts_inited)
12108 init_fp_table ();
12110 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12111 if (REAL_VALUE_MINUS_ZERO (r))
12112 return 0;
12114 if (REAL_VALUES_EQUAL (r, value_fp0))
12115 return 1;
12117 return 0;
12120 /* VFPv3 has a fairly wide range of representable immediates, formed from
12121 "quarter-precision" floating-point values. These can be evaluated using this
12122 formula (with ^ for exponentiation):
12124 (-1)^s * n * 2^(-r)
12126 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12127 16 <= n <= 31 and 0 <= r <= 7.
12129 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12131 - A (most-significant) is the sign bit.
12132 - BCD are the exponent (encoded as r XOR 3).
12133 - EFGH are the mantissa (encoded as n - 16).
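   A worked example, derived purely from the formula above: 2.0 = 16 * 2^-3,
   i.e. s = 0, n = 16, r = 3, so A = 0, BCD = r XOR 3 = 0b000 and
   EFGH = n - 16 = 0b0000, giving the 8-bit index 0b00000000.  This is the
   value vfp3_const_double_index below returns for the CONST_DOUBLE 2.0.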
12136 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12137 fconst[sd] instruction, or -1 if X isn't suitable. */
12138 static int
12139 vfp3_const_double_index (rtx x)
12141 REAL_VALUE_TYPE r, m;
12142 int sign, exponent;
12143 unsigned HOST_WIDE_INT mantissa, mant_hi;
12144 unsigned HOST_WIDE_INT mask;
12145 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12146 bool fail;
12148 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12149 return -1;
12151 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12153 /* We can't represent these things, so detect them first. */
12154 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12155 return -1;
12157 /* Extract sign, exponent and mantissa. */
12158 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12159 r = real_value_abs (&r);
12160 exponent = REAL_EXP (&r);
12161 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12162 highest (sign) bit, with a fixed binary point at bit point_pos.
12163 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12164 bits for the mantissa, this may fail (low bits would be lost). */
12165 real_ldexp (&m, &r, point_pos - exponent);
12166 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12167 mantissa = w.elt (0);
12168 mant_hi = w.elt (1);
12170 /* If there are bits set in the low part of the mantissa, we can't
12171 represent this value. */
12172 if (mantissa != 0)
12173 return -1;
12175 /* Now make it so that mantissa contains the most-significant bits, and move
12176 the point_pos to indicate that the least-significant bits have been
12177 discarded. */
12178 point_pos -= HOST_BITS_PER_WIDE_INT;
12179 mantissa = mant_hi;
12181 /* We can permit four significant bits of mantissa only, plus a high bit
12182 which is always 1. */
12183 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12184 if ((mantissa & mask) != 0)
12185 return -1;
12187 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12188 mantissa >>= point_pos - 5;
12190 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12191 floating-point immediate zero with Neon using an integer-zero load, but
12192 that case is handled elsewhere.) */
12193 if (mantissa == 0)
12194 return -1;
12196 gcc_assert (mantissa >= 16 && mantissa <= 31);
12198 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12199 normalized significands are in the range [1, 2). (Our mantissa is shifted
12200 left 4 places at this point relative to normalized IEEE754 values). GCC
12201 internally uses [0.5, 1) (see real.c), so the exponent returned from
12202 REAL_EXP must be altered. */
12203 exponent = 5 - exponent;
12205 if (exponent < 0 || exponent > 7)
12206 return -1;
12208 /* Sign, mantissa and exponent are now in the correct form to plug into the
12209 formula described in the comment above. */
12210 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12213 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12215 vfp3_const_double_rtx (rtx x)
12217 if (!TARGET_VFP3)
12218 return 0;
12220 return vfp3_const_double_index (x) != -1;
12223 /* Recognize immediates which can be used in various Neon instructions. Legal
12224 immediates are described by the following table (for VMVN variants, the
12225 bitwise inverse of the constant shown is recognized. In either case, VMOV
12226 is output and the correct instruction to use for a given constant is chosen
12227 by the assembler). The constant shown is replicated across all elements of
12228 the destination vector.
12230 insn elems variant constant (binary)
12231 ---- ----- ------- -----------------
12232 vmov i32 0 00000000 00000000 00000000 abcdefgh
12233 vmov i32 1 00000000 00000000 abcdefgh 00000000
12234 vmov i32 2 00000000 abcdefgh 00000000 00000000
12235 vmov i32 3 abcdefgh 00000000 00000000 00000000
12236 vmov i16 4 00000000 abcdefgh
12237 vmov i16 5 abcdefgh 00000000
12238 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12239 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12240 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12241 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12242 vmvn i16 10 00000000 abcdefgh
12243 vmvn i16 11 abcdefgh 00000000
12244 vmov i32 12 00000000 00000000 abcdefgh 11111111
12245 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12246 vmov i32 14 00000000 abcdefgh 11111111 11111111
12247 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12248 vmov i8 16 abcdefgh
12249 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12250 eeeeeeee ffffffff gggggggg hhhhhhhh
12251 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12252 vmov f32 19 00000000 00000000 00000000 00000000
12254 For case 18, B = !b. Representable values are exactly those accepted by
12255 vfp3_const_double_index, but are output as floating-point numbers rather
12256 than indices.
12258 For case 19, we will change it to vmov.i32 when assembling.
12260 Variants 0-5 (inclusive) may also be used as immediates for the second
12261 operand of VORR/VBIC instructions.
12263 The INVERSE argument causes the bitwise inverse of the given operand to be
12264 recognized instead (used for recognizing legal immediates for the VAND/VORN
12265 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12266 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12267 output, rather than the real insns vbic/vorr).
12269 INVERSE makes no difference to the recognition of float vectors.
12271 The return value is the variant of immediate as shown in the above table, or
12272 -1 if the given value doesn't match any of the listed patterns.
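   Two illustrative matches against the table (the constants are arbitrary,
   chosen only for the example): a V4SImode vector with every element equal
   to 0x42 matches variant 0 with element width 32 and *MODCONST set to
   (const_int 0x42), while a V8QImode vector with every byte equal to 0x42
   matches variant 16 with element width 8.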
12274 static int
12275 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12276 rtx *modconst, int *elementwidth)
12278 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12279 matches = 1; \
12280 for (i = 0; i < idx; i += (STRIDE)) \
12281 if (!(TEST)) \
12282 matches = 0; \
12283 if (matches) \
12285 immtype = (CLASS); \
12286 elsize = (ELSIZE); \
12287 break; \
12290 unsigned int i, elsize = 0, idx = 0, n_elts;
12291 unsigned int innersize;
12292 unsigned char bytes[16];
12293 int immtype = -1, matches;
12294 unsigned int invmask = inverse ? 0xff : 0;
12295 bool vector = GET_CODE (op) == CONST_VECTOR;
12297 if (vector)
12299 n_elts = CONST_VECTOR_NUNITS (op);
12300 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12302 else
12304 n_elts = 1;
12305 if (mode == VOIDmode)
12306 mode = DImode;
12307 innersize = GET_MODE_SIZE (mode);
12310 /* Vectors of float constants. */
12311 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12313 rtx el0 = CONST_VECTOR_ELT (op, 0);
12314 REAL_VALUE_TYPE r0;
12316 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12317 return -1;
12319 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12321 for (i = 1; i < n_elts; i++)
12323 rtx elt = CONST_VECTOR_ELT (op, i);
12324 REAL_VALUE_TYPE re;
12326 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12328 if (!REAL_VALUES_EQUAL (r0, re))
12329 return -1;
12332 if (modconst)
12333 *modconst = CONST_VECTOR_ELT (op, 0);
12335 if (elementwidth)
12336 *elementwidth = 0;
12338 if (el0 == CONST0_RTX (GET_MODE (el0)))
12339 return 19;
12340 else
12341 return 18;
12344 /* Splat vector constant out into a byte vector. */
12345 for (i = 0; i < n_elts; i++)
12347 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12348 unsigned HOST_WIDE_INT elpart;
12349 unsigned int part, parts;
12351 if (CONST_INT_P (el))
12353 elpart = INTVAL (el);
12354 parts = 1;
12356 else if (CONST_DOUBLE_P (el))
12358 elpart = CONST_DOUBLE_LOW (el);
12359 parts = 2;
12361 else
12362 gcc_unreachable ();
12364 for (part = 0; part < parts; part++)
12366 unsigned int byte;
12367 for (byte = 0; byte < innersize; byte++)
12369 bytes[idx++] = (elpart & 0xff) ^ invmask;
12370 elpart >>= BITS_PER_UNIT;
12372 if (CONST_DOUBLE_P (el))
12373 elpart = CONST_DOUBLE_HIGH (el);
12377 /* Sanity check. */
12378 gcc_assert (idx == GET_MODE_SIZE (mode));
12382 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12383 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12385 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12386 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12388 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12389 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12391 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12392 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12394 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12396 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12398 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12399 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12401 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12402 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12404 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12405 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12407 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12408 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12410 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12412 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12414 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12415 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12417 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12418 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12420 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12421 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12423 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12424 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12426 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12428 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12429 && bytes[i] == bytes[(i + 8) % idx]);
12431 while (0);
12433 if (immtype == -1)
12434 return -1;
12436 if (elementwidth)
12437 *elementwidth = elsize;
12439 if (modconst)
12441 unsigned HOST_WIDE_INT imm = 0;
12443 /* Un-invert bytes of recognized vector, if necessary. */
12444 if (invmask != 0)
12445 for (i = 0; i < idx; i++)
12446 bytes[i] ^= invmask;
12448 if (immtype == 17)
12450 /* FIXME: Broken on 32-bit H_W_I hosts. */
12451 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12453 for (i = 0; i < 8; i++)
12454 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12455 << (i * BITS_PER_UNIT);
12457 *modconst = GEN_INT (imm);
12459 else
12461 unsigned HOST_WIDE_INT imm = 0;
12463 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12464 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12466 *modconst = GEN_INT (imm);
12470 return immtype;
12471 #undef CHECK
12474 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12475 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12476 float elements), and a modified constant (whatever should be output for a
12477 VMOV) in *MODCONST. */
12480 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12481 rtx *modconst, int *elementwidth)
12483 rtx tmpconst;
12484 int tmpwidth;
12485 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12487 if (retval == -1)
12488 return 0;
12490 if (modconst)
12491 *modconst = tmpconst;
12493 if (elementwidth)
12494 *elementwidth = tmpwidth;
12496 return 1;
12499 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12500 the immediate is valid, write a constant suitable for using as an operand
12501 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12502 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12505 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12506 rtx *modconst, int *elementwidth)
12508 rtx tmpconst;
12509 int tmpwidth;
12510 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12512 if (retval < 0 || retval > 5)
12513 return 0;
12515 if (modconst)
12516 *modconst = tmpconst;
12518 if (elementwidth)
12519 *elementwidth = tmpwidth;
12521 return 1;
12524 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12525 the immediate is valid, write a constant suitable for using as an operand
12526 to VSHR/VSHL to *MODCONST and the corresponding element width to
12527 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes a left shift from a right shift,
12528 because the two have different limits on the immediate value.
12531 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12532 rtx *modconst, int *elementwidth,
12533 bool isleftshift)
12535 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12536 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12537 unsigned HOST_WIDE_INT last_elt = 0;
12538 unsigned HOST_WIDE_INT maxshift;
12540 /* Check that all elements of the vector constant are the same. */
12541 for (i = 0; i < n_elts; i++)
12543 rtx el = CONST_VECTOR_ELT (op, i);
12544 unsigned HOST_WIDE_INT elpart;
12546 if (CONST_INT_P (el))
12547 elpart = INTVAL (el);
12548 else if (CONST_DOUBLE_P (el))
12549 return 0;
12550 else
12551 gcc_unreachable ();
12553 if (i != 0 && elpart != last_elt)
12554 return 0;
12556 last_elt = elpart;
12559 /* Shift less than element size. */
12560 maxshift = innersize * 8;
12562 if (isleftshift)
12564 /* Left shift immediate value can be from 0 to <size>-1. */
12565 if (last_elt >= maxshift)
12566 return 0;
12568 else
12570 /* Right shift immediate value can be from 1 to <size>. */
12571 if (last_elt == 0 || last_elt > maxshift)
12572 return 0;
12575 if (elementwidth)
12576 *elementwidth = innersize * 8;
12578 if (modconst)
12579 *modconst = CONST_VECTOR_ELT (op, 0);
12581 return 1;
12584 /* Return a string suitable for output of Neon immediate logic operation
12585 MNEM. */
12587 char *
12588 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12589 int inverse, int quad)
12591 int width, is_valid;
12592 static char templ[40];
12594 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12596 gcc_assert (is_valid != 0);
12598 if (quad)
12599 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12600 else
12601 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12603 return templ;
12606 /* Return a string suitable for output of Neon immediate shift operation
12607 (VSHR or VSHL) MNEM. */
12609 char *
12610 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12611 machine_mode mode, int quad,
12612 bool isleftshift)
12614 int width, is_valid;
12615 static char templ[40];
12617 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12618 gcc_assert (is_valid != 0);
12620 if (quad)
12621 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12622 else
12623 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12625 return templ;
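/* For illustration (operand values chosen arbitrarily): a quad-register
   VBIC whose immediate was validated with element width 32 is emitted from
   the template above as "vbic.i32\t%q0, %2", and a double-register
   arithmetic right shift on 16-bit elements is emitted as
   "vshr.s16\t%P0, %P1, %2".  */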
12628 /* Output a sequence of pairwise operations to implement a reduction.
12629 NOTE: We do "too much work" here, because pairwise operations work on two
12630 registers' worth of operands in one go. Unfortunately, I don't think we can
12631 exploit those extra calculations to do the full operation in fewer steps.
12632 Although all vector elements of the result but the first are ignored, we
12633 actually calculate the same result in each of the elements. An alternative
12634 such as initially loading a vector with zero to use as each of the second
12635 operands would use up an additional register and take an extra instruction,
12636 for no particular gain. */
12638 void
12639 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12640 rtx (*reduc) (rtx, rtx, rtx))
12642 machine_mode inner = GET_MODE_INNER (mode);
12643 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12644 rtx tmpsum = op1;
12646 for (i = parts / 2; i >= 1; i /= 2)
12648 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12649 emit_insn (reduc (dest, tmpsum, tmpsum));
12650 tmpsum = dest;
12654 /* If VALS is a vector constant that can be loaded into a register
12655 using VDUP, generate instructions to do so and return an RTX to
12656 assign to the register. Otherwise return NULL_RTX. */
12658 static rtx
12659 neon_vdup_constant (rtx vals)
12661 machine_mode mode = GET_MODE (vals);
12662 machine_mode inner_mode = GET_MODE_INNER (mode);
12663 int n_elts = GET_MODE_NUNITS (mode);
12664 bool all_same = true;
12665 rtx x;
12666 int i;
12668 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12669 return NULL_RTX;
12671 for (i = 0; i < n_elts; ++i)
12673 x = XVECEXP (vals, 0, i);
12674 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12675 all_same = false;
12678 if (!all_same)
12679 /* The elements are not all the same. We could handle repeating
12680 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12681 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12682 vdup.i16). */
12683 return NULL_RTX;
12685 /* We can load this constant by using VDUP and a constant in a
12686 single ARM register. This will be cheaper than a vector
12687 load. */
12689 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12690 return gen_rtx_VEC_DUPLICATE (mode, x);
12693 /* Generate code to load VALS, which is a PARALLEL containing only
12694 constants (for vec_init) or CONST_VECTOR, efficiently into a
12695 register. Returns an RTX to copy into the register, or NULL_RTX
12696 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12699 neon_make_constant (rtx vals)
12701 machine_mode mode = GET_MODE (vals);
12702 rtx target;
12703 rtx const_vec = NULL_RTX;
12704 int n_elts = GET_MODE_NUNITS (mode);
12705 int n_const = 0;
12706 int i;
12708 if (GET_CODE (vals) == CONST_VECTOR)
12709 const_vec = vals;
12710 else if (GET_CODE (vals) == PARALLEL)
12712 /* A CONST_VECTOR must contain only CONST_INTs and
12713 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12714 Only store valid constants in a CONST_VECTOR. */
12715 for (i = 0; i < n_elts; ++i)
12717 rtx x = XVECEXP (vals, 0, i);
12718 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12719 n_const++;
12721 if (n_const == n_elts)
12722 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12724 else
12725 gcc_unreachable ();
12727 if (const_vec != NULL
12728 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12729 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12730 return const_vec;
12731 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12732 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12733 pipeline cycle; creating the constant takes one or two ARM
12734 pipeline cycles. */
12735 return target;
12736 else if (const_vec != NULL_RTX)
12737 /* Load from constant pool. On Cortex-A8 this takes two cycles
12738 (for either double or quad vectors). We can not take advantage
12739 of single-cycle VLD1 because we need a PC-relative addressing
12740 mode. */
12741 return const_vec;
12742 else
12743 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12744 We can not construct an initializer. */
12745 return NULL_RTX;
12748 /* Initialize vector TARGET to VALS. */
12750 void
12751 neon_expand_vector_init (rtx target, rtx vals)
12753 machine_mode mode = GET_MODE (target);
12754 machine_mode inner_mode = GET_MODE_INNER (mode);
12755 int n_elts = GET_MODE_NUNITS (mode);
12756 int n_var = 0, one_var = -1;
12757 bool all_same = true;
12758 rtx x, mem;
12759 int i;
12761 for (i = 0; i < n_elts; ++i)
12763 x = XVECEXP (vals, 0, i);
12764 if (!CONSTANT_P (x))
12765 ++n_var, one_var = i;
12767 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12768 all_same = false;
12771 if (n_var == 0)
12773 rtx constant = neon_make_constant (vals);
12774 if (constant != NULL_RTX)
12776 emit_move_insn (target, constant);
12777 return;
12781 /* Splat a single non-constant element if we can. */
12782 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12784 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12785 emit_insn (gen_rtx_SET (VOIDmode, target,
12786 gen_rtx_VEC_DUPLICATE (mode, x)));
12787 return;
12790 /* One field is non-constant. Load constant then overwrite varying
12791 field. This is more efficient than using the stack. */
12792 if (n_var == 1)
12794 rtx copy = copy_rtx (vals);
12795 rtx index = GEN_INT (one_var);
12797 /* Load constant part of vector, substitute neighboring value for
12798 varying element. */
12799 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12800 neon_expand_vector_init (target, copy);
12802 /* Insert variable. */
12803 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12804 switch (mode)
12806 case V8QImode:
12807 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12808 break;
12809 case V16QImode:
12810 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12811 break;
12812 case V4HImode:
12813 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12814 break;
12815 case V8HImode:
12816 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12817 break;
12818 case V2SImode:
12819 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12820 break;
12821 case V4SImode:
12822 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12823 break;
12824 case V2SFmode:
12825 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12826 break;
12827 case V4SFmode:
12828 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12829 break;
12830 case V2DImode:
12831 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12832 break;
12833 default:
12834 gcc_unreachable ();
12836 return;
12839 /* Construct the vector in memory one field at a time
12840 and load the whole vector. */
12841 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12842 for (i = 0; i < n_elts; i++)
12843 emit_move_insn (adjust_address_nv (mem, inner_mode,
12844 i * GET_MODE_SIZE (inner_mode)),
12845 XVECEXP (vals, 0, i));
12846 emit_move_insn (target, mem);
12849 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12850 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12851 reported source locations are bogus. */
12853 static void
12854 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12855 const char *err)
12857 HOST_WIDE_INT lane;
12859 gcc_assert (CONST_INT_P (operand));
12861 lane = INTVAL (operand);
12863 if (lane < low || lane >= high)
12864 error (err);
12867 /* Bounds-check lanes. */
12869 void
12870 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12872 bounds_check (operand, low, high, "lane out of range");
12875 /* Bounds-check constants. */
12877 void
12878 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12880 bounds_check (operand, low, high, "constant out of range");
12883 HOST_WIDE_INT
12884 neon_element_bits (machine_mode mode)
12886 if (mode == DImode)
12887 return GET_MODE_BITSIZE (mode);
12888 else
12889 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12893 /* Predicates for `match_operand' and `match_operator'. */
12895 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12896 WB is true if full writeback address modes are allowed and is false
12897 if limited writeback address modes (POST_INC and PRE_DEC) are
12898 allowed. */
12901 arm_coproc_mem_operand (rtx op, bool wb)
12903 rtx ind;
12905 /* Reject eliminable registers. */
12906 if (! (reload_in_progress || reload_completed || lra_in_progress)
12907 && ( reg_mentioned_p (frame_pointer_rtx, op)
12908 || reg_mentioned_p (arg_pointer_rtx, op)
12909 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12910 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12911 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12912 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12913 return FALSE;
12915 /* Constants are converted into offsets from labels. */
12916 if (!MEM_P (op))
12917 return FALSE;
12919 ind = XEXP (op, 0);
12921 if (reload_completed
12922 && (GET_CODE (ind) == LABEL_REF
12923 || (GET_CODE (ind) == CONST
12924 && GET_CODE (XEXP (ind, 0)) == PLUS
12925 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12926 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12927 return TRUE;
12929 /* Match: (mem (reg)). */
12930 if (REG_P (ind))
12931 return arm_address_register_rtx_p (ind, 0);
12933 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12934 acceptable in any case (subject to verification by
12935 arm_address_register_rtx_p). We need WB to be true to accept
12936 PRE_INC and POST_DEC. */
12937 if (GET_CODE (ind) == POST_INC
12938 || GET_CODE (ind) == PRE_DEC
12939 || (wb
12940 && (GET_CODE (ind) == PRE_INC
12941 || GET_CODE (ind) == POST_DEC)))
12942 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12944 if (wb
12945 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12946 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12947 && GET_CODE (XEXP (ind, 1)) == PLUS
12948 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12949 ind = XEXP (ind, 1);
12951 /* Match:
12952 (plus (reg)
12953 (const)). */
12954 if (GET_CODE (ind) == PLUS
12955 && REG_P (XEXP (ind, 0))
12956 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12957 && CONST_INT_P (XEXP (ind, 1))
12958 && INTVAL (XEXP (ind, 1)) > -1024
12959 && INTVAL (XEXP (ind, 1)) < 1024
12960 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12961 return TRUE;
12963 return FALSE;
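/* To summarize the checks above (a sketch, not an exhaustive statement of
   the ISA): the accepted addresses are [Rn], [Rn, #imm] with imm a multiple
   of 4 in the range -1020..+1020, POST_INC and PRE_DEC in all cases, and
   PRE_INC, POST_DEC and the {PRE,POST}_MODIFY forms only when WB is
   true.  */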
12966 /* Return TRUE if OP is a memory operand which we can load or store a vector
12967 to/from. TYPE is one of the following values:
12968 0 - Vector load/store (vldr)
12969 1 - Core registers (ldm)
12970 2 - Element/structure loads (vld1)
12973 neon_vector_mem_operand (rtx op, int type, bool strict)
12975 rtx ind;
12977 /* Reject eliminable registers. */
12978 if (! (reload_in_progress || reload_completed)
12979 && ( reg_mentioned_p (frame_pointer_rtx, op)
12980 || reg_mentioned_p (arg_pointer_rtx, op)
12981 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12982 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12983 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12984 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12985 return !strict;
12987 /* Constants are converted into offsets from labels. */
12988 if (!MEM_P (op))
12989 return FALSE;
12991 ind = XEXP (op, 0);
12993 if (reload_completed
12994 && (GET_CODE (ind) == LABEL_REF
12995 || (GET_CODE (ind) == CONST
12996 && GET_CODE (XEXP (ind, 0)) == PLUS
12997 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12998 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12999 return TRUE;
13001 /* Match: (mem (reg)). */
13002 if (REG_P (ind))
13003 return arm_address_register_rtx_p (ind, 0);
13005 /* Allow post-increment with Neon registers. */
13006 if ((type != 1 && GET_CODE (ind) == POST_INC)
13007 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13008 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13010 /* Allow post-increment by register for VLDn */
13011 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13012 && GET_CODE (XEXP (ind, 1)) == PLUS
13013 && REG_P (XEXP (XEXP (ind, 1), 1)))
13014 return true;
13016 /* Match:
13017 (plus (reg)
13018 (const)). */
13019 if (type == 0
13020 && GET_CODE (ind) == PLUS
13021 && REG_P (XEXP (ind, 0))
13022 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13023 && CONST_INT_P (XEXP (ind, 1))
13024 && INTVAL (XEXP (ind, 1)) > -1024
13025 /* For quad modes, we restrict the constant offset to be slightly less
13026 than what the instruction format permits. We have no such constraint
13027 on double mode offsets. (This must match arm_legitimate_index_p.) */
13028 && (INTVAL (XEXP (ind, 1))
13029 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13030 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13031 return TRUE;
13033 return FALSE;
13036 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13037 type. */
13039 neon_struct_mem_operand (rtx op)
13041 rtx ind;
13043 /* Reject eliminable registers. */
13044 if (! (reload_in_progress || reload_completed)
13045 && ( reg_mentioned_p (frame_pointer_rtx, op)
13046 || reg_mentioned_p (arg_pointer_rtx, op)
13047 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13048 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13049 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13050 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13051 return FALSE;
13053 /* Constants are converted into offsets from labels. */
13054 if (!MEM_P (op))
13055 return FALSE;
13057 ind = XEXP (op, 0);
13059 if (reload_completed
13060 && (GET_CODE (ind) == LABEL_REF
13061 || (GET_CODE (ind) == CONST
13062 && GET_CODE (XEXP (ind, 0)) == PLUS
13063 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13064 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13065 return TRUE;
13067 /* Match: (mem (reg)). */
13068 if (REG_P (ind))
13069 return arm_address_register_rtx_p (ind, 0);
13071 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13072 if (GET_CODE (ind) == POST_INC
13073 || GET_CODE (ind) == PRE_DEC)
13074 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13076 return FALSE;
13079 /* Return true if X is a register that will be eliminated later on. */
13081 arm_eliminable_register (rtx x)
13083 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13084 || REGNO (x) == ARG_POINTER_REGNUM
13085 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13086 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13089 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13090 coprocessor registers. Otherwise return NO_REGS. */
13092 enum reg_class
13093 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13095 if (mode == HFmode)
13097 if (!TARGET_NEON_FP16)
13098 return GENERAL_REGS;
13099 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13100 return NO_REGS;
13101 return GENERAL_REGS;
13104 /* The neon move patterns handle all legitimate vector and struct
13105 addresses. */
13106 if (TARGET_NEON
13107 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13108 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13109 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13110 || VALID_NEON_STRUCT_MODE (mode)))
13111 return NO_REGS;
13113 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13114 return NO_REGS;
13116 return GENERAL_REGS;
13119 /* Values which must be returned in the most-significant end of the return
13120 register. */
13122 static bool
13123 arm_return_in_msb (const_tree valtype)
13125 return (TARGET_AAPCS_BASED
13126 && BYTES_BIG_ENDIAN
13127 && (AGGREGATE_TYPE_P (valtype)
13128 || TREE_CODE (valtype) == COMPLEX_TYPE
13129 || FIXED_POINT_TYPE_P (valtype)));
13132 /* Return TRUE if X references a SYMBOL_REF. */
13134 symbol_mentioned_p (rtx x)
13136 const char * fmt;
13137 int i;
13139 if (GET_CODE (x) == SYMBOL_REF)
13140 return 1;
13142 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13143 are constant offsets, not symbols. */
13144 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13145 return 0;
13147 fmt = GET_RTX_FORMAT (GET_CODE (x));
13149 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13151 if (fmt[i] == 'E')
13153 int j;
13155 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13156 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13157 return 1;
13159 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13160 return 1;
13163 return 0;
13166 /* Return TRUE if X references a LABEL_REF. */
13168 label_mentioned_p (rtx x)
13170 const char * fmt;
13171 int i;
13173 if (GET_CODE (x) == LABEL_REF)
13174 return 1;
13176 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13177 instruction, but they are constant offsets, not symbols. */
13178 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13179 return 0;
13181 fmt = GET_RTX_FORMAT (GET_CODE (x));
13182 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13184 if (fmt[i] == 'E')
13186 int j;
13188 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13189 if (label_mentioned_p (XVECEXP (x, i, j)))
13190 return 1;
13192 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13193 return 1;
13196 return 0;
13200 tls_mentioned_p (rtx x)
13202 switch (GET_CODE (x))
13204 case CONST:
13205 return tls_mentioned_p (XEXP (x, 0));
13207 case UNSPEC:
13208 if (XINT (x, 1) == UNSPEC_TLS)
13209 return 1;
13211 default:
13212 return 0;
13216 /* Must not copy any rtx that uses a pc-relative address. */
13218 static bool
13219 arm_cannot_copy_insn_p (rtx_insn *insn)
13221 /* The tls call insn cannot be copied, as it is paired with a data
13222 word. */
13223 if (recog_memoized (insn) == CODE_FOR_tlscall)
13224 return true;
13226 subrtx_iterator::array_type array;
13227 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13229 const_rtx x = *iter;
13230 if (GET_CODE (x) == UNSPEC
13231 && (XINT (x, 1) == UNSPEC_PIC_BASE
13232 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13233 return true;
13235 return false;
13238 enum rtx_code
13239 minmax_code (rtx x)
13241 enum rtx_code code = GET_CODE (x);
13243 switch (code)
13245 case SMAX:
13246 return GE;
13247 case SMIN:
13248 return LE;
13249 case UMIN:
13250 return LEU;
13251 case UMAX:
13252 return GEU;
13253 default:
13254 gcc_unreachable ();
13258 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13260 bool
13261 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13262 int *mask, bool *signed_sat)
13264 /* The high bound must be a power of two minus one. */
13265 int log = exact_log2 (INTVAL (hi_bound) + 1);
13266 if (log == -1)
13267 return false;
13269 /* The low bound is either zero (for usat) or one less than the
13270 negation of the high bound (for ssat). */
13271 if (INTVAL (lo_bound) == 0)
13273 if (mask)
13274 *mask = log;
13275 if (signed_sat)
13276 *signed_sat = false;
13278 return true;
13281 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13283 if (mask)
13284 *mask = log + 1;
13285 if (signed_sat)
13286 *signed_sat = true;
13288 return true;
13291 return false;
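/* Two worked examples (the bounds are chosen only for illustration): for an
   unsigned saturation to [0, 255], HI_BOUND is 255, so log = exact_log2 (256)
   = 8, LO_BOUND is 0, and we return *MASK = 8, *SIGNED_SAT = false (usat #8).
   For a signed saturation to [-128, 127], log = exact_log2 (128) = 7 and
   LO_BOUND == -HI_BOUND - 1, so we return *MASK = 8, *SIGNED_SAT = true
   (ssat #8).  */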
13294 /* Return 1 if memory locations are adjacent. */
13296 adjacent_mem_locations (rtx a, rtx b)
13298 /* We don't guarantee to preserve the order of these memory refs. */
13299 if (volatile_refs_p (a) || volatile_refs_p (b))
13300 return 0;
13302 if ((REG_P (XEXP (a, 0))
13303 || (GET_CODE (XEXP (a, 0)) == PLUS
13304 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13305 && (REG_P (XEXP (b, 0))
13306 || (GET_CODE (XEXP (b, 0)) == PLUS
13307 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13309 HOST_WIDE_INT val0 = 0, val1 = 0;
13310 rtx reg0, reg1;
13311 int val_diff;
13313 if (GET_CODE (XEXP (a, 0)) == PLUS)
13315 reg0 = XEXP (XEXP (a, 0), 0);
13316 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13318 else
13319 reg0 = XEXP (a, 0);
13321 if (GET_CODE (XEXP (b, 0)) == PLUS)
13323 reg1 = XEXP (XEXP (b, 0), 0);
13324 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13326 else
13327 reg1 = XEXP (b, 0);
13329 /* Don't accept any offset that will require multiple
13330 instructions to handle, since this would cause the
13331 arith_adjacentmem pattern to output an overlong sequence. */
13332 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13333 return 0;
13335 /* Don't allow an eliminable register: register elimination can make
13336 the offset too large. */
13337 if (arm_eliminable_register (reg0))
13338 return 0;
13340 val_diff = val1 - val0;
13342 if (arm_ld_sched)
13344 /* If the target has load delay slots, then there's no benefit
13345 to using an ldm instruction unless the offset is zero and
13346 we are optimizing for size. */
13347 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13348 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13349 && (val_diff == 4 || val_diff == -4));
13352 return ((REGNO (reg0) == REGNO (reg1))
13353 && (val_diff == 4 || val_diff == -4));
13356 return 0;
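/* For example, (mem (reg Rn)) and (mem (plus (reg Rn) (const_int 4))) differ
   by exactly 4 and are treated as adjacent, provided the base register is
   not eliminable and the scheduling/size conditions above are met.  */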
13359 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13360 for load operations, false for store operations. CONSECUTIVE is true
13361 if the register numbers in the operation must be consecutive in the register
13362 bank. RETURN_PC is true if the value is to be loaded into the PC.
13363 The pattern we are trying to match for load is:
13364 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13365 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13368 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13370 where
13371 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13372 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13373 3. If consecutive is TRUE, then for kth register being loaded,
13374 REGNO (R_dk) = REGNO (R_d0) + k.
13375 The pattern for store is similar. */
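/* An illustrative instance of the load pattern above (register numbers are
   arbitrary), loading two SImode values with no write-back:

     (parallel
       [(set (reg:SI 4) (mem:SI (reg:SI 0)))
        (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   The offset is 0, so the first element uses a bare (mem (reg)), and the
   register numbers increase down the list as required.  */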
13376 bool
13377 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13378 bool consecutive, bool return_pc)
13380 HOST_WIDE_INT count = XVECLEN (op, 0);
13381 rtx reg, mem, addr;
13382 unsigned regno;
13383 unsigned first_regno;
13384 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13385 rtx elt;
13386 bool addr_reg_in_reglist = false;
13387 bool update = false;
13388 int reg_increment;
13389 int offset_adj;
13390 int regs_per_val;
13392 /* If not in SImode, then registers must be consecutive
13393 (e.g., VLDM instructions for DFmode). */
13394 gcc_assert ((mode == SImode) || consecutive);
13395 /* Setting return_pc for stores is illegal. */
13396 gcc_assert (!return_pc || load);
13398 /* Set up the increments and the regs per val based on the mode. */
13399 reg_increment = GET_MODE_SIZE (mode);
13400 regs_per_val = reg_increment / 4;
13401 offset_adj = return_pc ? 1 : 0;
13403 if (count <= 1
13404 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13405 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13406 return false;
13408 /* Check if this is a write-back. */
13409 elt = XVECEXP (op, 0, offset_adj);
13410 if (GET_CODE (SET_SRC (elt)) == PLUS)
13412 i++;
13413 base = 1;
13414 update = true;
13416 /* The offset adjustment must be the number of registers being
13417 popped times the size of a single register. */
13418 if (!REG_P (SET_DEST (elt))
13419 || !REG_P (XEXP (SET_SRC (elt), 0))
13420 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13421 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13422 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13423 ((count - 1 - offset_adj) * reg_increment))
13424 return false;
13427 i = i + offset_adj;
13428 base = base + offset_adj;
13429 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13430 success depends on the type: VLDM can do just one reg,
13431 LDM must do at least two. */
13432 if ((count <= i) && (mode == SImode))
13433 return false;
13435 elt = XVECEXP (op, 0, i - 1);
13436 if (GET_CODE (elt) != SET)
13437 return false;
13439 if (load)
13441 reg = SET_DEST (elt);
13442 mem = SET_SRC (elt);
13444 else
13446 reg = SET_SRC (elt);
13447 mem = SET_DEST (elt);
13450 if (!REG_P (reg) || !MEM_P (mem))
13451 return false;
13453 regno = REGNO (reg);
13454 first_regno = regno;
13455 addr = XEXP (mem, 0);
13456 if (GET_CODE (addr) == PLUS)
13458 if (!CONST_INT_P (XEXP (addr, 1)))
13459 return false;
13461 offset = INTVAL (XEXP (addr, 1));
13462 addr = XEXP (addr, 0);
13465 if (!REG_P (addr))
13466 return false;
13468 /* Don't allow SP to be loaded unless it is also the base register. It
13469 guarantees that SP is reset correctly when an LDM instruction
13470 is interrupted. Otherwise, we might end up with a corrupt stack. */
13471 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13472 return false;
13474 for (; i < count; i++)
13476 elt = XVECEXP (op, 0, i);
13477 if (GET_CODE (elt) != SET)
13478 return false;
13480 if (load)
13482 reg = SET_DEST (elt);
13483 mem = SET_SRC (elt);
13485 else
13487 reg = SET_SRC (elt);
13488 mem = SET_DEST (elt);
13491 if (!REG_P (reg)
13492 || GET_MODE (reg) != mode
13493 || REGNO (reg) <= regno
13494 || (consecutive
13495 && (REGNO (reg) !=
13496 (unsigned int) (first_regno + regs_per_val * (i - base))))
13497 /* Don't allow SP to be loaded unless it is also the base register. It
13498 guarantees that SP is reset correctly when an LDM instruction
13499 is interrupted. Otherwise, we might end up with a corrupt stack. */
13500 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13501 || !MEM_P (mem)
13502 || GET_MODE (mem) != mode
13503 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13504 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13505 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13506 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13507 offset + (i - base) * reg_increment))
13508 && (!REG_P (XEXP (mem, 0))
13509 || offset + (i - base) * reg_increment != 0)))
13510 return false;
13512 regno = REGNO (reg);
13513 if (regno == REGNO (addr))
13514 addr_reg_in_reglist = true;
13517 if (load)
13519 if (update && addr_reg_in_reglist)
13520 return false;
13522 /* For Thumb-1, the address register is always modified - either by write-back
13523 or by an explicit load. If the pattern does not describe an update,
13524 then the address register must be in the list of loaded registers. */
13525 if (TARGET_THUMB1)
13526 return update || addr_reg_in_reglist;
13529 return true;
13532 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13533 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13534 instruction. ADD_OFFSET is nonzero if the base address register needs
13535 to be modified with an add instruction before we can use it. */
13537 static bool
13538 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13539 int nops, HOST_WIDE_INT add_offset)
13541 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13542 if the offset isn't small enough. The reason 2 ldrs are faster
13543 is because these ARMs are able to do more than one cache access
13544 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13545 whilst the ARM8 has a double bandwidth cache. This means that
13546 these cores can do both an instruction fetch and a data fetch in
13547 a single cycle, so the trick of calculating the address into a
13548 scratch register (one of the result regs) and then doing a load
13549 multiple actually becomes slower (and no smaller in code size).
13550 That is the transformation
13552 ldr rd1, [rbase + offset]
13553 ldr rd2, [rbase + offset + 4]
13557 add rd1, rbase, offset
13558 ldmia rd1, {rd1, rd2}
13560 produces worse code -- '3 cycles + any stalls on rd2' instead of
13561 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13562 access per cycle, the first sequence could never complete in less
13563 than 6 cycles, whereas the ldm sequence would only take 5 and
13564 would make better use of sequential accesses if not hitting the
13565 cache.
13567 We cheat here and test 'arm_ld_sched' which we currently know to
13568 only be true for the ARM8, ARM9 and StrongARM. If this ever
13569 changes, then the test below needs to be reworked. */
13570 if (nops == 2 && arm_ld_sched && add_offset != 0)
13571 return false;
13573 /* XScale has load-store double instructions, but they have stricter
13574 alignment requirements than load-store multiple, so we cannot
13575 use them.
13577 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13578 the pipeline until completion.
13580 NREGS CYCLES
1 3
2 4
3 5
4 6
13586 An ldr instruction takes 1-3 cycles, but does not block the
13587 pipeline.
13589 NREGS CYCLES
13590 1 1-3
13591 2 2-6
13592 3 3-9
13593 4 4-12
13595 Best case ldr will always win. However, the more ldr instructions
13596 we issue, the less likely we are to be able to schedule them well.
13597 Using ldr instructions also increases code size.
13599 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13600 for counts of 3 or 4 regs. */
13601 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13602 return false;
13603 return true;
13606 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13607 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13608 an array ORDER which describes the sequence to use when accessing the
13609 offsets that produces an ascending order. In this sequence, each
13610 offset must be larger by exactly 4 than the previous one. ORDER[0]
13611 must have been filled in with the lowest offset by the caller.
13612 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13613 we use to verify that ORDER produces an ascending order of registers.
13614 Return true if it was possible to construct such an order, false if
13615 not. */
13617 static bool
13618 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13619 int *unsorted_regs)
13621 int i;
13622 for (i = 1; i < nops; i++)
13624 int j;
13626 order[i] = order[i - 1];
13627 for (j = 0; j < nops; j++)
13628 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13630 /* We must find exactly one offset that is higher than the
13631 previous one by 4. */
13632 if (order[i] != order[i - 1])
13633 return false;
13634 order[i] = j;
13636 if (order[i] == order[i - 1])
13637 return false;
13638 /* The register numbers must be ascending. */
13639 if (unsorted_regs != NULL
13640 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13641 return false;
13643 return true;
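/* A worked example of the ordering above (illustrative only, not part of the
   original source): with NOPS == 4, UNSORTED_OFFSETS == {4, 12, 0, 8} and
   ORDER[0] == 2 (the index of the lowest offset, 0), the loop finds offsets
   4, 8 and 12 in turn and produces ORDER == {2, 0, 3, 1}.  If any offset other
   than previous + 4 were present (say 16 instead of 8), or if two offsets
   matched, the function would return false.  */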
13646 /* Used to determine in a peephole whether a sequence of load
13647 instructions can be changed into a load-multiple instruction.
13648 NOPS is the number of separate load instructions we are examining. The
13649 first NOPS entries in OPERANDS are the destination registers, the
13650 next NOPS entries are memory operands. If this function is
13651 successful, *BASE is set to the common base register of the memory
13652 accesses; *LOAD_OFFSET is set to the first memory location's offset
13653 from that base register.
13654 REGS is an array filled in with the destination register numbers.
13655 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13656 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13657 the sequence of registers in REGS matches the loads from ascending memory
13658 locations, and the function verifies that the register numbers are
13659 themselves ascending. If CHECK_REGS is false, the register numbers
13660 are stored in the order they are found in the operands. */
13661 static int
13662 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13663 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13665 int unsorted_regs[MAX_LDM_STM_OPS];
13666 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13667 int order[MAX_LDM_STM_OPS];
13668 rtx base_reg_rtx = NULL;
13669 int base_reg = -1;
13670 int i, ldm_case;
13672 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13673 easily extended if required. */
13674 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13676 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13678 /* Loop over the operands and check that the memory references are
13679 suitable (i.e. immediate offsets from the same base register). At
13680 the same time, extract the target register, and the memory
13681 offsets. */
13682 for (i = 0; i < nops; i++)
13684 rtx reg;
13685 rtx offset;
13687 /* Convert a subreg of a mem into the mem itself. */
13688 if (GET_CODE (operands[nops + i]) == SUBREG)
13689 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13691 gcc_assert (MEM_P (operands[nops + i]));
13693 /* Don't reorder volatile memory references; it doesn't seem worth
13694 looking for the case where the order is ok anyway. */
13695 if (MEM_VOLATILE_P (operands[nops + i]))
13696 return 0;
13698 offset = const0_rtx;
13700 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13701 || (GET_CODE (reg) == SUBREG
13702 && REG_P (reg = SUBREG_REG (reg))))
13703 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13704 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13705 || (GET_CODE (reg) == SUBREG
13706 && REG_P (reg = SUBREG_REG (reg))))
13707 && (CONST_INT_P (offset
13708 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13710 if (i == 0)
13712 base_reg = REGNO (reg);
13713 base_reg_rtx = reg;
13714 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13715 return 0;
13717 else if (base_reg != (int) REGNO (reg))
13718 /* Not addressed from the same base register. */
13719 return 0;
13721 unsorted_regs[i] = (REG_P (operands[i])
13722 ? REGNO (operands[i])
13723 : REGNO (SUBREG_REG (operands[i])));
13725 /* If it isn't an integer register, or if it overwrites the
13726 base register but isn't the last insn in the list, then
13727 we can't do this. */
13728 if (unsorted_regs[i] < 0
13729 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13730 || unsorted_regs[i] > 14
13731 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13732 return 0;
13734 /* Don't allow SP to be loaded unless it is also the base
13735 register. It guarantees that SP is reset correctly when
13736 an LDM instruction is interrupted. Otherwise, we might
13737 end up with a corrupt stack. */
13738 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13739 return 0;
13741 unsorted_offsets[i] = INTVAL (offset);
13742 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13743 order[0] = i;
13745 else
13746 /* Not a suitable memory address. */
13747 return 0;
13750 /* All the useful information has now been extracted from the
13751 operands into unsorted_regs and unsorted_offsets; additionally,
13752 order[0] has been set to the lowest offset in the list. Sort
13753 the offsets into order, verifying that they are adjacent, and
13754 check that the register numbers are ascending. */
13755 if (!compute_offset_order (nops, unsorted_offsets, order,
13756 check_regs ? unsorted_regs : NULL))
13757 return 0;
13759 if (saved_order)
13760 memcpy (saved_order, order, sizeof order);
13762 if (base)
13764 *base = base_reg;
13766 for (i = 0; i < nops; i++)
13767 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13769 *load_offset = unsorted_offsets[order[0]];
13772 if (TARGET_THUMB1
13773 && !peep2_reg_dead_p (nops, base_reg_rtx))
13774 return 0;
13776 if (unsorted_offsets[order[0]] == 0)
13777 ldm_case = 1; /* ldmia */
13778 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13779 ldm_case = 2; /* ldmib */
13780 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13781 ldm_case = 3; /* ldmda */
13782 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13783 ldm_case = 4; /* ldmdb */
13784 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13785 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13786 ldm_case = 5;
13787 else
13788 return 0;
13790 if (!multiple_operation_profitable_p (false, nops,
13791 ldm_case == 5
13792 ? unsorted_offsets[order[0]] : 0))
13793 return 0;
13795 return ldm_case;
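/* Illustrative example (not from the original source): four loads
      ldr r0, [r4, #4]
      ldr r1, [r4, #8]
      ldr r2, [r4, #12]
      ldr r3, [r4, #16]
   share base register r4, have adjacent offsets starting at 4 and ascending
   destination registers, so on ARM this function returns ldm_case 2 and the
   peephole can replace the sequence with a single "ldmib r4, {r0-r3}".  */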
13798 /* Used to determine in a peephole whether a sequence of store instructions can
13799 be changed into a store-multiple instruction.
13800 NOPS is the number of separate store instructions we are examining.
13801 NOPS_TOTAL is the total number of instructions recognized by the peephole
13802 pattern.
13803 The first NOPS entries in OPERANDS are the source registers, the next
13804 NOPS entries are memory operands. If this function is successful, *BASE is
13805 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13806 to the first memory location's offset from that base register. REGS is an
13807 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13808 likewise filled with the corresponding rtx's.
13809 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13810 numbers to an ascending order of stores.
13811 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13812 from ascending memory locations, and the function verifies that the register
13813 numbers are themselves ascending. If CHECK_REGS is false, the register
13814 numbers are stored in the order they are found in the operands. */
13815 static int
13816 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13817 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13818 HOST_WIDE_INT *load_offset, bool check_regs)
13820 int unsorted_regs[MAX_LDM_STM_OPS];
13821 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13822 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13823 int order[MAX_LDM_STM_OPS];
13824 int base_reg = -1;
13825 rtx base_reg_rtx = NULL;
13826 int i, stm_case;
13828 /* Write-back of the base register is currently only supported for Thumb-1. */
13829 int base_writeback = TARGET_THUMB1;
13831 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13832 easily extended if required. */
13833 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13835 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13837 /* Loop over the operands and check that the memory references are
13838 suitable (i.e. immediate offsets from the same base register). At
13839 the same time, extract the target register, and the memory
13840 offsets. */
13841 for (i = 0; i < nops; i++)
13843 rtx reg;
13844 rtx offset;
13846 /* Convert a subreg of a mem into the mem itself. */
13847 if (GET_CODE (operands[nops + i]) == SUBREG)
13848 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13850 gcc_assert (MEM_P (operands[nops + i]));
13852 /* Don't reorder volatile memory references; it doesn't seem worth
13853 looking for the case where the order is ok anyway. */
13854 if (MEM_VOLATILE_P (operands[nops + i]))
13855 return 0;
13857 offset = const0_rtx;
13859 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13860 || (GET_CODE (reg) == SUBREG
13861 && REG_P (reg = SUBREG_REG (reg))))
13862 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13863 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13864 || (GET_CODE (reg) == SUBREG
13865 && REG_P (reg = SUBREG_REG (reg))))
13866 && (CONST_INT_P (offset
13867 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13869 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13870 ? operands[i] : SUBREG_REG (operands[i]));
13871 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13873 if (i == 0)
13875 base_reg = REGNO (reg);
13876 base_reg_rtx = reg;
13877 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13878 return 0;
13880 else if (base_reg != (int) REGNO (reg))
13881 /* Not addressed from the same base register. */
13882 return 0;
13884 /* If it isn't an integer register, then we can't do this. */
13885 if (unsorted_regs[i] < 0
13886 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13887 /* The effects are unpredictable if the base register is
13888 both updated and stored. */
13889 || (base_writeback && unsorted_regs[i] == base_reg)
13890 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13891 || unsorted_regs[i] > 14)
13892 return 0;
13894 unsorted_offsets[i] = INTVAL (offset);
13895 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13896 order[0] = i;
13898 else
13899 /* Not a suitable memory address. */
13900 return 0;
13903 /* All the useful information has now been extracted from the
13904 operands into unsorted_regs and unsorted_offsets; additionally,
13905 order[0] has been set to the lowest offset in the list. Sort
13906 the offsets into order, verifying that they are adjacent, and
13907 check that the register numbers are ascending. */
13908 if (!compute_offset_order (nops, unsorted_offsets, order,
13909 check_regs ? unsorted_regs : NULL))
13910 return 0;
13912 if (saved_order)
13913 memcpy (saved_order, order, sizeof order);
13915 if (base)
13917 *base = base_reg;
13919 for (i = 0; i < nops; i++)
13921 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13922 if (reg_rtxs)
13923 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13926 *load_offset = unsorted_offsets[order[0]];
13929 if (TARGET_THUMB1
13930 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13931 return 0;
13933 if (unsorted_offsets[order[0]] == 0)
13934 stm_case = 1; /* stmia */
13935 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13936 stm_case = 2; /* stmib */
13937 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13938 stm_case = 3; /* stmda */
13939 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13940 stm_case = 4; /* stmdb */
13941 else
13942 return 0;
13944 if (!multiple_operation_profitable_p (false, nops, 0))
13945 return 0;
13947 return stm_case;
13950 /* Routines for use in generating RTL. */
13952 /* Generate a load-multiple instruction. COUNT is the number of loads in
13953 the instruction; REGS and MEMS are arrays containing the operands.
13954 BASEREG is the base register to be used in addressing the memory operands.
13955 WBACK_OFFSET is nonzero if the instruction should update the base
13956 register. */
13958 static rtx
13959 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13960 HOST_WIDE_INT wback_offset)
13962 int i = 0, j;
13963 rtx result;
13965 if (!multiple_operation_profitable_p (false, count, 0))
13967 rtx seq;
13969 start_sequence ();
13971 for (i = 0; i < count; i++)
13972 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13974 if (wback_offset != 0)
13975 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13977 seq = get_insns ();
13978 end_sequence ();
13980 return seq;
13983 result = gen_rtx_PARALLEL (VOIDmode,
13984 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13985 if (wback_offset != 0)
13987 XVECEXP (result, 0, 0)
13988 = gen_rtx_SET (VOIDmode, basereg,
13989 plus_constant (Pmode, basereg, wback_offset));
13990 i = 1;
13991 count++;
13994 for (j = 0; i < count; i++, j++)
13995 XVECEXP (result, 0, i)
13996 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13998 return result;
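/* Sketch of the RTL built above (illustrative): for COUNT == 2 with a
   write-back offset of 8, the function returns

      (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                 (set (reg r_a) (mem m_a))
                 (set (reg r_b) (mem m_b))])

   where r_a/r_b and m_a/m_b stand for the entries of REGS and MEMS.  With
   WBACK_OFFSET == 0 the base-register update is simply omitted.  */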
14001 /* Generate a store-multiple instruction. COUNT is the number of stores in
14002 the instruction; REGS and MEMS are arrays containing the operands.
14003 BASEREG is the base register to be used in addressing the memory operands.
14004 WBACK_OFFSET is nonzero if the instruction should update the base
14005 register. */
14007 static rtx
14008 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14009 HOST_WIDE_INT wback_offset)
14011 int i = 0, j;
14012 rtx result;
14014 if (GET_CODE (basereg) == PLUS)
14015 basereg = XEXP (basereg, 0);
14017 if (!multiple_operation_profitable_p (false, count, 0))
14019 rtx seq;
14021 start_sequence ();
14023 for (i = 0; i < count; i++)
14024 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14026 if (wback_offset != 0)
14027 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14029 seq = get_insns ();
14030 end_sequence ();
14032 return seq;
14035 result = gen_rtx_PARALLEL (VOIDmode,
14036 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14037 if (wback_offset != 0)
14039 XVECEXP (result, 0, 0)
14040 = gen_rtx_SET (VOIDmode, basereg,
14041 plus_constant (Pmode, basereg, wback_offset));
14042 i = 1;
14043 count++;
14046 for (j = 0; i < count; i++, j++)
14047 XVECEXP (result, 0, i)
14048 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14050 return result;
14053 /* Generate either a load-multiple or a store-multiple instruction. This
14054 function can be used in situations where we can start with a single MEM
14055 rtx and adjust its address upwards.
14056 COUNT is the number of operations in the instruction, not counting a
14057 possible update of the base register. REGS is an array containing the
14058 register operands.
14059 BASEREG is the base register to be used in addressing the memory operands,
14060 which are constructed from BASEMEM.
14061 WRITE_BACK specifies whether the generated instruction should include an
14062 update of the base register.
14063 OFFSETP is used to pass an offset to and from this function; this offset
14064 is not used when constructing the address (instead BASEMEM should have an
14065 appropriate offset in its address); it is used only for setting
14066 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14068 static rtx
14069 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14070 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14072 rtx mems[MAX_LDM_STM_OPS];
14073 HOST_WIDE_INT offset = *offsetp;
14074 int i;
14076 gcc_assert (count <= MAX_LDM_STM_OPS);
14078 if (GET_CODE (basereg) == PLUS)
14079 basereg = XEXP (basereg, 0);
14081 for (i = 0; i < count; i++)
14083 rtx addr = plus_constant (Pmode, basereg, i * 4);
14084 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14085 offset += 4;
14088 if (write_back)
14089 *offsetp = offset;
14091 if (is_load)
14092 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14093 write_back ? 4 * count : 0);
14094 else
14095 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14096 write_back ? 4 * count : 0);
14099 rtx
14100 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14101 rtx basemem, HOST_WIDE_INT *offsetp)
14103 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14104 offsetp);
14107 rtx
14108 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14109 rtx basemem, HOST_WIDE_INT *offsetp)
14111 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14112 offsetp);
14115 /* Called from a peephole2 expander to turn a sequence of loads into an
14116 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14117 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14118 is true if we can reorder the registers because they are used commutatively
14119 subsequently.
14120 Returns true iff we could generate a new instruction. */
14122 bool
14123 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14125 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14126 rtx mems[MAX_LDM_STM_OPS];
14127 int i, j, base_reg;
14128 rtx base_reg_rtx;
14129 HOST_WIDE_INT offset;
14130 int write_back = FALSE;
14131 int ldm_case;
14132 rtx addr;
14134 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14135 &base_reg, &offset, !sort_regs);
14137 if (ldm_case == 0)
14138 return false;
14140 if (sort_regs)
14141 for (i = 0; i < nops - 1; i++)
14142 for (j = i + 1; j < nops; j++)
14143 if (regs[i] > regs[j])
14145 int t = regs[i];
14146 regs[i] = regs[j];
14147 regs[j] = t;
14149 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14151 if (TARGET_THUMB1)
14153 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14154 gcc_assert (ldm_case == 1 || ldm_case == 5);
14155 write_back = TRUE;
14158 if (ldm_case == 5)
14160 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14161 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14162 offset = 0;
14163 if (!TARGET_THUMB1)
14165 base_reg = regs[0];
14166 base_reg_rtx = newbase;
14170 for (i = 0; i < nops; i++)
14172 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14173 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14174 SImode, addr, 0);
14176 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14177 write_back ? offset + i * 4 : 0));
14178 return true;
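/* Illustrative example (not from the original source): the peephole can turn
      ldr r0, [r5]
      ldr r1, [r5, #4]
   into a single "ldmia r5, {r0, r1}".  On Thumb-1, where the pattern always
   writes the base register back, the same match additionally requires r5 to
   be dead afterwards and emits "ldmia r5!, {r0, r1}" instead.  */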
14181 /* Called from a peephole2 expander to turn a sequence of stores into an
14182 STM instruction. OPERANDS are the operands found by the peephole matcher;
14183 NOPS indicates how many separate stores we are trying to combine.
14184 Returns true iff we could generate a new instruction. */
14186 bool
14187 gen_stm_seq (rtx *operands, int nops)
14189 int i;
14190 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14191 rtx mems[MAX_LDM_STM_OPS];
14192 int base_reg;
14193 rtx base_reg_rtx;
14194 HOST_WIDE_INT offset;
14195 int write_back = FALSE;
14196 int stm_case;
14197 rtx addr;
14198 bool base_reg_dies;
14200 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14201 mem_order, &base_reg, &offset, true);
14203 if (stm_case == 0)
14204 return false;
14206 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14208 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14209 if (TARGET_THUMB1)
14211 gcc_assert (base_reg_dies);
14212 write_back = TRUE;
14215 if (stm_case == 5)
14217 gcc_assert (base_reg_dies);
14218 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14219 offset = 0;
14222 addr = plus_constant (Pmode, base_reg_rtx, offset);
14224 for (i = 0; i < nops; i++)
14226 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14227 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14228 SImode, addr, 0);
14230 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14231 write_back ? offset + i * 4 : 0));
14232 return true;
14235 /* Called from a peephole2 expander to turn a sequence of stores that are
14236 preceded by constant loads into an STM instruction. OPERANDS are the
14237 operands found by the peephole matcher; NOPS indicates how many
14238 separate stores we are trying to combine; there are 2 * NOPS
14239 instructions in the peephole.
14240 Returns true iff we could generate a new instruction. */
14242 bool
14243 gen_const_stm_seq (rtx *operands, int nops)
14245 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14246 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14247 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14248 rtx mems[MAX_LDM_STM_OPS];
14249 int base_reg;
14250 rtx base_reg_rtx;
14251 HOST_WIDE_INT offset;
14252 int write_back = FALSE;
14253 int stm_case;
14254 rtx addr;
14255 bool base_reg_dies;
14256 int i, j;
14257 HARD_REG_SET allocated;
14259 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14260 mem_order, &base_reg, &offset, false);
14262 if (stm_case == 0)
14263 return false;
14265 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14267 /* If the same register is used more than once, try to find a free
14268 register. */
14269 CLEAR_HARD_REG_SET (allocated);
14270 for (i = 0; i < nops; i++)
14272 for (j = i + 1; j < nops; j++)
14273 if (regs[i] == regs[j])
14275 rtx t = peep2_find_free_register (0, nops * 2,
14276 TARGET_THUMB1 ? "l" : "r",
14277 SImode, &allocated);
14278 if (t == NULL_RTX)
14279 return false;
14280 reg_rtxs[i] = t;
14281 regs[i] = REGNO (t);
14285 /* Compute an ordering that maps the register numbers to an ascending
14286 sequence. */
14287 reg_order[0] = 0;
14288 for (i = 0; i < nops; i++)
14289 if (regs[i] < regs[reg_order[0]])
14290 reg_order[0] = i;
14292 for (i = 1; i < nops; i++)
14294 int this_order = reg_order[i - 1];
14295 for (j = 0; j < nops; j++)
14296 if (regs[j] > regs[reg_order[i - 1]]
14297 && (this_order == reg_order[i - 1]
14298 || regs[j] < regs[this_order]))
14299 this_order = j;
14300 reg_order[i] = this_order;
14303 /* Ensure that registers that must be live after the instruction end
14304 up with the correct value. */
14305 for (i = 0; i < nops; i++)
14307 int this_order = reg_order[i];
14308 if ((this_order != mem_order[i]
14309 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14310 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14311 return false;
14314 /* Load the constants. */
14315 for (i = 0; i < nops; i++)
14317 rtx op = operands[2 * nops + mem_order[i]];
14318 sorted_regs[i] = regs[reg_order[i]];
14319 emit_move_insn (reg_rtxs[reg_order[i]], op);
14322 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14324 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14325 if (TARGET_THUMB1)
14327 gcc_assert (base_reg_dies);
14328 write_back = TRUE;
14331 if (stm_case == 5)
14333 gcc_assert (base_reg_dies);
14334 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14335 offset = 0;
14338 addr = plus_constant (Pmode, base_reg_rtx, offset);
14340 for (i = 0; i < nops; i++)
14342 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14343 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14344 SImode, addr, 0);
14346 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14347 write_back ? offset + i * 4 : 0));
14348 return true;
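/* Illustrative example (not from the original source): a matched sequence of
      mov r0, #1
      str r0, [r4]
      mov r0, #2
      str r0, [r4, #4]
   reuses r0, so a free register (say r1, found by peep2_find_free_register)
   is substituted, the constants are loaded first, and the stores collapse to
      mov r0, #1
      mov r1, #2
      stmia r4, {r0, r1}
   provided the final value of r0 is not needed afterwards (with write-back on
   Thumb-1, where r4 must also be dead afterwards).  */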
14351 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14352 unaligned copies on processors which support unaligned semantics for those
14353 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14354 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14355 An interleave factor of 1 (the minimum) will perform no interleaving.
14356 Load/store multiple are used for aligned addresses where possible. */
14358 static void
14359 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14360 HOST_WIDE_INT length,
14361 unsigned int interleave_factor)
14363 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14364 int *regnos = XALLOCAVEC (int, interleave_factor);
14365 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14366 HOST_WIDE_INT i, j;
14367 HOST_WIDE_INT remaining = length, words;
14368 rtx halfword_tmp = NULL, byte_tmp = NULL;
14369 rtx dst, src;
14370 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14371 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14372 HOST_WIDE_INT srcoffset, dstoffset;
14373 HOST_WIDE_INT src_autoinc, dst_autoinc;
14374 rtx mem, addr;
14376 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14378 /* Use hard registers if we have aligned source or destination so we can use
14379 load/store multiple with contiguous registers. */
14380 if (dst_aligned || src_aligned)
14381 for (i = 0; i < interleave_factor; i++)
14382 regs[i] = gen_rtx_REG (SImode, i);
14383 else
14384 for (i = 0; i < interleave_factor; i++)
14385 regs[i] = gen_reg_rtx (SImode);
14387 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14388 src = copy_addr_to_reg (XEXP (srcbase, 0));
14390 srcoffset = dstoffset = 0;
14392 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14393 For copying the last bytes we want to subtract this offset again. */
14394 src_autoinc = dst_autoinc = 0;
14396 for (i = 0; i < interleave_factor; i++)
14397 regnos[i] = i;
14399 /* Copy BLOCK_SIZE_BYTES chunks. */
14401 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14403 /* Load words. */
14404 if (src_aligned && interleave_factor > 1)
14406 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14407 TRUE, srcbase, &srcoffset));
14408 src_autoinc += UNITS_PER_WORD * interleave_factor;
14410 else
14412 for (j = 0; j < interleave_factor; j++)
14414 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14415 - src_autoinc));
14416 mem = adjust_automodify_address (srcbase, SImode, addr,
14417 srcoffset + j * UNITS_PER_WORD);
14418 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14420 srcoffset += block_size_bytes;
14423 /* Store words. */
14424 if (dst_aligned && interleave_factor > 1)
14426 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14427 TRUE, dstbase, &dstoffset));
14428 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14430 else
14432 for (j = 0; j < interleave_factor; j++)
14434 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14435 - dst_autoinc));
14436 mem = adjust_automodify_address (dstbase, SImode, addr,
14437 dstoffset + j * UNITS_PER_WORD);
14438 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14440 dstoffset += block_size_bytes;
14443 remaining -= block_size_bytes;
14446 /* Copy any whole words left (note these aren't interleaved with any
14447 subsequent halfword/byte load/stores in the interests of simplicity). */
14449 words = remaining / UNITS_PER_WORD;
14451 gcc_assert (words < interleave_factor);
14453 if (src_aligned && words > 1)
14455 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14456 &srcoffset));
14457 src_autoinc += UNITS_PER_WORD * words;
14459 else
14461 for (j = 0; j < words; j++)
14463 addr = plus_constant (Pmode, src,
14464 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14465 mem = adjust_automodify_address (srcbase, SImode, addr,
14466 srcoffset + j * UNITS_PER_WORD);
14467 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14469 srcoffset += words * UNITS_PER_WORD;
14472 if (dst_aligned && words > 1)
14474 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14475 &dstoffset));
14476 dst_autoinc += words * UNITS_PER_WORD;
14478 else
14480 for (j = 0; j < words; j++)
14482 addr = plus_constant (Pmode, dst,
14483 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14484 mem = adjust_automodify_address (dstbase, SImode, addr,
14485 dstoffset + j * UNITS_PER_WORD);
14486 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14488 dstoffset += words * UNITS_PER_WORD;
14491 remaining -= words * UNITS_PER_WORD;
14493 gcc_assert (remaining < 4);
14495 /* Copy a halfword if necessary. */
14497 if (remaining >= 2)
14499 halfword_tmp = gen_reg_rtx (SImode);
14501 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14502 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14503 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14505 /* Either write out immediately, or delay until we've loaded the last
14506 byte, depending on interleave factor. */
14507 if (interleave_factor == 1)
14509 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14510 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14511 emit_insn (gen_unaligned_storehi (mem,
14512 gen_lowpart (HImode, halfword_tmp)));
14513 halfword_tmp = NULL;
14514 dstoffset += 2;
14517 remaining -= 2;
14518 srcoffset += 2;
14521 gcc_assert (remaining < 2);
14523 /* Copy last byte. */
14525 if ((remaining & 1) != 0)
14527 byte_tmp = gen_reg_rtx (SImode);
14529 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14530 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14531 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14533 if (interleave_factor == 1)
14535 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14536 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14537 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14538 byte_tmp = NULL;
14539 dstoffset++;
14542 remaining--;
14543 srcoffset++;
14546 /* Store last halfword if we haven't done so already. */
14548 if (halfword_tmp)
14550 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14551 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14552 emit_insn (gen_unaligned_storehi (mem,
14553 gen_lowpart (HImode, halfword_tmp)));
14554 dstoffset += 2;
14557 /* Likewise for last byte. */
14559 if (byte_tmp)
14561 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14562 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14563 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14564 dstoffset++;
14567 gcc_assert (remaining == 0 && srcoffset == dstoffset);
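/* Worked example (illustrative): copying 11 bytes with INTERLEAVE_FACTOR == 1
   and neither buffer word-aligned emits two unaligned word load/store pairs
   (bytes 0-7), one unaligned halfword pair (bytes 8-9) and one byte pair
   (byte 10).  With word-aligned buffers and a larger interleave factor, the
   whole-word portion is emitted as ldm/stm of contiguous low registers
   instead.  */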
14570 /* From mips_adjust_block_mem:
14572 Helper function for doing a loop-based block operation on memory
14573 reference MEM. Each iteration of the loop will operate on LENGTH
14574 bytes of MEM.
14576 Create a new base register for use within the loop and point it to
14577 the start of MEM. Create a new memory reference that uses this
14578 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14580 static void
14581 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14582 rtx *loop_mem)
14584 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14586 /* Although the new mem does not refer to a known location,
14587 it does keep up to LENGTH bytes of alignment. */
14588 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14589 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14592 /* From mips_block_move_loop:
14594 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14595 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14596 the memory regions do not overlap. */
14598 static void
14599 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14600 unsigned int interleave_factor,
14601 HOST_WIDE_INT bytes_per_iter)
14603 rtx src_reg, dest_reg, final_src, test;
14604 HOST_WIDE_INT leftover;
14606 leftover = length % bytes_per_iter;
14607 length -= leftover;
14609 /* Create registers and memory references for use within the loop. */
14610 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14611 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14613 /* Calculate the value that SRC_REG should have after the last iteration of
14614 the loop. */
14615 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14616 0, 0, OPTAB_WIDEN);
14618 /* Emit the start of the loop. */
14619 rtx_code_label *label = gen_label_rtx ();
14620 emit_label (label);
14622 /* Emit the loop body. */
14623 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14624 interleave_factor);
14626 /* Move on to the next block. */
14627 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14628 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14630 /* Emit the loop condition. */
14631 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14632 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14634 /* Mop up any left-over bytes. */
14635 if (leftover)
14636 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14639 /* Emit a block move when either the source or destination is unaligned (not
14640 aligned to a four-byte boundary). This may need further tuning depending on
14641 core type, optimize_size setting, etc. */
14643 static int
14644 arm_movmemqi_unaligned (rtx *operands)
14646 HOST_WIDE_INT length = INTVAL (operands[2]);
14648 if (optimize_size)
14650 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14651 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14652 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14653 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14654 or dst_aligned though: allow more interleaving in those cases since the
14655 resulting code can be smaller. */
14656 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14657 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14659 if (length > 12)
14660 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14661 interleave_factor, bytes_per_iter);
14662 else
14663 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14664 interleave_factor);
14666 else
14668 /* Note that the loop created by arm_block_move_unaligned_loop may be
14669 subject to loop unrolling, which makes tuning this condition a little
14670 redundant. */
14671 if (length > 32)
14672 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14673 else
14674 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14677 return 1;
14680 int
14681 arm_gen_movmemqi (rtx *operands)
14683 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14684 HOST_WIDE_INT srcoffset, dstoffset;
14685 int i;
14686 rtx src, dst, srcbase, dstbase;
14687 rtx part_bytes_reg = NULL;
14688 rtx mem;
14690 if (!CONST_INT_P (operands[2])
14691 || !CONST_INT_P (operands[3])
14692 || INTVAL (operands[2]) > 64)
14693 return 0;
14695 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14696 return arm_movmemqi_unaligned (operands);
14698 if (INTVAL (operands[3]) & 3)
14699 return 0;
14701 dstbase = operands[0];
14702 srcbase = operands[1];
14704 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14705 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14707 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14708 out_words_to_go = INTVAL (operands[2]) / 4;
14709 last_bytes = INTVAL (operands[2]) & 3;
14710 dstoffset = srcoffset = 0;
14712 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14713 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14715 for (i = 0; in_words_to_go >= 2; i+=4)
14717 if (in_words_to_go > 4)
14718 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14719 TRUE, srcbase, &srcoffset));
14720 else
14721 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14722 src, FALSE, srcbase,
14723 &srcoffset));
14725 if (out_words_to_go)
14727 if (out_words_to_go > 4)
14728 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14729 TRUE, dstbase, &dstoffset));
14730 else if (out_words_to_go != 1)
14731 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14732 out_words_to_go, dst,
14733 (last_bytes == 0
14734 ? FALSE : TRUE),
14735 dstbase, &dstoffset));
14736 else
14738 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14739 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14740 if (last_bytes != 0)
14742 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14743 dstoffset += 4;
14748 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14749 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14752 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14753 if (out_words_to_go)
14755 rtx sreg;
14757 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14758 sreg = copy_to_reg (mem);
14760 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14761 emit_move_insn (mem, sreg);
14762 in_words_to_go--;
14764 gcc_assert (!in_words_to_go); /* Sanity check */
14767 if (in_words_to_go)
14769 gcc_assert (in_words_to_go > 0);
14771 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14772 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14775 gcc_assert (!last_bytes || part_bytes_reg);
14777 if (BYTES_BIG_ENDIAN && last_bytes)
14779 rtx tmp = gen_reg_rtx (SImode);
14781 /* The bytes we want are in the top end of the word. */
14782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14783 GEN_INT (8 * (4 - last_bytes))));
14784 part_bytes_reg = tmp;
14786 while (last_bytes)
14788 mem = adjust_automodify_address (dstbase, QImode,
14789 plus_constant (Pmode, dst,
14790 last_bytes - 1),
14791 dstoffset + last_bytes - 1);
14792 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14794 if (--last_bytes)
14796 tmp = gen_reg_rtx (SImode);
14797 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14798 part_bytes_reg = tmp;
14803 else
14805 if (last_bytes > 1)
14807 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14808 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14809 last_bytes -= 2;
14810 if (last_bytes)
14812 rtx tmp = gen_reg_rtx (SImode);
14813 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14814 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14815 part_bytes_reg = tmp;
14816 dstoffset += 2;
14820 if (last_bytes)
14822 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14823 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14827 return 1;
14830 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14831 MEM by the size of its mode. */
14832 inline static rtx
14833 next_consecutive_mem (rtx mem)
14835 machine_mode mode = GET_MODE (mem);
14836 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14837 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14839 return adjust_automodify_address (mem, mode, addr, offset);
14842 /* Copy using LDRD/STRD instructions whenever possible.
14843 Returns true upon success. */
14844 bool
14845 gen_movmem_ldrd_strd (rtx *operands)
14847 unsigned HOST_WIDE_INT len;
14848 HOST_WIDE_INT align;
14849 rtx src, dst, base;
14850 rtx reg0;
14851 bool src_aligned, dst_aligned;
14852 bool src_volatile, dst_volatile;
14854 gcc_assert (CONST_INT_P (operands[2]));
14855 gcc_assert (CONST_INT_P (operands[3]));
14857 len = UINTVAL (operands[2]);
14858 if (len > 64)
14859 return false;
14861 /* Maximum alignment we can assume for both src and dst buffers. */
14862 align = INTVAL (operands[3]);
14864 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14865 return false;
14867 /* Place src and dst addresses in registers
14868 and update the corresponding mem rtx. */
14869 dst = operands[0];
14870 dst_volatile = MEM_VOLATILE_P (dst);
14871 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14872 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14873 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14875 src = operands[1];
14876 src_volatile = MEM_VOLATILE_P (src);
14877 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14878 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14879 src = adjust_automodify_address (src, VOIDmode, base, 0);
14881 if (!unaligned_access && !(src_aligned && dst_aligned))
14882 return false;
14884 if (src_volatile || dst_volatile)
14885 return false;
14887 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14888 if (!(dst_aligned || src_aligned))
14889 return arm_gen_movmemqi (operands);
14891 src = adjust_address (src, DImode, 0);
14892 dst = adjust_address (dst, DImode, 0);
14893 while (len >= 8)
14895 len -= 8;
14896 reg0 = gen_reg_rtx (DImode);
14897 if (src_aligned)
14898 emit_move_insn (reg0, src);
14899 else
14900 emit_insn (gen_unaligned_loaddi (reg0, src));
14902 if (dst_aligned)
14903 emit_move_insn (dst, reg0);
14904 else
14905 emit_insn (gen_unaligned_storedi (dst, reg0));
14907 src = next_consecutive_mem (src);
14908 dst = next_consecutive_mem (dst);
14911 gcc_assert (len < 8);
14912 if (len >= 4)
14914 /* More than a word but less than a double-word to copy. Copy a word. */
14915 reg0 = gen_reg_rtx (SImode);
14916 src = adjust_address (src, SImode, 0);
14917 dst = adjust_address (dst, SImode, 0);
14918 if (src_aligned)
14919 emit_move_insn (reg0, src);
14920 else
14921 emit_insn (gen_unaligned_loadsi (reg0, src));
14923 if (dst_aligned)
14924 emit_move_insn (dst, reg0);
14925 else
14926 emit_insn (gen_unaligned_storesi (dst, reg0));
14928 src = next_consecutive_mem (src);
14929 dst = next_consecutive_mem (dst);
14930 len -= 4;
14933 if (len == 0)
14934 return true;
14936 /* Copy the remaining bytes. */
14937 if (len >= 2)
14939 dst = adjust_address (dst, HImode, 0);
14940 src = adjust_address (src, HImode, 0);
14941 reg0 = gen_reg_rtx (SImode);
14942 if (src_aligned)
14943 emit_insn (gen_zero_extendhisi2 (reg0, src));
14944 else
14945 emit_insn (gen_unaligned_loadhiu (reg0, src));
14947 if (dst_aligned)
14948 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14949 else
14950 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14952 src = next_consecutive_mem (src);
14953 dst = next_consecutive_mem (dst);
14954 if (len == 2)
14955 return true;
14958 dst = adjust_address (dst, QImode, 0);
14959 src = adjust_address (src, QImode, 0);
14960 reg0 = gen_reg_rtx (QImode);
14961 emit_move_insn (reg0, src);
14962 emit_move_insn (dst, reg0);
14963 return true;
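/* Illustrative example (not from the original source): for a 15-byte copy with
   both buffers word-aligned, the loop above typically emits one LDRD/STRD pair
   (8 bytes), then one LDR/STR (4 bytes), one LDRH/STRH (2 bytes) and one
   LDRB/STRB (1 byte).  If only one side is word-aligned, the other side uses
   the unaligned load/store patterns instead.  */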
14966 /* Select a dominance comparison mode if possible for a test of the general
14967 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14968 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14969 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14970 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14971 In all cases OP will be either EQ or NE, but we don't need to know which
14972 here. If we are unable to support a dominance comparison we return
14973 CC mode. This will then fail to match for the RTL expressions that
14974 generate this call. */
14975 machine_mode
14976 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14978 enum rtx_code cond1, cond2;
14979 int swapped = 0;
14981 /* Currently we will probably get the wrong result if the individual
14982 comparisons are not simple. This also ensures that it is safe to
14983 reverse a comparison if necessary. */
14984 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14985 != CCmode)
14986 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14987 != CCmode))
14988 return CCmode;
14990 /* The if_then_else variant of this tests the second condition if the
14991 first passes, but is true if the first fails. Reverse the first
14992 condition to get a true "inclusive-or" expression. */
14993 if (cond_or == DOM_CC_NX_OR_Y)
14994 cond1 = reverse_condition (cond1);
14996 /* If the comparisons are not equal, and one doesn't dominate the other,
14997 then we can't do this. */
14998 if (cond1 != cond2
14999 && !comparison_dominates_p (cond1, cond2)
15000 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15001 return CCmode;
15003 if (swapped)
15005 enum rtx_code temp = cond1;
15006 cond1 = cond2;
15007 cond2 = temp;
15010 switch (cond1)
15012 case EQ:
15013 if (cond_or == DOM_CC_X_AND_Y)
15014 return CC_DEQmode;
15016 switch (cond2)
15018 case EQ: return CC_DEQmode;
15019 case LE: return CC_DLEmode;
15020 case LEU: return CC_DLEUmode;
15021 case GE: return CC_DGEmode;
15022 case GEU: return CC_DGEUmode;
15023 default: gcc_unreachable ();
15026 case LT:
15027 if (cond_or == DOM_CC_X_AND_Y)
15028 return CC_DLTmode;
15030 switch (cond2)
15032 case LT:
15033 return CC_DLTmode;
15034 case LE:
15035 return CC_DLEmode;
15036 case NE:
15037 return CC_DNEmode;
15038 default:
15039 gcc_unreachable ();
15042 case GT:
15043 if (cond_or == DOM_CC_X_AND_Y)
15044 return CC_DGTmode;
15046 switch (cond2)
15048 case GT:
15049 return CC_DGTmode;
15050 case GE:
15051 return CC_DGEmode;
15052 case NE:
15053 return CC_DNEmode;
15054 default:
15055 gcc_unreachable ();
15058 case LTU:
15059 if (cond_or == DOM_CC_X_AND_Y)
15060 return CC_DLTUmode;
15062 switch (cond2)
15064 case LTU:
15065 return CC_DLTUmode;
15066 case LEU:
15067 return CC_DLEUmode;
15068 case NE:
15069 return CC_DNEmode;
15070 default:
15071 gcc_unreachable ();
15074 case GTU:
15075 if (cond_or == DOM_CC_X_AND_Y)
15076 return CC_DGTUmode;
15078 switch (cond2)
15080 case GTU:
15081 return CC_DGTUmode;
15082 case GEU:
15083 return CC_DGEUmode;
15084 case NE:
15085 return CC_DNEmode;
15086 default:
15087 gcc_unreachable ();
15090 /* The remaining cases only occur when both comparisons are the
15091 same. */
15092 case NE:
15093 gcc_assert (cond1 == cond2);
15094 return CC_DNEmode;
15096 case LE:
15097 gcc_assert (cond1 == cond2);
15098 return CC_DLEmode;
15100 case GE:
15101 gcc_assert (cond1 == cond2);
15102 return CC_DGEmode;
15104 case LEU:
15105 gcc_assert (cond1 == cond2);
15106 return CC_DLEUmode;
15108 case GEU:
15109 gcc_assert (cond1 == cond2);
15110 return CC_DGEUmode;
15112 default:
15113 gcc_unreachable ();
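/* A short example of the selection above (illustrative): for
   (x == 0) || (y == 0), both component comparisons are EQ, neither needs to be
   reversed or swapped, and the final switch returns CC_DEQmode; the combined
   test can then be carried out with a compare followed by a conditional
   compare (e.g. "cmp r0, #0" then "cmpne r1, #0" when x and y live in r0 and
   r1).  */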
15117 machine_mode
15118 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15120 /* All floating point compares return CCFP if it is an equality
15121 comparison, and CCFPE otherwise. */
15122 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15124 switch (op)
15126 case EQ:
15127 case NE:
15128 case UNORDERED:
15129 case ORDERED:
15130 case UNLT:
15131 case UNLE:
15132 case UNGT:
15133 case UNGE:
15134 case UNEQ:
15135 case LTGT:
15136 return CCFPmode;
15138 case LT:
15139 case LE:
15140 case GT:
15141 case GE:
15142 return CCFPEmode;
15144 default:
15145 gcc_unreachable ();
15149 /* A compare with a shifted operand. Because of canonicalization, the
15150 comparison will have to be swapped when we emit the assembler. */
15151 if (GET_MODE (y) == SImode
15152 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15153 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15154 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15155 || GET_CODE (x) == ROTATERT))
15156 return CC_SWPmode;
15158 /* This operation is performed swapped, but since we only rely on the Z
15159 flag we don't need an additional mode. */
15160 if (GET_MODE (y) == SImode
15161 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15162 && GET_CODE (x) == NEG
15163 && (op == EQ || op == NE))
15164 return CC_Zmode;
15166 /* This is a special case that is used by combine to allow a
15167 comparison of a shifted byte load to be split into a zero-extend
15168 followed by a comparison of the shifted integer (only valid for
15169 equalities and unsigned inequalities). */
15170 if (GET_MODE (x) == SImode
15171 && GET_CODE (x) == ASHIFT
15172 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15173 && GET_CODE (XEXP (x, 0)) == SUBREG
15174 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15175 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15176 && (op == EQ || op == NE
15177 || op == GEU || op == GTU || op == LTU || op == LEU)
15178 && CONST_INT_P (y))
15179 return CC_Zmode;
15181 /* A construct for a conditional compare: if the false arm contains
15182 0, then both conditions must be true; otherwise either condition
15183 must be true. Not all conditions are possible, so CCmode is
15184 returned if it can't be done. */
15185 if (GET_CODE (x) == IF_THEN_ELSE
15186 && (XEXP (x, 2) == const0_rtx
15187 || XEXP (x, 2) == const1_rtx)
15188 && COMPARISON_P (XEXP (x, 0))
15189 && COMPARISON_P (XEXP (x, 1)))
15190 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15191 INTVAL (XEXP (x, 2)));
15193 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15194 if (GET_CODE (x) == AND
15195 && (op == EQ || op == NE)
15196 && COMPARISON_P (XEXP (x, 0))
15197 && COMPARISON_P (XEXP (x, 1)))
15198 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15199 DOM_CC_X_AND_Y);
15201 if (GET_CODE (x) == IOR
15202 && (op == EQ || op == NE)
15203 && COMPARISON_P (XEXP (x, 0))
15204 && COMPARISON_P (XEXP (x, 1)))
15205 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15206 DOM_CC_X_OR_Y);
15208 /* An operation (on Thumb) where we want to test for a single bit.
15209 This is done by shifting that bit up into the top bit of a
15210 scratch register; we can then branch on the sign bit. */
15211 if (TARGET_THUMB1
15212 && GET_MODE (x) == SImode
15213 && (op == EQ || op == NE)
15214 && GET_CODE (x) == ZERO_EXTRACT
15215 && XEXP (x, 1) == const1_rtx)
15216 return CC_Nmode;
15218 /* For an operation that sets the condition codes as a side-effect, the
15219 V flag is not set correctly, so we can only use comparisons where
15220 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15221 instead.) */
15222 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15223 if (GET_MODE (x) == SImode
15224 && y == const0_rtx
15225 && (op == EQ || op == NE || op == LT || op == GE)
15226 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15227 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15228 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15229 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15230 || GET_CODE (x) == LSHIFTRT
15231 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15232 || GET_CODE (x) == ROTATERT
15233 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15234 return CC_NOOVmode;
15236 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15237 return CC_Zmode;
15239 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15240 && GET_CODE (x) == PLUS
15241 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15242 return CC_Cmode;
15244 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15246 switch (op)
15248 case EQ:
15249 case NE:
15250 /* A DImode comparison against zero can be implemented by
15251 or'ing the two halves together. */
15252 if (y == const0_rtx)
15253 return CC_Zmode;
15255 /* We can do an equality test in three Thumb instructions. */
15256 if (!TARGET_32BIT)
15257 return CC_Zmode;
15259 /* FALLTHROUGH */
15261 case LTU:
15262 case LEU:
15263 case GTU:
15264 case GEU:
15265 /* DImode unsigned comparisons can be implemented by cmp +
15266 cmpeq without a scratch register. Not worth doing in
15267 Thumb-2. */
15268 if (TARGET_32BIT)
15269 return CC_CZmode;
15271 /* FALLTHROUGH */
15273 case LT:
15274 case LE:
15275 case GT:
15276 case GE:
15277 /* DImode signed and unsigned comparisons can be implemented
15278 by cmp + sbcs with a scratch register, but that does not
15279 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15280 gcc_assert (op != EQ && op != NE);
15281 return CC_NCVmode;
15283 default:
15284 gcc_unreachable ();
15288 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15289 return GET_MODE (x);
15291 return CCmode;
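/* Two illustrative selections (not from the original source): comparing
   (a << 2) against a register b returns CC_SWPmode, because the assembler
   output compares the operands the other way round (e.g. "cmp r1, r0, lsl #2"
   when a and b live in r0 and r1), so the condition must be swapped; comparing
   a DImode value against zero for equality returns CC_Zmode, since the two
   halves can simply be ORed together and tested.  */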
15294 /* X and Y are two things to compare using CODE. Emit the compare insn and
15295 return the rtx for register 0 in the proper mode. FP means this is a
15296 floating point compare: I don't think that it is needed on the arm. */
15297 rtx
15298 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15300 machine_mode mode;
15301 rtx cc_reg;
15302 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15304 /* We might have X as a constant, Y as a register because of the predicates
15305 used for cmpdi. If so, force X to a register here. */
15306 if (dimode_comparison && !REG_P (x))
15307 x = force_reg (DImode, x);
15309 mode = SELECT_CC_MODE (code, x, y);
15310 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15312 if (dimode_comparison
15313 && mode != CC_CZmode)
15315 rtx clobber, set;
15317 /* To compare two non-zero values for equality, XOR them and
15318 then compare against zero. Not used for ARM mode; there
15319 CC_CZmode is cheaper. */
15320 if (mode == CC_Zmode && y != const0_rtx)
15322 gcc_assert (!reload_completed);
15323 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15324 y = const0_rtx;
15327 /* A scratch register is required. */
15328 if (reload_completed)
15329 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15330 else
15331 scratch = gen_rtx_SCRATCH (SImode);
15333 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15334 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15335 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15337 else
15338 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15340 return cc_reg;
15343 /* Generate a sequence of insns that will generate the correct return
15344 address mask depending on the physical architecture that the program
15345 is running on. */
15346 rtx
15347 arm_gen_return_addr_mask (void)
15349 rtx reg = gen_reg_rtx (Pmode);
15351 emit_insn (gen_return_addr_mask (reg));
15352 return reg;
15355 void
15356 arm_reload_in_hi (rtx *operands)
15358 rtx ref = operands[1];
15359 rtx base, scratch;
15360 HOST_WIDE_INT offset = 0;
15362 if (GET_CODE (ref) == SUBREG)
15364 offset = SUBREG_BYTE (ref);
15365 ref = SUBREG_REG (ref);
15368 if (REG_P (ref))
15370 /* We have a pseudo which has been spilt onto the stack; there
15371 are two cases here: the first where there is a simple
15372 stack-slot replacement and a second where the stack-slot is
15373 out of range, or is used as a subreg. */
15374 if (reg_equiv_mem (REGNO (ref)))
15376 ref = reg_equiv_mem (REGNO (ref));
15377 base = find_replacement (&XEXP (ref, 0));
15379 else
15380 /* The slot is out of range, or was dressed up in a SUBREG. */
15381 base = reg_equiv_address (REGNO (ref));
15383 else
15384 base = find_replacement (&XEXP (ref, 0));
15386 /* Handle the case where the address is too complex to be offset by 1. */
15387 if (GET_CODE (base) == MINUS
15388 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15390 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15392 emit_set_insn (base_plus, base);
15393 base = base_plus;
15395 else if (GET_CODE (base) == PLUS)
15397 /* The addend must be CONST_INT, or we would have dealt with it above. */
15398 HOST_WIDE_INT hi, lo;
15400 offset += INTVAL (XEXP (base, 1));
15401 base = XEXP (base, 0);
15403 /* Rework the address into a legal sequence of insns. */
15404 /* Valid range for lo is -4095 -> 4095 */
15405 lo = (offset >= 0
15406 ? (offset & 0xfff)
15407 : -((-offset) & 0xfff));
15409 /* Corner case: if lo is the max offset then we would be out of range
15410 once we have added the additional 1 below, so bump the msb into the
15411 pre-loading insn(s). */
15412 if (lo == 4095)
15413 lo &= 0x7ff;
15415 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15416 ^ (HOST_WIDE_INT) 0x80000000)
15417 - (HOST_WIDE_INT) 0x80000000);
15419 gcc_assert (hi + lo == offset);
15421 if (hi != 0)
15423 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15425 /* Get the base address; addsi3 knows how to handle constants
15426 that require more than one insn. */
15427 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15428 base = base_plus;
15429 offset = lo;
15433 /* Operands[2] may overlap operands[0] (though it won't overlap
15434 operands[1]); that's why we asked for a DImode reg -- so we can
15435 use the half that does not overlap. */
15436 if (REGNO (operands[2]) == REGNO (operands[0]))
15437 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15438 else
15439 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15441 emit_insn (gen_zero_extendqisi2 (scratch,
15442 gen_rtx_MEM (QImode,
15443 plus_constant (Pmode, base,
15444 offset))));
15445 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15446 gen_rtx_MEM (QImode,
15447 plus_constant (Pmode, base,
15448 offset + 1))));
15449 if (!BYTES_BIG_ENDIAN)
15450 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15451 gen_rtx_IOR (SImode,
15452 gen_rtx_ASHIFT
15453 (SImode,
15454 gen_rtx_SUBREG (SImode, operands[0], 0),
15455 GEN_INT (8)),
15456 scratch));
15457 else
15458 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15459 gen_rtx_IOR (SImode,
15460 gen_rtx_ASHIFT (SImode, scratch,
15461 GEN_INT (8)),
15462 gen_rtx_SUBREG (SImode, operands[0], 0)));
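/* Worked example (a sketch, not part of the original code): reloading the
   little-endian halfword 0xBEEF stored as the bytes EF BE, the first
   zero-extending load puts 0xEF in SCRATCH and the second puts 0xBE in
   operands[0]; the IOR of (0xBE << 8) with 0xEF then reassembles 0xBEEF.
   The big-endian arm is the mirror image, shifting SCRATCH instead.  */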
15465 /* Handle storing a half-word to memory during reload by synthesizing as two
15466 byte stores. Take care not to clobber the input values until after we
15467 have moved them somewhere safe. This code assumes that if the DImode
15468 scratch in operands[2] overlaps either the input value or output address
15469 in some way, then that value must die in this insn (we absolutely need
15470 two scratch registers for some corner cases). */
15471 void
15472 arm_reload_out_hi (rtx *operands)
15474 rtx ref = operands[0];
15475 rtx outval = operands[1];
15476 rtx base, scratch;
15477 HOST_WIDE_INT offset = 0;
15479 if (GET_CODE (ref) == SUBREG)
15481 offset = SUBREG_BYTE (ref);
15482 ref = SUBREG_REG (ref);
15485 if (REG_P (ref))
15487 /* We have a pseudo which has been spilt onto the stack; there
15488 are two cases here: the first where there is a simple
15489 stack-slot replacement and a second where the stack-slot is
15490 out of range, or is used as a subreg. */
15491 if (reg_equiv_mem (REGNO (ref)))
15493 ref = reg_equiv_mem (REGNO (ref));
15494 base = find_replacement (&XEXP (ref, 0));
15496 else
15497 /* The slot is out of range, or was dressed up in a SUBREG. */
15498 base = reg_equiv_address (REGNO (ref));
15500 else
15501 base = find_replacement (&XEXP (ref, 0));
15503 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15505 /* Handle the case where the address is too complex to be offset by 1. */
15506 if (GET_CODE (base) == MINUS
15507 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15509 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15511 /* Be careful not to destroy OUTVAL. */
15512 if (reg_overlap_mentioned_p (base_plus, outval))
15514 /* Updating base_plus might destroy outval, see if we can
15515 swap the scratch and base_plus. */
15516 if (!reg_overlap_mentioned_p (scratch, outval))
15518 rtx tmp = scratch;
15519 scratch = base_plus;
15520 base_plus = tmp;
15522 else
15524 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15526 /* Be conservative and copy OUTVAL into the scratch now,
15527 this should only be necessary if outval is a subreg
15528 of something larger than a word. */
15529 /* XXX Might this clobber base? I can't see how it can,
15530 since scratch is known to overlap with OUTVAL, and
15531 must be wider than a word. */
15532 emit_insn (gen_movhi (scratch_hi, outval));
15533 outval = scratch_hi;
15537 emit_set_insn (base_plus, base);
15538 base = base_plus;
15540 else if (GET_CODE (base) == PLUS)
15542 /* The addend must be CONST_INT, or we would have dealt with it above. */
15543 HOST_WIDE_INT hi, lo;
15545 offset += INTVAL (XEXP (base, 1));
15546 base = XEXP (base, 0);
15548 /* Rework the address into a legal sequence of insns. */
15549 /* Valid range for lo is -4095 -> 4095 */
15550 lo = (offset >= 0
15551 ? (offset & 0xfff)
15552 : -((-offset) & 0xfff));
15554 /* Corner case, if lo is the max offset then we would be out of range
15555 once we have added the additional 1 below, so bump the msb into the
15556 pre-loading insn(s). */
15557 if (lo == 4095)
15558 lo &= 0x7ff;
15560 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15561 ^ (HOST_WIDE_INT) 0x80000000)
15562 - (HOST_WIDE_INT) 0x80000000);
15564 gcc_assert (hi + lo == offset);
15566 if (hi != 0)
15568 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15570 /* Be careful not to destroy OUTVAL. */
15571 if (reg_overlap_mentioned_p (base_plus, outval))
15573 /* Updating base_plus might destroy outval, see if we
15574 can swap the scratch and base_plus. */
15575 if (!reg_overlap_mentioned_p (scratch, outval))
15577 rtx tmp = scratch;
15578 scratch = base_plus;
15579 base_plus = tmp;
15581 else
15583 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15585 /* Be conservative and copy outval into scratch now,
15586 this should only be necessary if outval is a
15587 subreg of something larger than a word. */
15588 /* XXX Might this clobber base? I can't see how it
15589 can, since scratch is known to overlap with
15590 outval. */
15591 emit_insn (gen_movhi (scratch_hi, outval));
15592 outval = scratch_hi;
15596 /* Get the base address; addsi3 knows how to handle constants
15597 that require more than one insn. */
15598 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15599 base = base_plus;
15600 offset = lo;
15604 if (BYTES_BIG_ENDIAN)
15606 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15607 plus_constant (Pmode, base,
15608 offset + 1)),
15609 gen_lowpart (QImode, outval)));
15610 emit_insn (gen_lshrsi3 (scratch,
15611 gen_rtx_SUBREG (SImode, outval, 0),
15612 GEN_INT (8)));
15613 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15614 offset)),
15615 gen_lowpart (QImode, scratch)));
15617 else
15619 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15620 offset)),
15621 gen_lowpart (QImode, outval)));
15622 emit_insn (gen_lshrsi3 (scratch,
15623 gen_rtx_SUBREG (SImode, outval, 0),
15624 GEN_INT (8)));
15625 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15626 plus_constant (Pmode, base,
15627 offset + 1)),
15628 gen_lowpart (QImode, scratch)));
15632 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15633 (padded to the size of a word) should be passed in a register. */
15635 static bool
15636 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15638 if (TARGET_AAPCS_BASED)
15639 return must_pass_in_stack_var_size (mode, type);
15640 else
15641 return must_pass_in_stack_var_size_or_pad (mode, type);
15645 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15646 Return true if an argument passed on the stack should be padded upwards,
15647 i.e. if the least-significant byte has useful data.
15648 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15649 aggregate types are placed in the lowest memory address. */
15651 bool
15652 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15654 if (!TARGET_AAPCS_BASED)
15655 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15657 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15658 return false;
15660 return true;
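/* Illustrative behaviour (not from the original source): on a big-endian
   AAPCS target this returns false for an integral argument such as a
   'short', but true for a two-byte structure, matching the rule above
   that small aggregates always go at the lowest address of their slot.  */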
15664 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15665 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15666 register has useful data, and return the opposite if the most
15667 significant byte does. */
15669 bool
15670 arm_pad_reg_upward (machine_mode mode,
15671 tree type, int first ATTRIBUTE_UNUSED)
15673 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15675 /* For AAPCS, small aggregates, small fixed-point types,
15676 and small complex types are always padded upwards. */
15677 if (type)
15679 if ((AGGREGATE_TYPE_P (type)
15680 || TREE_CODE (type) == COMPLEX_TYPE
15681 || FIXED_POINT_TYPE_P (type))
15682 && int_size_in_bytes (type) <= 4)
15683 return true;
15685 else
15687 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15688 && GET_MODE_SIZE (mode) <= 4)
15689 return true;
15693 /* Otherwise, use default padding. */
15694 return !BYTES_BIG_ENDIAN;
15697 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15698 assuming that the address in the base register is word aligned. */
15699 bool
15700 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15702 HOST_WIDE_INT max_offset;
15704 /* Offset must be a multiple of 4 in Thumb-2 mode. */
15705 if (TARGET_THUMB2 && ((offset & 3) != 0))
15706 return false;
15708 if (TARGET_THUMB2)
15709 max_offset = 1020;
15710 else if (TARGET_ARM)
15711 max_offset = 255;
15712 else
15713 return false;
15715 return ((offset <= max_offset) && (offset >= -max_offset));
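/* Examples (illustrative only): an offset of 8 is accepted in both ARM and
   Thumb-2 state; 1016 is accepted only in Thumb-2 state (a multiple of 4
   within +/-1020, but above the ARM-state limit of 255); 6 is accepted
   only in ARM state, since Thumb-2 LDRD/STRD offsets must be multiples
   of 4.  */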
15718 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15719 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15720 Assumes that the address in the base register RN is word aligned. Pattern
15721 guarantees that both memory accesses use the same base register,
15722 the offsets are constants within the range, and the gap between the offsets is 4.
15723 If reload has completed, check that the registers are legal. WBACK indicates whether the
15724 address is updated. LOAD indicates whether the memory access is a load or a store. */
15725 bool
15726 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15727 bool wback, bool load)
15729 unsigned int t, t2, n;
15731 if (!reload_completed)
15732 return true;
15734 if (!offset_ok_for_ldrd_strd (offset))
15735 return false;
15737 t = REGNO (rt);
15738 t2 = REGNO (rt2);
15739 n = REGNO (rn);
15741 if ((TARGET_THUMB2)
15742 && ((wback && (n == t || n == t2))
15743 || (t == SP_REGNUM)
15744 || (t == PC_REGNUM)
15745 || (t2 == SP_REGNUM)
15746 || (t2 == PC_REGNUM)
15747 || (!load && (n == PC_REGNUM))
15748 || (load && (t == t2))
15749 /* Triggers Cortex-M3 LDRD errata. */
15750 || (!wback && load && fix_cm3_ldrd && (n == t))))
15751 return false;
15753 if ((TARGET_ARM)
15754 && ((wback && (n == t || n == t2))
15755 || (t2 == PC_REGNUM)
15756 || (t % 2 != 0) /* First destination register is not even. */
15757 || (t2 != t + 1)
15758 /* PC can be used as base register (for offset addressing only),
15759 but it is deprecated. */
15760 || (n == PC_REGNUM)))
15761 return false;
15763 return true;
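/* Illustrative examples (not in the original source): in ARM state
   "ldrd r4, r5, [r6]" satisfies the checks above (even first register,
   consecutive pair, no PC), whereas "ldrd r5, r6, [r7]" is rejected
   because the first destination register is odd.  In Thumb-2 state the
   pair need not be consecutive, but SP and PC are rejected, as is a load
   with both destination registers equal, or a write-back where the base
   register is also one of the destinations.  */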
15766 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15767 operand MEM's address contains an immediate offset from the base
15768 register and has no side effects, in which case it sets BASE and
15769 OFFSET accordingly. */
15770 static bool
15771 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15773 rtx addr;
15775 gcc_assert (base != NULL && offset != NULL);
15777 /* TODO: Handle more general memory operand patterns, such as
15778 PRE_DEC and PRE_INC. */
15780 if (side_effects_p (mem))
15781 return false;
15783 /* Can't deal with subregs. */
15784 if (GET_CODE (mem) == SUBREG)
15785 return false;
15787 gcc_assert (MEM_P (mem));
15789 *offset = const0_rtx;
15791 addr = XEXP (mem, 0);
15793 /* If addr isn't valid for DImode, then we can't handle it. */
15794 if (!arm_legitimate_address_p (DImode, addr,
15795 reload_in_progress || reload_completed))
15796 return false;
15798 if (REG_P (addr))
15800 *base = addr;
15801 return true;
15803 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15805 *base = XEXP (addr, 0);
15806 *offset = XEXP (addr, 1);
15807 return (REG_P (*base) && CONST_INT_P (*offset));
15810 return false;
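/* Accepted address forms (illustrative): (mem (reg rN)) yields base rN and
   offset 0, and (mem (plus (reg rN) (const_int 8))) yields base rN and
   offset 8.  Auto-modify addresses are rejected earlier by side_effects_p,
   and anything else falls through to this failure return.  */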
15813 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15815 /* Called from a peephole2 to replace two word-size accesses with a
15816 single LDRD/STRD instruction. Returns true iff we can generate a
15817 new instruction sequence. That is, both accesses use the same base
15818 register and the gap between constant offsets is 4. This function
15819 may reorder its operands to match ldrd/strd RTL templates.
15820 OPERANDS are the operands found by the peephole matcher;
15821 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15822 corresponding memory operands. LOAD indicates whether the access
15823 is a load or a store. CONST_STORE indicates a store of constant
15824 integer values held in OPERANDS[4,5] and assumes that the pattern
15825 is 4 insns long, for the purpose of checking dead registers.
15826 COMMUTE indicates that register operands may be reordered. */
15827 bool
15828 gen_operands_ldrd_strd (rtx *operands, bool load,
15829 bool const_store, bool commute)
15831 int nops = 2;
15832 HOST_WIDE_INT offsets[2], offset;
15833 rtx base = NULL_RTX;
15834 rtx cur_base, cur_offset, tmp;
15835 int i, gap;
15836 HARD_REG_SET regset;
15838 gcc_assert (!const_store || !load);
15839 /* Check that the memory references are immediate offsets from the
15840 same base register. Extract the base register, the destination
15841 registers, and the corresponding memory offsets. */
15842 for (i = 0; i < nops; i++)
15844 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15845 return false;
15847 if (i == 0)
15848 base = cur_base;
15849 else if (REGNO (base) != REGNO (cur_base))
15850 return false;
15852 offsets[i] = INTVAL (cur_offset);
15853 if (GET_CODE (operands[i]) == SUBREG)
15855 tmp = SUBREG_REG (operands[i]);
15856 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15857 operands[i] = tmp;
15861 /* Make sure there is no dependency between the individual loads. */
15862 if (load && REGNO (operands[0]) == REGNO (base))
15863 return false; /* RAW */
15865 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15866 return false; /* WAW */
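/* For example (sketch, not from the original source), the pair
   "ldr r0, [r0]" followed by "ldr r1, [r0, #4]" is rejected above as a
   RAW hazard, because the first load overwrites the base register the
   second one still needs; "ldr r0, [r2]" followed by "ldr r0, [r2, #4]"
   is rejected as WAW, since a single LDRD cannot write the same register
   twice.  */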
15868 /* If the same input register is used in both stores
15869 when storing different constants, try to find a free register.
15870 For example, the code
15871 mov r0, 0
15872 str r0, [r2]
15873 mov r0, 1
15874 str r0, [r2, #4]
15875 can be transformed into
15876 mov r1, 0
15877 strd r1, r0, [r2]
15878 in Thumb mode assuming that r1 is free. */
15879 if (const_store
15880 && REGNO (operands[0]) == REGNO (operands[1])
15881 && INTVAL (operands[4]) != INTVAL (operands[5]))
15883 if (TARGET_THUMB2)
15885 CLEAR_HARD_REG_SET (regset);
15886 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15887 if (tmp == NULL_RTX)
15888 return false;
15890 /* Use the new register in the first load to ensure that
15891 if the original input register is not dead after the peephole,
15892 then it will have the correct constant value. */
15893 operands[0] = tmp;
15895 else if (TARGET_ARM)
15897 return false;
15898 int regno = REGNO (operands[0]);
15899 if (!peep2_reg_dead_p (4, operands[0]))
15901 /* When the input register is even and is not dead after the
15902 pattern, it has to hold the second constant but we cannot
15903 form a legal STRD in ARM mode with this register as the second
15904 register. */
15905 if (regno % 2 == 0)
15906 return false;
15908 /* Is regno-1 free? */
15909 SET_HARD_REG_SET (regset);
15910 CLEAR_HARD_REG_BIT(regset, regno - 1);
15911 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15912 if (tmp == NULL_RTX)
15913 return false;
15915 operands[0] = tmp;
15917 else
15919 /* Find a DImode register. */
15920 CLEAR_HARD_REG_SET (regset);
15921 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15922 if (tmp != NULL_RTX)
15924 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15925 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15927 else
15929 /* Can we use the input register to form a DI register? */
15930 SET_HARD_REG_SET (regset);
15931 CLEAR_HARD_REG_BIT(regset,
15932 regno % 2 == 0 ? regno + 1 : regno - 1);
15933 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15934 if (tmp == NULL_RTX)
15935 return false;
15936 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15940 gcc_assert (operands[0] != NULL_RTX);
15941 gcc_assert (operands[1] != NULL_RTX);
15942 gcc_assert (REGNO (operands[0]) % 2 == 0);
15943 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15947 /* Make sure the instructions are ordered with lower memory access first. */
15948 if (offsets[0] > offsets[1])
15950 gap = offsets[0] - offsets[1];
15951 offset = offsets[1];
15953 /* Swap the instructions such that lower memory is accessed first. */
15954 SWAP_RTX (operands[0], operands[1]);
15955 SWAP_RTX (operands[2], operands[3]);
15956 if (const_store)
15957 SWAP_RTX (operands[4], operands[5]);
15959 else
15961 gap = offsets[1] - offsets[0];
15962 offset = offsets[0];
15965 /* Make sure accesses are to consecutive memory locations. */
15966 if (gap != 4)
15967 return false;
15969 /* Make sure we generate legal instructions. */
15970 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15971 false, load))
15972 return true;
15974 /* In Thumb state the registers are almost unconstrained, so if the
15975 check failed there is little hope of fixing it by reordering. */
15976 if (TARGET_THUMB2)
15977 return false;
15979 if (load && commute)
15981 /* Try reordering registers. */
15982 SWAP_RTX (operands[0], operands[1]);
15983 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15984 false, load))
15985 return true;
15988 if (const_store)
15990 /* If input registers are dead after this pattern, they can be
15991 reordered or replaced by other registers that are free in the
15992 current pattern. */
15993 if (!peep2_reg_dead_p (4, operands[0])
15994 || !peep2_reg_dead_p (4, operands[1]))
15995 return false;
15997 /* Try to reorder the input registers. */
15998 /* For example, the code
15999 mov r0, 0
16000 mov r1, 1
16001 str r1, [r2]
16002 str r0, [r2, #4]
16003 can be transformed into
16004 mov r1, 0
16005 mov r0, 1
16006 strd r0, [r2]
16008 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16009 false, false))
16011 SWAP_RTX (operands[0], operands[1]);
16012 return true;
16015 /* Try to find a free DI register. */
16016 CLEAR_HARD_REG_SET (regset);
16017 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16018 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16019 while (true)
16021 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16022 if (tmp == NULL_RTX)
16023 return false;
16025 /* DREG must be an even-numbered register in DImode.
16026 Split it into SI registers. */
16027 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16028 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16029 gcc_assert (operands[0] != NULL_RTX);
16030 gcc_assert (operands[1] != NULL_RTX);
16031 gcc_assert (REGNO (operands[0]) % 2 == 0);
16032 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16034 return (operands_ok_ldrd_strd (operands[0], operands[1],
16035 base, offset,
16036 false, load));
16040 return false;
16042 #undef SWAP_RTX
16047 /* Print a symbolic form of X to the debug file, F. */
16048 static void
16049 arm_print_value (FILE *f, rtx x)
16051 switch (GET_CODE (x))
16053 case CONST_INT:
16054 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16055 return;
16057 case CONST_DOUBLE:
16058 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16059 return;
16061 case CONST_VECTOR:
16063 int i;
16065 fprintf (f, "<");
16066 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16068 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16069 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16070 fputc (',', f);
16072 fprintf (f, ">");
16074 return;
16076 case CONST_STRING:
16077 fprintf (f, "\"%s\"", XSTR (x, 0));
16078 return;
16080 case SYMBOL_REF:
16081 fprintf (f, "`%s'", XSTR (x, 0));
16082 return;
16084 case LABEL_REF:
16085 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16086 return;
16088 case CONST:
16089 arm_print_value (f, XEXP (x, 0));
16090 return;
16092 case PLUS:
16093 arm_print_value (f, XEXP (x, 0));
16094 fprintf (f, "+");
16095 arm_print_value (f, XEXP (x, 1));
16096 return;
16098 case PC:
16099 fprintf (f, "pc");
16100 return;
16102 default:
16103 fprintf (f, "????");
16104 return;
16108 /* Routines for manipulation of the constant pool. */
16110 /* Arm instructions cannot load a large constant directly into a
16111 register; they have to come from a pc relative load. The constant
16112 must therefore be placed in the addressable range of the pc
16113 relative load. Depending on the precise pc relative load
16114 instruction the range is somewhere between 256 bytes and 4k. This
16115 means that we often have to dump a constant inside a function, and
16116 generate code to branch around it.
16118 It is important to minimize this, since the branches will slow
16119 things down and make the code larger.
16121 Normally we can hide the table after an existing unconditional
16122 branch so that there is no interruption of the flow, but in the
16123 worst case the code looks like this:
16125 ldr rn, L1
16127 b L2
16128 align
16129 L1: .long value
16133 ldr rn, L3
16135 b L4
16136 align
16137 L3: .long value
16141 We fix this by performing a scan after scheduling, which notices
16142 which instructions need to have their operands fetched from the
16143 constant table and builds the table.
16145 The algorithm starts by building a table of all the constants that
16146 need fixing up and all the natural barriers in the function (places
16147 where a constant table can be dropped without breaking the flow).
16148 For each fixup we note how far the pc-relative replacement will be
16149 able to reach and the offset of the instruction into the function.
16151 Having built the table we then group the fixes together to form
16152 tables that are as large as possible (subject to addressing
16153 constraints) and emit each table of constants after the last
16154 barrier that is within range of all the instructions in the group.
16155 If a group does not contain a barrier, then we forcibly create one
16156 by inserting a jump instruction into the flow. Once the table has
16157 been inserted, the insns are then modified to reference the
16158 relevant entry in the pool.
16160 Possible enhancements to the algorithm (not implemented) are:
16162 1) For some processors and object formats, there may be benefit in
16163 aligning the pools to the start of cache lines; this alignment
16164 would need to be taken into account when calculating addressability
16165 of a pool. */
16167 /* These typedefs are located at the start of this file, so that
16168 they can be used in the prototypes there. This comment is to
16169 remind readers of that fact so that the following structures
16170 can be understood more easily.
16172 typedef struct minipool_node Mnode;
16173 typedef struct minipool_fixup Mfix; */
16175 struct minipool_node
16177 /* Doubly linked chain of entries. */
16178 Mnode * next;
16179 Mnode * prev;
16180 /* The maximum offset into the code that this entry can be placed. While
16181 pushing fixes for forward references, all entries are sorted in order
16182 of increasing max_address. */
16183 HOST_WIDE_INT max_address;
16184 /* Similarly for an entry inserted for a backwards ref. */
16185 HOST_WIDE_INT min_address;
16186 /* The number of fixes referencing this entry. This can become zero
16187 if we "unpush" an entry. In this case we ignore the entry when we
16188 come to emit the code. */
16189 int refcount;
16190 /* The offset from the start of the minipool. */
16191 HOST_WIDE_INT offset;
16192 /* The value in the table. */
16193 rtx value;
16194 /* The mode of value. */
16195 machine_mode mode;
16196 /* The size of the value. With iWMMXt enabled
16197 sizes > 4 also imply an alignment of 8-bytes. */
16198 int fix_size;
16201 struct minipool_fixup
16203 Mfix * next;
16204 rtx_insn * insn;
16205 HOST_WIDE_INT address;
16206 rtx * loc;
16207 machine_mode mode;
16208 int fix_size;
16209 rtx value;
16210 Mnode * minipool;
16211 HOST_WIDE_INT forwards;
16212 HOST_WIDE_INT backwards;
16215 /* Fixes less than a word need padding out to a word boundary. */
16216 #define MINIPOOL_FIX_SIZE(mode) \
16217 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
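/* For instance, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte value padded out
   to a word) while MINIPOOL_FIX_SIZE (DImode) is 8 (illustrative values
   derived from the macro above).  */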
16219 static Mnode * minipool_vector_head;
16220 static Mnode * minipool_vector_tail;
16221 static rtx_code_label *minipool_vector_label;
16222 static int minipool_pad;
16224 /* The linked list of all minipool fixes required for this function. */
16225 Mfix * minipool_fix_head;
16226 Mfix * minipool_fix_tail;
16227 /* The fix entry for the current minipool, once it has been placed. */
16228 Mfix * minipool_barrier;
16230 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16231 #define JUMP_TABLES_IN_TEXT_SECTION 0
16232 #endif
16234 static HOST_WIDE_INT
16235 get_jump_table_size (rtx_jump_table_data *insn)
16237 /* ADDR_VECs only take room if read-only data goes into the text
16238 section. */
16239 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16241 rtx body = PATTERN (insn);
16242 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16243 HOST_WIDE_INT size;
16244 HOST_WIDE_INT modesize;
16246 modesize = GET_MODE_SIZE (GET_MODE (body));
16247 size = modesize * XVECLEN (body, elt);
16248 switch (modesize)
16250 case 1:
16251 /* Round up size of TBB table to a halfword boundary. */
16252 size = (size + 1) & ~(HOST_WIDE_INT)1;
16253 break;
16254 case 2:
16255 /* No padding necessary for TBH. */
16256 break;
16257 case 4:
16258 /* Add two bytes for alignment on Thumb. */
16259 if (TARGET_THUMB)
16260 size += 2;
16261 break;
16262 default:
16263 gcc_unreachable ();
16265 return size;
16268 return 0;
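/* Illustrative sizes (not from the original source): a QImode (TBB-style)
   ADDR_DIFF_VEC with five entries counts as 6 bytes after rounding up to a
   halfword, and a four-byte-entry table with three entries counts as 12
   bytes, plus 2 bytes of alignment when TARGET_THUMB.  Nothing is counted
   at all when jump tables live in a separate read-only data section.  */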
16271 /* Return the maximum amount of padding that will be inserted before
16272 label LABEL. */
16274 static HOST_WIDE_INT
16275 get_label_padding (rtx label)
16277 HOST_WIDE_INT align, min_insn_size;
16279 align = 1 << label_to_alignment (label);
16280 min_insn_size = TARGET_THUMB ? 2 : 4;
16281 return align > min_insn_size ? align - min_insn_size : 0;
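/* E.g. a label aligned to an 8-byte boundary in Thumb code (minimum insn
   size 2) may be preceded by up to 6 bytes of padding; in ARM code the
   same label yields at most 4 (illustrative figures from the formula
   above).  */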
16284 /* Move a minipool fix MP from its current location to before MAX_MP.
16285 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16286 constraints may need updating. */
16287 static Mnode *
16288 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16289 HOST_WIDE_INT max_address)
16291 /* The code below assumes these are different. */
16292 gcc_assert (mp != max_mp);
16294 if (max_mp == NULL)
16296 if (max_address < mp->max_address)
16297 mp->max_address = max_address;
16299 else
16301 if (max_address > max_mp->max_address - mp->fix_size)
16302 mp->max_address = max_mp->max_address - mp->fix_size;
16303 else
16304 mp->max_address = max_address;
16306 /* Unlink MP from its current position. Since max_mp is non-null,
16307 mp->prev must be non-null. */
16308 mp->prev->next = mp->next;
16309 if (mp->next != NULL)
16310 mp->next->prev = mp->prev;
16311 else
16312 minipool_vector_tail = mp->prev;
16314 /* Re-insert it before MAX_MP. */
16315 mp->next = max_mp;
16316 mp->prev = max_mp->prev;
16317 max_mp->prev = mp;
16319 if (mp->prev != NULL)
16320 mp->prev->next = mp;
16321 else
16322 minipool_vector_head = mp;
16325 /* Save the new entry. */
16326 max_mp = mp;
16328 /* Scan over the preceding entries and adjust their addresses as
16329 required. */
16330 while (mp->prev != NULL
16331 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16333 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16334 mp = mp->prev;
16337 return max_mp;
16340 /* Add a constant to the minipool for a forward reference. Returns the
16341 node added or NULL if the constant will not fit in this pool. */
16342 static Mnode *
16343 add_minipool_forward_ref (Mfix *fix)
16345 /* If set, max_mp is the first pool_entry that has a lower
16346 constraint than the one we are trying to add. */
16347 Mnode * max_mp = NULL;
16348 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16349 Mnode * mp;
16351 /* If the minipool starts before the end of FIX->INSN then this FIX
16352 cannot be placed into the current pool. Furthermore, adding the
16353 new constant pool entry may cause the pool to start FIX_SIZE bytes
16354 earlier. */
16355 if (minipool_vector_head &&
16356 (fix->address + get_attr_length (fix->insn)
16357 >= minipool_vector_head->max_address - fix->fix_size))
16358 return NULL;
16360 /* Scan the pool to see if a constant with the same value has
16361 already been added. While we are doing this, also note the
16362 location where we must insert the constant if it doesn't already
16363 exist. */
16364 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16366 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16367 && fix->mode == mp->mode
16368 && (!LABEL_P (fix->value)
16369 || (CODE_LABEL_NUMBER (fix->value)
16370 == CODE_LABEL_NUMBER (mp->value)))
16371 && rtx_equal_p (fix->value, mp->value))
16373 /* More than one fix references this entry. */
16374 mp->refcount++;
16375 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16378 /* Note the insertion point if necessary. */
16379 if (max_mp == NULL
16380 && mp->max_address > max_address)
16381 max_mp = mp;
16383 /* If we are inserting an 8-bytes aligned quantity and
16384 we have not already found an insertion point, then
16385 make sure that all such 8-byte aligned quantities are
16386 placed at the start of the pool. */
16387 if (ARM_DOUBLEWORD_ALIGN
16388 && max_mp == NULL
16389 && fix->fix_size >= 8
16390 && mp->fix_size < 8)
16392 max_mp = mp;
16393 max_address = mp->max_address;
16397 /* The value is not currently in the minipool, so we need to create
16398 a new entry for it. If MAX_MP is NULL, the entry will be put on
16399 the end of the list since the placement is less constrained than
16400 any existing entry. Otherwise, we insert the new fix before
16401 MAX_MP and, if necessary, adjust the constraints on the other
16402 entries. */
16403 mp = XNEW (Mnode);
16404 mp->fix_size = fix->fix_size;
16405 mp->mode = fix->mode;
16406 mp->value = fix->value;
16407 mp->refcount = 1;
16408 /* Not yet required for a backwards ref. */
16409 mp->min_address = -65536;
16411 if (max_mp == NULL)
16413 mp->max_address = max_address;
16414 mp->next = NULL;
16415 mp->prev = minipool_vector_tail;
16417 if (mp->prev == NULL)
16419 minipool_vector_head = mp;
16420 minipool_vector_label = gen_label_rtx ();
16422 else
16423 mp->prev->next = mp;
16425 minipool_vector_tail = mp;
16427 else
16429 if (max_address > max_mp->max_address - mp->fix_size)
16430 mp->max_address = max_mp->max_address - mp->fix_size;
16431 else
16432 mp->max_address = max_address;
16434 mp->next = max_mp;
16435 mp->prev = max_mp->prev;
16436 max_mp->prev = mp;
16437 if (mp->prev != NULL)
16438 mp->prev->next = mp;
16439 else
16440 minipool_vector_head = mp;
16443 /* Save the new entry. */
16444 max_mp = mp;
16446 /* Scan over the preceding entries and adjust their addresses as
16447 required. */
16448 while (mp->prev != NULL
16449 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16451 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16452 mp = mp->prev;
16455 return max_mp;
16458 static Mnode *
16459 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16460 HOST_WIDE_INT min_address)
16462 HOST_WIDE_INT offset;
16464 /* The code below assumes these are different. */
16465 gcc_assert (mp != min_mp);
16467 if (min_mp == NULL)
16469 if (min_address > mp->min_address)
16470 mp->min_address = min_address;
16472 else
16474 /* We will adjust this below if it is too loose. */
16475 mp->min_address = min_address;
16477 /* Unlink MP from its current position. Since min_mp is non-null,
16478 mp->next must be non-null. */
16479 mp->next->prev = mp->prev;
16480 if (mp->prev != NULL)
16481 mp->prev->next = mp->next;
16482 else
16483 minipool_vector_head = mp->next;
16485 /* Reinsert it after MIN_MP. */
16486 mp->prev = min_mp;
16487 mp->next = min_mp->next;
16488 min_mp->next = mp;
16489 if (mp->next != NULL)
16490 mp->next->prev = mp;
16491 else
16492 minipool_vector_tail = mp;
16495 min_mp = mp;
16497 offset = 0;
16498 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16500 mp->offset = offset;
16501 if (mp->refcount > 0)
16502 offset += mp->fix_size;
16504 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16505 mp->next->min_address = mp->min_address + mp->fix_size;
16508 return min_mp;
16511 /* Add a constant to the minipool for a backward reference. Returns the
16512 node added or NULL if the constant will not fit in this pool.
16514 Note that the code for insertion for a backwards reference can be
16515 somewhat confusing because the calculated offsets for each fix do
16516 not take into account the size of the pool (which is still under
16517 construction). */
16518 static Mnode *
16519 add_minipool_backward_ref (Mfix *fix)
16521 /* If set, min_mp is the last pool_entry that has a lower constraint
16522 than the one we are trying to add. */
16523 Mnode *min_mp = NULL;
16524 /* This can be negative, since it is only a constraint. */
16525 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16526 Mnode *mp;
16528 /* If we can't reach the current pool from this insn, or if we can't
16529 insert this entry at the end of the pool without pushing other
16530 fixes out of range, then we don't try. This ensures that we
16531 can't fail later on. */
16532 if (min_address >= minipool_barrier->address
16533 || (minipool_vector_tail->min_address + fix->fix_size
16534 >= minipool_barrier->address))
16535 return NULL;
16537 /* Scan the pool to see if a constant with the same value has
16538 already been added. While we are doing this, also note the
16539 location where we must insert the constant if it doesn't already
16540 exist. */
16541 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16543 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16544 && fix->mode == mp->mode
16545 && (!LABEL_P (fix->value)
16546 || (CODE_LABEL_NUMBER (fix->value)
16547 == CODE_LABEL_NUMBER (mp->value)))
16548 && rtx_equal_p (fix->value, mp->value)
16549 /* Check that there is enough slack to move this entry to the
16550 end of the table (this is conservative). */
16551 && (mp->max_address
16552 > (minipool_barrier->address
16553 + minipool_vector_tail->offset
16554 + minipool_vector_tail->fix_size)))
16556 mp->refcount++;
16557 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16560 if (min_mp != NULL)
16561 mp->min_address += fix->fix_size;
16562 else
16564 /* Note the insertion point if necessary. */
16565 if (mp->min_address < min_address)
16567 /* For now, we do not allow the insertion of 8-byte alignment
16568 requiring nodes anywhere but at the start of the pool. */
16569 if (ARM_DOUBLEWORD_ALIGN
16570 && fix->fix_size >= 8 && mp->fix_size < 8)
16571 return NULL;
16572 else
16573 min_mp = mp;
16575 else if (mp->max_address
16576 < minipool_barrier->address + mp->offset + fix->fix_size)
16578 /* Inserting before this entry would push the fix beyond
16579 its maximum address (which can happen if we have
16580 re-located a forwards fix); force the new fix to come
16581 after it. */
16582 if (ARM_DOUBLEWORD_ALIGN
16583 && fix->fix_size >= 8 && mp->fix_size < 8)
16584 return NULL;
16585 else
16587 min_mp = mp;
16588 min_address = mp->min_address + fix->fix_size;
16591 /* Do not insert a non-8-byte aligned quantity before 8-byte
16592 aligned quantities. */
16593 else if (ARM_DOUBLEWORD_ALIGN
16594 && fix->fix_size < 8
16595 && mp->fix_size >= 8)
16597 min_mp = mp;
16598 min_address = mp->min_address + fix->fix_size;
16603 /* We need to create a new entry. */
16604 mp = XNEW (Mnode);
16605 mp->fix_size = fix->fix_size;
16606 mp->mode = fix->mode;
16607 mp->value = fix->value;
16608 mp->refcount = 1;
16609 mp->max_address = minipool_barrier->address + 65536;
16611 mp->min_address = min_address;
16613 if (min_mp == NULL)
16615 mp->prev = NULL;
16616 mp->next = minipool_vector_head;
16618 if (mp->next == NULL)
16620 minipool_vector_tail = mp;
16621 minipool_vector_label = gen_label_rtx ();
16623 else
16624 mp->next->prev = mp;
16626 minipool_vector_head = mp;
16628 else
16630 mp->next = min_mp->next;
16631 mp->prev = min_mp;
16632 min_mp->next = mp;
16634 if (mp->next != NULL)
16635 mp->next->prev = mp;
16636 else
16637 minipool_vector_tail = mp;
16640 /* Save the new entry. */
16641 min_mp = mp;
16643 if (mp->prev)
16644 mp = mp->prev;
16645 else
16646 mp->offset = 0;
16648 /* Scan over the following entries and adjust their offsets. */
16649 while (mp->next != NULL)
16651 if (mp->next->min_address < mp->min_address + mp->fix_size)
16652 mp->next->min_address = mp->min_address + mp->fix_size;
16654 if (mp->refcount)
16655 mp->next->offset = mp->offset + mp->fix_size;
16656 else
16657 mp->next->offset = mp->offset;
16659 mp = mp->next;
16662 return min_mp;
16665 static void
16666 assign_minipool_offsets (Mfix *barrier)
16668 HOST_WIDE_INT offset = 0;
16669 Mnode *mp;
16671 minipool_barrier = barrier;
16673 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16675 mp->offset = offset;
16677 if (mp->refcount > 0)
16678 offset += mp->fix_size;
16682 /* Output the literal table */
16683 static void
16684 dump_minipool (rtx_insn *scan)
16686 Mnode * mp;
16687 Mnode * nmp;
16688 int align64 = 0;
16690 if (ARM_DOUBLEWORD_ALIGN)
16691 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16692 if (mp->refcount > 0 && mp->fix_size >= 8)
16694 align64 = 1;
16695 break;
16698 if (dump_file)
16699 fprintf (dump_file,
16700 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16701 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16703 scan = emit_label_after (gen_label_rtx (), scan);
16704 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16705 scan = emit_label_after (minipool_vector_label, scan);
16707 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16709 if (mp->refcount > 0)
16711 if (dump_file)
16713 fprintf (dump_file,
16714 ";; Offset %u, min %ld, max %ld ",
16715 (unsigned) mp->offset, (unsigned long) mp->min_address,
16716 (unsigned long) mp->max_address);
16717 arm_print_value (dump_file, mp->value);
16718 fputc ('\n', dump_file);
16721 switch (mp->fix_size)
16723 #ifdef HAVE_consttable_1
16724 case 1:
16725 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16726 break;
16728 #endif
16729 #ifdef HAVE_consttable_2
16730 case 2:
16731 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16732 break;
16734 #endif
16735 #ifdef HAVE_consttable_4
16736 case 4:
16737 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16738 break;
16740 #endif
16741 #ifdef HAVE_consttable_8
16742 case 8:
16743 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16744 break;
16746 #endif
16747 #ifdef HAVE_consttable_16
16748 case 16:
16749 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16750 break;
16752 #endif
16753 default:
16754 gcc_unreachable ();
16758 nmp = mp->next;
16759 free (mp);
16762 minipool_vector_head = minipool_vector_tail = NULL;
16763 scan = emit_insn_after (gen_consttable_end (), scan);
16764 scan = emit_barrier_after (scan);
16767 /* Return the cost of forcibly inserting a barrier after INSN. */
16768 static int
16769 arm_barrier_cost (rtx insn)
16771 /* Basing the location of the pool on the loop depth is preferable,
16772 but at the moment, the basic block information seems to be
16773 corrupted by this stage of the compilation. */
16774 int base_cost = 50;
16775 rtx next = next_nonnote_insn (insn);
16777 if (next != NULL && LABEL_P (next))
16778 base_cost -= 20;
16780 switch (GET_CODE (insn))
16782 case CODE_LABEL:
16783 /* It will always be better to place the table before the label, rather
16784 than after it. */
16785 return 50;
16787 case INSN:
16788 case CALL_INSN:
16789 return base_cost;
16791 case JUMP_INSN:
16792 return base_cost - 10;
16794 default:
16795 return base_cost + 10;
16799 /* Find the best place in the insn stream in the range
16800 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16801 Create the barrier by inserting a jump and add a new fix entry for
16802 it. */
16803 static Mfix *
16804 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16806 HOST_WIDE_INT count = 0;
16807 rtx_barrier *barrier;
16808 rtx_insn *from = fix->insn;
16809 /* The instruction after which we will insert the jump. */
16810 rtx_insn *selected = NULL;
16811 int selected_cost;
16812 /* The address at which the jump instruction will be placed. */
16813 HOST_WIDE_INT selected_address;
16814 Mfix * new_fix;
16815 HOST_WIDE_INT max_count = max_address - fix->address;
16816 rtx_code_label *label = gen_label_rtx ();
16818 selected_cost = arm_barrier_cost (from);
16819 selected_address = fix->address;
16821 while (from && count < max_count)
16823 rtx_jump_table_data *tmp;
16824 int new_cost;
16826 /* This code shouldn't have been called if there was a natural barrier
16827 within range. */
16828 gcc_assert (!BARRIER_P (from));
16830 /* Count the length of this insn. This must stay in sync with the
16831 code that pushes minipool fixes. */
16832 if (LABEL_P (from))
16833 count += get_label_padding (from);
16834 else
16835 count += get_attr_length (from);
16837 /* If there is a jump table, add its length. */
16838 if (tablejump_p (from, NULL, &tmp))
16840 count += get_jump_table_size (tmp);
16842 /* Jump tables aren't in a basic block, so base the cost on
16843 the dispatch insn. If we select this location, we will
16844 still put the pool after the table. */
16845 new_cost = arm_barrier_cost (from);
16847 if (count < max_count
16848 && (!selected || new_cost <= selected_cost))
16850 selected = tmp;
16851 selected_cost = new_cost;
16852 selected_address = fix->address + count;
16855 /* Continue after the dispatch table. */
16856 from = NEXT_INSN (tmp);
16857 continue;
16860 new_cost = arm_barrier_cost (from);
16862 if (count < max_count
16863 && (!selected || new_cost <= selected_cost))
16865 selected = from;
16866 selected_cost = new_cost;
16867 selected_address = fix->address + count;
16870 from = NEXT_INSN (from);
16873 /* Make sure that we found a place to insert the jump. */
16874 gcc_assert (selected);
16876 /* Make sure we do not split a call and its corresponding
16877 CALL_ARG_LOCATION note. */
16878 if (CALL_P (selected))
16880 rtx_insn *next = NEXT_INSN (selected);
16881 if (next && NOTE_P (next)
16882 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16883 selected = next;
16886 /* Create a new JUMP_INSN that branches around a barrier. */
16887 from = emit_jump_insn_after (gen_jump (label), selected);
16888 JUMP_LABEL (from) = label;
16889 barrier = emit_barrier_after (from);
16890 emit_label_after (label, barrier);
16892 /* Create a minipool barrier entry for the new barrier. */
16893 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16894 new_fix->insn = barrier;
16895 new_fix->address = selected_address;
16896 new_fix->next = fix->next;
16897 fix->next = new_fix;
16899 return new_fix;
16902 /* Record that there is a natural barrier in the insn stream at
16903 ADDRESS. */
16904 static void
16905 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16907 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16909 fix->insn = insn;
16910 fix->address = address;
16912 fix->next = NULL;
16913 if (minipool_fix_head != NULL)
16914 minipool_fix_tail->next = fix;
16915 else
16916 minipool_fix_head = fix;
16918 minipool_fix_tail = fix;
16921 /* Record INSN, which will need fixing up to load a value from the
16922 minipool. ADDRESS is the offset of the insn since the start of the
16923 function; LOC is a pointer to the part of the insn which requires
16924 fixing; VALUE is the constant that must be loaded, which is of type
16925 MODE. */
16926 static void
16927 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16928 machine_mode mode, rtx value)
16930 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16932 fix->insn = insn;
16933 fix->address = address;
16934 fix->loc = loc;
16935 fix->mode = mode;
16936 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16937 fix->value = value;
16938 fix->forwards = get_attr_pool_range (insn);
16939 fix->backwards = get_attr_neg_pool_range (insn);
16940 fix->minipool = NULL;
16942 /* If an insn doesn't have a range defined for it, then it isn't
16943 expecting to be reworked by this code. Better to stop now than
16944 to generate duff assembly code. */
16945 gcc_assert (fix->forwards || fix->backwards);
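/* As the overview comment earlier in this file notes, the reachable range
   depends on the load instruction: roughly between 256 bytes and 4k
   forwards, with a similar backwards range taken from the neg_pool_range
   attribute.  The exact figures live in the machine description, not
   here.  */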
16947 /* If an entry requires 8-byte alignment then assume all constant pools
16948 require 4 bytes of padding. Trying to do this later on a per-pool
16949 basis is awkward because existing pool entries have to be modified. */
16950 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16951 minipool_pad = 4;
16953 if (dump_file)
16955 fprintf (dump_file,
16956 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16957 GET_MODE_NAME (mode),
16958 INSN_UID (insn), (unsigned long) address,
16959 -1 * (long)fix->backwards, (long)fix->forwards);
16960 arm_print_value (dump_file, fix->value);
16961 fprintf (dump_file, "\n");
16964 /* Add it to the chain of fixes. */
16965 fix->next = NULL;
16967 if (minipool_fix_head != NULL)
16968 minipool_fix_tail->next = fix;
16969 else
16970 minipool_fix_head = fix;
16972 minipool_fix_tail = fix;
16975 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16976 Returns the number of insns needed, or 99 if we always want to synthesize
16977 the value. */
16979 arm_max_const_double_inline_cost ()
16981 /* Let the value get synthesized to avoid the use of literal pools. */
16982 if (arm_disable_literal_pool)
16983 return 99;
16985 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16988 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16989 Returns the number of insns needed, or 99 if we don't know how to
16990 do it. */
16992 arm_const_double_inline_cost (rtx val)
16994 rtx lowpart, highpart;
16995 machine_mode mode;
16997 mode = GET_MODE (val);
16999 if (mode == VOIDmode)
17000 mode = DImode;
17002 gcc_assert (GET_MODE_SIZE (mode) == 8);
17004 lowpart = gen_lowpart (SImode, val);
17005 highpart = gen_highpart_mode (SImode, mode, val);
17007 gcc_assert (CONST_INT_P (lowpart));
17008 gcc_assert (CONST_INT_P (highpart));
17010 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17011 NULL_RTX, NULL_RTX, 0, 0)
17012 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17013 NULL_RTX, NULL_RTX, 0, 0));
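/* Worked example (illustrative, not from the original source): for the
   DImode constant 0x0000000100000002 both 32-bit halves are single-insn
   immediates, so the cost is 2, which fits within the budget of 3 (or 4)
   returned by arm_max_const_double_inline_cost above.  */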
17016 /* Cost of loading a SImode constant. */
17017 static inline int
17018 arm_const_inline_cost (enum rtx_code code, rtx val)
17020 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17021 NULL_RTX, NULL_RTX, 1, 0);
17024 /* Return true if it is worthwhile to split a 64-bit constant into two
17025 32-bit operations. This is the case if optimizing for size, or
17026 if we have load delay slots, or if one 32-bit part can be done with
17027 a single data operation. */
17028 bool
17029 arm_const_double_by_parts (rtx val)
17031 machine_mode mode = GET_MODE (val);
17032 rtx part;
17034 if (optimize_size || arm_ld_sched)
17035 return true;
17037 if (mode == VOIDmode)
17038 mode = DImode;
17040 part = gen_highpart_mode (SImode, mode, val);
17042 gcc_assert (CONST_INT_P (part));
17044 if (const_ok_for_arm (INTVAL (part))
17045 || const_ok_for_arm (~INTVAL (part)))
17046 return true;
17048 part = gen_lowpart (SImode, val);
17050 gcc_assert (CONST_INT_P (part));
17052 if (const_ok_for_arm (INTVAL (part))
17053 || const_ok_for_arm (~INTVAL (part)))
17054 return true;
17056 return false;
17059 /* Return true if it is possible to inline both the high and low parts
17060 of a 64-bit constant into 32-bit data processing instructions. */
17061 bool
17062 arm_const_double_by_immediates (rtx val)
17064 machine_mode mode = GET_MODE (val);
17065 rtx part;
17067 if (mode == VOIDmode)
17068 mode = DImode;
17070 part = gen_highpart_mode (SImode, mode, val);
17072 gcc_assert (CONST_INT_P (part));
17074 if (!const_ok_for_arm (INTVAL (part)))
17075 return false;
17077 part = gen_lowpart (SImode, val);
17079 gcc_assert (CONST_INT_P (part));
17081 if (!const_ok_for_arm (INTVAL (part)))
17082 return false;
17084 return true;
17087 /* Scan INSN and note any of its operands that need fixing.
17088 If DO_PUSHES is false we do not actually push any of the fixups
17089 needed. */
17090 static void
17091 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17093 int opno;
17095 extract_constrain_insn (insn);
17097 if (recog_data.n_alternatives == 0)
17098 return;
17100 /* Fill in recog_op_alt with information about the constraints of
17101 this insn. */
17102 preprocess_constraints (insn);
17104 const operand_alternative *op_alt = which_op_alt ();
17105 for (opno = 0; opno < recog_data.n_operands; opno++)
17107 /* Things we need to fix can only occur in inputs. */
17108 if (recog_data.operand_type[opno] != OP_IN)
17109 continue;
17111 /* If this alternative is a memory reference, then any mention
17112 of constants in this alternative is really to fool reload
17113 into allowing us to accept one there. We need to fix them up
17114 now so that we output the right code. */
17115 if (op_alt[opno].memory_ok)
17117 rtx op = recog_data.operand[opno];
17119 if (CONSTANT_P (op))
17121 if (do_pushes)
17122 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17123 recog_data.operand_mode[opno], op);
17125 else if (MEM_P (op)
17126 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17127 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17129 if (do_pushes)
17131 rtx cop = avoid_constant_pool_reference (op);
17133 /* Casting the address of something to a mode narrower
17134 than a word can cause avoid_constant_pool_reference()
17135 to return the pool reference itself. That's no good to
17136 us here. Let's just hope that we can use the
17137 constant pool value directly. */
17138 if (op == cop)
17139 cop = get_pool_constant (XEXP (op, 0));
17141 push_minipool_fix (insn, address,
17142 recog_data.operand_loc[opno],
17143 recog_data.operand_mode[opno], cop);
17150 return;
17153 /* Rewrite move insn into subtract of 0 if the condition codes will
17154 be useful in next conditional jump insn. */
17156 static void
17157 thumb1_reorg (void)
17159 basic_block bb;
17161 FOR_EACH_BB_FN (bb, cfun)
17163 rtx dest, src;
17164 rtx pat, op0, set = NULL;
17165 rtx_insn *prev, *insn = BB_END (bb);
17166 bool insn_clobbered = false;
17168 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17169 insn = PREV_INSN (insn);
17171 /* Find the last cbranchsi4_insn in basic block BB. */
17172 if (insn == BB_HEAD (bb)
17173 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17174 continue;
17176 /* Get the register with which we are comparing. */
17177 pat = PATTERN (insn);
17178 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17180 /* Find the first flag setting insn before INSN in basic block BB. */
17181 gcc_assert (insn != BB_HEAD (bb));
17182 for (prev = PREV_INSN (insn);
17183 (!insn_clobbered
17184 && prev != BB_HEAD (bb)
17185 && (NOTE_P (prev)
17186 || DEBUG_INSN_P (prev)
17187 || ((set = single_set (prev)) != NULL
17188 && get_attr_conds (prev) == CONDS_NOCOND)));
17189 prev = PREV_INSN (prev))
17191 if (reg_set_p (op0, prev))
17192 insn_clobbered = true;
17195 /* Skip if op0 is clobbered by insn other than prev. */
17196 if (insn_clobbered)
17197 continue;
17199 if (!set)
17200 continue;
17202 dest = SET_DEST (set);
17203 src = SET_SRC (set);
17204 if (!low_register_operand (dest, SImode)
17205 || !low_register_operand (src, SImode))
17206 continue;
17208 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17209 in INSN. Both src and dest of the move insn are checked. */
17210 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17212 dest = copy_rtx (dest);
17213 src = copy_rtx (src);
17214 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17215 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17216 INSN_CODE (prev) = -1;
17217 /* Set test register in INSN to dest. */
17218 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17219 INSN_CODE (insn) = -1;
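/* Net effect, as a sketch (not from the original source): a sequence such
   as "mov r1, r0" followed by a compare of r1 against zero and a
   conditional branch can become "subs r1, r0, #0" followed by the branch,
   letting the cbranch pattern drop its explicit comparison.  */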
17224 /* Convert instructions to their cc-clobbering variant if possible, since
17225 that allows us to use smaller encodings. */
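/* Illustrative example (not part of the original comment): when the
   condition codes are dead at that point, an "add r0, r1, r2" on low
   registers can be rewritten as the flag-setting "adds r0, r1, r2",
   which has a 16-bit encoding.  */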
17227 static void
17228 thumb2_reorg (void)
17230 basic_block bb;
17231 regset_head live;
17233 INIT_REG_SET (&live);
17235 /* We are freeing block_for_insn in the toplev to keep compatibility
17236 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17237 compute_bb_for_insn ();
17238 df_analyze ();
17240 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17242 FOR_EACH_BB_FN (bb, cfun)
17244 if (current_tune->disparage_flag_setting_t16_encodings
17245 && optimize_bb_for_speed_p (bb))
17246 continue;
17248 rtx_insn *insn;
17249 Convert_Action action = SKIP;
17250 Convert_Action action_for_partial_flag_setting
17251 = (current_tune->disparage_partial_flag_setting_t16_encodings
17252 && optimize_bb_for_speed_p (bb))
17253 ? SKIP : CONV;
17255 COPY_REG_SET (&live, DF_LR_OUT (bb));
17256 df_simulate_initialize_backwards (bb, &live);
17257 FOR_BB_INSNS_REVERSE (bb, insn)
17259 if (NONJUMP_INSN_P (insn)
17260 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17261 && GET_CODE (PATTERN (insn)) == SET)
17263 action = SKIP;
17264 rtx pat = PATTERN (insn);
17265 rtx dst = XEXP (pat, 0);
17266 rtx src = XEXP (pat, 1);
17267 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17269 if (!OBJECT_P (src))
17270 op0 = XEXP (src, 0);
17272 if (BINARY_P (src))
17273 op1 = XEXP (src, 1);
17275 if (low_register_operand (dst, SImode))
17277 switch (GET_CODE (src))
17279 case PLUS:
17280 /* Adding two registers and storing the result
17281 in the first source is already a 16-bit
17282 operation. */
17283 if (rtx_equal_p (dst, op0)
17284 && register_operand (op1, SImode))
17285 break;
17287 if (low_register_operand (op0, SImode))
17289 /* ADDS <Rd>,<Rn>,<Rm> */
17290 if (low_register_operand (op1, SImode))
17291 action = CONV;
17292 /* ADDS <Rdn>,#<imm8> */
17293 /* SUBS <Rdn>,#<imm8> */
17294 else if (rtx_equal_p (dst, op0)
17295 && CONST_INT_P (op1)
17296 && IN_RANGE (INTVAL (op1), -255, 255))
17297 action = CONV;
17298 /* ADDS <Rd>,<Rn>,#<imm3> */
17299 /* SUBS <Rd>,<Rn>,#<imm3> */
17300 else if (CONST_INT_P (op1)
17301 && IN_RANGE (INTVAL (op1), -7, 7))
17302 action = CONV;
17304 /* ADCS <Rd>, <Rn> */
17305 else if (GET_CODE (XEXP (src, 0)) == PLUS
17306 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17307 && low_register_operand (XEXP (XEXP (src, 0), 1),
17308 SImode)
17309 && COMPARISON_P (op1)
17310 && cc_register (XEXP (op1, 0), VOIDmode)
17311 && maybe_get_arm_condition_code (op1) == ARM_CS
17312 && XEXP (op1, 1) == const0_rtx)
17313 action = CONV;
17314 break;
17316 case MINUS:
17317 /* RSBS <Rd>,<Rn>,#0
17318 Not handled here: see NEG below. */
17319 /* SUBS <Rd>,<Rn>,#<imm3>
17320 SUBS <Rdn>,#<imm8>
17321 Not handled here: see PLUS above. */
17322 /* SUBS <Rd>,<Rn>,<Rm> */
17323 if (low_register_operand (op0, SImode)
17324 && low_register_operand (op1, SImode))
17325 action = CONV;
17326 break;
17328 case MULT:
17329 /* MULS <Rdm>,<Rn>,<Rdm>
17330 As an exception to the rule, this is only used
17331 when optimizing for size since MULS is slow on all
17332 known implementations. We do not even want to use
17333 MULS in cold code, if optimizing for speed, so we
17334 test the global flag here. */
17335 if (!optimize_size)
17336 break;
17337 /* else fall through. */
17338 case AND:
17339 case IOR:
17340 case XOR:
17341 /* ANDS <Rdn>,<Rm> */
17342 if (rtx_equal_p (dst, op0)
17343 && low_register_operand (op1, SImode))
17344 action = action_for_partial_flag_setting;
17345 else if (rtx_equal_p (dst, op1)
17346 && low_register_operand (op0, SImode))
17347 action = action_for_partial_flag_setting == SKIP
17348 ? SKIP : SWAP_CONV;
17349 break;
17351 case ASHIFTRT:
17352 case ASHIFT:
17353 case LSHIFTRT:
17354 /* ASRS <Rdn>,<Rm> */
17355 /* LSRS <Rdn>,<Rm> */
17356 /* LSLS <Rdn>,<Rm> */
17357 if (rtx_equal_p (dst, op0)
17358 && low_register_operand (op1, SImode))
17359 action = action_for_partial_flag_setting;
17360 /* ASRS <Rd>,<Rm>,#<imm5> */
17361 /* LSRS <Rd>,<Rm>,#<imm5> */
17362 /* LSLS <Rd>,<Rm>,#<imm5> */
17363 else if (low_register_operand (op0, SImode)
17364 && CONST_INT_P (op1)
17365 && IN_RANGE (INTVAL (op1), 0, 31))
17366 action = action_for_partial_flag_setting;
17367 break;
17369 case ROTATERT:
17370 /* RORS <Rdn>,<Rm> */
17371 if (rtx_equal_p (dst, op0)
17372 && low_register_operand (op1, SImode))
17373 action = action_for_partial_flag_setting;
17374 break;
17376 case NOT:
17377 /* MVNS <Rd>,<Rm> */
17378 if (low_register_operand (op0, SImode))
17379 action = action_for_partial_flag_setting;
17380 break;
17382 case NEG:
17383 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17384 if (low_register_operand (op0, SImode))
17385 action = CONV;
17386 break;
17388 case CONST_INT:
17389 /* MOVS <Rd>,#<imm8> */
17390 if (CONST_INT_P (src)
17391 && IN_RANGE (INTVAL (src), 0, 255))
17392 action = action_for_partial_flag_setting;
17393 break;
17395 case REG:
17396 /* MOVS and MOV<c> with registers have different
17397 encodings, so are not relevant here. */
17398 break;
17400 default:
17401 break;
17405 if (action != SKIP)
17407 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17408 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17409 rtvec vec;
17411 if (action == SWAP_CONV)
17413 src = copy_rtx (src);
17414 XEXP (src, 0) = op1;
17415 XEXP (src, 1) = op0;
17416 pat = gen_rtx_SET (VOIDmode, dst, src);
17417 vec = gen_rtvec (2, pat, clobber);
17419 else /* action == CONV */
17420 vec = gen_rtvec (2, pat, clobber);
17422 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17423 INSN_CODE (insn) = -1;
17427 if (NONDEBUG_INSN_P (insn))
17428 df_simulate_one_insn_backwards (bb, insn, &live);
17432 CLEAR_REG_SET (&live);
17435 /* GCC puts the pool in the wrong place for ARM, since we can only
17436 load addresses a limited distance around the pc. We do some
17437 special munging to move the constant pool values to the correct
17438 point in the code. */
17439 static void
17440 arm_reorg (void)
17442 rtx_insn *insn;
17443 HOST_WIDE_INT address = 0;
17444 Mfix * fix;
17446 if (TARGET_THUMB1)
17447 thumb1_reorg ();
17448 else if (TARGET_THUMB2)
17449 thumb2_reorg ();
17451 /* Ensure all insns that must be split have been split at this point.
17452 Otherwise, the pool placement code below may compute incorrect
17453 insn lengths. Note that when optimizing, all insns have already
17454 been split at this point. */
17455 if (!optimize)
17456 split_all_insns_noflow ();
17458 minipool_fix_head = minipool_fix_tail = NULL;
17460 /* The first insn must always be a note, or the code below won't
17461 scan it properly. */
17462 insn = get_insns ();
17463 gcc_assert (NOTE_P (insn));
17464 minipool_pad = 0;
17466 /* Scan all the insns and record the operands that will need fixing. */
17467 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17469 if (BARRIER_P (insn))
17470 push_minipool_barrier (insn, address);
17471 else if (INSN_P (insn))
17473 rtx_jump_table_data *table;
17475 note_invalid_constants (insn, address, true);
17476 address += get_attr_length (insn);
17478 /* If the insn is a vector jump, add the size of the table
17479 and skip the table. */
17480 if (tablejump_p (insn, NULL, &table))
17482 address += get_jump_table_size (table);
17483 insn = table;
17486 else if (LABEL_P (insn))
17487 /* Add the worst-case padding due to alignment. We don't add
17488 the _current_ padding because the minipool insertions
17489 themselves might change it. */
17490 address += get_label_padding (insn);
17493 fix = minipool_fix_head;
17495 /* Now scan the fixups and perform the required changes. */
17496 while (fix)
17498 Mfix * ftmp;
17499 Mfix * fdel;
17500 Mfix * last_added_fix;
17501 Mfix * last_barrier = NULL;
17502 Mfix * this_fix;
17504 /* Skip any further barriers before the next fix. */
17505 while (fix && BARRIER_P (fix->insn))
17506 fix = fix->next;
17508 /* No more fixes. */
17509 if (fix == NULL)
17510 break;
17512 last_added_fix = NULL;
17514 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17516 if (BARRIER_P (ftmp->insn))
17518 if (ftmp->address >= minipool_vector_head->max_address)
17519 break;
17521 last_barrier = ftmp;
17523 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17524 break;
17526 last_added_fix = ftmp; /* Keep track of the last fix added. */
17529 /* If we found a barrier, drop back to that; any fixes that we
17530 could have reached but come after the barrier will now go in
17531 the next mini-pool. */
17532 if (last_barrier != NULL)
17534 /* Reduce the refcount for those fixes that won't go into this
17535 pool after all. */
17536 for (fdel = last_barrier->next;
17537 fdel && fdel != ftmp;
17538 fdel = fdel->next)
17540 fdel->minipool->refcount--;
17541 fdel->minipool = NULL;
17544 ftmp = last_barrier;
17546 else
17548 /* ftmp is the first fix that we can't fit into this pool and
17549 there are no natural barriers that we could use. Insert a
17550 new barrier in the code somewhere between the previous
17551 fix and this one, and arrange to jump around it. */
17552 HOST_WIDE_INT max_address;
17554 /* The last item on the list of fixes must be a barrier, so
17555 we can never run off the end of the list of fixes without
17556 last_barrier being set. */
17557 gcc_assert (ftmp);
17559 max_address = minipool_vector_head->max_address;
17560 /* Check that there isn't another fix that is in range that
17561 we couldn't fit into this pool because the pool was
17562 already too large: we need to put the pool before such an
17563 instruction. The pool itself may come just after the
17564 fix because create_fix_barrier also allows space for a
17565 jump instruction. */
17566 if (ftmp->address < max_address)
17567 max_address = ftmp->address + 1;
17569 last_barrier = create_fix_barrier (last_added_fix, max_address);
17572 assign_minipool_offsets (last_barrier);
17574 while (ftmp)
17576 if (!BARRIER_P (ftmp->insn)
17577 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17578 == NULL))
17579 break;
17581 ftmp = ftmp->next;
17584 /* Scan over the fixes we have identified for this pool, fixing them
17585 up and adding the constants to the pool itself. */
17586 for (this_fix = fix; this_fix && ftmp != this_fix;
17587 this_fix = this_fix->next)
17588 if (!BARRIER_P (this_fix->insn))
17590 rtx addr
17591 = plus_constant (Pmode,
17592 gen_rtx_LABEL_REF (VOIDmode,
17593 minipool_vector_label),
17594 this_fix->minipool->offset);
17595 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17598 dump_minipool (last_barrier->insn);
17599 fix = ftmp;
17602 /* From now on we must synthesize any constants that we can't handle
17603 directly. This can happen if the RTL gets split during final
17604 instruction generation. */
17605 cfun->machine->after_arm_reorg = 1;
17607 /* Free the minipool memory. */
17608 obstack_free (&minipool_obstack, minipool_startobj);
17611 /* Routines to output assembly language. */
17613 /* Return string representation of passed in real value. */
17614 static const char *
17615 fp_const_from_val (REAL_VALUE_TYPE *r)
17617 if (!fp_consts_inited)
17618 init_fp_table ();
17620 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17621 return "0";
17624 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17625 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
17626 is in the list, UPDATE is true iff the list contains an explicit
17627 update of the base register. */
17628 void
17629 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17630 bool update)
17632 int i;
17633 char pattern[100];
17634 int offset;
17635 const char *conditional;
17636 int num_saves = XVECLEN (operands[0], 0);
17637 unsigned int regno;
17638 unsigned int regno_base = REGNO (operands[1]);
17640 offset = 0;
17641 offset += update ? 1 : 0;
17642 offset += return_pc ? 1 : 0;
17644 /* Is the base register in the list? */
17645 for (i = offset; i < num_saves; i++)
17647 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17648 /* If SP is in the list, then the base register must be SP. */
17649 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17650 /* If base register is in the list, there must be no explicit update. */
17651 if (regno == regno_base)
17652 gcc_assert (!update);
17655 conditional = reverse ? "%?%D0" : "%?%d0";
17656 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17658 /* Output pop (not ldmfd) because it has a shorter encoding. */
17659 gcc_assert (update);
17660 sprintf (pattern, "pop%s\t{", conditional);
17662 else
17664 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17665 It's just a convention, their semantics are identical. */
17666 if (regno_base == SP_REGNUM)
17667 sprintf (pattern, "ldm%sfd\t", conditional);
17668 else if (TARGET_UNIFIED_ASM)
17669 sprintf (pattern, "ldmia%s\t", conditional);
17670 else
17671 sprintf (pattern, "ldm%sia\t", conditional);
17673 strcat (pattern, reg_names[regno_base]);
17674 if (update)
17675 strcat (pattern, "!, {");
17676 else
17677 strcat (pattern, ", {");
17680 /* Output the first destination register. */
17681 strcat (pattern,
17682 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17684 /* Output the rest of the destination registers. */
17685 for (i = offset + 1; i < num_saves; i++)
17687 strcat (pattern, ", ");
17688 strcat (pattern,
17689 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17692 strcat (pattern, "}");
17694 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17695 strcat (pattern, "^");
17697 output_asm_insn (pattern, &cond);
17701 /* Output the assembly for a store multiple. */
17703 const char *
17704 vfp_output_vstmd (rtx * operands)
17706 char pattern[100];
17707 int p;
17708 int base;
17709 int i;
17710 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17711 ? XEXP (operands[0], 0)
17712 : XEXP (XEXP (operands[0], 0), 0);
17713 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17715 if (push_p)
17716 strcpy (pattern, "vpush%?.64\t{%P1");
17717 else
17718 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17720 p = strlen (pattern);
17722 gcc_assert (REG_P (operands[1]));
17724 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17725 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17727 p += sprintf (&pattern[p], ", d%d", base + i);
17729 strcpy (&pattern[p], "}");
17731 output_asm_insn (pattern, operands);
17732 return "";
17736 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17737 number of bytes pushed. */
17739 static int
17740 vfp_emit_fstmd (int base_reg, int count)
17742 rtx par;
17743 rtx dwarf;
17744 rtx tmp, reg;
17745 int i;
17747 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17748 register pairs are stored by a store multiple insn. We avoid this
17749 by pushing an extra pair. */
17750 if (count == 2 && !arm_arch6)
17752 if (base_reg == LAST_VFP_REGNUM - 3)
17753 base_reg -= 2;
17754 count++;
17757 /* FSTMD may not store more than 16 doubleword registers at once. Split
17758 larger stores into multiple parts (up to a maximum of two, in
17759 practice). */
17760 if (count > 16)
17762 int saved;
17763 /* NOTE: base_reg is an internal register number, so each D register
17764 counts as 2. */
17765 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17766 saved += vfp_emit_fstmd (base_reg, 16);
17767 return saved;
17770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17771 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17773 reg = gen_rtx_REG (DFmode, base_reg);
17774 base_reg += 2;
17776 XVECEXP (par, 0, 0)
17777 = gen_rtx_SET (VOIDmode,
17778 gen_frame_mem
17779 (BLKmode,
17780 gen_rtx_PRE_MODIFY (Pmode,
17781 stack_pointer_rtx,
17782 plus_constant
17783 (Pmode, stack_pointer_rtx,
17784 - (count * 8)))
17786 gen_rtx_UNSPEC (BLKmode,
17787 gen_rtvec (1, reg),
17788 UNSPEC_PUSH_MULT));
17790 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17791 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17792 RTX_FRAME_RELATED_P (tmp) = 1;
17793 XVECEXP (dwarf, 0, 0) = tmp;
17795 tmp = gen_rtx_SET (VOIDmode,
17796 gen_frame_mem (DFmode, stack_pointer_rtx),
17797 reg);
17798 RTX_FRAME_RELATED_P (tmp) = 1;
17799 XVECEXP (dwarf, 0, 1) = tmp;
17801 for (i = 1; i < count; i++)
17803 reg = gen_rtx_REG (DFmode, base_reg);
17804 base_reg += 2;
17805 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17807 tmp = gen_rtx_SET (VOIDmode,
17808 gen_frame_mem (DFmode,
17809 plus_constant (Pmode,
17810 stack_pointer_rtx,
17811 i * 8)),
17812 reg);
17813 RTX_FRAME_RELATED_P (tmp) = 1;
17814 XVECEXP (dwarf, 0, i + 1) = tmp;
17817 par = emit_insn (par);
17818 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17819 RTX_FRAME_RELATED_P (par) = 1;
17821 return count * 8;
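/* Illustrative sketch (not part of the original file): the size bookkeeping
   done by vfp_emit_fstmd above, reduced to plain C over register counts.
   The helper name and its standalone form are hypothetical; the real
   function also emits the RTL and the DWARF notes.  */
static int
example_fstmd_bytes (int count, int have_arm_arch6)
{
  /* ARM10 VFPr1 workaround: never store exactly two register pairs.  */
  if (count == 2 && !have_arm_arch6)
    count++;

  /* FSTMD stores at most 16 doubleword registers per instruction, so
     larger saves are split and the byte counts summed.  */
  if (count > 16)
    return (example_fstmd_bytes (count - 16, have_arm_arch6)
	    + example_fstmd_bytes (16, have_arm_arch6));

  /* Each doubleword register occupies 8 bytes on the stack.  */
  return count * 8;
}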
17824 /* Emit a call instruction with pattern PAT. ADDR is the address of
17825 the call target. */
17827 void
17828 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17830 rtx insn;
17832 insn = emit_call_insn (pat);
17834 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17835 If the call might use such an entry, add a use of the PIC register
17836 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17837 if (TARGET_VXWORKS_RTP
17838 && flag_pic
17839 && !sibcall
17840 && GET_CODE (addr) == SYMBOL_REF
17841 && (SYMBOL_REF_DECL (addr)
17842 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17843 : !SYMBOL_REF_LOCAL_P (addr)))
17845 require_pic_register ();
17846 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17849 if (TARGET_AAPCS_BASED)
17851 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17852 linker. We need to add an IP clobber to allow setting
17853 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17854 is not needed since it's a fixed register. */
17855 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17856 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17860 /* Output a 'call' insn. */
17861 const char *
17862 output_call (rtx *operands)
17864 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17866 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17867 if (REGNO (operands[0]) == LR_REGNUM)
17869 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17870 output_asm_insn ("mov%?\t%0, %|lr", operands);
17873 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17875 if (TARGET_INTERWORK || arm_arch4t)
17876 output_asm_insn ("bx%?\t%0", operands);
17877 else
17878 output_asm_insn ("mov%?\t%|pc, %0", operands);
17880 return "";
17883 /* Output a 'call' insn that is a reference in memory. This is
17884 disabled for ARMv5 and later, where we prefer a blx, because otherwise
17885 there's a significant performance overhead. */
17886 const char *
17887 output_call_mem (rtx *operands)
17889 gcc_assert (!arm_arch5);
17890 if (TARGET_INTERWORK)
17892 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17893 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17894 output_asm_insn ("bx%?\t%|ip", operands);
17896 else if (regno_use_in (LR_REGNUM, operands[0]))
17898 /* LR is used in the memory address. We load the address in the
17899 first instruction. It's safe to use IP as the target of the
17900 load since the call will kill it anyway. */
17901 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17902 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17903 if (arm_arch4t)
17904 output_asm_insn ("bx%?\t%|ip", operands);
17905 else
17906 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17908 else
17910 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17911 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17914 return "";
17918 /* Output a move from arm registers to arm registers of a long double
17919 OPERANDS[0] is the destination.
17920 OPERANDS[1] is the source. */
17921 const char *
17922 output_mov_long_double_arm_from_arm (rtx *operands)
17924 /* We have to be careful here because the two might overlap. */
17925 int dest_start = REGNO (operands[0]);
17926 int src_start = REGNO (operands[1]);
17927 rtx ops[2];
17928 int i;
17930 if (dest_start < src_start)
17932 for (i = 0; i < 3; i++)
17934 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17935 ops[1] = gen_rtx_REG (SImode, src_start + i);
17936 output_asm_insn ("mov%?\t%0, %1", ops);
17939 else
17941 for (i = 2; i >= 0; i--)
17943 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17944 ops[1] = gen_rtx_REG (SImode, src_start + i);
17945 output_asm_insn ("mov%?\t%0, %1", ops);
17949 return "";
17952 void
17953 arm_emit_movpair (rtx dest, rtx src)
17955 /* If the src is an immediate, simplify it. */
17956 if (CONST_INT_P (src))
17958 HOST_WIDE_INT val = INTVAL (src);
17959 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17960 if ((val >> 16) & 0x0000ffff)
17961 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17962 GEN_INT (16)),
17963 GEN_INT ((val >> 16) & 0x0000ffff));
17964 return;
17966 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17967 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
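/* Illustrative sketch (not part of the original file): how the constant
   case of arm_emit_movpair above splits a 32-bit value.  The low half is
   moved directly (a movw-style SET); the high half, when nonzero, is
   written into bits 16..31 via the ZERO_EXTRACT (a movt).  The helper
   name is hypothetical.  */
static void
example_split_movw_movt (unsigned int val,
			 unsigned int *low16, unsigned int *high16)
{
  *low16 = val & 0x0000ffff;		/* first emit_set_insn  */
  *high16 = (val >> 16) & 0x0000ffff;	/* second one, only if nonzero  */
}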
17970 /* Output a move between double words. It must be REG<-MEM
17971 or MEM<-REG. */
17972 const char *
17973 output_move_double (rtx *operands, bool emit, int *count)
17975 enum rtx_code code0 = GET_CODE (operands[0]);
17976 enum rtx_code code1 = GET_CODE (operands[1]);
17977 rtx otherops[3];
17978 if (count)
17979 *count = 1;
17981 /* The only case when this might happen is when
17982 you are looking at the length of a DImode instruction
17983 that has an invalid constant in it. */
17984 if (code0 == REG && code1 != MEM)
17986 gcc_assert (!emit);
17987 *count = 2;
17988 return "";
17991 if (code0 == REG)
17993 unsigned int reg0 = REGNO (operands[0]);
17995 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17997 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17999 switch (GET_CODE (XEXP (operands[1], 0)))
18001 case REG:
18003 if (emit)
18005 if (TARGET_LDRD
18006 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18007 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
18008 else
18009 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18011 break;
18013 case PRE_INC:
18014 gcc_assert (TARGET_LDRD);
18015 if (emit)
18016 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18017 break;
18019 case PRE_DEC:
18020 if (emit)
18022 if (TARGET_LDRD)
18023 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18024 else
18025 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18027 break;
18029 case POST_INC:
18030 if (emit)
18032 if (TARGET_LDRD)
18033 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18034 else
18035 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18037 break;
18039 case POST_DEC:
18040 gcc_assert (TARGET_LDRD);
18041 if (emit)
18042 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18043 break;
18045 case PRE_MODIFY:
18046 case POST_MODIFY:
18047 /* Autoincrement addressing modes should never have overlapping
18048 base and destination registers, and overlapping index registers
18049 are already prohibited, so this doesn't need to worry about
18050 fix_cm3_ldrd. */
18051 otherops[0] = operands[0];
18052 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18053 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18055 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18057 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18059 /* Registers overlap so split out the increment. */
18060 if (emit)
18062 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18063 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18065 if (count)
18066 *count = 2;
18068 else
18070 /* Use a single insn if we can.
18071 FIXME: IWMMXT allows offsets larger than ldrd can
18072 handle, fix these up with a pair of ldr. */
18073 if (TARGET_THUMB2
18074 || !CONST_INT_P (otherops[2])
18075 || (INTVAL (otherops[2]) > -256
18076 && INTVAL (otherops[2]) < 256))
18078 if (emit)
18079 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18081 else
18083 if (emit)
18085 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18086 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18088 if (count)
18089 *count = 2;
18094 else
18096 /* Use a single insn if we can.
18097 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18098 fix these up with a pair of ldr. */
18099 if (TARGET_THUMB2
18100 || !CONST_INT_P (otherops[2])
18101 || (INTVAL (otherops[2]) > -256
18102 && INTVAL (otherops[2]) < 256))
18104 if (emit)
18105 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18107 else
18109 if (emit)
18111 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18112 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18114 if (count)
18115 *count = 2;
18118 break;
18120 case LABEL_REF:
18121 case CONST:
18122 /* We might be able to use ldrd %0, %1 here. However the range is
18123 different to ldr/adr, and it is broken on some ARMv7-M
18124 implementations. */
18125 /* Use the second register of the pair to avoid problematic
18126 overlap. */
18127 otherops[1] = operands[1];
18128 if (emit)
18129 output_asm_insn ("adr%?\t%0, %1", otherops);
18130 operands[1] = otherops[0];
18131 if (emit)
18133 if (TARGET_LDRD)
18134 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18135 else
18136 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18139 if (count)
18140 *count = 2;
18141 break;
18143 /* ??? This needs checking for thumb2. */
18144 default:
18145 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18146 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18148 otherops[0] = operands[0];
18149 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18150 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18152 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18154 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18156 switch ((int) INTVAL (otherops[2]))
18158 case -8:
18159 if (emit)
18160 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18161 return "";
18162 case -4:
18163 if (TARGET_THUMB2)
18164 break;
18165 if (emit)
18166 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18167 return "";
18168 case 4:
18169 if (TARGET_THUMB2)
18170 break;
18171 if (emit)
18172 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18173 return "";
18176 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18177 operands[1] = otherops[0];
18178 if (TARGET_LDRD
18179 && (REG_P (otherops[2])
18180 || TARGET_THUMB2
18181 || (CONST_INT_P (otherops[2])
18182 && INTVAL (otherops[2]) > -256
18183 && INTVAL (otherops[2]) < 256)))
18185 if (reg_overlap_mentioned_p (operands[0],
18186 otherops[2]))
18188 rtx tmp;
18189 /* Swap base and index registers over to
18190 avoid a conflict. */
18191 tmp = otherops[1];
18192 otherops[1] = otherops[2];
18193 otherops[2] = tmp;
18195 /* If both registers conflict, it will usually
18196 have been fixed by a splitter. */
18197 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18198 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18200 if (emit)
18202 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18203 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18205 if (count)
18206 *count = 2;
18208 else
18210 otherops[0] = operands[0];
18211 if (emit)
18212 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18214 return "";
18217 if (CONST_INT_P (otherops[2]))
18219 if (emit)
18221 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18222 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18223 else
18224 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18227 else
18229 if (emit)
18230 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18233 else
18235 if (emit)
18236 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18239 if (count)
18240 *count = 2;
18242 if (TARGET_LDRD)
18243 return "ldr%(d%)\t%0, [%1]";
18245 return "ldm%(ia%)\t%1, %M0";
18247 else
18249 otherops[1] = adjust_address (operands[1], SImode, 4);
18250 /* Take care of overlapping base/data reg. */
18251 if (reg_mentioned_p (operands[0], operands[1]))
18253 if (emit)
18255 output_asm_insn ("ldr%?\t%0, %1", otherops);
18256 output_asm_insn ("ldr%?\t%0, %1", operands);
18258 if (count)
18259 *count = 2;
18262 else
18264 if (emit)
18266 output_asm_insn ("ldr%?\t%0, %1", operands);
18267 output_asm_insn ("ldr%?\t%0, %1", otherops);
18269 if (count)
18270 *count = 2;
18275 else
18277 /* Constraints should ensure this. */
18278 gcc_assert (code0 == MEM && code1 == REG);
18279 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18280 || (TARGET_ARM && TARGET_LDRD));
18282 switch (GET_CODE (XEXP (operands[0], 0)))
18284 case REG:
18285 if (emit)
18287 if (TARGET_LDRD)
18288 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18289 else
18290 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18292 break;
18294 case PRE_INC:
18295 gcc_assert (TARGET_LDRD);
18296 if (emit)
18297 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18298 break;
18300 case PRE_DEC:
18301 if (emit)
18303 if (TARGET_LDRD)
18304 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18305 else
18306 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18308 break;
18310 case POST_INC:
18311 if (emit)
18313 if (TARGET_LDRD)
18314 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18315 else
18316 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18318 break;
18320 case POST_DEC:
18321 gcc_assert (TARGET_LDRD);
18322 if (emit)
18323 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18324 break;
18326 case PRE_MODIFY:
18327 case POST_MODIFY:
18328 otherops[0] = operands[1];
18329 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18330 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18332 /* IWMMXT allows offsets larger than strd can handle,
18333 fix these up with a pair of str. */
18334 if (!TARGET_THUMB2
18335 && CONST_INT_P (otherops[2])
18336 && (INTVAL(otherops[2]) <= -256
18337 || INTVAL(otherops[2]) >= 256))
18339 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18341 if (emit)
18343 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18344 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18346 if (count)
18347 *count = 2;
18349 else
18351 if (emit)
18353 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18354 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18356 if (count)
18357 *count = 2;
18360 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18362 if (emit)
18363 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18365 else
18367 if (emit)
18368 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18370 break;
18372 case PLUS:
18373 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18374 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18376 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18378 case -8:
18379 if (emit)
18380 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18381 return "";
18383 case -4:
18384 if (TARGET_THUMB2)
18385 break;
18386 if (emit)
18387 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18388 return "";
18390 case 4:
18391 if (TARGET_THUMB2)
18392 break;
18393 if (emit)
18394 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18395 return "";
18398 if (TARGET_LDRD
18399 && (REG_P (otherops[2])
18400 || TARGET_THUMB2
18401 || (CONST_INT_P (otherops[2])
18402 && INTVAL (otherops[2]) > -256
18403 && INTVAL (otherops[2]) < 256)))
18405 otherops[0] = operands[1];
18406 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18407 if (emit)
18408 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18409 return "";
18411 /* Fall through */
18413 default:
18414 otherops[0] = adjust_address (operands[0], SImode, 4);
18415 otherops[1] = operands[1];
18416 if (emit)
18418 output_asm_insn ("str%?\t%1, %0", operands);
18419 output_asm_insn ("str%?\t%H1, %0", otherops);
18421 if (count)
18422 *count = 2;
18426 return "";
18429 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18430 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18432 const char *
18433 output_move_quad (rtx *operands)
18435 if (REG_P (operands[0]))
18437 /* Load, or reg->reg move. */
18439 if (MEM_P (operands[1]))
18441 switch (GET_CODE (XEXP (operands[1], 0)))
18443 case REG:
18444 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18445 break;
18447 case LABEL_REF:
18448 case CONST:
18449 output_asm_insn ("adr%?\t%0, %1", operands);
18450 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18451 break;
18453 default:
18454 gcc_unreachable ();
18457 else
18459 rtx ops[2];
18460 int dest, src, i;
18462 gcc_assert (REG_P (operands[1]));
18464 dest = REGNO (operands[0]);
18465 src = REGNO (operands[1]);
18467 /* This seems pretty dumb, but hopefully GCC won't try to do it
18468 very often. */
18469 if (dest < src)
18470 for (i = 0; i < 4; i++)
18472 ops[0] = gen_rtx_REG (SImode, dest + i);
18473 ops[1] = gen_rtx_REG (SImode, src + i);
18474 output_asm_insn ("mov%?\t%0, %1", ops);
18476 else
18477 for (i = 3; i >= 0; i--)
18479 ops[0] = gen_rtx_REG (SImode, dest + i);
18480 ops[1] = gen_rtx_REG (SImode, src + i);
18481 output_asm_insn ("mov%?\t%0, %1", ops);
18485 else
18487 gcc_assert (MEM_P (operands[0]));
18488 gcc_assert (REG_P (operands[1]));
18489 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18491 switch (GET_CODE (XEXP (operands[0], 0)))
18493 case REG:
18494 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18495 break;
18497 default:
18498 gcc_unreachable ();
18502 return "";
18505 /* Output a VFP load or store instruction. */
18507 const char *
18508 output_move_vfp (rtx *operands)
18510 rtx reg, mem, addr, ops[2];
18511 int load = REG_P (operands[0]);
18512 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18513 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18514 const char *templ;
18515 char buff[50];
18516 machine_mode mode;
18518 reg = operands[!load];
18519 mem = operands[load];
18521 mode = GET_MODE (reg);
18523 gcc_assert (REG_P (reg));
18524 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18525 gcc_assert (mode == SFmode
18526 || mode == DFmode
18527 || mode == SImode
18528 || mode == DImode
18529 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18530 gcc_assert (MEM_P (mem));
18532 addr = XEXP (mem, 0);
18534 switch (GET_CODE (addr))
18536 case PRE_DEC:
18537 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18538 ops[0] = XEXP (addr, 0);
18539 ops[1] = reg;
18540 break;
18542 case POST_INC:
18543 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18544 ops[0] = XEXP (addr, 0);
18545 ops[1] = reg;
18546 break;
18548 default:
18549 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18550 ops[0] = reg;
18551 ops[1] = mem;
18552 break;
18555 sprintf (buff, templ,
18556 load ? "ld" : "st",
18557 dp ? "64" : "32",
18558 dp ? "P" : "",
18559 integer_p ? "\t%@ int" : "");
18560 output_asm_insn (buff, ops);
18562 return "";
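/* Illustrative sketch (not part of the original file): how the POST_INC
   template in output_move_vfp above expands.  For a double-precision
   load this produces "vldmia%?.64\t%0!, {%P1}"; the standalone helper
   and its arguments are hypothetical.  Assumes sprintf from <stdio.h>,
   which this file already has via system.h.  */
static void
example_vfp_postinc_template (char *buff, int load, int dp)
{
  sprintf (buff, "v%smia%%?.%s\t%%0!, {%%%s1}%s",
	   load ? "ld" : "st",
	   dp ? "64" : "32",
	   dp ? "P" : "",
	   "");	/* the real code appends "\t%@ int" for integer modes  */
}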
18565 /* Output a Neon double-word or quad-word load or store, or a load
18566 or store for larger structure modes.
18568 WARNING: The ordering of elements is weird in big-endian mode,
18569 because the EABI requires that vectors stored in memory appear
18570 as though they were stored by a VSTM instruction.
18571 GCC RTL defines element ordering based on in-memory order.
18572 This can be different from the architectural ordering of elements
18573 within a NEON register. The intrinsics defined in arm_neon.h use the
18574 NEON register element ordering, not the GCC RTL element ordering.
18576 For example, the in-memory ordering of a big-endian quadword
18577 vector with 16-bit elements when stored from register pair {d0,d1}
18578 will be (lowest address first, d0[N] is NEON register element N):
18580 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18582 When necessary, quadword registers (dN, dN+1) are moved to ARM
18583 registers from rN in the order:
18585 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18587 So that STM/LDM can be used on vectors in ARM registers, and the
18588 same memory layout will result as if VSTM/VLDM were used.
18590 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18591 possible, which allows use of appropriate alignment tags.
18592 Note that the choice of "64" is independent of the actual vector
18593 element size; this size simply ensures that the behavior is
18594 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18596 Due to limitations of those instructions, use of VST1.64/VLD1.64
18597 is not possible if:
18598 - the address contains PRE_DEC, or
18599 - the mode refers to more than 4 double-word registers
18601 In those cases, it would be possible to replace VSTM/VLDM by a
18602 sequence of instructions; this is not currently implemented since
18603 this is not certain to actually improve performance. */
18605 const char *
18606 output_move_neon (rtx *operands)
18608 rtx reg, mem, addr, ops[2];
18609 int regno, nregs, load = REG_P (operands[0]);
18610 const char *templ;
18611 char buff[50];
18612 machine_mode mode;
18614 reg = operands[!load];
18615 mem = operands[load];
18617 mode = GET_MODE (reg);
18619 gcc_assert (REG_P (reg));
18620 regno = REGNO (reg);
18621 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18622 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18623 || NEON_REGNO_OK_FOR_QUAD (regno));
18624 gcc_assert (VALID_NEON_DREG_MODE (mode)
18625 || VALID_NEON_QREG_MODE (mode)
18626 || VALID_NEON_STRUCT_MODE (mode));
18627 gcc_assert (MEM_P (mem));
18629 addr = XEXP (mem, 0);
18631 /* Strip off const from addresses like (const (plus (...))). */
18632 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18633 addr = XEXP (addr, 0);
18635 switch (GET_CODE (addr))
18637 case POST_INC:
18638 /* We have to use vldm / vstm for too-large modes. */
18639 if (nregs > 4)
18641 templ = "v%smia%%?\t%%0!, %%h1";
18642 ops[0] = XEXP (addr, 0);
18644 else
18646 templ = "v%s1.64\t%%h1, %%A0";
18647 ops[0] = mem;
18649 ops[1] = reg;
18650 break;
18652 case PRE_DEC:
18653 /* We have to use vldm / vstm in this case, since there is no
18654 pre-decrement form of the vld1 / vst1 instructions. */
18655 templ = "v%smdb%%?\t%%0!, %%h1";
18656 ops[0] = XEXP (addr, 0);
18657 ops[1] = reg;
18658 break;
18660 case POST_MODIFY:
18661 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18662 gcc_unreachable ();
18664 case REG:
18665 /* We have to use vldm / vstm for too-large modes. */
18666 if (nregs > 1)
18668 if (nregs > 4)
18669 templ = "v%smia%%?\t%%m0, %%h1";
18670 else
18671 templ = "v%s1.64\t%%h1, %%A0";
18673 ops[0] = mem;
18674 ops[1] = reg;
18675 break;
18677 /* Fall through. */
18678 case LABEL_REF:
18679 case PLUS:
18681 int i;
18682 int overlap = -1;
18683 for (i = 0; i < nregs; i++)
18685 /* We're only using DImode here because it's a convenient size. */
18686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18687 ops[1] = adjust_address (mem, DImode, 8 * i);
18688 if (reg_overlap_mentioned_p (ops[0], mem))
18690 gcc_assert (overlap == -1);
18691 overlap = i;
18693 else
18695 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18696 output_asm_insn (buff, ops);
18699 if (overlap != -1)
18701 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18702 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18703 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18704 output_asm_insn (buff, ops);
18707 return "";
18710 default:
18711 gcc_unreachable ();
18714 sprintf (buff, templ, load ? "ld" : "st");
18715 output_asm_insn (buff, ops);
18717 return "";
18720 /* Compute and return the length of neon_mov<mode>, where <mode> is
18721 one of VSTRUCT modes: EI, OI, CI or XI. */
18723 arm_attr_length_move_neon (rtx_insn *insn)
18725 rtx reg, mem, addr;
18726 int load;
18727 machine_mode mode;
18729 extract_insn_cached (insn);
18731 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18733 mode = GET_MODE (recog_data.operand[0]);
18734 switch (mode)
18736 case EImode:
18737 case OImode:
18738 return 8;
18739 case CImode:
18740 return 12;
18741 case XImode:
18742 return 16;
18743 default:
18744 gcc_unreachable ();
18748 load = REG_P (recog_data.operand[0]);
18749 reg = recog_data.operand[!load];
18750 mem = recog_data.operand[load];
18752 gcc_assert (MEM_P (mem));
18754 mode = GET_MODE (reg);
18755 addr = XEXP (mem, 0);
18757 /* Strip off const from addresses like (const (plus (...))). */
18758 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18759 addr = XEXP (addr, 0);
18761 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18763 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18764 return insns * 4;
18766 else
18767 return 4;
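/* Illustrative sketch (not part of the original file): the length rule
   applied by arm_attr_length_move_neon above for memory operands.  Label
   or reg-plus-offset addresses need one 4-byte vldr/vstr per doubleword
   register moved; every other address form is a single 4-byte insn.  The
   helper takes the doubleword-register count directly and is
   hypothetical.  */
static int
example_neon_move_length (int doubleword_regs, int label_or_plus_address)
{
  return label_or_plus_address ? doubleword_regs * 4 : 4;
}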
18770 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18771 return zero. */
18774 arm_address_offset_is_imm (rtx_insn *insn)
18776 rtx mem, addr;
18778 extract_insn_cached (insn);
18780 if (REG_P (recog_data.operand[0]))
18781 return 0;
18783 mem = recog_data.operand[0];
18785 gcc_assert (MEM_P (mem));
18787 addr = XEXP (mem, 0);
18789 if (REG_P (addr)
18790 || (GET_CODE (addr) == PLUS
18791 && REG_P (XEXP (addr, 0))
18792 && CONST_INT_P (XEXP (addr, 1))))
18793 return 1;
18794 else
18795 return 0;
18798 /* Output an ADD r, s, #n where n may be too big for one instruction.
18799 If adding zero to one register, output nothing. */
18800 const char *
18801 output_add_immediate (rtx *operands)
18803 HOST_WIDE_INT n = INTVAL (operands[2]);
18805 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18807 if (n < 0)
18808 output_multi_immediate (operands,
18809 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18810 -n);
18811 else
18812 output_multi_immediate (operands,
18813 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18817 return "";
18820 /* Output a multiple immediate operation.
18821 OPERANDS is the vector of operands referred to in the output patterns.
18822 INSTR1 is the output pattern to use for the first constant.
18823 INSTR2 is the output pattern to use for subsequent constants.
18824 IMMED_OP is the index of the constant slot in OPERANDS.
18825 N is the constant value. */
18826 static const char *
18827 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18828 int immed_op, HOST_WIDE_INT n)
18830 #if HOST_BITS_PER_WIDE_INT > 32
18831 n &= 0xffffffff;
18832 #endif
18834 if (n == 0)
18836 /* Quick and easy output. */
18837 operands[immed_op] = const0_rtx;
18838 output_asm_insn (instr1, operands);
18840 else
18842 int i;
18843 const char * instr = instr1;
18845 /* Note that n is never zero here (which would give no output). */
18846 for (i = 0; i < 32; i += 2)
18848 if (n & (3 << i))
18850 operands[immed_op] = GEN_INT (n & (255 << i));
18851 output_asm_insn (instr, operands);
18852 instr = instr2;
18853 i += 6;
18858 return "";
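/* Illustrative sketch (not part of the original file): the chunking rule
   used by output_multi_immediate above, returning how many add/sub
   instructions a constant needs.  Each chunk is an 8-bit value starting
   at an even bit position, matching the ARM immediate encoding.  The
   helper is hypothetical and assumes a 32-bit unsigned int.  */
static int
example_count_immediate_chunks (unsigned int n)
{
  int i, chunks = 0;

  if (n == 0)
    return 1;	/* a single instruction is still emitted for zero  */

  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
	chunks++;	/* one instruction covers the 8-bit field at bit i  */
	i += 6;		/* with the loop's own += 2, the next field starts at i + 8  */
      }

  return chunks;
}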
18861 /* Return the name of a shifter operation. */
18862 static const char *
18863 arm_shift_nmem(enum rtx_code code)
18865 switch (code)
18867 case ASHIFT:
18868 return ARM_LSL_NAME;
18870 case ASHIFTRT:
18871 return "asr";
18873 case LSHIFTRT:
18874 return "lsr";
18876 case ROTATERT:
18877 return "ror";
18879 default:
18880 abort();
18884 /* Return the appropriate ARM instruction for the operation code.
18885 The returned result should not be overwritten. OP is the rtx of the
18886 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18887 was shifted. */
18888 const char *
18889 arithmetic_instr (rtx op, int shift_first_arg)
18891 switch (GET_CODE (op))
18893 case PLUS:
18894 return "add";
18896 case MINUS:
18897 return shift_first_arg ? "rsb" : "sub";
18899 case IOR:
18900 return "orr";
18902 case XOR:
18903 return "eor";
18905 case AND:
18906 return "and";
18908 case ASHIFT:
18909 case ASHIFTRT:
18910 case LSHIFTRT:
18911 case ROTATERT:
18912 return arm_shift_nmem(GET_CODE(op));
18914 default:
18915 gcc_unreachable ();
18919 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18920 for the operation code. The returned result should not be overwritten.
18921 OP is the rtx of the shift.
18922 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18923 shift amount if the shift is by a constant. */
18924 static const char *
18925 shift_op (rtx op, HOST_WIDE_INT *amountp)
18927 const char * mnem;
18928 enum rtx_code code = GET_CODE (op);
18930 switch (code)
18932 case ROTATE:
18933 if (!CONST_INT_P (XEXP (op, 1)))
18935 output_operand_lossage ("invalid shift operand");
18936 return NULL;
18939 code = ROTATERT;
18940 *amountp = 32 - INTVAL (XEXP (op, 1));
18941 mnem = "ror";
18942 break;
18944 case ASHIFT:
18945 case ASHIFTRT:
18946 case LSHIFTRT:
18947 case ROTATERT:
18948 mnem = arm_shift_nmem(code);
18949 if (CONST_INT_P (XEXP (op, 1)))
18951 *amountp = INTVAL (XEXP (op, 1));
18953 else if (REG_P (XEXP (op, 1)))
18955 *amountp = -1;
18956 return mnem;
18958 else
18960 output_operand_lossage ("invalid shift operand");
18961 return NULL;
18963 break;
18965 case MULT:
18966 /* We never have to worry about the amount being other than a
18967 power of 2, since this case can never be reloaded from a reg. */
18968 if (!CONST_INT_P (XEXP (op, 1)))
18970 output_operand_lossage ("invalid shift operand");
18971 return NULL;
18974 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18976 /* Amount must be a power of two. */
18977 if (*amountp & (*amountp - 1))
18979 output_operand_lossage ("invalid shift operand");
18980 return NULL;
18983 *amountp = int_log2 (*amountp);
18984 return ARM_LSL_NAME;
18986 default:
18987 output_operand_lossage ("invalid shift operand");
18988 return NULL;
18991 /* This is not 100% correct, but follows from the desire to merge
18992 multiplication by a power of 2 with the recognizer for a
18993 shift. >=32 is not a valid shift for "lsl", so we must try and
18994 output a shift that produces the correct arithmetical result.
18995 Using lsr #32 is identical except for the fact that the carry bit
18996 is not set correctly if we set the flags; but we never use the
18997 carry bit from such an operation, so we can ignore that. */
18998 if (code == ROTATERT)
18999 /* Rotate is just modulo 32. */
19000 *amountp &= 31;
19001 else if (*amountp != (*amountp & 31))
19003 if (code == ASHIFT)
19004 mnem = "lsr";
19005 *amountp = 32;
19008 /* Shifts of 0 are no-ops. */
19009 if (*amountp == 0)
19010 return NULL;
19012 return mnem;
19015 /* Obtain the shift from the POWER of two. */
19017 static HOST_WIDE_INT
19018 int_log2 (HOST_WIDE_INT power)
19020 HOST_WIDE_INT shift = 0;
19022 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19024 gcc_assert (shift <= 31);
19025 shift++;
19028 return shift;
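/* Illustrative sketch (not part of the original file): how the MULT case
   of shift_op above, together with int_log2, rewrites a multiply by a
   power of two as an "lsl".  The helper name is hypothetical and returns
   -1 where the real code reports an invalid operand instead.  */
static int
example_mult_to_lsl_amount (unsigned int multiplier)
{
  int shift = 0;

  /* The amount must be a nonzero power of two.  */
  if (multiplier == 0 || (multiplier & (multiplier - 1)) != 0)
    return -1;

  while (((1u << shift) & multiplier) == 0)
    shift++;

  /* MUL rD, rN, #multiplier  becomes  LSL rD, rN, #shift.  */
  return shift;
}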
19031 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19032 because /bin/as is horribly restrictive. The judgement about
19033 whether or not each character is 'printable' (and can be output as
19034 is) or not (and must be printed with an octal escape) must be made
19035 with reference to the *host* character set -- the situation is
19036 similar to that discussed in the comments above pp_c_char in
19037 c-pretty-print.c. */
19039 #define MAX_ASCII_LEN 51
19041 void
19042 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19044 int i;
19045 int len_so_far = 0;
19047 fputs ("\t.ascii\t\"", stream);
19049 for (i = 0; i < len; i++)
19051 int c = p[i];
19053 if (len_so_far >= MAX_ASCII_LEN)
19055 fputs ("\"\n\t.ascii\t\"", stream);
19056 len_so_far = 0;
19059 if (ISPRINT (c))
19061 if (c == '\\' || c == '\"')
19063 putc ('\\', stream);
19064 len_so_far++;
19066 putc (c, stream);
19067 len_so_far++;
19069 else
19071 fprintf (stream, "\\%03o", c);
19072 len_so_far += 4;
19076 fputs ("\"\n", stream);
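/* Illustrative sketch (not part of the original file): the per-character
   rule applied by output_ascii_pseudo_op above.  Printable characters go
   out as-is, with backslash and double quote escaped; everything else
   becomes a three-digit octal escape that costs four bytes of .ascii
   payload.  Hypothetical helper; assumes ISPRINT behaves like the host
   isprint() and that OUT holds at least five bytes.  */
static int
example_ascii_escape (int c, char *out)
{
  int n = 0;

  if (isprint (c))
    {
      if (c == '\\' || c == '\"')
	out[n++] = '\\';
      out[n++] = (char) c;
      out[n] = '\0';
      return n;
    }

  return sprintf (out, "\\%03o", (unsigned int) c);	/* e.g. '\n' -> "\012"  */
}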
19079 /* Compute the register save mask for registers 0 through 12
19080 inclusive. This code is used by arm_compute_save_reg_mask. */
19082 static unsigned long
19083 arm_compute_save_reg0_reg12_mask (void)
19085 unsigned long func_type = arm_current_func_type ();
19086 unsigned long save_reg_mask = 0;
19087 unsigned int reg;
19089 if (IS_INTERRUPT (func_type))
19091 unsigned int max_reg;
19092 /* Interrupt functions must not corrupt any registers,
19093 even call clobbered ones. If this is a leaf function
19094 we can just examine the registers used by the RTL, but
19095 otherwise we have to assume that whatever function is
19096 called might clobber anything, and so we have to save
19097 all the call-clobbered registers as well. */
19098 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19099 /* FIQ handlers have registers r8 - r12 banked, so
19100 we only need to check r0 - r7. Normal ISRs only
19101 bank r14 and r15, so we must check up to r12.
19102 r13 is the stack pointer, which is always preserved,
19103 so we do not need to consider it here. */
19104 max_reg = 7;
19105 else
19106 max_reg = 12;
19108 for (reg = 0; reg <= max_reg; reg++)
19109 if (df_regs_ever_live_p (reg)
19110 || (! crtl->is_leaf && call_used_regs[reg]))
19111 save_reg_mask |= (1 << reg);
19113 /* Also save the pic base register if necessary. */
19114 if (flag_pic
19115 && !TARGET_SINGLE_PIC_BASE
19116 && arm_pic_register != INVALID_REGNUM
19117 && crtl->uses_pic_offset_table)
19118 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19120 else if (IS_VOLATILE(func_type))
19122 /* For noreturn functions we historically omitted register saves
19123 altogether. However this really messes up debugging. As a
19124 compromise save just the frame pointers. Combined with the link
19125 register saved elsewhere this should be sufficient to get
19126 a backtrace. */
19127 if (frame_pointer_needed)
19128 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19129 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19130 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19131 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19132 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19134 else
19136 /* In the normal case we only need to save those registers
19137 which are call saved and which are used by this function. */
19138 for (reg = 0; reg <= 11; reg++)
19139 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19140 save_reg_mask |= (1 << reg);
19142 /* Handle the frame pointer as a special case. */
19143 if (frame_pointer_needed)
19144 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19146 /* If we aren't loading the PIC register,
19147 don't stack it even though it may be live. */
19148 if (flag_pic
19149 && !TARGET_SINGLE_PIC_BASE
19150 && arm_pic_register != INVALID_REGNUM
19151 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19152 || crtl->uses_pic_offset_table))
19153 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19155 /* The prologue will copy SP into R0, so save it. */
19156 if (IS_STACKALIGN (func_type))
19157 save_reg_mask |= 1;
19160 /* Save registers so the exception handler can modify them. */
19161 if (crtl->calls_eh_return)
19163 unsigned int i;
19165 for (i = 0; ; i++)
19167 reg = EH_RETURN_DATA_REGNO (i);
19168 if (reg == INVALID_REGNUM)
19169 break;
19170 save_reg_mask |= 1 << reg;
19174 return save_reg_mask;
19177 /* Return true if r3 is live at the start of the function. */
19179 static bool
19180 arm_r3_live_at_start_p (void)
19182 /* Just look at cfg info, which is still close enough to correct at this
19183 point. This gives false positives for broken functions that might use
19184 uninitialized data that happens to be allocated in r3, but who cares? */
19185 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19188 /* Compute the number of bytes used to store the static chain register on the
19189 stack, above the stack frame. We need to know this accurately to get the
19190 alignment of the rest of the stack frame correct. */
19192 static int
19193 arm_compute_static_chain_stack_bytes (void)
19195 /* See the defining assertion in arm_expand_prologue. */
19196 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19197 && IS_NESTED (arm_current_func_type ())
19198 && arm_r3_live_at_start_p ()
19199 && crtl->args.pretend_args_size == 0)
19200 return 4;
19202 return 0;
19205 /* Compute a bit mask of which registers need to be
19206 saved on the stack for the current function.
19207 This is used by arm_get_frame_offsets, which may add extra registers. */
19209 static unsigned long
19210 arm_compute_save_reg_mask (void)
19212 unsigned int save_reg_mask = 0;
19213 unsigned long func_type = arm_current_func_type ();
19214 unsigned int reg;
19216 if (IS_NAKED (func_type))
19217 /* This should never really happen. */
19218 return 0;
19220 /* If we are creating a stack frame, then we must save the frame pointer,
19221 IP (which will hold the old stack pointer), LR and the PC. */
19222 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19223 save_reg_mask |=
19224 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19225 | (1 << IP_REGNUM)
19226 | (1 << LR_REGNUM)
19227 | (1 << PC_REGNUM);
19229 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19231 /* Decide if we need to save the link register.
19232 Interrupt routines have their own banked link register,
19233 so they never need to save it.
19234 Otherwise if we do not use the link register we do not need to save
19235 it. If we are pushing other registers onto the stack however, we
19236 can save an instruction in the epilogue by pushing the link register
19237 now and then popping it back into the PC. This incurs extra memory
19238 accesses though, so we only do it when optimizing for size, and only
19239 if we know that we will not need a fancy return sequence. */
19240 if (df_regs_ever_live_p (LR_REGNUM)
19241 || (save_reg_mask
19242 && optimize_size
19243 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19244 && !crtl->calls_eh_return))
19245 save_reg_mask |= 1 << LR_REGNUM;
19247 if (cfun->machine->lr_save_eliminated)
19248 save_reg_mask &= ~ (1 << LR_REGNUM);
19250 if (TARGET_REALLY_IWMMXT
19251 && ((bit_count (save_reg_mask)
19252 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19253 arm_compute_static_chain_stack_bytes())
19254 ) % 2) != 0)
19256 /* The total number of registers that are going to be pushed
19257 onto the stack is odd. We need to ensure that the stack
19258 is 64-bit aligned before we start to save iWMMXt registers,
19259 and also before we start to create locals. (A local variable
19260 might be a double or long long which we will load/store using
19261 an iWMMXt instruction). Therefore we need to push another
19262 ARM register, so that the stack will be 64-bit aligned. We
19263 try to avoid using the arg registers (r0 -r3) as they might be
19264 used to pass values in a tail call. */
19265 for (reg = 4; reg <= 12; reg++)
19266 if ((save_reg_mask & (1 << reg)) == 0)
19267 break;
19269 if (reg <= 12)
19270 save_reg_mask |= (1 << reg);
19271 else
19273 cfun->machine->sibcall_blocked = 1;
19274 save_reg_mask |= (1 << 3);
19278 /* We may need to push an additional register for use initializing the
19279 PIC base register. */
19280 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19281 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19283 reg = thumb_find_work_register (1 << 4);
19284 if (!call_used_regs[reg])
19285 save_reg_mask |= (1 << reg);
19288 return save_reg_mask;
19292 /* Compute a bit mask of which registers need to be
19293 saved on the stack for the current function. */
19294 static unsigned long
19295 thumb1_compute_save_reg_mask (void)
19297 unsigned long mask;
19298 unsigned reg;
19300 mask = 0;
19301 for (reg = 0; reg < 12; reg ++)
19302 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19303 mask |= 1 << reg;
19305 if (flag_pic
19306 && !TARGET_SINGLE_PIC_BASE
19307 && arm_pic_register != INVALID_REGNUM
19308 && crtl->uses_pic_offset_table)
19309 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19311 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19312 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19313 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19315 /* LR will also be pushed if any lo regs are pushed. */
19316 if (mask & 0xff || thumb_force_lr_save ())
19317 mask |= (1 << LR_REGNUM);
19319 /* Make sure we have a low work register if we need one.
19320 We will need one if we are going to push a high register,
19321 but we are not currently intending to push a low register. */
19322 if ((mask & 0xff) == 0
19323 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19325 /* Use thumb_find_work_register to choose which register
19326 we will use. If the register is live then we will
19327 have to push it. Use LAST_LO_REGNUM as our fallback
19328 choice for the register to select. */
19329 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19330 /* Make sure the register returned by thumb_find_work_register is
19331 not part of the return value. */
19332 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19333 reg = LAST_LO_REGNUM;
19335 if (! call_used_regs[reg])
19336 mask |= 1 << reg;
19339 /* The 504 below is 8 bytes less than 512 because there are two possible
19340 alignment words. We can't tell here if they will be present or not so we
19341 have to play it safe and assume that they are. */
19342 if ((CALLER_INTERWORKING_SLOT_SIZE +
19343 ROUND_UP_WORD (get_frame_size ()) +
19344 crtl->outgoing_args_size) >= 504)
19346 /* This is the same as the code in thumb1_expand_prologue() which
19347 determines which register to use for stack decrement. */
19348 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19349 if (mask & (1 << reg))
19350 break;
19352 if (reg > LAST_LO_REGNUM)
19354 /* Make sure we have a register available for stack decrement. */
19355 mask |= 1 << LAST_LO_REGNUM;
19359 return mask;
19363 /* Return the number of bytes required to save VFP registers. */
19364 static int
19365 arm_get_vfp_saved_size (void)
19367 unsigned int regno;
19368 int count;
19369 int saved;
19371 saved = 0;
19372 /* Space for saved VFP registers. */
19373 if (TARGET_HARD_FLOAT && TARGET_VFP)
19375 count = 0;
19376 for (regno = FIRST_VFP_REGNUM;
19377 regno < LAST_VFP_REGNUM;
19378 regno += 2)
19380 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19381 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19383 if (count > 0)
19385 /* Workaround ARM10 VFPr1 bug. */
19386 if (count == 2 && !arm_arch6)
19387 count++;
19388 saved += count * 8;
19390 count = 0;
19392 else
19393 count++;
19395 if (count > 0)
19397 if (count == 2 && !arm_arch6)
19398 count++;
19399 saved += count * 8;
19402 return saved;
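/* Illustrative sketch (not part of the original file): the run-length
   accounting in arm_get_vfp_saved_size above, over a hypothetical array
   saying which doubleword register pairs must be saved.  Each maximal
   run of pairs is stored by one multiple-store, with the ARM10 VFPr1
   workaround (never exactly two pairs) applied per run.  */
static int
example_vfp_saved_bytes (const int *pair_must_save, int npairs,
			 int have_arm_arch6)
{
  int i, count = 0, saved = 0;

  for (i = 0; i <= npairs; i++)
    {
      if (i < npairs && pair_must_save[i])
	count++;
      else if (count > 0)
	{
	  if (count == 2 && !have_arm_arch6)
	    count++;		/* push an extra pair  */
	  saved += count * 8;
	  count = 0;
	}
    }

  return saved;
}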
19406 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19407 everything bar the final return instruction. If simple_return is true,
19408 then do not output epilogue, because it has already been emitted in RTL. */
19409 const char *
19410 output_return_instruction (rtx operand, bool really_return, bool reverse,
19411 bool simple_return)
19413 char conditional[10];
19414 char instr[100];
19415 unsigned reg;
19416 unsigned long live_regs_mask;
19417 unsigned long func_type;
19418 arm_stack_offsets *offsets;
19420 func_type = arm_current_func_type ();
19422 if (IS_NAKED (func_type))
19423 return "";
19425 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19427 /* If this function was declared non-returning, and we have
19428 found a tail call, then we have to trust that the called
19429 function won't return. */
19430 if (really_return)
19432 rtx ops[2];
19434 /* Otherwise, trap an attempted return by aborting. */
19435 ops[0] = operand;
19436 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19437 : "abort");
19438 assemble_external_libcall (ops[1]);
19439 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19442 return "";
19445 gcc_assert (!cfun->calls_alloca || really_return);
19447 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19449 cfun->machine->return_used_this_function = 1;
19451 offsets = arm_get_frame_offsets ();
19452 live_regs_mask = offsets->saved_regs_mask;
19454 if (!simple_return && live_regs_mask)
19456 const char * return_reg;
19458 /* If we do not have any special requirements for function exit
19459 (e.g. interworking) then we can load the return address
19460 directly into the PC. Otherwise we must load it into LR. */
19461 if (really_return
19462 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19463 return_reg = reg_names[PC_REGNUM];
19464 else
19465 return_reg = reg_names[LR_REGNUM];
19467 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19469 /* There are three possible reasons for the IP register
19470 being saved. 1) a stack frame was created, in which case
19471 IP contains the old stack pointer, or 2) an ISR routine
19472 corrupted it, or 3) it was saved to align the stack on
19473 iWMMXt. In case 1, restore IP into SP, otherwise just
19474 restore IP. */
19475 if (frame_pointer_needed)
19477 live_regs_mask &= ~ (1 << IP_REGNUM);
19478 live_regs_mask |= (1 << SP_REGNUM);
19480 else
19481 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19484 /* On some ARM architectures it is faster to use LDR rather than
19485 LDM to load a single register. On other architectures, the
19486 cost is the same. In 26 bit mode, or for exception handlers,
19487 we have to use LDM to load the PC so that the CPSR is also
19488 restored. */
19489 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19490 if (live_regs_mask == (1U << reg))
19491 break;
19493 if (reg <= LAST_ARM_REGNUM
19494 && (reg != LR_REGNUM
19495 || ! really_return
19496 || ! IS_INTERRUPT (func_type)))
19498 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19499 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19501 else
19503 char *p;
19504 int first = 1;
19506 /* Generate the load multiple instruction to restore the
19507 registers. Note we can get here, even if
19508 frame_pointer_needed is true, but only if sp already
19509 points to the base of the saved core registers. */
19510 if (live_regs_mask & (1 << SP_REGNUM))
19512 unsigned HOST_WIDE_INT stack_adjust;
19514 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19515 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19517 if (stack_adjust && arm_arch5 && TARGET_ARM)
19518 if (TARGET_UNIFIED_ASM)
19519 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19520 else
19521 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19522 else
19524 /* If we can't use ldmib (SA110 bug),
19525 then try to pop r3 instead. */
19526 if (stack_adjust)
19527 live_regs_mask |= 1 << 3;
19529 if (TARGET_UNIFIED_ASM)
19530 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19531 else
19532 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19535 else
19536 if (TARGET_UNIFIED_ASM)
19537 sprintf (instr, "pop%s\t{", conditional);
19538 else
19539 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19541 p = instr + strlen (instr);
19543 for (reg = 0; reg <= SP_REGNUM; reg++)
19544 if (live_regs_mask & (1 << reg))
19546 int l = strlen (reg_names[reg]);
19548 if (first)
19549 first = 0;
19550 else
19552 memcpy (p, ", ", 2);
19553 p += 2;
19556 memcpy (p, "%|", 2);
19557 memcpy (p + 2, reg_names[reg], l);
19558 p += l + 2;
19561 if (live_regs_mask & (1 << LR_REGNUM))
19563 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19564 /* If returning from an interrupt, restore the CPSR. */
19565 if (IS_INTERRUPT (func_type))
19566 strcat (p, "^");
19568 else
19569 strcpy (p, "}");
19572 output_asm_insn (instr, & operand);
19574 /* See if we need to generate an extra instruction to
19575 perform the actual function return. */
19576 if (really_return
19577 && func_type != ARM_FT_INTERWORKED
19578 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19580 /* The return has already been handled
19581 by loading the LR into the PC. */
19582 return "";
19586 if (really_return)
19588 switch ((int) ARM_FUNC_TYPE (func_type))
19590 case ARM_FT_ISR:
19591 case ARM_FT_FIQ:
19592 /* ??? This is wrong for unified assembly syntax. */
19593 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19594 break;
19596 case ARM_FT_INTERWORKED:
19597 sprintf (instr, "bx%s\t%%|lr", conditional);
19598 break;
19600 case ARM_FT_EXCEPTION:
19601 /* ??? This is wrong for unified assembly syntax. */
19602 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19603 break;
19605 default:
19606 /* Use bx if it's available. */
19607 if (arm_arch5 || arm_arch4t)
19608 sprintf (instr, "bx%s\t%%|lr", conditional);
19609 else
19610 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19611 break;
19614 output_asm_insn (instr, & operand);
19617 return "";
19620 /* Write the function name into the code section, directly preceding
19621 the function prologue.
19623 Code will be output similar to this:
19625 .ascii "arm_poke_function_name", 0
19626 .align
19628 .word 0xff000000 + (t1 - t0)
19629 arm_poke_function_name
19630 mov ip, sp
19631 stmfd sp!, {fp, ip, lr, pc}
19632 sub fp, ip, #4
19634 When performing a stack backtrace, code can inspect the value
19635 of 'pc' stored at 'fp' + 0. If the trace function then looks
19636 at location pc - 12 and the top 8 bits are set, then we know
19637 that there is a function name embedded immediately preceding this
19638 location, and that it has length (pc[-3] & 0x00ffffff).
19640 We assume that pc is declared as a pointer to an unsigned long.
19642 It is of no benefit to output the function name if we are assembling
19643 a leaf function. Such functions will not contain a stack
19644 backtrace structure, so it is not possible to determine the
19645 function name. */
19646 void
19647 arm_poke_function_name (FILE *stream, const char *name)
19649 unsigned long alignlength;
19650 unsigned long length;
19651 rtx x;
19653 length = strlen (name) + 1;
19654 alignlength = ROUND_UP_WORD (length);
19656 ASM_OUTPUT_ASCII (stream, name, length);
19657 ASM_OUTPUT_ALIGN (stream, 2);
19658 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19659 assemble_aligned_integer (UNITS_PER_WORD, x);
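/* Illustration only, not part of the compiler: a backtrace routine
   following the scheme described above could recover the embedded name
   roughly like this, assuming 'pc' is the saved program counter fetched
   from the frame and declared as a pointer to unsigned long:

       if ((pc[-3] & 0xff000000) == 0xff000000)
         {
           unsigned long len = pc[-3] & 0x00ffffff;  /* padded name length */
           const char *name = (const char *) pc - 12 - len;
           /* 'name' now points at the NUL-terminated function name.  */
         }
*/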
19662 /* Place some comments into the assembler stream
19663 describing the current function. */
19664 static void
19665 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19667 unsigned long func_type;
19669 /* ??? Do we want to print some of the below anyway? */
19670 if (TARGET_THUMB1)
19671 return;
19673 /* Sanity check. */
19674 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19676 func_type = arm_current_func_type ();
19678 switch ((int) ARM_FUNC_TYPE (func_type))
19680 default:
19681 case ARM_FT_NORMAL:
19682 break;
19683 case ARM_FT_INTERWORKED:
19684 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19685 break;
19686 case ARM_FT_ISR:
19687 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19688 break;
19689 case ARM_FT_FIQ:
19690 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19691 break;
19692 case ARM_FT_EXCEPTION:
19693 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19694 break;
19697 if (IS_NAKED (func_type))
19698 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19700 if (IS_VOLATILE (func_type))
19701 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19703 if (IS_NESTED (func_type))
19704 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19705 if (IS_STACKALIGN (func_type))
19706 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19708 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19709 crtl->args.size,
19710 crtl->args.pretend_args_size, frame_size);
19712 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19713 frame_pointer_needed,
19714 cfun->machine->uses_anonymous_args);
19716 if (cfun->machine->lr_save_eliminated)
19717 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19719 if (crtl->calls_eh_return)
19720 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
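/* As an illustration, an interworking-capable non-leaf function might get
   annotations such as (values are hypothetical, they depend on the
   function being compiled):

       @ Function supports interworking.
       @ args = 0, pretend = 0, frame = 8
       @ frame_needed = 1, uses_anonymous_args = 0
*/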
19724 static void
19725 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19726 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19728 arm_stack_offsets *offsets;
19730 if (TARGET_THUMB1)
19732 int regno;
19734 /* Emit any call-via-reg trampolines that are needed for v4t support
19735 of call_reg and call_value_reg type insns. */
19736 for (regno = 0; regno < LR_REGNUM; regno++)
19738 rtx label = cfun->machine->call_via[regno];
19740 if (label != NULL)
19742 switch_to_section (function_section (current_function_decl));
19743 targetm.asm_out.internal_label (asm_out_file, "L",
19744 CODE_LABEL_NUMBER (label));
19745 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19749 /* ??? Probably not safe to set this here, since it assumes that a
19750 function will be emitted as assembly immediately after we generate
19751 RTL for it. This does not happen for inline functions. */
19752 cfun->machine->return_used_this_function = 0;
19754 else /* TARGET_32BIT */
19756 /* We need to take into account any stack-frame rounding. */
19757 offsets = arm_get_frame_offsets ();
19759 gcc_assert (!use_return_insn (FALSE, NULL)
19760 || (cfun->machine->return_used_this_function != 0)
19761 || offsets->saved_regs == offsets->outgoing_args
19762 || frame_pointer_needed);
19766 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19767 STR and STRD. If an even number of registers is being pushed, an
19768 STRD pattern is created for each register pair. If an
19769 odd number of registers is pushed, emit an initial STR followed by
19770 as many STRD instructions as are needed. This works best when the
19771 stack is initially 64-bit aligned (the normal case), since it
19772 ensures that each STRD is also 64-bit aligned. */
19773 static void
19774 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19776 int num_regs = 0;
19777 int i;
19778 int regno;
19779 rtx par = NULL_RTX;
19780 rtx dwarf = NULL_RTX;
19781 rtx tmp;
19782 bool first = true;
19784 num_regs = bit_count (saved_regs_mask);
19786 /* Must be at least one register to save, and can't save SP or PC. */
19787 gcc_assert (num_regs > 0 && num_regs <= 14);
19788 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19789 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19791 /* Create sequence for DWARF info. All the frame-related data for
19792 debugging is held in this wrapper. */
19793 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19795 /* Describe the stack adjustment. */
19796 tmp = gen_rtx_SET (VOIDmode,
19797 stack_pointer_rtx,
19798 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19799 RTX_FRAME_RELATED_P (tmp) = 1;
19800 XVECEXP (dwarf, 0, 0) = tmp;
19802 /* Find the first register. */
19803 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19806 i = 0;
19808 /* If there's an odd number of registers to push, start off by
19809 pushing a single register. This ensures that subsequent strd
19810 operations are dword aligned (assuming that SP was originally
19811 64-bit aligned). */
19812 if ((num_regs & 1) != 0)
19814 rtx reg, mem, insn;
19816 reg = gen_rtx_REG (SImode, regno);
19817 if (num_regs == 1)
19818 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19819 stack_pointer_rtx));
19820 else
19821 mem = gen_frame_mem (Pmode,
19822 gen_rtx_PRE_MODIFY
19823 (Pmode, stack_pointer_rtx,
19824 plus_constant (Pmode, stack_pointer_rtx,
19825 -4 * num_regs)));
19827 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19828 RTX_FRAME_RELATED_P (tmp) = 1;
19829 insn = emit_insn (tmp);
19830 RTX_FRAME_RELATED_P (insn) = 1;
19831 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19832 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19833 reg);
19834 RTX_FRAME_RELATED_P (tmp) = 1;
19835 i++;
19836 regno++;
19837 XVECEXP (dwarf, 0, i) = tmp;
19838 first = false;
19841 while (i < num_regs)
19842 if (saved_regs_mask & (1 << regno))
19844 rtx reg1, reg2, mem1, mem2;
19845 rtx tmp0, tmp1, tmp2;
19846 int regno2;
19848 /* Find the register to pair with this one. */
19849 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19850 regno2++)
19853 reg1 = gen_rtx_REG (SImode, regno);
19854 reg2 = gen_rtx_REG (SImode, regno2);
19856 if (first)
19858 rtx insn;
19860 first = false;
19861 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19862 stack_pointer_rtx,
19863 -4 * num_regs));
19864 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19865 stack_pointer_rtx,
19866 -4 * (num_regs - 1)));
19867 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19868 plus_constant (Pmode, stack_pointer_rtx,
19869 -4 * (num_regs)));
19870 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19871 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19872 RTX_FRAME_RELATED_P (tmp0) = 1;
19873 RTX_FRAME_RELATED_P (tmp1) = 1;
19874 RTX_FRAME_RELATED_P (tmp2) = 1;
19875 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19876 XVECEXP (par, 0, 0) = tmp0;
19877 XVECEXP (par, 0, 1) = tmp1;
19878 XVECEXP (par, 0, 2) = tmp2;
19879 insn = emit_insn (par);
19880 RTX_FRAME_RELATED_P (insn) = 1;
19881 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19883 else
19885 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19886 stack_pointer_rtx,
19887 4 * i));
19888 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19889 stack_pointer_rtx,
19890 4 * (i + 1)));
19891 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19892 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19893 RTX_FRAME_RELATED_P (tmp1) = 1;
19894 RTX_FRAME_RELATED_P (tmp2) = 1;
19895 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19896 XVECEXP (par, 0, 0) = tmp1;
19897 XVECEXP (par, 0, 1) = tmp2;
19898 emit_insn (par);
19901 /* Create unwind information. This is an approximation. */
19902 tmp1 = gen_rtx_SET (VOIDmode,
19903 gen_frame_mem (Pmode,
19904 plus_constant (Pmode,
19905 stack_pointer_rtx,
19906 4 * i)),
19907 reg1);
19908 tmp2 = gen_rtx_SET (VOIDmode,
19909 gen_frame_mem (Pmode,
19910 plus_constant (Pmode,
19911 stack_pointer_rtx,
19912 4 * (i + 1))),
19913 reg2);
19915 RTX_FRAME_RELATED_P (tmp1) = 1;
19916 RTX_FRAME_RELATED_P (tmp2) = 1;
19917 XVECEXP (dwarf, 0, i + 1) = tmp1;
19918 XVECEXP (dwarf, 0, i + 2) = tmp2;
19919 i += 2;
19920 regno = regno2 + 1;
19922 else
19923 regno++;
19925 return;
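/* Illustration only: for a SAVED_REGS_MASK covering {r4, r5, r6} (an odd
   count) the RTL built above corresponds roughly to

       str     r4, [sp, #-12]!    @ first store allocates all 12 bytes
       strd    r5, r6, [sp, #4]   @ remaining pair, doubleword aligned

   assuming SP was 64-bit aligned on entry.  */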
19928 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19929 whenever possible, otherwise it emits single-word stores. The first store
19930 also allocates stack space for all saved registers, using pre-indexed
19931 addressing with writeback. All other stores use offset addressing. If no STRD
19932 can be emitted, this function emits a sequence of single-word stores,
19933 and not an STM as before, because single-word stores give more freedom for
19934 scheduling and can be turned into an STM by peephole optimizations. */
19935 static void
19936 arm_emit_strd_push (unsigned long saved_regs_mask)
19938 int num_regs = 0;
19939 int i, j, dwarf_index = 0;
19940 int offset = 0;
19941 rtx dwarf = NULL_RTX;
19942 rtx insn = NULL_RTX;
19943 rtx tmp, mem;
19945 /* TODO: More efficient code could be emitted by changing the
19946 layout, e.g., first push all pairs that can use STRD to keep the
19947 stack aligned, and then push all other registers. */
19948 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19949 if (saved_regs_mask & (1 << i))
19950 num_regs++;
19952 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19953 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19954 gcc_assert (num_regs > 0);
19956 /* Create sequence for DWARF info. */
19957 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19959 /* For dwarf info, we generate an explicit stack update. */
19960 tmp = gen_rtx_SET (VOIDmode,
19961 stack_pointer_rtx,
19962 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19963 RTX_FRAME_RELATED_P (tmp) = 1;
19964 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19966 /* Save registers. */
19967 offset = - 4 * num_regs;
19968 j = 0;
19969 while (j <= LAST_ARM_REGNUM)
19970 if (saved_regs_mask & (1 << j))
19972 if ((j % 2 == 0)
19973 && (saved_regs_mask & (1 << (j + 1))))
19975 /* The current register and the next register form a register pair for
19976 which STRD can be generated. */
19977 if (offset < 0)
19979 /* Allocate stack space for all saved registers. */
19980 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19981 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19982 mem = gen_frame_mem (DImode, tmp);
19983 offset = 0;
19985 else if (offset > 0)
19986 mem = gen_frame_mem (DImode,
19987 plus_constant (Pmode,
19988 stack_pointer_rtx,
19989 offset));
19990 else
19991 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19993 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19994 RTX_FRAME_RELATED_P (tmp) = 1;
19995 tmp = emit_insn (tmp);
19997 /* Record the first store insn. */
19998 if (dwarf_index == 1)
19999 insn = tmp;
20001 /* Generate dwarf info. */
20002 mem = gen_frame_mem (SImode,
20003 plus_constant (Pmode,
20004 stack_pointer_rtx,
20005 offset));
20006 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20007 RTX_FRAME_RELATED_P (tmp) = 1;
20008 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20010 mem = gen_frame_mem (SImode,
20011 plus_constant (Pmode,
20012 stack_pointer_rtx,
20013 offset + 4));
20014 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20015 RTX_FRAME_RELATED_P (tmp) = 1;
20016 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20018 offset += 8;
20019 j += 2;
20021 else
20023 /* Emit a single word store. */
20024 if (offset < 0)
20026 /* Allocate stack space for all saved registers. */
20027 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20028 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20029 mem = gen_frame_mem (SImode, tmp);
20030 offset = 0;
20032 else if (offset > 0)
20033 mem = gen_frame_mem (SImode,
20034 plus_constant (Pmode,
20035 stack_pointer_rtx,
20036 offset));
20037 else
20038 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20040 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20041 RTX_FRAME_RELATED_P (tmp) = 1;
20042 tmp = emit_insn (tmp);
20044 /* Record the first store insn. */
20045 if (dwarf_index == 1)
20046 insn = tmp;
20048 /* Generate dwarf info. */
20049 mem = gen_frame_mem (SImode,
20050 plus_constant(Pmode,
20051 stack_pointer_rtx,
20052 offset));
20053 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20054 RTX_FRAME_RELATED_P (tmp) = 1;
20055 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20057 offset += 4;
20058 j += 1;
20061 else
20062 j++;
20064 /* Attach dwarf info to the first insn we generate. */
20065 gcc_assert (insn != NULL_RTX);
20066 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20067 RTX_FRAME_RELATED_P (insn) = 1;
20070 /* Generate and emit an insn that we will recognize as a push_multi.
20071 Unfortunately, since this insn does not reflect very well the actual
20072 semantics of the operation, we need to annotate the insn for the benefit
20073 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20074 MASK for registers that should be annotated for DWARF2 frame unwind
20075 information. */
20076 static rtx
20077 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20079 int num_regs = 0;
20080 int num_dwarf_regs = 0;
20081 int i, j;
20082 rtx par;
20083 rtx dwarf;
20084 int dwarf_par_index;
20085 rtx tmp, reg;
20087 /* We don't record the PC in the dwarf frame information. */
20088 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20090 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20092 if (mask & (1 << i))
20093 num_regs++;
20094 if (dwarf_regs_mask & (1 << i))
20095 num_dwarf_regs++;
20098 gcc_assert (num_regs && num_regs <= 16);
20099 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20101 /* For the body of the insn we are going to generate an UNSPEC in
20102 parallel with several USEs. This allows the insn to be recognized
20103 by the push_multi pattern in the arm.md file.
20105 The body of the insn looks something like this:
20107 (parallel [
20108 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20109 (const_int:SI <num>)))
20110 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20111 (use (reg:SI XX))
20112 (use (reg:SI YY))
20116 For the frame note however, we try to be more explicit and actually
20117 show each register being stored into the stack frame, plus a (single)
20118 decrement of the stack pointer. We do it this way in order to be
20119 friendly to the stack unwinding code, which only wants to see a single
20120 stack decrement per instruction. The RTL we generate for the note looks
20121 something like this:
20123 (sequence [
20124 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20125 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20126 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20127 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20131 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20132 instead we'd have a parallel expression detailing all
20133 the stores to the various memory addresses so that debug
20134 information is more up-to-date. Remember however while writing
20135 this to take care of the constraints with the push instruction.
20137 Note also that this has to be taken care of for the VFP registers.
20139 For more see PR43399. */
20141 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20142 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20143 dwarf_par_index = 1;
20145 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20147 if (mask & (1 << i))
20149 reg = gen_rtx_REG (SImode, i);
20151 XVECEXP (par, 0, 0)
20152 = gen_rtx_SET (VOIDmode,
20153 gen_frame_mem
20154 (BLKmode,
20155 gen_rtx_PRE_MODIFY (Pmode,
20156 stack_pointer_rtx,
20157 plus_constant
20158 (Pmode, stack_pointer_rtx,
20159 -4 * num_regs))
20161 gen_rtx_UNSPEC (BLKmode,
20162 gen_rtvec (1, reg),
20163 UNSPEC_PUSH_MULT));
20165 if (dwarf_regs_mask & (1 << i))
20167 tmp = gen_rtx_SET (VOIDmode,
20168 gen_frame_mem (SImode, stack_pointer_rtx),
20169 reg);
20170 RTX_FRAME_RELATED_P (tmp) = 1;
20171 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20174 break;
20178 for (j = 1, i++; j < num_regs; i++)
20180 if (mask & (1 << i))
20182 reg = gen_rtx_REG (SImode, i);
20184 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20186 if (dwarf_regs_mask & (1 << i))
20189 = gen_rtx_SET (VOIDmode,
20190 gen_frame_mem
20191 (SImode,
20192 plus_constant (Pmode, stack_pointer_rtx,
20193 4 * j)),
20194 reg);
20195 RTX_FRAME_RELATED_P (tmp) = 1;
20196 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20199 j++;
20203 par = emit_insn (par);
20205 tmp = gen_rtx_SET (VOIDmode,
20206 stack_pointer_rtx,
20207 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20208 RTX_FRAME_RELATED_P (tmp) = 1;
20209 XVECEXP (dwarf, 0, 0) = tmp;
20211 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20213 return par;
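/* For example (illustrative only), a MASK containing r4, r5 and lr yields a
   single "push {r4, r5, lr}" instruction, while the REG_FRAME_RELATED_EXPR
   note attached above describes the same effect as a 12-byte decrement of SP
   followed by three individual stores, which is what the unwinder wants to
   see.  */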
20216 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20217 SIZE is the offset to be adjusted.
20218 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20219 static void
20220 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20222 rtx dwarf;
20224 RTX_FRAME_RELATED_P (insn) = 1;
20225 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20226 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20229 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20230 SAVED_REGS_MASK shows which registers need to be restored.
20232 Unfortunately, since this insn does not reflect very well the actual
20233 semantics of the operation, we need to annotate the insn for the benefit
20234 of DWARF2 frame unwind information. */
20235 static void
20236 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20238 int num_regs = 0;
20239 int i, j;
20240 rtx par;
20241 rtx dwarf = NULL_RTX;
20242 rtx tmp, reg;
20243 bool return_in_pc;
20244 int offset_adj;
20245 int emit_update;
20247 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20248 offset_adj = return_in_pc ? 1 : 0;
20249 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20250 if (saved_regs_mask & (1 << i))
20251 num_regs++;
20253 gcc_assert (num_regs && num_regs <= 16);
20255 /* If SP is in reglist, then we don't emit SP update insn. */
20256 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20258 /* The parallel needs to hold num_regs SETs
20259 and one SET for the stack update. */
20260 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20262 if (return_in_pc)
20264 tmp = ret_rtx;
20265 XVECEXP (par, 0, 0) = tmp;
20268 if (emit_update)
20270 /* Increment the stack pointer, based on there being
20271 num_regs 4-byte registers to restore. */
20272 tmp = gen_rtx_SET (VOIDmode,
20273 stack_pointer_rtx,
20274 plus_constant (Pmode,
20275 stack_pointer_rtx,
20276 4 * num_regs));
20277 RTX_FRAME_RELATED_P (tmp) = 1;
20278 XVECEXP (par, 0, offset_adj) = tmp;
20281 /* Now restore every reg, which may include PC. */
20282 for (j = 0, i = 0; j < num_regs; i++)
20283 if (saved_regs_mask & (1 << i))
20285 reg = gen_rtx_REG (SImode, i);
20286 if ((num_regs == 1) && emit_update && !return_in_pc)
20288 /* Emit single load with writeback. */
20289 tmp = gen_frame_mem (SImode,
20290 gen_rtx_POST_INC (Pmode,
20291 stack_pointer_rtx));
20292 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20293 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20294 return;
20297 tmp = gen_rtx_SET (VOIDmode,
20298 reg,
20299 gen_frame_mem
20300 (SImode,
20301 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20302 RTX_FRAME_RELATED_P (tmp) = 1;
20303 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20305 /* We need to maintain a sequence for DWARF info too. As dwarf info
20306 should not have PC, skip PC. */
20307 if (i != PC_REGNUM)
20308 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20310 j++;
20313 if (return_in_pc)
20314 par = emit_jump_insn (par);
20315 else
20316 par = emit_insn (par);
20318 REG_NOTES (par) = dwarf;
20319 if (!return_in_pc)
20320 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20321 stack_pointer_rtx, stack_pointer_rtx);
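/* Illustration only: a SAVED_REGS_MASK containing r4, r5 and pc produces a
   single "pop {r4, r5, pc}" that also returns; the REG_CFA_RESTORE notes
   record which registers were restored, and a REG_CFA_ADJUST_CFA note for
   the SP adjustment is added only when PC is not being popped.  */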
20324 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20325 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20327 Unfortunately, since this insn does not reflect very well the actual
20328 semantics of the operation, we need to annotate the insn for the benefit
20329 of DWARF2 frame unwind information. */
20330 static void
20331 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20333 int i, j;
20334 rtx par;
20335 rtx dwarf = NULL_RTX;
20336 rtx tmp, reg;
20338 gcc_assert (num_regs && num_regs <= 32);
20340 /* Workaround ARM10 VFPr1 bug. */
20341 if (num_regs == 2 && !arm_arch6)
20343 if (first_reg == 15)
20344 first_reg--;
20346 num_regs++;
20349 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20350 there could be up to 32 D-registers to restore.
20351 If there are more than 16 D-registers, make two recursive calls,
20352 each of which emits one pop_multi instruction. */
20353 if (num_regs > 16)
20355 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20356 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20357 return;
20360 /* The parallel needs to hold num_regs SETs
20361 and one SET for the stack update. */
20362 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20364 /* Increment the stack pointer, based on there being
20365 num_regs 8-byte registers to restore. */
20366 tmp = gen_rtx_SET (VOIDmode,
20367 base_reg,
20368 plus_constant (Pmode, base_reg, 8 * num_regs));
20369 RTX_FRAME_RELATED_P (tmp) = 1;
20370 XVECEXP (par, 0, 0) = tmp;
20372 /* Now show every reg that will be restored, using a SET for each. */
20373 for (j = 0, i=first_reg; j < num_regs; i += 2)
20375 reg = gen_rtx_REG (DFmode, i);
20377 tmp = gen_rtx_SET (VOIDmode,
20378 reg,
20379 gen_frame_mem
20380 (DFmode,
20381 plus_constant (Pmode, base_reg, 8 * j)));
20382 RTX_FRAME_RELATED_P (tmp) = 1;
20383 XVECEXP (par, 0, j + 1) = tmp;
20385 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20387 j++;
20390 par = emit_insn (par);
20391 REG_NOTES (par) = dwarf;
20393 /* Make sure the cfa doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20394 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20396 RTX_FRAME_RELATED_P (par) = 1;
20397 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20399 else
20400 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20401 base_reg, base_reg);
20404 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20405 even number of registers is being popped, multiple LDRD patterns are created
20406 for all register pairs. If an odd number of registers is popped, the last
20407 register is loaded using an LDR pattern. */
20408 static void
20409 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20411 int num_regs = 0;
20412 int i, j;
20413 rtx par = NULL_RTX;
20414 rtx dwarf = NULL_RTX;
20415 rtx tmp, reg, tmp1;
20416 bool return_in_pc;
20418 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20419 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20420 if (saved_regs_mask & (1 << i))
20421 num_regs++;
20423 gcc_assert (num_regs && num_regs <= 16);
20425 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20426 to be popped. So, if num_regs is even, now it will become odd,
20427 and we can generate pop with PC. If num_regs is odd, it will be
20428 even now, and ldr with return can be generated for PC. */
20429 if (return_in_pc)
20430 num_regs--;
20432 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20434 /* Var j iterates over all the registers to gather all the registers in
20435 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20436 A PARALLEL RTX of a register pair is created here, so that the pattern for
20437 LDRD can be matched. As PC is always the last register to be popped, and
20438 we have already decremented num_regs if PC is present, we don't have to worry
20439 about PC in this loop. */
20440 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20441 if (saved_regs_mask & (1 << j))
20443 /* Create RTX for memory load. */
20444 reg = gen_rtx_REG (SImode, j);
20445 tmp = gen_rtx_SET (SImode,
20446 reg,
20447 gen_frame_mem (SImode,
20448 plus_constant (Pmode,
20449 stack_pointer_rtx, 4 * i)));
20450 RTX_FRAME_RELATED_P (tmp) = 1;
20452 if (i % 2 == 0)
20454 /* When saved-register index (i) is even, the RTX to be emitted is
20455 yet to be created. Hence create it first. The LDRD pattern we
20456 are generating is :
20457 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20458 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20459 where target registers need not be consecutive. */
20460 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20461 dwarf = NULL_RTX;
20464 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20465 added as 0th element and if i is odd, reg_i is added as 1st element
20466 of LDRD pattern shown above. */
20467 XVECEXP (par, 0, (i % 2)) = tmp;
20468 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20470 if ((i % 2) == 1)
20472 /* When saved-register index (i) is odd, RTXs for both the registers
20473 to be loaded are generated in above given LDRD pattern, and the
20474 pattern can be emitted now. */
20475 par = emit_insn (par);
20476 REG_NOTES (par) = dwarf;
20477 RTX_FRAME_RELATED_P (par) = 1;
20480 i++;
20483 /* If the number of registers popped is odd and return_in_pc is false, or the
20484 number of registers is even and return_in_pc is true, the last register is
20485 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20486 then use LDR with post-increment. */
20488 /* Increment the stack pointer, based on there being
20489 num_regs 4-byte registers to restore. */
20490 tmp = gen_rtx_SET (VOIDmode,
20491 stack_pointer_rtx,
20492 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20493 RTX_FRAME_RELATED_P (tmp) = 1;
20494 tmp = emit_insn (tmp);
20495 if (!return_in_pc)
20497 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20498 stack_pointer_rtx, stack_pointer_rtx);
20501 dwarf = NULL_RTX;
20503 if (((num_regs % 2) == 1 && !return_in_pc)
20504 || ((num_regs % 2) == 0 && return_in_pc))
20506 /* Scan for the single register to be popped. Skip until the saved
20507 register is found. */
20508 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20510 /* Gen LDR with post increment here. */
20511 tmp1 = gen_rtx_MEM (SImode,
20512 gen_rtx_POST_INC (SImode,
20513 stack_pointer_rtx));
20514 set_mem_alias_set (tmp1, get_frame_alias_set ());
20516 reg = gen_rtx_REG (SImode, j);
20517 tmp = gen_rtx_SET (SImode, reg, tmp1);
20518 RTX_FRAME_RELATED_P (tmp) = 1;
20519 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20521 if (return_in_pc)
20523 /* If return_in_pc, j must be PC_REGNUM. */
20524 gcc_assert (j == PC_REGNUM);
20525 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20526 XVECEXP (par, 0, 0) = ret_rtx;
20527 XVECEXP (par, 0, 1) = tmp;
20528 par = emit_jump_insn (par);
20530 else
20532 par = emit_insn (tmp);
20533 REG_NOTES (par) = dwarf;
20534 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20535 stack_pointer_rtx, stack_pointer_rtx);
20539 else if ((num_regs % 2) == 1 && return_in_pc)
20541 /* There are 2 registers to be popped. So, generate the pattern
20542 pop_multiple_with_stack_update_and_return to pop in PC. */
20543 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20546 return;
20549 /* LDRD in ARM mode needs consecutive registers as operands. This function
20550 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20551 offset addressing and then generates one separate stack update. This provides
20552 more scheduling freedom, compared to writeback on every load. However,
20553 if the function returns using a load into PC directly
20554 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20555 before the last load. TODO: Add a peephole optimization to recognize
20556 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20557 a peephole optimization to merge the load at stack-offset zero
20558 with the stack update instruction using load with writeback
20559 in post-index addressing mode. */
20560 static void
20561 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20563 int j = 0;
20564 int offset = 0;
20565 rtx par = NULL_RTX;
20566 rtx dwarf = NULL_RTX;
20567 rtx tmp, mem;
20569 /* Restore saved registers. */
20570 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20571 j = 0;
20572 while (j <= LAST_ARM_REGNUM)
20573 if (saved_regs_mask & (1 << j))
20575 if ((j % 2) == 0
20576 && (saved_regs_mask & (1 << (j + 1)))
20577 && (j + 1) != PC_REGNUM)
20579 /* The current register and the next register form a register pair for which
20580 LDRD can be generated. PC is always the last register popped, and
20581 we handle it separately. */
20582 if (offset > 0)
20583 mem = gen_frame_mem (DImode,
20584 plus_constant (Pmode,
20585 stack_pointer_rtx,
20586 offset));
20587 else
20588 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20590 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20591 tmp = emit_insn (tmp);
20592 RTX_FRAME_RELATED_P (tmp) = 1;
20594 /* Generate dwarf info. */
20596 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20597 gen_rtx_REG (SImode, j),
20598 NULL_RTX);
20599 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20600 gen_rtx_REG (SImode, j + 1),
20601 dwarf);
20603 REG_NOTES (tmp) = dwarf;
20605 offset += 8;
20606 j += 2;
20608 else if (j != PC_REGNUM)
20610 /* Emit a single word load. */
20611 if (offset > 0)
20612 mem = gen_frame_mem (SImode,
20613 plus_constant (Pmode,
20614 stack_pointer_rtx,
20615 offset));
20616 else
20617 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20619 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20620 tmp = emit_insn (tmp);
20621 RTX_FRAME_RELATED_P (tmp) = 1;
20623 /* Generate dwarf info. */
20624 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20625 gen_rtx_REG (SImode, j),
20626 NULL_RTX);
20628 offset += 4;
20629 j += 1;
20631 else /* j == PC_REGNUM */
20632 j++;
20634 else
20635 j++;
20637 /* Update the stack. */
20638 if (offset > 0)
20640 tmp = gen_rtx_SET (Pmode,
20641 stack_pointer_rtx,
20642 plus_constant (Pmode,
20643 stack_pointer_rtx,
20644 offset));
20645 tmp = emit_insn (tmp);
20646 arm_add_cfa_adjust_cfa_note (tmp, offset,
20647 stack_pointer_rtx, stack_pointer_rtx);
20648 offset = 0;
20651 if (saved_regs_mask & (1 << PC_REGNUM))
20653 /* Only PC is to be popped. */
20654 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20655 XVECEXP (par, 0, 0) = ret_rtx;
20656 tmp = gen_rtx_SET (SImode,
20657 gen_rtx_REG (SImode, PC_REGNUM),
20658 gen_frame_mem (SImode,
20659 gen_rtx_POST_INC (SImode,
20660 stack_pointer_rtx)));
20661 RTX_FRAME_RELATED_P (tmp) = 1;
20662 XVECEXP (par, 0, 1) = tmp;
20663 par = emit_jump_insn (par);
20665 /* Generate dwarf info. */
20666 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20667 gen_rtx_REG (SImode, PC_REGNUM),
20668 NULL_RTX);
20669 REG_NOTES (par) = dwarf;
20670 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20671 stack_pointer_rtx, stack_pointer_rtx);
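/* Illustration only: for a SAVED_REGS_MASK covering {r4, r5, r6} the code
   above emits roughly

       ldrd    r4, r5, [sp]
       ldr     r6, [sp, #8]
       add     sp, sp, #12

   while a function returning through PC instead finishes with a
   post-incremented "ldr pc, [sp], #4" after the stack update.  */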
20675 /* Calculate the size of the return value that is passed in registers. */
20676 static unsigned
20677 arm_size_return_regs (void)
20679 machine_mode mode;
20681 if (crtl->return_rtx != 0)
20682 mode = GET_MODE (crtl->return_rtx);
20683 else
20684 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20686 return GET_MODE_SIZE (mode);
20689 /* Return true if the current function needs to save/restore LR. */
20690 static bool
20691 thumb_force_lr_save (void)
20693 return !cfun->machine->lr_save_eliminated
20694 && (!leaf_function_p ()
20695 || thumb_far_jump_used_p ()
20696 || df_regs_ever_live_p (LR_REGNUM));
20699 /* We do not know if r3 will be available, because
20700 there is an indirect tail call happening in this
20701 particular case. */
20702 static bool
20703 is_indirect_tailcall_p (rtx call)
20705 rtx pat = PATTERN (call);
20707 /* Indirect tail call. */
20708 pat = XVECEXP (pat, 0, 0);
20709 if (GET_CODE (pat) == SET)
20710 pat = SET_SRC (pat);
20712 pat = XEXP (XEXP (pat, 0), 0);
20713 return REG_P (pat);
20716 /* Return true if r3 is used by any of the tail call insns in the
20717 current function. */
20718 static bool
20719 any_sibcall_could_use_r3 (void)
20721 edge_iterator ei;
20722 edge e;
20724 if (!crtl->tail_call_emit)
20725 return false;
20726 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20727 if (e->flags & EDGE_SIBCALL)
20729 rtx call = BB_END (e->src);
20730 if (!CALL_P (call))
20731 call = prev_nonnote_nondebug_insn (call);
20732 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20733 if (find_regno_fusage (call, USE, 3)
20734 || is_indirect_tailcall_p (call))
20735 return true;
20737 return false;
20741 /* Compute the distance from register FROM to register TO.
20742 These can be the arg pointer (26), the soft frame pointer (25),
20743 the stack pointer (13) or the hard frame pointer (11).
20744 In thumb mode r7 is used as the hard frame pointer, if needed.
20745 Typical stack layout looks like this:
20747 old stack pointer -> | |
20748 ----
20749 | | \
20750 | | saved arguments for
20751 | | vararg functions
20752 | | /
20754 hard FP & arg pointer -> | | \
20755 | | stack
20756 | | frame
20757 | | /
20759 | | \
20760 | | call saved
20761 | | registers
20762 soft frame pointer -> | | /
20764 | | \
20765 | | local
20766 | | variables
20767 locals base pointer -> | | /
20769 | | \
20770 | | outgoing
20771 | | arguments
20772 current stack pointer -> | | /
20775 For a given function some or all of these stack components
20776 may not be needed, giving rise to the possibility of
20777 eliminating some of the registers.
20779 The values returned by this function must reflect the behavior
20780 of arm_expand_prologue() and arm_compute_save_reg_mask().
20782 The sign of the number returned reflects the direction of stack
20783 growth, so the values are positive for all eliminations except
20784 from the soft frame pointer to the hard frame pointer.
20786 SFP may point just inside the local variables block to ensure correct
20787 alignment. */
20790 /* Calculate stack offsets. These are used to calculate register elimination
20791 offsets and in prologue/epilogue code. Also calculates which registers
20792 should be saved. */
20794 static arm_stack_offsets *
20795 arm_get_frame_offsets (void)
20797 struct arm_stack_offsets *offsets;
20798 unsigned long func_type;
20799 int leaf;
20800 int saved;
20801 int core_saved;
20802 HOST_WIDE_INT frame_size;
20803 int i;
20805 offsets = &cfun->machine->stack_offsets;
20807 /* We need to know if we are a leaf function. Unfortunately, it
20808 is possible to be called after start_sequence has been called,
20809 which causes get_insns to return the insns for the sequence,
20810 not the function, which will cause leaf_function_p to return
20811 the incorrect result.
20813 We only need to know about leaf functions once reload has completed, and the
20814 frame size cannot be changed after that time, so we can safely
20815 use the cached value. */
20817 if (reload_completed)
20818 return offsets;
20820 /* Initially this is the size of the local variables. It will be translated
20821 into an offset once we have determined the size of preceding data. */
20822 frame_size = ROUND_UP_WORD (get_frame_size ());
20824 leaf = leaf_function_p ();
20826 /* Space for variadic functions. */
20827 offsets->saved_args = crtl->args.pretend_args_size;
20829 /* In Thumb mode this is incorrect, but never used. */
20830 offsets->frame
20831 = (offsets->saved_args
20832 + arm_compute_static_chain_stack_bytes ()
20833 + (frame_pointer_needed ? 4 : 0));
20835 if (TARGET_32BIT)
20837 unsigned int regno;
20839 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20840 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20841 saved = core_saved;
20843 /* We know that SP will be doubleword aligned on entry, and we must
20844 preserve that condition at any subroutine call. We also require the
20845 soft frame pointer to be doubleword aligned. */
20847 if (TARGET_REALLY_IWMMXT)
20849 /* Check for the call-saved iWMMXt registers. */
20850 for (regno = FIRST_IWMMXT_REGNUM;
20851 regno <= LAST_IWMMXT_REGNUM;
20852 regno++)
20853 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20854 saved += 8;
20857 func_type = arm_current_func_type ();
20858 /* Space for saved VFP registers. */
20859 if (! IS_VOLATILE (func_type)
20860 && TARGET_HARD_FLOAT && TARGET_VFP)
20861 saved += arm_get_vfp_saved_size ();
20863 else /* TARGET_THUMB1 */
20865 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20866 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20867 saved = core_saved;
20868 if (TARGET_BACKTRACE)
20869 saved += 16;
20872 /* Saved registers include the stack frame. */
20873 offsets->saved_regs
20874 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20875 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20877 /* A leaf function does not need any stack alignment if it has nothing
20878 on the stack. */
20879 if (leaf && frame_size == 0
20880 /* However if it calls alloca(), we have a dynamically allocated
20881 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20882 && ! cfun->calls_alloca)
20884 offsets->outgoing_args = offsets->soft_frame;
20885 offsets->locals_base = offsets->soft_frame;
20886 return offsets;
20889 /* Ensure SFP has the correct alignment. */
20890 if (ARM_DOUBLEWORD_ALIGN
20891 && (offsets->soft_frame & 7))
20893 offsets->soft_frame += 4;
20894 /* Try to align stack by pushing an extra reg. Don't bother doing this
20895 when there is a stack frame as the alignment will be rolled into
20896 the normal stack adjustment. */
20897 if (frame_size + crtl->outgoing_args_size == 0)
20899 int reg = -1;
20901 /* Register r3 is caller-saved. Normally it does not need to be
20902 saved on entry by the prologue. However if we choose to save
20903 it for padding then we may confuse the compiler into thinking
20904 a prologue sequence is required when in fact it is not. This
20905 will occur when shrink-wrapping if r3 is used as a scratch
20906 register and there are no other callee-saved writes.
20908 This situation can be avoided when other callee-saved registers
20909 are available; r3 is then not mandatory, and we can choose a
20910 callee-saved register for padding instead. */
20911 bool prefer_callee_reg_p = false;
20913 /* If it is safe to use r3, then do so. This sometimes
20914 generates better code on Thumb-2 by avoiding the need to
20915 use 32-bit push/pop instructions. */
20916 if (! any_sibcall_could_use_r3 ()
20917 && arm_size_return_regs () <= 12
20918 && (offsets->saved_regs_mask & (1 << 3)) == 0
20919 && (TARGET_THUMB2
20920 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20922 reg = 3;
20923 if (!TARGET_THUMB2)
20924 prefer_callee_reg_p = true;
20926 if (reg == -1
20927 || prefer_callee_reg_p)
20929 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20931 /* Avoid fixed registers; they may be changed at
20932 arbitrary times so it's unsafe to restore them
20933 during the epilogue. */
20934 if (!fixed_regs[i]
20935 && (offsets->saved_regs_mask & (1 << i)) == 0)
20937 reg = i;
20938 break;
20943 if (reg != -1)
20945 offsets->saved_regs += 4;
20946 offsets->saved_regs_mask |= (1 << reg);
20951 offsets->locals_base = offsets->soft_frame + frame_size;
20952 offsets->outgoing_args = (offsets->locals_base
20953 + crtl->outgoing_args_size);
20955 if (ARM_DOUBLEWORD_ALIGN)
20957 /* Ensure SP remains doubleword aligned. */
20958 if (offsets->outgoing_args & 7)
20959 offsets->outgoing_args += 4;
20960 gcc_assert (!(offsets->outgoing_args & 7));
20963 return offsets;
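/* Worked example (illustrative only): a 32-bit, non-variadic function that
   saves {r4, r5, lr}, has 8 bytes of locals and no outgoing arguments, and
   for which CALLER_INTERWORKING_SLOT_SIZE is 0, would end up with roughly

       saved_args    = 0
       saved_regs    = 12    (three 4-byte core registers)
       soft_frame    = 16    (12, padded for doubleword alignment)
       locals_base   = 24    (soft_frame + 8 bytes of locals)
       outgoing_args = 24    (already doubleword aligned)

   The exact values depend on the target options and the saved-register
   set chosen above.  */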
20967 /* Calculate the relative offsets for the different stack pointers. Positive
20968 offsets are in the direction of stack growth. */
20970 HOST_WIDE_INT
20971 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20973 arm_stack_offsets *offsets;
20975 offsets = arm_get_frame_offsets ();
20977 /* OK, now we have enough information to compute the distances.
20978 There must be an entry in these switch tables for each pair
20979 of registers in ELIMINABLE_REGS, even if some of the entries
20980 seem to be redundant or useless. */
20981 switch (from)
20983 case ARG_POINTER_REGNUM:
20984 switch (to)
20986 case THUMB_HARD_FRAME_POINTER_REGNUM:
20987 return 0;
20989 case FRAME_POINTER_REGNUM:
20990 /* This is the reverse of the soft frame pointer
20991 to hard frame pointer elimination below. */
20992 return offsets->soft_frame - offsets->saved_args;
20994 case ARM_HARD_FRAME_POINTER_REGNUM:
20995 /* This is only non-zero in the case where the static chain register
20996 is stored above the frame. */
20997 return offsets->frame - offsets->saved_args - 4;
20999 case STACK_POINTER_REGNUM:
21000 /* If nothing has been pushed on the stack at all
21001 then this will return -4. This *is* correct! */
21002 return offsets->outgoing_args - (offsets->saved_args + 4);
21004 default:
21005 gcc_unreachable ();
21007 gcc_unreachable ();
21009 case FRAME_POINTER_REGNUM:
21010 switch (to)
21012 case THUMB_HARD_FRAME_POINTER_REGNUM:
21013 return 0;
21015 case ARM_HARD_FRAME_POINTER_REGNUM:
21016 /* The hard frame pointer points to the top entry in the
21017 stack frame. The soft frame pointer points to the bottom entry
21018 in the stack frame. If there is no stack frame at all,
21019 then they are identical. */
21021 return offsets->frame - offsets->soft_frame;
21023 case STACK_POINTER_REGNUM:
21024 return offsets->outgoing_args - offsets->soft_frame;
21026 default:
21027 gcc_unreachable ();
21029 gcc_unreachable ();
21031 default:
21032 /* You cannot eliminate from the stack pointer.
21033 In theory you could eliminate from the hard frame
21034 pointer to the stack pointer, but this will never
21035 happen, since if a stack frame is not needed the
21036 hard frame pointer will never be used. */
21037 gcc_unreachable ();
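/* Continuing the illustrative frame sketched above: eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM would return
   outgoing_args - (saved_args + 4) = 24 - 4 = 20 for that example.  */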
21041 /* Given FROM and TO register numbers, say whether this elimination is
21042 allowed. Frame pointer elimination is automatically handled.
21044 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21045 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21046 pointer, we must eliminate FRAME_POINTER_REGNUM into
21047 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21048 ARG_POINTER_REGNUM. */
21050 bool
21051 arm_can_eliminate (const int from, const int to)
21053 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21054 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21055 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21056 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21057 true);
21060 /* Emit RTL to save coprocessor registers on function entry. Returns the
21061 number of bytes pushed. */
21063 static int
21064 arm_save_coproc_regs(void)
21066 int saved_size = 0;
21067 unsigned reg;
21068 unsigned start_reg;
21069 rtx insn;
21071 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21072 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21074 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21075 insn = gen_rtx_MEM (V2SImode, insn);
21076 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21077 RTX_FRAME_RELATED_P (insn) = 1;
21078 saved_size += 8;
21081 if (TARGET_HARD_FLOAT && TARGET_VFP)
21083 start_reg = FIRST_VFP_REGNUM;
21085 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21087 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21088 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21090 if (start_reg != reg)
21091 saved_size += vfp_emit_fstmd (start_reg,
21092 (reg - start_reg) / 2);
21093 start_reg = reg + 2;
21096 if (start_reg != reg)
21097 saved_size += vfp_emit_fstmd (start_reg,
21098 (reg - start_reg) / 2);
21100 return saved_size;
21104 /* Set the Thumb frame pointer from the stack pointer. */
21106 static void
21107 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21109 HOST_WIDE_INT amount;
21110 rtx insn, dwarf;
21112 amount = offsets->outgoing_args - offsets->locals_base;
21113 if (amount < 1024)
21114 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21115 stack_pointer_rtx, GEN_INT (amount)));
21116 else
21118 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21119 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21120 expects the first two operands to be the same. */
21121 if (TARGET_THUMB2)
21123 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21124 stack_pointer_rtx,
21125 hard_frame_pointer_rtx));
21127 else
21129 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21130 hard_frame_pointer_rtx,
21131 stack_pointer_rtx));
21133 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21134 plus_constant (Pmode, stack_pointer_rtx, amount));
21135 RTX_FRAME_RELATED_P (dwarf) = 1;
21136 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21139 RTX_FRAME_RELATED_P (insn) = 1;
21142 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21143 function. */
21144 void
21145 arm_expand_prologue (void)
21147 rtx amount;
21148 rtx insn;
21149 rtx ip_rtx;
21150 unsigned long live_regs_mask;
21151 unsigned long func_type;
21152 int fp_offset = 0;
21153 int saved_pretend_args = 0;
21154 int saved_regs = 0;
21155 unsigned HOST_WIDE_INT args_to_push;
21156 arm_stack_offsets *offsets;
21158 func_type = arm_current_func_type ();
21160 /* Naked functions don't have prologues. */
21161 if (IS_NAKED (func_type))
21162 return;
21164 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21165 args_to_push = crtl->args.pretend_args_size;
21167 /* Compute which registers we will have to save onto the stack. */
21168 offsets = arm_get_frame_offsets ();
21169 live_regs_mask = offsets->saved_regs_mask;
21171 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21173 if (IS_STACKALIGN (func_type))
21175 rtx r0, r1;
21177 /* Handle a word-aligned stack pointer. We generate the following:
21179 mov r0, sp
21180 bic r1, r0, #7
21181 mov sp, r1
21182 <save and restore r0 in normal prologue/epilogue>
21183 mov sp, r0
21184 bx lr
21186 The unwinder doesn't need to know about the stack realignment.
21187 Just tell it we saved SP in r0. */
21188 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21190 r0 = gen_rtx_REG (SImode, 0);
21191 r1 = gen_rtx_REG (SImode, 1);
21193 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21194 RTX_FRAME_RELATED_P (insn) = 1;
21195 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21197 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21199 /* ??? The CFA changes here, which may cause GDB to conclude that it
21200 has entered a different function. That said, the unwind info is
21201 correct, individually, before and after this instruction because
21202 we've described the save of SP, which will override the default
21203 handling of SP as restoring from the CFA. */
21204 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21207 /* For APCS frames, if the IP register is clobbered
21208 when creating the frame, save that register in a special
21209 way. */
21210 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21212 if (IS_INTERRUPT (func_type))
21214 /* Interrupt functions must not corrupt any registers.
21215 Creating a frame pointer however, corrupts the IP
21216 register, so we must push it first. */
21217 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21219 /* Do not set RTX_FRAME_RELATED_P on this insn.
21220 The dwarf stack unwinding code only wants to see one
21221 stack decrement per function, and this is not it. If
21222 this instruction is labeled as being part of the frame
21223 creation sequence then dwarf2out_frame_debug_expr will
21224 die when it encounters the assignment of IP to FP
21225 later on, since the use of SP here establishes SP as
21226 the CFA register and not IP.
21228 Anyway this instruction is not really part of the stack
21229 frame creation although it is part of the prologue. */
21231 else if (IS_NESTED (func_type))
21233 /* The static chain register is the same as the IP register
21234 used as a scratch register during stack frame creation.
21235 To get around this need to find somewhere to store IP
21236 whilst the frame is being created. We try the following
21237 places in order:
21239 1. The last argument register r3 if it is available.
21240 2. A slot on the stack above the frame if there are no
21241 arguments to push onto the stack.
21242 3. Register r3 again, after pushing the argument registers
21243 onto the stack, if this is a varargs function.
21244 4. The last slot on the stack created for the arguments to
21245 push, if this isn't a varargs function.
21247 Note - we only need to tell the dwarf2 backend about the SP
21248 adjustment in the second variant; the static chain register
21249 doesn't need to be unwound, as it doesn't contain a value
21250 inherited from the caller. */
21252 if (!arm_r3_live_at_start_p ())
21253 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21254 else if (args_to_push == 0)
21256 rtx addr, dwarf;
21258 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21259 saved_regs += 4;
21261 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21262 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21263 fp_offset = 4;
21265 /* Just tell the dwarf backend that we adjusted SP. */
21266 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21267 plus_constant (Pmode, stack_pointer_rtx,
21268 -fp_offset));
21269 RTX_FRAME_RELATED_P (insn) = 1;
21270 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21272 else
21274 /* Store the args on the stack. */
21275 if (cfun->machine->uses_anonymous_args)
21277 insn
21278 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21279 (0xf0 >> (args_to_push / 4)) & 0xf);
21280 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21281 saved_pretend_args = 1;
21283 else
21285 rtx addr, dwarf;
21287 if (args_to_push == 4)
21288 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21289 else
21290 addr
21291 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21292 plus_constant (Pmode,
21293 stack_pointer_rtx,
21294 -args_to_push));
21296 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21298 /* Just tell the dwarf backend that we adjusted SP. */
21299 dwarf
21300 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21301 plus_constant (Pmode, stack_pointer_rtx,
21302 -args_to_push));
21303 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21306 RTX_FRAME_RELATED_P (insn) = 1;
21307 fp_offset = args_to_push;
21308 args_to_push = 0;
21312 insn = emit_set_insn (ip_rtx,
21313 plus_constant (Pmode, stack_pointer_rtx,
21314 fp_offset));
21315 RTX_FRAME_RELATED_P (insn) = 1;
21318 if (args_to_push)
21320 /* Push the argument registers, or reserve space for them. */
21321 if (cfun->machine->uses_anonymous_args)
21322 insn = emit_multi_reg_push
21323 ((0xf0 >> (args_to_push / 4)) & 0xf,
21324 (0xf0 >> (args_to_push / 4)) & 0xf);
21325 else
21326 insn = emit_insn
21327 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21328 GEN_INT (- args_to_push)));
21329 RTX_FRAME_RELATED_P (insn) = 1;
21332 /* If this is an interrupt service routine, and the link register
21333 is going to be pushed, and we're not generating an extra
21334 push of IP (needed when a frame is needed and the frame layout is APCS),
21335 subtracting four from LR now will mean that the function return
21336 can be done with a single instruction. */
21337 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21338 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21339 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21340 && TARGET_ARM)
21342 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21344 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21347 if (live_regs_mask)
21349 unsigned long dwarf_regs_mask = live_regs_mask;
21351 saved_regs += bit_count (live_regs_mask) * 4;
21352 if (optimize_size && !frame_pointer_needed
21353 && saved_regs == offsets->saved_regs - offsets->saved_args)
21355 /* If no coprocessor registers are being pushed and we don't have
21356 to worry about a frame pointer then push extra registers to
21357 create the stack frame. This is done in a way that does not
21358 alter the frame layout, so is independent of the epilogue. */
21359 int n;
21360 int frame;
21361 n = 0;
21362 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21363 n++;
21364 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21365 if (frame && n * 4 >= frame)
21367 n = frame / 4;
21368 live_regs_mask |= (1 << n) - 1;
21369 saved_regs += frame;
21373 if (TARGET_LDRD
21374 && current_tune->prefer_ldrd_strd
21375 && !optimize_function_for_size_p (cfun))
21377 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21378 if (TARGET_THUMB2)
21379 thumb2_emit_strd_push (live_regs_mask);
21380 else if (TARGET_ARM
21381 && !TARGET_APCS_FRAME
21382 && !IS_INTERRUPT (func_type))
21383 arm_emit_strd_push (live_regs_mask);
21384 else
21386 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21387 RTX_FRAME_RELATED_P (insn) = 1;
21390 else
21392 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21393 RTX_FRAME_RELATED_P (insn) = 1;
21397 if (! IS_VOLATILE (func_type))
21398 saved_regs += arm_save_coproc_regs ();
21400 if (frame_pointer_needed && TARGET_ARM)
21402 /* Create the new frame pointer. */
21403 if (TARGET_APCS_FRAME)
21405 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21406 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21407 RTX_FRAME_RELATED_P (insn) = 1;
21409 if (IS_NESTED (func_type))
21411 /* Recover the static chain register. */
21412 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21413 insn = gen_rtx_REG (SImode, 3);
21414 else
21416 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21417 insn = gen_frame_mem (SImode, insn);
21419 emit_set_insn (ip_rtx, insn);
21420 /* Add a USE to stop propagate_one_insn() from barfing. */
21421 emit_insn (gen_force_register_use (ip_rtx));
21424 else
21426 insn = GEN_INT (saved_regs - 4);
21427 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21428 stack_pointer_rtx, insn));
21429 RTX_FRAME_RELATED_P (insn) = 1;
21433 if (flag_stack_usage_info)
21434 current_function_static_stack_size
21435 = offsets->outgoing_args - offsets->saved_args;
21437 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21439 /* This add can produce multiple insns for a large constant, so we
21440 need to get tricky. */
21441 rtx_insn *last = get_last_insn ();
21443 amount = GEN_INT (offsets->saved_args + saved_regs
21444 - offsets->outgoing_args);
21446 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21447 amount));
21450 last = last ? NEXT_INSN (last) : get_insns ();
21451 RTX_FRAME_RELATED_P (last) = 1;
21453 while (last != insn);
21455 /* If the frame pointer is needed, emit a special barrier that
21456 will prevent the scheduler from moving stores to the frame
21457 before the stack adjustment. */
21458 if (frame_pointer_needed)
21459 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21460 hard_frame_pointer_rtx));
21464 if (frame_pointer_needed && TARGET_THUMB2)
21465 thumb_set_frame_pointer (offsets);
21467 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21469 unsigned long mask;
21471 mask = live_regs_mask;
21472 mask &= THUMB2_WORK_REGS;
21473 if (!IS_NESTED (func_type))
21474 mask |= (1 << IP_REGNUM);
21475 arm_load_pic_register (mask);
21478 /* If we are profiling, make sure no instructions are scheduled before
21479 the call to mcount. Similarly if the user has requested no
21480 scheduling in the prolog. Similarly if we want non-call exceptions
21481 using the EABI unwinder, to prevent faulting instructions from being
21482 swapped with a stack adjustment. */
21483 if (crtl->profile || !TARGET_SCHED_PROLOG
21484 || (arm_except_unwind_info (&global_options) == UI_TARGET
21485 && cfun->can_throw_non_call_exceptions))
21486 emit_insn (gen_blockage ());
21488 /* If the link register is being kept alive, with the return address in it,
21489 then make sure that it does not get reused by the ce2 pass. */
21490 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21491 cfun->machine->lr_save_eliminated = 1;
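/* Illustrative sketch (not part of the compiler): how the register mask
   passed to emit_multi_reg_push above is derived from ARGS_TO_PUSH when
   anonymous (variadic) arguments have to be spilled.  ARGS_TO_PUSH is a
   byte count, and the anonymous arguments always live in the
   highest-numbered argument registers, so pushing N bytes means pushing
   the top N/4 of r0-r3.  The function name below is hypothetical.  */

static unsigned int
anon_args_push_mask_sketch (int args_to_push)
{
  /* 0xf0 >> (bytes / 4) keeps one bit per register still required, and
     the final & 0xf discards the bits that never belonged to r0-r3.
     For 8 bytes this yields 0xc (r2 and r3); for 16 bytes, 0xf (r0-r3).  */
  return (0xf0 >> (args_to_push / 4)) & 0xf;
}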
21494 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21495 static void
21496 arm_print_condition (FILE *stream)
21498 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21500 /* Branch conversion is not implemented for Thumb-2. */
21501 if (TARGET_THUMB)
21503 output_operand_lossage ("predicated Thumb instruction");
21504 return;
21506 if (current_insn_predicate != NULL)
21508 output_operand_lossage
21509 ("predicated instruction in conditional sequence");
21510 return;
21513 fputs (arm_condition_codes[arm_current_cc], stream);
21515 else if (current_insn_predicate)
21517 enum arm_cond_code code;
21519 if (TARGET_THUMB1)
21521 output_operand_lossage ("predicated Thumb instruction");
21522 return;
21525 code = get_arm_condition_code (current_insn_predicate);
21526 fputs (arm_condition_codes[code], stream);
21531 /* Globally reserved letters: acln
21532 Punctuation letters currently used: @_|?().!#
21533 Lower case letters currently used: bcdefhimpqtvwxyz
21534 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21535 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21537 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21539 If CODE is 'd', then the X is a condition operand and the instruction
21540 should only be executed if the condition is true.
21541 If CODE is 'D', then the X is a condition operand and the instruction
21542 should only be executed if the condition is false: however, if the mode
21543 of the comparison is CCFPEmode, then always execute the instruction -- we
21544 do this because in these circumstances !GE does not necessarily imply LT;
21545 in these cases the instruction pattern will take care to make sure that
21546 an instruction containing %d will follow, thereby undoing the effects of
21547 doing this instruction unconditionally.
21548 If CODE is 'N' then X is a floating point operand that must be negated
21549 before output.
21550 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21551 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21552 static void
21553 arm_print_operand (FILE *stream, rtx x, int code)
21555 switch (code)
21557 case '@':
21558 fputs (ASM_COMMENT_START, stream);
21559 return;
21561 case '_':
21562 fputs (user_label_prefix, stream);
21563 return;
21565 case '|':
21566 fputs (REGISTER_PREFIX, stream);
21567 return;
21569 case '?':
21570 arm_print_condition (stream);
21571 return;
21573 case '(':
21574 /* Nothing in unified syntax, otherwise the current condition code. */
21575 if (!TARGET_UNIFIED_ASM)
21576 arm_print_condition (stream);
21577 break;
21579 case ')':
21580 /* The current condition code in unified syntax, otherwise nothing. */
21581 if (TARGET_UNIFIED_ASM)
21582 arm_print_condition (stream);
21583 break;
21585 case '.':
21586 /* The current condition code for a condition code setting instruction.
21587 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21588 if (TARGET_UNIFIED_ASM)
21590 fputc('s', stream);
21591 arm_print_condition (stream);
21593 else
21595 arm_print_condition (stream);
21596 fputc('s', stream);
21598 return;
21600 case '!':
21601 /* If the instruction is conditionally executed then print
21602 the current condition code, otherwise print 's'. */
21603 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21604 if (current_insn_predicate)
21605 arm_print_condition (stream);
21606 else
21607 fputc('s', stream);
21608 break;
21610 /* %# is a "break" sequence. It doesn't output anything, but is used to
21611 separate e.g. operand numbers from following text, if that text consists
21612 of further digits which we don't want to be part of the operand
21613 number. */
21614 case '#':
21615 return;
21617 case 'N':
21619 REAL_VALUE_TYPE r;
21620 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21621 r = real_value_negate (&r);
21622 fprintf (stream, "%s", fp_const_from_val (&r));
21624 return;
21626 /* An integer or symbol address without a preceding # sign. */
21627 case 'c':
21628 switch (GET_CODE (x))
21630 case CONST_INT:
21631 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21632 break;
21634 case SYMBOL_REF:
21635 output_addr_const (stream, x);
21636 break;
21638 case CONST:
21639 if (GET_CODE (XEXP (x, 0)) == PLUS
21640 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21642 output_addr_const (stream, x);
21643 break;
21645 /* Fall through. */
21647 default:
21648 output_operand_lossage ("Unsupported operand for code '%c'", code);
21650 return;
21652 /* An integer that we want to print in HEX. */
21653 case 'x':
21654 switch (GET_CODE (x))
21656 case CONST_INT:
21657 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21658 break;
21660 default:
21661 output_operand_lossage ("Unsupported operand for code '%c'", code);
21663 return;
21665 case 'B':
21666 if (CONST_INT_P (x))
21668 HOST_WIDE_INT val;
21669 val = ARM_SIGN_EXTEND (~INTVAL (x));
21670 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21672 else
21674 putc ('~', stream);
21675 output_addr_const (stream, x);
21677 return;
21679 case 'b':
21680 /* Print the log2 of a CONST_INT. */
21682 HOST_WIDE_INT val;
21684 if (!CONST_INT_P (x)
21685 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21686 output_operand_lossage ("Unsupported operand for code '%c'", code);
21687 else
21688 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21690 return;
21692 case 'L':
21693 /* The low 16 bits of an immediate constant. */
21694 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21695 return;
21697 case 'i':
21698 fprintf (stream, "%s", arithmetic_instr (x, 1));
21699 return;
21701 case 'I':
21702 fprintf (stream, "%s", arithmetic_instr (x, 0));
21703 return;
21705 case 'S':
21707 HOST_WIDE_INT val;
21708 const char *shift;
21710 shift = shift_op (x, &val);
21712 if (shift)
21714 fprintf (stream, ", %s ", shift);
21715 if (val == -1)
21716 arm_print_operand (stream, XEXP (x, 1), 0);
21717 else
21718 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21721 return;
21723 /* An explanation of the 'Q', 'R' and 'H' register operands:
21725 In a pair of registers containing a DI or DF value the 'Q'
21726 operand returns the register number of the register containing
21727 the least significant part of the value. The 'R' operand returns
21728 the register number of the register containing the most
21729 significant part of the value.
21731 The 'H' operand returns the higher of the two register numbers.
21732 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21733 same as the 'Q' operand, since the most significant part of the
21734 value is held in the lower number register. The reverse is true
21735 on systems where WORDS_BIG_ENDIAN is false.
21737 The purpose of these operands is to distinguish between cases
21738 where the endian-ness of the values is important (for example
21739 when they are added together), and cases where the endian-ness
21740 is irrelevant, but the order of register operations is important.
21741 For example when loading a value from memory into a register
21742 pair, the endian-ness does not matter. Provided that the value
21743 from the lower memory address is put into the lower numbered
21744 register, and the value from the higher address is put into the
21745 higher numbered register, the load will work regardless of whether
21746 the value being loaded is big-wordian or little-wordian. The
21747 order of the two register loads can matter however, if the address
21748 of the memory location is actually held in one of the registers
21749 being overwritten by the load.
21751 The 'Q' and 'R' constraints are also available for 64-bit
21752 constants. */
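/* Concrete illustration of the above: with a little-endian DImode value held
   in the pair { r2, r3 }, %Q prints r2 (the least significant word), %R
   prints r3 (the most significant word) and %H prints r3 (the higher
   register number).  When WORDS_BIG_ENDIAN is true the %Q/%R choices swap,
   while %H still prints r3.  */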
21753 case 'Q':
21754 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21756 rtx part = gen_lowpart (SImode, x);
21757 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21758 return;
21761 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21763 output_operand_lossage ("invalid operand for code '%c'", code);
21764 return;
21767 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21768 return;
21770 case 'R':
21771 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21773 machine_mode mode = GET_MODE (x);
21774 rtx part;
21776 if (mode == VOIDmode)
21777 mode = DImode;
21778 part = gen_highpart_mode (SImode, mode, x);
21779 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21780 return;
21783 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21785 output_operand_lossage ("invalid operand for code '%c'", code);
21786 return;
21789 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21790 return;
21792 case 'H':
21793 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21795 output_operand_lossage ("invalid operand for code '%c'", code);
21796 return;
21799 asm_fprintf (stream, "%r", REGNO (x) + 1);
21800 return;
21802 case 'J':
21803 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21805 output_operand_lossage ("invalid operand for code '%c'", code);
21806 return;
21809 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21810 return;
21812 case 'K':
21813 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21815 output_operand_lossage ("invalid operand for code '%c'", code);
21816 return;
21819 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21820 return;
21822 case 'm':
21823 asm_fprintf (stream, "%r",
21824 REG_P (XEXP (x, 0))
21825 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21826 return;
21828 case 'M':
21829 asm_fprintf (stream, "{%r-%r}",
21830 REGNO (x),
21831 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21832 return;
21834 /* Like 'M', but writing doubleword vector registers, for use by Neon
21835 insns. */
21836 case 'h':
21838 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21839 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21840 if (numregs == 1)
21841 asm_fprintf (stream, "{d%d}", regno);
21842 else
21843 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21845 return;
21847 case 'd':
21848 /* CONST_TRUE_RTX means always -- that's the default. */
21849 if (x == const_true_rtx)
21850 return;
21852 if (!COMPARISON_P (x))
21854 output_operand_lossage ("invalid operand for code '%c'", code);
21855 return;
21858 fputs (arm_condition_codes[get_arm_condition_code (x)],
21859 stream);
21860 return;
21862 case 'D':
21863 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21864 want to do that. */
21865 if (x == const_true_rtx)
21867 output_operand_lossage ("instruction never executed");
21868 return;
21870 if (!COMPARISON_P (x))
21872 output_operand_lossage ("invalid operand for code '%c'", code);
21873 return;
21876 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21877 (get_arm_condition_code (x))],
21878 stream);
21879 return;
21881 case 's':
21882 case 'V':
21883 case 'W':
21884 case 'X':
21885 case 'Y':
21886 case 'Z':
21887 /* Former Maverick support, removed after GCC-4.7. */
21888 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21889 return;
21891 case 'U':
21892 if (!REG_P (x)
21893 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21894 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21895 /* Bad value for wCG register number. */
21897 output_operand_lossage ("invalid operand for code '%c'", code);
21898 return;
21901 else
21902 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21903 return;
21905 /* Print an iWMMXt control register name. */
21906 case 'w':
21907 if (!CONST_INT_P (x)
21908 || INTVAL (x) < 0
21909 || INTVAL (x) >= 16)
21910 /* Bad value for wC register number. */
21912 output_operand_lossage ("invalid operand for code '%c'", code);
21913 return;
21916 else
21918 static const char * wc_reg_names [16] =
21920 "wCID", "wCon", "wCSSF", "wCASF",
21921 "wC4", "wC5", "wC6", "wC7",
21922 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21923 "wC12", "wC13", "wC14", "wC15"
21926 fputs (wc_reg_names [INTVAL (x)], stream);
21928 return;
21930 /* Print the high single-precision register of a VFP double-precision
21931 register. */
21932 case 'p':
21934 machine_mode mode = GET_MODE (x);
21935 int regno;
21937 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21939 output_operand_lossage ("invalid operand for code '%c'", code);
21940 return;
21943 regno = REGNO (x);
21944 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21946 output_operand_lossage ("invalid operand for code '%c'", code);
21947 return;
21950 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21952 return;
21954 /* Print a VFP/Neon double precision or quad precision register name. */
21955 case 'P':
21956 case 'q':
21958 machine_mode mode = GET_MODE (x);
21959 int is_quad = (code == 'q');
21960 int regno;
21962 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21964 output_operand_lossage ("invalid operand for code '%c'", code);
21965 return;
21968 if (!REG_P (x)
21969 || !IS_VFP_REGNUM (REGNO (x)))
21971 output_operand_lossage ("invalid operand for code '%c'", code);
21972 return;
21975 regno = REGNO (x);
21976 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21977 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21979 output_operand_lossage ("invalid operand for code '%c'", code);
21980 return;
21983 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21984 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21986 return;
21988 /* These two codes print the low/high doubleword register of a Neon quad
21989 register, respectively. For pair-structure types, can also print
21990 low/high quadword registers. */
21991 case 'e':
21992 case 'f':
21994 machine_mode mode = GET_MODE (x);
21995 int regno;
21997 if ((GET_MODE_SIZE (mode) != 16
21998 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22000 output_operand_lossage ("invalid operand for code '%c'", code);
22001 return;
22004 regno = REGNO (x);
22005 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22007 output_operand_lossage ("invalid operand for code '%c'", code);
22008 return;
22011 if (GET_MODE_SIZE (mode) == 16)
22012 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22013 + (code == 'f' ? 1 : 0));
22014 else
22015 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22016 + (code == 'f' ? 1 : 0));
22018 return;
22020 /* Print a VFPv3 floating-point constant, represented as an integer
22021 index. */
22022 case 'G':
22024 int index = vfp3_const_double_index (x);
22025 gcc_assert (index != -1);
22026 fprintf (stream, "%d", index);
22028 return;
22030 /* Print bits representing opcode features for Neon.
22032 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22033 and polynomials as unsigned.
22035 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22037 Bit 2 is 1 for rounding functions, 0 otherwise. */
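/* Worked example of the encoding above: bits = 5 (binary 101) describes a
   signed, rounding integer operation, so %T and %t both print 's', %F
   prints 'i' and %O prints "r" -- together giving suffixes of the
   "vrhadd.s8" kind.  */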
22039 /* Identify the type as 's', 'u', 'p' or 'f'. */
22040 case 'T':
22042 HOST_WIDE_INT bits = INTVAL (x);
22043 fputc ("uspf"[bits & 3], stream);
22045 return;
22047 /* Likewise, but signed and unsigned integers are both 'i'. */
22048 case 'F':
22050 HOST_WIDE_INT bits = INTVAL (x);
22051 fputc ("iipf"[bits & 3], stream);
22053 return;
22055 /* As for 'T', but emit 'u' instead of 'p'. */
22056 case 't':
22058 HOST_WIDE_INT bits = INTVAL (x);
22059 fputc ("usuf"[bits & 3], stream);
22061 return;
22063 /* Bit 2: rounding (vs none). */
22064 case 'O':
22066 HOST_WIDE_INT bits = INTVAL (x);
22067 fputs ((bits & 4) != 0 ? "r" : "", stream);
22069 return;
22071 /* Memory operand for vld1/vst1 instruction. */
22072 case 'A':
22074 rtx addr;
22075 bool postinc = FALSE;
22076 rtx postinc_reg = NULL;
22077 unsigned align, memsize, align_bits;
22079 gcc_assert (MEM_P (x));
22080 addr = XEXP (x, 0);
22081 if (GET_CODE (addr) == POST_INC)
22083 postinc = 1;
22084 addr = XEXP (addr, 0);
22086 if (GET_CODE (addr) == POST_MODIFY)
22088 postinc_reg = XEXP( XEXP (addr, 1), 1);
22089 addr = XEXP (addr, 0);
22091 asm_fprintf (stream, "[%r", REGNO (addr));
22093 /* We know the alignment of this access, so we can emit a hint in the
22094 instruction (for some alignments) as an aid to the memory subsystem
22095 of the target. */
22096 align = MEM_ALIGN (x) >> 3;
22097 memsize = MEM_SIZE (x);
22099 /* Only certain alignment specifiers are supported by the hardware. */
22100 if (memsize == 32 && (align % 32) == 0)
22101 align_bits = 256;
22102 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22103 align_bits = 128;
22104 else if (memsize >= 8 && (align % 8) == 0)
22105 align_bits = 64;
22106 else
22107 align_bits = 0;
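/* For example, a 16-byte access whose address is known to be 16-byte
   aligned gets ALIGN_BITS = 128 and is printed below as "[rN:128]"; an
   access with no usable alignment guarantee gets no ":<bits>" hint.  */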
22109 if (align_bits != 0)
22110 asm_fprintf (stream, ":%d", align_bits);
22112 asm_fprintf (stream, "]");
22114 if (postinc)
22115 fputs("!", stream);
22116 if (postinc_reg)
22117 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22119 return;
22121 case 'C':
22123 rtx addr;
22125 gcc_assert (MEM_P (x));
22126 addr = XEXP (x, 0);
22127 gcc_assert (REG_P (addr));
22128 asm_fprintf (stream, "[%r]", REGNO (addr));
22130 return;
22132 /* Translate an S register number into a D register number and element index. */
22133 case 'y':
22135 machine_mode mode = GET_MODE (x);
22136 int regno;
22138 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22140 output_operand_lossage ("invalid operand for code '%c'", code);
22141 return;
22144 regno = REGNO (x);
22145 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22147 output_operand_lossage ("invalid operand for code '%c'", code);
22148 return;
22151 regno = regno - FIRST_VFP_REGNUM;
22152 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22154 return;
22156 case 'v':
22157 gcc_assert (CONST_DOUBLE_P (x));
22158 int result;
22159 result = vfp3_const_double_for_fract_bits (x);
22160 if (result == 0)
22161 result = vfp3_const_double_for_bits (x);
22162 fprintf (stream, "#%d", result);
22163 return;
22165 /* Register specifier for vld1.16/vst1.16. Translate the S register
22166 number into a D register number and element index. */
22167 case 'z':
22169 machine_mode mode = GET_MODE (x);
22170 int regno;
22172 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22174 output_operand_lossage ("invalid operand for code '%c'", code);
22175 return;
22178 regno = REGNO (x);
22179 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22181 output_operand_lossage ("invalid operand for code '%c'", code);
22182 return;
22185 regno = regno - FIRST_VFP_REGNUM;
22186 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22188 return;
22190 default:
22191 if (x == 0)
22193 output_operand_lossage ("missing operand");
22194 return;
22197 switch (GET_CODE (x))
22199 case REG:
22200 asm_fprintf (stream, "%r", REGNO (x));
22201 break;
22203 case MEM:
22204 output_memory_reference_mode = GET_MODE (x);
22205 output_address (XEXP (x, 0));
22206 break;
22208 case CONST_DOUBLE:
22210 char fpstr[20];
22211 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22212 sizeof (fpstr), 0, 1);
22213 fprintf (stream, "#%s", fpstr);
22215 break;
22217 default:
22218 gcc_assert (GET_CODE (x) != NEG);
22219 fputc ('#', stream);
22220 if (GET_CODE (x) == HIGH)
22222 fputs (":lower16:", stream);
22223 x = XEXP (x, 0);
22226 output_addr_const (stream, x);
22227 break;
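/* Illustrative, standalone sketches (not used by the compiler) of the
   integer transforms performed by the %B, %L and %b codes handled above,
   using plain host arithmetic in place of rtx constants.  A 64-bit
   "long long" stands in for HOST_WIDE_INT, and the *_sketch names are
   hypothetical.  */

static long long
print_operand_B_sketch (long long val)
{
  /* %B: bitwise inverse, sign-extended from bit 31 as ARM_SIGN_EXTEND
     does; 5 becomes -6.  */
  long long inv = (~val) & 0xffffffffLL;
  if (inv & 0x80000000LL)
    inv -= 0x100000000LL;
  return inv;
}

static long long
print_operand_L_sketch (long long val)
{
  /* %L: the low 16 bits of the constant; 0x12345678 becomes 0x5678.  */
  return val & 0xffff;
}

static int
print_operand_b_sketch (long long val)
{
  /* %b: log2 of a power-of-two constant; 64 becomes 6.  A non-power-of-two
     yields -1 here, where the real code reports an operand error.  */
  unsigned long long bits = val & 0xffffffffULL;
  int n = 0;
  if (bits == 0 || (bits & (bits - 1)) != 0)
    return -1;
  while ((bits & 1) == 0)
    {
      bits >>= 1;
      n++;
    }
  return n;
}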
22232 /* Target hook for printing a memory address. */
22233 static void
22234 arm_print_operand_address (FILE *stream, rtx x)
22236 if (TARGET_32BIT)
22238 int is_minus = GET_CODE (x) == MINUS;
22240 if (REG_P (x))
22241 asm_fprintf (stream, "[%r]", REGNO (x));
22242 else if (GET_CODE (x) == PLUS || is_minus)
22244 rtx base = XEXP (x, 0);
22245 rtx index = XEXP (x, 1);
22246 HOST_WIDE_INT offset = 0;
22247 if (!REG_P (base)
22248 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22250 /* Ensure that BASE is a register
22251 (one of them must be).
22252 Also ensure the SP is not used as an index register. */
22253 rtx temp = base;
22254 base = index;
22255 index = temp;
22257 switch (GET_CODE (index))
22259 case CONST_INT:
22260 offset = INTVAL (index);
22261 if (is_minus)
22262 offset = -offset;
22263 asm_fprintf (stream, "[%r, #%wd]",
22264 REGNO (base), offset);
22265 break;
22267 case REG:
22268 asm_fprintf (stream, "[%r, %s%r]",
22269 REGNO (base), is_minus ? "-" : "",
22270 REGNO (index));
22271 break;
22273 case MULT:
22274 case ASHIFTRT:
22275 case LSHIFTRT:
22276 case ASHIFT:
22277 case ROTATERT:
22279 asm_fprintf (stream, "[%r, %s%r",
22280 REGNO (base), is_minus ? "-" : "",
22281 REGNO (XEXP (index, 0)));
22282 arm_print_operand (stream, index, 'S');
22283 fputs ("]", stream);
22284 break;
22287 default:
22288 gcc_unreachable ();
22291 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22292 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22294 extern machine_mode output_memory_reference_mode;
22296 gcc_assert (REG_P (XEXP (x, 0)));
22298 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22299 asm_fprintf (stream, "[%r, #%s%d]!",
22300 REGNO (XEXP (x, 0)),
22301 GET_CODE (x) == PRE_DEC ? "-" : "",
22302 GET_MODE_SIZE (output_memory_reference_mode));
22303 else
22304 asm_fprintf (stream, "[%r], #%s%d",
22305 REGNO (XEXP (x, 0)),
22306 GET_CODE (x) == POST_DEC ? "-" : "",
22307 GET_MODE_SIZE (output_memory_reference_mode));
22309 else if (GET_CODE (x) == PRE_MODIFY)
22311 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22312 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22313 asm_fprintf (stream, "#%wd]!",
22314 INTVAL (XEXP (XEXP (x, 1), 1)));
22315 else
22316 asm_fprintf (stream, "%r]!",
22317 REGNO (XEXP (XEXP (x, 1), 1)));
22319 else if (GET_CODE (x) == POST_MODIFY)
22321 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22322 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22323 asm_fprintf (stream, "#%wd",
22324 INTVAL (XEXP (XEXP (x, 1), 1)));
22325 else
22326 asm_fprintf (stream, "%r",
22327 REGNO (XEXP (XEXP (x, 1), 1)));
22329 else output_addr_const (stream, x);
22331 else
22333 if (REG_P (x))
22334 asm_fprintf (stream, "[%r]", REGNO (x));
22335 else if (GET_CODE (x) == POST_INC)
22336 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22337 else if (GET_CODE (x) == PLUS)
22339 gcc_assert (REG_P (XEXP (x, 0)));
22340 if (CONST_INT_P (XEXP (x, 1)))
22341 asm_fprintf (stream, "[%r, #%wd]",
22342 REGNO (XEXP (x, 0)),
22343 INTVAL (XEXP (x, 1)));
22344 else
22345 asm_fprintf (stream, "[%r, %r]",
22346 REGNO (XEXP (x, 0)),
22347 REGNO (XEXP (x, 1)));
22349 else
22350 output_addr_const (stream, x);
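/* A few concrete renderings from the 32-bit path above (illustrative):
     (reg r0)                            ->  "[r0]"
     (plus (reg r1) (const_int 8))       ->  "[r1, #8]"
     (minus (reg r1) (reg r2))           ->  "[r1, -r2]"
     (pre_dec (reg sp)), SImode access   ->  "[sp, #-4]!"
     (post_inc (reg r3)), SImode access  ->  "[r3], #4"  */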
22354 /* Target hook for indicating whether a punctuation character for
22355 TARGET_PRINT_OPERAND is valid. */
22356 static bool
22357 arm_print_operand_punct_valid_p (unsigned char code)
22359 return (code == '@' || code == '|' || code == '.'
22360 || code == '(' || code == ')' || code == '#'
22361 || (TARGET_32BIT && (code == '?'))
22362 || (TARGET_THUMB2 && (code == '!'))
22363 || (TARGET_THUMB && (code == '_')));
22366 /* Target hook for assembling integer objects. The ARM version needs to
22367 handle word-sized values specially. */
22368 static bool
22369 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22371 machine_mode mode;
22373 if (size == UNITS_PER_WORD && aligned_p)
22375 fputs ("\t.word\t", asm_out_file);
22376 output_addr_const (asm_out_file, x);
22378 /* Mark symbols as position independent. We only do this in the
22379 .text segment, not in the .data segment. */
22380 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22381 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22383 /* See legitimize_pic_address for an explanation of the
22384 TARGET_VXWORKS_RTP check. */
22385 if (!arm_pic_data_is_text_relative
22386 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22387 fputs ("(GOT)", asm_out_file);
22388 else
22389 fputs ("(GOTOFF)", asm_out_file);
22391 fputc ('\n', asm_out_file);
22392 return true;
22395 mode = GET_MODE (x);
22397 if (arm_vector_mode_supported_p (mode))
22399 int i, units;
22401 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22403 units = CONST_VECTOR_NUNITS (x);
22404 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22406 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22407 for (i = 0; i < units; i++)
22409 rtx elt = CONST_VECTOR_ELT (x, i);
22410 assemble_integer
22411 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22413 else
22414 for (i = 0; i < units; i++)
22416 rtx elt = CONST_VECTOR_ELT (x, i);
22417 REAL_VALUE_TYPE rval;
22419 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22421 assemble_real
22422 (rval, GET_MODE_INNER (mode),
22423 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22426 return true;
22429 return default_assemble_integer (x, size, aligned_p);
22432 static void
22433 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22435 section *s;
22437 if (!TARGET_AAPCS_BASED)
22439 (is_ctor ?
22440 default_named_section_asm_out_constructor
22441 : default_named_section_asm_out_destructor) (symbol, priority);
22442 return;
22445 /* Put these in the .init_array section, using a special relocation. */
22446 if (priority != DEFAULT_INIT_PRIORITY)
22448 char buf[18];
22449 sprintf (buf, "%s.%.5u",
22450 is_ctor ? ".init_array" : ".fini_array",
22451 priority);
22452 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22454 else if (is_ctor)
22455 s = ctors_section;
22456 else
22457 s = dtors_section;
22459 switch_to_section (s);
22460 assemble_align (POINTER_SIZE);
22461 fputs ("\t.word\t", asm_out_file);
22462 output_addr_const (asm_out_file, symbol);
22463 fputs ("(target1)\n", asm_out_file);
22466 /* Add a function to the list of static constructors. */
22468 static void
22469 arm_elf_asm_constructor (rtx symbol, int priority)
22471 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22474 /* Add a function to the list of static destructors. */
22476 static void
22477 arm_elf_asm_destructor (rtx symbol, int priority)
22479 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
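/* Illustrative sketch of the section naming used by arm_elf_asm_cdtor above:
   a constructor with non-default priority 42 goes into a section named
   ".init_array.00042" (the priority is zero-padded to five digits so the
   linker can order the entries by section name).  A standalone check of the
   same format string, with a hypothetical helper name:  */

static void
cdtor_section_name_sketch (char buf[18], int priority, int is_ctor)
{
  /* ".init_array" (11 chars) + '.' + 5 digits + NUL fits in 18 bytes.  */
  sprintf (buf, "%s.%.5u",
           is_ctor ? ".init_array" : ".fini_array",
           (unsigned) priority);
}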
22482 /* A finite state machine takes care of noticing whether or not instructions
22483 can be conditionally executed, and thus decrease execution time and code
22484 size by deleting branch instructions. The fsm is controlled by
22485 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22487 /* The state of the fsm controlling condition codes are:
22488 0: normal, do nothing special
22489 1: make ASM_OUTPUT_OPCODE not output this instruction
22490 2: make ASM_OUTPUT_OPCODE not output this instruction
22491 3: make instructions conditional
22492 4: make instructions conditional
22494 State transitions (state->state by whom under condition):
22495 0 -> 1 final_prescan_insn if the `target' is a label
22496 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22497 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22498 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22499 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22500 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22501 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22502 (the target insn is arm_target_insn).
22504 If the jump clobbers the conditions then we use states 2 and 4.
22506 A similar thing can be done with conditional return insns.
22508 XXX In case the `target' is an unconditional branch, this conditionalising
22509 of the instructions always reduces code size, but not always execution
22510 time. But then, I want to reduce the code size to somewhere near what
22511 /bin/cc produces. */
22513 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22514 instructions. When a COND_EXEC instruction is seen the subsequent
22515 instructions are scanned so that multiple conditional instructions can be
22516 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22517 specify the length and true/false mask for the IT block. These will be
22518 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
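/* A worked example of the ARM-state machinery described above (illustrative;
   register numbers and the label name are made up):

       bne   .L1             <- final_prescan_insn: conditional branch over a
       mov   r0, #1             short block whose target is a label, 0 -> 1
       add   r1, r1, #1
     .L1:                    <- internal_label hook sees the target, 3 -> 0

   In state 1 ASM_OUTPUT_OPCODE suppresses the "bne" and moves to state 3,
   and the two skipped instructions are emitted as "moveq" and "addeq"
   instead, removing the branch entirely.  */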
22520 /* Returns the index of the ARM condition code string in
22521 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22522 COMPARISON should be an rtx like `(eq (...) (...))'. */
22524 enum arm_cond_code
22525 maybe_get_arm_condition_code (rtx comparison)
22527 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22528 enum arm_cond_code code;
22529 enum rtx_code comp_code = GET_CODE (comparison);
22531 if (GET_MODE_CLASS (mode) != MODE_CC)
22532 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22533 XEXP (comparison, 1));
22535 switch (mode)
22537 case CC_DNEmode: code = ARM_NE; goto dominance;
22538 case CC_DEQmode: code = ARM_EQ; goto dominance;
22539 case CC_DGEmode: code = ARM_GE; goto dominance;
22540 case CC_DGTmode: code = ARM_GT; goto dominance;
22541 case CC_DLEmode: code = ARM_LE; goto dominance;
22542 case CC_DLTmode: code = ARM_LT; goto dominance;
22543 case CC_DGEUmode: code = ARM_CS; goto dominance;
22544 case CC_DGTUmode: code = ARM_HI; goto dominance;
22545 case CC_DLEUmode: code = ARM_LS; goto dominance;
22546 case CC_DLTUmode: code = ARM_CC;
22548 dominance:
22549 if (comp_code == EQ)
22550 return ARM_INVERSE_CONDITION_CODE (code);
22551 if (comp_code == NE)
22552 return code;
22553 return ARM_NV;
22555 case CC_NOOVmode:
22556 switch (comp_code)
22558 case NE: return ARM_NE;
22559 case EQ: return ARM_EQ;
22560 case GE: return ARM_PL;
22561 case LT: return ARM_MI;
22562 default: return ARM_NV;
22565 case CC_Zmode:
22566 switch (comp_code)
22568 case NE: return ARM_NE;
22569 case EQ: return ARM_EQ;
22570 default: return ARM_NV;
22573 case CC_Nmode:
22574 switch (comp_code)
22576 case NE: return ARM_MI;
22577 case EQ: return ARM_PL;
22578 default: return ARM_NV;
22581 case CCFPEmode:
22582 case CCFPmode:
22583 /* We can handle all cases except UNEQ and LTGT. */
22584 switch (comp_code)
22586 case GE: return ARM_GE;
22587 case GT: return ARM_GT;
22588 case LE: return ARM_LS;
22589 case LT: return ARM_MI;
22590 case NE: return ARM_NE;
22591 case EQ: return ARM_EQ;
22592 case ORDERED: return ARM_VC;
22593 case UNORDERED: return ARM_VS;
22594 case UNLT: return ARM_LT;
22595 case UNLE: return ARM_LE;
22596 case UNGT: return ARM_HI;
22597 case UNGE: return ARM_PL;
22598 /* UNEQ and LTGT do not have a representation. */
22599 case UNEQ: /* Fall through. */
22600 case LTGT: /* Fall through. */
22601 default: return ARM_NV;
22604 case CC_SWPmode:
22605 switch (comp_code)
22607 case NE: return ARM_NE;
22608 case EQ: return ARM_EQ;
22609 case GE: return ARM_LE;
22610 case GT: return ARM_LT;
22611 case LE: return ARM_GE;
22612 case LT: return ARM_GT;
22613 case GEU: return ARM_LS;
22614 case GTU: return ARM_CC;
22615 case LEU: return ARM_CS;
22616 case LTU: return ARM_HI;
22617 default: return ARM_NV;
22620 case CC_Cmode:
22621 switch (comp_code)
22623 case LTU: return ARM_CS;
22624 case GEU: return ARM_CC;
22625 default: return ARM_NV;
22628 case CC_CZmode:
22629 switch (comp_code)
22631 case NE: return ARM_NE;
22632 case EQ: return ARM_EQ;
22633 case GEU: return ARM_CS;
22634 case GTU: return ARM_HI;
22635 case LEU: return ARM_LS;
22636 case LTU: return ARM_CC;
22637 default: return ARM_NV;
22640 case CC_NCVmode:
22641 switch (comp_code)
22643 case GE: return ARM_GE;
22644 case LT: return ARM_LT;
22645 case GEU: return ARM_CS;
22646 case LTU: return ARM_CC;
22647 default: return ARM_NV;
22650 case CCmode:
22651 switch (comp_code)
22653 case NE: return ARM_NE;
22654 case EQ: return ARM_EQ;
22655 case GE: return ARM_GE;
22656 case GT: return ARM_GT;
22657 case LE: return ARM_LE;
22658 case LT: return ARM_LT;
22659 case GEU: return ARM_CS;
22660 case GTU: return ARM_HI;
22661 case LEU: return ARM_LS;
22662 case LTU: return ARM_CC;
22663 default: return ARM_NV;
22666 default: gcc_unreachable ();
22670 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22671 static enum arm_cond_code
22672 get_arm_condition_code (rtx comparison)
22674 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22675 gcc_assert (code != ARM_NV);
22676 return code;
22679 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22680 instructions. */
22681 void
22682 thumb2_final_prescan_insn (rtx_insn *insn)
22684 rtx_insn *first_insn = insn;
22685 rtx body = PATTERN (insn);
22686 rtx predicate;
22687 enum arm_cond_code code;
22688 int n;
22689 int mask;
22690 int max;
22692 /* max_insns_skipped in the tune was already taken into account in the
22693 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22694 stage just emit the IT blocks as best we can. It does not make sense
22695 to split the IT blocks. */
22696 max = MAX_INSN_PER_IT_BLOCK;
22698 /* Remove the previous insn from the count of insns to be output. */
22699 if (arm_condexec_count)
22700 arm_condexec_count--;
22702 /* Nothing to do if we are already inside a conditional block. */
22703 if (arm_condexec_count)
22704 return;
22706 if (GET_CODE (body) != COND_EXEC)
22707 return;
22709 /* Conditional jumps are implemented directly. */
22710 if (JUMP_P (insn))
22711 return;
22713 predicate = COND_EXEC_TEST (body);
22714 arm_current_cc = get_arm_condition_code (predicate);
22716 n = get_attr_ce_count (insn);
22717 arm_condexec_count = 1;
22718 arm_condexec_mask = (1 << n) - 1;
22719 arm_condexec_masklen = n;
22720 /* See if subsequent instructions can be combined into the same block. */
22721 for (;;)
22723 insn = next_nonnote_insn (insn);
22725 /* Jumping into the middle of an IT block is illegal, so a label or
22726 barrier terminates the block. */
22727 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22728 break;
22730 body = PATTERN (insn);
22731 /* USE and CLOBBER aren't really insns, so just skip them. */
22732 if (GET_CODE (body) == USE
22733 || GET_CODE (body) == CLOBBER)
22734 continue;
22736 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22737 if (GET_CODE (body) != COND_EXEC)
22738 break;
22739 /* Maximum number of conditionally executed instructions in a block. */
22740 n = get_attr_ce_count (insn);
22741 if (arm_condexec_masklen + n > max)
22742 break;
22744 predicate = COND_EXEC_TEST (body);
22745 code = get_arm_condition_code (predicate);
22746 mask = (1 << n) - 1;
22747 if (arm_current_cc == code)
22748 arm_condexec_mask |= (mask << arm_condexec_masklen);
22749 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22750 break;
22752 arm_condexec_count++;
22753 arm_condexec_masklen += n;
22755 /* A jump must be the last instruction in a conditional block. */
22756 if (JUMP_P (insn))
22757 break;
22759 /* Restore recog_data (getting the attributes of other insns can
22760 destroy this array, but final.c assumes that it remains intact
22761 across this call). */
22762 extract_constrain_insn_cached (first_insn);
22765 void
22766 arm_final_prescan_insn (rtx_insn *insn)
22768 /* BODY will hold the body of INSN. */
22769 rtx body = PATTERN (insn);
22771 /* This will be 1 if trying to repeat the trick, and things need to be
22772 reversed if it appears to fail. */
22773 int reverse = 0;
22775 /* If we start with a return insn, we only succeed if we find another one. */
22776 int seeking_return = 0;
22777 enum rtx_code return_code = UNKNOWN;
22779 /* START_INSN will hold the insn from where we start looking. This is the
22780 first insn after the following code_label if REVERSE is true. */
22781 rtx_insn *start_insn = insn;
22783 /* If in state 4, check if the target branch is reached, in order to
22784 change back to state 0. */
22785 if (arm_ccfsm_state == 4)
22787 if (insn == arm_target_insn)
22789 arm_target_insn = NULL;
22790 arm_ccfsm_state = 0;
22792 return;
22795 /* If in state 3, it is possible to repeat the trick, if this insn is an
22796 unconditional branch to a label, and immediately following this branch
22797 is the previous target label which is only used once, and the label this
22798 branch jumps to is not too far off. */
22799 if (arm_ccfsm_state == 3)
22801 if (simplejump_p (insn))
22803 start_insn = next_nonnote_insn (start_insn);
22804 if (BARRIER_P (start_insn))
22806 /* XXX Isn't this always a barrier? */
22807 start_insn = next_nonnote_insn (start_insn);
22809 if (LABEL_P (start_insn)
22810 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22811 && LABEL_NUSES (start_insn) == 1)
22812 reverse = TRUE;
22813 else
22814 return;
22816 else if (ANY_RETURN_P (body))
22818 start_insn = next_nonnote_insn (start_insn);
22819 if (BARRIER_P (start_insn))
22820 start_insn = next_nonnote_insn (start_insn);
22821 if (LABEL_P (start_insn)
22822 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22823 && LABEL_NUSES (start_insn) == 1)
22825 reverse = TRUE;
22826 seeking_return = 1;
22827 return_code = GET_CODE (body);
22829 else
22830 return;
22832 else
22833 return;
22836 gcc_assert (!arm_ccfsm_state || reverse);
22837 if (!JUMP_P (insn))
22838 return;
22840 /* This jump might be paralleled with a clobber of the condition codes;
22841 the jump should always come first. */
22842 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22843 body = XVECEXP (body, 0, 0);
22845 if (reverse
22846 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22847 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22849 int insns_skipped;
22850 int fail = FALSE, succeed = FALSE;
22851 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22852 int then_not_else = TRUE;
22853 rtx_insn *this_insn = start_insn;
22854 rtx label = 0;
22856 /* Register the insn jumped to. */
22857 if (reverse)
22859 if (!seeking_return)
22860 label = XEXP (SET_SRC (body), 0);
22862 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22863 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22864 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22866 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22867 then_not_else = FALSE;
22869 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22871 seeking_return = 1;
22872 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22874 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22876 seeking_return = 1;
22877 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22878 then_not_else = FALSE;
22880 else
22881 gcc_unreachable ();
22883 /* See how many insns this branch skips, and what kind of insns. If all
22884 insns are okay, and the label or unconditional branch to the same
22885 label is not too far away, succeed. */
22886 for (insns_skipped = 0;
22887 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22889 rtx scanbody;
22891 this_insn = next_nonnote_insn (this_insn);
22892 if (!this_insn)
22893 break;
22895 switch (GET_CODE (this_insn))
22897 case CODE_LABEL:
22898 /* Succeed if it is the target label, otherwise fail since
22899 control falls in from somewhere else. */
22900 if (this_insn == label)
22902 arm_ccfsm_state = 1;
22903 succeed = TRUE;
22905 else
22906 fail = TRUE;
22907 break;
22909 case BARRIER:
22910 /* Succeed if the following insn is the target label.
22911 Otherwise fail.
22912 If return insns are used then the last insn in a function
22913 will be a barrier. */
22914 this_insn = next_nonnote_insn (this_insn);
22915 if (this_insn && this_insn == label)
22917 arm_ccfsm_state = 1;
22918 succeed = TRUE;
22920 else
22921 fail = TRUE;
22922 break;
22924 case CALL_INSN:
22925 /* The AAPCS says that conditional calls should not be
22926 used since they make interworking inefficient (the
22927 linker can't transform BL<cond> into BLX). That's
22928 only a problem if the machine has BLX. */
22929 if (arm_arch5)
22931 fail = TRUE;
22932 break;
22935 /* Succeed if the following insn is the target label, or
22936 if the following two insns are a barrier and the
22937 target label. */
22938 this_insn = next_nonnote_insn (this_insn);
22939 if (this_insn && BARRIER_P (this_insn))
22940 this_insn = next_nonnote_insn (this_insn);
22942 if (this_insn && this_insn == label
22943 && insns_skipped < max_insns_skipped)
22945 arm_ccfsm_state = 1;
22946 succeed = TRUE;
22948 else
22949 fail = TRUE;
22950 break;
22952 case JUMP_INSN:
22953 /* If this is an unconditional branch to the same label, succeed.
22954 If it is to another label, do nothing. If it is conditional,
22955 fail. */
22956 /* XXX Probably, the tests for SET and the PC are
22957 unnecessary. */
22959 scanbody = PATTERN (this_insn);
22960 if (GET_CODE (scanbody) == SET
22961 && GET_CODE (SET_DEST (scanbody)) == PC)
22963 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22964 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22966 arm_ccfsm_state = 2;
22967 succeed = TRUE;
22969 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22970 fail = TRUE;
22972 /* Fail if a conditional return is undesirable (e.g. on a
22973 StrongARM), but still allow this if optimizing for size. */
22974 else if (GET_CODE (scanbody) == return_code
22975 && !use_return_insn (TRUE, NULL)
22976 && !optimize_size)
22977 fail = TRUE;
22978 else if (GET_CODE (scanbody) == return_code)
22980 arm_ccfsm_state = 2;
22981 succeed = TRUE;
22983 else if (GET_CODE (scanbody) == PARALLEL)
22985 switch (get_attr_conds (this_insn))
22987 case CONDS_NOCOND:
22988 break;
22989 default:
22990 fail = TRUE;
22991 break;
22994 else
22995 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22997 break;
22999 case INSN:
23000 /* Instructions using or affecting the condition codes make it
23001 fail. */
23002 scanbody = PATTERN (this_insn);
23003 if (!(GET_CODE (scanbody) == SET
23004 || GET_CODE (scanbody) == PARALLEL)
23005 || get_attr_conds (this_insn) != CONDS_NOCOND)
23006 fail = TRUE;
23007 break;
23009 default:
23010 break;
23013 if (succeed)
23015 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23016 arm_target_label = CODE_LABEL_NUMBER (label);
23017 else
23019 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23021 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23023 this_insn = next_nonnote_insn (this_insn);
23024 gcc_assert (!this_insn
23025 || (!BARRIER_P (this_insn)
23026 && !LABEL_P (this_insn)));
23028 if (!this_insn)
23030 /* Oh, dear! We ran off the end... give up. */
23031 extract_constrain_insn_cached (insn);
23032 arm_ccfsm_state = 0;
23033 arm_target_insn = NULL;
23034 return;
23036 arm_target_insn = this_insn;
23039 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23040 what it was. */
23041 if (!reverse)
23042 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23044 if (reverse || then_not_else)
23045 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23048 /* Restore recog_data (getting the attributes of other insns can
23049 destroy this array, but final.c assumes that it remains intact
23050 across this call). */
23051 extract_constrain_insn_cached (insn);
23055 /* Output IT instructions. */
23056 void
23057 thumb2_asm_output_opcode (FILE * stream)
23059 char buff[5];
23060 int n;
23062 if (arm_condexec_mask)
23064 for (n = 0; n < arm_condexec_masklen; n++)
23065 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23066 buff[n] = 0;
23067 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23068 arm_condition_codes[arm_current_cc]);
23069 arm_condexec_mask = 0;
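/* Illustrative, standalone sketch (not used by the compiler) of the IT-mask
   expansion performed just above.  MASK has one bit per instruction in the
   block, bit 0 first, with 1 meaning "same condition as the IT instruction"
   and 0 meaning "inverse condition"; LEN is at most four.  OUT must provide
   LEN + 1 bytes.  The helper name is hypothetical.  */

static void
it_pattern_sketch (unsigned int mask, int len, char *out)
{
  int n;
  for (n = 0; n < len; n++)
    out[n] = (mask & (1u << n)) ? 't' : 'e';
  out[n] = 0;
  /* E.g. mask = 0x5, len = 3 gives "tet"; with condition "eq" the emitted
     prefix is "itet eq", covering insns executed on eq, ne, eq in turn.  */
}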
23073 /* Returns true if REGNO is a valid register
23074 for holding a quantity of type MODE. */
23075 int
23076 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23078 if (GET_MODE_CLASS (mode) == MODE_CC)
23079 return (regno == CC_REGNUM
23080 || (TARGET_HARD_FLOAT && TARGET_VFP
23081 && regno == VFPCC_REGNUM));
23083 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23084 return false;
23086 if (TARGET_THUMB1)
23087 /* For the Thumb we only allow values bigger than SImode in
23088 registers 0 - 6, so that there is always a second low
23089 register available to hold the upper part of the value.
23090 We probably ought to ensure that the register is the
23091 start of an even numbered register pair. */
23092 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23094 if (TARGET_HARD_FLOAT && TARGET_VFP
23095 && IS_VFP_REGNUM (regno))
23097 if (mode == SFmode || mode == SImode)
23098 return VFP_REGNO_OK_FOR_SINGLE (regno);
23100 if (mode == DFmode)
23101 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23103 /* VFP registers can hold HFmode values, but there is no point in
23104 putting them there unless we have hardware conversion insns. */
23105 if (mode == HFmode)
23106 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23108 if (TARGET_NEON)
23109 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23110 || (VALID_NEON_QREG_MODE (mode)
23111 && NEON_REGNO_OK_FOR_QUAD (regno))
23112 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23113 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23114 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23115 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23116 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23118 return FALSE;
23121 if (TARGET_REALLY_IWMMXT)
23123 if (IS_IWMMXT_GR_REGNUM (regno))
23124 return mode == SImode;
23126 if (IS_IWMMXT_REGNUM (regno))
23127 return VALID_IWMMXT_REG_MODE (mode);
23130 /* We allow almost any value to be stored in the general registers.
23131 Restrict doubleword quantities to even register pairs in ARM state
23132 so that we can use ldrd. Do not allow very large Neon structure
23133 opaque modes in general registers; they would use too many. */
23134 if (regno <= LAST_ARM_REGNUM)
23136 if (ARM_NUM_REGS (mode) > 4)
23137 return FALSE;
23139 if (TARGET_THUMB2)
23140 return TRUE;
23142 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23145 if (regno == FRAME_POINTER_REGNUM
23146 || regno == ARG_POINTER_REGNUM)
23147 /* We only allow integers in the fake hard registers. */
23148 return GET_MODE_CLASS (mode) == MODE_INT;
23150 return FALSE;
23153 /* Implement MODES_TIEABLE_P. */
23155 bool
23156 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23158 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23159 return true;
23161 /* We specifically want to allow elements of "structure" modes to
23162 be tieable to the structure. This more general condition allows
23163 other rarer situations too. */
23164 if (TARGET_NEON
23165 && (VALID_NEON_DREG_MODE (mode1)
23166 || VALID_NEON_QREG_MODE (mode1)
23167 || VALID_NEON_STRUCT_MODE (mode1))
23168 && (VALID_NEON_DREG_MODE (mode2)
23169 || VALID_NEON_QREG_MODE (mode2)
23170 || VALID_NEON_STRUCT_MODE (mode2)))
23171 return true;
23173 return false;
23176 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23177 not used in ARM mode. */
23179 enum reg_class
23180 arm_regno_class (int regno)
23182 if (regno == PC_REGNUM)
23183 return NO_REGS;
23185 if (TARGET_THUMB1)
23187 if (regno == STACK_POINTER_REGNUM)
23188 return STACK_REG;
23189 if (regno == CC_REGNUM)
23190 return CC_REG;
23191 if (regno < 8)
23192 return LO_REGS;
23193 return HI_REGS;
23196 if (TARGET_THUMB2 && regno < 8)
23197 return LO_REGS;
23199 if ( regno <= LAST_ARM_REGNUM
23200 || regno == FRAME_POINTER_REGNUM
23201 || regno == ARG_POINTER_REGNUM)
23202 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23204 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23205 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23207 if (IS_VFP_REGNUM (regno))
23209 if (regno <= D7_VFP_REGNUM)
23210 return VFP_D0_D7_REGS;
23211 else if (regno <= LAST_LO_VFP_REGNUM)
23212 return VFP_LO_REGS;
23213 else
23214 return VFP_HI_REGS;
23217 if (IS_IWMMXT_REGNUM (regno))
23218 return IWMMXT_REGS;
23220 if (IS_IWMMXT_GR_REGNUM (regno))
23221 return IWMMXT_GR_REGS;
23223 return NO_REGS;
23226 /* Handle a special case when computing the offset
23227 of an argument from the frame pointer. */
23228 int
23229 arm_debugger_arg_offset (int value, rtx addr)
23231 rtx_insn *insn;
23233 /* We are only interested if dbxout_parms() failed to compute the offset. */
23234 if (value != 0)
23235 return 0;
23237 /* We can only cope with the case where the address is held in a register. */
23238 if (!REG_P (addr))
23239 return 0;
23241 /* If we are using the frame pointer to point at the argument, then
23242 an offset of 0 is correct. */
23243 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23244 return 0;
23246 /* If we are using the stack pointer to point at the
23247 argument, then an offset of 0 is correct. */
23248 /* ??? Check this is consistent with thumb2 frame layout. */
23249 if ((TARGET_THUMB || !frame_pointer_needed)
23250 && REGNO (addr) == SP_REGNUM)
23251 return 0;
23253 /* Oh dear. The argument is pointed to by a register rather
23254 than being held in a register, or being stored at a known
23255 offset from the frame pointer. Since GDB only understands
23256 those two kinds of argument we must translate the address
23257 held in the register into an offset from the frame pointer.
23258 We do this by searching through the insns for the function
23259 looking to see where this register gets its value. If the
23260 register is initialized from the frame pointer plus an offset
23261 then we are in luck and we can continue, otherwise we give up.
23263 This code is exercised by producing debugging information
23264 for a function with arguments like this:
23266 double func (double a, double b, int c, double d) {return d;}
23268 Without this code the stab for parameter 'd' will be set to
23269 an offset of 0 from the frame pointer, rather than 8. */
23271 /* The if() statement says:
23273 If the insn is a normal instruction
23274 and if the insn is setting the value in a register
23275 and if the register being set is the register holding the address of the argument
23276 and if the address is computed by an addition
23277 that involves adding to a register
23278 which is the frame pointer
23279 a constant integer
23281 then... */
23283 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23285 if ( NONJUMP_INSN_P (insn)
23286 && GET_CODE (PATTERN (insn)) == SET
23287 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23288 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23289 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23290 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23291 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23294 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23296 break;
23300 if (value == 0)
23302 debug_rtx (addr);
23303 warning (0, "unable to compute real location of stacked parameter");
23304 value = 8; /* XXX magic hack */
23307 return value;
23310 typedef enum {
23311 T_V8QI,
23312 T_V4HI,
23313 T_V4HF,
23314 T_V2SI,
23315 T_V2SF,
23316 T_DI,
23317 T_V16QI,
23318 T_V8HI,
23319 T_V4SI,
23320 T_V4SF,
23321 T_V2DI,
23322 T_TI,
23323 T_EI,
23324 T_OI,
23325 T_MAX /* Size of enum. Keep last. */
23326 } neon_builtin_type_mode;
23328 #define TYPE_MODE_BIT(X) (1 << (X))
23330 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23331 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23332 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23333 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23334 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23335 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23337 #define v8qi_UP T_V8QI
23338 #define v4hi_UP T_V4HI
23339 #define v4hf_UP T_V4HF
23340 #define v2si_UP T_V2SI
23341 #define v2sf_UP T_V2SF
23342 #define di_UP T_DI
23343 #define v16qi_UP T_V16QI
23344 #define v8hi_UP T_V8HI
23345 #define v4si_UP T_V4SI
23346 #define v4sf_UP T_V4SF
23347 #define v2di_UP T_V2DI
23348 #define ti_UP T_TI
23349 #define ei_UP T_EI
23350 #define oi_UP T_OI
23352 #define UP(X) X##_UP
23354 typedef enum {
23355 NEON_BINOP,
23356 NEON_TERNOP,
23357 NEON_UNOP,
23358 NEON_BSWAP,
23359 NEON_GETLANE,
23360 NEON_SETLANE,
23361 NEON_CREATE,
23362 NEON_RINT,
23363 NEON_COPYSIGNF,
23364 NEON_DUP,
23365 NEON_DUPLANE,
23366 NEON_COMBINE,
23367 NEON_SPLIT,
23368 NEON_LANEMUL,
23369 NEON_LANEMULL,
23370 NEON_LANEMULH,
23371 NEON_LANEMAC,
23372 NEON_SCALARMUL,
23373 NEON_SCALARMULL,
23374 NEON_SCALARMULH,
23375 NEON_SCALARMAC,
23376 NEON_CONVERT,
23377 NEON_FLOAT_WIDEN,
23378 NEON_FLOAT_NARROW,
23379 NEON_FIXCONV,
23380 NEON_SELECT,
23381 NEON_REINTERP,
23382 NEON_VTBL,
23383 NEON_VTBX,
23384 NEON_LOAD1,
23385 NEON_LOAD1LANE,
23386 NEON_STORE1,
23387 NEON_STORE1LANE,
23388 NEON_LOADSTRUCT,
23389 NEON_LOADSTRUCTLANE,
23390 NEON_STORESTRUCT,
23391 NEON_STORESTRUCTLANE,
23392 NEON_LOGICBINOP,
23393 NEON_SHIFTINSERT,
23394 NEON_SHIFTIMM,
23395 NEON_SHIFTACC
23396 } neon_itype;
23398 typedef struct {
23399 const char *name;
23400 const neon_itype itype;
23401 const neon_builtin_type_mode mode;
23402 const enum insn_code code;
23403 unsigned int fcode;
23404 } neon_builtin_datum;
23406 #define CF(N,X) CODE_FOR_neon_##N##X
23408 #define VAR1(T, N, A) \
23409 {#N, NEON_##T, UP (A), CF (N, A), 0}
23410 #define VAR2(T, N, A, B) \
23411 VAR1 (T, N, A), \
23412 {#N, NEON_##T, UP (B), CF (N, B), 0}
23413 #define VAR3(T, N, A, B, C) \
23414 VAR2 (T, N, A, B), \
23415 {#N, NEON_##T, UP (C), CF (N, C), 0}
23416 #define VAR4(T, N, A, B, C, D) \
23417 VAR3 (T, N, A, B, C), \
23418 {#N, NEON_##T, UP (D), CF (N, D), 0}
23419 #define VAR5(T, N, A, B, C, D, E) \
23420 VAR4 (T, N, A, B, C, D), \
23421 {#N, NEON_##T, UP (E), CF (N, E), 0}
23422 #define VAR6(T, N, A, B, C, D, E, F) \
23423 VAR5 (T, N, A, B, C, D, E), \
23424 {#N, NEON_##T, UP (F), CF (N, F), 0}
23425 #define VAR7(T, N, A, B, C, D, E, F, G) \
23426 VAR6 (T, N, A, B, C, D, E, F), \
23427 {#N, NEON_##T, UP (G), CF (N, G), 0}
23428 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23429 VAR7 (T, N, A, B, C, D, E, F, G), \
23430 {#N, NEON_##T, UP (H), CF (N, H), 0}
23431 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23432 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23433 {#N, NEON_##T, UP (I), CF (N, I), 0}
23434 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23435 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23436 {#N, NEON_##T, UP (J), CF (N, J), 0}
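/* For illustration, a hypothetical two-mode entry such as
     VAR2 (BINOP, vadd, v8qi, v16qi)
   expands under the definitions above to the two initializers
     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0},
   i.e. one neon_builtin_datum per "key" mode supported by the intrinsic.  */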
23438 /* The NEON builtin data can be found in arm_neon_builtins.def.
23439 The mode entries in the following table correspond to the "key" type of the
23440 instruction variant, i.e. equivalent to that which would be specified after
23441 the assembler mnemonic, which usually refers to the last vector operand.
23442 (Signed/unsigned/polynomial types are not distinguished, though; they are
23443 all mapped onto the same mode for a given element size.) The modes
23444 listed per instruction should be the same as those defined for that
23445 instruction's pattern in neon.md. */
23447 static neon_builtin_datum neon_builtin_data[] =
23448 {
23449 #include "arm_neon_builtins.def"
23450 };
23452 #undef CF
23453 #undef VAR1
23454 #undef VAR2
23455 #undef VAR3
23456 #undef VAR4
23457 #undef VAR5
23458 #undef VAR6
23459 #undef VAR7
23460 #undef VAR8
23461 #undef VAR9
23462 #undef VAR10
23464 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23465 #define VAR1(T, N, A) \
23466 CF (N, A)
23467 #define VAR2(T, N, A, B) \
23468 VAR1 (T, N, A), \
23469 CF (N, B)
23470 #define VAR3(T, N, A, B, C) \
23471 VAR2 (T, N, A, B), \
23472 CF (N, C)
23473 #define VAR4(T, N, A, B, C, D) \
23474 VAR3 (T, N, A, B, C), \
23475 CF (N, D)
23476 #define VAR5(T, N, A, B, C, D, E) \
23477 VAR4 (T, N, A, B, C, D), \
23478 CF (N, E)
23479 #define VAR6(T, N, A, B, C, D, E, F) \
23480 VAR5 (T, N, A, B, C, D, E), \
23481 CF (N, F)
23482 #define VAR7(T, N, A, B, C, D, E, F, G) \
23483 VAR6 (T, N, A, B, C, D, E, F), \
23484 CF (N, G)
23485 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23486 VAR7 (T, N, A, B, C, D, E, F, G), \
23487 CF (N, H)
23488 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23489 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23490 CF (N, I)
23491 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23492 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23493 CF (N, J)
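/* With CF redefined above, the same illustrative entry
     VAR2 (BINOP, vadd, v8qi, v4hi)
   now expands to the enumerators
     ARM_BUILTIN_NEON_vaddv8qi, ARM_BUILTIN_NEON_vaddv4hi
   so re-including arm_neon_builtins.def below gives every NEON builtin its
   own function code in enum arm_builtins.  */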
23494 enum arm_builtins
23495 {
23496 ARM_BUILTIN_GETWCGR0,
23497 ARM_BUILTIN_GETWCGR1,
23498 ARM_BUILTIN_GETWCGR2,
23499 ARM_BUILTIN_GETWCGR3,
23501 ARM_BUILTIN_SETWCGR0,
23502 ARM_BUILTIN_SETWCGR1,
23503 ARM_BUILTIN_SETWCGR2,
23504 ARM_BUILTIN_SETWCGR3,
23506 ARM_BUILTIN_WZERO,
23508 ARM_BUILTIN_WAVG2BR,
23509 ARM_BUILTIN_WAVG2HR,
23510 ARM_BUILTIN_WAVG2B,
23511 ARM_BUILTIN_WAVG2H,
23513 ARM_BUILTIN_WACCB,
23514 ARM_BUILTIN_WACCH,
23515 ARM_BUILTIN_WACCW,
23517 ARM_BUILTIN_WMACS,
23518 ARM_BUILTIN_WMACSZ,
23519 ARM_BUILTIN_WMACU,
23520 ARM_BUILTIN_WMACUZ,
23522 ARM_BUILTIN_WSADB,
23523 ARM_BUILTIN_WSADBZ,
23524 ARM_BUILTIN_WSADH,
23525 ARM_BUILTIN_WSADHZ,
23527 ARM_BUILTIN_WALIGNI,
23528 ARM_BUILTIN_WALIGNR0,
23529 ARM_BUILTIN_WALIGNR1,
23530 ARM_BUILTIN_WALIGNR2,
23531 ARM_BUILTIN_WALIGNR3,
23533 ARM_BUILTIN_TMIA,
23534 ARM_BUILTIN_TMIAPH,
23535 ARM_BUILTIN_TMIABB,
23536 ARM_BUILTIN_TMIABT,
23537 ARM_BUILTIN_TMIATB,
23538 ARM_BUILTIN_TMIATT,
23540 ARM_BUILTIN_TMOVMSKB,
23541 ARM_BUILTIN_TMOVMSKH,
23542 ARM_BUILTIN_TMOVMSKW,
23544 ARM_BUILTIN_TBCSTB,
23545 ARM_BUILTIN_TBCSTH,
23546 ARM_BUILTIN_TBCSTW,
23548 ARM_BUILTIN_WMADDS,
23549 ARM_BUILTIN_WMADDU,
23551 ARM_BUILTIN_WPACKHSS,
23552 ARM_BUILTIN_WPACKWSS,
23553 ARM_BUILTIN_WPACKDSS,
23554 ARM_BUILTIN_WPACKHUS,
23555 ARM_BUILTIN_WPACKWUS,
23556 ARM_BUILTIN_WPACKDUS,
23558 ARM_BUILTIN_WADDB,
23559 ARM_BUILTIN_WADDH,
23560 ARM_BUILTIN_WADDW,
23561 ARM_BUILTIN_WADDSSB,
23562 ARM_BUILTIN_WADDSSH,
23563 ARM_BUILTIN_WADDSSW,
23564 ARM_BUILTIN_WADDUSB,
23565 ARM_BUILTIN_WADDUSH,
23566 ARM_BUILTIN_WADDUSW,
23567 ARM_BUILTIN_WSUBB,
23568 ARM_BUILTIN_WSUBH,
23569 ARM_BUILTIN_WSUBW,
23570 ARM_BUILTIN_WSUBSSB,
23571 ARM_BUILTIN_WSUBSSH,
23572 ARM_BUILTIN_WSUBSSW,
23573 ARM_BUILTIN_WSUBUSB,
23574 ARM_BUILTIN_WSUBUSH,
23575 ARM_BUILTIN_WSUBUSW,
23577 ARM_BUILTIN_WAND,
23578 ARM_BUILTIN_WANDN,
23579 ARM_BUILTIN_WOR,
23580 ARM_BUILTIN_WXOR,
23582 ARM_BUILTIN_WCMPEQB,
23583 ARM_BUILTIN_WCMPEQH,
23584 ARM_BUILTIN_WCMPEQW,
23585 ARM_BUILTIN_WCMPGTUB,
23586 ARM_BUILTIN_WCMPGTUH,
23587 ARM_BUILTIN_WCMPGTUW,
23588 ARM_BUILTIN_WCMPGTSB,
23589 ARM_BUILTIN_WCMPGTSH,
23590 ARM_BUILTIN_WCMPGTSW,
23592 ARM_BUILTIN_TEXTRMSB,
23593 ARM_BUILTIN_TEXTRMSH,
23594 ARM_BUILTIN_TEXTRMSW,
23595 ARM_BUILTIN_TEXTRMUB,
23596 ARM_BUILTIN_TEXTRMUH,
23597 ARM_BUILTIN_TEXTRMUW,
23598 ARM_BUILTIN_TINSRB,
23599 ARM_BUILTIN_TINSRH,
23600 ARM_BUILTIN_TINSRW,
23602 ARM_BUILTIN_WMAXSW,
23603 ARM_BUILTIN_WMAXSH,
23604 ARM_BUILTIN_WMAXSB,
23605 ARM_BUILTIN_WMAXUW,
23606 ARM_BUILTIN_WMAXUH,
23607 ARM_BUILTIN_WMAXUB,
23608 ARM_BUILTIN_WMINSW,
23609 ARM_BUILTIN_WMINSH,
23610 ARM_BUILTIN_WMINSB,
23611 ARM_BUILTIN_WMINUW,
23612 ARM_BUILTIN_WMINUH,
23613 ARM_BUILTIN_WMINUB,
23615 ARM_BUILTIN_WMULUM,
23616 ARM_BUILTIN_WMULSM,
23617 ARM_BUILTIN_WMULUL,
23619 ARM_BUILTIN_PSADBH,
23620 ARM_BUILTIN_WSHUFH,
23622 ARM_BUILTIN_WSLLH,
23623 ARM_BUILTIN_WSLLW,
23624 ARM_BUILTIN_WSLLD,
23625 ARM_BUILTIN_WSRAH,
23626 ARM_BUILTIN_WSRAW,
23627 ARM_BUILTIN_WSRAD,
23628 ARM_BUILTIN_WSRLH,
23629 ARM_BUILTIN_WSRLW,
23630 ARM_BUILTIN_WSRLD,
23631 ARM_BUILTIN_WRORH,
23632 ARM_BUILTIN_WRORW,
23633 ARM_BUILTIN_WRORD,
23634 ARM_BUILTIN_WSLLHI,
23635 ARM_BUILTIN_WSLLWI,
23636 ARM_BUILTIN_WSLLDI,
23637 ARM_BUILTIN_WSRAHI,
23638 ARM_BUILTIN_WSRAWI,
23639 ARM_BUILTIN_WSRADI,
23640 ARM_BUILTIN_WSRLHI,
23641 ARM_BUILTIN_WSRLWI,
23642 ARM_BUILTIN_WSRLDI,
23643 ARM_BUILTIN_WRORHI,
23644 ARM_BUILTIN_WRORWI,
23645 ARM_BUILTIN_WRORDI,
23647 ARM_BUILTIN_WUNPCKIHB,
23648 ARM_BUILTIN_WUNPCKIHH,
23649 ARM_BUILTIN_WUNPCKIHW,
23650 ARM_BUILTIN_WUNPCKILB,
23651 ARM_BUILTIN_WUNPCKILH,
23652 ARM_BUILTIN_WUNPCKILW,
23654 ARM_BUILTIN_WUNPCKEHSB,
23655 ARM_BUILTIN_WUNPCKEHSH,
23656 ARM_BUILTIN_WUNPCKEHSW,
23657 ARM_BUILTIN_WUNPCKEHUB,
23658 ARM_BUILTIN_WUNPCKEHUH,
23659 ARM_BUILTIN_WUNPCKEHUW,
23660 ARM_BUILTIN_WUNPCKELSB,
23661 ARM_BUILTIN_WUNPCKELSH,
23662 ARM_BUILTIN_WUNPCKELSW,
23663 ARM_BUILTIN_WUNPCKELUB,
23664 ARM_BUILTIN_WUNPCKELUH,
23665 ARM_BUILTIN_WUNPCKELUW,
23667 ARM_BUILTIN_WABSB,
23668 ARM_BUILTIN_WABSH,
23669 ARM_BUILTIN_WABSW,
23671 ARM_BUILTIN_WADDSUBHX,
23672 ARM_BUILTIN_WSUBADDHX,
23674 ARM_BUILTIN_WABSDIFFB,
23675 ARM_BUILTIN_WABSDIFFH,
23676 ARM_BUILTIN_WABSDIFFW,
23678 ARM_BUILTIN_WADDCH,
23679 ARM_BUILTIN_WADDCW,
23681 ARM_BUILTIN_WAVG4,
23682 ARM_BUILTIN_WAVG4R,
23684 ARM_BUILTIN_WMADDSX,
23685 ARM_BUILTIN_WMADDUX,
23687 ARM_BUILTIN_WMADDSN,
23688 ARM_BUILTIN_WMADDUN,
23690 ARM_BUILTIN_WMULWSM,
23691 ARM_BUILTIN_WMULWUM,
23693 ARM_BUILTIN_WMULWSMR,
23694 ARM_BUILTIN_WMULWUMR,
23696 ARM_BUILTIN_WMULWL,
23698 ARM_BUILTIN_WMULSMR,
23699 ARM_BUILTIN_WMULUMR,
23701 ARM_BUILTIN_WQMULM,
23702 ARM_BUILTIN_WQMULMR,
23704 ARM_BUILTIN_WQMULWM,
23705 ARM_BUILTIN_WQMULWMR,
23707 ARM_BUILTIN_WADDBHUSM,
23708 ARM_BUILTIN_WADDBHUSL,
23710 ARM_BUILTIN_WQMIABB,
23711 ARM_BUILTIN_WQMIABT,
23712 ARM_BUILTIN_WQMIATB,
23713 ARM_BUILTIN_WQMIATT,
23715 ARM_BUILTIN_WQMIABBN,
23716 ARM_BUILTIN_WQMIABTN,
23717 ARM_BUILTIN_WQMIATBN,
23718 ARM_BUILTIN_WQMIATTN,
23720 ARM_BUILTIN_WMIABB,
23721 ARM_BUILTIN_WMIABT,
23722 ARM_BUILTIN_WMIATB,
23723 ARM_BUILTIN_WMIATT,
23725 ARM_BUILTIN_WMIABBN,
23726 ARM_BUILTIN_WMIABTN,
23727 ARM_BUILTIN_WMIATBN,
23728 ARM_BUILTIN_WMIATTN,
23730 ARM_BUILTIN_WMIAWBB,
23731 ARM_BUILTIN_WMIAWBT,
23732 ARM_BUILTIN_WMIAWTB,
23733 ARM_BUILTIN_WMIAWTT,
23735 ARM_BUILTIN_WMIAWBBN,
23736 ARM_BUILTIN_WMIAWBTN,
23737 ARM_BUILTIN_WMIAWTBN,
23738 ARM_BUILTIN_WMIAWTTN,
23740 ARM_BUILTIN_WMERGE,
23742 ARM_BUILTIN_CRC32B,
23743 ARM_BUILTIN_CRC32H,
23744 ARM_BUILTIN_CRC32W,
23745 ARM_BUILTIN_CRC32CB,
23746 ARM_BUILTIN_CRC32CH,
23747 ARM_BUILTIN_CRC32CW,
23749 ARM_BUILTIN_GET_FPSCR,
23750 ARM_BUILTIN_SET_FPSCR,
23752 #undef CRYPTO1
23753 #undef CRYPTO2
23754 #undef CRYPTO3
23756 #define CRYPTO1(L, U, M1, M2) \
23757 ARM_BUILTIN_CRYPTO_##U,
23758 #define CRYPTO2(L, U, M1, M2, M3) \
23759 ARM_BUILTIN_CRYPTO_##U,
23760 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23761 ARM_BUILTIN_CRYPTO_##U,
23763 #include "crypto.def"
23765 #undef CRYPTO1
23766 #undef CRYPTO2
23767 #undef CRYPTO3
23769 #include "arm_neon_builtins.def"
23771 ,ARM_BUILTIN_MAX
23772 };
23774 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
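/* The NEON enumerators generated above are the last ones before
   ARM_BUILTIN_MAX, so the function code of neon_builtin_data[i] is simply
   ARM_BUILTIN_NEON_BASE + i; arm_init_neon_builtins relies on this when it
   sets d->fcode in its registration loop below.  */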
23776 #undef CF
23777 #undef VAR1
23778 #undef VAR2
23779 #undef VAR3
23780 #undef VAR4
23781 #undef VAR5
23782 #undef VAR6
23783 #undef VAR7
23784 #undef VAR8
23785 #undef VAR9
23786 #undef VAR10
23788 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23790 #define NUM_DREG_TYPES 5
23791 #define NUM_QREG_TYPES 6
23793 static void
23794 arm_init_neon_builtins (void)
23795 {
23796 unsigned int i, fcode;
23797 tree decl;
23799 tree neon_intQI_type_node;
23800 tree neon_intHI_type_node;
23801 tree neon_floatHF_type_node;
23802 tree neon_polyQI_type_node;
23803 tree neon_polyHI_type_node;
23804 tree neon_intSI_type_node;
23805 tree neon_intDI_type_node;
23806 tree neon_intUTI_type_node;
23807 tree neon_float_type_node;
23809 tree intQI_pointer_node;
23810 tree intHI_pointer_node;
23811 tree intSI_pointer_node;
23812 tree intDI_pointer_node;
23813 tree float_pointer_node;
23815 tree const_intQI_node;
23816 tree const_intHI_node;
23817 tree const_intSI_node;
23818 tree const_intDI_node;
23819 tree const_float_node;
23821 tree const_intQI_pointer_node;
23822 tree const_intHI_pointer_node;
23823 tree const_intSI_pointer_node;
23824 tree const_intDI_pointer_node;
23825 tree const_float_pointer_node;
23827 tree V8QI_type_node;
23828 tree V4HI_type_node;
23829 tree V4UHI_type_node;
23830 tree V4HF_type_node;
23831 tree V2SI_type_node;
23832 tree V2USI_type_node;
23833 tree V2SF_type_node;
23834 tree V16QI_type_node;
23835 tree V8HI_type_node;
23836 tree V8UHI_type_node;
23837 tree V4SI_type_node;
23838 tree V4USI_type_node;
23839 tree V4SF_type_node;
23840 tree V2DI_type_node;
23841 tree V2UDI_type_node;
23843 tree intUQI_type_node;
23844 tree intUHI_type_node;
23845 tree intUSI_type_node;
23846 tree intUDI_type_node;
23848 tree intEI_type_node;
23849 tree intOI_type_node;
23850 tree intCI_type_node;
23851 tree intXI_type_node;
23853 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23854 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23855 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23857 /* Create distinguished type nodes for NEON vector element types,
23858 and pointers to values of such types, so we can detect them later. */
23859 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23860 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23861 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23862 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23863 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23864 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23865 neon_float_type_node = make_node (REAL_TYPE);
23866 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23867 layout_type (neon_float_type_node);
23868 neon_floatHF_type_node = make_node (REAL_TYPE);
23869 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23870 layout_type (neon_floatHF_type_node);
23872 /* Define typedefs which exactly correspond to the modes we are basing vector
23873 types on. If you change these names you'll need to change
23874 the table used by arm_mangle_type too. */
23875 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23876 "__builtin_neon_qi");
23877 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23878 "__builtin_neon_hi");
23879 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23880 "__builtin_neon_hf");
23881 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23882 "__builtin_neon_si");
23883 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23884 "__builtin_neon_sf");
23885 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23886 "__builtin_neon_di");
23887 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23888 "__builtin_neon_poly8");
23889 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23890 "__builtin_neon_poly16");
23892 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23893 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23894 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23895 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23896 float_pointer_node = build_pointer_type (neon_float_type_node);
23898 /* Next create constant-qualified versions of the above types. */
23899 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23900 TYPE_QUAL_CONST);
23901 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23902 TYPE_QUAL_CONST);
23903 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23904 TYPE_QUAL_CONST);
23905 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23906 TYPE_QUAL_CONST);
23907 const_float_node = build_qualified_type (neon_float_type_node,
23908 TYPE_QUAL_CONST);
23910 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23911 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23912 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23913 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23914 const_float_pointer_node = build_pointer_type (const_float_node);
23916 /* Unsigned integer types for various mode sizes. */
23917 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23918 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23919 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23920 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23921 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23922 /* Now create vector types based on our NEON element types. */
23923 /* 64-bit vectors. */
23924 V8QI_type_node =
23925 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23926 V4HI_type_node =
23927 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23928 V4UHI_type_node =
23929 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23930 V4HF_type_node =
23931 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23932 V2SI_type_node =
23933 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23934 V2USI_type_node =
23935 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23936 V2SF_type_node =
23937 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23938 /* 128-bit vectors. */
23939 V16QI_type_node =
23940 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23941 V8HI_type_node =
23942 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23943 V8UHI_type_node =
23944 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23945 V4SI_type_node =
23946 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23947 V4USI_type_node =
23948 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23949 V4SF_type_node =
23950 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23951 V2DI_type_node =
23952 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23953 V2UDI_type_node =
23954 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23957 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23958 "__builtin_neon_uqi");
23959 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23960 "__builtin_neon_uhi");
23961 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23962 "__builtin_neon_usi");
23963 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23964 "__builtin_neon_udi");
23965 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23966 "__builtin_neon_poly64");
23967 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23968 "__builtin_neon_poly128");
23970 /* Opaque integer types for structures of vectors. */
23971 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23972 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23973 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23974 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23976 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23977 "__builtin_neon_ti");
23978 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23979 "__builtin_neon_ei");
23980 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23981 "__builtin_neon_oi");
23982 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23983 "__builtin_neon_ci");
23984 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23985 "__builtin_neon_xi");
23987 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23988 {
23990 tree V16UQI_type_node =
23991 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23993 tree v16uqi_ftype_v16uqi
23994 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23996 tree v16uqi_ftype_v16uqi_v16uqi
23997 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23998 V16UQI_type_node, NULL_TREE);
24000 tree v4usi_ftype_v4usi
24001 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
24003 tree v4usi_ftype_v4usi_v4usi
24004 = build_function_type_list (V4USI_type_node, V4USI_type_node,
24005 V4USI_type_node, NULL_TREE);
24007 tree v4usi_ftype_v4usi_v4usi_v4usi
24008 = build_function_type_list (V4USI_type_node, V4USI_type_node,
24009 V4USI_type_node, V4USI_type_node, NULL_TREE);
24011 tree uti_ftype_udi_udi
24012 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
24013 intUDI_type_node, NULL_TREE);
24015 #undef CRYPTO1
24016 #undef CRYPTO2
24017 #undef CRYPTO3
24018 #undef C
24019 #undef N
24020 #undef CF
24021 #undef FT1
24022 #undef FT2
24023 #undef FT3
24025 #define C(U) \
24026 ARM_BUILTIN_CRYPTO_##U
24027 #define N(L) \
24028 "__builtin_arm_crypto_"#L
24029 #define FT1(R, A) \
24030 R##_ftype_##A
24031 #define FT2(R, A1, A2) \
24032 R##_ftype_##A1##_##A2
24033 #define FT3(R, A1, A2, A3) \
24034 R##_ftype_##A1##_##A2##_##A3
24035 #define CRYPTO1(L, U, R, A) \
24036 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
24037 C (U), BUILT_IN_MD, \
24038 NULL, NULL_TREE);
24039 #define CRYPTO2(L, U, R, A1, A2) \
24040 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
24041 C (U), BUILT_IN_MD, \
24042 NULL, NULL_TREE);
24044 #define CRYPTO3(L, U, R, A1, A2, A3) \
24045 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
24046 C (U), BUILT_IN_MD, \
24047 NULL, NULL_TREE);
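/* For illustration (the real argument lists live in crypto.def), an entry
   of the form
     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)
   would expand, via the C, N and FTn helpers above, to
     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);  */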
24048 #include "crypto.def"
24050 #undef CRYPTO1
24051 #undef CRYPTO2
24052 #undef CRYPTO3
24053 #undef C
24054 #undef N
24055 #undef FT1
24056 #undef FT2
24057 #undef FT3
24058 }
24059 dreg_types[0] = V8QI_type_node;
24060 dreg_types[1] = V4HI_type_node;
24061 dreg_types[2] = V2SI_type_node;
24062 dreg_types[3] = V2SF_type_node;
24063 dreg_types[4] = neon_intDI_type_node;
24065 qreg_types[0] = V16QI_type_node;
24066 qreg_types[1] = V8HI_type_node;
24067 qreg_types[2] = V4SI_type_node;
24068 qreg_types[3] = V4SF_type_node;
24069 qreg_types[4] = V2DI_type_node;
24070 qreg_types[5] = neon_intUTI_type_node;
24072 for (i = 0; i < NUM_QREG_TYPES; i++)
24073 {
24074 int j;
24075 for (j = 0; j < NUM_QREG_TYPES; j++)
24076 {
24077 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24078 reinterp_ftype_dreg[i][j]
24079 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24081 reinterp_ftype_qreg[i][j]
24082 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24083 }
24084 }
24086 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24087 i < ARRAY_SIZE (neon_builtin_data);
24088 i++, fcode++)
24089 {
24090 neon_builtin_datum *d = &neon_builtin_data[i];
24092 const char* const modenames[] = {
24093 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24094 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24095 "ti", "ei", "oi"
24096 };
24097 char namebuf[60];
24098 tree ftype = NULL;
24099 int is_load = 0, is_store = 0;
24101 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24103 d->fcode = fcode;
24105 switch (d->itype)
24106 {
24107 case NEON_LOAD1:
24108 case NEON_LOAD1LANE:
24109 case NEON_LOADSTRUCT:
24110 case NEON_LOADSTRUCTLANE:
24111 is_load = 1;
24112 /* Fall through. */
24113 case NEON_STORE1:
24114 case NEON_STORE1LANE:
24115 case NEON_STORESTRUCT:
24116 case NEON_STORESTRUCTLANE:
24117 if (!is_load)
24118 is_store = 1;
24119 /* Fall through. */
24120 case NEON_UNOP:
24121 case NEON_RINT:
24122 case NEON_BINOP:
24123 case NEON_LOGICBINOP:
24124 case NEON_SHIFTINSERT:
24125 case NEON_TERNOP:
24126 case NEON_GETLANE:
24127 case NEON_SETLANE:
24128 case NEON_CREATE:
24129 case NEON_DUP:
24130 case NEON_DUPLANE:
24131 case NEON_SHIFTIMM:
24132 case NEON_SHIFTACC:
24133 case NEON_COMBINE:
24134 case NEON_SPLIT:
24135 case NEON_CONVERT:
24136 case NEON_FIXCONV:
24137 case NEON_LANEMUL:
24138 case NEON_LANEMULL:
24139 case NEON_LANEMULH:
24140 case NEON_LANEMAC:
24141 case NEON_SCALARMUL:
24142 case NEON_SCALARMULL:
24143 case NEON_SCALARMULH:
24144 case NEON_SCALARMAC:
24145 case NEON_SELECT:
24146 case NEON_VTBL:
24147 case NEON_VTBX:
24148 {
24149 int k;
24150 tree return_type = void_type_node, args = void_list_node;
24152 /* Build a function type directly from the insn_data for
24153 this builtin. The build_function_type() function takes
24154 care of removing duplicates for us. */
24155 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24156 {
24157 tree eltype;
24159 if (is_load && k == 1)
24160 {
24161 /* Neon load patterns always have the memory
24162 operand in the operand 1 position. */
24163 gcc_assert (insn_data[d->code].operand[k].predicate
24164 == neon_struct_operand);
24166 switch (d->mode)
24167 {
24168 case T_V8QI:
24169 case T_V16QI:
24170 eltype = const_intQI_pointer_node;
24171 break;
24173 case T_V4HI:
24174 case T_V8HI:
24175 eltype = const_intHI_pointer_node;
24176 break;
24178 case T_V2SI:
24179 case T_V4SI:
24180 eltype = const_intSI_pointer_node;
24181 break;
24183 case T_V2SF:
24184 case T_V4SF:
24185 eltype = const_float_pointer_node;
24186 break;
24188 case T_DI:
24189 case T_V2DI:
24190 eltype = const_intDI_pointer_node;
24191 break;
24193 default: gcc_unreachable ();
24194 }
24195 }
24196 else if (is_store && k == 0)
24197 {
24198 /* Similarly, Neon store patterns use operand 0 as
24199 the memory location to store to. */
24200 gcc_assert (insn_data[d->code].operand[k].predicate
24201 == neon_struct_operand);
24203 switch (d->mode)
24204 {
24205 case T_V8QI:
24206 case T_V16QI:
24207 eltype = intQI_pointer_node;
24208 break;
24210 case T_V4HI:
24211 case T_V8HI:
24212 eltype = intHI_pointer_node;
24213 break;
24215 case T_V2SI:
24216 case T_V4SI:
24217 eltype = intSI_pointer_node;
24218 break;
24220 case T_V2SF:
24221 case T_V4SF:
24222 eltype = float_pointer_node;
24223 break;
24225 case T_DI:
24226 case T_V2DI:
24227 eltype = intDI_pointer_node;
24228 break;
24230 default: gcc_unreachable ();
24231 }
24232 }
24233 else
24234 {
24235 switch (insn_data[d->code].operand[k].mode)
24236 {
24237 case VOIDmode: eltype = void_type_node; break;
24238 /* Scalars. */
24239 case QImode: eltype = neon_intQI_type_node; break;
24240 case HImode: eltype = neon_intHI_type_node; break;
24241 case SImode: eltype = neon_intSI_type_node; break;
24242 case SFmode: eltype = neon_float_type_node; break;
24243 case DImode: eltype = neon_intDI_type_node; break;
24244 case TImode: eltype = intTI_type_node; break;
24245 case EImode: eltype = intEI_type_node; break;
24246 case OImode: eltype = intOI_type_node; break;
24247 case CImode: eltype = intCI_type_node; break;
24248 case XImode: eltype = intXI_type_node; break;
24249 /* 64-bit vectors. */
24250 case V8QImode: eltype = V8QI_type_node; break;
24251 case V4HImode: eltype = V4HI_type_node; break;
24252 case V2SImode: eltype = V2SI_type_node; break;
24253 case V2SFmode: eltype = V2SF_type_node; break;
24254 /* 128-bit vectors. */
24255 case V16QImode: eltype = V16QI_type_node; break;
24256 case V8HImode: eltype = V8HI_type_node; break;
24257 case V4SImode: eltype = V4SI_type_node; break;
24258 case V4SFmode: eltype = V4SF_type_node; break;
24259 case V2DImode: eltype = V2DI_type_node; break;
24260 default: gcc_unreachable ();
24261 }
24262 }
24264 if (k == 0 && !is_store)
24265 return_type = eltype;
24266 else
24267 args = tree_cons (NULL_TREE, eltype, args);
24268 }
24270 ftype = build_function_type (return_type, args);
24271 }
24272 break;
24274 case NEON_REINTERP:
24275 {
24276 /* We iterate over NUM_DREG_TYPES doubleword types,
24277 then NUM_QREG_TYPES quadword types.
24278 V4HF is not a type used in reinterpret, so we translate
24279 d->mode to the correct index in reinterp_ftype_dreg. */
24280 bool qreg_p
24281 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24282 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24283 % NUM_QREG_TYPES;
24284 switch (insn_data[d->code].operand[0].mode)
24285 {
24286 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24287 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24288 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24289 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24290 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24291 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24292 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24293 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24294 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24295 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24296 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24297 default: gcc_unreachable ();
24298 }
24299 }
24300 break;
24301 case NEON_FLOAT_WIDEN:
24302 {
24303 tree eltype = NULL_TREE;
24304 tree return_type = NULL_TREE;
24306 switch (insn_data[d->code].operand[1].mode)
24307 {
24308 case V4HFmode:
24309 eltype = V4HF_type_node;
24310 return_type = V4SF_type_node;
24311 break;
24312 default: gcc_unreachable ();
24313 }
24314 ftype = build_function_type_list (return_type, eltype, NULL);
24315 break;
24316 }
24317 case NEON_FLOAT_NARROW:
24318 {
24319 tree eltype = NULL_TREE;
24320 tree return_type = NULL_TREE;
24322 switch (insn_data[d->code].operand[1].mode)
24323 {
24324 case V4SFmode:
24325 eltype = V4SF_type_node;
24326 return_type = V4HF_type_node;
24327 break;
24328 default: gcc_unreachable ();
24329 }
24330 ftype = build_function_type_list (return_type, eltype, NULL);
24331 break;
24332 }
24333 case NEON_BSWAP:
24334 {
24335 tree eltype = NULL_TREE;
24336 switch (insn_data[d->code].operand[1].mode)
24337 {
24338 case V4HImode:
24339 eltype = V4UHI_type_node;
24340 break;
24341 case V8HImode:
24342 eltype = V8UHI_type_node;
24343 break;
24344 case V2SImode:
24345 eltype = V2USI_type_node;
24346 break;
24347 case V4SImode:
24348 eltype = V4USI_type_node;
24349 break;
24350 case V2DImode:
24351 eltype = V2UDI_type_node;
24352 break;
24353 default: gcc_unreachable ();
24354 }
24355 ftype = build_function_type_list (eltype, eltype, NULL);
24356 break;
24357 }
24358 case NEON_COPYSIGNF:
24359 {
24360 tree eltype = NULL_TREE;
24361 switch (insn_data[d->code].operand[1].mode)
24362 {
24363 case V2SFmode:
24364 eltype = V2SF_type_node;
24365 break;
24366 case V4SFmode:
24367 eltype = V4SF_type_node;
24368 break;
24369 default: gcc_unreachable ();
24370 }
24371 ftype = build_function_type_list (eltype, eltype, NULL);
24372 break;
24373 }
24374 default:
24375 gcc_unreachable ();
24376 }
24378 gcc_assert (ftype != NULL);
24380 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
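/* For example, an entry named "vadd" with key mode T_V8QI is registered
   here under the name "__builtin_neon_vaddv8qi".  */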
24382 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24383 NULL_TREE);
24384 arm_builtin_decls[fcode] = decl;
24385 }
24386 }
24388 #undef NUM_DREG_TYPES
24389 #undef NUM_QREG_TYPES
24391 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24392 do \
24393 { \
24394 if ((MASK) & insn_flags) \
24395 { \
24396 tree bdecl; \
24397 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24398 BUILT_IN_MD, NULL, NULL_TREE); \
24399 arm_builtin_decls[CODE] = bdecl; \
24400 } \
24401 } \
24402 while (0)
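/* def_mbuiltin registers NAME only when MASK is present in insn_flags.
   For example, the generic loop over bdesc_2arg below effectively performs
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
                   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);
   for the "waddb" entry of that table.  */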
24404 struct builtin_description
24405 {
24406 const unsigned int mask;
24407 const enum insn_code icode;
24408 const char * const name;
24409 const enum arm_builtins code;
24410 const enum rtx_code comparison;
24411 const unsigned int flag;
24412 };
24414 static const struct builtin_description bdesc_2arg[] =
24415 {
24416 #define IWMMXT_BUILTIN(code, string, builtin) \
24417 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24418 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24420 #define IWMMXT2_BUILTIN(code, string, builtin) \
24421 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24422 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
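/* For example, the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   tying the builtin's name and enum value to the matching insn pattern.  */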
24424 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24425 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24426 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24427 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24428 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24429 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24430 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24431 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24432 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24433 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24434 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24435 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24436 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24437 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24438 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24439 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24440 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24441 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24442 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24443 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24444 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24445 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24446 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24447 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24448 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24449 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24450 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24451 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24452 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24453 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24454 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24455 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24456 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24457 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24458 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24459 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24460 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24461 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24462 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24463 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24464 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24465 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24466 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24467 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24468 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24469 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24470 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24471 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24472 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24473 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24474 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24475 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24476 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24477 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24478 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24479 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24480 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24481 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24482 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24483 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24484 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24485 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24486 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24487 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24488 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24489 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24490 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24491 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24492 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24493 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24494 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24495 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24496 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24497 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24498 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24499 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24500 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24501 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24503 #define IWMMXT_BUILTIN2(code, builtin) \
24504 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24506 #define IWMMXT2_BUILTIN2(code, builtin) \
24507 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
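/* Entries created with the *_BUILTIN2 forms have a NULL name, so the
   generic two-operand registration loop in arm_init_iwmmxt_builtins skips
   them; they are registered explicitly instead (e.g. "wmacsz" via
   iwmmx_mbuiltin below) with hand-written function types.  */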
24509 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24510 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24511 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24512 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24513 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24514 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24515 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24516 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24517 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24518 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24521 #define FP_BUILTIN(L, U) \
24522 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24523 UNKNOWN, 0},
24525 FP_BUILTIN (get_fpscr, GET_FPSCR)
24526 FP_BUILTIN (set_fpscr, SET_FPSCR)
24527 #undef FP_BUILTIN
24529 #define CRC32_BUILTIN(L, U) \
24530 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24531 UNKNOWN, 0},
24532 CRC32_BUILTIN (crc32b, CRC32B)
24533 CRC32_BUILTIN (crc32h, CRC32H)
24534 CRC32_BUILTIN (crc32w, CRC32W)
24535 CRC32_BUILTIN (crc32cb, CRC32CB)
24536 CRC32_BUILTIN (crc32ch, CRC32CH)
24537 CRC32_BUILTIN (crc32cw, CRC32CW)
24538 #undef CRC32_BUILTIN
24541 #define CRYPTO_BUILTIN(L, U) \
24542 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24543 UNKNOWN, 0},
24544 #undef CRYPTO1
24545 #undef CRYPTO2
24546 #undef CRYPTO3
24547 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24548 #define CRYPTO1(L, U, R, A)
24549 #define CRYPTO3(L, U, R, A1, A2, A3)
24550 #include "crypto.def"
24551 #undef CRYPTO1
24552 #undef CRYPTO2
24553 #undef CRYPTO3
24554 };
24557 static const struct builtin_description bdesc_1arg[] =
24558 {
24559 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24560 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24561 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24562 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24563 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24564 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24565 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24566 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24567 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24568 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24569 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24570 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24571 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24572 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24573 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24574 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24575 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24576 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24577 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24578 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24579 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24580 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24581 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24582 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24584 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24585 #define CRYPTO2(L, U, R, A1, A2)
24586 #define CRYPTO3(L, U, R, A1, A2, A3)
24587 #include "crypto.def"
24588 #undef CRYPTO1
24589 #undef CRYPTO2
24590 #undef CRYPTO3
24591 };
24593 static const struct builtin_description bdesc_3arg[] =
24594 {
24595 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24596 #define CRYPTO1(L, U, R, A)
24597 #define CRYPTO2(L, U, R, A1, A2)
24598 #include "crypto.def"
24599 #undef CRYPTO1
24600 #undef CRYPTO2
24601 #undef CRYPTO3
24602 };
24603 #undef CRYPTO_BUILTIN
24605 /* Set up all the iWMMXt builtins. This is not called if
24606 TARGET_IWMMXT is zero. */
24608 static void
24609 arm_init_iwmmxt_builtins (void)
24610 {
24611 const struct builtin_description * d;
24612 size_t i;
24614 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24615 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24616 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24618 tree v8qi_ftype_v8qi_v8qi_int
24619 = build_function_type_list (V8QI_type_node,
24620 V8QI_type_node, V8QI_type_node,
24621 integer_type_node, NULL_TREE);
24622 tree v4hi_ftype_v4hi_int
24623 = build_function_type_list (V4HI_type_node,
24624 V4HI_type_node, integer_type_node, NULL_TREE);
24625 tree v2si_ftype_v2si_int
24626 = build_function_type_list (V2SI_type_node,
24627 V2SI_type_node, integer_type_node, NULL_TREE);
24628 tree v2si_ftype_di_di
24629 = build_function_type_list (V2SI_type_node,
24630 long_long_integer_type_node,
24631 long_long_integer_type_node,
24632 NULL_TREE);
24633 tree di_ftype_di_int
24634 = build_function_type_list (long_long_integer_type_node,
24635 long_long_integer_type_node,
24636 integer_type_node, NULL_TREE);
24637 tree di_ftype_di_int_int
24638 = build_function_type_list (long_long_integer_type_node,
24639 long_long_integer_type_node,
24640 integer_type_node,
24641 integer_type_node, NULL_TREE);
24642 tree int_ftype_v8qi
24643 = build_function_type_list (integer_type_node,
24644 V8QI_type_node, NULL_TREE);
24645 tree int_ftype_v4hi
24646 = build_function_type_list (integer_type_node,
24647 V4HI_type_node, NULL_TREE);
24648 tree int_ftype_v2si
24649 = build_function_type_list (integer_type_node,
24650 V2SI_type_node, NULL_TREE);
24651 tree int_ftype_v8qi_int
24652 = build_function_type_list (integer_type_node,
24653 V8QI_type_node, integer_type_node, NULL_TREE);
24654 tree int_ftype_v4hi_int
24655 = build_function_type_list (integer_type_node,
24656 V4HI_type_node, integer_type_node, NULL_TREE);
24657 tree int_ftype_v2si_int
24658 = build_function_type_list (integer_type_node,
24659 V2SI_type_node, integer_type_node, NULL_TREE);
24660 tree v8qi_ftype_v8qi_int_int
24661 = build_function_type_list (V8QI_type_node,
24662 V8QI_type_node, integer_type_node,
24663 integer_type_node, NULL_TREE);
24664 tree v4hi_ftype_v4hi_int_int
24665 = build_function_type_list (V4HI_type_node,
24666 V4HI_type_node, integer_type_node,
24667 integer_type_node, NULL_TREE);
24668 tree v2si_ftype_v2si_int_int
24669 = build_function_type_list (V2SI_type_node,
24670 V2SI_type_node, integer_type_node,
24671 integer_type_node, NULL_TREE);
24672 /* Miscellaneous. */
24673 tree v8qi_ftype_v4hi_v4hi
24674 = build_function_type_list (V8QI_type_node,
24675 V4HI_type_node, V4HI_type_node, NULL_TREE);
24676 tree v4hi_ftype_v2si_v2si
24677 = build_function_type_list (V4HI_type_node,
24678 V2SI_type_node, V2SI_type_node, NULL_TREE);
24679 tree v8qi_ftype_v4hi_v8qi
24680 = build_function_type_list (V8QI_type_node,
24681 V4HI_type_node, V8QI_type_node, NULL_TREE);
24682 tree v2si_ftype_v4hi_v4hi
24683 = build_function_type_list (V2SI_type_node,
24684 V4HI_type_node, V4HI_type_node, NULL_TREE);
24685 tree v2si_ftype_v8qi_v8qi
24686 = build_function_type_list (V2SI_type_node,
24687 V8QI_type_node, V8QI_type_node, NULL_TREE);
24688 tree v4hi_ftype_v4hi_di
24689 = build_function_type_list (V4HI_type_node,
24690 V4HI_type_node, long_long_integer_type_node,
24691 NULL_TREE);
24692 tree v2si_ftype_v2si_di
24693 = build_function_type_list (V2SI_type_node,
24694 V2SI_type_node, long_long_integer_type_node,
24695 NULL_TREE);
24696 tree di_ftype_void
24697 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24698 tree int_ftype_void
24699 = build_function_type_list (integer_type_node, NULL_TREE);
24700 tree di_ftype_v8qi
24701 = build_function_type_list (long_long_integer_type_node,
24702 V8QI_type_node, NULL_TREE);
24703 tree di_ftype_v4hi
24704 = build_function_type_list (long_long_integer_type_node,
24705 V4HI_type_node, NULL_TREE);
24706 tree di_ftype_v2si
24707 = build_function_type_list (long_long_integer_type_node,
24708 V2SI_type_node, NULL_TREE);
24709 tree v2si_ftype_v4hi
24710 = build_function_type_list (V2SI_type_node,
24711 V4HI_type_node, NULL_TREE);
24712 tree v4hi_ftype_v8qi
24713 = build_function_type_list (V4HI_type_node,
24714 V8QI_type_node, NULL_TREE);
24715 tree v8qi_ftype_v8qi
24716 = build_function_type_list (V8QI_type_node,
24717 V8QI_type_node, NULL_TREE);
24718 tree v4hi_ftype_v4hi
24719 = build_function_type_list (V4HI_type_node,
24720 V4HI_type_node, NULL_TREE);
24721 tree v2si_ftype_v2si
24722 = build_function_type_list (V2SI_type_node,
24723 V2SI_type_node, NULL_TREE);
24725 tree di_ftype_di_v4hi_v4hi
24726 = build_function_type_list (long_long_unsigned_type_node,
24727 long_long_unsigned_type_node,
24728 V4HI_type_node, V4HI_type_node,
24729 NULL_TREE);
24731 tree di_ftype_v4hi_v4hi
24732 = build_function_type_list (long_long_unsigned_type_node,
24733 V4HI_type_node,V4HI_type_node,
24734 NULL_TREE);
24736 tree v2si_ftype_v2si_v4hi_v4hi
24737 = build_function_type_list (V2SI_type_node,
24738 V2SI_type_node, V4HI_type_node,
24739 V4HI_type_node, NULL_TREE);
24741 tree v2si_ftype_v2si_v8qi_v8qi
24742 = build_function_type_list (V2SI_type_node,
24743 V2SI_type_node, V8QI_type_node,
24744 V8QI_type_node, NULL_TREE);
24746 tree di_ftype_di_v2si_v2si
24747 = build_function_type_list (long_long_unsigned_type_node,
24748 long_long_unsigned_type_node,
24749 V2SI_type_node, V2SI_type_node,
24750 NULL_TREE);
24752 tree di_ftype_di_di_int
24753 = build_function_type_list (long_long_unsigned_type_node,
24754 long_long_unsigned_type_node,
24755 long_long_unsigned_type_node,
24756 integer_type_node, NULL_TREE);
24758 tree void_ftype_int
24759 = build_function_type_list (void_type_node,
24760 integer_type_node, NULL_TREE);
24762 tree v8qi_ftype_char
24763 = build_function_type_list (V8QI_type_node,
24764 signed_char_type_node, NULL_TREE);
24766 tree v4hi_ftype_short
24767 = build_function_type_list (V4HI_type_node,
24768 short_integer_type_node, NULL_TREE);
24770 tree v2si_ftype_int
24771 = build_function_type_list (V2SI_type_node,
24772 integer_type_node, NULL_TREE);
24774 /* Normal vector binops. */
24775 tree v8qi_ftype_v8qi_v8qi
24776 = build_function_type_list (V8QI_type_node,
24777 V8QI_type_node, V8QI_type_node, NULL_TREE);
24778 tree v4hi_ftype_v4hi_v4hi
24779 = build_function_type_list (V4HI_type_node,
24780 V4HI_type_node,V4HI_type_node, NULL_TREE);
24781 tree v2si_ftype_v2si_v2si
24782 = build_function_type_list (V2SI_type_node,
24783 V2SI_type_node, V2SI_type_node, NULL_TREE);
24784 tree di_ftype_di_di
24785 = build_function_type_list (long_long_unsigned_type_node,
24786 long_long_unsigned_type_node,
24787 long_long_unsigned_type_node,
24788 NULL_TREE);
24790 /* Add all builtins that are more or less simple operations on two
24791 operands. */
24792 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24793 {
24794 /* Use one of the operands; the target can have a different mode for
24795 mask-generating compares. */
24796 machine_mode mode;
24797 tree type;
24799 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24800 continue;
24802 mode = insn_data[d->icode].operand[1].mode;
24804 switch (mode)
24805 {
24806 case V8QImode:
24807 type = v8qi_ftype_v8qi_v8qi;
24808 break;
24809 case V4HImode:
24810 type = v4hi_ftype_v4hi_v4hi;
24811 break;
24812 case V2SImode:
24813 type = v2si_ftype_v2si_v2si;
24814 break;
24815 case DImode:
24816 type = di_ftype_di_di;
24817 break;
24819 default:
24820 gcc_unreachable ();
24821 }
24823 def_mbuiltin (d->mask, d->name, type, d->code);
24824 }
24826 /* Add the remaining MMX insns with somewhat more complicated types. */
24827 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24828 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24829 ARM_BUILTIN_ ## CODE)
24831 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24832 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24833 ARM_BUILTIN_ ## CODE)
24835 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24836 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24837 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24838 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24839 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24840 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24841 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24842 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24843 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24845 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24846 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24847 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24848 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24849 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24850 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24852 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24853 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24854 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24855 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24856 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24857 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24859 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24860 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24861 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24862 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24863 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24864 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24866 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24867 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24868 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24869 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24870 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24871 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24873 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24875 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24876 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24877 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24878 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24879 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24880 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24881 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24882 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24883 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24884 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24886 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24887 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24888 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24889 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24890 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24891 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24892 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24893 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24894 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24896 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24897 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24898 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24900 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24901 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24902 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24904 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24905 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24907 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24908 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24909 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24910 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24911 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24912 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24914 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24915 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24916 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24917 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24918 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24919 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24920 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24921 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24922 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24923 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24924 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24925 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24927 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24928 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24929 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24930 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24932 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24933 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24934 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24935 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24936 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24937 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24938 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24940 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24941 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24942 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24944 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24945 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24946 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24947 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24949 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24950 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24951 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24952 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24954 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24955 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24956 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24957 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24959 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24960 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24961 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24962 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24964 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24965 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24966 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24967 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24969 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24970 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24971 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24972 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24974 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24976 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24977 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24978 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24980 #undef iwmmx_mbuiltin
24981 #undef iwmmx2_mbuiltin
24982 }
24984 static void
24985 arm_init_fp16_builtins (void)
24986 {
24987 tree fp16_type = make_node (REAL_TYPE);
24988 TYPE_PRECISION (fp16_type) = 16;
24989 layout_type (fp16_type);
24990 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24991 }
24993 static void
24994 arm_init_crc32_builtins ()
24995 {
24996 tree si_ftype_si_qi
24997 = build_function_type_list (unsigned_intSI_type_node,
24998 unsigned_intSI_type_node,
24999 unsigned_intQI_type_node, NULL_TREE);
25000 tree si_ftype_si_hi
25001 = build_function_type_list (unsigned_intSI_type_node,
25002 unsigned_intSI_type_node,
25003 unsigned_intHI_type_node, NULL_TREE);
25004 tree si_ftype_si_si
25005 = build_function_type_list (unsigned_intSI_type_node,
25006 unsigned_intSI_type_node,
25007 unsigned_intSI_type_node, NULL_TREE);
25009 arm_builtin_decls[ARM_BUILTIN_CRC32B]
25010 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
25011 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
25012 arm_builtin_decls[ARM_BUILTIN_CRC32H]
25013 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
25014 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
25015 arm_builtin_decls[ARM_BUILTIN_CRC32W]
25016 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
25017 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
25018 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
25019 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
25020 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
25021 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
25022 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
25023 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
25024 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
25025 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
25026 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
25027 }
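/* A minimal usage sketch (hypothetical caller code; requires a target where
   TARGET_CRC32 is true):

     unsigned int crc = 0xffffffffu;
     crc = __builtin_arm_crc32b (crc, byte);   -- uses si_ftype_si_qi
     crc = __builtin_arm_crc32w (crc, word);   -- uses si_ftype_si_si

   where "byte" and "word" stand for arbitrary unsigned char and unsigned
   int operands.  */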
25029 static void
25030 arm_init_builtins (void)
25031 {
25032 if (TARGET_REALLY_IWMMXT)
25033 arm_init_iwmmxt_builtins ();
25035 if (TARGET_NEON)
25036 arm_init_neon_builtins ();
25038 if (arm_fp16_format)
25039 arm_init_fp16_builtins ();
25041 if (TARGET_CRC32)
25042 arm_init_crc32_builtins ();
25044 if (TARGET_VFP && TARGET_HARD_FLOAT)
25045 {
25046 tree ftype_set_fpscr
25047 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
25048 tree ftype_get_fpscr
25049 = build_function_type_list (unsigned_type_node, NULL);
25051 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
25052 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
25053 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25054 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
25055 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
25056 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25057 }
25058 }
25060 /* Return the ARM builtin for CODE. */
25062 static tree
25063 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25064 {
25065 if (code >= ARM_BUILTIN_MAX)
25066 return error_mark_node;
25068 return arm_builtin_decls[code];
25069 }
25071 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25073 static const char *
25074 arm_invalid_parameter_type (const_tree t)
25075 {
25076 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25077 return N_("function parameters cannot have __fp16 type");
25078 return NULL;
25079 }
25081 /* Implement TARGET_INVALID_RETURN_TYPE. */
25083 static const char *
25084 arm_invalid_return_type (const_tree t)
25085 {
25086 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25087 return N_("functions cannot return __fp16 type");
25088 return NULL;
25089 }
25091 /* Implement TARGET_PROMOTED_TYPE. */
25093 static tree
25094 arm_promoted_type (const_tree t)
25095 {
25096 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25097 return float_type_node;
25098 return NULL_TREE;
25099 }
25101 /* Implement TARGET_CONVERT_TO_TYPE.
25102 Specifically, this hook implements the peculiarity of the ARM
25103 half-precision floating-point C semantics that requires conversions
25104 between __fp16 and double to go through an intermediate conversion to float. */
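/* For instance (illustrative only): with __fp16 h and double d, the
   assignment d = h is handled as d = (double) (float) h, and h = d as
   h = (__fp16) (float) d, rather than as one direct conversion.  */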
25106 static tree
25107 arm_convert_to_type (tree type, tree expr)
25108 {
25109 tree fromtype = TREE_TYPE (expr);
25110 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25111 return NULL_TREE;
25112 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25113 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25114 return convert (type, convert (float_type_node, expr));
25115 return NULL_TREE;
25116 }
25118 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25119 This simply adds HFmode as a supported mode; even though we don't
25120 implement arithmetic on this type directly, it's supported by
25121 optabs conversions, much the way the double-word arithmetic is
25122 special-cased in the default hook. */
25124 static bool
25125 arm_scalar_mode_supported_p (machine_mode mode)
25126 {
25127 if (mode == HFmode)
25128 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25129 else if (ALL_FIXED_POINT_MODE_P (mode))
25130 return true;
25131 else
25132 return default_scalar_mode_supported_p (mode);
25135 /* Errors in the source file can cause expand_expr to return const0_rtx
25136 where we expect a vector. To avoid crashing, use one of the vector
25137 clear instructions. */
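/* In other words, an argument that failed to expand shows up as const0_rtx
   and is replaced by a freshly cleared iWMMXt register so that expansion
   can continue without crashing.  */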
25139 static rtx
25140 safe_vector_operand (rtx x, machine_mode mode)
25142 if (x != const0_rtx)
25143 return x;
25144 x = gen_reg_rtx (mode);
25146 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25147 : gen_rtx_SUBREG (DImode, x, 0)));
25148 return x;
25151 /* Function to expand ternary builtins. */
25152 static rtx
25153 arm_expand_ternop_builtin (enum insn_code icode,
25154 tree exp, rtx target)
25156 rtx pat;
25157 tree arg0 = CALL_EXPR_ARG (exp, 0);
25158 tree arg1 = CALL_EXPR_ARG (exp, 1);
25159 tree arg2 = CALL_EXPR_ARG (exp, 2);
25161 rtx op0 = expand_normal (arg0);
25162 rtx op1 = expand_normal (arg1);
25163 rtx op2 = expand_normal (arg2);
25164 rtx op3 = NULL_RTX;
25166 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25167 lane operand depending on endianness. */
25168 bool builtin_sha1cpm_p = false;
25170 if (insn_data[icode].n_operands == 5)
25172 gcc_assert (icode == CODE_FOR_crypto_sha1c
25173 || icode == CODE_FOR_crypto_sha1p
25174 || icode == CODE_FOR_crypto_sha1m);
25175 builtin_sha1cpm_p = true;
25177 machine_mode tmode = insn_data[icode].operand[0].mode;
25178 machine_mode mode0 = insn_data[icode].operand[1].mode;
25179 machine_mode mode1 = insn_data[icode].operand[2].mode;
25180 machine_mode mode2 = insn_data[icode].operand[3].mode;
25183 if (VECTOR_MODE_P (mode0))
25184 op0 = safe_vector_operand (op0, mode0);
25185 if (VECTOR_MODE_P (mode1))
25186 op1 = safe_vector_operand (op1, mode1);
25187 if (VECTOR_MODE_P (mode2))
25188 op2 = safe_vector_operand (op2, mode2);
25190 if (! target
25191 || GET_MODE (target) != tmode
25192 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25193 target = gen_reg_rtx (tmode);
25195 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25196 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25197 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25199 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25200 op0 = copy_to_mode_reg (mode0, op0);
25201 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25202 op1 = copy_to_mode_reg (mode1, op1);
25203 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25204 op2 = copy_to_mode_reg (mode2, op2);
25205 if (builtin_sha1cpm_p)
25206 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
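/* op3 is the vec_select lane index mentioned above: 0 when little-endian,
   1 when big-endian, presumably so the same architectural element is
   selected either way.  */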
25208 if (builtin_sha1cpm_p)
25209 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25210 else
25211 pat = GEN_FCN (icode) (target, op0, op1, op2);
25212 if (! pat)
25213 return 0;
25214 emit_insn (pat);
25215 return target;
25218 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25220 static rtx
25221 arm_expand_binop_builtin (enum insn_code icode,
25222 tree exp, rtx target)
25224 rtx pat;
25225 tree arg0 = CALL_EXPR_ARG (exp, 0);
25226 tree arg1 = CALL_EXPR_ARG (exp, 1);
25227 rtx op0 = expand_normal (arg0);
25228 rtx op1 = expand_normal (arg1);
25229 machine_mode tmode = insn_data[icode].operand[0].mode;
25230 machine_mode mode0 = insn_data[icode].operand[1].mode;
25231 machine_mode mode1 = insn_data[icode].operand[2].mode;
25233 if (VECTOR_MODE_P (mode0))
25234 op0 = safe_vector_operand (op0, mode0);
25235 if (VECTOR_MODE_P (mode1))
25236 op1 = safe_vector_operand (op1, mode1);
25238 if (! target
25239 || GET_MODE (target) != tmode
25240 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25241 target = gen_reg_rtx (tmode);
25243 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25244 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25246 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25247 op0 = copy_to_mode_reg (mode0, op0);
25248 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25249 op1 = copy_to_mode_reg (mode1, op1);
25251 pat = GEN_FCN (icode) (target, op0, op1);
25252 if (! pat)
25253 return 0;
25254 emit_insn (pat);
25255 return target;
25258 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25260 static rtx
25261 arm_expand_unop_builtin (enum insn_code icode,
25262 tree exp, rtx target, int do_load)
25264 rtx pat;
25265 tree arg0 = CALL_EXPR_ARG (exp, 0);
25266 rtx op0 = expand_normal (arg0);
25267 rtx op1 = NULL_RTX;
25268 machine_mode tmode = insn_data[icode].operand[0].mode;
25269 machine_mode mode0 = insn_data[icode].operand[1].mode;
25270 bool builtin_sha1h_p = false;
25272 if (insn_data[icode].n_operands == 3)
25274 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25275 builtin_sha1h_p = true;
25278 if (! target
25279 || GET_MODE (target) != tmode
25280 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25281 target = gen_reg_rtx (tmode);
25282 if (do_load)
25283 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25284 else
25286 if (VECTOR_MODE_P (mode0))
25287 op0 = safe_vector_operand (op0, mode0);
25289 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25290 op0 = copy_to_mode_reg (mode0, op0);
25292 if (builtin_sha1h_p)
25293 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25295 if (builtin_sha1h_p)
25296 pat = GEN_FCN (icode) (target, op0, op1);
25297 else
25298 pat = GEN_FCN (icode) (target, op0);
25299 if (! pat)
25300 return 0;
25301 emit_insn (pat);
25302 return target;
25305 typedef enum {
25306 NEON_ARG_COPY_TO_REG,
25307 NEON_ARG_CONSTANT,
25308 NEON_ARG_MEMORY,
25309 NEON_ARG_STOP
25310 } builtin_arg;
25312 #define NEON_MAX_BUILTIN_ARGS 5
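/* arm_expand_neon_args below receives one of these codes per argument,
   terminated by NEON_ARG_STOP; for instance, the NEON_BINOP case further
   down expands as

     arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_STOP);  */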
25314 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25315 and return an expression for the accessed memory.
25317 The intrinsic function operates on a block of registers that has
25318 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25319 function references the memory at EXP of type TYPE and in mode
25320 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25321 available. */
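/* For example, for a structure load of two 8-byte vectors of uint8_t,
   REG_MODE is 16 bytes wide, so nvectors is 2; with MEM_MODE == REG_MODE
   the access is described as an array of 16 uint8_t elements, whereas a
   lane load is described as an array of just nvectors elements.  */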
25323 static tree
25324 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25325 machine_mode reg_mode,
25326 neon_builtin_type_mode type_mode)
25328 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25329 tree elem_type, upper_bound, array_type;
25331 /* Work out the size of the register block in bytes. */
25332 reg_size = GET_MODE_SIZE (reg_mode);
25334 /* Work out the size of each vector in bytes. */
25335 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25336 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25338 /* Work out how many vectors there are. */
25339 gcc_assert (reg_size % vector_size == 0);
25340 nvectors = reg_size / vector_size;
25342 /* Work out the type of each element. */
25343 gcc_assert (POINTER_TYPE_P (type));
25344 elem_type = TREE_TYPE (type);
25346 /* Work out how many elements are being loaded or stored.
25347 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25348 and memory elements; anything else implies a lane load or store. */
25349 if (mem_mode == reg_mode)
25350 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25351 else
25352 nelems = nvectors;
25354 /* Create a type that describes the full access. */
25355 upper_bound = build_int_cst (size_type_node, nelems - 1);
25356 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25358 /* Dereference EXP using that type. */
25359 return fold_build2 (MEM_REF, array_type, exp,
25360 build_int_cst (build_pointer_type (array_type), 0));
25363 /* Expand the arguments of a Neon builtin and emit the resulting insn. */
25364 static rtx
25365 arm_expand_neon_args (rtx target, int icode, int have_retval,
25366 neon_builtin_type_mode type_mode,
25367 tree exp, int fcode, ...)
25369 va_list ap;
25370 rtx pat;
25371 tree arg[NEON_MAX_BUILTIN_ARGS];
25372 rtx op[NEON_MAX_BUILTIN_ARGS];
25373 tree arg_type;
25374 tree formals;
25375 machine_mode tmode = insn_data[icode].operand[0].mode;
25376 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25377 machine_mode other_mode;
25378 int argc = 0;
25379 int opno;
25381 if (have_retval
25382 && (!target
25383 || GET_MODE (target) != tmode
25384 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25385 target = gen_reg_rtx (tmode);
25387 va_start (ap, fcode);
25389 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25391 for (;;)
25393 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25395 if (thisarg == NEON_ARG_STOP)
25396 break;
25397 else
25399 opno = argc + have_retval;
25400 mode[argc] = insn_data[icode].operand[opno].mode;
25401 arg[argc] = CALL_EXPR_ARG (exp, argc);
25402 arg_type = TREE_VALUE (formals);
25403 if (thisarg == NEON_ARG_MEMORY)
25405 other_mode = insn_data[icode].operand[1 - opno].mode;
25406 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25407 mode[argc], other_mode,
25408 type_mode);
25411 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25412 is returned. */
25413 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25414 (thisarg == NEON_ARG_MEMORY
25415 ? EXPAND_MEMORY : EXPAND_NORMAL));
25417 switch (thisarg)
25419 case NEON_ARG_COPY_TO_REG:
25420 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25421 if (!(*insn_data[icode].operand[opno].predicate)
25422 (op[argc], mode[argc]))
25423 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25424 break;
25426 case NEON_ARG_CONSTANT:
25427 /* FIXME: This error message is somewhat unhelpful. */
25428 if (!(*insn_data[icode].operand[opno].predicate)
25429 (op[argc], mode[argc]))
25430 error ("argument must be a constant");
25431 break;
25433 case NEON_ARG_MEMORY:
25434 /* Check if expand failed. */
25435 if (op[argc] == const0_rtx)
25436 return 0;
25437 gcc_assert (MEM_P (op[argc]));
25438 PUT_MODE (op[argc], mode[argc]);
25439 /* ??? arm_neon.h uses the same built-in functions for signed
25440 and unsigned accesses, casting where necessary. This isn't
25441 alias safe. */
25442 set_mem_alias_set (op[argc], 0);
25443 if (!(*insn_data[icode].operand[opno].predicate)
25444 (op[argc], mode[argc]))
25445 op[argc] = (replace_equiv_address
25446 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25447 break;
25449 case NEON_ARG_STOP:
25450 gcc_unreachable ();
25453 argc++;
25454 formals = TREE_CHAIN (formals);
25458 va_end (ap);
25460 if (have_retval)
25461 switch (argc)
25463 case 1:
25464 pat = GEN_FCN (icode) (target, op[0]);
25465 break;
25467 case 2:
25468 pat = GEN_FCN (icode) (target, op[0], op[1]);
25469 break;
25471 case 3:
25472 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25473 break;
25475 case 4:
25476 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25477 break;
25479 case 5:
25480 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25481 break;
25483 default:
25484 gcc_unreachable ();
25486 else
25487 switch (argc)
25489 case 1:
25490 pat = GEN_FCN (icode) (op[0]);
25491 break;
25493 case 2:
25494 pat = GEN_FCN (icode) (op[0], op[1]);
25495 break;
25497 case 3:
25498 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25499 break;
25501 case 4:
25502 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25503 break;
25505 case 5:
25506 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25507 break;
25509 default:
25510 gcc_unreachable ();
25513 if (!pat)
25514 return 0;
25516 emit_insn (pat);
25518 return target;
25521 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25522 constants defined per-instruction or per instruction-variant. Instead, the
25523 required info is looked up in the table neon_builtin_data. */
25524 static rtx
25525 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25527 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25528 neon_itype itype = d->itype;
25529 enum insn_code icode = d->code;
25530 neon_builtin_type_mode type_mode = d->mode;
25532 switch (itype)
25534 case NEON_UNOP:
25535 case NEON_CONVERT:
25536 case NEON_DUPLANE:
25537 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25538 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25540 case NEON_BINOP:
25541 case NEON_LOGICBINOP:
25542 case NEON_SCALARMUL:
25543 case NEON_SCALARMULL:
25544 case NEON_SCALARMULH:
25545 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25546 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25548 case NEON_TERNOP:
25549 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25550 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25551 NEON_ARG_STOP);
25553 case NEON_GETLANE:
25554 case NEON_FIXCONV:
25555 case NEON_SHIFTIMM:
25556 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25557 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25558 NEON_ARG_STOP);
25560 case NEON_CREATE:
25561 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25562 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25564 case NEON_DUP:
25565 case NEON_RINT:
25566 case NEON_SPLIT:
25567 case NEON_FLOAT_WIDEN:
25568 case NEON_FLOAT_NARROW:
25569 case NEON_BSWAP:
25570 case NEON_REINTERP:
25571 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25572 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25574 case NEON_COPYSIGNF:
25575 case NEON_COMBINE:
25576 case NEON_VTBL:
25577 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25578 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25580 case NEON_LANEMUL:
25581 case NEON_LANEMULL:
25582 case NEON_LANEMULH:
25583 case NEON_SETLANE:
25584 case NEON_SHIFTINSERT:
25585 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25586 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25587 NEON_ARG_STOP);
25589 case NEON_LANEMAC:
25590 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25591 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25592 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25594 case NEON_SHIFTACC:
25595 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25596 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25597 NEON_ARG_STOP);
25599 case NEON_SCALARMAC:
25600 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25601 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25602 NEON_ARG_STOP);
25604 case NEON_SELECT:
25605 case NEON_VTBX:
25606 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25607 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25608 NEON_ARG_STOP);
25610 case NEON_LOAD1:
25611 case NEON_LOADSTRUCT:
25612 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25613 NEON_ARG_MEMORY, NEON_ARG_STOP);
25615 case NEON_LOAD1LANE:
25616 case NEON_LOADSTRUCTLANE:
25617 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25618 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25619 NEON_ARG_STOP);
25621 case NEON_STORE1:
25622 case NEON_STORESTRUCT:
25623 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25624 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25626 case NEON_STORE1LANE:
25627 case NEON_STORESTRUCTLANE:
25628 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25629 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25630 NEON_ARG_STOP);
25633 gcc_unreachable ();
25636 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25637 void
25638 neon_reinterpret (rtx dest, rtx src)
25640 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25643 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25644 not to early-clobber SRC registers in the process.
25646 We assume that the operands described by SRC and DEST represent a
25647 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25648 number of components into which the copy has been decomposed. */
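/* For example, when the destination block overlaps the source and starts at
   a higher register number, the loop below lists the component moves in
   reverse order so that no source register is clobbered before it has been
   read.  */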
25649 void
25650 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25652 unsigned int i;
25654 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25655 || REGNO (operands[0]) < REGNO (operands[1]))
25657 for (i = 0; i < count; i++)
25659 operands[2 * i] = dest[i];
25660 operands[2 * i + 1] = src[i];
25663 else
25665 for (i = 0; i < count; i++)
25667 operands[2 * i] = dest[count - i - 1];
25668 operands[2 * i + 1] = src[count - i - 1];
25673 /* Split operands into moves from op[1] + op[2] into op[0]. */
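/* operands[0] is a quad-width value assembled from the half-width values
   operands[1] and operands[2]. The cases below skip moves whose source
   already occupies its half of the destination, and use the VSWP-style
   parallel when the two halves are exactly reversed.  */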
25675 void
25676 neon_split_vcombine (rtx operands[3])
25678 unsigned int dest = REGNO (operands[0]);
25679 unsigned int src1 = REGNO (operands[1]);
25680 unsigned int src2 = REGNO (operands[2]);
25681 machine_mode halfmode = GET_MODE (operands[1]);
25682 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25683 rtx destlo, desthi;
25685 if (src1 == dest && src2 == dest + halfregs)
25687 /* No-op move. Can't split to nothing; emit something. */
25688 emit_note (NOTE_INSN_DELETED);
25689 return;
25692 /* Preserve register attributes for variable tracking. */
25693 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25694 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25695 GET_MODE_SIZE (halfmode));
25697 /* Special case of reversed high/low parts. Use VSWP. */
25698 if (src2 == dest && src1 == dest + halfregs)
25700 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25701 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25702 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25703 return;
25706 if (!reg_overlap_mentioned_p (operands[2], destlo))
25708 /* Try to avoid unnecessary moves if part of the result
25709 is in the right place already. */
25710 if (src1 != dest)
25711 emit_move_insn (destlo, operands[1]);
25712 if (src2 != dest + halfregs)
25713 emit_move_insn (desthi, operands[2]);
25715 else
25717 if (src2 != dest + halfregs)
25718 emit_move_insn (desthi, operands[2]);
25719 if (src1 != dest)
25720 emit_move_insn (destlo, operands[1]);
25724 /* Expand an expression EXP that calls a built-in function,
25725 with result going to TARGET if that's convenient
25726 (and in mode MODE if that's convenient).
25727 SUBTARGET may be used as the target for computing one of EXP's operands.
25728 IGNORE is nonzero if the value is to be ignored. */
25730 static rtx
25731 arm_expand_builtin (tree exp,
25732 rtx target,
25733 rtx subtarget ATTRIBUTE_UNUSED,
25734 machine_mode mode ATTRIBUTE_UNUSED,
25735 int ignore ATTRIBUTE_UNUSED)
25737 const struct builtin_description * d;
25738 enum insn_code icode;
25739 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25740 tree arg0;
25741 tree arg1;
25742 tree arg2;
25743 rtx op0;
25744 rtx op1;
25745 rtx op2;
25746 rtx pat;
25747 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25748 size_t i;
25749 machine_mode tmode;
25750 machine_mode mode0;
25751 machine_mode mode1;
25752 machine_mode mode2;
25753 int opint;
25754 int selector;
25755 int mask;
25756 int imm;
25758 if (fcode >= ARM_BUILTIN_NEON_BASE)
25759 return arm_expand_neon_builtin (fcode, exp, target);
25761 switch (fcode)
25763 case ARM_BUILTIN_GET_FPSCR:
25764 case ARM_BUILTIN_SET_FPSCR:
25765 if (fcode == ARM_BUILTIN_GET_FPSCR)
25767 icode = CODE_FOR_get_fpscr;
25768 target = gen_reg_rtx (SImode);
25769 pat = GEN_FCN (icode) (target);
25771 else
25773 target = NULL_RTX;
25774 icode = CODE_FOR_set_fpscr;
25775 arg0 = CALL_EXPR_ARG (exp, 0);
25776 op0 = expand_normal (arg0);
25777 pat = GEN_FCN (icode) (op0);
25779 emit_insn (pat);
25780 return target;
25782 case ARM_BUILTIN_TEXTRMSB:
25783 case ARM_BUILTIN_TEXTRMUB:
25784 case ARM_BUILTIN_TEXTRMSH:
25785 case ARM_BUILTIN_TEXTRMUH:
25786 case ARM_BUILTIN_TEXTRMSW:
25787 case ARM_BUILTIN_TEXTRMUW:
25788 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25789 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25790 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25791 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25792 : CODE_FOR_iwmmxt_textrmw);
25794 arg0 = CALL_EXPR_ARG (exp, 0);
25795 arg1 = CALL_EXPR_ARG (exp, 1);
25796 op0 = expand_normal (arg0);
25797 op1 = expand_normal (arg1);
25798 tmode = insn_data[icode].operand[0].mode;
25799 mode0 = insn_data[icode].operand[1].mode;
25800 mode1 = insn_data[icode].operand[2].mode;
25802 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25803 op0 = copy_to_mode_reg (mode0, op0);
25804 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25806 /* @@@ better error message */
25807 error ("selector must be an immediate");
25808 return gen_reg_rtx (tmode);
25811 opint = INTVAL (op1);
25812 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25814 if (opint > 7 || opint < 0)
25815 error ("the range of selector should be in 0 to 7");
25817 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25819 if (opint > 3 || opint < 0)
25820 error ("the range of selector should be in 0 to 3");
25822 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25824 if (opint > 1 || opint < 0)
25825 error ("the range of selector should be in 0 to 1");
25828 if (target == 0
25829 || GET_MODE (target) != tmode
25830 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25831 target = gen_reg_rtx (tmode);
25832 pat = GEN_FCN (icode) (target, op0, op1);
25833 if (! pat)
25834 return 0;
25835 emit_insn (pat);
25836 return target;
25838 case ARM_BUILTIN_WALIGNI:
25839 /* If op2 is an immediate, use waligni, else use walignr. */
25840 arg0 = CALL_EXPR_ARG (exp, 0);
25841 arg1 = CALL_EXPR_ARG (exp, 1);
25842 arg2 = CALL_EXPR_ARG (exp, 2);
25843 op0 = expand_normal (arg0);
25844 op1 = expand_normal (arg1);
25845 op2 = expand_normal (arg2);
25846 if (CONST_INT_P (op2))
25848 icode = CODE_FOR_iwmmxt_waligni;
25849 tmode = insn_data[icode].operand[0].mode;
25850 mode0 = insn_data[icode].operand[1].mode;
25851 mode1 = insn_data[icode].operand[2].mode;
25852 mode2 = insn_data[icode].operand[3].mode;
25853 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25854 op0 = copy_to_mode_reg (mode0, op0);
25855 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25856 op1 = copy_to_mode_reg (mode1, op1);
25857 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25858 selector = INTVAL (op2);
25859 if (selector > 7 || selector < 0)
25860 error ("the range of selector should be in 0 to 7");
25862 else
25864 icode = CODE_FOR_iwmmxt_walignr;
25865 tmode = insn_data[icode].operand[0].mode;
25866 mode0 = insn_data[icode].operand[1].mode;
25867 mode1 = insn_data[icode].operand[2].mode;
25868 mode2 = insn_data[icode].operand[3].mode;
25869 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25870 op0 = copy_to_mode_reg (mode0, op0);
25871 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25872 op1 = copy_to_mode_reg (mode1, op1);
25873 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25874 op2 = copy_to_mode_reg (mode2, op2);
25876 if (target == 0
25877 || GET_MODE (target) != tmode
25878 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25879 target = gen_reg_rtx (tmode);
25880 pat = GEN_FCN (icode) (target, op0, op1, op2);
25881 if (!pat)
25882 return 0;
25883 emit_insn (pat);
25884 return target;
25886 case ARM_BUILTIN_TINSRB:
25887 case ARM_BUILTIN_TINSRH:
25888 case ARM_BUILTIN_TINSRW:
25889 case ARM_BUILTIN_WMERGE:
25890 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25891 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25892 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25893 : CODE_FOR_iwmmxt_tinsrw);
25894 arg0 = CALL_EXPR_ARG (exp, 0);
25895 arg1 = CALL_EXPR_ARG (exp, 1);
25896 arg2 = CALL_EXPR_ARG (exp, 2);
25897 op0 = expand_normal (arg0);
25898 op1 = expand_normal (arg1);
25899 op2 = expand_normal (arg2);
25900 tmode = insn_data[icode].operand[0].mode;
25901 mode0 = insn_data[icode].operand[1].mode;
25902 mode1 = insn_data[icode].operand[2].mode;
25903 mode2 = insn_data[icode].operand[3].mode;
25905 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25906 op0 = copy_to_mode_reg (mode0, op0);
25907 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25908 op1 = copy_to_mode_reg (mode1, op1);
25909 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25911 error ("selector must be an immediate");
25912 return const0_rtx;
25914 if (icode == CODE_FOR_iwmmxt_wmerge)
25916 selector = INTVAL (op2);
25917 if (selector > 7 || selector < 0)
25918 error ("the range of selector should be in 0 to 7");
25920 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25921 || (icode == CODE_FOR_iwmmxt_tinsrh)
25922 || (icode == CODE_FOR_iwmmxt_tinsrw))
25924 mask = 0x01;
25925 selector= INTVAL (op2);
25926 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25927 error ("the range of selector should be in 0 to 7");
25928 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25929 error ("the range of selector should be in 0 to 3");
25930 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25931 error ("the range of selector should be in 0 to 1");
25932 mask <<= selector;
25933 op2 = GEN_INT (mask);
25935 if (target == 0
25936 || GET_MODE (target) != tmode
25937 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25938 target = gen_reg_rtx (tmode);
25939 pat = GEN_FCN (icode) (target, op0, op1, op2);
25940 if (! pat)
25941 return 0;
25942 emit_insn (pat);
25943 return target;
25945 case ARM_BUILTIN_SETWCGR0:
25946 case ARM_BUILTIN_SETWCGR1:
25947 case ARM_BUILTIN_SETWCGR2:
25948 case ARM_BUILTIN_SETWCGR3:
25949 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25950 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25951 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25952 : CODE_FOR_iwmmxt_setwcgr3);
25953 arg0 = CALL_EXPR_ARG (exp, 0);
25954 op0 = expand_normal (arg0);
25955 mode0 = insn_data[icode].operand[0].mode;
25956 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25957 op0 = copy_to_mode_reg (mode0, op0);
25958 pat = GEN_FCN (icode) (op0);
25959 if (!pat)
25960 return 0;
25961 emit_insn (pat);
25962 return 0;
25964 case ARM_BUILTIN_GETWCGR0:
25965 case ARM_BUILTIN_GETWCGR1:
25966 case ARM_BUILTIN_GETWCGR2:
25967 case ARM_BUILTIN_GETWCGR3:
25968 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25969 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25970 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25971 : CODE_FOR_iwmmxt_getwcgr3);
25972 tmode = insn_data[icode].operand[0].mode;
25973 if (target == 0
25974 || GET_MODE (target) != tmode
25975 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25976 target = gen_reg_rtx (tmode);
25977 pat = GEN_FCN (icode) (target);
25978 if (!pat)
25979 return 0;
25980 emit_insn (pat);
25981 return target;
25983 case ARM_BUILTIN_WSHUFH:
25984 icode = CODE_FOR_iwmmxt_wshufh;
25985 arg0 = CALL_EXPR_ARG (exp, 0);
25986 arg1 = CALL_EXPR_ARG (exp, 1);
25987 op0 = expand_normal (arg0);
25988 op1 = expand_normal (arg1);
25989 tmode = insn_data[icode].operand[0].mode;
25990 mode1 = insn_data[icode].operand[1].mode;
25991 mode2 = insn_data[icode].operand[2].mode;
25993 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25994 op0 = copy_to_mode_reg (mode1, op0);
25995 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25997 error ("mask must be an immediate");
25998 return const0_rtx;
26000 selector = INTVAL (op1);
26001 if (selector < 0 || selector > 255)
26002 error ("the range of mask should be in 0 to 255");
26003 if (target == 0
26004 || GET_MODE (target) != tmode
26005 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26006 target = gen_reg_rtx (tmode);
26007 pat = GEN_FCN (icode) (target, op0, op1);
26008 if (! pat)
26009 return 0;
26010 emit_insn (pat);
26011 return target;
26013 case ARM_BUILTIN_WMADDS:
26014 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
26015 case ARM_BUILTIN_WMADDSX:
26016 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
26017 case ARM_BUILTIN_WMADDSN:
26018 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
26019 case ARM_BUILTIN_WMADDU:
26020 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
26021 case ARM_BUILTIN_WMADDUX:
26022 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
26023 case ARM_BUILTIN_WMADDUN:
26024 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
26025 case ARM_BUILTIN_WSADBZ:
26026 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
26027 case ARM_BUILTIN_WSADHZ:
26028 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
26030 /* Several three-argument builtins. */
26031 case ARM_BUILTIN_WMACS:
26032 case ARM_BUILTIN_WMACU:
26033 case ARM_BUILTIN_TMIA:
26034 case ARM_BUILTIN_TMIAPH:
26035 case ARM_BUILTIN_TMIATT:
26036 case ARM_BUILTIN_TMIATB:
26037 case ARM_BUILTIN_TMIABT:
26038 case ARM_BUILTIN_TMIABB:
26039 case ARM_BUILTIN_WQMIABB:
26040 case ARM_BUILTIN_WQMIABT:
26041 case ARM_BUILTIN_WQMIATB:
26042 case ARM_BUILTIN_WQMIATT:
26043 case ARM_BUILTIN_WQMIABBN:
26044 case ARM_BUILTIN_WQMIABTN:
26045 case ARM_BUILTIN_WQMIATBN:
26046 case ARM_BUILTIN_WQMIATTN:
26047 case ARM_BUILTIN_WMIABB:
26048 case ARM_BUILTIN_WMIABT:
26049 case ARM_BUILTIN_WMIATB:
26050 case ARM_BUILTIN_WMIATT:
26051 case ARM_BUILTIN_WMIABBN:
26052 case ARM_BUILTIN_WMIABTN:
26053 case ARM_BUILTIN_WMIATBN:
26054 case ARM_BUILTIN_WMIATTN:
26055 case ARM_BUILTIN_WMIAWBB:
26056 case ARM_BUILTIN_WMIAWBT:
26057 case ARM_BUILTIN_WMIAWTB:
26058 case ARM_BUILTIN_WMIAWTT:
26059 case ARM_BUILTIN_WMIAWBBN:
26060 case ARM_BUILTIN_WMIAWBTN:
26061 case ARM_BUILTIN_WMIAWTBN:
26062 case ARM_BUILTIN_WMIAWTTN:
26063 case ARM_BUILTIN_WSADB:
26064 case ARM_BUILTIN_WSADH:
26065 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26066 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26067 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26068 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26069 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26070 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26071 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26072 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26073 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26074 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26075 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26076 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26077 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26078 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26079 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26080 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26081 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26082 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26083 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26084 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26085 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26086 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26087 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26088 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26089 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26090 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26091 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26092 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26093 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26094 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26095 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26096 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26097 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26098 : CODE_FOR_iwmmxt_wsadh);
26099 arg0 = CALL_EXPR_ARG (exp, 0);
26100 arg1 = CALL_EXPR_ARG (exp, 1);
26101 arg2 = CALL_EXPR_ARG (exp, 2);
26102 op0 = expand_normal (arg0);
26103 op1 = expand_normal (arg1);
26104 op2 = expand_normal (arg2);
26105 tmode = insn_data[icode].operand[0].mode;
26106 mode0 = insn_data[icode].operand[1].mode;
26107 mode1 = insn_data[icode].operand[2].mode;
26108 mode2 = insn_data[icode].operand[3].mode;
26110 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26111 op0 = copy_to_mode_reg (mode0, op0);
26112 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26113 op1 = copy_to_mode_reg (mode1, op1);
26114 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26115 op2 = copy_to_mode_reg (mode2, op2);
26116 if (target == 0
26117 || GET_MODE (target) != tmode
26118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26119 target = gen_reg_rtx (tmode);
26120 pat = GEN_FCN (icode) (target, op0, op1, op2);
26121 if (! pat)
26122 return 0;
26123 emit_insn (pat);
26124 return target;
26126 case ARM_BUILTIN_WZERO:
26127 target = gen_reg_rtx (DImode);
26128 emit_insn (gen_iwmmxt_clrdi (target));
26129 return target;
26131 case ARM_BUILTIN_WSRLHI:
26132 case ARM_BUILTIN_WSRLWI:
26133 case ARM_BUILTIN_WSRLDI:
26134 case ARM_BUILTIN_WSLLHI:
26135 case ARM_BUILTIN_WSLLWI:
26136 case ARM_BUILTIN_WSLLDI:
26137 case ARM_BUILTIN_WSRAHI:
26138 case ARM_BUILTIN_WSRAWI:
26139 case ARM_BUILTIN_WSRADI:
26140 case ARM_BUILTIN_WRORHI:
26141 case ARM_BUILTIN_WRORWI:
26142 case ARM_BUILTIN_WRORDI:
26143 case ARM_BUILTIN_WSRLH:
26144 case ARM_BUILTIN_WSRLW:
26145 case ARM_BUILTIN_WSRLD:
26146 case ARM_BUILTIN_WSLLH:
26147 case ARM_BUILTIN_WSLLW:
26148 case ARM_BUILTIN_WSLLD:
26149 case ARM_BUILTIN_WSRAH:
26150 case ARM_BUILTIN_WSRAW:
26151 case ARM_BUILTIN_WSRAD:
26152 case ARM_BUILTIN_WRORH:
26153 case ARM_BUILTIN_WRORW:
26154 case ARM_BUILTIN_WRORD:
26155 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26156 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26157 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26158 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26159 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26160 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26161 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26162 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26163 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26164 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26165 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26166 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26167 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26168 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26169 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26170 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26171 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26172 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26173 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26174 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26175 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26176 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26177 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26178 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26179 : CODE_FOR_nothing);
26180 arg1 = CALL_EXPR_ARG (exp, 1);
26181 op1 = expand_normal (arg1);
26182 if (GET_MODE (op1) == VOIDmode)
26184 imm = INTVAL (op1);
26185 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26186 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26187 && (imm < 0 || imm > 32))
26189 if (fcode == ARM_BUILTIN_WRORHI)
26190 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26191 else if (fcode == ARM_BUILTIN_WRORWI)
26192 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26193 else if (fcode == ARM_BUILTIN_WRORH)
26194 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26195 else
26196 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26198 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26199 && (imm < 0 || imm > 64))
26201 if (fcode == ARM_BUILTIN_WRORDI)
26202 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26203 else
26204 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26206 else if (imm < 0)
26208 if (fcode == ARM_BUILTIN_WSRLHI)
26209 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26210 else if (fcode == ARM_BUILTIN_WSRLWI)
26211 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26212 else if (fcode == ARM_BUILTIN_WSRLDI)
26213 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26214 else if (fcode == ARM_BUILTIN_WSLLHI)
26215 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26216 else if (fcode == ARM_BUILTIN_WSLLWI)
26217 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26218 else if (fcode == ARM_BUILTIN_WSLLDI)
26219 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26220 else if (fcode == ARM_BUILTIN_WSRAHI)
26221 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26222 else if (fcode == ARM_BUILTIN_WSRAWI)
26223 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26224 else if (fcode == ARM_BUILTIN_WSRADI)
26225 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26226 else if (fcode == ARM_BUILTIN_WSRLH)
26227 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26228 else if (fcode == ARM_BUILTIN_WSRLW)
26229 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26230 else if (fcode == ARM_BUILTIN_WSRLD)
26231 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26232 else if (fcode == ARM_BUILTIN_WSLLH)
26233 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26234 else if (fcode == ARM_BUILTIN_WSLLW)
26235 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26236 else if (fcode == ARM_BUILTIN_WSLLD)
26237 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26238 else if (fcode == ARM_BUILTIN_WSRAH)
26239 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26240 else if (fcode == ARM_BUILTIN_WSRAW)
26241 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26242 else
26243 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26246 return arm_expand_binop_builtin (icode, exp, target);
26248 default:
26249 break;
26252 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26253 if (d->code == (const enum arm_builtins) fcode)
26254 return arm_expand_binop_builtin (d->icode, exp, target);
26256 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26257 if (d->code == (const enum arm_builtins) fcode)
26258 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26260 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26261 if (d->code == (const enum arm_builtins) fcode)
26262 return arm_expand_ternop_builtin (d->icode, exp, target);
26264 /* @@@ Should really do something sensible here. */
26265 return NULL_RTX;
26268 /* Return the number (counting from 0) of
26269 the least significant set bit in MASK. */
26271 inline static int
26272 number_of_first_bit_set (unsigned mask)
26274 return ctz_hwi (mask);
26277 /* Like emit_multi_reg_push, but allowing for a different set of
26278 registers to be described as saved. MASK is the set of registers
26279 to be saved; REAL_REGS is the set of registers to be described as
26280 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26282 static rtx_insn *
26283 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26285 unsigned long regno;
26286 rtx par[10], tmp, reg;
26287 rtx_insn *insn;
26288 int i, j;
26290 /* Build the parallel of the registers actually being stored. */
26291 for (i = 0; mask; ++i, mask &= mask - 1)
26293 regno = ctz_hwi (mask);
26294 reg = gen_rtx_REG (SImode, regno);
26296 if (i == 0)
26297 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26298 else
26299 tmp = gen_rtx_USE (VOIDmode, reg);
26301 par[i] = tmp;
26304 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26305 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26306 tmp = gen_frame_mem (BLKmode, tmp);
26307 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26308 par[0] = tmp;
26310 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26311 insn = emit_insn (tmp);
26313 /* Always build the stack adjustment note for unwind info. */
26314 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26315 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26316 par[0] = tmp;
26318 /* Build the parallel of the registers recorded as saved for unwind. */
26319 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26321 regno = ctz_hwi (real_regs);
26322 reg = gen_rtx_REG (SImode, regno);
26324 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26325 tmp = gen_frame_mem (SImode, tmp);
26326 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26327 RTX_FRAME_RELATED_P (tmp) = 1;
26328 par[j + 1] = tmp;
26331 if (j == 0)
26332 tmp = par[0];
26333 else
26335 RTX_FRAME_RELATED_P (par[0]) = 1;
26336 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26339 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26341 return insn;
26344 /* Emit code to pop registers from the stack. F is the
26345 assembly file. MASK is the registers to pop. */
26346 static void
26347 thumb_pop (FILE *f, unsigned long mask)
26349 int regno;
26350 int lo_mask = mask & 0xFF;
26351 int pushed_words = 0;
26353 gcc_assert (mask);
26355 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26357 /* Special case. Do not generate a POP PC statement here; do it in
26358 thumb_exit (). */
26359 thumb_exit (f, -1);
26360 return;
26363 fprintf (f, "\tpop\t{");
26365 /* Look at the low registers first. */
26366 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26368 if (lo_mask & 1)
26370 asm_fprintf (f, "%r", regno);
26372 if ((lo_mask & ~1) != 0)
26373 fprintf (f, ", ");
26375 pushed_words++;
26379 if (mask & (1 << PC_REGNUM))
26381 /* Catch popping the PC. */
26382 if (TARGET_INTERWORK || TARGET_BACKTRACE
26383 || crtl->calls_eh_return)
26385 /* The PC is never popped directly; instead
26386 it is popped into r3 and then BX is used. */
26387 fprintf (f, "}\n");
26389 thumb_exit (f, -1);
26391 return;
26393 else
26395 if (mask & 0xFF)
26396 fprintf (f, ", ");
26398 asm_fprintf (f, "%r", PC_REGNUM);
26402 fprintf (f, "}\n");
26405 /* Generate code to return from a thumb function.
26406 If 'reg_containing_return_addr' is -1, then the return address is
26407 actually on the stack, at the stack pointer. */
26408 static void
26409 thumb_exit (FILE *f, int reg_containing_return_addr)
26411 unsigned regs_available_for_popping;
26412 unsigned regs_to_pop;
26413 int pops_needed;
26414 unsigned available;
26415 unsigned required;
26416 machine_mode mode;
26417 int size;
26418 int restore_a4 = FALSE;
26420 /* Compute the registers we need to pop. */
26421 regs_to_pop = 0;
26422 pops_needed = 0;
26424 if (reg_containing_return_addr == -1)
26426 regs_to_pop |= 1 << LR_REGNUM;
26427 ++pops_needed;
26430 if (TARGET_BACKTRACE)
26432 /* Restore the (ARM) frame pointer and stack pointer. */
26433 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26434 pops_needed += 2;
26437 /* If there is nothing to pop then just emit the BX instruction and
26438 return. */
26439 if (pops_needed == 0)
26441 if (crtl->calls_eh_return)
26442 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26444 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26445 return;
26447 /* Otherwise if we are not supporting interworking and we have not created
26448 a backtrace structure and the function was not entered in ARM mode then
26449 just pop the return address straight into the PC. */
26450 else if (!TARGET_INTERWORK
26451 && !TARGET_BACKTRACE
26452 && !is_called_in_ARM_mode (current_function_decl)
26453 && !crtl->calls_eh_return)
26455 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26456 return;
26459 /* Find out how many of the (return) argument registers we can corrupt. */
26460 regs_available_for_popping = 0;
26462 /* If returning via __builtin_eh_return, the bottom three registers
26463 all contain information needed for the return. */
26464 if (crtl->calls_eh_return)
26465 size = 12;
26466 else
26468 /* Deduce the registers used from the function's
26469 return value. This is more reliable than examining
26470 df_regs_ever_live_p () because that will be set if the register is
26471 ever used in the function, not just if the register is used
26472 to hold a return value. */
26474 if (crtl->return_rtx != 0)
26475 mode = GET_MODE (crtl->return_rtx);
26476 else
26477 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26479 size = GET_MODE_SIZE (mode);
26481 if (size == 0)
26483 /* In a void function we can use any argument register.
26484 In a function that returns a structure on the stack
26485 we can use the second and third argument registers. */
26486 if (mode == VOIDmode)
26487 regs_available_for_popping =
26488 (1 << ARG_REGISTER (1))
26489 | (1 << ARG_REGISTER (2))
26490 | (1 << ARG_REGISTER (3));
26491 else
26492 regs_available_for_popping =
26493 (1 << ARG_REGISTER (2))
26494 | (1 << ARG_REGISTER (3));
26496 else if (size <= 4)
26497 regs_available_for_popping =
26498 (1 << ARG_REGISTER (2))
26499 | (1 << ARG_REGISTER (3));
26500 else if (size <= 8)
26501 regs_available_for_popping =
26502 (1 << ARG_REGISTER (3));
26505 /* Match registers to be popped with registers into which we pop them. */
26506 for (available = regs_available_for_popping,
26507 required = regs_to_pop;
26508 required != 0 && available != 0;
26509 available &= ~(available & - available),
26510 required &= ~(required & - required))
26511 -- pops_needed;
26513 /* If we have any popping registers left over, remove them. */
26514 if (available > 0)
26515 regs_available_for_popping &= ~available;
26517 /* Otherwise if we need another popping register we can use
26518 the fourth argument register. */
26519 else if (pops_needed)
26521 /* If we have not found any free argument registers and
26522 reg a4 contains the return address, we must move it. */
26523 if (regs_available_for_popping == 0
26524 && reg_containing_return_addr == LAST_ARG_REGNUM)
26526 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26527 reg_containing_return_addr = LR_REGNUM;
26529 else if (size > 12)
26531 /* Register a4 is being used to hold part of the return value,
26532 but we have dire need of a free, low register. */
26533 restore_a4 = TRUE;
26535 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26538 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26540 /* The fourth argument register is available. */
26541 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26543 --pops_needed;
26547 /* Pop as many registers as we can. */
26548 thumb_pop (f, regs_available_for_popping);
26550 /* Process the registers we popped. */
26551 if (reg_containing_return_addr == -1)
26553 /* The return address was popped into the lowest numbered register. */
26554 regs_to_pop &= ~(1 << LR_REGNUM);
26556 reg_containing_return_addr =
26557 number_of_first_bit_set (regs_available_for_popping);
26559 /* Remove this register from the mask of available registers, so that
26560 the return address will not be corrupted by further pops. */
26561 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26564 /* If we popped other registers then handle them here. */
26565 if (regs_available_for_popping)
26567 int frame_pointer;
26569 /* Work out which register currently contains the frame pointer. */
26570 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26572 /* Move it into the correct place. */
26573 asm_fprintf (f, "\tmov\t%r, %r\n",
26574 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26576 /* (Temporarily) remove it from the mask of popped registers. */
26577 regs_available_for_popping &= ~(1 << frame_pointer);
26578 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26580 if (regs_available_for_popping)
26582 int stack_pointer;
26584 /* We popped the stack pointer as well,
26585 find the register that contains it. */
26586 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26588 /* Move it into the stack register. */
26589 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26591 /* At this point we have popped all necessary registers, so
26592 do not worry about restoring regs_available_for_popping
26593 to its correct value:
26595 assert (pops_needed == 0)
26596 assert (regs_available_for_popping == (1 << frame_pointer))
26597 assert (regs_to_pop == (1 << STACK_POINTER)) */
26599 else
26601 /* Since we have just moved the popped value into the frame
26602 pointer, the popping register is available for reuse, and
26603 we know that we still have the stack pointer left to pop. */
26604 regs_available_for_popping |= (1 << frame_pointer);
26608 /* If we still have registers left on the stack, but we no longer have
26609 any registers into which we can pop them, then we must move the return
26610 address into the link register and make available the register that
26611 contained it. */
26612 if (regs_available_for_popping == 0 && pops_needed > 0)
26614 regs_available_for_popping |= 1 << reg_containing_return_addr;
26616 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26617 reg_containing_return_addr);
26619 reg_containing_return_addr = LR_REGNUM;
26622 /* If we have registers left on the stack then pop some more.
26623 We know that at most we will want to pop FP and SP. */
26624 if (pops_needed > 0)
26626 int popped_into;
26627 int move_to;
26629 thumb_pop (f, regs_available_for_popping);
26631 /* We have popped either FP or SP.
26632 Move whichever one it is into the correct register. */
26633 popped_into = number_of_first_bit_set (regs_available_for_popping);
26634 move_to = number_of_first_bit_set (regs_to_pop);
26636 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26638 regs_to_pop &= ~(1 << move_to);
26640 --pops_needed;
26643 /* If we still have not popped everything then we must have only
26644 had one register available to us and we are now popping the SP. */
26645 if (pops_needed > 0)
26647 int popped_into;
26649 thumb_pop (f, regs_available_for_popping);
26651 popped_into = number_of_first_bit_set (regs_available_for_popping);
26653 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26655 /* assert (regs_to_pop == (1 << STACK_POINTER))
26656 assert (pops_needed == 1) */
26660 /* If necessary restore the a4 register. */
26661 if (restore_a4)
26663 if (reg_containing_return_addr != LR_REGNUM)
26665 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26666 reg_containing_return_addr = LR_REGNUM;
26669 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26672 if (crtl->calls_eh_return)
26673 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26675 /* Return to caller. */
26676 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26679 /* Scan INSN just before assembler is output for it.
26680 For Thumb-1, we track the status of the condition codes; this
26681 information is used in the cbranchsi4_insn pattern. */
26682 void
26683 thumb1_final_prescan_insn (rtx_insn *insn)
26685 if (flag_print_asm_name)
26686 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26687 INSN_ADDRESSES (INSN_UID (insn)));
26688 /* Don't overwrite the previous setter when we get to a cbranch. */
26689 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26691 enum attr_conds conds;
26693 if (cfun->machine->thumb1_cc_insn)
26695 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26696 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26697 CC_STATUS_INIT;
26699 conds = get_attr_conds (insn);
26700 if (conds == CONDS_SET)
26702 rtx set = single_set (insn);
26703 cfun->machine->thumb1_cc_insn = insn;
26704 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26705 cfun->machine->thumb1_cc_op1 = const0_rtx;
26706 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26707 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26709 rtx src1 = XEXP (SET_SRC (set), 1);
26710 if (src1 == const0_rtx)
26711 cfun->machine->thumb1_cc_mode = CCmode;
26713 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26715 /* Record the src register operand instead of dest because
26716 cprop_hardreg pass propagates src. */
26717 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26720 else if (conds != CONDS_NOCOND)
26721 cfun->machine->thumb1_cc_insn = NULL_RTX;
26724 /* Check if an unexpected far jump is used. */
26725 if (cfun->machine->lr_save_eliminated
26726 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26727 internal_error ("Unexpected thumb1 far jump");
26731 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26733 unsigned HOST_WIDE_INT mask = 0xff;
26734 int i;
26736 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26737 if (val == 0) /* XXX */
26738 return 0;
26740 for (i = 0; i < 25; i++)
26741 if ((val & (mask << i)) == val)
26742 return 1;
26744 return 0;
26747 /* Returns nonzero if the current function contains,
26748 or might contain, a far jump. */
26749 static int
26750 thumb_far_jump_used_p (void)
26752 rtx_insn *insn;
26753 bool far_jump = false;
26754 unsigned int func_size = 0;
26756 /* This test is only important for leaf functions. */
26757 /* assert (!leaf_function_p ()); */
26759 /* If we have already decided that far jumps may be used,
26760 do not bother checking again, and always return true even if
26761 it turns out that they are not being used. Once we have made
26762 the decision that far jumps are present (and that hence the link
26763 register will be pushed onto the stack) we cannot go back on it. */
26764 if (cfun->machine->far_jump_used)
26765 return 1;
26767 /* If this function is not being called from the prologue/epilogue
26768 generation code then it must be being called from the
26769 INITIAL_ELIMINATION_OFFSET macro. */
26770 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26772 /* In this case we know that we are being asked about the elimination
26773 of the arg pointer register. If that register is not being used,
26774 then there are no arguments on the stack, and we do not have to
26775 worry that a far jump might force the prologue to push the link
26776 register, changing the stack offsets. In this case we can just
26777 return false, since the presence of far jumps in the function will
26778 not affect stack offsets.
26780 If the arg pointer is live (or if it was live, but has now been
26781 eliminated and so set to dead) then we do have to test to see if
26782 the function might contain a far jump. This test can lead to some
26783 false negatives, since before reload is completed the length of
26784 branch instructions is not known, so gcc defaults to returning their
26785 longest length, which in turn sets the far jump attribute to true.
26787 A false negative will not result in bad code being generated, but it
26788 will result in a needless push and pop of the link register. We
26789 hope that this does not occur too often.
26791 If we need doubleword stack alignment this could affect the other
26792 elimination offsets so we can't risk getting it wrong. */
26793 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26794 cfun->machine->arg_pointer_live = 1;
26795 else if (!cfun->machine->arg_pointer_live)
26796 return 0;
26799 /* We should not change far_jump_used during or after reload, as there is
26800 no chance to change stack frame layout. */
26801 if (reload_in_progress || reload_completed)
26802 return 0;
26804 /* Check to see if the function contains a branch
26805 insn with the far jump attribute set. */
26806 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26808 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26810 far_jump = true;
26812 func_size += get_attr_length (insn);
26815 /* The far_jump attribute is always true for thumb1 before the
26816 shorten_branch pass, so checking the far_jump attribute before
26817 shorten_branch is of little use.
26819 The following heuristic tries to estimate more accurately whether a far
26820 jump may finally be used. The heuristic is very conservative, as there is
26821 no chance to roll back a decision not to use far jumps.
26823 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
26824 that each 2-byte insn is associated with a 4-byte constant pool entry.
26825 Using function size 2048/3 as the threshold is conservative enough. */
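/* Worked example of the threshold above (added for illustration; not part
   of the original source): in the worst case every 2-byte insn drags in a
   4-byte literal-pool entry, so FUNC_SIZE bytes of instructions may span
   up to 3 * FUNC_SIZE bytes of code.  Keeping 3 * FUNC_SIZE below 2048
   (roughly FUNC_SIZE <= 682) means a short branch should still reach
   anywhere in the function; e.g. func_size == 700 gives 2100 >= 2048, so
   far jumps are assumed and LR will be saved.  */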
26826 if (far_jump)
26828 if ((func_size * 3) >= 2048)
26830 /* Record the fact that we have decided that
26831 the function does use far jumps. */
26832 cfun->machine->far_jump_used = 1;
26833 return 1;
26837 return 0;
26840 /* Return nonzero if FUNC must be entered in ARM mode. */
26842 is_called_in_ARM_mode (tree func)
26844 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26846 /* Ignore the problem of functions whose address is taken. */
26847 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26848 return TRUE;
26850 #ifdef ARM_PE
26851 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26852 #else
26853 return FALSE;
26854 #endif
26857 /* Given the stack offsets and register mask in OFFSETS, decide how
26858 many additional registers to push instead of subtracting a constant
26859 from SP. For epilogues the principle is the same except we use pop.
26860 FOR_PROLOGUE indicates which we're generating. */
26861 static int
26862 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26864 HOST_WIDE_INT amount;
26865 unsigned long live_regs_mask = offsets->saved_regs_mask;
26866 /* Extract a mask of the ones we can give to the Thumb's push/pop
26867 instruction. */
26868 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26869 /* Then count how many other high registers will need to be pushed. */
26870 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26871 int n_free, reg_base, size;
26873 if (!for_prologue && frame_pointer_needed)
26874 amount = offsets->locals_base - offsets->saved_regs;
26875 else
26876 amount = offsets->outgoing_args - offsets->saved_regs;
26878 /* If the stack frame size is 512 exactly, we can save one load
26879 instruction, which should make this a win even when optimizing
26880 for speed. */
26881 if (!optimize_size && amount != 512)
26882 return 0;
26884 /* Can't do this if there are high registers to push. */
26885 if (high_regs_pushed != 0)
26886 return 0;
26888 /* Shouldn't do it in the prologue if no registers would normally
26889 be pushed at all. In the epilogue, also allow it if we'll have
26890 a pop insn for the PC. */
26891 if (l_mask == 0
26892 && (for_prologue
26893 || TARGET_BACKTRACE
26894 || (live_regs_mask & 1 << LR_REGNUM) == 0
26895 || TARGET_INTERWORK
26896 || crtl->args.pretend_args_size != 0))
26897 return 0;
26899 /* Don't do this if thumb_expand_prologue wants to emit instructions
26900 between the push and the stack frame allocation. */
26901 if (for_prologue
26902 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26903 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26904 return 0;
26906 reg_base = 0;
26907 n_free = 0;
26908 if (!for_prologue)
26910 size = arm_size_return_regs ();
26911 reg_base = ARM_NUM_INTS (size);
26912 live_regs_mask >>= reg_base;
26915 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26916 && (for_prologue || call_used_regs[reg_base + n_free]))
26918 live_regs_mask >>= 1;
26919 n_free++;
26922 if (n_free == 0)
26923 return 0;
26924 gcc_assert (amount / 4 * 4 == amount);
26926 if (amount >= 512 && (amount - n_free * 4) < 512)
26927 return (amount - 508) / 4;
26928 if (amount <= n_free * 4)
26929 return amount / 4;
26930 return 0;
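/* Worked example (added for illustration; not part of the original
   source): suppose we are compiling with -Os, the prologue pushes
   {r4, lr}, r0-r3 are free (n_free == 4) and the frame needs a further
   516-byte adjustment.  Then amount >= 512 and 516 - 4 * 4 == 500 < 512,
   so the function returns (516 - 508) / 4 == 2: pushing two extra low
   registers shrinks the remaining adjustment to 508 bytes, which fits a
   single Thumb1 SP-adjusting instruction.  */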
26933 /* The bits which aren't usefully expanded as rtl. */
26934 const char *
26935 thumb1_unexpanded_epilogue (void)
26937 arm_stack_offsets *offsets;
26938 int regno;
26939 unsigned long live_regs_mask = 0;
26940 int high_regs_pushed = 0;
26941 int extra_pop;
26942 int had_to_push_lr;
26943 int size;
26945 if (cfun->machine->return_used_this_function != 0)
26946 return "";
26948 if (IS_NAKED (arm_current_func_type ()))
26949 return "";
26951 offsets = arm_get_frame_offsets ();
26952 live_regs_mask = offsets->saved_regs_mask;
26953 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26955 /* We can deduce the registers used from the function's return value.
26956 This is more reliable than examining df_regs_ever_live_p () because that
26957 will be set if the register is ever used in the function, not just if
26958 the register is used to hold a return value. */
26959 size = arm_size_return_regs ();
26961 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26962 if (extra_pop > 0)
26964 unsigned long extra_mask = (1 << extra_pop) - 1;
26965 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26968 /* The prologue may have pushed some high registers to use as
26969 work registers. E.g. the testsuite file:
26970 gcc/testsuite/gcc.c-torture/execute/complex-2.c
26971 compiles to produce:
26972 push {r4, r5, r6, r7, lr}
26973 mov r7, r9
26974 mov r6, r8
26975 push {r6, r7}
26976 as part of the prolog. We have to undo that pushing here. */
26978 if (high_regs_pushed)
26980 unsigned long mask = live_regs_mask & 0xff;
26981 int next_hi_reg;
26983 /* The available low registers depend on the size of the value we are
26984 returning. */
26985 if (size <= 12)
26986 mask |= 1 << 3;
26987 if (size <= 8)
26988 mask |= 1 << 2;
26990 if (mask == 0)
26991 /* Oh dear! We have no low registers into which we can pop
26992 high registers! */
26993 internal_error
26994 ("no low registers available for popping high registers");
26996 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26997 if (live_regs_mask & (1 << next_hi_reg))
26998 break;
27000 while (high_regs_pushed)
27002 /* Find lo register(s) into which the high register(s) can
27003 be popped. */
27004 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
27006 if (mask & (1 << regno))
27007 high_regs_pushed--;
27008 if (high_regs_pushed == 0)
27009 break;
27012 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
27014 /* Pop the values into the low register(s). */
27015 thumb_pop (asm_out_file, mask);
27017 /* Move the value(s) into the high registers. */
27018 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
27020 if (mask & (1 << regno))
27022 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
27023 regno);
27025 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
27026 if (live_regs_mask & (1 << next_hi_reg))
27027 break;
27031 live_regs_mask &= ~0x0f00;
27034 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
27035 live_regs_mask &= 0xff;
27037 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
27039 /* Pop the return address into the PC. */
27040 if (had_to_push_lr)
27041 live_regs_mask |= 1 << PC_REGNUM;
27043 /* Either no argument registers were pushed or a backtrace
27044 structure was created which includes an adjusted stack
27045 pointer, so just pop everything. */
27046 if (live_regs_mask)
27047 thumb_pop (asm_out_file, live_regs_mask);
27049 /* We have either just popped the return address into the
27050 PC or it was kept in LR for the entire function.
27051 Note that thumb_pop has already called thumb_exit if the
27052 PC was in the list. */
27053 if (!had_to_push_lr)
27054 thumb_exit (asm_out_file, LR_REGNUM);
27056 else
27058 /* Pop everything but the return address. */
27059 if (live_regs_mask)
27060 thumb_pop (asm_out_file, live_regs_mask);
27062 if (had_to_push_lr)
27064 if (size > 12)
27066 /* We have no free low regs, so save one. */
27067 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27068 LAST_ARG_REGNUM);
27071 /* Get the return address into a temporary register. */
27072 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27074 if (size > 12)
27076 /* Move the return address to lr. */
27077 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27078 LAST_ARG_REGNUM);
27079 /* Restore the low register. */
27080 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27081 IP_REGNUM);
27082 regno = LR_REGNUM;
27084 else
27085 regno = LAST_ARG_REGNUM;
27087 else
27088 regno = LR_REGNUM;
27090 /* Remove the argument registers that were pushed onto the stack. */
27091 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27092 SP_REGNUM, SP_REGNUM,
27093 crtl->args.pretend_args_size);
27095 thumb_exit (asm_out_file, regno);
27098 return "";
27101 /* Functions to save and restore machine-specific function data. */
27102 static struct machine_function *
27103 arm_init_machine_status (void)
27105 struct machine_function *machine;
27106 machine = ggc_cleared_alloc<machine_function> ();
27108 #if ARM_FT_UNKNOWN != 0
27109 machine->func_type = ARM_FT_UNKNOWN;
27110 #endif
27111 return machine;
27114 /* Return an RTX indicating where the return address to the
27115 calling function can be found. */
27117 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27119 if (count != 0)
27120 return NULL_RTX;
27122 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27125 /* Do anything needed before RTL is emitted for each function. */
27126 void
27127 arm_init_expanders (void)
27129 /* Arrange to initialize and mark the machine per-function status. */
27130 init_machine_status = arm_init_machine_status;
27132 /* This is to stop the combine pass optimizing away the alignment
27133 adjustment of va_arg. */
27134 /* ??? It is claimed that this should not be necessary. */
27135 if (cfun)
27136 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27140 /* Like arm_compute_initial_elimination_offset. Simpler because there
27141 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27142 to point at the base of the local variables after static stack
27143 space for a function has been allocated. */
27145 HOST_WIDE_INT
27146 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27148 arm_stack_offsets *offsets;
27150 offsets = arm_get_frame_offsets ();
27152 switch (from)
27154 case ARG_POINTER_REGNUM:
27155 switch (to)
27157 case STACK_POINTER_REGNUM:
27158 return offsets->outgoing_args - offsets->saved_args;
27160 case FRAME_POINTER_REGNUM:
27161 return offsets->soft_frame - offsets->saved_args;
27163 case ARM_HARD_FRAME_POINTER_REGNUM:
27164 return offsets->saved_regs - offsets->saved_args;
27166 case THUMB_HARD_FRAME_POINTER_REGNUM:
27167 return offsets->locals_base - offsets->saved_args;
27169 default:
27170 gcc_unreachable ();
27172 break;
27174 case FRAME_POINTER_REGNUM:
27175 switch (to)
27177 case STACK_POINTER_REGNUM:
27178 return offsets->outgoing_args - offsets->soft_frame;
27180 case ARM_HARD_FRAME_POINTER_REGNUM:
27181 return offsets->saved_regs - offsets->soft_frame;
27183 case THUMB_HARD_FRAME_POINTER_REGNUM:
27184 return offsets->locals_base - offsets->soft_frame;
27186 default:
27187 gcc_unreachable ();
27189 break;
27191 default:
27192 gcc_unreachable ();
27196 /* Generate the function's prologue. */
27198 void
27199 thumb1_expand_prologue (void)
27201 rtx_insn *insn;
27203 HOST_WIDE_INT amount;
27204 arm_stack_offsets *offsets;
27205 unsigned long func_type;
27206 int regno;
27207 unsigned long live_regs_mask;
27208 unsigned long l_mask;
27209 unsigned high_regs_pushed = 0;
27211 func_type = arm_current_func_type ();
27213 /* Naked functions don't have prologues. */
27214 if (IS_NAKED (func_type))
27215 return;
27217 if (IS_INTERRUPT (func_type))
27219 error ("interrupt Service Routines cannot be coded in Thumb mode");
27220 return;
27223 if (is_called_in_ARM_mode (current_function_decl))
27224 emit_insn (gen_prologue_thumb1_interwork ());
27226 offsets = arm_get_frame_offsets ();
27227 live_regs_mask = offsets->saved_regs_mask;
27229 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27230 l_mask = live_regs_mask & 0x40ff;
27231 /* Then count how many other high registers will need to be pushed. */
27232 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27234 if (crtl->args.pretend_args_size)
27236 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27238 if (cfun->machine->uses_anonymous_args)
27240 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27241 unsigned long mask;
27243 mask = 1ul << (LAST_ARG_REGNUM + 1);
27244 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27246 insn = thumb1_emit_multi_reg_push (mask, 0);
27248 else
27250 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27251 stack_pointer_rtx, x));
27253 RTX_FRAME_RELATED_P (insn) = 1;
27256 if (TARGET_BACKTRACE)
27258 HOST_WIDE_INT offset = 0;
27259 unsigned work_register;
27260 rtx work_reg, x, arm_hfp_rtx;
27262 /* We have been asked to create a stack backtrace structure.
27263 The code looks like this:
27265 0 .align 2
27266 0 func:
27267 0 sub SP, #16 Reserve space for 4 registers.
27268 2 push {R7} Push low registers.
27269 4 add R7, SP, #20 Get the stack pointer before the push.
27270 6 str R7, [SP, #8] Store the stack pointer
27271 (before reserving the space).
27272 8 mov R7, PC Get hold of the start of this code + 12.
27273 10 str R7, [SP, #16] Store it.
27274 12 mov R7, FP Get hold of the current frame pointer.
27275 14 str R7, [SP, #4] Store it.
27276 16 mov R7, LR Get hold of the current return address.
27277 18 str R7, [SP, #12] Store it.
27278 20 add R7, SP, #16 Point at the start of the
27279 backtrace structure.
27280 22 mov FP, R7 Put this value into the frame pointer. */
27282 work_register = thumb_find_work_register (live_regs_mask);
27283 work_reg = gen_rtx_REG (SImode, work_register);
27284 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27286 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27287 stack_pointer_rtx, GEN_INT (-16)));
27288 RTX_FRAME_RELATED_P (insn) = 1;
27290 if (l_mask)
27292 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27293 RTX_FRAME_RELATED_P (insn) = 1;
27295 offset = bit_count (l_mask) * UNITS_PER_WORD;
27298 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27299 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27301 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27302 x = gen_frame_mem (SImode, x);
27303 emit_move_insn (x, work_reg);
27305 /* Make sure that the instruction fetching the PC is in the right place
27306 to calculate "start of backtrace creation code + 12". */
27307 /* ??? The stores using the common WORK_REG ought to be enough to
27308 prevent the scheduler from doing anything weird. Failing that
27309 we could always move all of the following into an UNSPEC_VOLATILE. */
27310 if (l_mask)
27312 x = gen_rtx_REG (SImode, PC_REGNUM);
27313 emit_move_insn (work_reg, x);
27315 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27316 x = gen_frame_mem (SImode, x);
27317 emit_move_insn (x, work_reg);
27319 emit_move_insn (work_reg, arm_hfp_rtx);
27321 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27322 x = gen_frame_mem (SImode, x);
27323 emit_move_insn (x, work_reg);
27325 else
27327 emit_move_insn (work_reg, arm_hfp_rtx);
27329 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27330 x = gen_frame_mem (SImode, x);
27331 emit_move_insn (x, work_reg);
27333 x = gen_rtx_REG (SImode, PC_REGNUM);
27334 emit_move_insn (work_reg, x);
27336 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27337 x = gen_frame_mem (SImode, x);
27338 emit_move_insn (x, work_reg);
27341 x = gen_rtx_REG (SImode, LR_REGNUM);
27342 emit_move_insn (work_reg, x);
27344 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27345 x = gen_frame_mem (SImode, x);
27346 emit_move_insn (x, work_reg);
27348 x = GEN_INT (offset + 12);
27349 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27351 emit_move_insn (arm_hfp_rtx, work_reg);
27353 /* Optimization: If we are not pushing any low registers but we are going
27354 to push some high registers then delay our first push. This will just
27355 be a push of LR and we can combine it with the push of the first high
27356 register. */
27357 else if ((l_mask & 0xff) != 0
27358 || (high_regs_pushed == 0 && l_mask))
27360 unsigned long mask = l_mask;
27361 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27362 insn = thumb1_emit_multi_reg_push (mask, mask);
27363 RTX_FRAME_RELATED_P (insn) = 1;
27366 if (high_regs_pushed)
27368 unsigned pushable_regs;
27369 unsigned next_hi_reg;
27370 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27371 : crtl->args.info.nregs;
27372 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27374 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27375 if (live_regs_mask & (1 << next_hi_reg))
27376 break;
27378 /* Here we need to mask out registers used for passing arguments
27379 even if they could be pushed, to avoid using them to stash the high
27380 registers; doing so could clobber the argument values. */
27381 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27383 if (pushable_regs == 0)
27384 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27386 while (high_regs_pushed > 0)
27388 unsigned long real_regs_mask = 0;
27390 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27392 if (pushable_regs & (1 << regno))
27394 emit_move_insn (gen_rtx_REG (SImode, regno),
27395 gen_rtx_REG (SImode, next_hi_reg));
27397 high_regs_pushed --;
27398 real_regs_mask |= (1 << next_hi_reg);
27400 if (high_regs_pushed)
27402 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27403 next_hi_reg --)
27404 if (live_regs_mask & (1 << next_hi_reg))
27405 break;
27407 else
27409 pushable_regs &= ~((1 << regno) - 1);
27410 break;
27415 /* If we had to find a work register and we have not yet
27416 saved the LR then add it to the list of regs to push. */
27417 if (l_mask == (1 << LR_REGNUM))
27419 pushable_regs |= l_mask;
27420 real_regs_mask |= l_mask;
27421 l_mask = 0;
27424 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27425 RTX_FRAME_RELATED_P (insn) = 1;
27429 /* Load the pic register before setting the frame pointer,
27430 so we can use r7 as a temporary work register. */
27431 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27432 arm_load_pic_register (live_regs_mask);
27434 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27435 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27436 stack_pointer_rtx);
27438 if (flag_stack_usage_info)
27439 current_function_static_stack_size
27440 = offsets->outgoing_args - offsets->saved_args;
27442 amount = offsets->outgoing_args - offsets->saved_regs;
27443 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27444 if (amount)
27446 if (amount < 512)
27448 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27449 GEN_INT (- amount)));
27450 RTX_FRAME_RELATED_P (insn) = 1;
27452 else
27454 rtx reg, dwarf;
27456 /* The stack decrement is too big for an immediate value in a single
27457 insn. In theory we could issue multiple subtracts, but after
27458 three of them it becomes more space efficient to place the full
27459 value in the constant pool and load into a register. (Also the
27460 ARM debugger really likes to see only one stack decrement per
27461 function). So instead we look for a scratch register into which
27462 we can load the decrement, and then we subtract this from the
27463 stack pointer. Unfortunately on the thumb the only available
27464 scratch registers are the argument registers, and we cannot use
27465 these as they may hold arguments to the function. Instead we
27466 attempt to locate a call preserved register which is used by this
27467 function. If we can find one, then we know that it will have
27468 been pushed at the start of the prologue and so we can corrupt
27469 it now. */
27470 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27471 if (live_regs_mask & (1 << regno))
27472 break;
27474 gcc_assert(regno <= LAST_LO_REGNUM);
27476 reg = gen_rtx_REG (SImode, regno);
27478 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27480 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27481 stack_pointer_rtx, reg));
27483 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27484 plus_constant (Pmode, stack_pointer_rtx,
27485 -amount));
27486 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27487 RTX_FRAME_RELATED_P (insn) = 1;
27491 if (frame_pointer_needed)
27492 thumb_set_frame_pointer (offsets);
27494 /* If we are profiling, make sure no instructions are scheduled before
27495 the call to mcount. Similarly if the user has requested no
27496 scheduling in the prolog. Similarly if we want non-call exceptions
27497 using the EABI unwinder, to prevent faulting instructions from being
27498 swapped with a stack adjustment. */
27499 if (crtl->profile || !TARGET_SCHED_PROLOG
27500 || (arm_except_unwind_info (&global_options) == UI_TARGET
27501 && cfun->can_throw_non_call_exceptions))
27502 emit_insn (gen_blockage ());
27504 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27505 if (live_regs_mask & 0xff)
27506 cfun->machine->lr_save_eliminated = 0;
27509 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27510 single POP instruction can be generated. LR should be replaced by PC.
27511 All the checks required are already done by USE_RETURN_INSN (). Hence,
27512 all we really need to check here is whether a single register or
27513 multiple registers are to be popped on return. */
27514 void
27515 thumb2_expand_return (bool simple_return)
27517 int i, num_regs;
27518 unsigned long saved_regs_mask;
27519 arm_stack_offsets *offsets;
27521 offsets = arm_get_frame_offsets ();
27522 saved_regs_mask = offsets->saved_regs_mask;
27524 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27525 if (saved_regs_mask & (1 << i))
27526 num_regs++;
27528 if (!simple_return && saved_regs_mask)
27530 if (num_regs == 1)
27532 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27533 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27534 rtx addr = gen_rtx_MEM (SImode,
27535 gen_rtx_POST_INC (SImode,
27536 stack_pointer_rtx));
27537 set_mem_alias_set (addr, get_frame_alias_set ());
27538 XVECEXP (par, 0, 0) = ret_rtx;
27539 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27540 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27541 emit_jump_insn (par);
27543 else
27545 saved_regs_mask &= ~ (1 << LR_REGNUM);
27546 saved_regs_mask |= (1 << PC_REGNUM);
27547 arm_emit_multi_reg_pop (saved_regs_mask);
27550 else
27552 emit_jump_insn (simple_return_rtx);
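/* For illustration (added comment; not part of the original source): if
   only LR was saved, the path above emits roughly "ldr pc, [sp], #4"; if
   several registers were saved, LR is replaced by PC in the mask and a
   single "pop {..., pc}" is emitted; and for a simple return no pop is
   needed at all.  */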
27556 void
27557 thumb1_expand_epilogue (void)
27559 HOST_WIDE_INT amount;
27560 arm_stack_offsets *offsets;
27561 int regno;
27563 /* Naked functions don't have epilogues. */
27564 if (IS_NAKED (arm_current_func_type ()))
27565 return;
27567 offsets = arm_get_frame_offsets ();
27568 amount = offsets->outgoing_args - offsets->saved_regs;
27570 if (frame_pointer_needed)
27572 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27573 amount = offsets->locals_base - offsets->saved_regs;
27575 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27577 gcc_assert (amount >= 0);
27578 if (amount)
27580 emit_insn (gen_blockage ());
27582 if (amount < 512)
27583 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27584 GEN_INT (amount)));
27585 else
27587 /* r3 is always free in the epilogue. */
27588 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27590 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27591 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27595 /* Emit a USE (stack_pointer_rtx), so that
27596 the stack adjustment will not be deleted. */
27597 emit_insn (gen_force_register_use (stack_pointer_rtx));
27599 if (crtl->profile || !TARGET_SCHED_PROLOG)
27600 emit_insn (gen_blockage ());
27602 /* Emit a clobber for each insn that will be restored in the epilogue,
27603 so that flow2 will get register lifetimes correct. */
27604 for (regno = 0; regno < 13; regno++)
27605 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27606 emit_clobber (gen_rtx_REG (SImode, regno));
27608 if (! df_regs_ever_live_p (LR_REGNUM))
27609 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27612 /* Epilogue code for APCS frame. */
27613 static void
27614 arm_expand_epilogue_apcs_frame (bool really_return)
27616 unsigned long func_type;
27617 unsigned long saved_regs_mask;
27618 int num_regs = 0;
27619 int i;
27620 int floats_from_frame = 0;
27621 arm_stack_offsets *offsets;
27623 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27624 func_type = arm_current_func_type ();
27626 /* Get frame offsets for ARM. */
27627 offsets = arm_get_frame_offsets ();
27628 saved_regs_mask = offsets->saved_regs_mask;
27630 /* Find the offset of the floating-point save area in the frame. */
27631 floats_from_frame
27632 = (offsets->saved_args
27633 + arm_compute_static_chain_stack_bytes ()
27634 - offsets->frame);
27636 /* Compute how many core registers saved and how far away the floats are. */
27637 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27638 if (saved_regs_mask & (1 << i))
27640 num_regs++;
27641 floats_from_frame += 4;
27644 if (TARGET_HARD_FLOAT && TARGET_VFP)
27646 int start_reg;
27647 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27649 /* The offset is from IP_REGNUM. */
27650 int saved_size = arm_get_vfp_saved_size ();
27651 if (saved_size > 0)
27653 rtx_insn *insn;
27654 floats_from_frame += saved_size;
27655 insn = emit_insn (gen_addsi3 (ip_rtx,
27656 hard_frame_pointer_rtx,
27657 GEN_INT (-floats_from_frame)));
27658 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27659 ip_rtx, hard_frame_pointer_rtx);
27662 /* Generate VFP register multi-pop. */
27663 start_reg = FIRST_VFP_REGNUM;
27665 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27666 /* Look for a case where a reg does not need restoring. */
27667 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27668 && (!df_regs_ever_live_p (i + 1)
27669 || call_used_regs[i + 1]))
27671 if (start_reg != i)
27672 arm_emit_vfp_multi_reg_pop (start_reg,
27673 (i - start_reg) / 2,
27674 gen_rtx_REG (SImode,
27675 IP_REGNUM));
27676 start_reg = i + 2;
27679 /* Restore the remaining regs that we have discovered (or possibly
27680 even all of them, if the conditional in the for loop never
27681 fired). */
27682 if (start_reg != i)
27683 arm_emit_vfp_multi_reg_pop (start_reg,
27684 (i - start_reg) / 2,
27685 gen_rtx_REG (SImode, IP_REGNUM));
27688 if (TARGET_IWMMXT)
27690 /* The frame pointer is guaranteed to be non-double-word aligned, as
27691 it is set to double-word-aligned old_stack_pointer - 4. */
27692 rtx_insn *insn;
27693 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27695 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27696 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27698 rtx addr = gen_frame_mem (V2SImode,
27699 plus_constant (Pmode, hard_frame_pointer_rtx,
27700 - lrm_count * 4));
27701 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27702 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27703 gen_rtx_REG (V2SImode, i),
27704 NULL_RTX);
27705 lrm_count += 2;
27709 /* saved_regs_mask should contain IP, which holds the old stack pointer
27710 saved at the time the activation record was created. Since SP and IP are
27711 adjacent registers, we can restore the value directly into SP. */
27712 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27713 saved_regs_mask &= ~(1 << IP_REGNUM);
27714 saved_regs_mask |= (1 << SP_REGNUM);
27716 /* There are two registers left in saved_regs_mask - LR and PC. We
27717 only need to restore LR (the return address), but to
27718 save time we can load it directly into PC, unless we need a
27719 special function exit sequence, or we are not really returning. */
27720 if (really_return
27721 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27722 && !crtl->calls_eh_return)
27723 /* Delete LR from the register mask, so that LR on
27724 the stack is loaded into the PC in the register mask. */
27725 saved_regs_mask &= ~(1 << LR_REGNUM);
27726 else
27727 saved_regs_mask &= ~(1 << PC_REGNUM);
27729 num_regs = bit_count (saved_regs_mask);
27730 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27732 rtx_insn *insn;
27733 emit_insn (gen_blockage ());
27734 /* Unwind the stack to just below the saved registers. */
27735 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27736 hard_frame_pointer_rtx,
27737 GEN_INT (- 4 * num_regs)));
27739 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27740 stack_pointer_rtx, hard_frame_pointer_rtx);
27743 arm_emit_multi_reg_pop (saved_regs_mask);
27745 if (IS_INTERRUPT (func_type))
27747 /* Interrupt handlers will have pushed the
27748 IP onto the stack, so restore it now. */
27749 rtx_insn *insn;
27750 rtx addr = gen_rtx_MEM (SImode,
27751 gen_rtx_POST_INC (SImode,
27752 stack_pointer_rtx));
27753 set_mem_alias_set (addr, get_frame_alias_set ());
27754 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27755 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27756 gen_rtx_REG (SImode, IP_REGNUM),
27757 NULL_RTX);
27760 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27761 return;
27763 if (crtl->calls_eh_return)
27764 emit_insn (gen_addsi3 (stack_pointer_rtx,
27765 stack_pointer_rtx,
27766 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27768 if (IS_STACKALIGN (func_type))
27769 /* Restore the original stack pointer. Before prologue, the stack was
27770 realigned and the original stack pointer saved in r0. For details,
27771 see comment in arm_expand_prologue. */
27772 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27774 emit_jump_insn (simple_return_rtx);
27777 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27778 function is not a sibcall. */
27779 void
27780 arm_expand_epilogue (bool really_return)
27782 unsigned long func_type;
27783 unsigned long saved_regs_mask;
27784 int num_regs = 0;
27785 int i;
27786 int amount;
27787 arm_stack_offsets *offsets;
27789 func_type = arm_current_func_type ();
27791 /* Naked functions don't have epilogues. Hence, generate a return pattern
27792 and let output_return_instruction take care of any instruction emission. */
27793 if (IS_NAKED (func_type)
27794 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27796 if (really_return)
27797 emit_jump_insn (simple_return_rtx);
27798 return;
27801 /* If we are throwing an exception, then we really must be doing a
27802 return, so we can't tail-call. */
27803 gcc_assert (!crtl->calls_eh_return || really_return);
27805 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27807 arm_expand_epilogue_apcs_frame (really_return);
27808 return;
27811 /* Get frame offsets for ARM. */
27812 offsets = arm_get_frame_offsets ();
27813 saved_regs_mask = offsets->saved_regs_mask;
27814 num_regs = bit_count (saved_regs_mask);
27816 if (frame_pointer_needed)
27818 rtx_insn *insn;
27819 /* Restore stack pointer if necessary. */
27820 if (TARGET_ARM)
27822 /* In ARM mode, frame pointer points to first saved register.
27823 Restore stack pointer to last saved register. */
27824 amount = offsets->frame - offsets->saved_regs;
27826 /* Force out any pending memory operations that reference stacked data
27827 before stack de-allocation occurs. */
27828 emit_insn (gen_blockage ());
27829 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27830 hard_frame_pointer_rtx,
27831 GEN_INT (amount)));
27832 arm_add_cfa_adjust_cfa_note (insn, amount,
27833 stack_pointer_rtx,
27834 hard_frame_pointer_rtx);
27836 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27837 deleted. */
27838 emit_insn (gen_force_register_use (stack_pointer_rtx));
27840 else
27842 /* In Thumb-2 mode, the frame pointer points to the last saved
27843 register. */
27844 amount = offsets->locals_base - offsets->saved_regs;
27845 if (amount)
27847 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27848 hard_frame_pointer_rtx,
27849 GEN_INT (amount)));
27850 arm_add_cfa_adjust_cfa_note (insn, amount,
27851 hard_frame_pointer_rtx,
27852 hard_frame_pointer_rtx);
27855 /* Force out any pending memory operations that reference stacked data
27856 before stack de-allocation occurs. */
27857 emit_insn (gen_blockage ());
27858 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27859 hard_frame_pointer_rtx));
27860 arm_add_cfa_adjust_cfa_note (insn, 0,
27861 stack_pointer_rtx,
27862 hard_frame_pointer_rtx);
27863 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27864 deleted. */
27865 emit_insn (gen_force_register_use (stack_pointer_rtx));
27868 else
27870 /* Pop off outgoing args and local frame to adjust stack pointer to
27871 last saved register. */
27872 amount = offsets->outgoing_args - offsets->saved_regs;
27873 if (amount)
27875 rtx_insn *tmp;
27876 /* Force out any pending memory operations that reference stacked data
27877 before stack de-allocation occurs. */
27878 emit_insn (gen_blockage ());
27879 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27880 stack_pointer_rtx,
27881 GEN_INT (amount)));
27882 arm_add_cfa_adjust_cfa_note (tmp, amount,
27883 stack_pointer_rtx, stack_pointer_rtx);
27884 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27885 not deleted. */
27886 emit_insn (gen_force_register_use (stack_pointer_rtx));
27890 if (TARGET_HARD_FLOAT && TARGET_VFP)
27892 /* Generate VFP register multi-pop. */
27893 int end_reg = LAST_VFP_REGNUM + 1;
27895 /* Scan the registers in reverse order. We need to match
27896 any groupings made in the prologue and generate matching
27897 vldm operations. The need to match groups is because,
27898 unlike pop, vldm can only do consecutive regs. */
27899 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27900 /* Look for a case where a reg does not need restoring. */
27901 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27902 && (!df_regs_ever_live_p (i + 1)
27903 || call_used_regs[i + 1]))
27905 /* Restore the regs discovered so far (from reg+2 to
27906 end_reg). */
27907 if (end_reg > i + 2)
27908 arm_emit_vfp_multi_reg_pop (i + 2,
27909 (end_reg - (i + 2)) / 2,
27910 stack_pointer_rtx);
27911 end_reg = i;
27914 /* Restore the remaining regs that we have discovered (or possibly
27915 even all of them, if the conditional in the for loop never
27916 fired). */
27917 if (end_reg > i + 2)
27918 arm_emit_vfp_multi_reg_pop (i + 2,
27919 (end_reg - (i + 2)) / 2,
27920 stack_pointer_rtx);
27923 if (TARGET_IWMMXT)
27924 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27925 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27927 rtx_insn *insn;
27928 rtx addr = gen_rtx_MEM (V2SImode,
27929 gen_rtx_POST_INC (SImode,
27930 stack_pointer_rtx));
27931 set_mem_alias_set (addr, get_frame_alias_set ());
27932 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27933 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27934 gen_rtx_REG (V2SImode, i),
27935 NULL_RTX);
27936 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27937 stack_pointer_rtx, stack_pointer_rtx);
27940 if (saved_regs_mask)
27942 rtx insn;
27943 bool return_in_pc = false;
27945 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27946 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27947 && !IS_STACKALIGN (func_type)
27948 && really_return
27949 && crtl->args.pretend_args_size == 0
27950 && saved_regs_mask & (1 << LR_REGNUM)
27951 && !crtl->calls_eh_return)
27953 saved_regs_mask &= ~(1 << LR_REGNUM);
27954 saved_regs_mask |= (1 << PC_REGNUM);
27955 return_in_pc = true;
27958 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27960 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27961 if (saved_regs_mask & (1 << i))
27963 rtx addr = gen_rtx_MEM (SImode,
27964 gen_rtx_POST_INC (SImode,
27965 stack_pointer_rtx));
27966 set_mem_alias_set (addr, get_frame_alias_set ());
27968 if (i == PC_REGNUM)
27970 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27971 XVECEXP (insn, 0, 0) = ret_rtx;
27972 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27973 gen_rtx_REG (SImode, i),
27974 addr);
27975 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27976 insn = emit_jump_insn (insn);
27978 else
27980 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27981 addr));
27982 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27983 gen_rtx_REG (SImode, i),
27984 NULL_RTX);
27985 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27986 stack_pointer_rtx,
27987 stack_pointer_rtx);
27991 else
27993 if (TARGET_LDRD
27994 && current_tune->prefer_ldrd_strd
27995 && !optimize_function_for_size_p (cfun))
27997 if (TARGET_THUMB2)
27998 thumb2_emit_ldrd_pop (saved_regs_mask);
27999 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28000 arm_emit_ldrd_pop (saved_regs_mask);
28001 else
28002 arm_emit_multi_reg_pop (saved_regs_mask);
28004 else
28005 arm_emit_multi_reg_pop (saved_regs_mask);
28008 if (return_in_pc == true)
28009 return;
28012 if (crtl->args.pretend_args_size)
28014 int i, j;
28015 rtx dwarf = NULL_RTX;
28016 rtx_insn *tmp =
28017 emit_insn (gen_addsi3 (stack_pointer_rtx,
28018 stack_pointer_rtx,
28019 GEN_INT (crtl->args.pretend_args_size)));
28021 RTX_FRAME_RELATED_P (tmp) = 1;
28023 if (cfun->machine->uses_anonymous_args)
28025 /* Restore pretend args. Refer to arm_expand_prologue for how the
28026 pretend args are saved on the stack. */
28027 int num_regs = crtl->args.pretend_args_size / 4;
28028 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
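/* Worked example (added for illustration; not part of the original
   source): if 8 bytes of pretend args were pushed then num_regs == 2 and
   the mask is (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3 -- exactly the
   argument registers the prologue spilled.  */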
28029 for (j = 0, i = 0; j < num_regs; i++)
28030 if (saved_regs_mask & (1 << i))
28032 rtx reg = gen_rtx_REG (SImode, i);
28033 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28034 j++;
28036 REG_NOTES (tmp) = dwarf;
28038 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
28039 stack_pointer_rtx, stack_pointer_rtx);
28042 if (!really_return)
28043 return;
28045 if (crtl->calls_eh_return)
28046 emit_insn (gen_addsi3 (stack_pointer_rtx,
28047 stack_pointer_rtx,
28048 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28050 if (IS_STACKALIGN (func_type))
28051 /* Restore the original stack pointer. Before prologue, the stack was
28052 realigned and the original stack pointer saved in r0. For details,
28053 see comment in arm_expand_prologue. */
28054 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
28056 emit_jump_insn (simple_return_rtx);
28059 /* Implementation of insn prologue_thumb1_interwork. This is the first
28060 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28062 const char *
28063 thumb1_output_interwork (void)
28065 const char * name;
28066 FILE *f = asm_out_file;
28068 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28069 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28070 == SYMBOL_REF);
28071 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28073 /* Generate code sequence to switch us into Thumb mode. */
28074 /* The .code 32 directive has already been emitted by
28075 ASM_DECLARE_FUNCTION_NAME. */
28076 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28077 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28079 /* Generate a label, so that the debugger will notice the
28080 change in instruction sets. This label is also used by
28081 the assembler to bypass the ARM code when this function
28082 is called from a Thumb encoded function elsewhere in the
28083 same file. Hence the definition of STUB_NAME here must
28084 agree with the definition in gas/config/tc-arm.c. */
28086 #define STUB_NAME ".real_start_of"
28088 fprintf (f, "\t.code\t16\n");
28089 #ifdef ARM_PE
28090 if (arm_dllexport_name_p (name))
28091 name = arm_strip_name_encoding (name);
28092 #endif
28093 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28094 fprintf (f, "\t.thumb_func\n");
28095 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28097 return "";
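/* For illustration (added comment; not part of the original source), for
   a function named "foo" the code above emits roughly:

	orr	ip, pc, #1		@ executed in ARM state
	bx	ip			@ branch to the Thumb code that follows
	.code	16
	.globl	.real_start_of<prefix>foo
	.thumb_func
   .real_start_of<prefix>foo:

   where <prefix> stands for the target's user-label prefix (often empty
   or "_").  */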
28100 /* Handle the case of a double word load into a low register from
28101 a computed memory address. The computed address may involve a
28102 register which is overwritten by the load. */
28103 const char *
28104 thumb_load_double_from_address (rtx *operands)
28106 rtx addr;
28107 rtx base;
28108 rtx offset;
28109 rtx arg1;
28110 rtx arg2;
28112 gcc_assert (REG_P (operands[0]));
28113 gcc_assert (MEM_P (operands[1]));
28115 /* Get the memory address. */
28116 addr = XEXP (operands[1], 0);
28118 /* Work out how the memory address is computed. */
28119 switch (GET_CODE (addr))
28121 case REG:
28122 operands[2] = adjust_address (operands[1], SImode, 4);
28124 if (REGNO (operands[0]) == REGNO (addr))
28126 output_asm_insn ("ldr\t%H0, %2", operands);
28127 output_asm_insn ("ldr\t%0, %1", operands);
28129 else
28131 output_asm_insn ("ldr\t%0, %1", operands);
28132 output_asm_insn ("ldr\t%H0, %2", operands);
28134 break;
28136 case CONST:
28137 /* Compute <address> + 4 for the high order load. */
28138 operands[2] = adjust_address (operands[1], SImode, 4);
28140 output_asm_insn ("ldr\t%0, %1", operands);
28141 output_asm_insn ("ldr\t%H0, %2", operands);
28142 break;
28144 case PLUS:
28145 arg1 = XEXP (addr, 0);
28146 arg2 = XEXP (addr, 1);
28148 if (CONSTANT_P (arg1))
28149 base = arg2, offset = arg1;
28150 else
28151 base = arg1, offset = arg2;
28153 gcc_assert (REG_P (base));
28155 /* Catch the case of <address> = <reg> + <reg> */
28156 if (REG_P (offset))
28158 int reg_offset = REGNO (offset);
28159 int reg_base = REGNO (base);
28160 int reg_dest = REGNO (operands[0]);
28162 /* Add the base and offset registers together into the
28163 higher destination register. */
28164 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28165 reg_dest + 1, reg_base, reg_offset);
28167 /* Load the lower destination register from the address in
28168 the higher destination register. */
28169 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28170 reg_dest, reg_dest + 1);
28172 /* Load the higher destination register from its own address
28173 plus 4. */
28174 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28175 reg_dest + 1, reg_dest + 1);
28177 else
28179 /* Compute <address> + 4 for the high order load. */
28180 operands[2] = adjust_address (operands[1], SImode, 4);
28182 /* If the computed address is held in the low order register
28183 then load the high order register first, otherwise always
28184 load the low order register first. */
28185 if (REGNO (operands[0]) == REGNO (base))
28187 output_asm_insn ("ldr\t%H0, %2", operands);
28188 output_asm_insn ("ldr\t%0, %1", operands);
28190 else
28192 output_asm_insn ("ldr\t%0, %1", operands);
28193 output_asm_insn ("ldr\t%H0, %2", operands);
28196 break;
28198 case LABEL_REF:
28199 /* With no registers to worry about we can just load the value
28200 directly. */
28201 operands[2] = adjust_address (operands[1], SImode, 4);
28203 output_asm_insn ("ldr\t%H0, %2", operands);
28204 output_asm_insn ("ldr\t%0, %1", operands);
28205 break;
28207 default:
28208 gcc_unreachable ();
28211 return "";
28214 const char *
28215 thumb_output_move_mem_multiple (int n, rtx *operands)
28217 rtx tmp;
28219 switch (n)
28221 case 2:
28222 if (REGNO (operands[4]) > REGNO (operands[5]))
28224 tmp = operands[4];
28225 operands[4] = operands[5];
28226 operands[5] = tmp;
28228 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28229 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28230 break;
28232 case 3:
28233 if (REGNO (operands[4]) > REGNO (operands[5]))
28235 tmp = operands[4];
28236 operands[4] = operands[5];
28237 operands[5] = tmp;
28239 if (REGNO (operands[5]) > REGNO (operands[6]))
28241 tmp = operands[5];
28242 operands[5] = operands[6];
28243 operands[6] = tmp;
28245 if (REGNO (operands[4]) > REGNO (operands[5]))
28247 tmp = operands[4];
28248 operands[4] = operands[5];
28249 operands[5] = tmp;
28252 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28253 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28254 break;
28256 default:
28257 gcc_unreachable ();
28260 return "";
28263 /* Output a call-via instruction for thumb state. */
28264 const char *
28265 thumb_call_via_reg (rtx reg)
28267 int regno = REGNO (reg);
28268 rtx *labelp;
28270 gcc_assert (regno < LR_REGNUM);
28272 /* If we are in the normal text section we can use a single instance
28273 per compilation unit. If we are doing function sections, then we need
28274 an entry per section, since we can't rely on reachability. */
28275 if (in_section == text_section)
28277 thumb_call_reg_needed = 1;
28279 if (thumb_call_via_label[regno] == NULL)
28280 thumb_call_via_label[regno] = gen_label_rtx ();
28281 labelp = thumb_call_via_label + regno;
28283 else
28285 if (cfun->machine->call_via[regno] == NULL)
28286 cfun->machine->call_via[regno] = gen_label_rtx ();
28287 labelp = cfun->machine->call_via + regno;
28290 output_asm_insn ("bl\t%a0", labelp);
28291 return "";
28294 /* Routines for generating rtl. */
28295 void
28296 thumb_expand_movmemqi (rtx *operands)
28298 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28299 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28300 HOST_WIDE_INT len = INTVAL (operands[2]);
28301 HOST_WIDE_INT offset = 0;
28303 while (len >= 12)
28305 emit_insn (gen_movmem12b (out, in, out, in));
28306 len -= 12;
28309 if (len >= 8)
28311 emit_insn (gen_movmem8b (out, in, out, in));
28312 len -= 8;
28315 if (len >= 4)
28317 rtx reg = gen_reg_rtx (SImode);
28318 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28319 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28320 len -= 4;
28321 offset += 4;
28324 if (len >= 2)
28326 rtx reg = gen_reg_rtx (HImode);
28327 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28328 plus_constant (Pmode, in,
28329 offset))));
28330 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28331 offset)),
28332 reg));
28333 len -= 2;
28334 offset += 2;
28337 if (len)
28339 rtx reg = gen_reg_rtx (QImode);
28340 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28341 plus_constant (Pmode, in,
28342 offset))));
28343 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28344 offset)),
28345 reg));
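/* Worked example (added for illustration; not part of the original
   source): a 23-byte copy is expanded as 12 + 8 + 2 + 1 bytes: one
   movmem12b and one movmem8b (both of which advance the pointer
   registers), then a halfword move at offset 0 and a byte move at
   offset 2 from the updated pointers.  */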
28349 void
28350 thumb_reload_out_hi (rtx *operands)
28352 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28355 /* Handle reading a half-word from memory during reload. */
28356 void
28357 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28359 gcc_unreachable ();
28362 /* Return the length of a function name prefix
28363 that starts with the character 'c'. */
28364 static int
28365 arm_get_strip_length (int c)
28367 switch (c)
28369 ARM_NAME_ENCODING_LENGTHS
28370 default: return 0;
28374 /* Return a pointer to a function's name with any
28375 and all prefix encodings stripped from it. */
28376 const char *
28377 arm_strip_name_encoding (const char *name)
28379 int skip;
28381 while ((skip = arm_get_strip_length (* name)))
28382 name += skip;
28384 return name;
28387 /* If there is a '*' anywhere in the name's prefix, then
28388 emit the stripped name verbatim, otherwise prepend an
28389 underscore if leading underscores are being used. */
28390 void
28391 arm_asm_output_labelref (FILE *stream, const char *name)
28393 int skip;
28394 int verbatim = 0;
28396 while ((skip = arm_get_strip_length (* name)))
28398 verbatim |= (*name == '*');
28399 name += skip;
28402 if (verbatim)
28403 fputs (name, stream);
28404 else
28405 asm_fprintf (stream, "%U%s", name);
28408 /* This function is used to emit an EABI tag and its associated value.
28409 We emit the numerical value of the tag in case the assembler does not
28410 support textual tags (e.g. gas prior to 2.20). If requested we include
28411 the tag name in a comment so that anyone reading the assembler output
28412 will know which tag is being set.
28414 This function is not static because arm-c.c needs it too. */
28416 void
28417 arm_emit_eabi_attribute (const char *name, int num, int val)
28419 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28420 if (flag_verbose_asm || flag_debug_asm)
28421 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28422 asm_fprintf (asm_out_file, "\n");
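/* Example of the output produced above (added for illustration; not part
   of the original source): a call such as

	arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);

   writes, with -fverbose-asm,

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal

   and only the numeric directive otherwise.  */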
28425 static void
28426 arm_file_start (void)
28428 int val;
28430 if (TARGET_UNIFIED_ASM)
28431 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28433 if (TARGET_BPABI)
28435 const char *fpu_name;
28436 if (arm_selected_arch)
28438 /* armv7ve doesn't support any extensions. */
28439 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28441 /* Keep backward compatibility for assemblers
28442 which don't support armv7ve. */
28443 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28444 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28445 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28446 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28447 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28449 else
28451 const char* pos = strchr (arm_selected_arch->name, '+');
28452 if (pos)
28454 char buf[15];
28455 gcc_assert (strlen (arm_selected_arch->name)
28456 <= sizeof (buf) / sizeof (*pos));
28457 strncpy (buf, arm_selected_arch->name,
28458 (pos - arm_selected_arch->name) * sizeof (*pos));
28459 buf[pos - arm_selected_arch->name] = '\0';
28460 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28461 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28463 else
28464 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28467 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28468 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28469 else
28471 const char* truncated_name
28472 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28473 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28476 if (TARGET_SOFT_FLOAT)
28478 fpu_name = "softvfp";
28480 else
28482 fpu_name = arm_fpu_desc->name;
28483 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28485 if (TARGET_HARD_FLOAT)
28486 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28487 if (TARGET_HARD_FLOAT_ABI)
28488 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28491 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28493 /* Some of these attributes only apply when the corresponding features
28494 are used. However we don't have any easy way of figuring this out.
28495 Conservatively record the setting that would have been used. */
28497 if (flag_rounding_math)
28498 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28500 if (!flag_unsafe_math_optimizations)
28502 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28503 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28505 if (flag_signaling_nans)
28506 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28508 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28509 flag_finite_math_only ? 1 : 3);
28511 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28512 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28513 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28514 flag_short_enums ? 1 : 2);
28516 /* Tag_ABI_optimization_goals. */
28517 if (optimize_size)
28518 val = 4;
28519 else if (optimize >= 2)
28520 val = 2;
28521 else if (optimize)
28522 val = 1;
28523 else
28524 val = 6;
28525 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
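/* For illustration (added comment; not part of the original source): the
   value recorded above is 4 for -Os, 2 for -O2 or higher, 1 for -O1 and
   6 when not optimizing -- note that "not optimizing" is deliberately not
   encoded as 0.  */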
28527 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28528 unaligned_access);
28530 if (arm_fp16_format)
28531 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28532 (int) arm_fp16_format);
28534 if (arm_lang_output_object_attributes_hook)
28535 arm_lang_output_object_attributes_hook();
28538 default_file_start ();
28541 static void
28542 arm_file_end (void)
28544 int regno;
28546 if (NEED_INDICATE_EXEC_STACK)
28547 /* Add .note.GNU-stack. */
28548 file_end_indicate_exec_stack ();
28550 if (! thumb_call_reg_needed)
28551 return;
28553 switch_to_section (text_section);
28554 asm_fprintf (asm_out_file, "\t.code 16\n");
28555 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28557 for (regno = 0; regno < LR_REGNUM; regno++)
28559 rtx label = thumb_call_via_label[regno];
28561 if (label != 0)
28563 targetm.asm_out.internal_label (asm_out_file, "L",
28564 CODE_LABEL_NUMBER (label));
28565 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28570 #ifndef ARM_PE
28571 /* Symbols in the text segment can be accessed without indirecting via the
28572 constant pool; it may take an extra binary operation, but this is still
28573 faster than indirecting via memory. Don't do this when not optimizing,
28574 since we won't be calculating all of the offsets necessary to do this
28575 simplification. */
28577 static void
28578 arm_encode_section_info (tree decl, rtx rtl, int first)
28580 if (optimize > 0 && TREE_CONSTANT (decl))
28581 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28583 default_encode_section_info (decl, rtl, first);
28585 #endif /* !ARM_PE */
28587 static void
28588 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28590 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28591 && !strcmp (prefix, "L"))
28593 arm_ccfsm_state = 0;
28594 arm_target_insn = NULL;
28596 default_internal_label (stream, prefix, labelno);
28599 /* Output code to add DELTA to the first argument, and then jump
28600 to FUNCTION. Used for C++ multiple inheritance. */
28601 static void
28602 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28603 HOST_WIDE_INT delta,
28604 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28605 tree function)
28607 static int thunk_label = 0;
28608 char label[256];
28609 char labelpc[256];
28610 int mi_delta = delta;
28611 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28612 int shift = 0;
28613 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28614 ? 1 : 0);
28615 if (mi_delta < 0)
28616 mi_delta = - mi_delta;
28618 final_start_function (emit_barrier (), file, 1);
28620 if (TARGET_THUMB1)
28622 int labelno = thunk_label++;
28623 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28624 /* Thunks are entered in ARM mode when available. */
28625 if (TARGET_THUMB1_ONLY)
28627 /* push r3 so we can use it as a temporary. */
28628 /* TODO: Omit this save if r3 is not used. */
28629 fputs ("\tpush {r3}\n", file);
28630 fputs ("\tldr\tr3, ", file);
28632 else
28634 fputs ("\tldr\tr12, ", file);
28636 assemble_name (file, label);
28637 fputc ('\n', file);
28638 if (flag_pic)
28640 /* If we are generating PIC, the ldr instruction below loads
28641 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28642 the address of the add + 8, so we have:
28644 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28645 = target + 1.
28647 Note that we have "+ 1" because some versions of GNU ld
28648 don't set the low bit of the result for R_ARM_REL32
28649 relocations against thumb function symbols.
28650 On ARMv6M this is +4, not +8. */
28651 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28652 assemble_name (file, labelpc);
28653 fputs (":\n", file);
28654 if (TARGET_THUMB1_ONLY)
28656 /* This is 2 insns after the start of the thunk, so we know it
28657 is 4-byte aligned. */
28658 fputs ("\tadd\tr3, pc, r3\n", file);
28659 fputs ("\tmov r12, r3\n", file);
28661 else
28662 fputs ("\tadd\tr12, pc, r12\n", file);
28664 else if (TARGET_THUMB1_ONLY)
28665 fputs ("\tmov r12, r3\n", file);
28667 if (TARGET_THUMB1_ONLY)
28669 if (mi_delta > 255)
28671 fputs ("\tldr\tr3, ", file);
28672 assemble_name (file, label);
28673 fputs ("+4\n", file);
28674 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28675 mi_op, this_regno, this_regno);
28677 else if (mi_delta != 0)
28679 /* Thumb1 unified syntax requires s suffix in instruction name when
28680 one of the operands is immediate. */
28681 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28682 mi_op, this_regno, this_regno,
28683 mi_delta);
28686 else
28688 /* TODO: Use movw/movt for large constants when available. */
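/* Worked example (added for illustration; not part of the original
   source): for a delta of 0x1234 added to the "this" pointer in r0, the
   loop below consumes eight bits at a time from an even bit position and
   emits roughly:

	add	r0, r0, #564	@ 0x1234 & (0xff << 2)
	add	r0, r0, #4096	@ remaining bits, window 0xff << 12

   Each immediate is a byte rotated to an even position, so it is always
   encodable as an ARM data-processing immediate.  */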
28689 while (mi_delta != 0)
28691 if ((mi_delta & (3 << shift)) == 0)
28692 shift += 2;
28693 else
28695 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28696 mi_op, this_regno, this_regno,
28697 mi_delta & (0xff << shift));
28698 mi_delta &= ~(0xff << shift);
28699 shift += 8;
28703 if (TARGET_THUMB1)
28705 if (TARGET_THUMB1_ONLY)
28706 fputs ("\tpop\t{r3}\n", file);
28708 fprintf (file, "\tbx\tr12\n");
28709 ASM_OUTPUT_ALIGN (file, 2);
28710 assemble_name (file, label);
28711 fputs (":\n", file);
28712 if (flag_pic)
28714 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28715 rtx tem = XEXP (DECL_RTL (function), 0);
28716 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28717 pipeline offset is four rather than eight. Adjust the offset
28718 accordingly. */
28719 tem = plus_constant (GET_MODE (tem), tem,
28720 TARGET_THUMB1_ONLY ? -3 : -7);
28721 tem = gen_rtx_MINUS (GET_MODE (tem),
28722 tem,
28723 gen_rtx_SYMBOL_REF (Pmode,
28724 ggc_strdup (labelpc)));
28725 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28727 else
28728 /* Output ".word .LTHUNKn". */
28729 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28731 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28732 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28734 else
28736 fputs ("\tb\t", file);
28737 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28738 if (NEED_PLT_RELOC)
28739 fputs ("(PLT)", file);
28740 fputc ('\n', file);
28743 final_end_function ();
28747 arm_emit_vector_const (FILE *file, rtx x)
28749 int i;
28750 const char * pattern;
28752 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28754 switch (GET_MODE (x))
28756 case V2SImode: pattern = "%08x"; break;
28757 case V4HImode: pattern = "%04x"; break;
28758 case V8QImode: pattern = "%02x"; break;
28759 default: gcc_unreachable ();
28762 fprintf (file, "0x");
28763 for (i = CONST_VECTOR_NUNITS (x); i--;)
28765 rtx element;
28767 element = CONST_VECTOR_ELT (x, i);
28768 fprintf (file, pattern, INTVAL (element));
28771 return 1;
28774 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28775 HFmode constant pool entries are actually loaded with ldr. */
28776 void
28777 arm_emit_fp16_const (rtx c)
28779 REAL_VALUE_TYPE r;
28780 long bits;
28782 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28783 bits = real_to_target (NULL, &r, HFmode);
28784 if (WORDS_BIG_ENDIAN)
28785 assemble_zeros (2);
28786 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28787 if (!WORDS_BIG_ENDIAN)
28788 assemble_zeros (2);
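/* For example (illustrative value): emitting the half-precision constant
   1.0, whose bit pattern is 0x3c00, produces a 16-bit word 0x3c00 followed
   by two bytes of zero padding on little-endian targets, or the padding
   first on word-big-endian targets, so the entry can be loaded with a
   32-bit ldr.  */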
28791 const char *
28792 arm_output_load_gr (rtx *operands)
28794 rtx reg;
28795 rtx offset;
28796 rtx wcgr;
28797 rtx sum;
28799 if (!MEM_P (operands [1])
28800 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28801 || !REG_P (reg = XEXP (sum, 0))
28802 || !CONST_INT_P (offset = XEXP (sum, 1))
28803 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28804 return "wldrw%?\t%0, %1";
28806 /* Fix up an out-of-range load of a GR register. */
28807 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28808 wcgr = operands[0];
28809 operands[0] = reg;
28810 output_asm_insn ("ldr%?\t%0, %1", operands);
28812 operands[0] = wcgr;
28813 operands[1] = reg;
28814 output_asm_insn ("tmcr%?\t%0, %1", operands);
28815 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28817 return "";
28820 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28822 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28823 named arg and all anonymous args onto the stack.
28824 XXX I know the prologue shouldn't be pushing registers, but it is faster
28825 that way. */
28827 static void
28828 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28829 machine_mode mode,
28830 tree type,
28831 int *pretend_size,
28832 int second_time ATTRIBUTE_UNUSED)
28834 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28835 int nregs;
28837 cfun->machine->uses_anonymous_args = 1;
28838 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28840 nregs = pcum->aapcs_ncrn;
28841 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28842 nregs++;
28844 else
28845 nregs = pcum->nregs;
28847 if (nregs < NUM_ARG_REGS)
28848 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
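/* Illustrative effect (assuming the usual NUM_ARG_REGS of 4 and 4-byte
   words): for a variadic function such as "int f (int fmt, ...)" only r0
   carries a named argument, so nregs is 1 and *pretend_size becomes 12,
   making the prologue push r1-r3 so that va_arg can find the anonymous
   arguments contiguously on the stack.  */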
28851 /* We can't rely on the caller doing the proper promotion when
28852 using APCS or ATPCS. */
28854 static bool
28855 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28857 return !TARGET_AAPCS_BASED;
28860 static machine_mode
28861 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28862 machine_mode mode,
28863 int *punsignedp ATTRIBUTE_UNUSED,
28864 const_tree fntype ATTRIBUTE_UNUSED,
28865 int for_return ATTRIBUTE_UNUSED)
28867 if (GET_MODE_CLASS (mode) == MODE_INT
28868 && GET_MODE_SIZE (mode) < 4)
28869 return SImode;
28871 return mode;
28874 /* AAPCS based ABIs use short enums by default. */
28876 static bool
28877 arm_default_short_enums (void)
28879 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28883 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28885 static bool
28886 arm_align_anon_bitfield (void)
28888 return TARGET_AAPCS_BASED;
28892 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28894 static tree
28895 arm_cxx_guard_type (void)
28897 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28901 /* The EABI says test the least significant bit of a guard variable. */
28903 static bool
28904 arm_cxx_guard_mask_bit (void)
28906 return TARGET_AAPCS_BASED;
28910 /* The EABI specifies that all array cookies are 8 bytes long. */
28912 static tree
28913 arm_get_cookie_size (tree type)
28915 tree size;
28917 if (!TARGET_AAPCS_BASED)
28918 return default_cxx_get_cookie_size (type);
28920 size = build_int_cst (sizetype, 8);
28921 return size;
28925 /* The EABI says that array cookies should also contain the element size. */
28927 static bool
28928 arm_cookie_has_size (void)
28930 return TARGET_AAPCS_BASED;
28934 /* The EABI says constructors and destructors should return a pointer to
28935 the object constructed/destroyed. */
28937 static bool
28938 arm_cxx_cdtor_returns_this (void)
28940 return TARGET_AAPCS_BASED;
28943 /* The EABI says that an inline function may never be the key
28944 method. */
28946 static bool
28947 arm_cxx_key_method_may_be_inline (void)
28949 return !TARGET_AAPCS_BASED;
28952 static void
28953 arm_cxx_determine_class_data_visibility (tree decl)
28955 if (!TARGET_AAPCS_BASED
28956 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28957 return;
28959 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28960 is exported. However, on systems without dynamic vague linkage,
28961 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28962 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28963 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28964 else
28965 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28966 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28969 static bool
28970 arm_cxx_class_data_always_comdat (void)
28972 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28973 vague linkage if the class has no key function. */
28974 return !TARGET_AAPCS_BASED;
28978 /* The EABI says __aeabi_atexit should be used to register static
28979 destructors. */
28981 static bool
28982 arm_cxx_use_aeabi_atexit (void)
28984 return TARGET_AAPCS_BASED;
28988 void
28989 arm_set_return_address (rtx source, rtx scratch)
28991 arm_stack_offsets *offsets;
28992 HOST_WIDE_INT delta;
28993 rtx addr;
28994 unsigned long saved_regs;
28996 offsets = arm_get_frame_offsets ();
28997 saved_regs = offsets->saved_regs_mask;
28999 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29000 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29001 else
29003 if (frame_pointer_needed)
29004 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29005 else
29007 /* LR will be the first saved register. */
29008 delta = offsets->outgoing_args - (offsets->frame + 4);
29011 if (delta >= 4096)
29013 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29014 GEN_INT (delta & ~4095)));
29015 addr = scratch;
29016 delta &= 4095;
29018 else
29019 addr = stack_pointer_rtx;
29021 addr = plus_constant (Pmode, addr, delta);
29023 /* The store needs to be marked as frame related in order to prevent
29024 DSE from deleting it as dead if it is based on fp. */
29025 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29026 RTX_FRAME_RELATED_P (insn) = 1;
29027 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29032 void
29033 thumb_set_return_address (rtx source, rtx scratch)
29035 arm_stack_offsets *offsets;
29036 HOST_WIDE_INT delta;
29037 HOST_WIDE_INT limit;
29038 int reg;
29039 rtx addr;
29040 unsigned long mask;
29042 emit_use (source);
29044 offsets = arm_get_frame_offsets ();
29045 mask = offsets->saved_regs_mask;
29046 if (mask & (1 << LR_REGNUM))
29048 limit = 1024;
29049 /* Find the saved regs. */
29050 if (frame_pointer_needed)
29052 delta = offsets->soft_frame - offsets->saved_args;
29053 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29054 if (TARGET_THUMB1)
29055 limit = 128;
29057 else
29059 delta = offsets->outgoing_args - offsets->saved_args;
29060 reg = SP_REGNUM;
29062 /* Allow for the stack frame. */
29063 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29064 delta -= 16;
29065 /* The link register is always the first saved register. */
29066 delta -= 4;
29068 /* Construct the address. */
29069 addr = gen_rtx_REG (SImode, reg);
29070 if (delta > limit)
29072 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29073 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29074 addr = scratch;
29076 else
29077 addr = plus_constant (Pmode, addr, delta);
29079 /* The store needs to be marked as frame related in order to prevent
29080 DSE from deleting it as dead if it is based on fp. */
29081 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29082 RTX_FRAME_RELATED_P (insn) = 1;
29083 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29085 else
29086 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29089 /* Implements target hook vector_mode_supported_p. */
29090 bool
29091 arm_vector_mode_supported_p (machine_mode mode)
29093 /* Neon also supports V2SImode, etc. listed in the clause below. */
29094 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29095 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29096 return true;
29098 if ((TARGET_NEON || TARGET_IWMMXT)
29099 && ((mode == V2SImode)
29100 || (mode == V4HImode)
29101 || (mode == V8QImode)))
29102 return true;
29104 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29105 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29106 || mode == V2HAmode))
29107 return true;
29109 return false;
29112 /* Implements target hook array_mode_supported_p. */
29114 static bool
29115 arm_array_mode_supported_p (machine_mode mode,
29116 unsigned HOST_WIDE_INT nelems)
29118 if (TARGET_NEON
29119 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29120 && (nelems >= 2 && nelems <= 4))
29121 return true;
29123 return false;
29126 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29127 registers when autovectorizing for Neon, at least until multiple vector
29128 widths are supported properly by the middle-end. */
29130 static machine_mode
29131 arm_preferred_simd_mode (machine_mode mode)
29133 if (TARGET_NEON)
29134 switch (mode)
29136 case SFmode:
29137 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29138 case SImode:
29139 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29140 case HImode:
29141 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29142 case QImode:
29143 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29144 case DImode:
29145 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29146 return V2DImode;
29147 break;
29149 default:;
29152 if (TARGET_REALLY_IWMMXT)
29153 switch (mode)
29155 case SImode:
29156 return V2SImode;
29157 case HImode:
29158 return V4HImode;
29159 case QImode:
29160 return V8QImode;
29162 default:;
29165 return word_mode;
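/* Illustrative consequence (sketch): when auto-vectorizing a loop such as
     for (i = 0; i < n; i++) a[i] = b[i] + c[i];
   over "float" elements with NEON enabled, the vectorizer is offered
   V4SFmode (a quad register) by default, or V2SFmode when
   -mvectorize-with-neon-double is given.  */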
29168 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29170 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29171 using r0-r4 for function arguments, r7 for the stack frame and don't have
29172 enough left over to do doubleword arithmetic. For Thumb-2 all the
29173 potentially problematic instructions accept high registers so this is not
29174 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29175 that require many low registers. */
29176 static bool
29177 arm_class_likely_spilled_p (reg_class_t rclass)
29179 if ((TARGET_THUMB1 && rclass == LO_REGS)
29180 || rclass == CC_REG)
29181 return true;
29183 return false;
29186 /* Implements target hook small_register_classes_for_mode_p. */
29187 bool
29188 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29190 return TARGET_THUMB1;
29193 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29194 ARM insns and therefore guarantee that the shift count is modulo 256.
29195 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29196 guarantee no particular behavior for out-of-range counts. */
29198 static unsigned HOST_WIDE_INT
29199 arm_shift_truncation_mask (machine_mode mode)
29201 return mode == SImode ? 255 : 0;
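/* Illustrative consequence (sketch): because the mask is 255 for SImode,
   the middle-end may simplify
     unsigned int f (unsigned int x, unsigned int n) { return x << (n & 255); }
   to a plain register-specified shift, whereas for DImode the explicit
   masking of the count must be preserved.  */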
29205 /* Map internal gcc register numbers to DWARF2 register numbers. */
29207 unsigned int
29208 arm_dbx_register_number (unsigned int regno)
29210 if (regno < 16)
29211 return regno;
29213 if (IS_VFP_REGNUM (regno))
29215 /* See comment in arm_dwarf_register_span. */
29216 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29217 return 64 + regno - FIRST_VFP_REGNUM;
29218 else
29219 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29222 if (IS_IWMMXT_GR_REGNUM (regno))
29223 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29225 if (IS_IWMMXT_REGNUM (regno))
29226 return 112 + regno - FIRST_IWMMXT_REGNUM;
29228 gcc_unreachable ();
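/* For example (following the mapping above): a single-precision register
   Sn in the first VFP bank maps to DWARF number 64 + n, while a register
   only addressable as a double, say D20, maps to 256 + 20 = 276.  Core
   registers r0-r15 keep their own numbers.  */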
29231 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29232 GCC models them as 64 32-bit registers, so we need to describe this to
29233 the DWARF generation code. Other registers can use the default. */
29234 static rtx
29235 arm_dwarf_register_span (rtx rtl)
29237 machine_mode mode;
29238 unsigned regno;
29239 rtx parts[16];
29240 int nregs;
29241 int i;
29243 regno = REGNO (rtl);
29244 if (!IS_VFP_REGNUM (regno))
29245 return NULL_RTX;
29247 /* XXX FIXME: The EABI defines two VFP register ranges:
29248 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29249 256-287: D0-D31
29250 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29251 corresponding D register. Until GDB supports this, we shall use the
29252 legacy encodings. We also use these encodings for D0-D15 for
29253 compatibility with older debuggers. */
29254 mode = GET_MODE (rtl);
29255 if (GET_MODE_SIZE (mode) < 8)
29256 return NULL_RTX;
29258 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29260 nregs = GET_MODE_SIZE (mode) / 4;
29261 for (i = 0; i < nregs; i += 2)
29262 if (TARGET_BIG_END)
29264 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29265 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29267 else
29269 parts[i] = gen_rtx_REG (SImode, regno + i);
29270 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29273 else
29275 nregs = GET_MODE_SIZE (mode) / 8;
29276 for (i = 0; i < nregs; i++)
29277 parts[i] = gen_rtx_REG (DImode, regno + i);
29280 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29283 #if ARM_UNWIND_INFO
29284 /* Emit unwind directives for a store-multiple instruction or stack pointer
29285 push during alignment.
29286 These should only ever be generated by the function prologue code, so
29287 expect them to have a particular form.
29288 The store-multiple instruction sometimes pushes pc as the last register,
29289 although it should not be tracked into unwind information, or for -Os
29290 sometimes pushes some dummy registers before the first register that needs
29291 to be tracked in unwind information; such dummy registers are there just
29292 to avoid separate stack adjustment, and will not be restored in the
29293 epilogue. */
29295 static void
29296 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29298 int i;
29299 HOST_WIDE_INT offset;
29300 HOST_WIDE_INT nregs;
29301 int reg_size;
29302 unsigned reg;
29303 unsigned lastreg;
29304 unsigned padfirst = 0, padlast = 0;
29305 rtx e;
29307 e = XVECEXP (p, 0, 0);
29308 gcc_assert (GET_CODE (e) == SET);
29310 /* First insn will adjust the stack pointer. */
29311 gcc_assert (GET_CODE (e) == SET
29312 && REG_P (SET_DEST (e))
29313 && REGNO (SET_DEST (e)) == SP_REGNUM
29314 && GET_CODE (SET_SRC (e)) == PLUS);
29316 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29317 nregs = XVECLEN (p, 0) - 1;
29318 gcc_assert (nregs);
29320 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29321 if (reg < 16)
29323 /* For -Os dummy registers can be pushed at the beginning to
29324 avoid separate stack pointer adjustment. */
29325 e = XVECEXP (p, 0, 1);
29326 e = XEXP (SET_DEST (e), 0);
29327 if (GET_CODE (e) == PLUS)
29328 padfirst = INTVAL (XEXP (e, 1));
29329 gcc_assert (padfirst == 0 || optimize_size);
29330 /* The function prologue may also push pc, but does not annotate it as it is
29331 never restored. We turn this into a stack pointer adjustment. */
29332 e = XVECEXP (p, 0, nregs);
29333 e = XEXP (SET_DEST (e), 0);
29334 if (GET_CODE (e) == PLUS)
29335 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29336 else
29337 padlast = offset - 4;
29338 gcc_assert (padlast == 0 || padlast == 4);
29339 if (padlast == 4)
29340 fprintf (asm_out_file, "\t.pad #4\n");
29341 reg_size = 4;
29342 fprintf (asm_out_file, "\t.save {");
29344 else if (IS_VFP_REGNUM (reg))
29346 reg_size = 8;
29347 fprintf (asm_out_file, "\t.vsave {");
29349 else
29350 /* Unknown register type. */
29351 gcc_unreachable ();
29353 /* If the stack increment doesn't match the size of the saved registers,
29354 something has gone horribly wrong. */
29355 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29357 offset = padfirst;
29358 lastreg = 0;
29359 /* The remaining insns will describe the stores. */
29360 for (i = 1; i <= nregs; i++)
29362 /* Expect (set (mem <addr>) (reg)).
29363 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29364 e = XVECEXP (p, 0, i);
29365 gcc_assert (GET_CODE (e) == SET
29366 && MEM_P (SET_DEST (e))
29367 && REG_P (SET_SRC (e)));
29369 reg = REGNO (SET_SRC (e));
29370 gcc_assert (reg >= lastreg);
29372 if (i != 1)
29373 fprintf (asm_out_file, ", ");
29374 /* We can't use %r for vfp because we need to use the
29375 double precision register names. */
29376 if (IS_VFP_REGNUM (reg))
29377 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29378 else
29379 asm_fprintf (asm_out_file, "%r", reg);
29381 #ifdef ENABLE_CHECKING
29382 /* Check that the addresses are consecutive. */
29383 e = XEXP (SET_DEST (e), 0);
29384 if (GET_CODE (e) == PLUS)
29385 gcc_assert (REG_P (XEXP (e, 0))
29386 && REGNO (XEXP (e, 0)) == SP_REGNUM
29387 && CONST_INT_P (XEXP (e, 1))
29388 && offset == INTVAL (XEXP (e, 1)));
29389 else
29390 gcc_assert (i == 1
29391 && REG_P (e)
29392 && REGNO (e) == SP_REGNUM);
29393 offset += reg_size;
29394 #endif
29396 fprintf (asm_out_file, "}\n");
29397 if (padfirst)
29398 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29401 /* Emit unwind directives for a SET. */
29403 static void
29404 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29406 rtx e0;
29407 rtx e1;
29408 unsigned reg;
29410 e0 = XEXP (p, 0);
29411 e1 = XEXP (p, 1);
29412 switch (GET_CODE (e0))
29414 case MEM:
29415 /* Pushing a single register. */
29416 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29417 || !REG_P (XEXP (XEXP (e0, 0), 0))
29418 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29419 abort ();
29421 asm_fprintf (asm_out_file, "\t.save ");
29422 if (IS_VFP_REGNUM (REGNO (e1)))
29423 asm_fprintf(asm_out_file, "{d%d}\n",
29424 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29425 else
29426 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29427 break;
29429 case REG:
29430 if (REGNO (e0) == SP_REGNUM)
29432 /* A stack increment. */
29433 if (GET_CODE (e1) != PLUS
29434 || !REG_P (XEXP (e1, 0))
29435 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29436 || !CONST_INT_P (XEXP (e1, 1)))
29437 abort ();
29439 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29440 -INTVAL (XEXP (e1, 1)));
29442 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29444 HOST_WIDE_INT offset;
29446 if (GET_CODE (e1) == PLUS)
29448 if (!REG_P (XEXP (e1, 0))
29449 || !CONST_INT_P (XEXP (e1, 1)))
29450 abort ();
29451 reg = REGNO (XEXP (e1, 0));
29452 offset = INTVAL (XEXP (e1, 1));
29453 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29454 HARD_FRAME_POINTER_REGNUM, reg,
29455 offset);
29457 else if (REG_P (e1))
29459 reg = REGNO (e1);
29460 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29461 HARD_FRAME_POINTER_REGNUM, reg);
29463 else
29464 abort ();
29466 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29468 /* Move from sp to reg. */
29469 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29471 else if (GET_CODE (e1) == PLUS
29472 && REG_P (XEXP (e1, 0))
29473 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29474 && CONST_INT_P (XEXP (e1, 1)))
29476 /* Set reg to offset from sp. */
29477 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29478 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29480 else
29481 abort ();
29482 break;
29484 default:
29485 abort ();
29490 /* Emit unwind directives for the given insn. */
29492 static void
29493 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29495 rtx note, pat;
29496 bool handled_one = false;
29498 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29499 return;
29501 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29502 && (TREE_NOTHROW (current_function_decl)
29503 || crtl->all_throwers_are_sibcalls))
29504 return;
29506 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29507 return;
29509 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29511 switch (REG_NOTE_KIND (note))
29513 case REG_FRAME_RELATED_EXPR:
29514 pat = XEXP (note, 0);
29515 goto found;
29517 case REG_CFA_REGISTER:
29518 pat = XEXP (note, 0);
29519 if (pat == NULL)
29521 pat = PATTERN (insn);
29522 if (GET_CODE (pat) == PARALLEL)
29523 pat = XVECEXP (pat, 0, 0);
29526 /* Only emitted for IS_STACKALIGN re-alignment. */
29528 rtx dest, src;
29529 unsigned reg;
29531 src = SET_SRC (pat);
29532 dest = SET_DEST (pat);
29534 gcc_assert (src == stack_pointer_rtx);
29535 reg = REGNO (dest);
29536 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29537 reg + 0x90, reg);
29539 handled_one = true;
29540 break;
29542 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29543 to get correct dwarf information for shrink-wrapping. We should not
29544 emit unwind information for it because these notes are used either for
29545 pretend arguments or to adjust sp and restore registers from the
29546 stack. */
29547 case REG_CFA_DEF_CFA:
29548 case REG_CFA_ADJUST_CFA:
29549 case REG_CFA_RESTORE:
29550 return;
29552 case REG_CFA_EXPRESSION:
29553 case REG_CFA_OFFSET:
29554 /* ??? Only handling here what we actually emit. */
29555 gcc_unreachable ();
29557 default:
29558 break;
29561 if (handled_one)
29562 return;
29563 pat = PATTERN (insn);
29564 found:
29566 switch (GET_CODE (pat))
29568 case SET:
29569 arm_unwind_emit_set (asm_out_file, pat);
29570 break;
29572 case SEQUENCE:
29573 /* Store multiple. */
29574 arm_unwind_emit_sequence (asm_out_file, pat);
29575 break;
29577 default:
29578 abort();
29583 /* Output a reference from a function exception table to the type_info
29584 object X. The EABI specifies that the symbol should be relocated by
29585 an R_ARM_TARGET2 relocation. */
29587 static bool
29588 arm_output_ttype (rtx x)
29590 fputs ("\t.word\t", asm_out_file);
29591 output_addr_const (asm_out_file, x);
29592 /* Use special relocations for symbol references. */
29593 if (!CONST_INT_P (x))
29594 fputs ("(TARGET2)", asm_out_file);
29595 fputc ('\n', asm_out_file);
29597 return TRUE;
29600 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29602 static void
29603 arm_asm_emit_except_personality (rtx personality)
29605 fputs ("\t.personality\t", asm_out_file);
29606 output_addr_const (asm_out_file, personality);
29607 fputc ('\n', asm_out_file);
29610 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29612 static void
29613 arm_asm_init_sections (void)
29615 exception_section = get_unnamed_section (0, output_section_asm_op,
29616 "\t.handlerdata");
29618 #endif /* ARM_UNWIND_INFO */
29620 /* Output unwind directives for the start/end of a function. */
29622 void
29623 arm_output_fn_unwind (FILE * f, bool prologue)
29625 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29626 return;
29628 if (prologue)
29629 fputs ("\t.fnstart\n", f);
29630 else
29632 /* If this function will never be unwound, then mark it as such.
29633 The same condition is used in arm_unwind_emit to suppress
29634 the frame annotations. */
29635 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29636 && (TREE_NOTHROW (current_function_decl)
29637 || crtl->all_throwers_are_sibcalls))
29638 fputs("\t.cantunwind\n", f);
29640 fputs ("\t.fnend\n", f);
29644 static bool
29645 arm_emit_tls_decoration (FILE *fp, rtx x)
29647 enum tls_reloc reloc;
29648 rtx val;
29650 val = XVECEXP (x, 0, 0);
29651 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29653 output_addr_const (fp, val);
29655 switch (reloc)
29657 case TLS_GD32:
29658 fputs ("(tlsgd)", fp);
29659 break;
29660 case TLS_LDM32:
29661 fputs ("(tlsldm)", fp);
29662 break;
29663 case TLS_LDO32:
29664 fputs ("(tlsldo)", fp);
29665 break;
29666 case TLS_IE32:
29667 fputs ("(gottpoff)", fp);
29668 break;
29669 case TLS_LE32:
29670 fputs ("(tpoff)", fp);
29671 break;
29672 case TLS_DESCSEQ:
29673 fputs ("(tlsdesc)", fp);
29674 break;
29675 default:
29676 gcc_unreachable ();
29679 switch (reloc)
29681 case TLS_GD32:
29682 case TLS_LDM32:
29683 case TLS_IE32:
29684 case TLS_DESCSEQ:
29685 fputs (" + (. - ", fp);
29686 output_addr_const (fp, XVECEXP (x, 0, 2));
29687 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29688 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29689 output_addr_const (fp, XVECEXP (x, 0, 3));
29690 fputc (')', fp);
29691 break;
29692 default:
29693 break;
29696 return TRUE;
29699 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29701 static void
29702 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29704 gcc_assert (size == 4);
29705 fputs ("\t.word\t", file);
29706 output_addr_const (file, x);
29707 fputs ("(tlsldo)", file);
29710 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29712 static bool
29713 arm_output_addr_const_extra (FILE *fp, rtx x)
29715 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29716 return arm_emit_tls_decoration (fp, x);
29717 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29719 char label[256];
29720 int labelno = INTVAL (XVECEXP (x, 0, 0));
29722 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29723 assemble_name_raw (fp, label);
29725 return TRUE;
29727 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29729 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29730 if (GOT_PCREL)
29731 fputs ("+.", fp);
29732 fputs ("-(", fp);
29733 output_addr_const (fp, XVECEXP (x, 0, 0));
29734 fputc (')', fp);
29735 return TRUE;
29737 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29739 output_addr_const (fp, XVECEXP (x, 0, 0));
29740 if (GOT_PCREL)
29741 fputs ("+.", fp);
29742 fputs ("-(", fp);
29743 output_addr_const (fp, XVECEXP (x, 0, 1));
29744 fputc (')', fp);
29745 return TRUE;
29747 else if (GET_CODE (x) == CONST_VECTOR)
29748 return arm_emit_vector_const (fp, x);
29750 return FALSE;
29753 /* Output assembly for a shift instruction.
29754 SET_FLAGS determines how the instruction modifies the condition codes.
29755 0 - Do not set condition codes.
29756 1 - Set condition codes.
29757 2 - Use smallest instruction. */
29758 const char *
29759 arm_output_shift(rtx * operands, int set_flags)
29761 char pattern[100];
29762 static const char flag_chars[3] = {'?', '.', '!'};
29763 const char *shift;
29764 HOST_WIDE_INT val;
29765 char c;
29767 c = flag_chars[set_flags];
29768 if (TARGET_UNIFIED_ASM)
29770 shift = shift_op(operands[3], &val);
29771 if (shift)
29773 if (val != -1)
29774 operands[2] = GEN_INT(val);
29775 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29777 else
29778 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29780 else
29781 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29782 output_asm_insn (pattern, operands);
29783 return "";
29786 /* Output assembly for a WMMX immediate shift instruction. */
29787 const char *
29788 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29790 int shift = INTVAL (operands[2]);
29791 char templ[50];
29792 machine_mode opmode = GET_MODE (operands[0]);
29794 gcc_assert (shift >= 0);
29796 /* Handle the case where the shift value exceeds the maximum for the
29797 operand width: > 63 (D qualifier), > 31 (W) or > 15 (H).  */
29798 if (((opmode == V4HImode) && (shift > 15))
29799 || ((opmode == V2SImode) && (shift > 31))
29800 || ((opmode == DImode) && (shift > 63)))
29802 if (wror_or_wsra)
29804 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29805 output_asm_insn (templ, operands);
29806 if (opmode == DImode)
29808 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29809 output_asm_insn (templ, operands);
29812 else
29814 /* The destination register will contain all zeros. */
29815 sprintf (templ, "wzero\t%%0");
29816 output_asm_insn (templ, operands);
29818 return "";
29821 if ((opmode == DImode) && (shift > 32))
29823 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29824 output_asm_insn (templ, operands);
29825 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29826 output_asm_insn (templ, operands);
29828 else
29830 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29831 output_asm_insn (templ, operands);
29833 return "";
29836 /* Output assembly for a WMMX tinsr instruction. */
29837 const char *
29838 arm_output_iwmmxt_tinsr (rtx *operands)
29840 int mask = INTVAL (operands[3]);
29841 int i;
29842 char templ[50];
29843 int units = mode_nunits[GET_MODE (operands[0])];
29844 gcc_assert ((mask & (mask - 1)) == 0);
29845 for (i = 0; i < units; ++i)
29847 if ((mask & 0x01) == 1)
29849 break;
29851 mask >>= 1;
29853 gcc_assert (i < units);
29855 switch (GET_MODE (operands[0]))
29857 case V8QImode:
29858 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29859 break;
29860 case V4HImode:
29861 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29862 break;
29863 case V2SImode:
29864 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29865 break;
29866 default:
29867 gcc_unreachable ();
29868 break;
29870 output_asm_insn (templ, operands);
29872 return "";
29875 /* Output a Thumb-1 casesi dispatch sequence. */
29876 const char *
29877 thumb1_output_casesi (rtx *operands)
29879 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29881 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29883 switch (GET_MODE(diff_vec))
29885 case QImode:
29886 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29887 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29888 case HImode:
29889 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29890 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29891 case SImode:
29892 return "bl\t%___gnu_thumb1_case_si";
29893 default:
29894 gcc_unreachable ();
29898 /* Output a Thumb-2 casesi instruction. */
29899 const char *
29900 thumb2_output_casesi (rtx *operands)
29902 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29904 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29906 output_asm_insn ("cmp\t%0, %1", operands);
29907 output_asm_insn ("bhi\t%l3", operands);
29908 switch (GET_MODE(diff_vec))
29910 case QImode:
29911 return "tbb\t[%|pc, %0]";
29912 case HImode:
29913 return "tbh\t[%|pc, %0, lsl #1]";
29914 case SImode:
29915 if (flag_pic)
29917 output_asm_insn ("adr\t%4, %l2", operands);
29918 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29919 output_asm_insn ("add\t%4, %4, %5", operands);
29920 return "bx\t%4";
29922 else
29924 output_asm_insn ("adr\t%4, %l2", operands);
29925 return "ldr\t%|pc, [%4, %0, lsl #2]";
29927 default:
29928 gcc_unreachable ();
29932 /* Most ARM cores are single issue, but some newer ones can issue two or three
29933 instructions per cycle. The scheduler descriptions rely on this being correct. */
29934 static int
29935 arm_issue_rate (void)
29937 switch (arm_tune)
29939 case cortexa15:
29940 case cortexa57:
29941 return 3;
29943 case cortexm7:
29944 case cortexr4:
29945 case cortexr4f:
29946 case cortexr5:
29947 case genericv7a:
29948 case cortexa5:
29949 case cortexa7:
29950 case cortexa8:
29951 case cortexa9:
29952 case cortexa12:
29953 case cortexa53:
29954 case fa726te:
29955 case marvell_pj4:
29956 return 2;
29958 default:
29959 return 1;
29963 /* A table and a function to perform ARM-specific name mangling for
29964 NEON vector types in order to conform to the AAPCS (see "Procedure
29965 Call Standard for the ARM Architecture", Appendix A). To qualify
29966 for emission with the mangled names defined in that document, a
29967 vector type must not only be of the correct mode but also be
29968 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29969 typedef struct
29971 machine_mode mode;
29972 const char *element_type_name;
29973 const char *aapcs_name;
29974 } arm_mangle_map_entry;
29976 static arm_mangle_map_entry arm_mangle_map[] = {
29977 /* 64-bit containerized types. */
29978 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29979 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29980 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29981 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29982 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29983 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29984 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29985 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29986 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29987 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29989 /* 128-bit containerized types. */
29990 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29991 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29992 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29993 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29994 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29995 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29996 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29997 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29998 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29999 { VOIDmode, NULL, NULL }
30002 const char *
30003 arm_mangle_type (const_tree type)
30005 arm_mangle_map_entry *pos = arm_mangle_map;
30007 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30008 has to be mangled as if it were in the "std" namespace. */
30009 if (TARGET_AAPCS_BASED
30010 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30011 return "St9__va_list";
30013 /* Half-precision float. */
30014 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30015 return "Dh";
30017 if (TREE_CODE (type) != VECTOR_TYPE)
30018 return NULL;
30020 /* Check the mode of the vector type, and the name of the vector
30021 element type, against the table. */
30022 while (pos->mode != VOIDmode)
30024 tree elt_type = TREE_TYPE (type);
30026 if (pos->mode == TYPE_MODE (type)
30027 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
30028 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
30029 pos->element_type_name))
30030 return pos->aapcs_name;
30032 pos++;
30035 /* Use the default mangling for unrecognized (possibly user-defined)
30036 vector types. */
30037 return NULL;
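/* Worked example (illustrative): a parameter whose type has mode V4SImode
   and element type __builtin_neon_si picks up the AAPCS name
   "17__simd128_int32_t" from the table above, so a function
   "void f (int32x4_t)" mangles as "_Z1f17__simd128_int32_t" rather than
   using the default vector mangling.  */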
30040 /* Order of allocation of core registers for Thumb: this allocation is
30041 written over the corresponding initial entries of the array
30042 initialized with REG_ALLOC_ORDER. We allocate all low registers
30043 first. Saving and restoring a low register is usually cheaper than
30044 using a call-clobbered high register. */
30046 static const int thumb_core_reg_alloc_order[] =
30048 3, 2, 1, 0, 4, 5, 6, 7,
30049 14, 12, 8, 9, 10, 11
30052 /* Adjust register allocation order when compiling for Thumb. */
30054 void
30055 arm_order_regs_for_local_alloc (void)
30057 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30058 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30059 if (TARGET_THUMB)
30060 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30061 sizeof (thumb_core_reg_alloc_order));
30064 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30066 bool
30067 arm_frame_pointer_required (void)
30069 return (cfun->has_nonlocal_label
30070 || SUBTARGET_FRAME_POINTER_REQUIRED
30071 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30074 /* Thumb-1 is the only target that cannot support conditional execution,
30075 so return true if the target is not Thumb-1. */
30076 static bool
30077 arm_have_conditional_execution (void)
30079 return !TARGET_THUMB1;
30082 tree
30083 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30085 machine_mode in_mode, out_mode;
30086 int in_n, out_n;
30087 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30089 if (TREE_CODE (type_out) != VECTOR_TYPE
30090 || TREE_CODE (type_in) != VECTOR_TYPE)
30091 return NULL_TREE;
30093 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30094 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30095 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30096 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30098 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30099 decl of the vectorized builtin for the appropriate vector mode.
30100 NULL_TREE is returned if no such builtin is available. */
30101 #undef ARM_CHECK_BUILTIN_MODE
30102 #define ARM_CHECK_BUILTIN_MODE(C) \
30103 (TARGET_NEON && TARGET_FPU_ARMV8 \
30104 && flag_unsafe_math_optimizations \
30105 && ARM_CHECK_BUILTIN_MODE_1 (C))
30107 #undef ARM_CHECK_BUILTIN_MODE_1
30108 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30109 (out_mode == SFmode && out_n == C \
30110 && in_mode == SFmode && in_n == C)
30112 #undef ARM_FIND_VRINT_VARIANT
30113 #define ARM_FIND_VRINT_VARIANT(N) \
30114 (ARM_CHECK_BUILTIN_MODE (2) \
30115 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30116 : (ARM_CHECK_BUILTIN_MODE (4) \
30117 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30118 : NULL_TREE))
30120 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30122 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30123 switch (fn)
30125 case BUILT_IN_FLOORF:
30126 return ARM_FIND_VRINT_VARIANT (vrintm);
30127 case BUILT_IN_CEILF:
30128 return ARM_FIND_VRINT_VARIANT (vrintp);
30129 case BUILT_IN_TRUNCF:
30130 return ARM_FIND_VRINT_VARIANT (vrintz);
30131 case BUILT_IN_ROUNDF:
30132 return ARM_FIND_VRINT_VARIANT (vrinta);
30133 #undef ARM_CHECK_BUILTIN_MODE_1
30134 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30135 (out_mode == SImode && out_n == C \
30136 && in_mode == SFmode && in_n == C)
30138 #define ARM_FIND_VCVT_VARIANT(N) \
30139 (ARM_CHECK_BUILTIN_MODE (2) \
30140 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30141 : (ARM_CHECK_BUILTIN_MODE (4) \
30142 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30143 : NULL_TREE))
30145 #define ARM_FIND_VCVTU_VARIANT(N) \
30146 (ARM_CHECK_BUILTIN_MODE (2) \
30147 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30148 : (ARM_CHECK_BUILTIN_MODE (4) \
30149 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30150 : NULL_TREE))
30151 case BUILT_IN_LROUNDF:
30152 return out_unsigned_p
30153 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30154 : ARM_FIND_VCVT_VARIANT (vcvta);
30155 case BUILT_IN_LCEILF:
30156 return out_unsigned_p
30157 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30158 : ARM_FIND_VCVT_VARIANT (vcvtp);
30159 case BUILT_IN_LFLOORF:
30160 return out_unsigned_p
30161 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30162 : ARM_FIND_VCVT_VARIANT (vcvtm);
30163 #undef ARM_CHECK_BUILTIN_MODE
30164 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30165 (out_mode == N##mode && out_n == C \
30166 && in_mode == N##mode && in_n == C)
30167 case BUILT_IN_BSWAP16:
30168 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30169 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30170 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30171 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30172 else
30173 return NULL_TREE;
30174 case BUILT_IN_BSWAP32:
30175 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30176 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30177 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30178 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30179 else
30180 return NULL_TREE;
30181 case BUILT_IN_BSWAP64:
30182 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30183 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30184 else
30185 return NULL_TREE;
30186 case BUILT_IN_COPYSIGNF:
30187 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30188 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30189 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30190 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30191 else
30192 return NULL_TREE;
30194 default:
30195 return NULL_TREE;
30198 return NULL_TREE;
30200 #undef ARM_FIND_VCVT_VARIANT
30201 #undef ARM_FIND_VCVTU_VARIANT
30202 #undef ARM_CHECK_BUILTIN_MODE
30203 #undef ARM_FIND_VRINT_VARIANT
30206 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30207 static HOST_WIDE_INT
30208 arm_vector_alignment (const_tree type)
30210 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30212 if (TARGET_AAPCS_BASED)
30213 align = MIN (align, 64);
30215 return align;
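/* For example: a 128-bit NEON vector type would naturally ask for 128-bit
   alignment, but under an AAPCS-based ABI the value returned here is
   capped at 64 bits, so such a vector only requires 8-byte alignment.  */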
30218 static unsigned int
30219 arm_autovectorize_vector_sizes (void)
30221 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30224 static bool
30225 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30227 /* Vectors which aren't in packed structures will not be less aligned than
30228 the natural alignment of their element type, so this is safe. */
30229 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30230 return !is_packed;
30232 return default_builtin_vector_alignment_reachable (type, is_packed);
30235 static bool
30236 arm_builtin_support_vector_misalignment (machine_mode mode,
30237 const_tree type, int misalignment,
30238 bool is_packed)
30240 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30242 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30244 if (is_packed)
30245 return align == 1;
30247 /* If the misalignment is unknown, we should be able to handle the access
30248 so long as it is not to a member of a packed data structure. */
30249 if (misalignment == -1)
30250 return true;
30252 /* Return true if the misalignment is a multiple of the natural alignment
30253 of the vector's element type. This is probably always going to be
30254 true in practice, since we've already established that this isn't a
30255 packed access. */
30256 return ((misalignment % align) == 0);
30259 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30260 is_packed);
30263 static void
30264 arm_conditional_register_usage (void)
30266 int regno;
30268 if (TARGET_THUMB1 && optimize_size)
30270 /* When optimizing for size on Thumb-1, it's better not
30271 to use the HI regs, because of the overhead of
30272 stacking them. */
30273 for (regno = FIRST_HI_REGNUM;
30274 regno <= LAST_HI_REGNUM; ++regno)
30275 fixed_regs[regno] = call_used_regs[regno] = 1;
30278 /* The link register can be clobbered by any branch insn,
30279 but we have no way to track that at present, so mark
30280 it as unavailable. */
30281 if (TARGET_THUMB1)
30282 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30284 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30286 /* VFPv3 registers are disabled when earlier VFP
30287 versions are selected due to the definition of
30288 LAST_VFP_REGNUM. */
30289 for (regno = FIRST_VFP_REGNUM;
30290 regno <= LAST_VFP_REGNUM; ++ regno)
30292 fixed_regs[regno] = 0;
30293 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30294 || regno >= FIRST_VFP_REGNUM + 32;
30298 if (TARGET_REALLY_IWMMXT)
30300 regno = FIRST_IWMMXT_GR_REGNUM;
30301 /* The 2002/10/09 revision of the XScale ABI has wCG0
30302 and wCG1 as call-preserved registers. The 2002/11/21
30303 revision changed this so that all wCG registers are
30304 scratch registers. */
30305 for (regno = FIRST_IWMMXT_GR_REGNUM;
30306 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30307 fixed_regs[regno] = 0;
30308 /* The XScale ABI has wR0 - wR9 as scratch registers,
30309 the rest as call-preserved registers. */
30310 for (regno = FIRST_IWMMXT_REGNUM;
30311 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30313 fixed_regs[regno] = 0;
30314 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30318 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30320 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30321 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30323 else if (TARGET_APCS_STACK)
30325 fixed_regs[10] = 1;
30326 call_used_regs[10] = 1;
30328 /* -mcaller-super-interworking reserves r11 for calls to
30329 _interwork_r11_call_via_rN(). Making the register global
30330 is an easy way of ensuring that it remains valid for all
30331 calls. */
30332 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30333 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30335 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30336 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30337 if (TARGET_CALLER_INTERWORKING)
30338 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30340 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30343 static reg_class_t
30344 arm_preferred_rename_class (reg_class_t rclass)
30346 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30347 using GENERAL_REGS. During the register rename pass, preferring LO_REGS
30348 can therefore reduce code size. */
30349 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30350 return LO_REGS;
30351 else
30352 return NO_REGS;
30355 /* Compute the attribute "length" of insn "*push_multi".
30356 So this function MUST be kept in sync with that insn pattern. */
30358 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30360 int i, regno, hi_reg;
30361 int num_saves = XVECLEN (parallel_op, 0);
30363 /* ARM mode. */
30364 if (TARGET_ARM)
30365 return 4;
30366 /* Thumb1 mode. */
30367 if (TARGET_THUMB1)
30368 return 2;
30370 /* Thumb2 mode. */
30371 regno = REGNO (first_op);
30372 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30373 for (i = 1; i < num_saves && !hi_reg; i++)
30375 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30376 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30379 if (!hi_reg)
30380 return 2;
30381 return 4;
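/* Illustrative cases (Thumb-2, following the loop above): "push {r4-r7, lr}"
   involves only low registers and lr, so the 16-bit encoding is available
   and the length is 2; "push {r4, r8, lr}" includes the high register r8,
   forcing the 32-bit encoding and a length of 4.  */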
30384 /* Compute the number of instructions emitted by output_move_double. */
30386 arm_count_output_move_double_insns (rtx *operands)
30388 int count;
30389 rtx ops[2];
30390 /* output_move_double may modify the operands array, so call it
30391 here on a copy of the array. */
30392 ops[0] = operands[0];
30393 ops[1] = operands[1];
30394 output_move_double (ops, false, &count);
30395 return count;
30399 vfp3_const_double_for_fract_bits (rtx operand)
30401 REAL_VALUE_TYPE r0;
30403 if (!CONST_DOUBLE_P (operand))
30404 return 0;
30406 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30407 if (exact_real_inverse (DFmode, &r0))
30409 if (exact_real_truncate (DFmode, &r0))
30411 HOST_WIDE_INT value = real_to_integer (&r0);
30412 value = value & 0xffffffff;
30413 if ((value != 0) && ( (value & (value - 1)) == 0))
30414 return int_log2 (value);
30417 return 0;
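/* Worked example (illustrative): for the constant 0.125 the exact inverse
   is 8.0, which truncates exactly to the integer 8 = 1 << 3, so the
   function returns 3 -- the number of fractional bits to use in a
   fixed-point conversion.  A constant such as 0.3 has no exact
   power-of-two inverse and yields 0.  */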
30421 vfp3_const_double_for_bits (rtx operand)
30423 REAL_VALUE_TYPE r0;
30425 if (!CONST_DOUBLE_P (operand))
30426 return 0;
30428 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30429 if (exact_real_truncate (DFmode, &r0))
30431 HOST_WIDE_INT value = real_to_integer (&r0);
30432 value = value & 0xffffffff;
30433 if ((value != 0) && ( (value & (value - 1)) == 0))
30434 return int_log2 (value);
30437 return 0;
30440 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30442 static void
30443 arm_pre_atomic_barrier (enum memmodel model)
30445 if (need_atomic_barrier_p (model, true))
30446 emit_insn (gen_memory_barrier ());
30449 static void
30450 arm_post_atomic_barrier (enum memmodel model)
30452 if (need_atomic_barrier_p (model, false))
30453 emit_insn (gen_memory_barrier ());
30456 /* Emit the load-exclusive and store-exclusive instructions.
30457 Use acquire and release versions if necessary. */
30459 static void
30460 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30462 rtx (*gen) (rtx, rtx);
30464 if (acq)
30466 switch (mode)
30468 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30469 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30470 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30471 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30472 default:
30473 gcc_unreachable ();
30476 else
30478 switch (mode)
30480 case QImode: gen = gen_arm_load_exclusiveqi; break;
30481 case HImode: gen = gen_arm_load_exclusivehi; break;
30482 case SImode: gen = gen_arm_load_exclusivesi; break;
30483 case DImode: gen = gen_arm_load_exclusivedi; break;
30484 default:
30485 gcc_unreachable ();
30489 emit_insn (gen (rval, mem));
30492 static void
30493 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30494 rtx mem, bool rel)
30496 rtx (*gen) (rtx, rtx, rtx);
30498 if (rel)
30500 switch (mode)
30502 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30503 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30504 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30505 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30506 default:
30507 gcc_unreachable ();
30510 else
30512 switch (mode)
30514 case QImode: gen = gen_arm_store_exclusiveqi; break;
30515 case HImode: gen = gen_arm_store_exclusivehi; break;
30516 case SImode: gen = gen_arm_store_exclusivesi; break;
30517 case DImode: gen = gen_arm_store_exclusivedi; break;
30518 default:
30519 gcc_unreachable ();
30523 emit_insn (gen (bval, rval, mem));
30526 /* Mark the previous jump instruction as unlikely. */
30528 static void
30529 emit_unlikely_jump (rtx insn)
30531 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30533 insn = emit_jump_insn (insn);
30534 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30537 /* Expand a compare and swap pattern. */
30539 void
30540 arm_expand_compare_and_swap (rtx operands[])
30542 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30543 machine_mode mode;
30544 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30546 bval = operands[0];
30547 rval = operands[1];
30548 mem = operands[2];
30549 oldval = operands[3];
30550 newval = operands[4];
30551 is_weak = operands[5];
30552 mod_s = operands[6];
30553 mod_f = operands[7];
30554 mode = GET_MODE (mem);
30556 /* Normally the succ memory model must be stronger than fail, but in the
30557 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30558 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30560 if (TARGET_HAVE_LDACQ
30561 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30562 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30563 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30565 switch (mode)
30567 case QImode:
30568 case HImode:
30569 /* For narrow modes, we're going to perform the comparison in SImode,
30570 so do the zero-extension now. */
30571 rval = gen_reg_rtx (SImode);
30572 oldval = convert_modes (SImode, mode, oldval, true);
30573 /* FALLTHRU */
30575 case SImode:
30576 /* Force the value into a register if needed. We waited until after
30577 the zero-extension above to do this properly. */
30578 if (!arm_add_operand (oldval, SImode))
30579 oldval = force_reg (SImode, oldval);
30580 break;
30582 case DImode:
30583 if (!cmpdi_operand (oldval, mode))
30584 oldval = force_reg (mode, oldval);
30585 break;
30587 default:
30588 gcc_unreachable ();
30591 switch (mode)
30593 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30594 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30595 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30596 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30597 default:
30598 gcc_unreachable ();
30601 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30603 if (mode == QImode || mode == HImode)
30604 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30606 /* In all cases, we arrange for success to be signaled by Z set.
30607 This arrangement allows for the boolean result to be used directly
30608 in a subsequent branch, post optimization. */
30609 x = gen_rtx_REG (CCmode, CC_REGNUM);
30610 x = gen_rtx_EQ (SImode, x, const0_rtx);
30611 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30614 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30615 another memory store between the load-exclusive and store-exclusive can
30616 reset the monitor from Exclusive to Open state. This means we must wait
30617 until after reload to split the pattern, lest we get a register spill in
30618 the middle of the atomic sequence. */
30620 void
30621 arm_split_compare_and_swap (rtx operands[])
30623 rtx rval, mem, oldval, newval, scratch;
30624 machine_mode mode;
30625 enum memmodel mod_s, mod_f;
30626 bool is_weak;
30627 rtx_code_label *label1, *label2;
30628 rtx x, cond;
30630 rval = operands[0];
30631 mem = operands[1];
30632 oldval = operands[2];
30633 newval = operands[3];
30634 is_weak = (operands[4] != const0_rtx);
30635 mod_s = (enum memmodel) INTVAL (operands[5]);
30636 mod_f = (enum memmodel) INTVAL (operands[6]);
30637 scratch = operands[7];
30638 mode = GET_MODE (mem);
30640 bool use_acquire = TARGET_HAVE_LDACQ
30641 && !(mod_s == MEMMODEL_RELAXED
30642 || mod_s == MEMMODEL_CONSUME
30643 || mod_s == MEMMODEL_RELEASE);
30645 bool use_release = TARGET_HAVE_LDACQ
30646 && !(mod_s == MEMMODEL_RELAXED
30647 || mod_s == MEMMODEL_CONSUME
30648 || mod_s == MEMMODEL_ACQUIRE);
30650 /* Checks whether a barrier is needed and emits one accordingly. */
30651 if (!(use_acquire || use_release))
30652 arm_pre_atomic_barrier (mod_s);
30654 label1 = NULL;
30655 if (!is_weak)
30657 label1 = gen_label_rtx ();
30658 emit_label (label1);
30660 label2 = gen_label_rtx ();
30662 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30664 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30665 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30666 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30667 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30668 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30670 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30672 /* Weak or strong, we want EQ to be true for success, so that we
30673 match the flags that we got from the compare above. */
30674 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30675 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30676 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30678 if (!is_weak)
30680 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30681 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30682 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30683 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30686 if (mod_f != MEMMODEL_RELAXED)
30687 emit_label (label2);
30689 /* Checks whether a barrier is needed and emits one accordingly. */
30690 if (!(use_acquire || use_release))
30691 arm_post_atomic_barrier (mod_s);
30693 if (mod_f == MEMMODEL_RELAXED)
30694 emit_label (label2);
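/* Rough shape of the code produced by the split above for a strong SImode
   compare-and-swap on a target without acquire/release instructions
   (sketch only, register names illustrative):
        dmb                     @ pre barrier
     1: ldrex   rR, [rM]
        cmp     rR, rO          @ compare against expected value
        bne     2f              @ mismatch: skip the store
        strex   rT, rN, [rM]
        cmp     rT, #0          @ did the store-exclusive succeed?
        bne     1b              @ no: retry
     2: dmb                     @ post barrier
   For a weak compare-and-swap the retry branch is omitted.  */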
30697 void
30698 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30699 rtx value, rtx model_rtx, rtx cond)
30701 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30702 machine_mode mode = GET_MODE (mem);
30703 machine_mode wmode = (mode == DImode ? DImode : SImode);
30704 rtx_code_label *label;
30705 rtx x;
30707 bool use_acquire = TARGET_HAVE_LDACQ
30708 && !(model == MEMMODEL_RELAXED
30709 || model == MEMMODEL_CONSUME
30710 || model == MEMMODEL_RELEASE);
30712 bool use_release = TARGET_HAVE_LDACQ
30713 && !(model == MEMMODEL_RELAXED
30714 || model == MEMMODEL_CONSUME
30715 || model == MEMMODEL_ACQUIRE);
30717 /* Checks whether a barrier is needed and emits one accordingly. */
30718 if (!(use_acquire || use_release))
30719 arm_pre_atomic_barrier (model);
30721 label = gen_label_rtx ();
30722 emit_label (label);
30724 if (new_out)
30725 new_out = gen_lowpart (wmode, new_out);
30726 if (old_out)
30727 old_out = gen_lowpart (wmode, old_out);
30728 else
30729 old_out = new_out;
30730 value = simplify_gen_subreg (wmode, value, mode, 0);
30732 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30734 switch (code)
30736 case SET:
30737 new_out = value;
30738 break;
30740 case NOT:
30741 x = gen_rtx_AND (wmode, old_out, value);
30742 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30743 x = gen_rtx_NOT (wmode, new_out);
30744 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30745 break;
30747 case MINUS:
30748 if (CONST_INT_P (value))
30750 value = GEN_INT (-INTVAL (value));
30751 code = PLUS;
30753 /* FALLTHRU */
30755 case PLUS:
30756 if (mode == DImode)
30758 /* DImode plus/minus need to clobber flags. */
30759 /* The adddi3 and subdi3 patterns are incorrectly written so that
30760 they require matching operands, even when we could easily support
30761 three operands. Thankfully, this can be fixed up post-splitting,
30762 as the individual add+adc patterns do accept three operands and
30763 post-reload cprop can make these moves go away. */
30764 emit_move_insn (new_out, old_out);
30765 if (code == PLUS)
30766 x = gen_adddi3 (new_out, new_out, value);
30767 else
30768 x = gen_subdi3 (new_out, new_out, value);
30769 emit_insn (x);
30770 break;
30772 /* FALLTHRU */
30774 default:
30775 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30776 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30777 break;
30780 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30781 use_release);
30783 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30784 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30786 /* Checks whether a barrier is needed and emits one accordingly. */
30787 if (!(use_acquire || use_release))
30788 arm_post_atomic_barrier (model);
30791 #define MAX_VECT_LEN 16
30793 struct expand_vec_perm_d
30795 rtx target, op0, op1;
30796 unsigned char perm[MAX_VECT_LEN];
30797 machine_mode vmode;
30798 unsigned char nelt;
30799 bool one_vector_p;
30800 bool testing_p;
30803 /* Generate a variable permutation. */
30805 static void
30806 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30808 machine_mode vmode = GET_MODE (target);
30809 bool one_vector_p = rtx_equal_p (op0, op1);
30811 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30812 gcc_checking_assert (GET_MODE (op0) == vmode);
30813 gcc_checking_assert (GET_MODE (op1) == vmode);
30814 gcc_checking_assert (GET_MODE (sel) == vmode);
30815 gcc_checking_assert (TARGET_NEON);
30817 if (one_vector_p)
30819 if (vmode == V8QImode)
30820 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30821 else
30822 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30824 else
30826 rtx pair;
30828 if (vmode == V8QImode)
30830 pair = gen_reg_rtx (V16QImode);
30831 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30832 pair = gen_lowpart (TImode, pair);
30833 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30835 else
30837 pair = gen_reg_rtx (OImode);
30838 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30839 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30844 void
30845 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30847 machine_mode vmode = GET_MODE (target);
30848 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30849 bool one_vector_p = rtx_equal_p (op0, op1);
30850 rtx rmask[MAX_VECT_LEN], mask;
30852 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30853 numbering of elements for big-endian, we must reverse the order. */
30854 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30856 /* The VTBL instruction does not use a modulo index, so we must take care
30857 of that ourselves. */
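/* Illustrative example (not part of the original comment): with two
   V8QImode inputs the selector range under VEC_PERM_EXPR is 0..15, and
   an out-of-range element such as 17 must behave like 17 & 15 == 1.
   VTBL would instead write zero for it, so the AND emitted below folds
   every element of SEL back into range first.  */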
30858 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30859 for (i = 0; i < nelt; ++i)
30860 rmask[i] = mask;
30861 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30862 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30864 arm_expand_vec_perm_1 (target, op0, op1, sel);
30867 /* Generate or test for an insn that supports a constant permutation. */
30869 /* Recognize patterns for the VUZP insns. */
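/* Illustrative example (assumed, derived from the checks below): for two
   V8QImode inputs, "unzipping" the even elements is the permutation
   { 0, 2, 4, 6, 8, 10, 12, 14 } and the odd variant is
   { 1, 3, 5, 7, 9, 11, 13, 15 }; these strided index patterns are exactly
   what the loop below accepts.  */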
30871 static bool
30872 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30874 unsigned int i, odd, mask, nelt = d->nelt;
30875 rtx out0, out1, in0, in1, x;
30876 rtx (*gen)(rtx, rtx, rtx, rtx);
30878 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30879 return false;
30881 /* Note that these are little-endian tests. Adjust for big-endian later. */
30882 if (d->perm[0] == 0)
30883 odd = 0;
30884 else if (d->perm[0] == 1)
30885 odd = 1;
30886 else
30887 return false;
30888 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30890 for (i = 0; i < nelt; i++)
30892 unsigned elt = (i * 2 + odd) & mask;
30893 if (d->perm[i] != elt)
30894 return false;
30897 /* Success! */
30898 if (d->testing_p)
30899 return true;
30901 switch (d->vmode)
30903 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30904 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30905 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30906 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30907 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30908 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30909 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30910 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30911 default:
30912 gcc_unreachable ();
30915 in0 = d->op0;
30916 in1 = d->op1;
30917 if (BYTES_BIG_ENDIAN)
30919 x = in0, in0 = in1, in1 = x;
30920 odd = !odd;
30923 out0 = d->target;
30924 out1 = gen_reg_rtx (d->vmode);
30925 if (odd)
30926 x = out0, out0 = out1, out1 = x;
30928 emit_insn (gen (out0, in0, in1, out1));
30929 return true;
30932 /* Recognize patterns for the VZIP insns. */
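/* Illustrative example (assumed, derived from the checks below): for two
   V8QImode inputs, zipping the low halves is the permutation
   { 0, 8, 1, 9, 2, 10, 3, 11 } and zipping the high halves is
   { 4, 12, 5, 13, 6, 14, 7, 15 }; the loop below accepts exactly this
   interleaving of corresponding elements.  */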
30934 static bool
30935 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30937 unsigned int i, high, mask, nelt = d->nelt;
30938 rtx out0, out1, in0, in1, x;
30939 rtx (*gen)(rtx, rtx, rtx, rtx);
30941 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30942 return false;
30944 /* Note that these are little-endian tests. Adjust for big-endian later. */
30945 high = nelt / 2;
30946 if (d->perm[0] == high)
30948 else if (d->perm[0] == 0)
30949 high = 0;
30950 else
30951 return false;
30952 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30954 for (i = 0; i < nelt / 2; i++)
30956 unsigned elt = (i + high) & mask;
30957 if (d->perm[i * 2] != elt)
30958 return false;
30959 elt = (elt + nelt) & mask;
30960 if (d->perm[i * 2 + 1] != elt)
30961 return false;
30964 /* Success! */
30965 if (d->testing_p)
30966 return true;
30968 switch (d->vmode)
30970 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30971 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30972 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30973 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30974 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30975 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30976 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30977 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30978 default:
30979 gcc_unreachable ();
30982 in0 = d->op0;
30983 in1 = d->op1;
30984 if (BYTES_BIG_ENDIAN)
30986 x = in0, in0 = in1, in1 = x;
30987 high = !high;
30990 out0 = d->target;
30991 out1 = gen_reg_rtx (d->vmode);
30992 if (high)
30993 x = out0, out0 = out1, out1 = x;
30995 emit_insn (gen (out0, in0, in1, out1));
30996 return true;
30999 /* Recognize patterns for the VREV insns. */
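/* Illustrative example (assumed, derived from the switch below): DIFF is
   d->perm[0] and selects the reversal width.  For V8HImode, DIFF == 1
   means swapping adjacent halfwords, i.e. { 1, 0, 3, 2, 5, 4, 7, 6 },
   which maps to a 32-bit VREV, while DIFF == 3 reverses each group of
   four halfwords, { 3, 2, 1, 0, 7, 6, 5, 4 }, and maps to a 64-bit
   VREV.  */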
31001 static bool
31002 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31004 unsigned int i, j, diff, nelt = d->nelt;
31005 rtx (*gen)(rtx, rtx);
31007 if (!d->one_vector_p)
31008 return false;
31010 diff = d->perm[0];
31011 switch (diff)
31013 case 7:
31014 switch (d->vmode)
31016 case V16QImode: gen = gen_neon_vrev64v16qi; break;
31017 case V8QImode: gen = gen_neon_vrev64v8qi; break;
31018 default:
31019 return false;
31021 break;
31022 case 3:
31023 switch (d->vmode)
31025 case V16QImode: gen = gen_neon_vrev32v16qi; break;
31026 case V8QImode: gen = gen_neon_vrev32v8qi; break;
31027 case V8HImode: gen = gen_neon_vrev64v8hi; break;
31028 case V4HImode: gen = gen_neon_vrev64v4hi; break;
31029 default:
31030 return false;
31032 break;
31033 case 1:
31034 switch (d->vmode)
31036 case V16QImode: gen = gen_neon_vrev16v16qi; break;
31037 case V8QImode: gen = gen_neon_vrev16v8qi; break;
31038 case V8HImode: gen = gen_neon_vrev32v8hi; break;
31039 case V4HImode: gen = gen_neon_vrev32v4hi; break;
31040 case V4SImode: gen = gen_neon_vrev64v4si; break;
31041 case V2SImode: gen = gen_neon_vrev64v2si; break;
31042 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
31043 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
31044 default:
31045 return false;
31047 break;
31048 default:
31049 return false;
31052 for (i = 0; i < nelt ; i += diff + 1)
31053 for (j = 0; j <= diff; j += 1)
31055 /* This is guaranteed to hold, as the value of DIFF is 7, 3 or 1
31056 and there are always enough elements left in the vector to
31057 cover the current group. Getting a selector with a value of
31058 DIFF other than these implies that something has gone wrong
31059 by the time we get here. */
31060 gcc_assert (i + j < nelt);
31061 if (d->perm[i + j] != i + diff - j)
31062 return false;
31065 /* Success! */
31066 if (d->testing_p)
31067 return true;
31069 emit_insn (gen (d->target, d->op0));
31070 return true;
31073 /* Recognize patterns for the VTRN insns. */
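/* Illustrative example (assumed, derived from the checks below): for two
   V8QImode inputs, transposing the even elements is the permutation
   { 0, 8, 2, 10, 4, 12, 6, 14 } and the odd variant is
   { 1, 9, 3, 11, 5, 13, 7, 15 }; each pair takes the same-numbered
   element from both inputs, which is what the loop below verifies.  */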
31075 static bool
31076 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31078 unsigned int i, odd, mask, nelt = d->nelt;
31079 rtx out0, out1, in0, in1, x;
31080 rtx (*gen)(rtx, rtx, rtx, rtx);
31082 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31083 return false;
31085 /* Note that these are little-endian tests. Adjust for big-endian later. */
31086 if (d->perm[0] == 0)
31087 odd = 0;
31088 else if (d->perm[0] == 1)
31089 odd = 1;
31090 else
31091 return false;
31092 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31094 for (i = 0; i < nelt; i += 2)
31096 if (d->perm[i] != i + odd)
31097 return false;
31098 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31099 return false;
31102 /* Success! */
31103 if (d->testing_p)
31104 return true;
31106 switch (d->vmode)
31108 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31109 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31110 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31111 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31112 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31113 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31114 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31115 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31116 default:
31117 gcc_unreachable ();
31120 in0 = d->op0;
31121 in1 = d->op1;
31122 if (BYTES_BIG_ENDIAN)
31124 x = in0, in0 = in1, in1 = x;
31125 odd = !odd;
31128 out0 = d->target;
31129 out1 = gen_reg_rtx (d->vmode);
31130 if (odd)
31131 x = out0, out0 = out1, out1 = x;
31133 emit_insn (gen (out0, in0, in1, out1));
31134 return true;
31137 /* Recognize patterns for the VEXT insns. */
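/* Illustrative example (assumed, derived from the checks below): VEXT
   extracts a contiguous run of elements that starts in the first vector
   and may continue into the second.  For two V8QImode inputs the
   permutation { 3, 4, 5, 6, 7, 8, 9, 10 } is such a run and is emitted
   as a single VEXT with an offset of 3.  */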
31139 static bool
31140 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31142 unsigned int i, nelt = d->nelt;
31143 rtx (*gen) (rtx, rtx, rtx, rtx);
31144 rtx offset;
31146 unsigned int location;
31148 unsigned int next = d->perm[0] + 1;
31150 /* TODO: Handle GCC's numbering of elements for big-endian. */
31151 if (BYTES_BIG_ENDIAN)
31152 return false;
31154 /* Check if the extracted indexes are increasing by one. */
31155 for (i = 1; i < nelt; next++, i++)
31157 /* If we hit the most significant element of the 2nd vector in
31158 the previous iteration, no need to test further. */
31159 if (next == 2 * nelt)
31160 return false;
31162 /* If we are operating on only one vector: it could be a
31163 rotation. If there are only two elements of size < 64, let
31164 arm_evpc_neon_vrev catch it. */
31165 if (d->one_vector_p && (next == nelt))
31167 if ((nelt == 2) && (d->vmode != V2DImode))
31168 return false;
31169 else
31170 next = 0;
31173 if (d->perm[i] != next)
31174 return false;
31177 location = d->perm[0];
31179 switch (d->vmode)
31181 case V16QImode: gen = gen_neon_vextv16qi; break;
31182 case V8QImode: gen = gen_neon_vextv8qi; break;
31183 case V4HImode: gen = gen_neon_vextv4hi; break;
31184 case V8HImode: gen = gen_neon_vextv8hi; break;
31185 case V2SImode: gen = gen_neon_vextv2si; break;
31186 case V4SImode: gen = gen_neon_vextv4si; break;
31187 case V2SFmode: gen = gen_neon_vextv2sf; break;
31188 case V4SFmode: gen = gen_neon_vextv4sf; break;
31189 case V2DImode: gen = gen_neon_vextv2di; break;
31190 default:
31191 return false;
31194 /* Success! */
31195 if (d->testing_p)
31196 return true;
31198 offset = GEN_INT (location);
31199 emit_insn (gen (d->target, d->op0, d->op1, offset));
31200 return true;
31203 /* The NEON VTBL instruction is a fully variable permutation that's even
31204 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31205 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31206 can do slightly better by expanding this as a constant where we don't
31207 have to apply a mask. */
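/* Illustrative note (assumed): the caller has already reduced each
   element of a constant selector into range by masking it with
   2 * nelt - 1, so here we can simply materialise the selector as a
   CONST_VECTOR and hand it to arm_expand_vec_perm_1, avoiding the
   run-time AND that the variable-permutation expander needs.  */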
31209 static bool
31210 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31212 rtx rperm[MAX_VECT_LEN], sel;
31213 machine_mode vmode = d->vmode;
31214 unsigned int i, nelt = d->nelt;
31216 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31217 numbering of elements for big-endian, we must reverse the order. */
31218 if (BYTES_BIG_ENDIAN)
31219 return false;
31221 if (d->testing_p)
31222 return true;
31224 /* Generic code will try constant permutation twice: once with the
31225 original mode and again with the elements lowered to QImode.
31226 So wait and don't do the selector expansion ourselves. */
31227 if (vmode != V8QImode && vmode != V16QImode)
31228 return false;
31230 for (i = 0; i < nelt; ++i)
31231 rperm[i] = GEN_INT (d->perm[i]);
31232 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31233 sel = force_reg (vmode, sel);
31235 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31236 return true;
31239 static bool
31240 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31242 /* Check if the input mask matches vext before reordering the
31243 operands. */
31244 if (TARGET_NEON)
31245 if (arm_evpc_neon_vext (d))
31246 return true;
31248 /* The pattern matching functions above are written to look for a small
31249 number to begin the sequence (0, 1, N/2). If we begin with an index
31250 from the second operand, we can swap the operands. */
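/* Illustrative example (assumed): with nelt == 4, the permutation
   { 4, 0, 5, 1 } begins in the second operand; after swapping the
   operands and adding nelt modulo 2 * nelt it becomes { 0, 4, 1, 5 },
   which the VZIP recognizer can then match directly.  */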
31251 if (d->perm[0] >= d->nelt)
31253 unsigned i, nelt = d->nelt;
31254 rtx x;
31256 for (i = 0; i < nelt; ++i)
31257 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31259 x = d->op0;
31260 d->op0 = d->op1;
31261 d->op1 = x;
31264 if (TARGET_NEON)
31266 if (arm_evpc_neon_vuzp (d))
31267 return true;
31268 if (arm_evpc_neon_vzip (d))
31269 return true;
31270 if (arm_evpc_neon_vrev (d))
31271 return true;
31272 if (arm_evpc_neon_vtrn (d))
31273 return true;
31274 return arm_evpc_neon_vtbl (d);
31276 return false;
31279 /* Expand a vec_perm_const pattern. */
31281 bool
31282 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31284 struct expand_vec_perm_d d;
31285 int i, nelt, which;
31287 d.target = target;
31288 d.op0 = op0;
31289 d.op1 = op1;
31291 d.vmode = GET_MODE (target);
31292 gcc_assert (VECTOR_MODE_P (d.vmode));
31293 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31294 d.testing_p = false;
31296 for (i = which = 0; i < nelt; ++i)
31298 rtx e = XVECEXP (sel, 0, i);
31299 int ei = INTVAL (e) & (2 * nelt - 1);
31300 which |= (ei < nelt ? 1 : 2);
31301 d.perm[i] = ei;
31304 switch (which)
31306 default:
31307 gcc_unreachable();
31309 case 3:
31310 d.one_vector_p = false;
31311 if (!rtx_equal_p (op0, op1))
31312 break;
31314 /* The elements of PERM do not suggest that only the first operand
31315 is used, but both operands are identical. Allow easier matching
31316 of the permutation by folding the permutation into the single
31317 input vector. */
31318 /* FALLTHRU */
31319 case 2:
31320 for (i = 0; i < nelt; ++i)
31321 d.perm[i] &= nelt - 1;
31322 d.op0 = op1;
31323 d.one_vector_p = true;
31324 break;
31326 case 1:
31327 d.op1 = op0;
31328 d.one_vector_p = true;
31329 break;
31332 return arm_expand_vec_perm_const_1 (&d);
31335 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31337 static bool
31338 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31339 const unsigned char *sel)
31341 struct expand_vec_perm_d d;
31342 unsigned int i, nelt, which;
31343 bool ret;
31345 d.vmode = vmode;
31346 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31347 d.testing_p = true;
31348 memcpy (d.perm, sel, nelt);
31350 /* Categorize the set of elements in the selector. */
31351 for (i = which = 0; i < nelt; ++i)
31353 unsigned char e = d.perm[i];
31354 gcc_assert (e < 2 * nelt);
31355 which |= (e < nelt ? 1 : 2);
31358 /* If all elements come from the second vector, fold them onto the first. */
31359 if (which == 2)
31360 for (i = 0; i < nelt; ++i)
31361 d.perm[i] -= nelt;
31363 /* Check whether the mask can be applied to the vector type. */
31364 d.one_vector_p = (which != 3);
31366 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31367 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31368 if (!d.one_vector_p)
31369 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31371 start_sequence ();
31372 ret = arm_expand_vec_perm_const_1 (&d);
31373 end_sequence ();
31375 return ret;
31378 bool
31379 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31381 /* If we are soft float and we either have ldrd or the mode fits
31382 in a single word, then all auto increment forms are ok. */
31383 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31384 return true;
31386 switch (code)
31388 /* Post-increment and pre-decrement are supported for all
31389 instruction forms except for vector forms. */
31390 case ARM_POST_INC:
31391 case ARM_PRE_DEC:
31392 if (VECTOR_MODE_P (mode))
31394 if (code != ARM_PRE_DEC)
31395 return true;
31396 else
31397 return false;
31400 return true;
31402 case ARM_POST_DEC:
31403 case ARM_PRE_INC:
31404 /* Without LDRD, when the mode size is greater than
31405 the word size, there is no point in auto-incrementing
31406 because ldm and stm will not have these forms. */
31407 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31408 return false;
31410 /* Vector and floating point modes do not support
31411 these auto increment forms. */
31412 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31413 return false;
31415 return true;
31417 default:
31418 return false;
31422 return false;
31425 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31426 on ARM, since we know that shifts by negative amounts are no-ops.
31427 Additionally, the default expansion code is not available or suitable
31428 for post-reload insn splits (this can occur when the register allocator
31429 chooses not to do a shift in NEON).
31431 This function is used in both initial expand and post-reload splits, and
31432 handles all kinds of 64-bit shifts.
31434 Input requirements:
31435 - It is safe for the input and output to be the same register, but
31436 early-clobber rules apply for the shift amount and scratch registers.
31437 - Shift by register requires both scratch registers. In all other cases
31438 the scratch registers may be NULL.
31439 - Ashiftrt by a register also clobbers the CC register. */
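/* Worked example (illustrative, not from the original comment): a 64-bit
   left shift by the constant 8 is split into

	out_high = (in_high << 8) | (in_low >> 24);
	out_low  = in_low << 8;

   i.e. the "down" (high) word collects the bits that cross the 32-bit
   boundary while the "up" (low) word is a plain single-register shift;
   the shift-by-constant code below emits exactly this sequence.  */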
31440 void
31441 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31442 rtx amount, rtx scratch1, rtx scratch2)
31444 rtx out_high = gen_highpart (SImode, out);
31445 rtx out_low = gen_lowpart (SImode, out);
31446 rtx in_high = gen_highpart (SImode, in);
31447 rtx in_low = gen_lowpart (SImode, in);
31449 /* Terminology:
31450 in = the register pair containing the input value.
31451 out = the destination register pair.
31452 up = the high- or low-part of each pair.
31453 down = the opposite part to "up".
31454 In a shift, we can consider bits to shift from "up"-stream to
31455 "down"-stream, so in a left-shift "up" is the low-part and "down"
31456 is the high-part of each register pair. */
31458 rtx out_up = code == ASHIFT ? out_low : out_high;
31459 rtx out_down = code == ASHIFT ? out_high : out_low;
31460 rtx in_up = code == ASHIFT ? in_low : in_high;
31461 rtx in_down = code == ASHIFT ? in_high : in_low;
31463 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31464 gcc_assert (out
31465 && (REG_P (out) || GET_CODE (out) == SUBREG)
31466 && GET_MODE (out) == DImode);
31467 gcc_assert (in
31468 && (REG_P (in) || GET_CODE (in) == SUBREG)
31469 && GET_MODE (in) == DImode);
31470 gcc_assert (amount
31471 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31472 && GET_MODE (amount) == SImode)
31473 || CONST_INT_P (amount)));
31474 gcc_assert (scratch1 == NULL
31475 || (GET_CODE (scratch1) == SCRATCH)
31476 || (GET_MODE (scratch1) == SImode
31477 && REG_P (scratch1)));
31478 gcc_assert (scratch2 == NULL
31479 || (GET_CODE (scratch2) == SCRATCH)
31480 || (GET_MODE (scratch2) == SImode
31481 && REG_P (scratch2)));
31482 gcc_assert (!REG_P (out) || !REG_P (amount)
31483 || !HARD_REGISTER_P (out)
31484 || (REGNO (out) != REGNO (amount)
31485 && REGNO (out) + 1 != REGNO (amount)));
31487 /* Macros to make following code more readable. */
31488 #define SUB_32(DEST,SRC) \
31489 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31490 #define RSB_32(DEST,SRC) \
31491 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31492 #define SUB_S_32(DEST,SRC) \
31493 gen_addsi3_compare0 ((DEST), (SRC), \
31494 GEN_INT (-32))
31495 #define SET(DEST,SRC) \
31496 gen_rtx_SET (SImode, (DEST), (SRC))
31497 #define SHIFT(CODE,SRC,AMOUNT) \
31498 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31499 #define LSHIFT(CODE,SRC,AMOUNT) \
31500 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31501 SImode, (SRC), (AMOUNT))
31502 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31503 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31504 SImode, (SRC), (AMOUNT))
31505 #define ORR(A,B) \
31506 gen_rtx_IOR (SImode, (A), (B))
31507 #define BRANCH(COND,LABEL) \
31508 gen_arm_cond_branch ((LABEL), \
31509 gen_rtx_ ## COND (CCmode, cc_reg, \
31510 const0_rtx), \
31511 cc_reg)
31513 /* Shifts by register and shifts by constant are handled separately. */
31514 if (CONST_INT_P (amount))
31516 /* We have a shift-by-constant. */
31518 /* First, handle out-of-range shift amounts.
31519 In both cases we try to match the result an ARM instruction in a
31520 shift-by-register would give. This helps reduce execution
31521 differences between optimization levels, but it won't stop other
31522 parts of the compiler doing different things. This is "undefined
31523 behaviour", in any case. */
31524 if (INTVAL (amount) <= 0)
31525 emit_insn (gen_movdi (out, in));
31526 else if (INTVAL (amount) >= 64)
31528 if (code == ASHIFTRT)
31530 rtx const31_rtx = GEN_INT (31);
31531 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31532 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31534 else
31535 emit_insn (gen_movdi (out, const0_rtx));
31538 /* Now handle valid shifts. */
31539 else if (INTVAL (amount) < 32)
31541 /* Shifts by a constant less than 32. */
31542 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31544 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31545 emit_insn (SET (out_down,
31546 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31547 out_down)));
31548 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31550 else
31552 /* Shifts by a constant greater than 31. */
31553 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31555 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31556 if (code == ASHIFTRT)
31557 emit_insn (gen_ashrsi3 (out_up, in_up,
31558 GEN_INT (31)));
31559 else
31560 emit_insn (SET (out_up, const0_rtx));
31563 else
31565 /* We have a shift-by-register. */
31566 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31568 /* This alternative requires the scratch registers. */
31569 gcc_assert (scratch1 && REG_P (scratch1));
31570 gcc_assert (scratch2 && REG_P (scratch2));
31572 /* We will need the values "amount-32" and "32-amount" later.
31573 Swapping them around now allows the later code to be more general. */
31574 switch (code)
31576 case ASHIFT:
31577 emit_insn (SUB_32 (scratch1, amount));
31578 emit_insn (RSB_32 (scratch2, amount));
31579 break;
31580 case ASHIFTRT:
31581 emit_insn (RSB_32 (scratch1, amount));
31582 /* Also set CC = amount > 32. */
31583 emit_insn (SUB_S_32 (scratch2, amount));
31584 break;
31585 case LSHIFTRT:
31586 emit_insn (RSB_32 (scratch1, amount));
31587 emit_insn (SUB_32 (scratch2, amount));
31588 break;
31589 default:
31590 gcc_unreachable ();
31593 /* Emit code like this:
31595 arithmetic-left:
31596 out_down = in_down << amount;
31597 out_down = (in_up << (amount - 32)) | out_down;
31598 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31599 out_up = in_up << amount;
31601 arithmetic-right:
31602 out_down = in_down >> amount;
31603 out_down = (in_up << (32 - amount)) | out_down;
31604 if (amount < 32)
31605 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31606 out_up = in_up >> amount;
31608 logical-right:
31609 out_down = in_down >> amount;
31610 out_down = (in_up << (32 - amount)) | out_down;
31611 if (amount < 32)
31612 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31613 out_up = in_up >> amount;
31615 The ARM and Thumb2 variants are the same but implemented slightly
31616 differently. If this were only called during expand we could just
31617 use the Thumb2 case and let combine do the right thing, but this
31618 can also be called from post-reload splitters. */
31620 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31622 if (!TARGET_THUMB2)
31624 /* Emit code for ARM mode. */
31625 emit_insn (SET (out_down,
31626 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31627 if (code == ASHIFTRT)
31629 rtx_code_label *done_label = gen_label_rtx ();
31630 emit_jump_insn (BRANCH (LT, done_label));
31631 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31632 out_down)));
31633 emit_label (done_label);
31635 else
31636 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31637 out_down)));
31639 else
31641 /* Emit code for Thumb2 mode.
31642 Thumb2 can't do shift and or in one insn. */
31643 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31644 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31646 if (code == ASHIFTRT)
31648 rtx_code_label *done_label = gen_label_rtx ();
31649 emit_jump_insn (BRANCH (LT, done_label));
31650 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31651 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31652 emit_label (done_label);
31654 else
31656 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31657 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31661 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31664 #undef SUB_32
31665 #undef RSB_32
31666 #undef SUB_S_32
31667 #undef SET
31668 #undef SHIFT
31669 #undef LSHIFT
31670 #undef REV_LSHIFT
31671 #undef ORR
31672 #undef BRANCH
31676 /* Return true if *COMPARISON is a valid comparison operation, putting
31677 the operands into a form that is valid for it. */
31678 bool
31679 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31681 enum rtx_code code = GET_CODE (*comparison);
31682 int code_int;
31683 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31684 ? GET_MODE (*op2) : GET_MODE (*op1);
31686 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31688 if (code == UNEQ || code == LTGT)
31689 return false;
31691 code_int = (int)code;
31692 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31693 PUT_CODE (*comparison, (enum rtx_code)code_int);
31695 switch (mode)
31697 case SImode:
31698 if (!arm_add_operand (*op1, mode))
31699 *op1 = force_reg (mode, *op1);
31700 if (!arm_add_operand (*op2, mode))
31701 *op2 = force_reg (mode, *op2);
31702 return true;
31704 case DImode:
31705 if (!cmpdi_operand (*op1, mode))
31706 *op1 = force_reg (mode, *op1);
31707 if (!cmpdi_operand (*op2, mode))
31708 *op2 = force_reg (mode, *op2);
31709 return true;
31711 case SFmode:
31712 case DFmode:
31713 if (!arm_float_compare_operand (*op1, mode))
31714 *op1 = force_reg (mode, *op1);
31715 if (!arm_float_compare_operand (*op2, mode))
31716 *op2 = force_reg (mode, *op2);
31717 return true;
31718 default:
31719 break;
31722 return false;
31726 /* Maximum number of instructions to use when setting a block of memory. */
31727 static int
31728 arm_block_set_max_insns (void)
31730 if (optimize_function_for_size_p (cfun))
31731 return 4;
31732 else
31733 return current_tune->max_insns_inline_memset;
31736 /* Return TRUE if it's profitable to set a block of memory in the
31737 non-vectorized case. VAL is the value to set the memory
31738 with. LENGTH is the number of bytes to set. ALIGN is the
31739 alignment of the destination memory in bytes. UNALIGNED_P
31740 is TRUE if we can only set the memory with instructions
31741 meeting alignment requirements. USE_STRD_P is TRUE if we
31742 can use strd to set the memory. */
31743 static bool
31744 arm_block_set_non_vect_profit_p (rtx val,
31745 unsigned HOST_WIDE_INT length,
31746 unsigned HOST_WIDE_INT align,
31747 bool unaligned_p, bool use_strd_p)
31749 int num = 0;
31750 /* For a leftover of 0-7 bytes, this table gives the minimum number of
31751 strb/strh/str instructions needed to store it. */
31752 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
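  /* Worked example (illustrative): for LENGTH == 15 on a word-aligned
     destination without strd, the word path below needs (15 >> 2) == 3
     word stores plus leftover[15 & 3] == 2 tail stores (an STRH and an
     STRB), on top of the cost of loading the constant; with unaligned
     access available the trailing STRH/STRB pair is merged into one STR,
     so NUM is reduced by one.  */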
31754 if (unaligned_p)
31756 num = arm_const_inline_cost (SET, val);
31757 num += length / align + length % align;
31759 else if (use_strd_p)
31761 num = arm_const_double_inline_cost (val);
31762 num += (length >> 3) + leftover[length & 7];
31764 else
31766 num = arm_const_inline_cost (SET, val);
31767 num += (length >> 2) + leftover[length & 3];
31770 /* We may be able to combine the last STRH/STRB pair into a single STR
31771 by shifting one byte back. */
31772 if (unaligned_access && length > 3 && (length & 3) == 3)
31773 num--;
31775 return (num <= arm_block_set_max_insns ());
31778 /* Return TRUE if it's profitable to set a block of memory in the
31779 vectorized case. LENGTH is the number of bytes to set.
31780 ALIGN is the alignment of destination memory in bytes.
31781 MODE is the vector mode used to set the memory. */
31782 static bool
31783 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31784 unsigned HOST_WIDE_INT align,
31785 machine_mode mode)
31787 int num;
31788 bool unaligned_p = ((align & 3) != 0);
31789 unsigned int nelt = GET_MODE_NUNITS (mode);
31791 /* Instruction loading constant value. */
31792 num = 1;
31793 /* Instructions storing the memory. */
31794 num += (length + nelt - 1) / nelt;
31795 /* Instructions adjusting the address expression. We only need to
31796 adjust it if the destination is 4-byte aligned and the leftover
31797 bytes can only be stored with a misaligned store instruction. */
31798 if (!unaligned_p && (length & 3) != 0)
31799 num++;
31801 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31802 if (!unaligned_p && mode == V16QImode)
31803 num--;
31805 return (num <= arm_block_set_max_insns ());
31808 /* Set a block of memory using vectorization instructions for the
31809 unaligned case. We fill the first LENGTH bytes of the memory
31810 area starting from DSTBASE with byte constant VALUE. ALIGN is
31811 the alignment requirement of memory. Return TRUE if succeeded. */
31812 static bool
31813 arm_block_set_unaligned_vect (rtx dstbase,
31814 unsigned HOST_WIDE_INT length,
31815 unsigned HOST_WIDE_INT value,
31816 unsigned HOST_WIDE_INT align)
31818 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31819 rtx dst, mem;
31820 rtx val_elt, val_vec, reg;
31821 rtx rval[MAX_VECT_LEN];
31822 rtx (*gen_func) (rtx, rtx);
31823 machine_mode mode;
31824 unsigned HOST_WIDE_INT v = value;
31826 gcc_assert ((align & 0x3) != 0);
31827 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31828 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31829 if (length >= nelt_v16)
31831 mode = V16QImode;
31832 gen_func = gen_movmisalignv16qi;
31834 else
31836 mode = V8QImode;
31837 gen_func = gen_movmisalignv8qi;
31839 nelt_mode = GET_MODE_NUNITS (mode);
31840 gcc_assert (length >= nelt_mode);
31841 /* Skip if it isn't profitable. */
31842 if (!arm_block_set_vect_profit_p (length, align, mode))
31843 return false;
31845 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31846 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31848 v = sext_hwi (v, BITS_PER_WORD);
31849 val_elt = GEN_INT (v);
31850 for (j = 0; j < nelt_mode; j++)
31851 rval[j] = val_elt;
31853 reg = gen_reg_rtx (mode);
31854 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31855 /* Emit instruction loading the constant value. */
31856 emit_move_insn (reg, val_vec);
31858 /* Handle nelt_mode bytes in a vector. */
31859 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31861 emit_insn ((*gen_func) (mem, reg));
31862 if (i + 2 * nelt_mode <= length)
31863 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31866 /* If at least nelt_v8 bytes are left over, we must be in
31867 V16QImode. */
31868 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31870 /* Handle (8, 16) bytes leftover. */
31871 if (i + nelt_v8 < length)
31873 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31874 /* We are shifting bytes back, set the alignment accordingly. */
31875 if ((length & 1) != 0 && align >= 2)
31876 set_mem_align (mem, BITS_PER_UNIT);
31878 emit_insn (gen_movmisalignv16qi (mem, reg));
31880 /* Handle (0, 8] bytes leftover. */
31881 else if (i < length && i + nelt_v8 >= length)
31883 if (mode == V16QImode)
31885 reg = gen_lowpart (V8QImode, reg);
31886 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31888 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31889 + (nelt_mode - nelt_v8))));
31890 /* We are shifting bytes back, set the alignment accordingly. */
31891 if ((length & 1) != 0 && align >= 2)
31892 set_mem_align (mem, BITS_PER_UNIT);
31894 emit_insn (gen_movmisalignv8qi (mem, reg));
31897 return true;
31900 /* Set a block of memory using vectorization instructions for the
31901 aligned case. We fill the first LENGTH bytes of the memory area
31902 starting from DSTBASE with byte constant VALUE. ALIGN is the
31903 alignment requirement of memory. Return TRUE if succeeded. */
31904 static bool
31905 arm_block_set_aligned_vect (rtx dstbase,
31906 unsigned HOST_WIDE_INT length,
31907 unsigned HOST_WIDE_INT value,
31908 unsigned HOST_WIDE_INT align)
31910 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31911 rtx dst, addr, mem;
31912 rtx val_elt, val_vec, reg;
31913 rtx rval[MAX_VECT_LEN];
31914 machine_mode mode;
31915 unsigned HOST_WIDE_INT v = value;
31917 gcc_assert ((align & 0x3) == 0);
31918 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31919 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31920 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31921 mode = V16QImode;
31922 else
31923 mode = V8QImode;
31925 nelt_mode = GET_MODE_NUNITS (mode);
31926 gcc_assert (length >= nelt_mode);
31927 /* Skip if it isn't profitable. */
31928 if (!arm_block_set_vect_profit_p (length, align, mode))
31929 return false;
31931 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31933 v = sext_hwi (v, BITS_PER_WORD);
31934 val_elt = GEN_INT (v);
31935 for (j = 0; j < nelt_mode; j++)
31936 rval[j] = val_elt;
31938 reg = gen_reg_rtx (mode);
31939 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31940 /* Emit instruction loading the constant value. */
31941 emit_move_insn (reg, val_vec);
31943 i = 0;
31944 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31945 if (mode == V16QImode)
31947 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31948 emit_insn (gen_movmisalignv16qi (mem, reg));
31949 i += nelt_mode;
31950 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31951 if (i + nelt_v8 < length && i + nelt_v16 > length)
31953 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31954 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31955 /* We are shifting bytes back, set the alignment accordingly. */
31956 if ((length & 0x3) == 0)
31957 set_mem_align (mem, BITS_PER_UNIT * 4);
31958 else if ((length & 0x1) == 0)
31959 set_mem_align (mem, BITS_PER_UNIT * 2);
31960 else
31961 set_mem_align (mem, BITS_PER_UNIT);
31963 emit_insn (gen_movmisalignv16qi (mem, reg));
31964 return true;
31966 /* Fall through for bytes leftover. */
31967 mode = V8QImode;
31968 nelt_mode = GET_MODE_NUNITS (mode);
31969 reg = gen_lowpart (V8QImode, reg);
31972 /* Handle 8 bytes in a vector. */
31973 for (; (i + nelt_mode <= length); i += nelt_mode)
31975 addr = plus_constant (Pmode, dst, i);
31976 mem = adjust_automodify_address (dstbase, mode, addr, i);
31977 emit_move_insn (mem, reg);
31980 /* Handle single word leftover by shifting 4 bytes back. We can
31981 use aligned access for this case. */
31982 if (i + UNITS_PER_WORD == length)
31984 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31985 mem = adjust_automodify_address (dstbase, mode,
31986 addr, i - UNITS_PER_WORD);
31987 /* We are shifting 4 bytes back, set the alignment accordingly. */
31988 if (align > UNITS_PER_WORD)
31989 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31991 emit_move_insn (mem, reg);
31993 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31994 We have to use unaligned access for this case. */
31995 else if (i < length)
31997 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31998 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31999 /* We are shifting bytes back, set the alignment accordingly. */
32000 if ((length & 1) == 0)
32001 set_mem_align (mem, BITS_PER_UNIT * 2);
32002 else
32003 set_mem_align (mem, BITS_PER_UNIT);
32005 emit_insn (gen_movmisalignv8qi (mem, reg));
32008 return true;
32011 /* Set a block of memory using plain strh/strb instructions, only
32012 using instructions allowed by ALIGN on the processor. We fill the
32013 first LENGTH bytes of the memory area starting from DSTBASE
32014 with byte constant VALUE. ALIGN is the alignment requirement
32015 of memory. */
32016 static bool
32017 arm_block_set_unaligned_non_vect (rtx dstbase,
32018 unsigned HOST_WIDE_INT length,
32019 unsigned HOST_WIDE_INT value,
32020 unsigned HOST_WIDE_INT align)
32022 unsigned int i;
32023 rtx dst, addr, mem;
32024 rtx val_exp, val_reg, reg;
32025 machine_mode mode;
32026 HOST_WIDE_INT v = value;
32028 gcc_assert (align == 1 || align == 2);
32030 if (align == 2)
32031 v |= (value << BITS_PER_UNIT);
32033 v = sext_hwi (v, BITS_PER_WORD);
32034 val_exp = GEN_INT (v);
32035 /* Skip if it isn't profitable. */
32036 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32037 align, true, false))
32038 return false;
32040 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32041 mode = (align == 2 ? HImode : QImode);
32042 val_reg = force_reg (SImode, val_exp);
32043 reg = gen_lowpart (mode, val_reg);
32045 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32047 addr = plus_constant (Pmode, dst, i);
32048 mem = adjust_automodify_address (dstbase, mode, addr, i);
32049 emit_move_insn (mem, reg);
32052 /* Handle single byte leftover. */
32053 if (i + 1 == length)
32055 reg = gen_lowpart (QImode, val_reg);
32056 addr = plus_constant (Pmode, dst, i);
32057 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32058 emit_move_insn (mem, reg);
32059 i++;
32062 gcc_assert (i == length);
32063 return true;
32066 /* Set a block of memory using plain strd/str/strh/strb instructions,
32067 to permit unaligned copies on processors which support unaligned
32068 semantics for those instructions. We fill the first LENGTH bytes
32069 of the memory area starting from DSTBASE with byte constant VALUE.
32070 ALIGN is the alignment requirement of memory. */
32071 static bool
32072 arm_block_set_aligned_non_vect (rtx dstbase,
32073 unsigned HOST_WIDE_INT length,
32074 unsigned HOST_WIDE_INT value,
32075 unsigned HOST_WIDE_INT align)
32077 unsigned int i;
32078 rtx dst, addr, mem;
32079 rtx val_exp, val_reg, reg;
32080 unsigned HOST_WIDE_INT v;
32081 bool use_strd_p;
32083 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32084 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32086 v = (value | (value << 8) | (value << 16) | (value << 24));
32087 if (length < UNITS_PER_WORD)
32088 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32090 if (use_strd_p)
32091 v |= (v << BITS_PER_WORD);
32092 else
32093 v = sext_hwi (v, BITS_PER_WORD);
32095 val_exp = GEN_INT (v);
32096 /* Skip if it isn't profitable. */
32097 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32098 align, false, use_strd_p))
32100 if (!use_strd_p)
32101 return false;
32103 /* Try without strd. */
32104 v = (v >> BITS_PER_WORD);
32105 v = sext_hwi (v, BITS_PER_WORD);
32106 val_exp = GEN_INT (v);
32107 use_strd_p = false;
32108 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32109 align, false, use_strd_p))
32110 return false;
32113 i = 0;
32114 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32115 /* Handle double words using strd if possible. */
32116 if (use_strd_p)
32118 val_reg = force_reg (DImode, val_exp);
32119 reg = val_reg;
32120 for (; (i + 8 <= length); i += 8)
32122 addr = plus_constant (Pmode, dst, i);
32123 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32124 emit_move_insn (mem, reg);
32127 else
32128 val_reg = force_reg (SImode, val_exp);
32130 /* Handle words. */
32131 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32132 for (; (i + 4 <= length); i += 4)
32134 addr = plus_constant (Pmode, dst, i);
32135 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32136 if ((align & 3) == 0)
32137 emit_move_insn (mem, reg);
32138 else
32139 emit_insn (gen_unaligned_storesi (mem, reg));
32142 /* Merge last pair of STRH and STRB into a STR if possible. */
32143 if (unaligned_access && i > 0 && (i + 3) == length)
32145 addr = plus_constant (Pmode, dst, i - 1);
32146 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32147 /* We are shifting one byte back, set the alignment accordingly. */
32148 if ((align & 1) == 0)
32149 set_mem_align (mem, BITS_PER_UNIT);
32151 /* Most likely this is an unaligned access, and we can't tell at
32152 compilation time. */
32153 emit_insn (gen_unaligned_storesi (mem, reg));
32154 return true;
32157 /* Handle half word leftover. */
32158 if (i + 2 <= length)
32160 reg = gen_lowpart (HImode, val_reg);
32161 addr = plus_constant (Pmode, dst, i);
32162 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32163 if ((align & 1) == 0)
32164 emit_move_insn (mem, reg);
32165 else
32166 emit_insn (gen_unaligned_storehi (mem, reg));
32168 i += 2;
32171 /* Handle single byte leftover. */
32172 if (i + 1 == length)
32174 reg = gen_lowpart (QImode, val_reg);
32175 addr = plus_constant (Pmode, dst, i);
32176 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32177 emit_move_insn (mem, reg);
32180 return true;
32183 /* Set a block of memory using vectorization instructions for both
32184 aligned and unaligned cases. We fill the first LENGTH bytes of
32185 the memory area starting from DSTBASE with byte constant VALUE.
32186 ALIGN is the alignment requirement of memory. */
32187 static bool
32188 arm_block_set_vect (rtx dstbase,
32189 unsigned HOST_WIDE_INT length,
32190 unsigned HOST_WIDE_INT value,
32191 unsigned HOST_WIDE_INT align)
32193 /* Check whether we need to use unaligned store instruction. */
32194 if (((align & 3) != 0 || (length & 3) != 0)
32195 /* Check whether unaligned store instruction is available. */
32196 && (!unaligned_access || BYTES_BIG_ENDIAN))
32197 return false;
32199 if ((align & 3) == 0)
32200 return arm_block_set_aligned_vect (dstbase, length, value, align);
32201 else
32202 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32205 /* Expand a string store (memset) operation. First we try to do it using
32206 vectorization instructions, then with ARM unaligned access and
32207 double-word stores if profitable. OPERANDS[0] is the destination,
32208 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32209 initialize the memory, OPERANDS[3] is the known alignment of the
32210 destination. */
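/* Illustrative example (assumed): clearing 15 bytes at a 4-byte aligned
   destination, when the vector path is not taken, falls through to
   arm_block_set_aligned_non_vect, which replicates the byte value into a
   word, emits three word stores for the first 12 bytes and, if unaligned
   access is permitted, finishes the last 3 bytes with a single
   overlapping unaligned STR placed one byte back.  */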
32211 bool
32212 arm_gen_setmem (rtx *operands)
32214 rtx dstbase = operands[0];
32215 unsigned HOST_WIDE_INT length;
32216 unsigned HOST_WIDE_INT value;
32217 unsigned HOST_WIDE_INT align;
32219 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32220 return false;
32222 length = UINTVAL (operands[1]);
32223 if (length > 64)
32224 return false;
32226 value = (UINTVAL (operands[2]) & 0xFF);
32227 align = UINTVAL (operands[3]);
32228 if (TARGET_NEON && length >= 8
32229 && current_tune->string_ops_prefer_neon
32230 && arm_block_set_vect (dstbase, length, value, align))
32231 return true;
32233 if (!unaligned_access && (align & 3) != 0)
32234 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32236 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32239 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
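/* Background note (an assumption, not from the original sources): in the
   usual AddressSanitizer scheme a shadow byte lives at
   (address >> 3) + shadow_offset, so returning 1 << 29 places the shadow
   region at 0x20000000 on 32-bit ARM targets.  */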
32241 static unsigned HOST_WIDE_INT
32242 arm_asan_shadow_offset (void)
32244 return (unsigned HOST_WIDE_INT) 1 << 29;
32248 /* This is a temporary fix for PR60655. Ideally we need
32249 to handle most of these cases in the generic part but
32250 currently we reject minus (..) (sym_ref). We try to
32251 ameliorate the case with minus (sym_ref1) (sym_ref2)
32252 where they are in the same section. */
32254 static bool
32255 arm_const_not_ok_for_debug_p (rtx p)
32257 tree decl_op0 = NULL;
32258 tree decl_op1 = NULL;
32260 if (GET_CODE (p) == MINUS)
32262 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32264 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32265 if (decl_op1
32266 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32267 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32269 if ((TREE_CODE (decl_op1) == VAR_DECL
32270 || TREE_CODE (decl_op1) == CONST_DECL)
32271 && (TREE_CODE (decl_op0) == VAR_DECL
32272 || TREE_CODE (decl_op0) == CONST_DECL))
32273 return (get_variable_section (decl_op1, false)
32274 != get_variable_section (decl_op0, false));
32276 if (TREE_CODE (decl_op1) == LABEL_DECL
32277 && TREE_CODE (decl_op0) == LABEL_DECL)
32278 return (DECL_CONTEXT (decl_op1)
32279 != DECL_CONTEXT (decl_op0));
32282 return true;
32286 return false;
32289 static void
32290 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32292 const unsigned ARM_FE_INVALID = 1;
32293 const unsigned ARM_FE_DIVBYZERO = 2;
32294 const unsigned ARM_FE_OVERFLOW = 4;
32295 const unsigned ARM_FE_UNDERFLOW = 8;
32296 const unsigned ARM_FE_INEXACT = 16;
32297 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32298 | ARM_FE_DIVBYZERO
32299 | ARM_FE_OVERFLOW
32300 | ARM_FE_UNDERFLOW
32301 | ARM_FE_INEXACT);
32302 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
32303 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32304 tree new_fenv_var, reload_fenv, restore_fnenv;
32305 tree update_call, atomic_feraiseexcept, hold_fnclex;
32307 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32308 return;
32310 /* Generate the equivalent of :
32311 unsigned int fenv_var;
32312 fenv_var = __builtin_arm_get_fpscr ();
32314 unsigned int masked_fenv;
32315 masked_fenv = fenv_var & mask;
32317 __builtin_arm_set_fpscr (masked_fenv); */
32319 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32320 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32321 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32322 mask = build_int_cst (unsigned_type_node,
32323 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32324 | ARM_FE_ALL_EXCEPT));
32325 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32326 fenv_var, build_call_expr (get_fpscr, 0));
32327 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32328 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32329 *hold = build2 (COMPOUND_EXPR, void_type_node,
32330 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32331 hold_fnclex);
32333 /* Store the value of masked_fenv to clear the exceptions:
32334 __builtin_arm_set_fpscr (masked_fenv); */
32336 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32338 /* Generate the equivalent of :
32339 unsigned int new_fenv_var;
32340 new_fenv_var = __builtin_arm_get_fpscr ();
32342 __builtin_arm_set_fpscr (fenv_var);
32344 __atomic_feraiseexcept (new_fenv_var); */
32346 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32347 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32348 build_call_expr (get_fpscr, 0));
32349 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32350 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32351 update_call = build_call_expr (atomic_feraiseexcept, 1,
32352 fold_convert (integer_type_node, new_fenv_var));
32353 *update = build2 (COMPOUND_EXPR, void_type_node,
32354 build2 (COMPOUND_EXPR, void_type_node,
32355 reload_fenv, restore_fnenv), update_call);
32358 /* Return TRUE if X is a reference to a value in a constant pool. */
32359 extern bool
32360 arm_is_constant_pool_ref (rtx x)
32362 return (MEM_P (x)
32363 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32364 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32367 /* If MEM is in the form of [base+offset], extract the two parts
32368 of the address into BASE and OFFSET; otherwise clear BASE and
32369 OFFSET and return false. */
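/* Illustrative examples (assumed): an address of the form (reg r4)
   yields BASE == r4 and OFFSET == 0; (plus (reg r4) (const_int 8))
   yields BASE == r4 and OFFSET == 8; anything else, e.g. a
   register-indexed address, clears both and returns false.  */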
32371 static bool
32372 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32374 rtx addr;
32376 gcc_assert (MEM_P (mem));
32378 addr = XEXP (mem, 0);
32380 /* Strip off const from addresses like (const (addr)). */
32381 if (GET_CODE (addr) == CONST)
32382 addr = XEXP (addr, 0);
32384 if (GET_CODE (addr) == REG)
32386 *base = addr;
32387 *offset = const0_rtx;
32388 return true;
32391 if (GET_CODE (addr) == PLUS
32392 && GET_CODE (XEXP (addr, 0)) == REG
32393 && CONST_INT_P (XEXP (addr, 1)))
32395 *base = XEXP (addr, 0);
32396 *offset = XEXP (addr, 1);
32397 return true;
32400 *base = NULL_RTX;
32401 *offset = NULL_RTX;
32403 return false;
32406 /* If INSN is a load or store whose address is of the form [base+offset],
32407 extract the two parts into BASE and OFFSET. IS_LOAD is set
32408 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
32409 otherwise return FALSE. */
32411 static bool
32412 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32414 rtx x, dest, src;
32416 gcc_assert (INSN_P (insn));
32417 x = PATTERN (insn);
32418 if (GET_CODE (x) != SET)
32419 return false;
32421 src = SET_SRC (x);
32422 dest = SET_DEST (x);
32423 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32425 *is_load = false;
32426 extract_base_offset_in_addr (dest, base, offset);
32428 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32430 *is_load = true;
32431 extract_base_offset_in_addr (src, base, offset);
32433 else
32434 return false;
32436 return (*base != NULL_RTX && *offset != NULL_RTX);
32439 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
32441 Currently we only support fusing ldr and str instructions, so FUSION_PRI
32442 and PRI are only calculated for these instructions. For other instructions,
32443 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
32444 instruction fusion can be supported by returning different priorities.
32446 It's important that irrelevant instructions get the largest FUSION_PRI. */
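/* Illustrative example (assumed): two loads from [r4, #4] and [r4, #8]
   receive the same FUSION_PRI (loads sort ahead of stores), while their
   PRI values differ only in the offset term, so the scheduler is steered
   towards placing the lower-offset access first and keeping the pair
   adjacent so that a later pass can more easily combine them (e.g. into
   an LDRD).  */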
32448 static void
32449 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
32450 int *fusion_pri, int *pri)
32452 int tmp, off_val;
32453 bool is_load;
32454 rtx base, offset;
32456 gcc_assert (INSN_P (insn));
32458 tmp = max_pri - 1;
32459 if (!fusion_load_store (insn, &base, &offset, &is_load))
32461 *pri = tmp;
32462 *fusion_pri = tmp;
32463 return;
32466 /* Load goes first. */
32467 if (is_load)
32468 *fusion_pri = tmp - 1;
32469 else
32470 *fusion_pri = tmp - 2;
32472 tmp /= 2;
32474 /* INSN with smaller base register goes first. */
32475 tmp -= ((REGNO (base) & 0xff) << 20);
32477 /* INSN with smaller offset goes first. */
32478 off_val = (int)(INTVAL (offset));
32479 if (off_val >= 0)
32480 tmp -= (off_val & 0xfffff);
32481 else
32482 tmp += ((- off_val) & 0xfffff);
32484 *pri = tmp;
32485 return;
32487 #include "gt-arm.h"