[Patch ARM Refactor Builtins 2/8] Move Processor flags to arm-protos.h
[official-gcc.git] / gcc / config / arm / arm.c
blob 9aa978be7e1020151cd100a9e00be955a610ec11
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
 93 struct four_ints
 94 {
 95 int i[4];
 96 };
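/* Descriptive note (added for clarity, not in the original source): a
   four_ints holds the up-to-four immediate constants that
   optimal_immediate_sequence, declared below, fills in when it works out
   how to synthesize a constant in at most four instructions.  */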
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx safe_vector_operand (rtx, machine_mode);
199 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
200 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
201 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
202 static tree arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond, rtx pattern);
204 static rtx_insn *emit_set_insn (rtx, rtx);
205 static rtx emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
207 tree, bool);
208 static rtx arm_function_arg (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
213 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
214 const_tree);
215 static rtx aapcs_libcall_value (machine_mode);
216 static int aapcs_select_return_coproc (const_tree, const_tree);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
220 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
221 #endif
222 #ifndef ARM_PE
223 static void arm_encode_section_info (tree, rtx, int);
224 #endif
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
230 tree, int *, int);
231 static bool arm_pass_by_reference (cumulative_args_t,
232 machine_mode, const_tree, bool);
233 static bool arm_promote_prototypes (const_tree);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree);
237 static bool arm_must_pass_in_stack (machine_mode, const_tree);
238 static bool arm_return_in_memory (const_tree, const_tree);
239 #if ARM_UNWIND_INFO
240 static void arm_unwind_emit (FILE *, rtx_insn *);
241 static bool arm_output_ttype (rtx);
242 static void arm_asm_emit_except_personality (rtx);
243 static void arm_asm_init_sections (void);
244 #endif
245 static rtx arm_dwarf_register_span (rtx);
247 static tree arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree arm_get_cookie_size (tree);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree, rtx);
259 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static const char *arm_invalid_parameter_type (const_tree t);
269 static const char *arm_invalid_return_type (const_tree t);
270 static tree arm_promoted_type (const_tree t);
271 static tree arm_convert_to_type (tree type, tree expr);
272 static bool arm_scalar_mode_supported_p (machine_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (machine_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
317 /* Table of machine attributes. */
 318 static const struct attribute_spec arm_attribute_table[] =
 319 {
320 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
321 affects_type_identity } */
322 /* Function calls made to this symbol must be done indirectly, because
323 it may lie outside of the 26 bit addressing range of a normal function
324 call. */
325 { "long_call", 0, 0, false, true, true, NULL, false },
326 /* Whereas these functions are always known to reside within the 26 bit
327 addressing range. */
328 { "short_call", 0, 0, false, true, true, NULL, false },
329 /* Specify the procedure call conventions for a function. */
330 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
331 false },
332 /* Interrupt Service Routines have special prologue and epilogue requirements. */
333 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
336 false },
337 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
338 false },
339 #ifdef ARM_PE
340 /* ARM/PE has three new attributes:
341 interfacearm - ?
342 dllexport - for exporting a function/variable that will live in a dll
343 dllimport - for importing a function/variable from a dll
345 Microsoft allows multiple declspecs in one __declspec, separating
346 them with spaces. We do NOT support this. Instead, use __declspec
 347 multiple times.
 348 */
349 { "dllimport", 0, 0, true, false, false, NULL, false },
350 { "dllexport", 0, 0, true, false, false, NULL, false },
351 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
352 false },
353 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
354 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
355 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
356 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
357 false },
358 #endif
 359 { NULL, 0, 0, false, false, false, NULL, false }
 360 };
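/* For illustration, user code targeting ARM can request the attributes
   handled above like this (example declarations only):

     void far_away (void) __attribute__ ((long_call));
     void close_by (void) __attribute__ ((short_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     void raw_entry (void) __attribute__ ((naked));

   "long_call" forces an indirect call sequence, "interrupt"/"isr" selects
   the ISR prologue/epilogue validated by arm_handle_isr_attribute, and
   "naked" suppresses prologue/epilogue generation entirely.  */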
362 /* Initialize the GCC target structure. */
363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
364 #undef TARGET_MERGE_DECL_ATTRIBUTES
365 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
366 #endif
368 #undef TARGET_LEGITIMIZE_ADDRESS
369 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371 #undef TARGET_LRA_P
372 #define TARGET_LRA_P arm_lra_p
374 #undef TARGET_ATTRIBUTE_TABLE
375 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
377 #undef TARGET_ASM_FILE_START
378 #define TARGET_ASM_FILE_START arm_file_start
379 #undef TARGET_ASM_FILE_END
380 #define TARGET_ASM_FILE_END arm_file_end
382 #undef TARGET_ASM_ALIGNED_SI_OP
383 #define TARGET_ASM_ALIGNED_SI_OP NULL
384 #undef TARGET_ASM_INTEGER
385 #define TARGET_ASM_INTEGER arm_assemble_integer
387 #undef TARGET_PRINT_OPERAND
388 #define TARGET_PRINT_OPERAND arm_print_operand
389 #undef TARGET_PRINT_OPERAND_ADDRESS
390 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
391 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
392 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400 #undef TARGET_ASM_FUNCTION_EPILOGUE
401 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403 #undef TARGET_OPTION_OVERRIDE
404 #define TARGET_OPTION_OVERRIDE arm_option_override
406 #undef TARGET_COMP_TYPE_ATTRIBUTES
407 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
409 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
410 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
412 #undef TARGET_SCHED_ADJUST_COST
413 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
415 #undef TARGET_SCHED_REORDER
416 #define TARGET_SCHED_REORDER arm_sched_reorder
418 #undef TARGET_REGISTER_MOVE_COST
419 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
421 #undef TARGET_MEMORY_MOVE_COST
422 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
424 #undef TARGET_ENCODE_SECTION_INFO
425 #ifdef ARM_PE
426 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
427 #else
428 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
429 #endif
431 #undef TARGET_STRIP_NAME_ENCODING
432 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
434 #undef TARGET_ASM_INTERNAL_LABEL
435 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
437 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
438 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
440 #undef TARGET_FUNCTION_VALUE
441 #define TARGET_FUNCTION_VALUE arm_function_value
443 #undef TARGET_LIBCALL_VALUE
444 #define TARGET_LIBCALL_VALUE arm_libcall_value
446 #undef TARGET_FUNCTION_VALUE_REGNO_P
447 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
449 #undef TARGET_ASM_OUTPUT_MI_THUNK
450 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
451 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
452 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
454 #undef TARGET_RTX_COSTS
455 #define TARGET_RTX_COSTS arm_rtx_costs
456 #undef TARGET_ADDRESS_COST
457 #define TARGET_ADDRESS_COST arm_address_cost
459 #undef TARGET_SHIFT_TRUNCATION_MASK
460 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
461 #undef TARGET_VECTOR_MODE_SUPPORTED_P
462 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
463 #undef TARGET_ARRAY_MODE_SUPPORTED_P
464 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
465 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
466 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
467 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
468 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
469 arm_autovectorize_vector_sizes
471 #undef TARGET_MACHINE_DEPENDENT_REORG
472 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
474 #undef TARGET_INIT_BUILTINS
475 #define TARGET_INIT_BUILTINS arm_init_builtins
476 #undef TARGET_EXPAND_BUILTIN
477 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
478 #undef TARGET_BUILTIN_DECL
479 #define TARGET_BUILTIN_DECL arm_builtin_decl
481 #undef TARGET_INIT_LIBFUNCS
482 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
484 #undef TARGET_PROMOTE_FUNCTION_MODE
485 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
486 #undef TARGET_PROMOTE_PROTOTYPES
487 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
488 #undef TARGET_PASS_BY_REFERENCE
489 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
490 #undef TARGET_ARG_PARTIAL_BYTES
491 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
492 #undef TARGET_FUNCTION_ARG
493 #define TARGET_FUNCTION_ARG arm_function_arg
494 #undef TARGET_FUNCTION_ARG_ADVANCE
495 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
496 #undef TARGET_FUNCTION_ARG_BOUNDARY
497 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
499 #undef TARGET_SETUP_INCOMING_VARARGS
500 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
502 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
503 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
505 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
506 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
507 #undef TARGET_TRAMPOLINE_INIT
508 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
509 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
510 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
512 #undef TARGET_WARN_FUNC_RETURN
513 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
515 #undef TARGET_DEFAULT_SHORT_ENUMS
516 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
518 #undef TARGET_ALIGN_ANON_BITFIELD
519 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
521 #undef TARGET_NARROW_VOLATILE_BITFIELD
522 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
524 #undef TARGET_CXX_GUARD_TYPE
525 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
527 #undef TARGET_CXX_GUARD_MASK_BIT
528 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
530 #undef TARGET_CXX_GET_COOKIE_SIZE
531 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
533 #undef TARGET_CXX_COOKIE_HAS_SIZE
534 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
536 #undef TARGET_CXX_CDTOR_RETURNS_THIS
537 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
539 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
540 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
542 #undef TARGET_CXX_USE_AEABI_ATEXIT
543 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
545 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
546 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
547 arm_cxx_determine_class_data_visibility
549 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
550 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
552 #undef TARGET_RETURN_IN_MSB
553 #define TARGET_RETURN_IN_MSB arm_return_in_msb
555 #undef TARGET_RETURN_IN_MEMORY
556 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
558 #undef TARGET_MUST_PASS_IN_STACK
559 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
561 #if ARM_UNWIND_INFO
562 #undef TARGET_ASM_UNWIND_EMIT
563 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
565 /* EABI unwinding tables use a different format for the typeinfo tables. */
566 #undef TARGET_ASM_TTYPE
567 #define TARGET_ASM_TTYPE arm_output_ttype
569 #undef TARGET_ARM_EABI_UNWINDER
570 #define TARGET_ARM_EABI_UNWINDER true
572 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
573 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
575 #undef TARGET_ASM_INIT_SECTIONS
576 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
577 #endif /* ARM_UNWIND_INFO */
579 #undef TARGET_DWARF_REGISTER_SPAN
580 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
582 #undef TARGET_CANNOT_COPY_INSN_P
583 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
585 #ifdef HAVE_AS_TLS
586 #undef TARGET_HAVE_TLS
587 #define TARGET_HAVE_TLS true
588 #endif
590 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
591 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
593 #undef TARGET_LEGITIMATE_CONSTANT_P
594 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
596 #undef TARGET_CANNOT_FORCE_CONST_MEM
597 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
599 #undef TARGET_MAX_ANCHOR_OFFSET
600 #define TARGET_MAX_ANCHOR_OFFSET 4095
602 /* The minimum is set such that the total size of the block
603 for a particular anchor is -4088 + 1 + 4095 bytes, which is
604 divisible by eight, ensuring natural spacing of anchors. */
605 #undef TARGET_MIN_ANCHOR_OFFSET
606 #define TARGET_MIN_ANCHOR_OFFSET -4088
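/* Concretely: anchored offsets range over [-4088, 4095], so one anchor
   covers 4095 - (-4088) + 1 = 8184 bytes, and 8184 = 8 * 1023.  */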
608 #undef TARGET_SCHED_ISSUE_RATE
609 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
611 #undef TARGET_MANGLE_TYPE
612 #define TARGET_MANGLE_TYPE arm_mangle_type
614 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
615 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
617 #undef TARGET_BUILD_BUILTIN_VA_LIST
618 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
619 #undef TARGET_EXPAND_BUILTIN_VA_START
620 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
621 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
622 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
624 #ifdef HAVE_AS_TLS
625 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
626 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
627 #endif
629 #undef TARGET_LEGITIMATE_ADDRESS_P
630 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
632 #undef TARGET_PREFERRED_RELOAD_CLASS
633 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
635 #undef TARGET_INVALID_PARAMETER_TYPE
636 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
638 #undef TARGET_INVALID_RETURN_TYPE
639 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
641 #undef TARGET_PROMOTED_TYPE
642 #define TARGET_PROMOTED_TYPE arm_promoted_type
644 #undef TARGET_CONVERT_TO_TYPE
645 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
647 #undef TARGET_SCALAR_MODE_SUPPORTED_P
648 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
650 #undef TARGET_FRAME_POINTER_REQUIRED
651 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
653 #undef TARGET_CAN_ELIMINATE
654 #define TARGET_CAN_ELIMINATE arm_can_eliminate
656 #undef TARGET_CONDITIONAL_REGISTER_USAGE
657 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
659 #undef TARGET_CLASS_LIKELY_SPILLED_P
660 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
662 #undef TARGET_VECTORIZE_BUILTINS
663 #define TARGET_VECTORIZE_BUILTINS
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
667 arm_builtin_vectorized_function
669 #undef TARGET_VECTOR_ALIGNMENT
670 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
672 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
673 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
674 arm_vector_alignment_reachable
676 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
677 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
678 arm_builtin_support_vector_misalignment
680 #undef TARGET_PREFERRED_RENAME_CLASS
681 #define TARGET_PREFERRED_RENAME_CLASS \
682 arm_preferred_rename_class
684 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
685 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
686 arm_vectorize_vec_perm_const_ok
688 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
689 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
690 arm_builtin_vectorization_cost
691 #undef TARGET_VECTORIZE_ADD_STMT_COST
692 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
694 #undef TARGET_CANONICALIZE_COMPARISON
695 #define TARGET_CANONICALIZE_COMPARISON \
696 arm_canonicalize_comparison
698 #undef TARGET_ASAN_SHADOW_OFFSET
699 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
701 #undef MAX_INSN_PER_IT_BLOCK
702 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
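/* For example, "ITTEE EQ" conditionalises the four following instructions;
   when -mrestrict-it sets arm_restrict_it, only single-instruction IT
   blocks are emitted, as preferred on ARMv8-A.  */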
704 #undef TARGET_CAN_USE_DOLOOP_P
705 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
707 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
708 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
710 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
711 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
713 #undef TARGET_SCHED_FUSION_PRIORITY
714 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
716 struct gcc_target targetm = TARGET_INITIALIZER;
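/* TARGET_INITIALIZER, from target-def.h, expands to an aggregate
   initializer that picks up every TARGET_* hook overridden above and the
   documented defaults for the rest.  */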
718 /* Obstack for minipool constant handling. */
719 static struct obstack minipool_obstack;
720 static char * minipool_startobj;
722 /* The maximum number of insns skipped which
723 will be conditionalised if possible. */
724 static int max_insns_skipped = 5;
726 extern FILE * asm_out_file;
728 /* True if we are currently building a constant table. */
729 int making_const_table;
731 /* The processor for which instructions should be scheduled. */
732 enum processor_type arm_tune = arm_none;
734 /* The current tuning set. */
735 const struct tune_params *current_tune;
737 /* Which floating point hardware to schedule for. */
738 int arm_fpu_attr;
 740 /* Which floating point hardware to use. */
741 const struct arm_fpu_desc *arm_fpu_desc;
743 /* Used for Thumb call_via trampolines. */
744 rtx thumb_call_via_label[14];
745 static int thumb_call_reg_needed;
747 /* The bits in this mask specify which
748 instructions we are allowed to generate. */
749 unsigned long insn_flags = 0;
751 /* The bits in this mask specify which instruction scheduling options should
752 be used. */
753 unsigned long tune_flags = 0;
755 /* The highest ARM architecture version supported by the
756 target. */
757 enum base_architecture arm_base_arch = BASE_ARCH_0;
759 /* The following are used in the arm.md file as equivalents to bits
760 in the above two flag variables. */
762 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
763 int arm_arch3m = 0;
765 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
766 int arm_arch4 = 0;
768 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
769 int arm_arch4t = 0;
771 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
772 int arm_arch5 = 0;
774 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
775 int arm_arch5e = 0;
777 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
778 int arm_arch6 = 0;
780 /* Nonzero if this chip supports the ARM 6K extensions. */
781 int arm_arch6k = 0;
783 /* Nonzero if instructions present in ARMv6-M can be used. */
784 int arm_arch6m = 0;
786 /* Nonzero if this chip supports the ARM 7 extensions. */
787 int arm_arch7 = 0;
789 /* Nonzero if instructions not present in the 'M' profile can be used. */
790 int arm_arch_notm = 0;
792 /* Nonzero if instructions present in ARMv7E-M can be used. */
793 int arm_arch7em = 0;
795 /* Nonzero if instructions present in ARMv8 can be used. */
796 int arm_arch8 = 0;
798 /* Nonzero if this chip can benefit from load scheduling. */
799 int arm_ld_sched = 0;
801 /* Nonzero if this chip is a StrongARM. */
802 int arm_tune_strongarm = 0;
804 /* Nonzero if this chip supports Intel Wireless MMX technology. */
805 int arm_arch_iwmmxt = 0;
807 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
808 int arm_arch_iwmmxt2 = 0;
810 /* Nonzero if this chip is an XScale. */
811 int arm_arch_xscale = 0;
 813 /* Nonzero if tuning for XScale. */
814 int arm_tune_xscale = 0;
816 /* Nonzero if we want to tune for stores that access the write-buffer.
817 This typically means an ARM6 or ARM7 with MMU or MPU. */
818 int arm_tune_wbuf = 0;
820 /* Nonzero if tuning for Cortex-A9. */
821 int arm_tune_cortex_a9 = 0;
823 /* Nonzero if generating Thumb instructions. */
824 int thumb_code = 0;
826 /* Nonzero if generating Thumb-1 instructions. */
827 int thumb1_code = 0;
829 /* Nonzero if we should define __THUMB_INTERWORK__ in the
830 preprocessor.
831 XXX This is a bit of a hack, it's intended to help work around
832 problems in GLD which doesn't understand that armv5t code is
833 interworking clean. */
834 int arm_cpp_interwork = 0;
836 /* Nonzero if chip supports Thumb 2. */
837 int arm_arch_thumb2;
839 /* Nonzero if chip supports integer division instruction. */
840 int arm_arch_arm_hwdiv;
841 int arm_arch_thumb_hwdiv;
 843 /* Nonzero if we should use Neon to handle 64-bit operations rather
844 than core registers. */
845 int prefer_neon_for_64bits = 0;
847 /* Nonzero if we shouldn't use literal pools. */
848 bool arm_disable_literal_pool = false;
850 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
851 we must report the mode of the memory reference from
852 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
853 machine_mode output_memory_reference_mode;
855 /* The register number to be used for the PIC offset register. */
856 unsigned arm_pic_register = INVALID_REGNUM;
858 enum arm_pcs arm_pcs_default;
860 /* For an explanation of these variables, see final_prescan_insn below. */
861 int arm_ccfsm_state;
862 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
863 enum arm_cond_code arm_current_cc;
865 rtx arm_target_insn;
866 int arm_target_label;
867 /* The number of conditionally executed insns, including the current insn. */
868 int arm_condexec_count = 0;
869 /* A bitmask specifying the patterns for the IT block.
870 Zero means do not output an IT block before this insn. */
871 int arm_condexec_mask = 0;
872 /* The number of bits used in arm_condexec_mask. */
873 int arm_condexec_masklen = 0;
875 /* Nonzero if chip supports the ARMv8 CRC instructions. */
876 int arm_arch_crc = 0;
878 /* Nonzero if the core has a very small, high-latency, multiply unit. */
879 int arm_m_profile_small_mul = 0;
881 /* The condition codes of the ARM, and the inverse function. */
 882 static const char * const arm_condition_codes[] =
 883 {
 884 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 885 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 886 };
888 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
 889 int arm_regs_in_sequence[] =
 890 {
 891 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 892 };
894 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
895 #define streq(string1, string2) (strcmp (string1, string2) == 0)
897 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
898 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
899 | (1 << PIC_OFFSET_TABLE_REGNUM)))
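/* I.e. the Thumb-2 work registers are the low registers r0-r7 with the
   Thumb hard frame pointer and the PIC register masked out; the SP and PC
   bits are cleared as well, although they already fall outside the 0xff
   low-register mask.  */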
901 /* Initialization code. */
 903 struct processors
 904 {
 905 const char *const name;
 906 enum processor_type core;
 907 const char *arch;
 908 enum base_architecture base_arch;
 909 const unsigned long flags;
 910 const struct tune_params *const tune;
 911 };
914 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
915 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
916 prefetch_slots, \
917 l1_size, \
918 l1_line_size
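/* Example with made-up values: ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   expands to "4, 32768, 64", supplying the prefetch-slot, L1-cache-size
   and L1-line-size fields of a tune_params initializer, while
   ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */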
920 /* arm generic vectorizer costs. */
921 static const
922 struct cpu_vec_costs arm_default_vec_cost = {
923 1, /* scalar_stmt_cost. */
924 1, /* scalar load_cost. */
925 1, /* scalar_store_cost. */
926 1, /* vec_stmt_cost. */
927 1, /* vec_to_scalar_cost. */
928 1, /* scalar_to_vec_cost. */
929 1, /* vec_align_load_cost. */
930 1, /* vec_unalign_load_cost. */
931 1, /* vec_unalign_store_cost. */
932 1, /* vec_store_cost. */
933 3, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
937 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
938 #include "aarch-cost-tables.h"
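/* The tables below express latencies with COSTS_N_INSNS (N), i.e. N in
   the instruction-count units used by the RTX cost hooks (rtl.h scales it
   by the cost of a single fast instruction); a plain 0 means no cost
   beyond the baseline instruction.  */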
942 const struct cpu_cost_table cortexa9_extra_costs =
944 /* ALU */
946 0, /* arith. */
947 0, /* logical. */
948 0, /* shift. */
949 COSTS_N_INSNS (1), /* shift_reg. */
950 COSTS_N_INSNS (1), /* arith_shift. */
951 COSTS_N_INSNS (2), /* arith_shift_reg. */
952 0, /* log_shift. */
953 COSTS_N_INSNS (1), /* log_shift_reg. */
954 COSTS_N_INSNS (1), /* extend. */
955 COSTS_N_INSNS (2), /* extend_arith. */
956 COSTS_N_INSNS (1), /* bfi. */
957 COSTS_N_INSNS (1), /* bfx. */
958 0, /* clz. */
959 0, /* rev. */
960 0, /* non_exec. */
961 true /* non_exec_costs_exec. */
964 /* MULT SImode */
966 COSTS_N_INSNS (3), /* simple. */
967 COSTS_N_INSNS (3), /* flag_setting. */
968 COSTS_N_INSNS (2), /* extend. */
969 COSTS_N_INSNS (3), /* add. */
970 COSTS_N_INSNS (2), /* extend_add. */
971 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
973 /* MULT DImode */
975 0, /* simple (N/A). */
976 0, /* flag_setting (N/A). */
977 COSTS_N_INSNS (4), /* extend. */
978 0, /* add (N/A). */
979 COSTS_N_INSNS (4), /* extend_add. */
980 0 /* idiv (N/A). */
983 /* LD/ST */
985 COSTS_N_INSNS (2), /* load. */
986 COSTS_N_INSNS (2), /* load_sign_extend. */
987 COSTS_N_INSNS (2), /* ldrd. */
988 COSTS_N_INSNS (2), /* ldm_1st. */
989 1, /* ldm_regs_per_insn_1st. */
990 2, /* ldm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (5), /* loadf. */
992 COSTS_N_INSNS (5), /* loadd. */
993 COSTS_N_INSNS (1), /* load_unaligned. */
994 COSTS_N_INSNS (2), /* store. */
995 COSTS_N_INSNS (2), /* strd. */
996 COSTS_N_INSNS (2), /* stm_1st. */
997 1, /* stm_regs_per_insn_1st. */
998 2, /* stm_regs_per_insn_subsequent. */
999 COSTS_N_INSNS (1), /* storef. */
1000 COSTS_N_INSNS (1), /* stored. */
1001 COSTS_N_INSNS (1) /* store_unaligned. */
1004 /* FP SFmode */
1006 COSTS_N_INSNS (14), /* div. */
1007 COSTS_N_INSNS (4), /* mult. */
1008 COSTS_N_INSNS (7), /* mult_addsub. */
1009 COSTS_N_INSNS (30), /* fma. */
1010 COSTS_N_INSNS (3), /* addsub. */
1011 COSTS_N_INSNS (1), /* fpconst. */
1012 COSTS_N_INSNS (1), /* neg. */
1013 COSTS_N_INSNS (3), /* compare. */
1014 COSTS_N_INSNS (3), /* widen. */
1015 COSTS_N_INSNS (3), /* narrow. */
1016 COSTS_N_INSNS (3), /* toint. */
1017 COSTS_N_INSNS (3), /* fromint. */
1018 COSTS_N_INSNS (3) /* roundint. */
1020 /* FP DFmode */
1022 COSTS_N_INSNS (24), /* div. */
1023 COSTS_N_INSNS (5), /* mult. */
1024 COSTS_N_INSNS (8), /* mult_addsub. */
1025 COSTS_N_INSNS (30), /* fma. */
1026 COSTS_N_INSNS (3), /* addsub. */
1027 COSTS_N_INSNS (1), /* fpconst. */
1028 COSTS_N_INSNS (1), /* neg. */
1029 COSTS_N_INSNS (3), /* compare. */
1030 COSTS_N_INSNS (3), /* widen. */
1031 COSTS_N_INSNS (3), /* narrow. */
1032 COSTS_N_INSNS (3), /* toint. */
1033 COSTS_N_INSNS (3), /* fromint. */
1034 COSTS_N_INSNS (3) /* roundint. */
1037 /* Vector */
1039 COSTS_N_INSNS (1) /* alu. */
1043 const struct cpu_cost_table cortexa8_extra_costs =
1045 /* ALU */
1047 0, /* arith. */
1048 0, /* logical. */
1049 COSTS_N_INSNS (1), /* shift. */
1050 0, /* shift_reg. */
1051 COSTS_N_INSNS (1), /* arith_shift. */
1052 0, /* arith_shift_reg. */
1053 COSTS_N_INSNS (1), /* log_shift. */
1054 0, /* log_shift_reg. */
1055 0, /* extend. */
1056 0, /* extend_arith. */
1057 0, /* bfi. */
1058 0, /* bfx. */
1059 0, /* clz. */
1060 0, /* rev. */
1061 0, /* non_exec. */
1062 true /* non_exec_costs_exec. */
1065 /* MULT SImode */
1067 COSTS_N_INSNS (1), /* simple. */
1068 COSTS_N_INSNS (1), /* flag_setting. */
1069 COSTS_N_INSNS (1), /* extend. */
1070 COSTS_N_INSNS (1), /* add. */
1071 COSTS_N_INSNS (1), /* extend_add. */
1072 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1074 /* MULT DImode */
1076 0, /* simple (N/A). */
1077 0, /* flag_setting (N/A). */
1078 COSTS_N_INSNS (2), /* extend. */
1079 0, /* add (N/A). */
1080 COSTS_N_INSNS (2), /* extend_add. */
1081 0 /* idiv (N/A). */
1084 /* LD/ST */
1086 COSTS_N_INSNS (1), /* load. */
1087 COSTS_N_INSNS (1), /* load_sign_extend. */
1088 COSTS_N_INSNS (1), /* ldrd. */
1089 COSTS_N_INSNS (1), /* ldm_1st. */
1090 1, /* ldm_regs_per_insn_1st. */
1091 2, /* ldm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* loadf. */
1093 COSTS_N_INSNS (1), /* loadd. */
1094 COSTS_N_INSNS (1), /* load_unaligned. */
1095 COSTS_N_INSNS (1), /* store. */
1096 COSTS_N_INSNS (1), /* strd. */
1097 COSTS_N_INSNS (1), /* stm_1st. */
1098 1, /* stm_regs_per_insn_1st. */
1099 2, /* stm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (1), /* storef. */
1101 COSTS_N_INSNS (1), /* stored. */
1102 COSTS_N_INSNS (1) /* store_unaligned. */
1105 /* FP SFmode */
1107 COSTS_N_INSNS (36), /* div. */
1108 COSTS_N_INSNS (11), /* mult. */
1109 COSTS_N_INSNS (20), /* mult_addsub. */
1110 COSTS_N_INSNS (30), /* fma. */
1111 COSTS_N_INSNS (9), /* addsub. */
1112 COSTS_N_INSNS (3), /* fpconst. */
1113 COSTS_N_INSNS (3), /* neg. */
1114 COSTS_N_INSNS (6), /* compare. */
1115 COSTS_N_INSNS (4), /* widen. */
1116 COSTS_N_INSNS (4), /* narrow. */
1117 COSTS_N_INSNS (8), /* toint. */
1118 COSTS_N_INSNS (8), /* fromint. */
1119 COSTS_N_INSNS (8) /* roundint. */
1121 /* FP DFmode */
1123 COSTS_N_INSNS (64), /* div. */
1124 COSTS_N_INSNS (16), /* mult. */
1125 COSTS_N_INSNS (25), /* mult_addsub. */
1126 COSTS_N_INSNS (30), /* fma. */
1127 COSTS_N_INSNS (9), /* addsub. */
1128 COSTS_N_INSNS (3), /* fpconst. */
1129 COSTS_N_INSNS (3), /* neg. */
1130 COSTS_N_INSNS (6), /* compare. */
1131 COSTS_N_INSNS (6), /* widen. */
1132 COSTS_N_INSNS (6), /* narrow. */
1133 COSTS_N_INSNS (8), /* toint. */
1134 COSTS_N_INSNS (8), /* fromint. */
1135 COSTS_N_INSNS (8) /* roundint. */
1138 /* Vector */
1140 COSTS_N_INSNS (1) /* alu. */
1144 const struct cpu_cost_table cortexa5_extra_costs =
1146 /* ALU */
1148 0, /* arith. */
1149 0, /* logical. */
1150 COSTS_N_INSNS (1), /* shift. */
1151 COSTS_N_INSNS (1), /* shift_reg. */
1152 COSTS_N_INSNS (1), /* arith_shift. */
1153 COSTS_N_INSNS (1), /* arith_shift_reg. */
1154 COSTS_N_INSNS (1), /* log_shift. */
1155 COSTS_N_INSNS (1), /* log_shift_reg. */
1156 COSTS_N_INSNS (1), /* extend. */
1157 COSTS_N_INSNS (1), /* extend_arith. */
1158 COSTS_N_INSNS (1), /* bfi. */
1159 COSTS_N_INSNS (1), /* bfx. */
1160 COSTS_N_INSNS (1), /* clz. */
1161 COSTS_N_INSNS (1), /* rev. */
1162 0, /* non_exec. */
1163 true /* non_exec_costs_exec. */
1167 /* MULT SImode */
1169 0, /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (7) /* idiv. */
1176 /* MULT DImode */
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (1), /* extend. */
1181 0, /* add. */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1186 /* LD/ST */
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (6), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* loadf. */
1195 COSTS_N_INSNS (4), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (3), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (2), /* storef. */
1203 COSTS_N_INSNS (2), /* stored. */
1204 COSTS_N_INSNS (1) /* store_unaligned. */
1207 /* FP SFmode */
1209 COSTS_N_INSNS (15), /* div. */
1210 COSTS_N_INSNS (3), /* mult. */
1211 COSTS_N_INSNS (7), /* mult_addsub. */
1212 COSTS_N_INSNS (7), /* fma. */
1213 COSTS_N_INSNS (3), /* addsub. */
1214 COSTS_N_INSNS (3), /* fpconst. */
1215 COSTS_N_INSNS (3), /* neg. */
1216 COSTS_N_INSNS (3), /* compare. */
1217 COSTS_N_INSNS (3), /* widen. */
1218 COSTS_N_INSNS (3), /* narrow. */
1219 COSTS_N_INSNS (3), /* toint. */
1220 COSTS_N_INSNS (3), /* fromint. */
1221 COSTS_N_INSNS (3) /* roundint. */
1223 /* FP DFmode */
1225 COSTS_N_INSNS (30), /* div. */
1226 COSTS_N_INSNS (6), /* mult. */
1227 COSTS_N_INSNS (10), /* mult_addsub. */
1228 COSTS_N_INSNS (7), /* fma. */
1229 COSTS_N_INSNS (3), /* addsub. */
1230 COSTS_N_INSNS (3), /* fpconst. */
1231 COSTS_N_INSNS (3), /* neg. */
1232 COSTS_N_INSNS (3), /* compare. */
1233 COSTS_N_INSNS (3), /* widen. */
1234 COSTS_N_INSNS (3), /* narrow. */
1235 COSTS_N_INSNS (3), /* toint. */
1236 COSTS_N_INSNS (3), /* fromint. */
1237 COSTS_N_INSNS (3) /* roundint. */
1240 /* Vector */
1242 COSTS_N_INSNS (1) /* alu. */
1247 const struct cpu_cost_table cortexa7_extra_costs =
1249 /* ALU */
1251 0, /* arith. */
1252 0, /* logical. */
1253 COSTS_N_INSNS (1), /* shift. */
1254 COSTS_N_INSNS (1), /* shift_reg. */
1255 COSTS_N_INSNS (1), /* arith_shift. */
1256 COSTS_N_INSNS (1), /* arith_shift_reg. */
1257 COSTS_N_INSNS (1), /* log_shift. */
1258 COSTS_N_INSNS (1), /* log_shift_reg. */
1259 COSTS_N_INSNS (1), /* extend. */
1260 COSTS_N_INSNS (1), /* extend_arith. */
1261 COSTS_N_INSNS (1), /* bfi. */
1262 COSTS_N_INSNS (1), /* bfx. */
1263 COSTS_N_INSNS (1), /* clz. */
1264 COSTS_N_INSNS (1), /* rev. */
1265 0, /* non_exec. */
1266 true /* non_exec_costs_exec. */
1270 /* MULT SImode */
1272 0, /* simple. */
1273 COSTS_N_INSNS (1), /* flag_setting. */
1274 COSTS_N_INSNS (1), /* extend. */
1275 COSTS_N_INSNS (1), /* add. */
1276 COSTS_N_INSNS (1), /* extend_add. */
1277 COSTS_N_INSNS (7) /* idiv. */
1279 /* MULT DImode */
1281 0, /* simple (N/A). */
1282 0, /* flag_setting (N/A). */
1283 COSTS_N_INSNS (1), /* extend. */
1284 0, /* add. */
1285 COSTS_N_INSNS (2), /* extend_add. */
1286 0 /* idiv (N/A). */
1289 /* LD/ST */
1291 COSTS_N_INSNS (1), /* load. */
1292 COSTS_N_INSNS (1), /* load_sign_extend. */
1293 COSTS_N_INSNS (3), /* ldrd. */
1294 COSTS_N_INSNS (1), /* ldm_1st. */
1295 1, /* ldm_regs_per_insn_1st. */
1296 2, /* ldm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* loadf. */
1298 COSTS_N_INSNS (2), /* loadd. */
1299 COSTS_N_INSNS (1), /* load_unaligned. */
1300 COSTS_N_INSNS (1), /* store. */
1301 COSTS_N_INSNS (3), /* strd. */
1302 COSTS_N_INSNS (1), /* stm_1st. */
1303 1, /* stm_regs_per_insn_1st. */
1304 2, /* stm_regs_per_insn_subsequent. */
1305 COSTS_N_INSNS (2), /* storef. */
1306 COSTS_N_INSNS (2), /* stored. */
1307 COSTS_N_INSNS (1) /* store_unaligned. */
1310 /* FP SFmode */
1312 COSTS_N_INSNS (15), /* div. */
1313 COSTS_N_INSNS (3), /* mult. */
1314 COSTS_N_INSNS (7), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1326 /* FP DFmode */
1328 COSTS_N_INSNS (30), /* div. */
1329 COSTS_N_INSNS (6), /* mult. */
1330 COSTS_N_INSNS (10), /* mult_addsub. */
1331 COSTS_N_INSNS (7), /* fma. */
1332 COSTS_N_INSNS (3), /* addsub. */
1333 COSTS_N_INSNS (3), /* fpconst. */
1334 COSTS_N_INSNS (3), /* neg. */
1335 COSTS_N_INSNS (3), /* compare. */
1336 COSTS_N_INSNS (3), /* widen. */
1337 COSTS_N_INSNS (3), /* narrow. */
1338 COSTS_N_INSNS (3), /* toint. */
1339 COSTS_N_INSNS (3), /* fromint. */
1340 COSTS_N_INSNS (3) /* roundint. */
1343 /* Vector */
1345 COSTS_N_INSNS (1) /* alu. */
1349 const struct cpu_cost_table cortexa12_extra_costs =
1351 /* ALU */
1353 0, /* arith. */
1354 0, /* logical. */
1355 0, /* shift. */
1356 COSTS_N_INSNS (1), /* shift_reg. */
1357 COSTS_N_INSNS (1), /* arith_shift. */
1358 COSTS_N_INSNS (1), /* arith_shift_reg. */
1359 COSTS_N_INSNS (1), /* log_shift. */
1360 COSTS_N_INSNS (1), /* log_shift_reg. */
1361 0, /* extend. */
1362 COSTS_N_INSNS (1), /* extend_arith. */
1363 0, /* bfi. */
1364 COSTS_N_INSNS (1), /* bfx. */
1365 COSTS_N_INSNS (1), /* clz. */
1366 COSTS_N_INSNS (1), /* rev. */
1367 0, /* non_exec. */
1368 true /* non_exec_costs_exec. */
1370 /* MULT SImode */
1373 COSTS_N_INSNS (2), /* simple. */
1374 COSTS_N_INSNS (3), /* flag_setting. */
1375 COSTS_N_INSNS (2), /* extend. */
1376 COSTS_N_INSNS (3), /* add. */
1377 COSTS_N_INSNS (2), /* extend_add. */
1378 COSTS_N_INSNS (18) /* idiv. */
1380 /* MULT DImode */
1382 0, /* simple (N/A). */
1383 0, /* flag_setting (N/A). */
1384 COSTS_N_INSNS (3), /* extend. */
1385 0, /* add (N/A). */
1386 COSTS_N_INSNS (3), /* extend_add. */
1387 0 /* idiv (N/A). */
1390 /* LD/ST */
1392 COSTS_N_INSNS (3), /* load. */
1393 COSTS_N_INSNS (3), /* load_sign_extend. */
1394 COSTS_N_INSNS (3), /* ldrd. */
1395 COSTS_N_INSNS (3), /* ldm_1st. */
1396 1, /* ldm_regs_per_insn_1st. */
1397 2, /* ldm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (3), /* loadf. */
1399 COSTS_N_INSNS (3), /* loadd. */
1400 0, /* load_unaligned. */
1401 0, /* store. */
1402 0, /* strd. */
1403 0, /* stm_1st. */
1404 1, /* stm_regs_per_insn_1st. */
1405 2, /* stm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* storef. */
1407 COSTS_N_INSNS (2), /* stored. */
1408 0 /* store_unaligned. */
1411 /* FP SFmode */
1413 COSTS_N_INSNS (17), /* div. */
1414 COSTS_N_INSNS (4), /* mult. */
1415 COSTS_N_INSNS (8), /* mult_addsub. */
1416 COSTS_N_INSNS (8), /* fma. */
1417 COSTS_N_INSNS (4), /* addsub. */
1418 COSTS_N_INSNS (2), /* fpconst. */
1419 COSTS_N_INSNS (2), /* neg. */
1420 COSTS_N_INSNS (2), /* compare. */
1421 COSTS_N_INSNS (4), /* widen. */
1422 COSTS_N_INSNS (4), /* narrow. */
1423 COSTS_N_INSNS (4), /* toint. */
1424 COSTS_N_INSNS (4), /* fromint. */
1425 COSTS_N_INSNS (4) /* roundint. */
1427 /* FP DFmode */
1429 COSTS_N_INSNS (31), /* div. */
1430 COSTS_N_INSNS (4), /* mult. */
1431 COSTS_N_INSNS (8), /* mult_addsub. */
1432 COSTS_N_INSNS (8), /* fma. */
1433 COSTS_N_INSNS (4), /* addsub. */
1434 COSTS_N_INSNS (2), /* fpconst. */
1435 COSTS_N_INSNS (2), /* neg. */
1436 COSTS_N_INSNS (2), /* compare. */
1437 COSTS_N_INSNS (4), /* widen. */
1438 COSTS_N_INSNS (4), /* narrow. */
1439 COSTS_N_INSNS (4), /* toint. */
1440 COSTS_N_INSNS (4), /* fromint. */
1441 COSTS_N_INSNS (4) /* roundint. */
1444 /* Vector */
1446 COSTS_N_INSNS (1) /* alu. */
1450 const struct cpu_cost_table cortexa15_extra_costs =
1452 /* ALU */
1454 0, /* arith. */
1455 0, /* logical. */
1456 0, /* shift. */
1457 0, /* shift_reg. */
1458 COSTS_N_INSNS (1), /* arith_shift. */
1459 COSTS_N_INSNS (1), /* arith_shift_reg. */
1460 COSTS_N_INSNS (1), /* log_shift. */
1461 COSTS_N_INSNS (1), /* log_shift_reg. */
1462 0, /* extend. */
1463 COSTS_N_INSNS (1), /* extend_arith. */
1464 COSTS_N_INSNS (1), /* bfi. */
1465 0, /* bfx. */
1466 0, /* clz. */
1467 0, /* rev. */
1468 0, /* non_exec. */
1469 true /* non_exec_costs_exec. */
1471 /* MULT SImode */
1474 COSTS_N_INSNS (2), /* simple. */
1475 COSTS_N_INSNS (3), /* flag_setting. */
1476 COSTS_N_INSNS (2), /* extend. */
1477 COSTS_N_INSNS (2), /* add. */
1478 COSTS_N_INSNS (2), /* extend_add. */
1479 COSTS_N_INSNS (18) /* idiv. */
1481 /* MULT DImode */
1483 0, /* simple (N/A). */
1484 0, /* flag_setting (N/A). */
1485 COSTS_N_INSNS (3), /* extend. */
1486 0, /* add (N/A). */
1487 COSTS_N_INSNS (3), /* extend_add. */
1488 0 /* idiv (N/A). */
1491 /* LD/ST */
1493 COSTS_N_INSNS (3), /* load. */
1494 COSTS_N_INSNS (3), /* load_sign_extend. */
1495 COSTS_N_INSNS (3), /* ldrd. */
1496 COSTS_N_INSNS (4), /* ldm_1st. */
1497 1, /* ldm_regs_per_insn_1st. */
1498 2, /* ldm_regs_per_insn_subsequent. */
1499 COSTS_N_INSNS (4), /* loadf. */
1500 COSTS_N_INSNS (4), /* loadd. */
1501 0, /* load_unaligned. */
1502 0, /* store. */
1503 0, /* strd. */
1504 COSTS_N_INSNS (1), /* stm_1st. */
1505 1, /* stm_regs_per_insn_1st. */
1506 2, /* stm_regs_per_insn_subsequent. */
1507 0, /* storef. */
1508 0, /* stored. */
1509 0 /* store_unaligned. */
1512 /* FP SFmode */
1514 COSTS_N_INSNS (17), /* div. */
1515 COSTS_N_INSNS (4), /* mult. */
1516 COSTS_N_INSNS (8), /* mult_addsub. */
1517 COSTS_N_INSNS (8), /* fma. */
1518 COSTS_N_INSNS (4), /* addsub. */
1519 COSTS_N_INSNS (2), /* fpconst. */
1520 COSTS_N_INSNS (2), /* neg. */
1521 COSTS_N_INSNS (5), /* compare. */
1522 COSTS_N_INSNS (4), /* widen. */
1523 COSTS_N_INSNS (4), /* narrow. */
1524 COSTS_N_INSNS (4), /* toint. */
1525 COSTS_N_INSNS (4), /* fromint. */
1526 COSTS_N_INSNS (4) /* roundint. */
1528 /* FP DFmode */
1530 COSTS_N_INSNS (31), /* div. */
1531 COSTS_N_INSNS (4), /* mult. */
1532 COSTS_N_INSNS (8), /* mult_addsub. */
1533 COSTS_N_INSNS (8), /* fma. */
1534 COSTS_N_INSNS (4), /* addsub. */
1535 COSTS_N_INSNS (2), /* fpconst. */
1536 COSTS_N_INSNS (2), /* neg. */
1537 COSTS_N_INSNS (2), /* compare. */
1538 COSTS_N_INSNS (4), /* widen. */
1539 COSTS_N_INSNS (4), /* narrow. */
1540 COSTS_N_INSNS (4), /* toint. */
1541 COSTS_N_INSNS (4), /* fromint. */
1542 COSTS_N_INSNS (4) /* roundint. */
1545 /* Vector */
1547 COSTS_N_INSNS (1) /* alu. */
1551 const struct cpu_cost_table v7m_extra_costs =
1553 /* ALU */
1555 0, /* arith. */
1556 0, /* logical. */
1557 0, /* shift. */
1558 0, /* shift_reg. */
1559 0, /* arith_shift. */
1560 COSTS_N_INSNS (1), /* arith_shift_reg. */
1561 0, /* log_shift. */
1562 COSTS_N_INSNS (1), /* log_shift_reg. */
1563 0, /* extend. */
1564 COSTS_N_INSNS (1), /* extend_arith. */
1565 0, /* bfi. */
1566 0, /* bfx. */
1567 0, /* clz. */
1568 0, /* rev. */
1569 COSTS_N_INSNS (1), /* non_exec. */
1570 false /* non_exec_costs_exec. */
1573 /* MULT SImode */
1575 COSTS_N_INSNS (1), /* simple. */
1576 COSTS_N_INSNS (1), /* flag_setting. */
1577 COSTS_N_INSNS (2), /* extend. */
1578 COSTS_N_INSNS (1), /* add. */
1579 COSTS_N_INSNS (3), /* extend_add. */
1580 COSTS_N_INSNS (8) /* idiv. */
1582 /* MULT DImode */
1584 0, /* simple (N/A). */
1585 0, /* flag_setting (N/A). */
1586 COSTS_N_INSNS (2), /* extend. */
1587 0, /* add (N/A). */
1588 COSTS_N_INSNS (3), /* extend_add. */
1589 0 /* idiv (N/A). */
1592 /* LD/ST */
1594 COSTS_N_INSNS (2), /* load. */
1595 0, /* load_sign_extend. */
1596 COSTS_N_INSNS (3), /* ldrd. */
1597 COSTS_N_INSNS (2), /* ldm_1st. */
1598 1, /* ldm_regs_per_insn_1st. */
1599 1, /* ldm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* loadf. */
1601 COSTS_N_INSNS (3), /* loadd. */
1602 COSTS_N_INSNS (1), /* load_unaligned. */
1603 COSTS_N_INSNS (2), /* store. */
1604 COSTS_N_INSNS (3), /* strd. */
1605 COSTS_N_INSNS (2), /* stm_1st. */
1606 1, /* stm_regs_per_insn_1st. */
1607 1, /* stm_regs_per_insn_subsequent. */
1608 COSTS_N_INSNS (2), /* storef. */
1609 COSTS_N_INSNS (3), /* stored. */
1610 COSTS_N_INSNS (1) /* store_unaligned. */
1613 /* FP SFmode */
1615 COSTS_N_INSNS (7), /* div. */
1616 COSTS_N_INSNS (2), /* mult. */
1617 COSTS_N_INSNS (5), /* mult_addsub. */
1618 COSTS_N_INSNS (3), /* fma. */
1619 COSTS_N_INSNS (1), /* addsub. */
1620 0, /* fpconst. */
1621 0, /* neg. */
1622 0, /* compare. */
1623 0, /* widen. */
1624 0, /* narrow. */
1625 0, /* toint. */
1626 0, /* fromint. */
1627 0 /* roundint. */
1629 /* FP DFmode */
1631 COSTS_N_INSNS (15), /* div. */
1632 COSTS_N_INSNS (5), /* mult. */
1633 COSTS_N_INSNS (7), /* mult_addsub. */
1634 COSTS_N_INSNS (7), /* fma. */
1635 COSTS_N_INSNS (3), /* addsub. */
1636 0, /* fpconst. */
1637 0, /* neg. */
1638 0, /* compare. */
1639 0, /* widen. */
1640 0, /* narrow. */
1641 0, /* toint. */
1642 0, /* fromint. */
1643 0 /* roundint. */
1646 /* Vector */
1648 COSTS_N_INSNS (1) /* alu. */
1652 const struct tune_params arm_slowmul_tune =
1654 arm_slowmul_rtx_costs,
1655 NULL,
1656 NULL, /* Sched adj cost. */
1657 3, /* Constant limit. */
1658 5, /* Max cond insns. */
1659 ARM_PREFETCH_NOT_BENEFICIAL,
1660 true, /* Prefer constant pool. */
1661 arm_default_branch_cost,
1662 false, /* Prefer LDRD/STRD. */
1663 {true, true}, /* Prefer non short circuit. */
1664 &arm_default_vec_cost, /* Vectorizer costs. */
1665 false, /* Prefer Neon for 64-bits bitops. */
1666 false, false, /* Prefer 32-bit encodings. */
1667 false, /* Prefer Neon for stringops. */
1668 8 /* Maximum insns to inline memset. */
1671 const struct tune_params arm_fastmul_tune =
1673 arm_fastmul_rtx_costs,
1674 NULL,
1675 NULL, /* Sched adj cost. */
1676 1, /* Constant limit. */
1677 5, /* Max cond insns. */
1678 ARM_PREFETCH_NOT_BENEFICIAL,
1679 true, /* Prefer constant pool. */
1680 arm_default_branch_cost,
1681 false, /* Prefer LDRD/STRD. */
1682 {true, true}, /* Prefer non short circuit. */
1683 &arm_default_vec_cost, /* Vectorizer costs. */
1684 false, /* Prefer Neon for 64-bits bitops. */
1685 false, false, /* Prefer 32-bit encodings. */
1686 false, /* Prefer Neon for stringops. */
1687 8 /* Maximum insns to inline memset. */
1690 /* StrongARM has early execution of branches, so a sequence that is worth
1691 skipping is shorter. Set max_insns_skipped to a lower value. */
1693 const struct tune_params arm_strongarm_tune =
1695 arm_fastmul_rtx_costs,
1696 NULL,
1697 NULL, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 3, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL,
1701 true, /* Prefer constant pool. */
1702 arm_default_branch_cost,
1703 false, /* Prefer LDRD/STRD. */
1704 {true, true}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost, /* Vectorizer costs. */
1706 false, /* Prefer Neon for 64-bits bitops. */
1707 false, false, /* Prefer 32-bit encodings. */
1708 false, /* Prefer Neon for stringops. */
1709 8 /* Maximum insns to inline memset. */
1712 const struct tune_params arm_xscale_tune =
1714 arm_xscale_rtx_costs,
1715 NULL,
1716 xscale_sched_adjust_cost,
1717 2, /* Constant limit. */
1718 3, /* Max cond insns. */
1719 ARM_PREFETCH_NOT_BENEFICIAL,
1720 true, /* Prefer constant pool. */
1721 arm_default_branch_cost,
1722 false, /* Prefer LDRD/STRD. */
1723 {true, true}, /* Prefer non short circuit. */
1724 &arm_default_vec_cost, /* Vectorizer costs. */
1725 false, /* Prefer Neon for 64-bits bitops. */
1726 false, false, /* Prefer 32-bit encodings. */
1727 false, /* Prefer Neon for stringops. */
1728 8 /* Maximum insns to inline memset. */
1731 const struct tune_params arm_9e_tune =
1733 arm_9e_rtx_costs,
1734 NULL,
1735 NULL, /* Sched adj cost. */
1736 1, /* Constant limit. */
1737 5, /* Max cond insns. */
1738 ARM_PREFETCH_NOT_BENEFICIAL,
1739 true, /* Prefer constant pool. */
1740 arm_default_branch_cost,
1741 false, /* Prefer LDRD/STRD. */
1742 {true, true}, /* Prefer non short circuit. */
1743 &arm_default_vec_cost, /* Vectorizer costs. */
1744 false, /* Prefer Neon for 64-bits bitops. */
1745 false, false, /* Prefer 32-bit encodings. */
1746 false, /* Prefer Neon for stringops. */
1747 8 /* Maximum insns to inline memset. */
1750 const struct tune_params arm_v6t2_tune =
1752 arm_9e_rtx_costs,
1753 NULL,
1754 NULL, /* Sched adj cost. */
1755 1, /* Constant limit. */
1756 5, /* Max cond insns. */
1757 ARM_PREFETCH_NOT_BENEFICIAL,
1758 false, /* Prefer constant pool. */
1759 arm_default_branch_cost,
1760 false, /* Prefer LDRD/STRD. */
1761 {true, true}, /* Prefer non short circuit. */
1762 &arm_default_vec_cost, /* Vectorizer costs. */
1763 false, /* Prefer Neon for 64-bits bitops. */
1764 false, false, /* Prefer 32-bit encodings. */
1765 false, /* Prefer Neon for stringops. */
1766 8 /* Maximum insns to inline memset. */
1769 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1770 const struct tune_params arm_cortex_tune =
1772 arm_9e_rtx_costs,
1773 &generic_extra_costs,
1774 NULL, /* Sched adj cost. */
1775 1, /* Constant limit. */
1776 5, /* Max cond insns. */
1777 ARM_PREFETCH_NOT_BENEFICIAL,
1778 false, /* Prefer constant pool. */
1779 arm_default_branch_cost,
1780 false, /* Prefer LDRD/STRD. */
1781 {true, true}, /* Prefer non short circuit. */
1782 &arm_default_vec_cost, /* Vectorizer costs. */
1783 false, /* Prefer Neon for 64-bits bitops. */
1784 false, false, /* Prefer 32-bit encodings. */
1785 false, /* Prefer Neon for stringops. */
1786 8 /* Maximum insns to inline memset. */
1789 const struct tune_params arm_cortex_a8_tune =
1791 arm_9e_rtx_costs,
1792 &cortexa8_extra_costs,
1793 NULL, /* Sched adj cost. */
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 ARM_PREFETCH_NOT_BENEFICIAL,
1797 false, /* Prefer constant pool. */
1798 arm_default_branch_cost,
1799 false, /* Prefer LDRD/STRD. */
1800 {true, true}, /* Prefer non short circuit. */
1801 &arm_default_vec_cost, /* Vectorizer costs. */
1802 false, /* Prefer Neon for 64-bits bitops. */
1803 false, false, /* Prefer 32-bit encodings. */
1804 true, /* Prefer Neon for stringops. */
1805 8 /* Maximum insns to inline memset. */
1808 const struct tune_params arm_cortex_a7_tune =
1810 arm_9e_rtx_costs,
1811 &cortexa7_extra_costs,
1812 NULL,
1813 1, /* Constant limit. */
1814 5, /* Max cond insns. */
1815 ARM_PREFETCH_NOT_BENEFICIAL,
1816 false, /* Prefer constant pool. */
1817 arm_default_branch_cost,
1818 false, /* Prefer LDRD/STRD. */
1819 {true, true}, /* Prefer non short circuit. */
1820 &arm_default_vec_cost, /* Vectorizer costs. */
1821 false, /* Prefer Neon for 64-bits bitops. */
1822 false, false, /* Prefer 32-bit encodings. */
1823 true, /* Prefer Neon for stringops. */
1824 8 /* Maximum insns to inline memset. */
1827 const struct tune_params arm_cortex_a15_tune =
1829 arm_9e_rtx_costs,
1830 &cortexa15_extra_costs,
1831 NULL, /* Sched adj cost. */
1832 1, /* Constant limit. */
1833 2, /* Max cond insns. */
1834 ARM_PREFETCH_NOT_BENEFICIAL,
1835 false, /* Prefer constant pool. */
1836 arm_default_branch_cost,
1837 true, /* Prefer LDRD/STRD. */
1838 {true, true}, /* Prefer non short circuit. */
1839 &arm_default_vec_cost, /* Vectorizer costs. */
1840 false, /* Prefer Neon for 64-bits bitops. */
1841 true, true, /* Prefer 32-bit encodings. */
1842 true, /* Prefer Neon for stringops. */
1843 8 /* Maximum insns to inline memset. */
1846 const struct tune_params arm_cortex_a53_tune =
1848 arm_9e_rtx_costs,
1849 &cortexa53_extra_costs,
1850 NULL, /* Scheduler cost adjustment. */
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 false, /* Prefer constant pool. */
1855 arm_default_branch_cost,
1856 false, /* Prefer LDRD/STRD. */
1857 {true, true}, /* Prefer non short circuit. */
1858 &arm_default_vec_cost, /* Vectorizer costs. */
1859 false, /* Prefer Neon for 64-bits bitops. */
1860 false, false, /* Prefer 32-bit encodings. */
1861 false, /* Prefer Neon for stringops. */
1862 8 /* Maximum insns to inline memset. */
1865 const struct tune_params arm_cortex_a57_tune =
1867 arm_9e_rtx_costs,
1868 &cortexa57_extra_costs,
1869 NULL, /* Scheduler cost adjustment. */
1870 1, /* Constant limit. */
1871 2, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 true, /* Prefer LDRD/STRD. */
1876 {true, true}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 true, true, /* Prefer 32-bit encodings. */
1880 false, /* Prefer Neon for stringops. */
1881 8 /* Maximum insns to inline memset. */
1884 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1885 less appealing. Set max_insns_skipped to a low value. */
1887 const struct tune_params arm_cortex_a5_tune =
1889 arm_9e_rtx_costs,
1890 &cortexa5_extra_costs,
1891 NULL, /* Sched adj cost. */
1892 1, /* Constant limit. */
1893 1, /* Max cond insns. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 false, /* Prefer constant pool. */
1896 arm_cortex_a5_branch_cost,
1897 false, /* Prefer LDRD/STRD. */
1898 {false, false}, /* Prefer non short circuit. */
1899 &arm_default_vec_cost, /* Vectorizer costs. */
1900 false, /* Prefer Neon for 64-bits bitops. */
1901 false, false, /* Prefer 32-bit encodings. */
1902 true, /* Prefer Neon for stringops. */
1903 8 /* Maximum insns to inline memset. */
1906 const struct tune_params arm_cortex_a9_tune =
1908 arm_9e_rtx_costs,
1909 &cortexa9_extra_costs,
1910 cortex_a9_sched_adjust_cost,
1911 1, /* Constant limit. */
1912 5, /* Max cond insns. */
1913 ARM_PREFETCH_BENEFICIAL(4,32,32),
1914 false, /* Prefer constant pool. */
1915 arm_default_branch_cost,
1916 false, /* Prefer LDRD/STRD. */
1917 {true, true}, /* Prefer non short circuit. */
1918 &arm_default_vec_cost, /* Vectorizer costs. */
1919 false, /* Prefer Neon for 64-bits bitops. */
1920 false, false, /* Prefer 32-bit encodings. */
1921 false, /* Prefer Neon for stringops. */
1922 8 /* Maximum insns to inline memset. */
1925 const struct tune_params arm_cortex_a12_tune =
1927 arm_9e_rtx_costs,
1928 &cortexa12_extra_costs,
1929 NULL,
1930 1, /* Constant limit. */
1931 5, /* Max cond insns. */
1932 ARM_PREFETCH_BENEFICIAL(4,32,32),
1933 false, /* Prefer constant pool. */
1934 arm_default_branch_cost,
1935 true, /* Prefer LDRD/STRD. */
1936 {true, true}, /* Prefer non short circuit. */
1937 &arm_default_vec_cost, /* Vectorizer costs. */
1938 false, /* Prefer Neon for 64-bits bitops. */
1939 false, false, /* Prefer 32-bit encodings. */
1940 true, /* Prefer Neon for stringops. */
1941 8 /* Maximum insns to inline memset. */
1944 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT take a single
1945 cycle to execute each, so materialising a 32-bit constant with the pair takes
1946 two cycles. An LDR from the constant pool also takes two cycles to execute,
1947 but mildly increases pipelining opportunity (consecutive loads/stores can be
1948 pipelined together, saving one cycle), and may also improve icache
1949 utilisation. Hence we prefer the constant pool for such processors. */
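/* As an illustration (the register and literal-pool label below are chosen
   arbitrarily, not taken from compiler output), the constant 0x12345678 can be
   materialised either with the immediate pair

	movw	r0, #0x5678
	movt	r0, #0x1234

   or with a PC-relative constant-pool load

	ldr	r0, .LC0	@ .LC0: .word	0x12345678

   Both forms cost two cycles on such cores, but the LDR form can pipeline with
   neighbouring loads/stores as described above.  */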
1951 const struct tune_params arm_v7m_tune =
1953 arm_9e_rtx_costs,
1954 &v7m_extra_costs,
1955 NULL, /* Sched adj cost. */
1956 1, /* Constant limit. */
1957 2, /* Max cond insns. */
1958 ARM_PREFETCH_NOT_BENEFICIAL,
1959 true, /* Prefer constant pool. */
1960 arm_cortex_m_branch_cost,
1961 false, /* Prefer LDRD/STRD. */
1962 {false, false}, /* Prefer non short circuit. */
1963 &arm_default_vec_cost, /* Vectorizer costs. */
1964 false, /* Prefer Neon for 64-bits bitops. */
1965 false, false, /* Prefer 32-bit encodings. */
1966 false, /* Prefer Neon for stringops. */
1967 8 /* Maximum insns to inline memset. */
1970 /* Cortex-M7 tuning. */
1972 const struct tune_params arm_cortex_m7_tune =
1974 arm_9e_rtx_costs,
1975 &v7m_extra_costs,
1976 NULL, /* Sched adj cost. */
1977 0, /* Constant limit. */
1978 0, /* Max cond insns. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 true, /* Prefer constant pool. */
1981 arm_cortex_m_branch_cost,
1982 false, /* Prefer LDRD/STRD. */
1983 {true, true}, /* Prefer non short circuit. */
1984 &arm_default_vec_cost, /* Vectorizer costs. */
1985 false, /* Prefer Neon for 64-bits bitops. */
1986 false, false, /* Prefer 32-bit encodings. */
1987 false, /* Prefer Neon for stringops. */
1988 8 /* Maximum insns to inline memset. */
1991 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1992 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1993 const struct tune_params arm_v6m_tune =
1995 arm_9e_rtx_costs,
1996 NULL,
1997 NULL, /* Sched adj cost. */
1998 1, /* Constant limit. */
1999 5, /* Max cond insns. */
2000 ARM_PREFETCH_NOT_BENEFICIAL,
2001 false, /* Prefer constant pool. */
2002 arm_default_branch_cost,
2003 false, /* Prefer LDRD/STRD. */
2004 {false, false}, /* Prefer non short circuit. */
2005 &arm_default_vec_cost, /* Vectorizer costs. */
2006 false, /* Prefer Neon for 64-bits bitops. */
2007 false, false, /* Prefer 32-bit encodings. */
2008 false, /* Prefer Neon for stringops. */
2009 8 /* Maximum insns to inline memset. */
2012 const struct tune_params arm_fa726te_tune =
2014 arm_9e_rtx_costs,
2015 NULL,
2016 fa726te_sched_adjust_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 true, /* Prefer constant pool. */
2021 arm_default_branch_cost,
2022 false, /* Prefer LDRD/STRD. */
2023 {true, true}, /* Prefer non short circuit. */
2024 &arm_default_vec_cost, /* Vectorizer costs. */
2025 false, /* Prefer Neon for 64-bits bitops. */
2026 false, false, /* Prefer 32-bit encodings. */
2027 false, /* Prefer Neon for stringops. */
2028 8 /* Maximum insns to inline memset. */
2032 /* Not all of these give usefully different compilation alternatives,
2033 but there is no simple way of generalizing them. */
2034 static const struct processors all_cores[] =
2036 /* ARM Cores */
2037 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2038 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2039 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2040 #include "arm-cores.def"
2041 #undef ARM_CORE
2042 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2045 static const struct processors all_architectures[] =
2047 /* ARM Architectures */
2048 /* We don't specify tuning costs here as it will be figured out
2049 from the core. */
2051 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2052 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2053 #include "arm-arches.def"
2054 #undef ARM_ARCH
2055 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2059 /* These are populated as command-line arguments are processed, or NULL
2060 if not specified. */
2061 static const struct processors *arm_selected_arch;
2062 static const struct processors *arm_selected_cpu;
2063 static const struct processors *arm_selected_tune;
2065 /* The name of the preprocessor macro to define for this architecture. */
2067 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2069 /* Available values for -mfpu=. */
2071 static const struct arm_fpu_desc all_fpus[] =
2073 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2074 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2075 #include "arm-fpus.def"
2076 #undef ARM_FPU
2080 /* Supported TLS relocations. */
2082 enum tls_reloc {
2083 TLS_GD32,
2084 TLS_LDM32,
2085 TLS_LDO32,
2086 TLS_IE32,
2087 TLS_LE32,
2088 TLS_DESCSEQ /* GNU scheme */
2091 /* The maximum number of insns to be used when loading a constant. */
2092 inline static int
2093 arm_constant_limit (bool size_p)
2095 return size_p ? 1 : current_tune->constant_limit;
2098 /* Emit an insn that's a simple single-set. Both the operands must be known
2099 to be valid. */
2100 inline static rtx_insn *
2101 emit_set_insn (rtx x, rtx y)
2103 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2106 /* Return the number of bits set in VALUE. */
2107 static unsigned
2108 bit_count (unsigned long value)
2110 unsigned long count = 0;
2112 while (value)
2114 count++;
2115 value &= value - 1; /* Clear the least-significant set bit. */
2118 return count;
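/* A short worked example of the loop above: for VALUE = 0x2C (binary 101100)
   each iteration clears one set bit, 101100 -> 101000 -> 100000 -> 0, so
   bit_count returns 3.  */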
2121 typedef struct
2123 machine_mode mode;
2124 const char *name;
2125 } arm_fixed_mode_set;
2127 /* A small helper for setting fixed-point libfuncs. */
2129 static void
2130 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2131 const char *funcname, const char *modename,
2132 int num_suffix)
2134 char buffer[50];
2136 if (num_suffix == 0)
2137 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2138 else
2139 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2141 set_optab_libfunc (optable, mode, buffer);
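/* For example, the loop in arm_init_libfuncs below effectively makes the call
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   which registers the name "__gnu_addqq3" for QQmode addition; a NUM_SUFFIX
   of 0 simply omits the trailing digit.  */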
2144 static void
2145 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2146 machine_mode from, const char *funcname,
2147 const char *toname, const char *fromname)
2149 char buffer[50];
2150 const char *maybe_suffix_2 = "";
2152 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2153 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2154 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2155 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2156 maybe_suffix_2 = "2";
2158 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2159 maybe_suffix_2);
2161 set_conv_libfunc (optable, to, from, buffer);
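/* For example, converting SImode ("si") to SQmode ("sq") with the fract optab
   yields the name "__gnu_fractsisq" (no "2" suffix, since SImode is not a
   fixed-point mode), while SQmode to DQmode yields "__gnu_fractsqdq2" because
   both are signed fract modes.  */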
2164 /* Set up library functions unique to ARM. */
2166 static void
2167 arm_init_libfuncs (void)
2169 /* For Linux, we have access to kernel support for atomic operations. */
2170 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2171 init_sync_libfuncs (2 * UNITS_PER_WORD);
2173 /* There are no special library functions unless we are using the
2174 ARM BPABI. */
2175 if (!TARGET_BPABI)
2176 return;
2178 /* The functions below are described in Section 4 of the "Run-Time
2179 ABI for the ARM architecture", Version 1.0. */
2181 /* Double-precision floating-point arithmetic. Table 2. */
2182 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2183 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2184 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2185 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2186 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2188 /* Double-precision comparisons. Table 3. */
2189 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2190 set_optab_libfunc (ne_optab, DFmode, NULL);
2191 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2192 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2193 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2194 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2195 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2197 /* Single-precision floating-point arithmetic. Table 4. */
2198 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2199 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2200 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2201 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2202 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2204 /* Single-precision comparisons. Table 5. */
2205 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2206 set_optab_libfunc (ne_optab, SFmode, NULL);
2207 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2208 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2209 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2210 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2211 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2213 /* Floating-point to integer conversions. Table 6. */
2214 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2215 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2216 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2217 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2218 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2219 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2220 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2221 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2223 /* Conversions between floating types. Table 7. */
2224 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2225 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2227 /* Integer to floating-point conversions. Table 8. */
2228 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2229 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2230 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2231 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2232 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2233 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2234 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2235 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2237 /* Long long. Table 9. */
2238 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2239 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2240 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2241 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2242 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2243 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2244 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2245 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2247 /* Integer (32/32->32) division. \S 4.3.1. */
2248 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2249 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2251 /* The divmod functions are designed so that they can be used for
2252 plain division, even though they return both the quotient and the
2253 remainder. The quotient is returned in the usual location (i.e.,
2254 r0 for SImode, {r0, r1} for DImode), just as would be expected
2255 for an ordinary division routine. Because the AAPCS calling
2256 conventions specify that all of { r0, r1, r2, r3 } are
2257 caller-saved (call-clobbered) registers, there is no need to tell the compiler
2258 explicitly that those registers are clobbered by these
2259 routines. */
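/* For instance, per the Run-time ABI, __aeabi_idivmod called with r0 = 7 and
   r1 = 3 returns the quotient 2 in r0 and the remainder 1 in r1, while
   __aeabi_uldivmod returns the 64-bit quotient in {r0, r1} and the remainder
   in {r2, r3}.  */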
2260 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2261 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2263 /* For SImode division the ABI provides div-without-mod routines,
2264 which are faster. */
2265 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2266 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2268 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2269 divmod libcalls instead. */
2270 set_optab_libfunc (smod_optab, DImode, NULL);
2271 set_optab_libfunc (umod_optab, DImode, NULL);
2272 set_optab_libfunc (smod_optab, SImode, NULL);
2273 set_optab_libfunc (umod_optab, SImode, NULL);
2275 /* Half-precision float operations. The compiler handles all operations
2276 with NULL libfuncs by converting to SFmode. */
2277 switch (arm_fp16_format)
2279 case ARM_FP16_FORMAT_IEEE:
2280 case ARM_FP16_FORMAT_ALTERNATIVE:
2282 /* Conversions. */
2283 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2284 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2285 ? "__gnu_f2h_ieee"
2286 : "__gnu_f2h_alternative"));
2287 set_conv_libfunc (sext_optab, SFmode, HFmode,
2288 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2289 ? "__gnu_h2f_ieee"
2290 : "__gnu_h2f_alternative"));
2292 /* Arithmetic. */
2293 set_optab_libfunc (add_optab, HFmode, NULL);
2294 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2295 set_optab_libfunc (smul_optab, HFmode, NULL);
2296 set_optab_libfunc (neg_optab, HFmode, NULL);
2297 set_optab_libfunc (sub_optab, HFmode, NULL);
2299 /* Comparisons. */
2300 set_optab_libfunc (eq_optab, HFmode, NULL);
2301 set_optab_libfunc (ne_optab, HFmode, NULL);
2302 set_optab_libfunc (lt_optab, HFmode, NULL);
2303 set_optab_libfunc (le_optab, HFmode, NULL);
2304 set_optab_libfunc (ge_optab, HFmode, NULL);
2305 set_optab_libfunc (gt_optab, HFmode, NULL);
2306 set_optab_libfunc (unord_optab, HFmode, NULL);
2307 break;
2309 default:
2310 break;
2313 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2315 const arm_fixed_mode_set fixed_arith_modes[] =
2317 { QQmode, "qq" },
2318 { UQQmode, "uqq" },
2319 { HQmode, "hq" },
2320 { UHQmode, "uhq" },
2321 { SQmode, "sq" },
2322 { USQmode, "usq" },
2323 { DQmode, "dq" },
2324 { UDQmode, "udq" },
2325 { TQmode, "tq" },
2326 { UTQmode, "utq" },
2327 { HAmode, "ha" },
2328 { UHAmode, "uha" },
2329 { SAmode, "sa" },
2330 { USAmode, "usa" },
2331 { DAmode, "da" },
2332 { UDAmode, "uda" },
2333 { TAmode, "ta" },
2334 { UTAmode, "uta" }
2336 const arm_fixed_mode_set fixed_conv_modes[] =
2338 { QQmode, "qq" },
2339 { UQQmode, "uqq" },
2340 { HQmode, "hq" },
2341 { UHQmode, "uhq" },
2342 { SQmode, "sq" },
2343 { USQmode, "usq" },
2344 { DQmode, "dq" },
2345 { UDQmode, "udq" },
2346 { TQmode, "tq" },
2347 { UTQmode, "utq" },
2348 { HAmode, "ha" },
2349 { UHAmode, "uha" },
2350 { SAmode, "sa" },
2351 { USAmode, "usa" },
2352 { DAmode, "da" },
2353 { UDAmode, "uda" },
2354 { TAmode, "ta" },
2355 { UTAmode, "uta" },
2356 { QImode, "qi" },
2357 { HImode, "hi" },
2358 { SImode, "si" },
2359 { DImode, "di" },
2360 { TImode, "ti" },
2361 { SFmode, "sf" },
2362 { DFmode, "df" }
2364 unsigned int i, j;
2366 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2368 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2369 "add", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2371 "ssadd", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2373 "usadd", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2375 "sub", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2377 "sssub", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2379 "ussub", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2381 "mul", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2383 "ssmul", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2385 "usmul", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2387 "div", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2389 "udiv", fixed_arith_modes[i].name, 3);
2390 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2391 "ssdiv", fixed_arith_modes[i].name, 3);
2392 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2393 "usdiv", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2395 "neg", fixed_arith_modes[i].name, 2);
2396 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2397 "ssneg", fixed_arith_modes[i].name, 2);
2398 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2399 "usneg", fixed_arith_modes[i].name, 2);
2400 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2401 "ashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2403 "ashr", fixed_arith_modes[i].name, 3);
2404 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2405 "lshr", fixed_arith_modes[i].name, 3);
2406 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2407 "ssashl", fixed_arith_modes[i].name, 3);
2408 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2409 "usashl", fixed_arith_modes[i].name, 3);
2410 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2411 "cmp", fixed_arith_modes[i].name, 2);
2414 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2415 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2417 if (i == j
2418 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2419 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2420 continue;
2422 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2423 fixed_conv_modes[j].mode, "fract",
2424 fixed_conv_modes[i].name,
2425 fixed_conv_modes[j].name);
2426 arm_set_fixed_conv_libfunc (satfract_optab,
2427 fixed_conv_modes[i].mode,
2428 fixed_conv_modes[j].mode, "satfract",
2429 fixed_conv_modes[i].name,
2430 fixed_conv_modes[j].name);
2431 arm_set_fixed_conv_libfunc (fractuns_optab,
2432 fixed_conv_modes[i].mode,
2433 fixed_conv_modes[j].mode, "fractuns",
2434 fixed_conv_modes[i].name,
2435 fixed_conv_modes[j].name);
2436 arm_set_fixed_conv_libfunc (satfractuns_optab,
2437 fixed_conv_modes[i].mode,
2438 fixed_conv_modes[j].mode, "satfractuns",
2439 fixed_conv_modes[i].name,
2440 fixed_conv_modes[j].name);
2444 if (TARGET_AAPCS_BASED)
2445 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2448 /* On AAPCS systems, this is the "struct __va_list". */
2449 static GTY(()) tree va_list_type;
2451 /* Return the type to use as __builtin_va_list. */
2452 static tree
2453 arm_build_builtin_va_list (void)
2455 tree va_list_name;
2456 tree ap_field;
2458 if (!TARGET_AAPCS_BASED)
2459 return std_build_builtin_va_list ();
2461 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2462 defined as:
2464 struct __va_list
2466 void *__ap;
2469 The C Library ABI further reinforces this definition in \S
2470 4.1.
2472 We must follow this definition exactly. The structure tag
2473 name is visible in C++ mangled names, and thus forms a part
2474 of the ABI. The field name may be used by people who
2475 #include <stdarg.h>. */
2476 /* Create the type. */
2477 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2478 /* Give it the required name. */
2479 va_list_name = build_decl (BUILTINS_LOCATION,
2480 TYPE_DECL,
2481 get_identifier ("__va_list"),
2482 va_list_type);
2483 DECL_ARTIFICIAL (va_list_name) = 1;
2484 TYPE_NAME (va_list_type) = va_list_name;
2485 TYPE_STUB_DECL (va_list_type) = va_list_name;
2486 /* Create the __ap field. */
2487 ap_field = build_decl (BUILTINS_LOCATION,
2488 FIELD_DECL,
2489 get_identifier ("__ap"),
2490 ptr_type_node);
2491 DECL_ARTIFICIAL (ap_field) = 1;
2492 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2493 TYPE_FIELDS (va_list_type) = ap_field;
2494 /* Compute its layout. */
2495 layout_type (va_list_type);
2497 return va_list_type;
2500 /* Return an expression of type "void *" pointing to the next
2501 available argument in a variable-argument list. VALIST is the
2502 user-level va_list object, of type __builtin_va_list. */
2503 static tree
2504 arm_extract_valist_ptr (tree valist)
2506 if (TREE_TYPE (valist) == error_mark_node)
2507 return error_mark_node;
2509 /* On an AAPCS target, the pointer is stored within "struct
2510 va_list". */
2511 if (TARGET_AAPCS_BASED)
2513 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2514 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2515 valist, ap_field, NULL_TREE);
2518 return valist;
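/* For an AAPCS va_list object AP, the COMPONENT_REF built above corresponds to
   the C expression AP.__ap; on other targets VALIST is returned unchanged.  */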
2521 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2522 static void
2523 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2525 valist = arm_extract_valist_ptr (valist);
2526 std_expand_builtin_va_start (valist, nextarg);
2529 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2530 static tree
2531 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2532 gimple_seq *post_p)
2534 valist = arm_extract_valist_ptr (valist);
2535 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2538 /* Fix up any incompatible options that the user has specified. */
2539 static void
2540 arm_option_override (void)
2542 if (global_options_set.x_arm_arch_option)
2543 arm_selected_arch = &all_architectures[arm_arch_option];
2545 if (global_options_set.x_arm_cpu_option)
2547 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2548 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2551 if (global_options_set.x_arm_tune_option)
2552 arm_selected_tune = &all_cores[(int) arm_tune_option];
2554 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2555 SUBTARGET_OVERRIDE_OPTIONS;
2556 #endif
2558 if (arm_selected_arch)
2560 if (arm_selected_cpu)
2562 /* Check for conflict between mcpu and march. */
2563 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2565 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2566 arm_selected_cpu->name, arm_selected_arch->name);
2567 /* -march wins for code generation.
2568 -mcpu wins for default tuning. */
2569 if (!arm_selected_tune)
2570 arm_selected_tune = arm_selected_cpu;
2572 arm_selected_cpu = arm_selected_arch;
2574 else
2575 /* -mcpu wins. */
2576 arm_selected_arch = NULL;
2578 else
2579 /* Pick a CPU based on the architecture. */
2580 arm_selected_cpu = arm_selected_arch;
2583 /* If the user did not specify a processor, choose one for them. */
2584 if (!arm_selected_cpu)
2586 const struct processors * sel;
2587 unsigned int sought;
2589 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2590 if (!arm_selected_cpu->name)
2592 #ifdef SUBTARGET_CPU_DEFAULT
2593 /* Use the subtarget default CPU if none was specified by
2594 configure. */
2595 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2596 #endif
2597 /* Default to ARM6. */
2598 if (!arm_selected_cpu->name)
2599 arm_selected_cpu = &all_cores[arm6];
2602 sel = arm_selected_cpu;
2603 insn_flags = sel->flags;
2605 /* Now check to see if the user has specified some command line
2606 switches that require certain abilities from the CPU. */
2607 sought = 0;
2609 if (TARGET_INTERWORK || TARGET_THUMB)
2611 sought |= (FL_THUMB | FL_MODE32);
2613 /* There are no ARM processors that support both APCS-26 and
2614 interworking. Therefore we force FL_MODE26 to be removed
2615 from insn_flags here (if it was set), so that the search
2616 below will always be able to find a compatible processor. */
2617 insn_flags &= ~FL_MODE26;
2620 if (sought != 0 && ((sought & insn_flags) != sought))
2622 /* Try to locate a CPU type that supports all of the abilities
2623 of the default CPU, plus the extra abilities requested by
2624 the user. */
2625 for (sel = all_cores; sel->name != NULL; sel++)
2626 if ((sel->flags & sought) == (sought | insn_flags))
2627 break;
2629 if (sel->name == NULL)
2631 unsigned current_bit_count = 0;
2632 const struct processors * best_fit = NULL;
2634 /* Ideally we would like to issue an error message here
2635 saying that it was not possible to find a CPU compatible
2636 with the default CPU, but which also supports the command
2637 line options specified by the programmer, and so they
2638 ought to use the -mcpu=<name> command line option to
2639 override the default CPU type.
2641 If we cannot find a cpu that has both the
2642 characteristics of the default cpu and the given
2643 command line options, we scan the array again looking
2644 for a best match. */
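/* For example, if two cores both provide every sought feature, the one whose
   flags share more bits with the default CPU's insn_flags (the larger
   bit_count below) is chosen as the best fit.  */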
2645 for (sel = all_cores; sel->name != NULL; sel++)
2646 if ((sel->flags & sought) == sought)
2648 unsigned count;
2650 count = bit_count (sel->flags & insn_flags);
2652 if (count >= current_bit_count)
2654 best_fit = sel;
2655 current_bit_count = count;
2659 gcc_assert (best_fit);
2660 sel = best_fit;
2663 arm_selected_cpu = sel;
2667 gcc_assert (arm_selected_cpu);
2668 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2669 if (!arm_selected_tune)
2670 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2672 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2673 insn_flags = arm_selected_cpu->flags;
2674 arm_base_arch = arm_selected_cpu->base_arch;
2676 arm_tune = arm_selected_tune->core;
2677 tune_flags = arm_selected_tune->flags;
2678 current_tune = arm_selected_tune->tune;
2680 /* Make sure that the processor choice does not conflict with any of the
2681 other command line choices. */
2682 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2683 error ("target CPU does not support ARM mode");
2685 /* BPABI targets use linker tricks to allow interworking on cores
2686 without thumb support. */
2687 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2689 warning (0, "target CPU does not support interworking" );
2690 target_flags &= ~MASK_INTERWORK;
2693 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2695 warning (0, "target CPU does not support THUMB instructions");
2696 target_flags &= ~MASK_THUMB;
2699 if (TARGET_APCS_FRAME && TARGET_THUMB)
2701 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2702 target_flags &= ~MASK_APCS_FRAME;
2705 /* Callee super interworking implies thumb interworking. Adding
2706 this to the flags here simplifies the logic elsewhere. */
2707 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2708 target_flags |= MASK_INTERWORK;
2710 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if called
2711 from here, where no function is currently being compiled. */
2712 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2713 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2715 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2716 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2718 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2720 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2721 target_flags |= MASK_APCS_FRAME;
2724 if (TARGET_POKE_FUNCTION_NAME)
2725 target_flags |= MASK_APCS_FRAME;
2727 if (TARGET_APCS_REENT && flag_pic)
2728 error ("-fpic and -mapcs-reent are incompatible");
2730 if (TARGET_APCS_REENT)
2731 warning (0, "APCS reentrant code not supported. Ignored");
2733 /* If this target is normally configured to use APCS frames, warn if they
2734 are turned off and debugging is turned on. */
2735 if (TARGET_ARM
2736 && write_symbols != NO_DEBUG
2737 && !TARGET_APCS_FRAME
2738 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2739 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2741 if (TARGET_APCS_FLOAT)
2742 warning (0, "passing floating point arguments in fp regs not yet supported");
2744 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2745 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2746 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2747 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2748 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2749 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2750 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2751 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2752 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2753 arm_arch6m = arm_arch6 && !arm_arch_notm;
2754 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2755 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2756 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2757 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2758 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2760 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2761 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2762 thumb_code = TARGET_ARM == 0;
2763 thumb1_code = TARGET_THUMB1 != 0;
2764 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2765 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2766 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2767 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2768 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2769 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2770 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2771 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2772 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2773 if (arm_restrict_it == 2)
2774 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2776 if (!TARGET_THUMB2)
2777 arm_restrict_it = 0;
2779 /* If we are not using the default (ARM mode) section anchor offset
2780 ranges, then set the correct ranges now. */
2781 if (TARGET_THUMB1)
2783 /* Thumb-1 LDR instructions cannot have negative offsets.
2784 Permissible positive offset ranges are 5-bit (for byte loads),
2785 6-bit (for halfword loads), or 7-bit (for word loads).
2786 Empirical results suggest a 7-bit anchor range gives the best
2787 overall code size. */
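/* As an illustration, the 5-bit immediates give byte offsets of 0-31 for LDRB,
   0-62 for LDRH (scaled by 2) and 0-124 for LDR (scaled by 4), hence the
   0..127 anchor range used below.  */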
2788 targetm.min_anchor_offset = 0;
2789 targetm.max_anchor_offset = 127;
2791 else if (TARGET_THUMB2)
2793 /* The minimum is set such that the total size of the block
2794 for a particular anchor is 248 + 1 + 4095 bytes, which is
2795 divisible by eight, ensuring natural spacing of anchors. */
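/* That is, 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.  */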
2796 targetm.min_anchor_offset = -248;
2797 targetm.max_anchor_offset = 4095;
2800 /* V5 code we generate is completely interworking capable, so we turn off
2801 TARGET_INTERWORK here to avoid many tests later on. */
2803 /* XXX However, we must pass the right pre-processor defines to CPP
2804 or GLD can get confused. This is a hack. */
2805 if (TARGET_INTERWORK)
2806 arm_cpp_interwork = 1;
2808 if (arm_arch5)
2809 target_flags &= ~MASK_INTERWORK;
2811 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2812 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2814 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2815 error ("iwmmxt abi requires an iwmmxt capable cpu");
2817 if (!global_options_set.x_arm_fpu_index)
2819 const char *target_fpu_name;
2820 bool ok;
2822 #ifdef FPUTYPE_DEFAULT
2823 target_fpu_name = FPUTYPE_DEFAULT;
2824 #else
2825 target_fpu_name = "vfp";
2826 #endif
2828 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2829 CL_TARGET);
2830 gcc_assert (ok);
2833 arm_fpu_desc = &all_fpus[arm_fpu_index];
2835 if (TARGET_NEON && !arm_arch7)
2836 error ("target CPU does not support NEON");
2838 switch (arm_fpu_desc->model)
2840 case ARM_FP_MODEL_VFP:
2841 arm_fpu_attr = FPU_VFP;
2842 break;
2844 default:
2845 gcc_unreachable();
2848 if (TARGET_AAPCS_BASED)
2850 if (TARGET_CALLER_INTERWORKING)
2851 error ("AAPCS does not support -mcaller-super-interworking");
2852 else
2853 if (TARGET_CALLEE_INTERWORKING)
2854 error ("AAPCS does not support -mcallee-super-interworking");
2857 /* iWMMXt and NEON are incompatible. */
2858 if (TARGET_IWMMXT && TARGET_NEON)
2859 error ("iWMMXt and NEON are incompatible");
2861 /* iWMMXt unsupported under Thumb mode. */
2862 if (TARGET_THUMB && TARGET_IWMMXT)
2863 error ("iWMMXt unsupported under Thumb mode");
2865 /* __fp16 support currently assumes the core has ldrh. */
2866 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2867 sorry ("__fp16 and no ldrh");
2869 /* If soft-float is specified then don't use FPU. */
2870 if (TARGET_SOFT_FLOAT)
2871 arm_fpu_attr = FPU_NONE;
2873 if (TARGET_AAPCS_BASED)
2875 if (arm_abi == ARM_ABI_IWMMXT)
2876 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2877 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2878 && TARGET_HARD_FLOAT
2879 && TARGET_VFP)
2880 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2881 else
2882 arm_pcs_default = ARM_PCS_AAPCS;
2884 else
2886 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2887 sorry ("-mfloat-abi=hard and VFP");
2889 if (arm_abi == ARM_ABI_APCS)
2890 arm_pcs_default = ARM_PCS_APCS;
2891 else
2892 arm_pcs_default = ARM_PCS_ATPCS;
2895 /* For arm2/3 there is no need to do any scheduling if we are doing
2896 software floating-point. */
2897 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2898 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2900 /* Use the cp15 method if it is available. */
2901 if (target_thread_pointer == TP_AUTO)
2903 if (arm_arch6k && !TARGET_THUMB1)
2904 target_thread_pointer = TP_CP15;
2905 else
2906 target_thread_pointer = TP_SOFT;
2909 if (TARGET_HARD_TP && TARGET_THUMB1)
2910 error ("can not use -mtp=cp15 with 16-bit Thumb");
2912 /* Override the default structure alignment for AAPCS ABI. */
2913 if (!global_options_set.x_arm_structure_size_boundary)
2915 if (TARGET_AAPCS_BASED)
2916 arm_structure_size_boundary = 8;
2918 else
2920 if (arm_structure_size_boundary != 8
2921 && arm_structure_size_boundary != 32
2922 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2924 if (ARM_DOUBLEWORD_ALIGN)
2925 warning (0,
2926 "structure size boundary can only be set to 8, 32 or 64");
2927 else
2928 warning (0, "structure size boundary can only be set to 8 or 32");
2929 arm_structure_size_boundary
2930 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2934 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2936 error ("RTP PIC is incompatible with Thumb");
2937 flag_pic = 0;
2940 /* If stack checking is disabled, we can use r10 as the PIC register,
2941 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2942 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2944 if (TARGET_VXWORKS_RTP)
2945 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2946 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2949 if (flag_pic && TARGET_VXWORKS_RTP)
2950 arm_pic_register = 9;
2952 if (arm_pic_register_string != NULL)
2954 int pic_register = decode_reg_name (arm_pic_register_string);
2956 if (!flag_pic)
2957 warning (0, "-mpic-register= is useless without -fpic");
2959 /* Prevent the user from choosing an obviously stupid PIC register. */
2960 else if (pic_register < 0 || call_used_regs[pic_register]
2961 || pic_register == HARD_FRAME_POINTER_REGNUM
2962 || pic_register == STACK_POINTER_REGNUM
2963 || pic_register >= PC_REGNUM
2964 || (TARGET_VXWORKS_RTP
2965 && (unsigned int) pic_register != arm_pic_register))
2966 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2967 else
2968 arm_pic_register = pic_register;
2971 if (TARGET_VXWORKS_RTP
2972 && !global_options_set.x_arm_pic_data_is_text_relative)
2973 arm_pic_data_is_text_relative = 0;
2975 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2976 if (fix_cm3_ldrd == 2)
2978 if (arm_selected_cpu->core == cortexm3)
2979 fix_cm3_ldrd = 1;
2980 else
2981 fix_cm3_ldrd = 0;
2984 /* Enable -munaligned-access by default for
2985 - all ARMv6 architecture-based processors
2986 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2987 - ARMv8 architecture-based processors.
2989 Disable -munaligned-access by default for
2990 - all pre-ARMv6 architecture-based processors
2991 - ARMv6-M architecture-based processors. */
2993 if (unaligned_access == 2)
2995 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2996 unaligned_access = 1;
2997 else
2998 unaligned_access = 0;
3000 else if (unaligned_access == 1
3001 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3003 warning (0, "target CPU does not support unaligned accesses");
3004 unaligned_access = 0;
3007 if (TARGET_THUMB1 && flag_schedule_insns)
3009 /* Don't warn since it's on by default in -O2. */
3010 flag_schedule_insns = 0;
3013 if (optimize_size)
3015 /* If optimizing for size, bump the number of instructions that we
3016 are prepared to conditionally execute (even on a StrongARM). */
3017 max_insns_skipped = 6;
3019 /* For THUMB2, we limit the conditional sequence to one IT block. */
3020 if (TARGET_THUMB2)
3021 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3023 else
3024 max_insns_skipped = current_tune->max_insns_skipped;
3026 /* Hot/Cold partitioning is not currently supported, since we can't
3027 handle literal pool placement in that case. */
3028 if (flag_reorder_blocks_and_partition)
3030 inform (input_location,
3031 "-freorder-blocks-and-partition not supported on this architecture");
3032 flag_reorder_blocks_and_partition = 0;
3033 flag_reorder_blocks = 1;
3036 if (flag_pic)
3037 /* Hoisting PIC address calculations more aggressively provides a small,
3038 but measurable, size reduction for PIC code. Therefore, we decrease
3039 the bar for unrestricted expression hoisting to the cost of PIC address
3040 calculation, which is 2 instructions. */
3041 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3042 global_options.x_param_values,
3043 global_options_set.x_param_values);
3045 /* ARM EABI defaults to strict volatile bitfields. */
3046 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3047 && abi_version_at_least(2))
3048 flag_strict_volatile_bitfields = 1;
3050 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
3051 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3052 if (flag_prefetch_loop_arrays < 0
3053 && HAVE_prefetch
3054 && optimize >= 3
3055 && current_tune->num_prefetch_slots > 0)
3056 flag_prefetch_loop_arrays = 1;
3058 /* Set up parameters to be used in prefetching algorithm. Do not override the
3059 defaults unless we are tuning for a core we have researched values for. */
3060 if (current_tune->num_prefetch_slots > 0)
3061 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3062 current_tune->num_prefetch_slots,
3063 global_options.x_param_values,
3064 global_options_set.x_param_values);
3065 if (current_tune->l1_cache_line_size >= 0)
3066 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3067 current_tune->l1_cache_line_size,
3068 global_options.x_param_values,
3069 global_options_set.x_param_values);
3070 if (current_tune->l1_cache_size >= 0)
3071 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3072 current_tune->l1_cache_size,
3073 global_options.x_param_values,
3074 global_options_set.x_param_values);
3076 /* Use Neon to perform 64-bit operations rather than core
3077 registers. */
3078 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3079 if (use_neon_for_64bits == 1)
3080 prefer_neon_for_64bits = true;
3082 /* Use the alternative scheduling-pressure algorithm by default. */
3083 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3084 global_options.x_param_values,
3085 global_options_set.x_param_values);
3087 /* Disable shrink-wrap when optimizing function for size, since it tends to
3088 generate additional returns. */
3089 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3090 flag_shrink_wrap = false;
3091 /* TBD: Dwarf info for apcs frame is not handled yet. */
3092 if (TARGET_APCS_FRAME)
3093 flag_shrink_wrap = false;
3095 /* We only support -mslow-flash-data on armv7-m targets. */
3096 if (target_slow_flash_data
3097 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3098 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3099 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3101 /* Currently, for slow flash data, we just disable literal pools. */
3102 if (target_slow_flash_data)
3103 arm_disable_literal_pool = true;
3105 /* Thumb2 inline assembly code should always use unified syntax.
3106 This will apply to ARM and Thumb1 eventually. */
3107 if (TARGET_THUMB2)
3108 inline_asm_unified = 1;
3110 /* Disable scheduling fusion by default if the target is not an ARMv7
3111 processor or does not prefer ldrd/strd. */
3112 if (flag_schedule_fusion == 2
3113 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3114 flag_schedule_fusion = 0;
3116 /* Register global variables with the garbage collector. */
3117 arm_add_gc_roots ();
3120 static void
3121 arm_add_gc_roots (void)
3123 gcc_obstack_init(&minipool_obstack);
3124 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3127 /* A table of known ARM exception types.
3128 For use with the interrupt function attribute. */
3130 typedef struct
3132 const char *const arg;
3133 const unsigned long return_value;
3135 isr_attribute_arg;
3137 static const isr_attribute_arg isr_attribute_args [] =
3139 { "IRQ", ARM_FT_ISR },
3140 { "irq", ARM_FT_ISR },
3141 { "FIQ", ARM_FT_FIQ },
3142 { "fiq", ARM_FT_FIQ },
3143 { "ABORT", ARM_FT_ISR },
3144 { "abort", ARM_FT_ISR },
3145 { "ABORT", ARM_FT_ISR },
3146 { "abort", ARM_FT_ISR },
3147 { "UNDEF", ARM_FT_EXCEPTION },
3148 { "undef", ARM_FT_EXCEPTION },
3149 { "SWI", ARM_FT_EXCEPTION },
3150 { "swi", ARM_FT_EXCEPTION },
3151 { NULL, ARM_FT_NORMAL }
3154 /* Returns the (interrupt) function type of the current
3155 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3157 static unsigned long
3158 arm_isr_value (tree argument)
3160 const isr_attribute_arg * ptr;
3161 const char * arg;
3163 if (!arm_arch_notm)
3164 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3166 /* No argument - default to IRQ. */
3167 if (argument == NULL_TREE)
3168 return ARM_FT_ISR;
3170 /* Get the value of the argument. */
3171 if (TREE_VALUE (argument) == NULL_TREE
3172 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3173 return ARM_FT_UNKNOWN;
3175 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3177 /* Check it against the list of known arguments. */
3178 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3179 if (streq (arg, ptr->arg))
3180 return ptr->return_value;
3182 /* An unrecognized interrupt type. */
3183 return ARM_FT_UNKNOWN;
3186 /* Computes the type of the current function. */
3188 static unsigned long
3189 arm_compute_func_type (void)
3191 unsigned long type = ARM_FT_UNKNOWN;
3192 tree a;
3193 tree attr;
3195 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3197 /* Decide if the current function is volatile. Such functions
3198 never return, and many memory cycles can be saved by not storing
3199 register values that will never be needed again. This optimization
3200 was added to speed up context switching in a kernel application. */
3201 if (optimize > 0
3202 && (TREE_NOTHROW (current_function_decl)
3203 || !(flag_unwind_tables
3204 || (flag_exceptions
3205 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3206 && TREE_THIS_VOLATILE (current_function_decl))
3207 type |= ARM_FT_VOLATILE;
3209 if (cfun->static_chain_decl != NULL)
3210 type |= ARM_FT_NESTED;
3212 attr = DECL_ATTRIBUTES (current_function_decl);
3214 a = lookup_attribute ("naked", attr);
3215 if (a != NULL_TREE)
3216 type |= ARM_FT_NAKED;
3218 a = lookup_attribute ("isr", attr);
3219 if (a == NULL_TREE)
3220 a = lookup_attribute ("interrupt", attr);
3222 if (a == NULL_TREE)
3223 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3224 else
3225 type |= arm_isr_value (TREE_VALUE (a));
3227 return type;
3230 /* Returns the type of the current function. */
3232 unsigned long
3233 arm_current_func_type (void)
3235 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3236 cfun->machine->func_type = arm_compute_func_type ();
3238 return cfun->machine->func_type;
3241 bool
3242 arm_allocate_stack_slots_for_args (void)
3244 /* Naked functions should not allocate stack slots for arguments. */
3245 return !IS_NAKED (arm_current_func_type ());
3248 static bool
3249 arm_warn_func_return (tree decl)
3251 /* Naked functions are implemented entirely in assembly, including the
3252 return sequence, so suppress warnings about this. */
3253 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3257 /* Output assembler code for a block containing the constant parts
3258 of a trampoline, leaving space for the variable parts.
3260 On the ARM, (if r8 is the static chain regnum, and remembering that
3261 referencing pc adds an offset of 8) the trampoline looks like:
3262 ldr r8, [pc, #0]
3263 ldr pc, [pc]
3264 .word static chain value
3265 .word function's address
3266 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3268 static void
3269 arm_asm_trampoline_template (FILE *f)
3271 if (TARGET_ARM)
3273 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3274 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3276 else if (TARGET_THUMB2)
3278 /* The Thumb-2 trampoline is similar to the arm implementation.
3279 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3280 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3281 STATIC_CHAIN_REGNUM, PC_REGNUM);
3282 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3284 else
3286 ASM_OUTPUT_ALIGN (f, 2);
3287 fprintf (f, "\t.code\t16\n");
3288 fprintf (f, ".Ltrampoline_start:\n");
3289 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3290 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3291 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3292 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3293 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3294 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3296 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3297 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3300 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3302 static void
3303 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3305 rtx fnaddr, mem, a_tramp;
3307 emit_block_move (m_tramp, assemble_trampoline_template (),
3308 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3310 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3311 emit_move_insn (mem, chain_value);
3313 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3314 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3315 emit_move_insn (mem, fnaddr);
3317 a_tramp = XEXP (m_tramp, 0);
3318 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3319 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3320 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3323 /* Thumb trampolines should be entered in thumb mode, so set
3324 the bottom bit of the address. */
3326 static rtx
3327 arm_trampoline_adjust_address (rtx addr)
3329 if (TARGET_THUMB)
3330 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3331 NULL, 0, OPTAB_LIB_WIDEN);
3332 return addr;
3335 /* Return 1 if it is possible to return using a single instruction.
3336 If SIBLING is non-null, this is a test for a return before a sibling
3337 call. SIBLING is the call insn, so we can examine its register usage. */
3340 use_return_insn (int iscond, rtx sibling)
3342 int regno;
3343 unsigned int func_type;
3344 unsigned long saved_int_regs;
3345 unsigned HOST_WIDE_INT stack_adjust;
3346 arm_stack_offsets *offsets;
3348 /* Never use a return instruction before reload has run. */
3349 if (!reload_completed)
3350 return 0;
3352 func_type = arm_current_func_type ();
3354 /* Naked, volatile and stack alignment functions need special
3355 consideration. */
3356 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3357 return 0;
3359 /* So do interrupt functions that use the frame pointer and Thumb
3360 interrupt functions. */
3361 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3362 return 0;
3364 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3365 && !optimize_function_for_size_p (cfun))
3366 return 0;
3368 offsets = arm_get_frame_offsets ();
3369 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3371 /* As do variadic functions. */
3372 if (crtl->args.pretend_args_size
3373 || cfun->machine->uses_anonymous_args
3374 /* Or if the function calls __builtin_eh_return () */
3375 || crtl->calls_eh_return
3376 /* Or if the function calls alloca */
3377 || cfun->calls_alloca
3378 /* Or if there is a stack adjustment. However, if the stack pointer
3379 is saved on the stack, we can use a pre-incrementing stack load. */
3380 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3381 && stack_adjust == 4)))
3382 return 0;
3384 saved_int_regs = offsets->saved_regs_mask;
3386 /* Unfortunately, the insn
3388 ldmib sp, {..., sp, ...}
3390 triggers a bug on most SA-110 based devices, such that the stack
3391 pointer won't be correctly restored if the instruction takes a
3392 page fault. We work around this problem by popping r3 along with
3393 the other registers, since that is never slower than executing
3394 another instruction.
3396 We test for !arm_arch5 here, because code for any architecture
3397 less than this could potentially be run on one of the buggy
3398 chips. */
3399 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3401 /* Validate that r3 is a call-clobbered register (always true in
3402 the default abi) ... */
3403 if (!call_used_regs[3])
3404 return 0;
3406 /* ... that it isn't being used for a return value ... */
3407 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3408 return 0;
3410 /* ... or for a tail-call argument ... */
3411 if (sibling)
3413 gcc_assert (CALL_P (sibling));
3415 if (find_regno_fusage (sibling, USE, 3))
3416 return 0;
3419 /* ... and that there are no call-saved registers in r0-r2
3420 (always true in the default ABI). */
3421 if (saved_int_regs & 0x7)
3422 return 0;
3425 /* Can't be done if interworking with Thumb, and any registers have been
3426 stacked. */
3427 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3428 return 0;
3430 /* On StrongARM, conditional returns are expensive if they aren't
3431 taken and multiple registers have been stacked. */
3432 if (iscond && arm_tune_strongarm)
3434 /* Conditional return when just the LR is stored is a simple
3435 conditional-load instruction, that's not expensive. */
3436 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3437 return 0;
3439 if (flag_pic
3440 && arm_pic_register != INVALID_REGNUM
3441 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3442 return 0;
3445 /* If there are saved registers but the LR isn't saved, then we need
3446 two instructions for the return. */
3447 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3448 return 0;
3450 /* Can't be done if any of the VFP regs are pushed,
3451 since this also requires an insn. */
3452 if (TARGET_HARD_FLOAT && TARGET_VFP)
3453 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3454 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3455 return 0;
3457 if (TARGET_REALLY_IWMMXT)
3458 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3459 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3460 return 0;
3462 return 1;
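/* Illustrative examples of the tests above: a small leaf function that
   saves no registers can typically return with a single "bx lr", and one
   that pushed {r4, lr} can still use a single "pop {r4, pc}"; but a
   function that calls alloca, or one left with a non-trivial stack
   adjustment at the return point, needs a full epilogue, so this routine
   returns 0 for it.  */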
3465 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3466 shrink-wrapping if possible. This is the case if we need to emit a
3467 prologue, which we can test by looking at the offsets. */
3468 bool
3469 use_simple_return_p (void)
3471 arm_stack_offsets *offsets;
3473 offsets = arm_get_frame_offsets ();
3474 return offsets->outgoing_args != 0;
3477 /* Return TRUE if int I is a valid immediate ARM constant. */
3480 const_ok_for_arm (HOST_WIDE_INT i)
3482 int lowbit;
3484 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3485 be all zero, or all one. */
3486 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3487 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3488 != ((~(unsigned HOST_WIDE_INT) 0)
3489 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3490 return FALSE;
3492 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3494 /* Fast return for 0 and small values. We must do this for zero, since
3495 the code below can't handle that one case. */
3496 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3497 return TRUE;
3499 /* Get the number of trailing zeros. */
3500 lowbit = ffs((int) i) - 1;
3502 /* Only even shifts are allowed in ARM mode so round down to the
3503 nearest even number. */
3504 if (TARGET_ARM)
3505 lowbit &= ~1;
3507 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3508 return TRUE;
3510 if (TARGET_ARM)
3512 /* Allow rotated constants in ARM mode. */
3513 if (lowbit <= 4
3514 && ((i & ~0xc000003f) == 0
3515 || (i & ~0xf000000f) == 0
3516 || (i & ~0xfc000003) == 0))
3517 return TRUE;
3519 else
3521 HOST_WIDE_INT v;
3523 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3524 v = i & 0xff;
3525 v |= v << 16;
3526 if (i == v || i == (v | (v << 8)))
3527 return TRUE;
3529 /* Allow repeated pattern 0xXY00XY00. */
3530 v = i & 0xff00;
3531 v |= v << 16;
3532 if (i == v)
3533 return TRUE;
3536 return FALSE;
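/* A minimal, self-contained sketch of the ARM-mode rule checked above,
   for illustration only (the name below is not used elsewhere): an
   immediate is valid when it is an 8-bit value rotated right by an even
   amount, so rotating the candidate left by every even count and testing
   whether it then fits in 8 bits is equivalent:

     static int is_arm_mode_immediate (unsigned int x)
     {
       int rot;
       for (rot = 0; rot < 32; rot += 2)
         {
           unsigned int v = rot ? (x << rot) | (x >> (32 - rot)) : x;
           if ((v & ~0xffu) == 0)
             return 1;
         }
       return 0;
     }

   Hence 0xff, 0x3fc and 0xff000000 are valid, while 0x101 and 0x1ff1 are
   not.  Thumb-2 additionally accepts the replicated byte patterns handled
   in the else-branch above.  */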
3539 /* Return true if I is a valid constant for the operation CODE. */
3541 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3543 if (const_ok_for_arm (i))
3544 return 1;
3546 switch (code)
3548 case SET:
3549 /* See if we can use movw. */
3550 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3551 return 1;
3552 else
3553 /* Otherwise, try mvn. */
3554 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3556 case PLUS:
3557 /* See if we can use addw or subw. */
3558 if (TARGET_THUMB2
3559 && ((i & 0xfffff000) == 0
3560 || ((-i) & 0xfffff000) == 0))
3561 return 1;
3562 /* else fall through. */
3564 case COMPARE:
3565 case EQ:
3566 case NE:
3567 case GT:
3568 case LE:
3569 case LT:
3570 case GE:
3571 case GEU:
3572 case LTU:
3573 case GTU:
3574 case LEU:
3575 case UNORDERED:
3576 case ORDERED:
3577 case UNEQ:
3578 case UNGE:
3579 case UNLT:
3580 case UNGT:
3581 case UNLE:
3582 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3584 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3585 case XOR:
3586 return 0;
3588 case IOR:
3589 if (TARGET_THUMB2)
3590 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3591 return 0;
3593 case AND:
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3596 default:
3597 gcc_unreachable ();
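/* Worked examples (illustrative) of the fall-backs above: for
   (set r0 #0xffffff3f) the constant itself is not encodable but its
   complement 0xc0 is, so "mvn r0, #0xc0" is accepted; for (plus r0 #-5)
   the negated value 5 is encodable, so "sub r0, r0, #5" works; and on
   Thumb-2 (plus r0 #0xabc) is accepted directly because addw/subw take a
   12-bit immediate.  */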
3601 /* Return true if I is a valid DImode constant for the operation CODE. */
3603 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3605 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3606 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3607 rtx hi = GEN_INT (hi_val);
3608 rtx lo = GEN_INT (lo_val);
3610 if (TARGET_THUMB1)
3611 return 0;
3613 switch (code)
3615 case AND:
3616 case IOR:
3617 case XOR:
3618 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3619 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3620 case PLUS:
3621 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3623 default:
3624 return 0;
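/* For instance (illustrative), an IOR of a DImode value with
   0x000000ff000000ff is accepted: each 32-bit half is 0xff, a valid ARM
   immediate, so the operation splits into one ORR on the low word and one
   on the high word.  */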
3628 /* Emit a sequence of insns to handle a large constant.
3629 CODE is the code of the operation required; it can be any of SET, PLUS,
3630 IOR, AND, XOR, MINUS;
3631 MODE is the mode in which the operation is being performed;
3632 VAL is the integer to operate on;
3633 SOURCE is the other operand (a register, or a null-pointer for SET);
3634 SUBTARGETS means it is safe to create scratch registers if that will
3635 either produce a simpler sequence, or we will want to cse the values.
3636 Return value is the number of insns emitted. */
3638 /* ??? Tweak this for thumb2. */
3640 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3641 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3643 rtx cond;
3645 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3646 cond = COND_EXEC_TEST (PATTERN (insn));
3647 else
3648 cond = NULL_RTX;
3650 if (subtargets || code == SET
3651 || (REG_P (target) && REG_P (source)
3652 && REGNO (target) != REGNO (source)))
3654 /* After arm_reorg has been called, we can't fix up expensive
3655 constants by pushing them into memory so we must synthesize
3656 them in-line, regardless of the cost. This is only likely to
3657 be more costly on chips that have load delay slots and we are
3658 compiling without running the scheduler (so no splitting
3659 occurred before the final instruction emission).
3661 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3663 if (!cfun->machine->after_arm_reorg
3664 && !cond
3665 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3666 1, 0)
3667 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3668 + (code != SET))))
3670 if (code == SET)
3672 /* Currently SET is the only monadic value for CODE; all
3673 the rest are dyadic. */
3674 if (TARGET_USE_MOVT)
3675 arm_emit_movpair (target, GEN_INT (val));
3676 else
3677 emit_set_insn (target, GEN_INT (val));
3679 return 1;
3681 else
3683 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3685 if (TARGET_USE_MOVT)
3686 arm_emit_movpair (temp, GEN_INT (val));
3687 else
3688 emit_set_insn (temp, GEN_INT (val));
3690 /* For MINUS, the constant is the minuend (VAL - SOURCE), since we never
3691 have subtraction of a constant; that is canonicalized as addition. */
3692 if (code == MINUS)
3693 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3694 else
3695 emit_set_insn (target,
3696 gen_rtx_fmt_ee (code, mode, source, temp));
3697 return 2;
3702 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
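/* Worked example (illustrative): setting r0 to 0x12345678 with
   code == SET.  On a target with MOVW/MOVT the value is emitted as a
   movw/movt pair; otherwise the in-line synthesis would be a sequence
   along the lines of

     mov r0, #0x12000000
     orr r0, r0, #0x340000
     orr r0, r0, #0x5600
     orr r0, r0, #0x78

   four instructions built from 8-bit rotated immediates, which is why the
   cost check against arm_constant_limit may instead emit a plain set and
   let the constant be fixed up from the literal pool later.  */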
3706 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3707 ARM/THUMB2 immediates, and add up to VAL.
3708 The function return value gives the number of insns required. */
3709 static int
3710 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3711 struct four_ints *return_sequence)
3713 int best_consecutive_zeros = 0;
3714 int i;
3715 int best_start = 0;
3716 int insns1, insns2;
3717 struct four_ints tmp_sequence;
3719 /* If we aren't targeting ARM, the best place to start is always at
3720 the bottom, otherwise look more closely. */
3721 if (TARGET_ARM)
3723 for (i = 0; i < 32; i += 2)
3725 int consecutive_zeros = 0;
3727 if (!(val & (3 << i)))
3729 while ((i < 32) && !(val & (3 << i)))
3731 consecutive_zeros += 2;
3732 i += 2;
3734 if (consecutive_zeros > best_consecutive_zeros)
3736 best_consecutive_zeros = consecutive_zeros;
3737 best_start = i - consecutive_zeros;
3739 i -= 2;
3744 /* So long as it won't require any more insns to do so, it's
3745 desirable to emit a small constant (in bits 0...9) in the last
3746 insn. This way there is more chance that it can be combined with
3747 a later addressing insn to form a pre-indexed load or store
3748 operation. Consider:
3750 *((volatile int *)0xe0000100) = 1;
3751 *((volatile int *)0xe0000110) = 2;
3753 We want this to wind up as:
3755 mov rA, #0xe0000000
3756 mov rB, #1
3757 str rB, [rA, #0x100]
3758 mov rB, #2
3759 str rB, [rA, #0x110]
3761 rather than having to synthesize both large constants from scratch.
3763 Therefore, we calculate how many insns would be required to emit
3764 the constant starting from `best_start', and also starting from
3765 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3766 yield a shorter sequence, we may as well use zero. */
3767 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3768 if (best_start != 0
3769 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3771 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3772 if (insns2 <= insns1)
3774 *return_sequence = tmp_sequence;
3775 insns1 = insns2;
3779 return insns1;
3782 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3783 static int
3784 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3785 struct four_ints *return_sequence, int i)
3787 int remainder = val & 0xffffffff;
3788 int insns = 0;
3790 /* Try and find a way of doing the job in either two or three
3791 instructions.
3793 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3794 location. We start at position I. This may be the MSB, or
3795 optimal_immediate_sequence may have positioned it at the largest block
3796 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3797 wrapping around to the top of the word when we drop off the bottom.
3798 In the worst case this code should produce no more than four insns.
3800 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3801 constants, shifted to an arbitrary location. We should always start
3802 at the MSB. */
3805 int end;
3806 unsigned int b1, b2, b3, b4;
3807 unsigned HOST_WIDE_INT result;
3808 int loc;
3810 gcc_assert (insns < 4);
3812 if (i <= 0)
3813 i += 32;
3815 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3816 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3818 loc = i;
3819 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3820 /* We can use addw/subw for the last 12 bits. */
3821 result = remainder;
3822 else
3824 /* Use an 8-bit shifted/rotated immediate. */
3825 end = i - 8;
3826 if (end < 0)
3827 end += 32;
3828 result = remainder & ((0x0ff << end)
3829 | ((i < end) ? (0xff >> (32 - end))
3830 : 0));
3831 i -= 8;
3834 else
3836 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3837 arbitrary shifts. */
3838 i -= TARGET_ARM ? 2 : 1;
3839 continue;
3842 /* Next, see if we can do a better job with a thumb2 replicated
3843 constant.
3845 We do it this way around to catch the cases like 0x01F001E0 where
3846 two 8-bit immediates would work, but a replicated constant would
3847 make it worse.
3849 TODO: 16-bit constants that don't clear all the bits, but still win.
3850 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3851 if (TARGET_THUMB2)
3853 b1 = (remainder & 0xff000000) >> 24;
3854 b2 = (remainder & 0x00ff0000) >> 16;
3855 b3 = (remainder & 0x0000ff00) >> 8;
3856 b4 = remainder & 0xff;
3858 if (loc > 24)
3860 /* The 8-bit immediate already found clears b1 (and maybe b2),
3861 but must leave b3 and b4 alone. */
3863 /* First try to find a 32-bit replicated constant that clears
3864 almost everything. We can assume that we can't do it in one,
3865 or else we wouldn't be here. */
3866 unsigned int tmp = b1 & b2 & b3 & b4;
3867 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3868 + (tmp << 24);
3869 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3870 + (tmp == b3) + (tmp == b4);
3871 if (tmp
3872 && (matching_bytes >= 3
3873 || (matching_bytes == 2
3874 && const_ok_for_op (remainder & ~tmp2, code))))
3876 /* At least 3 of the bytes match, and the fourth has at
3877 least as many bits set, or two of the bytes match
3878 and it will only require one more insn to finish. */
3879 result = tmp2;
3880 i = tmp != b1 ? 32
3881 : tmp != b2 ? 24
3882 : tmp != b3 ? 16
3883 : 8;
3886 /* Second, try to find a 16-bit replicated constant that can
3887 leave three of the bytes clear. If b2 or b4 is already
3888 zero, then we can. If the 8-bit from above would not
3889 clear b2 anyway, then we still win. */
3890 else if (b1 == b3 && (!b2 || !b4
3891 || (remainder & 0x00ff0000 & ~result)))
3893 result = remainder & 0xff00ff00;
3894 i = 24;
3897 else if (loc > 16)
3899 /* The 8-bit immediate already found clears b2 (and maybe b3)
3900 and we don't get here unless b1 is already clear, but it will
3901 leave b4 unchanged. */
3903 /* If we can clear b2 and b4 at once, then we win, since the
3904 8-bits couldn't possibly reach that far. */
3905 if (b2 == b4)
3907 result = remainder & 0x00ff00ff;
3908 i = 16;
3913 return_sequence->i[insns++] = result;
3914 remainder &= ~result;
3916 if (code == SET || code == MINUS)
3917 code = PLUS;
3919 while (remainder);
3921 return insns;
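/* Worked examples (illustrative): in ARM mode 0x0000ff00 is a single
   immediate, while 0xaaaaaaaa needs four, one per byte; in Thumb-2 mode
   0xaaaaaaaa is a single replicated-byte immediate and 0x00aa00aa a
   single half-word-replicated one, which is what the TARGET_THUMB2 branch
   above tries before settling for plain 8-bit pieces.  */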
3924 /* Emit an instruction with the indicated PATTERN. If COND is
3925 non-NULL, conditionalize the execution of the instruction on COND
3926 being true. */
3928 static void
3929 emit_constant_insn (rtx cond, rtx pattern)
3931 if (cond)
3932 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3933 emit_insn (pattern);
3936 /* As above, but extra parameter GENERATE which, if clear, suppresses
3937 RTL generation. */
3939 static int
3940 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3941 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3942 int generate)
3944 int can_invert = 0;
3945 int can_negate = 0;
3946 int final_invert = 0;
3947 int i;
3948 int set_sign_bit_copies = 0;
3949 int clear_sign_bit_copies = 0;
3950 int clear_zero_bit_copies = 0;
3951 int set_zero_bit_copies = 0;
3952 int insns = 0, neg_insns, inv_insns;
3953 unsigned HOST_WIDE_INT temp1, temp2;
3954 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3955 struct four_ints *immediates;
3956 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3958 /* Find out which operations are safe for a given CODE. Also do a quick
3959 check for degenerate cases; these can occur when DImode operations
3960 are split. */
3961 switch (code)
3963 case SET:
3964 can_invert = 1;
3965 break;
3967 case PLUS:
3968 can_negate = 1;
3969 break;
3971 case IOR:
3972 if (remainder == 0xffffffff)
3974 if (generate)
3975 emit_constant_insn (cond,
3976 gen_rtx_SET (VOIDmode, target,
3977 GEN_INT (ARM_SIGN_EXTEND (val))));
3978 return 1;
3981 if (remainder == 0)
3983 if (reload_completed && rtx_equal_p (target, source))
3984 return 0;
3986 if (generate)
3987 emit_constant_insn (cond,
3988 gen_rtx_SET (VOIDmode, target, source));
3989 return 1;
3991 break;
3993 case AND:
3994 if (remainder == 0)
3996 if (generate)
3997 emit_constant_insn (cond,
3998 gen_rtx_SET (VOIDmode, target, const0_rtx));
3999 return 1;
4001 if (remainder == 0xffffffff)
4003 if (reload_completed && rtx_equal_p (target, source))
4004 return 0;
4005 if (generate)
4006 emit_constant_insn (cond,
4007 gen_rtx_SET (VOIDmode, target, source));
4008 return 1;
4010 can_invert = 1;
4011 break;
4013 case XOR:
4014 if (remainder == 0)
4016 if (reload_completed && rtx_equal_p (target, source))
4017 return 0;
4018 if (generate)
4019 emit_constant_insn (cond,
4020 gen_rtx_SET (VOIDmode, target, source));
4021 return 1;
4024 if (remainder == 0xffffffff)
4026 if (generate)
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, target,
4029 gen_rtx_NOT (mode, source)));
4030 return 1;
4032 final_invert = 1;
4033 break;
4035 case MINUS:
4036 /* We treat MINUS as (val - source), since (source - val) is always
4037 passed as (source + (-val)). */
4038 if (remainder == 0)
4040 if (generate)
4041 emit_constant_insn (cond,
4042 gen_rtx_SET (VOIDmode, target,
4043 gen_rtx_NEG (mode, source)));
4044 return 1;
4046 if (const_ok_for_arm (val))
4048 if (generate)
4049 emit_constant_insn (cond,
4050 gen_rtx_SET (VOIDmode, target,
4051 gen_rtx_MINUS (mode, GEN_INT (val),
4052 source)));
4053 return 1;
4056 break;
4058 default:
4059 gcc_unreachable ();
4062 /* If we can do it in one insn get out quickly. */
4063 if (const_ok_for_op (val, code))
4065 if (generate)
4066 emit_constant_insn (cond,
4067 gen_rtx_SET (VOIDmode, target,
4068 (source
4069 ? gen_rtx_fmt_ee (code, mode, source,
4070 GEN_INT (val))
4071 : GEN_INT (val))));
4072 return 1;
4075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4076 insn. */
4077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4080 if (generate)
4082 if (mode == SImode && i == 16)
4083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4084 smaller insn. */
4085 emit_constant_insn (cond,
4086 gen_zero_extendhisi2
4087 (target, gen_lowpart (HImode, source)));
4088 else
4089 /* Extz only supports SImode, but we can coerce the operands
4090 into that mode. */
4091 emit_constant_insn (cond,
4092 gen_extzv_t2 (gen_lowpart (SImode, target),
4093 gen_lowpart (SImode, source),
4094 GEN_INT (i), const0_rtx));
4097 return 1;
4100 /* Calculate a few attributes that may be useful for specific
4101 optimizations. */
4102 /* Count number of leading zeros. */
4103 for (i = 31; i >= 0; i--)
4105 if ((remainder & (1 << i)) == 0)
4106 clear_sign_bit_copies++;
4107 else
4108 break;
4111 /* Count number of leading 1's. */
4112 for (i = 31; i >= 0; i--)
4114 if ((remainder & (1 << i)) != 0)
4115 set_sign_bit_copies++;
4116 else
4117 break;
4120 /* Count number of trailing zeros. */
4121 for (i = 0; i <= 31; i++)
4123 if ((remainder & (1 << i)) == 0)
4124 clear_zero_bit_copies++;
4125 else
4126 break;
4129 /* Count number of trailing 1's. */
4130 for (i = 0; i <= 31; i++)
4132 if ((remainder & (1 << i)) != 0)
4133 set_zero_bit_copies++;
4134 else
4135 break;
4138 switch (code)
4140 case SET:
4141 /* See if we can do this by sign_extending a constant that is known
4142 to be negative. This is a good way of doing it, since the shift
4143 may well merge into a subsequent insn. */
4144 if (set_sign_bit_copies > 1)
4146 if (const_ok_for_arm
4147 (temp1 = ARM_SIGN_EXTEND (remainder
4148 << (set_sign_bit_copies - 1))))
4150 if (generate)
4152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4153 emit_constant_insn (cond,
4154 gen_rtx_SET (VOIDmode, new_src,
4155 GEN_INT (temp1)));
4156 emit_constant_insn (cond,
4157 gen_ashrsi3 (target, new_src,
4158 GEN_INT (set_sign_bit_copies - 1)));
4160 return 2;
4162 /* For an inverted constant, we will need to set the low bits,
4163 these will be shifted out of harm's way. */
4164 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4165 if (const_ok_for_arm (~temp1))
4167 if (generate)
4169 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4170 emit_constant_insn (cond,
4171 gen_rtx_SET (VOIDmode, new_src,
4172 GEN_INT (temp1)));
4173 emit_constant_insn (cond,
4174 gen_ashrsi3 (target, new_src,
4175 GEN_INT (set_sign_bit_copies - 1)));
4177 return 2;
4181 /* See if we can calculate the value as the difference between two
4182 valid immediates. */
4183 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4185 int topshift = clear_sign_bit_copies & ~1;
4187 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4188 & (0xff000000 >> topshift));
4190 /* If temp1 is zero, then that means the 9 most significant
4191 bits of remainder were 1 and we've caused it to overflow.
4192 When topshift is 0 we don't need to do anything since we
4193 can borrow from 'bit 32'. */
4194 if (temp1 == 0 && topshift != 0)
4195 temp1 = 0x80000000 >> (topshift - 1);
4197 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4199 if (const_ok_for_arm (temp2))
4201 if (generate)
4203 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4204 emit_constant_insn (cond,
4205 gen_rtx_SET (VOIDmode, new_src,
4206 GEN_INT (temp1)));
4207 emit_constant_insn (cond,
4208 gen_addsi3 (target, new_src,
4209 GEN_INT (-temp2)));
4212 return 2;
4216 /* See if we can generate this by setting the bottom (or the top)
4217 16 bits, and then shifting these into the other half of the
4218 word. We only look for the simplest cases, to do more would cost
4219 too much. Be careful, however, not to generate this when the
4220 alternative would take fewer insns. */
4221 if (val & 0xffff0000)
4223 temp1 = remainder & 0xffff0000;
4224 temp2 = remainder & 0x0000ffff;
4226 /* Overlaps outside this range are best done using other methods. */
4227 for (i = 9; i < 24; i++)
4229 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4230 && !const_ok_for_arm (temp2))
4232 rtx new_src = (subtargets
4233 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4234 : target);
4235 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4236 source, subtargets, generate);
4237 source = new_src;
4238 if (generate)
4239 emit_constant_insn
4240 (cond,
4241 gen_rtx_SET
4242 (VOIDmode, target,
4243 gen_rtx_IOR (mode,
4244 gen_rtx_ASHIFT (mode, source,
4245 GEN_INT (i)),
4246 source)));
4247 return insns + 1;
4251 /* Don't duplicate cases already considered. */
4252 for (i = 17; i < 24; i++)
4254 if (((temp1 | (temp1 >> i)) == remainder)
4255 && !const_ok_for_arm (temp1))
4257 rtx new_src = (subtargets
4258 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4259 : target);
4260 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4261 source, subtargets, generate);
4262 source = new_src;
4263 if (generate)
4264 emit_constant_insn
4265 (cond,
4266 gen_rtx_SET (VOIDmode, target,
4267 gen_rtx_IOR
4268 (mode,
4269 gen_rtx_LSHIFTRT (mode, source,
4270 GEN_INT (i)),
4271 source)));
4272 return insns + 1;
4276 break;
4278 case IOR:
4279 case XOR:
4280 /* If we have IOR or XOR, and the constant can be loaded in a
4281 single instruction, and we can find a temporary to put it in,
4282 then this can be done in two instructions instead of 3-4. */
4283 if (subtargets
4284 /* TARGET can't be NULL if SUBTARGETS is 0 */
4285 || (reload_completed && !reg_mentioned_p (target, source)))
4287 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4289 if (generate)
4291 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4293 emit_constant_insn (cond,
4294 gen_rtx_SET (VOIDmode, sub,
4295 GEN_INT (val)));
4296 emit_constant_insn (cond,
4297 gen_rtx_SET (VOIDmode, target,
4298 gen_rtx_fmt_ee (code, mode,
4299 source, sub)));
4301 return 2;
4305 if (code == XOR)
4306 break;
4308 /* Convert.
4309 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4310 and the remainder 0s for e.g. 0xfff00000)
4311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4313 This can be done in 2 instructions by using shifts with mov or mvn.
4314 e.g. for
4315 x = x | 0xfff00000;
4316 we generate.
4317 mvn r0, r0, asl #12
4318 mvn r0, r0, lsr #12 */
4319 if (set_sign_bit_copies > 8
4320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4322 if (generate)
4324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4325 rtx shift = GEN_INT (set_sign_bit_copies);
4327 emit_constant_insn
4328 (cond,
4329 gen_rtx_SET (VOIDmode, sub,
4330 gen_rtx_NOT (mode,
4331 gen_rtx_ASHIFT (mode,
4332 source,
4333 shift))));
4334 emit_constant_insn
4335 (cond,
4336 gen_rtx_SET (VOIDmode, target,
4337 gen_rtx_NOT (mode,
4338 gen_rtx_LSHIFTRT (mode, sub,
4339 shift))));
4341 return 2;
4344 /* Convert
4345 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4349 For example, r0 = r0 | 0xfff
4350 mvn r0, r0, lsr #12
4351 mvn r0, r0, asl #12
4354 if (set_zero_bit_copies > 8
4355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4357 if (generate)
4359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4360 rtx shift = GEN_INT (set_zero_bit_copies);
4362 emit_constant_insn
4363 (cond,
4364 gen_rtx_SET (VOIDmode, sub,
4365 gen_rtx_NOT (mode,
4366 gen_rtx_LSHIFTRT (mode,
4367 source,
4368 shift))));
4369 emit_constant_insn
4370 (cond,
4371 gen_rtx_SET (VOIDmode, target,
4372 gen_rtx_NOT (mode,
4373 gen_rtx_ASHIFT (mode, sub,
4374 shift))));
4376 return 2;
4379 /* This will never be reached for Thumb2 because orn is a valid
4380 instruction. This is for Thumb1 and the ARM 32 bit cases.
4382 x = y | constant (such that ~constant is a valid constant)
4383 Transform this to
4384 x = ~(~y & ~constant).
4386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4388 if (generate)
4390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4391 emit_constant_insn (cond,
4392 gen_rtx_SET (VOIDmode, sub,
4393 gen_rtx_NOT (mode, source)));
4394 source = sub;
4395 if (subtargets)
4396 sub = gen_reg_rtx (mode);
4397 emit_constant_insn (cond,
4398 gen_rtx_SET (VOIDmode, sub,
4399 gen_rtx_AND (mode, source,
4400 GEN_INT (temp1))));
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, target,
4403 gen_rtx_NOT (mode, sub)));
4405 return 3;
4407 break;
4409 case AND:
4410 /* See if two shifts will do 2 or more insn's worth of work. */
4411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4413 HOST_WIDE_INT shift_mask = ((0xffffffff
4414 << (32 - clear_sign_bit_copies))
4415 & 0xffffffff);
4417 if ((remainder | shift_mask) != 0xffffffff)
4419 if (generate)
4421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4422 insns = arm_gen_constant (AND, mode, cond,
4423 remainder | shift_mask,
4424 new_src, source, subtargets, 1);
4425 source = new_src;
4427 else
4429 rtx targ = subtargets ? NULL_RTX : target;
4430 insns = arm_gen_constant (AND, mode, cond,
4431 remainder | shift_mask,
4432 targ, source, subtargets, 0);
4436 if (generate)
4438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4439 rtx shift = GEN_INT (clear_sign_bit_copies);
4441 emit_insn (gen_ashlsi3 (new_src, source, shift));
4442 emit_insn (gen_lshrsi3 (target, new_src, shift));
4445 return insns + 2;
4448 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4450 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4452 if ((remainder | shift_mask) != 0xffffffff)
4454 if (generate)
4456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4458 insns = arm_gen_constant (AND, mode, cond,
4459 remainder | shift_mask,
4460 new_src, source, subtargets, 1);
4461 source = new_src;
4463 else
4465 rtx targ = subtargets ? NULL_RTX : target;
4467 insns = arm_gen_constant (AND, mode, cond,
4468 remainder | shift_mask,
4469 targ, source, subtargets, 0);
4473 if (generate)
4475 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4476 rtx shift = GEN_INT (clear_zero_bit_copies);
4478 emit_insn (gen_lshrsi3 (new_src, source, shift));
4479 emit_insn (gen_ashlsi3 (target, new_src, shift));
4482 return insns + 2;
4485 break;
4487 default:
4488 break;
4491 /* Calculate what the instruction sequences would be if we generated it
4492 normally, negated, or inverted. */
4493 if (code == AND)
4494 /* AND cannot be split into multiple insns, so invert and use BIC. */
4495 insns = 99;
4496 else
4497 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4499 if (can_negate)
4500 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4501 &neg_immediates);
4502 else
4503 neg_insns = 99;
4505 if (can_invert || final_invert)
4506 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4507 &inv_immediates);
4508 else
4509 inv_insns = 99;
4511 immediates = &pos_immediates;
4513 /* Is the negated immediate sequence more efficient? */
4514 if (neg_insns < insns && neg_insns <= inv_insns)
4516 insns = neg_insns;
4517 immediates = &neg_immediates;
4519 else
4520 can_negate = 0;
4522 /* Is the inverted immediate sequence more efficient?
4523 We must allow for an extra NOT instruction for XOR operations, although
4524 there is some chance that the final 'mvn' will get optimized later. */
4525 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4527 insns = inv_insns;
4528 immediates = &inv_immediates;
4530 else
4532 can_invert = 0;
4533 final_invert = 0;
4536 /* Now output the chosen sequence as instructions. */
4537 if (generate)
4539 for (i = 0; i < insns; i++)
4541 rtx new_src, temp1_rtx;
4543 temp1 = immediates->i[i];
4545 if (code == SET || code == MINUS)
4546 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4547 else if ((final_invert || i < (insns - 1)) && subtargets)
4548 new_src = gen_reg_rtx (mode);
4549 else
4550 new_src = target;
4552 if (can_invert)
4553 temp1 = ~temp1;
4554 else if (can_negate)
4555 temp1 = -temp1;
4557 temp1 = trunc_int_for_mode (temp1, mode);
4558 temp1_rtx = GEN_INT (temp1);
4560 if (code == SET)
4562 else if (code == MINUS)
4563 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4564 else
4565 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4567 emit_constant_insn (cond,
4568 gen_rtx_SET (VOIDmode, new_src,
4569 temp1_rtx));
4570 source = new_src;
4572 if (code == SET)
4574 can_negate = can_invert;
4575 can_invert = 0;
4576 code = PLUS;
4578 else if (code == MINUS)
4579 code = PLUS;
4583 if (final_invert)
4585 if (generate)
4586 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4587 gen_rtx_NOT (mode, source)));
4588 insns++;
4591 return insns;
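/* Worked example (illustrative): for r0 &= 0xfffc00f3 the positive
   sequence is rejected (AND immediates cannot be chained), but the
   complement 0x0003ff0c splits into the two rotated immediates 0x3fc00
   and 0x30c, so the inverted sequence wins and something like

     bic r0, r0, #0x3fc00
     bic r0, r0, #0x30c

   is emitted: two instructions instead of a constant-pool load.  */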
4594 /* Canonicalize a comparison so that we are more likely to recognize it.
4595 This can be done for a few constant compares, where we can make the
4596 immediate value easier to load. */
4598 static void
4599 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4600 bool op0_preserve_value)
4602 machine_mode mode;
4603 unsigned HOST_WIDE_INT i, maxval;
4605 mode = GET_MODE (*op0);
4606 if (mode == VOIDmode)
4607 mode = GET_MODE (*op1);
4609 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4611 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4612 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4613 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4614 for GTU/LEU in Thumb mode. */
4615 if (mode == DImode)
4617 rtx tem;
4619 if (*code == GT || *code == LE
4620 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4622 /* Missing comparison. First try to use an available
4623 comparison. */
4624 if (CONST_INT_P (*op1))
4626 i = INTVAL (*op1);
4627 switch (*code)
4629 case GT:
4630 case LE:
4631 if (i != maxval
4632 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4634 *op1 = GEN_INT (i + 1);
4635 *code = *code == GT ? GE : LT;
4636 return;
4638 break;
4639 case GTU:
4640 case LEU:
4641 if (i != ~((unsigned HOST_WIDE_INT) 0)
4642 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4644 *op1 = GEN_INT (i + 1);
4645 *code = *code == GTU ? GEU : LTU;
4646 return;
4648 break;
4649 default:
4650 gcc_unreachable ();
4654 /* If that did not work, reverse the condition. */
4655 if (!op0_preserve_value)
4657 tem = *op0;
4658 *op0 = *op1;
4659 *op1 = tem;
4660 *code = (int)swap_condition ((enum rtx_code)*code);
4663 return;
4666 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4667 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4668 to facilitate possible combining with a cmp into 'ands'. */
4669 if (mode == SImode
4670 && GET_CODE (*op0) == ZERO_EXTEND
4671 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4672 && GET_MODE (XEXP (*op0, 0)) == QImode
4673 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4674 && subreg_lowpart_p (XEXP (*op0, 0))
4675 && *op1 == const0_rtx)
4676 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4677 GEN_INT (255));
4679 /* Comparisons smaller than DImode. Only adjust comparisons against
4680 an out-of-range constant. */
4681 if (!CONST_INT_P (*op1)
4682 || const_ok_for_arm (INTVAL (*op1))
4683 || const_ok_for_arm (- INTVAL (*op1)))
4684 return;
4686 i = INTVAL (*op1);
4688 switch (*code)
4690 case EQ:
4691 case NE:
4692 return;
4694 case GT:
4695 case LE:
4696 if (i != maxval
4697 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4699 *op1 = GEN_INT (i + 1);
4700 *code = *code == GT ? GE : LT;
4701 return;
4703 break;
4705 case GE:
4706 case LT:
4707 if (i != ~maxval
4708 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4710 *op1 = GEN_INT (i - 1);
4711 *code = *code == GE ? GT : LE;
4712 return;
4714 break;
4716 case GTU:
4717 case LEU:
4718 if (i != ~((unsigned HOST_WIDE_INT) 0)
4719 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4721 *op1 = GEN_INT (i + 1);
4722 *code = *code == GTU ? GEU : LTU;
4723 return;
4725 break;
4727 case GEU:
4728 case LTU:
4729 if (i != 0
4730 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4732 *op1 = GEN_INT (i - 1);
4733 *code = *code == GEU ? GTU : LEU;
4734 return;
4736 break;
4738 default:
4739 gcc_unreachable ();
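/* Worked example (illustrative): (gt r0 #0x1ff) cannot be emitted
   directly because neither 0x1ff nor -0x1ff is a valid immediate, but
   0x200 is, so the comparison is rewritten as (ge r0 #0x200); the
   condition code changes while the arithmetic meaning is preserved.  */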
4744 /* Define how to find the value returned by a function. */
4746 static rtx
4747 arm_function_value(const_tree type, const_tree func,
4748 bool outgoing ATTRIBUTE_UNUSED)
4750 machine_mode mode;
4751 int unsignedp ATTRIBUTE_UNUSED;
4752 rtx r ATTRIBUTE_UNUSED;
4754 mode = TYPE_MODE (type);
4756 if (TARGET_AAPCS_BASED)
4757 return aapcs_allocate_return_reg (mode, type, func);
4759 /* Promote integer types. */
4760 if (INTEGRAL_TYPE_P (type))
4761 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4763 /* Promote small structs returned in a register to full-word size
4764 for big-endian AAPCS. */
4765 if (arm_return_in_msb (type))
4767 HOST_WIDE_INT size = int_size_in_bytes (type);
4768 if (size % UNITS_PER_WORD != 0)
4770 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4771 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4775 return arm_libcall_value_1 (mode);
4778 /* libcall hashtable helpers. */
4780 struct libcall_hasher : typed_noop_remove <rtx_def>
4782 typedef rtx_def value_type;
4783 typedef rtx_def compare_type;
4784 static inline hashval_t hash (const value_type *);
4785 static inline bool equal (const value_type *, const compare_type *);
4786 static inline void remove (value_type *);
4789 inline bool
4790 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4792 return rtx_equal_p (p1, p2);
4795 inline hashval_t
4796 libcall_hasher::hash (const value_type *p1)
4798 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4801 typedef hash_table<libcall_hasher> libcall_table_type;
4803 static void
4804 add_libcall (libcall_table_type *htab, rtx libcall)
4806 *htab->find_slot (libcall, INSERT) = libcall;
4809 static bool
4810 arm_libcall_uses_aapcs_base (const_rtx libcall)
4812 static bool init_done = false;
4813 static libcall_table_type *libcall_htab = NULL;
4815 if (!init_done)
4817 init_done = true;
4819 libcall_htab = new libcall_table_type (31);
4820 add_libcall (libcall_htab,
4821 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4822 add_libcall (libcall_htab,
4823 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4824 add_libcall (libcall_htab,
4825 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4826 add_libcall (libcall_htab,
4827 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4829 add_libcall (libcall_htab,
4830 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4831 add_libcall (libcall_htab,
4832 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4833 add_libcall (libcall_htab,
4834 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4835 add_libcall (libcall_htab,
4836 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4838 add_libcall (libcall_htab,
4839 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4840 add_libcall (libcall_htab,
4841 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4842 add_libcall (libcall_htab,
4843 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4844 add_libcall (libcall_htab,
4845 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4846 add_libcall (libcall_htab,
4847 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4848 add_libcall (libcall_htab,
4849 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4850 add_libcall (libcall_htab,
4851 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4852 add_libcall (libcall_htab,
4853 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4855 /* Values from double-precision helper functions are returned in core
4856 registers if the selected core only supports single-precision
4857 arithmetic, even if we are using the hard-float ABI. The same is
4858 true for single-precision helpers, but we will never be using the
4859 hard-float ABI on a CPU which doesn't support single-precision
4860 operations in hardware. */
4861 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4862 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4872 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4873 SFmode));
4874 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4875 DFmode));
4878 return libcall && libcall_htab->find (libcall) != NULL;
4881 static rtx
4882 arm_libcall_value_1 (machine_mode mode)
4884 if (TARGET_AAPCS_BASED)
4885 return aapcs_libcall_value (mode);
4886 else if (TARGET_IWMMXT_ABI
4887 && arm_vector_mode_supported_p (mode))
4888 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4889 else
4890 return gen_rtx_REG (mode, ARG_REGISTER (1));
4893 /* Define how to find the value returned by a library function
4894 assuming the value has mode MODE. */
4896 static rtx
4897 arm_libcall_value (machine_mode mode, const_rtx libcall)
4899 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4900 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4902 /* The following libcalls return their result in integer registers,
4903 even though they return a floating point value. */
4904 if (arm_libcall_uses_aapcs_base (libcall))
4905 return gen_rtx_REG (mode, ARG_REGISTER(1));
4909 return arm_libcall_value_1 (mode);
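/* For example (illustrative): on a hard-float target whose FPU is
   single-precision only, a DFmode divide goes through the __aeabi_ddiv
   helper, which follows the base AAPCS, so its result is picked up from
   r0/r1 rather than d0; that is precisely the case the table in
   arm_libcall_uses_aapcs_base describes.  */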
4912 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4914 static bool
4915 arm_function_value_regno_p (const unsigned int regno)
4917 if (regno == ARG_REGISTER (1)
4918 || (TARGET_32BIT
4919 && TARGET_AAPCS_BASED
4920 && TARGET_VFP
4921 && TARGET_HARD_FLOAT
4922 && regno == FIRST_VFP_REGNUM)
4923 || (TARGET_IWMMXT_ABI
4924 && regno == FIRST_IWMMXT_REGNUM))
4925 return true;
4927 return false;
4930 /* Determine the amount of memory needed to store the possible return
4931 registers of an untyped call. */
4933 arm_apply_result_size (void)
4935 int size = 16;
4937 if (TARGET_32BIT)
4939 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4940 size += 32;
4941 if (TARGET_IWMMXT_ABI)
4942 size += 8;
4945 return size;
4948 /* Decide whether TYPE should be returned in memory (true)
4949 or in a register (false). FNTYPE is the type of the function making
4950 the call. */
4951 static bool
4952 arm_return_in_memory (const_tree type, const_tree fntype)
4954 HOST_WIDE_INT size;
4956 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4958 if (TARGET_AAPCS_BASED)
4960 /* Simple, non-aggregate types (i.e. not including vectors and
4961 complex) are always returned in a register (or registers).
4962 We don't care about which register here, so we can short-cut
4963 some of the detail. */
4964 if (!AGGREGATE_TYPE_P (type)
4965 && TREE_CODE (type) != VECTOR_TYPE
4966 && TREE_CODE (type) != COMPLEX_TYPE)
4967 return false;
4969 /* Any return value that is no larger than one word can be
4970 returned in r0. */
4971 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4972 return false;
4974 /* Check any available co-processors to see if they accept the
4975 type as a register candidate (VFP, for example, can return
4976 some aggregates in consecutive registers). These aren't
4977 available if the call is variadic. */
4978 if (aapcs_select_return_coproc (type, fntype) >= 0)
4979 return false;
4981 /* Vector values should be returned using ARM registers, not
4982 memory (unless they're over 16 bytes, which will break since
4983 we only have four call-clobbered registers to play with). */
4984 if (TREE_CODE (type) == VECTOR_TYPE)
4985 return (size < 0 || size > (4 * UNITS_PER_WORD));
4987 /* The rest go in memory. */
4988 return true;
4991 if (TREE_CODE (type) == VECTOR_TYPE)
4992 return (size < 0 || size > (4 * UNITS_PER_WORD));
4994 if (!AGGREGATE_TYPE_P (type) &&
4995 (TREE_CODE (type) != VECTOR_TYPE))
4996 /* All simple types are returned in registers. */
4997 return false;
4999 if (arm_abi != ARM_ABI_APCS)
5001 /* ATPCS and later return aggregate types in memory only if they are
5002 larger than a word (or are variable size). */
5003 return (size < 0 || size > UNITS_PER_WORD);
5006 /* For the arm-wince targets we choose to be compatible with Microsoft's
5007 ARM and Thumb compilers, which always return aggregates in memory. */
5008 #ifndef ARM_WINCE
5009 /* All structures/unions bigger than one word are returned in memory.
5010 Also catch the case where int_size_in_bytes returns -1. In this case
5011 the aggregate is either huge or of variable size, and in either case
5012 we will want to return it via memory and not in a register. */
5013 if (size < 0 || size > UNITS_PER_WORD)
5014 return true;
5016 if (TREE_CODE (type) == RECORD_TYPE)
5018 tree field;
5020 /* For a struct the APCS says that we only return in a register
5021 if the type is 'integer like' and every addressable element
5022 has an offset of zero. For practical purposes this means
5023 that the structure can have at most one non bit-field element
5024 and that this element must be the first one in the structure. */
5026 /* Find the first field, ignoring non FIELD_DECL things which will
5027 have been created by C++. */
5028 for (field = TYPE_FIELDS (type);
5029 field && TREE_CODE (field) != FIELD_DECL;
5030 field = DECL_CHAIN (field))
5031 continue;
5033 if (field == NULL)
5034 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5036 /* Check that the first field is valid for returning in a register. */
5038 /* ... Floats are not allowed */
5039 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5040 return true;
5042 /* ... Aggregates that are not themselves valid for returning in
5043 a register are not allowed. */
5044 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5045 return true;
5047 /* Now check the remaining fields, if any. Only bitfields are allowed,
5048 since they are not addressable. */
5049 for (field = DECL_CHAIN (field);
5050 field;
5051 field = DECL_CHAIN (field))
5053 if (TREE_CODE (field) != FIELD_DECL)
5054 continue;
5056 if (!DECL_BIT_FIELD_TYPE (field))
5057 return true;
5060 return false;
5063 if (TREE_CODE (type) == UNION_TYPE)
5065 tree field;
5067 /* Unions can be returned in registers if every element is
5068 integral, or can be returned in an integer register. */
5069 for (field = TYPE_FIELDS (type);
5070 field;
5071 field = DECL_CHAIN (field))
5073 if (TREE_CODE (field) != FIELD_DECL)
5074 continue;
5076 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5077 return true;
5079 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5080 return true;
5083 return false;
5085 #endif /* not ARM_WINCE */
5087 /* Return all other types in memory. */
5088 return true;
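/* Illustrative AAPCS examples of the rules above:

     struct s4 { short a, b; };   -> 4 bytes, fits in r0, not in memory
     struct s8 { int a, b; };     -> 8 bytes, no co-processor takes it,
                                     so it is returned in memory
     struct hfa { double d[2]; }; -> returned in d0-d1 when the VFP
                                     hard-float PCS applies  */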
5091 const struct pcs_attribute_arg
5093 const char *arg;
5094 enum arm_pcs value;
5095 } pcs_attribute_args[] =
5097 {"aapcs", ARM_PCS_AAPCS},
5098 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5099 #if 0
5100 /* We could recognize these, but changes would be needed elsewhere
5101 * to implement them. */
5102 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5103 {"atpcs", ARM_PCS_ATPCS},
5104 {"apcs", ARM_PCS_APCS},
5105 #endif
5106 {NULL, ARM_PCS_UNKNOWN}
5109 static enum arm_pcs
5110 arm_pcs_from_attribute (tree attr)
5112 const struct pcs_attribute_arg *ptr;
5113 const char *arg;
5115 /* Get the value of the argument. */
5116 if (TREE_VALUE (attr) == NULL_TREE
5117 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5118 return ARM_PCS_UNKNOWN;
5120 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5122 /* Check it against the list of known arguments. */
5123 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5124 if (streq (arg, ptr->arg))
5125 return ptr->value;
5127 /* An unrecognized PCS variant. */
5128 return ARM_PCS_UNKNOWN;
5131 /* Get the PCS variant to use for this call. TYPE is the function's type
5132 specification, DECL is the specific declaration. DECL may be null if
5133 the call could be indirect or if this is a library call. */
5134 static enum arm_pcs
5135 arm_get_pcs_model (const_tree type, const_tree decl)
5137 bool user_convention = false;
5138 enum arm_pcs user_pcs = arm_pcs_default;
5139 tree attr;
5141 gcc_assert (type);
5143 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5144 if (attr)
5146 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5147 user_convention = true;
5150 if (TARGET_AAPCS_BASED)
5152 /* Detect varargs functions. These always use the base rules
5153 (no argument is ever a candidate for a co-processor
5154 register). */
5155 bool base_rules = stdarg_p (type);
5157 if (user_convention)
5159 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5160 sorry ("non-AAPCS derived PCS variant");
5161 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5162 error ("variadic functions must use the base AAPCS variant");
5165 if (base_rules)
5166 return ARM_PCS_AAPCS;
5167 else if (user_convention)
5168 return user_pcs;
5169 else if (decl && flag_unit_at_a_time)
5171 /* Local functions never leak outside this compilation unit,
5172 so we are free to use whatever conventions are
5173 appropriate. */
5174 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5175 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5176 if (i && i->local)
5177 return ARM_PCS_AAPCS_LOCAL;
5180 else if (user_convention && user_pcs != arm_pcs_default)
5181 sorry ("PCS variant");
5183 /* For everything else we use the target's default. */
5184 return arm_pcs_default;
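/* For example (illustrative), with the softfp float ABI a specific
   function can still be declared to use the VFP variant:

     double dot (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));

   calls to it then use ARM_PCS_AAPCS_VFP, whereas a variadic function
   carrying the same attribute is rejected with the error above.  */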
5188 static void
5189 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5190 const_tree fntype ATTRIBUTE_UNUSED,
5191 rtx libcall ATTRIBUTE_UNUSED,
5192 const_tree fndecl ATTRIBUTE_UNUSED)
5194 /* Record the unallocated VFP registers. */
5195 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5196 pcum->aapcs_vfp_reg_alloc = 0;
5199 /* Walk down the type tree of TYPE counting consecutive base elements.
5200 If *MODEP is VOIDmode, then set it to the first valid floating point
5201 type. If a non-floating point type is found, or if a floating point
5202 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5203 otherwise return the count in the sub-tree. */
5204 static int
5205 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5207 machine_mode mode;
5208 HOST_WIDE_INT size;
5210 switch (TREE_CODE (type))
5212 case REAL_TYPE:
5213 mode = TYPE_MODE (type);
5214 if (mode != DFmode && mode != SFmode)
5215 return -1;
5217 if (*modep == VOIDmode)
5218 *modep = mode;
5220 if (*modep == mode)
5221 return 1;
5223 break;
5225 case COMPLEX_TYPE:
5226 mode = TYPE_MODE (TREE_TYPE (type));
5227 if (mode != DFmode && mode != SFmode)
5228 return -1;
5230 if (*modep == VOIDmode)
5231 *modep = mode;
5233 if (*modep == mode)
5234 return 2;
5236 break;
5238 case VECTOR_TYPE:
5239 /* Use V2SImode and V4SImode as representatives of all 64-bit
5240 and 128-bit vector types, whether or not those modes are
5241 supported with the present options. */
5242 size = int_size_in_bytes (type);
5243 switch (size)
5245 case 8:
5246 mode = V2SImode;
5247 break;
5248 case 16:
5249 mode = V4SImode;
5250 break;
5251 default:
5252 return -1;
5255 if (*modep == VOIDmode)
5256 *modep = mode;
5258 /* Vector modes are considered to be opaque: two vectors are
5259 equivalent for the purposes of being homogeneous aggregates
5260 if they are the same size. */
5261 if (*modep == mode)
5262 return 1;
5264 break;
5266 case ARRAY_TYPE:
5268 int count;
5269 tree index = TYPE_DOMAIN (type);
5271 /* Can't handle incomplete types nor sizes that are not
5272 fixed. */
5273 if (!COMPLETE_TYPE_P (type)
5274 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5275 return -1;
5277 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5278 if (count == -1
5279 || !index
5280 || !TYPE_MAX_VALUE (index)
5281 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5282 || !TYPE_MIN_VALUE (index)
5283 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5284 || count < 0)
5285 return -1;
5287 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5288 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5290 /* There must be no padding. */
5291 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5292 return -1;
5294 return count;
5297 case RECORD_TYPE:
5299 int count = 0;
5300 int sub_count;
5301 tree field;
5303 /* Can't handle incomplete types nor sizes that are not
5304 fixed. */
5305 if (!COMPLETE_TYPE_P (type)
5306 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5307 return -1;
5309 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5311 if (TREE_CODE (field) != FIELD_DECL)
5312 continue;
5314 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5315 if (sub_count < 0)
5316 return -1;
5317 count += sub_count;
5320 /* There must be no padding. */
5321 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5322 return -1;
5324 return count;
5327 case UNION_TYPE:
5328 case QUAL_UNION_TYPE:
5330 /* These aren't very interesting except in a degenerate case. */
5331 int count = 0;
5332 int sub_count;
5333 tree field;
5335 /* Can't handle incomplete types nor sizes that are not
5336 fixed. */
5337 if (!COMPLETE_TYPE_P (type)
5338 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5339 return -1;
5341 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5343 if (TREE_CODE (field) != FIELD_DECL)
5344 continue;
5346 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5347 if (sub_count < 0)
5348 return -1;
5349 count = count > sub_count ? count : sub_count;
5352 /* There must be no padding. */
5353 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5354 return -1;
5356 return count;
5359 default:
5360 break;
5363 return -1;
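/* Worked examples (illustrative), starting with *MODEP == VOIDmode:

     struct hfa { double x; double y[2]; };  -> returns 3, *MODEP = DFmode
     struct mix { float f; int i; };         -> returns -1 (not homogeneous)

   so the first type is a homogeneous floating-point aggregate eligible
   for d0-d2 under the VFP PCS, while the second falls back to the base
   rules.  */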
5366 /* Return true if PCS_VARIANT should use VFP registers. */
5367 static bool
5368 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5370 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5372 static bool seen_thumb1_vfp = false;
5374 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5376 sorry ("Thumb-1 hard-float VFP ABI");
5377 /* sorry() is not immediately fatal, so only display this once. */
5378 seen_thumb1_vfp = true;
5381 return true;
5384 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5385 return false;
5387 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5388 (TARGET_VFP_DOUBLE || !is_double));
5391 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5392 suitable for passing or returning in VFP registers for the PCS
5393 variant selected. If it is, then *BASE_MODE is updated to contain
5394 a machine mode describing each element of the argument's type and
5395 *COUNT to hold the number of such elements. */
5396 static bool
5397 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5398 machine_mode mode, const_tree type,
5399 machine_mode *base_mode, int *count)
5401 machine_mode new_mode = VOIDmode;
5403 /* If we have the type information, prefer that to working things
5404 out from the mode. */
5405 if (type)
5407 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5409 if (ag_count > 0 && ag_count <= 4)
5410 *count = ag_count;
5411 else
5412 return false;
5414 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5415 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5416 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5418 *count = 1;
5419 new_mode = mode;
5421 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5423 *count = 2;
5424 new_mode = (mode == DCmode ? DFmode : SFmode);
5426 else
5427 return false;
5430 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5431 return false;
5433 *base_mode = new_mode;
5434 return true;
5437 static bool
5438 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5439 machine_mode mode, const_tree type)
5441 int count ATTRIBUTE_UNUSED;
5442 machine_mode ag_mode ATTRIBUTE_UNUSED;
5444 if (!use_vfp_abi (pcs_variant, false))
5445 return false;
5446 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5447 &ag_mode, &count);
5450 static bool
5451 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5452 const_tree type)
5454 if (!use_vfp_abi (pcum->pcs_variant, false))
5455 return false;
5457 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5458 &pcum->aapcs_vfp_rmode,
5459 &pcum->aapcs_vfp_rcount);
5462 static bool
5463 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5464 const_tree type ATTRIBUTE_UNUSED)
5466 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5467 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5468 int regno;
5470 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5471 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5473 pcum->aapcs_vfp_reg_alloc = mask << regno;
5474 if (mode == BLKmode
5475 || (mode == TImode && ! TARGET_NEON)
5476 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5478 int i;
5479 int rcount = pcum->aapcs_vfp_rcount;
5480 int rshift = shift;
5481 machine_mode rmode = pcum->aapcs_vfp_rmode;
5482 rtx par;
5483 if (!TARGET_NEON)
5485 /* Avoid using unsupported vector modes. */
5486 if (rmode == V2SImode)
5487 rmode = DImode;
5488 else if (rmode == V4SImode)
5490 rmode = DImode;
5491 rcount *= 2;
5492 rshift /= 2;
5495 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5496 for (i = 0; i < rcount; i++)
5498 rtx tmp = gen_rtx_REG (rmode,
5499 FIRST_VFP_REGNUM + regno + i * rshift);
5500 tmp = gen_rtx_EXPR_LIST
5501 (VOIDmode, tmp,
5502 GEN_INT (i * GET_MODE_SIZE (rmode)));
5503 XVECEXP (par, 0, i) = tmp;
5506 pcum->aapcs_reg = par;
5508 else
5509 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5510 return true;
5512 return false;
5515 static rtx
5516 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5517 machine_mode mode,
5518 const_tree type ATTRIBUTE_UNUSED)
5520 if (!use_vfp_abi (pcs_variant, false))
5521 return NULL;
5523 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5525 int count;
5526 machine_mode ag_mode;
5527 int i;
5528 rtx par;
5529 int shift;
5531 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5532 &ag_mode, &count);
5534 if (!TARGET_NEON)
5536 if (ag_mode == V2SImode)
5537 ag_mode = DImode;
5538 else if (ag_mode == V4SImode)
5540 ag_mode = DImode;
5541 count *= 2;
5544 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5545 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5546 for (i = 0; i < count; i++)
5548 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5549 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5550 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5551 XVECEXP (par, 0, i) = tmp;
5554 return par;
5557 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5560 static void
5561 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5562 machine_mode mode ATTRIBUTE_UNUSED,
5563 const_tree type ATTRIBUTE_UNUSED)
5565 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5566 pcum->aapcs_vfp_reg_alloc = 0;
5567 return;
5570 #define AAPCS_CP(X) \
5572 aapcs_ ## X ## _cum_init, \
5573 aapcs_ ## X ## _is_call_candidate, \
5574 aapcs_ ## X ## _allocate, \
5575 aapcs_ ## X ## _is_return_candidate, \
5576 aapcs_ ## X ## _allocate_return_reg, \
5577 aapcs_ ## X ## _advance \
5580 /* Table of co-processors that can be used to pass arguments in
5581 registers. Ideally no argument should be a candidate for more than
5582 one co-processor table entry, but the table is processed in order
5583 and stops after the first match. If that entry then fails to put
5584 the argument into a co-processor register, the argument will go on
5585 the stack. */
5586 static struct
5588 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5589 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5591 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5592 BLKmode) is a candidate for this co-processor's registers; this
5593 function should ignore any position-dependent state in
5594 CUMULATIVE_ARGS and only use call-type dependent information. */
5595 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5597 /* Return true if the argument does get a co-processor register; it
5598 should set aapcs_reg to an RTX of the register allocated as is
5599 required for a return from FUNCTION_ARG. */
5600 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5602 /* Return true if a result of mode MODE (or type TYPE if MODE is
5603 BLKmode) can be returned in this co-processor's registers. */
5604 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5606 /* Allocate and return an RTX element to hold the return type of a
5607 call. This routine must not fail and will only be called if
5608 is_return_candidate returned true with the same parameters. */
5609 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5611 /* Finish processing this argument and prepare to start processing
5612 the next one. */
5613 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5614 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5616 AAPCS_CP(vfp)
5619 #undef AAPCS_CP
5621 static int
5622 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5623 const_tree type)
5625 int i;
5627 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5628 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5629 return i;
5631 return -1;
5634 static int
5635 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5637 /* We aren't passed a decl, so we can't check that a call is local.
5638 However, it isn't clear that that would be a win anyway, since it
5639 might limit some tail-calling opportunities. */
5640 enum arm_pcs pcs_variant;
5642 if (fntype)
5644 const_tree fndecl = NULL_TREE;
5646 if (TREE_CODE (fntype) == FUNCTION_DECL)
5648 fndecl = fntype;
5649 fntype = TREE_TYPE (fntype);
5652 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5654 else
5655 pcs_variant = arm_pcs_default;
5657 if (pcs_variant != ARM_PCS_AAPCS)
5659 int i;
5661 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5662 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5663 TYPE_MODE (type),
5664 type))
5665 return i;
5667 return -1;
5670 static rtx
5671 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5672 const_tree fntype)
5674 /* We aren't passed a decl, so we can't check that a call is local.
5675 However, it isn't clear that that would be a win anyway, since it
5676 might limit some tail-calling opportunities. */
5677 enum arm_pcs pcs_variant;
5678 int unsignedp ATTRIBUTE_UNUSED;
5680 if (fntype)
5682 const_tree fndecl = NULL_TREE;
5684 if (TREE_CODE (fntype) == FUNCTION_DECL)
5686 fndecl = fntype;
5687 fntype = TREE_TYPE (fntype);
5690 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5692 else
5693 pcs_variant = arm_pcs_default;
5695 /* Promote integer types. */
5696 if (type && INTEGRAL_TYPE_P (type))
5697 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5699 if (pcs_variant != ARM_PCS_AAPCS)
5701 int i;
5703 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5704 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5705 type))
5706 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5707 mode, type);
5710 /* Promotes small structs returned in a register to full-word size
5711 for big-endian AAPCS. */
5712 if (type && arm_return_in_msb (type))
5714 HOST_WIDE_INT size = int_size_in_bytes (type);
5715 if (size % UNITS_PER_WORD != 0)
5717 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5718 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5722 return gen_rtx_REG (mode, R0_REGNUM);
5725 static rtx
5726 aapcs_libcall_value (machine_mode mode)
5728 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5729 && GET_MODE_SIZE (mode) <= 4)
5730 mode = SImode;
5732 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5735 /* Lay out a function argument using the AAPCS rules. The rule
5736 numbers referred to here are those in the AAPCS. */
5737 static void
5738 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5739 const_tree type, bool named)
5741 int nregs, nregs2;
5742 int ncrn;
5744 /* We only need to do this once per argument. */
5745 if (pcum->aapcs_arg_processed)
5746 return;
5748 pcum->aapcs_arg_processed = true;
5750 /* Special case: if named is false then we are handling an incoming
5751 anonymous argument which is on the stack. */
5752 if (!named)
5753 return;
5755 /* Is this a potential co-processor register candidate? */
5756 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5758 int slot = aapcs_select_call_coproc (pcum, mode, type);
5759 pcum->aapcs_cprc_slot = slot;
5761 /* We don't have to apply any of the rules from part B of the
5762 preparation phase, these are handled elsewhere in the
5763 compiler. */
5765 if (slot >= 0)
5767 /* A Co-processor register candidate goes either in its own
5768 class of registers or on the stack. */
5769 if (!pcum->aapcs_cprc_failed[slot])
5771 /* C1.cp - Try to allocate the argument to co-processor
5772 registers. */
5773 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5774 return;
5776 /* C2.cp - Put the argument on the stack and note that we
5777 can't assign any more candidates in this slot. We also
5778 need to note that we have allocated stack space, so that
5779 we won't later try to split a non-cprc candidate between
5780 core registers and the stack. */
5781 pcum->aapcs_cprc_failed[slot] = true;
5782 pcum->can_split = false;
5785 /* We didn't get a register, so this argument goes on the
5786 stack. */
5787 gcc_assert (pcum->can_split == false);
5788 return;
5792 /* C3 - For double-word aligned arguments, round the NCRN up to the
5793 next even number. */
5794 ncrn = pcum->aapcs_ncrn;
5795 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5796 ncrn++;
5798 nregs = ARM_NUM_REGS2(mode, type);
5800 /* Sigh, this test should really assert that nregs > 0, but a GCC
5801 extension allows empty structs and then gives them zero size; it
5802 then allows such a structure to be passed by value. For some of
5803 the code below we have to pretend that such an argument has
5804 non-zero size so that we 'locate' it correctly either in
5805 registers or on the stack. */
5806 gcc_assert (nregs >= 0);
5808 nregs2 = nregs ? nregs : 1;
5810 /* C4 - Argument fits entirely in core registers. */
5811 if (ncrn + nregs2 <= NUM_ARG_REGS)
5813 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5814 pcum->aapcs_next_ncrn = ncrn + nregs;
5815 return;
5818 /* C5 - Some core registers left and there are no arguments already
5819 on the stack: split this argument between the remaining core
5820 registers and the stack. */
5821 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5823 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5824 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5825 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5826 return;
5829 /* C6 - NCRN is set to 4. */
5830 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5832 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5833 return;
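/* Rough worked example of the rules above, assuming the base (integer)
   variant of the AAPCS where a double is passed in core registers:
   for f (int a, double b, int c)
     a: ncrn = 0, nregs = 1   -> r0 (C4), next ncrn = 1
     b: needs doubleword alignment, so C3 rounds ncrn up to 2;
        nregs = 2             -> r2/r3 (C4), next ncrn = 4
     c: ncrn = 4, no core registers left -> stack (C6, C7/C8).  */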
5836 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5837 for a call to a function whose data type is FNTYPE.
5838 For a library call, FNTYPE is NULL. */
5839 void
5840 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5841 rtx libname,
5842 tree fndecl ATTRIBUTE_UNUSED)
5844 /* Long call handling. */
5845 if (fntype)
5846 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5847 else
5848 pcum->pcs_variant = arm_pcs_default;
5850 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5852 if (arm_libcall_uses_aapcs_base (libname))
5853 pcum->pcs_variant = ARM_PCS_AAPCS;
5855 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5856 pcum->aapcs_reg = NULL_RTX;
5857 pcum->aapcs_partial = 0;
5858 pcum->aapcs_arg_processed = false;
5859 pcum->aapcs_cprc_slot = -1;
5860 pcum->can_split = true;
5862 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5864 int i;
5866 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5868 pcum->aapcs_cprc_failed[i] = false;
5869 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5872 return;
5875 /* Legacy ABIs */
5877 /* On the ARM, the offset starts at 0. */
5878 pcum->nregs = 0;
5879 pcum->iwmmxt_nregs = 0;
5880 pcum->can_split = true;
5882 /* Varargs vectors are treated the same as long long.
5884 named_count avoids having to change the way arm handles 'named'. */
5884 pcum->named_count = 0;
5885 pcum->nargs = 0;
5887 if (TARGET_REALLY_IWMMXT && fntype)
5889 tree fn_arg;
5891 for (fn_arg = TYPE_ARG_TYPES (fntype);
5892 fn_arg;
5893 fn_arg = TREE_CHAIN (fn_arg))
5894 pcum->named_count += 1;
5896 if (! pcum->named_count)
5897 pcum->named_count = INT_MAX;
5901 /* Return true if we use LRA instead of reload pass. */
5902 static bool
5903 arm_lra_p (void)
5905 return arm_lra_flag;
5908 /* Return true if mode/type need doubleword alignment. */
5909 static bool
5910 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5912 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5913 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
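/* For example, DImode (64-bit alignment) or a type containing a double
   exceeds PARM_BOUNDARY (32 on ARM) and therefore needs doubleword
   alignment, i.e. an even starting register or 8-byte stack alignment.  */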
5917 /* Determine where to put an argument to a function.
5918 Value is zero to push the argument on the stack,
5919 or a hard register in which to store the argument.
5921 MODE is the argument's machine mode.
5922 TYPE is the data type of the argument (as a tree).
5923 This is null for libcalls where that information may
5924 not be available.
5925 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5926 the preceding args and about the function being called.
5927 NAMED is nonzero if this argument is a named parameter
5928 (otherwise it is an extra parameter matching an ellipsis).
5930 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5931 other arguments are passed on the stack. If (NAMED == 0) (which happens
5932 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5933 defined), say it is passed on the stack (function_prologue will
5934 indeed make it be passed on the stack if necessary). */
5936 static rtx
5937 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5938 const_tree type, bool named)
5940 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5941 int nregs;
5943 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5944 a call insn (op3 of a call_value insn). */
5945 if (mode == VOIDmode)
5946 return const0_rtx;
5948 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5950 aapcs_layout_arg (pcum, mode, type, named);
5951 return pcum->aapcs_reg;
5954 /* Varargs vectors are treated the same as long long.
5955 named_count avoids having to change the way arm handles 'named'. */
5956 if (TARGET_IWMMXT_ABI
5957 && arm_vector_mode_supported_p (mode)
5958 && pcum->named_count > pcum->nargs + 1)
5960 if (pcum->iwmmxt_nregs <= 9)
5961 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5962 else
5964 pcum->can_split = false;
5965 return NULL_RTX;
5969 /* Put doubleword aligned quantities in even register pairs. */
5970 if (pcum->nregs & 1
5971 && ARM_DOUBLEWORD_ALIGN
5972 && arm_needs_doubleword_align (mode, type))
5973 pcum->nregs++;
5975 /* Only allow splitting an arg between regs and memory if all preceding
5976 args were allocated to regs. For args passed by reference we only count
5977 the reference pointer. */
5978 if (pcum->can_split)
5979 nregs = 1;
5980 else
5981 nregs = ARM_NUM_REGS2 (mode, type);
5983 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5984 return NULL_RTX;
5986 return gen_rtx_REG (mode, pcum->nregs);
5989 static unsigned int
5990 arm_function_arg_boundary (machine_mode mode, const_tree type)
5992 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5993 ? DOUBLEWORD_ALIGNMENT
5994 : PARM_BOUNDARY);
5997 static int
5998 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
5999 tree type, bool named)
6001 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6002 int nregs = pcum->nregs;
6004 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6006 aapcs_layout_arg (pcum, mode, type, named);
6007 return pcum->aapcs_partial;
6010 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6011 return 0;
6013 if (NUM_ARG_REGS > nregs
6014 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6015 && pcum->can_split)
6016 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6018 return 0;
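/* Rough example: an 8-byte argument that arrives when only r3 is left
   (nregs = 3) is split by the code above; 4 < 3 + 2 and can_split holds,
   so (4 - 3) * UNITS_PER_WORD = 4 bytes go in r3 and the remaining
   4 bytes go on the stack.  */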
6021 /* Update the data in PCUM to advance over an argument
6022 of mode MODE and data type TYPE.
6023 (TYPE is null for libcalls where that information may not be available.) */
6025 static void
6026 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6027 const_tree type, bool named)
6029 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6031 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6033 aapcs_layout_arg (pcum, mode, type, named);
6035 if (pcum->aapcs_cprc_slot >= 0)
6037 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6038 type);
6039 pcum->aapcs_cprc_slot = -1;
6042 /* Generic stuff. */
6043 pcum->aapcs_arg_processed = false;
6044 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6045 pcum->aapcs_reg = NULL_RTX;
6046 pcum->aapcs_partial = 0;
6048 else
6050 pcum->nargs += 1;
6051 if (arm_vector_mode_supported_p (mode)
6052 && pcum->named_count > pcum->nargs
6053 && TARGET_IWMMXT_ABI)
6054 pcum->iwmmxt_nregs += 1;
6055 else
6056 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6060 /* Variable sized types are passed by reference. This is a GCC
6061 extension to the ARM ABI. */
6063 static bool
6064 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6065 machine_mode mode ATTRIBUTE_UNUSED,
6066 const_tree type, bool named ATTRIBUTE_UNUSED)
6068 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6071 /* Encode the current state of the #pragma [no_]long_calls. */
6072 typedef enum
6074 OFF, /* No #pragma [no_]long_calls is in effect. */
6075 LONG, /* #pragma long_calls is in effect. */
6076 SHORT /* #pragma no_long_calls is in effect. */
6077 } arm_pragma_enum;
6079 static arm_pragma_enum arm_pragma_long_calls = OFF;
6081 void
6082 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6084 arm_pragma_long_calls = LONG;
6087 void
6088 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6090 arm_pragma_long_calls = SHORT;
6093 void
6094 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6096 arm_pragma_long_calls = OFF;
6099 /* Handle an attribute requiring a FUNCTION_DECL;
6100 arguments as in struct attribute_spec.handler. */
6101 static tree
6102 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6103 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6105 if (TREE_CODE (*node) != FUNCTION_DECL)
6107 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6108 name);
6109 *no_add_attrs = true;
6112 return NULL_TREE;
6115 /* Handle an "interrupt" or "isr" attribute;
6116 arguments as in struct attribute_spec.handler. */
6117 static tree
6118 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6119 bool *no_add_attrs)
6121 if (DECL_P (*node))
6123 if (TREE_CODE (*node) != FUNCTION_DECL)
6125 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6126 name);
6127 *no_add_attrs = true;
6129 /* FIXME: the argument, if any, is checked for type attributes;
6130 should it be checked for decl ones? */
6132 else
6134 if (TREE_CODE (*node) == FUNCTION_TYPE
6135 || TREE_CODE (*node) == METHOD_TYPE)
6137 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6139 warning (OPT_Wattributes, "%qE attribute ignored",
6140 name);
6141 *no_add_attrs = true;
6144 else if (TREE_CODE (*node) == POINTER_TYPE
6145 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6146 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6147 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6149 *node = build_variant_type_copy (*node);
6150 TREE_TYPE (*node) = build_type_attribute_variant
6151 (TREE_TYPE (*node),
6152 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6153 *no_add_attrs = true;
6155 else
6157 /* Possibly pass this attribute on from the type to a decl. */
6158 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6159 | (int) ATTR_FLAG_FUNCTION_NEXT
6160 | (int) ATTR_FLAG_ARRAY_NEXT))
6162 *no_add_attrs = true;
6163 return tree_cons (name, args, NULL_TREE);
6165 else
6167 warning (OPT_Wattributes, "%qE attribute ignored",
6168 name);
6173 return NULL_TREE;
6176 /* Handle a "pcs" attribute; arguments as in struct
6177 attribute_spec.handler. */
6178 static tree
6179 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6180 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6182 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6184 warning (OPT_Wattributes, "%qE attribute ignored", name);
6185 *no_add_attrs = true;
6187 return NULL_TREE;
6190 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6191 /* Handle the "notshared" attribute. This attribute is another way of
6192 requesting hidden visibility. ARM's compiler supports
6193 "__declspec(notshared)"; we support the same thing via an
6194 attribute. */
6196 static tree
6197 arm_handle_notshared_attribute (tree *node,
6198 tree name ATTRIBUTE_UNUSED,
6199 tree args ATTRIBUTE_UNUSED,
6200 int flags ATTRIBUTE_UNUSED,
6201 bool *no_add_attrs)
6203 tree decl = TYPE_NAME (*node);
6205 if (decl)
6207 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6208 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6209 *no_add_attrs = false;
6211 return NULL_TREE;
6213 #endif
6215 /* Return 0 if the attributes for two types are incompatible, 1 if they
6216 are compatible, and 2 if they are nearly compatible (which causes a
6217 warning to be generated). */
6218 static int
6219 arm_comp_type_attributes (const_tree type1, const_tree type2)
6221 int l1, l2, s1, s2;
6223 /* Check for mismatch of non-default calling convention. */
6224 if (TREE_CODE (type1) != FUNCTION_TYPE)
6225 return 1;
6227 /* Check for mismatched call attributes. */
6228 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6229 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6230 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6231 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6233 /* Only bother to check if an attribute is defined. */
6234 if (l1 | l2 | s1 | s2)
6236 /* If one type has an attribute, the other must have the same attribute. */
6237 if ((l1 != l2) || (s1 != s2))
6238 return 0;
6240 /* Disallow mixed attributes. */
6241 if ((l1 & s2) || (l2 & s1))
6242 return 0;
6245 /* Check for mismatched ISR attribute. */
6246 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6247 if (! l1)
6248 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6249 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6250 if (! l2)
6251 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6252 if (l1 != l2)
6253 return 0;
6255 return 1;
6258 /* Assigns default attributes to newly defined type. This is used to
6259 set short_call/long_call attributes for function types of
6260 functions defined inside corresponding #pragma scopes. */
6261 static void
6262 arm_set_default_type_attributes (tree type)
6264 /* Add __attribute__ ((long_call)) to all functions, when
6265 inside #pragma long_calls or __attribute__ ((short_call)),
6266 when inside #pragma no_long_calls. */
6267 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6269 tree type_attr_list, attr_name;
6270 type_attr_list = TYPE_ATTRIBUTES (type);
6272 if (arm_pragma_long_calls == LONG)
6273 attr_name = get_identifier ("long_call");
6274 else if (arm_pragma_long_calls == SHORT)
6275 attr_name = get_identifier ("short_call");
6276 else
6277 return;
6279 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6280 TYPE_ATTRIBUTES (type) = type_attr_list;
6284 /* Return true if DECL is known to be linked into section SECTION. */
6286 static bool
6287 arm_function_in_section_p (tree decl, section *section)
6289 /* We can only be certain about functions defined in the same
6290 compilation unit. */
6291 if (!TREE_STATIC (decl))
6292 return false;
6294 /* Make sure that SYMBOL always binds to the definition in this
6295 compilation unit. */
6296 if (!targetm.binds_local_p (decl))
6297 return false;
6299 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6300 if (!DECL_SECTION_NAME (decl))
6302 /* Make sure that we will not create a unique section for DECL. */
6303 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6304 return false;
6307 return function_section (decl) == section;
6310 /* Return nonzero if a 32-bit "long_call" should be generated for
6311 a call from the current function to DECL. We generate a long_call
6312 if the function:
6314 a. has an __attribute__ ((long_call))
6315 or b. is within the scope of a #pragma long_calls
6316 or c. the -mlong-calls command line switch has been specified
6318 However we do not generate a long call if the function:
6320 d. has an __attribute__ ((short_call))
6321 or e. is inside the scope of a #pragma no_long_calls
6322 or f. is defined in the same section as the current function. */
6324 bool
6325 arm_is_long_call_p (tree decl)
6327 tree attrs;
6329 if (!decl)
6330 return TARGET_LONG_CALLS;
6332 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6333 if (lookup_attribute ("short_call", attrs))
6334 return false;
6336 /* For "f", be conservative, and only cater for cases in which the
6337 whole of the current function is placed in the same section. */
6338 if (!flag_reorder_blocks_and_partition
6339 && TREE_CODE (decl) == FUNCTION_DECL
6340 && arm_function_in_section_p (decl, current_function_section ()))
6341 return false;
6343 if (lookup_attribute ("long_call", attrs))
6344 return true;
6346 return TARGET_LONG_CALLS;
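/* For reference, the attributes tested above are used like this in C:

     extern void far_away (void) __attribute__ ((long_call));
     extern void nearby (void)   __attribute__ ((short_call));

   Calls to far_away always use a full 32-bit address sequence, even
   without -mlong-calls, while calls to nearby always use a plain BL,
   even inside #pragma long_calls.  */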
6349 /* Return nonzero if it is ok to make a tail-call to DECL. */
6350 static bool
6351 arm_function_ok_for_sibcall (tree decl, tree exp)
6353 unsigned long func_type;
6355 if (cfun->machine->sibcall_blocked)
6356 return false;
6358 /* Never tailcall something if we are generating code for Thumb-1. */
6359 if (TARGET_THUMB1)
6360 return false;
6362 /* The PIC register is live on entry to VxWorks PLT entries, so we
6363 must make the call before restoring the PIC register. */
6364 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6365 return false;
6367 /* If we are interworking and the function is not declared static
6368 then we can't tail-call it unless we know that it exists in this
6369 compilation unit (since it might be a Thumb routine). */
6370 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6371 && !TREE_ASM_WRITTEN (decl))
6372 return false;
6374 func_type = arm_current_func_type ();
6375 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6376 if (IS_INTERRUPT (func_type))
6377 return false;
6379 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6381 /* Check that the return value locations are the same. For
6382 example that we aren't returning a value from the sibling in
6383 a VFP register but then need to transfer it to a core
6384 register. */
6385 rtx a, b;
6387 a = arm_function_value (TREE_TYPE (exp), decl, false);
6388 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6389 cfun->decl, false);
6390 if (!rtx_equal_p (a, b))
6391 return false;
6394 /* Never tailcall if function may be called with a misaligned SP. */
6395 if (IS_STACKALIGN (func_type))
6396 return false;
6398 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6399 references should become a NOP. Don't convert such calls into
6400 sibling calls. */
6401 if (TARGET_AAPCS_BASED
6402 && arm_abi == ARM_ABI_AAPCS
6403 && decl
6404 && DECL_WEAK (decl))
6405 return false;
6407 /* Everything else is ok. */
6408 return true;
6412 /* Addressing mode support functions. */
6414 /* Return nonzero if X is a legitimate immediate operand when compiling
6415 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6417 legitimate_pic_operand_p (rtx x)
6419 if (GET_CODE (x) == SYMBOL_REF
6420 || (GET_CODE (x) == CONST
6421 && GET_CODE (XEXP (x, 0)) == PLUS
6422 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6423 return 0;
6425 return 1;
6428 /* Record that the current function needs a PIC register. Initialize
6429 cfun->machine->pic_reg if we have not already done so. */
6431 static void
6432 require_pic_register (void)
6434 /* A lot of the logic here is made obscure by the fact that this
6435 routine gets called as part of the rtx cost estimation process.
6436 We don't want those calls to affect any assumptions about the real
6437 function; and further, we can't call entry_of_function() until we
6438 start the real expansion process. */
6439 if (!crtl->uses_pic_offset_table)
6441 gcc_assert (can_create_pseudo_p ());
6442 if (arm_pic_register != INVALID_REGNUM
6443 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6445 if (!cfun->machine->pic_reg)
6446 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6448 /* Play games to avoid marking the function as needing pic
6449 if we are being called as part of the cost-estimation
6450 process. */
6451 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6452 crtl->uses_pic_offset_table = 1;
6454 else
6456 rtx_insn *seq, *insn;
6458 if (!cfun->machine->pic_reg)
6459 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6461 /* Play games to avoid marking the function as needing pic
6462 if we are being called as part of the cost-estimation
6463 process. */
6464 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6466 crtl->uses_pic_offset_table = 1;
6467 start_sequence ();
6469 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6470 && arm_pic_register > LAST_LO_REGNUM)
6471 emit_move_insn (cfun->machine->pic_reg,
6472 gen_rtx_REG (Pmode, arm_pic_register));
6473 else
6474 arm_load_pic_register (0UL);
6476 seq = get_insns ();
6477 end_sequence ();
6479 for (insn = seq; insn; insn = NEXT_INSN (insn))
6480 if (INSN_P (insn))
6481 INSN_LOCATION (insn) = prologue_location;
6483 /* We can be called during expansion of PHI nodes, where
6484 we can't yet emit instructions directly in the final
6485 insn stream. Queue the insns on the entry edge, they will
6486 be committed after everything else is expanded. */
6487 insert_insn_on_edge (seq,
6488 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6495 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6497 if (GET_CODE (orig) == SYMBOL_REF
6498 || GET_CODE (orig) == LABEL_REF)
6500 rtx insn;
6502 if (reg == 0)
6504 gcc_assert (can_create_pseudo_p ());
6505 reg = gen_reg_rtx (Pmode);
6508 /* VxWorks does not impose a fixed gap between segments; the run-time
6509 gap can be different from the object-file gap. We therefore can't
6510 use GOTOFF unless we are absolutely sure that the symbol is in the
6511 same segment as the GOT. Unfortunately, the flexibility of linker
6512 scripts means that we can't be sure of that in general, so assume
6513 that GOTOFF is never valid on VxWorks. */
6514 if ((GET_CODE (orig) == LABEL_REF
6515 || (GET_CODE (orig) == SYMBOL_REF &&
6516 SYMBOL_REF_LOCAL_P (orig)))
6517 && NEED_GOT_RELOC
6518 && arm_pic_data_is_text_relative)
6519 insn = arm_pic_static_addr (orig, reg);
6520 else
6522 rtx pat;
6523 rtx mem;
6525 /* If this function doesn't have a pic register, create one now. */
6526 require_pic_register ();
6528 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6530 /* Make the MEM as close to a constant as possible. */
6531 mem = SET_SRC (pat);
6532 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6533 MEM_READONLY_P (mem) = 1;
6534 MEM_NOTRAP_P (mem) = 1;
6536 insn = emit_insn (pat);
6539 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6540 by loop. */
6541 set_unique_reg_note (insn, REG_EQUAL, orig);
6543 return reg;
6545 else if (GET_CODE (orig) == CONST)
6547 rtx base, offset;
6549 if (GET_CODE (XEXP (orig, 0)) == PLUS
6550 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6551 return orig;
6553 /* Handle the case where we have: const (UNSPEC_TLS). */
6554 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6555 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6556 return orig;
6558 /* Handle the case where we have:
6559 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6560 CONST_INT. */
6561 if (GET_CODE (XEXP (orig, 0)) == PLUS
6562 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6563 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6565 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6566 return orig;
6569 if (reg == 0)
6571 gcc_assert (can_create_pseudo_p ());
6572 reg = gen_reg_rtx (Pmode);
6575 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6577 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6578 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6579 base == reg ? 0 : reg);
6581 if (CONST_INT_P (offset))
6583 /* The base register doesn't really matter, we only want to
6584 test the index for the appropriate mode. */
6585 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6587 gcc_assert (can_create_pseudo_p ());
6588 offset = force_reg (Pmode, offset);
6591 if (CONST_INT_P (offset))
6592 return plus_constant (Pmode, base, INTVAL (offset));
6595 if (GET_MODE_SIZE (mode) > 4
6596 && (GET_MODE_CLASS (mode) == MODE_INT
6597 || TARGET_SOFT_FLOAT))
6599 emit_insn (gen_addsi3 (reg, base, offset));
6600 return reg;
6603 return gen_rtx_PLUS (Pmode, base, offset);
6606 return orig;
6610 /* Find a spare register to use during the prolog of a function. */
6612 static int
6613 thumb_find_work_register (unsigned long pushed_regs_mask)
6615 int reg;
6617 /* Check the argument registers first as these are call-used. The
6618 register allocation order means that sometimes r3 might be used
6619 but earlier argument registers might not, so check them all. */
6620 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6621 if (!df_regs_ever_live_p (reg))
6622 return reg;
6624 /* Before going on to check the call-saved registers we can try a couple
6625 more ways of deducing that r3 is available. The first is when we are
6626 pushing anonymous arguments onto the stack and we have less than 4
6627 registers worth of fixed arguments(*). In this case r3 will be part of
6628 the variable argument list and so we can be sure that it will be
6629 pushed right at the start of the function. Hence it will be available
6630 for the rest of the prologue.
6631 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6632 if (cfun->machine->uses_anonymous_args
6633 && crtl->args.pretend_args_size > 0)
6634 return LAST_ARG_REGNUM;
6636 /* The other case is when we have fixed arguments but less than 4 registers
6637 worth. In this case r3 might be used in the body of the function, but
6638 it is not being used to convey an argument into the function. In theory
6639 we could just check crtl->args.size to see how many bytes are
6640 being passed in argument registers, but it seems that it is unreliable.
6641 Sometimes it will have the value 0 when in fact arguments are being
6642 passed. (See testcase execute/20021111-1.c for an example). So we also
6643 check the args_info.nregs field as well. The problem with this field is
6644 that it makes no allowances for arguments that are passed to the
6645 function but which are not used. Hence we could miss an opportunity
6646 when a function has an unused argument in r3. But it is better to be
6647 safe than to be sorry. */
6648 if (! cfun->machine->uses_anonymous_args
6649 && crtl->args.size >= 0
6650 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6651 && (TARGET_AAPCS_BASED
6652 ? crtl->args.info.aapcs_ncrn < 4
6653 : crtl->args.info.nregs < 4))
6654 return LAST_ARG_REGNUM;
6656 /* Otherwise look for a call-saved register that is going to be pushed. */
6657 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6658 if (pushed_regs_mask & (1 << reg))
6659 return reg;
6661 if (TARGET_THUMB2)
6663 /* Thumb-2 can use high regs. */
6664 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6665 if (pushed_regs_mask & (1 << reg))
6666 return reg;
6668 /* Something went wrong - thumb_compute_save_reg_mask()
6669 should have arranged for a suitable register to be pushed. */
6670 gcc_unreachable ();
6673 static GTY(()) int pic_labelno;
6675 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6676 low register. */
6678 void
6679 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6681 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6683 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6684 return;
6686 gcc_assert (flag_pic);
6688 pic_reg = cfun->machine->pic_reg;
6689 if (TARGET_VXWORKS_RTP)
6691 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6692 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6693 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6695 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6697 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6698 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6700 else
6702 /* We use an UNSPEC rather than a LABEL_REF because this label
6703 never appears in the code stream. */
6705 labelno = GEN_INT (pic_labelno++);
6706 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6707 l1 = gen_rtx_CONST (VOIDmode, l1);
6709 /* On the ARM the PC register contains 'dot + 8' at the time of the
6710 addition, on the Thumb it is 'dot + 4'. */
6711 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6712 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6713 UNSPEC_GOTSYM_OFF);
6714 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6716 if (TARGET_32BIT)
6718 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6720 else /* TARGET_THUMB1 */
6722 if (arm_pic_register != INVALID_REGNUM
6723 && REGNO (pic_reg) > LAST_LO_REGNUM)
6725 /* We will have pushed the pic register, so we should always be
6726 able to find a work register. */
6727 pic_tmp = gen_rtx_REG (SImode,
6728 thumb_find_work_register (saved_regs));
6729 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6730 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6731 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6733 else if (arm_pic_register != INVALID_REGNUM
6734 && arm_pic_register > LAST_LO_REGNUM
6735 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6737 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6738 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6739 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6741 else
6742 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6746 /* Need to emit this whether or not we obey regdecls,
6747 since setjmp/longjmp can cause life info to screw up. */
6748 emit_use (pic_reg);
6751 /* Generate code to load the address of a static var when flag_pic is set. */
6752 static rtx
6753 arm_pic_static_addr (rtx orig, rtx reg)
6755 rtx l1, labelno, offset_rtx, insn;
6757 gcc_assert (flag_pic);
6759 /* We use an UNSPEC rather than a LABEL_REF because this label
6760 never appears in the code stream. */
6761 labelno = GEN_INT (pic_labelno++);
6762 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6763 l1 = gen_rtx_CONST (VOIDmode, l1);
6765 /* On the ARM the PC register contains 'dot + 8' at the time of the
6766 addition, on the Thumb it is 'dot + 4'. */
6767 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6768 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6769 UNSPEC_SYMBOL_OFFSET);
6770 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6772 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6773 return insn;
6776 /* Return nonzero if X is valid as an ARM state addressing register. */
6777 static int
6778 arm_address_register_rtx_p (rtx x, int strict_p)
6780 int regno;
6782 if (!REG_P (x))
6783 return 0;
6785 regno = REGNO (x);
6787 if (strict_p)
6788 return ARM_REGNO_OK_FOR_BASE_P (regno);
6790 return (regno <= LAST_ARM_REGNUM
6791 || regno >= FIRST_PSEUDO_REGISTER
6792 || regno == FRAME_POINTER_REGNUM
6793 || regno == ARG_POINTER_REGNUM);
6796 /* Return TRUE if this rtx is the difference of a symbol and a label,
6797 and will reduce to a PC-relative relocation in the object file.
6798 Expressions like this can be left alone when generating PIC, rather
6799 than forced through the GOT. */
6800 static int
6801 pcrel_constant_p (rtx x)
6803 if (GET_CODE (x) == MINUS)
6804 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6806 return FALSE;
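/* For example, (minus (symbol_ref "x") (label_ref L)) passes this test:
   the symbol-minus-label difference reduces to a PC-relative value, so
   it does not need to go through the GOT when generating PIC.  */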
6809 /* Return true if X will surely end up in an index register after next
6810 splitting pass. */
6811 static bool
6812 will_be_in_index_register (const_rtx x)
6814 /* arm.md: calculate_pic_address will split this into a register. */
6815 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6818 /* Return nonzero if X is a valid ARM state address operand. */
6820 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6821 int strict_p)
6823 bool use_ldrd;
6824 enum rtx_code code = GET_CODE (x);
6826 if (arm_address_register_rtx_p (x, strict_p))
6827 return 1;
6829 use_ldrd = (TARGET_LDRD
6830 && (mode == DImode
6831 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6833 if (code == POST_INC || code == PRE_DEC
6834 || ((code == PRE_INC || code == POST_DEC)
6835 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6836 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6838 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6839 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6840 && GET_CODE (XEXP (x, 1)) == PLUS
6841 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6843 rtx addend = XEXP (XEXP (x, 1), 1);
6845 /* Don't allow ldrd post increment by register because it's hard
6846 to fixup invalid register choices. */
6847 if (use_ldrd
6848 && GET_CODE (x) == POST_MODIFY
6849 && REG_P (addend))
6850 return 0;
6852 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6853 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6856 /* After reload constants split into minipools will have addresses
6857 from a LABEL_REF. */
6858 else if (reload_completed
6859 && (code == LABEL_REF
6860 || (code == CONST
6861 && GET_CODE (XEXP (x, 0)) == PLUS
6862 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6863 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6864 return 1;
6866 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6867 return 0;
6869 else if (code == PLUS)
6871 rtx xop0 = XEXP (x, 0);
6872 rtx xop1 = XEXP (x, 1);
6874 return ((arm_address_register_rtx_p (xop0, strict_p)
6875 && ((CONST_INT_P (xop1)
6876 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6877 || (!strict_p && will_be_in_index_register (xop1))))
6878 || (arm_address_register_rtx_p (xop1, strict_p)
6879 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6882 #if 0
6883 /* Reload currently can't handle MINUS, so disable this for now */
6884 else if (GET_CODE (x) == MINUS)
6886 rtx xop0 = XEXP (x, 0);
6887 rtx xop1 = XEXP (x, 1);
6889 return (arm_address_register_rtx_p (xop0, strict_p)
6890 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6892 #endif
6894 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6895 && code == SYMBOL_REF
6896 && CONSTANT_POOL_ADDRESS_P (x)
6897 && ! (flag_pic
6898 && symbol_mentioned_p (get_pool_constant (x))
6899 && ! pcrel_constant_p (get_pool_constant (x))))
6900 return 1;
6902 return 0;
6905 /* Return nonzero if X is a valid Thumb-2 address operand. */
6906 static int
6907 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6909 bool use_ldrd;
6910 enum rtx_code code = GET_CODE (x);
6912 if (arm_address_register_rtx_p (x, strict_p))
6913 return 1;
6915 use_ldrd = (TARGET_LDRD
6916 && (mode == DImode
6917 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6919 if (code == POST_INC || code == PRE_DEC
6920 || ((code == PRE_INC || code == POST_DEC)
6921 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6922 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6924 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6925 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6926 && GET_CODE (XEXP (x, 1)) == PLUS
6927 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6929 /* Thumb-2 only has autoincrement by constant. */
6930 rtx addend = XEXP (XEXP (x, 1), 1);
6931 HOST_WIDE_INT offset;
6933 if (!CONST_INT_P (addend))
6934 return 0;
6936 offset = INTVAL(addend);
6937 if (GET_MODE_SIZE (mode) <= 4)
6938 return (offset > -256 && offset < 256);
6940 return (use_ldrd && offset > -1024 && offset < 1024
6941 && (offset & 3) == 0);
6944 /* After reload constants split into minipools will have addresses
6945 from a LABEL_REF. */
6946 else if (reload_completed
6947 && (code == LABEL_REF
6948 || (code == CONST
6949 && GET_CODE (XEXP (x, 0)) == PLUS
6950 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6951 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6952 return 1;
6954 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6955 return 0;
6957 else if (code == PLUS)
6959 rtx xop0 = XEXP (x, 0);
6960 rtx xop1 = XEXP (x, 1);
6962 return ((arm_address_register_rtx_p (xop0, strict_p)
6963 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6964 || (!strict_p && will_be_in_index_register (xop1))))
6965 || (arm_address_register_rtx_p (xop1, strict_p)
6966 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6969 /* Normally we can assign constant values to target registers without
6970 the help of the constant pool. But there are cases where we have to
6971 use the constant pool, such as:
6972 1) assigning a label to a register;
6973 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6975 A constant pool access of the form:
6976 (set (reg r0) (mem (symbol_ref (".LC0"))))
6977 will cause the use of the literal pool (later, in arm_reorg).
6978 So here we mark such a form as invalid; the compiler will then
6979 adjust it into:
6980 (set (reg r0) (symbol_ref (".LC0")))
6981 (set (reg r0) (mem (reg r0))).
6982 No extra register is required, and (mem (reg r0)) won't cause the use
6983 of literal pools. */
6984 else if (arm_disable_literal_pool && code == SYMBOL_REF
6985 && CONSTANT_POOL_ADDRESS_P (x))
6986 return 0;
6988 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6989 && code == SYMBOL_REF
6990 && CONSTANT_POOL_ADDRESS_P (x)
6991 && ! (flag_pic
6992 && symbol_mentioned_p (get_pool_constant (x))
6993 && ! pcrel_constant_p (get_pool_constant (x))))
6994 return 1;
6996 return 0;
6999 /* Return nonzero if INDEX is valid for an address index operand in
7000 ARM state. */
7001 static int
7002 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7003 int strict_p)
7005 HOST_WIDE_INT range;
7006 enum rtx_code code = GET_CODE (index);
7008 /* Standard coprocessor addressing modes. */
7009 if (TARGET_HARD_FLOAT
7010 && TARGET_VFP
7011 && (mode == SFmode || mode == DFmode))
7012 return (code == CONST_INT && INTVAL (index) < 1024
7013 && INTVAL (index) > -1024
7014 && (INTVAL (index) & 3) == 0);
7016 /* For quad modes, we restrict the constant offset to be slightly less
7017 than what the instruction format permits. We do this because for
7018 quad mode moves, we will actually decompose them into two separate
7019 double-mode reads or writes. INDEX must therefore be a valid
7020 (double-mode) offset and so should INDEX+8. */
7021 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7022 return (code == CONST_INT
7023 && INTVAL (index) < 1016
7024 && INTVAL (index) > -1024
7025 && (INTVAL (index) & 3) == 0);
7027 /* We have no such constraint on double mode offsets, so we permit the
7028 full range of the instruction format. */
7029 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7030 return (code == CONST_INT
7031 && INTVAL (index) < 1024
7032 && INTVAL (index) > -1024
7033 && (INTVAL (index) & 3) == 0);
7035 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7036 return (code == CONST_INT
7037 && INTVAL (index) < 1024
7038 && INTVAL (index) > -1024
7039 && (INTVAL (index) & 3) == 0);
7041 if (arm_address_register_rtx_p (index, strict_p)
7042 && (GET_MODE_SIZE (mode) <= 4))
7043 return 1;
7045 if (mode == DImode || mode == DFmode)
7047 if (code == CONST_INT)
7049 HOST_WIDE_INT val = INTVAL (index);
7051 if (TARGET_LDRD)
7052 return val > -256 && val < 256;
7053 else
7054 return val > -4096 && val < 4092;
7057 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7060 if (GET_MODE_SIZE (mode) <= 4
7061 && ! (arm_arch4
7062 && (mode == HImode
7063 || mode == HFmode
7064 || (mode == QImode && outer == SIGN_EXTEND))))
7066 if (code == MULT)
7068 rtx xiop0 = XEXP (index, 0);
7069 rtx xiop1 = XEXP (index, 1);
7071 return ((arm_address_register_rtx_p (xiop0, strict_p)
7072 && power_of_two_operand (xiop1, SImode))
7073 || (arm_address_register_rtx_p (xiop1, strict_p)
7074 && power_of_two_operand (xiop0, SImode)));
7076 else if (code == LSHIFTRT || code == ASHIFTRT
7077 || code == ASHIFT || code == ROTATERT)
7079 rtx op = XEXP (index, 1);
7081 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7082 && CONST_INT_P (op)
7083 && INTVAL (op) > 0
7084 && INTVAL (op) <= 31);
7088 /* For ARM v4 we may be doing a sign-extend operation during the
7089 load. */
7090 if (arm_arch4)
7092 if (mode == HImode
7093 || mode == HFmode
7094 || (outer == SIGN_EXTEND && mode == QImode))
7095 range = 256;
7096 else
7097 range = 4096;
7099 else
7100 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7102 return (code == CONST_INT
7103 && INTVAL (index) < range
7104 && INTVAL (index) > -range);
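/* Rough summary of the forms accepted above for a plain SImode access
   in ARM state:
     - a base register alone (register index);
     - (mult reg 2^n), i.e. [rbase, rindex, LSL #n];
     - a register shifted or rotated by a constant 1..31;
     - a constant offset strictly between -4096 and 4096.
   DImode/DFmode with LDRD is limited to constants in (-256, 256), and
   VFP SF/DF accesses to word-aligned constants in (-1024, 1024).  */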
7107 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7108 index operand. i.e. 1, 2, 4 or 8. */
7109 static bool
7110 thumb2_index_mul_operand (rtx op)
7112 HOST_WIDE_INT val;
7114 if (!CONST_INT_P (op))
7115 return false;
7117 val = INTVAL(op);
7118 return (val == 1 || val == 2 || val == 4 || val == 8);
7121 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7122 static int
7123 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7125 enum rtx_code code = GET_CODE (index);
7127 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7128 /* Standard coprocessor addressing modes. */
7129 if (TARGET_HARD_FLOAT
7130 && TARGET_VFP
7131 && (mode == SFmode || mode == DFmode))
7132 return (code == CONST_INT && INTVAL (index) < 1024
7133 /* Thumb-2 only allows an index range greater than -256 for its core
7134 register loads/stores. Since we allow SF/DF in core registers, we have
7135 to use the intersection between -256~4096 (core) and -1024~1024
7136 (coprocessor). */
7137 && INTVAL (index) > -256
7138 && (INTVAL (index) & 3) == 0);
7140 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7142 /* For DImode assume values will usually live in core regs
7143 and only allow LDRD addressing modes. */
7144 if (!TARGET_LDRD || mode != DImode)
7145 return (code == CONST_INT
7146 && INTVAL (index) < 1024
7147 && INTVAL (index) > -1024
7148 && (INTVAL (index) & 3) == 0);
7151 /* For quad modes, we restrict the constant offset to be slightly less
7152 than what the instruction format permits. We do this because for
7153 quad mode moves, we will actually decompose them into two separate
7154 double-mode reads or writes. INDEX must therefore be a valid
7155 (double-mode) offset and so should INDEX+8. */
7156 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7157 return (code == CONST_INT
7158 && INTVAL (index) < 1016
7159 && INTVAL (index) > -1024
7160 && (INTVAL (index) & 3) == 0);
7162 /* We have no such constraint on double mode offsets, so we permit the
7163 full range of the instruction format. */
7164 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7165 return (code == CONST_INT
7166 && INTVAL (index) < 1024
7167 && INTVAL (index) > -1024
7168 && (INTVAL (index) & 3) == 0);
7170 if (arm_address_register_rtx_p (index, strict_p)
7171 && (GET_MODE_SIZE (mode) <= 4))
7172 return 1;
7174 if (mode == DImode || mode == DFmode)
7176 if (code == CONST_INT)
7178 HOST_WIDE_INT val = INTVAL (index);
7179 /* ??? Can we assume ldrd for thumb2? */
7180 /* Thumb-2 ldrd only has reg+const addressing modes. */
7181 /* ldrd supports offsets of +-1020.
7182 However the ldr fallback does not. */
7183 return val > -256 && val < 256 && (val & 3) == 0;
7185 else
7186 return 0;
7189 if (code == MULT)
7191 rtx xiop0 = XEXP (index, 0);
7192 rtx xiop1 = XEXP (index, 1);
7194 return ((arm_address_register_rtx_p (xiop0, strict_p)
7195 && thumb2_index_mul_operand (xiop1))
7196 || (arm_address_register_rtx_p (xiop1, strict_p)
7197 && thumb2_index_mul_operand (xiop0)));
7199 else if (code == ASHIFT)
7201 rtx op = XEXP (index, 1);
7203 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7204 && CONST_INT_P (op)
7205 && INTVAL (op) > 0
7206 && INTVAL (op) <= 3);
7209 return (code == CONST_INT
7210 && INTVAL (index) < 4096
7211 && INTVAL (index) > -256);
7214 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7215 static int
7216 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7218 int regno;
7220 if (!REG_P (x))
7221 return 0;
7223 regno = REGNO (x);
7225 if (strict_p)
7226 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7228 return (regno <= LAST_LO_REGNUM
7229 || regno > LAST_VIRTUAL_REGISTER
7230 || regno == FRAME_POINTER_REGNUM
7231 || (GET_MODE_SIZE (mode) >= 4
7232 && (regno == STACK_POINTER_REGNUM
7233 || regno >= FIRST_PSEUDO_REGISTER
7234 || x == hard_frame_pointer_rtx
7235 || x == arg_pointer_rtx)));
7238 /* Return nonzero if x is a legitimate index register. This is the case
7239 for any base register that can access a QImode object. */
7240 inline static int
7241 thumb1_index_register_rtx_p (rtx x, int strict_p)
7243 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7246 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7248 The AP may be eliminated to either the SP or the FP, so we use the
7249 least common denominator, e.g. SImode, and offsets from 0 to 64.
7251 ??? Verify whether the above is the right approach.
7253 ??? Also, the FP may be eliminated to the SP, so perhaps that
7254 needs special handling also.
7256 ??? Look at how the mips16 port solves this problem. It probably uses
7257 better ways to solve some of these problems.
7259 Although it is not incorrect, we don't accept QImode and HImode
7260 addresses based on the frame pointer or arg pointer until the
7261 reload pass starts. This is so that eliminating such addresses
7262 into stack based ones won't produce impossible code. */
7264 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7266 /* ??? Not clear if this is right. Experiment. */
7267 if (GET_MODE_SIZE (mode) < 4
7268 && !(reload_in_progress || reload_completed)
7269 && (reg_mentioned_p (frame_pointer_rtx, x)
7270 || reg_mentioned_p (arg_pointer_rtx, x)
7271 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7272 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7273 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7274 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7275 return 0;
7277 /* Accept any base register. SP only in SImode or larger. */
7278 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7279 return 1;
7281 /* This is PC relative data before arm_reorg runs. */
7282 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7283 && GET_CODE (x) == SYMBOL_REF
7284 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7285 return 1;
7287 /* This is PC relative data after arm_reorg runs. */
7288 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7289 && reload_completed
7290 && (GET_CODE (x) == LABEL_REF
7291 || (GET_CODE (x) == CONST
7292 && GET_CODE (XEXP (x, 0)) == PLUS
7293 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7294 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7295 return 1;
7297 /* Post-inc indexing only supported for SImode and larger. */
7298 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7299 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7300 return 1;
7302 else if (GET_CODE (x) == PLUS)
7304 /* REG+REG address can be any two index registers. */
7305 /* We disallow FRAME+REG addressing since we know that FRAME
7306 will be replaced with STACK, and SP relative addressing only
7307 permits SP+OFFSET. */
7308 if (GET_MODE_SIZE (mode) <= 4
7309 && XEXP (x, 0) != frame_pointer_rtx
7310 && XEXP (x, 1) != frame_pointer_rtx
7311 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7312 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7313 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7314 return 1;
7316 /* REG+const has 5-7 bit offset for non-SP registers. */
7317 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7318 || XEXP (x, 0) == arg_pointer_rtx)
7319 && CONST_INT_P (XEXP (x, 1))
7320 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7321 return 1;
7323 /* REG+const has 10-bit offset for SP, but only SImode and
7324 larger is supported. */
7325 /* ??? Should probably check for DI/DFmode overflow here
7326 just like GO_IF_LEGITIMATE_OFFSET does. */
7327 else if (REG_P (XEXP (x, 0))
7328 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7329 && GET_MODE_SIZE (mode) >= 4
7330 && CONST_INT_P (XEXP (x, 1))
7331 && INTVAL (XEXP (x, 1)) >= 0
7332 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7333 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7334 return 1;
7336 else if (REG_P (XEXP (x, 0))
7337 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7338 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7339 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7340 && REGNO (XEXP (x, 0))
7341 <= LAST_VIRTUAL_POINTER_REGISTER))
7342 && GET_MODE_SIZE (mode) >= 4
7343 && CONST_INT_P (XEXP (x, 1))
7344 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7345 return 1;
7348 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7349 && GET_MODE_SIZE (mode) == 4
7350 && GET_CODE (x) == SYMBOL_REF
7351 && CONSTANT_POOL_ADDRESS_P (x)
7352 && ! (flag_pic
7353 && symbol_mentioned_p (get_pool_constant (x))
7354 && ! pcrel_constant_p (get_pool_constant (x))))
7355 return 1;
7357 return 0;
7360 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7361 instruction of mode MODE. */
7363 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7365 switch (GET_MODE_SIZE (mode))
7367 case 1:
7368 return val >= 0 && val < 32;
7370 case 2:
7371 return val >= 0 && val < 64 && (val & 1) == 0;
7373 default:
7374 return (val >= 0
7375 && (val + GET_MODE_SIZE (mode)) <= 128
7376 && (val & 3) == 0);
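/* Concretely, the ranges above allow:
     QImode: offsets 0..31
     HImode: even offsets 0..62
     SImode: word-aligned offsets 0..124 (larger modes a little less)
   matching the 5-bit immediate of the 16-bit Thumb load/store forms,
   scaled by the access size.  */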
7380 bool
7381 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7383 if (TARGET_ARM)
7384 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7385 else if (TARGET_THUMB2)
7386 return thumb2_legitimate_address_p (mode, x, strict_p);
7387 else /* if (TARGET_THUMB1) */
7388 return thumb1_legitimate_address_p (mode, x, strict_p);
7391 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7393 Given an rtx X being reloaded into a reg required to be
7394 in class CLASS, return the class of reg to actually use.
7395 In general this is just CLASS, but for the Thumb core registers and
7396 immediate constants we prefer a LO_REGS class or a subset. */
7398 static reg_class_t
7399 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7401 if (TARGET_32BIT)
7402 return rclass;
7403 else
7405 if (rclass == GENERAL_REGS)
7406 return LO_REGS;
7407 else
7408 return rclass;
7412 /* Build the SYMBOL_REF for __tls_get_addr. */
7414 static GTY(()) rtx tls_get_addr_libfunc;
7416 static rtx
7417 get_tls_get_addr (void)
7419 if (!tls_get_addr_libfunc)
7420 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7421 return tls_get_addr_libfunc;
7425 arm_load_tp (rtx target)
7427 if (!target)
7428 target = gen_reg_rtx (SImode);
7430 if (TARGET_HARD_TP)
7432 /* Can return in any reg. */
7433 emit_insn (gen_load_tp_hard (target));
7435 else
7437 /* Always returned in r0. Immediately copy the result into a pseudo,
7438 otherwise other uses of r0 (e.g. setting up function arguments) may
7439 clobber the value. */
7441 rtx tmp;
7443 emit_insn (gen_load_tp_soft ());
7445 tmp = gen_rtx_REG (SImode, 0);
7446 emit_move_insn (target, tmp);
7448 return target;
7451 static rtx
7452 load_tls_operand (rtx x, rtx reg)
7454 rtx tmp;
7456 if (reg == NULL_RTX)
7457 reg = gen_reg_rtx (SImode);
7459 tmp = gen_rtx_CONST (SImode, x);
7461 emit_move_insn (reg, tmp);
7463 return reg;
7466 static rtx
7467 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7469 rtx insns, label, labelno, sum;
7471 gcc_assert (reloc != TLS_DESCSEQ);
7472 start_sequence ();
7474 labelno = GEN_INT (pic_labelno++);
7475 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7476 label = gen_rtx_CONST (VOIDmode, label);
7478 sum = gen_rtx_UNSPEC (Pmode,
7479 gen_rtvec (4, x, GEN_INT (reloc), label,
7480 GEN_INT (TARGET_ARM ? 8 : 4)),
7481 UNSPEC_TLS);
7482 reg = load_tls_operand (sum, reg);
7484 if (TARGET_ARM)
7485 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7486 else
7487 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7489 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7490 LCT_PURE, /* LCT_CONST? */
7491 Pmode, 1, reg, Pmode);
7493 insns = get_insns ();
7494 end_sequence ();
7496 return insns;
7499 static rtx
7500 arm_tls_descseq_addr (rtx x, rtx reg)
7502 rtx labelno = GEN_INT (pic_labelno++);
7503 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7504 rtx sum = gen_rtx_UNSPEC (Pmode,
7505 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7506 gen_rtx_CONST (VOIDmode, label),
7507 GEN_INT (!TARGET_ARM)),
7508 UNSPEC_TLS);
7509 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7511 emit_insn (gen_tlscall (x, labelno));
7512 if (!reg)
7513 reg = gen_reg_rtx (SImode);
7514 else
7515 gcc_assert (REGNO (reg) != 0);
7517 emit_move_insn (reg, reg0);
7519 return reg;
7523 legitimize_tls_address (rtx x, rtx reg)
7525 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7526 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7528 switch (model)
7530 case TLS_MODEL_GLOBAL_DYNAMIC:
7531 if (TARGET_GNU2_TLS)
7533 reg = arm_tls_descseq_addr (x, reg);
7535 tp = arm_load_tp (NULL_RTX);
7537 dest = gen_rtx_PLUS (Pmode, tp, reg);
7539 else
7541 /* Original scheme */
7542 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7543 dest = gen_reg_rtx (Pmode);
7544 emit_libcall_block (insns, dest, ret, x);
7546 return dest;
7548 case TLS_MODEL_LOCAL_DYNAMIC:
7549 if (TARGET_GNU2_TLS)
7551 reg = arm_tls_descseq_addr (x, reg);
7553 tp = arm_load_tp (NULL_RTX);
7555 dest = gen_rtx_PLUS (Pmode, tp, reg);
7557 else
7559 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7561 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7562 share the LDM result with other LD model accesses. */
7563 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7564 UNSPEC_TLS);
7565 dest = gen_reg_rtx (Pmode);
7566 emit_libcall_block (insns, dest, ret, eqv);
7568 /* Load the addend. */
7569 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7570 GEN_INT (TLS_LDO32)),
7571 UNSPEC_TLS);
7572 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7573 dest = gen_rtx_PLUS (Pmode, dest, addend);
7575 return dest;
7577 case TLS_MODEL_INITIAL_EXEC:
7578 labelno = GEN_INT (pic_labelno++);
7579 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7580 label = gen_rtx_CONST (VOIDmode, label);
7581 sum = gen_rtx_UNSPEC (Pmode,
7582 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7583 GEN_INT (TARGET_ARM ? 8 : 4)),
7584 UNSPEC_TLS);
7585 reg = load_tls_operand (sum, reg);
7587 if (TARGET_ARM)
7588 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7589 else if (TARGET_THUMB2)
7590 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7591 else
7593 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7594 emit_move_insn (reg, gen_const_mem (SImode, reg));
7597 tp = arm_load_tp (NULL_RTX);
7599 return gen_rtx_PLUS (Pmode, tp, reg);
7601 case TLS_MODEL_LOCAL_EXEC:
7602 tp = arm_load_tp (NULL_RTX);
7604 reg = gen_rtx_UNSPEC (Pmode,
7605 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7606 UNSPEC_TLS);
7607 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7609 return gen_rtx_PLUS (Pmode, tp, reg);
7611 default:
7612 abort ();
7616 /* Try machine-dependent ways of modifying an illegitimate address
7617 to be legitimate. If we find one, return the new, valid address. */
7619 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7621 if (arm_tls_referenced_p (x))
7623 rtx addend = NULL;
7625 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7627 addend = XEXP (XEXP (x, 0), 1);
7628 x = XEXP (XEXP (x, 0), 0);
7631 if (GET_CODE (x) != SYMBOL_REF)
7632 return x;
7634 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7636 x = legitimize_tls_address (x, NULL_RTX);
7638 if (addend)
7640 x = gen_rtx_PLUS (SImode, x, addend);
7641 orig_x = x;
7643 else
7644 return x;
7647 if (!TARGET_ARM)
7649 /* TODO: legitimize_address for Thumb2. */
7650 if (TARGET_THUMB2)
7651 return x;
7652 return thumb_legitimize_address (x, orig_x, mode);
7655 if (GET_CODE (x) == PLUS)
7657 rtx xop0 = XEXP (x, 0);
7658 rtx xop1 = XEXP (x, 1);
7660 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7661 xop0 = force_reg (SImode, xop0);
7663 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7664 && !symbol_mentioned_p (xop1))
7665 xop1 = force_reg (SImode, xop1);
7667 if (ARM_BASE_REGISTER_RTX_P (xop0)
7668 && CONST_INT_P (xop1))
7670 HOST_WIDE_INT n, low_n;
7671 rtx base_reg, val;
7672 n = INTVAL (xop1);
7674 /* VFP addressing modes actually allow greater offsets, but for
7675 now we just stick with the lowest common denominator. */
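/* As an illustration of the split performed below (hypothetical
   offset): for a DImode access at offset 0x1e, low_n starts as 0xe;
   since that is greater than 4 we rebias to n = 0x20 and low_n = -2,
   so the base register is loaded with xop0 + 0x20 and the access
   itself uses offset -2.  */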
7676 if (mode == DImode
7677 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7679 low_n = n & 0x0f;
7680 n &= ~0x0f;
7681 if (low_n > 4)
7683 n += 16;
7684 low_n -= 16;
7687 else
7689 low_n = ((mode) == TImode ? 0
7690 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7691 n -= low_n;
7694 base_reg = gen_reg_rtx (SImode);
7695 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7696 emit_move_insn (base_reg, val);
7697 x = plus_constant (Pmode, base_reg, low_n);
7699 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7700 x = gen_rtx_PLUS (SImode, xop0, xop1);
7703 /* XXX We don't allow MINUS any more -- see comment in
7704 arm_legitimate_address_outer_p (). */
7705 else if (GET_CODE (x) == MINUS)
7707 rtx xop0 = XEXP (x, 0);
7708 rtx xop1 = XEXP (x, 1);
7710 if (CONSTANT_P (xop0))
7711 xop0 = force_reg (SImode, xop0);
7713 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7714 xop1 = force_reg (SImode, xop1);
7716 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7717 x = gen_rtx_MINUS (SImode, xop0, xop1);
7720 /* Make sure to take full advantage of the pre-indexed addressing mode
7721 with absolute addresses which often allows for the base register to
7722 be factorized for multiple adjacent memory references, and it might
7723 even allow the minipool to be avoided entirely. */
7724 else if (CONST_INT_P (x) && optimize > 0)
7726 unsigned int bits;
7727 HOST_WIDE_INT mask, base, index;
7728 rtx base_reg;
7730 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7731 use an 8-bit index. So let's use a 12-bit index for SImode only and
7732 hope that arm_gen_constant will enable ldrb to use more bits. */
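/* Illustrative example of the split below: for an SImode access to
   absolute address 0x1234, bits is 12, so base = 0x1000 and
   index = 0x234; 0x1000 has few bits set, so the base is kept as-is,
   loaded into a register, and the access becomes [base_reg, #0x234].  */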
7733 bits = (mode == SImode) ? 12 : 8;
7734 mask = (1 << bits) - 1;
7735 base = INTVAL (x) & ~mask;
7736 index = INTVAL (x) & mask;
7737 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7739 /* It'll most probably be more efficient to generate the base
7740 with more bits set and use a negative index instead. */
7741 base |= mask;
7742 index -= mask;
7744 base_reg = force_reg (SImode, GEN_INT (base));
7745 x = plus_constant (Pmode, base_reg, index);
7748 if (flag_pic)
7750 /* We need to find and carefully transform any SYMBOL and LABEL
7751 references; so go back to the original address expression. */
7752 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7754 if (new_x != orig_x)
7755 x = new_x;
7758 return x;
7762 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7763 to be legitimate. If we find one, return the new, valid address. */
7765 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7767 if (GET_CODE (x) == PLUS
7768 && CONST_INT_P (XEXP (x, 1))
7769 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7770 || INTVAL (XEXP (x, 1)) < 0))
7772 rtx xop0 = XEXP (x, 0);
7773 rtx xop1 = XEXP (x, 1);
7774 HOST_WIDE_INT offset = INTVAL (xop1);
7776 /* Try and fold the offset into a biasing of the base register and
7777 then offsetting that. Don't do this when optimizing for space
7778 since it can cause too many CSEs. */
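/* A worked example of the folding below (illustrative only): for an
   SImode access with offset 260, delta becomes 260 - (256 - 4) = 8,
   so the base register is advanced by 252 and the load then uses the
   in-range offset #8.  */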
7779 if (optimize_size && offset >= 0
7780 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7782 HOST_WIDE_INT delta;
7784 if (offset >= 256)
7785 delta = offset - (256 - GET_MODE_SIZE (mode));
7786 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7787 delta = 31 * GET_MODE_SIZE (mode);
7788 else
7789 delta = offset & (~31 * GET_MODE_SIZE (mode));
7791 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7792 NULL_RTX);
7793 x = plus_constant (Pmode, xop0, delta);
7795 else if (offset < 0 && offset > -256)
7796 /* Small negative offsets are best done with a subtract before the
7797 dereference; forcing these into a register normally takes two
7798 instructions. */
7799 x = force_operand (x, NULL_RTX);
7800 else
7802 /* For the remaining cases, force the constant into a register. */
7803 xop1 = force_reg (SImode, xop1);
7804 x = gen_rtx_PLUS (SImode, xop0, xop1);
7807 else if (GET_CODE (x) == PLUS
7808 && s_register_operand (XEXP (x, 1), SImode)
7809 && !s_register_operand (XEXP (x, 0), SImode))
7811 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7813 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7816 if (flag_pic)
7818 /* We need to find and carefully transform any SYMBOL and LABEL
7819 references; so go back to the original address expression. */
7820 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7822 if (new_x != orig_x)
7823 x = new_x;
7826 return x;
7829 bool
7830 arm_legitimize_reload_address (rtx *p,
7831 machine_mode mode,
7832 int opnum, int type,
7833 int ind_levels ATTRIBUTE_UNUSED)
7835 /* We must recognize output that we have already generated ourselves. */
7836 if (GET_CODE (*p) == PLUS
7837 && GET_CODE (XEXP (*p, 0)) == PLUS
7838 && REG_P (XEXP (XEXP (*p, 0), 0))
7839 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7840 && CONST_INT_P (XEXP (*p, 1)))
7842 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7843 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7844 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7845 return true;
7848 if (GET_CODE (*p) == PLUS
7849 && REG_P (XEXP (*p, 0))
7850 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7851 /* If the base register is equivalent to a constant, let the generic
7852 code handle it. Otherwise we will run into problems if a future
7853 reload pass decides to rematerialize the constant. */
7854 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7855 && CONST_INT_P (XEXP (*p, 1)))
7857 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7858 HOST_WIDE_INT low, high;
7860 /* Detect coprocessor load/stores. */
7861 bool coproc_p = ((TARGET_HARD_FLOAT
7862 && TARGET_VFP
7863 && (mode == SFmode || mode == DFmode))
7864 || (TARGET_REALLY_IWMMXT
7865 && VALID_IWMMXT_REG_MODE (mode))
7866 || (TARGET_NEON
7867 && (VALID_NEON_DREG_MODE (mode)
7868 || VALID_NEON_QREG_MODE (mode))));
7870 /* For some conditions, bail out when the low two bits of the offset are nonzero. */
7871 if ((val & 0x3) != 0
7872 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7873 && (coproc_p
7874 /* For DI, and DF under soft-float: */
7875 || ((mode == DImode || mode == DFmode)
7876 /* Without ldrd, we use stm/ldm, which does not
7877 fare well with unaligned offsets. */
7878 && (! TARGET_LDRD
7879 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7880 || TARGET_THUMB2))))
7881 return false;
7883 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7884 where the (reg+high) part gets turned into a reload add insn,
7885 we try to decompose the index into high/low values that can often
7886 also lead to better reload CSE.
7887 For example:
7888 ldr r0, [r2, #4100] // Offset too large
7889 ldr r1, [r2, #4104] // Offset too large
7891 is best reloaded as:
7892 add t1, r2, #4096
7893 ldr r0, [t1, #4]
7894 add t2, r2, #4096
7895 ldr r1, [t2, #8]
7897 which post-reload CSE can simplify in most cases to eliminate the
7898 second add instruction:
7899 add t1, r2, #4096
7900 ldr r0, [t1, #4]
7901 ldr r1, [t1, #8]
7903 The idea here is that we want to split out the bits of the constant
7904 as a mask, rather than by subtracting the maximum offset that the
7905 respective type of load/store used can handle.
7907 A negative low part can still be useful even if
7908 the overall offset is positive; sometimes this may lead to an immediate
7909 that can be constructed with fewer instructions.
7910 For example:
7911 ldr r0, [r2, #0x3FFFFC]
7913 This is best reloaded as:
7914 add t1, r2, #0x400000
7915 ldr r0, [t1, #-4]
7917 The trick for spotting this for a load insn with N bits of offset
7918 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7919 negative offset that is going to make bit N and all the bits below
7920 it become zero in the remainder part.
7922 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7923 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7924 used in most cases of ARM load/store instructions. */
7926 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7927 (((VAL) & ((1 << (N)) - 1)) \
7928 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7929 : 0)
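/* Worked example, matching the 0x3FFFFC case above: with VAL =
   0x3FFFFC and N = 10 the low 10 bits are nonzero, so the macro
   yields ((0x3FFFFC & 0x7FF) ^ 0x400) - 0x400 = -4; the remaining
   high part is 0x400000, giving the add / ldr #-4 sequence.  */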
7931 if (coproc_p)
7933 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7935 /* NEON quad-word load/stores are made of two double-word accesses,
7936 so the valid index range is reduced by 8. Treat as 9-bit range if
7937 we go over it. */
7938 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7939 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7941 else if (GET_MODE_SIZE (mode) == 8)
7943 if (TARGET_LDRD)
7944 low = (TARGET_THUMB2
7945 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7946 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7947 else
7948 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7949 to access doublewords. The supported load/store offsets are
7950 -8, -4, and 4, which we try to produce here. */
7951 low = ((val & 0xf) ^ 0x8) - 0x8;
7953 else if (GET_MODE_SIZE (mode) < 8)
7955 /* NEON element load/stores do not have an offset. */
7956 if (TARGET_NEON_FP16 && mode == HFmode)
7957 return false;
7959 if (TARGET_THUMB2)
7961 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7962 Try the wider 12-bit range first, and re-try if the result
7963 is out of range. */
7964 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7965 if (low < -255)
7966 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7968 else
7970 if (mode == HImode || mode == HFmode)
7972 if (arm_arch4)
7973 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7974 else
7976 /* The storehi/movhi_bytes fallbacks can use only
7977 [-4094,+4094] of the full ldrb/strb index range. */
7978 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7979 if (low == 4095 || low == -4095)
7980 return false;
7983 else
7984 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7987 else
7988 return false;
7990 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7991 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7992 - (unsigned HOST_WIDE_INT) 0x80000000);
7993 /* Check for overflow or zero */
7994 if (low == 0 || high == 0 || (high + low != val))
7995 return false;
7997 /* Reload the high part into a base reg; leave the low part
7998 in the mem.
7999 Note that replacing this gen_rtx_PLUS with plus_constant is
8000 wrong in this case because we rely on the
8001 (plus (plus reg c1) c2) structure being preserved so that
8002 XEXP (*p, 0) in push_reload below uses the correct term. */
8003 *p = gen_rtx_PLUS (GET_MODE (*p),
8004 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8005 GEN_INT (high)),
8006 GEN_INT (low));
8007 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8008 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8009 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8010 return true;
8013 return false;
8017 thumb_legitimize_reload_address (rtx *x_p,
8018 machine_mode mode,
8019 int opnum, int type,
8020 int ind_levels ATTRIBUTE_UNUSED)
8022 rtx x = *x_p;
8024 if (GET_CODE (x) == PLUS
8025 && GET_MODE_SIZE (mode) < 4
8026 && REG_P (XEXP (x, 0))
8027 && XEXP (x, 0) == stack_pointer_rtx
8028 && CONST_INT_P (XEXP (x, 1))
8029 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8031 rtx orig_x = x;
8033 x = copy_rtx (x);
8034 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8035 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8036 return x;
8039 /* If both registers are hi-regs, then it's better to reload the
8040 entire expression rather than each register individually. That
8041 only requires one reload register rather than two. */
8042 if (GET_CODE (x) == PLUS
8043 && REG_P (XEXP (x, 0))
8044 && REG_P (XEXP (x, 1))
8045 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8046 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8048 rtx orig_x = x;
8050 x = copy_rtx (x);
8051 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8052 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8053 return x;
8056 return NULL;
8059 /* Return TRUE if X contains any TLS symbol references. */
8061 bool
8062 arm_tls_referenced_p (rtx x)
8064 if (! TARGET_HAVE_TLS)
8065 return false;
8067 subrtx_iterator::array_type array;
8068 FOR_EACH_SUBRTX (iter, array, x, ALL)
8070 const_rtx x = *iter;
8071 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8072 return true;
8074 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8075 TLS offsets, not real symbol references. */
8076 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8077 iter.skip_subrtxes ();
8079 return false;
8082 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8084 On the ARM, allow any integer (invalid ones are removed later by insn
8085 patterns), nice doubles and symbol_refs which refer to the function's
8086 constant pool XXX.
8088 When generating PIC, allow anything. */
8090 static bool
8091 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8093 /* At present, we have no support for Neon structure constants, so forbid
8094 them here. It might be possible to handle simple cases like 0 and -1
8095 in future. */
8096 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8097 return false;
8099 return flag_pic || !label_mentioned_p (x);
8102 static bool
8103 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8105 return (CONST_INT_P (x)
8106 || CONST_DOUBLE_P (x)
8107 || CONSTANT_ADDRESS_P (x)
8108 || flag_pic);
8111 static bool
8112 arm_legitimate_constant_p (machine_mode mode, rtx x)
8114 return (!arm_cannot_force_const_mem (mode, x)
8115 && (TARGET_32BIT
8116 ? arm_legitimate_constant_p_1 (mode, x)
8117 : thumb_legitimate_constant_p (mode, x)));
8120 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8122 static bool
8123 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8125 rtx base, offset;
8127 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8129 split_const (x, &base, &offset);
8130 if (GET_CODE (base) == SYMBOL_REF
8131 && !offset_within_block_p (base, INTVAL (offset)))
8132 return true;
8134 return arm_tls_referenced_p (x);
8137 #define REG_OR_SUBREG_REG(X) \
8138 (REG_P (X) \
8139 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8141 #define REG_OR_SUBREG_RTX(X) \
8142 (REG_P (X) ? (X) : SUBREG_REG (X))
8144 static inline int
8145 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8147 machine_mode mode = GET_MODE (x);
8148 int total, words;
8150 switch (code)
8152 case ASHIFT:
8153 case ASHIFTRT:
8154 case LSHIFTRT:
8155 case ROTATERT:
8156 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8158 case PLUS:
8159 case MINUS:
8160 case COMPARE:
8161 case NEG:
8162 case NOT:
8163 return COSTS_N_INSNS (1);
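/* The constant-multiplier path in the MULT case below estimates cycles
   by consuming the multiplier two bits per iteration; e.g. a
   multiplier of 100 (seven significant bits) takes four iterations,
   giving COSTS_N_INSNS (2) + 4.  This is only a rough estimate.  */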
8165 case MULT:
8166 if (CONST_INT_P (XEXP (x, 1)))
8168 int cycles = 0;
8169 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8171 while (i)
8173 i >>= 2;
8174 cycles++;
8176 return COSTS_N_INSNS (2) + cycles;
8178 return COSTS_N_INSNS (1) + 16;
8180 case SET:
8181 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8182 the mode. */
8183 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8184 return (COSTS_N_INSNS (words)
8185 + 4 * ((MEM_P (SET_SRC (x)))
8186 + MEM_P (SET_DEST (x))));
8188 case CONST_INT:
8189 if (outer == SET)
8191 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8192 return 0;
8193 if (thumb_shiftable_const (INTVAL (x)))
8194 return COSTS_N_INSNS (2);
8195 return COSTS_N_INSNS (3);
8197 else if ((outer == PLUS || outer == COMPARE)
8198 && INTVAL (x) < 256 && INTVAL (x) > -256)
8199 return 0;
8200 else if ((outer == IOR || outer == XOR || outer == AND)
8201 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8202 return COSTS_N_INSNS (1);
8203 else if (outer == AND)
8205 int i;
8206 /* This duplicates the tests in the andsi3 expander. */
8207 for (i = 9; i <= 31; i++)
8208 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8209 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8210 return COSTS_N_INSNS (2);
8212 else if (outer == ASHIFT || outer == ASHIFTRT
8213 || outer == LSHIFTRT)
8214 return 0;
8215 return COSTS_N_INSNS (2);
8217 case CONST:
8218 case CONST_DOUBLE:
8219 case LABEL_REF:
8220 case SYMBOL_REF:
8221 return COSTS_N_INSNS (3);
8223 case UDIV:
8224 case UMOD:
8225 case DIV:
8226 case MOD:
8227 return 100;
8229 case TRUNCATE:
8230 return 99;
8232 case AND:
8233 case XOR:
8234 case IOR:
8235 /* XXX guess. */
8236 return 8;
8238 case MEM:
8239 /* XXX another guess. */
8240 /* Memory costs quite a lot for the first word, but subsequent words
8241 load at the equivalent of a single insn each. */
8242 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8243 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8244 ? 4 : 0));
8246 case IF_THEN_ELSE:
8247 /* XXX a guess. */
8248 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8249 return 14;
8250 return 2;
8252 case SIGN_EXTEND:
8253 case ZERO_EXTEND:
8254 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8255 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8257 if (mode == SImode)
8258 return total;
8260 if (arm_arch6)
8261 return total + COSTS_N_INSNS (1);
8263 /* Assume a two-shift sequence. Increase the cost slightly so
8264 we prefer actual shifts over an extend operation. */
8265 return total + 1 + COSTS_N_INSNS (2);
8267 default:
8268 return 99;
8272 static inline bool
8273 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8275 machine_mode mode = GET_MODE (x);
8276 enum rtx_code subcode;
8277 rtx operand;
8278 enum rtx_code code = GET_CODE (x);
8279 *total = 0;
8281 switch (code)
8283 case MEM:
8284 /* Memory costs quite a lot for the first word, but subsequent words
8285 load at the equivalent of a single insn each. */
8286 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8287 return true;
8289 case DIV:
8290 case MOD:
8291 case UDIV:
8292 case UMOD:
8293 if (TARGET_HARD_FLOAT && mode == SFmode)
8294 *total = COSTS_N_INSNS (2);
8295 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8296 *total = COSTS_N_INSNS (4);
8297 else
8298 *total = COSTS_N_INSNS (20);
8299 return false;
8301 case ROTATE:
8302 if (REG_P (XEXP (x, 1)))
8303 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8304 else if (!CONST_INT_P (XEXP (x, 1)))
8305 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8307 /* Fall through */
8308 case ROTATERT:
8309 if (mode != SImode)
8311 *total += COSTS_N_INSNS (4);
8312 return true;
8315 /* Fall through */
8316 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8317 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8318 if (mode == DImode)
8320 *total += COSTS_N_INSNS (3);
8321 return true;
8324 *total += COSTS_N_INSNS (1);
8325 /* Increase the cost of complex shifts because they aren't any faster,
8326 and they reduce dual-issue opportunities. */
8327 if (arm_tune_cortex_a9
8328 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8329 ++*total;
8331 return true;
8333 case MINUS:
8334 if (mode == DImode)
8336 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8337 if (CONST_INT_P (XEXP (x, 0))
8338 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8340 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8341 return true;
8344 if (CONST_INT_P (XEXP (x, 1))
8345 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8347 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8348 return true;
8351 return false;
8354 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8356 if (TARGET_HARD_FLOAT
8357 && (mode == SFmode
8358 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8360 *total = COSTS_N_INSNS (1);
8361 if (CONST_DOUBLE_P (XEXP (x, 0))
8362 && arm_const_double_rtx (XEXP (x, 0)))
8364 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8365 return true;
8368 if (CONST_DOUBLE_P (XEXP (x, 1))
8369 && arm_const_double_rtx (XEXP (x, 1)))
8371 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8372 return true;
8375 return false;
8377 *total = COSTS_N_INSNS (20);
8378 return false;
8381 *total = COSTS_N_INSNS (1);
8382 if (CONST_INT_P (XEXP (x, 0))
8383 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8385 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8386 return true;
8389 subcode = GET_CODE (XEXP (x, 1));
8390 if (subcode == ASHIFT || subcode == ASHIFTRT
8391 || subcode == LSHIFTRT
8392 || subcode == ROTATE || subcode == ROTATERT)
8394 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8396 return true;
8399 /* A shift as a part of RSB costs no more than RSB itself. */
8400 if (GET_CODE (XEXP (x, 0)) == MULT
8401 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8404 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8405 return true;
8408 if (subcode == MULT
8409 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8411 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8412 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8413 return true;
8416 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8417 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8419 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8420 if (REG_P (XEXP (XEXP (x, 1), 0))
8421 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8422 *total += COSTS_N_INSNS (1);
8424 return true;
8427 /* Fall through */
8429 case PLUS:
8430 if (code == PLUS && arm_arch6 && mode == SImode
8431 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8432 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8434 *total = COSTS_N_INSNS (1);
8435 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8436 0, speed);
8437 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8438 return true;
8441 /* MLA: All arguments must be registers. We filter out
8442 multiplication by a power of two, so that we fall down into
8443 the code below. */
8444 if (GET_CODE (XEXP (x, 0)) == MULT
8445 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8447 /* The cost comes from the cost of the multiply. */
8448 return false;
8451 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8453 if (TARGET_HARD_FLOAT
8454 && (mode == SFmode
8455 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8457 *total = COSTS_N_INSNS (1);
8458 if (CONST_DOUBLE_P (XEXP (x, 1))
8459 && arm_const_double_rtx (XEXP (x, 1)))
8461 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8462 return true;
8465 return false;
8468 *total = COSTS_N_INSNS (20);
8469 return false;
8472 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8473 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8475 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8476 if (REG_P (XEXP (XEXP (x, 0), 0))
8477 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8478 *total += COSTS_N_INSNS (1);
8479 return true;
8482 /* Fall through */
8484 case AND: case XOR: case IOR:
8486 /* Normally the frame registers will be spilt into reg+const during
8487 reload, so it is a bad idea to combine them with other instructions,
8488 since then they might not be moved outside of loops. As a compromise
8489 we allow integration with ops that have a constant as their second
8490 operand. */
8491 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8492 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8493 && !CONST_INT_P (XEXP (x, 1)))
8494 *total = COSTS_N_INSNS (1);
8496 if (mode == DImode)
8498 *total += COSTS_N_INSNS (2);
8499 if (CONST_INT_P (XEXP (x, 1))
8500 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8502 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8503 return true;
8506 return false;
8509 *total += COSTS_N_INSNS (1);
8510 if (CONST_INT_P (XEXP (x, 1))
8511 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8513 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8514 return true;
8516 subcode = GET_CODE (XEXP (x, 0));
8517 if (subcode == ASHIFT || subcode == ASHIFTRT
8518 || subcode == LSHIFTRT
8519 || subcode == ROTATE || subcode == ROTATERT)
8521 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8522 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8523 return true;
8526 if (subcode == MULT
8527 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8529 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8530 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8531 return true;
8534 if (subcode == UMIN || subcode == UMAX
8535 || subcode == SMIN || subcode == SMAX)
8537 *total = COSTS_N_INSNS (3);
8538 return true;
8541 return false;
8543 case MULT:
8544 /* This should have been handled by the CPU specific routines. */
8545 gcc_unreachable ();
8547 case TRUNCATE:
8548 if (arm_arch3m && mode == SImode
8549 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8550 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8551 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8552 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8553 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8554 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8556 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8557 return true;
8559 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8560 return false;
8562 case NEG:
8563 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8565 if (TARGET_HARD_FLOAT
8566 && (mode == SFmode
8567 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8569 *total = COSTS_N_INSNS (1);
8570 return false;
8572 *total = COSTS_N_INSNS (2);
8573 return false;
8576 /* Fall through */
8577 case NOT:
8578 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8579 if (mode == SImode && code == NOT)
8581 subcode = GET_CODE (XEXP (x, 0));
8582 if (subcode == ASHIFT || subcode == ASHIFTRT
8583 || subcode == LSHIFTRT
8584 || subcode == ROTATE || subcode == ROTATERT
8585 || (subcode == MULT
8586 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8588 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8589 /* Register shifts cost an extra cycle. */
8590 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8591 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8592 subcode, 1, speed);
8593 return true;
8597 return false;
8599 case IF_THEN_ELSE:
8600 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8602 *total = COSTS_N_INSNS (4);
8603 return true;
8606 operand = XEXP (x, 0);
8608 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8609 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8610 && REG_P (XEXP (operand, 0))
8611 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8612 *total += COSTS_N_INSNS (1);
8613 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8614 + rtx_cost (XEXP (x, 2), code, 2, speed));
8615 return true;
8617 case NE:
8618 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8620 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8621 return true;
8623 goto scc_insn;
8625 case GE:
8626 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8627 && mode == SImode && XEXP (x, 1) == const0_rtx)
8629 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8630 return true;
8632 goto scc_insn;
8634 case LT:
8635 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8636 && mode == SImode && XEXP (x, 1) == const0_rtx)
8638 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8639 return true;
8641 goto scc_insn;
8643 case EQ:
8644 case GT:
8645 case LE:
8646 case GEU:
8647 case LTU:
8648 case GTU:
8649 case LEU:
8650 case UNORDERED:
8651 case ORDERED:
8652 case UNEQ:
8653 case UNGE:
8654 case UNLT:
8655 case UNGT:
8656 case UNLE:
8657 scc_insn:
8658 /* SCC insns. If the comparison has already been
8659 performed, they cost 2 instructions. Otherwise they need
8660 an additional comparison before them. */
8661 *total = COSTS_N_INSNS (2);
8662 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8664 return true;
8667 /* Fall through */
8668 case COMPARE:
8669 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8671 *total = 0;
8672 return true;
8675 *total += COSTS_N_INSNS (1);
8676 if (CONST_INT_P (XEXP (x, 1))
8677 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8679 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8680 return true;
8683 subcode = GET_CODE (XEXP (x, 0));
8684 if (subcode == ASHIFT || subcode == ASHIFTRT
8685 || subcode == LSHIFTRT
8686 || subcode == ROTATE || subcode == ROTATERT)
8688 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8689 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8690 return true;
8693 if (subcode == MULT
8694 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8696 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8697 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8698 return true;
8701 return false;
8703 case UMIN:
8704 case UMAX:
8705 case SMIN:
8706 case SMAX:
8707 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8708 if (!CONST_INT_P (XEXP (x, 1))
8709 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8710 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8711 return true;
8713 case ABS:
8714 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8716 if (TARGET_HARD_FLOAT
8717 && (mode == SFmode
8718 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8720 *total = COSTS_N_INSNS (1);
8721 return false;
8723 *total = COSTS_N_INSNS (20);
8724 return false;
8726 *total = COSTS_N_INSNS (1);
8727 if (mode == DImode)
8728 *total += COSTS_N_INSNS (3);
8729 return false;
8731 case SIGN_EXTEND:
8732 case ZERO_EXTEND:
8733 *total = 0;
8734 if (GET_MODE_CLASS (mode) == MODE_INT)
8736 rtx op = XEXP (x, 0);
8737 machine_mode opmode = GET_MODE (op);
8739 if (mode == DImode)
8740 *total += COSTS_N_INSNS (1);
8742 if (opmode != SImode)
8744 if (MEM_P (op))
8746 /* If !arm_arch4, we use one of the extendhisi2_mem
8747 or movhi_bytes patterns for HImode. For a QImode
8748 sign extension, we first zero-extend from memory
8749 and then perform a shift sequence. */
8750 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8751 *total += COSTS_N_INSNS (2);
8753 else if (arm_arch6)
8754 *total += COSTS_N_INSNS (1);
8756 /* We don't have the necessary insn, so we need to perform some
8757 other operation. */
8758 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8759 /* An and with constant 255. */
8760 *total += COSTS_N_INSNS (1);
8761 else
8762 /* A shift sequence. Increase costs slightly to avoid
8763 combining two shifts into an extend operation. */
8764 *total += COSTS_N_INSNS (2) + 1;
8767 return false;
8770 switch (GET_MODE (XEXP (x, 0)))
8772 case V8QImode:
8773 case V4HImode:
8774 case V2SImode:
8775 case V4QImode:
8776 case V2HImode:
8777 *total = COSTS_N_INSNS (1);
8778 return false;
8780 default:
8781 gcc_unreachable ();
8783 gcc_unreachable ();
8785 case ZERO_EXTRACT:
8786 case SIGN_EXTRACT:
8787 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8788 return true;
8790 case CONST_INT:
8791 if (const_ok_for_arm (INTVAL (x))
8792 || const_ok_for_arm (~INTVAL (x)))
8793 *total = COSTS_N_INSNS (1);
8794 else
8795 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8796 INTVAL (x), NULL_RTX,
8797 NULL_RTX, 0, 0));
8798 return true;
8800 case CONST:
8801 case LABEL_REF:
8802 case SYMBOL_REF:
8803 *total = COSTS_N_INSNS (3);
8804 return true;
8806 case HIGH:
8807 *total = COSTS_N_INSNS (1);
8808 return true;
8810 case LO_SUM:
8811 *total = COSTS_N_INSNS (1);
8812 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8813 return true;
8815 case CONST_DOUBLE:
8816 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8817 && (mode == SFmode || !TARGET_VFP_SINGLE))
8818 *total = COSTS_N_INSNS (1);
8819 else
8820 *total = COSTS_N_INSNS (4);
8821 return true;
8823 case SET:
8824 /* The vec_extract patterns accept memory operands that require an
8825 address reload. Account for the cost of that reload to give the
8826 auto-inc-dec pass an incentive to try to replace them. */
8827 if (TARGET_NEON && MEM_P (SET_DEST (x))
8828 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8830 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8831 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8832 *total += COSTS_N_INSNS (1);
8833 return true;
8835 /* Likewise for the vec_set patterns. */
8836 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8837 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8838 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8840 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8841 *total = rtx_cost (mem, code, 0, speed);
8842 if (!neon_vector_mem_operand (mem, 2, true))
8843 *total += COSTS_N_INSNS (1);
8844 return true;
8846 return false;
8848 case UNSPEC:
8849 /* We cost this as high as our memory costs to allow this to
8850 be hoisted from loops. */
8851 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8853 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8855 return true;
8857 case CONST_VECTOR:
8858 if (TARGET_NEON
8859 && TARGET_HARD_FLOAT
8860 && outer == SET
8861 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8862 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8863 *total = COSTS_N_INSNS (1);
8864 else
8865 *total = COSTS_N_INSNS (4);
8866 return true;
8868 default:
8869 *total = COSTS_N_INSNS (4);
8870 return false;
8874 /* Estimates the size cost of thumb1 instructions.
8875 For now most of the code is copied from thumb1_rtx_costs. We need more
8876 fine-grained tuning when we have more related test cases. */
8877 static inline int
8878 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8880 machine_mode mode = GET_MODE (x);
8881 int words;
8883 switch (code)
8885 case ASHIFT:
8886 case ASHIFTRT:
8887 case LSHIFTRT:
8888 case ROTATERT:
8889 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8891 case PLUS:
8892 case MINUS:
8893 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8894 patterns generated by RTL expansion, especially for the expansion of
8895 multiplication. */
8896 if ((GET_CODE (XEXP (x, 0)) == MULT
8897 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8898 || (GET_CODE (XEXP (x, 1)) == MULT
8899 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8900 return COSTS_N_INSNS (2);
8901 /* Fall through on purpose for a normal RTX. */
8902 case COMPARE:
8903 case NEG:
8904 case NOT:
8905 return COSTS_N_INSNS (1);
8907 case MULT:
8908 if (CONST_INT_P (XEXP (x, 1)))
8910 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8911 into a register first. */
8912 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8913 /* For the targets which have a very small and high-latency multiply
8914 unit, we prefer to synthesize the mult with up to 5 instructions,
8915 giving a good balance between size and performance. */
8916 if (arm_arch6m && arm_m_profile_small_mul)
8917 return COSTS_N_INSNS (5);
8918 else
8919 return COSTS_N_INSNS (1) + const_size;
8921 return COSTS_N_INSNS (1);
8923 case SET:
8924 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8925 the mode. */
8926 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8927 return COSTS_N_INSNS (words)
8928 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8929 || satisfies_constraint_K (SET_SRC (x))
8930 /* thumb1_movdi_insn. */
8931 || ((words > 1) && MEM_P (SET_SRC (x))));
8933 case CONST_INT:
8934 if (outer == SET)
8936 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8937 return COSTS_N_INSNS (1);
8938 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8939 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8940 return COSTS_N_INSNS (2);
8941 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8942 if (thumb_shiftable_const (INTVAL (x)))
8943 return COSTS_N_INSNS (2);
8944 return COSTS_N_INSNS (3);
8946 else if ((outer == PLUS || outer == COMPARE)
8947 && INTVAL (x) < 256 && INTVAL (x) > -256)
8948 return 0;
8949 else if ((outer == IOR || outer == XOR || outer == AND)
8950 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8951 return COSTS_N_INSNS (1);
8952 else if (outer == AND)
8954 int i;
8955 /* This duplicates the tests in the andsi3 expander. */
8956 for (i = 9; i <= 31; i++)
8957 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8958 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8959 return COSTS_N_INSNS (2);
8961 else if (outer == ASHIFT || outer == ASHIFTRT
8962 || outer == LSHIFTRT)
8963 return 0;
8964 return COSTS_N_INSNS (2);
8966 case CONST:
8967 case CONST_DOUBLE:
8968 case LABEL_REF:
8969 case SYMBOL_REF:
8970 return COSTS_N_INSNS (3);
8972 case UDIV:
8973 case UMOD:
8974 case DIV:
8975 case MOD:
8976 return 100;
8978 case TRUNCATE:
8979 return 99;
8981 case AND:
8982 case XOR:
8983 case IOR:
8984 return COSTS_N_INSNS (1);
8986 case MEM:
8987 return (COSTS_N_INSNS (1)
8988 + COSTS_N_INSNS (1)
8989 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8990 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8991 ? COSTS_N_INSNS (1) : 0));
8993 case IF_THEN_ELSE:
8994 /* XXX a guess. */
8995 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8996 return 14;
8997 return 2;
8999 case ZERO_EXTEND:
9000 /* XXX still guessing. */
9001 switch (GET_MODE (XEXP (x, 0)))
9003 case QImode:
9004 return (1 + (mode == DImode ? 4 : 0)
9005 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9007 case HImode:
9008 return (4 + (mode == DImode ? 4 : 0)
9009 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9011 case SImode:
9012 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9014 default:
9015 return 99;
9018 default:
9019 return 99;
9023 /* RTX costs when optimizing for size. */
9024 static bool
9025 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9026 int *total)
9028 machine_mode mode = GET_MODE (x);
9029 if (TARGET_THUMB1)
9031 *total = thumb1_size_rtx_costs (x, code, outer_code);
9032 return true;
9035 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9036 switch (code)
9038 case MEM:
9039 /* A memory access costs 1 insn if the mode is small or the address is
9040 a single register; otherwise it costs one insn per word. */
9041 if (REG_P (XEXP (x, 0)))
9042 *total = COSTS_N_INSNS (1);
9043 else if (flag_pic
9044 && GET_CODE (XEXP (x, 0)) == PLUS
9045 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9046 /* This will be split into two instructions.
9047 See arm.md:calculate_pic_address. */
9048 *total = COSTS_N_INSNS (2);
9049 else
9050 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9051 return true;
9053 case DIV:
9054 case MOD:
9055 case UDIV:
9056 case UMOD:
9057 /* Needs a libcall, so it costs about this. */
9058 *total = COSTS_N_INSNS (2);
9059 return false;
9061 case ROTATE:
9062 if (mode == SImode && REG_P (XEXP (x, 1)))
9064 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9065 return true;
9067 /* Fall through */
9068 case ROTATERT:
9069 case ASHIFT:
9070 case LSHIFTRT:
9071 case ASHIFTRT:
9072 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9074 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9075 return true;
9077 else if (mode == SImode)
9079 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9080 /* Slightly disparage register shifts, but not by much. */
9081 if (!CONST_INT_P (XEXP (x, 1)))
9082 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9083 return true;
9086 /* Needs a libcall. */
9087 *total = COSTS_N_INSNS (2);
9088 return false;
9090 case MINUS:
9091 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9092 && (mode == SFmode || !TARGET_VFP_SINGLE))
9094 *total = COSTS_N_INSNS (1);
9095 return false;
9098 if (mode == SImode)
9100 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9101 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9103 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9104 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9105 || subcode1 == ROTATE || subcode1 == ROTATERT
9106 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9107 || subcode1 == ASHIFTRT)
9109 /* It's just the cost of the two operands. */
9110 *total = 0;
9111 return false;
9114 *total = COSTS_N_INSNS (1);
9115 return false;
9118 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9119 return false;
9121 case PLUS:
9122 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9123 && (mode == SFmode || !TARGET_VFP_SINGLE))
9125 *total = COSTS_N_INSNS (1);
9126 return false;
9129 /* A shift as a part of ADD costs nothing. */
9130 if (GET_CODE (XEXP (x, 0)) == MULT
9131 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9133 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9134 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9135 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9136 return true;
9139 /* Fall through */
9140 case AND: case XOR: case IOR:
9141 if (mode == SImode)
9143 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9145 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9146 || subcode == LSHIFTRT || subcode == ASHIFTRT
9147 || (code == AND && subcode == NOT))
9149 /* It's just the cost of the two operands. */
9150 *total = 0;
9151 return false;
9155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9156 return false;
9158 case MULT:
9159 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9160 return false;
9162 case NEG:
9163 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9164 && (mode == SFmode || !TARGET_VFP_SINGLE))
9166 *total = COSTS_N_INSNS (1);
9167 return false;
9170 /* Fall through */
9171 case NOT:
9172 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9174 return false;
9176 case IF_THEN_ELSE:
9177 *total = 0;
9178 return false;
9180 case COMPARE:
9181 if (cc_register (XEXP (x, 0), VOIDmode))
9182 * total = 0;
9183 else
9184 *total = COSTS_N_INSNS (1);
9185 return false;
9187 case ABS:
9188 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9189 && (mode == SFmode || !TARGET_VFP_SINGLE))
9190 *total = COSTS_N_INSNS (1);
9191 else
9192 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9193 return false;
9195 case SIGN_EXTEND:
9196 case ZERO_EXTEND:
9197 return arm_rtx_costs_1 (x, outer_code, total, 0);
9199 case CONST_INT:
9200 if (const_ok_for_arm (INTVAL (x)))
9201 /* A multiplication by a constant requires another instruction
9202 to load the constant into a register. */
9203 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9204 ? 1 : 0);
9205 else if (const_ok_for_arm (~INTVAL (x)))
9206 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9207 else if (const_ok_for_arm (-INTVAL (x)))
9209 if (outer_code == COMPARE || outer_code == PLUS
9210 || outer_code == MINUS)
9211 *total = 0;
9212 else
9213 *total = COSTS_N_INSNS (1);
9215 else
9216 *total = COSTS_N_INSNS (2);
9217 return true;
9219 case CONST:
9220 case LABEL_REF:
9221 case SYMBOL_REF:
9222 *total = COSTS_N_INSNS (2);
9223 return true;
9225 case CONST_DOUBLE:
9226 *total = COSTS_N_INSNS (4);
9227 return true;
9229 case CONST_VECTOR:
9230 if (TARGET_NEON
9231 && TARGET_HARD_FLOAT
9232 && outer_code == SET
9233 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9234 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9235 *total = COSTS_N_INSNS (1);
9236 else
9237 *total = COSTS_N_INSNS (4);
9238 return true;
9240 case HIGH:
9241 case LO_SUM:
9242 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9243 cost of these slightly. */
9244 *total = COSTS_N_INSNS (1) + 1;
9245 return true;
9247 case SET:
9248 return false;
9250 default:
9251 if (mode != VOIDmode)
9252 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9253 else
9254 *total = COSTS_N_INSNS (4); /* Who knows? */
9255 return false;
9259 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9260 operand, then return the operand that is being shifted. If the shift
9261 is not by a constant, then set SHIFT_REG to point to the operand.
9262 Return NULL if OP is not a shifter operand. */
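/* For example, for (mult reg (const_int 8)) this returns REG (a shift
   left by log2 (8)), while for (ashift reg1 reg2) it returns REG1 and
   sets *SHIFT_REG to REG2.  */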
9263 static rtx
9264 shifter_op_p (rtx op, rtx *shift_reg)
9266 enum rtx_code code = GET_CODE (op);
9268 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9269 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9270 return XEXP (op, 0);
9271 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9272 return XEXP (op, 0);
9273 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9274 || code == ASHIFTRT)
9276 if (!CONST_INT_P (XEXP (op, 1)))
9277 *shift_reg = XEXP (op, 1);
9278 return XEXP (op, 0);
9281 return NULL;
9284 static bool
9285 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9287 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9288 gcc_assert (GET_CODE (x) == UNSPEC);
9290 switch (XINT (x, 1))
9292 case UNSPEC_UNALIGNED_LOAD:
9293 /* We can only do unaligned loads into the integer unit, and we can't
9294 use LDM or LDRD. */
9295 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9296 if (speed_p)
9297 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9298 + extra_cost->ldst.load_unaligned);
9300 #ifdef NOT_YET
9301 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9302 ADDR_SPACE_GENERIC, speed_p);
9303 #endif
9304 return true;
9306 case UNSPEC_UNALIGNED_STORE:
9307 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9308 if (speed_p)
9309 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9310 + extra_cost->ldst.store_unaligned);
9312 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9313 #ifdef NOT_YET
9314 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9315 ADDR_SPACE_GENERIC, speed_p);
9316 #endif
9317 return true;
9319 case UNSPEC_VRINTZ:
9320 case UNSPEC_VRINTP:
9321 case UNSPEC_VRINTM:
9322 case UNSPEC_VRINTR:
9323 case UNSPEC_VRINTX:
9324 case UNSPEC_VRINTA:
9325 *cost = COSTS_N_INSNS (1);
9326 if (speed_p)
9327 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9329 return true;
9330 default:
9331 *cost = COSTS_N_INSNS (2);
9332 break;
9334 return false;
9337 /* Cost of a libcall. We assume one insn per argument, an amount for the
9338 call (one insn for -Os) and then one for processing the result. */
9339 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
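/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) at -Os.  */

/* If operand IDX of X is a left shift (or an equivalent multiply by a
   power of two), add the cost of a combined arithmetic-and-shift
   operation plus the operand costs, and return true from the
   enclosing cost case.  */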
9341 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9342 do \
9344 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9345 if (shift_op != NULL \
9346 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9348 if (shift_reg) \
9350 if (speed_p) \
9351 *cost += extra_cost->alu.arith_shift_reg; \
9352 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9354 else if (speed_p) \
9355 *cost += extra_cost->alu.arith_shift; \
9357 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9358 + rtx_cost (XEXP (x, 1 - IDX), \
9359 OP, 1, speed_p)); \
9360 return true; \
9363 while (0);
9365 /* RTX costs. Make an estimate of the cost of executing the operation
9366 X, which is contained within an operation with code OUTER_CODE.
9367 SPEED_P indicates whether the cost desired is the performance cost,
9368 or the size cost. The estimate is stored in COST and the return
9369 value is TRUE if the cost calculation is final, or FALSE if the
9370 caller should recurse through the operands of X to add additional
9371 costs.
9373 We currently make no attempt to model the size savings of Thumb-2
9374 16-bit instructions. At the normal points in compilation where
9375 this code is called we have no measure of whether the condition
9376 flags are live or not, and thus no realistic way to determine what
9377 the size will eventually be. */
9378 static bool
9379 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9380 const struct cpu_cost_table *extra_cost,
9381 int *cost, bool speed_p)
9383 machine_mode mode = GET_MODE (x);
9385 if (TARGET_THUMB1)
9387 if (speed_p)
9388 *cost = thumb1_rtx_costs (x, code, outer_code);
9389 else
9390 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9391 return true;
9394 switch (code)
9396 case SET:
9397 *cost = 0;
9398 /* SET RTXs don't have a mode so we get it from the destination. */
9399 mode = GET_MODE (SET_DEST (x));
9401 if (REG_P (SET_SRC (x))
9402 && REG_P (SET_DEST (x)))
9404 /* Assume that most copies can be done with a single insn,
9405 unless we don't have HW FP, in which case everything
9406 larger than word mode will require two insns. */
9407 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9408 && GET_MODE_SIZE (mode) > 4)
9409 || mode == DImode)
9410 ? 2 : 1);
9411 /* Conditional register moves can be encoded
9412 in 16 bits in Thumb mode. */
9413 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9414 *cost >>= 1;
9416 return true;
9419 if (CONST_INT_P (SET_SRC (x)))
9421 /* Handle CONST_INT here, since the value doesn't have a mode
9422 and we would otherwise be unable to work out the true cost. */
9423 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9424 outer_code = SET;
9425 /* Slightly lower the cost of setting a core reg to a constant.
9426 This helps break up chains and allows for better scheduling. */
9427 if (REG_P (SET_DEST (x))
9428 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9429 *cost -= 1;
9430 x = SET_SRC (x);
9431 /* Immediate moves with an immediate in the range [0, 255] can be
9432 encoded in 16 bits in Thumb mode. */
9433 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9434 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9435 *cost >>= 1;
9436 goto const_int_cost;
9439 return false;
9441 case MEM:
9442 /* A memory access costs 1 insn if the mode is small or the address is
9443 a single register; otherwise it costs one insn per word. */
9444 if (REG_P (XEXP (x, 0)))
9445 *cost = COSTS_N_INSNS (1);
9446 else if (flag_pic
9447 && GET_CODE (XEXP (x, 0)) == PLUS
9448 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9449 /* This will be split into two instructions.
9450 See arm.md:calculate_pic_address. */
9451 *cost = COSTS_N_INSNS (2);
9452 else
9453 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9455 /* For speed optimizations, add the costs of the address and
9456 accessing memory. */
9457 if (speed_p)
9458 #ifdef NOT_YET
9459 *cost += (extra_cost->ldst.load
9460 + arm_address_cost (XEXP (x, 0), mode,
9461 ADDR_SPACE_GENERIC, speed_p));
9462 #else
9463 *cost += extra_cost->ldst.load;
9464 #endif
9465 return true;
9467 case PARALLEL:
9469 /* Calculations of LDM costs are complex. We assume an initial cost
9470 (ldm_1st) which covers loading up to
9471 ldm_regs_per_insn_1st registers; each additional group of
9472 ldm_regs_per_insn_subsequent registers then costs one more insn. The
9473 formula for N regs is thus:
9475 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9476 + ldm_regs_per_insn_subsequent - 1)
9477 / ldm_regs_per_insn_subsequent).
9479 Additional costs may also be added for addressing. A similar
9480 formula is used for STM. */
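/* Worked example (illustrative numbers only): with ldm_regs_per_insn_1st = 3,
   ldm_regs_per_insn_subsequent = 2 and a PARALLEL loading N = 7 registers,
   the formula gives ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */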
9482 bool is_ldm = load_multiple_operation (x, SImode);
9483 bool is_stm = store_multiple_operation (x, SImode);
9485 *cost = COSTS_N_INSNS (1);
9487 if (is_ldm || is_stm)
9489 if (speed_p)
9491 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9492 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9493 ? extra_cost->ldst.ldm_regs_per_insn_1st
9494 : extra_cost->ldst.stm_regs_per_insn_1st;
9495 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9496 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9497 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9499 *cost += regs_per_insn_1st
9500 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9501 + regs_per_insn_sub - 1)
9502 / regs_per_insn_sub);
9503 return true;
9507 return false;
9509 case DIV:
9510 case UDIV:
9511 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9512 && (mode == SFmode || !TARGET_VFP_SINGLE))
9513 *cost = COSTS_N_INSNS (speed_p
9514 ? extra_cost->fp[mode != SFmode].div : 1);
9515 else if (mode == SImode && TARGET_IDIV)
9516 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9517 else
9518 *cost = LIBCALL_COST (2);
9519 return false; /* All arguments must be in registers. */
9521 case MOD:
9522 case UMOD:
9523 *cost = LIBCALL_COST (2);
9524 return false; /* All arguments must be in registers. */
9526 case ROTATE:
9527 if (mode == SImode && REG_P (XEXP (x, 1)))
9529 *cost = (COSTS_N_INSNS (2)
9530 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9531 if (speed_p)
9532 *cost += extra_cost->alu.shift_reg;
9533 return true;
9535 /* Fall through */
9536 case ROTATERT:
9537 case ASHIFT:
9538 case LSHIFTRT:
9539 case ASHIFTRT:
9540 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9542 *cost = (COSTS_N_INSNS (3)
9543 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9544 if (speed_p)
9545 *cost += 2 * extra_cost->alu.shift;
9546 return true;
9548 else if (mode == SImode)
9550 *cost = (COSTS_N_INSNS (1)
9551 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9552 /* Slightly disparage register shifts at -Os, but not by much. */
9553 if (!CONST_INT_P (XEXP (x, 1)))
9554 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9555 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9556 return true;
9558 else if (GET_MODE_CLASS (mode) == MODE_INT
9559 && GET_MODE_SIZE (mode) < 4)
9561 if (code == ASHIFT)
9563 *cost = (COSTS_N_INSNS (1)
9564 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9565 /* Slightly disparage register shifts at -Os, but not by
9566 much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9569 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9571 else if (code == LSHIFTRT || code == ASHIFTRT)
9573 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9575 /* Can use SBFX/UBFX. */
9576 *cost = COSTS_N_INSNS (1);
9577 if (speed_p)
9578 *cost += extra_cost->alu.bfx;
9579 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9581 else
9583 *cost = COSTS_N_INSNS (2);
9584 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9585 if (speed_p)
9587 if (CONST_INT_P (XEXP (x, 1)))
9588 *cost += 2 * extra_cost->alu.shift;
9589 else
9590 *cost += (extra_cost->alu.shift
9591 + extra_cost->alu.shift_reg);
9593 else
9594 /* Slightly disparage register shifts. */
9595 *cost += !CONST_INT_P (XEXP (x, 1));
9598 else /* Rotates. */
9600 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9601 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9602 if (speed_p)
9604 if (CONST_INT_P (XEXP (x, 1)))
9605 *cost += (2 * extra_cost->alu.shift
9606 + extra_cost->alu.log_shift);
9607 else
9608 *cost += (extra_cost->alu.shift
9609 + extra_cost->alu.shift_reg
9610 + extra_cost->alu.log_shift_reg);
9613 return true;
9616 *cost = LIBCALL_COST (2);
9617 return false;
9619 case BSWAP:
9620 if (arm_arch6)
9622 if (mode == SImode)
9624 *cost = COSTS_N_INSNS (1);
9625 if (speed_p)
9626 *cost += extra_cost->alu.rev;
9628 return false;
9631 else
9633 /* No rev instruction available. Look at arm_legacy_rev
9634 and thumb_legacy_rev for the form of RTL used then. */
9635 if (TARGET_THUMB)
9637 *cost = COSTS_N_INSNS (10);
9639 if (speed_p)
9641 *cost += 6 * extra_cost->alu.shift;
9642 *cost += 3 * extra_cost->alu.logical;
9645 else
9647 *cost = COSTS_N_INSNS (5);
9649 if (speed_p)
9651 *cost += 2 * extra_cost->alu.shift;
9652 *cost += extra_cost->alu.arith_shift;
9653 *cost += 2 * extra_cost->alu.logical;
9656 return true;
9658 return false;
9660 case MINUS:
9661 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9662 && (mode == SFmode || !TARGET_VFP_SINGLE))
9664 *cost = COSTS_N_INSNS (1);
9665 if (GET_CODE (XEXP (x, 0)) == MULT
9666 || GET_CODE (XEXP (x, 1)) == MULT)
9668 rtx mul_op0, mul_op1, sub_op;
9670 if (speed_p)
9671 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9673 if (GET_CODE (XEXP (x, 0)) == MULT)
9675 mul_op0 = XEXP (XEXP (x, 0), 0);
9676 mul_op1 = XEXP (XEXP (x, 0), 1);
9677 sub_op = XEXP (x, 1);
9679 else
9681 mul_op0 = XEXP (XEXP (x, 1), 0);
9682 mul_op1 = XEXP (XEXP (x, 1), 1);
9683 sub_op = XEXP (x, 0);
9686 /* The first operand of the multiply may be optionally
9687 negated. */
9688 if (GET_CODE (mul_op0) == NEG)
9689 mul_op0 = XEXP (mul_op0, 0);
9691 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9692 + rtx_cost (mul_op1, code, 0, speed_p)
9693 + rtx_cost (sub_op, code, 0, speed_p));
9695 return true;
9698 if (speed_p)
9699 *cost += extra_cost->fp[mode != SFmode].addsub;
9700 return false;
9703 if (mode == SImode)
9705 rtx shift_by_reg = NULL;
9706 rtx shift_op;
9707 rtx non_shift_op;
9709 *cost = COSTS_N_INSNS (1);
9711 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9712 if (shift_op == NULL)
9714 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9715 non_shift_op = XEXP (x, 0);
9717 else
9718 non_shift_op = XEXP (x, 1);
9720 if (shift_op != NULL)
9722 if (shift_by_reg != NULL)
9724 if (speed_p)
9725 *cost += extra_cost->alu.arith_shift_reg;
9726 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9728 else if (speed_p)
9729 *cost += extra_cost->alu.arith_shift;
9731 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9732 + rtx_cost (non_shift_op, code, 0, speed_p));
9733 return true;
9736 if (arm_arch_thumb2
9737 && GET_CODE (XEXP (x, 1)) == MULT)
9739 /* MLS. */
9740 if (speed_p)
9741 *cost += extra_cost->mult[0].add;
9742 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9743 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9744 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9745 return true;
9748 if (CONST_INT_P (XEXP (x, 0)))
9750 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9751 INTVAL (XEXP (x, 0)), NULL_RTX,
9752 NULL_RTX, 1, 0);
9753 *cost = COSTS_N_INSNS (insns);
9754 if (speed_p)
9755 *cost += insns * extra_cost->alu.arith;
9756 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9757 return true;
9759 else if (speed_p)
9760 *cost += extra_cost->alu.arith;
9762 return false;
9765 if (GET_MODE_CLASS (mode) == MODE_INT
9766 && GET_MODE_SIZE (mode) < 4)
9768 rtx shift_op, shift_reg;
9769 shift_reg = NULL;
9771 /* We check both sides of the MINUS for shifter operands since,
9772 unlike PLUS, it's not commutative. */
9774 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9775 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
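/* Illustrative RTL (not from the sources): the first invocation above handles
   e.g. (minus:HI (ashift:HI (reg) (const_int 2)) (reg)), while the second
   handles (minus:HI (reg) (ashift:HI (reg) (const_int 2))), since only left
   shifts are usable as shifter operands in the narrow modes.  */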
9777 /* Slightly disparage, as we might need to widen the result. */
9778 *cost = 1 + COSTS_N_INSNS (1);
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith;
9782 if (CONST_INT_P (XEXP (x, 0)))
9784 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9785 return true;
9788 return false;
9791 if (mode == DImode)
9793 *cost = COSTS_N_INSNS (2);
9795 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9797 rtx op1 = XEXP (x, 1);
9799 if (speed_p)
9800 *cost += 2 * extra_cost->alu.arith;
9802 if (GET_CODE (op1) == ZERO_EXTEND)
9803 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9804 else
9805 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9806 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9807 0, speed_p);
9808 return true;
9810 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9812 if (speed_p)
9813 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9814 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9815 0, speed_p)
9816 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9817 return true;
9819 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9820 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9822 if (speed_p)
9823 *cost += (extra_cost->alu.arith
9824 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9825 ? extra_cost->alu.arith
9826 : extra_cost->alu.arith_shift));
9827 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9828 + rtx_cost (XEXP (XEXP (x, 1), 0),
9829 GET_CODE (XEXP (x, 1)), 0, speed_p));
9830 return true;
9833 if (speed_p)
9834 *cost += 2 * extra_cost->alu.arith;
9835 return false;
9838 /* Vector mode? */
9840 *cost = LIBCALL_COST (2);
9841 return false;
9843 case PLUS:
9844 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9845 && (mode == SFmode || !TARGET_VFP_SINGLE))
9847 *cost = COSTS_N_INSNS (1);
9848 if (GET_CODE (XEXP (x, 0)) == MULT)
9850 rtx mul_op0, mul_op1, add_op;
9852 if (speed_p)
9853 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9855 mul_op0 = XEXP (XEXP (x, 0), 0);
9856 mul_op1 = XEXP (XEXP (x, 0), 1);
9857 add_op = XEXP (x, 1);
9859 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9860 + rtx_cost (mul_op1, code, 0, speed_p)
9861 + rtx_cost (add_op, code, 0, speed_p));
9863 return true;
9866 if (speed_p)
9867 *cost += extra_cost->fp[mode != SFmode].addsub;
9868 return false;
9870 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9872 *cost = LIBCALL_COST (2);
9873 return false;
9876 /* Narrow modes can be synthesized in SImode, but the range
9877 of useful sub-operations is limited. Check for shift operations
9878 on one of the operands. Only left shifts can be used in the
9879 narrow modes. */
9880 if (GET_MODE_CLASS (mode) == MODE_INT
9881 && GET_MODE_SIZE (mode) < 4)
9883 rtx shift_op, shift_reg;
9884 shift_reg = NULL;
9886 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9888 if (CONST_INT_P (XEXP (x, 1)))
9890 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9891 INTVAL (XEXP (x, 1)), NULL_RTX,
9892 NULL_RTX, 1, 0);
9893 *cost = COSTS_N_INSNS (insns);
9894 if (speed_p)
9895 *cost += insns * extra_cost->alu.arith;
9896 /* Slightly penalize a narrow operation as the result may
9897 need widening. */
9898 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9899 return true;
9902 /* Slightly penalize a narrow operation as the result may
9903 need widening. */
9904 *cost = 1 + COSTS_N_INSNS (1);
9905 if (speed_p)
9906 *cost += extra_cost->alu.arith;
9908 return false;
9911 if (mode == SImode)
9913 rtx shift_op, shift_reg;
9915 *cost = COSTS_N_INSNS (1);
9916 if (TARGET_INT_SIMD
9917 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9918 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9920 /* UXTA[BH] or SXTA[BH]. */
9921 if (speed_p)
9922 *cost += extra_cost->alu.extend_arith;
9923 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9924 speed_p)
9925 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9926 return true;
9929 shift_reg = NULL;
9930 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9931 if (shift_op != NULL)
9933 if (shift_reg)
9935 if (speed_p)
9936 *cost += extra_cost->alu.arith_shift_reg;
9937 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9939 else if (speed_p)
9940 *cost += extra_cost->alu.arith_shift;
9942 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9943 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9944 return true;
9946 if (GET_CODE (XEXP (x, 0)) == MULT)
9948 rtx mul_op = XEXP (x, 0);
9950 *cost = COSTS_N_INSNS (1);
9952 if (TARGET_DSP_MULTIPLY
9953 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9954 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9955 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9956 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9957 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9958 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9961 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9962 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9963 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9964 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9965 == 16))))))
9967 /* SMLA[BT][BT]. */
9968 if (speed_p)
9969 *cost += extra_cost->mult[0].extend_add;
9970 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9971 SIGN_EXTEND, 0, speed_p)
9972 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9973 SIGN_EXTEND, 0, speed_p)
9974 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9975 return true;
9978 if (speed_p)
9979 *cost += extra_cost->mult[0].add;
9980 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9981 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9982 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9983 return true;
9985 if (CONST_INT_P (XEXP (x, 1)))
9987 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9988 INTVAL (XEXP (x, 1)), NULL_RTX,
9989 NULL_RTX, 1, 0);
9990 *cost = COSTS_N_INSNS (insns);
9991 if (speed_p)
9992 *cost += insns * extra_cost->alu.arith;
9993 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9994 return true;
9996 else if (speed_p)
9997 *cost += extra_cost->alu.arith;
9999 return false;
10002 if (mode == DImode)
10004 if (arm_arch3m
10005 && GET_CODE (XEXP (x, 0)) == MULT
10006 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10007 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10008 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10009 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10011 *cost = COSTS_N_INSNS (1);
10012 if (speed_p)
10013 *cost += extra_cost->mult[1].extend_add;
10014 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10015 ZERO_EXTEND, 0, speed_p)
10016 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10017 ZERO_EXTEND, 0, speed_p)
10018 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10019 return true;
10022 *cost = COSTS_N_INSNS (2);
10024 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10025 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10027 if (speed_p)
10028 *cost += (extra_cost->alu.arith
10029 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10030 ? extra_cost->alu.arith
10031 : extra_cost->alu.arith_shift));
10033 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10034 speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10039 if (speed_p)
10040 *cost += 2 * extra_cost->alu.arith;
10041 return false;
10044 /* Vector mode? */
10045 *cost = LIBCALL_COST (2);
10046 return false;
10047 case IOR:
10048 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10050 *cost = COSTS_N_INSNS (1);
10051 if (speed_p)
10052 *cost += extra_cost->alu.rev;
10054 return true;
10056 /* Fall through. */
10057 case AND: case XOR:
10058 if (mode == SImode)
10060 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10061 rtx op0 = XEXP (x, 0);
10062 rtx shift_op, shift_reg;
10064 *cost = COSTS_N_INSNS (1);
10066 if (subcode == NOT
10067 && (code == AND
10068 || (code == IOR && TARGET_THUMB2)))
10069 op0 = XEXP (op0, 0);
10071 shift_reg = NULL;
10072 shift_op = shifter_op_p (op0, &shift_reg);
10073 if (shift_op != NULL)
10075 if (shift_reg)
10077 if (speed_p)
10078 *cost += extra_cost->alu.log_shift_reg;
10079 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10081 else if (speed_p)
10082 *cost += extra_cost->alu.log_shift;
10084 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10085 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10086 return true;
10089 if (CONST_INT_P (XEXP (x, 1)))
10091 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10092 INTVAL (XEXP (x, 1)), NULL_RTX,
10093 NULL_RTX, 1, 0);
10095 *cost = COSTS_N_INSNS (insns);
10096 if (speed_p)
10097 *cost += insns * extra_cost->alu.logical;
10098 *cost += rtx_cost (op0, code, 0, speed_p);
10099 return true;
10102 if (speed_p)
10103 *cost += extra_cost->alu.logical;
10104 *cost += (rtx_cost (op0, code, 0, speed_p)
10105 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10106 return true;
10109 if (mode == DImode)
10111 rtx op0 = XEXP (x, 0);
10112 enum rtx_code subcode = GET_CODE (op0);
10114 *cost = COSTS_N_INSNS (2);
10116 if (subcode == NOT
10117 && (code == AND
10118 || (code == IOR && TARGET_THUMB2)))
10119 op0 = XEXP (op0, 0);
10121 if (GET_CODE (op0) == ZERO_EXTEND)
10123 if (speed_p)
10124 *cost += 2 * extra_cost->alu.logical;
10126 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10127 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10128 return true;
10130 else if (GET_CODE (op0) == SIGN_EXTEND)
10132 if (speed_p)
10133 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10135 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10136 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10137 return true;
10140 if (speed_p)
10141 *cost += 2 * extra_cost->alu.logical;
10143 return true;
10145 /* Vector mode? */
10147 *cost = LIBCALL_COST (2);
10148 return false;
10150 case MULT:
10151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10152 && (mode == SFmode || !TARGET_VFP_SINGLE))
10154 rtx op0 = XEXP (x, 0);
10156 *cost = COSTS_N_INSNS (1);
10158 if (GET_CODE (op0) == NEG)
10159 op0 = XEXP (op0, 0);
10161 if (speed_p)
10162 *cost += extra_cost->fp[mode != SFmode].mult;
10164 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10165 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10166 return true;
10168 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10170 *cost = LIBCALL_COST (2);
10171 return false;
10174 if (mode == SImode)
10176 *cost = COSTS_N_INSNS (1);
10177 if (TARGET_DSP_MULTIPLY
10178 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10179 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10180 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10182 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10183 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10185 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10186 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10187 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10188 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10189 && (INTVAL (XEXP (XEXP (x, 1), 1))
10190 == 16))))))
10192 /* SMUL[TB][TB]. */
10193 if (speed_p)
10194 *cost += extra_cost->mult[0].extend;
10195 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10196 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10197 return true;
10199 if (speed_p)
10200 *cost += extra_cost->mult[0].simple;
10201 return false;
10204 if (mode == DImode)
10206 if (arm_arch3m
10207 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10208 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10209 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10210 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10212 *cost = COSTS_N_INSNS (1);
10213 if (speed_p)
10214 *cost += extra_cost->mult[1].extend;
10215 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10216 ZERO_EXTEND, 0, speed_p)
10217 + rtx_cost (XEXP (XEXP (x, 1), 0),
10218 ZERO_EXTEND, 0, speed_p));
10219 return true;
10222 *cost = LIBCALL_COST (2);
10223 return false;
10226 /* Vector mode? */
10227 *cost = LIBCALL_COST (2);
10228 return false;
10230 case NEG:
10231 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10232 && (mode == SFmode || !TARGET_VFP_SINGLE))
10234 *cost = COSTS_N_INSNS (1);
10235 if (speed_p)
10236 *cost += extra_cost->fp[mode != SFmode].neg;
10238 return false;
10240 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10242 *cost = LIBCALL_COST (1);
10243 return false;
10246 if (mode == SImode)
10248 if (GET_CODE (XEXP (x, 0)) == ABS)
10250 *cost = COSTS_N_INSNS (2);
10251 /* Assume the non-flag-changing variant. */
10252 if (speed_p)
10253 *cost += (extra_cost->alu.log_shift
10254 + extra_cost->alu.arith_shift);
10255 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10256 return true;
10259 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10260 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10262 *cost = COSTS_N_INSNS (2);
10263 /* No extra cost for MOV imm and MVN imm. */
10264 /* If the comparison op is using the flags, there's no further
10265 cost, otherwise we need to add the cost of the comparison. */
10266 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10267 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10268 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10270 *cost += (COSTS_N_INSNS (1)
10271 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10272 speed_p)
10273 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10274 speed_p));
10275 if (speed_p)
10276 *cost += extra_cost->alu.arith;
10278 return true;
10280 *cost = COSTS_N_INSNS (1);
10281 if (speed_p)
10282 *cost += extra_cost->alu.arith;
10283 return false;
10286 if (GET_MODE_CLASS (mode) == MODE_INT
10287 && GET_MODE_SIZE (mode) < 4)
10289 /* Slightly disparage, as we might need an extend operation. */
10290 *cost = 1 + COSTS_N_INSNS (1);
10291 if (speed_p)
10292 *cost += extra_cost->alu.arith;
10293 return false;
10296 if (mode == DImode)
10298 *cost = COSTS_N_INSNS (2);
10299 if (speed_p)
10300 *cost += 2 * extra_cost->alu.arith;
10301 return false;
10304 /* Vector mode? */
10305 *cost = LIBCALL_COST (1);
10306 return false;
10308 case NOT:
10309 if (mode == SImode)
10311 rtx shift_op;
10312 rtx shift_reg = NULL;
10314 *cost = COSTS_N_INSNS (1);
10315 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10317 if (shift_op)
10319 if (shift_reg != NULL)
10321 if (speed_p)
10322 *cost += extra_cost->alu.log_shift_reg;
10323 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10325 else if (speed_p)
10326 *cost += extra_cost->alu.log_shift;
10327 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10328 return true;
10331 if (speed_p)
10332 *cost += extra_cost->alu.logical;
10333 return false;
10335 if (mode == DImode)
10337 *cost = COSTS_N_INSNS (2);
10338 return false;
10341 /* Vector mode? */
10343 *cost += LIBCALL_COST (1);
10344 return false;
10346 case IF_THEN_ELSE:
10348 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10350 *cost = COSTS_N_INSNS (4);
10351 return true;
10353 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10354 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10356 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10357 /* Assume that if one arm of the if_then_else is a register,
10358 it will be tied with the result and the conditional insn
10359 will be eliminated. */
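/* Illustrative case: for (set (reg r0) (if_then_else (cond) (reg r1) (plus ...)))
   the (reg r1) arm is assumed to be free, so only the cost of the other arm
   (op2cost here) is added.  */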
10360 if (REG_P (XEXP (x, 1)))
10361 *cost += op2cost;
10362 else if (REG_P (XEXP (x, 2)))
10363 *cost += op1cost;
10364 else
10366 if (speed_p)
10368 if (extra_cost->alu.non_exec_costs_exec)
10369 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10370 else
10371 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10373 else
10374 *cost += op1cost + op2cost;
10377 return true;
10379 case COMPARE:
10380 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10381 *cost = 0;
10382 else
10384 machine_mode op0mode;
10385 /* We'll mostly assume that the cost of a compare is the cost of the
10386 LHS. However, there are some notable exceptions. */
10388 /* Floating point compares are never done as side-effects. */
10389 op0mode = GET_MODE (XEXP (x, 0));
10390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10391 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10393 *cost = COSTS_N_INSNS (1);
10394 if (speed_p)
10395 *cost += extra_cost->fp[op0mode != SFmode].compare;
10397 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10399 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10400 return true;
10403 return false;
10405 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10407 *cost = LIBCALL_COST (2);
10408 return false;
10411 /* DImode compares normally take two insns. */
10412 if (op0mode == DImode)
10414 *cost = COSTS_N_INSNS (2);
10415 if (speed_p)
10416 *cost += 2 * extra_cost->alu.arith;
10417 return false;
10420 if (op0mode == SImode)
10422 rtx shift_op;
10423 rtx shift_reg;
10425 if (XEXP (x, 1) == const0_rtx
10426 && !(REG_P (XEXP (x, 0))
10427 || (GET_CODE (XEXP (x, 0)) == SUBREG
10428 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10430 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10432 /* Multiply operations that set the flags are often
10433 significantly more expensive. */
10434 if (speed_p
10435 && GET_CODE (XEXP (x, 0)) == MULT
10436 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10437 *cost += extra_cost->mult[0].flag_setting;
10439 if (speed_p
10440 && GET_CODE (XEXP (x, 0)) == PLUS
10441 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10442 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10443 0), 1), mode))
10444 *cost += extra_cost->mult[0].flag_setting;
10445 return true;
10448 shift_reg = NULL;
10449 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10450 if (shift_op != NULL)
10452 *cost = COSTS_N_INSNS (1);
10453 if (shift_reg != NULL)
10455 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10456 if (speed_p)
10457 *cost += extra_cost->alu.arith_shift_reg;
10459 else if (speed_p)
10460 *cost += extra_cost->alu.arith_shift;
10461 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10462 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10463 return true;
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.arith;
10469 if (CONST_INT_P (XEXP (x, 1))
10470 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10472 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10473 return true;
10475 return false;
10478 /* Vector mode? */
10480 *cost = LIBCALL_COST (2);
10481 return false;
10483 return true;
10485 case EQ:
10486 case NE:
10487 case LT:
10488 case LE:
10489 case GT:
10490 case GE:
10491 case LTU:
10492 case LEU:
10493 case GEU:
10494 case GTU:
10495 case ORDERED:
10496 case UNORDERED:
10497 case UNEQ:
10498 case UNLE:
10499 case UNLT:
10500 case UNGE:
10501 case UNGT:
10502 case LTGT:
10503 if (outer_code == SET)
10505 /* Is it a store-flag operation? */
10506 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10507 && XEXP (x, 1) == const0_rtx)
10509 /* Thumb also needs an IT insn. */
10510 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10511 return true;
10513 if (XEXP (x, 1) == const0_rtx)
10515 switch (code)
10517 case LT:
10518 /* LSR Rd, Rn, #31. */
10519 *cost = COSTS_N_INSNS (1);
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10522 break;
10524 case EQ:
10525 /* RSBS T1, Rn, #0
10526 ADC Rd, Rn, T1. */
10528 case NE:
10529 /* SUBS T1, Rn, #1
10530 SBC Rd, Rn, T1. */
10531 *cost = COSTS_N_INSNS (2);
10532 break;
10534 case LE:
10535 /* RSBS T1, Rn, Rn, LSR #31
10536 ADC Rd, Rn, T1. */
10537 *cost = COSTS_N_INSNS (2);
10538 if (speed_p)
10539 *cost += extra_cost->alu.arith_shift;
10540 break;
10542 case GT:
10543 /* RSB Rd, Rn, Rn, ASR #1
10544 LSR Rd, Rd, #31. */
10545 *cost = COSTS_N_INSNS (2);
10546 if (speed_p)
10547 *cost += (extra_cost->alu.arith_shift
10548 + extra_cost->alu.shift);
10549 break;
10551 case GE:
10552 /* ASR Rd, Rn, #31
10553 ADD Rd, Rn, #1. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += extra_cost->alu.shift;
10557 break;
10559 default:
10560 /* Remaining cases are either meaningless or would take
10561 three insns anyway. */
10562 *cost = COSTS_N_INSNS (3);
10563 break;
10565 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10566 return true;
10568 else
10570 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10571 if (CONST_INT_P (XEXP (x, 1))
10572 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10574 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10575 return true;
10578 return false;
10581 /* Not directly inside a set. If it involves the condition code
10582 register it must be the condition for a branch, cond_exec or
10583 I_T_E operation. Since the comparison is performed elsewhere
10584 this is just the control part which has no additional
10585 cost. */
10586 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10587 && XEXP (x, 1) == const0_rtx)
10589 *cost = 0;
10590 return true;
10592 return false;
10594 case ABS:
10595 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && (mode == SFmode || !TARGET_VFP_SINGLE))
10598 *cost = COSTS_N_INSNS (1);
10599 if (speed_p)
10600 *cost += extra_cost->fp[mode != SFmode].neg;
10602 return false;
10604 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10606 *cost = LIBCALL_COST (1);
10607 return false;
10610 if (mode == SImode)
10612 *cost = COSTS_N_INSNS (1);
10613 if (speed_p)
10614 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10615 return false;
10617 /* Vector mode? */
10618 *cost = LIBCALL_COST (1);
10619 return false;
10621 case SIGN_EXTEND:
10622 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10623 && MEM_P (XEXP (x, 0)))
10625 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10627 if (mode == DImode)
10628 *cost += COSTS_N_INSNS (1);
10630 if (!speed_p)
10631 return true;
10633 if (GET_MODE (XEXP (x, 0)) == SImode)
10634 *cost += extra_cost->ldst.load;
10635 else
10636 *cost += extra_cost->ldst.load_sign_extend;
10638 if (mode == DImode)
10639 *cost += extra_cost->alu.shift;
10641 return true;
10644 /* Widening from less than 32 bits requires an extend operation. */
10645 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10647 /* We have SXTB/SXTH. */
10648 *cost = COSTS_N_INSNS (1);
10649 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10650 if (speed_p)
10651 *cost += extra_cost->alu.extend;
10653 else if (GET_MODE (XEXP (x, 0)) != SImode)
10655 /* Needs two shifts. */
10656 *cost = COSTS_N_INSNS (2);
10657 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10658 if (speed_p)
10659 *cost += 2 * extra_cost->alu.shift;
10662 /* Widening beyond 32 bits requires one more insn. */
10663 if (mode == DImode)
10665 *cost += COSTS_N_INSNS (1);
10666 if (speed_p)
10667 *cost += extra_cost->alu.shift;
10670 return true;
10672 case ZERO_EXTEND:
10673 if ((arm_arch4
10674 || GET_MODE (XEXP (x, 0)) == SImode
10675 || GET_MODE (XEXP (x, 0)) == QImode)
10676 && MEM_P (XEXP (x, 0)))
10678 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10680 if (mode == DImode)
10681 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10683 return true;
10686 /* Widening from less than 32 bits requires an extend operation. */
10687 if (GET_MODE (XEXP (x, 0)) == QImode)
10689 /* UXTB can be a shorter instruction in Thumb2, but it might
10690 be slower than the AND Rd, Rn, #255 alternative. When
10691 optimizing for speed it should never be slower to use
10692 AND, and we don't really model 16-bit vs 32-bit insns
10693 here. */
10694 *cost = COSTS_N_INSNS (1);
10695 if (speed_p)
10696 *cost += extra_cost->alu.logical;
10698 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10700 /* We have UXTB/UXTH. */
10701 *cost = COSTS_N_INSNS (1);
10702 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10703 if (speed_p)
10704 *cost += extra_cost->alu.extend;
10706 else if (GET_MODE (XEXP (x, 0)) != SImode)
10708 /* Needs two shifts. It's marginally preferable to use
10709 shifts rather than two BIC instructions as the second
10710 shift may merge with a subsequent insn as a shifter
10711 op. */
10712 *cost = COSTS_N_INSNS (2);
10713 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10714 if (speed_p)
10715 *cost += 2 * extra_cost->alu.shift;
10717 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10718 *cost = COSTS_N_INSNS (1);
10720 /* Widening beyond 32 bits requires one more insn. */
10721 if (mode == DImode)
10723 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10726 return true;
10728 case CONST_INT:
10729 *cost = 0;
10730 /* CONST_INT has no mode, so we cannot tell for sure how many
10731 insns are really going to be needed. The best we can do is
10732 look at the value passed. If it fits in SImode, then assume
10733 that's the mode it will be used for. Otherwise assume it
10734 will be used in DImode. */
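/* Illustration: 0x12345678 survives truncation to SImode, so it is costed as
   an SImode constant; a 64-bit value such as 0x100000000 does not, so it is
   costed below as two separate SImode constants (low and high words).  */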
10735 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10736 mode = SImode;
10737 else
10738 mode = DImode;
10740 /* Avoid blowing up in arm_gen_constant (). */
10741 if (!(outer_code == PLUS
10742 || outer_code == AND
10743 || outer_code == IOR
10744 || outer_code == XOR
10745 || outer_code == MINUS))
10746 outer_code = SET;
10748 const_int_cost:
10749 if (mode == SImode)
10751 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10752 INTVAL (x), NULL, NULL,
10753 0, 0));
10754 /* Extra costs? */
10756 else
10758 *cost += COSTS_N_INSNS (arm_gen_constant
10759 (outer_code, SImode, NULL,
10760 trunc_int_for_mode (INTVAL (x), SImode),
10761 NULL, NULL, 0, 0)
10762 + arm_gen_constant (outer_code, SImode, NULL,
10763 INTVAL (x) >> 32, NULL,
10764 NULL, 0, 0));
10765 /* Extra costs? */
10768 return true;
10770 case CONST:
10771 case LABEL_REF:
10772 case SYMBOL_REF:
10773 if (speed_p)
10775 if (arm_arch_thumb2 && !flag_pic)
10776 *cost = COSTS_N_INSNS (2);
10777 else
10778 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10780 else
10781 *cost = COSTS_N_INSNS (2);
10783 if (flag_pic)
10785 *cost += COSTS_N_INSNS (1);
10786 if (speed_p)
10787 *cost += extra_cost->alu.arith;
10790 return true;
10792 case CONST_FIXED:
10793 *cost = COSTS_N_INSNS (4);
10794 /* Fixme. */
10795 return true;
10797 case CONST_DOUBLE:
10798 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10799 && (mode == SFmode || !TARGET_VFP_SINGLE))
10801 if (vfp3_const_double_rtx (x))
10803 *cost = COSTS_N_INSNS (1);
10804 if (speed_p)
10805 *cost += extra_cost->fp[mode == DFmode].fpconst;
10806 return true;
10809 if (speed_p)
10811 *cost = COSTS_N_INSNS (1);
10812 if (mode == DFmode)
10813 *cost += extra_cost->ldst.loadd;
10814 else
10815 *cost += extra_cost->ldst.loadf;
10817 else
10818 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10820 return true;
10822 *cost = COSTS_N_INSNS (4);
10823 return true;
10825 case CONST_VECTOR:
10826 /* Fixme. */
10827 if (TARGET_NEON
10828 && TARGET_HARD_FLOAT
10829 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10830 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10831 *cost = COSTS_N_INSNS (1);
10832 else
10833 *cost = COSTS_N_INSNS (4);
10834 return true;
10836 case HIGH:
10837 case LO_SUM:
10838 *cost = COSTS_N_INSNS (1);
10839 /* When optimizing for size, we prefer constant pool entries to
10840 MOVW/MOVT pairs, so bump the cost of these slightly. */
10841 if (!speed_p)
10842 *cost += 1;
10843 return true;
10845 case CLZ:
10846 *cost = COSTS_N_INSNS (1);
10847 if (speed_p)
10848 *cost += extra_cost->alu.clz;
10849 return false;
10851 case SMIN:
10852 if (XEXP (x, 1) == const0_rtx)
10854 *cost = COSTS_N_INSNS (1);
10855 if (speed_p)
10856 *cost += extra_cost->alu.log_shift;
10857 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10858 return true;
10860 /* Fall through. */
10861 case SMAX:
10862 case UMIN:
10863 case UMAX:
10864 *cost = COSTS_N_INSNS (2);
10865 return false;
10867 case TRUNCATE:
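/* The pattern tested below is the high 32 bits of a widening 32x32->64-bit
   multiply.  It can be computed with a single widening multiply (e.g.
   SMULL/UMULL, with only the high result word used), hence the single
   widening-multiply cost.  */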
10868 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10869 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10870 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10871 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10872 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10873 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10874 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10875 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10876 == ZERO_EXTEND))))
10878 *cost = COSTS_N_INSNS (1);
10879 if (speed_p)
10880 *cost += extra_cost->mult[1].extend;
10881 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10882 speed_p)
10883 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10884 0, speed_p));
10885 return true;
10887 *cost = LIBCALL_COST (1);
10888 return false;
10890 case UNSPEC:
10891 return arm_unspec_cost (x, outer_code, speed_p, cost);
10893 case PC:
10894 /* Reading the PC is like reading any other register. Writing it
10895 is more expensive, but we take that into account elsewhere. */
10896 *cost = 0;
10897 return true;
10899 case ZERO_EXTRACT:
10900 /* TODO: Simple zero_extract of bottom bits using AND. */
10901 /* Fall through. */
10902 case SIGN_EXTRACT:
10903 if (arm_arch6
10904 && mode == SImode
10905 && CONST_INT_P (XEXP (x, 1))
10906 && CONST_INT_P (XEXP (x, 2)))
10908 *cost = COSTS_N_INSNS (1);
10909 if (speed_p)
10910 *cost += extra_cost->alu.bfx;
10911 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10912 return true;
10914 /* Without UBFX/SBFX, need to resort to shift operations. */
10915 *cost = COSTS_N_INSNS (2);
10916 if (speed_p)
10917 *cost += 2 * extra_cost->alu.shift;
10918 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10919 return true;
10921 case FLOAT_EXTEND:
10922 if (TARGET_HARD_FLOAT)
10924 *cost = COSTS_N_INSNS (1);
10925 if (speed_p)
10926 *cost += extra_cost->fp[mode == DFmode].widen;
10927 if (!TARGET_FPU_ARMV8
10928 && GET_MODE (XEXP (x, 0)) == HFmode)
10930 /* Pre v8, widening HF->DF is a two-step process, first
10931 widening to SFmode. */
10932 *cost += COSTS_N_INSNS (1);
10933 if (speed_p)
10934 *cost += extra_cost->fp[0].widen;
10936 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10937 return true;
10940 *cost = LIBCALL_COST (1);
10941 return false;
10943 case FLOAT_TRUNCATE:
10944 if (TARGET_HARD_FLOAT)
10946 *cost = COSTS_N_INSNS (1);
10947 if (speed_p)
10948 *cost += extra_cost->fp[mode == DFmode].narrow;
10949 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10950 return true;
10951 /* Vector modes? */
10953 *cost = LIBCALL_COST (1);
10954 return false;
10956 case FMA:
10957 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10959 rtx op0 = XEXP (x, 0);
10960 rtx op1 = XEXP (x, 1);
10961 rtx op2 = XEXP (x, 2);
10963 *cost = COSTS_N_INSNS (1);
10965 /* vfms or vfnma. */
10966 if (GET_CODE (op0) == NEG)
10967 op0 = XEXP (op0, 0);
10969 /* vfnms or vfnma. */
10970 if (GET_CODE (op2) == NEG)
10971 op2 = XEXP (op2, 0);
10973 *cost += rtx_cost (op0, FMA, 0, speed_p);
10974 *cost += rtx_cost (op1, FMA, 1, speed_p);
10975 *cost += rtx_cost (op2, FMA, 2, speed_p);
10977 if (speed_p)
10978 *cost += extra_cost->fp[mode == DFmode].fma;
10980 return true;
10983 *cost = LIBCALL_COST (3);
10984 return false;
10986 case FIX:
10987 case UNSIGNED_FIX:
10988 if (TARGET_HARD_FLOAT)
10990 if (GET_MODE_CLASS (mode) == MODE_INT)
10992 *cost = COSTS_N_INSNS (1);
10993 if (speed_p)
10994 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10995 /* Strip off the 'cost' of rounding towards zero. */
10996 if (GET_CODE (XEXP (x, 0)) == FIX)
10997 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10998 else
10999 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11000 /* ??? Increase the cost to deal with transferring from
11001 FP -> CORE registers? */
11002 return true;
11004 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11005 && TARGET_FPU_ARMV8)
11007 *cost = COSTS_N_INSNS (1);
11008 if (speed_p)
11009 *cost += extra_cost->fp[mode == DFmode].roundint;
11010 return false;
11012 /* Vector costs? */
11014 *cost = LIBCALL_COST (1);
11015 return false;
11017 case FLOAT:
11018 case UNSIGNED_FLOAT:
11019 if (TARGET_HARD_FLOAT)
11021 /* ??? Increase the cost to deal with transferring from CORE
11022 -> FP registers? */
11023 *cost = COSTS_N_INSNS (1);
11024 if (speed_p)
11025 *cost += extra_cost->fp[mode == DFmode].fromint;
11026 return false;
11028 *cost = LIBCALL_COST (1);
11029 return false;
11031 case CALL:
11032 *cost = COSTS_N_INSNS (1);
11033 return true;
11035 case ASM_OPERANDS:
11037 /* Just a guess. Estimate the number of instructions in the asm
11038 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11039 though (see PR60663). */
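/* For example (illustrative): an asm whose template expands to two
   instructions and which has three inputs is costed as COSTS_N_INSNS (5).  */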
11040 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11041 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11043 *cost = COSTS_N_INSNS (asm_length + num_operands);
11044 return true;
11046 default:
11047 if (mode != VOIDmode)
11048 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11049 else
11050 *cost = COSTS_N_INSNS (4); /* Who knows? */
11051 return false;
11055 #undef HANDLE_NARROW_SHIFT_ARITH
11057 /* Implement TARGET_RTX_COSTS. Dispatch to the table-driven or legacy
11057 per-core cost functions, for both speed and size. */
11058 static bool
11059 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11060 int *total, bool speed)
11062 bool result;
11064 if (TARGET_OLD_RTX_COSTS
11065 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11067 /* Old way. (Deprecated.) */
11068 if (!speed)
11069 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11070 (enum rtx_code) outer_code, total);
11071 else
11072 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code, total,
11074 speed);
11076 else
11078 /* New way. */
11079 if (current_tune->insn_extra_cost)
11080 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11081 (enum rtx_code) outer_code,
11082 current_tune->insn_extra_cost,
11083 total, speed);
11084 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11085 && current_tune->insn_extra_cost == NULL */
11086 else
11087 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11088 (enum rtx_code) outer_code,
11089 &generic_extra_costs, total, speed);
11092 if (dump_file && (dump_flags & TDF_DETAILS))
11094 print_rtl_single (dump_file, x);
11095 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11096 *total, result ? "final" : "partial");
11098 return result;
11101 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11102 supported on any "slowmul" cores, so it can be ignored. */
11104 static bool
11105 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11106 int *total, bool speed)
11108 machine_mode mode = GET_MODE (x);
11110 if (TARGET_THUMB)
11112 *total = thumb1_rtx_costs (x, code, outer_code);
11113 return true;
11116 switch (code)
11118 case MULT:
11119 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11120 || mode == DImode)
11122 *total = COSTS_N_INSNS (20);
11123 return false;
11126 if (CONST_INT_P (XEXP (x, 1)))
11128 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11129 & (unsigned HOST_WIDE_INT) 0xffffffff);
11130 int cost, const_ok = const_ok_for_arm (i);
11131 int j, booth_unit_size;
11133 /* Tune as appropriate. */
11134 cost = const_ok ? 4 : 8;
11135 booth_unit_size = 2;
11136 for (j = 0; i && j < 32; j += booth_unit_size)
11138 i >>= booth_unit_size;
11139 cost++;
11142 *total = COSTS_N_INSNS (cost);
11143 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11144 return true;
11147 *total = COSTS_N_INSNS (20);
11148 return false;
11150 default:
11151 return arm_rtx_costs_1 (x, outer_code, total, speed);
11156 /* RTX cost for cores with a fast multiply unit (M variants). */
11158 static bool
11159 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11160 int *total, bool speed)
11162 machine_mode mode = GET_MODE (x);
11164 if (TARGET_THUMB1)
11166 *total = thumb1_rtx_costs (x, code, outer_code);
11167 return true;
11170 /* ??? should thumb2 use different costs? */
11171 switch (code)
11173 case MULT:
11174 /* There is no point basing this on the tuning, since it is always the
11175 fast variant if it exists at all. */
11176 if (mode == DImode
11177 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11178 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11179 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11181 *total = COSTS_N_INSNS (2);
11182 return false;
11186 if (mode == DImode)
11188 *total = COSTS_N_INSNS (5);
11189 return false;
11192 if (CONST_INT_P (XEXP (x, 1)))
11194 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11195 & (unsigned HOST_WIDE_INT) 0xffffffff);
11196 int cost, const_ok = const_ok_for_arm (i);
11197 int j, booth_unit_size;
11199 /* Tune as appropriate. */
11200 cost = const_ok ? 4 : 8;
11201 booth_unit_size = 8;
11202 for (j = 0; i && j < 32; j += booth_unit_size)
11204 i >>= booth_unit_size;
11205 cost++;
11208 *total = COSTS_N_INSNS (cost);
11209 return false;
11212 if (mode == SImode)
11214 *total = COSTS_N_INSNS (4);
11215 return false;
11218 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11220 if (TARGET_HARD_FLOAT
11221 && (mode == SFmode
11222 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11224 *total = COSTS_N_INSNS (1);
11225 return false;
11229 /* Requires a lib call */
11230 *total = COSTS_N_INSNS (20);
11231 return false;
11233 default:
11234 return arm_rtx_costs_1 (x, outer_code, total, speed);
11239 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11240 so it can be ignored. */
11242 static bool
11243 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11244 int *total, bool speed)
11246 machine_mode mode = GET_MODE (x);
11248 if (TARGET_THUMB)
11250 *total = thumb1_rtx_costs (x, code, outer_code);
11251 return true;
11254 switch (code)
11256 case COMPARE:
11257 if (GET_CODE (XEXP (x, 0)) != MULT)
11258 return arm_rtx_costs_1 (x, outer_code, total, speed);
11260 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11261 will stall until the multiplication is complete. */
11262 *total = COSTS_N_INSNS (3);
11263 return false;
11265 case MULT:
11266 /* There is no point basing this on the tuning, since it is always the
11267 fast variant if it exists at all. */
11268 if (mode == DImode
11269 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11270 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11271 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11273 *total = COSTS_N_INSNS (2);
11274 return false;
11278 if (mode == DImode)
11280 *total = COSTS_N_INSNS (5);
11281 return false;
11284 if (CONST_INT_P (XEXP (x, 1)))
11286 /* If operand 1 is a constant we can more accurately
11287 calculate the cost of the multiply. The multiplier can
11288 retire 15 bits on the first cycle and a further 12 on the
11289 second. We do, of course, have to load the constant into
11290 a register first. */
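/* Illustrative values for the cost computed below: multiplying by 0x4000
   needs only the low 15 bits and incurs just the 1-cycle overhead, 0x12345
   sets bits above bit 14 and costs 2, and 0x12345678 also sets bits above
   bit 26 and costs 3.  */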
11291 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11292 /* There's a general overhead of one cycle. */
11293 int cost = 1;
11294 unsigned HOST_WIDE_INT masked_const;
11296 if (i & 0x80000000)
11297 i = ~i;
11299 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11301 masked_const = i & 0xffff8000;
11302 if (masked_const != 0)
11304 cost++;
11305 masked_const = i & 0xf8000000;
11306 if (masked_const != 0)
11307 cost++;
11309 *total = COSTS_N_INSNS (cost);
11310 return false;
11313 if (mode == SImode)
11315 *total = COSTS_N_INSNS (3);
11316 return false;
11319 /* Requires a lib call */
11320 *total = COSTS_N_INSNS (20);
11321 return false;
11323 default:
11324 return arm_rtx_costs_1 (x, outer_code, total, speed);
11329 /* RTX costs for 9e (and later) cores. */
11331 static bool
11332 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11333 int *total, bool speed)
11335 machine_mode mode = GET_MODE (x);
11337 if (TARGET_THUMB1)
11339 switch (code)
11341 case MULT:
11342 /* Small multiply: 32 cycles for an integer multiply inst. */
11343 if (arm_arch6m && arm_m_profile_small_mul)
11344 *total = COSTS_N_INSNS (32);
11345 else
11346 *total = COSTS_N_INSNS (3);
11347 return true;
11349 default:
11350 *total = thumb1_rtx_costs (x, code, outer_code);
11351 return true;
11355 switch (code)
11357 case MULT:
11358 /* There is no point basing this on the tuning, since it is always the
11359 fast variant if it exists at all. */
11360 if (mode == DImode
11361 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11362 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11363 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11365 *total = COSTS_N_INSNS (2);
11366 return false;
11370 if (mode == DImode)
11372 *total = COSTS_N_INSNS (5);
11373 return false;
11376 if (mode == SImode)
11378 *total = COSTS_N_INSNS (2);
11379 return false;
11382 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11384 if (TARGET_HARD_FLOAT
11385 && (mode == SFmode
11386 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11388 *total = COSTS_N_INSNS (1);
11389 return false;
11393 *total = COSTS_N_INSNS (20);
11394 return false;
11396 default:
11397 return arm_rtx_costs_1 (x, outer_code, total, speed);
11400 /* All address computations that can be done are free, but rtx cost returns
11401 the same for practically all of them. So we weight the different types
11402 of address here in the order (most pref first):
11403 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
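/* Worked examples of the weighting below (illustrative): (post_inc (reg))
   scores 0, (plus (reg) (const_int 8)) scores 2, (plus (reg) (mult (reg)
   (const_int 4))) scores 3, a plain (reg) scores 6 and a (symbol_ref)
   scores 10.  */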
11404 static inline int
11405 arm_arm_address_cost (rtx x)
11407 enum rtx_code c = GET_CODE (x);
11409 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11410 return 0;
11411 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11412 return 10;
11414 if (c == PLUS)
11416 if (CONST_INT_P (XEXP (x, 1)))
11417 return 2;
11419 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11420 return 3;
11422 return 4;
11425 return 6;
11428 static inline int
11429 arm_thumb_address_cost (rtx x)
11431 enum rtx_code c = GET_CODE (x);
11433 if (c == REG)
11434 return 1;
11435 if (c == PLUS
11436 && REG_P (XEXP (x, 0))
11437 && CONST_INT_P (XEXP (x, 1)))
11438 return 1;
11440 return 2;
11443 static int
11444 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11445 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11447 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11450 /* Adjust cost hook for XScale. */
11451 static bool
11452 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11454 /* Some true dependencies can have a higher cost depending
11455 on precisely how certain input operands are used. */
11456 if (REG_NOTE_KIND(link) == 0
11457 && recog_memoized (insn) >= 0
11458 && recog_memoized (dep) >= 0)
11460 int shift_opnum = get_attr_shift (insn);
11461 enum attr_type attr_type = get_attr_type (dep);
11463 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11464 operand for INSN. If we have a shifted input operand and the
11465 instruction we depend on is another ALU instruction, then we may
11466 have to account for an additional stall. */
11467 if (shift_opnum != 0
11468 && (attr_type == TYPE_ALU_SHIFT_IMM
11469 || attr_type == TYPE_ALUS_SHIFT_IMM
11470 || attr_type == TYPE_LOGIC_SHIFT_IMM
11471 || attr_type == TYPE_LOGICS_SHIFT_IMM
11472 || attr_type == TYPE_ALU_SHIFT_REG
11473 || attr_type == TYPE_ALUS_SHIFT_REG
11474 || attr_type == TYPE_LOGIC_SHIFT_REG
11475 || attr_type == TYPE_LOGICS_SHIFT_REG
11476 || attr_type == TYPE_MOV_SHIFT
11477 || attr_type == TYPE_MVN_SHIFT
11478 || attr_type == TYPE_MOV_SHIFT_REG
11479 || attr_type == TYPE_MVN_SHIFT_REG))
11481 rtx shifted_operand;
11482 int opno;
11484 /* Get the shifted operand. */
11485 extract_insn (insn);
11486 shifted_operand = recog_data.operand[shift_opnum];
11488 /* Iterate over all the operands in DEP. If we write an operand
11489 that overlaps with SHIFTED_OPERAND, then we have to increase the
11490 cost of this dependency. */
11491 extract_insn (dep);
11492 preprocess_constraints (dep);
11493 for (opno = 0; opno < recog_data.n_operands; opno++)
11495 /* We can ignore strict inputs. */
11496 if (recog_data.operand_type[opno] == OP_IN)
11497 continue;
11499 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11500 shifted_operand))
11502 *cost = 2;
11503 return false;
11508 return true;
11511 /* Adjust cost hook for Cortex A9. */
11512 static bool
11513 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11515 switch (REG_NOTE_KIND (link))
11517 case REG_DEP_ANTI:
11518 *cost = 0;
11519 return false;
11521 case REG_DEP_TRUE:
11522 case REG_DEP_OUTPUT:
11523 if (recog_memoized (insn) >= 0
11524 && recog_memoized (dep) >= 0)
11526 if (GET_CODE (PATTERN (insn)) == SET)
11528 if (GET_MODE_CLASS
11529 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11530 || GET_MODE_CLASS
11531 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11533 enum attr_type attr_type_insn = get_attr_type (insn);
11534 enum attr_type attr_type_dep = get_attr_type (dep);
11536 /* By default all dependencies of the form
11537 s0 = s0 <op> s1
11538 s0 = s0 <op> s2
11539 have an extra latency of 1 cycle because
11540 of the input and output dependency in this
11541 case. However, this gets modeled as a true
11542 dependency and hence all these checks. */
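/* Illustrative reading of the checks below: for two ordinary FP ops that
   both write s0, an output dependency is charged the default latency plus
   one cycle; for an FMACS/FMACD pair the dependent insn can start early, so
   an output dependency is charged the default latency minus three.  */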
11543 if (REG_P (SET_DEST (PATTERN (insn)))
11544 && REG_P (SET_DEST (PATTERN (dep)))
11545 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11546 SET_DEST (PATTERN (dep))))
11548 /* FMACS is a special case where the dependent
11549 instruction can be issued 3 cycles before
11550 the normal latency in case of an output
11551 dependency. */
11552 if ((attr_type_insn == TYPE_FMACS
11553 || attr_type_insn == TYPE_FMACD)
11554 && (attr_type_dep == TYPE_FMACS
11555 || attr_type_dep == TYPE_FMACD))
11557 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11558 *cost = insn_default_latency (dep) - 3;
11559 else
11560 *cost = insn_default_latency (dep);
11561 return false;
11563 else
11565 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11566 *cost = insn_default_latency (dep) + 1;
11567 else
11568 *cost = insn_default_latency (dep);
11570 return false;
11575 break;
11577 default:
11578 gcc_unreachable ();
11581 return true;
11584 /* Adjust cost hook for FA726TE. */
11585 static bool
11586 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11588 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11589 has a penalty of 3. */
11590 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11591 && recog_memoized (insn) >= 0
11592 && recog_memoized (dep) >= 0
11593 && get_attr_conds (dep) == CONDS_SET)
11595 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11596 if (get_attr_conds (insn) == CONDS_USE
11597 && get_attr_type (insn) != TYPE_BRANCH)
11599 *cost = 3;
11600 return false;
11603 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11604 || get_attr_conds (insn) == CONDS_USE)
11606 *cost = 0;
11607 return false;
11611 return true;
11614 /* Implement TARGET_REGISTER_MOVE_COST.
11616 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11617 such a move is typically more expensive than a single memory access. We set
11618 the cost to less than two memory accesses so that floating
11619 point to integer conversion does not go through memory. */
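/* With the numbers below (illustrative comparison): a VFP<->core transfer is
   costed at 15, which is less than twice the TARGET_32BIT memory move cost
   of 10 returned by arm_memory_move_cost, so the register path is preferred
   over spilling through memory.  */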
11622 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11623 reg_class_t from, reg_class_t to)
11625 if (TARGET_32BIT)
11627 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11628 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11629 return 15;
11630 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11631 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11632 return 4;
11633 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11634 return 20;
11635 else
11636 return 2;
11638 else
11640 if (from == HI_REGS || to == HI_REGS)
11641 return 4;
11642 else
11643 return 2;
11647 /* Implement TARGET_MEMORY_MOVE_COST. */
11650 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11651 bool in ATTRIBUTE_UNUSED)
11653 if (TARGET_32BIT)
11654 return 10;
11655 else
11657 if (GET_MODE_SIZE (mode) < 4)
11658 return 8;
11659 else
11660 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
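/* Illustrative values for the !TARGET_32BIT formula above: QImode and HImode
   moves cost a flat 8, while an SImode move costs 2 * 4 * 1 = 8 via LO_REGS
   and 2 * 4 * 2 = 16 via any other class.  */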
11664 /* Vectorizer cost model implementation. */
11666 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11667 static int
11668 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11669 tree vectype,
11670 int misalign ATTRIBUTE_UNUSED)
11672 unsigned elements;
11674 switch (type_of_cost)
11676 case scalar_stmt:
11677 return current_tune->vec_costs->scalar_stmt_cost;
11679 case scalar_load:
11680 return current_tune->vec_costs->scalar_load_cost;
11682 case scalar_store:
11683 return current_tune->vec_costs->scalar_store_cost;
11685 case vector_stmt:
11686 return current_tune->vec_costs->vec_stmt_cost;
11688 case vector_load:
11689 return current_tune->vec_costs->vec_align_load_cost;
11691 case vector_store:
11692 return current_tune->vec_costs->vec_store_cost;
11694 case vec_to_scalar:
11695 return current_tune->vec_costs->vec_to_scalar_cost;
11697 case scalar_to_vec:
11698 return current_tune->vec_costs->scalar_to_vec_cost;
11700 case unaligned_load:
11701 return current_tune->vec_costs->vec_unalign_load_cost;
11703 case unaligned_store:
11704 return current_tune->vec_costs->vec_unalign_store_cost;
11706 case cond_branch_taken:
11707 return current_tune->vec_costs->cond_taken_branch_cost;
11709 case cond_branch_not_taken:
11710 return current_tune->vec_costs->cond_not_taken_branch_cost;
11712 case vec_perm:
11713 case vec_promote_demote:
11714 return current_tune->vec_costs->vec_stmt_cost;
11716 case vec_construct:
11717 elements = TYPE_VECTOR_SUBPARTS (vectype);
11718 return elements / 2 + 1;
11720 default:
11721 gcc_unreachable ();
11725 /* Implement targetm.vectorize.add_stmt_cost. */
11727 static unsigned
11728 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11729 struct _stmt_vec_info *stmt_info, int misalign,
11730 enum vect_cost_model_location where)
11732 unsigned *cost = (unsigned *) data;
11733 unsigned retval = 0;
11735 if (flag_vect_cost_model)
11737 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11738 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11740 /* Statements in an inner loop relative to the loop being
11741 vectorized are weighted more heavily. The value here is
11742 arbitrary and could potentially be improved with analysis. */
11743 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11744 count *= 50; /* FIXME. */
11746 retval = (unsigned) (count * stmt_cost);
11747 cost[where] += retval;
11750 return retval;
11753 /* Return true if and only if this insn can dual-issue only as older. */
11754 static bool
11755 cortexa7_older_only (rtx_insn *insn)
11757 if (recog_memoized (insn) < 0)
11758 return false;
11760 switch (get_attr_type (insn))
11762 case TYPE_ALU_DSP_REG:
11763 case TYPE_ALU_SREG:
11764 case TYPE_ALUS_SREG:
11765 case TYPE_LOGIC_REG:
11766 case TYPE_LOGICS_REG:
11767 case TYPE_ADC_REG:
11768 case TYPE_ADCS_REG:
11769 case TYPE_ADR:
11770 case TYPE_BFM:
11771 case TYPE_REV:
11772 case TYPE_MVN_REG:
11773 case TYPE_SHIFT_IMM:
11774 case TYPE_SHIFT_REG:
11775 case TYPE_LOAD_BYTE:
11776 case TYPE_LOAD1:
11777 case TYPE_STORE1:
11778 case TYPE_FFARITHS:
11779 case TYPE_FADDS:
11780 case TYPE_FFARITHD:
11781 case TYPE_FADDD:
11782 case TYPE_FMOV:
11783 case TYPE_F_CVT:
11784 case TYPE_FCMPS:
11785 case TYPE_FCMPD:
11786 case TYPE_FCONSTS:
11787 case TYPE_FCONSTD:
11788 case TYPE_FMULS:
11789 case TYPE_FMACS:
11790 case TYPE_FMULD:
11791 case TYPE_FMACD:
11792 case TYPE_FDIVS:
11793 case TYPE_FDIVD:
11794 case TYPE_F_MRC:
11795 case TYPE_F_MRRC:
11796 case TYPE_F_FLAG:
11797 case TYPE_F_LOADS:
11798 case TYPE_F_STORES:
11799 return true;
11800 default:
11801 return false;
11805 /* Return true if and only if this insn can dual-issue as younger. */
11806 static bool
11807 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11809 if (recog_memoized (insn) < 0)
11811 if (verbose > 5)
11812 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11813 return false;
11816 switch (get_attr_type (insn))
11818 case TYPE_ALU_IMM:
11819 case TYPE_ALUS_IMM:
11820 case TYPE_LOGIC_IMM:
11821 case TYPE_LOGICS_IMM:
11822 case TYPE_EXTEND:
11823 case TYPE_MVN_IMM:
11824 case TYPE_MOV_IMM:
11825 case TYPE_MOV_REG:
11826 case TYPE_MOV_SHIFT:
11827 case TYPE_MOV_SHIFT_REG:
11828 case TYPE_BRANCH:
11829 case TYPE_CALL:
11830 return true;
11831 default:
11832 return false;
11837 /* Look for an instruction that can dual issue only as an older
11838 instruction, and move it in front of any instructions that can
11839 dual-issue as younger, while preserving the relative order of all
11840 other instructions in the ready list. This is a heuristic to help
11841 dual-issue in later cycles, by postponing issue of more flexible
11842 instructions. This heuristic may affect dual issue opportunities
11843 in the current cycle. */
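/* For example (schematic): if the ready list would issue a mov-immediate
   (younger per cortexa7_younger) ahead of a register-register add
   (older-only per cortexa7_older_only), the add is moved in front of the
   mov, leaving all other insns in their original relative order.  */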
11844 static void
11845 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11846 int *n_readyp, int clock)
11848 int i;
11849 int first_older_only = -1, first_younger = -1;
11851 if (verbose > 5)
11852 fprintf (file,
11853 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11854 clock,
11855 *n_readyp);
11857 /* Traverse the ready list from the head (the instruction to issue
11858 first), looking for the first instruction that can issue as
11859 younger and the first instruction that can dual-issue only as
11860 older. */
11861 for (i = *n_readyp - 1; i >= 0; i--)
11863 rtx_insn *insn = ready[i];
11864 if (cortexa7_older_only (insn))
11866 first_older_only = i;
11867 if (verbose > 5)
11868 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11869 break;
11871 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11872 first_younger = i;
11875 /* Nothing to reorder because either no younger insn found or insn
11876 that can dual-issue only as older appears before any insn that
11877 can dual-issue as younger. */
11878 if (first_younger == -1)
11880 if (verbose > 5)
11881 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11882 return;
11885 /* Nothing to reorder because no older-only insn in the ready list. */
11886 if (first_older_only == -1)
11888 if (verbose > 5)
11889 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11890 return;
11893 /* Move first_older_only insn before first_younger. */
11894 if (verbose > 5)
11895 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11896 INSN_UID(ready [first_older_only]),
11897 INSN_UID(ready [first_younger]));
11898 rtx_insn *first_older_only_insn = ready [first_older_only];
11899 for (i = first_older_only; i < first_younger; i++)
11901 ready[i] = ready[i+1];
11904 ready[i] = first_older_only_insn;
11905 return;
11908 /* Implement TARGET_SCHED_REORDER. */
11909 static int
11910 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11911 int clock)
11913 switch (arm_tune)
11915 case cortexa7:
11916 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11917 break;
11918 default:
11919 /* Do nothing for other cores. */
11920 break;
11923 return arm_issue_rate ();
11926 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11927 It corrects the value of COST based on the relationship between
11928 INSN and DEP through the dependence LINK. It returns the new
11929 value. There is a per-core adjust_cost hook to adjust scheduler costs
11930 and the per-core hook can choose to completely override the generic
11931 adjust_cost function. Only put bits of code into arm_adjust_cost that
11932 are common across all cores. */
11933 static int
11934 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11936 rtx i_pat, d_pat;
11938 /* When generating Thumb-1 code, we want to place flag-setting operations
11939 close to a conditional branch which depends on them, so that we can
11940 omit the comparison. */
11941 if (TARGET_THUMB1
11942 && REG_NOTE_KIND (link) == 0
11943 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11944 && recog_memoized (dep) >= 0
11945 && get_attr_conds (dep) == CONDS_SET)
11946 return 0;
11948 if (current_tune->sched_adjust_cost != NULL)
11950 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11951 return cost;
11954 /* XXX Is this strictly true? */
11955 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11956 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11957 return 0;
11959 /* Call insns don't incur a stall, even if they follow a load. */
11960 if (REG_NOTE_KIND (link) == 0
11961 && CALL_P (insn))
11962 return 1;
11964 if ((i_pat = single_set (insn)) != NULL
11965 && MEM_P (SET_SRC (i_pat))
11966 && (d_pat = single_set (dep)) != NULL
11967 && MEM_P (SET_DEST (d_pat)))
11969 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11970 /* This is a load after a store, there is no conflict if the load reads
11971 from a cached area. Assume that loads from the stack, and from the
11972 constant pool are cached, and that others will miss. This is a
11973 hack. */
11975 if ((GET_CODE (src_mem) == SYMBOL_REF
11976 && CONSTANT_POOL_ADDRESS_P (src_mem))
11977 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11978 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11979 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11980 return 1;
11983 return cost;
11987 arm_max_conditional_execute (void)
11989 return max_insns_skipped;
11992 static int
11993 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11995 if (TARGET_32BIT)
11996 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11997 else
11998 return (optimize > 0) ? 2 : 0;
12001 static int
12002 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12004 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12007 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12008 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12009 sequences of non-executed instructions in IT blocks probably take the same
12010 amount of time as executed instructions (and the IT instruction itself takes
12011 space in icache). This function was experimentally determined to give good
12012 results on a popular embedded benchmark. */
12014 static int
12015 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12017 return (TARGET_32BIT && speed_p) ? 1
12018 : arm_default_branch_cost (speed_p, predictable_p);
12021 static bool fp_consts_inited = false;
12023 static REAL_VALUE_TYPE value_fp0;
12025 static void
12026 init_fp_table (void)
12028 REAL_VALUE_TYPE r;
12030 r = REAL_VALUE_ATOF ("0", DFmode);
12031 value_fp0 = r;
12032 fp_consts_inited = true;
12035 /* Return TRUE if rtx X is a valid immediate FP constant. */
12037 arm_const_double_rtx (rtx x)
12039 REAL_VALUE_TYPE r;
12041 if (!fp_consts_inited)
12042 init_fp_table ();
12044 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12045 if (REAL_VALUE_MINUS_ZERO (r))
12046 return 0;
12048 if (REAL_VALUES_EQUAL (r, value_fp0))
12049 return 1;
12051 return 0;
12054 /* VFPv3 has a fairly wide range of representable immediates, formed from
12055 "quarter-precision" floating-point values. These can be evaluated using this
12056 formula (with ^ for exponentiation):
12058 -1^s * n * 2^-r
12060 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12061 16 <= n <= 31 and 0 <= r <= 7.
12063 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12065 - A (most-significant) is the sign bit.
12066 - BCD are the exponent (encoded as r XOR 3).
12067 - EFGH are the mantissa (encoded as n - 16).
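/* Worked example of the encoding above: 1.0 = +16 * 2^-4 (s = 0, n = 16,
   r = 4), giving A = 0, BCD = 4 ^ 3 = 7, EFGH = 16 - 16 = 0, i.e. index
   0x70.  The representable magnitudes therefore run from 16 * 2^-7 = 0.125
   up to 31 * 2^0 = 31.  */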
12070 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12071 fconst[sd] instruction, or -1 if X isn't suitable. */
12072 static int
12073 vfp3_const_double_index (rtx x)
12075 REAL_VALUE_TYPE r, m;
12076 int sign, exponent;
12077 unsigned HOST_WIDE_INT mantissa, mant_hi;
12078 unsigned HOST_WIDE_INT mask;
12079 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12080 bool fail;
12082 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12083 return -1;
12085 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12087 /* We can't represent these things, so detect them first. */
12088 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12089 return -1;
12091 /* Extract sign, exponent and mantissa. */
12092 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12093 r = real_value_abs (&r);
12094 exponent = REAL_EXP (&r);
12095 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12096 highest (sign) bit, with a fixed binary point at bit point_pos.
12097 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12098 bits for the mantissa, this may fail (low bits would be lost). */
12099 real_ldexp (&m, &r, point_pos - exponent);
12100 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12101 mantissa = w.elt (0);
12102 mant_hi = w.elt (1);
12104 /* If there are bits set in the low part of the mantissa, we can't
12105 represent this value. */
12106 if (mantissa != 0)
12107 return -1;
12109 /* Now make it so that mantissa contains the most-significant bits, and move
12110 the point_pos to indicate that the least-significant bits have been
12111 discarded. */
12112 point_pos -= HOST_BITS_PER_WIDE_INT;
12113 mantissa = mant_hi;
12115 /* We can permit four significant bits of mantissa only, plus a high bit
12116 which is always 1. */
12117 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12118 if ((mantissa & mask) != 0)
12119 return -1;
12121 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12122 mantissa >>= point_pos - 5;
12124 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12125 floating-point immediate zero with Neon using an integer-zero load, but
12126 that case is handled elsewhere.) */
12127 if (mantissa == 0)
12128 return -1;
12130 gcc_assert (mantissa >= 16 && mantissa <= 31);
12132 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12133 normalized significands are in the range [1, 2). (Our mantissa is shifted
12134 left 4 places at this point relative to normalized IEEE754 values). GCC
12135 internally uses [0.5, 1) (see real.c), so the exponent returned from
12136 REAL_EXP must be altered. */
12137 exponent = 5 - exponent;
12139 if (exponent < 0 || exponent > 7)
12140 return -1;
12142 /* Sign, mantissa and exponent are now in the correct form to plug into the
12143 formula described in the comment above. */
12144 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12147 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12149 vfp3_const_double_rtx (rtx x)
12151 if (!TARGET_VFP3)
12152 return 0;
12154 return vfp3_const_double_index (x) != -1;
12157 /* Recognize immediates which can be used in various Neon instructions. Legal
12158 immediates are described by the following table (for VMVN variants, the
12159 bitwise inverse of the constant shown is recognized. In either case, VMOV
12160 is output and the correct instruction to use for a given constant is chosen
12161 by the assembler). The constant shown is replicated across all elements of
12162 the destination vector.
12164 insn elems variant constant (binary)
12165 ---- ----- ------- -----------------
12166 vmov i32 0 00000000 00000000 00000000 abcdefgh
12167 vmov i32 1 00000000 00000000 abcdefgh 00000000
12168 vmov i32 2 00000000 abcdefgh 00000000 00000000
12169 vmov i32 3 abcdefgh 00000000 00000000 00000000
12170 vmov i16 4 00000000 abcdefgh
12171 vmov i16 5 abcdefgh 00000000
12172 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12173 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12174 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12175 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12176 vmvn i16 10 00000000 abcdefgh
12177 vmvn i16 11 abcdefgh 00000000
12178 vmov i32 12 00000000 00000000 abcdefgh 11111111
12179 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12180 vmov i32 14 00000000 abcdefgh 11111111 11111111
12181 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12182 vmov i8 16 abcdefgh
12183 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12184 eeeeeeee ffffffff gggggggg hhhhhhhh
12185 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12186 vmov f32 19 00000000 00000000 00000000 00000000
12188 For case 18, B = !b. Representable values are exactly those accepted by
12189 vfp3_const_double_index, but are output as floating-point numbers rather
12190 than indices.
12192 For case 19, we will change it to vmov.i32 when assembling.
12194 Variants 0-5 (inclusive) may also be used as immediates for the second
12195 operand of VORR/VBIC instructions.
12197 The INVERSE argument causes the bitwise inverse of the given operand to be
12198 recognized instead (used for recognizing legal immediates for the VAND/VORN
12199 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12200 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12201 output, rather than the real insns vbic/vorr).
12203 INVERSE makes no difference to the recognition of float vectors.
12205 The return value is the variant of immediate as shown in the above table, or
12206 -1 if the given value doesn't match any of the listed patterns.
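/* A small example of the table above: a V4SImode CONST_VECTOR whose elements
   are all 0x000000ab splats to the byte pattern ab 00 00 00 repeated, so it
   is recognized as variant 0 with *ELEMENTWIDTH = 32 and *MODCONST = 0xab
   (a vmov.i32).  */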
12208 static int
12209 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12210 rtx *modconst, int *elementwidth)
12212 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12213 matches = 1; \
12214 for (i = 0; i < idx; i += (STRIDE)) \
12215 if (!(TEST)) \
12216 matches = 0; \
12217 if (matches) \
12219 immtype = (CLASS); \
12220 elsize = (ELSIZE); \
12221 break; \
12224 unsigned int i, elsize = 0, idx = 0, n_elts;
12225 unsigned int innersize;
12226 unsigned char bytes[16];
12227 int immtype = -1, matches;
12228 unsigned int invmask = inverse ? 0xff : 0;
12229 bool vector = GET_CODE (op) == CONST_VECTOR;
12231 if (vector)
12233 n_elts = CONST_VECTOR_NUNITS (op);
12234 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12236 else
12238 n_elts = 1;
12239 if (mode == VOIDmode)
12240 mode = DImode;
12241 innersize = GET_MODE_SIZE (mode);
12244 /* Vectors of float constants. */
12245 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12247 rtx el0 = CONST_VECTOR_ELT (op, 0);
12248 REAL_VALUE_TYPE r0;
12250 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12251 return -1;
12253 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12255 for (i = 1; i < n_elts; i++)
12257 rtx elt = CONST_VECTOR_ELT (op, i);
12258 REAL_VALUE_TYPE re;
12260 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12262 if (!REAL_VALUES_EQUAL (r0, re))
12263 return -1;
12266 if (modconst)
12267 *modconst = CONST_VECTOR_ELT (op, 0);
12269 if (elementwidth)
12270 *elementwidth = 0;
12272 if (el0 == CONST0_RTX (GET_MODE (el0)))
12273 return 19;
12274 else
12275 return 18;
12278 /* Splat vector constant out into a byte vector. */
12279 for (i = 0; i < n_elts; i++)
12281 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12282 unsigned HOST_WIDE_INT elpart;
12283 unsigned int part, parts;
12285 if (CONST_INT_P (el))
12287 elpart = INTVAL (el);
12288 parts = 1;
12290 else if (CONST_DOUBLE_P (el))
12292 elpart = CONST_DOUBLE_LOW (el);
12293 parts = 2;
12295 else
12296 gcc_unreachable ();
12298 for (part = 0; part < parts; part++)
12300 unsigned int byte;
12301 for (byte = 0; byte < innersize; byte++)
12303 bytes[idx++] = (elpart & 0xff) ^ invmask;
12304 elpart >>= BITS_PER_UNIT;
12306 if (CONST_DOUBLE_P (el))
12307 elpart = CONST_DOUBLE_HIGH (el);
12311 /* Sanity check. */
12312 gcc_assert (idx == GET_MODE_SIZE (mode));
12316 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12317 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12319 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12320 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12322 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12323 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12325 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12326 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12328 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12330 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12332 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12333 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12335 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12336 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12338 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12339 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12341 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12342 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12344 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12346 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12348 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12351 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12352 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12354 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12355 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12357 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12360 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12362 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12363 && bytes[i] == bytes[(i + 8) % idx]);
12365 while (0);
12367 if (immtype == -1)
12368 return -1;
12370 if (elementwidth)
12371 *elementwidth = elsize;
12373 if (modconst)
12375 unsigned HOST_WIDE_INT imm = 0;
12377 /* Un-invert bytes of recognized vector, if necessary. */
12378 if (invmask != 0)
12379 for (i = 0; i < idx; i++)
12380 bytes[i] ^= invmask;
12382 if (immtype == 17)
12384 /* FIXME: Broken on 32-bit H_W_I hosts. */
12385 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12387 for (i = 0; i < 8; i++)
12388 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12389 << (i * BITS_PER_UNIT);
12391 *modconst = GEN_INT (imm);
12393 else
12395 unsigned HOST_WIDE_INT imm = 0;
12397 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12398 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12400 *modconst = GEN_INT (imm);
12404 return immtype;
12405 #undef CHECK
12408 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12409 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12410 float elements), and a modified constant (whatever should be output for a
12411 VMOV) in *MODCONST. */
12414 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12415 rtx *modconst, int *elementwidth)
12417 rtx tmpconst;
12418 int tmpwidth;
12419 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12421 if (retval == -1)
12422 return 0;
12424 if (modconst)
12425 *modconst = tmpconst;
12427 if (elementwidth)
12428 *elementwidth = tmpwidth;
12430 return 1;
12433 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12434 the immediate is valid, write a constant suitable for using as an operand
12435 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12436 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12439 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12440 rtx *modconst, int *elementwidth)
12442 rtx tmpconst;
12443 int tmpwidth;
12444 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12446 if (retval < 0 || retval > 5)
12447 return 0;
12449 if (modconst)
12450 *modconst = tmpconst;
12452 if (elementwidth)
12453 *elementwidth = tmpwidth;
12455 return 1;
12458 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12459 the immediate is valid, write a constant suitable for using as an operand
12460 to VSHR/VSHL to *MODCONST and the corresponding element width to
12461 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12462 which have different limitations. */
12465 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12466 rtx *modconst, int *elementwidth,
12467 bool isleftshift)
12469 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12470 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12471 unsigned HOST_WIDE_INT last_elt = 0;
12472 unsigned HOST_WIDE_INT maxshift;
12474 /* Split vector constant out into a byte vector. */
12475 for (i = 0; i < n_elts; i++)
12477 rtx el = CONST_VECTOR_ELT (op, i);
12478 unsigned HOST_WIDE_INT elpart;
12480 if (CONST_INT_P (el))
12481 elpart = INTVAL (el);
12482 else if (CONST_DOUBLE_P (el))
12483 return 0;
12484 else
12485 gcc_unreachable ();
12487 if (i != 0 && elpart != last_elt)
12488 return 0;
12490 last_elt = elpart;
12493 /* Shift less than element size. */
12494 maxshift = innersize * 8;
12496 if (isleftshift)
12498 /* Left shift immediate value can be from 0 to <size>-1. */
12499 if (last_elt >= maxshift)
12500 return 0;
12502 else
12504 /* Right shift immediate value can be from 1 to <size>. */
12505 if (last_elt == 0 || last_elt > maxshift)
12506 return 0;
12509 if (elementwidth)
12510 *elementwidth = innersize * 8;
12512 if (modconst)
12513 *modconst = CONST_VECTOR_ELT (op, 0);
12515 return 1;
12518 /* Return a string suitable for output of Neon immediate logic operation
12519 MNEM. */
12521 char *
12522 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12523 int inverse, int quad)
12525 int width, is_valid;
12526 static char templ[40];
12528 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12530 gcc_assert (is_valid != 0);
12532 if (quad)
12533 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12534 else
12535 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12537 return templ;
12540 /* Return a string suitable for output of Neon immediate shift operation
12541 (VSHR or VSHL) MNEM. */
12543 char *
12544 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12545 machine_mode mode, int quad,
12546 bool isleftshift)
12548 int width, is_valid;
12549 static char templ[40];
12551 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12552 gcc_assert (is_valid != 0);
12554 if (quad)
12555 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12556 else
12557 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12559 return templ;
12562 /* Output a sequence of pairwise operations to implement a reduction.
12563 NOTE: We do "too much work" here, because pairwise operations work on two
12564 registers-worth of operands in one go. Unfortunately we can't exploit those
12565 extra calculations to do the full operation in fewer steps, I don't think.
12566 Although all vector elements of the result but the first are ignored, we
12567 actually calculate the same result in each of the elements. An alternative
12568 such as initially loading a vector with zero to use as each of the second
12569 operands would use up an additional register and take an extra instruction,
12570 for no particular gain. */
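/* Schematically, reducing a four-element vector {a, b, c, d} with a pairwise
   add takes two steps: the first produces {a+b, c+d, a+b, c+d}, the second
   {a+b+c+d, ...}; only element 0 of the final result is then used.  */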
12572 void
12573 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12574 rtx (*reduc) (rtx, rtx, rtx))
12576 machine_mode inner = GET_MODE_INNER (mode);
12577 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12578 rtx tmpsum = op1;
12580 for (i = parts / 2; i >= 1; i /= 2)
12582 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12583 emit_insn (reduc (dest, tmpsum, tmpsum));
12584 tmpsum = dest;
12588 /* If VALS is a vector constant that can be loaded into a register
12589 using VDUP, generate instructions to do so and return an RTX to
12590 assign to the register. Otherwise return NULL_RTX. */
12592 static rtx
12593 neon_vdup_constant (rtx vals)
12595 machine_mode mode = GET_MODE (vals);
12596 machine_mode inner_mode = GET_MODE_INNER (mode);
12597 int n_elts = GET_MODE_NUNITS (mode);
12598 bool all_same = true;
12599 rtx x;
12600 int i;
12602 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12603 return NULL_RTX;
12605 for (i = 0; i < n_elts; ++i)
12607 x = XVECEXP (vals, 0, i);
12608 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12609 all_same = false;
12612 if (!all_same)
12613 /* The elements are not all the same. We could handle repeating
12614 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12615 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12616 vdup.i16). */
12617 return NULL_RTX;
12619 /* We can load this constant by using VDUP and a constant in a
12620 single ARM register. This will be cheaper than a vector
12621 load. */
12623 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12624 return gen_rtx_VEC_DUPLICATE (mode, x);
12627 /* Generate code to load VALS, which is a PARALLEL containing only
12628 constants (for vec_init) or CONST_VECTOR, efficiently into a
12629 register. Returns an RTX to copy into the register, or NULL_RTX
12630 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12633 neon_make_constant (rtx vals)
12635 machine_mode mode = GET_MODE (vals);
12636 rtx target;
12637 rtx const_vec = NULL_RTX;
12638 int n_elts = GET_MODE_NUNITS (mode);
12639 int n_const = 0;
12640 int i;
12642 if (GET_CODE (vals) == CONST_VECTOR)
12643 const_vec = vals;
12644 else if (GET_CODE (vals) == PARALLEL)
12646 /* A CONST_VECTOR must contain only CONST_INTs and
12647 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12648 Only store valid constants in a CONST_VECTOR. */
12649 for (i = 0; i < n_elts; ++i)
12651 rtx x = XVECEXP (vals, 0, i);
12652 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12653 n_const++;
12655 if (n_const == n_elts)
12656 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12658 else
12659 gcc_unreachable ();
12661 if (const_vec != NULL
12662 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12663 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12664 return const_vec;
12665 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12666 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12667 pipeline cycle; creating the constant takes one or two ARM
12668 pipeline cycles. */
12669 return target;
12670 else if (const_vec != NULL_RTX)
12671 /* Load from constant pool. On Cortex-A8 this takes two cycles
12672 (for either double or quad vectors). We can not take advantage
12673 of single-cycle VLD1 because we need a PC-relative addressing
12674 mode. */
12675 return const_vec;
12676 else
12677 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12678 We can not construct an initializer. */
12679 return NULL_RTX;
12682 /* Initialize vector TARGET to VALS. */
12684 void
12685 neon_expand_vector_init (rtx target, rtx vals)
12687 machine_mode mode = GET_MODE (target);
12688 machine_mode inner_mode = GET_MODE_INNER (mode);
12689 int n_elts = GET_MODE_NUNITS (mode);
12690 int n_var = 0, one_var = -1;
12691 bool all_same = true;
12692 rtx x, mem;
12693 int i;
12695 for (i = 0; i < n_elts; ++i)
12697 x = XVECEXP (vals, 0, i);
12698 if (!CONSTANT_P (x))
12699 ++n_var, one_var = i;
12701 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12702 all_same = false;
12705 if (n_var == 0)
12707 rtx constant = neon_make_constant (vals);
12708 if (constant != NULL_RTX)
12710 emit_move_insn (target, constant);
12711 return;
12715 /* Splat a single non-constant element if we can. */
12716 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12718 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12719 emit_insn (gen_rtx_SET (VOIDmode, target,
12720 gen_rtx_VEC_DUPLICATE (mode, x)));
12721 return;
12724 /* One field is non-constant. Load constant then overwrite varying
12725 field. This is more efficient than using the stack. */
12726 if (n_var == 1)
12728 rtx copy = copy_rtx (vals);
12729 rtx index = GEN_INT (one_var);
12731 /* Load constant part of vector, substitute neighboring value for
12732 varying element. */
12733 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12734 neon_expand_vector_init (target, copy);
12736 /* Insert variable. */
12737 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12738 switch (mode)
12740 case V8QImode:
12741 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12742 break;
12743 case V16QImode:
12744 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12745 break;
12746 case V4HImode:
12747 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12748 break;
12749 case V8HImode:
12750 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12751 break;
12752 case V2SImode:
12753 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12754 break;
12755 case V4SImode:
12756 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12757 break;
12758 case V2SFmode:
12759 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12760 break;
12761 case V4SFmode:
12762 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12763 break;
12764 case V2DImode:
12765 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12766 break;
12767 default:
12768 gcc_unreachable ();
12770 return;
12773 /* Construct the vector in memory one field at a time
12774 and load the whole vector. */
12775 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12776 for (i = 0; i < n_elts; i++)
12777 emit_move_insn (adjust_address_nv (mem, inner_mode,
12778 i * GET_MODE_SIZE (inner_mode)),
12779 XVECEXP (vals, 0, i));
12780 emit_move_insn (target, mem);
12783 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12784 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12785 reported source locations are bogus. */
12787 static void
12788 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12789 const char *err)
12791 HOST_WIDE_INT lane;
12793 gcc_assert (CONST_INT_P (operand));
12795 lane = INTVAL (operand);
12797 if (lane < low || lane >= high)
12798 error (err);
12801 /* Bounds-check lanes. */
12803 void
12804 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12806 bounds_check (operand, low, high, "lane out of range");
12809 /* Bounds-check constants. */
12811 void
12812 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12814 bounds_check (operand, low, high, "constant out of range");
12817 HOST_WIDE_INT
12818 neon_element_bits (machine_mode mode)
12820 if (mode == DImode)
12821 return GET_MODE_BITSIZE (mode);
12822 else
12823 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12827 /* Predicates for `match_operand' and `match_operator'. */
12829 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12830 WB is true if full writeback address modes are allowed and is false
12831 if limited writeback address modes (POST_INC and PRE_DEC) are
12832 allowed. */
12835 arm_coproc_mem_operand (rtx op, bool wb)
12837 rtx ind;
12839 /* Reject eliminable registers. */
12840 if (! (reload_in_progress || reload_completed || lra_in_progress)
12841 && ( reg_mentioned_p (frame_pointer_rtx, op)
12842 || reg_mentioned_p (arg_pointer_rtx, op)
12843 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12844 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12845 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12846 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12847 return FALSE;
12849 /* Constants are converted into offsets from labels. */
12850 if (!MEM_P (op))
12851 return FALSE;
12853 ind = XEXP (op, 0);
12855 if (reload_completed
12856 && (GET_CODE (ind) == LABEL_REF
12857 || (GET_CODE (ind) == CONST
12858 && GET_CODE (XEXP (ind, 0)) == PLUS
12859 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12860 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12861 return TRUE;
12863 /* Match: (mem (reg)). */
12864 if (REG_P (ind))
12865 return arm_address_register_rtx_p (ind, 0);
12867 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12868 acceptable in any case (subject to verification by
12869 arm_address_register_rtx_p). We need WB to be true to accept
12870 PRE_INC and POST_DEC. */
12871 if (GET_CODE (ind) == POST_INC
12872 || GET_CODE (ind) == PRE_DEC
12873 || (wb
12874 && (GET_CODE (ind) == PRE_INC
12875 || GET_CODE (ind) == POST_DEC)))
12876 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12878 if (wb
12879 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12880 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12881 && GET_CODE (XEXP (ind, 1)) == PLUS
12882 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12883 ind = XEXP (ind, 1);
12885 /* Match:
12886 (plus (reg)
12887 (const)). */
12888 if (GET_CODE (ind) == PLUS
12889 && REG_P (XEXP (ind, 0))
12890 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12891 && CONST_INT_P (XEXP (ind, 1))
12892 && INTVAL (XEXP (ind, 1)) > -1024
12893 && INTVAL (XEXP (ind, 1)) < 1024
12894 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12895 return TRUE;
12897 return FALSE;
12900 /* Return TRUE if OP is a memory operand which we can load or store a vector
12901 to/from. TYPE is one of the following values:
12902 0 - Vector load/store (vldr)
12903 1 - Core registers (ldm)
12904 2 - Element/structure loads (vld1)
12907 neon_vector_mem_operand (rtx op, int type, bool strict)
12909 rtx ind;
12911 /* Reject eliminable registers. */
12912 if (! (reload_in_progress || reload_completed)
12913 && ( reg_mentioned_p (frame_pointer_rtx, op)
12914 || reg_mentioned_p (arg_pointer_rtx, op)
12915 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12916 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12917 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12918 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12919 return !strict;
12921 /* Constants are converted into offsets from labels. */
12922 if (!MEM_P (op))
12923 return FALSE;
12925 ind = XEXP (op, 0);
12927 if (reload_completed
12928 && (GET_CODE (ind) == LABEL_REF
12929 || (GET_CODE (ind) == CONST
12930 && GET_CODE (XEXP (ind, 0)) == PLUS
12931 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12932 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12933 return TRUE;
12935 /* Match: (mem (reg)). */
12936 if (REG_P (ind))
12937 return arm_address_register_rtx_p (ind, 0);
12939 /* Allow post-increment with Neon registers. */
12940 if ((type != 1 && GET_CODE (ind) == POST_INC)
12941 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12942 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12944 /* Allow post-increment by register for VLDn. */
12945 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12946 && GET_CODE (XEXP (ind, 1)) == PLUS
12947 && REG_P (XEXP (XEXP (ind, 1), 1)))
12948 return true;
12950 /* Match:
12951 (plus (reg)
12952 (const)). */
12953 if (type == 0
12954 && GET_CODE (ind) == PLUS
12955 && REG_P (XEXP (ind, 0))
12956 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12957 && CONST_INT_P (XEXP (ind, 1))
12958 && INTVAL (XEXP (ind, 1)) > -1024
12959 /* For quad modes, we restrict the constant offset to be slightly less
12960 than what the instruction format permits. We have no such constraint
12961 on double mode offsets. (This must match arm_legitimate_index_p.) */
12962 && (INTVAL (XEXP (ind, 1))
12963 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12964 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12965 return TRUE;
12967 return FALSE;
12970 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12971 type. */
12973 neon_struct_mem_operand (rtx op)
12975 rtx ind;
12977 /* Reject eliminable registers. */
12978 if (! (reload_in_progress || reload_completed)
12979 && ( reg_mentioned_p (frame_pointer_rtx, op)
12980 || reg_mentioned_p (arg_pointer_rtx, op)
12981 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12982 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12983 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12984 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12985 return FALSE;
12987 /* Constants are converted into offsets from labels. */
12988 if (!MEM_P (op))
12989 return FALSE;
12991 ind = XEXP (op, 0);
12993 if (reload_completed
12994 && (GET_CODE (ind) == LABEL_REF
12995 || (GET_CODE (ind) == CONST
12996 && GET_CODE (XEXP (ind, 0)) == PLUS
12997 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12998 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12999 return TRUE;
13001 /* Match: (mem (reg)). */
13002 if (REG_P (ind))
13003 return arm_address_register_rtx_p (ind, 0);
13005 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13006 if (GET_CODE (ind) == POST_INC
13007 || GET_CODE (ind) == PRE_DEC)
13008 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13010 return FALSE;
13013 /* Return true if X is a register that will be eliminated later on. */
13015 arm_eliminable_register (rtx x)
13017 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13018 || REGNO (x) == ARG_POINTER_REGNUM
13019 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13020 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13023 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13024 coprocessor registers. Otherwise return NO_REGS. */
13026 enum reg_class
13027 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13029 if (mode == HFmode)
13031 if (!TARGET_NEON_FP16)
13032 return GENERAL_REGS;
13033 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13034 return NO_REGS;
13035 return GENERAL_REGS;
13038 /* The neon move patterns handle all legitimate vector and struct
13039 addresses. */
13040 if (TARGET_NEON
13041 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13042 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13043 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13044 || VALID_NEON_STRUCT_MODE (mode)))
13045 return NO_REGS;
13047 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13048 return NO_REGS;
13050 return GENERAL_REGS;
13053 /* Values which must be returned in the most-significant end of the return
13054 register. */
13056 static bool
13057 arm_return_in_msb (const_tree valtype)
13059 return (TARGET_AAPCS_BASED
13060 && BYTES_BIG_ENDIAN
13061 && (AGGREGATE_TYPE_P (valtype)
13062 || TREE_CODE (valtype) == COMPLEX_TYPE
13063 || FIXED_POINT_TYPE_P (valtype)));
13066 /* Return TRUE if X references a SYMBOL_REF. */
13068 symbol_mentioned_p (rtx x)
13070 const char * fmt;
13071 int i;
13073 if (GET_CODE (x) == SYMBOL_REF)
13074 return 1;
13076 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13077 are constant offsets, not symbols. */
13078 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13079 return 0;
13081 fmt = GET_RTX_FORMAT (GET_CODE (x));
13083 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13085 if (fmt[i] == 'E')
13087 int j;
13089 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13090 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13091 return 1;
13093 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13094 return 1;
13097 return 0;
13100 /* Return TRUE if X references a LABEL_REF. */
13102 label_mentioned_p (rtx x)
13104 const char * fmt;
13105 int i;
13107 if (GET_CODE (x) == LABEL_REF)
13108 return 1;
13110 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13111 instruction, but they are constant offsets, not symbols. */
13112 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13113 return 0;
13115 fmt = GET_RTX_FORMAT (GET_CODE (x));
13116 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13118 if (fmt[i] == 'E')
13120 int j;
13122 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13123 if (label_mentioned_p (XVECEXP (x, i, j)))
13124 return 1;
13126 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13127 return 1;
13130 return 0;
13134 tls_mentioned_p (rtx x)
13136 switch (GET_CODE (x))
13138 case CONST:
13139 return tls_mentioned_p (XEXP (x, 0));
13141 case UNSPEC:
13142 if (XINT (x, 1) == UNSPEC_TLS)
13143 return 1;
13145 default:
13146 return 0;
13150 /* Must not copy any rtx that uses a pc-relative address. */
13152 static bool
13153 arm_cannot_copy_insn_p (rtx_insn *insn)
13155 /* The tls call insn cannot be copied, as it is paired with a data
13156 word. */
13157 if (recog_memoized (insn) == CODE_FOR_tlscall)
13158 return true;
13160 subrtx_iterator::array_type array;
13161 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13163 const_rtx x = *iter;
13164 if (GET_CODE (x) == UNSPEC
13165 && (XINT (x, 1) == UNSPEC_PIC_BASE
13166 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13167 return true;
13169 return false;
13172 enum rtx_code
13173 minmax_code (rtx x)
13175 enum rtx_code code = GET_CODE (x);
13177 switch (code)
13179 case SMAX:
13180 return GE;
13181 case SMIN:
13182 return LE;
13183 case UMIN:
13184 return LEU;
13185 case UMAX:
13186 return GEU;
13187 default:
13188 gcc_unreachable ();
13192 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13194 bool
13195 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13196 int *mask, bool *signed_sat)
13198 /* The high bound must be a power of two minus one. */
13199 int log = exact_log2 (INTVAL (hi_bound) + 1);
13200 if (log == -1)
13201 return false;
13203 /* The low bound is either zero (for usat) or one less than the
13204 negation of the high bound (for ssat). */
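  /* For instance, the bounds [0, 255] give the unsigned case with *MASK = 8
     (usat #8), while [-128, 127] gives the signed case, also with *MASK = 8
     (ssat #8).  */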
13205 if (INTVAL (lo_bound) == 0)
13207 if (mask)
13208 *mask = log;
13209 if (signed_sat)
13210 *signed_sat = false;
13212 return true;
13215 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13217 if (mask)
13218 *mask = log + 1;
13219 if (signed_sat)
13220 *signed_sat = true;
13222 return true;
13225 return false;
13228 /* Return 1 if memory locations are adjacent. */
13230 adjacent_mem_locations (rtx a, rtx b)
13232 /* We don't guarantee to preserve the order of these memory refs. */
13233 if (volatile_refs_p (a) || volatile_refs_p (b))
13234 return 0;
13236 if ((REG_P (XEXP (a, 0))
13237 || (GET_CODE (XEXP (a, 0)) == PLUS
13238 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13239 && (REG_P (XEXP (b, 0))
13240 || (GET_CODE (XEXP (b, 0)) == PLUS
13241 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13243 HOST_WIDE_INT val0 = 0, val1 = 0;
13244 rtx reg0, reg1;
13245 int val_diff;
13247 if (GET_CODE (XEXP (a, 0)) == PLUS)
13249 reg0 = XEXP (XEXP (a, 0), 0);
13250 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13252 else
13253 reg0 = XEXP (a, 0);
13255 if (GET_CODE (XEXP (b, 0)) == PLUS)
13257 reg1 = XEXP (XEXP (b, 0), 0);
13258 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13260 else
13261 reg1 = XEXP (b, 0);
13263 /* Don't accept any offset that will require multiple
13264 instructions to handle, since this would cause the
13265 arith_adjacentmem pattern to output an overlong sequence. */
13266 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13267 return 0;
13269 /* Don't allow an eliminable register: register elimination can make
13270 the offset too large. */
13271 if (arm_eliminable_register (reg0))
13272 return 0;
13274 val_diff = val1 - val0;
13276 if (arm_ld_sched)
13278 /* If the target has load delay slots, then there's no benefit
13279 to using an ldm instruction unless the offset is zero and
13280 we are optimizing for size. */
13281 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13282 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13283 && (val_diff == 4 || val_diff == -4));
13286 return ((REGNO (reg0) == REGNO (reg1))
13287 && (val_diff == 4 || val_diff == -4));
13290 return 0;
13293 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13294 for load operations, false for store operations. CONSECUTIVE is true
13295 if the register numbers in the operation must be consecutive in the register
13296 bank. RETURN_PC is true if the value is to be loaded into the PC.
13297 The pattern we are trying to match for load is:
13298 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13299 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13302 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13304 where
13305 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13306 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13307 3. If consecutive is TRUE, then for kth register being loaded,
13308 REGNO (R_dk) = REGNO (R_d0) + k.
13309 The pattern for store is similar. */
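/* As a schematic example, a two-register LDMIA in SImode matches the pattern
   above as:
     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])
   i.e. offset 0 for the first element and offset 4 (= <reg_increment>) for
   the second, with ascending destination register numbers.  */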
13310 bool
13311 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13312 bool consecutive, bool return_pc)
13314 HOST_WIDE_INT count = XVECLEN (op, 0);
13315 rtx reg, mem, addr;
13316 unsigned regno;
13317 unsigned first_regno;
13318 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13319 rtx elt;
13320 bool addr_reg_in_reglist = false;
13321 bool update = false;
13322 int reg_increment;
13323 int offset_adj;
13324 int regs_per_val;
13326 /* If not in SImode, then registers must be consecutive
13327 (e.g., VLDM instructions for DFmode). */
13328 gcc_assert ((mode == SImode) || consecutive);
13329 /* Setting return_pc for stores is illegal. */
13330 gcc_assert (!return_pc || load);
13332 /* Set up the increments and the regs per val based on the mode. */
13333 reg_increment = GET_MODE_SIZE (mode);
13334 regs_per_val = reg_increment / 4;
13335 offset_adj = return_pc ? 1 : 0;
13337 if (count <= 1
13338 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13339 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13340 return false;
13342 /* Check if this is a write-back. */
13343 elt = XVECEXP (op, 0, offset_adj);
13344 if (GET_CODE (SET_SRC (elt)) == PLUS)
13346 i++;
13347 base = 1;
13348 update = true;
13350 /* The offset adjustment must be the number of registers being
13351 popped times the size of a single register. */
13352 if (!REG_P (SET_DEST (elt))
13353 || !REG_P (XEXP (SET_SRC (elt), 0))
13354 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13355 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13356 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13357 ((count - 1 - offset_adj) * reg_increment))
13358 return false;
13361 i = i + offset_adj;
13362 base = base + offset_adj;
13363 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13364 success depends on the type: VLDM can do just one reg,
13365 LDM must do at least two. */
13366 if ((count <= i) && (mode == SImode))
13367 return false;
13369 elt = XVECEXP (op, 0, i - 1);
13370 if (GET_CODE (elt) != SET)
13371 return false;
13373 if (load)
13375 reg = SET_DEST (elt);
13376 mem = SET_SRC (elt);
13378 else
13380 reg = SET_SRC (elt);
13381 mem = SET_DEST (elt);
13384 if (!REG_P (reg) || !MEM_P (mem))
13385 return false;
13387 regno = REGNO (reg);
13388 first_regno = regno;
13389 addr = XEXP (mem, 0);
13390 if (GET_CODE (addr) == PLUS)
13392 if (!CONST_INT_P (XEXP (addr, 1)))
13393 return false;
13395 offset = INTVAL (XEXP (addr, 1));
13396 addr = XEXP (addr, 0);
13399 if (!REG_P (addr))
13400 return false;
13402 /* Don't allow SP to be loaded unless it is also the base register. It
13403 guarantees that SP is reset correctly when an LDM instruction
13404 is interrupted. Otherwise, we might end up with a corrupt stack. */
13405 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13406 return false;
13408 for (; i < count; i++)
13410 elt = XVECEXP (op, 0, i);
13411 if (GET_CODE (elt) != SET)
13412 return false;
13414 if (load)
13416 reg = SET_DEST (elt);
13417 mem = SET_SRC (elt);
13419 else
13421 reg = SET_SRC (elt);
13422 mem = SET_DEST (elt);
13425 if (!REG_P (reg)
13426 || GET_MODE (reg) != mode
13427 || REGNO (reg) <= regno
13428 || (consecutive
13429 && (REGNO (reg) !=
13430 (unsigned int) (first_regno + regs_per_val * (i - base))))
13431 /* Don't allow SP to be loaded unless it is also the base register. It
13432 guarantees that SP is reset correctly when an LDM instruction
13433 is interrupted. Otherwise, we might end up with a corrupt stack. */
13434 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13435 || !MEM_P (mem)
13436 || GET_MODE (mem) != mode
13437 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13438 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13439 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13440 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13441 offset + (i - base) * reg_increment))
13442 && (!REG_P (XEXP (mem, 0))
13443 || offset + (i - base) * reg_increment != 0)))
13444 return false;
13446 regno = REGNO (reg);
13447 if (regno == REGNO (addr))
13448 addr_reg_in_reglist = true;
13451 if (load)
13453 if (update && addr_reg_in_reglist)
13454 return false;
13456 /* For Thumb-1, address register is always modified - either by write-back
13457 or by explicit load. If the pattern does not describe an update,
13458 then the address register must be in the list of loaded registers. */
13459 if (TARGET_THUMB1)
13460 return update || addr_reg_in_reglist;
13463 return true;
13466 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13467 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13468 instruction. ADD_OFFSET is nonzero if the base address register needs
13469 to be modified with an add instruction before we can use it. */
13471 static bool
13472 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13473 int nops, HOST_WIDE_INT add_offset)
13475 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13476 if the offset isn't small enough. The reason 2 ldrs are faster
13477 is because these ARMs are able to do more than one cache access
13478 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13479 whilst the ARM8 has a double bandwidth cache. This means that
13480 these cores can do both an instruction fetch and a data fetch in
13481 a single cycle, so the trick of calculating the address into a
13482 scratch register (one of the result regs) and then doing a load
13483 multiple actually becomes slower (and no smaller in code size).
13484 That is the transformation
13486 ldr rd1, [rbase + offset]
13487 ldr rd2, [rbase + offset + 4]
13491 add rd1, rbase, offset
13492 ldmia rd1, {rd1, rd2}
13494 produces worse code -- '3 cycles + any stalls on rd2' instead of
13495 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13496 access per cycle, the first sequence could never complete in less
13497 than 6 cycles, whereas the ldm sequence would only take 5 and
13498 would make better use of sequential accesses if not hitting the
13499 cache.
13501 We cheat here and test 'arm_ld_sched' which we currently know to
13502 only be true for the ARM8, ARM9 and StrongARM. If this ever
13503 changes, then the test below needs to be reworked. */
13504 if (nops == 2 && arm_ld_sched && add_offset != 0)
13505 return false;
13507 /* XScale has load-store double instructions, but they have stricter
13508 alignment requirements than load-store multiple, so we cannot
13509 use them.
13511 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13512 the pipeline until completion.
13514 NREGS CYCLES
13515 1 3
13516 2 4
13517 3 5
13518 4 6
13520 An ldr instruction takes 1-3 cycles, but does not block the
13521 pipeline.
13523 NREGS CYCLES
13524 1 1-3
13525 2 2-6
13526 3 3-9
13527 4 4-12
13529 Best case ldr will always win. However, the more ldr instructions
13530 we issue, the less likely we are to be able to schedule them well.
13531 Using ldr instructions also increases code size.
13533 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13534 for counts of 3 or 4 regs. */
13535 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13536 return false;
13537 return true;
13540 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13541 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13542 an array ORDER which describes the sequence to use when accessing the
13543 offsets that produces an ascending order. In this sequence, each
13544 offset must be larger by exactly 4 than the previous one. ORDER[0]
13545 must have been filled in with the lowest offset by the caller.
13546 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13547 we use to verify that ORDER produces an ascending order of registers.
13548 Return true if it was possible to construct such an order, false if
13549 not. */
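/* For example, with NOPS = 4, UNSORTED_OFFSETS = {8, 0, 4, 12} and
   ORDER[0] = 1 (the index of the lowest offset), this fills in
   ORDER = {1, 2, 0, 3}; it fails if any offset is not exactly 4 greater
   than its predecessor, or if more than one candidate matches a step.  */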
13551 static bool
13552 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13553 int *unsorted_regs)
13555 int i;
13556 for (i = 1; i < nops; i++)
13558 int j;
13560 order[i] = order[i - 1];
13561 for (j = 0; j < nops; j++)
13562 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13564 /* We must find exactly one offset that is higher than the
13565 previous one by 4. */
13566 if (order[i] != order[i - 1])
13567 return false;
13568 order[i] = j;
13570 if (order[i] == order[i - 1])
13571 return false;
13572 /* The register numbers must be ascending. */
13573 if (unsorted_regs != NULL
13574 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13575 return false;
13577 return true;
13580 /* Used to determine in a peephole whether a sequence of load
13581 instructions can be changed into a load-multiple instruction.
13582 NOPS is the number of separate load instructions we are examining. The
13583 first NOPS entries in OPERANDS are the destination registers, the
13584 next NOPS entries are memory operands. If this function is
13585 successful, *BASE is set to the common base register of the memory
13586 accesses; *LOAD_OFFSET is set to the first memory location's offset
13587 from that base register.
13588 REGS is an array filled in with the destination register numbers.
13589 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13590 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13591 the sequence of registers in REGS matches the loads from ascending memory
13592 locations, and the function verifies that the register numbers are
13593 themselves ascending. If CHECK_REGS is false, the register numbers
13594 are stored in the order they are found in the operands. */
13595 static int
13596 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13597 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13599 int unsorted_regs[MAX_LDM_STM_OPS];
13600 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13601 int order[MAX_LDM_STM_OPS];
13602 rtx base_reg_rtx = NULL;
13603 int base_reg = -1;
13604 int i, ldm_case;
13606 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13607 easily extended if required. */
13608 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13610 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13612 /* Loop over the operands and check that the memory references are
13613 suitable (i.e. immediate offsets from the same base register). At
13614 the same time, extract the target register, and the memory
13615 offsets. */
13616 for (i = 0; i < nops; i++)
13618 rtx reg;
13619 rtx offset;
13621 /* Convert a subreg of a mem into the mem itself. */
13622 if (GET_CODE (operands[nops + i]) == SUBREG)
13623 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13625 gcc_assert (MEM_P (operands[nops + i]));
13627 /* Don't reorder volatile memory references; it doesn't seem worth
13628 looking for the case where the order is ok anyway. */
13629 if (MEM_VOLATILE_P (operands[nops + i]))
13630 return 0;
13632 offset = const0_rtx;
13634 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13635 || (GET_CODE (reg) == SUBREG
13636 && REG_P (reg = SUBREG_REG (reg))))
13637 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13638 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13639 || (GET_CODE (reg) == SUBREG
13640 && REG_P (reg = SUBREG_REG (reg))))
13641 && (CONST_INT_P (offset
13642 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13644 if (i == 0)
13646 base_reg = REGNO (reg);
13647 base_reg_rtx = reg;
13648 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13649 return 0;
13651 else if (base_reg != (int) REGNO (reg))
13652 /* Not addressed from the same base register. */
13653 return 0;
13655 unsorted_regs[i] = (REG_P (operands[i])
13656 ? REGNO (operands[i])
13657 : REGNO (SUBREG_REG (operands[i])));
13659 /* If it isn't an integer register, or if it overwrites the
13660 base register but isn't the last insn in the list, then
13661 we can't do this. */
13662 if (unsorted_regs[i] < 0
13663 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13664 || unsorted_regs[i] > 14
13665 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13666 return 0;
13668 /* Don't allow SP to be loaded unless it is also the base
13669 register. It guarantees that SP is reset correctly when
13670 an LDM instruction is interrupted. Otherwise, we might
13671 end up with a corrupt stack. */
13672 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13673 return 0;
13675 unsorted_offsets[i] = INTVAL (offset);
13676 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13677 order[0] = i;
13679 else
13680 /* Not a suitable memory address. */
13681 return 0;
13684 /* All the useful information has now been extracted from the
13685 operands into unsorted_regs and unsorted_offsets; additionally,
13686 order[0] has been set to the lowest offset in the list. Sort
13687 the offsets into order, verifying that they are adjacent, and
13688 check that the register numbers are ascending. */
13689 if (!compute_offset_order (nops, unsorted_offsets, order,
13690 check_regs ? unsorted_regs : NULL))
13691 return 0;
13693 if (saved_order)
13694 memcpy (saved_order, order, sizeof order);
13696 if (base)
13698 *base = base_reg;
13700 for (i = 0; i < nops; i++)
13701 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13703 *load_offset = unsorted_offsets[order[0]];
13706 if (TARGET_THUMB1
13707 && !peep2_reg_dead_p (nops, base_reg_rtx))
13708 return 0;
13710 if (unsorted_offsets[order[0]] == 0)
13711 ldm_case = 1; /* ldmia */
13712 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13713 ldm_case = 2; /* ldmib */
13714 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13715 ldm_case = 3; /* ldmda */
13716 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13717 ldm_case = 4; /* ldmdb */
13718 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13719 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13720 ldm_case = 5;
13721 else
13722 return 0;
13724 if (!multiple_operation_profitable_p (false, nops,
13725 ldm_case == 5
13726 ? unsorted_offsets[order[0]] : 0))
13727 return 0;
13729 return ldm_case;
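/* Illustrative example (hypothetical peephole operands): four loads of
   r4, r5, r6, r7 from [rB], [rB, #4], [rB, #8] and [rB, #12] share the base
   register rB and give offsets {0, 4, 8, 12}, so the lowest offset is 0 and
   ldm_case 1 (ldmia) is returned; had the offsets started at #4 instead,
   case 2 (ldmib) would be chosen in ARM state.  */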
13732 /* Used to determine in a peephole whether a sequence of store instructions can
13733 be changed into a store-multiple instruction.
13734 NOPS is the number of separate store instructions we are examining.
13735 NOPS_TOTAL is the total number of instructions recognized by the peephole
13736 pattern.
13737 The first NOPS entries in OPERANDS are the source registers, the next
13738 NOPS entries are memory operands. If this function is successful, *BASE is
13739 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13740 to the first memory location's offset from that base register. REGS is an
13741 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13742 likewise filled with the corresponding rtx's.
13743 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13744 numbers to an ascending order of stores.
13745 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13746 from ascending memory locations, and the function verifies that the register
13747 numbers are themselves ascending. If CHECK_REGS is false, the register
13748 numbers are stored in the order they are found in the operands. */
13749 static int
13750 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13751 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13752 HOST_WIDE_INT *load_offset, bool check_regs)
13754 int unsorted_regs[MAX_LDM_STM_OPS];
13755 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13756 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13757 int order[MAX_LDM_STM_OPS];
13758 int base_reg = -1;
13759 rtx base_reg_rtx = NULL;
13760 int i, stm_case;
13762 /* Write back of base register is currently only supported for Thumb 1. */
13763 int base_writeback = TARGET_THUMB1;
13765 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13766 easily extended if required. */
13767 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13769 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13771 /* Loop over the operands and check that the memory references are
13772 suitable (i.e. immediate offsets from the same base register). At
13773 the same time, extract the target register, and the memory
13774 offsets. */
13775 for (i = 0; i < nops; i++)
13777 rtx reg;
13778 rtx offset;
13780 /* Convert a subreg of a mem into the mem itself. */
13781 if (GET_CODE (operands[nops + i]) == SUBREG)
13782 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13784 gcc_assert (MEM_P (operands[nops + i]));
13786 /* Don't reorder volatile memory references; it doesn't seem worth
13787 looking for the case where the order is ok anyway. */
13788 if (MEM_VOLATILE_P (operands[nops + i]))
13789 return 0;
13791 offset = const0_rtx;
13793 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13794 || (GET_CODE (reg) == SUBREG
13795 && REG_P (reg = SUBREG_REG (reg))))
13796 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13797 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13798 || (GET_CODE (reg) == SUBREG
13799 && REG_P (reg = SUBREG_REG (reg))))
13800 && (CONST_INT_P (offset
13801 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13803 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13804 ? operands[i] : SUBREG_REG (operands[i]));
13805 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13807 if (i == 0)
13809 base_reg = REGNO (reg);
13810 base_reg_rtx = reg;
13811 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13812 return 0;
13814 else if (base_reg != (int) REGNO (reg))
13815 /* Not addressed from the same base register. */
13816 return 0;
13818 /* If it isn't an integer register, then we can't do this. */
13819 if (unsorted_regs[i] < 0
13820 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13821 /* The effects are unpredictable if the base register is
13822 both updated and stored. */
13823 || (base_writeback && unsorted_regs[i] == base_reg)
13824 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13825 || unsorted_regs[i] > 14)
13826 return 0;
13828 unsorted_offsets[i] = INTVAL (offset);
13829 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13830 order[0] = i;
13832 else
13833 /* Not a suitable memory address. */
13834 return 0;
13837 /* All the useful information has now been extracted from the
13838 operands into unsorted_regs and unsorted_offsets; additionally,
13839 order[0] has been set to the lowest offset in the list. Sort
13840 the offsets into order, verifying that they are adjacent, and
13841 check that the register numbers are ascending. */
13842 if (!compute_offset_order (nops, unsorted_offsets, order,
13843 check_regs ? unsorted_regs : NULL))
13844 return 0;
13846 if (saved_order)
13847 memcpy (saved_order, order, sizeof order);
13849 if (base)
13851 *base = base_reg;
13853 for (i = 0; i < nops; i++)
13855 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13856 if (reg_rtxs)
13857 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13860 *load_offset = unsorted_offsets[order[0]];
13863 if (TARGET_THUMB1
13864 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13865 return 0;
13867 if (unsorted_offsets[order[0]] == 0)
13868 stm_case = 1; /* stmia */
13869 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13870 stm_case = 2; /* stmib */
13871 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13872 stm_case = 3; /* stmda */
13873 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13874 stm_case = 4; /* stmdb */
13875 else
13876 return 0;
13878 if (!multiple_operation_profitable_p (false, nops, 0))
13879 return 0;
13881 return stm_case;
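/* An illustrative pair of cases (hypothetical operands): stores covering
   [rB, #-12] up to [rB, #0] have a highest offset of 0 and select stm_case 3
   (stmda) in ARM state, while stores covering [rB, #-16] up to [rB, #-4]
   select stm_case 4 (stmdb) on any 32-bit core.  */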
13884 /* Routines for use in generating RTL. */
13886 /* Generate a load-multiple instruction. COUNT is the number of loads in
13887 the instruction; REGS and MEMS are arrays containing the operands.
13888 BASEREG is the base register to be used in addressing the memory operands.
13889 WBACK_OFFSET is nonzero if the instruction should update the base
13890 register. */
13892 static rtx
13893 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13894 HOST_WIDE_INT wback_offset)
13896 int i = 0, j;
13897 rtx result;
13899 if (!multiple_operation_profitable_p (false, count, 0))
13901 rtx seq;
13903 start_sequence ();
13905 for (i = 0; i < count; i++)
13906 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13908 if (wback_offset != 0)
13909 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13911 seq = get_insns ();
13912 end_sequence ();
13914 return seq;
13917 result = gen_rtx_PARALLEL (VOIDmode,
13918 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13919 if (wback_offset != 0)
13921 XVECEXP (result, 0, 0)
13922 = gen_rtx_SET (VOIDmode, basereg,
13923 plus_constant (Pmode, basereg, wback_offset));
13924 i = 1;
13925 count++;
13928 for (j = 0; i < count; i++, j++)
13929 XVECEXP (result, 0, i)
13930 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13932 return result;
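/* Schematically (a sketch rather than a literal dump), for COUNT == 2,
   REGS == {4, 5} and WBACK_OFFSET == 8 the result is roughly

     (parallel [(set (reg:SI rB) (plus:SI (reg:SI rB) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   i.e. the optional base-register update occupies element 0 and the register
   loads follow, matching the load-multiple patterns in the machine
   description.  */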
13935 /* Generate a store-multiple instruction. COUNT is the number of stores in
13936 the instruction; REGS and MEMS are arrays containing the operands.
13937 BASEREG is the base register to be used in addressing the memory operands.
13938 WBACK_OFFSET is nonzero if the instruction should update the base
13939 register. */
13941 static rtx
13942 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13943 HOST_WIDE_INT wback_offset)
13945 int i = 0, j;
13946 rtx result;
13948 if (GET_CODE (basereg) == PLUS)
13949 basereg = XEXP (basereg, 0);
13951 if (!multiple_operation_profitable_p (false, count, 0))
13953 rtx seq;
13955 start_sequence ();
13957 for (i = 0; i < count; i++)
13958 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13960 if (wback_offset != 0)
13961 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13963 seq = get_insns ();
13964 end_sequence ();
13966 return seq;
13969 result = gen_rtx_PARALLEL (VOIDmode,
13970 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13971 if (wback_offset != 0)
13973 XVECEXP (result, 0, 0)
13974 = gen_rtx_SET (VOIDmode, basereg,
13975 plus_constant (Pmode, basereg, wback_offset));
13976 i = 1;
13977 count++;
13980 for (j = 0; i < count; i++, j++)
13981 XVECEXP (result, 0, i)
13982 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13984 return result;
13987 /* Generate either a load-multiple or a store-multiple instruction. This
13988 function can be used in situations where we can start with a single MEM
13989 rtx and adjust its address upwards.
13990 COUNT is the number of operations in the instruction, not counting a
13991 possible update of the base register. REGS is an array containing the
13992 register operands.
13993 BASEREG is the base register to be used in addressing the memory operands,
13994 which are constructed from BASEMEM.
13995 WRITE_BACK specifies whether the generated instruction should include an
13996 update of the base register.
13997 OFFSETP is used to pass an offset to and from this function; this offset
13998 is not used when constructing the address (instead BASEMEM should have an
13999 appropriate offset in its address); it is used only for setting
14000 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14002 static rtx
14003 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14004 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14006 rtx mems[MAX_LDM_STM_OPS];
14007 HOST_WIDE_INT offset = *offsetp;
14008 int i;
14010 gcc_assert (count <= MAX_LDM_STM_OPS);
14012 if (GET_CODE (basereg) == PLUS)
14013 basereg = XEXP (basereg, 0);
14015 for (i = 0; i < count; i++)
14017 rtx addr = plus_constant (Pmode, basereg, i * 4);
14018 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14019 offset += 4;
14022 if (write_back)
14023 *offsetp = offset;
14025 if (is_load)
14026 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14027 write_back ? 4 * count : 0);
14028 else
14029 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14030 write_back ? 4 * count : 0);
rtx
14034 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14035 rtx basemem, HOST_WIDE_INT *offsetp)
14037 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14038 offsetp);
rtx
14042 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14043 rtx basemem, HOST_WIDE_INT *offsetp)
14045 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14046 offsetp);
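/* These wrappers are what the block-copy code further down uses; for
   example, arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase, &srcoffset)
   builds four SImode mems at src, src+4, src+8 and src+12 (taking attributes
   from SRCBASE), requests a write-back of 16 and advances *OFFSETP by 16.  */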
14049 /* Called from a peephole2 expander to turn a sequence of loads into an
14050 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14051 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14052 is true if we can reorder the registers because they are used commutatively
14053 subsequently.
14054 Returns true iff we could generate a new instruction. */
14056 bool
14057 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14059 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14060 rtx mems[MAX_LDM_STM_OPS];
14061 int i, j, base_reg;
14062 rtx base_reg_rtx;
14063 HOST_WIDE_INT offset;
14064 int write_back = FALSE;
14065 int ldm_case;
14066 rtx addr;
14068 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14069 &base_reg, &offset, !sort_regs);
14071 if (ldm_case == 0)
14072 return false;
14074 if (sort_regs)
14075 for (i = 0; i < nops - 1; i++)
14076 for (j = i + 1; j < nops; j++)
14077 if (regs[i] > regs[j])
14079 int t = regs[i];
14080 regs[i] = regs[j];
14081 regs[j] = t;
14083 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14085 if (TARGET_THUMB1)
14087 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14088 gcc_assert (ldm_case == 1 || ldm_case == 5);
14089 write_back = TRUE;
14092 if (ldm_case == 5)
14094 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14095 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14096 offset = 0;
14097 if (!TARGET_THUMB1)
14099 base_reg = regs[0];
14100 base_reg_rtx = newbase;
14104 for (i = 0; i < nops; i++)
14106 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14107 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14108 SImode, addr, 0);
14110 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14111 write_back ? offset + i * 4 : 0));
14112 return true;
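/* For instance (hypothetical operands), three loads of r4, r5, r6 from
   [rB, #64], [rB, #68] and [rB, #72] give ldm_case 5, so in ARM state the
   code above first emits "add r4, rB, #64" (NEWBASE is regs[0]) and then the
   load-multiple based on r4 with OFFSET reset to 0.  */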
14115 /* Called from a peephole2 expander to turn a sequence of stores into an
14116 STM instruction. OPERANDS are the operands found by the peephole matcher;
14117 NOPS indicates how many separate stores we are trying to combine.
14118 Returns true iff we could generate a new instruction. */
14120 bool
14121 gen_stm_seq (rtx *operands, int nops)
14123 int i;
14124 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14125 rtx mems[MAX_LDM_STM_OPS];
14126 int base_reg;
14127 rtx base_reg_rtx;
14128 HOST_WIDE_INT offset;
14129 int write_back = FALSE;
14130 int stm_case;
14131 rtx addr;
14132 bool base_reg_dies;
14134 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14135 mem_order, &base_reg, &offset, true);
14137 if (stm_case == 0)
14138 return false;
14140 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14142 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14143 if (TARGET_THUMB1)
14145 gcc_assert (base_reg_dies);
14146 write_back = TRUE;
14149 if (stm_case == 5)
14151 gcc_assert (base_reg_dies);
14152 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14153 offset = 0;
14156 addr = plus_constant (Pmode, base_reg_rtx, offset);
14158 for (i = 0; i < nops; i++)
14160 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14161 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14162 SImode, addr, 0);
14164 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14165 write_back ? offset + i * 4 : 0));
14166 return true;
14169 /* Called from a peephole2 expander to turn a sequence of stores that are
14170 preceded by constant loads into an STM instruction. OPERANDS are the
14171 operands found by the peephole matcher; NOPS indicates how many
14172 separate stores we are trying to combine; there are 2 * NOPS
14173 instructions in the peephole.
14174 Returns true iff we could generate a new instruction. */
14176 bool
14177 gen_const_stm_seq (rtx *operands, int nops)
14179 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14180 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14181 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14182 rtx mems[MAX_LDM_STM_OPS];
14183 int base_reg;
14184 rtx base_reg_rtx;
14185 HOST_WIDE_INT offset;
14186 int write_back = FALSE;
14187 int stm_case;
14188 rtx addr;
14189 bool base_reg_dies;
14190 int i, j;
14191 HARD_REG_SET allocated;
14193 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14194 mem_order, &base_reg, &offset, false);
14196 if (stm_case == 0)
14197 return false;
14199 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14201 /* If the same register is used more than once, try to find a free
14202 register. */
14203 CLEAR_HARD_REG_SET (allocated);
14204 for (i = 0; i < nops; i++)
14206 for (j = i + 1; j < nops; j++)
14207 if (regs[i] == regs[j])
14209 rtx t = peep2_find_free_register (0, nops * 2,
14210 TARGET_THUMB1 ? "l" : "r",
14211 SImode, &allocated);
14212 if (t == NULL_RTX)
14213 return false;
14214 reg_rtxs[i] = t;
14215 regs[i] = REGNO (t);
14219 /* Compute an ordering that maps the register numbers to an ascending
14220 sequence. */
14221 reg_order[0] = 0;
14222 for (i = 0; i < nops; i++)
14223 if (regs[i] < regs[reg_order[0]])
14224 reg_order[0] = i;
14226 for (i = 1; i < nops; i++)
14228 int this_order = reg_order[i - 1];
14229 for (j = 0; j < nops; j++)
14230 if (regs[j] > regs[reg_order[i - 1]]
14231 && (this_order == reg_order[i - 1]
14232 || regs[j] < regs[this_order]))
14233 this_order = j;
14234 reg_order[i] = this_order;
14237 /* Ensure that registers that must be live after the instruction end
14238 up with the correct value. */
14239 for (i = 0; i < nops; i++)
14241 int this_order = reg_order[i];
14242 if ((this_order != mem_order[i]
14243 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14244 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14245 return false;
14248 /* Load the constants. */
14249 for (i = 0; i < nops; i++)
14251 rtx op = operands[2 * nops + mem_order[i]];
14252 sorted_regs[i] = regs[reg_order[i]];
14253 emit_move_insn (reg_rtxs[reg_order[i]], op);
14256 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14258 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14259 if (TARGET_THUMB1)
14261 gcc_assert (base_reg_dies);
14262 write_back = TRUE;
14265 if (stm_case == 5)
14267 gcc_assert (base_reg_dies);
14268 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14269 offset = 0;
14272 addr = plus_constant (Pmode, base_reg_rtx, offset);
14274 for (i = 0; i < nops; i++)
14276 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14277 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14278 SImode, addr, 0);
14280 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14281 write_back ? offset + i * 4 : 0));
14282 return true;
14285 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14286 unaligned copies on processors which support unaligned semantics for those
14287 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14288 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14289 An interleave factor of 1 (the minimum) will perform no interleaving.
14290 Load/store multiple are used for aligned addresses where possible. */
14292 static void
14293 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14294 HOST_WIDE_INT length,
14295 unsigned int interleave_factor)
14297 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14298 int *regnos = XALLOCAVEC (int, interleave_factor);
14299 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14300 HOST_WIDE_INT i, j;
14301 HOST_WIDE_INT remaining = length, words;
14302 rtx halfword_tmp = NULL, byte_tmp = NULL;
14303 rtx dst, src;
14304 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14305 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14306 HOST_WIDE_INT srcoffset, dstoffset;
14307 HOST_WIDE_INT src_autoinc, dst_autoinc;
14308 rtx mem, addr;
14310 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14312 /* Use hard registers if we have aligned source or destination so we can use
14313 load/store multiple with contiguous registers. */
14314 if (dst_aligned || src_aligned)
14315 for (i = 0; i < interleave_factor; i++)
14316 regs[i] = gen_rtx_REG (SImode, i);
14317 else
14318 for (i = 0; i < interleave_factor; i++)
14319 regs[i] = gen_reg_rtx (SImode);
14321 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14322 src = copy_addr_to_reg (XEXP (srcbase, 0));
14324 srcoffset = dstoffset = 0;
14326 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14327 For copying the last bytes we want to subtract this offset again. */
14328 src_autoinc = dst_autoinc = 0;
14330 for (i = 0; i < interleave_factor; i++)
14331 regnos[i] = i;
14333 /* Copy BLOCK_SIZE_BYTES chunks. */
14335 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14337 /* Load words. */
14338 if (src_aligned && interleave_factor > 1)
14340 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14341 TRUE, srcbase, &srcoffset));
14342 src_autoinc += UNITS_PER_WORD * interleave_factor;
14344 else
14346 for (j = 0; j < interleave_factor; j++)
14348 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14349 - src_autoinc));
14350 mem = adjust_automodify_address (srcbase, SImode, addr,
14351 srcoffset + j * UNITS_PER_WORD);
14352 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14354 srcoffset += block_size_bytes;
14357 /* Store words. */
14358 if (dst_aligned && interleave_factor > 1)
14360 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14361 TRUE, dstbase, &dstoffset));
14362 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14364 else
14366 for (j = 0; j < interleave_factor; j++)
14368 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14369 - dst_autoinc));
14370 mem = adjust_automodify_address (dstbase, SImode, addr,
14371 dstoffset + j * UNITS_PER_WORD);
14372 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14374 dstoffset += block_size_bytes;
14377 remaining -= block_size_bytes;
14380 /* Copy any whole words left (note these aren't interleaved with any
14381 subsequent halfword/byte load/stores in the interests of simplicity). */
14383 words = remaining / UNITS_PER_WORD;
14385 gcc_assert (words < interleave_factor);
14387 if (src_aligned && words > 1)
14389 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14390 &srcoffset));
14391 src_autoinc += UNITS_PER_WORD * words;
14393 else
14395 for (j = 0; j < words; j++)
14397 addr = plus_constant (Pmode, src,
14398 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14399 mem = adjust_automodify_address (srcbase, SImode, addr,
14400 srcoffset + j * UNITS_PER_WORD);
14401 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14403 srcoffset += words * UNITS_PER_WORD;
14406 if (dst_aligned && words > 1)
14408 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14409 &dstoffset));
14410 dst_autoinc += words * UNITS_PER_WORD;
14412 else
14414 for (j = 0; j < words; j++)
14416 addr = plus_constant (Pmode, dst,
14417 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14418 mem = adjust_automodify_address (dstbase, SImode, addr,
14419 dstoffset + j * UNITS_PER_WORD);
14420 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14422 dstoffset += words * UNITS_PER_WORD;
14425 remaining -= words * UNITS_PER_WORD;
14427 gcc_assert (remaining < 4);
14429 /* Copy a halfword if necessary. */
14431 if (remaining >= 2)
14433 halfword_tmp = gen_reg_rtx (SImode);
14435 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14436 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14437 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14439 /* Either write out immediately, or delay until we've loaded the last
14440 byte, depending on interleave factor. */
14441 if (interleave_factor == 1)
14443 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14444 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14445 emit_insn (gen_unaligned_storehi (mem,
14446 gen_lowpart (HImode, halfword_tmp)));
14447 halfword_tmp = NULL;
14448 dstoffset += 2;
14451 remaining -= 2;
14452 srcoffset += 2;
14455 gcc_assert (remaining < 2);
14457 /* Copy last byte. */
14459 if ((remaining & 1) != 0)
14461 byte_tmp = gen_reg_rtx (SImode);
14463 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14464 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14465 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14467 if (interleave_factor == 1)
14469 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14470 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14471 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14472 byte_tmp = NULL;
14473 dstoffset++;
14476 remaining--;
14477 srcoffset++;
14480 /* Store last halfword if we haven't done so already. */
14482 if (halfword_tmp)
14484 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14485 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14486 emit_insn (gen_unaligned_storehi (mem,
14487 gen_lowpart (HImode, halfword_tmp)));
14488 dstoffset += 2;
14491 /* Likewise for last byte. */
14493 if (byte_tmp)
14495 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14496 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14497 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14498 dstoffset++;
14501 gcc_assert (remaining == 0 && srcoffset == dstoffset);
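/* As a sketch of the flow above (hypothetical sizes): a 23-byte copy with
   INTERLEAVE_FACTOR == 4 and both buffers known to be word-aligned does one
   16-byte ldm/stm block, then one whole word with single loads/stores, then
   a halfword and finally a single byte, ending with REMAINING == 0 and
   SRCOFFSET == DSTOFFSET == 23.  */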
14504 /* From mips_adjust_block_mem:
14506 Helper function for doing a loop-based block operation on memory
14507 reference MEM. Each iteration of the loop will operate on LENGTH
14508 bytes of MEM.
14510 Create a new base register for use within the loop and point it to
14511 the start of MEM. Create a new memory reference that uses this
14512 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14514 static void
14515 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14516 rtx *loop_mem)
14518 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14520 /* Although the new mem does not refer to a known location,
14521 it does keep up to LENGTH bytes of alignment. */
14522 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14523 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14526 /* From mips_block_move_loop:
14528 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14529 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14530 the memory regions do not overlap. */
14532 static void
14533 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14534 unsigned int interleave_factor,
14535 HOST_WIDE_INT bytes_per_iter)
14537 rtx src_reg, dest_reg, final_src, test;
14538 HOST_WIDE_INT leftover;
14540 leftover = length % bytes_per_iter;
14541 length -= leftover;
14543 /* Create registers and memory references for use within the loop. */
14544 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14545 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14547 /* Calculate the value that SRC_REG should have after the last iteration of
14548 the loop. */
14549 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14550 0, 0, OPTAB_WIDEN);
14552 /* Emit the start of the loop. */
14553 rtx_code_label *label = gen_label_rtx ();
14554 emit_label (label);
14556 /* Emit the loop body. */
14557 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14558 interleave_factor);
14560 /* Move on to the next block. */
14561 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14562 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14564 /* Emit the loop condition. */
14565 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14566 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14568 /* Mop up any left-over bytes. */
14569 if (leftover)
14570 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14573 /* Emit a block move when either the source or destination is unaligned (not
14574 aligned to a four-byte boundary). This may need further tuning depending on
14575 core type, optimize_size setting, etc. */
14577 static int
14578 arm_movmemqi_unaligned (rtx *operands)
14580 HOST_WIDE_INT length = INTVAL (operands[2]);
14582 if (optimize_size)
14584 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14585 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14586 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14587 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14588 or dst_aligned though: allow more interleaving in those cases since the
14589 resulting code can be smaller. */
14590 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14591 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14593 if (length > 12)
14594 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14595 interleave_factor, bytes_per_iter);
14596 else
14597 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14598 interleave_factor);
14600 else
14602 /* Note that the loop created by arm_block_move_unaligned_loop may be
14603 subject to loop unrolling, which makes tuning this condition a little
14604 redundant. */
14605 if (length > 32)
14606 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14607 else
14608 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14611 return 1;
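/* For example (sizes chosen for illustration), a 20-byte copy compiled with
   -Os where neither buffer is known to be word-aligned uses the loop with
   INTERLEAVE_FACTOR == 1 and BYTES_PER_ITER == 4, i.e. a four-byte-per-
   iteration ldr/str loop with no straight-line tail, since 20 is a multiple
   of 4.  */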
int
14615 arm_gen_movmemqi (rtx *operands)
14617 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14618 HOST_WIDE_INT srcoffset, dstoffset;
14619 int i;
14620 rtx src, dst, srcbase, dstbase;
14621 rtx part_bytes_reg = NULL;
14622 rtx mem;
14624 if (!CONST_INT_P (operands[2])
14625 || !CONST_INT_P (operands[3])
14626 || INTVAL (operands[2]) > 64)
14627 return 0;
14629 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14630 return arm_movmemqi_unaligned (operands);
14632 if (INTVAL (operands[3]) & 3)
14633 return 0;
14635 dstbase = operands[0];
14636 srcbase = operands[1];
14638 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14639 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14641 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14642 out_words_to_go = INTVAL (operands[2]) / 4;
14643 last_bytes = INTVAL (operands[2]) & 3;
14644 dstoffset = srcoffset = 0;
14646 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14647 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14649 for (i = 0; in_words_to_go >= 2; i+=4)
14651 if (in_words_to_go > 4)
14652 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14653 TRUE, srcbase, &srcoffset));
14654 else
14655 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14656 src, FALSE, srcbase,
14657 &srcoffset));
14659 if (out_words_to_go)
14661 if (out_words_to_go > 4)
14662 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14663 TRUE, dstbase, &dstoffset));
14664 else if (out_words_to_go != 1)
14665 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14666 out_words_to_go, dst,
14667 (last_bytes == 0
14668 ? FALSE : TRUE),
14669 dstbase, &dstoffset));
14670 else
14672 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14673 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14674 if (last_bytes != 0)
14676 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14677 dstoffset += 4;
14682 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14683 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14686 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14687 if (out_words_to_go)
14689 rtx sreg;
14691 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14692 sreg = copy_to_reg (mem);
14694 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14695 emit_move_insn (mem, sreg);
14696 in_words_to_go--;
14698 gcc_assert (!in_words_to_go); /* Sanity check */
14701 if (in_words_to_go)
14703 gcc_assert (in_words_to_go > 0);
14705 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14706 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14709 gcc_assert (!last_bytes || part_bytes_reg);
14711 if (BYTES_BIG_ENDIAN && last_bytes)
14713 rtx tmp = gen_reg_rtx (SImode);
14715 /* The bytes we want are in the top end of the word. */
14716 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14717 GEN_INT (8 * (4 - last_bytes))));
14718 part_bytes_reg = tmp;
14720 while (last_bytes)
14722 mem = adjust_automodify_address (dstbase, QImode,
14723 plus_constant (Pmode, dst,
14724 last_bytes - 1),
14725 dstoffset + last_bytes - 1);
14726 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14728 if (--last_bytes)
14730 tmp = gen_reg_rtx (SImode);
14731 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14732 part_bytes_reg = tmp;
14737 else
14739 if (last_bytes > 1)
14741 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14742 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14743 last_bytes -= 2;
14744 if (last_bytes)
14746 rtx tmp = gen_reg_rtx (SImode);
14747 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14748 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14749 part_bytes_reg = tmp;
14750 dstoffset += 2;
14754 if (last_bytes)
14756 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14757 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14761 return 1;
14764 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14765 by mode size. */
14766 inline static rtx
14767 next_consecutive_mem (rtx mem)
14769 machine_mode mode = GET_MODE (mem);
14770 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14771 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14773 return adjust_automodify_address (mem, mode, addr, offset);
14776 /* Copy using LDRD/STRD instructions whenever possible.
14777 Returns true upon success. */
14778 bool
14779 gen_movmem_ldrd_strd (rtx *operands)
14781 unsigned HOST_WIDE_INT len;
14782 HOST_WIDE_INT align;
14783 rtx src, dst, base;
14784 rtx reg0;
14785 bool src_aligned, dst_aligned;
14786 bool src_volatile, dst_volatile;
14788 gcc_assert (CONST_INT_P (operands[2]));
14789 gcc_assert (CONST_INT_P (operands[3]));
14791 len = UINTVAL (operands[2]);
14792 if (len > 64)
14793 return false;
14795 /* Maximum alignment we can assume for both src and dst buffers. */
14796 align = INTVAL (operands[3]);
14798 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14799 return false;
14801 /* Place src and dst addresses in registers
14802 and update the corresponding mem rtx. */
14803 dst = operands[0];
14804 dst_volatile = MEM_VOLATILE_P (dst);
14805 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14806 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14807 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14809 src = operands[1];
14810 src_volatile = MEM_VOLATILE_P (src);
14811 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14812 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14813 src = adjust_automodify_address (src, VOIDmode, base, 0);
14815 if (!unaligned_access && !(src_aligned && dst_aligned))
14816 return false;
14818 if (src_volatile || dst_volatile)
14819 return false;
14821 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14822 if (!(dst_aligned || src_aligned))
14823 return arm_gen_movmemqi (operands);
14825 src = adjust_address (src, DImode, 0);
14826 dst = adjust_address (dst, DImode, 0);
14827 while (len >= 8)
14829 len -= 8;
14830 reg0 = gen_reg_rtx (DImode);
14831 if (src_aligned)
14832 emit_move_insn (reg0, src);
14833 else
14834 emit_insn (gen_unaligned_loaddi (reg0, src));
14836 if (dst_aligned)
14837 emit_move_insn (dst, reg0);
14838 else
14839 emit_insn (gen_unaligned_storedi (dst, reg0));
14841 src = next_consecutive_mem (src);
14842 dst = next_consecutive_mem (dst);
14845 gcc_assert (len < 8);
14846 if (len >= 4)
14848 /* At least a word but less than a double-word left to copy. Copy a word. */
14849 reg0 = gen_reg_rtx (SImode);
14850 src = adjust_address (src, SImode, 0);
14851 dst = adjust_address (dst, SImode, 0);
14852 if (src_aligned)
14853 emit_move_insn (reg0, src);
14854 else
14855 emit_insn (gen_unaligned_loadsi (reg0, src));
14857 if (dst_aligned)
14858 emit_move_insn (dst, reg0);
14859 else
14860 emit_insn (gen_unaligned_storesi (dst, reg0));
14862 src = next_consecutive_mem (src);
14863 dst = next_consecutive_mem (dst);
14864 len -= 4;
14867 if (len == 0)
14868 return true;
14870 /* Copy the remaining bytes. */
14871 if (len >= 2)
14873 dst = adjust_address (dst, HImode, 0);
14874 src = adjust_address (src, HImode, 0);
14875 reg0 = gen_reg_rtx (SImode);
14876 if (src_aligned)
14877 emit_insn (gen_zero_extendhisi2 (reg0, src));
14878 else
14879 emit_insn (gen_unaligned_loadhiu (reg0, src));
14881 if (dst_aligned)
14882 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14883 else
14884 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14886 src = next_consecutive_mem (src);
14887 dst = next_consecutive_mem (dst);
14888 if (len == 2)
14889 return true;
14892 dst = adjust_address (dst, QImode, 0);
14893 src = adjust_address (src, QImode, 0);
14894 reg0 = gen_reg_rtx (QImode);
14895 emit_move_insn (reg0, src);
14896 emit_move_insn (dst, reg0);
14897 return true;
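/* A worked example (hypothetical operands): a 15-byte copy with both buffers
   known to be word-aligned is expanded as one DImode move (letting the
   backend use LDRD/STRD), one SImode move, one halfword store and one final
   byte copy, i.e. 8 + 4 + 2 + 1 bytes.  */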
14900 /* Select a dominance comparison mode if possible for a test of the general
14901 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14902 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14903 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14904 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14905 In all cases OP will be either EQ or NE, but we don't need to know which
14906 here. If we are unable to support a dominance comparison we return
14907 CCmode. This will then fail to match for the RTL expressions that
14908 generate this call. */
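/* For instance (an illustrative pair of operands): X == (eq r0 r1) and
   Y == (geu r2 r3) with COND_OR == DOM_CC_X_OR_Y give cond1 == EQ and
   cond2 == GEU; EQ dominates GEU, so the EQ arm of the switch below returns
   CC_DGEUmode.  */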
14909 machine_mode
14910 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14912 enum rtx_code cond1, cond2;
14913 int swapped = 0;
14915 /* Currently we will probably get the wrong result if the individual
14916 comparisons are not simple. This also ensures that it is safe to
14917 reverse a comparison if necessary. */
14918 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14919 != CCmode)
14920 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14921 != CCmode))
14922 return CCmode;
14924 /* The if_then_else variant of this tests the second condition if the
14925 first passes, but is true if the first fails. Reverse the first
14926 condition to get a true "inclusive-or" expression. */
14927 if (cond_or == DOM_CC_NX_OR_Y)
14928 cond1 = reverse_condition (cond1);
14930 /* If the comparisons are not equal, and one doesn't dominate the other,
14931 then we can't do this. */
14932 if (cond1 != cond2
14933 && !comparison_dominates_p (cond1, cond2)
14934 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14935 return CCmode;
14937 if (swapped)
14939 enum rtx_code temp = cond1;
14940 cond1 = cond2;
14941 cond2 = temp;
14944 switch (cond1)
14946 case EQ:
14947 if (cond_or == DOM_CC_X_AND_Y)
14948 return CC_DEQmode;
14950 switch (cond2)
14952 case EQ: return CC_DEQmode;
14953 case LE: return CC_DLEmode;
14954 case LEU: return CC_DLEUmode;
14955 case GE: return CC_DGEmode;
14956 case GEU: return CC_DGEUmode;
14957 default: gcc_unreachable ();
14960 case LT:
14961 if (cond_or == DOM_CC_X_AND_Y)
14962 return CC_DLTmode;
14964 switch (cond2)
14966 case LT:
14967 return CC_DLTmode;
14968 case LE:
14969 return CC_DLEmode;
14970 case NE:
14971 return CC_DNEmode;
14972 default:
14973 gcc_unreachable ();
14976 case GT:
14977 if (cond_or == DOM_CC_X_AND_Y)
14978 return CC_DGTmode;
14980 switch (cond2)
14982 case GT:
14983 return CC_DGTmode;
14984 case GE:
14985 return CC_DGEmode;
14986 case NE:
14987 return CC_DNEmode;
14988 default:
14989 gcc_unreachable ();
14992 case LTU:
14993 if (cond_or == DOM_CC_X_AND_Y)
14994 return CC_DLTUmode;
14996 switch (cond2)
14998 case LTU:
14999 return CC_DLTUmode;
15000 case LEU:
15001 return CC_DLEUmode;
15002 case NE:
15003 return CC_DNEmode;
15004 default:
15005 gcc_unreachable ();
15008 case GTU:
15009 if (cond_or == DOM_CC_X_AND_Y)
15010 return CC_DGTUmode;
15012 switch (cond2)
15014 case GTU:
15015 return CC_DGTUmode;
15016 case GEU:
15017 return CC_DGEUmode;
15018 case NE:
15019 return CC_DNEmode;
15020 default:
15021 gcc_unreachable ();
15024 /* The remaining cases only occur when both comparisons are the
15025 same. */
15026 case NE:
15027 gcc_assert (cond1 == cond2);
15028 return CC_DNEmode;
15030 case LE:
15031 gcc_assert (cond1 == cond2);
15032 return CC_DLEmode;
15034 case GE:
15035 gcc_assert (cond1 == cond2);
15036 return CC_DGEmode;
15038 case LEU:
15039 gcc_assert (cond1 == cond2);
15040 return CC_DLEUmode;
15042 case GEU:
15043 gcc_assert (cond1 == cond2);
15044 return CC_DGEUmode;
15046 default:
15047 gcc_unreachable ();
15051 machine_mode
15052 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15054 /* All floating point compares return CCFP if it is an equality
15055 comparison, and CCFPE otherwise. */
15056 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15058 switch (op)
15060 case EQ:
15061 case NE:
15062 case UNORDERED:
15063 case ORDERED:
15064 case UNLT:
15065 case UNLE:
15066 case UNGT:
15067 case UNGE:
15068 case UNEQ:
15069 case LTGT:
15070 return CCFPmode;
15072 case LT:
15073 case LE:
15074 case GT:
15075 case GE:
15076 return CCFPEmode;
15078 default:
15079 gcc_unreachable ();
15083 /* A compare with a shifted operand. Because of canonicalization, the
15084 comparison will have to be swapped when we emit the assembler. */
15085 if (GET_MODE (y) == SImode
15086 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15087 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15088 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15089 || GET_CODE (x) == ROTATERT))
15090 return CC_SWPmode;
15092 /* This operation is performed swapped, but since we only rely on the Z
15093 flag we don't need an additional mode. */
15094 if (GET_MODE (y) == SImode
15095 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15096 && GET_CODE (x) == NEG
15097 && (op == EQ || op == NE))
15098 return CC_Zmode;
15100 /* This is a special case that is used by combine to allow a
15101 comparison of a shifted byte load to be split into a zero-extend
15102 followed by a comparison of the shifted integer (only valid for
15103 equalities and unsigned inequalities). */
15104 if (GET_MODE (x) == SImode
15105 && GET_CODE (x) == ASHIFT
15106 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15107 && GET_CODE (XEXP (x, 0)) == SUBREG
15108 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15109 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15110 && (op == EQ || op == NE
15111 || op == GEU || op == GTU || op == LTU || op == LEU)
15112 && CONST_INT_P (y))
15113 return CC_Zmode;
15115 /* A construct for a conditional compare, if the false arm contains
15116 0, then both conditions must be true, otherwise either condition
15117 must be true. Not all conditions are possible, so CCmode is
15118 returned if it can't be done. */
15119 if (GET_CODE (x) == IF_THEN_ELSE
15120 && (XEXP (x, 2) == const0_rtx
15121 || XEXP (x, 2) == const1_rtx)
15122 && COMPARISON_P (XEXP (x, 0))
15123 && COMPARISON_P (XEXP (x, 1)))
15124 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15125 INTVAL (XEXP (x, 2)));
15127 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15128 if (GET_CODE (x) == AND
15129 && (op == EQ || op == NE)
15130 && COMPARISON_P (XEXP (x, 0))
15131 && COMPARISON_P (XEXP (x, 1)))
15132 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15133 DOM_CC_X_AND_Y);
15135 if (GET_CODE (x) == IOR
15136 && (op == EQ || op == NE)
15137 && COMPARISON_P (XEXP (x, 0))
15138 && COMPARISON_P (XEXP (x, 1)))
15139 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15140 DOM_CC_X_OR_Y);
15142 /* An operation (on Thumb) where we want to test for a single bit.
15143 This is done by shifting that bit up into the top bit of a
15144 scratch register; we can then branch on the sign bit. */
15145 if (TARGET_THUMB1
15146 && GET_MODE (x) == SImode
15147 && (op == EQ || op == NE)
15148 && GET_CODE (x) == ZERO_EXTRACT
15149 && XEXP (x, 1) == const1_rtx)
15150 return CC_Nmode;
15152 /* For an operation that sets the condition codes as a side-effect, the
15153 V flag is not set correctly, so we can only use comparisons where
15154 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15155 instead.) */
15156 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15157 if (GET_MODE (x) == SImode
15158 && y == const0_rtx
15159 && (op == EQ || op == NE || op == LT || op == GE)
15160 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15161 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15162 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15163 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15164 || GET_CODE (x) == LSHIFTRT
15165 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15166 || GET_CODE (x) == ROTATERT
15167 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15168 return CC_NOOVmode;
15170 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15171 return CC_Zmode;
15173 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15174 && GET_CODE (x) == PLUS
15175 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15176 return CC_Cmode;
15178 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15180 switch (op)
15182 case EQ:
15183 case NE:
15184 /* A DImode comparison against zero can be implemented by
15185 or'ing the two halves together. */
15186 if (y == const0_rtx)
15187 return CC_Zmode;
15189 /* We can do an equality test in three Thumb instructions. */
15190 if (!TARGET_32BIT)
15191 return CC_Zmode;
15193 /* FALLTHROUGH */
15195 case LTU:
15196 case LEU:
15197 case GTU:
15198 case GEU:
15199 /* DImode unsigned comparisons can be implemented by cmp +
15200 cmpeq without a scratch register. Not worth doing in
15201 Thumb-2. */
15202 if (TARGET_32BIT)
15203 return CC_CZmode;
15205 /* FALLTHROUGH */
15207 case LT:
15208 case LE:
15209 case GT:
15210 case GE:
15211 /* DImode signed and unsigned comparisons can be implemented
15212 by cmp + sbcs with a scratch register, but that does not
15213 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15214 gcc_assert (op != EQ && op != NE);
15215 return CC_NCVmode;
15217 default:
15218 gcc_unreachable ();
15222 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15223 return GET_MODE (x);
15225 return CCmode;
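/* Two quick examples of the selection above (hypothetical operands):
   comparing (ashift:SI r1 (const_int 2)) against a plain register yields
   CC_SWPmode, because the operands must be swapped when the assembler is
   emitted, while (ltu:SI (plus:SI r0 r1) r1) yields CC_Cmode, where only the
   carry flag is relied upon.  */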
15228 /* X and Y are two things to compare using CODE. Emit the compare insn and
15229 return the rtx for register 0 in the proper mode. FP means this is a
15230 floating point compare: I don't think that it is needed on the arm. */
rtx
15232 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15234 machine_mode mode;
15235 rtx cc_reg;
15236 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15238 /* We might have X as a constant, Y as a register because of the predicates
15239 used for cmpdi. If so, force X to a register here. */
15240 if (dimode_comparison && !REG_P (x))
15241 x = force_reg (DImode, x);
15243 mode = SELECT_CC_MODE (code, x, y);
15244 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15246 if (dimode_comparison
15247 && mode != CC_CZmode)
15249 rtx clobber, set;
15251 /* To compare two non-zero values for equality, XOR them and
15252 then compare against zero. Not used for ARM mode; there
15253 CC_CZmode is cheaper. */
15254 if (mode == CC_Zmode && y != const0_rtx)
15256 gcc_assert (!reload_completed);
15257 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15258 y = const0_rtx;
15261 /* A scratch register is required. */
15262 if (reload_completed)
15263 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15264 else
15265 scratch = gen_rtx_SCRATCH (SImode);
15267 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15268 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15269 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15271 else
15272 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15274 return cc_reg;
15277 /* Generate a sequence of insns that will generate the correct return
15278 address mask depending on the physical architecture that the program
15279 is running on. */
rtx
15281 arm_gen_return_addr_mask (void)
15283 rtx reg = gen_reg_rtx (Pmode);
15285 emit_insn (gen_return_addr_mask (reg));
15286 return reg;
15289 void
15290 arm_reload_in_hi (rtx *operands)
15292 rtx ref = operands[1];
15293 rtx base, scratch;
15294 HOST_WIDE_INT offset = 0;
15296 if (GET_CODE (ref) == SUBREG)
15298 offset = SUBREG_BYTE (ref);
15299 ref = SUBREG_REG (ref);
15302 if (REG_P (ref))
15304 /* We have a pseudo which has been spilt onto the stack; there
15305 are two cases here: the first where there is a simple
15306 stack-slot replacement and a second where the stack-slot is
15307 out of range, or is used as a subreg. */
15308 if (reg_equiv_mem (REGNO (ref)))
15310 ref = reg_equiv_mem (REGNO (ref));
15311 base = find_replacement (&XEXP (ref, 0));
15313 else
15314 /* The slot is out of range, or was dressed up in a SUBREG. */
15315 base = reg_equiv_address (REGNO (ref));
15317 else
15318 base = find_replacement (&XEXP (ref, 0));
15320 /* Handle the case where the address is too complex to be offset by 1. */
15321 if (GET_CODE (base) == MINUS
15322 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15324 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15326 emit_set_insn (base_plus, base);
15327 base = base_plus;
15329 else if (GET_CODE (base) == PLUS)
15331 /* The addend must be CONST_INT, or we would have dealt with it above. */
15332 HOST_WIDE_INT hi, lo;
15334 offset += INTVAL (XEXP (base, 1));
15335 base = XEXP (base, 0);
15337 /* Rework the address into a legal sequence of insns. */
15338 /* Valid range for lo is -4095 -> 4095 */
15339 lo = (offset >= 0
15340 ? (offset & 0xfff)
15341 : -((-offset) & 0xfff));
15343 /* Corner case, if lo is the max offset then we would be out of range
15344 once we have added the additional 1 below, so bump the msb into the
15345 pre-loading insn(s). */
15346 if (lo == 4095)
15347 lo &= 0x7ff;
15349 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15350 ^ (HOST_WIDE_INT) 0x80000000)
15351 - (HOST_WIDE_INT) 0x80000000);
15353 gcc_assert (hi + lo == offset);
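/* For example, an offset of 0x1234 splits into lo == 0x234 and hi == 0x1000,
   while the corner case offset 4095 becomes lo == 0x7ff and hi == 0x800, so
   that the offset + 1 access below still fits in the 4095 range.  */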
15355 if (hi != 0)
15357 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15359 /* Get the base address; addsi3 knows how to handle constants
15360 that require more than one insn. */
15361 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15362 base = base_plus;
15363 offset = lo;
15367 /* Operands[2] may overlap operands[0] (though it won't overlap
15368 operands[1]), that's why we asked for a DImode reg -- so we can
15369 use the bit that does not overlap. */
15370 if (REGNO (operands[2]) == REGNO (operands[0]))
15371 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15372 else
15373 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15375 emit_insn (gen_zero_extendqisi2 (scratch,
15376 gen_rtx_MEM (QImode,
15377 plus_constant (Pmode, base,
15378 offset))));
15379 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15380 gen_rtx_MEM (QImode,
15381 plus_constant (Pmode, base,
15382 offset + 1))));
15383 if (!BYTES_BIG_ENDIAN)
15384 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15385 gen_rtx_IOR (SImode,
15386 gen_rtx_ASHIFT
15387 (SImode,
15388 gen_rtx_SUBREG (SImode, operands[0], 0),
15389 GEN_INT (8)),
15390 scratch));
15391 else
15392 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15393 gen_rtx_IOR (SImode,
15394 gen_rtx_ASHIFT (SImode, scratch,
15395 GEN_INT (8)),
15396 gen_rtx_SUBREG (SImode, operands[0], 0)));
15399 /* Handle storing a half-word to memory during reload by synthesizing as two
15400 byte stores. Take care not to clobber the input values until after we
15401 have moved them somewhere safe. This code assumes that if the DImode
15402 scratch in operands[2] overlaps either the input value or output address
15403 in some way, then that value must die in this insn (we absolutely need
15404 two scratch registers for some corner cases). */
15405 void
15406 arm_reload_out_hi (rtx *operands)
15408 rtx ref = operands[0];
15409 rtx outval = operands[1];
15410 rtx base, scratch;
15411 HOST_WIDE_INT offset = 0;
15413 if (GET_CODE (ref) == SUBREG)
15415 offset = SUBREG_BYTE (ref);
15416 ref = SUBREG_REG (ref);
15419 if (REG_P (ref))
15421 /* We have a pseudo which has been spilt onto the stack; there
15422 are two cases here: the first where there is a simple
15423 stack-slot replacement and a second where the stack-slot is
15424 out of range, or is used as a subreg. */
15425 if (reg_equiv_mem (REGNO (ref)))
15427 ref = reg_equiv_mem (REGNO (ref));
15428 base = find_replacement (&XEXP (ref, 0));
15430 else
15431 /* The slot is out of range, or was dressed up in a SUBREG. */
15432 base = reg_equiv_address (REGNO (ref));
15434 else
15435 base = find_replacement (&XEXP (ref, 0));
15437 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15439 /* Handle the case where the address is too complex to be offset by 1. */
15440 if (GET_CODE (base) == MINUS
15441 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15443 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15445 /* Be careful not to destroy OUTVAL. */
15446 if (reg_overlap_mentioned_p (base_plus, outval))
15448 /* Updating base_plus might destroy outval, see if we can
15449 swap the scratch and base_plus. */
15450 if (!reg_overlap_mentioned_p (scratch, outval))
15452 rtx tmp = scratch;
15453 scratch = base_plus;
15454 base_plus = tmp;
15456 else
15458 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15460 /* Be conservative and copy OUTVAL into the scratch now,
15461 this should only be necessary if outval is a subreg
15462 of something larger than a word. */
15463 /* XXX Might this clobber base? I can't see how it can,
15464 since scratch is known to overlap with OUTVAL, and
15465 must be wider than a word. */
15466 emit_insn (gen_movhi (scratch_hi, outval));
15467 outval = scratch_hi;
15471 emit_set_insn (base_plus, base);
15472 base = base_plus;
15474 else if (GET_CODE (base) == PLUS)
15476 /* The addend must be CONST_INT, or we would have dealt with it above. */
15477 HOST_WIDE_INT hi, lo;
15479 offset += INTVAL (XEXP (base, 1));
15480 base = XEXP (base, 0);
15482 /* Rework the address into a legal sequence of insns. */
15483 /* Valid range for lo is -4095 -> 4095 */
15484 lo = (offset >= 0
15485 ? (offset & 0xfff)
15486 : -((-offset) & 0xfff));
15488 /* Corner case, if lo is the max offset then we would be out of range
15489 once we have added the additional 1 below, so bump the msb into the
15490 pre-loading insn(s). */
15491 if (lo == 4095)
15492 lo &= 0x7ff;
15494 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15495 ^ (HOST_WIDE_INT) 0x80000000)
15496 - (HOST_WIDE_INT) 0x80000000);
15498 gcc_assert (hi + lo == offset);
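/* Worked example (illustrative): offset == 4095 gives lo == 0xfff, which
   the corner case above trims to 0x7ff (2047); the sign-extension
   arithmetic then yields hi == 2048, and hi + lo == 4095 as asserted.  */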
15500 if (hi != 0)
15502 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15504 /* Be careful not to destroy OUTVAL. */
15505 if (reg_overlap_mentioned_p (base_plus, outval))
15507 /* Updating base_plus might destroy outval, see if we
15508 can swap the scratch and base_plus. */
15509 if (!reg_overlap_mentioned_p (scratch, outval))
15511 rtx tmp = scratch;
15512 scratch = base_plus;
15513 base_plus = tmp;
15515 else
15517 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15519 /* Be conservative and copy outval into scratch now,
15520 this should only be necessary if outval is a
15521 subreg of something larger than a word. */
15522 /* XXX Might this clobber base? I can't see how it
15523 can, since scratch is known to overlap with
15524 outval. */
15525 emit_insn (gen_movhi (scratch_hi, outval));
15526 outval = scratch_hi;
15530 /* Get the base address; addsi3 knows how to handle constants
15531 that require more than one insn. */
15532 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15533 base = base_plus;
15534 offset = lo;
15538 if (BYTES_BIG_ENDIAN)
15540 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15541 plus_constant (Pmode, base,
15542 offset + 1)),
15543 gen_lowpart (QImode, outval)));
15544 emit_insn (gen_lshrsi3 (scratch,
15545 gen_rtx_SUBREG (SImode, outval, 0),
15546 GEN_INT (8)));
15547 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15548 offset)),
15549 gen_lowpart (QImode, scratch)));
15551 else
15553 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15554 offset)),
15555 gen_lowpart (QImode, outval)));
15556 emit_insn (gen_lshrsi3 (scratch,
15557 gen_rtx_SUBREG (SImode, outval, 0),
15558 GEN_INT (8)));
15559 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15560 plus_constant (Pmode, base,
15561 offset + 1)),
15562 gen_lowpart (QImode, scratch)));
15566 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15567 (padded to the size of a word) should be passed in a register. */
15569 static bool
15570 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15572 if (TARGET_AAPCS_BASED)
15573 return must_pass_in_stack_var_size (mode, type);
15574 else
15575 return must_pass_in_stack_var_size_or_pad (mode, type);
15579 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15580 Return true if an argument passed on the stack should be padded upwards,
15581 i.e. if the least-significant byte has useful data.
15582 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15583 aggregate types are placed in the lowest memory address. */
15585 bool
15586 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15588 if (!TARGET_AAPCS_BASED)
15589 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15591 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15592 return false;
15594 return true;
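/* Example (illustrative): on an AAPCS big-endian target this returns false
   for an 'int' argument but true for a small structure, matching the rule
   above that small aggregates go at the lowest address of their slot.  */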
15598 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15599 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15600 register has useful data, and return the opposite if the most
15601 significant byte does. */
15603 bool
15604 arm_pad_reg_upward (machine_mode mode,
15605 tree type, int first ATTRIBUTE_UNUSED)
15607 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15609 /* For AAPCS, small aggregates, small fixed-point types,
15610 and small complex types are always padded upwards. */
15611 if (type)
15613 if ((AGGREGATE_TYPE_P (type)
15614 || TREE_CODE (type) == COMPLEX_TYPE
15615 || FIXED_POINT_TYPE_P (type))
15616 && int_size_in_bytes (type) <= 4)
15617 return true;
15619 else
15621 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15622 && GET_MODE_SIZE (mode) <= 4)
15623 return true;
15627 /* Otherwise, use default padding. */
15628 return !BYTES_BIG_ENDIAN;
15631 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15632 assuming that the address in the base register is word aligned. */
15633 bool
15634 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15636 HOST_WIDE_INT max_offset;
15638 /* Offset must be a multiple of 4 in Thumb mode. */
15639 if (TARGET_THUMB2 && ((offset & 3) != 0))
15640 return false;
15642 if (TARGET_THUMB2)
15643 max_offset = 1020;
15644 else if (TARGET_ARM)
15645 max_offset = 255;
15646 else
15647 return false;
15649 return ((offset <= max_offset) && (offset >= -max_offset));
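/* Example (illustrative): offset_ok_for_ldrd_strd (256) is false in ARM
   state, where the limit is 255, but true in Thumb-2 state, which accepts
   offsets up to 1020 provided they are a multiple of 4.  */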
15652 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15653 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15654 Assumes that the address in the base register RN is word aligned. Pattern
15655 guarantees that both memory accesses use the same base register,
15656 the offsets are constants within the range, and the gap between the offsets is 4.
15657 If reload is complete then check that the registers are legal. WBACK indicates whether
15658 address is updated. LOAD indicates whether memory access is load or store. */
15659 bool
15660 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15661 bool wback, bool load)
15663 unsigned int t, t2, n;
15665 if (!reload_completed)
15666 return true;
15668 if (!offset_ok_for_ldrd_strd (offset))
15669 return false;
15671 t = REGNO (rt);
15672 t2 = REGNO (rt2);
15673 n = REGNO (rn);
15675 if ((TARGET_THUMB2)
15676 && ((wback && (n == t || n == t2))
15677 || (t == SP_REGNUM)
15678 || (t == PC_REGNUM)
15679 || (t2 == SP_REGNUM)
15680 || (t2 == PC_REGNUM)
15681 || (!load && (n == PC_REGNUM))
15682 || (load && (t == t2))
15683 /* Triggers Cortex-M3 LDRD errata. */
15684 || (!wback && load && fix_cm3_ldrd && (n == t))))
15685 return false;
15687 if ((TARGET_ARM)
15688 && ((wback && (n == t || n == t2))
15689 || (t2 == PC_REGNUM)
15690 || (t % 2 != 0) /* First destination register is not even. */
15691 || (t2 != t + 1)
15692 /* PC can be used as base register (for offset addressing only),
15693 but it is deprecated. */
15694 || (n == PC_REGNUM)))
15695 return false;
15697 return true;
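/* Example (illustrative, after reload): in ARM state the pair r0/r1 with a
   distinct base register is acceptable for LDRD, whereas r1/r2 is rejected
   because the first destination register must be even and the second must
   be the next consecutive register.  */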
15700 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15701 operand MEM's address contains an immediate offset from the base
15702 register and has no side effects, in which case it sets BASE and
15703 OFFSET accordingly. */
15704 static bool
15705 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15707 rtx addr;
15709 gcc_assert (base != NULL && offset != NULL);
15711 /* TODO: Handle more general memory operand patterns, such as
15712 PRE_DEC and PRE_INC. */
15714 if (side_effects_p (mem))
15715 return false;
15717 /* Can't deal with subregs. */
15718 if (GET_CODE (mem) == SUBREG)
15719 return false;
15721 gcc_assert (MEM_P (mem));
15723 *offset = const0_rtx;
15725 addr = XEXP (mem, 0);
15727 /* If addr isn't valid for DImode, then we can't handle it. */
15728 if (!arm_legitimate_address_p (DImode, addr,
15729 reload_in_progress || reload_completed))
15730 return false;
15732 if (REG_P (addr))
15734 *base = addr;
15735 return true;
15737 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15739 *base = XEXP (addr, 0);
15740 *offset = XEXP (addr, 1);
15741 return (REG_P (*base) && CONST_INT_P (*offset));
15744 return false;
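/* Example (illustrative): for (mem:SI (plus (reg r4) (const_int 8))) this
   sets *BASE to r4 and *OFFSET to 8 and returns true, while a PRE_INC
   address is rejected up front because it has side effects.  */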
15747 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15749 /* Called from a peephole2 to replace two word-size accesses with a
15750 single LDRD/STRD instruction. Returns true iff we can generate a
15751 new instruction sequence. That is, both accesses use the same base
15752 register and the gap between constant offsets is 4. This function
15753 may reorder its operands to match ldrd/strd RTL templates.
15754 OPERANDS are the operands found by the peephole matcher;
15755 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15756 corresponding memory operands. LOAD indicates whether the access
15757 is load or store. CONST_STORE indicates a store of constant
15758 integer values held in OPERANDS[4,5], and assumes that the pattern
15759 is 4 insns long, for the purpose of checking dead registers.
15760 COMMUTE indicates that register operands may be reordered. */
15761 bool
15762 gen_operands_ldrd_strd (rtx *operands, bool load,
15763 bool const_store, bool commute)
15765 int nops = 2;
15766 HOST_WIDE_INT offsets[2], offset;
15767 rtx base = NULL_RTX;
15768 rtx cur_base, cur_offset, tmp;
15769 int i, gap;
15770 HARD_REG_SET regset;
15772 gcc_assert (!const_store || !load);
15773 /* Check that the memory references are immediate offsets from the
15774 same base register. Extract the base register, the destination
15775 registers, and the corresponding memory offsets. */
15776 for (i = 0; i < nops; i++)
15778 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15779 return false;
15781 if (i == 0)
15782 base = cur_base;
15783 else if (REGNO (base) != REGNO (cur_base))
15784 return false;
15786 offsets[i] = INTVAL (cur_offset);
15787 if (GET_CODE (operands[i]) == SUBREG)
15789 tmp = SUBREG_REG (operands[i]);
15790 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15791 operands[i] = tmp;
15795 /* Make sure there is no dependency between the individual loads. */
15796 if (load && REGNO (operands[0]) == REGNO (base))
15797 return false; /* RAW */
15799 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15800 return false; /* WAW */
15802 /* If the same input register is used in both stores
15803 when storing different constants, try to find a free register.
15804 For example, the code
15805 mov r0, 0
15806 str r0, [r2]
15807 mov r0, 1
15808 str r0, [r2, #4]
15809 can be transformed into
15810 mov r1, 0
15811 strd r1, r0, [r2]
15812 in Thumb mode assuming that r1 is free. */
15813 if (const_store
15814 && REGNO (operands[0]) == REGNO (operands[1])
15815 && INTVAL (operands[4]) != INTVAL (operands[5]))
15817 if (TARGET_THUMB2)
15819 CLEAR_HARD_REG_SET (regset);
15820 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15821 if (tmp == NULL_RTX)
15822 return false;
15824 /* Use the new register in the first load to ensure that
15825 if the original input register is not dead after peephole,
15826 then it will have the correct constant value. */
15827 operands[0] = tmp;
15829 else if (TARGET_ARM)
15831 return false;
15832 int regno = REGNO (operands[0]);
15833 if (!peep2_reg_dead_p (4, operands[0]))
15835 /* When the input register is even and is not dead after the
15836 pattern, it has to hold the second constant but we cannot
15837 form a legal STRD in ARM mode with this register as the second
15838 register. */
15839 if (regno % 2 == 0)
15840 return false;
15842 /* Is regno-1 free? */
15843 SET_HARD_REG_SET (regset);
15844 CLEAR_HARD_REG_BIT(regset, regno - 1);
15845 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15846 if (tmp == NULL_RTX)
15847 return false;
15849 operands[0] = tmp;
15851 else
15853 /* Find a DImode register. */
15854 CLEAR_HARD_REG_SET (regset);
15855 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15856 if (tmp != NULL_RTX)
15858 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15859 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15861 else
15863 /* Can we use the input register to form a DI register? */
15864 SET_HARD_REG_SET (regset);
15865 CLEAR_HARD_REG_BIT(regset,
15866 regno % 2 == 0 ? regno + 1 : regno - 1);
15867 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15868 if (tmp == NULL_RTX)
15869 return false;
15870 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15874 gcc_assert (operands[0] != NULL_RTX);
15875 gcc_assert (operands[1] != NULL_RTX);
15876 gcc_assert (REGNO (operands[0]) % 2 == 0);
15877 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15881 /* Make sure the instructions are ordered with lower memory access first. */
15882 if (offsets[0] > offsets[1])
15884 gap = offsets[0] - offsets[1];
15885 offset = offsets[1];
15887 /* Swap the instructions such that lower memory is accessed first. */
15888 SWAP_RTX (operands[0], operands[1]);
15889 SWAP_RTX (operands[2], operands[3]);
15890 if (const_store)
15891 SWAP_RTX (operands[4], operands[5]);
15893 else
15895 gap = offsets[1] - offsets[0];
15896 offset = offsets[0];
15899 /* Make sure accesses are to consecutive memory locations. */
15900 if (gap != 4)
15901 return false;
15903 /* Make sure we generate legal instructions. */
15904 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15905 false, load))
15906 return true;
15908 /* In Thumb state, where registers are almost unconstrained, there
15909 is little hope to fix it. */
15910 if (TARGET_THUMB2)
15911 return false;
15913 if (load && commute)
15915 /* Try reordering registers. */
15916 SWAP_RTX (operands[0], operands[1]);
15917 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15918 false, load))
15919 return true;
15922 if (const_store)
15924 /* If input registers are dead after this pattern, they can be
15925 reordered or replaced by other registers that are free in the
15926 current pattern. */
15927 if (!peep2_reg_dead_p (4, operands[0])
15928 || !peep2_reg_dead_p (4, operands[1]))
15929 return false;
15931 /* Try to reorder the input registers. */
15932 /* For example, the code
15933 mov r0, 0
15934 mov r1, 1
15935 str r1, [r2]
15936 str r0, [r2, #4]
15937 can be transformed into
15938 mov r1, 0
15939 mov r0, 1
15940 strd r0, r1, [r2]
15942 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15943 false, false))
15945 SWAP_RTX (operands[0], operands[1]);
15946 return true;
15949 /* Try to find a free DI register. */
15950 CLEAR_HARD_REG_SET (regset);
15951 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15952 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15953 while (true)
15955 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15956 if (tmp == NULL_RTX)
15957 return false;
15959 /* DREG must be an even-numbered register in DImode.
15960 Split it into SI registers. */
15961 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15962 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15963 gcc_assert (operands[0] != NULL_RTX);
15964 gcc_assert (operands[1] != NULL_RTX);
15965 gcc_assert (REGNO (operands[0]) % 2 == 0);
15966 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15968 return (operands_ok_ldrd_strd (operands[0], operands[1],
15969 base, offset,
15970 false, load));
15974 return false;
15976 #undef SWAP_RTX
15981 /* Print a symbolic form of X to the debug file, F. */
15982 static void
15983 arm_print_value (FILE *f, rtx x)
15985 switch (GET_CODE (x))
15987 case CONST_INT:
15988 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15989 return;
15991 case CONST_DOUBLE:
15992 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15993 return;
15995 case CONST_VECTOR:
15997 int i;
15999 fprintf (f, "<");
16000 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16002 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16003 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16004 fputc (',', f);
16006 fprintf (f, ">");
16008 return;
16010 case CONST_STRING:
16011 fprintf (f, "\"%s\"", XSTR (x, 0));
16012 return;
16014 case SYMBOL_REF:
16015 fprintf (f, "`%s'", XSTR (x, 0));
16016 return;
16018 case LABEL_REF:
16019 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16020 return;
16022 case CONST:
16023 arm_print_value (f, XEXP (x, 0));
16024 return;
16026 case PLUS:
16027 arm_print_value (f, XEXP (x, 0));
16028 fprintf (f, "+");
16029 arm_print_value (f, XEXP (x, 1));
16030 return;
16032 case PC:
16033 fprintf (f, "pc");
16034 return;
16036 default:
16037 fprintf (f, "????");
16038 return;
16042 /* Routines for manipulation of the constant pool. */
16044 /* Arm instructions cannot load a large constant directly into a
16045 register; they have to come from a pc relative load. The constant
16046 must therefore be placed in the addressable range of the pc
16047 relative load. Depending on the precise pc relative load
16048 instruction the range is somewhere between 256 bytes and 4k. This
16049 means that we often have to dump a constant inside a function, and
16050 generate code to branch around it.
16052 It is important to minimize this, since the branches will slow
16053 things down and make the code larger.
16055 Normally we can hide the table after an existing unconditional
16056 branch so that there is no interruption of the flow, but in the
16057 worst case the code looks like this:
16059 ldr rn, L1
16061 b L2
16062 align
16063 L1: .long value
16067 ldr rn, L3
16069 b L4
16070 align
16071 L3: .long value
16075 We fix this by performing a scan after scheduling, which notices
16076 which instructions need to have their operands fetched from the
16077 constant table and builds the table.
16079 The algorithm starts by building a table of all the constants that
16080 need fixing up and all the natural barriers in the function (places
16081 where a constant table can be dropped without breaking the flow).
16082 For each fixup we note how far the pc-relative replacement will be
16083 able to reach and the offset of the instruction into the function.
16085 Having built the table we then group the fixes together to form
16086 tables that are as large as possible (subject to addressing
16087 constraints) and emit each table of constants after the last
16088 barrier that is within range of all the instructions in the group.
16089 If a group does not contain a barrier, then we forcibly create one
16090 by inserting a jump instruction into the flow. Once the table has
16091 been inserted, the insns are then modified to reference the
16092 relevant entry in the pool.
16094 Possible enhancements to the algorithm (not implemented) are:
16096 1) For some processors and object formats, there may be benefit in
16097 aligning the pools to the start of cache lines; this alignment
16098 would need to be taken into account when calculating addressability
16099 of a pool. */
16101 /* These typedefs are located at the start of this file, so that
16102 they can be used in the prototypes there. This comment is to
16103 remind readers of that fact so that the following structures
16104 can be understood more easily.
16106 typedef struct minipool_node Mnode;
16107 typedef struct minipool_fixup Mfix; */
16109 struct minipool_node
16111 /* Doubly linked chain of entries. */
16112 Mnode * next;
16113 Mnode * prev;
16114 /* The maximum offset into the code at which this entry can be placed. While
16115 pushing fixes for forward references, all entries are sorted in order
16116 of increasing max_address. */
16117 HOST_WIDE_INT max_address;
16118 /* Similarly for an entry inserted for a backwards ref. */
16119 HOST_WIDE_INT min_address;
16120 /* The number of fixes referencing this entry. This can become zero
16121 if we "unpush" an entry. In this case we ignore the entry when we
16122 come to emit the code. */
16123 int refcount;
16124 /* The offset from the start of the minipool. */
16125 HOST_WIDE_INT offset;
16126 /* The value in the table. */
16127 rtx value;
16128 /* The mode of value. */
16129 machine_mode mode;
16130 /* The size of the value. With iWMMXt enabled
16131 sizes > 4 also imply an alignment of 8-bytes. */
16132 int fix_size;
16135 struct minipool_fixup
16137 Mfix * next;
16138 rtx_insn * insn;
16139 HOST_WIDE_INT address;
16140 rtx * loc;
16141 machine_mode mode;
16142 int fix_size;
16143 rtx value;
16144 Mnode * minipool;
16145 HOST_WIDE_INT forwards;
16146 HOST_WIDE_INT backwards;
16149 /* Fixes less than a word need padding out to a word boundary. */
16150 #define MINIPOOL_FIX_SIZE(mode) \
16151 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
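/* For example (illustrative): MINIPOOL_FIX_SIZE (HImode) is 4, since a
   half-word constant still occupies a full word in the pool, while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */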
16153 static Mnode * minipool_vector_head;
16154 static Mnode * minipool_vector_tail;
16155 static rtx_code_label *minipool_vector_label;
16156 static int minipool_pad;
16158 /* The linked list of all minipool fixes required for this function. */
16159 Mfix * minipool_fix_head;
16160 Mfix * minipool_fix_tail;
16161 /* The fix entry for the current minipool, once it has been placed. */
16162 Mfix * minipool_barrier;
16164 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16165 #define JUMP_TABLES_IN_TEXT_SECTION 0
16166 #endif
16168 static HOST_WIDE_INT
16169 get_jump_table_size (rtx_jump_table_data *insn)
16171 /* ADDR_VECs only take room if read-only data goes into the text
16172 section. */
16173 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16175 rtx body = PATTERN (insn);
16176 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16177 HOST_WIDE_INT size;
16178 HOST_WIDE_INT modesize;
16180 modesize = GET_MODE_SIZE (GET_MODE (body));
16181 size = modesize * XVECLEN (body, elt);
16182 switch (modesize)
16184 case 1:
16185 /* Round up size of TBB table to a halfword boundary. */
16186 size = (size + 1) & ~(HOST_WIDE_INT)1;
16187 break;
16188 case 2:
16189 /* No padding necessary for TBH. */
16190 break;
16191 case 4:
16192 /* Add two bytes for alignment on Thumb. */
16193 if (TARGET_THUMB)
16194 size += 2;
16195 break;
16196 default:
16197 gcc_unreachable ();
16199 return size;
16202 return 0;
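/* Example (illustrative, assuming the table lands in the text section):
   a 5-entry QImode ADDR_DIFF_VEC (TBB) takes 5 bytes, rounded up to 6 by
   the halfword alignment above; the same table in HImode (TBH) takes
   exactly 10 bytes.  */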
16205 /* Return the maximum amount of padding that will be inserted before
16206 label LABEL. */
16208 static HOST_WIDE_INT
16209 get_label_padding (rtx label)
16211 HOST_WIDE_INT align, min_insn_size;
16213 align = 1 << label_to_alignment (label);
16214 min_insn_size = TARGET_THUMB ? 2 : 4;
16215 return align > min_insn_size ? align - min_insn_size : 0;
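/* Example (illustrative): a label aligned to an 8-byte boundary can be
   preceded by up to 8 - 2 = 6 bytes of padding in Thumb state, or
   8 - 4 = 4 bytes in ARM state.  */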
16218 /* Move a minipool fix MP from its current location to before MAX_MP.
16219 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16220 constraints may need updating. */
16221 static Mnode *
16222 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16223 HOST_WIDE_INT max_address)
16225 /* The code below assumes these are different. */
16226 gcc_assert (mp != max_mp);
16228 if (max_mp == NULL)
16230 if (max_address < mp->max_address)
16231 mp->max_address = max_address;
16233 else
16235 if (max_address > max_mp->max_address - mp->fix_size)
16236 mp->max_address = max_mp->max_address - mp->fix_size;
16237 else
16238 mp->max_address = max_address;
16240 /* Unlink MP from its current position. Since max_mp is non-null,
16241 mp->prev must be non-null. */
16242 mp->prev->next = mp->next;
16243 if (mp->next != NULL)
16244 mp->next->prev = mp->prev;
16245 else
16246 minipool_vector_tail = mp->prev;
16248 /* Re-insert it before MAX_MP. */
16249 mp->next = max_mp;
16250 mp->prev = max_mp->prev;
16251 max_mp->prev = mp;
16253 if (mp->prev != NULL)
16254 mp->prev->next = mp;
16255 else
16256 minipool_vector_head = mp;
16259 /* Save the new entry. */
16260 max_mp = mp;
16262 /* Scan over the preceding entries and adjust their addresses as
16263 required. */
16264 while (mp->prev != NULL
16265 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16267 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16268 mp = mp->prev;
16271 return max_mp;
16274 /* Add a constant to the minipool for a forward reference. Returns the
16275 node added or NULL if the constant will not fit in this pool. */
16276 static Mnode *
16277 add_minipool_forward_ref (Mfix *fix)
16279 /* If set, max_mp is the first pool_entry that has a lower
16280 constraint than the one we are trying to add. */
16281 Mnode * max_mp = NULL;
16282 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16283 Mnode * mp;
16285 /* If the minipool starts before the end of FIX->INSN then this FIX
16286 can not be placed into the current pool. Furthermore, adding the
16287 new constant pool entry may cause the pool to start FIX_SIZE bytes
16288 earlier. */
16289 if (minipool_vector_head &&
16290 (fix->address + get_attr_length (fix->insn)
16291 >= minipool_vector_head->max_address - fix->fix_size))
16292 return NULL;
16294 /* Scan the pool to see if a constant with the same value has
16295 already been added. While we are doing this, also note the
16296 location where we must insert the constant if it doesn't already
16297 exist. */
16298 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16300 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16301 && fix->mode == mp->mode
16302 && (!LABEL_P (fix->value)
16303 || (CODE_LABEL_NUMBER (fix->value)
16304 == CODE_LABEL_NUMBER (mp->value)))
16305 && rtx_equal_p (fix->value, mp->value))
16307 /* More than one fix references this entry. */
16308 mp->refcount++;
16309 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16312 /* Note the insertion point if necessary. */
16313 if (max_mp == NULL
16314 && mp->max_address > max_address)
16315 max_mp = mp;
16317 /* If we are inserting an 8-byte aligned quantity and
16318 we have not already found an insertion point, then
16319 make sure that all such 8-byte aligned quantities are
16320 placed at the start of the pool. */
16321 if (ARM_DOUBLEWORD_ALIGN
16322 && max_mp == NULL
16323 && fix->fix_size >= 8
16324 && mp->fix_size < 8)
16326 max_mp = mp;
16327 max_address = mp->max_address;
16331 /* The value is not currently in the minipool, so we need to create
16332 a new entry for it. If MAX_MP is NULL, the entry will be put on
16333 the end of the list since the placement is less constrained than
16334 any existing entry. Otherwise, we insert the new fix before
16335 MAX_MP and, if necessary, adjust the constraints on the other
16336 entries. */
16337 mp = XNEW (Mnode);
16338 mp->fix_size = fix->fix_size;
16339 mp->mode = fix->mode;
16340 mp->value = fix->value;
16341 mp->refcount = 1;
16342 /* Not yet required for a backwards ref. */
16343 mp->min_address = -65536;
16345 if (max_mp == NULL)
16347 mp->max_address = max_address;
16348 mp->next = NULL;
16349 mp->prev = minipool_vector_tail;
16351 if (mp->prev == NULL)
16353 minipool_vector_head = mp;
16354 minipool_vector_label = gen_label_rtx ();
16356 else
16357 mp->prev->next = mp;
16359 minipool_vector_tail = mp;
16361 else
16363 if (max_address > max_mp->max_address - mp->fix_size)
16364 mp->max_address = max_mp->max_address - mp->fix_size;
16365 else
16366 mp->max_address = max_address;
16368 mp->next = max_mp;
16369 mp->prev = max_mp->prev;
16370 max_mp->prev = mp;
16371 if (mp->prev != NULL)
16372 mp->prev->next = mp;
16373 else
16374 minipool_vector_head = mp;
16377 /* Save the new entry. */
16378 max_mp = mp;
16380 /* Scan over the preceding entries and adjust their addresses as
16381 required. */
16382 while (mp->prev != NULL
16383 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16385 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16386 mp = mp->prev;
16389 return max_mp;
16392 static Mnode *
16393 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16394 HOST_WIDE_INT min_address)
16396 HOST_WIDE_INT offset;
16398 /* The code below assumes these are different. */
16399 gcc_assert (mp != min_mp);
16401 if (min_mp == NULL)
16403 if (min_address > mp->min_address)
16404 mp->min_address = min_address;
16406 else
16408 /* We will adjust this below if it is too loose. */
16409 mp->min_address = min_address;
16411 /* Unlink MP from its current position. Since min_mp is non-null,
16412 mp->next must be non-null. */
16413 mp->next->prev = mp->prev;
16414 if (mp->prev != NULL)
16415 mp->prev->next = mp->next;
16416 else
16417 minipool_vector_head = mp->next;
16419 /* Reinsert it after MIN_MP. */
16420 mp->prev = min_mp;
16421 mp->next = min_mp->next;
16422 min_mp->next = mp;
16423 if (mp->next != NULL)
16424 mp->next->prev = mp;
16425 else
16426 minipool_vector_tail = mp;
16429 min_mp = mp;
16431 offset = 0;
16432 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16434 mp->offset = offset;
16435 if (mp->refcount > 0)
16436 offset += mp->fix_size;
16438 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16439 mp->next->min_address = mp->min_address + mp->fix_size;
16442 return min_mp;
16445 /* Add a constant to the minipool for a backward reference. Returns the
16446 node added or NULL if the constant will not fit in this pool.
16448 Note that the code for insertion for a backwards reference can be
16449 somewhat confusing because the calculated offsets for each fix do
16450 not take into account the size of the pool (which is still under
16451 construction). */
16452 static Mnode *
16453 add_minipool_backward_ref (Mfix *fix)
16455 /* If set, min_mp is the last pool_entry that has a lower constraint
16456 than the one we are trying to add. */
16457 Mnode *min_mp = NULL;
16458 /* This can be negative, since it is only a constraint. */
16459 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16460 Mnode *mp;
16462 /* If we can't reach the current pool from this insn, or if we can't
16463 insert this entry at the end of the pool without pushing other
16464 fixes out of range, then we don't try. This ensures that we
16465 can't fail later on. */
16466 if (min_address >= minipool_barrier->address
16467 || (minipool_vector_tail->min_address + fix->fix_size
16468 >= minipool_barrier->address))
16469 return NULL;
16471 /* Scan the pool to see if a constant with the same value has
16472 already been added. While we are doing this, also note the
16473 location where we must insert the constant if it doesn't already
16474 exist. */
16475 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16477 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16478 && fix->mode == mp->mode
16479 && (!LABEL_P (fix->value)
16480 || (CODE_LABEL_NUMBER (fix->value)
16481 == CODE_LABEL_NUMBER (mp->value)))
16482 && rtx_equal_p (fix->value, mp->value)
16483 /* Check that there is enough slack to move this entry to the
16484 end of the table (this is conservative). */
16485 && (mp->max_address
16486 > (minipool_barrier->address
16487 + minipool_vector_tail->offset
16488 + minipool_vector_tail->fix_size)))
16490 mp->refcount++;
16491 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16494 if (min_mp != NULL)
16495 mp->min_address += fix->fix_size;
16496 else
16498 /* Note the insertion point if necessary. */
16499 if (mp->min_address < min_address)
16501 /* For now, we do not allow the insertion of 8-byte alignment
16502 requiring nodes anywhere but at the start of the pool. */
16503 if (ARM_DOUBLEWORD_ALIGN
16504 && fix->fix_size >= 8 && mp->fix_size < 8)
16505 return NULL;
16506 else
16507 min_mp = mp;
16509 else if (mp->max_address
16510 < minipool_barrier->address + mp->offset + fix->fix_size)
16512 /* Inserting before this entry would push the fix beyond
16513 its maximum address (which can happen if we have
16514 re-located a forwards fix); force the new fix to come
16515 after it. */
16516 if (ARM_DOUBLEWORD_ALIGN
16517 && fix->fix_size >= 8 && mp->fix_size < 8)
16518 return NULL;
16519 else
16521 min_mp = mp;
16522 min_address = mp->min_address + fix->fix_size;
16525 /* Do not insert a non-8-byte aligned quantity before 8-byte
16526 aligned quantities. */
16527 else if (ARM_DOUBLEWORD_ALIGN
16528 && fix->fix_size < 8
16529 && mp->fix_size >= 8)
16531 min_mp = mp;
16532 min_address = mp->min_address + fix->fix_size;
16537 /* We need to create a new entry. */
16538 mp = XNEW (Mnode);
16539 mp->fix_size = fix->fix_size;
16540 mp->mode = fix->mode;
16541 mp->value = fix->value;
16542 mp->refcount = 1;
16543 mp->max_address = minipool_barrier->address + 65536;
16545 mp->min_address = min_address;
16547 if (min_mp == NULL)
16549 mp->prev = NULL;
16550 mp->next = minipool_vector_head;
16552 if (mp->next == NULL)
16554 minipool_vector_tail = mp;
16555 minipool_vector_label = gen_label_rtx ();
16557 else
16558 mp->next->prev = mp;
16560 minipool_vector_head = mp;
16562 else
16564 mp->next = min_mp->next;
16565 mp->prev = min_mp;
16566 min_mp->next = mp;
16568 if (mp->next != NULL)
16569 mp->next->prev = mp;
16570 else
16571 minipool_vector_tail = mp;
16574 /* Save the new entry. */
16575 min_mp = mp;
16577 if (mp->prev)
16578 mp = mp->prev;
16579 else
16580 mp->offset = 0;
16582 /* Scan over the following entries and adjust their offsets. */
16583 while (mp->next != NULL)
16585 if (mp->next->min_address < mp->min_address + mp->fix_size)
16586 mp->next->min_address = mp->min_address + mp->fix_size;
16588 if (mp->refcount)
16589 mp->next->offset = mp->offset + mp->fix_size;
16590 else
16591 mp->next->offset = mp->offset;
16593 mp = mp->next;
16596 return min_mp;
16599 static void
16600 assign_minipool_offsets (Mfix *barrier)
16602 HOST_WIDE_INT offset = 0;
16603 Mnode *mp;
16605 minipool_barrier = barrier;
16607 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16609 mp->offset = offset;
16611 if (mp->refcount > 0)
16612 offset += mp->fix_size;
16616 /* Output the literal table */
16617 static void
16618 dump_minipool (rtx_insn *scan)
16620 Mnode * mp;
16621 Mnode * nmp;
16622 int align64 = 0;
16624 if (ARM_DOUBLEWORD_ALIGN)
16625 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16626 if (mp->refcount > 0 && mp->fix_size >= 8)
16628 align64 = 1;
16629 break;
16632 if (dump_file)
16633 fprintf (dump_file,
16634 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16635 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16637 scan = emit_label_after (gen_label_rtx (), scan);
16638 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16639 scan = emit_label_after (minipool_vector_label, scan);
16641 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16643 if (mp->refcount > 0)
16645 if (dump_file)
16647 fprintf (dump_file,
16648 ";; Offset %u, min %ld, max %ld ",
16649 (unsigned) mp->offset, (unsigned long) mp->min_address,
16650 (unsigned long) mp->max_address);
16651 arm_print_value (dump_file, mp->value);
16652 fputc ('\n', dump_file);
16655 switch (mp->fix_size)
16657 #ifdef HAVE_consttable_1
16658 case 1:
16659 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16660 break;
16662 #endif
16663 #ifdef HAVE_consttable_2
16664 case 2:
16665 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16666 break;
16668 #endif
16669 #ifdef HAVE_consttable_4
16670 case 4:
16671 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16672 break;
16674 #endif
16675 #ifdef HAVE_consttable_8
16676 case 8:
16677 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16678 break;
16680 #endif
16681 #ifdef HAVE_consttable_16
16682 case 16:
16683 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16684 break;
16686 #endif
16687 default:
16688 gcc_unreachable ();
16692 nmp = mp->next;
16693 free (mp);
16696 minipool_vector_head = minipool_vector_tail = NULL;
16697 scan = emit_insn_after (gen_consttable_end (), scan);
16698 scan = emit_barrier_after (scan);
16701 /* Return the cost of forcibly inserting a barrier after INSN. */
16702 static int
16703 arm_barrier_cost (rtx insn)
16705 /* Basing the location of the pool on the loop depth is preferable,
16706 but at the moment, the basic block information seems to be
16707 corrupt by this stage of the compilation. */
16708 int base_cost = 50;
16709 rtx next = next_nonnote_insn (insn);
16711 if (next != NULL && LABEL_P (next))
16712 base_cost -= 20;
16714 switch (GET_CODE (insn))
16716 case CODE_LABEL:
16717 /* It will always be better to place the table before the label, rather
16718 than after it. */
16719 return 50;
16721 case INSN:
16722 case CALL_INSN:
16723 return base_cost;
16725 case JUMP_INSN:
16726 return base_cost - 10;
16728 default:
16729 return base_cost + 10;
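/* Example (illustrative): an unconditional jump that is immediately
   followed by a label scores 50 - 20 - 10 = 20, making it one of the
   cheapest places to force a pool barrier; lower costs are preferred by
   create_fix_barrier below.  */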
16733 /* Find the best place in the insn stream in the range
16734 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16735 Create the barrier by inserting a jump and add a new fix entry for
16736 it. */
16737 static Mfix *
16738 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16740 HOST_WIDE_INT count = 0;
16741 rtx_barrier *barrier;
16742 rtx_insn *from = fix->insn;
16743 /* The instruction after which we will insert the jump. */
16744 rtx_insn *selected = NULL;
16745 int selected_cost;
16746 /* The address at which the jump instruction will be placed. */
16747 HOST_WIDE_INT selected_address;
16748 Mfix * new_fix;
16749 HOST_WIDE_INT max_count = max_address - fix->address;
16750 rtx_code_label *label = gen_label_rtx ();
16752 selected_cost = arm_barrier_cost (from);
16753 selected_address = fix->address;
16755 while (from && count < max_count)
16757 rtx_jump_table_data *tmp;
16758 int new_cost;
16760 /* This code shouldn't have been called if there was a natural barrier
16761 within range. */
16762 gcc_assert (!BARRIER_P (from));
16764 /* Count the length of this insn. This must stay in sync with the
16765 code that pushes minipool fixes. */
16766 if (LABEL_P (from))
16767 count += get_label_padding (from);
16768 else
16769 count += get_attr_length (from);
16771 /* If there is a jump table, add its length. */
16772 if (tablejump_p (from, NULL, &tmp))
16774 count += get_jump_table_size (tmp);
16776 /* Jump tables aren't in a basic block, so base the cost on
16777 the dispatch insn. If we select this location, we will
16778 still put the pool after the table. */
16779 new_cost = arm_barrier_cost (from);
16781 if (count < max_count
16782 && (!selected || new_cost <= selected_cost))
16784 selected = tmp;
16785 selected_cost = new_cost;
16786 selected_address = fix->address + count;
16789 /* Continue after the dispatch table. */
16790 from = NEXT_INSN (tmp);
16791 continue;
16794 new_cost = arm_barrier_cost (from);
16796 if (count < max_count
16797 && (!selected || new_cost <= selected_cost))
16799 selected = from;
16800 selected_cost = new_cost;
16801 selected_address = fix->address + count;
16804 from = NEXT_INSN (from);
16807 /* Make sure that we found a place to insert the jump. */
16808 gcc_assert (selected);
16810 /* Make sure we do not split a call and its corresponding
16811 CALL_ARG_LOCATION note. */
16812 if (CALL_P (selected))
16814 rtx_insn *next = NEXT_INSN (selected);
16815 if (next && NOTE_P (next)
16816 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16817 selected = next;
16820 /* Create a new JUMP_INSN that branches around a barrier. */
16821 from = emit_jump_insn_after (gen_jump (label), selected);
16822 JUMP_LABEL (from) = label;
16823 barrier = emit_barrier_after (from);
16824 emit_label_after (label, barrier);
16826 /* Create a minipool barrier entry for the new barrier. */
16827 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16828 new_fix->insn = barrier;
16829 new_fix->address = selected_address;
16830 new_fix->next = fix->next;
16831 fix->next = new_fix;
16833 return new_fix;
16836 /* Record that there is a natural barrier in the insn stream at
16837 ADDRESS. */
16838 static void
16839 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16841 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16843 fix->insn = insn;
16844 fix->address = address;
16846 fix->next = NULL;
16847 if (minipool_fix_head != NULL)
16848 minipool_fix_tail->next = fix;
16849 else
16850 minipool_fix_head = fix;
16852 minipool_fix_tail = fix;
16855 /* Record INSN, which will need fixing up to load a value from the
16856 minipool. ADDRESS is the offset of the insn since the start of the
16857 function; LOC is a pointer to the part of the insn which requires
16858 fixing; VALUE is the constant that must be loaded, which is of type
16859 MODE. */
16860 static void
16861 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16862 machine_mode mode, rtx value)
16864 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16866 fix->insn = insn;
16867 fix->address = address;
16868 fix->loc = loc;
16869 fix->mode = mode;
16870 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16871 fix->value = value;
16872 fix->forwards = get_attr_pool_range (insn);
16873 fix->backwards = get_attr_neg_pool_range (insn);
16874 fix->minipool = NULL;
16876 /* If an insn doesn't have a range defined for it, then it isn't
16877 expecting to be reworked by this code. Better to stop now than
16878 to generate duff assembly code. */
16879 gcc_assert (fix->forwards || fix->backwards);
16881 /* If an entry requires 8-byte alignment then assume all constant pools
16882 require 4 bytes of padding. Trying to do this later on a per-pool
16883 basis is awkward because existing pool entries have to be modified. */
16884 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16885 minipool_pad = 4;
16887 if (dump_file)
16889 fprintf (dump_file,
16890 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16891 GET_MODE_NAME (mode),
16892 INSN_UID (insn), (unsigned long) address,
16893 -1 * (long)fix->backwards, (long)fix->forwards);
16894 arm_print_value (dump_file, fix->value);
16895 fprintf (dump_file, "\n");
16898 /* Add it to the chain of fixes. */
16899 fix->next = NULL;
16901 if (minipool_fix_head != NULL)
16902 minipool_fix_tail->next = fix;
16903 else
16904 minipool_fix_head = fix;
16906 minipool_fix_tail = fix;
16909 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16910 Returns the number of insns needed, or 99 if we always want to synthesize
16911 the value. */
16913 arm_max_const_double_inline_cost ()
16915 /* Let the value get synthesized to avoid the use of literal pools. */
16916 if (arm_disable_literal_pool)
16917 return 99;
16919 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16922 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16923 Returns the number of insns needed, or 99 if we don't know how to
16924 do it. */
16926 arm_const_double_inline_cost (rtx val)
16928 rtx lowpart, highpart;
16929 machine_mode mode;
16931 mode = GET_MODE (val);
16933 if (mode == VOIDmode)
16934 mode = DImode;
16936 gcc_assert (GET_MODE_SIZE (mode) == 8);
16938 lowpart = gen_lowpart (SImode, val);
16939 highpart = gen_highpart_mode (SImode, mode, val);
16941 gcc_assert (CONST_INT_P (lowpart));
16942 gcc_assert (CONST_INT_P (highpart));
16944 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16945 NULL_RTX, NULL_RTX, 0, 0)
16946 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16947 NULL_RTX, NULL_RTX, 0, 0));
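/* Example (illustrative): for the DImode constant 0x100000001 both the
   low and the high SImode halves are 1, each synthesizable in a single
   insn, so the cost returned is 2 -- within the budget given by
   arm_max_const_double_inline_cost above.  */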
16950 /* Cost of loading a SImode constant. */
16951 static inline int
16952 arm_const_inline_cost (enum rtx_code code, rtx val)
16954 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16955 NULL_RTX, NULL_RTX, 1, 0);
16958 /* Return true if it is worthwhile to split a 64-bit constant into two
16959 32-bit operations. This is the case if optimizing for size, or
16960 if we have load delay slots, or if one 32-bit part can be done with
16961 a single data operation. */
16962 bool
16963 arm_const_double_by_parts (rtx val)
16965 machine_mode mode = GET_MODE (val);
16966 rtx part;
16968 if (optimize_size || arm_ld_sched)
16969 return true;
16971 if (mode == VOIDmode)
16972 mode = DImode;
16974 part = gen_highpart_mode (SImode, mode, val);
16976 gcc_assert (CONST_INT_P (part));
16978 if (const_ok_for_arm (INTVAL (part))
16979 || const_ok_for_arm (~INTVAL (part)))
16980 return true;
16982 part = gen_lowpart (SImode, val);
16984 gcc_assert (CONST_INT_P (part));
16986 if (const_ok_for_arm (INTVAL (part))
16987 || const_ok_for_arm (~INTVAL (part)))
16988 return true;
16990 return false;
16993 /* Return true if it is possible to inline both the high and low parts
16994 of a 64-bit constant into 32-bit data processing instructions. */
16995 bool
16996 arm_const_double_by_immediates (rtx val)
16998 machine_mode mode = GET_MODE (val);
16999 rtx part;
17001 if (mode == VOIDmode)
17002 mode = DImode;
17004 part = gen_highpart_mode (SImode, mode, val);
17006 gcc_assert (CONST_INT_P (part));
17008 if (!const_ok_for_arm (INTVAL (part)))
17009 return false;
17011 part = gen_lowpart (SImode, val);
17013 gcc_assert (CONST_INT_P (part));
17015 if (!const_ok_for_arm (INTVAL (part)))
17016 return false;
17018 return true;
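/* Example (illustrative): 0x00ff00000000001f satisfies this test, since
   0x00ff0000 and 0x1f are both valid ARM data-processing immediates,
   whereas a value such as 0x12345678deadbeef does not and would have to
   be loaded from the pool or synthesized piecewise.  */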
17021 /* Scan INSN and note any of its operands that need fixing.
17022 If DO_PUSHES is false we do not actually push any of the fixups
17023 needed. */
17024 static void
17025 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17027 int opno;
17029 extract_constrain_insn (insn);
17031 if (recog_data.n_alternatives == 0)
17032 return;
17034 /* Fill in recog_op_alt with information about the constraints of
17035 this insn. */
17036 preprocess_constraints (insn);
17038 const operand_alternative *op_alt = which_op_alt ();
17039 for (opno = 0; opno < recog_data.n_operands; opno++)
17041 /* Things we need to fix can only occur in inputs. */
17042 if (recog_data.operand_type[opno] != OP_IN)
17043 continue;
17045 /* If this alternative is a memory reference, then any mention
17046 of constants in this alternative is really to fool reload
17047 into allowing us to accept one there. We need to fix them up
17048 now so that we output the right code. */
17049 if (op_alt[opno].memory_ok)
17051 rtx op = recog_data.operand[opno];
17053 if (CONSTANT_P (op))
17055 if (do_pushes)
17056 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17057 recog_data.operand_mode[opno], op);
17059 else if (MEM_P (op)
17060 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17061 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17063 if (do_pushes)
17065 rtx cop = avoid_constant_pool_reference (op);
17067 /* Casting the address of something to a mode narrower
17068 than a word can cause avoid_constant_pool_reference()
17069 to return the pool reference itself. That's no good to
17070 us here. Let's just hope that we can use the
17071 constant pool value directly. */
17072 if (op == cop)
17073 cop = get_pool_constant (XEXP (op, 0));
17075 push_minipool_fix (insn, address,
17076 recog_data.operand_loc[opno],
17077 recog_data.operand_mode[opno], cop);
17084 return;
17087 /* Rewrite move insn into subtract of 0 if the condition codes will
17088 be useful in next conditional jump insn. */
17090 static void
17091 thumb1_reorg (void)
17093 basic_block bb;
17095 FOR_EACH_BB_FN (bb, cfun)
17097 rtx dest, src;
17098 rtx pat, op0, set = NULL;
17099 rtx_insn *prev, *insn = BB_END (bb);
17100 bool insn_clobbered = false;
17102 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17103 insn = PREV_INSN (insn);
17105 /* Find the last cbranchsi4_insn in basic block BB. */
17106 if (insn == BB_HEAD (bb)
17107 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17108 continue;
17110 /* Get the register with which we are comparing. */
17111 pat = PATTERN (insn);
17112 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17114 /* Find the first flag setting insn before INSN in basic block BB. */
17115 gcc_assert (insn != BB_HEAD (bb));
17116 for (prev = PREV_INSN (insn);
17117 (!insn_clobbered
17118 && prev != BB_HEAD (bb)
17119 && (NOTE_P (prev)
17120 || DEBUG_INSN_P (prev)
17121 || ((set = single_set (prev)) != NULL
17122 && get_attr_conds (prev) == CONDS_NOCOND)));
17123 prev = PREV_INSN (prev))
17125 if (reg_set_p (op0, prev))
17126 insn_clobbered = true;
17129 /* Skip if op0 is clobbered by insn other than prev. */
17130 if (insn_clobbered)
17131 continue;
17133 if (!set)
17134 continue;
17136 dest = SET_DEST (set);
17137 src = SET_SRC (set);
17138 if (!low_register_operand (dest, SImode)
17139 || !low_register_operand (src, SImode))
17140 continue;
17142 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17143 in INSN. Both src and dest of the move insn are checked. */
17144 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17146 dest = copy_rtx (dest);
17147 src = copy_rtx (src);
17148 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17149 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17150 INSN_CODE (prev) = -1;
17151 /* Set test register in INSN to dest. */
17152 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17153 INSN_CODE (insn) = -1;
17158 /* Convert instructions to their cc-clobbering variant if possible, since
17159 that allows us to use smaller encodings. */
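/* Example (illustrative): when the condition flags are dead, a three-
   register 'add r0, r1, r2' that would otherwise need a 32-bit ADD.W
   encoding can be rewritten as the flag-setting 'adds r0, r1, r2', which
   has a 16-bit encoding; the loop below does this by wrapping the SET in
   a PARALLEL with a clobber of CC_REGNUM.  */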
17161 static void
17162 thumb2_reorg (void)
17164 basic_block bb;
17165 regset_head live;
17167 INIT_REG_SET (&live);
17169 /* We are freeing block_for_insn in the toplev to keep compatibility
17170 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17171 compute_bb_for_insn ();
17172 df_analyze ();
17174 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17176 FOR_EACH_BB_FN (bb, cfun)
17178 if (current_tune->disparage_flag_setting_t16_encodings
17179 && optimize_bb_for_speed_p (bb))
17180 continue;
17182 rtx_insn *insn;
17183 Convert_Action action = SKIP;
17184 Convert_Action action_for_partial_flag_setting
17185 = (current_tune->disparage_partial_flag_setting_t16_encodings
17186 && optimize_bb_for_speed_p (bb))
17187 ? SKIP : CONV;
17189 COPY_REG_SET (&live, DF_LR_OUT (bb));
17190 df_simulate_initialize_backwards (bb, &live);
17191 FOR_BB_INSNS_REVERSE (bb, insn)
17193 if (NONJUMP_INSN_P (insn)
17194 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17195 && GET_CODE (PATTERN (insn)) == SET)
17197 action = SKIP;
17198 rtx pat = PATTERN (insn);
17199 rtx dst = XEXP (pat, 0);
17200 rtx src = XEXP (pat, 1);
17201 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17203 if (!OBJECT_P (src))
17204 op0 = XEXP (src, 0);
17206 if (BINARY_P (src))
17207 op1 = XEXP (src, 1);
17209 if (low_register_operand (dst, SImode))
17211 switch (GET_CODE (src))
17213 case PLUS:
17214 /* Adding two registers and storing the result
17215 in the first source is already a 16-bit
17216 operation. */
17217 if (rtx_equal_p (dst, op0)
17218 && register_operand (op1, SImode))
17219 break;
17221 if (low_register_operand (op0, SImode))
17223 /* ADDS <Rd>,<Rn>,<Rm> */
17224 if (low_register_operand (op1, SImode))
17225 action = CONV;
17226 /* ADDS <Rdn>,#<imm8> */
17227 /* SUBS <Rdn>,#<imm8> */
17228 else if (rtx_equal_p (dst, op0)
17229 && CONST_INT_P (op1)
17230 && IN_RANGE (INTVAL (op1), -255, 255))
17231 action = CONV;
17232 /* ADDS <Rd>,<Rn>,#<imm3> */
17233 /* SUBS <Rd>,<Rn>,#<imm3> */
17234 else if (CONST_INT_P (op1)
17235 && IN_RANGE (INTVAL (op1), -7, 7))
17236 action = CONV;
17238 /* ADCS <Rd>, <Rn> */
17239 else if (GET_CODE (XEXP (src, 0)) == PLUS
17240 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17241 && low_register_operand (XEXP (XEXP (src, 0), 1),
17242 SImode)
17243 && COMPARISON_P (op1)
17244 && cc_register (XEXP (op1, 0), VOIDmode)
17245 && maybe_get_arm_condition_code (op1) == ARM_CS
17246 && XEXP (op1, 1) == const0_rtx)
17247 action = CONV;
17248 break;
17250 case MINUS:
17251 /* RSBS <Rd>,<Rn>,#0
17252 Not handled here: see NEG below. */
17253 /* SUBS <Rd>,<Rn>,#<imm3>
17254 SUBS <Rdn>,#<imm8>
17255 Not handled here: see PLUS above. */
17256 /* SUBS <Rd>,<Rn>,<Rm> */
17257 if (low_register_operand (op0, SImode)
17258 && low_register_operand (op1, SImode))
17259 action = CONV;
17260 break;
17262 case MULT:
17263 /* MULS <Rdm>,<Rn>,<Rdm>
17264 As an exception to the rule, this is only used
17265 when optimizing for size since MULS is slow on all
17266 known implementations. We do not even want to use
17267 MULS in cold code, if optimizing for speed, so we
17268 test the global flag here. */
17269 if (!optimize_size)
17270 break;
17271 /* else fall through. */
17272 case AND:
17273 case IOR:
17274 case XOR:
17275 /* ANDS <Rdn>,<Rm> */
17276 if (rtx_equal_p (dst, op0)
17277 && low_register_operand (op1, SImode))
17278 action = action_for_partial_flag_setting;
17279 else if (rtx_equal_p (dst, op1)
17280 && low_register_operand (op0, SImode))
17281 action = action_for_partial_flag_setting == SKIP
17282 ? SKIP : SWAP_CONV;
17283 break;
17285 case ASHIFTRT:
17286 case ASHIFT:
17287 case LSHIFTRT:
17288 /* ASRS <Rdn>,<Rm> */
17289 /* LSRS <Rdn>,<Rm> */
17290 /* LSLS <Rdn>,<Rm> */
17291 if (rtx_equal_p (dst, op0)
17292 && low_register_operand (op1, SImode))
17293 action = action_for_partial_flag_setting;
17294 /* ASRS <Rd>,<Rm>,#<imm5> */
17295 /* LSRS <Rd>,<Rm>,#<imm5> */
17296 /* LSLS <Rd>,<Rm>,#<imm5> */
17297 else if (low_register_operand (op0, SImode)
17298 && CONST_INT_P (op1)
17299 && IN_RANGE (INTVAL (op1), 0, 31))
17300 action = action_for_partial_flag_setting;
17301 break;
17303 case ROTATERT:
17304 /* RORS <Rdn>,<Rm> */
17305 if (rtx_equal_p (dst, op0)
17306 && low_register_operand (op1, SImode))
17307 action = action_for_partial_flag_setting;
17308 break;
17310 case NOT:
17311 /* MVNS <Rd>,<Rm> */
17312 if (low_register_operand (op0, SImode))
17313 action = action_for_partial_flag_setting;
17314 break;
17316 case NEG:
17317 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17318 if (low_register_operand (op0, SImode))
17319 action = CONV;
17320 break;
17322 case CONST_INT:
17323 /* MOVS <Rd>,#<imm8> */
17324 if (CONST_INT_P (src)
17325 && IN_RANGE (INTVAL (src), 0, 255))
17326 action = action_for_partial_flag_setting;
17327 break;
17329 case REG:
17330 /* MOVS and MOV<c> with registers have different
17331 encodings, so are not relevant here. */
17332 break;
17334 default:
17335 break;
17339 if (action != SKIP)
17341 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17342 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17343 rtvec vec;
17345 if (action == SWAP_CONV)
17347 src = copy_rtx (src);
17348 XEXP (src, 0) = op1;
17349 XEXP (src, 1) = op0;
17350 pat = gen_rtx_SET (VOIDmode, dst, src);
17351 vec = gen_rtvec (2, pat, clobber);
17353 else /* action == CONV */
17354 vec = gen_rtvec (2, pat, clobber);
17356 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17357 INSN_CODE (insn) = -1;
17361 if (NONDEBUG_INSN_P (insn))
17362 df_simulate_one_insn_backwards (bb, insn, &live);
17366 CLEAR_REG_SET (&live);
17369 /* Gcc puts the pool in the wrong place for ARM, since we can only
17370 load addresses a limited distance around the pc. We do some
17371 special munging to move the constant pool values to the correct
17372 point in the code. */
17373 static void
17374 arm_reorg (void)
17376 rtx_insn *insn;
17377 HOST_WIDE_INT address = 0;
17378 Mfix * fix;
17380 if (TARGET_THUMB1)
17381 thumb1_reorg ();
17382 else if (TARGET_THUMB2)
17383 thumb2_reorg ();
17385 /* Ensure all insns that must be split have been split at this point.
17386 Otherwise, the pool placement code below may compute incorrect
17387 insn lengths. Note that when optimizing, all insns have already
17388 been split at this point. */
17389 if (!optimize)
17390 split_all_insns_noflow ();
17392 minipool_fix_head = minipool_fix_tail = NULL;
17394 /* The first insn must always be a note, or the code below won't
17395 scan it properly. */
17396 insn = get_insns ();
17397 gcc_assert (NOTE_P (insn));
17398 minipool_pad = 0;
17400 /* Scan all the insns and record the operands that will need fixing. */
17401 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17403 if (BARRIER_P (insn))
17404 push_minipool_barrier (insn, address);
17405 else if (INSN_P (insn))
17407 rtx_jump_table_data *table;
17409 note_invalid_constants (insn, address, true);
17410 address += get_attr_length (insn);
17412 /* If the insn is a vector jump, add the size of the table
17413 and skip the table. */
17414 if (tablejump_p (insn, NULL, &table))
17416 address += get_jump_table_size (table);
17417 insn = table;
17420 else if (LABEL_P (insn))
17421 /* Add the worst-case padding due to alignment. We don't add
17422 the _current_ padding because the minipool insertions
17423 themselves might change it. */
17424 address += get_label_padding (insn);
17427 fix = minipool_fix_head;
17429 /* Now scan the fixups and perform the required changes. */
17430 while (fix)
17432 Mfix * ftmp;
17433 Mfix * fdel;
17434 Mfix * last_added_fix;
17435 Mfix * last_barrier = NULL;
17436 Mfix * this_fix;
17438 /* Skip any further barriers before the next fix. */
17439 while (fix && BARRIER_P (fix->insn))
17440 fix = fix->next;
17442 /* No more fixes. */
17443 if (fix == NULL)
17444 break;
17446 last_added_fix = NULL;
17448 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17450 if (BARRIER_P (ftmp->insn))
17452 if (ftmp->address >= minipool_vector_head->max_address)
17453 break;
17455 last_barrier = ftmp;
17457 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17458 break;
17460 last_added_fix = ftmp; /* Keep track of the last fix added. */
17463 /* If we found a barrier, drop back to that; any fixes that we
17464 could have reached but come after the barrier will now go in
17465 the next mini-pool. */
17466 if (last_barrier != NULL)
17468 /* Reduce the refcount for those fixes that won't go into this
17469 pool after all. */
17470 for (fdel = last_barrier->next;
17471 fdel && fdel != ftmp;
17472 fdel = fdel->next)
17474 fdel->minipool->refcount--;
17475 fdel->minipool = NULL;
17478 ftmp = last_barrier;
17480 else
17482 /* ftmp is the first fix that we can't fit into this pool and
17483 there are no natural barriers that we could use. Insert a
17484 new barrier in the code somewhere between the previous
17485 fix and this one, and arrange to jump around it. */
17486 HOST_WIDE_INT max_address;
17488 /* The last item on the list of fixes must be a barrier, so
17489 we can never run off the end of the list of fixes without
17490 last_barrier being set. */
17491 gcc_assert (ftmp);
17493 max_address = minipool_vector_head->max_address;
17494 /* Check that there isn't another fix that is in range that
17495 we couldn't fit into this pool because the pool was
17496 already too large: we need to put the pool before such an
17497 instruction. The pool itself may come just after the
17498 fix because create_fix_barrier also allows space for a
17499 jump instruction. */
17500 if (ftmp->address < max_address)
17501 max_address = ftmp->address + 1;
17503 last_barrier = create_fix_barrier (last_added_fix, max_address);
17506 assign_minipool_offsets (last_barrier);
17508 while (ftmp)
17510 if (!BARRIER_P (ftmp->insn)
17511 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17512 == NULL))
17513 break;
17515 ftmp = ftmp->next;
17518 /* Scan over the fixes we have identified for this pool, fixing them
17519 up and adding the constants to the pool itself. */
17520 for (this_fix = fix; this_fix && ftmp != this_fix;
17521 this_fix = this_fix->next)
17522 if (!BARRIER_P (this_fix->insn))
17524 rtx addr
17525 = plus_constant (Pmode,
17526 gen_rtx_LABEL_REF (VOIDmode,
17527 minipool_vector_label),
17528 this_fix->minipool->offset);
17529 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17532 dump_minipool (last_barrier->insn);
17533 fix = ftmp;
17536 /* From now on we must synthesize any constants that we can't handle
17537 directly. This can happen if the RTL gets split during final
17538 instruction generation. */
17539 cfun->machine->after_arm_reorg = 1;
17541 /* Free the minipool memory. */
17542 obstack_free (&minipool_obstack, minipool_startobj);
17545 /* Routines to output assembly language. */
17547 /* Return string representation of passed in real value. */
17548 static const char *
17549 fp_const_from_val (REAL_VALUE_TYPE *r)
17551 if (!fp_consts_inited)
17552 init_fp_table ();
17554 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17555 return "0";
17558 /* OPERANDS[0] is the entire list of insns that constitute pop,
17559 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17560 is in the list, UPDATE is true iff the list contains explicit
17561 update of base register. */
17562 void
17563 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17564 bool update)
17566 int i;
17567 char pattern[100];
17568 int offset;
17569 const char *conditional;
17570 int num_saves = XVECLEN (operands[0], 0);
17571 unsigned int regno;
17572 unsigned int regno_base = REGNO (operands[1]);
17574 offset = 0;
17575 offset += update ? 1 : 0;
17576 offset += return_pc ? 1 : 0;
17578 /* Is the base register in the list? */
17579 for (i = offset; i < num_saves; i++)
17581 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17582 /* If SP is in the list, then the base register must be SP. */
17583 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17584 /* If base register is in the list, there must be no explicit update. */
17585 if (regno == regno_base)
17586 gcc_assert (!update);
17589 conditional = reverse ? "%?%D0" : "%?%d0";
17590 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17592 /* Output pop (not stmfd) because it has a shorter encoding. */
17593 gcc_assert (update);
17594 sprintf (pattern, "pop%s\t{", conditional);
17596 else
17598 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17599 It's just a convention; their semantics are identical. */
17600 if (regno_base == SP_REGNUM)
17601 sprintf (pattern, "ldm%sfd\t", conditional);
17602 else if (TARGET_UNIFIED_ASM)
17603 sprintf (pattern, "ldmia%s\t", conditional);
17604 else
17605 sprintf (pattern, "ldm%sia\t", conditional);
17607 strcat (pattern, reg_names[regno_base]);
17608 if (update)
17609 strcat (pattern, "!, {");
17610 else
17611 strcat (pattern, ", {");
17614 /* Output the first destination register. */
17615 strcat (pattern,
17616 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17618 /* Output the rest of the destination registers. */
17619 for (i = offset + 1; i < num_saves; i++)
17621 strcat (pattern, ", ");
17622 strcat (pattern,
17623 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17626 strcat (pattern, "}");
17628 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17629 strcat (pattern, "^");
17631 output_asm_insn (pattern, &cond);
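/* Illustrative sketch, not part of arm.c: the kind of string that
   arm_output_multireg_pop above assembles.  Popping r4, r5 and pc through
   SP with writeback under unified syntax yields "pop%?\t{r4, r5, pc}".
   The helper name and the fixed register list are hypothetical; the real
   routine also folds in the condition code and the interrupt-return '^'.  */
#include <stdio.h>
#include <string.h>

static void
build_pop_example (char *pattern, size_t size)
{
  static const char *const regs[] = { "r4", "r5", "pc" };
  size_t i;

  snprintf (pattern, size, "pop%%?\t{");
  for (i = 0; i < sizeof (regs) / sizeof (regs[0]); i++)
    {
      if (i > 0)
        strcat (pattern, ", ");
      strcat (pattern, regs[i]);
    }
  strcat (pattern, "}");

  /* Usage: char buf[100]; build_pop_example (buf, sizeof buf);
     buf now holds "pop%?\t{r4, r5, pc}".  */
}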
17635 /* Output the assembly for a store multiple. */
17637 const char *
17638 vfp_output_vstmd (rtx * operands)
17640 char pattern[100];
17641 int p;
17642 int base;
17643 int i;
17644 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17645 ? XEXP (operands[0], 0)
17646 : XEXP (XEXP (operands[0], 0), 0);
17647 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17649 if (push_p)
17650 strcpy (pattern, "vpush%?.64\t{%P1");
17651 else
17652 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17654 p = strlen (pattern);
17656 gcc_assert (REG_P (operands[1]));
17658 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17659 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17661 p += sprintf (&pattern[p], ", d%d", base + i);
17663 strcpy (&pattern[p], "}");
17665 output_asm_insn (pattern, operands);
17666 return "";
17670 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17671 number of bytes pushed. */
17673 static int
17674 vfp_emit_fstmd (int base_reg, int count)
17676 rtx par;
17677 rtx dwarf;
17678 rtx tmp, reg;
17679 int i;
17681 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17682 register pairs are stored by a store multiple insn. We avoid this
17683 by pushing an extra pair. */
17684 if (count == 2 && !arm_arch6)
17686 if (base_reg == LAST_VFP_REGNUM - 3)
17687 base_reg -= 2;
17688 count++;
17691 /* FSTMD may not store more than 16 doubleword registers at once. Split
17692 larger stores into multiple parts (up to a maximum of two, in
17693 practice). */
17694 if (count > 16)
17696 int saved;
17697 /* NOTE: base_reg is an internal register number, so each D register
17698 counts as 2. */
17699 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17700 saved += vfp_emit_fstmd (base_reg, 16);
17701 return saved;
17704 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17705 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17707 reg = gen_rtx_REG (DFmode, base_reg);
17708 base_reg += 2;
17710 XVECEXP (par, 0, 0)
17711 = gen_rtx_SET (VOIDmode,
17712 gen_frame_mem
17713 (BLKmode,
17714 gen_rtx_PRE_MODIFY (Pmode,
17715 stack_pointer_rtx,
17716 plus_constant
17717 (Pmode, stack_pointer_rtx,
17718 - (count * 8)))
17720 gen_rtx_UNSPEC (BLKmode,
17721 gen_rtvec (1, reg),
17722 UNSPEC_PUSH_MULT));
17724 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17725 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17726 RTX_FRAME_RELATED_P (tmp) = 1;
17727 XVECEXP (dwarf, 0, 0) = tmp;
17729 tmp = gen_rtx_SET (VOIDmode,
17730 gen_frame_mem (DFmode, stack_pointer_rtx),
17731 reg);
17732 RTX_FRAME_RELATED_P (tmp) = 1;
17733 XVECEXP (dwarf, 0, 1) = tmp;
17735 for (i = 1; i < count; i++)
17737 reg = gen_rtx_REG (DFmode, base_reg);
17738 base_reg += 2;
17739 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17741 tmp = gen_rtx_SET (VOIDmode,
17742 gen_frame_mem (DFmode,
17743 plus_constant (Pmode,
17744 stack_pointer_rtx,
17745 i * 8)),
17746 reg);
17747 RTX_FRAME_RELATED_P (tmp) = 1;
17748 XVECEXP (dwarf, 0, i + 1) = tmp;
17751 par = emit_insn (par);
17752 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17753 RTX_FRAME_RELATED_P (par) = 1;
17755 return count * 8;
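/* Illustrative sketch, not part of arm.c: the byte count produced by
   vfp_emit_fstmd above, mirroring the ARM10 VFPr1 workaround (a run of
   exactly two D-register pairs is padded to three on pre-v6 cores) and
   the split of stores covering more than 16 D registers.  The helper
   name and the PRE_V6 flag are hypothetical.  */
static int
fstmd_bytes (int count, int pre_v6)
{
  if (count == 2 && pre_v6)
    count++;                  /* push an extra pair (bug workaround)  */
  if (count > 16)
    return fstmd_bytes (count - 16, pre_v6) + 16 * 8;
  return count * 8;
}

/* fstmd_bytes (2, 1) == 24 and fstmd_bytes (20, 0) == 160, matching the
   RTL emitted by vfp_emit_fstmd for those register counts.  */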
17758 /* Emit a call instruction with pattern PAT. ADDR is the address of
17759 the call target. */
17761 void
17762 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17764 rtx insn;
17766 insn = emit_call_insn (pat);
17768 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17769 If the call might use such an entry, add a use of the PIC register
17770 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17771 if (TARGET_VXWORKS_RTP
17772 && flag_pic
17773 && !sibcall
17774 && GET_CODE (addr) == SYMBOL_REF
17775 && (SYMBOL_REF_DECL (addr)
17776 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17777 : !SYMBOL_REF_LOCAL_P (addr)))
17779 require_pic_register ();
17780 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17783 if (TARGET_AAPCS_BASED)
17785 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17786 linker. We need to add an IP clobber to allow setting
17787 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17788 is not needed since it's a fixed register. */
17789 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17790 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17794 /* Output a 'call' insn. */
17795 const char *
17796 output_call (rtx *operands)
17798 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17800 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17801 if (REGNO (operands[0]) == LR_REGNUM)
17803 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17804 output_asm_insn ("mov%?\t%0, %|lr", operands);
17807 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17809 if (TARGET_INTERWORK || arm_arch4t)
17810 output_asm_insn ("bx%?\t%0", operands);
17811 else
17812 output_asm_insn ("mov%?\t%|pc, %0", operands);
17814 return "";
17817 /* Output a 'call' insn that is a reference in memory. This is
17818 disabled for ARMv5, where we prefer a blx instead, because otherwise
17819 there's a significant performance overhead. */
17820 const char *
17821 output_call_mem (rtx *operands)
17823 gcc_assert (!arm_arch5);
17824 if (TARGET_INTERWORK)
17826 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17827 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17828 output_asm_insn ("bx%?\t%|ip", operands);
17830 else if (regno_use_in (LR_REGNUM, operands[0]))
17832 /* LR is used in the memory address. We load the address in the
17833 first instruction. It's safe to use IP as the target of the
17834 load since the call will kill it anyway. */
17835 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17836 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17837 if (arm_arch4t)
17838 output_asm_insn ("bx%?\t%|ip", operands);
17839 else
17840 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17842 else
17844 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17845 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17848 return "";
17852 /* Output a move from arm registers to arm registers of a long double
17853 OPERANDS[0] is the destination.
17854 OPERANDS[1] is the source. */
17855 const char *
17856 output_mov_long_double_arm_from_arm (rtx *operands)
17858 /* We have to be careful here because the two might overlap. */
17859 int dest_start = REGNO (operands[0]);
17860 int src_start = REGNO (operands[1]);
17861 rtx ops[2];
17862 int i;
17864 if (dest_start < src_start)
17866 for (i = 0; i < 3; i++)
17868 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17869 ops[1] = gen_rtx_REG (SImode, src_start + i);
17870 output_asm_insn ("mov%?\t%0, %1", ops);
17873 else
17875 for (i = 2; i >= 0; i--)
17877 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17878 ops[1] = gen_rtx_REG (SImode, src_start + i);
17879 output_asm_insn ("mov%?\t%0, %1", ops);
17883 return "";
17886 void
17887 arm_emit_movpair (rtx dest, rtx src)
17889 /* If the src is an immediate, simplify it. */
17890 if (CONST_INT_P (src))
17892 HOST_WIDE_INT val = INTVAL (src);
17893 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17894 if ((val >> 16) & 0x0000ffff)
17895 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17896 GEN_INT (16)),
17897 GEN_INT ((val >> 16) & 0x0000ffff));
17898 return;
17900 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17901 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
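/* Illustrative sketch, not part of arm.c: how a 32-bit constant is split
   into the two 16-bit halves that arm_emit_movpair above materializes
   with a movw/movt-style pair; the high half is skipped when it is zero.
   The helper name is hypothetical.  */
#include <stdint.h>

static void
movpair_halves (uint32_t val, uint16_t *lo, uint16_t *hi)
{
  *lo = val & 0xffffu;              /* first SET: movw-style low half  */
  *hi = (val >> 16) & 0xffffu;      /* ZERO_EXTRACT: movt-style high half  */
}

/* movpair_halves (0x12345678, &lo, &hi) gives lo == 0x5678 and
   hi == 0x1234, i.e. "movw rd, #0x5678" followed by "movt rd, #0x1234".  */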
17904 /* Output a move between double words. It must be REG<-MEM
17905 or MEM<-REG. */
17906 const char *
17907 output_move_double (rtx *operands, bool emit, int *count)
17909 enum rtx_code code0 = GET_CODE (operands[0]);
17910 enum rtx_code code1 = GET_CODE (operands[1]);
17911 rtx otherops[3];
17912 if (count)
17913 *count = 1;
17915 /* The only case when this might happen is when
17916 you are looking at the length of a DImode instruction
17917 that has an invalid constant in it. */
17918 if (code0 == REG && code1 != MEM)
17920 gcc_assert (!emit);
17921 *count = 2;
17922 return "";
17925 if (code0 == REG)
17927 unsigned int reg0 = REGNO (operands[0]);
17929 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17931 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17933 switch (GET_CODE (XEXP (operands[1], 0)))
17935 case REG:
17937 if (emit)
17939 if (TARGET_LDRD
17940 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17941 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17942 else
17943 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17945 break;
17947 case PRE_INC:
17948 gcc_assert (TARGET_LDRD);
17949 if (emit)
17950 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17951 break;
17953 case PRE_DEC:
17954 if (emit)
17956 if (TARGET_LDRD)
17957 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17958 else
17959 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17961 break;
17963 case POST_INC:
17964 if (emit)
17966 if (TARGET_LDRD)
17967 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17968 else
17969 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17971 break;
17973 case POST_DEC:
17974 gcc_assert (TARGET_LDRD);
17975 if (emit)
17976 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17977 break;
17979 case PRE_MODIFY:
17980 case POST_MODIFY:
17981 /* Autoincrement addressing modes should never have overlapping
17982 base and destination registers, and overlapping index registers
17983 are already prohibited, so this doesn't need to worry about
17984 fix_cm3_ldrd. */
17985 otherops[0] = operands[0];
17986 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17987 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17989 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17991 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17993 /* Registers overlap so split out the increment. */
17994 if (emit)
17996 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17997 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17999 if (count)
18000 *count = 2;
18002 else
18004 /* Use a single insn if we can.
18005 FIXME: IWMMXT allows offsets larger than ldrd can
18006 handle, fix these up with a pair of ldr. */
18007 if (TARGET_THUMB2
18008 || !CONST_INT_P (otherops[2])
18009 || (INTVAL (otherops[2]) > -256
18010 && INTVAL (otherops[2]) < 256))
18012 if (emit)
18013 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18015 else
18017 if (emit)
18019 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18020 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18022 if (count)
18023 *count = 2;
18028 else
18030 /* Use a single insn if we can.
18031 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18032 fix these up with a pair of ldr. */
18033 if (TARGET_THUMB2
18034 || !CONST_INT_P (otherops[2])
18035 || (INTVAL (otherops[2]) > -256
18036 && INTVAL (otherops[2]) < 256))
18038 if (emit)
18039 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18041 else
18043 if (emit)
18045 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18046 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18048 if (count)
18049 *count = 2;
18052 break;
18054 case LABEL_REF:
18055 case CONST:
18056 /* We might be able to use ldrd %0, %1 here. However the range is
18057 different to ldr/adr, and it is broken on some ARMv7-M
18058 implementations. */
18059 /* Use the second register of the pair to avoid problematic
18060 overlap. */
18061 otherops[1] = operands[1];
18062 if (emit)
18063 output_asm_insn ("adr%?\t%0, %1", otherops);
18064 operands[1] = otherops[0];
18065 if (emit)
18067 if (TARGET_LDRD)
18068 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18069 else
18070 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18073 if (count)
18074 *count = 2;
18075 break;
18077 /* ??? This needs checking for thumb2. */
18078 default:
18079 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18080 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18082 otherops[0] = operands[0];
18083 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18084 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18086 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18088 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18090 switch ((int) INTVAL (otherops[2]))
18092 case -8:
18093 if (emit)
18094 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18095 return "";
18096 case -4:
18097 if (TARGET_THUMB2)
18098 break;
18099 if (emit)
18100 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18101 return "";
18102 case 4:
18103 if (TARGET_THUMB2)
18104 break;
18105 if (emit)
18106 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18107 return "";
18110 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18111 operands[1] = otherops[0];
18112 if (TARGET_LDRD
18113 && (REG_P (otherops[2])
18114 || TARGET_THUMB2
18115 || (CONST_INT_P (otherops[2])
18116 && INTVAL (otherops[2]) > -256
18117 && INTVAL (otherops[2]) < 256)))
18119 if (reg_overlap_mentioned_p (operands[0],
18120 otherops[2]))
18122 rtx tmp;
18123 /* Swap base and index registers over to
18124 avoid a conflict. */
18125 tmp = otherops[1];
18126 otherops[1] = otherops[2];
18127 otherops[2] = tmp;
18129 /* If both registers conflict, it will usually
18130 have been fixed by a splitter. */
18131 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18132 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18134 if (emit)
18136 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18137 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18139 if (count)
18140 *count = 2;
18142 else
18144 otherops[0] = operands[0];
18145 if (emit)
18146 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18148 return "";
18151 if (CONST_INT_P (otherops[2]))
18153 if (emit)
18155 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18156 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18157 else
18158 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18161 else
18163 if (emit)
18164 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18167 else
18169 if (emit)
18170 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18173 if (count)
18174 *count = 2;
18176 if (TARGET_LDRD)
18177 return "ldr%(d%)\t%0, [%1]";
18179 return "ldm%(ia%)\t%1, %M0";
18181 else
18183 otherops[1] = adjust_address (operands[1], SImode, 4);
18184 /* Take care of overlapping base/data reg. */
18185 if (reg_mentioned_p (operands[0], operands[1]))
18187 if (emit)
18189 output_asm_insn ("ldr%?\t%0, %1", otherops);
18190 output_asm_insn ("ldr%?\t%0, %1", operands);
18192 if (count)
18193 *count = 2;
18196 else
18198 if (emit)
18200 output_asm_insn ("ldr%?\t%0, %1", operands);
18201 output_asm_insn ("ldr%?\t%0, %1", otherops);
18203 if (count)
18204 *count = 2;
18209 else
18211 /* Constraints should ensure this. */
18212 gcc_assert (code0 == MEM && code1 == REG);
18213 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18214 || (TARGET_ARM && TARGET_LDRD));
18216 switch (GET_CODE (XEXP (operands[0], 0)))
18218 case REG:
18219 if (emit)
18221 if (TARGET_LDRD)
18222 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18223 else
18224 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18226 break;
18228 case PRE_INC:
18229 gcc_assert (TARGET_LDRD);
18230 if (emit)
18231 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18232 break;
18234 case PRE_DEC:
18235 if (emit)
18237 if (TARGET_LDRD)
18238 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18239 else
18240 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18242 break;
18244 case POST_INC:
18245 if (emit)
18247 if (TARGET_LDRD)
18248 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18249 else
18250 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18252 break;
18254 case POST_DEC:
18255 gcc_assert (TARGET_LDRD);
18256 if (emit)
18257 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18258 break;
18260 case PRE_MODIFY:
18261 case POST_MODIFY:
18262 otherops[0] = operands[1];
18263 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18264 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18266 /* IWMMXT allows offsets larger than ldrd can handle,
18267 fix these up with a pair of ldr. */
18268 if (!TARGET_THUMB2
18269 && CONST_INT_P (otherops[2])
18270 && (INTVAL(otherops[2]) <= -256
18271 || INTVAL(otherops[2]) >= 256))
18273 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18275 if (emit)
18277 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18278 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18280 if (count)
18281 *count = 2;
18283 else
18285 if (emit)
18287 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18288 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18290 if (count)
18291 *count = 2;
18294 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18296 if (emit)
18297 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18299 else
18301 if (emit)
18302 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18304 break;
18306 case PLUS:
18307 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18308 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18310 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18312 case -8:
18313 if (emit)
18314 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18315 return "";
18317 case -4:
18318 if (TARGET_THUMB2)
18319 break;
18320 if (emit)
18321 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18322 return "";
18324 case 4:
18325 if (TARGET_THUMB2)
18326 break;
18327 if (emit)
18328 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18329 return "";
18332 if (TARGET_LDRD
18333 && (REG_P (otherops[2])
18334 || TARGET_THUMB2
18335 || (CONST_INT_P (otherops[2])
18336 && INTVAL (otherops[2]) > -256
18337 && INTVAL (otherops[2]) < 256)))
18339 otherops[0] = operands[1];
18340 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18341 if (emit)
18342 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18343 return "";
18345 /* Fall through */
18347 default:
18348 otherops[0] = adjust_address (operands[0], SImode, 4);
18349 otherops[1] = operands[1];
18350 if (emit)
18352 output_asm_insn ("str%?\t%1, %0", operands);
18353 output_asm_insn ("str%?\t%H1, %0", otherops);
18355 if (count)
18356 *count = 2;
18360 return "";
18363 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18364 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18366 const char *
18367 output_move_quad (rtx *operands)
18369 if (REG_P (operands[0]))
18371 /* Load, or reg->reg move. */
18373 if (MEM_P (operands[1]))
18375 switch (GET_CODE (XEXP (operands[1], 0)))
18377 case REG:
18378 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18379 break;
18381 case LABEL_REF:
18382 case CONST:
18383 output_asm_insn ("adr%?\t%0, %1", operands);
18384 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18385 break;
18387 default:
18388 gcc_unreachable ();
18391 else
18393 rtx ops[2];
18394 int dest, src, i;
18396 gcc_assert (REG_P (operands[1]));
18398 dest = REGNO (operands[0]);
18399 src = REGNO (operands[1]);
18401 /* This seems pretty dumb, but hopefully GCC won't try to do it
18402 very often. */
18403 if (dest < src)
18404 for (i = 0; i < 4; i++)
18406 ops[0] = gen_rtx_REG (SImode, dest + i);
18407 ops[1] = gen_rtx_REG (SImode, src + i);
18408 output_asm_insn ("mov%?\t%0, %1", ops);
18410 else
18411 for (i = 3; i >= 0; i--)
18413 ops[0] = gen_rtx_REG (SImode, dest + i);
18414 ops[1] = gen_rtx_REG (SImode, src + i);
18415 output_asm_insn ("mov%?\t%0, %1", ops);
18419 else
18421 gcc_assert (MEM_P (operands[0]));
18422 gcc_assert (REG_P (operands[1]));
18423 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18425 switch (GET_CODE (XEXP (operands[0], 0)))
18427 case REG:
18428 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18429 break;
18431 default:
18432 gcc_unreachable ();
18436 return "";
18439 /* Output a VFP load or store instruction. */
18441 const char *
18442 output_move_vfp (rtx *operands)
18444 rtx reg, mem, addr, ops[2];
18445 int load = REG_P (operands[0]);
18446 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18447 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18448 const char *templ;
18449 char buff[50];
18450 machine_mode mode;
18452 reg = operands[!load];
18453 mem = operands[load];
18455 mode = GET_MODE (reg);
18457 gcc_assert (REG_P (reg));
18458 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18459 gcc_assert (mode == SFmode
18460 || mode == DFmode
18461 || mode == SImode
18462 || mode == DImode
18463 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18464 gcc_assert (MEM_P (mem));
18466 addr = XEXP (mem, 0);
18468 switch (GET_CODE (addr))
18470 case PRE_DEC:
18471 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18472 ops[0] = XEXP (addr, 0);
18473 ops[1] = reg;
18474 break;
18476 case POST_INC:
18477 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18478 ops[0] = XEXP (addr, 0);
18479 ops[1] = reg;
18480 break;
18482 default:
18483 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18484 ops[0] = reg;
18485 ops[1] = mem;
18486 break;
18489 sprintf (buff, templ,
18490 load ? "ld" : "st",
18491 dp ? "64" : "32",
18492 dp ? "P" : "",
18493 integer_p ? "\t%@ int" : "");
18494 output_asm_insn (buff, ops);
18496 return "";
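/* Illustrative sketch, not part of arm.c: how the template string in
   output_move_vfp above expands.  For a DFmode load with a plain
   register or register-plus-offset address, the default case builds
   "vldr%?.64" followed by a tab and "%P0, %1".  */
#include <stdio.h>

static void
show_vfp_load_template (void)
{
  char buff[50];
  const char *templ = "v%sr%%?.%s\t%%%s0, %%1%s";
  int load = 1, dp = 1, integer_p = 0;

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : "32",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  puts (buff);    /* prints: vldr%?.64<TAB>%P0, %1  */
}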
18499 /* Output a Neon double-word or quad-word load or store, or a load
18500 or store for larger structure modes.
18502 WARNING: The ordering of elements is weird in big-endian mode,
18503 because the EABI requires that vectors stored in memory appear
18504 as though they were stored by a VSTM instruction.
18505 GCC RTL defines element ordering based on in-memory order.
18506 This can be different from the architectural ordering of elements
18507 within a NEON register. The intrinsics defined in arm_neon.h use the
18508 NEON register element ordering, not the GCC RTL element ordering.
18510 For example, the in-memory ordering of a big-endian quadword
18511 vector with 16-bit elements when stored from register pair {d0,d1}
18512 will be (lowest address first, d0[N] is NEON register element N):
18514 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18516 When necessary, quadword registers (dN, dN+1) are moved to ARM
18517 registers from rN in the order:
18519 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18521 So that STM/LDM can be used on vectors in ARM registers, and the
18522 same memory layout will result as if VSTM/VLDM were used.
18524 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18525 possible, which allows use of appropriate alignment tags.
18526 Note that the choice of "64" is independent of the actual vector
18527 element size; this size simply ensures that the behavior is
18528 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18530 Due to limitations of those instructions, use of VST1.64/VLD1.64
18531 is not possible if:
18532 - the address contains PRE_DEC, or
18533 - the mode refers to more than 4 double-word registers
18535 In those cases, it would be possible to replace VSTM/VLDM by a
18536 sequence of instructions; this is not currently implemented since
18537 this is not certain to actually improve performance. */
18539 const char *
18540 output_move_neon (rtx *operands)
18542 rtx reg, mem, addr, ops[2];
18543 int regno, nregs, load = REG_P (operands[0]);
18544 const char *templ;
18545 char buff[50];
18546 machine_mode mode;
18548 reg = operands[!load];
18549 mem = operands[load];
18551 mode = GET_MODE (reg);
18553 gcc_assert (REG_P (reg));
18554 regno = REGNO (reg);
18555 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18556 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18557 || NEON_REGNO_OK_FOR_QUAD (regno));
18558 gcc_assert (VALID_NEON_DREG_MODE (mode)
18559 || VALID_NEON_QREG_MODE (mode)
18560 || VALID_NEON_STRUCT_MODE (mode));
18561 gcc_assert (MEM_P (mem));
18563 addr = XEXP (mem, 0);
18565 /* Strip off const from addresses like (const (plus (...))). */
18566 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18567 addr = XEXP (addr, 0);
18569 switch (GET_CODE (addr))
18571 case POST_INC:
18572 /* We have to use vldm / vstm for too-large modes. */
18573 if (nregs > 4)
18575 templ = "v%smia%%?\t%%0!, %%h1";
18576 ops[0] = XEXP (addr, 0);
18578 else
18580 templ = "v%s1.64\t%%h1, %%A0";
18581 ops[0] = mem;
18583 ops[1] = reg;
18584 break;
18586 case PRE_DEC:
18587 /* We have to use vldm / vstm in this case, since there is no
18588 pre-decrement form of the vld1 / vst1 instructions. */
18589 templ = "v%smdb%%?\t%%0!, %%h1";
18590 ops[0] = XEXP (addr, 0);
18591 ops[1] = reg;
18592 break;
18594 case POST_MODIFY:
18595 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18596 gcc_unreachable ();
18598 case REG:
18599 /* We have to use vldm / vstm for too-large modes. */
18600 if (nregs > 1)
18602 if (nregs > 4)
18603 templ = "v%smia%%?\t%%m0, %%h1";
18604 else
18605 templ = "v%s1.64\t%%h1, %%A0";
18607 ops[0] = mem;
18608 ops[1] = reg;
18609 break;
18611 /* Fall through. */
18612 case LABEL_REF:
18613 case PLUS:
18615 int i;
18616 int overlap = -1;
18617 for (i = 0; i < nregs; i++)
18619 /* We're only using DImode here because it's a convenient size. */
18620 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18621 ops[1] = adjust_address (mem, DImode, 8 * i);
18622 if (reg_overlap_mentioned_p (ops[0], mem))
18624 gcc_assert (overlap == -1);
18625 overlap = i;
18627 else
18629 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18630 output_asm_insn (buff, ops);
18633 if (overlap != -1)
18635 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18636 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18637 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18638 output_asm_insn (buff, ops);
18641 return "";
18644 default:
18645 gcc_unreachable ();
18648 sprintf (buff, templ, load ? "ld" : "st");
18649 output_asm_insn (buff, ops);
18651 return "";
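/* Illustrative sketch, not part of arm.c: the big-endian in-memory lane
   order described in the comment before output_move_neon above, for a
   quadword of 16-bit elements stored from the register pair {d0, d1}.
   Lane numbers follow the NEON register numbering used in that comment.  */
#include <stdio.h>

static void
print_bigendian_q16_layout (void)
{
  int dreg, lane;

  for (dreg = 0; dreg < 2; dreg++)
    for (lane = 3; lane >= 0; lane--)
      printf ("d%d[%d] ", dreg, dreg * 4 + lane);
  putchar ('\n');
  /* prints: d0[3] d0[2] d0[1] d0[0] d1[7] d1[6] d1[5] d1[4]  */
}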
18654 /* Compute and return the length of neon_mov<mode>, where <mode> is
18655 one of VSTRUCT modes: EI, OI, CI or XI. */
18657 arm_attr_length_move_neon (rtx_insn *insn)
18659 rtx reg, mem, addr;
18660 int load;
18661 machine_mode mode;
18663 extract_insn_cached (insn);
18665 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18667 mode = GET_MODE (recog_data.operand[0]);
18668 switch (mode)
18670 case EImode:
18671 case OImode:
18672 return 8;
18673 case CImode:
18674 return 12;
18675 case XImode:
18676 return 16;
18677 default:
18678 gcc_unreachable ();
18682 load = REG_P (recog_data.operand[0]);
18683 reg = recog_data.operand[!load];
18684 mem = recog_data.operand[load];
18686 gcc_assert (MEM_P (mem));
18688 mode = GET_MODE (reg);
18689 addr = XEXP (mem, 0);
18691 /* Strip off const from addresses like (const (plus (...))). */
18692 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18693 addr = XEXP (addr, 0);
18695 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18697 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18698 return insns * 4;
18700 else
18701 return 4;
18704 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18705 return zero. */
18708 arm_address_offset_is_imm (rtx_insn *insn)
18710 rtx mem, addr;
18712 extract_insn_cached (insn);
18714 if (REG_P (recog_data.operand[0]))
18715 return 0;
18717 mem = recog_data.operand[0];
18719 gcc_assert (MEM_P (mem));
18721 addr = XEXP (mem, 0);
18723 if (REG_P (addr)
18724 || (GET_CODE (addr) == PLUS
18725 && REG_P (XEXP (addr, 0))
18726 && CONST_INT_P (XEXP (addr, 1))))
18727 return 1;
18728 else
18729 return 0;
18732 /* Output an ADD r, s, #n where n may be too big for one instruction.
18733 If adding zero to one register, output nothing. */
18734 const char *
18735 output_add_immediate (rtx *operands)
18737 HOST_WIDE_INT n = INTVAL (operands[2]);
18739 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18741 if (n < 0)
18742 output_multi_immediate (operands,
18743 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18744 -n);
18745 else
18746 output_multi_immediate (operands,
18747 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18751 return "";
18754 /* Output a multiple immediate operation.
18755 OPERANDS is the vector of operands referred to in the output patterns.
18756 INSTR1 is the output pattern to use for the first constant.
18757 INSTR2 is the output pattern to use for subsequent constants.
18758 IMMED_OP is the index of the constant slot in OPERANDS.
18759 N is the constant value. */
18760 static const char *
18761 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18762 int immed_op, HOST_WIDE_INT n)
18764 #if HOST_BITS_PER_WIDE_INT > 32
18765 n &= 0xffffffff;
18766 #endif
18768 if (n == 0)
18770 /* Quick and easy output. */
18771 operands[immed_op] = const0_rtx;
18772 output_asm_insn (instr1, operands);
18774 else
18776 int i;
18777 const char * instr = instr1;
18779 /* Note that n is never zero here (which would give no output). */
18780 for (i = 0; i < 32; i += 2)
18782 if (n & (3 << i))
18784 operands[immed_op] = GEN_INT (n & (255 << i));
18785 output_asm_insn (instr, operands);
18786 instr = instr2;
18787 i += 6;
18792 return "";
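/* Illustrative sketch, not part of arm.c: the chunking loop used by
   output_multi_immediate above.  Each pass peels off an 8-bit field that
   starts at an even bit position, which is what one ARM add/sub
   immediate can encode.  The helper name is hypothetical.  */
#include <stdio.h>

static void
print_immediate_chunks (unsigned long n)
{
  int i;

  n &= 0xffffffffUL;
  for (i = 0; i < 32; i += 2)
    if (n & (3UL << i))
      {
        printf ("chunk: 0x%08lx\n", n & (255UL << i));
        i += 6;    /* skip the rest of this byte, as the loop above does  */
      }
}

/* print_immediate_chunks (0x10004) prints 0x00000004 and then 0x00010000,
   matching the two instructions output_add_immediate would emit for
   "add rd, rn, #0x10004".  */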
18795 /* Return the name of a shifter operation. */
18796 static const char *
18797 arm_shift_nmem(enum rtx_code code)
18799 switch (code)
18801 case ASHIFT:
18802 return ARM_LSL_NAME;
18804 case ASHIFTRT:
18805 return "asr";
18807 case LSHIFTRT:
18808 return "lsr";
18810 case ROTATERT:
18811 return "ror";
18813 default:
18814 abort();
18818 /* Return the appropriate ARM instruction for the operation code.
18819 The returned result should not be overwritten. OP is the rtx of the
18820 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18821 was shifted. */
18822 const char *
18823 arithmetic_instr (rtx op, int shift_first_arg)
18825 switch (GET_CODE (op))
18827 case PLUS:
18828 return "add";
18830 case MINUS:
18831 return shift_first_arg ? "rsb" : "sub";
18833 case IOR:
18834 return "orr";
18836 case XOR:
18837 return "eor";
18839 case AND:
18840 return "and";
18842 case ASHIFT:
18843 case ASHIFTRT:
18844 case LSHIFTRT:
18845 case ROTATERT:
18846 return arm_shift_nmem(GET_CODE(op));
18848 default:
18849 gcc_unreachable ();
18853 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18854 for the operation code. The returned result should not be overwritten.
18855 OP is the rtx code of the shift.
18856 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18857 constant shift amount otherwise. */
18858 static const char *
18859 shift_op (rtx op, HOST_WIDE_INT *amountp)
18861 const char * mnem;
18862 enum rtx_code code = GET_CODE (op);
18864 switch (code)
18866 case ROTATE:
18867 if (!CONST_INT_P (XEXP (op, 1)))
18869 output_operand_lossage ("invalid shift operand");
18870 return NULL;
18873 code = ROTATERT;
18874 *amountp = 32 - INTVAL (XEXP (op, 1));
18875 mnem = "ror";
18876 break;
18878 case ASHIFT:
18879 case ASHIFTRT:
18880 case LSHIFTRT:
18881 case ROTATERT:
18882 mnem = arm_shift_nmem(code);
18883 if (CONST_INT_P (XEXP (op, 1)))
18885 *amountp = INTVAL (XEXP (op, 1));
18887 else if (REG_P (XEXP (op, 1)))
18889 *amountp = -1;
18890 return mnem;
18892 else
18894 output_operand_lossage ("invalid shift operand");
18895 return NULL;
18897 break;
18899 case MULT:
18900 /* We never have to worry about the amount being other than a
18901 power of 2, since this case can never be reloaded from a reg. */
18902 if (!CONST_INT_P (XEXP (op, 1)))
18904 output_operand_lossage ("invalid shift operand");
18905 return NULL;
18908 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18910 /* Amount must be a power of two. */
18911 if (*amountp & (*amountp - 1))
18913 output_operand_lossage ("invalid shift operand");
18914 return NULL;
18917 *amountp = int_log2 (*amountp);
18918 return ARM_LSL_NAME;
18920 default:
18921 output_operand_lossage ("invalid shift operand");
18922 return NULL;
18925 /* This is not 100% correct, but follows from the desire to merge
18926 multiplication by a power of 2 with the recognizer for a
18927 shift. >=32 is not a valid shift for "lsl", so we must try to
18928 output a shift that produces the correct arithmetical result.
18929 Using lsr #32 is identical except for the fact that the carry bit
18930 is not set correctly if we set the flags; but we never use the
18931 carry bit from such an operation, so we can ignore that. */
18932 if (code == ROTATERT)
18933 /* Rotate is just modulo 32. */
18934 *amountp &= 31;
18935 else if (*amountp != (*amountp & 31))
18937 if (code == ASHIFT)
18938 mnem = "lsr";
18939 *amountp = 32;
18942 /* Shifts of 0 are no-ops. */
18943 if (*amountp == 0)
18944 return NULL;
18946 return mnem;
18949 /* Obtain the shift from the POWER of two. */
18951 static HOST_WIDE_INT
18952 int_log2 (HOST_WIDE_INT power)
18954 HOST_WIDE_INT shift = 0;
18956 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18958 gcc_assert (shift <= 31);
18959 shift++;
18962 return shift;
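/* Illustrative sketch, not part of arm.c: the power-of-two test and log2
   computation that shift_op and int_log2 above use when a MULT by a
   power of two is printed as ARM_LSL_NAME.  The helper name is
   hypothetical.  */
static int
mult_to_lsl_amount (unsigned long amount)
{
  int shift = 0;

  if (amount == 0 || (amount & (amount - 1)) != 0)
    return -1;                        /* not a power of two: rejected  */
  while (((1UL << shift) & amount) == 0)
    shift++;
  return shift;
}

/* mult_to_lsl_amount (8) == 3, so (mult x 8) is printed as "lsl" with a
   shift amount of 3; mult_to_lsl_amount (10) == -1.  */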
18965 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18966 because /bin/as is horribly restrictive. The judgement about
18967 whether or not each character is 'printable' (and can be output as
18968 is) or not (and must be printed with an octal escape) must be made
18969 with reference to the *host* character set -- the situation is
18970 similar to that discussed in the comments above pp_c_char in
18971 c-pretty-print.c. */
18973 #define MAX_ASCII_LEN 51
18975 void
18976 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18978 int i;
18979 int len_so_far = 0;
18981 fputs ("\t.ascii\t\"", stream);
18983 for (i = 0; i < len; i++)
18985 int c = p[i];
18987 if (len_so_far >= MAX_ASCII_LEN)
18989 fputs ("\"\n\t.ascii\t\"", stream);
18990 len_so_far = 0;
18993 if (ISPRINT (c))
18995 if (c == '\\' || c == '\"')
18997 putc ('\\', stream);
18998 len_so_far++;
19000 putc (c, stream);
19001 len_so_far++;
19003 else
19005 fprintf (stream, "\\%03o", c);
19006 len_so_far += 4;
19010 fputs ("\"\n", stream);
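/* Illustrative sketch, not part of arm.c: the per-character escaping rule
   used by output_ascii_pseudo_op above.  Printable characters pass
   through, with backslash and double-quote escaped; everything else is
   emitted as a three-digit octal escape.  The helper name is
   hypothetical, and isprint stands in for the host ISPRINT macro.  */
#include <ctype.h>
#include <stdio.h>

static void
emit_ascii_char (FILE *stream, unsigned char c)
{
  if (isprint (c))
    {
      if (c == '\\' || c == '\"')
        putc ('\\', stream);
      putc (c, stream);
    }
  else
    fprintf (stream, "\\%03o", c);
}

/* For the bytes 'a', '"' and '\n' this emits: a\"\012  */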
19013 /* Compute the register save mask for registers 0 through 12
19014 inclusive. This code is used by arm_compute_save_reg_mask. */
19016 static unsigned long
19017 arm_compute_save_reg0_reg12_mask (void)
19019 unsigned long func_type = arm_current_func_type ();
19020 unsigned long save_reg_mask = 0;
19021 unsigned int reg;
19023 if (IS_INTERRUPT (func_type))
19025 unsigned int max_reg;
19026 /* Interrupt functions must not corrupt any registers,
19027 even call clobbered ones. If this is a leaf function
19028 we can just examine the registers used by the RTL, but
19029 otherwise we have to assume that whatever function is
19030 called might clobber anything, and so we have to save
19031 all the call-clobbered registers as well. */
19032 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19033 /* FIQ handlers have registers r8 - r12 banked, so
19034 we only need to check r0 - r7. Normal ISRs only
19035 bank r14 and r15, so we must check up to r12.
19036 r13 is the stack pointer which is always preserved,
19037 so we do not need to consider it here. */
19038 max_reg = 7;
19039 else
19040 max_reg = 12;
19042 for (reg = 0; reg <= max_reg; reg++)
19043 if (df_regs_ever_live_p (reg)
19044 || (! crtl->is_leaf && call_used_regs[reg]))
19045 save_reg_mask |= (1 << reg);
19047 /* Also save the pic base register if necessary. */
19048 if (flag_pic
19049 && !TARGET_SINGLE_PIC_BASE
19050 && arm_pic_register != INVALID_REGNUM
19051 && crtl->uses_pic_offset_table)
19052 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19054 else if (IS_VOLATILE(func_type))
19056 /* For noreturn functions we historically omitted register saves
19057 altogether. However, this really messes up debugging. As a
19058 compromise, save just the frame pointers. Combined with the link
19059 register saved elsewhere this should be sufficient to get
19060 a backtrace. */
19061 if (frame_pointer_needed)
19062 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19063 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19064 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19065 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19066 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19068 else
19070 /* In the normal case we only need to save those registers
19071 which are call saved and which are used by this function. */
19072 for (reg = 0; reg <= 11; reg++)
19073 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19074 save_reg_mask |= (1 << reg);
19076 /* Handle the frame pointer as a special case. */
19077 if (frame_pointer_needed)
19078 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19080 /* If we aren't loading the PIC register,
19081 don't stack it even though it may be live. */
19082 if (flag_pic
19083 && !TARGET_SINGLE_PIC_BASE
19084 && arm_pic_register != INVALID_REGNUM
19085 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19086 || crtl->uses_pic_offset_table))
19087 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19089 /* The prologue will copy SP into R0, so save it. */
19090 if (IS_STACKALIGN (func_type))
19091 save_reg_mask |= 1;
19094 /* Save registers so the exception handler can modify them. */
19095 if (crtl->calls_eh_return)
19097 unsigned int i;
19099 for (i = 0; ; i++)
19101 reg = EH_RETURN_DATA_REGNO (i);
19102 if (reg == INVALID_REGNUM)
19103 break;
19104 save_reg_mask |= 1 << reg;
19108 return save_reg_mask;
19111 /* Return true if r3 is live at the start of the function. */
19113 static bool
19114 arm_r3_live_at_start_p (void)
19116 /* Just look at cfg info, which is still close enough to correct at this
19117 point. This gives false positives for broken functions that might use
19118 uninitialized data that happens to be allocated in r3, but who cares? */
19119 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19122 /* Compute the number of bytes used to store the static chain register on the
19123 stack, above the stack frame. We need to know this accurately to get the
19124 alignment of the rest of the stack frame correct. */
19126 static int
19127 arm_compute_static_chain_stack_bytes (void)
19129 /* See the defining assertion in arm_expand_prologue. */
19130 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19131 && IS_NESTED (arm_current_func_type ())
19132 && arm_r3_live_at_start_p ()
19133 && crtl->args.pretend_args_size == 0)
19134 return 4;
19136 return 0;
19139 /* Compute a bit mask of which registers need to be
19140 saved on the stack for the current function.
19141 This is used by arm_get_frame_offsets, which may add extra registers. */
19143 static unsigned long
19144 arm_compute_save_reg_mask (void)
19146 unsigned int save_reg_mask = 0;
19147 unsigned long func_type = arm_current_func_type ();
19148 unsigned int reg;
19150 if (IS_NAKED (func_type))
19151 /* This should never really happen. */
19152 return 0;
19154 /* If we are creating a stack frame, then we must save the frame pointer,
19155 IP (which will hold the old stack pointer), LR and the PC. */
19156 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19157 save_reg_mask |=
19158 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19159 | (1 << IP_REGNUM)
19160 | (1 << LR_REGNUM)
19161 | (1 << PC_REGNUM);
19163 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19165 /* Decide if we need to save the link register.
19166 Interrupt routines have their own banked link register,
19167 so they never need to save it.
19168 Otherwise if we do not use the link register we do not need to save
19169 it. If we are pushing other registers onto the stack however, we
19170 can save an instruction in the epilogue by pushing the link register
19171 now and then popping it back into the PC. This incurs extra memory
19172 accesses though, so we only do it when optimizing for size, and only
19173 if we know that we will not need a fancy return sequence. */
19174 if (df_regs_ever_live_p (LR_REGNUM)
19175 || (save_reg_mask
19176 && optimize_size
19177 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19178 && !crtl->calls_eh_return))
19179 save_reg_mask |= 1 << LR_REGNUM;
19181 if (cfun->machine->lr_save_eliminated)
19182 save_reg_mask &= ~ (1 << LR_REGNUM);
19184 if (TARGET_REALLY_IWMMXT
19185 && ((bit_count (save_reg_mask)
19186 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19187 arm_compute_static_chain_stack_bytes())
19188 ) % 2) != 0)
19190 /* The total number of registers that are going to be pushed
19191 onto the stack is odd. We need to ensure that the stack
19192 is 64-bit aligned before we start to save iWMMXt registers,
19193 and also before we start to create locals. (A local variable
19194 might be a double or long long which we will load/store using
19195 an iWMMXt instruction). Therefore we need to push another
19196 ARM register, so that the stack will be 64-bit aligned. We
19197 try to avoid using the arg registers (r0 - r3) as they might be
19198 used to pass values in a tail call. */
19199 for (reg = 4; reg <= 12; reg++)
19200 if ((save_reg_mask & (1 << reg)) == 0)
19201 break;
19203 if (reg <= 12)
19204 save_reg_mask |= (1 << reg);
19205 else
19207 cfun->machine->sibcall_blocked = 1;
19208 save_reg_mask |= (1 << 3);
19212 /* We may need to push an additional register for use initializing the
19213 PIC base register. */
19214 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19215 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19217 reg = thumb_find_work_register (1 << 4);
19218 if (!call_used_regs[reg])
19219 save_reg_mask |= (1 << reg);
19222 return save_reg_mask;
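/* Illustrative sketch, not part of arm.c: the parity test used above to
   keep the stack 64-bit aligned before iWMMXt register saves.  If the
   number of words already being pushed is odd, one more ARM register is
   added to the mask.  The helper name is hypothetical and
   __builtin_popcountl stands in for bit_count.  */
static int
needs_alignment_pad (unsigned long save_mask, int pretend_words)
{
  return (__builtin_popcountl (save_mask) + pretend_words) % 2 != 0;
}

/* Pushing r4 and lr (two words) needs no pad, but adding r5 makes the
   count odd:
     needs_alignment_pad ((1UL << 4) | (1UL << 14), 0) == 0
     needs_alignment_pad ((1UL << 4) | (1UL << 5) | (1UL << 14), 0) == 1  */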
19226 /* Compute a bit mask of which registers need to be
19227 saved on the stack for the current function. */
19228 static unsigned long
19229 thumb1_compute_save_reg_mask (void)
19231 unsigned long mask;
19232 unsigned reg;
19234 mask = 0;
19235 for (reg = 0; reg < 12; reg ++)
19236 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19237 mask |= 1 << reg;
19239 if (flag_pic
19240 && !TARGET_SINGLE_PIC_BASE
19241 && arm_pic_register != INVALID_REGNUM
19242 && crtl->uses_pic_offset_table)
19243 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19245 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19246 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19247 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19249 /* LR will also be pushed if any lo regs are pushed. */
19250 if (mask & 0xff || thumb_force_lr_save ())
19251 mask |= (1 << LR_REGNUM);
19253 /* Make sure we have a low work register if we need one.
19254 We will need one if we are going to push a high register,
19255 but we are not currently intending to push a low register. */
19256 if ((mask & 0xff) == 0
19257 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19259 /* Use thumb_find_work_register to choose which register
19260 we will use. If the register is live then we will
19261 have to push it. Use LAST_LO_REGNUM as our fallback
19262 choice for the register to select. */
19263 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19264 /* Make sure the register returned by thumb_find_work_register is
19265 not part of the return value. */
19266 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19267 reg = LAST_LO_REGNUM;
19269 if (! call_used_regs[reg])
19270 mask |= 1 << reg;
19273 /* The 504 below is 8 bytes less than 512 because there are two possible
19274 alignment words. We can't tell here if they will be present or not, so we
19275 have to play it safe and assume that they are. */
19276 if ((CALLER_INTERWORKING_SLOT_SIZE +
19277 ROUND_UP_WORD (get_frame_size ()) +
19278 crtl->outgoing_args_size) >= 504)
19280 /* This is the same as the code in thumb1_expand_prologue() which
19281 determines which register to use for stack decrement. */
19282 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19283 if (mask & (1 << reg))
19284 break;
19286 if (reg > LAST_LO_REGNUM)
19288 /* Make sure we have a register available for stack decrement. */
19289 mask |= 1 << LAST_LO_REGNUM;
19293 return mask;
19297 /* Return the number of bytes required to save VFP registers. */
19298 static int
19299 arm_get_vfp_saved_size (void)
19301 unsigned int regno;
19302 int count;
19303 int saved;
19305 saved = 0;
19306 /* Space for saved VFP registers. */
19307 if (TARGET_HARD_FLOAT && TARGET_VFP)
19309 count = 0;
19310 for (regno = FIRST_VFP_REGNUM;
19311 regno < LAST_VFP_REGNUM;
19312 regno += 2)
19314 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19315 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19317 if (count > 0)
19319 /* Workaround ARM10 VFPr1 bug. */
19320 if (count == 2 && !arm_arch6)
19321 count++;
19322 saved += count * 8;
19324 count = 0;
19326 else
19327 count++;
19329 if (count > 0)
19331 if (count == 2 && !arm_arch6)
19332 count++;
19333 saved += count * 8;
19336 return saved;
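/* Illustrative sketch, not part of arm.c: the run-length accounting in
   arm_get_vfp_saved_size above.  live[] marks the call-saved D registers
   that need spilling; each contiguous run of COUNT registers costs
   COUNT * 8 bytes, with one register of padding for the ARM10 VFPr1
   case.  The helper name, the live[] array and the PRE_V6 flag are
   hypothetical.  */
static int
vfp_saved_size_example (const int *live, int nregs, int pre_v6)
{
  int i, count = 0, saved = 0;

  for (i = 0; i < nregs; i++)
    {
      if (live[i])
        count++;
      else if (count > 0)
        {
          if (count == 2 && pre_v6)
            count++;
          saved += count * 8;
          count = 0;
        }
    }
  if (count > 0)
    {
      if (count == 2 && pre_v6)
        count++;
      saved += count * 8;
    }
  return saved;
}

/* With only d8 and d9 live on a pre-v6 core this returns 24 (an extra
   pair is pushed because of the workaround); with d8..d11 live it
   returns 32.  */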
19340 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19341 everything bar the final return instruction. If simple_return is true,
19342 then do not output epilogue, because it has already been emitted in RTL. */
19343 const char *
19344 output_return_instruction (rtx operand, bool really_return, bool reverse,
19345 bool simple_return)
19347 char conditional[10];
19348 char instr[100];
19349 unsigned reg;
19350 unsigned long live_regs_mask;
19351 unsigned long func_type;
19352 arm_stack_offsets *offsets;
19354 func_type = arm_current_func_type ();
19356 if (IS_NAKED (func_type))
19357 return "";
19359 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19361 /* If this function was declared non-returning, and we have
19362 found a tail call, then we have to trust that the called
19363 function won't return. */
19364 if (really_return)
19366 rtx ops[2];
19368 /* Otherwise, trap an attempted return by aborting. */
19369 ops[0] = operand;
19370 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19371 : "abort");
19372 assemble_external_libcall (ops[1]);
19373 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19376 return "";
19379 gcc_assert (!cfun->calls_alloca || really_return);
19381 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19383 cfun->machine->return_used_this_function = 1;
19385 offsets = arm_get_frame_offsets ();
19386 live_regs_mask = offsets->saved_regs_mask;
19388 if (!simple_return && live_regs_mask)
19390 const char * return_reg;
19392 /* If we do not have any special requirements for function exit
19393 (e.g. interworking) then we can load the return address
19394 directly into the PC. Otherwise we must load it into LR. */
19395 if (really_return
19396 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19397 return_reg = reg_names[PC_REGNUM];
19398 else
19399 return_reg = reg_names[LR_REGNUM];
19401 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19403 /* There are three possible reasons for the IP register
19404 being saved. 1) a stack frame was created, in which case
19405 IP contains the old stack pointer, or 2) an ISR routine
19406 corrupted it, or 3) it was saved to align the stack on
19407 iWMMXt. In case 1, restore IP into SP, otherwise just
19408 restore IP. */
19409 if (frame_pointer_needed)
19411 live_regs_mask &= ~ (1 << IP_REGNUM);
19412 live_regs_mask |= (1 << SP_REGNUM);
19414 else
19415 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19418 /* On some ARM architectures it is faster to use LDR rather than
19419 LDM to load a single register. On other architectures, the
19420 cost is the same. In 26 bit mode, or for exception handlers,
19421 we have to use LDM to load the PC so that the CPSR is also
19422 restored. */
19423 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19424 if (live_regs_mask == (1U << reg))
19425 break;
19427 if (reg <= LAST_ARM_REGNUM
19428 && (reg != LR_REGNUM
19429 || ! really_return
19430 || ! IS_INTERRUPT (func_type)))
19432 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19433 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19435 else
19437 char *p;
19438 int first = 1;
19440 /* Generate the load multiple instruction to restore the
19441 registers. Note we can get here, even if
19442 frame_pointer_needed is true, but only if sp already
19443 points to the base of the saved core registers. */
19444 if (live_regs_mask & (1 << SP_REGNUM))
19446 unsigned HOST_WIDE_INT stack_adjust;
19448 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19449 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19451 if (stack_adjust && arm_arch5 && TARGET_ARM)
19452 if (TARGET_UNIFIED_ASM)
19453 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19454 else
19455 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19456 else
19458 /* If we can't use ldmib (SA110 bug),
19459 then try to pop r3 instead. */
19460 if (stack_adjust)
19461 live_regs_mask |= 1 << 3;
19463 if (TARGET_UNIFIED_ASM)
19464 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19465 else
19466 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19469 else
19470 if (TARGET_UNIFIED_ASM)
19471 sprintf (instr, "pop%s\t{", conditional);
19472 else
19473 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19475 p = instr + strlen (instr);
19477 for (reg = 0; reg <= SP_REGNUM; reg++)
19478 if (live_regs_mask & (1 << reg))
19480 int l = strlen (reg_names[reg]);
19482 if (first)
19483 first = 0;
19484 else
19486 memcpy (p, ", ", 2);
19487 p += 2;
19490 memcpy (p, "%|", 2);
19491 memcpy (p + 2, reg_names[reg], l);
19492 p += l + 2;
19495 if (live_regs_mask & (1 << LR_REGNUM))
19497 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19498 /* If returning from an interrupt, restore the CPSR. */
19499 if (IS_INTERRUPT (func_type))
19500 strcat (p, "^");
19502 else
19503 strcpy (p, "}");
19506 output_asm_insn (instr, & operand);
19508 /* See if we need to generate an extra instruction to
19509 perform the actual function return. */
19510 if (really_return
19511 && func_type != ARM_FT_INTERWORKED
19512 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19514 /* The return has already been handled
19515 by loading the LR into the PC. */
19516 return "";
19520 if (really_return)
19522 switch ((int) ARM_FUNC_TYPE (func_type))
19524 case ARM_FT_ISR:
19525 case ARM_FT_FIQ:
19526 /* ??? This is wrong for unified assembly syntax. */
19527 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19528 break;
19530 case ARM_FT_INTERWORKED:
19531 sprintf (instr, "bx%s\t%%|lr", conditional);
19532 break;
19534 case ARM_FT_EXCEPTION:
19535 /* ??? This is wrong for unified assembly syntax. */
19536 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19537 break;
19539 default:
19540 /* Use bx if it's available. */
19541 if (arm_arch5 || arm_arch4t)
19542 sprintf (instr, "bx%s\t%%|lr", conditional);
19543 else
19544 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19545 break;
19548 output_asm_insn (instr, & operand);
19551 return "";
19554 /* Write the function name into the code section, directly preceding
19555 the function prologue.
19557 Code will be output similar to this:
19559 .ascii "arm_poke_function_name", 0
19560 .align
19562 .word 0xff000000 + (t1 - t0)
19563 arm_poke_function_name
19564 mov ip, sp
19565 stmfd sp!, {fp, ip, lr, pc}
19566 sub fp, ip, #4
19568 When performing a stack backtrace, code can inspect the value
19569 of 'pc' stored at 'fp' + 0. If the trace function then looks
19570 at location pc - 12 and the top 8 bits are set, then we know
19571 that there is a function name embedded immediately preceding this
19572 location, and that its length is ((pc[-3]) & ~0xff000000).
19574 We assume that pc is declared as a pointer to an unsigned long.
19576 It is of no benefit to output the function name if we are assembling
19577 a leaf function. These function types will not contain a stack
19578 backtrace structure, so it is not possible to determine the
19579 function name. */
19580 void
19581 arm_poke_function_name (FILE *stream, const char *name)
19583 unsigned long alignlength;
19584 unsigned long length;
19585 rtx x;
19587 length = strlen (name) + 1;
19588 alignlength = ROUND_UP_WORD (length);
19590 ASM_OUTPUT_ASCII (stream, name, length);
19591 ASM_OUTPUT_ALIGN (stream, 2);
19592 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19593 assemble_aligned_integer (UNITS_PER_WORD, x);
19596 /* Place some comments into the assembler stream
19597 describing the current function. */
19598 static void
19599 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19601 unsigned long func_type;
19603 /* ??? Do we want to print some of the below anyway? */
19604 if (TARGET_THUMB1)
19605 return;
19607 /* Sanity check. */
19608 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19610 func_type = arm_current_func_type ();
19612 switch ((int) ARM_FUNC_TYPE (func_type))
19614 default:
19615 case ARM_FT_NORMAL:
19616 break;
19617 case ARM_FT_INTERWORKED:
19618 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19619 break;
19620 case ARM_FT_ISR:
19621 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19622 break;
19623 case ARM_FT_FIQ:
19624 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19625 break;
19626 case ARM_FT_EXCEPTION:
19627 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19628 break;
19631 if (IS_NAKED (func_type))
19632 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19634 if (IS_VOLATILE (func_type))
19635 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19637 if (IS_NESTED (func_type))
19638 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19639 if (IS_STACKALIGN (func_type))
19640 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19642 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19643 crtl->args.size,
19644 crtl->args.pretend_args_size, frame_size);
19646 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19647 frame_pointer_needed,
19648 cfun->machine->uses_anonymous_args);
19650 if (cfun->machine->lr_save_eliminated)
19651 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19653 if (crtl->calls_eh_return)
19654 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19658 static void
19659 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19660 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19662 arm_stack_offsets *offsets;
19664 if (TARGET_THUMB1)
19666 int regno;
19668 /* Emit any call-via-reg trampolines that are needed for v4t support
19669 of call_reg and call_value_reg type insns. */
19670 for (regno = 0; regno < LR_REGNUM; regno++)
19672 rtx label = cfun->machine->call_via[regno];
19674 if (label != NULL)
19676 switch_to_section (function_section (current_function_decl));
19677 targetm.asm_out.internal_label (asm_out_file, "L",
19678 CODE_LABEL_NUMBER (label));
19679 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19683 /* ??? Probably not safe to set this here, since it assumes that a
19684 function will be emitted as assembly immediately after we generate
19685 RTL for it. This does not happen for inline functions. */
19686 cfun->machine->return_used_this_function = 0;
19688 else /* TARGET_32BIT */
19690 /* We need to take into account any stack-frame rounding. */
19691 offsets = arm_get_frame_offsets ();
19693 gcc_assert (!use_return_insn (FALSE, NULL)
19694 || (cfun->machine->return_used_this_function != 0)
19695 || offsets->saved_regs == offsets->outgoing_args
19696 || frame_pointer_needed);
19700 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19701 STR and STRD. If an even number of registers is being pushed, an
19702 STRD pattern is created for each register pair. If an
19703 odd number of registers is pushed, emit an initial STR followed by
19704 as many STRD instructions as are needed. This works best when the
19705 stack is initially 64-bit aligned (the normal case), since it
19706 ensures that each STRD is also 64-bit aligned. */
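
/* An illustrative sketch, not part of the original sources: for a
   hypothetical SAVED_REGS_MASK covering {r4, r5, r6} (an odd count) the
   emitted sequence is roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   whereas an even mask such as {r4, r5, r6, r7} starts with a
   writeback STRD:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]  */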
19707 static void
19708 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19710 int num_regs = 0;
19711 int i;
19712 int regno;
19713 rtx par = NULL_RTX;
19714 rtx dwarf = NULL_RTX;
19715 rtx tmp;
19716 bool first = true;
19718 num_regs = bit_count (saved_regs_mask);
19720 /* Must be at least one register to save, and can't save SP or PC. */
19721 gcc_assert (num_regs > 0 && num_regs <= 14);
19722 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19723 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19725 /* Create sequence for DWARF info. All the frame-related data for
19726 debugging is held in this wrapper. */
19727 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19729 /* Describe the stack adjustment. */
19730 tmp = gen_rtx_SET (VOIDmode,
19731 stack_pointer_rtx,
19732 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19733 RTX_FRAME_RELATED_P (tmp) = 1;
19734 XVECEXP (dwarf, 0, 0) = tmp;
19736 /* Find the first register. */
19737 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19740 i = 0;
19742 /* If there's an odd number of registers to push, start off by
19743 pushing a single register. This ensures that subsequent strd
19744 operations are dword aligned (assuming that SP was originally
19745 64-bit aligned). */
19746 if ((num_regs & 1) != 0)
19748 rtx reg, mem, insn;
19750 reg = gen_rtx_REG (SImode, regno);
19751 if (num_regs == 1)
19752 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19753 stack_pointer_rtx));
19754 else
19755 mem = gen_frame_mem (Pmode,
19756 gen_rtx_PRE_MODIFY
19757 (Pmode, stack_pointer_rtx,
19758 plus_constant (Pmode, stack_pointer_rtx,
19759 -4 * num_regs)));
19761 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19762 RTX_FRAME_RELATED_P (tmp) = 1;
19763 insn = emit_insn (tmp);
19764 RTX_FRAME_RELATED_P (insn) = 1;
19765 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19766 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19767 reg);
19768 RTX_FRAME_RELATED_P (tmp) = 1;
19769 i++;
19770 regno++;
19771 XVECEXP (dwarf, 0, i) = tmp;
19772 first = false;
19775 while (i < num_regs)
19776 if (saved_regs_mask & (1 << regno))
19778 rtx reg1, reg2, mem1, mem2;
19779 rtx tmp0, tmp1, tmp2;
19780 int regno2;
19782 /* Find the register to pair with this one. */
19783 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19784 regno2++)
19787 reg1 = gen_rtx_REG (SImode, regno);
19788 reg2 = gen_rtx_REG (SImode, regno2);
19790 if (first)
19792 rtx insn;
19794 first = false;
19795 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19796 stack_pointer_rtx,
19797 -4 * num_regs));
19798 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 -4 * (num_regs - 1)));
19801 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19802 plus_constant (Pmode, stack_pointer_rtx,
19803 -4 * (num_regs)));
19804 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19805 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19806 RTX_FRAME_RELATED_P (tmp0) = 1;
19807 RTX_FRAME_RELATED_P (tmp1) = 1;
19808 RTX_FRAME_RELATED_P (tmp2) = 1;
19809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19810 XVECEXP (par, 0, 0) = tmp0;
19811 XVECEXP (par, 0, 1) = tmp1;
19812 XVECEXP (par, 0, 2) = tmp2;
19813 insn = emit_insn (par);
19814 RTX_FRAME_RELATED_P (insn) = 1;
19815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19817 else
19819 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19820 stack_pointer_rtx,
19821 4 * i));
19822 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19823 stack_pointer_rtx,
19824 4 * (i + 1)));
19825 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19826 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19827 RTX_FRAME_RELATED_P (tmp1) = 1;
19828 RTX_FRAME_RELATED_P (tmp2) = 1;
19829 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19830 XVECEXP (par, 0, 0) = tmp1;
19831 XVECEXP (par, 0, 1) = tmp2;
19832 emit_insn (par);
19835 /* Create unwind information. This is an approximation. */
19836 tmp1 = gen_rtx_SET (VOIDmode,
19837 gen_frame_mem (Pmode,
19838 plus_constant (Pmode,
19839 stack_pointer_rtx,
19840 4 * i)),
19841 reg1);
19842 tmp2 = gen_rtx_SET (VOIDmode,
19843 gen_frame_mem (Pmode,
19844 plus_constant (Pmode,
19845 stack_pointer_rtx,
19846 4 * (i + 1))),
19847 reg2);
19849 RTX_FRAME_RELATED_P (tmp1) = 1;
19850 RTX_FRAME_RELATED_P (tmp2) = 1;
19851 XVECEXP (dwarf, 0, i + 1) = tmp1;
19852 XVECEXP (dwarf, 0, i + 2) = tmp2;
19853 i += 2;
19854 regno = regno2 + 1;
19856 else
19857 regno++;
19859 return;
19862 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19863 whenever possible, otherwise it emits single-word stores. The first store
19864 also allocates stack space for all saved registers, using writeback with
19865 pre-indexed addressing. All other stores use offset addressing. If no STRD
19866 can be emitted, this function emits a sequence of single-word stores,
19867 and not an STM as before, because single-word stores provide more
19868 scheduling freedom and can be turned into an STM by peephole optimizations. */
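
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, r7} would be stored roughly as

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   only the adjacent even/odd pair (r4, r5) qualifies for STRD, and the first
   store also performs the whole 12-byte stack allocation.  */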
19869 static void
19870 arm_emit_strd_push (unsigned long saved_regs_mask)
19872 int num_regs = 0;
19873 int i, j, dwarf_index = 0;
19874 int offset = 0;
19875 rtx dwarf = NULL_RTX;
19876 rtx insn = NULL_RTX;
19877 rtx tmp, mem;
19879 /* TODO: More efficient code can be emitted by changing the
19880 layout, e.g., first push all pairs that can use STRD to keep the
19881 stack aligned, and then push all other registers. */
19882 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19883 if (saved_regs_mask & (1 << i))
19884 num_regs++;
19886 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19887 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19888 gcc_assert (num_regs > 0);
19890 /* Create sequence for DWARF info. */
19891 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19893 /* For dwarf info, we generate explicit stack update. */
19894 tmp = gen_rtx_SET (VOIDmode,
19895 stack_pointer_rtx,
19896 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19897 RTX_FRAME_RELATED_P (tmp) = 1;
19898 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19900 /* Save registers. */
19901 offset = - 4 * num_regs;
19902 j = 0;
19903 while (j <= LAST_ARM_REGNUM)
19904 if (saved_regs_mask & (1 << j))
19906 if ((j % 2 == 0)
19907 && (saved_regs_mask & (1 << (j + 1))))
19909 /* The current register and the next register form a register pair for
19910 which STRD can be generated. */
19911 if (offset < 0)
19913 /* Allocate stack space for all saved registers. */
19914 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19915 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19916 mem = gen_frame_mem (DImode, tmp);
19917 offset = 0;
19919 else if (offset > 0)
19920 mem = gen_frame_mem (DImode,
19921 plus_constant (Pmode,
19922 stack_pointer_rtx,
19923 offset));
19924 else
19925 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19927 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19928 RTX_FRAME_RELATED_P (tmp) = 1;
19929 tmp = emit_insn (tmp);
19931 /* Record the first store insn. */
19932 if (dwarf_index == 1)
19933 insn = tmp;
19935 /* Generate dwarf info. */
19936 mem = gen_frame_mem (SImode,
19937 plus_constant (Pmode,
19938 stack_pointer_rtx,
19939 offset));
19940 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19941 RTX_FRAME_RELATED_P (tmp) = 1;
19942 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19944 mem = gen_frame_mem (SImode,
19945 plus_constant (Pmode,
19946 stack_pointer_rtx,
19947 offset + 4));
19948 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19949 RTX_FRAME_RELATED_P (tmp) = 1;
19950 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19952 offset += 8;
19953 j += 2;
19955 else
19957 /* Emit a single word store. */
19958 if (offset < 0)
19960 /* Allocate stack space for all saved registers. */
19961 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19962 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19963 mem = gen_frame_mem (SImode, tmp);
19964 offset = 0;
19966 else if (offset > 0)
19967 mem = gen_frame_mem (SImode,
19968 plus_constant (Pmode,
19969 stack_pointer_rtx,
19970 offset));
19971 else
19972 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19974 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19975 RTX_FRAME_RELATED_P (tmp) = 1;
19976 tmp = emit_insn (tmp);
19978 /* Record the first store insn. */
19979 if (dwarf_index == 1)
19980 insn = tmp;
19982 /* Generate dwarf info. */
19983 mem = gen_frame_mem (SImode,
19984 plus_constant(Pmode,
19985 stack_pointer_rtx,
19986 offset));
19987 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19988 RTX_FRAME_RELATED_P (tmp) = 1;
19989 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19991 offset += 4;
19992 j += 1;
19995 else
19996 j++;
19998 /* Attach dwarf info to the first insn we generate. */
19999 gcc_assert (insn != NULL_RTX);
20000 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20001 RTX_FRAME_RELATED_P (insn) = 1;
20004 /* Generate and emit an insn that we will recognize as a push_multi.
20005 Unfortunately, since this insn does not reflect very well the actual
20006 semantics of the operation, we need to annotate the insn for the benefit
20007 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20008 MASK for registers that should be annotated for DWARF2 frame unwind
20009 information. */
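
/* An illustrative sketch, not part of the original sources: with a
   hypothetical MASK of {r4, r5, lr} the insn ultimately assembles to a
   single

	push	{r4, r5, lr}

   (or "stmfd sp!, {r4, r5, lr}" in divided syntax), while the attached
   DWARF note describes one 12-byte stack decrement plus the three
   individual stores.  */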
20010 static rtx
20011 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20013 int num_regs = 0;
20014 int num_dwarf_regs = 0;
20015 int i, j;
20016 rtx par;
20017 rtx dwarf;
20018 int dwarf_par_index;
20019 rtx tmp, reg;
20021 /* We don't record the PC in the dwarf frame information. */
20022 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20024 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20026 if (mask & (1 << i))
20027 num_regs++;
20028 if (dwarf_regs_mask & (1 << i))
20029 num_dwarf_regs++;
20032 gcc_assert (num_regs && num_regs <= 16);
20033 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20035 /* For the body of the insn we are going to generate an UNSPEC in
20036 parallel with several USEs. This allows the insn to be recognized
20037 by the push_multi pattern in the arm.md file.
20039 The body of the insn looks something like this:
20041 (parallel [
20042 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20043 (const_int:SI <num>)))
20044 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20045 (use (reg:SI XX))
20046 (use (reg:SI YY))
20050 For the frame note however, we try to be more explicit and actually
20051 show each register being stored into the stack frame, plus a (single)
20052 decrement of the stack pointer. We do it this way in order to be
20053 friendly to the stack unwinding code, which only wants to see a single
20054 stack decrement per instruction. The RTL we generate for the note looks
20055 something like this:
20057 (sequence [
20058 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20059 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20060 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20061 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20065 FIXME: In an ideal world the PRE_MODIFY would not exist and
20066 instead we'd have a parallel expression detailing all
20067 the stores to the various memory addresses so that debug
20068 information is more up-to-date. Remember however while writing
20069 this to take care of the constraints with the push instruction.
20071 Note also that this has to be taken care of for the VFP registers.
20073 For more see PR43399. */
20075 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20076 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20077 dwarf_par_index = 1;
20079 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20081 if (mask & (1 << i))
20083 reg = gen_rtx_REG (SImode, i);
20085 XVECEXP (par, 0, 0)
20086 = gen_rtx_SET (VOIDmode,
20087 gen_frame_mem
20088 (BLKmode,
20089 gen_rtx_PRE_MODIFY (Pmode,
20090 stack_pointer_rtx,
20091 plus_constant
20092 (Pmode, stack_pointer_rtx,
20093 -4 * num_regs))
20095 gen_rtx_UNSPEC (BLKmode,
20096 gen_rtvec (1, reg),
20097 UNSPEC_PUSH_MULT));
20099 if (dwarf_regs_mask & (1 << i))
20101 tmp = gen_rtx_SET (VOIDmode,
20102 gen_frame_mem (SImode, stack_pointer_rtx),
20103 reg);
20104 RTX_FRAME_RELATED_P (tmp) = 1;
20105 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20108 break;
20112 for (j = 1, i++; j < num_regs; i++)
20114 if (mask & (1 << i))
20116 reg = gen_rtx_REG (SImode, i);
20118 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20120 if (dwarf_regs_mask & (1 << i))
20122 tmp
20123 = gen_rtx_SET (VOIDmode,
20124 gen_frame_mem
20125 (SImode,
20126 plus_constant (Pmode, stack_pointer_rtx,
20127 4 * j)),
20128 reg);
20129 RTX_FRAME_RELATED_P (tmp) = 1;
20130 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20133 j++;
20137 par = emit_insn (par);
20139 tmp = gen_rtx_SET (VOIDmode,
20140 stack_pointer_rtx,
20141 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20142 RTX_FRAME_RELATED_P (tmp) = 1;
20143 XVECEXP (dwarf, 0, 0) = tmp;
20145 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20147 return par;
20150 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20151 SIZE is the offset to be adjusted.
20152 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20153 static void
20154 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20156 rtx dwarf;
20158 RTX_FRAME_RELATED_P (insn) = 1;
20159 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20160 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20163 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20164 SAVED_REGS_MASK shows which registers need to be restored.
20166 Unfortunately, since this insn does not reflect very well the actual
20167 semantics of the operation, we need to annotate the insn for the benefit
20168 of DWARF2 frame unwind information. */
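
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, pc} assembles to roughly

	pop	{r4, r5, pc}

   which also performs the function return; a mask without PC additionally
   gets a REG_CFA_ADJUST_CFA note for the stack increment.  */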
20169 static void
20170 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20172 int num_regs = 0;
20173 int i, j;
20174 rtx par;
20175 rtx dwarf = NULL_RTX;
20176 rtx tmp, reg;
20177 bool return_in_pc;
20178 int offset_adj;
20179 int emit_update;
20181 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20182 offset_adj = return_in_pc ? 1 : 0;
20183 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (saved_regs_mask & (1 << i))
20185 num_regs++;
20187 gcc_assert (num_regs && num_regs <= 16);
20189 /* If SP is in reglist, then we don't emit SP update insn. */
20190 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20192 /* The parallel needs to hold num_regs SETs
20193 and one SET for the stack update. */
20194 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20196 if (return_in_pc)
20198 tmp = ret_rtx;
20199 XVECEXP (par, 0, 0) = tmp;
20202 if (emit_update)
20204 /* Increment the stack pointer, based on there being
20205 num_regs 4-byte registers to restore. */
20206 tmp = gen_rtx_SET (VOIDmode,
20207 stack_pointer_rtx,
20208 plus_constant (Pmode,
20209 stack_pointer_rtx,
20210 4 * num_regs));
20211 RTX_FRAME_RELATED_P (tmp) = 1;
20212 XVECEXP (par, 0, offset_adj) = tmp;
20215 /* Now restore every reg, which may include PC. */
20216 for (j = 0, i = 0; j < num_regs; i++)
20217 if (saved_regs_mask & (1 << i))
20219 reg = gen_rtx_REG (SImode, i);
20220 if ((num_regs == 1) && emit_update && !return_in_pc)
20222 /* Emit single load with writeback. */
20223 tmp = gen_frame_mem (SImode,
20224 gen_rtx_POST_INC (Pmode,
20225 stack_pointer_rtx));
20226 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20227 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20228 return;
20231 tmp = gen_rtx_SET (VOIDmode,
20232 reg,
20233 gen_frame_mem
20234 (SImode,
20235 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20236 RTX_FRAME_RELATED_P (tmp) = 1;
20237 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20239 /* We need to maintain a sequence for DWARF info too. As dwarf info
20240 should not have PC, skip PC. */
20241 if (i != PC_REGNUM)
20242 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20244 j++;
20247 if (return_in_pc)
20248 par = emit_jump_insn (par);
20249 else
20250 par = emit_insn (par);
20252 REG_NOTES (par) = dwarf;
20253 if (!return_in_pc)
20254 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20255 stack_pointer_rtx, stack_pointer_rtx);
20258 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20259 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20261 Unfortunately, since this insn does not reflect very well the actual
20262 semantics of the operation, we need to annotate the insn for the benefit
20263 of DWARF2 frame unwind information. */
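
/* An illustrative sketch, not part of the original sources: restoring four
   D-registers starting at d8 with BASE_REG == sp corresponds roughly to

	vldm	sp!, {d8-d11}

   i.e. a 32-byte pop; requests for more than 16 D-registers are split into
   two such instructions by the recursion below.  */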
20264 static void
20265 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20267 int i, j;
20268 rtx par;
20269 rtx dwarf = NULL_RTX;
20270 rtx tmp, reg;
20272 gcc_assert (num_regs && num_regs <= 32);
20274 /* Workaround ARM10 VFPr1 bug. */
20275 if (num_regs == 2 && !arm_arch6)
20277 if (first_reg == 15)
20278 first_reg--;
20280 num_regs++;
20283 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20284 there could be up to 32 D-registers to restore.
20285 If there are more than 16 D-registers, make two recursive calls,
20286 each of which emits one pop_multi instruction. */
20287 if (num_regs > 16)
20289 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20290 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20291 return;
20294 /* The parallel needs to hold num_regs SETs
20295 and one SET for the stack update. */
20296 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20298 /* Increment the stack pointer, based on there being
20299 num_regs 8-byte registers to restore. */
20300 tmp = gen_rtx_SET (VOIDmode,
20301 base_reg,
20302 plus_constant (Pmode, base_reg, 8 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (par, 0, 0) = tmp;
20306 /* Now show every reg that will be restored, using a SET for each. */
20307 for (j = 0, i=first_reg; j < num_regs; i += 2)
20309 reg = gen_rtx_REG (DFmode, i);
20311 tmp = gen_rtx_SET (VOIDmode,
20312 reg,
20313 gen_frame_mem
20314 (DFmode,
20315 plus_constant (Pmode, base_reg, 8 * j)));
20316 RTX_FRAME_RELATED_P (tmp) = 1;
20317 XVECEXP (par, 0, j + 1) = tmp;
20319 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20321 j++;
20324 par = emit_insn (par);
20325 REG_NOTES (par) = dwarf;
20327 /* Make sure the CFA does not stay based on IP_REGNUM, so that unwinding from FP remains possible. */
20328 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20330 RTX_FRAME_RELATED_P (par) = 1;
20331 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20333 else
20334 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20335 base_reg, base_reg);
20338 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20339 even number of registers is being popped, multiple LDRD patterns are created for
20340 all register pairs. If an odd number of registers is popped, the last register is
20341 loaded using an LDR pattern. */
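
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, r6} is restored roughly as

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   the two LDRD targets need not be consecutive registers in Thumb-2.  */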
20342 static void
20343 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20345 int num_regs = 0;
20346 int i, j;
20347 rtx par = NULL_RTX;
20348 rtx dwarf = NULL_RTX;
20349 rtx tmp, reg, tmp1;
20350 bool return_in_pc;
20352 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20353 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20354 if (saved_regs_mask & (1 << i))
20355 num_regs++;
20357 gcc_assert (num_regs && num_regs <= 16);
20359 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20360 to be popped. So, if num_regs is even, now it will become odd,
20361 and we can generate pop with PC. If num_regs is odd, it will be
20362 even now, and ldr with return can be generated for PC. */
20363 if (return_in_pc)
20364 num_regs--;
20366 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20368 /* Var j iterates over all the registers to gather all the registers in
20369 saved_regs_mask. Var i gives index of saved registers in stack frame.
20370 A PARALLEL RTX of register-pair is created here, so that pattern for
20371 LDRD can be matched. As PC is always last register to be popped, and
20372 we have already decremented num_regs if PC, we don't have to worry
20373 about PC in this loop. */
20374 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20375 if (saved_regs_mask & (1 << j))
20377 /* Create RTX for memory load. */
20378 reg = gen_rtx_REG (SImode, j);
20379 tmp = gen_rtx_SET (SImode,
20380 reg,
20381 gen_frame_mem (SImode,
20382 plus_constant (Pmode,
20383 stack_pointer_rtx, 4 * i)));
20384 RTX_FRAME_RELATED_P (tmp) = 1;
20386 if (i % 2 == 0)
20388 /* When saved-register index (i) is even, the RTX to be emitted is
20389 yet to be created. Hence create it first. The LDRD pattern we
20390 are generating is :
20391 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20392 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20393 where target registers need not be consecutive. */
20394 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20395 dwarf = NULL_RTX;
20398 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20399 added as 0th element and if i is odd, reg_i is added as 1st element
20400 of LDRD pattern shown above. */
20401 XVECEXP (par, 0, (i % 2)) = tmp;
20402 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20404 if ((i % 2) == 1)
20406 /* When saved-register index (i) is odd, RTXs for both the registers
20407 to be loaded are generated in above given LDRD pattern, and the
20408 pattern can be emitted now. */
20409 par = emit_insn (par);
20410 REG_NOTES (par) = dwarf;
20411 RTX_FRAME_RELATED_P (par) = 1;
20414 i++;
20417 /* If the number of registers popped is odd and return_in_pc is false, or
20418 the number of registers is even and return_in_pc is true, the last register is
20419 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20420 then use LDR with post-increment. */
20422 /* Increment the stack pointer, based on there being
20423 num_regs 4-byte registers to restore. */
20424 tmp = gen_rtx_SET (VOIDmode,
20425 stack_pointer_rtx,
20426 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20427 RTX_FRAME_RELATED_P (tmp) = 1;
20428 tmp = emit_insn (tmp);
20429 if (!return_in_pc)
20431 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20432 stack_pointer_rtx, stack_pointer_rtx);
20435 dwarf = NULL_RTX;
20437 if (((num_regs % 2) == 1 && !return_in_pc)
20438 || ((num_regs % 2) == 0 && return_in_pc))
20440 /* Scan for the single register to be popped. Skip until the saved
20441 register is found. */
20442 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20444 /* Gen LDR with post increment here. */
20445 tmp1 = gen_rtx_MEM (SImode,
20446 gen_rtx_POST_INC (SImode,
20447 stack_pointer_rtx));
20448 set_mem_alias_set (tmp1, get_frame_alias_set ());
20450 reg = gen_rtx_REG (SImode, j);
20451 tmp = gen_rtx_SET (SImode, reg, tmp1);
20452 RTX_FRAME_RELATED_P (tmp) = 1;
20453 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20455 if (return_in_pc)
20457 /* If return_in_pc, j must be PC_REGNUM. */
20458 gcc_assert (j == PC_REGNUM);
20459 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20460 XVECEXP (par, 0, 0) = ret_rtx;
20461 XVECEXP (par, 0, 1) = tmp;
20462 par = emit_jump_insn (par);
20464 else
20466 par = emit_insn (tmp);
20467 REG_NOTES (par) = dwarf;
20468 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20469 stack_pointer_rtx, stack_pointer_rtx);
20473 else if ((num_regs % 2) == 1 && return_in_pc)
20475 /* There are 2 registers to be popped. So, generate the pattern
20476 pop_multiple_with_stack_update_and_return to pop in PC. */
20477 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20480 return;
20483 /* LDRD in ARM mode needs consecutive registers as operands. This function
20484 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20485 offset addressing and then generates one separate stack update. This provides
20486 more scheduling freedom, compared to writeback on every load. However,
20487 if the function returns using load into PC directly
20488 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20489 before the last load. TODO: Add a peephole optimization to recognize
20490 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20491 peephole optimization to merge the load at stack-offset zero
20492 with the stack update instruction using load with writeback
20493 in post-index addressing mode. */
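
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, r6, pc} is restored roughly as

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   with the final post-increment load also acting as the return.  */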
20494 static void
20495 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20497 int j = 0;
20498 int offset = 0;
20499 rtx par = NULL_RTX;
20500 rtx dwarf = NULL_RTX;
20501 rtx tmp, mem;
20503 /* Restore saved registers. */
20504 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20505 j = 0;
20506 while (j <= LAST_ARM_REGNUM)
20507 if (saved_regs_mask & (1 << j))
20509 if ((j % 2) == 0
20510 && (saved_regs_mask & (1 << (j + 1)))
20511 && (j + 1) != PC_REGNUM)
20513 /* Current register and next register form register pair for which
20514 LDRD can be generated. PC is always the last register popped, and
20515 we handle it separately. */
20516 if (offset > 0)
20517 mem = gen_frame_mem (DImode,
20518 plus_constant (Pmode,
20519 stack_pointer_rtx,
20520 offset));
20521 else
20522 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20524 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20525 tmp = emit_insn (tmp);
20526 RTX_FRAME_RELATED_P (tmp) = 1;
20528 /* Generate dwarf info. */
20530 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20531 gen_rtx_REG (SImode, j),
20532 NULL_RTX);
20533 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20534 gen_rtx_REG (SImode, j + 1),
20535 dwarf);
20537 REG_NOTES (tmp) = dwarf;
20539 offset += 8;
20540 j += 2;
20542 else if (j != PC_REGNUM)
20544 /* Emit a single word load. */
20545 if (offset > 0)
20546 mem = gen_frame_mem (SImode,
20547 plus_constant (Pmode,
20548 stack_pointer_rtx,
20549 offset));
20550 else
20551 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20553 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20554 tmp = emit_insn (tmp);
20555 RTX_FRAME_RELATED_P (tmp) = 1;
20557 /* Generate dwarf info. */
20558 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20559 gen_rtx_REG (SImode, j),
20560 NULL_RTX);
20562 offset += 4;
20563 j += 1;
20565 else /* j == PC_REGNUM */
20566 j++;
20568 else
20569 j++;
20571 /* Update the stack. */
20572 if (offset > 0)
20574 tmp = gen_rtx_SET (Pmode,
20575 stack_pointer_rtx,
20576 plus_constant (Pmode,
20577 stack_pointer_rtx,
20578 offset));
20579 tmp = emit_insn (tmp);
20580 arm_add_cfa_adjust_cfa_note (tmp, offset,
20581 stack_pointer_rtx, stack_pointer_rtx);
20582 offset = 0;
20585 if (saved_regs_mask & (1 << PC_REGNUM))
20587 /* Only PC is to be popped. */
20588 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20589 XVECEXP (par, 0, 0) = ret_rtx;
20590 tmp = gen_rtx_SET (SImode,
20591 gen_rtx_REG (SImode, PC_REGNUM),
20592 gen_frame_mem (SImode,
20593 gen_rtx_POST_INC (SImode,
20594 stack_pointer_rtx)));
20595 RTX_FRAME_RELATED_P (tmp) = 1;
20596 XVECEXP (par, 0, 1) = tmp;
20597 par = emit_jump_insn (par);
20599 /* Generate dwarf info. */
20600 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20601 gen_rtx_REG (SImode, PC_REGNUM),
20602 NULL_RTX);
20603 REG_NOTES (par) = dwarf;
20604 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20605 stack_pointer_rtx, stack_pointer_rtx);
20609 /* Calculate the size of the return value that is passed in registers. */
20610 static unsigned
20611 arm_size_return_regs (void)
20613 machine_mode mode;
20615 if (crtl->return_rtx != 0)
20616 mode = GET_MODE (crtl->return_rtx);
20617 else
20618 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20620 return GET_MODE_SIZE (mode);
20623 /* Return true if the current function needs to save/restore LR. */
20624 static bool
20625 thumb_force_lr_save (void)
20627 return !cfun->machine->lr_save_eliminated
20628 && (!leaf_function_p ()
20629 || thumb_far_jump_used_p ()
20630 || df_regs_ever_live_p (LR_REGNUM));
20633 /* We do not know whether r3 will be available, because
20634 an indirect tailcall is happening in this
20635 particular case. */
20636 static bool
20637 is_indirect_tailcall_p (rtx call)
20639 rtx pat = PATTERN (call);
20641 /* Indirect tail call. */
20642 pat = XVECEXP (pat, 0, 0);
20643 if (GET_CODE (pat) == SET)
20644 pat = SET_SRC (pat);
20646 pat = XEXP (XEXP (pat, 0), 0);
20647 return REG_P (pat);
20650 /* Return true if r3 is used by any of the tail call insns in the
20651 current function. */
20652 static bool
20653 any_sibcall_could_use_r3 (void)
20655 edge_iterator ei;
20656 edge e;
20658 if (!crtl->tail_call_emit)
20659 return false;
20660 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20661 if (e->flags & EDGE_SIBCALL)
20663 rtx call = BB_END (e->src);
20664 if (!CALL_P (call))
20665 call = prev_nonnote_nondebug_insn (call);
20666 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20667 if (find_regno_fusage (call, USE, 3)
20668 || is_indirect_tailcall_p (call))
20669 return true;
20671 return false;
20675 /* Compute the distance from register FROM to register TO.
20676 These can be the arg pointer (26), the soft frame pointer (25),
20677 the stack pointer (13) or the hard frame pointer (11).
20678 In thumb mode r7 is used as the soft frame pointer, if needed.
20679 Typical stack layout looks like this:
20681 old stack pointer -> | |
20682 ----
20683 | | \
20684 | | saved arguments for
20685 | | vararg functions
20686 | | /
20688 hard FP & arg pointer -> | | \
20689 | | stack
20690 | | frame
20691 | | /
20693 | | \
20694 | | call saved
20695 | | registers
20696 soft frame pointer -> | | /
20698 | | \
20699 | | local
20700 | | variables
20701 locals base pointer -> | | /
20703 | | \
20704 | | outgoing
20705 | | arguments
20706 current stack pointer -> | | /
20709 For a given function some or all of these stack components
20710 may not be needed, giving rise to the possibility of
20711 eliminating some of the registers.
20713 The values returned by this function must reflect the behavior
20714 of arm_expand_prologue() and arm_compute_save_reg_mask().
20716 The sign of the number returned reflects the direction of stack
20717 growth, so the values are positive for all eliminations except
20718 from the soft frame pointer to the hard frame pointer.
20720 SFP may point just inside the local variables block to ensure correct
20721 alignment. */
20724 /* Calculate stack offsets. These are used to calculate register elimination
20725 offsets and in prologue/epilogue code. Also calculates which registers
20726 should be saved. */
20728 static arm_stack_offsets *
20729 arm_get_frame_offsets (void)
20731 struct arm_stack_offsets *offsets;
20732 unsigned long func_type;
20733 int leaf;
20734 int saved;
20735 int core_saved;
20736 HOST_WIDE_INT frame_size;
20737 int i;
20739 offsets = &cfun->machine->stack_offsets;
20741 /* We need to know if we are a leaf function. Unfortunately, it
20742 is possible to be called after start_sequence has been called,
20743 which causes get_insns to return the insns for the sequence,
20744 not the function, which will cause leaf_function_p to return
20745 the incorrect result.
20747 However, we only need to know about leaf functions once reload has completed, and the
20748 frame size cannot be changed after that time, so we can safely
20749 use the cached value. */
20751 if (reload_completed)
20752 return offsets;
20754 /* Initially this is the size of the local variables. It will be translated
20755 into an offset once we have determined the size of preceding data. */
20756 frame_size = ROUND_UP_WORD (get_frame_size ());
20758 leaf = leaf_function_p ();
20760 /* Space for variadic functions. */
20761 offsets->saved_args = crtl->args.pretend_args_size;
20763 /* In Thumb mode this is incorrect, but never used. */
20764 offsets->frame
20765 = (offsets->saved_args
20766 + arm_compute_static_chain_stack_bytes ()
20767 + (frame_pointer_needed ? 4 : 0));
20769 if (TARGET_32BIT)
20771 unsigned int regno;
20773 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20774 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20775 saved = core_saved;
20777 /* We know that SP will be doubleword aligned on entry, and we must
20778 preserve that condition at any subroutine call. We also require the
20779 soft frame pointer to be doubleword aligned. */
20781 if (TARGET_REALLY_IWMMXT)
20783 /* Check for the call-saved iWMMXt registers. */
20784 for (regno = FIRST_IWMMXT_REGNUM;
20785 regno <= LAST_IWMMXT_REGNUM;
20786 regno++)
20787 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20788 saved += 8;
20791 func_type = arm_current_func_type ();
20792 /* Space for saved VFP registers. */
20793 if (! IS_VOLATILE (func_type)
20794 && TARGET_HARD_FLOAT && TARGET_VFP)
20795 saved += arm_get_vfp_saved_size ();
20797 else /* TARGET_THUMB1 */
20799 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20800 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20801 saved = core_saved;
20802 if (TARGET_BACKTRACE)
20803 saved += 16;
20806 /* Saved registers include the stack frame. */
20807 offsets->saved_regs
20808 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20809 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20811 /* A leaf function does not need any stack alignment if it has nothing
20812 on the stack. */
20813 if (leaf && frame_size == 0
20814 /* However if it calls alloca(), we have a dynamically allocated
20815 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20816 && ! cfun->calls_alloca)
20818 offsets->outgoing_args = offsets->soft_frame;
20819 offsets->locals_base = offsets->soft_frame;
20820 return offsets;
20823 /* Ensure SFP has the correct alignment. */
20824 if (ARM_DOUBLEWORD_ALIGN
20825 && (offsets->soft_frame & 7))
20827 offsets->soft_frame += 4;
20828 /* Try to align stack by pushing an extra reg. Don't bother doing this
20829 when there is a stack frame as the alignment will be rolled into
20830 the normal stack adjustment. */
20831 if (frame_size + crtl->outgoing_args_size == 0)
20833 int reg = -1;
20835 /* Register r3 is caller-saved. Normally it does not need to be
20836 saved on entry by the prologue. However if we choose to save
20837 it for padding then we may confuse the compiler into thinking
20838 a prologue sequence is required when in fact it is not. This
20839 will occur when shrink-wrapping if r3 is used as a scratch
20840 register and there are no other callee-saved writes.
20842 This situation can be avoided when other callee-saved registers
20843 are available and r3 is not mandatory if we choose a callee-saved
20844 register for padding. */
20845 bool prefer_callee_reg_p = false;
20847 /* If it is safe to use r3, then do so. This sometimes
20848 generates better code on Thumb-2 by avoiding the need to
20849 use 32-bit push/pop instructions. */
20850 if (! any_sibcall_could_use_r3 ()
20851 && arm_size_return_regs () <= 12
20852 && (offsets->saved_regs_mask & (1 << 3)) == 0
20853 && (TARGET_THUMB2
20854 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20856 reg = 3;
20857 if (!TARGET_THUMB2)
20858 prefer_callee_reg_p = true;
20860 if (reg == -1
20861 || prefer_callee_reg_p)
20863 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20865 /* Avoid fixed registers; they may be changed at
20866 arbitrary times so it's unsafe to restore them
20867 during the epilogue. */
20868 if (!fixed_regs[i]
20869 && (offsets->saved_regs_mask & (1 << i)) == 0)
20871 reg = i;
20872 break;
20877 if (reg != -1)
20879 offsets->saved_regs += 4;
20880 offsets->saved_regs_mask |= (1 << reg);
20885 offsets->locals_base = offsets->soft_frame + frame_size;
20886 offsets->outgoing_args = (offsets->locals_base
20887 + crtl->outgoing_args_size);
20889 if (ARM_DOUBLEWORD_ALIGN)
20891 /* Ensure SP remains doubleword aligned. */
20892 if (offsets->outgoing_args & 7)
20893 offsets->outgoing_args += 4;
20894 gcc_assert (!(offsets->outgoing_args & 7));
20897 return offsets;
20901 /* Calculate the relative offsets for the different stack pointers. Positive
20902 offsets are in the direction of stack growth. */
20904 HOST_WIDE_INT
20905 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20907 arm_stack_offsets *offsets;
20909 offsets = arm_get_frame_offsets ();
20911 /* OK, now we have enough information to compute the distances.
20912 There must be an entry in these switch tables for each pair
20913 of registers in ELIMINABLE_REGS, even if some of the entries
20914 seem to be redundant or useless. */
20915 switch (from)
20917 case ARG_POINTER_REGNUM:
20918 switch (to)
20920 case THUMB_HARD_FRAME_POINTER_REGNUM:
20921 return 0;
20923 case FRAME_POINTER_REGNUM:
20924 /* This is the reverse of the soft frame pointer
20925 to hard frame pointer elimination below. */
20926 return offsets->soft_frame - offsets->saved_args;
20928 case ARM_HARD_FRAME_POINTER_REGNUM:
20929 /* This is only non-zero in the case where the static chain register
20930 is stored above the frame. */
20931 return offsets->frame - offsets->saved_args - 4;
20933 case STACK_POINTER_REGNUM:
20934 /* If nothing has been pushed on the stack at all
20935 then this will return -4. This *is* correct! */
20936 return offsets->outgoing_args - (offsets->saved_args + 4);
20938 default:
20939 gcc_unreachable ();
20941 gcc_unreachable ();
20943 case FRAME_POINTER_REGNUM:
20944 switch (to)
20946 case THUMB_HARD_FRAME_POINTER_REGNUM:
20947 return 0;
20949 case ARM_HARD_FRAME_POINTER_REGNUM:
20950 /* The hard frame pointer points to the top entry in the
20951 stack frame. The soft frame pointer to the bottom entry
20952 in the stack frame. If there is no stack frame at all,
20953 then they are identical. */
20955 return offsets->frame - offsets->soft_frame;
20957 case STACK_POINTER_REGNUM:
20958 return offsets->outgoing_args - offsets->soft_frame;
20960 default:
20961 gcc_unreachable ();
20963 gcc_unreachable ();
20965 default:
20966 /* You cannot eliminate from the stack pointer.
20967 In theory you could eliminate from the hard frame
20968 pointer to the stack pointer, but this will never
20969 happen, since if a stack frame is not needed the
20970 hard frame pointer will never be used. */
20971 gcc_unreachable ();
20975 /* Given FROM and TO register numbers, say whether this elimination is
20976 allowed. Frame pointer elimination is automatically handled.
20978 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20979 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20980 pointer, we must eliminate FRAME_POINTER_REGNUM into
20981 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20982 ARG_POINTER_REGNUM. */
20984 bool
20985 arm_can_eliminate (const int from, const int to)
20987 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20988 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20989 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20990 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20991 true);
20994 /* Emit RTL to save coprocessor registers on function entry. Returns the
20995 number of bytes pushed. */
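
/* An illustrative sketch, not part of the original sources: a function that
   keeps d8 and d9 live across calls would get roughly

	vpush	{d8, d9}

   from the VFP loop below, contributing 16 bytes to the returned size.  */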
20997 static int
20998 arm_save_coproc_regs(void)
21000 int saved_size = 0;
21001 unsigned reg;
21002 unsigned start_reg;
21003 rtx insn;
21005 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21006 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21008 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21009 insn = gen_rtx_MEM (V2SImode, insn);
21010 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21011 RTX_FRAME_RELATED_P (insn) = 1;
21012 saved_size += 8;
21015 if (TARGET_HARD_FLOAT && TARGET_VFP)
21017 start_reg = FIRST_VFP_REGNUM;
21019 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21021 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21022 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21024 if (start_reg != reg)
21025 saved_size += vfp_emit_fstmd (start_reg,
21026 (reg - start_reg) / 2);
21027 start_reg = reg + 2;
21030 if (start_reg != reg)
21031 saved_size += vfp_emit_fstmd (start_reg,
21032 (reg - start_reg) / 2);
21034 return saved_size;
21038 /* Set the Thumb frame pointer from the stack pointer. */
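
/* An illustrative sketch, not part of the original sources: for small
   offsets this is simply "add r7, sp, #amount" (r7 being the Thumb hard
   frame pointer); for amounts of 1024 or more the constant is first moved
   into the frame pointer and SP is then added to it, with an explicit
   REG_FRAME_RELATED_EXPR note describing the combined effect.  */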
21040 static void
21041 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21043 HOST_WIDE_INT amount;
21044 rtx insn, dwarf;
21046 amount = offsets->outgoing_args - offsets->locals_base;
21047 if (amount < 1024)
21048 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21049 stack_pointer_rtx, GEN_INT (amount)));
21050 else
21052 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21053 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21054 expects the first two operands to be the same. */
21055 if (TARGET_THUMB2)
21057 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21058 stack_pointer_rtx,
21059 hard_frame_pointer_rtx));
21061 else
21063 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21064 hard_frame_pointer_rtx,
21065 stack_pointer_rtx));
21067 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21068 plus_constant (Pmode, stack_pointer_rtx, amount));
21069 RTX_FRAME_RELATED_P (dwarf) = 1;
21070 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21073 RTX_FRAME_RELATED_P (insn) = 1;
21076 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21077 function. */
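
/* An illustrative sketch, not part of the original sources: for a typical
   APCS-frame ARM function the sequence produced here follows the familiar
   pattern already shown above for arm_poke_function_name,

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals and outgoing arguments>

   although the many special cases below (interrupts, nested functions,
   stack realignment, STRD-based pushes) vary this considerably.  */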
21078 void
21079 arm_expand_prologue (void)
21081 rtx amount;
21082 rtx insn;
21083 rtx ip_rtx;
21084 unsigned long live_regs_mask;
21085 unsigned long func_type;
21086 int fp_offset = 0;
21087 int saved_pretend_args = 0;
21088 int saved_regs = 0;
21089 unsigned HOST_WIDE_INT args_to_push;
21090 arm_stack_offsets *offsets;
21092 func_type = arm_current_func_type ();
21094 /* Naked functions don't have prologues. */
21095 if (IS_NAKED (func_type))
21096 return;
21098 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21099 args_to_push = crtl->args.pretend_args_size;
21101 /* Compute which registers we will have to save onto the stack. */
21102 offsets = arm_get_frame_offsets ();
21103 live_regs_mask = offsets->saved_regs_mask;
21105 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21107 if (IS_STACKALIGN (func_type))
21109 rtx r0, r1;
21111 /* Handle a word-aligned stack pointer. We generate the following:
21113 mov r0, sp
21114 bic r1, r0, #7
21115 mov sp, r1
21116 <save and restore r0 in normal prologue/epilogue>
21117 mov sp, r0
21118 bx lr
21120 The unwinder doesn't need to know about the stack realignment.
21121 Just tell it we saved SP in r0. */
21122 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21124 r0 = gen_rtx_REG (SImode, 0);
21125 r1 = gen_rtx_REG (SImode, 1);
21127 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21128 RTX_FRAME_RELATED_P (insn) = 1;
21129 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21131 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21133 /* ??? The CFA changes here, which may cause GDB to conclude that it
21134 has entered a different function. That said, the unwind info is
21135 correct, individually, before and after this instruction because
21136 we've described the save of SP, which will override the default
21137 handling of SP as restoring from the CFA. */
21138 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21141 /* For APCS frames, if IP register is clobbered
21142 when creating frame, save that register in a special
21143 way. */
21144 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21146 if (IS_INTERRUPT (func_type))
21148 /* Interrupt functions must not corrupt any registers.
21149 Creating a frame pointer however, corrupts the IP
21150 register, so we must push it first. */
21151 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21153 /* Do not set RTX_FRAME_RELATED_P on this insn.
21154 The dwarf stack unwinding code only wants to see one
21155 stack decrement per function, and this is not it. If
21156 this instruction is labeled as being part of the frame
21157 creation sequence then dwarf2out_frame_debug_expr will
21158 die when it encounters the assignment of IP to FP
21159 later on, since the use of SP here establishes SP as
21160 the CFA register and not IP.
21162 Anyway this instruction is not really part of the stack
21163 frame creation although it is part of the prologue. */
21165 else if (IS_NESTED (func_type))
21167 /* The static chain register is the same as the IP register
21168 used as a scratch register during stack frame creation.
21169 To get around this need to find somewhere to store IP
21170 whilst the frame is being created. We try the following
21171 places in order:
21173 1. The last argument register r3 if it is available.
21174 2. A slot on the stack above the frame if there are no
21175 arguments to push onto the stack.
21176 3. Register r3 again, after pushing the argument registers
21177 onto the stack, if this is a varargs function.
21178 4. The last slot on the stack created for the arguments to
21179 push, if this isn't a varargs function.
21181 Note - we only need to tell the dwarf2 backend about the SP
21182 adjustment in the second variant; the static chain register
21183 doesn't need to be unwound, as it doesn't contain a value
21184 inherited from the caller. */
21186 if (!arm_r3_live_at_start_p ())
21187 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21188 else if (args_to_push == 0)
21190 rtx addr, dwarf;
21192 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21193 saved_regs += 4;
21195 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21196 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21197 fp_offset = 4;
21199 /* Just tell the dwarf backend that we adjusted SP. */
21200 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21201 plus_constant (Pmode, stack_pointer_rtx,
21202 -fp_offset));
21203 RTX_FRAME_RELATED_P (insn) = 1;
21204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21206 else
21208 /* Store the args on the stack. */
21209 if (cfun->machine->uses_anonymous_args)
21211 insn
21212 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21213 (0xf0 >> (args_to_push / 4)) & 0xf);
21214 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21215 saved_pretend_args = 1;
21217 else
21219 rtx addr, dwarf;
21221 if (args_to_push == 4)
21222 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21223 else
21224 addr
21225 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21226 plus_constant (Pmode,
21227 stack_pointer_rtx,
21228 -args_to_push));
21230 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21232 /* Just tell the dwarf backend that we adjusted SP. */
21233 dwarf
21234 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21235 plus_constant (Pmode, stack_pointer_rtx,
21236 -args_to_push));
21237 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21240 RTX_FRAME_RELATED_P (insn) = 1;
21241 fp_offset = args_to_push;
21242 args_to_push = 0;
21246 insn = emit_set_insn (ip_rtx,
21247 plus_constant (Pmode, stack_pointer_rtx,
21248 fp_offset));
21249 RTX_FRAME_RELATED_P (insn) = 1;
21252 if (args_to_push)
21254 /* Push the argument registers, or reserve space for them. */
21255 if (cfun->machine->uses_anonymous_args)
21256 insn = emit_multi_reg_push
21257 ((0xf0 >> (args_to_push / 4)) & 0xf,
21258 (0xf0 >> (args_to_push / 4)) & 0xf);
21259 else
21260 insn = emit_insn
21261 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21262 GEN_INT (- args_to_push)));
21263 RTX_FRAME_RELATED_P (insn) = 1;
21266 /* If this is an interrupt service routine, and the link register
21267 is going to be pushed, and we're not generating the extra
21268 push of IP (needed when a frame is needed and the frame layout is APCS),
21269 then subtracting four from LR now will mean that the function return
21270 can be done with a single instruction. */
21271 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21272 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21273 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21274 && TARGET_ARM)
21276 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21278 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21281 if (live_regs_mask)
21283 unsigned long dwarf_regs_mask = live_regs_mask;
21285 saved_regs += bit_count (live_regs_mask) * 4;
21286 if (optimize_size && !frame_pointer_needed
21287 && saved_regs == offsets->saved_regs - offsets->saved_args)
21289 /* If no coprocessor registers are being pushed and we don't have
21290 to worry about a frame pointer then push extra registers to
21291 create the stack frame. This is done in a way that does not
21292 alter the frame layout, so it is independent of the epilogue. */
21293 int n;
21294 int frame;
21295 n = 0;
21296 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21297 n++;
21298 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21299 if (frame && n * 4 >= frame)
21301 n = frame / 4;
21302 live_regs_mask |= (1 << n) - 1;
21303 saved_regs += frame;
21307 if (TARGET_LDRD
21308 && current_tune->prefer_ldrd_strd
21309 && !optimize_function_for_size_p (cfun))
21311 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21312 if (TARGET_THUMB2)
21313 thumb2_emit_strd_push (live_regs_mask);
21314 else if (TARGET_ARM
21315 && !TARGET_APCS_FRAME
21316 && !IS_INTERRUPT (func_type))
21317 arm_emit_strd_push (live_regs_mask);
21318 else
21320 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21321 RTX_FRAME_RELATED_P (insn) = 1;
21324 else
21326 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21327 RTX_FRAME_RELATED_P (insn) = 1;
21331 if (! IS_VOLATILE (func_type))
21332 saved_regs += arm_save_coproc_regs ();
21334 if (frame_pointer_needed && TARGET_ARM)
21336 /* Create the new frame pointer. */
21337 if (TARGET_APCS_FRAME)
21339 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21340 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21341 RTX_FRAME_RELATED_P (insn) = 1;
21343 if (IS_NESTED (func_type))
21345 /* Recover the static chain register. */
21346 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21347 insn = gen_rtx_REG (SImode, 3);
21348 else
21350 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21351 insn = gen_frame_mem (SImode, insn);
21353 emit_set_insn (ip_rtx, insn);
21354 /* Add a USE to stop propagate_one_insn() from barfing. */
21355 emit_insn (gen_force_register_use (ip_rtx));
21358 else
21360 insn = GEN_INT (saved_regs - 4);
21361 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21362 stack_pointer_rtx, insn));
21363 RTX_FRAME_RELATED_P (insn) = 1;
21367 if (flag_stack_usage_info)
21368 current_function_static_stack_size
21369 = offsets->outgoing_args - offsets->saved_args;
21371 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21373 /* This add can produce multiple insns for a large constant, so we
21374 need to get tricky. */
21375 rtx_insn *last = get_last_insn ();
21377 amount = GEN_INT (offsets->saved_args + saved_regs
21378 - offsets->outgoing_args);
21380 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21381 amount));
21384 last = last ? NEXT_INSN (last) : get_insns ();
21385 RTX_FRAME_RELATED_P (last) = 1;
21387 while (last != insn);
21389 /* If the frame pointer is needed, emit a special barrier that
21390 will prevent the scheduler from moving stores to the frame
21391 before the stack adjustment. */
21392 if (frame_pointer_needed)
21393 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21394 hard_frame_pointer_rtx));
21398 if (frame_pointer_needed && TARGET_THUMB2)
21399 thumb_set_frame_pointer (offsets);
21401 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21403 unsigned long mask;
21405 mask = live_regs_mask;
21406 mask &= THUMB2_WORK_REGS;
21407 if (!IS_NESTED (func_type))
21408 mask |= (1 << IP_REGNUM);
21409 arm_load_pic_register (mask);
21412 /* If we are profiling, make sure no instructions are scheduled before
21413 the call to mcount. Similarly if the user has requested no
21414 scheduling in the prolog. Similarly if we want non-call exceptions
21415 using the EABI unwinder, to prevent faulting instructions from being
21416 swapped with a stack adjustment. */
21417 if (crtl->profile || !TARGET_SCHED_PROLOG
21418 || (arm_except_unwind_info (&global_options) == UI_TARGET
21419 && cfun->can_throw_non_call_exceptions))
21420 emit_insn (gen_blockage ());
21422 /* If the link register is being kept alive, with the return address in it,
21423 then make sure that it does not get reused by the ce2 pass. */
21424 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21425 cfun->machine->lr_save_eliminated = 1;
21428 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21429 static void
21430 arm_print_condition (FILE *stream)
21432 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21434 /* Branch conversion is not implemented for Thumb-2. */
21435 if (TARGET_THUMB)
21437 output_operand_lossage ("predicated Thumb instruction");
21438 return;
21440 if (current_insn_predicate != NULL)
21442 output_operand_lossage
21443 ("predicated instruction in conditional sequence");
21444 return;
21447 fputs (arm_condition_codes[arm_current_cc], stream);
21449 else if (current_insn_predicate)
21451 enum arm_cond_code code;
21453 if (TARGET_THUMB1)
21455 output_operand_lossage ("predicated Thumb instruction");
21456 return;
21459 code = get_arm_condition_code (current_insn_predicate);
21460 fputs (arm_condition_codes[code], stream);
21465 /* Globally reserved letters: acln
21466 Punctuation letters currently used: @_|?().!#
21467 Lower case letters currently used: bcdefhimpqtvwxyz
21468 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21469 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21471 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21473 If CODE is 'd', then X is a condition operand and the instruction
21474 should only be executed if the condition is true.
21475 If CODE is 'D', then X is a condition operand and the instruction
21476 should only be executed if the condition is false: however, if the mode
21477 of the comparison is CCFPEmode, then always execute the instruction -- we
21478 do this because in these circumstances !GE does not necessarily imply LT;
21479 in these cases the instruction pattern will take care to make sure that
21480 an instruction containing %d will follow, thereby undoing the effects of
21481 doing this instruction unconditionally.
21482 If CODE is 'N' then X is a floating point operand that must be negated
21483 before output.
21484 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21485 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
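/* Illustrative examples (added for exposition; the real templates live in
   the machine description and the exact strings may differ): a
   conditional-move pattern might use an output template such as
       "mov%d3\t%0, %1\;mov%D3\t%0, %2"
   where %d3 and %D3 print the condition (and its inverse) taken from
   comparison operand 3, while a 64-bit add might use
       "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2"
   to address the low and high words of DImode register operands.  */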
21486 static void
21487 arm_print_operand (FILE *stream, rtx x, int code)
21489 switch (code)
21491 case '@':
21492 fputs (ASM_COMMENT_START, stream);
21493 return;
21495 case '_':
21496 fputs (user_label_prefix, stream);
21497 return;
21499 case '|':
21500 fputs (REGISTER_PREFIX, stream);
21501 return;
21503 case '?':
21504 arm_print_condition (stream);
21505 return;
21507 case '(':
21508 /* Nothing in unified syntax, otherwise the current condition code. */
21509 if (!TARGET_UNIFIED_ASM)
21510 arm_print_condition (stream);
21511 break;
21513 case ')':
21514 /* The current condition code in unified syntax, otherwise nothing. */
21515 if (TARGET_UNIFIED_ASM)
21516 arm_print_condition (stream);
21517 break;
21519 case '.':
21520 /* The current condition code for a condition code setting instruction.
21521 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21522 if (TARGET_UNIFIED_ASM)
21524 fputc('s', stream);
21525 arm_print_condition (stream);
21527 else
21529 arm_print_condition (stream);
21530 fputc('s', stream);
21532 return;
21534 case '!':
21535 /* If the instruction is conditionally executed then print
21536 the current condition code, otherwise print 's'. */
21537 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21538 if (current_insn_predicate)
21539 arm_print_condition (stream);
21540 else
21541 fputc('s', stream);
21542 break;
21544 /* %# is a "break" sequence. It doesn't output anything, but is used to
21545 separate e.g. operand numbers from following text, if that text consists
21546 of further digits which we don't want to be part of the operand
21547 number. */
21548 case '#':
21549 return;
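/* For exposition: the break matters when an operand number is directly
   followed by a literal digit.  "%11" in a template would be read as
   operand eleven, whereas a (hypothetical) "%1%#1" prints operand 1
   followed by the literal character '1'.  */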
21551 case 'N':
21553 REAL_VALUE_TYPE r;
21554 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21555 r = real_value_negate (&r);
21556 fprintf (stream, "%s", fp_const_from_val (&r));
21558 return;
21560 /* An integer or symbol address without a preceding # sign. */
21561 case 'c':
21562 switch (GET_CODE (x))
21564 case CONST_INT:
21565 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21566 break;
21568 case SYMBOL_REF:
21569 output_addr_const (stream, x);
21570 break;
21572 case CONST:
21573 if (GET_CODE (XEXP (x, 0)) == PLUS
21574 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21576 output_addr_const (stream, x);
21577 break;
21579 /* Fall through. */
21581 default:
21582 output_operand_lossage ("Unsupported operand for code '%c'", code);
21584 return;
21586 /* An integer that we want to print in HEX. */
21587 case 'x':
21588 switch (GET_CODE (x))
21590 case CONST_INT:
21591 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21592 break;
21594 default:
21595 output_operand_lossage ("Unsupported operand for code '%c'", code);
21597 return;
21599 case 'B':
21600 if (CONST_INT_P (x))
21602 HOST_WIDE_INT val;
21603 val = ARM_SIGN_EXTEND (~INTVAL (x));
21604 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21606 else
21608 putc ('~', stream);
21609 output_addr_const (stream, x);
21611 return;
21613 case 'b':
21614 /* Print the log2 of a CONST_INT. */
21616 HOST_WIDE_INT val;
21618 if (!CONST_INT_P (x)
21619 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21620 output_operand_lossage ("Unsupported operand for code '%c'", code);
21621 else
21622 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21624 return;
21626 case 'L':
21627 /* The low 16 bits of an immediate constant. */
21628 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21629 return;
21631 case 'i':
21632 fprintf (stream, "%s", arithmetic_instr (x, 1));
21633 return;
21635 case 'I':
21636 fprintf (stream, "%s", arithmetic_instr (x, 0));
21637 return;
21639 case 'S':
21641 HOST_WIDE_INT val;
21642 const char *shift;
21644 shift = shift_op (x, &val);
21646 if (shift)
21648 fprintf (stream, ", %s ", shift);
21649 if (val == -1)
21650 arm_print_operand (stream, XEXP (x, 1), 0);
21651 else
21652 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21655 return;
21657 /* An explanation of the 'Q', 'R' and 'H' register operands:
21659 In a pair of registers containing a DI or DF value the 'Q'
21660 operand returns the register number of the register containing
21661 the least significant part of the value. The 'R' operand returns
21662 the register number of the register containing the most
21663 significant part of the value.
21665 The 'H' operand returns the higher of the two register numbers.
21666 On a system where WORDS_BIG_ENDIAN is true the 'H' operand is the
21667 same as the 'Q' operand, since the most significant part of the
21668 value is held in the lower number register. The reverse is true
21669 on systems where WORDS_BIG_ENDIAN is false.
21671 The purpose of these operands is to distinguish between cases
21672 where the endian-ness of the values is important (for example
21673 when they are added together), and cases where the endian-ness
21674 is irrelevant, but the order of register operations is important.
21675 For example when loading a value from memory into a register
21676 pair, the endian-ness does not matter. Provided that the value
21677 from the lower memory address is put into the lower numbered
21678 register, and the value from the higher address is put into the
21679 higher numbered register, the load will work regardless of whether
21680 the value being loaded is big-wordian or little-wordian. The
21681 order of the two register loads can matter however, if the address
21682 of the memory location is actually held in one of the registers
21683 being overwritten by the load.
21685 The 'Q' and 'R' constraints are also available for 64-bit
21686 constants. */
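/* Worked example (added for clarity): a DImode value held in {r0, r1}.
   When WORDS_BIG_ENDIAN is false (the usual little-endian word order),
   %Q prints r0 (least significant word), %R prints r1 (most significant
   word) and %H prints r1 (the higher register number).  When
   WORDS_BIG_ENDIAN is true, %Q prints r1 and %R prints r0, while %H
   still prints r1.  */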
21687 case 'Q':
21688 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21690 rtx part = gen_lowpart (SImode, x);
21691 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21692 return;
21695 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21697 output_operand_lossage ("invalid operand for code '%c'", code);
21698 return;
21701 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21702 return;
21704 case 'R':
21705 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21707 machine_mode mode = GET_MODE (x);
21708 rtx part;
21710 if (mode == VOIDmode)
21711 mode = DImode;
21712 part = gen_highpart_mode (SImode, mode, x);
21713 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21714 return;
21717 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21719 output_operand_lossage ("invalid operand for code '%c'", code);
21720 return;
21723 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21724 return;
21726 case 'H':
21727 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21729 output_operand_lossage ("invalid operand for code '%c'", code);
21730 return;
21733 asm_fprintf (stream, "%r", REGNO (x) + 1);
21734 return;
21736 case 'J':
21737 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21739 output_operand_lossage ("invalid operand for code '%c'", code);
21740 return;
21743 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21744 return;
21746 case 'K':
21747 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21749 output_operand_lossage ("invalid operand for code '%c'", code);
21750 return;
21753 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21754 return;
21756 case 'm':
21757 asm_fprintf (stream, "%r",
21758 REG_P (XEXP (x, 0))
21759 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21760 return;
21762 case 'M':
21763 asm_fprintf (stream, "{%r-%r}",
21764 REGNO (x),
21765 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21766 return;
21768 /* Like 'M', but writing doubleword vector registers, for use by Neon
21769 insns. */
21770 case 'h':
21772 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21773 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21774 if (numregs == 1)
21775 asm_fprintf (stream, "{d%d}", regno);
21776 else
21777 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21779 return;
21781 case 'd':
21782 /* CONST_TRUE_RTX means always -- that's the default. */
21783 if (x == const_true_rtx)
21784 return;
21786 if (!COMPARISON_P (x))
21788 output_operand_lossage ("invalid operand for code '%c'", code);
21789 return;
21792 fputs (arm_condition_codes[get_arm_condition_code (x)],
21793 stream);
21794 return;
21796 case 'D':
21797 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21798 want to do that. */
21799 if (x == const_true_rtx)
21801 output_operand_lossage ("instruction never executed");
21802 return;
21804 if (!COMPARISON_P (x))
21806 output_operand_lossage ("invalid operand for code '%c'", code);
21807 return;
21810 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21811 (get_arm_condition_code (x))],
21812 stream);
21813 return;
21815 case 's':
21816 case 'V':
21817 case 'W':
21818 case 'X':
21819 case 'Y':
21820 case 'Z':
21821 /* Former Maverick support, removed after GCC-4.7. */
21822 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21823 return;
21825 case 'U':
21826 if (!REG_P (x)
21827 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21828 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21829 /* Bad value for wCG register number. */
21831 output_operand_lossage ("invalid operand for code '%c'", code);
21832 return;
21835 else
21836 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21837 return;
21839 /* Print an iWMMXt control register name. */
21840 case 'w':
21841 if (!CONST_INT_P (x)
21842 || INTVAL (x) < 0
21843 || INTVAL (x) >= 16)
21844 /* Bad value for wC register number. */
21846 output_operand_lossage ("invalid operand for code '%c'", code);
21847 return;
21850 else
21852 static const char * wc_reg_names [16] =
21854 "wCID", "wCon", "wCSSF", "wCASF",
21855 "wC4", "wC5", "wC6", "wC7",
21856 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21857 "wC12", "wC13", "wC14", "wC15"
21860 fputs (wc_reg_names [INTVAL (x)], stream);
21862 return;
21864 /* Print the high single-precision register of a VFP double-precision
21865 register. */
21866 case 'p':
21868 machine_mode mode = GET_MODE (x);
21869 int regno;
21871 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21873 output_operand_lossage ("invalid operand for code '%c'", code);
21874 return;
21877 regno = REGNO (x);
21878 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21880 output_operand_lossage ("invalid operand for code '%c'", code);
21881 return;
21884 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21886 return;
21888 /* Print a VFP/Neon double precision or quad precision register name. */
21889 case 'P':
21890 case 'q':
21892 machine_mode mode = GET_MODE (x);
21893 int is_quad = (code == 'q');
21894 int regno;
21896 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21898 output_operand_lossage ("invalid operand for code '%c'", code);
21899 return;
21902 if (!REG_P (x)
21903 || !IS_VFP_REGNUM (REGNO (x)))
21905 output_operand_lossage ("invalid operand for code '%c'", code);
21906 return;
21909 regno = REGNO (x);
21910 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21911 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21913 output_operand_lossage ("invalid operand for code '%c'", code);
21914 return;
21917 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21918 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21920 return;
21922 /* These two codes print the low/high doubleword register of a Neon quad
21923 register, respectively. For pair-structure types, can also print
21924 low/high quadword registers. */
21925 case 'e':
21926 case 'f':
21928 machine_mode mode = GET_MODE (x);
21929 int regno;
21931 if ((GET_MODE_SIZE (mode) != 16
21932 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21934 output_operand_lossage ("invalid operand for code '%c'", code);
21935 return;
21938 regno = REGNO (x);
21939 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21941 output_operand_lossage ("invalid operand for code '%c'", code);
21942 return;
21945 if (GET_MODE_SIZE (mode) == 16)
21946 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21947 + (code == 'f' ? 1 : 0));
21948 else
21949 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21950 + (code == 'f' ? 1 : 0));
21952 return;
21954 /* Print a VFPv3 floating-point constant, represented as an integer
21955 index. */
21956 case 'G':
21958 int index = vfp3_const_double_index (x);
21959 gcc_assert (index != -1);
21960 fprintf (stream, "%d", index);
21962 return;
21964 /* Print bits representing opcode features for Neon.
21966 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21967 and polynomials as unsigned.
21969 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21971 Bit 2 is 1 for rounding functions, 0 otherwise. */
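/* Decoding example (added for clarity): for a signed, rounding integer
   operation the builtin expander passes bits = 5 (binary 101): bit 0 set
   means signed, bit 1 clear means ordinary integer, bit 2 set means
   rounding.  '%T' then prints 's' ("uspf"[1]), '%F' prints 'i', '%t'
   prints 's' and '%O' prints "r"; together these supply the type letter
   and the rounding marker in a mnemonic such as "vrshl.s32".  */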
21973 /* Identify the type as 's', 'u', 'p' or 'f'. */
21974 case 'T':
21976 HOST_WIDE_INT bits = INTVAL (x);
21977 fputc ("uspf"[bits & 3], stream);
21979 return;
21981 /* Likewise, but signed and unsigned integers are both 'i'. */
21982 case 'F':
21984 HOST_WIDE_INT bits = INTVAL (x);
21985 fputc ("iipf"[bits & 3], stream);
21987 return;
21989 /* As for 'T', but emit 'u' instead of 'p'. */
21990 case 't':
21992 HOST_WIDE_INT bits = INTVAL (x);
21993 fputc ("usuf"[bits & 3], stream);
21995 return;
21997 /* Bit 2: rounding (vs none). */
21998 case 'O':
22000 HOST_WIDE_INT bits = INTVAL (x);
22001 fputs ((bits & 4) != 0 ? "r" : "", stream);
22003 return;
22005 /* Memory operand for vld1/vst1 instruction. */
22006 case 'A':
22008 rtx addr;
22009 bool postinc = FALSE;
22010 rtx postinc_reg = NULL;
22011 unsigned align, memsize, align_bits;
22013 gcc_assert (MEM_P (x));
22014 addr = XEXP (x, 0);
22015 if (GET_CODE (addr) == POST_INC)
22017 postinc = 1;
22018 addr = XEXP (addr, 0);
22020 if (GET_CODE (addr) == POST_MODIFY)
22022 postinc_reg = XEXP (XEXP (addr, 1), 1);
22023 addr = XEXP (addr, 0);
22025 asm_fprintf (stream, "[%r", REGNO (addr));
22027 /* We know the alignment of this access, so we can emit a hint in the
22028 instruction (for some alignments) as an aid to the memory subsystem
22029 of the target. */
22030 align = MEM_ALIGN (x) >> 3;
22031 memsize = MEM_SIZE (x);
22033 /* Only certain alignment specifiers are supported by the hardware. */
22034 if (memsize == 32 && (align % 32) == 0)
22035 align_bits = 256;
22036 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22037 align_bits = 128;
22038 else if (memsize >= 8 && (align % 8) == 0)
22039 align_bits = 64;
22040 else
22041 align_bits = 0;
22043 if (align_bits != 0)
22044 asm_fprintf (stream, ":%d", align_bits);
22046 asm_fprintf (stream, "]");
22048 if (postinc)
22049 fputs("!", stream);
22050 if (postinc_reg)
22051 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22053 return;
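/* Example operand forms (for illustration): a 16-byte access through r0
   known to be 128-bit aligned prints as "[r0:128]"; an 8-byte POST_INC
   access through r1 with 64-bit alignment prints as "[r1:64]!"; a
   POST_MODIFY by a register prints as "[r2:64], r3".  Accesses smaller
   than 8 bytes, or without at least 64-bit alignment, get no ":<bits>"
   hint.  */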
22055 case 'C':
22057 rtx addr;
22059 gcc_assert (MEM_P (x));
22060 addr = XEXP (x, 0);
22061 gcc_assert (REG_P (addr));
22062 asm_fprintf (stream, "[%r]", REGNO (addr));
22064 return;
22066 /* Translate an S register number into a D register number and element index. */
22067 case 'y':
22069 machine_mode mode = GET_MODE (x);
22070 int regno;
22072 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22074 output_operand_lossage ("invalid operand for code '%c'", code);
22075 return;
22078 regno = REGNO (x);
22079 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22081 output_operand_lossage ("invalid operand for code '%c'", code);
22082 return;
22085 regno = regno - FIRST_VFP_REGNUM;
22086 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22088 return;
22090 case 'v':
22091 gcc_assert (CONST_DOUBLE_P (x));
22092 int result;
22093 result = vfp3_const_double_for_fract_bits (x);
22094 if (result == 0)
22095 result = vfp3_const_double_for_bits (x);
22096 fprintf (stream, "#%d", result);
22097 return;
22099 /* Register specifier for vld1.16/vst1.16. Translate the S register
22100 number into a D register number and element index. */
22101 case 'z':
22103 machine_mode mode = GET_MODE (x);
22104 int regno;
22106 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 regno = REGNO (x);
22113 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22115 output_operand_lossage ("invalid operand for code '%c'", code);
22116 return;
22119 regno = regno - FIRST_VFP_REGNUM;
22120 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22122 return;
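/* Example (for illustration): s5 overlays the high half of d2, so a %y
   operand prints it as "d2[1]" (32-bit lane 1) and a %z operand prints
   it as "d2[2]" (the 16-bit lane index used by vld1.16/vst1.16).  */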
22124 default:
22125 if (x == 0)
22127 output_operand_lossage ("missing operand");
22128 return;
22131 switch (GET_CODE (x))
22133 case REG:
22134 asm_fprintf (stream, "%r", REGNO (x));
22135 break;
22137 case MEM:
22138 output_memory_reference_mode = GET_MODE (x);
22139 output_address (XEXP (x, 0));
22140 break;
22142 case CONST_DOUBLE:
22144 char fpstr[20];
22145 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22146 sizeof (fpstr), 0, 1);
22147 fprintf (stream, "#%s", fpstr);
22149 break;
22151 default:
22152 gcc_assert (GET_CODE (x) != NEG);
22153 fputc ('#', stream);
22154 if (GET_CODE (x) == HIGH)
22156 fputs (":lower16:", stream);
22157 x = XEXP (x, 0);
22160 output_addr_const (stream, x);
22161 break;
22166 /* Target hook for printing a memory address. */
22167 static void
22168 arm_print_operand_address (FILE *stream, rtx x)
22170 if (TARGET_32BIT)
22172 int is_minus = GET_CODE (x) == MINUS;
22174 if (REG_P (x))
22175 asm_fprintf (stream, "[%r]", REGNO (x));
22176 else if (GET_CODE (x) == PLUS || is_minus)
22178 rtx base = XEXP (x, 0);
22179 rtx index = XEXP (x, 1);
22180 HOST_WIDE_INT offset = 0;
22181 if (!REG_P (base)
22182 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22184 /* Ensure that BASE is a register. */
22185 /* (one of them must be). */
22186 /* Also ensure the SP is not used as an index register. */
22187 rtx temp = base;
22188 base = index;
22189 index = temp;
22191 switch (GET_CODE (index))
22193 case CONST_INT:
22194 offset = INTVAL (index);
22195 if (is_minus)
22196 offset = -offset;
22197 asm_fprintf (stream, "[%r, #%wd]",
22198 REGNO (base), offset);
22199 break;
22201 case REG:
22202 asm_fprintf (stream, "[%r, %s%r]",
22203 REGNO (base), is_minus ? "-" : "",
22204 REGNO (index));
22205 break;
22207 case MULT:
22208 case ASHIFTRT:
22209 case LSHIFTRT:
22210 case ASHIFT:
22211 case ROTATERT:
22213 asm_fprintf (stream, "[%r, %s%r",
22214 REGNO (base), is_minus ? "-" : "",
22215 REGNO (XEXP (index, 0)));
22216 arm_print_operand (stream, index, 'S');
22217 fputs ("]", stream);
22218 break;
22221 default:
22222 gcc_unreachable ();
22225 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22226 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22228 extern machine_mode output_memory_reference_mode;
22230 gcc_assert (REG_P (XEXP (x, 0)));
22232 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22233 asm_fprintf (stream, "[%r, #%s%d]!",
22234 REGNO (XEXP (x, 0)),
22235 GET_CODE (x) == PRE_DEC ? "-" : "",
22236 GET_MODE_SIZE (output_memory_reference_mode));
22237 else
22238 asm_fprintf (stream, "[%r], #%s%d",
22239 REGNO (XEXP (x, 0)),
22240 GET_CODE (x) == POST_DEC ? "-" : "",
22241 GET_MODE_SIZE (output_memory_reference_mode));
22243 else if (GET_CODE (x) == PRE_MODIFY)
22245 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22246 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22247 asm_fprintf (stream, "#%wd]!",
22248 INTVAL (XEXP (XEXP (x, 1), 1)));
22249 else
22250 asm_fprintf (stream, "%r]!",
22251 REGNO (XEXP (XEXP (x, 1), 1)));
22253 else if (GET_CODE (x) == POST_MODIFY)
22255 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22256 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22257 asm_fprintf (stream, "#%wd",
22258 INTVAL (XEXP (XEXP (x, 1), 1)));
22259 else
22260 asm_fprintf (stream, "%r",
22261 REGNO (XEXP (XEXP (x, 1), 1)));
22263 else output_addr_const (stream, x);
22265 else
22267 if (REG_P (x))
22268 asm_fprintf (stream, "[%r]", REGNO (x));
22269 else if (GET_CODE (x) == POST_INC)
22270 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22271 else if (GET_CODE (x) == PLUS)
22273 gcc_assert (REG_P (XEXP (x, 0)));
22274 if (CONST_INT_P (XEXP (x, 1)))
22275 asm_fprintf (stream, "[%r, #%wd]",
22276 REGNO (XEXP (x, 0)),
22277 INTVAL (XEXP (x, 1)));
22278 else
22279 asm_fprintf (stream, "[%r, %r]",
22280 REGNO (XEXP (x, 0)),
22281 REGNO (XEXP (x, 1)));
22283 else
22284 output_addr_const (stream, x);
22288 /* Target hook for indicating whether a punctuation character for
22289 TARGET_PRINT_OPERAND is valid. */
22290 static bool
22291 arm_print_operand_punct_valid_p (unsigned char code)
22293 return (code == '@' || code == '|' || code == '.'
22294 || code == '(' || code == ')' || code == '#'
22295 || (TARGET_32BIT && (code == '?'))
22296 || (TARGET_THUMB2 && (code == '!'))
22297 || (TARGET_THUMB && (code == '_')));
22300 /* Target hook for assembling integer objects. The ARM version needs to
22301 handle word-sized values specially. */
22302 static bool
22303 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22305 machine_mode mode;
22307 if (size == UNITS_PER_WORD && aligned_p)
22309 fputs ("\t.word\t", asm_out_file);
22310 output_addr_const (asm_out_file, x);
22312 /* Mark symbols as position independent. We only do this in the
22313 .text segment, not in the .data segment. */
22314 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22315 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22317 /* See legitimize_pic_address for an explanation of the
22318 TARGET_VXWORKS_RTP check. */
22319 if (!arm_pic_data_is_text_relative
22320 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22321 fputs ("(GOT)", asm_out_file);
22322 else
22323 fputs ("(GOTOFF)", asm_out_file);
22325 fputc ('\n', asm_out_file);
22326 return true;
22329 mode = GET_MODE (x);
22331 if (arm_vector_mode_supported_p (mode))
22333 int i, units;
22335 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22337 units = CONST_VECTOR_NUNITS (x);
22338 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22340 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22341 for (i = 0; i < units; i++)
22343 rtx elt = CONST_VECTOR_ELT (x, i);
22344 assemble_integer
22345 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22347 else
22348 for (i = 0; i < units; i++)
22350 rtx elt = CONST_VECTOR_ELT (x, i);
22351 REAL_VALUE_TYPE rval;
22353 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22355 assemble_real
22356 (rval, GET_MODE_INNER (mode),
22357 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22360 return true;
22363 return default_assemble_integer (x, size, aligned_p);
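/* For illustration: when emitting a constant-pool word under -fPIC, a
   reference to a local symbol "sym" comes out as "\t.word\tsym(GOTOFF)"
   if PIC data is assumed to be text-relative, while a non-local symbol
   (or a build where that assumption is disabled) uses
   "\t.word\tsym(GOT)" instead.  */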
22366 static void
22367 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22369 section *s;
22371 if (!TARGET_AAPCS_BASED)
22373 (is_ctor ?
22374 default_named_section_asm_out_constructor
22375 : default_named_section_asm_out_destructor) (symbol, priority);
22376 return;
22379 /* Put these in the .init_array section, using a special relocation. */
22380 if (priority != DEFAULT_INIT_PRIORITY)
22382 char buf[18];
22383 sprintf (buf, "%s.%.5u",
22384 is_ctor ? ".init_array" : ".fini_array",
22385 priority);
22386 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22388 else if (is_ctor)
22389 s = ctors_section;
22390 else
22391 s = dtors_section;
22393 switch_to_section (s);
22394 assemble_align (POINTER_SIZE);
22395 fputs ("\t.word\t", asm_out_file);
22396 output_addr_const (asm_out_file, symbol);
22397 fputs ("(target1)\n", asm_out_file);
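/* For example (illustrative; "my_ctor" is a made-up name): a constructor
   registered with priority 101 is placed in section ".init_array.00101"
   (the "%.5u" zero-pads the priority) and emitted as
       .word   my_ctor(target1)
   so the (target1) annotation lets the linker resolve the entry as either
   an absolute or a relative address.  */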
22400 /* Add a function to the list of static constructors. */
22402 static void
22403 arm_elf_asm_constructor (rtx symbol, int priority)
22405 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22408 /* Add a function to the list of static destructors. */
22410 static void
22411 arm_elf_asm_destructor (rtx symbol, int priority)
22413 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22416 /* A finite state machine takes care of noticing whether or not instructions
22417 can be conditionally executed, and thus decreases execution time and code
22418 size by deleting branch instructions. The fsm is controlled by
22419 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22421 /* The states of the fsm controlling condition codes are:
22422 0: normal, do nothing special
22423 1: make ASM_OUTPUT_OPCODE not output this instruction
22424 2: make ASM_OUTPUT_OPCODE not output this instruction
22425 3: make instructions conditional
22426 4: make instructions conditional
22428 State transitions (state->state by whom under condition):
22429 0 -> 1 final_prescan_insn if the `target' is a label
22430 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22431 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22432 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22433 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22434 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22435 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22436 (the target insn is arm_target_insn).
22438 If the jump clobbers the conditions then we use states 2 and 4.
22440 A similar thing can be done with conditional return insns.
22442 XXX In case the `target' is an unconditional branch, this conditionalising
22443 of the instructions always reduces code size, but not always execution
22444 time. But then, I want to reduce the code size to somewhere near what
22445 /bin/cc produces. */
22447 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22448 instructions. When a COND_EXEC instruction is seen the subsequent
22449 instructions are scanned so that multiple conditional instructions can be
22450 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22451 specify the length and true/false mask for the IT block. These will be
22452 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
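/* Worked example (for exposition; the exact output depends on the
   surrounding code and options).  In ARM state, for a fragment compiled to

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   arm_final_prescan_insn sees the conditional branch over a single
   suitable insn and enters state 1; ASM_OUTPUT_OPCODE suppresses the
   branch and moves to state 3, so the skipped insn is emitted as
   "addne r1, r1, #1"; reaching .L1 returns the state to 0.  */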
22454 /* Returns the index of the ARM condition code string in
22455 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22456 COMPARISON should be an rtx like `(eq (...) (...))'. */
22458 enum arm_cond_code
22459 maybe_get_arm_condition_code (rtx comparison)
22461 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22462 enum arm_cond_code code;
22463 enum rtx_code comp_code = GET_CODE (comparison);
22465 if (GET_MODE_CLASS (mode) != MODE_CC)
22466 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22467 XEXP (comparison, 1));
22469 switch (mode)
22471 case CC_DNEmode: code = ARM_NE; goto dominance;
22472 case CC_DEQmode: code = ARM_EQ; goto dominance;
22473 case CC_DGEmode: code = ARM_GE; goto dominance;
22474 case CC_DGTmode: code = ARM_GT; goto dominance;
22475 case CC_DLEmode: code = ARM_LE; goto dominance;
22476 case CC_DLTmode: code = ARM_LT; goto dominance;
22477 case CC_DGEUmode: code = ARM_CS; goto dominance;
22478 case CC_DGTUmode: code = ARM_HI; goto dominance;
22479 case CC_DLEUmode: code = ARM_LS; goto dominance;
22480 case CC_DLTUmode: code = ARM_CC;
22482 dominance:
22483 if (comp_code == EQ)
22484 return ARM_INVERSE_CONDITION_CODE (code);
22485 if (comp_code == NE)
22486 return code;
22487 return ARM_NV;
22489 case CC_NOOVmode:
22490 switch (comp_code)
22492 case NE: return ARM_NE;
22493 case EQ: return ARM_EQ;
22494 case GE: return ARM_PL;
22495 case LT: return ARM_MI;
22496 default: return ARM_NV;
22499 case CC_Zmode:
22500 switch (comp_code)
22502 case NE: return ARM_NE;
22503 case EQ: return ARM_EQ;
22504 default: return ARM_NV;
22507 case CC_Nmode:
22508 switch (comp_code)
22510 case NE: return ARM_MI;
22511 case EQ: return ARM_PL;
22512 default: return ARM_NV;
22515 case CCFPEmode:
22516 case CCFPmode:
22517 /* We can handle all cases except UNEQ and LTGT. */
22518 switch (comp_code)
22520 case GE: return ARM_GE;
22521 case GT: return ARM_GT;
22522 case LE: return ARM_LS;
22523 case LT: return ARM_MI;
22524 case NE: return ARM_NE;
22525 case EQ: return ARM_EQ;
22526 case ORDERED: return ARM_VC;
22527 case UNORDERED: return ARM_VS;
22528 case UNLT: return ARM_LT;
22529 case UNLE: return ARM_LE;
22530 case UNGT: return ARM_HI;
22531 case UNGE: return ARM_PL;
22532 /* UNEQ and LTGT do not have a representation. */
22533 case UNEQ: /* Fall through. */
22534 case LTGT: /* Fall through. */
22535 default: return ARM_NV;
22538 case CC_SWPmode:
22539 switch (comp_code)
22541 case NE: return ARM_NE;
22542 case EQ: return ARM_EQ;
22543 case GE: return ARM_LE;
22544 case GT: return ARM_LT;
22545 case LE: return ARM_GE;
22546 case LT: return ARM_GT;
22547 case GEU: return ARM_LS;
22548 case GTU: return ARM_CC;
22549 case LEU: return ARM_CS;
22550 case LTU: return ARM_HI;
22551 default: return ARM_NV;
22554 case CC_Cmode:
22555 switch (comp_code)
22557 case LTU: return ARM_CS;
22558 case GEU: return ARM_CC;
22559 default: return ARM_NV;
22562 case CC_CZmode:
22563 switch (comp_code)
22565 case NE: return ARM_NE;
22566 case EQ: return ARM_EQ;
22567 case GEU: return ARM_CS;
22568 case GTU: return ARM_HI;
22569 case LEU: return ARM_LS;
22570 case LTU: return ARM_CC;
22571 default: return ARM_NV;
22574 case CC_NCVmode:
22575 switch (comp_code)
22577 case GE: return ARM_GE;
22578 case LT: return ARM_LT;
22579 case GEU: return ARM_CS;
22580 case LTU: return ARM_CC;
22581 default: return ARM_NV;
22584 case CCmode:
22585 switch (comp_code)
22587 case NE: return ARM_NE;
22588 case EQ: return ARM_EQ;
22589 case GE: return ARM_GE;
22590 case GT: return ARM_GT;
22591 case LE: return ARM_LE;
22592 case LT: return ARM_LT;
22593 case GEU: return ARM_CS;
22594 case GTU: return ARM_HI;
22595 case LEU: return ARM_LS;
22596 case LTU: return ARM_CC;
22597 default: return ARM_NV;
22600 default: gcc_unreachable ();
22604 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22605 static enum arm_cond_code
22606 get_arm_condition_code (rtx comparison)
22608 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22609 gcc_assert (code != ARM_NV);
22610 return code;
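/* Usage example (for exposition): for the comparison rtx
   (ne (reg:CC_Z CC_REGNUM) (const_int 0)) this returns ARM_NE, which
   indexes arm_condition_codes[] to give the "ne" suffix appended to a
   conditionally executed instruction.  */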
22613 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22614 instructions. */
22615 void
22616 thumb2_final_prescan_insn (rtx_insn *insn)
22618 rtx_insn *first_insn = insn;
22619 rtx body = PATTERN (insn);
22620 rtx predicate;
22621 enum arm_cond_code code;
22622 int n;
22623 int mask;
22624 int max;
22626 /* max_insns_skipped in the tune was already taken into account in the
22627 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22628 just emit the IT blocks as we can. It does not make sense to split
22629 the IT blocks. */
22630 max = MAX_INSN_PER_IT_BLOCK;
22632 /* Remove the previous insn from the count of insns to be output. */
22633 if (arm_condexec_count)
22634 arm_condexec_count--;
22636 /* Nothing to do if we are already inside a conditional block. */
22637 if (arm_condexec_count)
22638 return;
22640 if (GET_CODE (body) != COND_EXEC)
22641 return;
22643 /* Conditional jumps are implemented directly. */
22644 if (JUMP_P (insn))
22645 return;
22647 predicate = COND_EXEC_TEST (body);
22648 arm_current_cc = get_arm_condition_code (predicate);
22650 n = get_attr_ce_count (insn);
22651 arm_condexec_count = 1;
22652 arm_condexec_mask = (1 << n) - 1;
22653 arm_condexec_masklen = n;
22654 /* See if subsequent instructions can be combined into the same block. */
22655 for (;;)
22657 insn = next_nonnote_insn (insn);
22659 /* Jumping into the middle of an IT block is illegal, so a label or
22660 barrier terminates the block. */
22661 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22662 break;
22664 body = PATTERN (insn);
22665 /* USE and CLOBBER aren't really insns, so just skip them. */
22666 if (GET_CODE (body) == USE
22667 || GET_CODE (body) == CLOBBER)
22668 continue;
22670 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22671 if (GET_CODE (body) != COND_EXEC)
22672 break;
22673 /* Maximum number of conditionally executed instructions in a block. */
22674 n = get_attr_ce_count (insn);
22675 if (arm_condexec_masklen + n > max)
22676 break;
22678 predicate = COND_EXEC_TEST (body);
22679 code = get_arm_condition_code (predicate);
22680 mask = (1 << n) - 1;
22681 if (arm_current_cc == code)
22682 arm_condexec_mask |= (mask << arm_condexec_masklen);
22683 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22684 break;
22686 arm_condexec_count++;
22687 arm_condexec_masklen += n;
22689 /* A jump must be the last instruction in a conditional block. */
22690 if (JUMP_P (insn))
22691 break;
22693 /* Restore recog_data (getting the attributes of other insns can
22694 destroy this array, but final.c assumes that it remains intact
22695 across this call). */
22696 extract_constrain_insn_cached (first_insn);
22699 void
22700 arm_final_prescan_insn (rtx_insn *insn)
22702 /* BODY will hold the body of INSN. */
22703 rtx body = PATTERN (insn);
22705 /* This will be 1 if trying to repeat the trick, and things need to be
22706 reversed if it appears to fail. */
22707 int reverse = 0;
22709 /* If we start with a return insn, we only succeed if we find another one. */
22710 int seeking_return = 0;
22711 enum rtx_code return_code = UNKNOWN;
22713 /* START_INSN will hold the insn from where we start looking. This is the
22714 first insn after the following code_label if REVERSE is true. */
22715 rtx_insn *start_insn = insn;
22717 /* If in state 4, check if the target branch is reached, in order to
22718 change back to state 0. */
22719 if (arm_ccfsm_state == 4)
22721 if (insn == arm_target_insn)
22723 arm_target_insn = NULL;
22724 arm_ccfsm_state = 0;
22726 return;
22729 /* If in state 3, it is possible to repeat the trick, if this insn is an
22730 unconditional branch to a label, and immediately following this branch
22731 is the previous target label which is only used once, and the label this
22732 branch jumps to is not too far off. */
22733 if (arm_ccfsm_state == 3)
22735 if (simplejump_p (insn))
22737 start_insn = next_nonnote_insn (start_insn);
22738 if (BARRIER_P (start_insn))
22740 /* XXX Isn't this always a barrier? */
22741 start_insn = next_nonnote_insn (start_insn);
22743 if (LABEL_P (start_insn)
22744 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22745 && LABEL_NUSES (start_insn) == 1)
22746 reverse = TRUE;
22747 else
22748 return;
22750 else if (ANY_RETURN_P (body))
22752 start_insn = next_nonnote_insn (start_insn);
22753 if (BARRIER_P (start_insn))
22754 start_insn = next_nonnote_insn (start_insn);
22755 if (LABEL_P (start_insn)
22756 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22757 && LABEL_NUSES (start_insn) == 1)
22759 reverse = TRUE;
22760 seeking_return = 1;
22761 return_code = GET_CODE (body);
22763 else
22764 return;
22766 else
22767 return;
22770 gcc_assert (!arm_ccfsm_state || reverse);
22771 if (!JUMP_P (insn))
22772 return;
22774 /* This jump might be paralleled with a clobber of the condition codes;
22775 the jump should always come first. */
22776 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22777 body = XVECEXP (body, 0, 0);
22779 if (reverse
22780 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22781 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22783 int insns_skipped;
22784 int fail = FALSE, succeed = FALSE;
22785 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22786 int then_not_else = TRUE;
22787 rtx_insn *this_insn = start_insn;
22788 rtx label = 0;
22790 /* Register the insn jumped to. */
22791 if (reverse)
22793 if (!seeking_return)
22794 label = XEXP (SET_SRC (body), 0);
22796 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22797 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22798 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22800 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22801 then_not_else = FALSE;
22803 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22805 seeking_return = 1;
22806 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22808 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22810 seeking_return = 1;
22811 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22812 then_not_else = FALSE;
22814 else
22815 gcc_unreachable ();
22817 /* See how many insns this branch skips, and what kind of insns. If all
22818 insns are okay, and the label or unconditional branch to the same
22819 label is not too far away, succeed. */
22820 for (insns_skipped = 0;
22821 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22823 rtx scanbody;
22825 this_insn = next_nonnote_insn (this_insn);
22826 if (!this_insn)
22827 break;
22829 switch (GET_CODE (this_insn))
22831 case CODE_LABEL:
22832 /* Succeed if it is the target label, otherwise fail since
22833 control falls in from somewhere else. */
22834 if (this_insn == label)
22836 arm_ccfsm_state = 1;
22837 succeed = TRUE;
22839 else
22840 fail = TRUE;
22841 break;
22843 case BARRIER:
22844 /* Succeed if the following insn is the target label.
22845 Otherwise fail.
22846 If return insns are used then the last insn in a function
22847 will be a barrier. */
22848 this_insn = next_nonnote_insn (this_insn);
22849 if (this_insn && this_insn == label)
22851 arm_ccfsm_state = 1;
22852 succeed = TRUE;
22854 else
22855 fail = TRUE;
22856 break;
22858 case CALL_INSN:
22859 /* The AAPCS says that conditional calls should not be
22860 used since they make interworking inefficient (the
22861 linker can't transform BL<cond> into BLX). That's
22862 only a problem if the machine has BLX. */
22863 if (arm_arch5)
22865 fail = TRUE;
22866 break;
22869 /* Succeed if the following insn is the target label, or
22870 if the following two insns are a barrier and the
22871 target label. */
22872 this_insn = next_nonnote_insn (this_insn);
22873 if (this_insn && BARRIER_P (this_insn))
22874 this_insn = next_nonnote_insn (this_insn);
22876 if (this_insn && this_insn == label
22877 && insns_skipped < max_insns_skipped)
22879 arm_ccfsm_state = 1;
22880 succeed = TRUE;
22882 else
22883 fail = TRUE;
22884 break;
22886 case JUMP_INSN:
22887 /* If this is an unconditional branch to the same label, succeed.
22888 If it is to another label, do nothing. If it is conditional,
22889 fail. */
22890 /* XXX Probably, the tests for SET and the PC are
22891 unnecessary. */
22893 scanbody = PATTERN (this_insn);
22894 if (GET_CODE (scanbody) == SET
22895 && GET_CODE (SET_DEST (scanbody)) == PC)
22897 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22898 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22900 arm_ccfsm_state = 2;
22901 succeed = TRUE;
22903 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22904 fail = TRUE;
22906 /* Fail if a conditional return is undesirable (e.g. on a
22907 StrongARM), but still allow this if optimizing for size. */
22908 else if (GET_CODE (scanbody) == return_code
22909 && !use_return_insn (TRUE, NULL)
22910 && !optimize_size)
22911 fail = TRUE;
22912 else if (GET_CODE (scanbody) == return_code)
22914 arm_ccfsm_state = 2;
22915 succeed = TRUE;
22917 else if (GET_CODE (scanbody) == PARALLEL)
22919 switch (get_attr_conds (this_insn))
22921 case CONDS_NOCOND:
22922 break;
22923 default:
22924 fail = TRUE;
22925 break;
22928 else
22929 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22931 break;
22933 case INSN:
22934 /* Instructions using or affecting the condition codes make it
22935 fail. */
22936 scanbody = PATTERN (this_insn);
22937 if (!(GET_CODE (scanbody) == SET
22938 || GET_CODE (scanbody) == PARALLEL)
22939 || get_attr_conds (this_insn) != CONDS_NOCOND)
22940 fail = TRUE;
22941 break;
22943 default:
22944 break;
22947 if (succeed)
22949 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22950 arm_target_label = CODE_LABEL_NUMBER (label);
22951 else
22953 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22955 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22957 this_insn = next_nonnote_insn (this_insn);
22958 gcc_assert (!this_insn
22959 || (!BARRIER_P (this_insn)
22960 && !LABEL_P (this_insn)));
22962 if (!this_insn)
22964 /* Oh, dear! We ran off the end... give up. */
22965 extract_constrain_insn_cached (insn);
22966 arm_ccfsm_state = 0;
22967 arm_target_insn = NULL;
22968 return;
22970 arm_target_insn = this_insn;
22973 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22974 what it was. */
22975 if (!reverse)
22976 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22978 if (reverse || then_not_else)
22979 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22982 /* Restore recog_data (getting the attributes of other insns can
22983 destroy this array, but final.c assumes that it remains intact
22984 across this call). */
22985 extract_constrain_insn_cached (insn);
22989 /* Output IT instructions. */
22990 void
22991 thumb2_asm_output_opcode (FILE * stream)
22993 char buff[5];
22994 int n;
22996 if (arm_condexec_mask)
22998 for (n = 0; n < arm_condexec_masklen; n++)
22999 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23000 buff[n] = 0;
23001 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23002 arm_condition_codes[arm_current_cc]);
23003 arm_condexec_mask = 0;
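/* Worked example (for exposition): for three consecutive conditional
   insns predicated EQ, EQ, NE, thumb2_final_prescan_insn leaves
   arm_condexec_masklen == 3 and arm_condexec_mask == 3 (binary 011),
   so buff becomes "tte" and this function prints "itte\teq" ahead of
   the three predicated instructions.  */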
23007 /* Returns true if REGNO is a valid register
23008 for holding a quantity of type MODE. */
23009 int
23010 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23012 if (GET_MODE_CLASS (mode) == MODE_CC)
23013 return (regno == CC_REGNUM
23014 || (TARGET_HARD_FLOAT && TARGET_VFP
23015 && regno == VFPCC_REGNUM));
23017 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23018 return false;
23020 if (TARGET_THUMB1)
23021 /* For the Thumb we only allow values bigger than SImode in
23022 registers 0 - 6, so that there is always a second low
23023 register available to hold the upper part of the value.
23024 We probably ought to ensure that the register is the
23025 start of an even numbered register pair. */
23026 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23028 if (TARGET_HARD_FLOAT && TARGET_VFP
23029 && IS_VFP_REGNUM (regno))
23031 if (mode == SFmode || mode == SImode)
23032 return VFP_REGNO_OK_FOR_SINGLE (regno);
23034 if (mode == DFmode)
23035 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23037 /* VFP registers can hold HFmode values, but there is no point in
23038 putting them there unless we have hardware conversion insns. */
23039 if (mode == HFmode)
23040 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23042 if (TARGET_NEON)
23043 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23044 || (VALID_NEON_QREG_MODE (mode)
23045 && NEON_REGNO_OK_FOR_QUAD (regno))
23046 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23047 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23048 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23049 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23050 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23052 return FALSE;
23055 if (TARGET_REALLY_IWMMXT)
23057 if (IS_IWMMXT_GR_REGNUM (regno))
23058 return mode == SImode;
23060 if (IS_IWMMXT_REGNUM (regno))
23061 return VALID_IWMMXT_REG_MODE (mode);
23064 /* We allow almost any value to be stored in the general registers.
23065 Restrict doubleword quantities to even register pairs in ARM state
23066 so that we can use ldrd. Do not allow very large Neon structure
23067 opaque modes in general registers; they would use too many. */
23068 if (regno <= LAST_ARM_REGNUM)
23070 if (ARM_NUM_REGS (mode) > 4)
23071 return FALSE;
23073 if (TARGET_THUMB2)
23074 return TRUE;
23076 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23079 if (regno == FRAME_POINTER_REGNUM
23080 || regno == ARG_POINTER_REGNUM)
23081 /* We only allow integers in the fake hard registers. */
23082 return GET_MODE_CLASS (mode) == MODE_INT;
23084 return FALSE;
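/* Examples (for illustration): on a core with LDRD, a DImode value may
   live in {r0,r1} or {r2,r3} but not in {r1,r2}, because doubleword
   quantities are restricted to even/odd pairs in ARM state; large Neon
   structure modes such as CImode span more than four words and are
   therefore never allocated to core registers at all.  */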
23087 /* Implement MODES_TIEABLE_P. */
23089 bool
23090 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23092 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23093 return true;
23095 /* We specifically want to allow elements of "structure" modes to
23096 be tieable to the structure. This more general condition allows
23097 other rarer situations too. */
23098 if (TARGET_NEON
23099 && (VALID_NEON_DREG_MODE (mode1)
23100 || VALID_NEON_QREG_MODE (mode1)
23101 || VALID_NEON_STRUCT_MODE (mode1))
23102 && (VALID_NEON_DREG_MODE (mode2)
23103 || VALID_NEON_QREG_MODE (mode2)
23104 || VALID_NEON_STRUCT_MODE (mode2)))
23105 return true;
23107 return false;
23110 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23111 not used in arm mode. */
23113 enum reg_class
23114 arm_regno_class (int regno)
23116 if (regno == PC_REGNUM)
23117 return NO_REGS;
23119 if (TARGET_THUMB1)
23121 if (regno == STACK_POINTER_REGNUM)
23122 return STACK_REG;
23123 if (regno == CC_REGNUM)
23124 return CC_REG;
23125 if (regno < 8)
23126 return LO_REGS;
23127 return HI_REGS;
23130 if (TARGET_THUMB2 && regno < 8)
23131 return LO_REGS;
23133 if ( regno <= LAST_ARM_REGNUM
23134 || regno == FRAME_POINTER_REGNUM
23135 || regno == ARG_POINTER_REGNUM)
23136 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23138 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23139 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23141 if (IS_VFP_REGNUM (regno))
23143 if (regno <= D7_VFP_REGNUM)
23144 return VFP_D0_D7_REGS;
23145 else if (regno <= LAST_LO_VFP_REGNUM)
23146 return VFP_LO_REGS;
23147 else
23148 return VFP_HI_REGS;
23151 if (IS_IWMMXT_REGNUM (regno))
23152 return IWMMXT_REGS;
23154 if (IS_IWMMXT_GR_REGNUM (regno))
23155 return IWMMXT_GR_REGS;
23157 return NO_REGS;
23160 /* Handle a special case when computing the offset
23161 of an argument from the frame pointer. */
23162 int
23163 arm_debugger_arg_offset (int value, rtx addr)
23165 rtx_insn *insn;
23167 /* We are only interested if dbxout_parms() failed to compute the offset. */
23168 if (value != 0)
23169 return 0;
23171 /* We can only cope with the case where the address is held in a register. */
23172 if (!REG_P (addr))
23173 return 0;
23175 /* If we are using the frame pointer to point at the argument, then
23176 an offset of 0 is correct. */
23177 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23178 return 0;
23180 /* If we are using the stack pointer to point at the
23181 argument, then an offset of 0 is correct. */
23182 /* ??? Check this is consistent with thumb2 frame layout. */
23183 if ((TARGET_THUMB || !frame_pointer_needed)
23184 && REGNO (addr) == SP_REGNUM)
23185 return 0;
23187 /* Oh dear. The argument is pointed to by a register rather
23188 than being held in a register, or being stored at a known
23189 offset from the frame pointer. Since GDB only understands
23190 those two kinds of argument we must translate the address
23191 held in the register into an offset from the frame pointer.
23192 We do this by searching through the insns for the function
23193 looking to see where this register gets its value. If the
23194 register is initialized from the frame pointer plus an offset
23195 then we are in luck and we can continue, otherwise we give up.
23197 This code is exercised by producing debugging information
23198 for a function with arguments like this:
23200 double func (double a, double b, int c, double d) {return d;}
23202 Without this code the stab for parameter 'd' will be set to
23203 an offset of 0 from the frame pointer, rather than 8. */
23205 /* The if() statement says:
23207 If the insn is a normal instruction
23208 and if the insn is setting the value in a register
23209 and if the register being set is the register holding the address of the argument
23210 and if the address is computed by an addition
23211 that involves adding to a register
23212 which is the frame pointer
23213 a constant integer
23215 then... */
23217 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23219 if ( NONJUMP_INSN_P (insn)
23220 && GET_CODE (PATTERN (insn)) == SET
23221 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23222 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23223 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23224 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23225 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23228 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23230 break;
23234 if (value == 0)
23236 debug_rtx (addr);
23237 warning (0, "unable to compute real location of stacked parameter");
23238 value = 8; /* XXX magic hack */
23241 return value;
23244 typedef enum {
23245 T_V8QI,
23246 T_V4HI,
23247 T_V4HF,
23248 T_V2SI,
23249 T_V2SF,
23250 T_DI,
23251 T_V16QI,
23252 T_V8HI,
23253 T_V4SI,
23254 T_V4SF,
23255 T_V2DI,
23256 T_TI,
23257 T_EI,
23258 T_OI,
23259 T_MAX /* Size of enum. Keep last. */
23260 } neon_builtin_type_mode;
23262 #define TYPE_MODE_BIT(X) (1 << (X))
23264 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23265 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23266 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23267 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23268 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23269 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23271 #define v8qi_UP T_V8QI
23272 #define v4hi_UP T_V4HI
23273 #define v4hf_UP T_V4HF
23274 #define v2si_UP T_V2SI
23275 #define v2sf_UP T_V2SF
23276 #define di_UP T_DI
23277 #define v16qi_UP T_V16QI
23278 #define v8hi_UP T_V8HI
23279 #define v4si_UP T_V4SI
23280 #define v4sf_UP T_V4SF
23281 #define v2di_UP T_V2DI
23282 #define ti_UP T_TI
23283 #define ei_UP T_EI
23284 #define oi_UP T_OI
23286 #define UP(X) X##_UP
23288 typedef enum {
23289 NEON_BINOP,
23290 NEON_TERNOP,
23291 NEON_UNOP,
23292 NEON_BSWAP,
23293 NEON_GETLANE,
23294 NEON_SETLANE,
23295 NEON_CREATE,
23296 NEON_RINT,
23297 NEON_COPYSIGNF,
23298 NEON_DUP,
23299 NEON_DUPLANE,
23300 NEON_COMBINE,
23301 NEON_SPLIT,
23302 NEON_LANEMUL,
23303 NEON_LANEMULL,
23304 NEON_LANEMULH,
23305 NEON_LANEMAC,
23306 NEON_SCALARMUL,
23307 NEON_SCALARMULL,
23308 NEON_SCALARMULH,
23309 NEON_SCALARMAC,
23310 NEON_CONVERT,
23311 NEON_FLOAT_WIDEN,
23312 NEON_FLOAT_NARROW,
23313 NEON_FIXCONV,
23314 NEON_SELECT,
23315 NEON_REINTERP,
23316 NEON_VTBL,
23317 NEON_VTBX,
23318 NEON_LOAD1,
23319 NEON_LOAD1LANE,
23320 NEON_STORE1,
23321 NEON_STORE1LANE,
23322 NEON_LOADSTRUCT,
23323 NEON_LOADSTRUCTLANE,
23324 NEON_STORESTRUCT,
23325 NEON_STORESTRUCTLANE,
23326 NEON_LOGICBINOP,
23327 NEON_SHIFTINSERT,
23328 NEON_SHIFTIMM,
23329 NEON_SHIFTACC
23330 } neon_itype;
23332 typedef struct {
23333 const char *name;
23334 const neon_itype itype;
23335 const neon_builtin_type_mode mode;
23336 const enum insn_code code;
23337 unsigned int fcode;
23338 } neon_builtin_datum;
23340 #define CF(N,X) CODE_FOR_neon_##N##X
23342 #define VAR1(T, N, A) \
23343 {#N, NEON_##T, UP (A), CF (N, A), 0}
23344 #define VAR2(T, N, A, B) \
23345 VAR1 (T, N, A), \
23346 {#N, NEON_##T, UP (B), CF (N, B), 0}
23347 #define VAR3(T, N, A, B, C) \
23348 VAR2 (T, N, A, B), \
23349 {#N, NEON_##T, UP (C), CF (N, C), 0}
23350 #define VAR4(T, N, A, B, C, D) \
23351 VAR3 (T, N, A, B, C), \
23352 {#N, NEON_##T, UP (D), CF (N, D), 0}
23353 #define VAR5(T, N, A, B, C, D, E) \
23354 VAR4 (T, N, A, B, C, D), \
23355 {#N, NEON_##T, UP (E), CF (N, E), 0}
23356 #define VAR6(T, N, A, B, C, D, E, F) \
23357 VAR5 (T, N, A, B, C, D, E), \
23358 {#N, NEON_##T, UP (F), CF (N, F), 0}
23359 #define VAR7(T, N, A, B, C, D, E, F, G) \
23360 VAR6 (T, N, A, B, C, D, E, F), \
23361 {#N, NEON_##T, UP (G), CF (N, G), 0}
23362 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23363 VAR7 (T, N, A, B, C, D, E, F, G), \
23364 {#N, NEON_##T, UP (H), CF (N, H), 0}
23365 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23366 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23367 {#N, NEON_##T, UP (I), CF (N, I), 0}
23368 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23369 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23370 {#N, NEON_##T, UP (J), CF (N, J), 0}
23372 /* The NEON builtin data can be found in arm_neon_builtins.def.
23373 The mode entries in the following table correspond to the "key" type of the
23374 instruction variant, i.e. equivalent to that which would be specified after
23375 the assembler mnemonic, which usually refers to the last vector operand.
23376 (Signed/unsigned/polynomial types are not differentiated between though, and
23377 are all mapped onto the same mode for a given element size.) The modes
23378 listed per instruction should be the same as those defined for that
23379 instruction's pattern in neon.md. */
23381 static neon_builtin_datum neon_builtin_data[] =
23383 #include "arm_neon_builtins.def"
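/* For illustration (hypothetical entry): a line such as
       VAR2 (BINOP, vadd, v8qi, v16qi)
   in arm_neon_builtins.def expands here into two table entries:
       {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0},
       {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}.
   The VARn and CF macros are redefined below so that the same .def file
   can also generate the matching ARM_BUILTIN_NEON_* enumerator
   values.  */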
23386 #undef CF
23387 #undef VAR1
23388 #undef VAR2
23389 #undef VAR3
23390 #undef VAR4
23391 #undef VAR5
23392 #undef VAR6
23393 #undef VAR7
23394 #undef VAR8
23395 #undef VAR9
23396 #undef VAR10
23398 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23399 #define VAR1(T, N, A) \
23400 CF (N, A)
23401 #define VAR2(T, N, A, B) \
23402 VAR1 (T, N, A), \
23403 CF (N, B)
23404 #define VAR3(T, N, A, B, C) \
23405 VAR2 (T, N, A, B), \
23406 CF (N, C)
23407 #define VAR4(T, N, A, B, C, D) \
23408 VAR3 (T, N, A, B, C), \
23409 CF (N, D)
23410 #define VAR5(T, N, A, B, C, D, E) \
23411 VAR4 (T, N, A, B, C, D), \
23412 CF (N, E)
23413 #define VAR6(T, N, A, B, C, D, E, F) \
23414 VAR5 (T, N, A, B, C, D, E), \
23415 CF (N, F)
23416 #define VAR7(T, N, A, B, C, D, E, F, G) \
23417 VAR6 (T, N, A, B, C, D, E, F), \
23418 CF (N, G)
23419 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23420 VAR7 (T, N, A, B, C, D, E, F, G), \
23421 CF (N, H)
23422 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23423 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23424 CF (N, I)
23425 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23426 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23427 CF (N, J)
23428 enum arm_builtins
23430 ARM_BUILTIN_GETWCGR0,
23431 ARM_BUILTIN_GETWCGR1,
23432 ARM_BUILTIN_GETWCGR2,
23433 ARM_BUILTIN_GETWCGR3,
23435 ARM_BUILTIN_SETWCGR0,
23436 ARM_BUILTIN_SETWCGR1,
23437 ARM_BUILTIN_SETWCGR2,
23438 ARM_BUILTIN_SETWCGR3,
23440 ARM_BUILTIN_WZERO,
23442 ARM_BUILTIN_WAVG2BR,
23443 ARM_BUILTIN_WAVG2HR,
23444 ARM_BUILTIN_WAVG2B,
23445 ARM_BUILTIN_WAVG2H,
23447 ARM_BUILTIN_WACCB,
23448 ARM_BUILTIN_WACCH,
23449 ARM_BUILTIN_WACCW,
23451 ARM_BUILTIN_WMACS,
23452 ARM_BUILTIN_WMACSZ,
23453 ARM_BUILTIN_WMACU,
23454 ARM_BUILTIN_WMACUZ,
23456 ARM_BUILTIN_WSADB,
23457 ARM_BUILTIN_WSADBZ,
23458 ARM_BUILTIN_WSADH,
23459 ARM_BUILTIN_WSADHZ,
23461 ARM_BUILTIN_WALIGNI,
23462 ARM_BUILTIN_WALIGNR0,
23463 ARM_BUILTIN_WALIGNR1,
23464 ARM_BUILTIN_WALIGNR2,
23465 ARM_BUILTIN_WALIGNR3,
23467 ARM_BUILTIN_TMIA,
23468 ARM_BUILTIN_TMIAPH,
23469 ARM_BUILTIN_TMIABB,
23470 ARM_BUILTIN_TMIABT,
23471 ARM_BUILTIN_TMIATB,
23472 ARM_BUILTIN_TMIATT,
23474 ARM_BUILTIN_TMOVMSKB,
23475 ARM_BUILTIN_TMOVMSKH,
23476 ARM_BUILTIN_TMOVMSKW,
23478 ARM_BUILTIN_TBCSTB,
23479 ARM_BUILTIN_TBCSTH,
23480 ARM_BUILTIN_TBCSTW,
23482 ARM_BUILTIN_WMADDS,
23483 ARM_BUILTIN_WMADDU,
23485 ARM_BUILTIN_WPACKHSS,
23486 ARM_BUILTIN_WPACKWSS,
23487 ARM_BUILTIN_WPACKDSS,
23488 ARM_BUILTIN_WPACKHUS,
23489 ARM_BUILTIN_WPACKWUS,
23490 ARM_BUILTIN_WPACKDUS,
23492 ARM_BUILTIN_WADDB,
23493 ARM_BUILTIN_WADDH,
23494 ARM_BUILTIN_WADDW,
23495 ARM_BUILTIN_WADDSSB,
23496 ARM_BUILTIN_WADDSSH,
23497 ARM_BUILTIN_WADDSSW,
23498 ARM_BUILTIN_WADDUSB,
23499 ARM_BUILTIN_WADDUSH,
23500 ARM_BUILTIN_WADDUSW,
23501 ARM_BUILTIN_WSUBB,
23502 ARM_BUILTIN_WSUBH,
23503 ARM_BUILTIN_WSUBW,
23504 ARM_BUILTIN_WSUBSSB,
23505 ARM_BUILTIN_WSUBSSH,
23506 ARM_BUILTIN_WSUBSSW,
23507 ARM_BUILTIN_WSUBUSB,
23508 ARM_BUILTIN_WSUBUSH,
23509 ARM_BUILTIN_WSUBUSW,
23511 ARM_BUILTIN_WAND,
23512 ARM_BUILTIN_WANDN,
23513 ARM_BUILTIN_WOR,
23514 ARM_BUILTIN_WXOR,
23516 ARM_BUILTIN_WCMPEQB,
23517 ARM_BUILTIN_WCMPEQH,
23518 ARM_BUILTIN_WCMPEQW,
23519 ARM_BUILTIN_WCMPGTUB,
23520 ARM_BUILTIN_WCMPGTUH,
23521 ARM_BUILTIN_WCMPGTUW,
23522 ARM_BUILTIN_WCMPGTSB,
23523 ARM_BUILTIN_WCMPGTSH,
23524 ARM_BUILTIN_WCMPGTSW,
23526 ARM_BUILTIN_TEXTRMSB,
23527 ARM_BUILTIN_TEXTRMSH,
23528 ARM_BUILTIN_TEXTRMSW,
23529 ARM_BUILTIN_TEXTRMUB,
23530 ARM_BUILTIN_TEXTRMUH,
23531 ARM_BUILTIN_TEXTRMUW,
23532 ARM_BUILTIN_TINSRB,
23533 ARM_BUILTIN_TINSRH,
23534 ARM_BUILTIN_TINSRW,
23536 ARM_BUILTIN_WMAXSW,
23537 ARM_BUILTIN_WMAXSH,
23538 ARM_BUILTIN_WMAXSB,
23539 ARM_BUILTIN_WMAXUW,
23540 ARM_BUILTIN_WMAXUH,
23541 ARM_BUILTIN_WMAXUB,
23542 ARM_BUILTIN_WMINSW,
23543 ARM_BUILTIN_WMINSH,
23544 ARM_BUILTIN_WMINSB,
23545 ARM_BUILTIN_WMINUW,
23546 ARM_BUILTIN_WMINUH,
23547 ARM_BUILTIN_WMINUB,
23549 ARM_BUILTIN_WMULUM,
23550 ARM_BUILTIN_WMULSM,
23551 ARM_BUILTIN_WMULUL,
23553 ARM_BUILTIN_PSADBH,
23554 ARM_BUILTIN_WSHUFH,
23556 ARM_BUILTIN_WSLLH,
23557 ARM_BUILTIN_WSLLW,
23558 ARM_BUILTIN_WSLLD,
23559 ARM_BUILTIN_WSRAH,
23560 ARM_BUILTIN_WSRAW,
23561 ARM_BUILTIN_WSRAD,
23562 ARM_BUILTIN_WSRLH,
23563 ARM_BUILTIN_WSRLW,
23564 ARM_BUILTIN_WSRLD,
23565 ARM_BUILTIN_WRORH,
23566 ARM_BUILTIN_WRORW,
23567 ARM_BUILTIN_WRORD,
23568 ARM_BUILTIN_WSLLHI,
23569 ARM_BUILTIN_WSLLWI,
23570 ARM_BUILTIN_WSLLDI,
23571 ARM_BUILTIN_WSRAHI,
23572 ARM_BUILTIN_WSRAWI,
23573 ARM_BUILTIN_WSRADI,
23574 ARM_BUILTIN_WSRLHI,
23575 ARM_BUILTIN_WSRLWI,
23576 ARM_BUILTIN_WSRLDI,
23577 ARM_BUILTIN_WRORHI,
23578 ARM_BUILTIN_WRORWI,
23579 ARM_BUILTIN_WRORDI,
23581 ARM_BUILTIN_WUNPCKIHB,
23582 ARM_BUILTIN_WUNPCKIHH,
23583 ARM_BUILTIN_WUNPCKIHW,
23584 ARM_BUILTIN_WUNPCKILB,
23585 ARM_BUILTIN_WUNPCKILH,
23586 ARM_BUILTIN_WUNPCKILW,
23588 ARM_BUILTIN_WUNPCKEHSB,
23589 ARM_BUILTIN_WUNPCKEHSH,
23590 ARM_BUILTIN_WUNPCKEHSW,
23591 ARM_BUILTIN_WUNPCKEHUB,
23592 ARM_BUILTIN_WUNPCKEHUH,
23593 ARM_BUILTIN_WUNPCKEHUW,
23594 ARM_BUILTIN_WUNPCKELSB,
23595 ARM_BUILTIN_WUNPCKELSH,
23596 ARM_BUILTIN_WUNPCKELSW,
23597 ARM_BUILTIN_WUNPCKELUB,
23598 ARM_BUILTIN_WUNPCKELUH,
23599 ARM_BUILTIN_WUNPCKELUW,
23601 ARM_BUILTIN_WABSB,
23602 ARM_BUILTIN_WABSH,
23603 ARM_BUILTIN_WABSW,
23605 ARM_BUILTIN_WADDSUBHX,
23606 ARM_BUILTIN_WSUBADDHX,
23608 ARM_BUILTIN_WABSDIFFB,
23609 ARM_BUILTIN_WABSDIFFH,
23610 ARM_BUILTIN_WABSDIFFW,
23612 ARM_BUILTIN_WADDCH,
23613 ARM_BUILTIN_WADDCW,
23615 ARM_BUILTIN_WAVG4,
23616 ARM_BUILTIN_WAVG4R,
23618 ARM_BUILTIN_WMADDSX,
23619 ARM_BUILTIN_WMADDUX,
23621 ARM_BUILTIN_WMADDSN,
23622 ARM_BUILTIN_WMADDUN,
23624 ARM_BUILTIN_WMULWSM,
23625 ARM_BUILTIN_WMULWUM,
23627 ARM_BUILTIN_WMULWSMR,
23628 ARM_BUILTIN_WMULWUMR,
23630 ARM_BUILTIN_WMULWL,
23632 ARM_BUILTIN_WMULSMR,
23633 ARM_BUILTIN_WMULUMR,
23635 ARM_BUILTIN_WQMULM,
23636 ARM_BUILTIN_WQMULMR,
23638 ARM_BUILTIN_WQMULWM,
23639 ARM_BUILTIN_WQMULWMR,
23641 ARM_BUILTIN_WADDBHUSM,
23642 ARM_BUILTIN_WADDBHUSL,
23644 ARM_BUILTIN_WQMIABB,
23645 ARM_BUILTIN_WQMIABT,
23646 ARM_BUILTIN_WQMIATB,
23647 ARM_BUILTIN_WQMIATT,
23649 ARM_BUILTIN_WQMIABBN,
23650 ARM_BUILTIN_WQMIABTN,
23651 ARM_BUILTIN_WQMIATBN,
23652 ARM_BUILTIN_WQMIATTN,
23654 ARM_BUILTIN_WMIABB,
23655 ARM_BUILTIN_WMIABT,
23656 ARM_BUILTIN_WMIATB,
23657 ARM_BUILTIN_WMIATT,
23659 ARM_BUILTIN_WMIABBN,
23660 ARM_BUILTIN_WMIABTN,
23661 ARM_BUILTIN_WMIATBN,
23662 ARM_BUILTIN_WMIATTN,
23664 ARM_BUILTIN_WMIAWBB,
23665 ARM_BUILTIN_WMIAWBT,
23666 ARM_BUILTIN_WMIAWTB,
23667 ARM_BUILTIN_WMIAWTT,
23669 ARM_BUILTIN_WMIAWBBN,
23670 ARM_BUILTIN_WMIAWBTN,
23671 ARM_BUILTIN_WMIAWTBN,
23672 ARM_BUILTIN_WMIAWTTN,
23674 ARM_BUILTIN_WMERGE,
23676 ARM_BUILTIN_CRC32B,
23677 ARM_BUILTIN_CRC32H,
23678 ARM_BUILTIN_CRC32W,
23679 ARM_BUILTIN_CRC32CB,
23680 ARM_BUILTIN_CRC32CH,
23681 ARM_BUILTIN_CRC32CW,
23683 ARM_BUILTIN_GET_FPSCR,
23684 ARM_BUILTIN_SET_FPSCR,
23686 #undef CRYPTO1
23687 #undef CRYPTO2
23688 #undef CRYPTO3
23690 #define CRYPTO1(L, U, M1, M2) \
23691 ARM_BUILTIN_CRYPTO_##U,
23692 #define CRYPTO2(L, U, M1, M2, M3) \
23693 ARM_BUILTIN_CRYPTO_##U,
23694 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23695 ARM_BUILTIN_CRYPTO_##U,
23697 #include "crypto.def"
23699 #undef CRYPTO1
23700 #undef CRYPTO2
23701 #undef CRYPTO3
23703 #include "arm_neon_builtins.def"
23705 ,ARM_BUILTIN_MAX
23708 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
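/* The NEON builtins therefore occupy the last ARRAY_SIZE (neon_builtin_data)
   function codes, so a NEON fcode maps back onto its table entry as
   neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE], mirroring the numbering
   used when the builtins are registered in arm_init_neon_builtins.  */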
23710 #undef CF
23711 #undef VAR1
23712 #undef VAR2
23713 #undef VAR3
23714 #undef VAR4
23715 #undef VAR5
23716 #undef VAR6
23717 #undef VAR7
23718 #undef VAR8
23719 #undef VAR9
23720 #undef VAR10
23722 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23724 #define NUM_DREG_TYPES 5
23725 #define NUM_QREG_TYPES 6
23727 static void
23728 arm_init_neon_builtins (void)
23730 unsigned int i, fcode;
23731 tree decl;
23733 tree neon_intQI_type_node;
23734 tree neon_intHI_type_node;
23735 tree neon_floatHF_type_node;
23736 tree neon_polyQI_type_node;
23737 tree neon_polyHI_type_node;
23738 tree neon_intSI_type_node;
23739 tree neon_intDI_type_node;
23740 tree neon_intUTI_type_node;
23741 tree neon_float_type_node;
23743 tree intQI_pointer_node;
23744 tree intHI_pointer_node;
23745 tree intSI_pointer_node;
23746 tree intDI_pointer_node;
23747 tree float_pointer_node;
23749 tree const_intQI_node;
23750 tree const_intHI_node;
23751 tree const_intSI_node;
23752 tree const_intDI_node;
23753 tree const_float_node;
23755 tree const_intQI_pointer_node;
23756 tree const_intHI_pointer_node;
23757 tree const_intSI_pointer_node;
23758 tree const_intDI_pointer_node;
23759 tree const_float_pointer_node;
23761 tree V8QI_type_node;
23762 tree V4HI_type_node;
23763 tree V4UHI_type_node;
23764 tree V4HF_type_node;
23765 tree V2SI_type_node;
23766 tree V2USI_type_node;
23767 tree V2SF_type_node;
23768 tree V16QI_type_node;
23769 tree V8HI_type_node;
23770 tree V8UHI_type_node;
23771 tree V4SI_type_node;
23772 tree V4USI_type_node;
23773 tree V4SF_type_node;
23774 tree V2DI_type_node;
23775 tree V2UDI_type_node;
23777 tree intUQI_type_node;
23778 tree intUHI_type_node;
23779 tree intUSI_type_node;
23780 tree intUDI_type_node;
23782 tree intEI_type_node;
23783 tree intOI_type_node;
23784 tree intCI_type_node;
23785 tree intXI_type_node;
23787 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23788 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23789 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23791 /* Create distinguished type nodes for NEON vector element types,
23792 and pointers to values of such types, so we can detect them later. */
23793 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23794 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23795 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23796 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23797 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23798 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23799 neon_float_type_node = make_node (REAL_TYPE);
23800 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23801 layout_type (neon_float_type_node);
23802 neon_floatHF_type_node = make_node (REAL_TYPE);
23803 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23804 layout_type (neon_floatHF_type_node);
23806 /* Define typedefs which exactly correspond to the modes we are basing vector
23807 types on. If you change these names you'll need to change
23808 the table used by arm_mangle_type too. */
23809 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23810 "__builtin_neon_qi");
23811 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23812 "__builtin_neon_hi");
23813 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23814 "__builtin_neon_hf");
23815 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23816 "__builtin_neon_si");
23817 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23818 "__builtin_neon_sf");
23819 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23820 "__builtin_neon_di");
23821 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23822 "__builtin_neon_poly8");
23823 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23824 "__builtin_neon_poly16");
23826 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23827 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23828 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23829 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23830 float_pointer_node = build_pointer_type (neon_float_type_node);
23832 /* Next create constant-qualified versions of the above types. */
23833 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23834 TYPE_QUAL_CONST);
23835 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23836 TYPE_QUAL_CONST);
23837 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23838 TYPE_QUAL_CONST);
23839 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23840 TYPE_QUAL_CONST);
23841 const_float_node = build_qualified_type (neon_float_type_node,
23842 TYPE_QUAL_CONST);
23844 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23845 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23846 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23847 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23848 const_float_pointer_node = build_pointer_type (const_float_node);
23850 /* Unsigned integer types for various mode sizes. */
23851 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23852 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23853 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23854 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23855 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23856 /* Now create vector types based on our NEON element types. */
23857 /* 64-bit vectors. */
23858 V8QI_type_node =
23859 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23860 V4HI_type_node =
23861 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23862 V4UHI_type_node =
23863 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23864 V4HF_type_node =
23865 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23866 V2SI_type_node =
23867 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23868 V2USI_type_node =
23869 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23870 V2SF_type_node =
23871 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23872 /* 128-bit vectors. */
23873 V16QI_type_node =
23874 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23875 V8HI_type_node =
23876 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23877 V8UHI_type_node =
23878 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23879 V4SI_type_node =
23880 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23881 V4USI_type_node =
23882 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23883 V4SF_type_node =
23884 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23885 V2DI_type_node =
23886 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23887 V2UDI_type_node =
23888 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23891 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23892 "__builtin_neon_uqi");
23893 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23894 "__builtin_neon_uhi");
23895 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23896 "__builtin_neon_usi");
23897 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23898 "__builtin_neon_udi");
23899 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23900 "__builtin_neon_poly64");
23901 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23902 "__builtin_neon_poly128");
23904 /* Opaque integer types for structures of vectors. */
23905 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23906 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23907 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23908 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23910 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23911 "__builtin_neon_ti");
23912 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23913 "__builtin_neon_ei");
23914 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23915 "__builtin_neon_oi");
23916 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23917 "__builtin_neon_ci");
23918 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23919 "__builtin_neon_xi");
23921 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23924 tree V16UQI_type_node =
23925 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23927 tree v16uqi_ftype_v16uqi
23928 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23930 tree v16uqi_ftype_v16uqi_v16uqi
23931 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23932 V16UQI_type_node, NULL_TREE);
23934 tree v4usi_ftype_v4usi
23935 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23937 tree v4usi_ftype_v4usi_v4usi
23938 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23939 V4USI_type_node, NULL_TREE);
23941 tree v4usi_ftype_v4usi_v4usi_v4usi
23942 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23943 V4USI_type_node, V4USI_type_node, NULL_TREE);
23945 tree uti_ftype_udi_udi
23946 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23947 intUDI_type_node, NULL_TREE);
23949 #undef CRYPTO1
23950 #undef CRYPTO2
23951 #undef CRYPTO3
23952 #undef C
23953 #undef N
23954 #undef CF
23955 #undef FT1
23956 #undef FT2
23957 #undef FT3
23959 #define C(U) \
23960 ARM_BUILTIN_CRYPTO_##U
23961 #define N(L) \
23962 "__builtin_arm_crypto_"#L
23963 #define FT1(R, A) \
23964 R##_ftype_##A
23965 #define FT2(R, A1, A2) \
23966 R##_ftype_##A1##_##A2
23967 #define FT3(R, A1, A2, A3) \
23968 R##_ftype_##A1##_##A2##_##A3
23969 #define CRYPTO1(L, U, R, A) \
23970 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23971 C (U), BUILT_IN_MD, \
23972 NULL, NULL_TREE);
23973 #define CRYPTO2(L, U, R, A1, A2) \
23974 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23975 C (U), BUILT_IN_MD, \
23976 NULL, NULL_TREE);
23978 #define CRYPTO3(L, U, R, A1, A2, A3) \
23979 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23980 C (U), BUILT_IN_MD, \
23981 NULL, NULL_TREE);
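/* As a sketch (the real entries live in crypto.def, and "aesd" is only
   illustrative here), a line such as

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   would expand at this point to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);

   with FT2 mapping the R/A1/A2 fields onto one of the function types
   built above.  */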
23982 #include "crypto.def"
23984 #undef CRYPTO1
23985 #undef CRYPTO2
23986 #undef CRYPTO3
23987 #undef C
23988 #undef N
23989 #undef FT1
23990 #undef FT2
23991 #undef FT3
23993 dreg_types[0] = V8QI_type_node;
23994 dreg_types[1] = V4HI_type_node;
23995 dreg_types[2] = V2SI_type_node;
23996 dreg_types[3] = V2SF_type_node;
23997 dreg_types[4] = neon_intDI_type_node;
23999 qreg_types[0] = V16QI_type_node;
24000 qreg_types[1] = V8HI_type_node;
24001 qreg_types[2] = V4SI_type_node;
24002 qreg_types[3] = V4SF_type_node;
24003 qreg_types[4] = V2DI_type_node;
24004 qreg_types[5] = neon_intUTI_type_node;
24006 for (i = 0; i < NUM_QREG_TYPES; i++)
24008 int j;
24009 for (j = 0; j < NUM_QREG_TYPES; j++)
24011 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24012 reinterp_ftype_dreg[i][j]
24013 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24015 reinterp_ftype_qreg[i][j]
24016 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24020 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24021 i < ARRAY_SIZE (neon_builtin_data);
24022 i++, fcode++)
24024 neon_builtin_datum *d = &neon_builtin_data[i];
24026 const char* const modenames[] = {
24027 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24028 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24029 "ti", "ei", "oi"
24031 char namebuf[60];
24032 tree ftype = NULL;
24033 int is_load = 0, is_store = 0;
24035 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24037 d->fcode = fcode;
24039 switch (d->itype)
24041 case NEON_LOAD1:
24042 case NEON_LOAD1LANE:
24043 case NEON_LOADSTRUCT:
24044 case NEON_LOADSTRUCTLANE:
24045 is_load = 1;
24046 /* Fall through. */
24047 case NEON_STORE1:
24048 case NEON_STORE1LANE:
24049 case NEON_STORESTRUCT:
24050 case NEON_STORESTRUCTLANE:
24051 if (!is_load)
24052 is_store = 1;
24053 /* Fall through. */
24054 case NEON_UNOP:
24055 case NEON_RINT:
24056 case NEON_BINOP:
24057 case NEON_LOGICBINOP:
24058 case NEON_SHIFTINSERT:
24059 case NEON_TERNOP:
24060 case NEON_GETLANE:
24061 case NEON_SETLANE:
24062 case NEON_CREATE:
24063 case NEON_DUP:
24064 case NEON_DUPLANE:
24065 case NEON_SHIFTIMM:
24066 case NEON_SHIFTACC:
24067 case NEON_COMBINE:
24068 case NEON_SPLIT:
24069 case NEON_CONVERT:
24070 case NEON_FIXCONV:
24071 case NEON_LANEMUL:
24072 case NEON_LANEMULL:
24073 case NEON_LANEMULH:
24074 case NEON_LANEMAC:
24075 case NEON_SCALARMUL:
24076 case NEON_SCALARMULL:
24077 case NEON_SCALARMULH:
24078 case NEON_SCALARMAC:
24079 case NEON_SELECT:
24080 case NEON_VTBL:
24081 case NEON_VTBX:
24083 int k;
24084 tree return_type = void_type_node, args = void_list_node;
24086 /* Build a function type directly from the insn_data for
24087 this builtin. The build_function_type() function takes
24088 care of removing duplicates for us. */
24089 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24091 tree eltype;
24093 if (is_load && k == 1)
24095 /* Neon load patterns always have the memory
24096 operand in the operand 1 position. */
24097 gcc_assert (insn_data[d->code].operand[k].predicate
24098 == neon_struct_operand);
24100 switch (d->mode)
24102 case T_V8QI:
24103 case T_V16QI:
24104 eltype = const_intQI_pointer_node;
24105 break;
24107 case T_V4HI:
24108 case T_V8HI:
24109 eltype = const_intHI_pointer_node;
24110 break;
24112 case T_V2SI:
24113 case T_V4SI:
24114 eltype = const_intSI_pointer_node;
24115 break;
24117 case T_V2SF:
24118 case T_V4SF:
24119 eltype = const_float_pointer_node;
24120 break;
24122 case T_DI:
24123 case T_V2DI:
24124 eltype = const_intDI_pointer_node;
24125 break;
24127 default: gcc_unreachable ();
24130 else if (is_store && k == 0)
24132 /* Similarly, Neon store patterns use operand 0 as
24133 the memory location to store to. */
24134 gcc_assert (insn_data[d->code].operand[k].predicate
24135 == neon_struct_operand);
24137 switch (d->mode)
24139 case T_V8QI:
24140 case T_V16QI:
24141 eltype = intQI_pointer_node;
24142 break;
24144 case T_V4HI:
24145 case T_V8HI:
24146 eltype = intHI_pointer_node;
24147 break;
24149 case T_V2SI:
24150 case T_V4SI:
24151 eltype = intSI_pointer_node;
24152 break;
24154 case T_V2SF:
24155 case T_V4SF:
24156 eltype = float_pointer_node;
24157 break;
24159 case T_DI:
24160 case T_V2DI:
24161 eltype = intDI_pointer_node;
24162 break;
24164 default: gcc_unreachable ();
24167 else
24169 switch (insn_data[d->code].operand[k].mode)
24171 case VOIDmode: eltype = void_type_node; break;
24172 /* Scalars. */
24173 case QImode: eltype = neon_intQI_type_node; break;
24174 case HImode: eltype = neon_intHI_type_node; break;
24175 case SImode: eltype = neon_intSI_type_node; break;
24176 case SFmode: eltype = neon_float_type_node; break;
24177 case DImode: eltype = neon_intDI_type_node; break;
24178 case TImode: eltype = intTI_type_node; break;
24179 case EImode: eltype = intEI_type_node; break;
24180 case OImode: eltype = intOI_type_node; break;
24181 case CImode: eltype = intCI_type_node; break;
24182 case XImode: eltype = intXI_type_node; break;
24183 /* 64-bit vectors. */
24184 case V8QImode: eltype = V8QI_type_node; break;
24185 case V4HImode: eltype = V4HI_type_node; break;
24186 case V2SImode: eltype = V2SI_type_node; break;
24187 case V2SFmode: eltype = V2SF_type_node; break;
24188 /* 128-bit vectors. */
24189 case V16QImode: eltype = V16QI_type_node; break;
24190 case V8HImode: eltype = V8HI_type_node; break;
24191 case V4SImode: eltype = V4SI_type_node; break;
24192 case V4SFmode: eltype = V4SF_type_node; break;
24193 case V2DImode: eltype = V2DI_type_node; break;
24194 default: gcc_unreachable ();
24198 if (k == 0 && !is_store)
24199 return_type = eltype;
24200 else
24201 args = tree_cons (NULL_TREE, eltype, args);
24204 ftype = build_function_type (return_type, args);
24206 break;
24208 case NEON_REINTERP:
24210 /* We iterate over NUM_DREG_TYPES doubleword types,
24211 then NUM_QREG_TYPES quadword types.
24212 V4HF is not a type used in reinterpret, so we translate
24213 d->mode to the correct index in reinterp_ftype_dreg. */
24214 bool qreg_p
24215 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24216 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24217 % NUM_QREG_TYPES;
24218 switch (insn_data[d->code].operand[0].mode)
24220 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24221 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24222 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24223 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24224 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24225 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24226 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24227 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24228 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24229 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24230 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24231 default: gcc_unreachable ();
24234 break;
24235 case NEON_FLOAT_WIDEN:
24237 tree eltype = NULL_TREE;
24238 tree return_type = NULL_TREE;
24240 switch (insn_data[d->code].operand[1].mode)
24242 case V4HFmode:
24243 eltype = V4HF_type_node;
24244 return_type = V4SF_type_node;
24245 break;
24246 default: gcc_unreachable ();
24248 ftype = build_function_type_list (return_type, eltype, NULL);
24249 break;
24251 case NEON_FLOAT_NARROW:
24253 tree eltype = NULL_TREE;
24254 tree return_type = NULL_TREE;
24256 switch (insn_data[d->code].operand[1].mode)
24258 case V4SFmode:
24259 eltype = V4SF_type_node;
24260 return_type = V4HF_type_node;
24261 break;
24262 default: gcc_unreachable ();
24264 ftype = build_function_type_list (return_type, eltype, NULL);
24265 break;
24267 case NEON_BSWAP:
24269 tree eltype = NULL_TREE;
24270 switch (insn_data[d->code].operand[1].mode)
24272 case V4HImode:
24273 eltype = V4UHI_type_node;
24274 break;
24275 case V8HImode:
24276 eltype = V8UHI_type_node;
24277 break;
24278 case V2SImode:
24279 eltype = V2USI_type_node;
24280 break;
24281 case V4SImode:
24282 eltype = V4USI_type_node;
24283 break;
24284 case V2DImode:
24285 eltype = V2UDI_type_node;
24286 break;
24287 default: gcc_unreachable ();
24289 ftype = build_function_type_list (eltype, eltype, NULL);
24290 break;
24292 case NEON_COPYSIGNF:
24294 tree eltype = NULL_TREE;
24295 switch (insn_data[d->code].operand[1].mode)
24297 case V2SFmode:
24298 eltype = V2SF_type_node;
24299 break;
24300 case V4SFmode:
24301 eltype = V4SF_type_node;
24302 break;
24303 default: gcc_unreachable ();
24305 ftype = build_function_type_list (eltype, eltype, NULL);
24306 break;
24308 default:
24309 gcc_unreachable ();
24312 gcc_assert (ftype != NULL);
24314 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24316 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24317 NULL_TREE);
24318 arm_builtin_decls[fcode] = decl;
24322 #undef NUM_DREG_TYPES
24323 #undef NUM_QREG_TYPES
24325 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24326 do \
24328 if ((MASK) & insn_flags) \
24330 tree bdecl; \
24331 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24332 BUILT_IN_MD, NULL, NULL_TREE); \
24333 arm_builtin_decls[CODE] = bdecl; \
24336 while (0)
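/* For example, the registration of the iWMMXt "wzero" builtin further down
   goes through this macro as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   which adds the builtin only when FL_IWMMXT is set in insn_flags and
   records its decl in arm_builtin_decls.  */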
24338 struct builtin_description
24340 const unsigned int mask;
24341 const enum insn_code icode;
24342 const char * const name;
24343 const enum arm_builtins code;
24344 const enum rtx_code comparison;
24345 const unsigned int flag;
24348 static const struct builtin_description bdesc_2arg[] =
24350 #define IWMMXT_BUILTIN(code, string, builtin) \
24351 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24352 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24354 #define IWMMXT2_BUILTIN(code, string, builtin) \
24355 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24356 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
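/* Each entry below expands to one builtin_description record; for instance
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) becomes

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   pairing the insn pattern with the user-visible builtin name.  */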
24358 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24359 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24360 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24361 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24362 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24363 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24364 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24365 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24366 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24367 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24368 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24369 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24370 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24371 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24372 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24373 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24374 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24375 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24376 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24377 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24378 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24379 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24380 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24381 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24382 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24383 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24384 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24385 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24386 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24387 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24388 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24389 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24390 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24391 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24392 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24393 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24394 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24395 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24396 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24397 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24398 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24399 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24400 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24401 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24402 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24403 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24404 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24405 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24406 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24407 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24408 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24409 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24410 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24411 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24412 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24413 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24414 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24415 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24416 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24417 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24418 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24419 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24420 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24421 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24422 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24423 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24424 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24425 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24426 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24427 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24428 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24429 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24430 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24431 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24432 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24433 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24434 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24435 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24437 #define IWMMXT_BUILTIN2(code, builtin) \
24438 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24440 #define IWMMXT2_BUILTIN2(code, builtin) \
24441 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24443 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24444 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24445 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24446 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24447 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24448 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24449 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24450 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24451 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24452 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24455 #define FP_BUILTIN(L, U) \
24456 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24457 UNKNOWN, 0},
24459 FP_BUILTIN (get_fpscr, GET_FPSCR)
24460 FP_BUILTIN (set_fpscr, SET_FPSCR)
24461 #undef FP_BUILTIN
24463 #define CRC32_BUILTIN(L, U) \
24464 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24465 UNKNOWN, 0},
24466 CRC32_BUILTIN (crc32b, CRC32B)
24467 CRC32_BUILTIN (crc32h, CRC32H)
24468 CRC32_BUILTIN (crc32w, CRC32W)
24469 CRC32_BUILTIN (crc32cb, CRC32CB)
24470 CRC32_BUILTIN (crc32ch, CRC32CH)
24471 CRC32_BUILTIN (crc32cw, CRC32CW)
24472 #undef CRC32_BUILTIN
24475 #define CRYPTO_BUILTIN(L, U) \
24476 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24477 UNKNOWN, 0},
24478 #undef CRYPTO1
24479 #undef CRYPTO2
24480 #undef CRYPTO3
24481 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24482 #define CRYPTO1(L, U, R, A)
24483 #define CRYPTO3(L, U, R, A1, A2, A3)
24484 #include "crypto.def"
24485 #undef CRYPTO1
24486 #undef CRYPTO2
24487 #undef CRYPTO3
24491 static const struct builtin_description bdesc_1arg[] =
24493 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24494 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24495 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24496 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24497 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24498 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24499 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24500 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24501 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24502 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24503 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24504 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24505 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24506 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24507 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24508 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24509 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24510 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24511 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24512 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24513 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24514 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24515 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24516 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24518 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24519 #define CRYPTO2(L, U, R, A1, A2)
24520 #define CRYPTO3(L, U, R, A1, A2, A3)
24521 #include "crypto.def"
24522 #undef CRYPTO1
24523 #undef CRYPTO2
24524 #undef CRYPTO3
24527 static const struct builtin_description bdesc_3arg[] =
24529 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24530 #define CRYPTO1(L, U, R, A)
24531 #define CRYPTO2(L, U, R, A1, A2)
24532 #include "crypto.def"
24533 #undef CRYPTO1
24534 #undef CRYPTO2
24535 #undef CRYPTO3
24537 #undef CRYPTO_BUILTIN
24539 /* Set up all the iWMMXt builtins. This is not called if
24540 TARGET_IWMMXT is zero. */
24542 static void
24543 arm_init_iwmmxt_builtins (void)
24545 const struct builtin_description * d;
24546 size_t i;
24548 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24549 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24550 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24552 tree v8qi_ftype_v8qi_v8qi_int
24553 = build_function_type_list (V8QI_type_node,
24554 V8QI_type_node, V8QI_type_node,
24555 integer_type_node, NULL_TREE);
24556 tree v4hi_ftype_v4hi_int
24557 = build_function_type_list (V4HI_type_node,
24558 V4HI_type_node, integer_type_node, NULL_TREE);
24559 tree v2si_ftype_v2si_int
24560 = build_function_type_list (V2SI_type_node,
24561 V2SI_type_node, integer_type_node, NULL_TREE);
24562 tree v2si_ftype_di_di
24563 = build_function_type_list (V2SI_type_node,
24564 long_long_integer_type_node,
24565 long_long_integer_type_node,
24566 NULL_TREE);
24567 tree di_ftype_di_int
24568 = build_function_type_list (long_long_integer_type_node,
24569 long_long_integer_type_node,
24570 integer_type_node, NULL_TREE);
24571 tree di_ftype_di_int_int
24572 = build_function_type_list (long_long_integer_type_node,
24573 long_long_integer_type_node,
24574 integer_type_node,
24575 integer_type_node, NULL_TREE);
24576 tree int_ftype_v8qi
24577 = build_function_type_list (integer_type_node,
24578 V8QI_type_node, NULL_TREE);
24579 tree int_ftype_v4hi
24580 = build_function_type_list (integer_type_node,
24581 V4HI_type_node, NULL_TREE);
24582 tree int_ftype_v2si
24583 = build_function_type_list (integer_type_node,
24584 V2SI_type_node, NULL_TREE);
24585 tree int_ftype_v8qi_int
24586 = build_function_type_list (integer_type_node,
24587 V8QI_type_node, integer_type_node, NULL_TREE);
24588 tree int_ftype_v4hi_int
24589 = build_function_type_list (integer_type_node,
24590 V4HI_type_node, integer_type_node, NULL_TREE);
24591 tree int_ftype_v2si_int
24592 = build_function_type_list (integer_type_node,
24593 V2SI_type_node, integer_type_node, NULL_TREE);
24594 tree v8qi_ftype_v8qi_int_int
24595 = build_function_type_list (V8QI_type_node,
24596 V8QI_type_node, integer_type_node,
24597 integer_type_node, NULL_TREE);
24598 tree v4hi_ftype_v4hi_int_int
24599 = build_function_type_list (V4HI_type_node,
24600 V4HI_type_node, integer_type_node,
24601 integer_type_node, NULL_TREE);
24602 tree v2si_ftype_v2si_int_int
24603 = build_function_type_list (V2SI_type_node,
24604 V2SI_type_node, integer_type_node,
24605 integer_type_node, NULL_TREE);
24606 /* Miscellaneous. */
24607 tree v8qi_ftype_v4hi_v4hi
24608 = build_function_type_list (V8QI_type_node,
24609 V4HI_type_node, V4HI_type_node, NULL_TREE);
24610 tree v4hi_ftype_v2si_v2si
24611 = build_function_type_list (V4HI_type_node,
24612 V2SI_type_node, V2SI_type_node, NULL_TREE);
24613 tree v8qi_ftype_v4hi_v8qi
24614 = build_function_type_list (V8QI_type_node,
24615 V4HI_type_node, V8QI_type_node, NULL_TREE);
24616 tree v2si_ftype_v4hi_v4hi
24617 = build_function_type_list (V2SI_type_node,
24618 V4HI_type_node, V4HI_type_node, NULL_TREE);
24619 tree v2si_ftype_v8qi_v8qi
24620 = build_function_type_list (V2SI_type_node,
24621 V8QI_type_node, V8QI_type_node, NULL_TREE);
24622 tree v4hi_ftype_v4hi_di
24623 = build_function_type_list (V4HI_type_node,
24624 V4HI_type_node, long_long_integer_type_node,
24625 NULL_TREE);
24626 tree v2si_ftype_v2si_di
24627 = build_function_type_list (V2SI_type_node,
24628 V2SI_type_node, long_long_integer_type_node,
24629 NULL_TREE);
24630 tree di_ftype_void
24631 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24632 tree int_ftype_void
24633 = build_function_type_list (integer_type_node, NULL_TREE);
24634 tree di_ftype_v8qi
24635 = build_function_type_list (long_long_integer_type_node,
24636 V8QI_type_node, NULL_TREE);
24637 tree di_ftype_v4hi
24638 = build_function_type_list (long_long_integer_type_node,
24639 V4HI_type_node, NULL_TREE);
24640 tree di_ftype_v2si
24641 = build_function_type_list (long_long_integer_type_node,
24642 V2SI_type_node, NULL_TREE);
24643 tree v2si_ftype_v4hi
24644 = build_function_type_list (V2SI_type_node,
24645 V4HI_type_node, NULL_TREE);
24646 tree v4hi_ftype_v8qi
24647 = build_function_type_list (V4HI_type_node,
24648 V8QI_type_node, NULL_TREE);
24649 tree v8qi_ftype_v8qi
24650 = build_function_type_list (V8QI_type_node,
24651 V8QI_type_node, NULL_TREE);
24652 tree v4hi_ftype_v4hi
24653 = build_function_type_list (V4HI_type_node,
24654 V4HI_type_node, NULL_TREE);
24655 tree v2si_ftype_v2si
24656 = build_function_type_list (V2SI_type_node,
24657 V2SI_type_node, NULL_TREE);
24659 tree di_ftype_di_v4hi_v4hi
24660 = build_function_type_list (long_long_unsigned_type_node,
24661 long_long_unsigned_type_node,
24662 V4HI_type_node, V4HI_type_node,
24663 NULL_TREE);
24665 tree di_ftype_v4hi_v4hi
24666 = build_function_type_list (long_long_unsigned_type_node,
24667 V4HI_type_node,V4HI_type_node,
24668 NULL_TREE);
24670 tree v2si_ftype_v2si_v4hi_v4hi
24671 = build_function_type_list (V2SI_type_node,
24672 V2SI_type_node, V4HI_type_node,
24673 V4HI_type_node, NULL_TREE);
24675 tree v2si_ftype_v2si_v8qi_v8qi
24676 = build_function_type_list (V2SI_type_node,
24677 V2SI_type_node, V8QI_type_node,
24678 V8QI_type_node, NULL_TREE);
24680 tree di_ftype_di_v2si_v2si
24681 = build_function_type_list (long_long_unsigned_type_node,
24682 long_long_unsigned_type_node,
24683 V2SI_type_node, V2SI_type_node,
24684 NULL_TREE);
24686 tree di_ftype_di_di_int
24687 = build_function_type_list (long_long_unsigned_type_node,
24688 long_long_unsigned_type_node,
24689 long_long_unsigned_type_node,
24690 integer_type_node, NULL_TREE);
24692 tree void_ftype_int
24693 = build_function_type_list (void_type_node,
24694 integer_type_node, NULL_TREE);
24696 tree v8qi_ftype_char
24697 = build_function_type_list (V8QI_type_node,
24698 signed_char_type_node, NULL_TREE);
24700 tree v4hi_ftype_short
24701 = build_function_type_list (V4HI_type_node,
24702 short_integer_type_node, NULL_TREE);
24704 tree v2si_ftype_int
24705 = build_function_type_list (V2SI_type_node,
24706 integer_type_node, NULL_TREE);
24708 /* Normal vector binops. */
24709 tree v8qi_ftype_v8qi_v8qi
24710 = build_function_type_list (V8QI_type_node,
24711 V8QI_type_node, V8QI_type_node, NULL_TREE);
24712 tree v4hi_ftype_v4hi_v4hi
24713 = build_function_type_list (V4HI_type_node,
24714 V4HI_type_node,V4HI_type_node, NULL_TREE);
24715 tree v2si_ftype_v2si_v2si
24716 = build_function_type_list (V2SI_type_node,
24717 V2SI_type_node, V2SI_type_node, NULL_TREE);
24718 tree di_ftype_di_di
24719 = build_function_type_list (long_long_unsigned_type_node,
24720 long_long_unsigned_type_node,
24721 long_long_unsigned_type_node,
24722 NULL_TREE);
24724 /* Add all builtins that are more or less simple operations on two
24725 operands. */
24726 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24728 /* Use one of the operands; the target can have a different mode for
24729 mask-generating compares. */
24730 machine_mode mode;
24731 tree type;
24733 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24734 continue;
24736 mode = insn_data[d->icode].operand[1].mode;
24738 switch (mode)
24740 case V8QImode:
24741 type = v8qi_ftype_v8qi_v8qi;
24742 break;
24743 case V4HImode:
24744 type = v4hi_ftype_v4hi_v4hi;
24745 break;
24746 case V2SImode:
24747 type = v2si_ftype_v2si_v2si;
24748 break;
24749 case DImode:
24750 type = di_ftype_di_di;
24751 break;
24753 default:
24754 gcc_unreachable ();
24757 def_mbuiltin (d->mask, d->name, type, d->code);
24760   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
24761 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24762 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24763 ARM_BUILTIN_ ## CODE)
24765 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24766 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24767 ARM_BUILTIN_ ## CODE)
24769 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24770 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24771 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24772 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24773 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24774 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24775 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24776 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24777 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24779 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24780 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24781 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24782 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24783 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24784 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24786 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24787 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24788 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24789 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24790 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24791 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24793 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24794 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24795 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24796 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24797 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24798 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24800 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24801 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24802 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24803 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24804 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24805 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24807 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24809 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24810 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24811 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24812 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24813 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24814 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24815 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24816 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24817 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24818 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24820 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24821 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24822 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24823 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24824 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24825 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24826 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24827 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24828 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24830 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24831 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24832 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24834 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24835 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24836 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24838 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24839 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24841 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24842 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24843 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24844 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24845 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24846 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24848 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24849 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24850 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24851 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24852 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24853 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24854 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24855 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24856 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24857 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24858 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24859 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24861 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24862 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24863 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24864 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24866 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24867 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24868 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24869 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24870 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24871 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24872 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24874 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24875 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24876 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24878 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24879 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24880 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24881 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24883 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24884 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24885 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24886 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24888 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24889 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24890 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24891 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24893 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24894 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24895 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24896 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24898 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24899 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24900 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24901 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24903 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24904 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24905 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24906 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24908 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24910 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24911 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24912 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24914 #undef iwmmx_mbuiltin
24915 #undef iwmmx2_mbuiltin
24918 static void
24919 arm_init_fp16_builtins (void)
24921 tree fp16_type = make_node (REAL_TYPE);
24922 TYPE_PRECISION (fp16_type) = 16;
24923 layout_type (fp16_type);
24924 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
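/* Once the "__fp16" type is registered, user code can declare
   half-precision scalars directly, e.g. (a sketch)

     __fp16 h = 1.0f;

   whenever arm_fp16_format is set (see arm_init_builtins below).  */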
24927 static void
24928 arm_init_crc32_builtins ()
24930 tree si_ftype_si_qi
24931 = build_function_type_list (unsigned_intSI_type_node,
24932 unsigned_intSI_type_node,
24933 unsigned_intQI_type_node, NULL_TREE);
24934 tree si_ftype_si_hi
24935 = build_function_type_list (unsigned_intSI_type_node,
24936 unsigned_intSI_type_node,
24937 unsigned_intHI_type_node, NULL_TREE);
24938 tree si_ftype_si_si
24939 = build_function_type_list (unsigned_intSI_type_node,
24940 unsigned_intSI_type_node,
24941 unsigned_intSI_type_node, NULL_TREE);
24943 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24944 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24945 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24946 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24947 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24948 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24949 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24950 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24951 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24952 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24953 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24954 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24955 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24956 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24957 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24958 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24959 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24960 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24963 static void
24964 arm_init_builtins (void)
24966 if (TARGET_REALLY_IWMMXT)
24967 arm_init_iwmmxt_builtins ();
24969 if (TARGET_NEON)
24970 arm_init_neon_builtins ();
24972 if (arm_fp16_format)
24973 arm_init_fp16_builtins ();
24975 if (TARGET_CRC32)
24976 arm_init_crc32_builtins ();
24978 if (TARGET_VFP && TARGET_HARD_FLOAT)
24980 tree ftype_set_fpscr
24981 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24982 tree ftype_get_fpscr
24983 = build_function_type_list (unsigned_type_node, NULL);
24985 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24986 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24987 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24988 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24989 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24990 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
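/* A hypothetical use of the two FPSCR builtins registered above
   (the bit manipulated is purely illustrative):

     unsigned int fpscr = __builtin_arm_ldfscr ();
     __builtin_arm_stfscr (fpscr | 1);

   reads the floating-point status and control register and writes a
   modified value back.  */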
24994 /* Return the ARM builtin for CODE. */
24996 static tree
24997 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24999 if (code >= ARM_BUILTIN_MAX)
25000 return error_mark_node;
25002 return arm_builtin_decls[code];
25005 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25007 static const char *
25008 arm_invalid_parameter_type (const_tree t)
25010 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25011 return N_("function parameters cannot have __fp16 type");
25012 return NULL;
25015 /* Implement TARGET_INVALID_RETURN_TYPE.  */
25017 static const char *
25018 arm_invalid_return_type (const_tree t)
25020 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25021 return N_("functions cannot return __fp16 type");
25022 return NULL;
25025 /* Implement TARGET_PROMOTED_TYPE. */
25027 static tree
25028 arm_promoted_type (const_tree t)
25030 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25031 return float_type_node;
25032 return NULL_TREE;
25035 /* Implement TARGET_CONVERT_TO_TYPE.
25036 Specifically, this hook implements the peculiarity of the ARM
25037    half-precision floating-point C semantics that requires conversions
25038    between __fp16 and double to go through an intermediate conversion to float.  */
25040 static tree
25041 arm_convert_to_type (tree type, tree expr)
25043 tree fromtype = TREE_TYPE (expr);
25044 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25045 return NULL_TREE;
25046 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25047 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25048 return convert (type, convert (float_type_node, expr));
25049 return NULL_TREE;
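/* Under this rule, a conversion such as (double) h for an __fp16 value h
   is rewritten as (double) (float) h, and similarly in the other
   direction; conversions that already involve float are left to the
   default handling.  */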
25052 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25053 This simply adds HFmode as a supported mode; even though we don't
25054 implement arithmetic on this type directly, it's supported by
25055 optabs conversions, much the way the double-word arithmetic is
25056 special-cased in the default hook. */
25058 static bool
25059 arm_scalar_mode_supported_p (machine_mode mode)
25061 if (mode == HFmode)
25062 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25063 else if (ALL_FIXED_POINT_MODE_P (mode))
25064 return true;
25065 else
25066 return default_scalar_mode_supported_p (mode);
25069 /* Errors in the source file can cause expand_expr to return const0_rtx
25070 where we expect a vector. To avoid crashing, use one of the vector
25071 clear instructions. */
25073 static rtx
25074 safe_vector_operand (rtx x, machine_mode mode)
25076 if (x != const0_rtx)
25077 return x;
25078 x = gen_reg_rtx (mode);
25080 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25081 : gen_rtx_SUBREG (DImode, x, 0)));
25082 return x;
25085 /* Function to expand ternary builtins. */
25086 static rtx
25087 arm_expand_ternop_builtin (enum insn_code icode,
25088 tree exp, rtx target)
25090 rtx pat;
25091 tree arg0 = CALL_EXPR_ARG (exp, 0);
25092 tree arg1 = CALL_EXPR_ARG (exp, 1);
25093 tree arg2 = CALL_EXPR_ARG (exp, 2);
25095 rtx op0 = expand_normal (arg0);
25096 rtx op1 = expand_normal (arg1);
25097 rtx op2 = expand_normal (arg2);
25098 rtx op3 = NULL_RTX;
25100 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25101 lane operand depending on endianness. */
25102 bool builtin_sha1cpm_p = false;
25104 if (insn_data[icode].n_operands == 5)
25106 gcc_assert (icode == CODE_FOR_crypto_sha1c
25107 || icode == CODE_FOR_crypto_sha1p
25108 || icode == CODE_FOR_crypto_sha1m);
25109 builtin_sha1cpm_p = true;
25111 machine_mode tmode = insn_data[icode].operand[0].mode;
25112 machine_mode mode0 = insn_data[icode].operand[1].mode;
25113 machine_mode mode1 = insn_data[icode].operand[2].mode;
25114 machine_mode mode2 = insn_data[icode].operand[3].mode;
25117 if (VECTOR_MODE_P (mode0))
25118 op0 = safe_vector_operand (op0, mode0);
25119 if (VECTOR_MODE_P (mode1))
25120 op1 = safe_vector_operand (op1, mode1);
25121 if (VECTOR_MODE_P (mode2))
25122 op2 = safe_vector_operand (op2, mode2);
25124 if (! target
25125 || GET_MODE (target) != tmode
25126 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25127 target = gen_reg_rtx (tmode);
25129 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25130 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25131 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25133 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25134 op0 = copy_to_mode_reg (mode0, op0);
25135 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25136 op1 = copy_to_mode_reg (mode1, op1);
25137 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25138 op2 = copy_to_mode_reg (mode2, op2);
25139 if (builtin_sha1cpm_p)
25140 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25142 if (builtin_sha1cpm_p)
25143 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25144 else
25145 pat = GEN_FCN (icode) (target, op0, op1, op2);
25146 if (! pat)
25147 return 0;
25148 emit_insn (pat);
25149 return target;
25152 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25154 static rtx
25155 arm_expand_binop_builtin (enum insn_code icode,
25156 tree exp, rtx target)
25158 rtx pat;
25159 tree arg0 = CALL_EXPR_ARG (exp, 0);
25160 tree arg1 = CALL_EXPR_ARG (exp, 1);
25161 rtx op0 = expand_normal (arg0);
25162 rtx op1 = expand_normal (arg1);
25163 machine_mode tmode = insn_data[icode].operand[0].mode;
25164 machine_mode mode0 = insn_data[icode].operand[1].mode;
25165 machine_mode mode1 = insn_data[icode].operand[2].mode;
25167 if (VECTOR_MODE_P (mode0))
25168 op0 = safe_vector_operand (op0, mode0);
25169 if (VECTOR_MODE_P (mode1))
25170 op1 = safe_vector_operand (op1, mode1);
25172 if (! target
25173 || GET_MODE (target) != tmode
25174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25175 target = gen_reg_rtx (tmode);
25177 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25178 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25180 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25181 op0 = copy_to_mode_reg (mode0, op0);
25182 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25183 op1 = copy_to_mode_reg (mode1, op1);
25185 pat = GEN_FCN (icode) (target, op0, op1);
25186 if (! pat)
25187 return 0;
25188 emit_insn (pat);
25189 return target;
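/* For illustration: the two-operand iWMMXt builtins dispatched through this
   routine are normally reached via mmintrin.h, e.g. (a sketch, assuming the
   usual mapping of _mm_add_pi8 onto __builtin_arm_waddb):

	#include <mmintrin.h>
	__m64 add8 (__m64 a, __m64 b) { return _mm_add_pi8 (a, b); }

   Each operand is forced into a register satisfying the insn's predicate
   before the single pattern is emitted.  */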
25192 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25194 static rtx
25195 arm_expand_unop_builtin (enum insn_code icode,
25196 tree exp, rtx target, int do_load)
25198 rtx pat;
25199 tree arg0 = CALL_EXPR_ARG (exp, 0);
25200 rtx op0 = expand_normal (arg0);
25201 rtx op1 = NULL_RTX;
25202 machine_mode tmode = insn_data[icode].operand[0].mode;
25203 machine_mode mode0 = insn_data[icode].operand[1].mode;
25204 bool builtin_sha1h_p = false;
25206 if (insn_data[icode].n_operands == 3)
25208 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25209 builtin_sha1h_p = true;
25212 if (! target
25213 || GET_MODE (target) != tmode
25214 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25215 target = gen_reg_rtx (tmode);
25216 if (do_load)
25217 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25218 else
25220 if (VECTOR_MODE_P (mode0))
25221 op0 = safe_vector_operand (op0, mode0);
25223 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25224 op0 = copy_to_mode_reg (mode0, op0);
25226 if (builtin_sha1h_p)
25227 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25229 if (builtin_sha1h_p)
25230 pat = GEN_FCN (icode) (target, op0, op1);
25231 else
25232 pat = GEN_FCN (icode) (target, op0);
25233 if (! pat)
25234 return 0;
25235 emit_insn (pat);
25236 return target;
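/* For illustration: a typical unop builtin takes one register (or, with
   DO_LOAD, one pointer) operand.  The special-cased
   __builtin_arm_crypto_sha1h is reached via vsha1h_u32 in arm_neon.h,
   e.g. (a sketch):

	uint32_t e2 = vsha1h_u32 (e1);

   and gains a second, lane-selecting operand above so the same pattern works
   for big- and little-endian vector layouts.  */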
25239 typedef enum {
25240 NEON_ARG_COPY_TO_REG,
25241 NEON_ARG_CONSTANT,
25242 NEON_ARG_MEMORY,
25243 NEON_ARG_STOP
25244 } builtin_arg;
25246 #define NEON_MAX_BUILTIN_ARGS 5
25248 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25249 and return an expression for the accessed memory.
25251 The intrinsic function operates on a block of registers that has
25252 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25253 function references the memory at EXP of type TYPE and in mode
25254 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25255 available. */
25257 static tree
25258 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25259 machine_mode reg_mode,
25260 neon_builtin_type_mode type_mode)
25262 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25263 tree elem_type, upper_bound, array_type;
25265 /* Work out the size of the register block in bytes. */
25266 reg_size = GET_MODE_SIZE (reg_mode);
25268 /* Work out the size of each vector in bytes. */
25269 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25270 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25272 /* Work out how many vectors there are. */
25273 gcc_assert (reg_size % vector_size == 0);
25274 nvectors = reg_size / vector_size;
25276 /* Work out the type of each element. */
25277 gcc_assert (POINTER_TYPE_P (type));
25278 elem_type = TREE_TYPE (type);
25280 /* Work out how many elements are being loaded or stored.
25281 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25282 and memory elements; anything else implies a lane load or store. */
25283 if (mem_mode == reg_mode)
25284 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25285 else
25286 nelems = nvectors;
25288 /* Create a type that describes the full access. */
25289 upper_bound = build_int_cst (size_type_node, nelems - 1);
25290 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25292 /* Dereference EXP using that type. */
25293 return fold_build2 (MEM_REF, array_type, exp,
25294 build_int_cst (build_pointer_type (array_type), 0));
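/* A worked example of the sizes above: for a full-width load in the style of
   vld1q_s32 both REG_MODE and MEM_MODE are V4SImode, so reg_size = 16,
   vector_size = 16 (a quad register), nvectors = 1 and
   nelems = 16 * 1 / 4 = 4; the pointer is dereferenced as an int32_t[4]
   array.  For a lane load such as vld1q_lane_s32, MEM_MODE differs from
   REG_MODE and nelems = nvectors, so only a single element is described as
   accessed.  */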
25297 /* Expand a Neon builtin. */
25298 static rtx
25299 arm_expand_neon_args (rtx target, int icode, int have_retval,
25300 neon_builtin_type_mode type_mode,
25301 tree exp, int fcode, ...)
25303 va_list ap;
25304 rtx pat;
25305 tree arg[NEON_MAX_BUILTIN_ARGS];
25306 rtx op[NEON_MAX_BUILTIN_ARGS];
25307 tree arg_type;
25308 tree formals;
25309 machine_mode tmode = insn_data[icode].operand[0].mode;
25310 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25311 machine_mode other_mode;
25312 int argc = 0;
25313 int opno;
25315 if (have_retval
25316 && (!target
25317 || GET_MODE (target) != tmode
25318 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25319 target = gen_reg_rtx (tmode);
25321 va_start (ap, fcode);
25323 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25325 for (;;)
25327 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25329 if (thisarg == NEON_ARG_STOP)
25330 break;
25331 else
25333 opno = argc + have_retval;
25334 mode[argc] = insn_data[icode].operand[opno].mode;
25335 arg[argc] = CALL_EXPR_ARG (exp, argc);
25336 arg_type = TREE_VALUE (formals);
25337 if (thisarg == NEON_ARG_MEMORY)
25339 other_mode = insn_data[icode].operand[1 - opno].mode;
25340 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25341 mode[argc], other_mode,
25342 type_mode);
25345 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P

25346 is returned. */
25347 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25348 (thisarg == NEON_ARG_MEMORY
25349 ? EXPAND_MEMORY : EXPAND_NORMAL));
25351 switch (thisarg)
25353 case NEON_ARG_COPY_TO_REG:
25354 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25355 if (!(*insn_data[icode].operand[opno].predicate)
25356 (op[argc], mode[argc]))
25357 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25358 break;
25360 case NEON_ARG_CONSTANT:
25361 /* FIXME: This error message is somewhat unhelpful. */
25362 if (!(*insn_data[icode].operand[opno].predicate)
25363 (op[argc], mode[argc]))
25364 error ("argument must be a constant");
25365 break;
25367 case NEON_ARG_MEMORY:
25368 /* Check if expand failed. */
25369 if (op[argc] == const0_rtx)
25370 return 0;
25371 gcc_assert (MEM_P (op[argc]));
25372 PUT_MODE (op[argc], mode[argc]);
25373 /* ??? arm_neon.h uses the same built-in functions for signed
25374 and unsigned accesses, casting where necessary. This isn't
25375 alias safe. */
25376 set_mem_alias_set (op[argc], 0);
25377 if (!(*insn_data[icode].operand[opno].predicate)
25378 (op[argc], mode[argc]))
25379 op[argc] = (replace_equiv_address
25380 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25381 break;
25383 case NEON_ARG_STOP:
25384 gcc_unreachable ();
25387 argc++;
25388 formals = TREE_CHAIN (formals);
25392 va_end (ap);
25394 if (have_retval)
25395 switch (argc)
25397 case 1:
25398 pat = GEN_FCN (icode) (target, op[0]);
25399 break;
25401 case 2:
25402 pat = GEN_FCN (icode) (target, op[0], op[1]);
25403 break;
25405 case 3:
25406 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25407 break;
25409 case 4:
25410 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25411 break;
25413 case 5:
25414 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25415 break;
25417 default:
25418 gcc_unreachable ();
25420 else
25421 switch (argc)
25423 case 1:
25424 pat = GEN_FCN (icode) (op[0]);
25425 break;
25427 case 2:
25428 pat = GEN_FCN (icode) (op[0], op[1]);
25429 break;
25431 case 3:
25432 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25433 break;
25435 case 4:
25436 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25437 break;
25439 case 5:
25440 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25441 break;
25443 default:
25444 gcc_unreachable ();
25447 if (!pat)
25448 return 0;
25450 emit_insn (pat);
25452 return target;
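/* For example, a plain binary operation is expanded (see the NEON_BINOP case
   below) with a call of the form

	arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
			      NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
			      NEON_ARG_STOP);

   i.e. two register operands terminated by NEON_ARG_STOP; the loop above
   walks that list and gives each argument the treatment its descriptor
   requests before the pattern is generated.  */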
25455 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25456 constants defined per-instruction or per instruction-variant. Instead, the
25457 required info is looked up in the table neon_builtin_data. */
25458 static rtx
25459 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25461 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25462 neon_itype itype = d->itype;
25463 enum insn_code icode = d->code;
25464 neon_builtin_type_mode type_mode = d->mode;
25466 switch (itype)
25468 case NEON_UNOP:
25469 case NEON_CONVERT:
25470 case NEON_DUPLANE:
25471 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25472 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25474 case NEON_BINOP:
25475 case NEON_LOGICBINOP:
25476 case NEON_SCALARMUL:
25477 case NEON_SCALARMULL:
25478 case NEON_SCALARMULH:
25479 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25480 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25482 case NEON_TERNOP:
25483 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25484 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25485 NEON_ARG_STOP);
25487 case NEON_GETLANE:
25488 case NEON_FIXCONV:
25489 case NEON_SHIFTIMM:
25490 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25491 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25492 NEON_ARG_STOP);
25494 case NEON_CREATE:
25495 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25496 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25498 case NEON_DUP:
25499 case NEON_RINT:
25500 case NEON_SPLIT:
25501 case NEON_FLOAT_WIDEN:
25502 case NEON_FLOAT_NARROW:
25503 case NEON_BSWAP:
25504 case NEON_REINTERP:
25505 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25506 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25508 case NEON_COPYSIGNF:
25509 case NEON_COMBINE:
25510 case NEON_VTBL:
25511 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25512 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25514 case NEON_LANEMUL:
25515 case NEON_LANEMULL:
25516 case NEON_LANEMULH:
25517 case NEON_SETLANE:
25518 case NEON_SHIFTINSERT:
25519 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25520 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25521 NEON_ARG_STOP);
25523 case NEON_LANEMAC:
25524 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25525 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25526 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25528 case NEON_SHIFTACC:
25529 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25530 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25531 NEON_ARG_STOP);
25533 case NEON_SCALARMAC:
25534 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25535 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25536 NEON_ARG_STOP);
25538 case NEON_SELECT:
25539 case NEON_VTBX:
25540 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25541 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25542 NEON_ARG_STOP);
25544 case NEON_LOAD1:
25545 case NEON_LOADSTRUCT:
25546 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25547 NEON_ARG_MEMORY, NEON_ARG_STOP);
25549 case NEON_LOAD1LANE:
25550 case NEON_LOADSTRUCTLANE:
25551 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25552 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25553 NEON_ARG_STOP);
25555 case NEON_STORE1:
25556 case NEON_STORESTRUCT:
25557 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25558 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25560 case NEON_STORE1LANE:
25561 case NEON_STORESTRUCTLANE:
25562 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25563 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25564 NEON_ARG_STOP);
25567 gcc_unreachable ();
25570 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25571 void
25572 neon_reinterpret (rtx dest, rtx src)
25574 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
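/* For example, an intrinsic such as vreinterpretq_s32_f32 only changes the
   view taken of a register's bits, so it reduces to a plain move of the
   unmodified contents; gen_lowpart is safe here because the source and
   destination modes always have the same size.  */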
25577 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25578 not to early-clobber SRC registers in the process.
25580 We assume that the operands described by SRC and DEST represent a
25581 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25582 number of components into which the copy has been decomposed. */
25583 void
25584 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25586 unsigned int i;
25588 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25589 || REGNO (operands[0]) < REGNO (operands[1]))
25591 for (i = 0; i < count; i++)
25593 operands[2 * i] = dest[i];
25594 operands[2 * i + 1] = src[i];
25597 else
25599 for (i = 0; i < count; i++)
25601 operands[2 * i] = dest[count - i - 1];
25602 operands[2 * i + 1] = src[count - i - 1];
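/* A worked example: copying the two-register block {d1, d2} into {d2, d3}
   overlaps and has REGNO (dest) > REGNO (src), so the copies are ordered in
   reverse (d2 -> d3 first, then d1 -> d2); emitting them forwards would
   overwrite d2 before its old value had been read.  */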
25607 /* Split operands into moves from op[1] + op[2] into op[0]. */
25609 void
25610 neon_split_vcombine (rtx operands[3])
25612 unsigned int dest = REGNO (operands[0]);
25613 unsigned int src1 = REGNO (operands[1]);
25614 unsigned int src2 = REGNO (operands[2]);
25615 machine_mode halfmode = GET_MODE (operands[1]);
25616 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25617 rtx destlo, desthi;
25619 if (src1 == dest && src2 == dest + halfregs)
25621 /* No-op move. Can't split to nothing; emit something. */
25622 emit_note (NOTE_INSN_DELETED);
25623 return;
25626 /* Preserve register attributes for variable tracking. */
25627 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25628 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25629 GET_MODE_SIZE (halfmode));
25631 /* Special case of reversed high/low parts. Use VSWP. */
25632 if (src2 == dest && src1 == dest + halfregs)
25634 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25635 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25636 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25637 return;
25640 if (!reg_overlap_mentioned_p (operands[2], destlo))
25642 /* Try to avoid unnecessary moves if part of the result
25643 is in the right place already. */
25644 if (src1 != dest)
25645 emit_move_insn (destlo, operands[1]);
25646 if (src2 != dest + halfregs)
25647 emit_move_insn (desthi, operands[2]);
25649 else
25651 if (src2 != dest + halfregs)
25652 emit_move_insn (desthi, operands[2]);
25653 if (src1 != dest)
25654 emit_move_insn (destlo, operands[1]);
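/* Examples of the cases above: combining d0 and d1 into q0 (the pair d0/d1)
   is already in place and emits only a deleted-insn note; combining d1 and
   d0 into q0 is the reversed case handled by VSWP; anything else falls
   through to one or two ordinary moves, ordered so that the second move's
   source is not clobbered by the first.  */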
25658 /* Expand an expression EXP that calls a built-in function,
25659 with result going to TARGET if that's convenient
25660 (and in mode MODE if that's convenient).
25661 SUBTARGET may be used as the target for computing one of EXP's operands.
25662 IGNORE is nonzero if the value is to be ignored. */
25664 static rtx
25665 arm_expand_builtin (tree exp,
25666 rtx target,
25667 rtx subtarget ATTRIBUTE_UNUSED,
25668 machine_mode mode ATTRIBUTE_UNUSED,
25669 int ignore ATTRIBUTE_UNUSED)
25671 const struct builtin_description * d;
25672 enum insn_code icode;
25673 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25674 tree arg0;
25675 tree arg1;
25676 tree arg2;
25677 rtx op0;
25678 rtx op1;
25679 rtx op2;
25680 rtx pat;
25681 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25682 size_t i;
25683 machine_mode tmode;
25684 machine_mode mode0;
25685 machine_mode mode1;
25686 machine_mode mode2;
25687 int opint;
25688 int selector;
25689 int mask;
25690 int imm;
25692 if (fcode >= ARM_BUILTIN_NEON_BASE)
25693 return arm_expand_neon_builtin (fcode, exp, target);
25695 switch (fcode)
25697 case ARM_BUILTIN_GET_FPSCR:
25698 case ARM_BUILTIN_SET_FPSCR:
25699 if (fcode == ARM_BUILTIN_GET_FPSCR)
25701 icode = CODE_FOR_get_fpscr;
25702 target = gen_reg_rtx (SImode);
25703 pat = GEN_FCN (icode) (target);
25705 else
25707 target = NULL_RTX;
25708 icode = CODE_FOR_set_fpscr;
25709 arg0 = CALL_EXPR_ARG (exp, 0);
25710 op0 = expand_normal (arg0);
25711 pat = GEN_FCN (icode) (op0);
25713 emit_insn (pat);
25714 return target;
25716 case ARM_BUILTIN_TEXTRMSB:
25717 case ARM_BUILTIN_TEXTRMUB:
25718 case ARM_BUILTIN_TEXTRMSH:
25719 case ARM_BUILTIN_TEXTRMUH:
25720 case ARM_BUILTIN_TEXTRMSW:
25721 case ARM_BUILTIN_TEXTRMUW:
25722 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25723 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25724 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25725 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25726 : CODE_FOR_iwmmxt_textrmw);
25728 arg0 = CALL_EXPR_ARG (exp, 0);
25729 arg1 = CALL_EXPR_ARG (exp, 1);
25730 op0 = expand_normal (arg0);
25731 op1 = expand_normal (arg1);
25732 tmode = insn_data[icode].operand[0].mode;
25733 mode0 = insn_data[icode].operand[1].mode;
25734 mode1 = insn_data[icode].operand[2].mode;
25736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25737 op0 = copy_to_mode_reg (mode0, op0);
25738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25740 /* @@@ better error message */
25741 error ("selector must be an immediate");
25742 return gen_reg_rtx (tmode);
25745 opint = INTVAL (op1);
25746 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25748 if (opint > 7 || opint < 0)
25749 error ("the range of selector should be in 0 to 7");
25751 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25753 if (opint > 3 || opint < 0)
25754 error ("the range of selector should be in 0 to 3");
25756 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25758 if (opint > 1 || opint < 0)
25759 error ("the range of selector should be in 0 to 1");
25762 if (target == 0
25763 || GET_MODE (target) != tmode
25764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25765 target = gen_reg_rtx (tmode);
25766 pat = GEN_FCN (icode) (target, op0, op1);
25767 if (! pat)
25768 return 0;
25769 emit_insn (pat);
25770 return target;
25772 case ARM_BUILTIN_WALIGNI:
25773 /* If op2 is an immediate, call waligni; otherwise call walignr. */
25774 arg0 = CALL_EXPR_ARG (exp, 0);
25775 arg1 = CALL_EXPR_ARG (exp, 1);
25776 arg2 = CALL_EXPR_ARG (exp, 2);
25777 op0 = expand_normal (arg0);
25778 op1 = expand_normal (arg1);
25779 op2 = expand_normal (arg2);
25780 if (CONST_INT_P (op2))
25782 icode = CODE_FOR_iwmmxt_waligni;
25783 tmode = insn_data[icode].operand[0].mode;
25784 mode0 = insn_data[icode].operand[1].mode;
25785 mode1 = insn_data[icode].operand[2].mode;
25786 mode2 = insn_data[icode].operand[3].mode;
25787 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25788 op0 = copy_to_mode_reg (mode0, op0);
25789 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25790 op1 = copy_to_mode_reg (mode1, op1);
25791 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25792 selector = INTVAL (op2);
25793 if (selector > 7 || selector < 0)
25794 error ("the range of selector should be in 0 to 7");
25796 else
25798 icode = CODE_FOR_iwmmxt_walignr;
25799 tmode = insn_data[icode].operand[0].mode;
25800 mode0 = insn_data[icode].operand[1].mode;
25801 mode1 = insn_data[icode].operand[2].mode;
25802 mode2 = insn_data[icode].operand[3].mode;
25803 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25804 op0 = copy_to_mode_reg (mode0, op0);
25805 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25806 op1 = copy_to_mode_reg (mode1, op1);
25807 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25808 op2 = copy_to_mode_reg (mode2, op2);
25810 if (target == 0
25811 || GET_MODE (target) != tmode
25812 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25813 target = gen_reg_rtx (tmode);
25814 pat = GEN_FCN (icode) (target, op0, op1, op2);
25815 if (!pat)
25816 return 0;
25817 emit_insn (pat);
25818 return target;
25820 case ARM_BUILTIN_TINSRB:
25821 case ARM_BUILTIN_TINSRH:
25822 case ARM_BUILTIN_TINSRW:
25823 case ARM_BUILTIN_WMERGE:
25824 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25825 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25826 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25827 : CODE_FOR_iwmmxt_tinsrw);
25828 arg0 = CALL_EXPR_ARG (exp, 0);
25829 arg1 = CALL_EXPR_ARG (exp, 1);
25830 arg2 = CALL_EXPR_ARG (exp, 2);
25831 op0 = expand_normal (arg0);
25832 op1 = expand_normal (arg1);
25833 op2 = expand_normal (arg2);
25834 tmode = insn_data[icode].operand[0].mode;
25835 mode0 = insn_data[icode].operand[1].mode;
25836 mode1 = insn_data[icode].operand[2].mode;
25837 mode2 = insn_data[icode].operand[3].mode;
25839 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25840 op0 = copy_to_mode_reg (mode0, op0);
25841 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25842 op1 = copy_to_mode_reg (mode1, op1);
25843 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25845 error ("selector must be an immediate");
25846 return const0_rtx;
25848 if (icode == CODE_FOR_iwmmxt_wmerge)
25850 selector = INTVAL (op2);
25851 if (selector > 7 || selector < 0)
25852 error ("the range of selector should be in 0 to 7");
25854 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25855 || (icode == CODE_FOR_iwmmxt_tinsrh)
25856 || (icode == CODE_FOR_iwmmxt_tinsrw))
25858 mask = 0x01;
25859 selector= INTVAL (op2);
25860 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25861 error ("the range of selector should be in 0 to 7");
25862 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25863 error ("the range of selector should be in 0 to 3");
25864 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25865 error ("the range of selector should be in 0 to 1");
25866 mask <<= selector;
25867 op2 = GEN_INT (mask);
25869 if (target == 0
25870 || GET_MODE (target) != tmode
25871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25872 target = gen_reg_rtx (tmode);
25873 pat = GEN_FCN (icode) (target, op0, op1, op2);
25874 if (! pat)
25875 return 0;
25876 emit_insn (pat);
25877 return target;
25879 case ARM_BUILTIN_SETWCGR0:
25880 case ARM_BUILTIN_SETWCGR1:
25881 case ARM_BUILTIN_SETWCGR2:
25882 case ARM_BUILTIN_SETWCGR3:
25883 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25884 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25885 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25886 : CODE_FOR_iwmmxt_setwcgr3);
25887 arg0 = CALL_EXPR_ARG (exp, 0);
25888 op0 = expand_normal (arg0);
25889 mode0 = insn_data[icode].operand[0].mode;
25890 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25891 op0 = copy_to_mode_reg (mode0, op0);
25892 pat = GEN_FCN (icode) (op0);
25893 if (!pat)
25894 return 0;
25895 emit_insn (pat);
25896 return 0;
25898 case ARM_BUILTIN_GETWCGR0:
25899 case ARM_BUILTIN_GETWCGR1:
25900 case ARM_BUILTIN_GETWCGR2:
25901 case ARM_BUILTIN_GETWCGR3:
25902 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25903 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25904 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25905 : CODE_FOR_iwmmxt_getwcgr3);
25906 tmode = insn_data[icode].operand[0].mode;
25907 if (target == 0
25908 || GET_MODE (target) != tmode
25909 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25910 target = gen_reg_rtx (tmode);
25911 pat = GEN_FCN (icode) (target);
25912 if (!pat)
25913 return 0;
25914 emit_insn (pat);
25915 return target;
25917 case ARM_BUILTIN_WSHUFH:
25918 icode = CODE_FOR_iwmmxt_wshufh;
25919 arg0 = CALL_EXPR_ARG (exp, 0);
25920 arg1 = CALL_EXPR_ARG (exp, 1);
25921 op0 = expand_normal (arg0);
25922 op1 = expand_normal (arg1);
25923 tmode = insn_data[icode].operand[0].mode;
25924 mode1 = insn_data[icode].operand[1].mode;
25925 mode2 = insn_data[icode].operand[2].mode;
25927 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25928 op0 = copy_to_mode_reg (mode1, op0);
25929 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25931 error ("mask must be an immediate");
25932 return const0_rtx;
25934 selector = INTVAL (op1);
25935 if (selector < 0 || selector > 255)
25936 error ("the range of mask should be in 0 to 255");
25937 if (target == 0
25938 || GET_MODE (target) != tmode
25939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25940 target = gen_reg_rtx (tmode);
25941 pat = GEN_FCN (icode) (target, op0, op1);
25942 if (! pat)
25943 return 0;
25944 emit_insn (pat);
25945 return target;
25947 case ARM_BUILTIN_WMADDS:
25948 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25949 case ARM_BUILTIN_WMADDSX:
25950 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25951 case ARM_BUILTIN_WMADDSN:
25952 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25953 case ARM_BUILTIN_WMADDU:
25954 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25955 case ARM_BUILTIN_WMADDUX:
25956 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25957 case ARM_BUILTIN_WMADDUN:
25958 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25959 case ARM_BUILTIN_WSADBZ:
25960 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25961 case ARM_BUILTIN_WSADHZ:
25962 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25964 /* Several three-argument builtins. */
25965 case ARM_BUILTIN_WMACS:
25966 case ARM_BUILTIN_WMACU:
25967 case ARM_BUILTIN_TMIA:
25968 case ARM_BUILTIN_TMIAPH:
25969 case ARM_BUILTIN_TMIATT:
25970 case ARM_BUILTIN_TMIATB:
25971 case ARM_BUILTIN_TMIABT:
25972 case ARM_BUILTIN_TMIABB:
25973 case ARM_BUILTIN_WQMIABB:
25974 case ARM_BUILTIN_WQMIABT:
25975 case ARM_BUILTIN_WQMIATB:
25976 case ARM_BUILTIN_WQMIATT:
25977 case ARM_BUILTIN_WQMIABBN:
25978 case ARM_BUILTIN_WQMIABTN:
25979 case ARM_BUILTIN_WQMIATBN:
25980 case ARM_BUILTIN_WQMIATTN:
25981 case ARM_BUILTIN_WMIABB:
25982 case ARM_BUILTIN_WMIABT:
25983 case ARM_BUILTIN_WMIATB:
25984 case ARM_BUILTIN_WMIATT:
25985 case ARM_BUILTIN_WMIABBN:
25986 case ARM_BUILTIN_WMIABTN:
25987 case ARM_BUILTIN_WMIATBN:
25988 case ARM_BUILTIN_WMIATTN:
25989 case ARM_BUILTIN_WMIAWBB:
25990 case ARM_BUILTIN_WMIAWBT:
25991 case ARM_BUILTIN_WMIAWTB:
25992 case ARM_BUILTIN_WMIAWTT:
25993 case ARM_BUILTIN_WMIAWBBN:
25994 case ARM_BUILTIN_WMIAWBTN:
25995 case ARM_BUILTIN_WMIAWTBN:
25996 case ARM_BUILTIN_WMIAWTTN:
25997 case ARM_BUILTIN_WSADB:
25998 case ARM_BUILTIN_WSADH:
25999 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26000 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26001 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26002 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26003 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26004 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26005 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26006 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26007 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26008 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26009 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26010 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26011 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26012 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26013 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26014 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26015 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26016 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26017 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26018 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26019 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26020 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26021 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26022 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26023 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26024 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26025 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26026 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26027 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26028 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26029 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26030 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26031 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26032 : CODE_FOR_iwmmxt_wsadh);
26033 arg0 = CALL_EXPR_ARG (exp, 0);
26034 arg1 = CALL_EXPR_ARG (exp, 1);
26035 arg2 = CALL_EXPR_ARG (exp, 2);
26036 op0 = expand_normal (arg0);
26037 op1 = expand_normal (arg1);
26038 op2 = expand_normal (arg2);
26039 tmode = insn_data[icode].operand[0].mode;
26040 mode0 = insn_data[icode].operand[1].mode;
26041 mode1 = insn_data[icode].operand[2].mode;
26042 mode2 = insn_data[icode].operand[3].mode;
26044 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26045 op0 = copy_to_mode_reg (mode0, op0);
26046 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26047 op1 = copy_to_mode_reg (mode1, op1);
26048 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26049 op2 = copy_to_mode_reg (mode2, op2);
26050 if (target == 0
26051 || GET_MODE (target) != tmode
26052 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26053 target = gen_reg_rtx (tmode);
26054 pat = GEN_FCN (icode) (target, op0, op1, op2);
26055 if (! pat)
26056 return 0;
26057 emit_insn (pat);
26058 return target;
26060 case ARM_BUILTIN_WZERO:
26061 target = gen_reg_rtx (DImode);
26062 emit_insn (gen_iwmmxt_clrdi (target));
26063 return target;
26065 case ARM_BUILTIN_WSRLHI:
26066 case ARM_BUILTIN_WSRLWI:
26067 case ARM_BUILTIN_WSRLDI:
26068 case ARM_BUILTIN_WSLLHI:
26069 case ARM_BUILTIN_WSLLWI:
26070 case ARM_BUILTIN_WSLLDI:
26071 case ARM_BUILTIN_WSRAHI:
26072 case ARM_BUILTIN_WSRAWI:
26073 case ARM_BUILTIN_WSRADI:
26074 case ARM_BUILTIN_WRORHI:
26075 case ARM_BUILTIN_WRORWI:
26076 case ARM_BUILTIN_WRORDI:
26077 case ARM_BUILTIN_WSRLH:
26078 case ARM_BUILTIN_WSRLW:
26079 case ARM_BUILTIN_WSRLD:
26080 case ARM_BUILTIN_WSLLH:
26081 case ARM_BUILTIN_WSLLW:
26082 case ARM_BUILTIN_WSLLD:
26083 case ARM_BUILTIN_WSRAH:
26084 case ARM_BUILTIN_WSRAW:
26085 case ARM_BUILTIN_WSRAD:
26086 case ARM_BUILTIN_WRORH:
26087 case ARM_BUILTIN_WRORW:
26088 case ARM_BUILTIN_WRORD:
26089 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26090 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26091 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26092 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26093 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26094 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26095 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26096 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26097 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26098 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26099 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26100 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26101 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26102 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26103 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26104 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26105 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26106 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26107 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26108 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26109 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26110 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26111 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26112 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26113 : CODE_FOR_nothing);
26114 arg1 = CALL_EXPR_ARG (exp, 1);
26115 op1 = expand_normal (arg1);
26116 if (GET_MODE (op1) == VOIDmode)
26118 imm = INTVAL (op1);
26119 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26120 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26121 && (imm < 0 || imm > 32))
26123 if (fcode == ARM_BUILTIN_WRORHI)
26124 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26125 else if (fcode == ARM_BUILTIN_WRORWI)
26126 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26127 else if (fcode == ARM_BUILTIN_WRORH)
26128 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26129 else
26130 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26132 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26133 && (imm < 0 || imm > 64))
26135 if (fcode == ARM_BUILTIN_WRORDI)
26136 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26137 else
26138 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26140 else if (imm < 0)
26142 if (fcode == ARM_BUILTIN_WSRLHI)
26143 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26144 else if (fcode == ARM_BUILTIN_WSRLWI)
26145 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26146 else if (fcode == ARM_BUILTIN_WSRLDI)
26147 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26148 else if (fcode == ARM_BUILTIN_WSLLHI)
26149 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26150 else if (fcode == ARM_BUILTIN_WSLLWI)
26151 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26152 else if (fcode == ARM_BUILTIN_WSLLDI)
26153 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26154 else if (fcode == ARM_BUILTIN_WSRAHI)
26155 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26156 else if (fcode == ARM_BUILTIN_WSRAWI)
26157 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26158 else if (fcode == ARM_BUILTIN_WSRADI)
26159 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26160 else if (fcode == ARM_BUILTIN_WSRLH)
26161 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26162 else if (fcode == ARM_BUILTIN_WSRLW)
26163 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26164 else if (fcode == ARM_BUILTIN_WSRLD)
26165 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26166 else if (fcode == ARM_BUILTIN_WSLLH)
26167 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26168 else if (fcode == ARM_BUILTIN_WSLLW)
26169 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26170 else if (fcode == ARM_BUILTIN_WSLLD)
26171 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26172 else if (fcode == ARM_BUILTIN_WSRAH)
26173 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26174 else if (fcode == ARM_BUILTIN_WSRAW)
26175 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26176 else
26177 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26180 return arm_expand_binop_builtin (icode, exp, target);
26182 default:
26183 break;
26186 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26187 if (d->code == (const enum arm_builtins) fcode)
26188 return arm_expand_binop_builtin (d->icode, exp, target);
26190 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26191 if (d->code == (const enum arm_builtins) fcode)
26192 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26194 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26195 if (d->code == (const enum arm_builtins) fcode)
26196 return arm_expand_ternop_builtin (d->icode, exp, target);
26198 /* @@@ Should really do something sensible here. */
26199 return NULL_RTX;
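/* For example, the FPSCR pair handled first in the switch above has no table
   entry: __builtin_arm_get_fpscr () expands to a single read of the
   floating-point status and control register into a fresh SImode register,
   while __builtin_arm_set_fpscr (x) expands to the corresponding write and
   produces no result.  */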
26202 /* Return the number (counting from 0) of
26203 the least significant set bit in MASK. */
26205 inline static int
26206 number_of_first_bit_set (unsigned mask)
26208 return ctz_hwi (mask);
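/* E.g. number_of_first_bit_set (0x28) is 3: 0x28 is binary 101000, whose
   lowest set bit is bit 3.  */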
26211 /* Like emit_multi_reg_push, but allowing for a different set of
26212 registers to be described as saved. MASK is the set of registers
26213 to be saved; REAL_REGS is the set of registers to be described as
26214 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26216 static rtx_insn *
26217 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26219 unsigned long regno;
26220 rtx par[10], tmp, reg;
26221 rtx_insn *insn;
26222 int i, j;
26224 /* Build the parallel of the registers actually being stored. */
26225 for (i = 0; mask; ++i, mask &= mask - 1)
26227 regno = ctz_hwi (mask);
26228 reg = gen_rtx_REG (SImode, regno);
26230 if (i == 0)
26231 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26232 else
26233 tmp = gen_rtx_USE (VOIDmode, reg);
26235 par[i] = tmp;
26238 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26239 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26240 tmp = gen_frame_mem (BLKmode, tmp);
26241 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26242 par[0] = tmp;
26244 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26245 insn = emit_insn (tmp);
26247 /* Always build the stack adjustment note for unwind info. */
26248 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26249 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26250 par[0] = tmp;
26252 /* Build the parallel of the registers recorded as saved for unwind. */
26253 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26255 regno = ctz_hwi (real_regs);
26256 reg = gen_rtx_REG (SImode, regno);
26258 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26259 tmp = gen_frame_mem (SImode, tmp);
26260 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26261 RTX_FRAME_RELATED_P (tmp) = 1;
26262 par[j + 1] = tmp;
26265 if (j == 0)
26266 tmp = par[0];
26267 else
26269 RTX_FRAME_RELATED_P (par[0]) = 1;
26270 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26273 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26275 return insn;
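/* For example, MASK = (1 << 4) | (1 << 5) | (1 << LR_REGNUM) describes
   "push {r4, r5, lr}": the PARALLEL stores three registers through a
   PRE_MODIFY of the stack pointer by -12, and the REG_FRAME_RELATED_EXPR
   note records the stack adjustment plus the REAL_REGS saves at sp, sp + 4
   and sp + 8 for the unwinder.  */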
26278 /* Emit code to push or pop registers to or from the stack. F is the
26279 assembly file. MASK is the registers to pop. */
26280 static void
26281 thumb_pop (FILE *f, unsigned long mask)
26283 int regno;
26284 int lo_mask = mask & 0xFF;
26285 int pushed_words = 0;
26287 gcc_assert (mask);
26289 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26291 /* Special case. Do not generate a POP PC statement here, do it in
26292 thumb_exit (). */
26293 thumb_exit (f, -1);
26294 return;
26297 fprintf (f, "\tpop\t{");
26299 /* Look at the low registers first. */
26300 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26302 if (lo_mask & 1)
26304 asm_fprintf (f, "%r", regno);
26306 if ((lo_mask & ~1) != 0)
26307 fprintf (f, ", ");
26309 pushed_words++;
26313 if (mask & (1 << PC_REGNUM))
26315 /* Catch popping the PC. */
26316 if (TARGET_INTERWORK || TARGET_BACKTRACE
26317 || crtl->calls_eh_return)
26319 /* The PC is never popped directly; instead
26320 it is popped into r3 and then BX is used. */
26321 fprintf (f, "}\n");
26323 thumb_exit (f, -1);
26325 return;
26327 else
26329 if (mask & 0xFF)
26330 fprintf (f, ", ");
26332 asm_fprintf (f, "%r", PC_REGNUM);
26336 fprintf (f, "}\n");
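/* For example, MASK = 0xf0 | (1 << PC_REGNUM) normally prints
   "pop {r4, r5, r6, r7, pc}"; under TARGET_INTERWORK, TARGET_BACKTRACE or an
   eh_return the PC is not popped directly and thumb_exit is called to finish
   the return instead.  */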
26339 /* Generate code to return from a thumb function.
26340 If 'reg_containing_return_addr' is -1, then the return address is
26341 actually on the stack, at the stack pointer. */
26342 static void
26343 thumb_exit (FILE *f, int reg_containing_return_addr)
26345 unsigned regs_available_for_popping;
26346 unsigned regs_to_pop;
26347 int pops_needed;
26348 unsigned available;
26349 unsigned required;
26350 machine_mode mode;
26351 int size;
26352 int restore_a4 = FALSE;
26354 /* Compute the registers we need to pop. */
26355 regs_to_pop = 0;
26356 pops_needed = 0;
26358 if (reg_containing_return_addr == -1)
26360 regs_to_pop |= 1 << LR_REGNUM;
26361 ++pops_needed;
26364 if (TARGET_BACKTRACE)
26366 /* Restore the (ARM) frame pointer and stack pointer. */
26367 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26368 pops_needed += 2;
26371 /* If there is nothing to pop then just emit the BX instruction and
26372 return. */
26373 if (pops_needed == 0)
26375 if (crtl->calls_eh_return)
26376 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26378 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26379 return;
26381 /* Otherwise if we are not supporting interworking and we have not created
26382 a backtrace structure and the function was not entered in ARM mode then
26383 just pop the return address straight into the PC. */
26384 else if (!TARGET_INTERWORK
26385 && !TARGET_BACKTRACE
26386 && !is_called_in_ARM_mode (current_function_decl)
26387 && !crtl->calls_eh_return)
26389 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26390 return;
26393 /* Find out how many of the (return) argument registers we can corrupt. */
26394 regs_available_for_popping = 0;
26396 /* If returning via __builtin_eh_return, the bottom three registers
26397 all contain information needed for the return. */
26398 if (crtl->calls_eh_return)
26399 size = 12;
26400 else
26402 /* Deduce, if possible, the registers used from the function's
26403 return value. This is more reliable than examining
26404 df_regs_ever_live_p () because that will be set if the register is
26405 ever used in the function, not just if the register is used
26406 to hold a return value. */
26408 if (crtl->return_rtx != 0)
26409 mode = GET_MODE (crtl->return_rtx);
26410 else
26411 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26413 size = GET_MODE_SIZE (mode);
26415 if (size == 0)
26417 /* In a void function we can use any argument register.
26418 In a function that returns a structure on the stack
26419 we can use the second and third argument registers. */
26420 if (mode == VOIDmode)
26421 regs_available_for_popping =
26422 (1 << ARG_REGISTER (1))
26423 | (1 << ARG_REGISTER (2))
26424 | (1 << ARG_REGISTER (3));
26425 else
26426 regs_available_for_popping =
26427 (1 << ARG_REGISTER (2))
26428 | (1 << ARG_REGISTER (3));
26430 else if (size <= 4)
26431 regs_available_for_popping =
26432 (1 << ARG_REGISTER (2))
26433 | (1 << ARG_REGISTER (3));
26434 else if (size <= 8)
26435 regs_available_for_popping =
26436 (1 << ARG_REGISTER (3));
26439 /* Match registers to be popped with registers into which we pop them. */
26440 for (available = regs_available_for_popping,
26441 required = regs_to_pop;
26442 required != 0 && available != 0;
26443 available &= ~(available & - available),
26444 required &= ~(required & - required))
26445 -- pops_needed;
26447 /* If we have any popping registers left over, remove them. */
26448 if (available > 0)
26449 regs_available_for_popping &= ~available;
26451 /* Otherwise if we need another popping register we can use
26452 the fourth argument register. */
26453 else if (pops_needed)
26455 /* If we have not found any free argument registers and
26456 reg a4 contains the return address, we must move it. */
26457 if (regs_available_for_popping == 0
26458 && reg_containing_return_addr == LAST_ARG_REGNUM)
26460 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26461 reg_containing_return_addr = LR_REGNUM;
26463 else if (size > 12)
26465 /* Register a4 is being used to hold part of the return value,
26466 but we have dire need of a free, low register. */
26467 restore_a4 = TRUE;
26469 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26472 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26474 /* The fourth argument register is available. */
26475 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26477 --pops_needed;
26481 /* Pop as many registers as we can. */
26482 thumb_pop (f, regs_available_for_popping);
26484 /* Process the registers we popped. */
26485 if (reg_containing_return_addr == -1)
26487 /* The return address was popped into the lowest numbered register. */
26488 regs_to_pop &= ~(1 << LR_REGNUM);
26490 reg_containing_return_addr =
26491 number_of_first_bit_set (regs_available_for_popping);
26493 /* Remove this register from the mask of available registers, so that
26494 the return address will not be corrupted by further pops. */
26495 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26498 /* If we popped other registers then handle them here. */
26499 if (regs_available_for_popping)
26501 int frame_pointer;
26503 /* Work out which register currently contains the frame pointer. */
26504 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26506 /* Move it into the correct place. */
26507 asm_fprintf (f, "\tmov\t%r, %r\n",
26508 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26510 /* (Temporarily) remove it from the mask of popped registers. */
26511 regs_available_for_popping &= ~(1 << frame_pointer);
26512 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26514 if (regs_available_for_popping)
26516 int stack_pointer;
26518 /* We popped the stack pointer as well;
26519 find the register that contains it. */
26520 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26522 /* Move it into the stack register. */
26523 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26525 /* At this point we have popped all necessary registers, so
26526 do not worry about restoring regs_available_for_popping
26527 to its correct value:
26529 assert (pops_needed == 0)
26530 assert (regs_available_for_popping == (1 << frame_pointer))
26531 assert (regs_to_pop == (1 << STACK_POINTER)) */
26533 else
26535 /* Since we have just moved the popped value into the frame
26536 pointer, the popping register is available for reuse, and
26537 we know that we still have the stack pointer left to pop. */
26538 regs_available_for_popping |= (1 << frame_pointer);
26542 /* If we still have registers left on the stack, but we no longer have
26543 any registers into which we can pop them, then we must move the return
26544 address into the link register and make available the register that
26545 contained it. */
26546 if (regs_available_for_popping == 0 && pops_needed > 0)
26548 regs_available_for_popping |= 1 << reg_containing_return_addr;
26550 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26551 reg_containing_return_addr);
26553 reg_containing_return_addr = LR_REGNUM;
26556 /* If we have registers left on the stack then pop some more.
26557 We know that at most we will want to pop FP and SP. */
26558 if (pops_needed > 0)
26560 int popped_into;
26561 int move_to;
26563 thumb_pop (f, regs_available_for_popping);
26565 /* We have popped either FP or SP.
26566 Move whichever one it is into the correct register. */
26567 popped_into = number_of_first_bit_set (regs_available_for_popping);
26568 move_to = number_of_first_bit_set (regs_to_pop);
26570 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26572 regs_to_pop &= ~(1 << move_to);
26574 --pops_needed;
26577 /* If we still have not popped everything then we must have only
26578 had one register available to us and we are now popping the SP. */
26579 if (pops_needed > 0)
26581 int popped_into;
26583 thumb_pop (f, regs_available_for_popping);
26585 popped_into = number_of_first_bit_set (regs_available_for_popping);
26587 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26588 /*
26589 assert (regs_to_pop == (1 << STACK_POINTER))
26590 assert (pops_needed == 1)
26591 */
26594 /* If necessary restore the a4 register. */
26595 if (restore_a4)
26597 if (reg_containing_return_addr != LR_REGNUM)
26599 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26600 reg_containing_return_addr = LR_REGNUM;
26603 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26606 if (crtl->calls_eh_return)
26607 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26609 /* Return to caller. */
26610 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
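/* For example, with no interworking, no backtrace structure and no
   eh_return, a return address left on the stack is handled by the early
   path above as a single "pop {pc}"; the longer path is only needed when
   the PC cannot be popped directly or when the frame pointer and stack
   pointer must also be restored around a return value held in r0-r3.  */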
26613 /* Scan INSN just before assembler is output for it.
26614 For Thumb-1, we track the status of the condition codes; this
26615 information is used in the cbranchsi4_insn pattern. */
26616 void
26617 thumb1_final_prescan_insn (rtx_insn *insn)
26619 if (flag_print_asm_name)
26620 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26621 INSN_ADDRESSES (INSN_UID (insn)));
26622 /* Don't overwrite the previous setter when we get to a cbranch. */
26623 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26625 enum attr_conds conds;
26627 if (cfun->machine->thumb1_cc_insn)
26629 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26630 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26631 CC_STATUS_INIT;
26633 conds = get_attr_conds (insn);
26634 if (conds == CONDS_SET)
26636 rtx set = single_set (insn);
26637 cfun->machine->thumb1_cc_insn = insn;
26638 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26639 cfun->machine->thumb1_cc_op1 = const0_rtx;
26640 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26641 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26643 rtx src1 = XEXP (SET_SRC (set), 1);
26644 if (src1 == const0_rtx)
26645 cfun->machine->thumb1_cc_mode = CCmode;
26647 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26649 /* Record the src register operand instead of dest because
26650 the cprop_hardreg pass propagates src. */
26651 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26654 else if (conds != CONDS_NOCOND)
26655 cfun->machine->thumb1_cc_insn = NULL_RTX;
26658 /* Check if unexpected far jump is used. */
26659 if (cfun->machine->lr_save_eliminated
26660 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26661 internal_error("Unexpected thumb1 far jump");
26664 int
26665 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26667 unsigned HOST_WIDE_INT mask = 0xff;
26668 int i;
26670 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26671 if (val == 0) /* XXX */
26672 return 0;
26674 for (i = 0; i < 25; i++)
26675 if ((val & (mask << i)) == val)
26676 return 1;
26678 return 0;
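/* E.g. thumb_shiftable_const (0x3fc00) is 1, since 0x3fc00 == 0xff << 10;
   thumb_shiftable_const (0x101) is 0 because its set bits span nine bits and
   cannot fit under the 8-bit mask at any shift.  */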
26681 /* Returns nonzero if the current function contains,
26682 or might contain a far jump. */
26683 static int
26684 thumb_far_jump_used_p (void)
26686 rtx_insn *insn;
26687 bool far_jump = false;
26688 unsigned int func_size = 0;
26690 /* This test is only important for leaf functions. */
26691 /* assert (!leaf_function_p ()); */
26693 /* If we have already decided that far jumps may be used,
26694 do not bother checking again, and always return true even if
26695 it turns out that they are not being used. Once we have made
26696 the decision that far jumps are present (and that hence the link
26697 register will be pushed onto the stack) we cannot go back on it. */
26698 if (cfun->machine->far_jump_used)
26699 return 1;
26701 /* If this function is not being called from the prologue/epilogue
26702 generation code then it must be being called from the
26703 INITIAL_ELIMINATION_OFFSET macro. */
26704 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26706 /* In this case we know that we are being asked about the elimination
26707 of the arg pointer register. If that register is not being used,
26708 then there are no arguments on the stack, and we do not have to
26709 worry that a far jump might force the prologue to push the link
26710 register, changing the stack offsets. In this case we can just
26711 return false, since the presence of far jumps in the function will
26712 not affect stack offsets.
26714 If the arg pointer is live (or if it was live, but has now been
26715 eliminated and so set to dead) then we do have to test to see if
26716 the function might contain a far jump. This test can lead to some
26717 false negatives, since before reload is completed the length of
26718 branch instructions is not known, so gcc defaults to returning their
26719 longest length, which in turn sets the far jump attribute to true.
26721 A false negative will not result in bad code being generated, but it
26722 will result in a needless push and pop of the link register. We
26723 hope that this does not occur too often.
26725 If we need doubleword stack alignment this could affect the other
26726 elimination offsets so we can't risk getting it wrong. */
26727 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26728 cfun->machine->arg_pointer_live = 1;
26729 else if (!cfun->machine->arg_pointer_live)
26730 return 0;
26733 /* We should not change far_jump_used during or after reload, as there is
26734 no chance to change stack frame layout. */
26735 if (reload_in_progress || reload_completed)
26736 return 0;
26738 /* Check to see if the function contains a branch
26739 insn with the far jump attribute set. */
26740 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26742 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26744 far_jump = true;
26746 func_size += get_attr_length (insn);
26749 /* The far_jump attribute will always be true for thumb1 before the
26750 shorten_branch pass, so checking the far_jump attribute before
26751 shorten_branch is not very useful.
26753 The following heuristic tries to estimate more accurately whether a far
26754 jump may eventually be needed. The heuristic is very conservative, as
26755 there is no chance to roll back a decision not to use far jumps.
26757 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
26758 that each 2-byte insn is associated with a 4-byte constant pool entry.
26759 Using function size 2048/3 as the threshold is conservative enough. */
26760 if (far_jump)
26762 if ((func_size * 3) >= 2048)
26764 /* Record the fact that we have decided that
26765 the function does use far jumps. */
26766 cfun->machine->far_jump_used = 1;
26767 return 1;
26771 return 0;
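/* A worked example of the heuristic: a function whose insn lengths sum to
   func_size = 700 bytes gives 700 * 3 = 2100 >= 2048, so far_jump_used is
   set and the link register will be saved; with func_size = 600,
   600 * 3 = 1800 < 2048 and the function is, for now, still assumed to need
   no far jumps.  */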
26774 /* Return nonzero if FUNC must be entered in ARM mode. */
26775 int
26776 is_called_in_ARM_mode (tree func)
26778 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26780 /* Ignore the problem about functions whose address is taken. */
26781 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26782 return TRUE;
26784 #ifdef ARM_PE
26785 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26786 #else
26787 return FALSE;
26788 #endif
26791 /* Given the stack offsets and register mask in OFFSETS, decide how
26792 many additional registers to push instead of subtracting a constant
26793 from SP. For epilogues the principle is the same except we use pop.
26794 FOR_PROLOGUE indicates which we're generating. */
26795 static int
26796 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26798 HOST_WIDE_INT amount;
26799 unsigned long live_regs_mask = offsets->saved_regs_mask;
26800 /* Extract a mask of the ones we can give to the Thumb's push/pop
26801 instruction. */
26802 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26803 /* Then count how many other high registers will need to be pushed. */
26804 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26805 int n_free, reg_base, size;
26807 if (!for_prologue && frame_pointer_needed)
26808 amount = offsets->locals_base - offsets->saved_regs;
26809 else
26810 amount = offsets->outgoing_args - offsets->saved_regs;
26812 /* If the stack frame size is 512 exactly, we can save one load
26813 instruction, which should make this a win even when optimizing
26814 for speed. */
26815 if (!optimize_size && amount != 512)
26816 return 0;
26818 /* Can't do this if there are high registers to push. */
26819 if (high_regs_pushed != 0)
26820 return 0;
26822 /* Shouldn't do it in the prologue if no registers would normally
26823 be pushed at all. In the epilogue, also allow it if we'll have
26824 a pop insn for the PC. */
26825 if (l_mask == 0
26826 && (for_prologue
26827 || TARGET_BACKTRACE
26828 || (live_regs_mask & 1 << LR_REGNUM) == 0
26829 || TARGET_INTERWORK
26830 || crtl->args.pretend_args_size != 0))
26831 return 0;
26833 /* Don't do this if thumb_expand_prologue wants to emit instructions
26834 between the push and the stack frame allocation. */
26835 if (for_prologue
26836 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26837 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26838 return 0;
26840 reg_base = 0;
26841 n_free = 0;
26842 if (!for_prologue)
26844 size = arm_size_return_regs ();
26845 reg_base = ARM_NUM_INTS (size);
26846 live_regs_mask >>= reg_base;
26849 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26850 && (for_prologue || call_used_regs[reg_base + n_free]))
26852 live_regs_mask >>= 1;
26853 n_free++;
26856 if (n_free == 0)
26857 return 0;
26858 gcc_assert (amount / 4 * 4 == amount);
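/* A single Thumb-1 SP adjustment can cover at most 508 bytes (a 7-bit
   immediate scaled by 4), so for frames of 512 bytes or more push just enough
   extra registers to bring the remaining adjustment down to 508. */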
26860 if (amount >= 512 && (amount - n_free * 4) < 512)
26861 return (amount - 508) / 4;
26862 if (amount <= n_free * 4)
26863 return amount / 4;
26864 return 0;
26867 /* The bits which aren't usefully expanded as rtl. */
26868 const char *
26869 thumb1_unexpanded_epilogue (void)
26871 arm_stack_offsets *offsets;
26872 int regno;
26873 unsigned long live_regs_mask = 0;
26874 int high_regs_pushed = 0;
26875 int extra_pop;
26876 int had_to_push_lr;
26877 int size;
26879 if (cfun->machine->return_used_this_function != 0)
26880 return "";
26882 if (IS_NAKED (arm_current_func_type ()))
26883 return "";
26885 offsets = arm_get_frame_offsets ();
26886 live_regs_mask = offsets->saved_regs_mask;
26887 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26889 /* If possible, deduce the registers used from the function's return value.
26890 This is more reliable than examining df_regs_ever_live_p () because that
26891 will be set if the register is ever used in the function, not just if
26892 the register is used to hold a return value. */
26893 size = arm_size_return_regs ();
26895 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26896 if (extra_pop > 0)
26898 unsigned long extra_mask = (1 << extra_pop) - 1;
26899 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26902 /* The prologue may have pushed some high registers to use as
26903 work registers. e.g. the testsuite file:
26904 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26905 compiles to produce:
26906 push {r4, r5, r6, r7, lr}
26907 mov r7, r9
26908 mov r6, r8
26909 push {r6, r7}
26910 as part of the prolog. We have to undo that pushing here. */
26912 if (high_regs_pushed)
26914 unsigned long mask = live_regs_mask & 0xff;
26915 int next_hi_reg;
26917 /* The available low registers depend on the size of the value we are
26918 returning. */
26919 if (size <= 12)
26920 mask |= 1 << 3;
26921 if (size <= 8)
26922 mask |= 1 << 2;
26924 if (mask == 0)
26925 /* Oh dear! We have no low registers into which we can pop
26926 high registers! */
26927 internal_error
26928 ("no low registers available for popping high registers");
26930 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26931 if (live_regs_mask & (1 << next_hi_reg))
26932 break;
26934 while (high_regs_pushed)
26936 /* Find lo register(s) into which the high register(s) can
26937 be popped. */
26938 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26940 if (mask & (1 << regno))
26941 high_regs_pushed--;
26942 if (high_regs_pushed == 0)
26943 break;
26946 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26948 /* Pop the values into the low register(s). */
26949 thumb_pop (asm_out_file, mask);
26951 /* Move the value(s) into the high registers. */
26952 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26954 if (mask & (1 << regno))
26956 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26957 regno);
26959 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26960 if (live_regs_mask & (1 << next_hi_reg))
26961 break;
26965 live_regs_mask &= ~0x0f00;
26968 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26969 live_regs_mask &= 0xff;
26971 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26973 /* Pop the return address into the PC. */
26974 if (had_to_push_lr)
26975 live_regs_mask |= 1 << PC_REGNUM;
26977 /* Either no argument registers were pushed or a backtrace
26978 structure was created which includes an adjusted stack
26979 pointer, so just pop everything. */
26980 if (live_regs_mask)
26981 thumb_pop (asm_out_file, live_regs_mask);
26983 /* We have either just popped the return address into the
26984 PC or it was kept in LR for the entire function.
26985 Note that thumb_pop has already called thumb_exit if the
26986 PC was in the list. */
26987 if (!had_to_push_lr)
26988 thumb_exit (asm_out_file, LR_REGNUM);
26990 else
26992 /* Pop everything but the return address. */
26993 if (live_regs_mask)
26994 thumb_pop (asm_out_file, live_regs_mask);
26996 if (had_to_push_lr)
26998 if (size > 12)
27000 /* We have no free low regs, so save one. */
27001 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27002 LAST_ARG_REGNUM);
27005 /* Get the return address into a temporary register. */
27006 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27008 if (size > 12)
27010 /* Move the return address to lr. */
27011 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27012 LAST_ARG_REGNUM);
27013 /* Restore the low register. */
27014 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27015 IP_REGNUM);
27016 regno = LR_REGNUM;
27018 else
27019 regno = LAST_ARG_REGNUM;
27021 else
27022 regno = LR_REGNUM;
27024 /* Remove the argument registers that were pushed onto the stack. */
27025 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27026 SP_REGNUM, SP_REGNUM,
27027 crtl->args.pretend_args_size);
27029 thumb_exit (asm_out_file, regno);
27032 return "";
27035 /* Functions to save and restore machine-specific function data. */
27036 static struct machine_function *
27037 arm_init_machine_status (void)
27039 struct machine_function *machine;
27040 machine = ggc_cleared_alloc<machine_function> ();
27042 #if ARM_FT_UNKNOWN != 0
27043 machine->func_type = ARM_FT_UNKNOWN;
27044 #endif
27045 return machine;
27048 /* Return an RTX indicating where the return address to the
27049 calling function can be found. */
27051 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27053 if (count != 0)
27054 return NULL_RTX;
27056 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27059 /* Do anything needed before RTL is emitted for each function. */
27060 void
27061 arm_init_expanders (void)
27063 /* Arrange to initialize and mark the machine per-function status. */
27064 init_machine_status = arm_init_machine_status;
27066 /* This is to stop the combine pass optimizing away the alignment
27067 adjustment of va_arg. */
27068 /* ??? It is claimed that this should not be necessary. */
27069 if (cfun)
27070 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27074 /* Like arm_compute_initial_elimination_offset. Simpler because there
27075 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27076 to point at the base of the local variables after static stack
27077 space for a function has been allocated. */
27079 HOST_WIDE_INT
27080 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27082 arm_stack_offsets *offsets;
27084 offsets = arm_get_frame_offsets ();
27086 switch (from)
27088 case ARG_POINTER_REGNUM:
27089 switch (to)
27091 case STACK_POINTER_REGNUM:
27092 return offsets->outgoing_args - offsets->saved_args;
27094 case FRAME_POINTER_REGNUM:
27095 return offsets->soft_frame - offsets->saved_args;
27097 case ARM_HARD_FRAME_POINTER_REGNUM:
27098 return offsets->saved_regs - offsets->saved_args;
27100 case THUMB_HARD_FRAME_POINTER_REGNUM:
27101 return offsets->locals_base - offsets->saved_args;
27103 default:
27104 gcc_unreachable ();
27106 break;
27108 case FRAME_POINTER_REGNUM:
27109 switch (to)
27111 case STACK_POINTER_REGNUM:
27112 return offsets->outgoing_args - offsets->soft_frame;
27114 case ARM_HARD_FRAME_POINTER_REGNUM:
27115 return offsets->saved_regs - offsets->soft_frame;
27117 case THUMB_HARD_FRAME_POINTER_REGNUM:
27118 return offsets->locals_base - offsets->soft_frame;
27120 default:
27121 gcc_unreachable ();
27123 break;
27125 default:
27126 gcc_unreachable ();
27130 /* Generate the function's prologue. */
27132 void
27133 thumb1_expand_prologue (void)
27135 rtx_insn *insn;
27137 HOST_WIDE_INT amount;
27138 arm_stack_offsets *offsets;
27139 unsigned long func_type;
27140 int regno;
27141 unsigned long live_regs_mask;
27142 unsigned long l_mask;
27143 unsigned high_regs_pushed = 0;
27145 func_type = arm_current_func_type ();
27147 /* Naked functions don't have prologues. */
27148 if (IS_NAKED (func_type))
27149 return;
27151 if (IS_INTERRUPT (func_type))
27153 error ("interrupt Service Routines cannot be coded in Thumb mode");
27154 return;
27157 if (is_called_in_ARM_mode (current_function_decl))
27158 emit_insn (gen_prologue_thumb1_interwork ());
27160 offsets = arm_get_frame_offsets ();
27161 live_regs_mask = offsets->saved_regs_mask;
27163 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27164 l_mask = live_regs_mask & 0x40ff;
27165 /* Then count how many other high registers will need to be pushed. */
27166 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27168 if (crtl->args.pretend_args_size)
27170 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27172 if (cfun->machine->uses_anonymous_args)
27174 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27175 unsigned long mask;
27177 mask = 1ul << (LAST_ARG_REGNUM + 1);
27178 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
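/* For example, 8 bytes of pretend args gives num_pushes == 2 and a mask
   covering r2 and r3, the last two argument registers. */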
27180 insn = thumb1_emit_multi_reg_push (mask, 0);
27182 else
27184 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27185 stack_pointer_rtx, x));
27187 RTX_FRAME_RELATED_P (insn) = 1;
27190 if (TARGET_BACKTRACE)
27192 HOST_WIDE_INT offset = 0;
27193 unsigned work_register;
27194 rtx work_reg, x, arm_hfp_rtx;
27196 /* We have been asked to create a stack backtrace structure.
27197 The code looks like this:
27199 0 .align 2
27200 0 func:
27201 0 sub SP, #16 Reserve space for 4 registers.
27202 2 push {R7} Push low registers.
27203 4 add R7, SP, #20 Get the stack pointer before the push.
27204 6 str R7, [SP, #8] Store the stack pointer
27205 (before reserving the space).
27206 8 mov R7, PC Get hold of the start of this code + 12.
27207 10 str R7, [SP, #16] Store it.
27208 12 mov R7, FP Get hold of the current frame pointer.
27209 14 str R7, [SP, #4] Store it.
27210 16 mov R7, LR Get hold of the current return address.
27211 18 str R7, [SP, #12] Store it.
27212 20 add R7, SP, #16 Point at the start of the
27213 backtrace structure.
27214 22 mov FP, R7 Put this value into the frame pointer. */
27216 work_register = thumb_find_work_register (live_regs_mask);
27217 work_reg = gen_rtx_REG (SImode, work_register);
27218 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27220 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27221 stack_pointer_rtx, GEN_INT (-16)));
27222 RTX_FRAME_RELATED_P (insn) = 1;
27224 if (l_mask)
27226 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27227 RTX_FRAME_RELATED_P (insn) = 1;
27229 offset = bit_count (l_mask) * UNITS_PER_WORD;
27232 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27233 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27235 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27236 x = gen_frame_mem (SImode, x);
27237 emit_move_insn (x, work_reg);
27239 /* Make sure that the instruction fetching the PC is in the right place
27240 to calculate "start of backtrace creation code + 12". */
27241 /* ??? The stores using the common WORK_REG ought to be enough to
27242 prevent the scheduler from doing anything weird. Failing that
27243 we could always move all of the following into an UNSPEC_VOLATILE. */
27244 if (l_mask)
27246 x = gen_rtx_REG (SImode, PC_REGNUM);
27247 emit_move_insn (work_reg, x);
27249 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27250 x = gen_frame_mem (SImode, x);
27251 emit_move_insn (x, work_reg);
27253 emit_move_insn (work_reg, arm_hfp_rtx);
27255 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27256 x = gen_frame_mem (SImode, x);
27257 emit_move_insn (x, work_reg);
27259 else
27261 emit_move_insn (work_reg, arm_hfp_rtx);
27263 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27264 x = gen_frame_mem (SImode, x);
27265 emit_move_insn (x, work_reg);
27267 x = gen_rtx_REG (SImode, PC_REGNUM);
27268 emit_move_insn (work_reg, x);
27270 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27271 x = gen_frame_mem (SImode, x);
27272 emit_move_insn (x, work_reg);
27275 x = gen_rtx_REG (SImode, LR_REGNUM);
27276 emit_move_insn (work_reg, x);
27278 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27279 x = gen_frame_mem (SImode, x);
27280 emit_move_insn (x, work_reg);
27282 x = GEN_INT (offset + 12);
27283 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27285 emit_move_insn (arm_hfp_rtx, work_reg);
27287 /* Optimization: If we are not pushing any low registers but we are going
27288 to push some high registers then delay our first push. This will just
27289 be a push of LR and we can combine it with the push of the first high
27290 register. */
27291 else if ((l_mask & 0xff) != 0
27292 || (high_regs_pushed == 0 && l_mask))
27294 unsigned long mask = l_mask;
27295 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27296 insn = thumb1_emit_multi_reg_push (mask, mask);
27297 RTX_FRAME_RELATED_P (insn) = 1;
27300 if (high_regs_pushed)
27302 unsigned pushable_regs;
27303 unsigned next_hi_reg;
27304 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27305 : crtl->args.info.nregs;
27306 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27308 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27309 if (live_regs_mask & (1 << next_hi_reg))
27310 break;
27312 /* Here we need to mask out registers used for passing arguments
27313 even if they could otherwise be pushed. This is to avoid using them
27314 to stash the high registers; doing so could clobber argument values. */
27315 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27317 if (pushable_regs == 0)
27318 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27320 while (high_regs_pushed > 0)
27322 unsigned long real_regs_mask = 0;
27324 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27326 if (pushable_regs & (1 << regno))
27328 emit_move_insn (gen_rtx_REG (SImode, regno),
27329 gen_rtx_REG (SImode, next_hi_reg));
27331 high_regs_pushed --;
27332 real_regs_mask |= (1 << next_hi_reg);
27334 if (high_regs_pushed)
27336 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27337 next_hi_reg --)
27338 if (live_regs_mask & (1 << next_hi_reg))
27339 break;
27341 else
27343 pushable_regs &= ~((1 << regno) - 1);
27344 break;
27349 /* If we had to find a work register and we have not yet
27350 saved the LR then add it to the list of regs to push. */
27351 if (l_mask == (1 << LR_REGNUM))
27353 pushable_regs |= l_mask;
27354 real_regs_mask |= l_mask;
27355 l_mask = 0;
27358 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27359 RTX_FRAME_RELATED_P (insn) = 1;
27363 /* Load the pic register before setting the frame pointer,
27364 so we can use r7 as a temporary work register. */
27365 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27366 arm_load_pic_register (live_regs_mask);
27368 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27369 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27370 stack_pointer_rtx);
27372 if (flag_stack_usage_info)
27373 current_function_static_stack_size
27374 = offsets->outgoing_args - offsets->saved_args;
27376 amount = offsets->outgoing_args - offsets->saved_regs;
27377 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27378 if (amount)
27380 if (amount < 512)
27382 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27383 GEN_INT (- amount)));
27384 RTX_FRAME_RELATED_P (insn) = 1;
27386 else
27388 rtx reg, dwarf;
27390 /* The stack decrement is too big for an immediate value in a single
27391 insn. In theory we could issue multiple subtracts, but after
27392 three of them it becomes more space efficient to place the full
27393 value in the constant pool and load into a register. (Also the
27394 ARM debugger really likes to see only one stack decrement per
27395 function). So instead we look for a scratch register into which
27396 we can load the decrement, and then we subtract this from the
27397 stack pointer. Unfortunately on the thumb the only available
27398 scratch registers are the argument registers, and we cannot use
27399 these as they may hold arguments to the function. Instead we
27400 attempt to locate a call preserved register which is used by this
27401 function. If we can find one, then we know that it will have
27402 been pushed at the start of the prologue and so we can corrupt
27403 it now. */
27404 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27405 if (live_regs_mask & (1 << regno))
27406 break;
27408 gcc_assert(regno <= LAST_LO_REGNUM);
27410 reg = gen_rtx_REG (SImode, regno);
27412 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27414 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27415 stack_pointer_rtx, reg));
27417 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27418 plus_constant (Pmode, stack_pointer_rtx,
27419 -amount));
27420 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27421 RTX_FRAME_RELATED_P (insn) = 1;
27425 if (frame_pointer_needed)
27426 thumb_set_frame_pointer (offsets);
27428 /* If we are profiling, make sure no instructions are scheduled before
27429 the call to mcount. Similarly if the user has requested no
27430 scheduling in the prologue. Similarly if we want non-call exceptions
27431 using the EABI unwinder, to prevent faulting instructions from being
27432 swapped with a stack adjustment. */
27433 if (crtl->profile || !TARGET_SCHED_PROLOG
27434 || (arm_except_unwind_info (&global_options) == UI_TARGET
27435 && cfun->can_throw_non_call_exceptions))
27436 emit_insn (gen_blockage ());
27438 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27439 if (live_regs_mask & 0xff)
27440 cfun->machine->lr_save_eliminated = 0;
27443 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27444 POP instruction can be generated. LR should be replaced by PC. All
27445 the required checks are already done by USE_RETURN_INSN (). Hence,
27446 all we really need to check here is whether a single register or
27447 multiple registers are to be popped. */
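/* The single-register case is built as a PARALLEL of a return and a
   post-increment load of PC from the stack, which the pattern named above can
   emit as a single pop of PC. */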
27448 void
27449 thumb2_expand_return (bool simple_return)
27451 int i, num_regs;
27452 unsigned long saved_regs_mask;
27453 arm_stack_offsets *offsets;
27455 offsets = arm_get_frame_offsets ();
27456 saved_regs_mask = offsets->saved_regs_mask;
27458 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27459 if (saved_regs_mask & (1 << i))
27460 num_regs++;
27462 if (!simple_return && saved_regs_mask)
27464 if (num_regs == 1)
27466 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27467 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27468 rtx addr = gen_rtx_MEM (SImode,
27469 gen_rtx_POST_INC (SImode,
27470 stack_pointer_rtx));
27471 set_mem_alias_set (addr, get_frame_alias_set ());
27472 XVECEXP (par, 0, 0) = ret_rtx;
27473 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27474 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27475 emit_jump_insn (par);
27477 else
27479 saved_regs_mask &= ~ (1 << LR_REGNUM);
27480 saved_regs_mask |= (1 << PC_REGNUM);
27481 arm_emit_multi_reg_pop (saved_regs_mask);
27484 else
27486 emit_jump_insn (simple_return_rtx);
27490 void
27491 thumb1_expand_epilogue (void)
27493 HOST_WIDE_INT amount;
27494 arm_stack_offsets *offsets;
27495 int regno;
27497 /* Naked functions don't have epilogues. */
27498 if (IS_NAKED (arm_current_func_type ()))
27499 return;
27501 offsets = arm_get_frame_offsets ();
27502 amount = offsets->outgoing_args - offsets->saved_regs;
27504 if (frame_pointer_needed)
27506 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27507 amount = offsets->locals_base - offsets->saved_regs;
27509 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
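/* Registers covered by the extra-pop optimisation (thumb1_extra_regs_pushed)
   are deallocated by the pop itself, so reduce the explicit SP adjustment
   accordingly. */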
27511 gcc_assert (amount >= 0);
27512 if (amount)
27514 emit_insn (gen_blockage ());
27516 if (amount < 512)
27517 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27518 GEN_INT (amount)));
27519 else
27521 /* r3 is always free in the epilogue. */
27522 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27524 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27525 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27529 /* Emit a USE (stack_pointer_rtx), so that
27530 the stack adjustment will not be deleted. */
27531 emit_insn (gen_force_register_use (stack_pointer_rtx));
27533 if (crtl->profile || !TARGET_SCHED_PROLOG)
27534 emit_insn (gen_blockage ());
27536 /* Emit a clobber for each register that will be restored in the epilogue,
27537 so that flow2 will get register lifetimes correct. */
27538 for (regno = 0; regno < 13; regno++)
27539 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27540 emit_clobber (gen_rtx_REG (SImode, regno));
27542 if (! df_regs_ever_live_p (LR_REGNUM))
27543 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27546 /* Epilogue code for APCS frame. */
27547 static void
27548 arm_expand_epilogue_apcs_frame (bool really_return)
27550 unsigned long func_type;
27551 unsigned long saved_regs_mask;
27552 int num_regs = 0;
27553 int i;
27554 int floats_from_frame = 0;
27555 arm_stack_offsets *offsets;
27557 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27558 func_type = arm_current_func_type ();
27560 /* Get frame offsets for ARM. */
27561 offsets = arm_get_frame_offsets ();
27562 saved_regs_mask = offsets->saved_regs_mask;
27564 /* Find the offset of the floating-point save area in the frame. */
27565 floats_from_frame
27566 = (offsets->saved_args
27567 + arm_compute_static_chain_stack_bytes ()
27568 - offsets->frame);
27570 /* Compute how many core registers are saved and how far away the floats are. */
27571 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27572 if (saved_regs_mask & (1 << i))
27574 num_regs++;
27575 floats_from_frame += 4;
27578 if (TARGET_HARD_FLOAT && TARGET_VFP)
27580 int start_reg;
27581 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27583 /* The offset is from IP_REGNUM. */
27584 int saved_size = arm_get_vfp_saved_size ();
27585 if (saved_size > 0)
27587 rtx_insn *insn;
27588 floats_from_frame += saved_size;
27589 insn = emit_insn (gen_addsi3 (ip_rtx,
27590 hard_frame_pointer_rtx,
27591 GEN_INT (-floats_from_frame)));
27592 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27593 ip_rtx, hard_frame_pointer_rtx);
27596 /* Generate VFP register multi-pop. */
27597 start_reg = FIRST_VFP_REGNUM;
27599 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27600 /* Look for a case where a reg does not need restoring. */
27601 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27602 && (!df_regs_ever_live_p (i + 1)
27603 || call_used_regs[i + 1]))
27605 if (start_reg != i)
27606 arm_emit_vfp_multi_reg_pop (start_reg,
27607 (i - start_reg) / 2,
27608 gen_rtx_REG (SImode,
27609 IP_REGNUM));
27610 start_reg = i + 2;
27613 /* Restore the remaining regs that we have discovered (or possibly
27614 even all of them, if the conditional in the for loop never
27615 fired). */
27616 if (start_reg != i)
27617 arm_emit_vfp_multi_reg_pop (start_reg,
27618 (i - start_reg) / 2,
27619 gen_rtx_REG (SImode, IP_REGNUM));
27622 if (TARGET_IWMMXT)
27624 /* The frame pointer is guaranteed to be non-double-word aligned, as
27625 it is set to double-word-aligned old_stack_pointer - 4. */
27626 rtx_insn *insn;
27627 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27629 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27630 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27632 rtx addr = gen_frame_mem (V2SImode,
27633 plus_constant (Pmode, hard_frame_pointer_rtx,
27634 - lrm_count * 4));
27635 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27636 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27637 gen_rtx_REG (V2SImode, i),
27638 NULL_RTX);
27639 lrm_count += 2;
27643 /* saved_regs_mask should contain IP, which holds the old stack pointer
27644 from the time the activation record was created. Since SP and IP are
27645 adjacent registers, we can restore the value directly into SP. */
27646 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27647 saved_regs_mask &= ~(1 << IP_REGNUM);
27648 saved_regs_mask |= (1 << SP_REGNUM);
27650 /* There are two registers left in saved_regs_mask - LR and PC. We
27651 only need to restore LR (the return address), but to
27652 save time we can load it directly into PC, unless we need a
27653 special function exit sequence, or we are not really returning. */
27654 if (really_return
27655 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27656 && !crtl->calls_eh_return)
27657 /* Delete LR from the register mask, so that LR on
27658 the stack is loaded into the PC in the register mask. */
27659 saved_regs_mask &= ~(1 << LR_REGNUM);
27660 else
27661 saved_regs_mask &= ~(1 << PC_REGNUM);
27663 num_regs = bit_count (saved_regs_mask);
27664 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27666 rtx_insn *insn;
27667 emit_insn (gen_blockage ());
27668 /* Unwind the stack to just below the saved registers. */
27669 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27670 hard_frame_pointer_rtx,
27671 GEN_INT (- 4 * num_regs)));
27673 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27674 stack_pointer_rtx, hard_frame_pointer_rtx);
27677 arm_emit_multi_reg_pop (saved_regs_mask);
27679 if (IS_INTERRUPT (func_type))
27681 /* Interrupt handlers will have pushed the
27682 IP onto the stack, so restore it now. */
27683 rtx_insn *insn;
27684 rtx addr = gen_rtx_MEM (SImode,
27685 gen_rtx_POST_INC (SImode,
27686 stack_pointer_rtx));
27687 set_mem_alias_set (addr, get_frame_alias_set ());
27688 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27689 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27690 gen_rtx_REG (SImode, IP_REGNUM),
27691 NULL_RTX);
27694 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27695 return;
27697 if (crtl->calls_eh_return)
27698 emit_insn (gen_addsi3 (stack_pointer_rtx,
27699 stack_pointer_rtx,
27700 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27702 if (IS_STACKALIGN (func_type))
27703 /* Restore the original stack pointer. Before prologue, the stack was
27704 realigned and the original stack pointer saved in r0. For details,
27705 see comment in arm_expand_prologue. */
27706 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27708 emit_jump_insn (simple_return_rtx);
27711 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27712 function is not a sibcall. */
27713 void
27714 arm_expand_epilogue (bool really_return)
27716 unsigned long func_type;
27717 unsigned long saved_regs_mask;
27718 int num_regs = 0;
27719 int i;
27720 int amount;
27721 arm_stack_offsets *offsets;
27723 func_type = arm_current_func_type ();
27725 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27726 let output_return_instruction take care of any instruction emission. */
27727 if (IS_NAKED (func_type)
27728 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27730 if (really_return)
27731 emit_jump_insn (simple_return_rtx);
27732 return;
27735 /* If we are throwing an exception, then we really must be doing a
27736 return, so we can't tail-call. */
27737 gcc_assert (!crtl->calls_eh_return || really_return);
27739 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27741 arm_expand_epilogue_apcs_frame (really_return);
27742 return;
27745 /* Get frame offsets for ARM. */
27746 offsets = arm_get_frame_offsets ();
27747 saved_regs_mask = offsets->saved_regs_mask;
27748 num_regs = bit_count (saved_regs_mask);
27750 if (frame_pointer_needed)
27752 rtx_insn *insn;
27753 /* Restore stack pointer if necessary. */
27754 if (TARGET_ARM)
27756 /* In ARM mode, frame pointer points to first saved register.
27757 Restore stack pointer to last saved register. */
27758 amount = offsets->frame - offsets->saved_regs;
27760 /* Force out any pending memory operations that reference stacked data
27761 before stack de-allocation occurs. */
27762 emit_insn (gen_blockage ());
27763 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27764 hard_frame_pointer_rtx,
27765 GEN_INT (amount)));
27766 arm_add_cfa_adjust_cfa_note (insn, amount,
27767 stack_pointer_rtx,
27768 hard_frame_pointer_rtx);
27770 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27771 deleted. */
27772 emit_insn (gen_force_register_use (stack_pointer_rtx));
27774 else
27776 /* In Thumb-2 mode, the frame pointer points to the last saved
27777 register. */
27778 amount = offsets->locals_base - offsets->saved_regs;
27779 if (amount)
27781 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27782 hard_frame_pointer_rtx,
27783 GEN_INT (amount)));
27784 arm_add_cfa_adjust_cfa_note (insn, amount,
27785 hard_frame_pointer_rtx,
27786 hard_frame_pointer_rtx);
27789 /* Force out any pending memory operations that reference stacked data
27790 before stack de-allocation occurs. */
27791 emit_insn (gen_blockage ());
27792 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27793 hard_frame_pointer_rtx));
27794 arm_add_cfa_adjust_cfa_note (insn, 0,
27795 stack_pointer_rtx,
27796 hard_frame_pointer_rtx);
27797 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27798 deleted. */
27799 emit_insn (gen_force_register_use (stack_pointer_rtx));
27802 else
27804 /* Pop off outgoing args and local frame to adjust stack pointer to
27805 last saved register. */
27806 amount = offsets->outgoing_args - offsets->saved_regs;
27807 if (amount)
27809 rtx_insn *tmp;
27810 /* Force out any pending memory operations that reference stacked data
27811 before stack de-allocation occurs. */
27812 emit_insn (gen_blockage ());
27813 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27814 stack_pointer_rtx,
27815 GEN_INT (amount)));
27816 arm_add_cfa_adjust_cfa_note (tmp, amount,
27817 stack_pointer_rtx, stack_pointer_rtx);
27818 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27819 not deleted. */
27820 emit_insn (gen_force_register_use (stack_pointer_rtx));
27824 if (TARGET_HARD_FLOAT && TARGET_VFP)
27826 /* Generate VFP register multi-pop. */
27827 int end_reg = LAST_VFP_REGNUM + 1;
27829 /* Scan the registers in reverse order. We need to match
27830 any groupings made in the prologue and generate matching
27831 vldm operations. The need to match groups is because,
27832 unlike pop, vldm can only do consecutive regs. */
27833 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27834 /* Look for a case where a reg does not need restoring. */
27835 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27836 && (!df_regs_ever_live_p (i + 1)
27837 || call_used_regs[i + 1]))
27839 /* Restore the regs discovered so far (from reg+2 to
27840 end_reg). */
27841 if (end_reg > i + 2)
27842 arm_emit_vfp_multi_reg_pop (i + 2,
27843 (end_reg - (i + 2)) / 2,
27844 stack_pointer_rtx);
27845 end_reg = i;
27848 /* Restore the remaining regs that we have discovered (or possibly
27849 even all of them, if the conditional in the for loop never
27850 fired). */
27851 if (end_reg > i + 2)
27852 arm_emit_vfp_multi_reg_pop (i + 2,
27853 (end_reg - (i + 2)) / 2,
27854 stack_pointer_rtx);
27857 if (TARGET_IWMMXT)
27858 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27859 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27861 rtx_insn *insn;
27862 rtx addr = gen_rtx_MEM (V2SImode,
27863 gen_rtx_POST_INC (SImode,
27864 stack_pointer_rtx));
27865 set_mem_alias_set (addr, get_frame_alias_set ());
27866 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27867 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27868 gen_rtx_REG (V2SImode, i),
27869 NULL_RTX);
27870 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27871 stack_pointer_rtx, stack_pointer_rtx);
27874 if (saved_regs_mask)
27876 rtx insn;
27877 bool return_in_pc = false;
27879 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27880 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27881 && !IS_STACKALIGN (func_type)
27882 && really_return
27883 && crtl->args.pretend_args_size == 0
27884 && saved_regs_mask & (1 << LR_REGNUM)
27885 && !crtl->calls_eh_return)
27887 saved_regs_mask &= ~(1 << LR_REGNUM);
27888 saved_regs_mask |= (1 << PC_REGNUM);
27889 return_in_pc = true;
27892 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27894 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27895 if (saved_regs_mask & (1 << i))
27897 rtx addr = gen_rtx_MEM (SImode,
27898 gen_rtx_POST_INC (SImode,
27899 stack_pointer_rtx));
27900 set_mem_alias_set (addr, get_frame_alias_set ());
27902 if (i == PC_REGNUM)
27904 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27905 XVECEXP (insn, 0, 0) = ret_rtx;
27906 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27907 gen_rtx_REG (SImode, i),
27908 addr);
27909 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27910 insn = emit_jump_insn (insn);
27912 else
27914 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27915 addr));
27916 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27917 gen_rtx_REG (SImode, i),
27918 NULL_RTX);
27919 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27920 stack_pointer_rtx,
27921 stack_pointer_rtx);
27925 else
27927 if (TARGET_LDRD
27928 && current_tune->prefer_ldrd_strd
27929 && !optimize_function_for_size_p (cfun))
27931 if (TARGET_THUMB2)
27932 thumb2_emit_ldrd_pop (saved_regs_mask);
27933 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27934 arm_emit_ldrd_pop (saved_regs_mask);
27935 else
27936 arm_emit_multi_reg_pop (saved_regs_mask);
27938 else
27939 arm_emit_multi_reg_pop (saved_regs_mask);
27942 if (return_in_pc == true)
27943 return;
27946 if (crtl->args.pretend_args_size)
27948 int i, j;
27949 rtx dwarf = NULL_RTX;
27950 rtx_insn *tmp =
27951 emit_insn (gen_addsi3 (stack_pointer_rtx,
27952 stack_pointer_rtx,
27953 GEN_INT (crtl->args.pretend_args_size)));
27955 RTX_FRAME_RELATED_P (tmp) = 1;
27957 if (cfun->machine->uses_anonymous_args)
27959 /* Restore pretend args. Refer to arm_expand_prologue for how pretend
27960 args are saved on the stack. */
27961 int num_regs = crtl->args.pretend_args_size / 4;
27962 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
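/* E.g. with 8 bytes of pretend args, num_regs is 2 and the mask becomes
   0xc (r2 and r3), matching the registers the prologue pushed. */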
27963 for (j = 0, i = 0; j < num_regs; i++)
27964 if (saved_regs_mask & (1 << i))
27966 rtx reg = gen_rtx_REG (SImode, i);
27967 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27968 j++;
27970 REG_NOTES (tmp) = dwarf;
27972 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27973 stack_pointer_rtx, stack_pointer_rtx);
27976 if (!really_return)
27977 return;
27979 if (crtl->calls_eh_return)
27980 emit_insn (gen_addsi3 (stack_pointer_rtx,
27981 stack_pointer_rtx,
27982 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27984 if (IS_STACKALIGN (func_type))
27985 /* Restore the original stack pointer. Before prologue, the stack was
27986 realigned and the original stack pointer saved in r0. For details,
27987 see comment in arm_expand_prologue. */
27988 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27990 emit_jump_insn (simple_return_rtx);
27993 /* Implementation of insn prologue_thumb1_interwork. This is the first
27994 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
27996 const char *
27997 thumb1_output_interwork (void)
27999 const char * name;
28000 FILE *f = asm_out_file;
28002 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28003 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28004 == SYMBOL_REF);
28005 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28007 /* Generate code sequence to switch us into Thumb mode. */
28008 /* The .code 32 directive has already been emitted by
28009 ASM_DECLARE_FUNCTION_NAME. */
28010 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28011 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28013 /* Generate a label, so that the debugger will notice the
28014 change in instruction sets. This label is also used by
28015 the assembler to bypass the ARM code when this function
28016 is called from a Thumb encoded function elsewhere in the
28017 same file. Hence the definition of STUB_NAME here must
28018 agree with the definition in gas/config/tc-arm.c. */
28020 #define STUB_NAME ".real_start_of"
28022 fprintf (f, "\t.code\t16\n");
28023 #ifdef ARM_PE
28024 if (arm_dllexport_name_p (name))
28025 name = arm_strip_name_encoding (name);
28026 #endif
28027 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28028 fprintf (f, "\t.thumb_func\n");
28029 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28031 return "";
28034 /* Handle the case of a double word load into a low register from
28035 a computed memory address. The computed address may involve a
28036 register which is overwritten by the load. */
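/* Illustrative example: loading a doubleword at r2 + r4 into the r2/r3 pair
   emits "add r3, r2, r4", "ldr r2, [r3, #0]", "ldr r3, [r3, #4]", so the
   computed address survives in the high half until it is overwritten last. */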
28037 const char *
28038 thumb_load_double_from_address (rtx *operands)
28040 rtx addr;
28041 rtx base;
28042 rtx offset;
28043 rtx arg1;
28044 rtx arg2;
28046 gcc_assert (REG_P (operands[0]));
28047 gcc_assert (MEM_P (operands[1]));
28049 /* Get the memory address. */
28050 addr = XEXP (operands[1], 0);
28052 /* Work out how the memory address is computed. */
28053 switch (GET_CODE (addr))
28055 case REG:
28056 operands[2] = adjust_address (operands[1], SImode, 4);
28058 if (REGNO (operands[0]) == REGNO (addr))
28060 output_asm_insn ("ldr\t%H0, %2", operands);
28061 output_asm_insn ("ldr\t%0, %1", operands);
28063 else
28065 output_asm_insn ("ldr\t%0, %1", operands);
28066 output_asm_insn ("ldr\t%H0, %2", operands);
28068 break;
28070 case CONST:
28071 /* Compute <address> + 4 for the high order load. */
28072 operands[2] = adjust_address (operands[1], SImode, 4);
28074 output_asm_insn ("ldr\t%0, %1", operands);
28075 output_asm_insn ("ldr\t%H0, %2", operands);
28076 break;
28078 case PLUS:
28079 arg1 = XEXP (addr, 0);
28080 arg2 = XEXP (addr, 1);
28082 if (CONSTANT_P (arg1))
28083 base = arg2, offset = arg1;
28084 else
28085 base = arg1, offset = arg2;
28087 gcc_assert (REG_P (base));
28089 /* Catch the case of <address> = <reg> + <reg> */
28090 if (REG_P (offset))
28092 int reg_offset = REGNO (offset);
28093 int reg_base = REGNO (base);
28094 int reg_dest = REGNO (operands[0]);
28096 /* Add the base and offset registers together into the
28097 higher destination register. */
28098 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28099 reg_dest + 1, reg_base, reg_offset);
28101 /* Load the lower destination register from the address in
28102 the higher destination register. */
28103 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28104 reg_dest, reg_dest + 1);
28106 /* Load the higher destination register from its own address
28107 plus 4. */
28108 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28109 reg_dest + 1, reg_dest + 1);
28111 else
28113 /* Compute <address> + 4 for the high order load. */
28114 operands[2] = adjust_address (operands[1], SImode, 4);
28116 /* If the computed address is held in the low order register
28117 then load the high order register first, otherwise always
28118 load the low order register first. */
28119 if (REGNO (operands[0]) == REGNO (base))
28121 output_asm_insn ("ldr\t%H0, %2", operands);
28122 output_asm_insn ("ldr\t%0, %1", operands);
28124 else
28126 output_asm_insn ("ldr\t%0, %1", operands);
28127 output_asm_insn ("ldr\t%H0, %2", operands);
28130 break;
28132 case LABEL_REF:
28133 /* With no registers to worry about we can just load the value
28134 directly. */
28135 operands[2] = adjust_address (operands[1], SImode, 4);
28137 output_asm_insn ("ldr\t%H0, %2", operands);
28138 output_asm_insn ("ldr\t%0, %1", operands);
28139 break;
28141 default:
28142 gcc_unreachable ();
28145 return "";
28148 const char *
28149 thumb_output_move_mem_multiple (int n, rtx *operands)
28151 rtx tmp;
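/* The register lists for ldmia/stmia must be in ascending register order, so
   sort the scratch register operands before printing them. */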
28153 switch (n)
28155 case 2:
28156 if (REGNO (operands[4]) > REGNO (operands[5]))
28158 tmp = operands[4];
28159 operands[4] = operands[5];
28160 operands[5] = tmp;
28162 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28163 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28164 break;
28166 case 3:
28167 if (REGNO (operands[4]) > REGNO (operands[5]))
28169 tmp = operands[4];
28170 operands[4] = operands[5];
28171 operands[5] = tmp;
28173 if (REGNO (operands[5]) > REGNO (operands[6]))
28175 tmp = operands[5];
28176 operands[5] = operands[6];
28177 operands[6] = tmp;
28179 if (REGNO (operands[4]) > REGNO (operands[5]))
28181 tmp = operands[4];
28182 operands[4] = operands[5];
28183 operands[5] = tmp;
28186 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28187 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28188 break;
28190 default:
28191 gcc_unreachable ();
28194 return "";
28197 /* Output a call-via instruction for thumb state. */
28198 const char *
28199 thumb_call_via_reg (rtx reg)
28201 int regno = REGNO (reg);
28202 rtx *labelp;
28204 gcc_assert (regno < LR_REGNUM);
28206 /* If we are in the normal text section we can use a single instance
28207 per compilation unit. If we are doing function sections, then we need
28208 an entry per section, since we can't rely on reachability. */
28209 if (in_section == text_section)
28211 thumb_call_reg_needed = 1;
28213 if (thumb_call_via_label[regno] == NULL)
28214 thumb_call_via_label[regno] = gen_label_rtx ();
28215 labelp = thumb_call_via_label + regno;
28217 else
28219 if (cfun->machine->call_via[regno] == NULL)
28220 cfun->machine->call_via[regno] = gen_label_rtx ();
28221 labelp = cfun->machine->call_via + regno;
28224 output_asm_insn ("bl\t%a0", labelp);
28225 return "";
28228 /* Routines for generating rtl. */
28229 void
28230 thumb_expand_movmemqi (rtx *operands)
28232 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28233 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28234 HOST_WIDE_INT len = INTVAL (operands[2]);
28235 HOST_WIDE_INT offset = 0;
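/* Copy the bulk in 12-byte and 8-byte ldm/stm chunks (which also advance the
   pointers), then mop up the remainder with at most one word, one halfword
   and one byte move. */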
28237 while (len >= 12)
28239 emit_insn (gen_movmem12b (out, in, out, in));
28240 len -= 12;
28243 if (len >= 8)
28245 emit_insn (gen_movmem8b (out, in, out, in));
28246 len -= 8;
28249 if (len >= 4)
28251 rtx reg = gen_reg_rtx (SImode);
28252 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28253 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28254 len -= 4;
28255 offset += 4;
28258 if (len >= 2)
28260 rtx reg = gen_reg_rtx (HImode);
28261 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28262 plus_constant (Pmode, in,
28263 offset))));
28264 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28265 offset)),
28266 reg));
28267 len -= 2;
28268 offset += 2;
28271 if (len)
28273 rtx reg = gen_reg_rtx (QImode);
28274 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28275 plus_constant (Pmode, in,
28276 offset))));
28277 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28278 offset)),
28279 reg));
28283 void
28284 thumb_reload_out_hi (rtx *operands)
28286 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28289 /* Handle reading a half-word from memory during reload. */
28290 void
28291 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28293 gcc_unreachable ();
28296 /* Return the length of a function name prefix
28297 that starts with the character 'c'. */
28298 static int
28299 arm_get_strip_length (int c)
28301 switch (c)
28303 ARM_NAME_ENCODING_LENGTHS
28304 default: return 0;
28308 /* Return a pointer to a function's name with any
28309 and all prefix encodings stripped from it. */
28310 const char *
28311 arm_strip_name_encoding (const char *name)
28313 int skip;
28315 while ((skip = arm_get_strip_length (* name)))
28316 name += skip;
28318 return name;
28321 /* If there is a '*' anywhere in the name's prefix, then
28322 emit the stripped name verbatim, otherwise prepend an
28323 underscore if leading underscores are being used. */
28324 void
28325 arm_asm_output_labelref (FILE *stream, const char *name)
28327 int skip;
28328 int verbatim = 0;
28330 while ((skip = arm_get_strip_length (* name)))
28332 verbatim |= (*name == '*');
28333 name += skip;
28336 if (verbatim)
28337 fputs (name, stream);
28338 else
28339 asm_fprintf (stream, "%U%s", name);
28342 /* This function is used to emit an EABI tag and its associated value.
28343 We emit the numerical value of the tag in case the assembler does not
28344 support textual tags (e.g. gas prior to 2.20). If requested we include
28345 the tag name in a comment so that anyone reading the assembler output
28346 will know which tag is being set.
28348 This function is not static because arm-c.c needs it too. */
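/* For example, the Tag_ABI_optimization_goals call below produces a line such
   as ".eabi_attribute 30, 2", with "@ Tag_ABI_optimization_goals" appended
   when -fverbose-asm or -dA is in effect. */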
28350 void
28351 arm_emit_eabi_attribute (const char *name, int num, int val)
28353 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28354 if (flag_verbose_asm || flag_debug_asm)
28355 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28356 asm_fprintf (asm_out_file, "\n");
28359 static void
28360 arm_file_start (void)
28362 int val;
28364 if (TARGET_UNIFIED_ASM)
28365 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28367 if (TARGET_BPABI)
28369 const char *fpu_name;
28370 if (arm_selected_arch)
28372 /* armv7ve doesn't support any extensions. */
28373 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28375 /* Keep backward compatibility for assemblers
28376 which don't support armv7ve. */
28377 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28378 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28379 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28380 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28381 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28383 else
28385 const char* pos = strchr (arm_selected_arch->name, '+');
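/* An architecture name containing '+' (e.g. "armv8-a+crc", used here purely
   as an illustration) is split at the '+': the base name goes into .arch and
   the extension into .arch_extension. */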
28386 if (pos)
28388 char buf[15];
28389 gcc_assert (strlen (arm_selected_arch->name)
28390 <= sizeof (buf) / sizeof (*pos));
28391 strncpy (buf, arm_selected_arch->name,
28392 (pos - arm_selected_arch->name) * sizeof (*pos));
28393 buf[pos - arm_selected_arch->name] = '\0';
28394 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28395 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28397 else
28398 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28401 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28402 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28403 else
28405 const char* truncated_name
28406 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28407 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28410 if (TARGET_SOFT_FLOAT)
28412 fpu_name = "softvfp";
28414 else
28416 fpu_name = arm_fpu_desc->name;
28417 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28419 if (TARGET_HARD_FLOAT)
28420 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28421 if (TARGET_HARD_FLOAT_ABI)
28422 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28425 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28427 /* Some of these attributes only apply when the corresponding features
28428 are used. However we don't have any easy way of figuring this out.
28429 Conservatively record the setting that would have been used. */
28431 if (flag_rounding_math)
28432 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28434 if (!flag_unsafe_math_optimizations)
28436 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28437 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28439 if (flag_signaling_nans)
28440 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28442 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28443 flag_finite_math_only ? 1 : 3);
28445 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28446 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28447 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28448 flag_short_enums ? 1 : 2);
28450 /* Tag_ABI_optimization_goals. */
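/* The values below follow the Tag_ABI_optimization_goals encoding in the ARM
   EABI addenda: 4 = aggressively for size, 2 = aggressively for speed,
   1 = for speed, 6 = aggressively for debugging. */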
28451 if (optimize_size)
28452 val = 4;
28453 else if (optimize >= 2)
28454 val = 2;
28455 else if (optimize)
28456 val = 1;
28457 else
28458 val = 6;
28459 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28461 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28462 unaligned_access);
28464 if (arm_fp16_format)
28465 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28466 (int) arm_fp16_format);
28468 if (arm_lang_output_object_attributes_hook)
28469 arm_lang_output_object_attributes_hook();
28472 default_file_start ();
28475 static void
28476 arm_file_end (void)
28478 int regno;
28480 if (NEED_INDICATE_EXEC_STACK)
28481 /* Add .note.GNU-stack. */
28482 file_end_indicate_exec_stack ();
28484 if (! thumb_call_reg_needed)
28485 return;
28487 switch_to_section (text_section);
28488 asm_fprintf (asm_out_file, "\t.code 16\n");
28489 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28491 for (regno = 0; regno < LR_REGNUM; regno++)
28493 rtx label = thumb_call_via_label[regno];
28495 if (label != 0)
28497 targetm.asm_out.internal_label (asm_out_file, "L",
28498 CODE_LABEL_NUMBER (label));
28499 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28504 #ifndef ARM_PE
28505 /* Symbols in the text segment can be accessed without indirecting via the
28506 constant pool; it may take an extra binary operation, but this is still
28507 faster than indirecting via memory. Don't do this when not optimizing,
28508 since we won't be calculating all of the offsets necessary to do this
28509 simplification. */
28511 static void
28512 arm_encode_section_info (tree decl, rtx rtl, int first)
28514 if (optimize > 0 && TREE_CONSTANT (decl))
28515 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28517 default_encode_section_info (decl, rtl, first);
28519 #endif /* !ARM_PE */
28521 static void
28522 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28524 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28525 && !strcmp (prefix, "L"))
28527 arm_ccfsm_state = 0;
28528 arm_target_insn = NULL;
28530 default_internal_label (stream, prefix, labelno);
28533 /* Output code to add DELTA to the first argument, and then jump
28534 to FUNCTION. Used for C++ multiple inheritance. */
28535 static void
28536 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28537 HOST_WIDE_INT delta,
28538 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28539 tree function)
28541 static int thunk_label = 0;
28542 char label[256];
28543 char labelpc[256];
28544 int mi_delta = delta;
28545 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28546 int shift = 0;
28547 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28548 ? 1 : 0);
28549 if (mi_delta < 0)
28550 mi_delta = - mi_delta;
28552 final_start_function (emit_barrier (), file, 1);
28554 if (TARGET_THUMB1)
28556 int labelno = thunk_label++;
28557 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28558 /* Thunks are entered in ARM mode when available. */
28559 if (TARGET_THUMB1_ONLY)
28561 /* push r3 so we can use it as a temporary. */
28562 /* TODO: Omit this save if r3 is not used. */
28563 fputs ("\tpush {r3}\n", file);
28564 fputs ("\tldr\tr3, ", file);
28566 else
28568 fputs ("\tldr\tr12, ", file);
28570 assemble_name (file, label);
28571 fputc ('\n', file);
28572 if (flag_pic)
28574 /* If we are generating PIC, the ldr instruction below loads
28575 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28576 the address of the add + 8, so we have:
28578 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28579 = target + 1.
28581 Note that we have "+ 1" because some versions of GNU ld
28582 don't set the low bit of the result for R_ARM_REL32
28583 relocations against thumb function symbols.
28584 On ARMv6M this is +4, not +8. */
28585 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28586 assemble_name (file, labelpc);
28587 fputs (":\n", file);
28588 if (TARGET_THUMB1_ONLY)
28590 /* This is 2 insns after the start of the thunk, so we know it
28591 is 4-byte aligned. */
28592 fputs ("\tadd\tr3, pc, r3\n", file);
28593 fputs ("\tmov r12, r3\n", file);
28595 else
28596 fputs ("\tadd\tr12, pc, r12\n", file);
28598 else if (TARGET_THUMB1_ONLY)
28599 fputs ("\tmov r12, r3\n", file);
28601 if (TARGET_THUMB1_ONLY)
28603 if (mi_delta > 255)
28605 fputs ("\tldr\tr3, ", file);
28606 assemble_name (file, label);
28607 fputs ("+4\n", file);
28608 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28609 mi_op, this_regno, this_regno);
28611 else if (mi_delta != 0)
28613 /* Thumb1 unified syntax requires the s suffix in the instruction name
28614 when one of the operands is an immediate. */
28615 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28616 mi_op, this_regno, this_regno,
28617 mi_delta);
28620 else
28622 /* TODO: Use movw/movt for large constants when available. */
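/* For example (assuming "this" is in r0), a delta of 1025 is emitted as
   "add r0, r0, #1" followed by "add r0, r0, #1024". */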
28623 while (mi_delta != 0)
28625 if ((mi_delta & (3 << shift)) == 0)
28626 shift += 2;
28627 else
28629 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28630 mi_op, this_regno, this_regno,
28631 mi_delta & (0xff << shift));
28632 mi_delta &= ~(0xff << shift);
28633 shift += 8;
28637 if (TARGET_THUMB1)
28639 if (TARGET_THUMB1_ONLY)
28640 fputs ("\tpop\t{r3}\n", file);
28642 fprintf (file, "\tbx\tr12\n");
28643 ASM_OUTPUT_ALIGN (file, 2);
28644 assemble_name (file, label);
28645 fputs (":\n", file);
28646 if (flag_pic)
28648 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28649 rtx tem = XEXP (DECL_RTL (function), 0);
28650 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28651 pipeline offset is four rather than eight. Adjust the offset
28652 accordingly. */
28653 tem = plus_constant (GET_MODE (tem), tem,
28654 TARGET_THUMB1_ONLY ? -3 : -7);
28655 tem = gen_rtx_MINUS (GET_MODE (tem),
28656 tem,
28657 gen_rtx_SYMBOL_REF (Pmode,
28658 ggc_strdup (labelpc)));
28659 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28661 else
28662 /* Output ".word .LTHUNKn". */
28663 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28665 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28666 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28668 else
28670 fputs ("\tb\t", file);
28671 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28672 if (NEED_PLT_RELOC)
28673 fputs ("(PLT)", file);
28674 fputc ('\n', file);
28677 final_end_function ();
28681 arm_emit_vector_const (FILE *file, rtx x)
28683 int i;
28684 const char * pattern;
28686 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28688 switch (GET_MODE (x))
28690 case V2SImode: pattern = "%08x"; break;
28691 case V4HImode: pattern = "%04x"; break;
28692 case V8QImode: pattern = "%02x"; break;
28693 default: gcc_unreachable ();
28696 fprintf (file, "0x");
28697 for (i = CONST_VECTOR_NUNITS (x); i--;)
28699 rtx element;
28701 element = CONST_VECTOR_ELT (x, i);
28702 fprintf (file, pattern, INTVAL (element));
28705 return 1;
28708 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28709 HFmode constant pool entries are actually loaded with ldr. */
28710 void
28711 arm_emit_fp16_const (rtx c)
28713 REAL_VALUE_TYPE r;
28714 long bits;
28716 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28717 bits = real_to_target (NULL, &r, HFmode);
28718 if (WORDS_BIG_ENDIAN)
28719 assemble_zeros (2);
28720 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28721 if (!WORDS_BIG_ENDIAN)
28722 assemble_zeros (2);
28725 const char *
28726 arm_output_load_gr (rtx *operands)
28728 rtx reg;
28729 rtx offset;
28730 rtx wcgr;
28731 rtx sum;
28733 if (!MEM_P (operands [1])
28734 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28735 || !REG_P (reg = XEXP (sum, 0))
28736 || !CONST_INT_P (offset = XEXP (sum, 1))
28737 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28738 return "wldrw%?\t%0, %1";
28740 /* Fix up an out-of-range load of a GR register. */
28741 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28742 wcgr = operands[0];
28743 operands[0] = reg;
28744 output_asm_insn ("ldr%?\t%0, %1", operands);
28746 operands[0] = wcgr;
28747 operands[1] = reg;
28748 output_asm_insn ("tmcr%?\t%0, %1", operands);
28749 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28751 return "";
28754 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28756 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28757 named arg and all anonymous args onto the stack.
28758 XXX I know the prologue shouldn't be pushing registers, but it is faster
28759 that way. */
28761 static void
28762 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28763 machine_mode mode,
28764 tree type,
28765 int *pretend_size,
28766 int second_time ATTRIBUTE_UNUSED)
28768 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28769 int nregs;
28771 cfun->machine->uses_anonymous_args = 1;
28772 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28774 nregs = pcum->aapcs_ncrn;
28775 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28776 nregs++;
28778 else
28779 nregs = pcum->nregs;
28781 if (nregs < NUM_ARG_REGS)
28782 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28785 /* We can't rely on the caller doing the proper promotion when
28786 using APCS or ATPCS. */
28788 static bool
28789 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28791 return !TARGET_AAPCS_BASED;
28794 static machine_mode
28795 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28796 machine_mode mode,
28797 int *punsignedp ATTRIBUTE_UNUSED,
28798 const_tree fntype ATTRIBUTE_UNUSED,
28799 int for_return ATTRIBUTE_UNUSED)
28801 if (GET_MODE_CLASS (mode) == MODE_INT
28802 && GET_MODE_SIZE (mode) < 4)
28803 return SImode;
28805 return mode;
28808 /* AAPCS based ABIs use short enums by default. */
28810 static bool
28811 arm_default_short_enums (void)
28813 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28817 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28819 static bool
28820 arm_align_anon_bitfield (void)
28822 return TARGET_AAPCS_BASED;
28826 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28828 static tree
28829 arm_cxx_guard_type (void)
28831 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28835 /* The EABI says test the least significant bit of a guard variable. */
28837 static bool
28838 arm_cxx_guard_mask_bit (void)
28840 return TARGET_AAPCS_BASED;
28844 /* The EABI specifies that all array cookies are 8 bytes long. */
28846 static tree
28847 arm_get_cookie_size (tree type)
28849 tree size;
28851 if (!TARGET_AAPCS_BASED)
28852 return default_cxx_get_cookie_size (type);
28854 size = build_int_cst (sizetype, 8);
28855 return size;
28859 /* The EABI says that array cookies should also contain the element size. */
28861 static bool
28862 arm_cookie_has_size (void)
28864 return TARGET_AAPCS_BASED;
28868 /* The EABI says constructors and destructors should return a pointer to
28869 the object constructed/destroyed. */
28871 static bool
28872 arm_cxx_cdtor_returns_this (void)
28874 return TARGET_AAPCS_BASED;
28877 /* The EABI says that an inline function may never be the key
28878 method. */
28880 static bool
28881 arm_cxx_key_method_may_be_inline (void)
28883 return !TARGET_AAPCS_BASED;
28886 static void
28887 arm_cxx_determine_class_data_visibility (tree decl)
28889 if (!TARGET_AAPCS_BASED
28890 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28891 return;
28893 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28894 is exported. However, on systems without dynamic vague linkage,
28895 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28896 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28897 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28898 else
28899 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28900 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28903 static bool
28904 arm_cxx_class_data_always_comdat (void)
28906 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28907 vague linkage if the class has no key function. */
28908 return !TARGET_AAPCS_BASED;
28912 /* The EABI says __aeabi_atexit should be used to register static
28913 destructors. */
28915 static bool
28916 arm_cxx_use_aeabi_atexit (void)
28918 return TARGET_AAPCS_BASED;
28922 void
28923 arm_set_return_address (rtx source, rtx scratch)
28925 arm_stack_offsets *offsets;
28926 HOST_WIDE_INT delta;
28927 rtx addr;
28928 unsigned long saved_regs;
28930 offsets = arm_get_frame_offsets ();
28931 saved_regs = offsets->saved_regs_mask;
28933 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28934 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28935 else
28937 if (frame_pointer_needed)
28938 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28939 else
28941 /* LR will be the first saved register. */
28942 delta = offsets->outgoing_args - (offsets->frame + 4);
28945 if (delta >= 4096)
28947 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28948 GEN_INT (delta & ~4095)));
28949 addr = scratch;
28950 delta &= 4095;
28952 else
28953 addr = stack_pointer_rtx;
28955 addr = plus_constant (Pmode, addr, delta);
28957 /* The store needs to be marked as frame related in order to prevent
28958 DSE from deleting it as dead if it is based on fp. */
28959 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
28960 RTX_FRAME_RELATED_P (insn) = 1;
28961 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
28966 void
28967 thumb_set_return_address (rtx source, rtx scratch)
28969 arm_stack_offsets *offsets;
28970 HOST_WIDE_INT delta;
28971 HOST_WIDE_INT limit;
28972 int reg;
28973 rtx addr;
28974 unsigned long mask;
28976 emit_use (source);
28978 offsets = arm_get_frame_offsets ();
28979 mask = offsets->saved_regs_mask;
28980 if (mask & (1 << LR_REGNUM))
28982 limit = 1024;
28983 /* Find the saved regs. */
28984 if (frame_pointer_needed)
28986 delta = offsets->soft_frame - offsets->saved_args;
28987 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28988 if (TARGET_THUMB1)
28989 limit = 128;
28991 else
28993 delta = offsets->outgoing_args - offsets->saved_args;
28994 reg = SP_REGNUM;
28996 /* Allow for the stack frame. */
28997 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28998 delta -= 16;
28999 /* The link register is always the first saved register. */
29000 delta -= 4;
29002 /* Construct the address. */
29003 addr = gen_rtx_REG (SImode, reg);
29004 if (delta > limit)
29006 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29007 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29008 addr = scratch;
29010 else
29011 addr = plus_constant (Pmode, addr, delta);
29013 /* The store needs to be marked as frame related in order to prevent
29014 DSE from deleting it as dead if it is based on fp. */
29015 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29016 RTX_FRAME_RELATED_P (insn) = 1;
29017 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29019 else
29020 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29023 /* Implements target hook vector_mode_supported_p. */
29024 bool
29025 arm_vector_mode_supported_p (machine_mode mode)
29027 /* Neon also supports V2SImode, etc. listed in the clause below. */
29028 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29029 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29030 return true;
29032 if ((TARGET_NEON || TARGET_IWMMXT)
29033 && ((mode == V2SImode)
29034 || (mode == V4HImode)
29035 || (mode == V8QImode)))
29036 return true;
29038 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29039 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29040 || mode == V2HAmode))
29041 return true;
29043 return false;
29046 /* Implements target hook array_mode_supported_p. */
29048 static bool
29049 arm_array_mode_supported_p (machine_mode mode,
29050 unsigned HOST_WIDE_INT nelems)
29052 if (TARGET_NEON
29053 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29054 && (nelems >= 2 && nelems <= 4))
29055 return true;
29057 return false;
29060 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29061 registers when autovectorizing for Neon, at least until multiple vector
29062 widths are supported properly by the middle-end. */
29064 static machine_mode
29065 arm_preferred_simd_mode (machine_mode mode)
29067 if (TARGET_NEON)
29068 switch (mode)
29070 case SFmode:
29071 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29072 case SImode:
29073 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29074 case HImode:
29075 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29076 case QImode:
29077 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29078 case DImode:
29079 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29080 return V2DImode;
29081 break;
29083 default:;
29086 if (TARGET_REALLY_IWMMXT)
29087 switch (mode)
29089 case SImode:
29090 return V2SImode;
29091 case HImode:
29092 return V4HImode;
29093 case QImode:
29094 return V8QImode;
29096 default:;
29099 return word_mode;
29102 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29104 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29105 using r0-r4 for function arguments, r7 for the stack frame and don't have
29106 enough left over to do doubleword arithmetic. For Thumb-2 all the
29107 potentially problematic instructions accept high registers so this is not
29108 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29109 that require many low registers. */
29110 static bool
29111 arm_class_likely_spilled_p (reg_class_t rclass)
29113 if ((TARGET_THUMB1 && rclass == LO_REGS)
29114 || rclass == CC_REG)
29115 return true;
29117 return false;
29120 /* Implements target hook small_register_classes_for_mode_p. */
29121 bool
29122 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29124 return TARGET_THUMB1;
29127 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29128 ARM insns and therefore guarantee that the shift count is modulo 256.
29129 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29130 guarantee no particular behavior for out-of-range counts. */
29132 static unsigned HOST_WIDE_INT
29133 arm_shift_truncation_mask (machine_mode mode)
29135 return mode == SImode ? 255 : 0;
29139 /* Map internal gcc register numbers to DWARF2 register numbers. */
29141 unsigned int
29142 arm_dbx_register_number (unsigned int regno)
29144 if (regno < 16)
29145 return regno;
29147 if (IS_VFP_REGNUM (regno))
29149 /* See comment in arm_dwarf_register_span. */
29150 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29151 return 64 + regno - FIRST_VFP_REGNUM;
29152 else
29153 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29156 if (IS_IWMMXT_GR_REGNUM (regno))
29157 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29159 if (IS_IWMMXT_REGNUM (regno))
29160 return 112 + regno - FIRST_IWMMXT_REGNUM;
29162 gcc_unreachable ();
29165 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29166 GCC models them as 64 32-bit registers, so we need to describe this to
29167 the DWARF generation code. Other registers can use the default. */
29168 static rtx
29169 arm_dwarf_register_span (rtx rtl)
29171 machine_mode mode;
29172 unsigned regno;
29173 rtx parts[16];
29174 int nregs;
29175 int i;
29177 regno = REGNO (rtl);
29178 if (!IS_VFP_REGNUM (regno))
29179 return NULL_RTX;
29181 /* XXX FIXME: The EABI defines two VFP register ranges:
29182 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29183 256-287: D0-D31
29184 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29185 corresponding D register. Until GDB supports this, we shall use the
29186 legacy encodings. We also use these encodings for D0-D15 for
29187 compatibility with older debuggers. */
29188 mode = GET_MODE (rtl);
29189 if (GET_MODE_SIZE (mode) < 8)
29190 return NULL_RTX;
29192 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29194 nregs = GET_MODE_SIZE (mode) / 4;
29195 for (i = 0; i < nregs; i += 2)
29196 if (TARGET_BIG_END)
29198 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29199 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29201 else
29203 parts[i] = gen_rtx_REG (SImode, regno + i);
29204 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29207 else
29209 nregs = GET_MODE_SIZE (mode) / 8;
29210 for (i = 0; i < nregs; i++)
29211 parts[i] = gen_rtx_REG (DImode, regno + i);
29214 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29217 #if ARM_UNWIND_INFO
29218 /* Emit unwind directives for a store-multiple instruction or stack pointer
29219 push during alignment.
29220 These should only ever be generated by the function prologue code, so
29221 expect them to have a particular form.
29222 The store-multiple instruction sometimes pushes pc as the last register,
29223 although it should not be tracked in the unwind information; for -Os it
29224 sometimes pushes dummy registers before the first register that needs
29225 to be tracked in the unwind information.  Such dummy registers are there
29226 just to avoid a separate stack adjustment, and will not be restored in
29227 the epilogue.  */
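/* For example, a prologue "push {r4, r5, lr}" is annotated as
".save {r4, r5, lr}", a VFP store-multiple of d8-d9 as ".vsave {d8, d9}",
and a trailing pc push or leading dummy push becomes a ".pad" directive.  */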
29229 static void
29230 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29232 int i;
29233 HOST_WIDE_INT offset;
29234 HOST_WIDE_INT nregs;
29235 int reg_size;
29236 unsigned reg;
29237 unsigned lastreg;
29238 unsigned padfirst = 0, padlast = 0;
29239 rtx e;
29241 e = XVECEXP (p, 0, 0);
29242 gcc_assert (GET_CODE (e) == SET);
29244 /* First insn will adjust the stack pointer. */
29245 gcc_assert (GET_CODE (e) == SET
29246 && REG_P (SET_DEST (e))
29247 && REGNO (SET_DEST (e)) == SP_REGNUM
29248 && GET_CODE (SET_SRC (e)) == PLUS);
29250 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29251 nregs = XVECLEN (p, 0) - 1;
29252 gcc_assert (nregs);
29254 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29255 if (reg < 16)
29257 /* For -Os dummy registers can be pushed at the beginning to
29258 avoid separate stack pointer adjustment. */
29259 e = XVECEXP (p, 0, 1);
29260 e = XEXP (SET_DEST (e), 0);
29261 if (GET_CODE (e) == PLUS)
29262 padfirst = INTVAL (XEXP (e, 1));
29263 gcc_assert (padfirst == 0 || optimize_size);
29264 /* The function prologue may also push pc, but not annotate it as it is
29265 never restored. We turn this into a stack pointer adjustment. */
29266 e = XVECEXP (p, 0, nregs);
29267 e = XEXP (SET_DEST (e), 0);
29268 if (GET_CODE (e) == PLUS)
29269 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29270 else
29271 padlast = offset - 4;
29272 gcc_assert (padlast == 0 || padlast == 4);
29273 if (padlast == 4)
29274 fprintf (asm_out_file, "\t.pad #4\n");
29275 reg_size = 4;
29276 fprintf (asm_out_file, "\t.save {");
29278 else if (IS_VFP_REGNUM (reg))
29280 reg_size = 8;
29281 fprintf (asm_out_file, "\t.vsave {");
29283 else
29284 /* Unknown register type. */
29285 gcc_unreachable ();
29287 /* If the stack increment doesn't match the size of the saved registers,
29288 something has gone horribly wrong. */
29289 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29291 offset = padfirst;
29292 lastreg = 0;
29293 /* The remaining insns will describe the stores. */
29294 for (i = 1; i <= nregs; i++)
29296 /* Expect (set (mem <addr>) (reg)).
29297 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29298 e = XVECEXP (p, 0, i);
29299 gcc_assert (GET_CODE (e) == SET
29300 && MEM_P (SET_DEST (e))
29301 && REG_P (SET_SRC (e)));
29303 reg = REGNO (SET_SRC (e));
29304 gcc_assert (reg >= lastreg);
29306 if (i != 1)
29307 fprintf (asm_out_file, ", ");
29308 /* We can't use %r for vfp because we need to use the
29309 double precision register names. */
29310 if (IS_VFP_REGNUM (reg))
29311 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29312 else
29313 asm_fprintf (asm_out_file, "%r", reg);
29315 #ifdef ENABLE_CHECKING
29316 /* Check that the addresses are consecutive. */
29317 e = XEXP (SET_DEST (e), 0);
29318 if (GET_CODE (e) == PLUS)
29319 gcc_assert (REG_P (XEXP (e, 0))
29320 && REGNO (XEXP (e, 0)) == SP_REGNUM
29321 && CONST_INT_P (XEXP (e, 1))
29322 && offset == INTVAL (XEXP (e, 1)));
29323 else
29324 gcc_assert (i == 1
29325 && REG_P (e)
29326 && REGNO (e) == SP_REGNUM);
29327 offset += reg_size;
29328 #endif
29330 fprintf (asm_out_file, "}\n");
29331 if (padfirst)
29332 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29335 /* Emit unwind directives for a SET. */
29337 static void
29338 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29340 rtx e0;
29341 rtx e1;
29342 unsigned reg;
29344 e0 = XEXP (p, 0);
29345 e1 = XEXP (p, 1);
29346 switch (GET_CODE (e0))
29348 case MEM:
29349 /* Pushing a single register. */
29350 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29351 || !REG_P (XEXP (XEXP (e0, 0), 0))
29352 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29353 abort ();
29355 asm_fprintf (asm_out_file, "\t.save ");
29356 if (IS_VFP_REGNUM (REGNO (e1)))
29357 asm_fprintf(asm_out_file, "{d%d}\n",
29358 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29359 else
29360 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29361 break;
29363 case REG:
29364 if (REGNO (e0) == SP_REGNUM)
29366 /* A stack increment. */
29367 if (GET_CODE (e1) != PLUS
29368 || !REG_P (XEXP (e1, 0))
29369 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29370 || !CONST_INT_P (XEXP (e1, 1)))
29371 abort ();
29373 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29374 -INTVAL (XEXP (e1, 1)));
29376 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29378 HOST_WIDE_INT offset;
29380 if (GET_CODE (e1) == PLUS)
29382 if (!REG_P (XEXP (e1, 0))
29383 || !CONST_INT_P (XEXP (e1, 1)))
29384 abort ();
29385 reg = REGNO (XEXP (e1, 0));
29386 offset = INTVAL (XEXP (e1, 1));
29387 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29388 HARD_FRAME_POINTER_REGNUM, reg,
29389 offset);
29391 else if (REG_P (e1))
29393 reg = REGNO (e1);
29394 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29395 HARD_FRAME_POINTER_REGNUM, reg);
29397 else
29398 abort ();
29400 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29402 /* Move from sp to reg. */
29403 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29405 else if (GET_CODE (e1) == PLUS
29406 && REG_P (XEXP (e1, 0))
29407 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29408 && CONST_INT_P (XEXP (e1, 1)))
29410 /* Set reg to offset from sp. */
29411 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29412 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29414 else
29415 abort ();
29416 break;
29418 default:
29419 abort ();
29424 /* Emit unwind directives for the given insn. */
29426 static void
29427 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29429 rtx note, pat;
29430 bool handled_one = false;
29432 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29433 return;
29435 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29436 && (TREE_NOTHROW (current_function_decl)
29437 || crtl->all_throwers_are_sibcalls))
29438 return;
29440 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29441 return;
29443 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29445 switch (REG_NOTE_KIND (note))
29447 case REG_FRAME_RELATED_EXPR:
29448 pat = XEXP (note, 0);
29449 goto found;
29451 case REG_CFA_REGISTER:
29452 pat = XEXP (note, 0);
29453 if (pat == NULL)
29455 pat = PATTERN (insn);
29456 if (GET_CODE (pat) == PARALLEL)
29457 pat = XVECEXP (pat, 0, 0);
29460 /* Only emitted for IS_STACKALIGN re-alignment. */
29462 rtx dest, src;
29463 unsigned reg;
29465 src = SET_SRC (pat);
29466 dest = SET_DEST (pat);
29468 gcc_assert (src == stack_pointer_rtx);
29469 reg = REGNO (dest);
29470 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29471 reg + 0x90, reg);
29473 handled_one = true;
29474 break;
29476 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
29477 to get correct dwarf information for shrink-wrapping.  We should not
29478 emit unwind information for it because these notes are used either for
29479 pretend arguments or to adjust sp and restore registers from the
29480 stack.  */
29481 case REG_CFA_DEF_CFA:
29482 case REG_CFA_ADJUST_CFA:
29483 case REG_CFA_RESTORE:
29484 return;
29486 case REG_CFA_EXPRESSION:
29487 case REG_CFA_OFFSET:
29488 /* ??? Only handling here what we actually emit. */
29489 gcc_unreachable ();
29491 default:
29492 break;
29495 if (handled_one)
29496 return;
29497 pat = PATTERN (insn);
29498 found:
29500 switch (GET_CODE (pat))
29502 case SET:
29503 arm_unwind_emit_set (asm_out_file, pat);
29504 break;
29506 case SEQUENCE:
29507 /* Store multiple. */
29508 arm_unwind_emit_sequence (asm_out_file, pat);
29509 break;
29511 default:
29512 abort();
29517 /* Output a reference from a function exception table to the type_info
29518 object X. The EABI specifies that the symbol should be relocated by
29519 an R_ARM_TARGET2 relocation. */
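/* So a typeinfo reference is emitted as, e.g., ".word _ZTIi(TARGET2)", while
an integer constant is emitted without the relocation suffix.  */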
29521 static bool
29522 arm_output_ttype (rtx x)
29524 fputs ("\t.word\t", asm_out_file);
29525 output_addr_const (asm_out_file, x);
29526 /* Use special relocations for symbol references. */
29527 if (!CONST_INT_P (x))
29528 fputs ("(TARGET2)", asm_out_file);
29529 fputc ('\n', asm_out_file);
29531 return TRUE;
29534 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29536 static void
29537 arm_asm_emit_except_personality (rtx personality)
29539 fputs ("\t.personality\t", asm_out_file);
29540 output_addr_const (asm_out_file, personality);
29541 fputc ('\n', asm_out_file);
29544 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29546 static void
29547 arm_asm_init_sections (void)
29549 exception_section = get_unnamed_section (0, output_section_asm_op,
29550 "\t.handlerdata");
29552 #endif /* ARM_UNWIND_INFO */
29554 /* Output unwind directives for the start/end of a function. */
29556 void
29557 arm_output_fn_unwind (FILE * f, bool prologue)
29559 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29560 return;
29562 if (prologue)
29563 fputs ("\t.fnstart\n", f);
29564 else
29566 /* If this function will never be unwound, then mark it as such.
29567 The same condition is used in arm_unwind_emit to suppress
29568 the frame annotations. */
29569 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29570 && (TREE_NOTHROW (current_function_decl)
29571 || crtl->all_throwers_are_sibcalls))
29572 fputs("\t.cantunwind\n", f);
29574 fputs ("\t.fnend\n", f);
29578 static bool
29579 arm_emit_tls_decoration (FILE *fp, rtx x)
29581 enum tls_reloc reloc;
29582 rtx val;
29584 val = XVECEXP (x, 0, 0);
29585 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29587 output_addr_const (fp, val);
29589 switch (reloc)
29591 case TLS_GD32:
29592 fputs ("(tlsgd)", fp);
29593 break;
29594 case TLS_LDM32:
29595 fputs ("(tlsldm)", fp);
29596 break;
29597 case TLS_LDO32:
29598 fputs ("(tlsldo)", fp);
29599 break;
29600 case TLS_IE32:
29601 fputs ("(gottpoff)", fp);
29602 break;
29603 case TLS_LE32:
29604 fputs ("(tpoff)", fp);
29605 break;
29606 case TLS_DESCSEQ:
29607 fputs ("(tlsdesc)", fp);
29608 break;
29609 default:
29610 gcc_unreachable ();
29613 switch (reloc)
29615 case TLS_GD32:
29616 case TLS_LDM32:
29617 case TLS_IE32:
29618 case TLS_DESCSEQ:
29619 fputs (" + (. - ", fp);
29620 output_addr_const (fp, XVECEXP (x, 0, 2));
29621 /* For DESCSEQ the 3rd operand encodes Thumb-ness, and is added.  */
29622 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29623 output_addr_const (fp, XVECEXP (x, 0, 3));
29624 fputc (')', fp);
29625 break;
29626 default:
29627 break;
29630 return TRUE;
29633 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29635 static void
29636 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29638 gcc_assert (size == 4);
29639 fputs ("\t.word\t", file);
29640 output_addr_const (file, x);
29641 fputs ("(tlsldo)", file);
29644 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29646 static bool
29647 arm_output_addr_const_extra (FILE *fp, rtx x)
29649 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29650 return arm_emit_tls_decoration (fp, x);
29651 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29653 char label[256];
29654 int labelno = INTVAL (XVECEXP (x, 0, 0));
29656 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29657 assemble_name_raw (fp, label);
29659 return TRUE;
29661 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29663 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29664 if (GOT_PCREL)
29665 fputs ("+.", fp);
29666 fputs ("-(", fp);
29667 output_addr_const (fp, XVECEXP (x, 0, 0));
29668 fputc (')', fp);
29669 return TRUE;
29671 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29673 output_addr_const (fp, XVECEXP (x, 0, 0));
29674 if (GOT_PCREL)
29675 fputs ("+.", fp);
29676 fputs ("-(", fp);
29677 output_addr_const (fp, XVECEXP (x, 0, 1));
29678 fputc (')', fp);
29679 return TRUE;
29681 else if (GET_CODE (x) == CONST_VECTOR)
29682 return arm_emit_vector_const (fp, x);
29684 return FALSE;
29687 /* Output assembly for a shift instruction.
29688 SET_FLAGS determines how the instruction modifies the condition codes.
29689 0 - Do not set condition codes.
29690 1 - Set condition codes.
29691 2 - Use smallest instruction. */
29692 const char *
29693 arm_output_shift(rtx * operands, int set_flags)
29695 char pattern[100];
29696 static const char flag_chars[3] = {'?', '.', '!'};
29697 const char *shift;
29698 HOST_WIDE_INT val;
29699 char c;
29701 c = flag_chars[set_flags];
29702 if (TARGET_UNIFIED_ASM)
29704 shift = shift_op(operands[3], &val);
29705 if (shift)
29707 if (val != -1)
29708 operands[2] = GEN_INT(val);
29709 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29711 else
29712 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29714 else
29715 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29716 output_asm_insn (pattern, operands);
29717 return "";
29720 /* Output assembly for a WMMX immediate shift instruction. */
29721 const char *
29722 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29724 int shift = INTVAL (operands[2]);
29725 char templ[50];
29726 machine_mode opmode = GET_MODE (operands[0]);
29728 gcc_assert (shift >= 0);
29730 /* Handle out-of-range shift values: greater than 63 (for the D qualifier),
29731 31 (for the W qualifier) or 15 (for the H qualifier).  */
29732 if (((opmode == V4HImode) && (shift > 15))
29733 || ((opmode == V2SImode) && (shift > 31))
29734 || ((opmode == DImode) && (shift > 63)))
29736 if (wror_or_wsra)
29738 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29739 output_asm_insn (templ, operands);
29740 if (opmode == DImode)
29742 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29743 output_asm_insn (templ, operands);
29746 else
29748 /* The destination register will contain all zeros. */
29749 sprintf (templ, "wzero\t%%0");
29750 output_asm_insn (templ, operands);
29752 return "";
29755 if ((opmode == DImode) && (shift > 32))
29757 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29758 output_asm_insn (templ, operands);
29759 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29760 output_asm_insn (templ, operands);
29762 else
29764 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29765 output_asm_insn (templ, operands);
29767 return "";
29770 /* Output assembly for a WMMX tinsr instruction. */
29771 const char *
29772 arm_output_iwmmxt_tinsr (rtx *operands)
29774 int mask = INTVAL (operands[3]);
29775 int i;
29776 char templ[50];
29777 int units = mode_nunits[GET_MODE (operands[0])];
29778 gcc_assert ((mask & (mask - 1)) == 0);
29779 for (i = 0; i < units; ++i)
29781 if ((mask & 0x01) == 1)
29783 break;
29785 mask >>= 1;
29787 gcc_assert (i < units);
29789 switch (GET_MODE (operands[0]))
29791 case V8QImode:
29792 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29793 break;
29794 case V4HImode:
29795 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29796 break;
29797 case V2SImode:
29798 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29799 break;
29800 default:
29801 gcc_unreachable ();
29802 break;
29804 output_asm_insn (templ, operands);
29806 return "";
29809 /* Output a Thumb-1 casesi dispatch sequence. */
29810 const char *
29811 thumb1_output_casesi (rtx *operands)
29813 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29815 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29817 switch (GET_MODE(diff_vec))
29819 case QImode:
29820 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29821 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29822 case HImode:
29823 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29824 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29825 case SImode:
29826 return "bl\t%___gnu_thumb1_case_si";
29827 default:
29828 gcc_unreachable ();
29832 /* Output a Thumb-2 casesi instruction. */
29833 const char *
29834 thumb2_output_casesi (rtx *operands)
29836 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29838 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29840 output_asm_insn ("cmp\t%0, %1", operands);
29841 output_asm_insn ("bhi\t%l3", operands);
29842 switch (GET_MODE(diff_vec))
29844 case QImode:
29845 return "tbb\t[%|pc, %0]";
29846 case HImode:
29847 return "tbh\t[%|pc, %0, lsl #1]";
29848 case SImode:
29849 if (flag_pic)
29851 output_asm_insn ("adr\t%4, %l2", operands);
29852 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29853 output_asm_insn ("add\t%4, %4, %5", operands);
29854 return "bx\t%4";
29856 else
29858 output_asm_insn ("adr\t%4, %l2", operands);
29859 return "ldr\t%|pc, [%4, %0, lsl #2]";
29861 default:
29862 gcc_unreachable ();
29866 /* Most ARM cores are single issue, but some newer ones can issue multiple
29867 instructions per cycle.  The scheduler descriptions rely on this being correct.  */
29868 static int
29869 arm_issue_rate (void)
29871 switch (arm_tune)
29873 case cortexa15:
29874 case cortexa57:
29875 return 3;
29877 case cortexm7:
29878 case cortexr4:
29879 case cortexr4f:
29880 case cortexr5:
29881 case genericv7a:
29882 case cortexa5:
29883 case cortexa7:
29884 case cortexa8:
29885 case cortexa9:
29886 case cortexa12:
29887 case cortexa53:
29888 case fa726te:
29889 case marvell_pj4:
29890 return 2;
29892 default:
29893 return 1;
29897 /* A table and a function to perform ARM-specific name mangling for
29898 NEON vector types in order to conform to the AAPCS (see "Procedure
29899 Call Standard for the ARM Architecture", Appendix A). To qualify
29900 for emission with the mangled names defined in that document, a
29901 vector type must not only be of the correct mode but also be
29902 composed of NEON vector element types (e.g. __builtin_neon_qi). */
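/* For example, a 64-bit vector of __builtin_neon_qi elements (V8QImode, the
int8x8_t type) is mangled as "15__simd64_int8_t", per the first table entry
below.  */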
29903 typedef struct
29905 machine_mode mode;
29906 const char *element_type_name;
29907 const char *aapcs_name;
29908 } arm_mangle_map_entry;
29910 static arm_mangle_map_entry arm_mangle_map[] = {
29911 /* 64-bit containerized types. */
29912 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29913 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29914 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29915 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29916 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29917 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29918 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29919 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29920 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29921 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29923 /* 128-bit containerized types. */
29924 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29925 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29926 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29927 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29928 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29929 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29930 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29931 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29932 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29933 { VOIDmode, NULL, NULL }
29936 const char *
29937 arm_mangle_type (const_tree type)
29939 arm_mangle_map_entry *pos = arm_mangle_map;
29941 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29942 has to be mangled as if it is in the "std" namespace. */
29943 if (TARGET_AAPCS_BASED
29944 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29945 return "St9__va_list";
29947 /* Half-precision float. */
29948 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29949 return "Dh";
29951 if (TREE_CODE (type) != VECTOR_TYPE)
29952 return NULL;
29954 /* Check the mode of the vector type, and the name of the vector
29955 element type, against the table. */
29956 while (pos->mode != VOIDmode)
29958 tree elt_type = TREE_TYPE (type);
29960 if (pos->mode == TYPE_MODE (type)
29961 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29962 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29963 pos->element_type_name))
29964 return pos->aapcs_name;
29966 pos++;
29969 /* Use the default mangling for unrecognized (possibly user-defined)
29970 vector types. */
29971 return NULL;
29974 /* Order of allocation of core registers for Thumb: this allocation is
29975 written over the corresponding initial entries of the array
29976 initialized with REG_ALLOC_ORDER. We allocate all low registers
29977 first. Saving and restoring a low register is usually cheaper than
29978 using a call-clobbered high register. */
29980 static const int thumb_core_reg_alloc_order[] =
29982 3, 2, 1, 0, 4, 5, 6, 7,
29983 14, 12, 8, 9, 10, 11
29986 /* Adjust register allocation order when compiling for Thumb. */
29988 void
29989 arm_order_regs_for_local_alloc (void)
29991 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29992 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29993 if (TARGET_THUMB)
29994 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29995 sizeof (thumb_core_reg_alloc_order));
29998 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30000 bool
30001 arm_frame_pointer_required (void)
30003 return (cfun->has_nonlocal_label
30004 || SUBTARGET_FRAME_POINTER_REQUIRED
30005 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30008 /* Only Thumb-1 lacks conditional execution, so return true unless
30009 the target is Thumb-1.  */
30010 static bool
30011 arm_have_conditional_execution (void)
30013 return !TARGET_THUMB1;
30016 tree
30017 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30019 machine_mode in_mode, out_mode;
30020 int in_n, out_n;
30021 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30023 if (TREE_CODE (type_out) != VECTOR_TYPE
30024 || TREE_CODE (type_in) != VECTOR_TYPE)
30025 return NULL_TREE;
30027 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30028 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30029 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30030 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30032 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30033 decl of the vectorized builtin for the appropriate vector mode.
30034 NULL_TREE is returned if no such builtin is available. */
30035 #undef ARM_CHECK_BUILTIN_MODE
30036 #define ARM_CHECK_BUILTIN_MODE(C) \
30037 (TARGET_NEON && TARGET_FPU_ARMV8 \
30038 && flag_unsafe_math_optimizations \
30039 && ARM_CHECK_BUILTIN_MODE_1 (C))
30041 #undef ARM_CHECK_BUILTIN_MODE_1
30042 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30043 (out_mode == SFmode && out_n == C \
30044 && in_mode == SFmode && in_n == C)
30046 #undef ARM_FIND_VRINT_VARIANT
30047 #define ARM_FIND_VRINT_VARIANT(N) \
30048 (ARM_CHECK_BUILTIN_MODE (2) \
30049 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30050 : (ARM_CHECK_BUILTIN_MODE (4) \
30051 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30052 : NULL_TREE))
30054 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30056 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30057 switch (fn)
30059 case BUILT_IN_FLOORF:
30060 return ARM_FIND_VRINT_VARIANT (vrintm);
30061 case BUILT_IN_CEILF:
30062 return ARM_FIND_VRINT_VARIANT (vrintp);
30063 case BUILT_IN_TRUNCF:
30064 return ARM_FIND_VRINT_VARIANT (vrintz);
30065 case BUILT_IN_ROUNDF:
30066 return ARM_FIND_VRINT_VARIANT (vrinta);
30067 #undef ARM_CHECK_BUILTIN_MODE_1
30068 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30069 (out_mode == SImode && out_n == C \
30070 && in_mode == SFmode && in_n == C)
30072 #define ARM_FIND_VCVT_VARIANT(N) \
30073 (ARM_CHECK_BUILTIN_MODE (2) \
30074 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30075 : (ARM_CHECK_BUILTIN_MODE (4) \
30076 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30077 : NULL_TREE))
30079 #define ARM_FIND_VCVTU_VARIANT(N) \
30080 (ARM_CHECK_BUILTIN_MODE (2) \
30081 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30082 : (ARM_CHECK_BUILTIN_MODE (4) \
30083 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30084 : NULL_TREE))
30085 case BUILT_IN_LROUNDF:
30086 return out_unsigned_p
30087 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30088 : ARM_FIND_VCVT_VARIANT (vcvta);
30089 case BUILT_IN_LCEILF:
30090 return out_unsigned_p
30091 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30092 : ARM_FIND_VCVT_VARIANT (vcvtp);
30093 case BUILT_IN_LFLOORF:
30094 return out_unsigned_p
30095 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30096 : ARM_FIND_VCVT_VARIANT (vcvtm);
30097 #undef ARM_CHECK_BUILTIN_MODE
30098 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30099 (out_mode == N##mode && out_n == C \
30100 && in_mode == N##mode && in_n == C)
30101 case BUILT_IN_BSWAP16:
30102 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30103 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30104 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30105 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30106 else
30107 return NULL_TREE;
30108 case BUILT_IN_BSWAP32:
30109 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30110 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30111 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30112 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30113 else
30114 return NULL_TREE;
30115 case BUILT_IN_BSWAP64:
30116 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30117 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30118 else
30119 return NULL_TREE;
30120 case BUILT_IN_COPYSIGNF:
30121 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30122 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30123 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30124 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30125 else
30126 return NULL_TREE;
30128 default:
30129 return NULL_TREE;
30132 return NULL_TREE;
30134 #undef ARM_FIND_VCVT_VARIANT
30135 #undef ARM_FIND_VCVTU_VARIANT
30136 #undef ARM_CHECK_BUILTIN_MODE
30137 #undef ARM_FIND_VRINT_VARIANT
30140 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30141 static HOST_WIDE_INT
30142 arm_vector_alignment (const_tree type)
30144 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30146 if (TARGET_AAPCS_BASED)
30147 align = MIN (align, 64);
30149 return align;
30152 static unsigned int
30153 arm_autovectorize_vector_sizes (void)
30155 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30158 static bool
30159 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30161 /* Vectors which aren't in packed structures will not be less aligned than
30162 the natural alignment of their element type, so this is safe. */
30163 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30164 return !is_packed;
30166 return default_builtin_vector_alignment_reachable (type, is_packed);
30169 static bool
30170 arm_builtin_support_vector_misalignment (machine_mode mode,
30171 const_tree type, int misalignment,
30172 bool is_packed)
30174 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30176 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30178 if (is_packed)
30179 return align == 1;
30181 /* If the misalignment is unknown, we should be able to handle the access
30182 so long as it is not to a member of a packed data structure. */
30183 if (misalignment == -1)
30184 return true;
30186 /* Return true if the misalignment is a multiple of the natural alignment
30187 of the vector's element type. This is probably always going to be
30188 true in practice, since we've already established that this isn't a
30189 packed access. */
30190 return ((misalignment % align) == 0);
30193 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30194 is_packed);
30197 static void
30198 arm_conditional_register_usage (void)
30200 int regno;
30202 if (TARGET_THUMB1 && optimize_size)
30204 /* When optimizing for size on Thumb-1, it's better not
30205 to use the HI regs, because of the overhead of
30206 stacking them. */
30207 for (regno = FIRST_HI_REGNUM;
30208 regno <= LAST_HI_REGNUM; ++regno)
30209 fixed_regs[regno] = call_used_regs[regno] = 1;
30212 /* The link register can be clobbered by any branch insn,
30213 but we have no way to track that at present, so mark
30214 it as unavailable. */
30215 if (TARGET_THUMB1)
30216 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30218 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30220 /* VFPv3 registers are disabled when earlier VFP
30221 versions are selected due to the definition of
30222 LAST_VFP_REGNUM. */
30223 for (regno = FIRST_VFP_REGNUM;
30224 regno <= LAST_VFP_REGNUM; ++ regno)
30226 fixed_regs[regno] = 0;
30227 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30228 || regno >= FIRST_VFP_REGNUM + 32;
30232 if (TARGET_REALLY_IWMMXT)
30234 regno = FIRST_IWMMXT_GR_REGNUM;
30235 /* The 2002/10/09 revision of the XScale ABI has wCG0
30236 and wCG1 as call-preserved registers. The 2002/11/21
30237 revision changed this so that all wCG registers are
30238 scratch registers. */
30239 for (regno = FIRST_IWMMXT_GR_REGNUM;
30240 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30241 fixed_regs[regno] = 0;
30242 /* The XScale ABI has wR0 - wR9 as scratch registers,
30243 the rest as call-preserved registers. */
30244 for (regno = FIRST_IWMMXT_REGNUM;
30245 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30247 fixed_regs[regno] = 0;
30248 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30252 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30254 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30255 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30257 else if (TARGET_APCS_STACK)
30259 fixed_regs[10] = 1;
30260 call_used_regs[10] = 1;
30262 /* -mcaller-super-interworking reserves r11 for calls to
30263 _interwork_r11_call_via_rN(). Making the register global
30264 is an easy way of ensuring that it remains valid for all
30265 calls. */
30266 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30267 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30269 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30270 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30271 if (TARGET_CALLER_INTERWORKING)
30272 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30274 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30277 static reg_class_t
30278 arm_preferred_rename_class (reg_class_t rclass)
30280 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30281 using GENERAL_REGS.  During the register rename pass, we therefore prefer
30282 LO_REGS so that code size can be reduced.  */
30283 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30284 return LO_REGS;
30285 else
30286 return NO_REGS;
30289 /* Compute the attribute "length" of insn "*push_multi".
30290 So this function MUST be kept in sync with that insn pattern. */
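/* In summary: ARM-mode pushes always take 4 bytes and Thumb-1 pushes 2; a
Thumb-2 push takes 2 bytes unless the register list contains a high register
other than LR, which forces the 32-bit encoding (4 bytes).  */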
30292 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30294 int i, regno, hi_reg;
30295 int num_saves = XVECLEN (parallel_op, 0);
30297 /* ARM mode. */
30298 if (TARGET_ARM)
30299 return 4;
30300 /* Thumb1 mode. */
30301 if (TARGET_THUMB1)
30302 return 2;
30304 /* Thumb2 mode. */
30305 regno = REGNO (first_op);
30306 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30307 for (i = 1; i < num_saves && !hi_reg; i++)
30309 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30310 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30313 if (!hi_reg)
30314 return 2;
30315 return 4;
30318 /* Compute the number of instructions emitted by output_move_double. */
30320 arm_count_output_move_double_insns (rtx *operands)
30322 int count;
30323 rtx ops[2];
30324 /* output_move_double may modify the operands array, so call it
30325 here on a copy of the array. */
30326 ops[0] = operands[0];
30327 ops[1] = operands[1];
30328 output_move_double (ops, false, &count);
30329 return count;
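/* If OPERAND is a CONST_DOUBLE whose exact reciprocal is a power of two
representable in 32 bits, return the base-2 log of that reciprocal (i.e. the
number of fraction bits it denotes); otherwise return 0.  */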
30333 vfp3_const_double_for_fract_bits (rtx operand)
30335 REAL_VALUE_TYPE r0;
30337 if (!CONST_DOUBLE_P (operand))
30338 return 0;
30340 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30341 if (exact_real_inverse (DFmode, &r0))
30343 if (exact_real_truncate (DFmode, &r0))
30345 HOST_WIDE_INT value = real_to_integer (&r0);
30346 value = value & 0xffffffff;
30347 if ((value != 0) && ( (value & (value - 1)) == 0))
30348 return int_log2 (value);
30351 return 0;
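/* As above, but test OPERAND itself rather than its reciprocal: return the
base-2 log of the value if it is an exact power of two representable in
32 bits, otherwise 0.  */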
30355 vfp3_const_double_for_bits (rtx operand)
30357 REAL_VALUE_TYPE r0;
30359 if (!CONST_DOUBLE_P (operand))
30360 return 0;
30362 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30363 if (exact_real_truncate (DFmode, &r0))
30365 HOST_WIDE_INT value = real_to_integer (&r0);
30366 value = value & 0xffffffff;
30367 if ((value != 0) && ( (value & (value - 1)) == 0))
30368 return int_log2 (value);
30371 return 0;
30374 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30376 static void
30377 arm_pre_atomic_barrier (enum memmodel model)
30379 if (need_atomic_barrier_p (model, true))
30380 emit_insn (gen_memory_barrier ());
30383 static void
30384 arm_post_atomic_barrier (enum memmodel model)
30386 if (need_atomic_barrier_p (model, false))
30387 emit_insn (gen_memory_barrier ());
30390 /* Emit the load-exclusive and store-exclusive instructions.
30391 Use acquire and release versions if necessary. */
30393 static void
30394 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30396 rtx (*gen) (rtx, rtx);
30398 if (acq)
30400 switch (mode)
30402 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30403 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30404 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30405 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30406 default:
30407 gcc_unreachable ();
30410 else
30412 switch (mode)
30414 case QImode: gen = gen_arm_load_exclusiveqi; break;
30415 case HImode: gen = gen_arm_load_exclusivehi; break;
30416 case SImode: gen = gen_arm_load_exclusivesi; break;
30417 case DImode: gen = gen_arm_load_exclusivedi; break;
30418 default:
30419 gcc_unreachable ();
30423 emit_insn (gen (rval, mem));
30426 static void
30427 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30428 rtx mem, bool rel)
30430 rtx (*gen) (rtx, rtx, rtx);
30432 if (rel)
30434 switch (mode)
30436 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30437 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30438 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30439 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30440 default:
30441 gcc_unreachable ();
30444 else
30446 switch (mode)
30448 case QImode: gen = gen_arm_store_exclusiveqi; break;
30449 case HImode: gen = gen_arm_store_exclusivehi; break;
30450 case SImode: gen = gen_arm_store_exclusivesi; break;
30451 case DImode: gen = gen_arm_store_exclusivedi; break;
30452 default:
30453 gcc_unreachable ();
30457 emit_insn (gen (bval, rval, mem));
30460 /* Emit the jump described by INSN and mark it as very unlikely to be taken.  */
30462 static void
30463 emit_unlikely_jump (rtx insn)
30465 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30467 insn = emit_jump_insn (insn);
30468 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30471 /* Expand a compare and swap pattern. */
30473 void
30474 arm_expand_compare_and_swap (rtx operands[])
30476 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30477 machine_mode mode;
30478 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30480 bval = operands[0];
30481 rval = operands[1];
30482 mem = operands[2];
30483 oldval = operands[3];
30484 newval = operands[4];
30485 is_weak = operands[5];
30486 mod_s = operands[6];
30487 mod_f = operands[7];
30488 mode = GET_MODE (mem);
30490 /* Normally the succ memory model must be stronger than fail, but in the
30491 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30492 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30494 if (TARGET_HAVE_LDACQ
30495 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30496 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30497 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30499 switch (mode)
30501 case QImode:
30502 case HImode:
30503 /* For narrow modes, we're going to perform the comparison in SImode,
30504 so do the zero-extension now. */
30505 rval = gen_reg_rtx (SImode);
30506 oldval = convert_modes (SImode, mode, oldval, true);
30507 /* FALLTHRU */
30509 case SImode:
30510 /* Force the value into a register if needed. We waited until after
30511 the zero-extension above to do this properly. */
30512 if (!arm_add_operand (oldval, SImode))
30513 oldval = force_reg (SImode, oldval);
30514 break;
30516 case DImode:
30517 if (!cmpdi_operand (oldval, mode))
30518 oldval = force_reg (mode, oldval);
30519 break;
30521 default:
30522 gcc_unreachable ();
30525 switch (mode)
30527 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30528 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30529 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30530 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30531 default:
30532 gcc_unreachable ();
30535 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30537 if (mode == QImode || mode == HImode)
30538 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30540 /* In all cases, we arrange for success to be signaled by Z set.
30541 This arrangement allows for the boolean result to be used directly
30542 in a subsequent branch, post optimization. */
30543 x = gen_rtx_REG (CCmode, CC_REGNUM);
30544 x = gen_rtx_EQ (SImode, x, const0_rtx);
30545 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30548 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30549 another memory store between the load-exclusive and store-exclusive can
30550 reset the monitor from Exclusive to Open state. This means we must wait
30551 until after reload to split the pattern, lest we get a register spill in
30552 the middle of the atomic sequence. */
30554 void
30555 arm_split_compare_and_swap (rtx operands[])
30557 rtx rval, mem, oldval, newval, scratch;
30558 machine_mode mode;
30559 enum memmodel mod_s, mod_f;
30560 bool is_weak;
30561 rtx_code_label *label1, *label2;
30562 rtx x, cond;
30564 rval = operands[0];
30565 mem = operands[1];
30566 oldval = operands[2];
30567 newval = operands[3];
30568 is_weak = (operands[4] != const0_rtx);
30569 mod_s = (enum memmodel) INTVAL (operands[5]);
30570 mod_f = (enum memmodel) INTVAL (operands[6]);
30571 scratch = operands[7];
30572 mode = GET_MODE (mem);
30574 bool use_acquire = TARGET_HAVE_LDACQ
30575 && !(mod_s == MEMMODEL_RELAXED
30576 || mod_s == MEMMODEL_CONSUME
30577 || mod_s == MEMMODEL_RELEASE);
30579 bool use_release = TARGET_HAVE_LDACQ
30580 && !(mod_s == MEMMODEL_RELAXED
30581 || mod_s == MEMMODEL_CONSUME
30582 || mod_s == MEMMODEL_ACQUIRE);
30584 /* Checks whether a barrier is needed and emits one accordingly. */
30585 if (!(use_acquire || use_release))
30586 arm_pre_atomic_barrier (mod_s);
30588 label1 = NULL;
30589 if (!is_weak)
30591 label1 = gen_label_rtx ();
30592 emit_label (label1);
30594 label2 = gen_label_rtx ();
30596 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30598 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30599 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30600 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30601 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30602 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30604 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30606 /* Weak or strong, we want EQ to be true for success, so that we
30607 match the flags that we got from the compare above. */
30608 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30609 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30610 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30612 if (!is_weak)
30614 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30615 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30616 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30617 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30620 if (mod_f != MEMMODEL_RELAXED)
30621 emit_label (label2);
30623 /* Checks whether a barrier is needed and emits one accordingly. */
30624 if (!(use_acquire || use_release))
30625 arm_post_atomic_barrier (mod_s);
30627 if (mod_f == MEMMODEL_RELAXED)
30628 emit_label (label2);
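/* Split an atomic read-modify-write into a load-exclusive / operate /
store-exclusive retry loop.  CODE is the operation to apply, OLD_OUT and
NEW_OUT (either may be null) receive the old and new values of MEM, VALUE is
the other operand, MODEL_RTX holds the memory model and COND is a scratch
register that receives the store-exclusive status.  */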
30631 void
30632 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30633 rtx value, rtx model_rtx, rtx cond)
30635 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30636 machine_mode mode = GET_MODE (mem);
30637 machine_mode wmode = (mode == DImode ? DImode : SImode);
30638 rtx_code_label *label;
30639 rtx x;
30641 bool use_acquire = TARGET_HAVE_LDACQ
30642 && !(model == MEMMODEL_RELAXED
30643 || model == MEMMODEL_CONSUME
30644 || model == MEMMODEL_RELEASE);
30646 bool use_release = TARGET_HAVE_LDACQ
30647 && !(model == MEMMODEL_RELAXED
30648 || model == MEMMODEL_CONSUME
30649 || model == MEMMODEL_ACQUIRE);
30651 /* Checks whether a barrier is needed and emits one accordingly. */
30652 if (!(use_acquire || use_release))
30653 arm_pre_atomic_barrier (model);
30655 label = gen_label_rtx ();
30656 emit_label (label);
30658 if (new_out)
30659 new_out = gen_lowpart (wmode, new_out);
30660 if (old_out)
30661 old_out = gen_lowpart (wmode, old_out);
30662 else
30663 old_out = new_out;
30664 value = simplify_gen_subreg (wmode, value, mode, 0);
30666 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30668 switch (code)
30670 case SET:
30671 new_out = value;
30672 break;
30674 case NOT:
30675 x = gen_rtx_AND (wmode, old_out, value);
30676 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30677 x = gen_rtx_NOT (wmode, new_out);
30678 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30679 break;
30681 case MINUS:
30682 if (CONST_INT_P (value))
30684 value = GEN_INT (-INTVAL (value));
30685 code = PLUS;
30687 /* FALLTHRU */
30689 case PLUS:
30690 if (mode == DImode)
30692 /* DImode plus/minus need to clobber flags. */
30693 /* The adddi3 and subdi3 patterns are incorrectly written so that
30694 they require matching operands, even when we could easily support
30695 three operands. Thankfully, this can be fixed up post-splitting,
30696 as the individual add+adc patterns do accept three operands and
30697 post-reload cprop can make these moves go away. */
30698 emit_move_insn (new_out, old_out);
30699 if (code == PLUS)
30700 x = gen_adddi3 (new_out, new_out, value);
30701 else
30702 x = gen_subdi3 (new_out, new_out, value);
30703 emit_insn (x);
30704 break;
30706 /* FALLTHRU */
30708 default:
30709 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30710 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30711 break;
30714 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30715 use_release);
30717 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30718 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30720 /* Checks whether a barrier is needed and emits one accordingly. */
30721 if (!(use_acquire || use_release))
30722 arm_post_atomic_barrier (model);
30725 #define MAX_VECT_LEN 16
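/* Describes one vector permutation request: TARGET receives the result of
permuting OP0/OP1 according to the NELT indices in PERM; ONE_VECTOR_P is set
when both inputs are the same value, and TESTING_P when we only need to know
whether the permutation can be expanded rather than actually emit it.  */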
30727 struct expand_vec_perm_d
30729 rtx target, op0, op1;
30730 unsigned char perm[MAX_VECT_LEN];
30731 machine_mode vmode;
30732 unsigned char nelt;
30733 bool one_vector_p;
30734 bool testing_p;
30737 /* Generate a variable permutation. */
30739 static void
30740 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30742 machine_mode vmode = GET_MODE (target);
30743 bool one_vector_p = rtx_equal_p (op0, op1);
30745 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30746 gcc_checking_assert (GET_MODE (op0) == vmode);
30747 gcc_checking_assert (GET_MODE (op1) == vmode);
30748 gcc_checking_assert (GET_MODE (sel) == vmode);
30749 gcc_checking_assert (TARGET_NEON);
30751 if (one_vector_p)
30753 if (vmode == V8QImode)
30754 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30755 else
30756 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30758 else
30760 rtx pair;
30762 if (vmode == V8QImode)
30764 pair = gen_reg_rtx (V16QImode);
30765 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30766 pair = gen_lowpart (TImode, pair);
30767 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30769 else
30771 pair = gen_reg_rtx (OImode);
30772 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30773 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
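/* Expand a variable permutation of OP0 and OP1 into TARGET under the selector
SEL.  The selector is masked into range first, since VTBL does not reduce
out-of-range indices modulo the number of elements.  */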
30778 void
30779 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30781 machine_mode vmode = GET_MODE (target);
30782 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30783 bool one_vector_p = rtx_equal_p (op0, op1);
30784 rtx rmask[MAX_VECT_LEN], mask;
30786 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30787 numbering of elements for big-endian, we must reverse the order. */
30788 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30790 /* The VTBL instruction does not use a modulo index, so we must take care
30791 of that ourselves. */
30792 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30793 for (i = 0; i < nelt; ++i)
30794 rmask[i] = mask;
30795 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30796 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30798 arm_expand_vec_perm_1 (target, op0, op1, sel);
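/* Illustration (editorial addition, not part of the original sources): the
   AND with NELT-1 (or 2*NELT-1 for a two-operand permute) above is what gives
   the VTBL expansion the modulo-index semantics VEC_PERM_EXPR requires;
   out-of-range selector bytes would otherwise make VTBL produce zero for that
   lane.  A minimal host-side sketch of the masking, with hypothetical names.  */

static void
model_vec_perm_index_mask (unsigned char *sel, unsigned int nelt,
                           int one_vector_p)
{
  /* One input vector: indices wrap at NELT; two inputs: at 2*NELT.  */
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  unsigned int i;

  for (i = 0; i < nelt; i++)
    sel[i] &= mask;   /* E.g. index 9 becomes 1 for a one-vector V8QI permute.  */
}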
30801 /* Generate or test for an insn that supports a constant permutation. */
30803 /* Recognize patterns for the VUZP insns. */
30805 static bool
30806 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30808 unsigned int i, odd, mask, nelt = d->nelt;
30809 rtx out0, out1, in0, in1, x;
30810 rtx (*gen)(rtx, rtx, rtx, rtx);
30812 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30813 return false;
30815 /* Note that these are little-endian tests. Adjust for big-endian later. */
30816 if (d->perm[0] == 0)
30817 odd = 0;
30818 else if (d->perm[0] == 1)
30819 odd = 1;
30820 else
30821 return false;
30822 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30824 for (i = 0; i < nelt; i++)
30826 unsigned elt = (i * 2 + odd) & mask;
30827 if (d->perm[i] != elt)
30828 return false;
30831 /* Success! */
30832 if (d->testing_p)
30833 return true;
30835 switch (d->vmode)
30837 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30838 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30839 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30840 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30841 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30842 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30843 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30844 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30845 default:
30846 gcc_unreachable ();
30849 in0 = d->op0;
30850 in1 = d->op1;
30851 if (BYTES_BIG_ENDIAN)
30853 x = in0, in0 = in1, in1 = x;
30854 odd = !odd;
30857 out0 = d->target;
30858 out1 = gen_reg_rtx (d->vmode);
30859 if (odd)
30860 x = out0, out0 = out1, out1 = x;
30862 emit_insn (gen (out0, in0, in1, out1));
30863 return true;
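/* Illustration (editorial addition, not part of the original sources): the
   check above accepts selectors that pick every second element, e.g. for a
   two-operand V8QImode permute {0,2,4,6,8,10,12,14} (odd == 0) or
   {1,3,5,7,9,11,13,15} (odd == 1).  A host-side mirror of that test, with
   hypothetical names.  */

static int
model_is_vuzp_selector (const unsigned char *perm, unsigned int nelt,
                        int one_vector_p)
{
  unsigned int i, odd, mask;

  if (perm[0] == 0)
    odd = 0;
  else if (perm[0] == 1)
    odd = 1;
  else
    return 0;

  mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (i = 0; i < nelt; i++)
    if (perm[i] != ((i * 2 + odd) & mask))
      return 0;

  return 1;
}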
30866 /* Recognize patterns for the VZIP insns. */
30868 static bool
30869 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30871 unsigned int i, high, mask, nelt = d->nelt;
30872 rtx out0, out1, in0, in1, x;
30873 rtx (*gen)(rtx, rtx, rtx, rtx);
30875 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30876 return false;
30878 /* Note that these are little-endian tests. Adjust for big-endian later. */
30879 high = nelt / 2;
30880 if (d->perm[0] == high)
30882 else if (d->perm[0] == 0)
30883 high = 0;
30884 else
30885 return false;
30886 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30888 for (i = 0; i < nelt / 2; i++)
30890 unsigned elt = (i + high) & mask;
30891 if (d->perm[i * 2] != elt)
30892 return false;
30893 elt = (elt + nelt) & mask;
30894 if (d->perm[i * 2 + 1] != elt)
30895 return false;
30898 /* Success! */
30899 if (d->testing_p)
30900 return true;
30902 switch (d->vmode)
30904 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30905 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30906 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30907 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30908 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30909 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30910 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30911 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30912 default:
30913 gcc_unreachable ();
30916 in0 = d->op0;
30917 in1 = d->op1;
30918 if (BYTES_BIG_ENDIAN)
30920 x = in0, in0 = in1, in1 = x;
30921 high = !high;
30924 out0 = d->target;
30925 out1 = gen_reg_rtx (d->vmode);
30926 if (high)
30927 x = out0, out0 = out1, out1 = x;
30929 emit_insn (gen (out0, in0, in1, out1));
30930 return true;
30933 /* Recognize patterns for the VREV insns. */
30935 static bool
30936 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30938 unsigned int i, j, diff, nelt = d->nelt;
30939 rtx (*gen)(rtx, rtx);
30941 if (!d->one_vector_p)
30942 return false;
30944 diff = d->perm[0];
30945 switch (diff)
30947 case 7:
30948 switch (d->vmode)
30950 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30951 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30952 default:
30953 return false;
30955 break;
30956 case 3:
30957 switch (d->vmode)
30959 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30960 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30961 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30962 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30963 default:
30964 return false;
30966 break;
30967 case 1:
30968 switch (d->vmode)
30970 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30971 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30972 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30973 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30974 case V4SImode: gen = gen_neon_vrev64v4si; break;
30975 case V2SImode: gen = gen_neon_vrev64v2si; break;
30976 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30977 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30978 default:
30979 return false;
30981 break;
30982 default:
30983 return false;
30986 for (i = 0; i < nelt ; i += diff + 1)
30987 for (j = 0; j <= diff; j += 1)
30989 /* This is guaranteed to be true as the value of diff
30990 is 7, 3, 1 and we should have enough elements in the
30991 queue to generate this. Getting a vector mask with a
30992 value of diff other than these values implies that
30993 something is wrong by the time we get here. */
30994 gcc_assert (i + j < nelt);
30995 if (d->perm[i + j] != i + diff - j)
30996 return false;
30999 /* Success! */
31000 if (d->testing_p)
31001 return true;
31003 emit_insn (gen (d->target, d->op0));
31004 return true;
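/* Illustration (editorial addition, not part of the original sources): DIFF
   encodes the size of the element group being reversed.  For V8QImode,
   perm[0] == 3 means "reverse within each 32-bit group", i.e. the selector
   {3,2,1,0, 7,6,5,4}, which the switch above maps to vrev32.8.  A host-side
   check of the group pattern, with hypothetical names.  */

static int
model_is_vrev_group (const unsigned char *perm, unsigned int nelt,
                     unsigned int diff)
{
  unsigned int i, j;

  /* Within each group of DIFF + 1 elements, indices must run backwards.  */
  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j++)
      if (i + j >= nelt || perm[i + j] != i + diff - j)
        return 0;

  return 1;
}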
31007 /* Recognize patterns for the VTRN insns. */
31009 static bool
31010 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31012 unsigned int i, odd, mask, nelt = d->nelt;
31013 rtx out0, out1, in0, in1, x;
31014 rtx (*gen)(rtx, rtx, rtx, rtx);
31016 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31017 return false;
31019 /* Note that these are little-endian tests. Adjust for big-endian later. */
31020 if (d->perm[0] == 0)
31021 odd = 0;
31022 else if (d->perm[0] == 1)
31023 odd = 1;
31024 else
31025 return false;
31026 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31028 for (i = 0; i < nelt; i += 2)
31030 if (d->perm[i] != i + odd)
31031 return false;
31032 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31033 return false;
31036 /* Success! */
31037 if (d->testing_p)
31038 return true;
31040 switch (d->vmode)
31042 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31043 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31044 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31045 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31046 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31047 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31048 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31049 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31050 default:
31051 gcc_unreachable ();
31054 in0 = d->op0;
31055 in1 = d->op1;
31056 if (BYTES_BIG_ENDIAN)
31058 x = in0, in0 = in1, in1 = x;
31059 odd = !odd;
31062 out0 = d->target;
31063 out1 = gen_reg_rtx (d->vmode);
31064 if (odd)
31065 x = out0, out0 = out1, out1 = x;
31067 emit_insn (gen (out0, in0, in1, out1));
31068 return true;
31071 /* Recognize patterns for the VEXT insns. */
31073 static bool
31074 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31076 unsigned int i, nelt = d->nelt;
31077 rtx (*gen) (rtx, rtx, rtx, rtx);
31078 rtx offset;
31080 unsigned int location;
31082 unsigned int next = d->perm[0] + 1;
31084 /* TODO: Handle GCC's numbering of elements for big-endian. */
31085 if (BYTES_BIG_ENDIAN)
31086 return false;
31088 /* Check if the extracted indexes are increasing by one. */
31089 for (i = 1; i < nelt; next++, i++)
31091 /* If we hit the most significant element of the 2nd vector in
31092 the previous iteration, no need to test further. */
31093 if (next == 2 * nelt)
31094 return false;
31096 /* If we are operating on only one vector: it could be a
31097 rotation. If there are only two elements of size < 64, let
31098 arm_evpc_neon_vrev catch it. */
31099 if (d->one_vector_p && (next == nelt))
31101 if ((nelt == 2) && (d->vmode != V2DImode))
31102 return false;
31103 else
31104 next = 0;
31107 if (d->perm[i] != next)
31108 return false;
31111 location = d->perm[0];
31113 switch (d->vmode)
31115 case V16QImode: gen = gen_neon_vextv16qi; break;
31116 case V8QImode: gen = gen_neon_vextv8qi; break;
31117 case V4HImode: gen = gen_neon_vextv4hi; break;
31118 case V8HImode: gen = gen_neon_vextv8hi; break;
31119 case V2SImode: gen = gen_neon_vextv2si; break;
31120 case V4SImode: gen = gen_neon_vextv4si; break;
31121 case V2SFmode: gen = gen_neon_vextv2sf; break;
31122 case V4SFmode: gen = gen_neon_vextv4sf; break;
31123 case V2DImode: gen = gen_neon_vextv2di; break;
31124 default:
31125 return false;
31128 /* Success! */
31129 if (d->testing_p)
31130 return true;
31132 offset = GEN_INT (location);
31133 emit_insn (gen (d->target, d->op0, d->op1, offset));
31134 return true;
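/* Illustration (editorial addition, not part of the original sources): the
   loop above accepts selectors whose indices simply count upwards, which is
   what VEXT (extract a window straddling the two inputs) produces.  For a
   two-operand V8QImode permute, {3,4,5,6,7,8,9,10} is accepted with
   LOCATION == 3; for a single operand, {3,4,5,6,7,0,1,2} is the same window
   read as a rotation.  A simplified host-side mirror, with hypothetical
   names (the real check above additionally defers two-element rotations to
   the VREV path).  */

static int
model_is_vext_selector (const unsigned char *perm, unsigned int nelt,
                        int one_vector_p)
{
  unsigned int i, next = perm[0] + 1;

  for (i = 1; i < nelt; i++, next++)
    {
      if (next == 2 * nelt)
        return 0;
      if (one_vector_p && next == nelt)
        next = 0;               /* Wrap around: the permute is a rotation.  */
      if (perm[i] != next)
        return 0;
    }

  return 1;
}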
31137 /* The NEON VTBL instruction is a fully variable permutation that's even
31138 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31139 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31140 can do slightly better by expanding this as a constant where we don't
31141 have to apply a mask. */
31143 static bool
31144 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31146 rtx rperm[MAX_VECT_LEN], sel;
31147 machine_mode vmode = d->vmode;
31148 unsigned int i, nelt = d->nelt;
31150 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31151 numbering of elements for big-endian, we must reverse the order. */
31152 if (BYTES_BIG_ENDIAN)
31153 return false;
31155 if (d->testing_p)
31156 return true;
31158 /* Generic code will try constant permutation twice. Once with the
31159 original mode and again with the elements lowered to QImode.
31160 So wait and don't do the selector expansion ourselves. */
31161 if (vmode != V8QImode && vmode != V16QImode)
31162 return false;
31164 for (i = 0; i < nelt; ++i)
31165 rperm[i] = GEN_INT (d->perm[i]);
31166 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31167 sel = force_reg (vmode, sel);
31169 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31170 return true;
31173 static bool
31174 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31176 /* Check if the input mask matches vext before reordering the
31177 operands. */
31178 if (TARGET_NEON)
31179 if (arm_evpc_neon_vext (d))
31180 return true;
31182 /* The pattern matching functions above are written to look for a small
31183 number to begin the sequence (0, 1, N/2). If we begin with an index
31184 from the second operand, we can swap the operands. */
31185 if (d->perm[0] >= d->nelt)
31187 unsigned i, nelt = d->nelt;
31188 rtx x;
31190 for (i = 0; i < nelt; ++i)
31191 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31193 x = d->op0;
31194 d->op0 = d->op1;
31195 d->op1 = x;
31198 if (TARGET_NEON)
31200 if (arm_evpc_neon_vuzp (d))
31201 return true;
31202 if (arm_evpc_neon_vzip (d))
31203 return true;
31204 if (arm_evpc_neon_vrev (d))
31205 return true;
31206 if (arm_evpc_neon_vtrn (d))
31207 return true;
31208 return arm_evpc_neon_vtbl (d);
31210 return false;
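/* Illustration (editorial addition, not part of the original sources): the
   operand swap above canonicalizes selectors that start in the second input.
   For a two-operand V4SImode permute with selector {5,6,7,0}, adding NELT
   modulo 2*NELT gives {1,2,3,4} with OP0 and OP1 exchanged, so the
   recognizers, which only look for small starting indices, can match it
   (VEXT is tried before the swap, as the comment above notes).  A host-side
   sketch of the folding, with a hypothetical name.  */

static void
model_swap_perm_operands (unsigned char *perm, unsigned int nelt)
{
  unsigned int i;

  /* Adding NELT (mod 2*NELT) renumbers indices as if the inputs had been
     given in the opposite order; the caller must also swap OP0/OP1.  */
  for (i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
}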
31213 /* Expand a vec_perm_const pattern. */
31215 bool
31216 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31218 struct expand_vec_perm_d d;
31219 int i, nelt, which;
31221 d.target = target;
31222 d.op0 = op0;
31223 d.op1 = op1;
31225 d.vmode = GET_MODE (target);
31226 gcc_assert (VECTOR_MODE_P (d.vmode));
31227 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31228 d.testing_p = false;
31230 for (i = which = 0; i < nelt; ++i)
31232 rtx e = XVECEXP (sel, 0, i);
31233 int ei = INTVAL (e) & (2 * nelt - 1);
31234 which |= (ei < nelt ? 1 : 2);
31235 d.perm[i] = ei;
31238 switch (which)
31240 default:
31241 gcc_unreachable();
31243 case 3:
31244 d.one_vector_p = false;
31245 if (!rtx_equal_p (op0, op1))
31246 break;
31248 /* The elements of PERM do not suggest that only the first operand
31249 is used, but both operands are identical. Allow easier matching
31250 of the permutation by folding the permutation into the single
31251 input vector. */
31252 /* FALLTHRU */
31253 case 2:
31254 for (i = 0; i < nelt; ++i)
31255 d.perm[i] &= nelt - 1;
31256 d.op0 = op1;
31257 d.one_vector_p = true;
31258 break;
31260 case 1:
31261 d.op1 = op0;
31262 d.one_vector_p = true;
31263 break;
31266 return arm_expand_vec_perm_const_1 (&d);
31269 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31271 static bool
31272 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31273 const unsigned char *sel)
31275 struct expand_vec_perm_d d;
31276 unsigned int i, nelt, which;
31277 bool ret;
31279 d.vmode = vmode;
31280 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31281 d.testing_p = true;
31282 memcpy (d.perm, sel, nelt);
31284 /* Categorize the set of elements in the selector. */
31285 for (i = which = 0; i < nelt; ++i)
31287 unsigned char e = d.perm[i];
31288 gcc_assert (e < 2 * nelt);
31289 which |= (e < nelt ? 1 : 2);
31292 /* For all elements from second vector, fold the elements to first. */
31293 if (which == 2)
31294 for (i = 0; i < nelt; ++i)
31295 d.perm[i] -= nelt;
31297 /* Check whether the mask can be applied to the vector type. */
31298 d.one_vector_p = (which != 3);
31300 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31301 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31302 if (!d.one_vector_p)
31303 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31305 start_sequence ();
31306 ret = arm_expand_vec_perm_const_1 (&d);
31307 end_sequence ();
31309 return ret;
31312 bool
31313 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31315 /* If we are soft float and we do not have ldrd
31316 then all auto increment forms are ok. */
31317 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31318 return true;
31320 switch (code)
31322 /* Post increment and Pre Decrement are supported for all
31323 instruction forms except for vector forms. */
31324 case ARM_POST_INC:
31325 case ARM_PRE_DEC:
31326 if (VECTOR_MODE_P (mode))
31328 if (code != ARM_PRE_DEC)
31329 return true;
31330 else
31331 return false;
31334 return true;
31336 case ARM_POST_DEC:
31337 case ARM_PRE_INC:
31338 /* Without LDRD and mode size greater than
31339 word size, there is no point in auto-incrementing
31340 because ldm and stm will not have these forms. */
31341 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31342 return false;
31344 /* Vector and floating point modes do not support
31345 these auto increment forms. */
31346 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31347 return false;
31349 return true;
31351 default:
31352 return false;
31356 return false;
31359 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31360 on ARM, since we know that shifts by negative amounts are no-ops.
31361 Additionally, the default expansion code is not available or suitable
31362 for post-reload insn splits (this can occur when the register allocator
31363 chooses not to do a shift in NEON).
31365 This function is used in both initial expand and post-reload splits, and
31366 handles all kinds of 64-bit shifts.
31368 Input requirements:
31369 - It is safe for the input and output to be the same register, but
31370 early-clobber rules apply for the shift amount and scratch registers.
31371 - Shift by register requires both scratch registers. In all other cases
31372 the scratch registers may be NULL.
31373 - Ashiftrt by a register also clobbers the CC register. */
31374 void
31375 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31376 rtx amount, rtx scratch1, rtx scratch2)
31378 rtx out_high = gen_highpart (SImode, out);
31379 rtx out_low = gen_lowpart (SImode, out);
31380 rtx in_high = gen_highpart (SImode, in);
31381 rtx in_low = gen_lowpart (SImode, in);
31383 /* Terminology:
31384 in = the register pair containing the input value.
31385 out = the destination register pair.
31386 up = the high- or low-part of each pair.
31387 down = the opposite part to "up".
31388 In a shift, we can consider bits to shift from "up"-stream to
31389 "down"-stream, so in a left-shift "up" is the low-part and "down"
31390 is the high-part of each register pair. */
31392 rtx out_up = code == ASHIFT ? out_low : out_high;
31393 rtx out_down = code == ASHIFT ? out_high : out_low;
31394 rtx in_up = code == ASHIFT ? in_low : in_high;
31395 rtx in_down = code == ASHIFT ? in_high : in_low;
31397 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31398 gcc_assert (out
31399 && (REG_P (out) || GET_CODE (out) == SUBREG)
31400 && GET_MODE (out) == DImode);
31401 gcc_assert (in
31402 && (REG_P (in) || GET_CODE (in) == SUBREG)
31403 && GET_MODE (in) == DImode);
31404 gcc_assert (amount
31405 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31406 && GET_MODE (amount) == SImode)
31407 || CONST_INT_P (amount)));
31408 gcc_assert (scratch1 == NULL
31409 || (GET_CODE (scratch1) == SCRATCH)
31410 || (GET_MODE (scratch1) == SImode
31411 && REG_P (scratch1)));
31412 gcc_assert (scratch2 == NULL
31413 || (GET_CODE (scratch2) == SCRATCH)
31414 || (GET_MODE (scratch2) == SImode
31415 && REG_P (scratch2)));
31416 gcc_assert (!REG_P (out) || !REG_P (amount)
31417 || !HARD_REGISTER_P (out)
31418 || (REGNO (out) != REGNO (amount)
31419 && REGNO (out) + 1 != REGNO (amount)));
31421 /* Macros to make following code more readable. */
31422 #define SUB_32(DEST,SRC) \
31423 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31424 #define RSB_32(DEST,SRC) \
31425 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31426 #define SUB_S_32(DEST,SRC) \
31427 gen_addsi3_compare0 ((DEST), (SRC), \
31428 GEN_INT (-32))
31429 #define SET(DEST,SRC) \
31430 gen_rtx_SET (SImode, (DEST), (SRC))
31431 #define SHIFT(CODE,SRC,AMOUNT) \
31432 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31433 #define LSHIFT(CODE,SRC,AMOUNT) \
31434 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31435 SImode, (SRC), (AMOUNT))
31436 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31437 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31438 SImode, (SRC), (AMOUNT))
31439 #define ORR(A,B) \
31440 gen_rtx_IOR (SImode, (A), (B))
31441 #define BRANCH(COND,LABEL) \
31442 gen_arm_cond_branch ((LABEL), \
31443 gen_rtx_ ## COND (CCmode, cc_reg, \
31444 const0_rtx), \
31445 cc_reg)
31447 /* Shifts by register and shifts by constant are handled separately. */
31448 if (CONST_INT_P (amount))
31450 /* We have a shift-by-constant. */
31452 /* First, handle out-of-range shift amounts.
31453 In both cases we try to match the result an ARM instruction in a
31454 shift-by-register would give. This helps reduce execution
31455 differences between optimization levels, but it won't stop other
31456 parts of the compiler doing different things. This is "undefined"
31457 behaviour, in any case. */
31458 if (INTVAL (amount) <= 0)
31459 emit_insn (gen_movdi (out, in));
31460 else if (INTVAL (amount) >= 64)
31462 if (code == ASHIFTRT)
31464 rtx const31_rtx = GEN_INT (31);
31465 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31466 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31468 else
31469 emit_insn (gen_movdi (out, const0_rtx));
31472 /* Now handle valid shifts. */
31473 else if (INTVAL (amount) < 32)
31475 /* Shifts by a constant less than 32. */
31476 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31478 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31479 emit_insn (SET (out_down,
31480 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31481 out_down)));
31482 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31484 else
31486 /* Shifts by a constant greater than 31. */
31487 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31489 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31490 if (code == ASHIFTRT)
31491 emit_insn (gen_ashrsi3 (out_up, in_up,
31492 GEN_INT (31)));
31493 else
31494 emit_insn (SET (out_up, const0_rtx));
31497 else
31499 /* We have a shift-by-register. */
31500 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31502 /* This alternative requires the scratch registers. */
31503 gcc_assert (scratch1 && REG_P (scratch1));
31504 gcc_assert (scratch2 && REG_P (scratch2));
31506 /* We will need the values "amount-32" and "32-amount" later.
31507 Swapping them around now allows the later code to be more general. */
31508 switch (code)
31510 case ASHIFT:
31511 emit_insn (SUB_32 (scratch1, amount));
31512 emit_insn (RSB_32 (scratch2, amount));
31513 break;
31514 case ASHIFTRT:
31515 emit_insn (RSB_32 (scratch1, amount));
31516 /* Also set CC = amount > 32. */
31517 emit_insn (SUB_S_32 (scratch2, amount));
31518 break;
31519 case LSHIFTRT:
31520 emit_insn (RSB_32 (scratch1, amount));
31521 emit_insn (SUB_32 (scratch2, amount));
31522 break;
31523 default:
31524 gcc_unreachable ();
31527 /* Emit code like this:
31529 arithmetic-left:
31530 out_down = in_down << amount;
31531 out_down = (in_up << (amount - 32)) | out_down;
31532 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31533 out_up = in_up << amount;
31535 arithmetic-right:
31536 out_down = in_down >> amount;
31537 out_down = (in_up << (32 - amount)) | out_down;
31538 if (amount >= 32)
31539 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31540 out_up = in_up >> amount;
31542 logical-right:
31543 out_down = in_down >> amount;
31544 out_down = (in_up << (32 - amount)) | out_down;
31545 if (amount >= 32)
31546 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31547 out_up = in_up >> amount;
31549 The ARM and Thumb2 variants are the same but implemented slightly
31550 differently. If this were only called during expand we could just
31551 use the Thumb2 case and let combine do the right thing, but this
31552 can also be called from post-reload splitters. */
31554 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31556 if (!TARGET_THUMB2)
31558 /* Emit code for ARM mode. */
31559 emit_insn (SET (out_down,
31560 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31561 if (code == ASHIFTRT)
31563 rtx_code_label *done_label = gen_label_rtx ();
31564 emit_jump_insn (BRANCH (LT, done_label));
31565 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31566 out_down)));
31567 emit_label (done_label);
31569 else
31570 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31571 out_down)));
31573 else
31575 /* Emit code for Thumb2 mode.
31576 Thumb2 can't do a shift and an OR in one insn. */
31577 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31578 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31580 if (code == ASHIFTRT)
31582 rtx_code_label *done_label = gen_label_rtx ();
31583 emit_jump_insn (BRANCH (LT, done_label));
31584 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31585 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31586 emit_label (done_label);
31588 else
31590 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31591 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31595 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31598 #undef SUB_32
31599 #undef RSB_32
31600 #undef SUB_S_32
31601 #undef SET
31602 #undef SHIFT
31603 #undef LSHIFT
31604 #undef REV_LSHIFT
31605 #undef ORR
31606 #undef BRANCH
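/* Illustration (editorial addition, not part of the original sources): a
   host-side model of the logical-right case of the decomposition described
   above, operating on plain 64-bit values.  The helpers emulate ARM's
   register-specified shifts, where amounts of 32 or more produce zero for
   LSL/LSR; names are hypothetical and the model covers shift amounts 0-63
   only.  */

static unsigned int
model_arm_lsl (unsigned int x, unsigned int amount)
{
  return amount < 32 ? x << amount : 0;
}

static unsigned int
model_arm_lsr (unsigned int x, unsigned int amount)
{
  return amount < 32 ? x >> amount : 0;
}

static unsigned long long
model_lshiftrt_di (unsigned long long in, unsigned int amount)
{
  unsigned int in_low = (unsigned int) in;
  unsigned int in_high = (unsigned int) (in >> 32);
  unsigned int out_low, out_high;

  /* out_down is the low word shifted right, plus the bits that cross the
     word boundary from the high word; with the ARM shift semantics above,
     the third term only contributes when amount >= 32.  */
  out_low = model_arm_lsr (in_low, amount);
  out_low |= model_arm_lsl (in_high, 32 - amount);
  out_low |= model_arm_lsr (in_high, amount - 32);
  out_high = model_arm_lsr (in_high, amount);

  return ((unsigned long long) out_high << 32) | out_low;
}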
31610 /* Return TRUE if COMPARISON is a valid comparison operation, and force
31611 the operands into a form that is valid for it. */
31612 bool
31613 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31615 enum rtx_code code = GET_CODE (*comparison);
31616 int code_int;
31617 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31618 ? GET_MODE (*op2) : GET_MODE (*op1);
31620 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31622 if (code == UNEQ || code == LTGT)
31623 return false;
31625 code_int = (int)code;
31626 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31627 PUT_CODE (*comparison, (enum rtx_code)code_int);
31629 switch (mode)
31631 case SImode:
31632 if (!arm_add_operand (*op1, mode))
31633 *op1 = force_reg (mode, *op1);
31634 if (!arm_add_operand (*op2, mode))
31635 *op2 = force_reg (mode, *op2);
31636 return true;
31638 case DImode:
31639 if (!cmpdi_operand (*op1, mode))
31640 *op1 = force_reg (mode, *op1);
31641 if (!cmpdi_operand (*op2, mode))
31642 *op2 = force_reg (mode, *op2);
31643 return true;
31645 case SFmode:
31646 case DFmode:
31647 if (!arm_float_compare_operand (*op1, mode))
31648 *op1 = force_reg (mode, *op1);
31649 if (!arm_float_compare_operand (*op2, mode))
31650 *op2 = force_reg (mode, *op2);
31651 return true;
31652 default:
31653 break;
31656 return false;
31660 /* Maximum number of instructions to set block of memory. */
31661 static int
31662 arm_block_set_max_insns (void)
31664 if (optimize_function_for_size_p (cfun))
31665 return 4;
31666 else
31667 return current_tune->max_insns_inline_memset;
31670 /* Return TRUE if it's profitable to set block of memory for
31671 non-vectorized case. VAL is the value to set the memory
31672 with. LENGTH is the number of bytes to set. ALIGN is the
31673 alignment of the destination memory in bytes. UNALIGNED_P
31674 is TRUE if we can only set the memory with instructions
31675 meeting alignment requirements. USE_STRD_P is TRUE if we
31676 can use strd to set the memory. */
31677 static bool
31678 arm_block_set_non_vect_profit_p (rtx val,
31679 unsigned HOST_WIDE_INT length,
31680 unsigned HOST_WIDE_INT align,
31681 bool unaligned_p, bool use_strd_p)
31683 int num = 0;
31684 /* For a leftover of 0-7 bytes, we can set the memory block using
31685 strb/strh/str with the minimum number of instructions. */
31686 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31688 if (unaligned_p)
31690 num = arm_const_inline_cost (SET, val);
31691 num += length / align + length % align;
31693 else if (use_strd_p)
31695 num = arm_const_double_inline_cost (val);
31696 num += (length >> 3) + leftover[length & 7];
31698 else
31700 num = arm_const_inline_cost (SET, val);
31701 num += (length >> 2) + leftover[length & 3];
31704 /* We may be able to combine last pair STRH/STRB into a single STR
31705 by shifting one byte back. */
31706 if (unaligned_access && length > 3 && (length & 3) == 3)
31707 num--;
31709 return (num <= arm_block_set_max_insns ());
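/* Illustration (editorial addition, not part of the original sources): the
   word-aligned, non-strd branch above counts one store per word plus the
   LEFTOVER entry for the tail.  For example, LENGTH == 15 gives 3 str
   instructions plus leftover[3] == 2 (strh + strb), i.e. 5 stores, and the
   final decrement merges that strh/strb pair into one overlapping str when
   unaligned access is available.  A host-side sketch of the store count
   only (it ignores the cost of materializing the constant); names are
   hypothetical.  */

static int
model_non_vect_store_count (unsigned int length, int unaligned_access_ok)
{
  static const int leftover[4] = {0, 1, 1, 2};
  int num = (length >> 2) + leftover[length & 3];

  /* The last STRH/STRB pair can become a single STR shifted back a byte.  */
  if (unaligned_access_ok && length > 3 && (length & 3) == 3)
    num--;

  return num;
}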
31712 /* Return TRUE if it's profitable to set block of memory for
31713 vectorized case. LENGTH is the number of bytes to set.
31714 ALIGN is the alignment of destination memory in bytes.
31715 MODE is the vector mode used to set the memory. */
31716 static bool
31717 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31718 unsigned HOST_WIDE_INT align,
31719 machine_mode mode)
31721 int num;
31722 bool unaligned_p = ((align & 3) != 0);
31723 unsigned int nelt = GET_MODE_NUNITS (mode);
31725 /* Instruction loading constant value. */
31726 num = 1;
31727 /* Instructions storing the memory. */
31728 num += (length + nelt - 1) / nelt;
31729 /* Instructions adjusting the address expression. We only need to
31730 adjust the address expression if it's 4-byte aligned and the leftover
31731 bytes can only be stored by a misaligned store instruction. */
31732 if (!unaligned_p && (length & 3) != 0)
31733 num++;
31735 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31736 if (!unaligned_p && mode == V16QImode)
31737 num--;
31739 return (num <= arm_block_set_max_insns ());
31742 /* Set a block of memory using vectorization instructions for the
31743 unaligned case. We fill the first LENGTH bytes of the memory
31744 area starting from DSTBASE with byte constant VALUE. ALIGN is
31745 the alignment requirement of memory. Return TRUE if succeeded. */
31746 static bool
31747 arm_block_set_unaligned_vect (rtx dstbase,
31748 unsigned HOST_WIDE_INT length,
31749 unsigned HOST_WIDE_INT value,
31750 unsigned HOST_WIDE_INT align)
31752 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31753 rtx dst, mem;
31754 rtx val_elt, val_vec, reg;
31755 rtx rval[MAX_VECT_LEN];
31756 rtx (*gen_func) (rtx, rtx);
31757 machine_mode mode;
31758 unsigned HOST_WIDE_INT v = value;
31760 gcc_assert ((align & 0x3) != 0);
31761 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31762 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31763 if (length >= nelt_v16)
31765 mode = V16QImode;
31766 gen_func = gen_movmisalignv16qi;
31768 else
31770 mode = V8QImode;
31771 gen_func = gen_movmisalignv8qi;
31773 nelt_mode = GET_MODE_NUNITS (mode);
31774 gcc_assert (length >= nelt_mode);
31775 /* Skip if it isn't profitable. */
31776 if (!arm_block_set_vect_profit_p (length, align, mode))
31777 return false;
31779 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31780 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31782 v = sext_hwi (v, BITS_PER_WORD);
31783 val_elt = GEN_INT (v);
31784 for (j = 0; j < nelt_mode; j++)
31785 rval[j] = val_elt;
31787 reg = gen_reg_rtx (mode);
31788 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31789 /* Emit instruction loading the constant value. */
31790 emit_move_insn (reg, val_vec);
31792 /* Handle nelt_mode bytes in a vector. */
31793 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31795 emit_insn ((*gen_func) (mem, reg));
31796 if (i + 2 * nelt_mode <= length)
31797 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31800 /* If at least nelt_v8 bytes are left over, we must be in
31801 V16QImode. */
31802 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31804 /* Handle (8, 16) bytes leftover. */
31805 if (i + nelt_v8 < length)
31807 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31808 /* We are shifting bytes back, set the alignment accordingly. */
31809 if ((length & 1) != 0 && align >= 2)
31810 set_mem_align (mem, BITS_PER_UNIT);
31812 emit_insn (gen_movmisalignv16qi (mem, reg));
31814 /* Handle (0, 8] bytes leftover. */
31815 else if (i < length && i + nelt_v8 >= length)
31817 if (mode == V16QImode)
31819 reg = gen_lowpart (V8QImode, reg);
31820 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31822 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31823 + (nelt_mode - nelt_v8))));
31824 /* We are shifting bytes back, set the alignment accordingly. */
31825 if ((length & 1) != 0 && align >= 2)
31826 set_mem_align (mem, BITS_PER_UNIT);
31828 emit_insn (gen_movmisalignv8qi (mem, reg));
31831 return true;
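/* Illustration (editorial addition, not part of the original sources): the
   leftover handling above deals with a tail that is not a multiple of the
   vector size by sliding the destination back and issuing one extra,
   overlapping misaligned store that ends exactly at LENGTH.  E.g. for
   LENGTH == 13 with V8QImode stores, bytes 0-7 are stored first and the
   final store covers bytes 5-12.  A simplified host-side sketch assuming a
   single store size (the real code drops to V8QImode for a short tail of a
   V16QImode loop); names are hypothetical.  */

static void
model_unaligned_vect_stores (unsigned int length, unsigned int store_size,
                             unsigned int *n_full, int *has_tail,
                             unsigned int *tail_offset)
{
  /* Whole vector stores cover offsets 0, store_size, 2*store_size, ...  */
  *n_full = length / store_size;
  *has_tail = (length % store_size) != 0;
  /* The tail store is slid back so that it ends exactly at LENGTH,
     overlapping the previous store instead of running past the block.  */
  *tail_offset = *has_tail ? length - store_size : 0;
}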
31834 /* Set a block of memory using vectorization instructions for the
31835 aligned case. We fill the first LENGTH bytes of the memory area
31836 starting from DSTBASE with byte constant VALUE. ALIGN is the
31837 alignment requirement of memory. Return TRUE if succeeded. */
31838 static bool
31839 arm_block_set_aligned_vect (rtx dstbase,
31840 unsigned HOST_WIDE_INT length,
31841 unsigned HOST_WIDE_INT value,
31842 unsigned HOST_WIDE_INT align)
31844 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31845 rtx dst, addr, mem;
31846 rtx val_elt, val_vec, reg;
31847 rtx rval[MAX_VECT_LEN];
31848 machine_mode mode;
31849 unsigned HOST_WIDE_INT v = value;
31851 gcc_assert ((align & 0x3) == 0);
31852 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31853 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31854 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31855 mode = V16QImode;
31856 else
31857 mode = V8QImode;
31859 nelt_mode = GET_MODE_NUNITS (mode);
31860 gcc_assert (length >= nelt_mode);
31861 /* Skip if it isn't profitable. */
31862 if (!arm_block_set_vect_profit_p (length, align, mode))
31863 return false;
31865 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31867 v = sext_hwi (v, BITS_PER_WORD);
31868 val_elt = GEN_INT (v);
31869 for (j = 0; j < nelt_mode; j++)
31870 rval[j] = val_elt;
31872 reg = gen_reg_rtx (mode);
31873 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31874 /* Emit instruction loading the constant value. */
31875 emit_move_insn (reg, val_vec);
31877 i = 0;
31878 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31879 if (mode == V16QImode)
31881 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31882 emit_insn (gen_movmisalignv16qi (mem, reg));
31883 i += nelt_mode;
31884 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31885 if (i + nelt_v8 < length && i + nelt_v16 > length)
31887 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31888 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31889 /* We are shifting bytes back, set the alignment accordingly. */
31890 if ((length & 0x3) == 0)
31891 set_mem_align (mem, BITS_PER_UNIT * 4);
31892 else if ((length & 0x1) == 0)
31893 set_mem_align (mem, BITS_PER_UNIT * 2);
31894 else
31895 set_mem_align (mem, BITS_PER_UNIT);
31897 emit_insn (gen_movmisalignv16qi (mem, reg));
31898 return true;
31900 /* Fall through for bytes leftover. */
31901 mode = V8QImode;
31902 nelt_mode = GET_MODE_NUNITS (mode);
31903 reg = gen_lowpart (V8QImode, reg);
31906 /* Handle 8 bytes in a vector. */
31907 for (; (i + nelt_mode <= length); i += nelt_mode)
31909 addr = plus_constant (Pmode, dst, i);
31910 mem = adjust_automodify_address (dstbase, mode, addr, i);
31911 emit_move_insn (mem, reg);
31914 /* Handle single word leftover by shifting 4 bytes back. We can
31915 use aligned access for this case. */
31916 if (i + UNITS_PER_WORD == length)
31918 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31919 mem = adjust_automodify_address (dstbase, mode,
31920 addr, i - UNITS_PER_WORD);
31921 /* We are shifting 4 bytes back, set the alignment accordingly. */
31922 if (align > UNITS_PER_WORD)
31923 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31925 emit_move_insn (mem, reg);
31927 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31928 We have to use unaligned access for this case. */
31929 else if (i < length)
31931 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31932 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31933 /* We are shifting bytes back, set the alignment accordingly. */
31934 if ((length & 1) == 0)
31935 set_mem_align (mem, BITS_PER_UNIT * 2);
31936 else
31937 set_mem_align (mem, BITS_PER_UNIT);
31939 emit_insn (gen_movmisalignv8qi (mem, reg));
31942 return true;
31945 /* Set a block of memory using plain strh/strb instructions, only
31946 using instructions permitted by the alignment ALIGN on the processor. We fill the
31947 first LENGTH bytes of the memory area starting from DSTBASE
31948 with byte constant VALUE. ALIGN is the alignment requirement
31949 of memory. */
31950 static bool
31951 arm_block_set_unaligned_non_vect (rtx dstbase,
31952 unsigned HOST_WIDE_INT length,
31953 unsigned HOST_WIDE_INT value,
31954 unsigned HOST_WIDE_INT align)
31956 unsigned int i;
31957 rtx dst, addr, mem;
31958 rtx val_exp, val_reg, reg;
31959 machine_mode mode;
31960 HOST_WIDE_INT v = value;
31962 gcc_assert (align == 1 || align == 2);
31964 if (align == 2)
31965 v |= (value << BITS_PER_UNIT);
31967 v = sext_hwi (v, BITS_PER_WORD);
31968 val_exp = GEN_INT (v);
31969 /* Skip if it isn't profitable. */
31970 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31971 align, true, false))
31972 return false;
31974 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31975 mode = (align == 2 ? HImode : QImode);
31976 val_reg = force_reg (SImode, val_exp);
31977 reg = gen_lowpart (mode, val_reg);
31979 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31981 addr = plus_constant (Pmode, dst, i);
31982 mem = adjust_automodify_address (dstbase, mode, addr, i);
31983 emit_move_insn (mem, reg);
31986 /* Handle single byte leftover. */
31987 if (i + 1 == length)
31989 reg = gen_lowpart (QImode, val_reg);
31990 addr = plus_constant (Pmode, dst, i);
31991 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31992 emit_move_insn (mem, reg);
31993 i++;
31996 gcc_assert (i == length);
31997 return true;
32000 /* Set a block of memory using plain strd/str/strh/strb instructions,
32001 to permit unaligned copies on processors which support unaligned
32002 semantics for those instructions. We fill the first LENGTH bytes
32003 of the memory area starting from DSTBASE with byte constant VALUE.
32004 ALIGN is the alignment requirement of memory. */
32005 static bool
32006 arm_block_set_aligned_non_vect (rtx dstbase,
32007 unsigned HOST_WIDE_INT length,
32008 unsigned HOST_WIDE_INT value,
32009 unsigned HOST_WIDE_INT align)
32011 unsigned int i;
32012 rtx dst, addr, mem;
32013 rtx val_exp, val_reg, reg;
32014 unsigned HOST_WIDE_INT v;
32015 bool use_strd_p;
32017 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32018 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32020 v = (value | (value << 8) | (value << 16) | (value << 24));
32021 if (length < UNITS_PER_WORD)
32022 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32024 if (use_strd_p)
32025 v |= (v << BITS_PER_WORD);
32026 else
32027 v = sext_hwi (v, BITS_PER_WORD);
32029 val_exp = GEN_INT (v);
32030 /* Skip if it isn't profitable. */
32031 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32032 align, false, use_strd_p))
32034 if (!use_strd_p)
32035 return false;
32037 /* Try without strd. */
32038 v = (v >> BITS_PER_WORD);
32039 v = sext_hwi (v, BITS_PER_WORD);
32040 val_exp = GEN_INT (v);
32041 use_strd_p = false;
32042 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32043 align, false, use_strd_p))
32044 return false;
32047 i = 0;
32048 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32049 /* Handle double words using strd if possible. */
32050 if (use_strd_p)
32052 val_reg = force_reg (DImode, val_exp);
32053 reg = val_reg;
32054 for (; (i + 8 <= length); i += 8)
32056 addr = plus_constant (Pmode, dst, i);
32057 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32058 emit_move_insn (mem, reg);
32061 else
32062 val_reg = force_reg (SImode, val_exp);
32064 /* Handle words. */
32065 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32066 for (; (i + 4 <= length); i += 4)
32068 addr = plus_constant (Pmode, dst, i);
32069 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32070 if ((align & 3) == 0)
32071 emit_move_insn (mem, reg);
32072 else
32073 emit_insn (gen_unaligned_storesi (mem, reg));
32076 /* Merge last pair of STRH and STRB into a STR if possible. */
32077 if (unaligned_access && i > 0 && (i + 3) == length)
32079 addr = plus_constant (Pmode, dst, i - 1);
32080 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32081 /* We are shifting one byte back, set the alignment accordingly. */
32082 if ((align & 1) == 0)
32083 set_mem_align (mem, BITS_PER_UNIT);
32085 /* Most likely this is an unaligned access, and we can't tell at
32086 compilation time. */
32087 emit_insn (gen_unaligned_storesi (mem, reg));
32088 return true;
32091 /* Handle half word leftover. */
32092 if (i + 2 <= length)
32094 reg = gen_lowpart (HImode, val_reg);
32095 addr = plus_constant (Pmode, dst, i);
32096 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32097 if ((align & 1) == 0)
32098 emit_move_insn (mem, reg);
32099 else
32100 emit_insn (gen_unaligned_storehi (mem, reg));
32102 i += 2;
32105 /* Handle single byte leftover. */
32106 if (i + 1 == length)
32108 reg = gen_lowpart (QImode, val_reg);
32109 addr = plus_constant (Pmode, dst, i);
32110 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32111 emit_move_insn (mem, reg);
32114 return true;
32117 /* Set a block of memory using vectorization instructions for both
32118 aligned and unaligned cases. We fill the first LENGTH bytes of
32119 the memory area starting from DSTBASE with byte constant VALUE.
32120 ALIGN is the alignment requirement of memory. */
32121 static bool
32122 arm_block_set_vect (rtx dstbase,
32123 unsigned HOST_WIDE_INT length,
32124 unsigned HOST_WIDE_INT value,
32125 unsigned HOST_WIDE_INT align)
32127 /* Check whether we need to use unaligned store instruction. */
32128 if (((align & 3) != 0 || (length & 3) != 0)
32129 /* Check whether unaligned store instruction is available. */
32130 && (!unaligned_access || BYTES_BIG_ENDIAN))
32131 return false;
32133 if ((align & 3) == 0)
32134 return arm_block_set_aligned_vect (dstbase, length, value, align);
32135 else
32136 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32139 /* Expand a string store operation. First we try to do it using
32140 vectorization instructions, then with ARM unaligned access and
32141 double-word stores if profitable. OPERANDS[0] is the destination,
32142 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32143 initialize the memory with, OPERANDS[3] is the known alignment of the
32144 destination. */
32145 bool
32146 arm_gen_setmem (rtx *operands)
32148 rtx dstbase = operands[0];
32149 unsigned HOST_WIDE_INT length;
32150 unsigned HOST_WIDE_INT value;
32151 unsigned HOST_WIDE_INT align;
32153 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32154 return false;
32156 length = UINTVAL (operands[1]);
32157 if (length > 64)
32158 return false;
32160 value = (UINTVAL (operands[2]) & 0xFF);
32161 align = UINTVAL (operands[3]);
32162 if (TARGET_NEON && length >= 8
32163 && current_tune->string_ops_prefer_neon
32164 && arm_block_set_vect (dstbase, length, value, align))
32165 return true;
32167 if (!unaligned_access && (align & 3) != 0)
32168 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32170 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32173 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32175 static unsigned HOST_WIDE_INT
32176 arm_asan_shadow_offset (void)
32178 return (unsigned HOST_WIDE_INT) 1 << 29;
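/* Illustration (editorial addition, not part of the original sources):
   AddressSanitizer maps every 8 bytes of application memory to one shadow
   byte, so with the offset returned above a shadow address is computed as
   (addr >> 3) + 0x20000000.  A host-side sketch of the mapping; the name is
   hypothetical.  */

static unsigned long long
model_asan_shadow_addr (unsigned long long app_addr)
{
  return (app_addr >> 3) + (1ULL << 29);
}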
32182 /* This is a temporary fix for PR60655. Ideally we need
32183 to handle most of these cases in the generic part but
32184 currently we reject minus (..) (sym_ref). We try to
32185 ameliorate the case with minus (sym_ref1) (sym_ref2)
32186 where they are in the same section. */
32188 static bool
32189 arm_const_not_ok_for_debug_p (rtx p)
32191 tree decl_op0 = NULL;
32192 tree decl_op1 = NULL;
32194 if (GET_CODE (p) == MINUS)
32196 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32198 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32199 if (decl_op1
32200 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32201 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32203 if ((TREE_CODE (decl_op1) == VAR_DECL
32204 || TREE_CODE (decl_op1) == CONST_DECL)
32205 && (TREE_CODE (decl_op0) == VAR_DECL
32206 || TREE_CODE (decl_op0) == CONST_DECL))
32207 return (get_variable_section (decl_op1, false)
32208 != get_variable_section (decl_op0, false));
32210 if (TREE_CODE (decl_op1) == LABEL_DECL
32211 && TREE_CODE (decl_op0) == LABEL_DECL)
32212 return (DECL_CONTEXT (decl_op1)
32213 != DECL_CONTEXT (decl_op0));
32216 return true;
32220 return false;
32223 static void
32224 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32226 const unsigned ARM_FE_INVALID = 1;
32227 const unsigned ARM_FE_DIVBYZERO = 2;
32228 const unsigned ARM_FE_OVERFLOW = 4;
32229 const unsigned ARM_FE_UNDERFLOW = 8;
32230 const unsigned ARM_FE_INEXACT = 16;
32231 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32232 | ARM_FE_DIVBYZERO
32233 | ARM_FE_OVERFLOW
32234 | ARM_FE_UNDERFLOW
32235 | ARM_FE_INEXACT);
32236 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
32237 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32238 tree new_fenv_var, reload_fenv, restore_fnenv;
32239 tree update_call, atomic_feraiseexcept, hold_fnclex;
32241 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32242 return;
32244 /* Generate the equivalent of :
32245 unsigned int fenv_var;
32246 fenv_var = __builtin_arm_get_fpscr ();
32248 unsigned int masked_fenv;
32249 masked_fenv = fenv_var & mask;
32251 __builtin_arm_set_fpscr (masked_fenv); */
32253 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32254 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32255 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32256 mask = build_int_cst (unsigned_type_node,
32257 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32258 | ARM_FE_ALL_EXCEPT));
32259 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32260 fenv_var, build_call_expr (get_fpscr, 0));
32261 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32262 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32263 *hold = build2 (COMPOUND_EXPR, void_type_node,
32264 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32265 hold_fnclex);
32267 /* Store the value of masked_fenv to clear the exceptions:
32268 __builtin_arm_set_fpscr (masked_fenv); */
32270 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32272 /* Generate the equivalent of :
32273 unsigned int new_fenv_var;
32274 new_fenv_var = __builtin_arm_get_fpscr ();
32276 __builtin_arm_set_fpscr (fenv_var);
32278 __atomic_feraiseexcept (new_fenv_var); */
32280 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32281 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32282 build_call_expr (get_fpscr, 0));
32283 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32284 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32285 update_call = build_call_expr (atomic_feraiseexcept, 1,
32286 fold_convert (integer_type_node, new_fenv_var));
32287 *update = build2 (COMPOUND_EXPR, void_type_node,
32288 build2 (COMPOUND_EXPR, void_type_node,
32289 reload_fenv, restore_fnenv), update_call);
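/* Illustration (editorial addition, not part of the original sources): in
   the FPSCR the cumulative exception flags occupy bits 0-4 and the
   corresponding trap-enable bits occupy bits 8-12, which is what
   ARM_FE_EXCEPT_SHIFT == 8 expresses.  The hold mask built above therefore
   evaluates to 0xffffe0e0.  A host-side sketch of that computation; the
   name is hypothetical.  */

static unsigned int
model_fenv_hold_mask (void)
{
  const unsigned int all_except = 0x1f;   /* IOC|DZC|OFC|UFC|IXC flags.  */
  const unsigned int except_shift = 8;    /* Trap-enable bits start here.  */

  return ~((all_except << except_shift) | all_except);   /* 0xffffe0e0.  */
}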
32292 /* Return TRUE if X is a reference to a value in a constant pool. */
32293 extern bool
32294 arm_is_constant_pool_ref (rtx x)
32296 return (MEM_P (x)
32297 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32298 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32301 /* If MEM is in the form of [base+offset], extract the two parts
32302 of the address into BASE and OFFSET; otherwise return false
32303 after clearing BASE and OFFSET. */
32305 static bool
32306 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32308 rtx addr;
32310 gcc_assert (MEM_P (mem));
32312 addr = XEXP (mem, 0);
32314 /* Strip off const from addresses like (const (addr)). */
32315 if (GET_CODE (addr) == CONST)
32316 addr = XEXP (addr, 0);
32318 if (GET_CODE (addr) == REG)
32320 *base = addr;
32321 *offset = const0_rtx;
32322 return true;
32325 if (GET_CODE (addr) == PLUS
32326 && GET_CODE (XEXP (addr, 0)) == REG
32327 && CONST_INT_P (XEXP (addr, 1)))
32329 *base = XEXP (addr, 0);
32330 *offset = XEXP (addr, 1);
32331 return true;
32334 *base = NULL_RTX;
32335 *offset = NULL_RTX;
32337 return false;
32340 /* If INSN is a load or store whose address has the form [base+offset],
32341 extract the two parts into BASE and OFFSET. IS_LOAD is set
32342 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
32343 otherwise return FALSE. */
32345 static bool
32346 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32348 rtx x, dest, src;
32350 gcc_assert (INSN_P (insn));
32351 x = PATTERN (insn);
32352 if (GET_CODE (x) != SET)
32353 return false;
32355 src = SET_SRC (x);
32356 dest = SET_DEST (x);
32357 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32359 *is_load = false;
32360 extract_base_offset_in_addr (dest, base, offset);
32362 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32364 *is_load = true;
32365 extract_base_offset_in_addr (src, base, offset);
32367 else
32368 return false;
32370 return (*base != NULL_RTX && *offset != NULL_RTX);
32373 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
32375 Currently we only support fusing ldr and str instructions, so FUSION_PRI
32376 and PRI are only calculated for these instructions. For other instructions,
32377 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
32378 instruction fusion can be supported by returning different priorities.
32380 It's important that irrelevant instructions get the largest FUSION_PRI. */
32382 static void
32383 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
32384 int *fusion_pri, int *pri)
32386 int tmp, off_val;
32387 bool is_load;
32388 rtx base, offset;
32390 gcc_assert (INSN_P (insn));
32392 tmp = max_pri - 1;
32393 if (!fusion_load_store (insn, &base, &offset, &is_load))
32395 *pri = tmp;
32396 *fusion_pri = tmp;
32397 return;
32400 /* Load goes first. */
32401 if (is_load)
32402 *fusion_pri = tmp - 1;
32403 else
32404 *fusion_pri = tmp - 2;
32406 tmp /= 2;
32408 /* INSN with smaller base register goes first. */
32409 tmp -= ((REGNO (base) & 0xff) << 20);
32411 /* INSN with smaller offset goes first. */
32412 off_val = (int)(INTVAL (offset));
32413 if (off_val >= 0)
32414 tmp -= (off_val & 0xfffff);
32415 else
32416 tmp += ((- off_val) & 0xfffff);
32418 *pri = tmp;
32419 return;
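/* Illustration (editorial addition, not part of the original sources): the
   priority keys built above order fusion candidates so that loads come
   before stores, lower-numbered base registers come first and, within a
   base register, smaller offsets come first; per the comments above, the
   insn that should go first receives the larger value.  A host-side mirror
   of the computation, with hypothetical names.  */

static void
model_fusion_priorities (int max_pri, int is_load, unsigned int base_regno,
                         int offset, int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  /* Loads are preferred over stores.  */
  *fusion_pri = is_load ? tmp - 1 : tmp - 2;

  tmp /= 2;
  tmp -= (base_regno & 0xff) << 20;
  if (offset >= 0)
    tmp -= offset & 0xfffff;
  else
    tmp += (-offset) & 0xfffff;

  *pri = tmp;
}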
32421 #include "gt-arm.h"