[official-gcc.git] / gcc / config / arm / arm.c (blob f4c4ebd4a2901c1025eb48b730033c65233489cc)
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "optabs.h"
51 #include "diagnostic-core.h"
52 #include "recog.h"
53 #include "predict.h"
54 #include "dominance.h"
55 #include "cfg.h"
56 #include "cfgrtl.h"
57 #include "cfganal.h"
58 #include "lcm.h"
59 #include "cfgbuild.h"
60 #include "cfgcleanup.h"
61 #include "basic-block.h"
62 #include "hash-map.h"
63 #include "is-a.h"
64 #include "plugin-api.h"
65 #include "ipa-ref.h"
66 #include "cgraph.h"
67 #include "ggc.h"
68 #include "except.h"
69 #include "tm_p.h"
70 #include "target.h"
71 #include "sched-int.h"
72 #include "target-def.h"
73 #include "debug.h"
74 #include "langhooks.h"
75 #include "df.h"
76 #include "intl.h"
77 #include "libfuncs.h"
78 #include "params.h"
79 #include "opts.h"
80 #include "dumpfile.h"
81 #include "gimple-expr.h"
82 #include "builtins.h"
83 #include "tm-constrs.h"
85 /* Forward definitions of types. */
86 typedef struct minipool_node Mnode;
87 typedef struct minipool_fixup Mfix;
89 void (*arm_lang_output_object_attributes_hook)(void);
91 struct four_ints
93 int i[4];
96 /* Forward function declarations. */
97 static bool arm_const_not_ok_for_debug_p (rtx);
98 static bool arm_lra_p (void);
99 static bool arm_needs_doubleword_align (machine_mode, const_tree);
100 static int arm_compute_static_chain_stack_bytes (void);
101 static arm_stack_offsets *arm_get_frame_offsets (void);
102 static void arm_add_gc_roots (void);
103 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
104 HOST_WIDE_INT, rtx, rtx, int, int);
105 static unsigned bit_count (unsigned long);
106 static int arm_address_register_rtx_p (rtx, int);
107 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
108 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
109 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
110 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
111 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
112 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
113 inline static int thumb1_index_register_rtx_p (rtx, int);
114 static int thumb_far_jump_used_p (void);
115 static bool thumb_force_lr_save (void);
116 static unsigned arm_size_return_regs (void);
117 static bool arm_assemble_integer (rtx, unsigned int, int);
118 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
119 static void arm_print_operand (FILE *, rtx, int);
120 static void arm_print_operand_address (FILE *, rtx);
121 static bool arm_print_operand_punct_valid_p (unsigned char code);
122 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
123 static arm_cc get_arm_condition_code (rtx);
124 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
125 static const char *output_multi_immediate (rtx *, const char *, const char *,
126 int, HOST_WIDE_INT);
127 static const char *shift_op (rtx, HOST_WIDE_INT *);
128 static struct machine_function *arm_init_machine_status (void);
129 static void thumb_exit (FILE *, int);
130 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
131 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_forward_ref (Mfix *);
133 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_backward_ref (Mfix *);
135 static void assign_minipool_offsets (Mfix *);
136 static void arm_print_value (FILE *, rtx);
137 static void dump_minipool (rtx_insn *);
138 static int arm_barrier_cost (rtx);
139 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
140 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
141 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
142 machine_mode, rtx);
143 static void arm_reorg (void);
144 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
145 static unsigned long arm_compute_save_reg0_reg12_mask (void);
146 static unsigned long arm_compute_save_reg_mask (void);
147 static unsigned long arm_isr_value (tree);
148 static unsigned long arm_compute_func_type (void);
149 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
150 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
152 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
153 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
154 #endif
155 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
156 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
185 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
186 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
187 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
191 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
192 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
193 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
194 static void arm_init_builtins (void);
195 static void arm_init_iwmmxt_builtins (void);
196 static rtx safe_vector_operand (rtx, machine_mode);
197 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
198 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
199 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
200 static tree arm_builtin_decl (unsigned, bool);
201 static void emit_constant_insn (rtx cond, rtx pattern);
202 static rtx_insn *emit_set_insn (rtx, rtx);
203 static rtx emit_multi_reg_push (unsigned long, unsigned long);
204 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
205 tree, bool);
206 static rtx arm_function_arg (cumulative_args_t, machine_mode,
207 const_tree, bool);
208 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
211 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
212 const_tree);
213 static rtx aapcs_libcall_value (machine_mode);
214 static int aapcs_select_return_coproc (const_tree, const_tree);
216 #ifdef OBJECT_FORMAT_ELF
217 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
218 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
219 #endif
220 #ifndef ARM_PE
221 static void arm_encode_section_info (tree, rtx, int);
222 #endif
224 static void arm_file_end (void);
225 static void arm_file_start (void);
227 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
228 tree, int *, int);
229 static bool arm_pass_by_reference (cumulative_args_t,
230 machine_mode, const_tree, bool);
231 static bool arm_promote_prototypes (const_tree);
232 static bool arm_default_short_enums (void);
233 static bool arm_align_anon_bitfield (void);
234 static bool arm_return_in_msb (const_tree);
235 static bool arm_must_pass_in_stack (machine_mode, const_tree);
236 static bool arm_return_in_memory (const_tree, const_tree);
237 #if ARM_UNWIND_INFO
238 static void arm_unwind_emit (FILE *, rtx_insn *);
239 static bool arm_output_ttype (rtx);
240 static void arm_asm_emit_except_personality (rtx);
241 static void arm_asm_init_sections (void);
242 #endif
243 static rtx arm_dwarf_register_span (rtx);
245 static tree arm_cxx_guard_type (void);
246 static bool arm_cxx_guard_mask_bit (void);
247 static tree arm_get_cookie_size (tree);
248 static bool arm_cookie_has_size (void);
249 static bool arm_cxx_cdtor_returns_this (void);
250 static bool arm_cxx_key_method_may_be_inline (void);
251 static void arm_cxx_determine_class_data_visibility (tree);
252 static bool arm_cxx_class_data_always_comdat (void);
253 static bool arm_cxx_use_aeabi_atexit (void);
254 static void arm_init_libfuncs (void);
255 static tree arm_build_builtin_va_list (void);
256 static void arm_expand_builtin_va_start (tree, rtx);
257 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
258 static void arm_option_override (void);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
263 static bool arm_output_addr_const_extra (FILE *, rtx);
264 static bool arm_allocate_stack_slots_for_args (void);
265 static bool arm_warn_func_return (tree);
266 static const char *arm_invalid_parameter_type (const_tree t);
267 static const char *arm_invalid_return_type (const_tree t);
268 static tree arm_promoted_type (const_tree t);
269 static tree arm_convert_to_type (tree type, tree expr);
270 static bool arm_scalar_mode_supported_p (machine_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (machine_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
292 static unsigned int arm_autovectorize_vector_sizes (void);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
297 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
298 const unsigned char *sel);
300 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
301 tree vectype,
302 int misalign ATTRIBUTE_UNUSED);
303 static unsigned arm_add_stmt_cost (void *data, int count,
304 enum vect_cost_for_stmt kind,
305 struct _stmt_vec_info *stmt_info,
306 int misalign,
307 enum vect_cost_model_location where);
309 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
310 bool op0_preserve_value);
311 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313 /* Table of machine attributes. */
314 static const struct attribute_spec arm_attribute_table[] =
316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
317 affects_type_identity } */
318 /* Function calls made to this symbol must be done indirectly, because
319 it may lie outside of the 26 bit addressing range of a normal function
320 call. */
321 { "long_call", 0, 0, false, true, true, NULL, false },
322 /* Whereas these functions are always known to reside within the 26 bit
323 addressing range. */
324 { "short_call", 0, 0, false, true, true, NULL, false },
325 /* Specify the procedure call conventions for a function. */
326 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
327 false },
328 /* Interrupt Service Routines have special prologue and epilogue requirements. */
329 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
334 false },
335 #ifdef ARM_PE
336 /* ARM/PE has three new attributes:
337 interfacearm - ?
338 dllexport - for exporting a function/variable that will live in a dll
339 dllimport - for importing a function/variable from a dll
341 Microsoft allows multiple declspecs in one __declspec, separating
342 them with spaces. We do NOT support this. Instead, use __declspec
343 multiple times.
345 { "dllimport", 0, 0, true, false, false, NULL, false },
346 { "dllexport", 0, 0, true, false, false, NULL, false },
347 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
348 false },
349 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
352 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
353 false },
354 #endif
355 { NULL, 0, 0, false, false, false, NULL, false }
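/* For illustration: these attributes are attached to declarations in user
   code, and the handlers named in the table validate them where a handler
   is given.  A hypothetical translation unit targeting ARM might use them
   like this (the function names are made up; the attribute spellings and
   the "IRQ" / "aapcs-vfp" arguments are the documented forms):

     void far_handler (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     void uart_isr (void) __attribute__ ((isr ("IRQ")));
     void raw_stub (void) __attribute__ ((naked));
     double vsum (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));  */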
358 /* Initialize the GCC target structure. */
359 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
360 #undef TARGET_MERGE_DECL_ATTRIBUTES
361 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
362 #endif
364 #undef TARGET_LEGITIMIZE_ADDRESS
365 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
367 #undef TARGET_LRA_P
368 #define TARGET_LRA_P arm_lra_p
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_ASM_FILE_START
374 #define TARGET_ASM_FILE_START arm_file_start
375 #undef TARGET_ASM_FILE_END
376 #define TARGET_ASM_FILE_END arm_file_end
378 #undef TARGET_ASM_ALIGNED_SI_OP
379 #define TARGET_ASM_ALIGNED_SI_OP NULL
380 #undef TARGET_ASM_INTEGER
381 #define TARGET_ASM_INTEGER arm_assemble_integer
383 #undef TARGET_PRINT_OPERAND
384 #define TARGET_PRINT_OPERAND arm_print_operand
385 #undef TARGET_PRINT_OPERAND_ADDRESS
386 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
387 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
388 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
390 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
391 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
393 #undef TARGET_ASM_FUNCTION_PROLOGUE
394 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
396 #undef TARGET_ASM_FUNCTION_EPILOGUE
397 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_COMP_TYPE_ATTRIBUTES
403 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
405 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
406 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
408 #undef TARGET_SCHED_ADJUST_COST
409 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
411 #undef TARGET_SCHED_REORDER
412 #define TARGET_SCHED_REORDER arm_sched_reorder
414 #undef TARGET_REGISTER_MOVE_COST
415 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
417 #undef TARGET_MEMORY_MOVE_COST
418 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
420 #undef TARGET_ENCODE_SECTION_INFO
421 #ifdef ARM_PE
422 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
423 #else
424 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
425 #endif
427 #undef TARGET_STRIP_NAME_ENCODING
428 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
430 #undef TARGET_ASM_INTERNAL_LABEL
431 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
433 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
434 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
436 #undef TARGET_FUNCTION_VALUE
437 #define TARGET_FUNCTION_VALUE arm_function_value
439 #undef TARGET_LIBCALL_VALUE
440 #define TARGET_LIBCALL_VALUE arm_libcall_value
442 #undef TARGET_FUNCTION_VALUE_REGNO_P
443 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
445 #undef TARGET_ASM_OUTPUT_MI_THUNK
446 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
447 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
448 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
450 #undef TARGET_RTX_COSTS
451 #define TARGET_RTX_COSTS arm_rtx_costs
452 #undef TARGET_ADDRESS_COST
453 #define TARGET_ADDRESS_COST arm_address_cost
455 #undef TARGET_SHIFT_TRUNCATION_MASK
456 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
457 #undef TARGET_VECTOR_MODE_SUPPORTED_P
458 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
459 #undef TARGET_ARRAY_MODE_SUPPORTED_P
460 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
461 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
462 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
463 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
464 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
465 arm_autovectorize_vector_sizes
467 #undef TARGET_MACHINE_DEPENDENT_REORG
468 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
470 #undef TARGET_INIT_BUILTINS
471 #define TARGET_INIT_BUILTINS arm_init_builtins
472 #undef TARGET_EXPAND_BUILTIN
473 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
474 #undef TARGET_BUILTIN_DECL
475 #define TARGET_BUILTIN_DECL arm_builtin_decl
477 #undef TARGET_INIT_LIBFUNCS
478 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
480 #undef TARGET_PROMOTE_FUNCTION_MODE
481 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
484 #undef TARGET_PASS_BY_REFERENCE
485 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
486 #undef TARGET_ARG_PARTIAL_BYTES
487 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
488 #undef TARGET_FUNCTION_ARG
489 #define TARGET_FUNCTION_ARG arm_function_arg
490 #undef TARGET_FUNCTION_ARG_ADVANCE
491 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
492 #undef TARGET_FUNCTION_ARG_BOUNDARY
493 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
495 #undef TARGET_SETUP_INCOMING_VARARGS
496 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
498 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
499 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
501 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
502 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
503 #undef TARGET_TRAMPOLINE_INIT
504 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
505 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
506 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
508 #undef TARGET_WARN_FUNC_RETURN
509 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
511 #undef TARGET_DEFAULT_SHORT_ENUMS
512 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
514 #undef TARGET_ALIGN_ANON_BITFIELD
515 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
517 #undef TARGET_NARROW_VOLATILE_BITFIELD
518 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
520 #undef TARGET_CXX_GUARD_TYPE
521 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
523 #undef TARGET_CXX_GUARD_MASK_BIT
524 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
526 #undef TARGET_CXX_GET_COOKIE_SIZE
527 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
529 #undef TARGET_CXX_COOKIE_HAS_SIZE
530 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
532 #undef TARGET_CXX_CDTOR_RETURNS_THIS
533 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
535 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
536 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
538 #undef TARGET_CXX_USE_AEABI_ATEXIT
539 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
541 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
542 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
543 arm_cxx_determine_class_data_visibility
545 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
546 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
548 #undef TARGET_RETURN_IN_MSB
549 #define TARGET_RETURN_IN_MSB arm_return_in_msb
551 #undef TARGET_RETURN_IN_MEMORY
552 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
554 #undef TARGET_MUST_PASS_IN_STACK
555 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
557 #if ARM_UNWIND_INFO
558 #undef TARGET_ASM_UNWIND_EMIT
559 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
561 /* EABI unwinding tables use a different format for the typeinfo tables. */
562 #undef TARGET_ASM_TTYPE
563 #define TARGET_ASM_TTYPE arm_output_ttype
565 #undef TARGET_ARM_EABI_UNWINDER
566 #define TARGET_ARM_EABI_UNWINDER true
568 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
569 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
571 #undef TARGET_ASM_INIT_SECTIONS
572 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
573 #endif /* ARM_UNWIND_INFO */
575 #undef TARGET_DWARF_REGISTER_SPAN
576 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
578 #undef TARGET_CANNOT_COPY_INSN_P
579 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
581 #ifdef HAVE_AS_TLS
582 #undef TARGET_HAVE_TLS
583 #define TARGET_HAVE_TLS true
584 #endif
586 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
587 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
589 #undef TARGET_LEGITIMATE_CONSTANT_P
590 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
592 #undef TARGET_CANNOT_FORCE_CONST_MEM
593 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
595 #undef TARGET_MAX_ANCHOR_OFFSET
596 #define TARGET_MAX_ANCHOR_OFFSET 4095
598 /* The minimum is set such that the total size of the block
599 for a particular anchor is -4088 + 1 + 4095 bytes, which is
600 divisible by eight, ensuring natural spacing of anchors. */
601 #undef TARGET_MIN_ANCHOR_OFFSET
602 #define TARGET_MIN_ANCHOR_OFFSET -4088
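/* Illustrative arithmetic for the comment above: an anchor can reach
   offsets in [-4088, 4095], so one block covers
   4095 - (-4088) + 1 = 8184 bytes, and 8184 = 1023 * 8.  */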
604 #undef TARGET_SCHED_ISSUE_RATE
605 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
607 #undef TARGET_MANGLE_TYPE
608 #define TARGET_MANGLE_TYPE arm_mangle_type
610 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
611 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
613 #undef TARGET_BUILD_BUILTIN_VA_LIST
614 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
615 #undef TARGET_EXPAND_BUILTIN_VA_START
616 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
617 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
618 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
620 #ifdef HAVE_AS_TLS
621 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
622 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
623 #endif
625 #undef TARGET_LEGITIMATE_ADDRESS_P
626 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
628 #undef TARGET_PREFERRED_RELOAD_CLASS
629 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
631 #undef TARGET_INVALID_PARAMETER_TYPE
632 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
634 #undef TARGET_INVALID_RETURN_TYPE
635 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
637 #undef TARGET_PROMOTED_TYPE
638 #define TARGET_PROMOTED_TYPE arm_promoted_type
640 #undef TARGET_CONVERT_TO_TYPE
641 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
643 #undef TARGET_SCALAR_MODE_SUPPORTED_P
644 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
646 #undef TARGET_FRAME_POINTER_REQUIRED
647 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
649 #undef TARGET_CAN_ELIMINATE
650 #define TARGET_CAN_ELIMINATE arm_can_eliminate
652 #undef TARGET_CONDITIONAL_REGISTER_USAGE
653 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
655 #undef TARGET_CLASS_LIKELY_SPILLED_P
656 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
658 #undef TARGET_VECTORIZE_BUILTINS
659 #define TARGET_VECTORIZE_BUILTINS
661 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
662 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
663 arm_builtin_vectorized_function
665 #undef TARGET_VECTOR_ALIGNMENT
666 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
668 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
669 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
670 arm_vector_alignment_reachable
672 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
673 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
674 arm_builtin_support_vector_misalignment
676 #undef TARGET_PREFERRED_RENAME_CLASS
677 #define TARGET_PREFERRED_RENAME_CLASS \
678 arm_preferred_rename_class
680 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
681 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
682 arm_vectorize_vec_perm_const_ok
684 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
685 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
686 arm_builtin_vectorization_cost
687 #undef TARGET_VECTORIZE_ADD_STMT_COST
688 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
690 #undef TARGET_CANONICALIZE_COMPARISON
691 #define TARGET_CANONICALIZE_COMPARISON \
692 arm_canonicalize_comparison
694 #undef TARGET_ASAN_SHADOW_OFFSET
695 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
697 #undef MAX_INSN_PER_IT_BLOCK
698 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
700 #undef TARGET_CAN_USE_DOLOOP_P
701 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
703 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
704 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
706 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
707 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
709 struct gcc_target targetm = TARGET_INITIALIZER;
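/* Illustrative sketch of what the #undef/#define pairs above achieve:
   target-def.h supplies a default for every TARGET_* hook macro, and
   TARGET_INITIALIZER expands to an aggregate initializer built from
   whatever those macros name at this point.  Conceptually the result is

     struct gcc_target targetm = {
       ...,
       arm_rtx_costs,       <- from TARGET_RTX_COSTS
       arm_address_cost,    <- from TARGET_ADDRESS_COST
       ...,
     };

   so the middle end reaches ARM-specific behaviour through calls such as
   targetm.rtx_costs (...).  The field order shown is only a sketch; the
   real layout comes from target.def.  */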
711 /* Obstack for minipool constant handling. */
712 static struct obstack minipool_obstack;
713 static char * minipool_startobj;
715 /* The maximum number of insns that will be conditionalised
716 (rather than branched over) if possible. */
717 static int max_insns_skipped = 5;
719 extern FILE * asm_out_file;
721 /* True if we are currently building a constant table. */
722 int making_const_table;
724 /* The processor for which instructions should be scheduled. */
725 enum processor_type arm_tune = arm_none;
727 /* The current tuning set. */
728 const struct tune_params *current_tune;
730 /* Which floating point hardware to schedule for. */
731 int arm_fpu_attr;
733 /* Which floating point hardware to use. */
734 const struct arm_fpu_desc *arm_fpu_desc;
736 /* Used for Thumb call_via trampolines. */
737 rtx thumb_call_via_label[14];
738 static int thumb_call_reg_needed;
740 /* Bit values used to identify processor capabilities. */
741 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
742 #define FL_ARCH3M (1 << 1) /* Extended multiply */
743 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
744 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
745 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
746 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
747 #define FL_THUMB (1 << 6) /* Thumb aware */
748 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
749 #define FL_STRONG (1 << 8) /* StrongARM */
750 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
751 #define FL_XSCALE (1 << 10) /* XScale */
752 /* spare (1 << 11) */
753 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
754 media instructions. */
755 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
756 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
757 Note: ARM6 & 7 derivatives only. */
758 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
759 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
760 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
761 profile. */
762 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
763 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
764 #define FL_NEON (1 << 20) /* Neon instructions. */
765 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
766 architecture. */
767 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
768 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
769 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
770 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
772 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
773 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
775 /* Flags that only affect tuning, not available instructions. */
776 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
777 | FL_CO_PROC)
779 #define FL_FOR_ARCH2 FL_NOTM
780 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
781 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
782 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
783 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
784 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
785 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
786 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
787 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
788 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
789 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
790 #define FL_FOR_ARCH6J FL_FOR_ARCH6
791 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
792 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
793 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
794 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
795 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
796 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
797 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
798 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
799 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
800 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
801 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
802 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
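/* Worked example of the flag composition above:

     FL_FOR_ARCH7A = FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K
                   = ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
                     | FL_NOTM | FL_ARCH6K

   so an ARMv7-A target ends up with (among others) FL_THUMB, FL_THUMB2,
   FL_ARCH6, FL_ARCH6K, FL_ARCH7 and FL_NOTM set, whereas a v7-M target
   (FL_FOR_ARCH7M) keeps FL_NOTM clear and adds FL_THUMB_DIV.  */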
804 /* The bits in this mask specify which
805 instructions we are allowed to generate. */
806 static unsigned long insn_flags = 0;
808 /* The bits in this mask specify which instruction scheduling options should
809 be used. */
810 static unsigned long tune_flags = 0;
812 /* The highest ARM architecture version supported by the
813 target. */
814 enum base_architecture arm_base_arch = BASE_ARCH_0;
816 /* The following are used in the arm.md file as equivalents to bits
817 in the above two flag variables. */
819 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
820 int arm_arch3m = 0;
822 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
823 int arm_arch4 = 0;
825 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
826 int arm_arch4t = 0;
828 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
829 int arm_arch5 = 0;
831 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
832 int arm_arch5e = 0;
834 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
835 int arm_arch6 = 0;
837 /* Nonzero if this chip supports the ARM 6K extensions. */
838 int arm_arch6k = 0;
840 /* Nonzero if instructions present in ARMv6-M can be used. */
841 int arm_arch6m = 0;
843 /* Nonzero if this chip supports the ARM 7 extensions. */
844 int arm_arch7 = 0;
846 /* Nonzero if instructions not present in the 'M' profile can be used. */
847 int arm_arch_notm = 0;
849 /* Nonzero if instructions present in ARMv7E-M can be used. */
850 int arm_arch7em = 0;
852 /* Nonzero if instructions present in ARMv8 can be used. */
853 int arm_arch8 = 0;
855 /* Nonzero if this chip can benefit from load scheduling. */
856 int arm_ld_sched = 0;
858 /* Nonzero if this chip is a StrongARM. */
859 int arm_tune_strongarm = 0;
861 /* Nonzero if this chip supports Intel Wireless MMX technology. */
862 int arm_arch_iwmmxt = 0;
864 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
865 int arm_arch_iwmmxt2 = 0;
867 /* Nonzero if this chip is an XScale. */
868 int arm_arch_xscale = 0;
870 /* Nonzero if tuning for XScale */
871 int arm_tune_xscale = 0;
873 /* Nonzero if we want to tune for stores that access the write-buffer.
874 This typically means an ARM6 or ARM7 with MMU or MPU. */
875 int arm_tune_wbuf = 0;
877 /* Nonzero if tuning for Cortex-A9. */
878 int arm_tune_cortex_a9 = 0;
880 /* Nonzero if generating Thumb instructions. */
881 int thumb_code = 0;
883 /* Nonzero if generating Thumb-1 instructions. */
884 int thumb1_code = 0;
886 /* Nonzero if we should define __THUMB_INTERWORK__ in the
887 preprocessor.
888 XXX This is a bit of a hack; it's intended to help work around
889 problems in GLD, which doesn't understand that armv5t code is
890 interworking clean. */
891 int arm_cpp_interwork = 0;
893 /* Nonzero if chip supports Thumb 2. */
894 int arm_arch_thumb2;
896 /* Nonzero if chip supports integer division instruction. */
897 int arm_arch_arm_hwdiv;
898 int arm_arch_thumb_hwdiv;
900 /* Nonzero if we should use Neon to handle 64-bit operations rather
901 than core registers. */
902 int prefer_neon_for_64bits = 0;
904 /* Nonzero if we shouldn't use literal pools. */
905 bool arm_disable_literal_pool = false;
907 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
908 we must report the mode of the memory reference from
909 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
910 machine_mode output_memory_reference_mode;
912 /* The register number to be used for the PIC offset register. */
913 unsigned arm_pic_register = INVALID_REGNUM;
915 enum arm_pcs arm_pcs_default;
917 /* For an explanation of these variables, see final_prescan_insn below. */
918 int arm_ccfsm_state;
919 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
920 enum arm_cond_code arm_current_cc;
922 rtx arm_target_insn;
923 int arm_target_label;
924 /* The number of conditionally executed insns, including the current insn. */
925 int arm_condexec_count = 0;
926 /* A bitmask specifying the patterns for the IT block.
927 Zero means do not output an IT block before this insn. */
928 int arm_condexec_mask = 0;
929 /* The number of bits used in arm_condexec_mask. */
930 int arm_condexec_masklen = 0;
932 /* Nonzero if chip supports the ARMv8 CRC instructions. */
933 int arm_arch_crc = 0;
935 /* The condition codes of the ARM, and the inverse function. */
936 static const char * const arm_condition_codes[] =
938 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
939 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
942 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
943 int arm_regs_in_sequence[] =
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
948 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
949 #define streq(string1, string2) (strcmp (string1, string2) == 0)
951 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
952 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
953 | (1 << PIC_OFFSET_TABLE_REGNUM)))
955 /* Initialization code. */
957 struct processors
959 const char *const name;
960 enum processor_type core;
961 const char *arch;
962 enum base_architecture base_arch;
963 const unsigned long flags;
964 const struct tune_params *const tune;
968 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
969 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
970 prefetch_slots, \
971 l1_size, \
972 l1_line_size
974 /* arm generic vectorizer costs. */
975 static const
976 struct cpu_vec_costs arm_default_vec_cost = {
977 1, /* scalar_stmt_cost. */
978 1, /* scalar load_cost. */
979 1, /* scalar_store_cost. */
980 1, /* vec_stmt_cost. */
981 1, /* vec_to_scalar_cost. */
982 1, /* scalar_to_vec_cost. */
983 1, /* vec_align_load_cost. */
984 1, /* vec_unalign_load_cost. */
985 1, /* vec_unalign_store_cost. */
986 1, /* vec_store_cost. */
987 3, /* cond_taken_branch_cost. */
988 1, /* cond_not_taken_branch_cost. */
991 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
992 #include "aarch-cost-tables.h"
996 const struct cpu_cost_table cortexa9_extra_costs =
998 /* ALU */
1000 0, /* arith. */
1001 0, /* logical. */
1002 0, /* shift. */
1003 COSTS_N_INSNS (1), /* shift_reg. */
1004 COSTS_N_INSNS (1), /* arith_shift. */
1005 COSTS_N_INSNS (2), /* arith_shift_reg. */
1006 0, /* log_shift. */
1007 COSTS_N_INSNS (1), /* log_shift_reg. */
1008 COSTS_N_INSNS (1), /* extend. */
1009 COSTS_N_INSNS (2), /* extend_arith. */
1010 COSTS_N_INSNS (1), /* bfi. */
1011 COSTS_N_INSNS (1), /* bfx. */
1012 0, /* clz. */
1013 0, /* rev. */
1014 0, /* non_exec. */
1015 true /* non_exec_costs_exec. */
1018 /* MULT SImode */
1020 COSTS_N_INSNS (3), /* simple. */
1021 COSTS_N_INSNS (3), /* flag_setting. */
1022 COSTS_N_INSNS (2), /* extend. */
1023 COSTS_N_INSNS (3), /* add. */
1024 COSTS_N_INSNS (2), /* extend_add. */
1025 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1027 /* MULT DImode */
1029 0, /* simple (N/A). */
1030 0, /* flag_setting (N/A). */
1031 COSTS_N_INSNS (4), /* extend. */
1032 0, /* add (N/A). */
1033 COSTS_N_INSNS (4), /* extend_add. */
1034 0 /* idiv (N/A). */
1037 /* LD/ST */
1039 COSTS_N_INSNS (2), /* load. */
1040 COSTS_N_INSNS (2), /* load_sign_extend. */
1041 COSTS_N_INSNS (2), /* ldrd. */
1042 COSTS_N_INSNS (2), /* ldm_1st. */
1043 1, /* ldm_regs_per_insn_1st. */
1044 2, /* ldm_regs_per_insn_subsequent. */
1045 COSTS_N_INSNS (5), /* loadf. */
1046 COSTS_N_INSNS (5), /* loadd. */
1047 COSTS_N_INSNS (1), /* load_unaligned. */
1048 COSTS_N_INSNS (2), /* store. */
1049 COSTS_N_INSNS (2), /* strd. */
1050 COSTS_N_INSNS (2), /* stm_1st. */
1051 1, /* stm_regs_per_insn_1st. */
1052 2, /* stm_regs_per_insn_subsequent. */
1053 COSTS_N_INSNS (1), /* storef. */
1054 COSTS_N_INSNS (1), /* stored. */
1055 COSTS_N_INSNS (1) /* store_unaligned. */
1058 /* FP SFmode */
1060 COSTS_N_INSNS (14), /* div. */
1061 COSTS_N_INSNS (4), /* mult. */
1062 COSTS_N_INSNS (7), /* mult_addsub. */
1063 COSTS_N_INSNS (30), /* fma. */
1064 COSTS_N_INSNS (3), /* addsub. */
1065 COSTS_N_INSNS (1), /* fpconst. */
1066 COSTS_N_INSNS (1), /* neg. */
1067 COSTS_N_INSNS (3), /* compare. */
1068 COSTS_N_INSNS (3), /* widen. */
1069 COSTS_N_INSNS (3), /* narrow. */
1070 COSTS_N_INSNS (3), /* toint. */
1071 COSTS_N_INSNS (3), /* fromint. */
1072 COSTS_N_INSNS (3) /* roundint. */
1074 /* FP DFmode */
1076 COSTS_N_INSNS (24), /* div. */
1077 COSTS_N_INSNS (5), /* mult. */
1078 COSTS_N_INSNS (8), /* mult_addsub. */
1079 COSTS_N_INSNS (30), /* fma. */
1080 COSTS_N_INSNS (3), /* addsub. */
1081 COSTS_N_INSNS (1), /* fpconst. */
1082 COSTS_N_INSNS (1), /* neg. */
1083 COSTS_N_INSNS (3), /* compare. */
1084 COSTS_N_INSNS (3), /* widen. */
1085 COSTS_N_INSNS (3), /* narrow. */
1086 COSTS_N_INSNS (3), /* toint. */
1087 COSTS_N_INSNS (3), /* fromint. */
1088 COSTS_N_INSNS (3) /* roundint. */
1091 /* Vector */
1093 COSTS_N_INSNS (1) /* alu. */
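/* How to read these tables (an informal note): each field is the extra
   cost of an operation beyond a single simple instruction, scaled with
   COSTS_N_INSNS (N), i.e. N times the notional cost of one insn (the
   macro comes from rtl.h).  For the Cortex-A9 entries above, a SImode
   multiply is about three insns' worth of extra cost while plain
   arith/logical ops add nothing; the per-core tables are referenced from
   per-CPU tune_params structures and consulted when RTX costs are
   computed.  */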
1097 const struct cpu_cost_table cortexa8_extra_costs =
1099 /* ALU */
1101 0, /* arith. */
1102 0, /* logical. */
1103 COSTS_N_INSNS (1), /* shift. */
1104 0, /* shift_reg. */
1105 COSTS_N_INSNS (1), /* arith_shift. */
1106 0, /* arith_shift_reg. */
1107 COSTS_N_INSNS (1), /* log_shift. */
1108 0, /* log_shift_reg. */
1109 0, /* extend. */
1110 0, /* extend_arith. */
1111 0, /* bfi. */
1112 0, /* bfx. */
1113 0, /* clz. */
1114 0, /* rev. */
1115 0, /* non_exec. */
1116 true /* non_exec_costs_exec. */
1119 /* MULT SImode */
1121 COSTS_N_INSNS (1), /* simple. */
1122 COSTS_N_INSNS (1), /* flag_setting. */
1123 COSTS_N_INSNS (1), /* extend. */
1124 COSTS_N_INSNS (1), /* add. */
1125 COSTS_N_INSNS (1), /* extend_add. */
1126 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1128 /* MULT DImode */
1130 0, /* simple (N/A). */
1131 0, /* flag_setting (N/A). */
1132 COSTS_N_INSNS (2), /* extend. */
1133 0, /* add (N/A). */
1134 COSTS_N_INSNS (2), /* extend_add. */
1135 0 /* idiv (N/A). */
1138 /* LD/ST */
1140 COSTS_N_INSNS (1), /* load. */
1141 COSTS_N_INSNS (1), /* load_sign_extend. */
1142 COSTS_N_INSNS (1), /* ldrd. */
1143 COSTS_N_INSNS (1), /* ldm_1st. */
1144 1, /* ldm_regs_per_insn_1st. */
1145 2, /* ldm_regs_per_insn_subsequent. */
1146 COSTS_N_INSNS (1), /* loadf. */
1147 COSTS_N_INSNS (1), /* loadd. */
1148 COSTS_N_INSNS (1), /* load_unaligned. */
1149 COSTS_N_INSNS (1), /* store. */
1150 COSTS_N_INSNS (1), /* strd. */
1151 COSTS_N_INSNS (1), /* stm_1st. */
1152 1, /* stm_regs_per_insn_1st. */
1153 2, /* stm_regs_per_insn_subsequent. */
1154 COSTS_N_INSNS (1), /* storef. */
1155 COSTS_N_INSNS (1), /* stored. */
1156 COSTS_N_INSNS (1) /* store_unaligned. */
1159 /* FP SFmode */
1161 COSTS_N_INSNS (36), /* div. */
1162 COSTS_N_INSNS (11), /* mult. */
1163 COSTS_N_INSNS (20), /* mult_addsub. */
1164 COSTS_N_INSNS (30), /* fma. */
1165 COSTS_N_INSNS (9), /* addsub. */
1166 COSTS_N_INSNS (3), /* fpconst. */
1167 COSTS_N_INSNS (3), /* neg. */
1168 COSTS_N_INSNS (6), /* compare. */
1169 COSTS_N_INSNS (4), /* widen. */
1170 COSTS_N_INSNS (4), /* narrow. */
1171 COSTS_N_INSNS (8), /* toint. */
1172 COSTS_N_INSNS (8), /* fromint. */
1173 COSTS_N_INSNS (8) /* roundint. */
1175 /* FP DFmode */
1177 COSTS_N_INSNS (64), /* div. */
1178 COSTS_N_INSNS (16), /* mult. */
1179 COSTS_N_INSNS (25), /* mult_addsub. */
1180 COSTS_N_INSNS (30), /* fma. */
1181 COSTS_N_INSNS (9), /* addsub. */
1182 COSTS_N_INSNS (3), /* fpconst. */
1183 COSTS_N_INSNS (3), /* neg. */
1184 COSTS_N_INSNS (6), /* compare. */
1185 COSTS_N_INSNS (6), /* widen. */
1186 COSTS_N_INSNS (6), /* narrow. */
1187 COSTS_N_INSNS (8), /* toint. */
1188 COSTS_N_INSNS (8), /* fromint. */
1189 COSTS_N_INSNS (8) /* roundint. */
1192 /* Vector */
1194 COSTS_N_INSNS (1) /* alu. */
1198 const struct cpu_cost_table cortexa5_extra_costs =
1200 /* ALU */
1202 0, /* arith. */
1203 0, /* logical. */
1204 COSTS_N_INSNS (1), /* shift. */
1205 COSTS_N_INSNS (1), /* shift_reg. */
1206 COSTS_N_INSNS (1), /* arith_shift. */
1207 COSTS_N_INSNS (1), /* arith_shift_reg. */
1208 COSTS_N_INSNS (1), /* log_shift. */
1209 COSTS_N_INSNS (1), /* log_shift_reg. */
1210 COSTS_N_INSNS (1), /* extend. */
1211 COSTS_N_INSNS (1), /* extend_arith. */
1212 COSTS_N_INSNS (1), /* bfi. */
1213 COSTS_N_INSNS (1), /* bfx. */
1214 COSTS_N_INSNS (1), /* clz. */
1215 COSTS_N_INSNS (1), /* rev. */
1216 0, /* non_exec. */
1217 true /* non_exec_costs_exec. */
1221 /* MULT SImode */
1223 0, /* simple. */
1224 COSTS_N_INSNS (1), /* flag_setting. */
1225 COSTS_N_INSNS (1), /* extend. */
1226 COSTS_N_INSNS (1), /* add. */
1227 COSTS_N_INSNS (1), /* extend_add. */
1228 COSTS_N_INSNS (7) /* idiv. */
1230 /* MULT DImode */
1232 0, /* simple (N/A). */
1233 0, /* flag_setting (N/A). */
1234 COSTS_N_INSNS (1), /* extend. */
1235 0, /* add. */
1236 COSTS_N_INSNS (2), /* extend_add. */
1237 0 /* idiv (N/A). */
1240 /* LD/ST */
1242 COSTS_N_INSNS (1), /* load. */
1243 COSTS_N_INSNS (1), /* load_sign_extend. */
1244 COSTS_N_INSNS (6), /* ldrd. */
1245 COSTS_N_INSNS (1), /* ldm_1st. */
1246 1, /* ldm_regs_per_insn_1st. */
1247 2, /* ldm_regs_per_insn_subsequent. */
1248 COSTS_N_INSNS (2), /* loadf. */
1249 COSTS_N_INSNS (4), /* loadd. */
1250 COSTS_N_INSNS (1), /* load_unaligned. */
1251 COSTS_N_INSNS (1), /* store. */
1252 COSTS_N_INSNS (3), /* strd. */
1253 COSTS_N_INSNS (1), /* stm_1st. */
1254 1, /* stm_regs_per_insn_1st. */
1255 2, /* stm_regs_per_insn_subsequent. */
1256 COSTS_N_INSNS (2), /* storef. */
1257 COSTS_N_INSNS (2), /* stored. */
1258 COSTS_N_INSNS (1) /* store_unaligned. */
1261 /* FP SFmode */
1263 COSTS_N_INSNS (15), /* div. */
1264 COSTS_N_INSNS (3), /* mult. */
1265 COSTS_N_INSNS (7), /* mult_addsub. */
1266 COSTS_N_INSNS (7), /* fma. */
1267 COSTS_N_INSNS (3), /* addsub. */
1268 COSTS_N_INSNS (3), /* fpconst. */
1269 COSTS_N_INSNS (3), /* neg. */
1270 COSTS_N_INSNS (3), /* compare. */
1271 COSTS_N_INSNS (3), /* widen. */
1272 COSTS_N_INSNS (3), /* narrow. */
1273 COSTS_N_INSNS (3), /* toint. */
1274 COSTS_N_INSNS (3), /* fromint. */
1275 COSTS_N_INSNS (3) /* roundint. */
1277 /* FP DFmode */
1279 COSTS_N_INSNS (30), /* div. */
1280 COSTS_N_INSNS (6), /* mult. */
1281 COSTS_N_INSNS (10), /* mult_addsub. */
1282 COSTS_N_INSNS (7), /* fma. */
1283 COSTS_N_INSNS (3), /* addsub. */
1284 COSTS_N_INSNS (3), /* fpconst. */
1285 COSTS_N_INSNS (3), /* neg. */
1286 COSTS_N_INSNS (3), /* compare. */
1287 COSTS_N_INSNS (3), /* widen. */
1288 COSTS_N_INSNS (3), /* narrow. */
1289 COSTS_N_INSNS (3), /* toint. */
1290 COSTS_N_INSNS (3), /* fromint. */
1291 COSTS_N_INSNS (3) /* roundint. */
1294 /* Vector */
1296 COSTS_N_INSNS (1) /* alu. */
1301 const struct cpu_cost_table cortexa7_extra_costs =
1303 /* ALU */
1305 0, /* arith. */
1306 0, /* logical. */
1307 COSTS_N_INSNS (1), /* shift. */
1308 COSTS_N_INSNS (1), /* shift_reg. */
1309 COSTS_N_INSNS (1), /* arith_shift. */
1310 COSTS_N_INSNS (1), /* arith_shift_reg. */
1311 COSTS_N_INSNS (1), /* log_shift. */
1312 COSTS_N_INSNS (1), /* log_shift_reg. */
1313 COSTS_N_INSNS (1), /* extend. */
1314 COSTS_N_INSNS (1), /* extend_arith. */
1315 COSTS_N_INSNS (1), /* bfi. */
1316 COSTS_N_INSNS (1), /* bfx. */
1317 COSTS_N_INSNS (1), /* clz. */
1318 COSTS_N_INSNS (1), /* rev. */
1319 0, /* non_exec. */
1320 true /* non_exec_costs_exec. */
1324 /* MULT SImode */
1326 0, /* simple. */
1327 COSTS_N_INSNS (1), /* flag_setting. */
1328 COSTS_N_INSNS (1), /* extend. */
1329 COSTS_N_INSNS (1), /* add. */
1330 COSTS_N_INSNS (1), /* extend_add. */
1331 COSTS_N_INSNS (7) /* idiv. */
1333 /* MULT DImode */
1335 0, /* simple (N/A). */
1336 0, /* flag_setting (N/A). */
1337 COSTS_N_INSNS (1), /* extend. */
1338 0, /* add. */
1339 COSTS_N_INSNS (2), /* extend_add. */
1340 0 /* idiv (N/A). */
1343 /* LD/ST */
1345 COSTS_N_INSNS (1), /* load. */
1346 COSTS_N_INSNS (1), /* load_sign_extend. */
1347 COSTS_N_INSNS (3), /* ldrd. */
1348 COSTS_N_INSNS (1), /* ldm_1st. */
1349 1, /* ldm_regs_per_insn_1st. */
1350 2, /* ldm_regs_per_insn_subsequent. */
1351 COSTS_N_INSNS (2), /* loadf. */
1352 COSTS_N_INSNS (2), /* loadd. */
1353 COSTS_N_INSNS (1), /* load_unaligned. */
1354 COSTS_N_INSNS (1), /* store. */
1355 COSTS_N_INSNS (3), /* strd. */
1356 COSTS_N_INSNS (1), /* stm_1st. */
1357 1, /* stm_regs_per_insn_1st. */
1358 2, /* stm_regs_per_insn_subsequent. */
1359 COSTS_N_INSNS (2), /* storef. */
1360 COSTS_N_INSNS (2), /* stored. */
1361 COSTS_N_INSNS (1) /* store_unaligned. */
1364 /* FP SFmode */
1366 COSTS_N_INSNS (15), /* div. */
1367 COSTS_N_INSNS (3), /* mult. */
1368 COSTS_N_INSNS (7), /* mult_addsub. */
1369 COSTS_N_INSNS (7), /* fma. */
1370 COSTS_N_INSNS (3), /* addsub. */
1371 COSTS_N_INSNS (3), /* fpconst. */
1372 COSTS_N_INSNS (3), /* neg. */
1373 COSTS_N_INSNS (3), /* compare. */
1374 COSTS_N_INSNS (3), /* widen. */
1375 COSTS_N_INSNS (3), /* narrow. */
1376 COSTS_N_INSNS (3), /* toint. */
1377 COSTS_N_INSNS (3), /* fromint. */
1378 COSTS_N_INSNS (3) /* roundint. */
1380 /* FP DFmode */
1382 COSTS_N_INSNS (30), /* div. */
1383 COSTS_N_INSNS (6), /* mult. */
1384 COSTS_N_INSNS (10), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1397 /* Vector */
1399 COSTS_N_INSNS (1) /* alu. */
1403 const struct cpu_cost_table cortexa12_extra_costs =
1405 /* ALU */
1407 0, /* arith. */
1408 0, /* logical. */
1409 0, /* shift. */
1410 COSTS_N_INSNS (1), /* shift_reg. */
1411 COSTS_N_INSNS (1), /* arith_shift. */
1412 COSTS_N_INSNS (1), /* arith_shift_reg. */
1413 COSTS_N_INSNS (1), /* log_shift. */
1414 COSTS_N_INSNS (1), /* log_shift_reg. */
1415 0, /* extend. */
1416 COSTS_N_INSNS (1), /* extend_arith. */
1417 0, /* bfi. */
1418 COSTS_N_INSNS (1), /* bfx. */
1419 COSTS_N_INSNS (1), /* clz. */
1420 COSTS_N_INSNS (1), /* rev. */
1421 0, /* non_exec. */
1422 true /* non_exec_costs_exec. */
1424 /* MULT SImode */
1427 COSTS_N_INSNS (2), /* simple. */
1428 COSTS_N_INSNS (3), /* flag_setting. */
1429 COSTS_N_INSNS (2), /* extend. */
1430 COSTS_N_INSNS (3), /* add. */
1431 COSTS_N_INSNS (2), /* extend_add. */
1432 COSTS_N_INSNS (18) /* idiv. */
1434 /* MULT DImode */
1436 0, /* simple (N/A). */
1437 0, /* flag_setting (N/A). */
1438 COSTS_N_INSNS (3), /* extend. */
1439 0, /* add (N/A). */
1440 COSTS_N_INSNS (3), /* extend_add. */
1441 0 /* idiv (N/A). */
1444 /* LD/ST */
1446 COSTS_N_INSNS (3), /* load. */
1447 COSTS_N_INSNS (3), /* load_sign_extend. */
1448 COSTS_N_INSNS (3), /* ldrd. */
1449 COSTS_N_INSNS (3), /* ldm_1st. */
1450 1, /* ldm_regs_per_insn_1st. */
1451 2, /* ldm_regs_per_insn_subsequent. */
1452 COSTS_N_INSNS (3), /* loadf. */
1453 COSTS_N_INSNS (3), /* loadd. */
1454 0, /* load_unaligned. */
1455 0, /* store. */
1456 0, /* strd. */
1457 0, /* stm_1st. */
1458 1, /* stm_regs_per_insn_1st. */
1459 2, /* stm_regs_per_insn_subsequent. */
1460 COSTS_N_INSNS (2), /* storef. */
1461 COSTS_N_INSNS (2), /* stored. */
1462 0 /* store_unaligned. */
1465 /* FP SFmode */
1467 COSTS_N_INSNS (17), /* div. */
1468 COSTS_N_INSNS (4), /* mult. */
1469 COSTS_N_INSNS (8), /* mult_addsub. */
1470 COSTS_N_INSNS (8), /* fma. */
1471 COSTS_N_INSNS (4), /* addsub. */
1472 COSTS_N_INSNS (2), /* fpconst. */
1473 COSTS_N_INSNS (2), /* neg. */
1474 COSTS_N_INSNS (2), /* compare. */
1475 COSTS_N_INSNS (4), /* widen. */
1476 COSTS_N_INSNS (4), /* narrow. */
1477 COSTS_N_INSNS (4), /* toint. */
1478 COSTS_N_INSNS (4), /* fromint. */
1479 COSTS_N_INSNS (4) /* roundint. */
1481 /* FP DFmode */
1483 COSTS_N_INSNS (31), /* div. */
1484 COSTS_N_INSNS (4), /* mult. */
1485 COSTS_N_INSNS (8), /* mult_addsub. */
1486 COSTS_N_INSNS (8), /* fma. */
1487 COSTS_N_INSNS (4), /* addsub. */
1488 COSTS_N_INSNS (2), /* fpconst. */
1489 COSTS_N_INSNS (2), /* neg. */
1490 COSTS_N_INSNS (2), /* compare. */
1491 COSTS_N_INSNS (4), /* widen. */
1492 COSTS_N_INSNS (4), /* narrow. */
1493 COSTS_N_INSNS (4), /* toint. */
1494 COSTS_N_INSNS (4), /* fromint. */
1495 COSTS_N_INSNS (4) /* roundint. */
1498 /* Vector */
1500 COSTS_N_INSNS (1) /* alu. */
1504 const struct cpu_cost_table cortexa15_extra_costs =
1506 /* ALU */
1508 0, /* arith. */
1509 0, /* logical. */
1510 0, /* shift. */
1511 0, /* shift_reg. */
1512 COSTS_N_INSNS (1), /* arith_shift. */
1513 COSTS_N_INSNS (1), /* arith_shift_reg. */
1514 COSTS_N_INSNS (1), /* log_shift. */
1515 COSTS_N_INSNS (1), /* log_shift_reg. */
1516 0, /* extend. */
1517 COSTS_N_INSNS (1), /* extend_arith. */
1518 COSTS_N_INSNS (1), /* bfi. */
1519 0, /* bfx. */
1520 0, /* clz. */
1521 0, /* rev. */
1522 0, /* non_exec. */
1523 true /* non_exec_costs_exec. */
1525 /* MULT SImode */
1528 COSTS_N_INSNS (2), /* simple. */
1529 COSTS_N_INSNS (3), /* flag_setting. */
1530 COSTS_N_INSNS (2), /* extend. */
1531 COSTS_N_INSNS (2), /* add. */
1532 COSTS_N_INSNS (2), /* extend_add. */
1533 COSTS_N_INSNS (18) /* idiv. */
1535 /* MULT DImode */
1537 0, /* simple (N/A). */
1538 0, /* flag_setting (N/A). */
1539 COSTS_N_INSNS (3), /* extend. */
1540 0, /* add (N/A). */
1541 COSTS_N_INSNS (3), /* extend_add. */
1542 0 /* idiv (N/A). */
1545 /* LD/ST */
1547 COSTS_N_INSNS (3), /* load. */
1548 COSTS_N_INSNS (3), /* load_sign_extend. */
1549 COSTS_N_INSNS (3), /* ldrd. */
1550 COSTS_N_INSNS (4), /* ldm_1st. */
1551 1, /* ldm_regs_per_insn_1st. */
1552 2, /* ldm_regs_per_insn_subsequent. */
1553 COSTS_N_INSNS (4), /* loadf. */
1554 COSTS_N_INSNS (4), /* loadd. */
1555 0, /* load_unaligned. */
1556 0, /* store. */
1557 0, /* strd. */
1558 COSTS_N_INSNS (1), /* stm_1st. */
1559 1, /* stm_regs_per_insn_1st. */
1560 2, /* stm_regs_per_insn_subsequent. */
1561 0, /* storef. */
1562 0, /* stored. */
1563 0 /* store_unaligned. */
1566 /* FP SFmode */
1568 COSTS_N_INSNS (17), /* div. */
1569 COSTS_N_INSNS (4), /* mult. */
1570 COSTS_N_INSNS (8), /* mult_addsub. */
1571 COSTS_N_INSNS (8), /* fma. */
1572 COSTS_N_INSNS (4), /* addsub. */
1573 COSTS_N_INSNS (2), /* fpconst. */
1574 COSTS_N_INSNS (2), /* neg. */
1575 COSTS_N_INSNS (5), /* compare. */
1576 COSTS_N_INSNS (4), /* widen. */
1577 COSTS_N_INSNS (4), /* narrow. */
1578 COSTS_N_INSNS (4), /* toint. */
1579 COSTS_N_INSNS (4), /* fromint. */
1580 COSTS_N_INSNS (4) /* roundint. */
1582 /* FP DFmode */
1584 COSTS_N_INSNS (31), /* div. */
1585 COSTS_N_INSNS (4), /* mult. */
1586 COSTS_N_INSNS (8), /* mult_addsub. */
1587 COSTS_N_INSNS (8), /* fma. */
1588 COSTS_N_INSNS (4), /* addsub. */
1589 COSTS_N_INSNS (2), /* fpconst. */
1590 COSTS_N_INSNS (2), /* neg. */
1591 COSTS_N_INSNS (2), /* compare. */
1592 COSTS_N_INSNS (4), /* widen. */
1593 COSTS_N_INSNS (4), /* narrow. */
1594 COSTS_N_INSNS (4), /* toint. */
1595 COSTS_N_INSNS (4), /* fromint. */
1596 COSTS_N_INSNS (4) /* roundint. */
1599 /* Vector */
1601 COSTS_N_INSNS (1) /* alu. */
1605 const struct cpu_cost_table v7m_extra_costs =
1607 /* ALU */
1609 0, /* arith. */
1610 0, /* logical. */
1611 0, /* shift. */
1612 0, /* shift_reg. */
1613 0, /* arith_shift. */
1614 COSTS_N_INSNS (1), /* arith_shift_reg. */
1615 0, /* log_shift. */
1616 COSTS_N_INSNS (1), /* log_shift_reg. */
1617 0, /* extend. */
1618 COSTS_N_INSNS (1), /* extend_arith. */
1619 0, /* bfi. */
1620 0, /* bfx. */
1621 0, /* clz. */
1622 0, /* rev. */
1623 COSTS_N_INSNS (1), /* non_exec. */
1624 false /* non_exec_costs_exec. */
1627 /* MULT SImode */
1629 COSTS_N_INSNS (1), /* simple. */
1630 COSTS_N_INSNS (1), /* flag_setting. */
1631 COSTS_N_INSNS (2), /* extend. */
1632 COSTS_N_INSNS (1), /* add. */
1633 COSTS_N_INSNS (3), /* extend_add. */
1634 COSTS_N_INSNS (8) /* idiv. */
1636 /* MULT DImode */
1638 0, /* simple (N/A). */
1639 0, /* flag_setting (N/A). */
1640 COSTS_N_INSNS (2), /* extend. */
1641 0, /* add (N/A). */
1642 COSTS_N_INSNS (3), /* extend_add. */
1643 0 /* idiv (N/A). */
1646 /* LD/ST */
1648 COSTS_N_INSNS (2), /* load. */
1649 0, /* load_sign_extend. */
1650 COSTS_N_INSNS (3), /* ldrd. */
1651 COSTS_N_INSNS (2), /* ldm_1st. */
1652 1, /* ldm_regs_per_insn_1st. */
1653 1, /* ldm_regs_per_insn_subsequent. */
1654 COSTS_N_INSNS (2), /* loadf. */
1655 COSTS_N_INSNS (3), /* loadd. */
1656 COSTS_N_INSNS (1), /* load_unaligned. */
1657 COSTS_N_INSNS (2), /* store. */
1658 COSTS_N_INSNS (3), /* strd. */
1659 COSTS_N_INSNS (2), /* stm_1st. */
1660 1, /* stm_regs_per_insn_1st. */
1661 1, /* stm_regs_per_insn_subsequent. */
1662 COSTS_N_INSNS (2), /* storef. */
1663 COSTS_N_INSNS (3), /* stored. */
1664 COSTS_N_INSNS (1) /* store_unaligned. */
1667 /* FP SFmode */
1669 COSTS_N_INSNS (7), /* div. */
1670 COSTS_N_INSNS (2), /* mult. */
1671 COSTS_N_INSNS (5), /* mult_addsub. */
1672 COSTS_N_INSNS (3), /* fma. */
1673 COSTS_N_INSNS (1), /* addsub. */
1674 0, /* fpconst. */
1675 0, /* neg. */
1676 0, /* compare. */
1677 0, /* widen. */
1678 0, /* narrow. */
1679 0, /* toint. */
1680 0, /* fromint. */
1681 0 /* roundint. */
1683 /* FP DFmode */
1685 COSTS_N_INSNS (15), /* div. */
1686 COSTS_N_INSNS (5), /* mult. */
1687 COSTS_N_INSNS (7), /* mult_addsub. */
1688 COSTS_N_INSNS (7), /* fma. */
1689 COSTS_N_INSNS (3), /* addsub. */
1690 0, /* fpconst. */
1691 0, /* neg. */
1692 0, /* compare. */
1693 0, /* widen. */
1694 0, /* narrow. */
1695 0, /* toint. */
1696 0, /* fromint. */
1697 0 /* roundint. */
1700 /* Vector */
1702 COSTS_N_INSNS (1) /* alu. */
1706 const struct tune_params arm_slowmul_tune =
1708 arm_slowmul_rtx_costs,
1709 NULL,
1710 NULL, /* Sched adj cost. */
1711 3, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8 /* Maximum insns to inline memset. */
1725 const struct tune_params arm_fastmul_tune =
1727 arm_fastmul_rtx_costs,
1728 NULL,
1729 NULL, /* Sched adj cost. */
1730 1, /* Constant limit. */
1731 5, /* Max cond insns. */
1732 ARM_PREFETCH_NOT_BENEFICIAL,
1733 true, /* Prefer constant pool. */
1734 arm_default_branch_cost,
1735 false, /* Prefer LDRD/STRD. */
1736 {true, true}, /* Prefer non short circuit. */
1737 &arm_default_vec_cost, /* Vectorizer costs. */
1738 false, /* Prefer Neon for 64-bits bitops. */
1739 false, false, /* Prefer 32-bit encodings. */
1740 false, /* Prefer Neon for stringops. */
1741 8 /* Maximum insns to inline memset. */
1744 /* StrongARM has early execution of branches, so a sequence that is worth
1745 skipping is shorter. Set max_insns_skipped to a lower value. */
1747 const struct tune_params arm_strongarm_tune =
1749 arm_fastmul_rtx_costs,
1750 NULL,
1751 NULL, /* Sched adj cost. */
1752 1, /* Constant limit. */
1753 3, /* Max cond insns. */
1754 ARM_PREFETCH_NOT_BENEFICIAL,
1755 true, /* Prefer constant pool. */
1756 arm_default_branch_cost,
1757 false, /* Prefer LDRD/STRD. */
1758 {true, true}, /* Prefer non short circuit. */
1759 &arm_default_vec_cost, /* Vectorizer costs. */
1760 false, /* Prefer Neon for 64-bits bitops. */
1761 false, false, /* Prefer 32-bit encodings. */
1762 false, /* Prefer Neon for stringops. */
1763 8 /* Maximum insns to inline memset. */
1766 const struct tune_params arm_xscale_tune =
1768 arm_xscale_rtx_costs,
1769 NULL,
1770 xscale_sched_adjust_cost,
1771 2, /* Constant limit. */
1772 3, /* Max cond insns. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 true, /* Prefer constant pool. */
1775 arm_default_branch_cost,
1776 false, /* Prefer LDRD/STRD. */
1777 {true, true}, /* Prefer non short circuit. */
1778 &arm_default_vec_cost, /* Vectorizer costs. */
1779 false, /* Prefer Neon for 64-bits bitops. */
1780 false, false, /* Prefer 32-bit encodings. */
1781 false, /* Prefer Neon for stringops. */
1782 8 /* Maximum insns to inline memset. */
1785 const struct tune_params arm_9e_tune =
1787 arm_9e_rtx_costs,
1788 NULL,
1789 NULL, /* Sched adj cost. */
1790 1, /* Constant limit. */
1791 5, /* Max cond insns. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 true, /* Prefer constant pool. */
1794 arm_default_branch_cost,
1795 false, /* Prefer LDRD/STRD. */
1796 {true, true}, /* Prefer non short circuit. */
1797 &arm_default_vec_cost, /* Vectorizer costs. */
1798 false, /* Prefer Neon for 64-bits bitops. */
1799 false, false, /* Prefer 32-bit encodings. */
1800 false, /* Prefer Neon for stringops. */
1801 8 /* Maximum insns to inline memset. */
1804 const struct tune_params arm_v6t2_tune =
1806 arm_9e_rtx_costs,
1807 NULL,
1808 NULL, /* Sched adj cost. */
1809 1, /* Constant limit. */
1810 5, /* Max cond insns. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 false, /* Prefer constant pool. */
1813 arm_default_branch_cost,
1814 false, /* Prefer LDRD/STRD. */
1815 {true, true}, /* Prefer non short circuit. */
1816 &arm_default_vec_cost, /* Vectorizer costs. */
1817 false, /* Prefer Neon for 64-bits bitops. */
1818 false, false, /* Prefer 32-bit encodings. */
1819 false, /* Prefer Neon for stringops. */
1820 8 /* Maximum insns to inline memset. */
1823 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1824 const struct tune_params arm_cortex_tune =
1826 arm_9e_rtx_costs,
1827 &generic_extra_costs,
1828 NULL, /* Sched adj cost. */
1829 1, /* Constant limit. */
1830 5, /* Max cond insns. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 false, /* Prefer constant pool. */
1833 arm_default_branch_cost,
1834 false, /* Prefer LDRD/STRD. */
1835 {true, true}, /* Prefer non short circuit. */
1836 &arm_default_vec_cost, /* Vectorizer costs. */
1837 false, /* Prefer Neon for 64-bits bitops. */
1838 false, false, /* Prefer 32-bit encodings. */
1839 false, /* Prefer Neon for stringops. */
1840 8 /* Maximum insns to inline memset. */
1843 const struct tune_params arm_cortex_a8_tune =
1845 arm_9e_rtx_costs,
1846 &cortexa8_extra_costs,
1847 NULL, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 true, /* Prefer Neon for stringops. */
1859 8 /* Maximum insns to inline memset. */
1862 const struct tune_params arm_cortex_a7_tune =
1864 arm_9e_rtx_costs,
1865 &cortexa7_extra_costs,
1866 NULL,
1867 1, /* Constant limit. */
1868 5, /* Max cond insns. */
1869 ARM_PREFETCH_NOT_BENEFICIAL,
1870 false, /* Prefer constant pool. */
1871 arm_default_branch_cost,
1872 false, /* Prefer LDRD/STRD. */
1873 {true, true}, /* Prefer non short circuit. */
1874 &arm_default_vec_cost, /* Vectorizer costs. */
1875 false, /* Prefer Neon for 64-bits bitops. */
1876 false, false, /* Prefer 32-bit encodings. */
1877 true, /* Prefer Neon for stringops. */
1878 8 /* Maximum insns to inline memset. */
1881 const struct tune_params arm_cortex_a15_tune =
1883 arm_9e_rtx_costs,
1884 &cortexa15_extra_costs,
1885 NULL, /* Sched adj cost. */
1886 1, /* Constant limit. */
1887 2, /* Max cond insns. */
1888 ARM_PREFETCH_NOT_BENEFICIAL,
1889 false, /* Prefer constant pool. */
1890 arm_default_branch_cost,
1891 true, /* Prefer LDRD/STRD. */
1892 {true, true}, /* Prefer non short circuit. */
1893 &arm_default_vec_cost, /* Vectorizer costs. */
1894 false, /* Prefer Neon for 64-bits bitops. */
1895 true, true, /* Prefer 32-bit encodings. */
1896 true, /* Prefer Neon for stringops. */
1897 8 /* Maximum insns to inline memset. */
1900 const struct tune_params arm_cortex_a53_tune =
1902 arm_9e_rtx_costs,
1903 &cortexa53_extra_costs,
1904 NULL, /* Scheduler cost adjustment. */
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 ARM_PREFETCH_NOT_BENEFICIAL,
1908 false, /* Prefer constant pool. */
1909 arm_default_branch_cost,
1910 false, /* Prefer LDRD/STRD. */
1911 {true, true}, /* Prefer non short circuit. */
1912 &arm_default_vec_cost, /* Vectorizer costs. */
1913 false, /* Prefer Neon for 64-bits bitops. */
1914 false, false, /* Prefer 32-bit encodings. */
1915 false, /* Prefer Neon for stringops. */
1916 8 /* Maximum insns to inline memset. */
1919 const struct tune_params arm_cortex_a57_tune =
1921 arm_9e_rtx_costs,
1922 &cortexa57_extra_costs,
1923 NULL, /* Scheduler cost adjustment. */
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 false, /* Prefer constant pool. */
1928 arm_default_branch_cost,
1929 true, /* Prefer LDRD/STRD. */
1930 {true, true}, /* Prefer non short circuit. */
1931 &arm_default_vec_cost, /* Vectorizer costs. */
1932 false, /* Prefer Neon for 64-bits bitops. */
1933 true, true, /* Prefer 32-bit encodings. */
1934 false, /* Prefer Neon for stringops. */
1935 8 /* Maximum insns to inline memset. */
1938 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1939 less appealing. Set max_insns_skipped to a low value. */
1941 const struct tune_params arm_cortex_a5_tune =
1943 arm_9e_rtx_costs,
1944 &cortexa5_extra_costs,
1945 NULL, /* Sched adj cost. */
1946 1, /* Constant limit. */
1947 1, /* Max cond insns. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 false, /* Prefer constant pool. */
1950 arm_cortex_a5_branch_cost,
1951 false, /* Prefer LDRD/STRD. */
1952 {false, false}, /* Prefer non short circuit. */
1953 &arm_default_vec_cost, /* Vectorizer costs. */
1954 false, /* Prefer Neon for 64-bits bitops. */
1955 false, false, /* Prefer 32-bit encodings. */
1956 true, /* Prefer Neon for stringops. */
1957 8 /* Maximum insns to inline memset. */
1960 const struct tune_params arm_cortex_a9_tune =
1962 arm_9e_rtx_costs,
1963 &cortexa9_extra_costs,
1964 cortex_a9_sched_adjust_cost,
1965 1, /* Constant limit. */
1966 5, /* Max cond insns. */
1967 ARM_PREFETCH_BENEFICIAL(4,32,32),
1968 false, /* Prefer constant pool. */
1969 arm_default_branch_cost,
1970 false, /* Prefer LDRD/STRD. */
1971 {true, true}, /* Prefer non short circuit. */
1972 &arm_default_vec_cost, /* Vectorizer costs. */
1973 false, /* Prefer Neon for 64-bits bitops. */
1974 false, false, /* Prefer 32-bit encodings. */
1975 false, /* Prefer Neon for stringops. */
1976 8 /* Maximum insns to inline memset. */
1979 const struct tune_params arm_cortex_a12_tune =
1981 arm_9e_rtx_costs,
1982 &cortexa12_extra_costs,
1983 NULL,
1984 1, /* Constant limit. */
1985 5, /* Max cond insns. */
1986 ARM_PREFETCH_BENEFICIAL(4,32,32),
1987 false, /* Prefer constant pool. */
1988 arm_default_branch_cost,
1989 true, /* Prefer LDRD/STRD. */
1990 {true, true}, /* Prefer non short circuit. */
1991 &arm_default_vec_cost, /* Vectorizer costs. */
1992 false, /* Prefer Neon for 64-bits bitops. */
1993 false, false, /* Prefer 32-bit encodings. */
1994 true, /* Prefer Neon for stringops. */
1995 8 /* Maximum insns to inline memset. */
1998 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
1999 cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from the constant
2000 pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2001 loads/stores can be pipelined together, saving one cycle), and may also
2002 improve icache utilisation. Hence we prefer the constant pool for such
2003 processors. */
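/* Illustrative sketch (not part of the original source): the two ways of
   materialising a 32-bit constant that the comment above compares.

     @ MOVW/MOVT pair, one cycle each on Cortex-M4:
     movw    r0, #0x5678        @ r0 = 0x00005678
     movt    r0, #0x1234        @ r0 = 0x12345678

     @ Literal-pool load, two cycles, but pipelines with neighbouring
     @ loads/stores:
     ldr     r0, .LC0
     ...
   .LC0:
     .word   0x12345678  */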
2005 const struct tune_params arm_v7m_tune =
2007 arm_9e_rtx_costs,
2008 &v7m_extra_costs,
2009 NULL, /* Sched adj cost. */
2010 1, /* Constant limit. */
2011 2, /* Max cond insns. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 true, /* Prefer constant pool. */
2014 arm_cortex_m_branch_cost,
2015 false, /* Prefer LDRD/STRD. */
2016 {false, false}, /* Prefer non short circuit. */
2017 &arm_default_vec_cost, /* Vectorizer costs. */
2018 false, /* Prefer Neon for 64-bits bitops. */
2019 false, false, /* Prefer 32-bit encodings. */
2020 false, /* Prefer Neon for stringops. */
2021 8 /* Maximum insns to inline memset. */
2024 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2025 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2026 const struct tune_params arm_v6m_tune =
2028 arm_9e_rtx_costs,
2029 NULL,
2030 NULL, /* Sched adj cost. */
2031 1, /* Constant limit. */
2032 5, /* Max cond insns. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 false, /* Prefer constant pool. */
2035 arm_default_branch_cost,
2036 false, /* Prefer LDRD/STRD. */
2037 {false, false}, /* Prefer non short circuit. */
2038 &arm_default_vec_cost, /* Vectorizer costs. */
2039 false, /* Prefer Neon for 64-bits bitops. */
2040 false, false, /* Prefer 32-bit encodings. */
2041 false, /* Prefer Neon for stringops. */
2042 8 /* Maximum insns to inline memset. */
2045 const struct tune_params arm_fa726te_tune =
2047 arm_9e_rtx_costs,
2048 NULL,
2049 fa726te_sched_adjust_cost,
2050 1, /* Constant limit. */
2051 5, /* Max cond insns. */
2052 ARM_PREFETCH_NOT_BENEFICIAL,
2053 true, /* Prefer constant pool. */
2054 arm_default_branch_cost,
2055 false, /* Prefer LDRD/STRD. */
2056 {true, true}, /* Prefer non short circuit. */
2057 &arm_default_vec_cost, /* Vectorizer costs. */
2058 false, /* Prefer Neon for 64-bits bitops. */
2059 false, false, /* Prefer 32-bit encodings. */
2060 false, /* Prefer Neon for stringops. */
2061 8 /* Maximum insns to inline memset. */
2065 /* Not all of these give usefully different compilation alternatives,
2066 but there is no simple way of generalizing them. */
2067 static const struct processors all_cores[] =
2069 /* ARM Cores */
2070 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2071 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2072 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2073 #include "arm-cores.def"
2074 #undef ARM_CORE
2075 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2078 static const struct processors all_architectures[] =
2080 /* ARM Architectures */
2081 /* We don't specify tuning costs here as it will be figured out
2082 from the core. */
2084 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2085 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2086 #include "arm-arches.def"
2087 #undef ARM_ARCH
2088 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2092 /* These are populated as commandline arguments are processed, or NULL
2093 if not specified. */
2094 static const struct processors *arm_selected_arch;
2095 static const struct processors *arm_selected_cpu;
2096 static const struct processors *arm_selected_tune;
2098 /* The name of the preprocessor macro to define for this architecture. */
2100 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2102 /* Available values for -mfpu=. */
2104 static const struct arm_fpu_desc all_fpus[] =
2106 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2107 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2108 #include "arm-fpus.def"
2109 #undef ARM_FPU
2113 /* Supported TLS relocations. */
2115 enum tls_reloc {
2116 TLS_GD32,
2117 TLS_LDM32,
2118 TLS_LDO32,
2119 TLS_IE32,
2120 TLS_LE32,
2121 TLS_DESCSEQ /* GNU scheme */
2124 /* The maximum number of insns to be used when loading a constant. */
2125 inline static int
2126 arm_constant_limit (bool size_p)
2128 return size_p ? 1 : current_tune->constant_limit;
2131 /* Emit an insn that's a simple single-set. Both the operands must be known
2132 to be valid. */
2133 inline static rtx_insn *
2134 emit_set_insn (rtx x, rtx y)
2136 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2139 /* Return the number of bits set in VALUE. */
2140 static unsigned
2141 bit_count (unsigned long value)
2143 unsigned long count = 0;
2145 while (value)
2147 count++;
2148 value &= value - 1; /* Clear the least-significant set bit. */
2151 return count;
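/* Worked example (for illustration): value = 0x29 = 0b101001.
     0b101001 & 0b101000 = 0b101000
     0b101000 & 0b100111 = 0b100000
     0b100000 & 0b011111 = 0
   Each iteration clears exactly one set bit, so the loop runs three
   times and bit_count (0x29) == 3.  */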
2154 typedef struct
2156 machine_mode mode;
2157 const char *name;
2158 } arm_fixed_mode_set;
2160 /* A small helper for setting fixed-point library libfuncs. */
2162 static void
2163 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2164 const char *funcname, const char *modename,
2165 int num_suffix)
2167 char buffer[50];
2169 if (num_suffix == 0)
2170 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2171 else
2172 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2174 set_optab_libfunc (optable, mode, buffer);
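/* Example of the names produced (illustrative): calling this with add_optab,
   SQmode, "add", "sq" and a num_suffix of 3 registers "__gnu_addsq3" as the
   SQmode addition libfunc; a num_suffix of 0 simply omits the trailing
   digit.  */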
2177 static void
2178 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2179 machine_mode from, const char *funcname,
2180 const char *toname, const char *fromname)
2182 char buffer[50];
2183 const char *maybe_suffix_2 = "";
2185 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2186 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2187 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2188 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2189 maybe_suffix_2 = "2";
2191 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2192 maybe_suffix_2);
2194 set_conv_libfunc (optable, to, from, buffer);
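/* Examples of the names produced (illustrative), following the "2" suffix
   rule above:
     fract_optab, SQmode -> SImode : "__gnu_fractsqsi"  (no suffix, since
       SImode is not a fixed-point mode);
     fract_optab, QQmode -> HQmode : "__gnu_fractqqhq2" (both signed fract
       modes, so the "2" suffix is appended).  */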
2197 /* Set up library functions unique to ARM. */
2199 static void
2200 arm_init_libfuncs (void)
2202 /* For Linux, we have access to kernel support for atomic operations. */
2203 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2204 init_sync_libfuncs (2 * UNITS_PER_WORD);
2206 /* There are no special library functions unless we are using the
2207 ARM BPABI. */
2208 if (!TARGET_BPABI)
2209 return;
2211 /* The functions below are described in Section 4 of the "Run-Time
2212 ABI for the ARM architecture", Version 1.0. */
2214 /* Double-precision floating-point arithmetic. Table 2. */
2215 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2216 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2217 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2218 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2219 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2221 /* Double-precision comparisons. Table 3. */
2222 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2223 set_optab_libfunc (ne_optab, DFmode, NULL);
2224 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2225 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2226 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2227 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2228 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2230 /* Single-precision floating-point arithmetic. Table 4. */
2231 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2232 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2233 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2234 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2235 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2237 /* Single-precision comparisons. Table 5. */
2238 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2239 set_optab_libfunc (ne_optab, SFmode, NULL);
2240 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2241 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2242 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2243 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2244 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2246 /* Floating-point to integer conversions. Table 6. */
2247 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2248 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2249 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2250 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2251 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2252 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2253 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2254 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2256 /* Conversions between floating types. Table 7. */
2257 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2258 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2260 /* Integer to floating-point conversions. Table 8. */
2261 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2262 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2263 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2264 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2265 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2266 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2267 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2268 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2270 /* Long long. Table 9. */
2271 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2272 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2273 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2274 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2275 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2276 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2277 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2278 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2280 /* Integer (32/32->32) division. \S 4.3.1. */
2281 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2282 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2284 /* The divmod functions are designed so that they can be used for
2285 plain division, even though they return both the quotient and the
2286 remainder. The quotient is returned in the usual location (i.e.,
2287 r0 for SImode, {r0, r1} for DImode), just as would be expected
2288 for an ordinary division routine. Because the AAPCS calling
2289 conventions specify that all of { r0, r1, r2, r3 } are
2290 call-clobbered registers, there is no need to tell the compiler
2291 explicitly that those registers are clobbered by these
2292 routines. */
2293 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2294 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
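/* Illustrative sketch (not from the sources): __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1, so a plain 32-bit division

       int quot = num / den;

   could simply be compiled as

       bl      __aeabi_idivmod   @ r0 = num / den, r1 = num % den
       @ keep r0, ignore r1

   (in practice SImode division is redirected to the faster __aeabi_idiv
   just below; the DImode case really does go through __aeabi_ldivmod).  */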
2296 /* For SImode division the ABI provides div-without-mod routines,
2297 which are faster. */
2298 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2299 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2301 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2302 divmod libcalls instead. */
2303 set_optab_libfunc (smod_optab, DImode, NULL);
2304 set_optab_libfunc (umod_optab, DImode, NULL);
2305 set_optab_libfunc (smod_optab, SImode, NULL);
2306 set_optab_libfunc (umod_optab, SImode, NULL);
2308 /* Half-precision float operations. The compiler handles all operations
2309 with NULL libfuncs by converting to SFmode. */
2310 switch (arm_fp16_format)
2312 case ARM_FP16_FORMAT_IEEE:
2313 case ARM_FP16_FORMAT_ALTERNATIVE:
2315 /* Conversions. */
2316 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2317 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2318 ? "__gnu_f2h_ieee"
2319 : "__gnu_f2h_alternative"));
2320 set_conv_libfunc (sext_optab, SFmode, HFmode,
2321 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2322 ? "__gnu_h2f_ieee"
2323 : "__gnu_h2f_alternative"));
2325 /* Arithmetic. */
2326 set_optab_libfunc (add_optab, HFmode, NULL);
2327 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2328 set_optab_libfunc (smul_optab, HFmode, NULL);
2329 set_optab_libfunc (neg_optab, HFmode, NULL);
2330 set_optab_libfunc (sub_optab, HFmode, NULL);
2332 /* Comparisons. */
2333 set_optab_libfunc (eq_optab, HFmode, NULL);
2334 set_optab_libfunc (ne_optab, HFmode, NULL);
2335 set_optab_libfunc (lt_optab, HFmode, NULL);
2336 set_optab_libfunc (le_optab, HFmode, NULL);
2337 set_optab_libfunc (ge_optab, HFmode, NULL);
2338 set_optab_libfunc (gt_optab, HFmode, NULL);
2339 set_optab_libfunc (unord_optab, HFmode, NULL);
2340 break;
2342 default:
2343 break;
2346 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2348 const arm_fixed_mode_set fixed_arith_modes[] =
2350 { QQmode, "qq" },
2351 { UQQmode, "uqq" },
2352 { HQmode, "hq" },
2353 { UHQmode, "uhq" },
2354 { SQmode, "sq" },
2355 { USQmode, "usq" },
2356 { DQmode, "dq" },
2357 { UDQmode, "udq" },
2358 { TQmode, "tq" },
2359 { UTQmode, "utq" },
2360 { HAmode, "ha" },
2361 { UHAmode, "uha" },
2362 { SAmode, "sa" },
2363 { USAmode, "usa" },
2364 { DAmode, "da" },
2365 { UDAmode, "uda" },
2366 { TAmode, "ta" },
2367 { UTAmode, "uta" }
2369 const arm_fixed_mode_set fixed_conv_modes[] =
2371 { QQmode, "qq" },
2372 { UQQmode, "uqq" },
2373 { HQmode, "hq" },
2374 { UHQmode, "uhq" },
2375 { SQmode, "sq" },
2376 { USQmode, "usq" },
2377 { DQmode, "dq" },
2378 { UDQmode, "udq" },
2379 { TQmode, "tq" },
2380 { UTQmode, "utq" },
2381 { HAmode, "ha" },
2382 { UHAmode, "uha" },
2383 { SAmode, "sa" },
2384 { USAmode, "usa" },
2385 { DAmode, "da" },
2386 { UDAmode, "uda" },
2387 { TAmode, "ta" },
2388 { UTAmode, "uta" },
2389 { QImode, "qi" },
2390 { HImode, "hi" },
2391 { SImode, "si" },
2392 { DImode, "di" },
2393 { TImode, "ti" },
2394 { SFmode, "sf" },
2395 { DFmode, "df" }
2397 unsigned int i, j;
2399 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2401 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2402 "add", fixed_arith_modes[i].name, 3);
2403 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2404 "ssadd", fixed_arith_modes[i].name, 3);
2405 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2406 "usadd", fixed_arith_modes[i].name, 3);
2407 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2408 "sub", fixed_arith_modes[i].name, 3);
2409 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2410 "sssub", fixed_arith_modes[i].name, 3);
2411 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2412 "ussub", fixed_arith_modes[i].name, 3);
2413 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2414 "mul", fixed_arith_modes[i].name, 3);
2415 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2416 "ssmul", fixed_arith_modes[i].name, 3);
2417 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2418 "usmul", fixed_arith_modes[i].name, 3);
2419 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2420 "div", fixed_arith_modes[i].name, 3);
2421 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2422 "udiv", fixed_arith_modes[i].name, 3);
2423 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2424 "ssdiv", fixed_arith_modes[i].name, 3);
2425 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2426 "usdiv", fixed_arith_modes[i].name, 3);
2427 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2428 "neg", fixed_arith_modes[i].name, 2);
2429 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2430 "ssneg", fixed_arith_modes[i].name, 2);
2431 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2432 "usneg", fixed_arith_modes[i].name, 2);
2433 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2434 "ashl", fixed_arith_modes[i].name, 3);
2435 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2436 "ashr", fixed_arith_modes[i].name, 3);
2437 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2438 "lshr", fixed_arith_modes[i].name, 3);
2439 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2440 "ssashl", fixed_arith_modes[i].name, 3);
2441 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2442 "usashl", fixed_arith_modes[i].name, 3);
2443 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2444 "cmp", fixed_arith_modes[i].name, 2);
2447 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2448 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2450 if (i == j
2451 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2452 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2453 continue;
2455 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2456 fixed_conv_modes[j].mode, "fract",
2457 fixed_conv_modes[i].name,
2458 fixed_conv_modes[j].name);
2459 arm_set_fixed_conv_libfunc (satfract_optab,
2460 fixed_conv_modes[i].mode,
2461 fixed_conv_modes[j].mode, "satfract",
2462 fixed_conv_modes[i].name,
2463 fixed_conv_modes[j].name);
2464 arm_set_fixed_conv_libfunc (fractuns_optab,
2465 fixed_conv_modes[i].mode,
2466 fixed_conv_modes[j].mode, "fractuns",
2467 fixed_conv_modes[i].name,
2468 fixed_conv_modes[j].name);
2469 arm_set_fixed_conv_libfunc (satfractuns_optab,
2470 fixed_conv_modes[i].mode,
2471 fixed_conv_modes[j].mode, "satfractuns",
2472 fixed_conv_modes[i].name,
2473 fixed_conv_modes[j].name);
2477 if (TARGET_AAPCS_BASED)
2478 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2481 /* On AAPCS systems, this is the "struct __va_list". */
2482 static GTY(()) tree va_list_type;
2484 /* Return the type to use as __builtin_va_list. */
2485 static tree
2486 arm_build_builtin_va_list (void)
2488 tree va_list_name;
2489 tree ap_field;
2491 if (!TARGET_AAPCS_BASED)
2492 return std_build_builtin_va_list ();
2494 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2495 defined as:
2497 struct __va_list
2499 void *__ap;
2502 The C Library ABI further reinforces this definition in \S
2503 4.1.
2505 We must follow this definition exactly. The structure tag
2506 name is visible in C++ mangled names, and thus forms a part
2507 of the ABI. The field name may be used by people who
2508 #include <stdarg.h>. */
2509 /* Create the type. */
2510 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2511 /* Give it the required name. */
2512 va_list_name = build_decl (BUILTINS_LOCATION,
2513 TYPE_DECL,
2514 get_identifier ("__va_list"),
2515 va_list_type);
2516 DECL_ARTIFICIAL (va_list_name) = 1;
2517 TYPE_NAME (va_list_type) = va_list_name;
2518 TYPE_STUB_DECL (va_list_type) = va_list_name;
2519 /* Create the __ap field. */
2520 ap_field = build_decl (BUILTINS_LOCATION,
2521 FIELD_DECL,
2522 get_identifier ("__ap"),
2523 ptr_type_node);
2524 DECL_ARTIFICIAL (ap_field) = 1;
2525 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2526 TYPE_FIELDS (va_list_type) = ap_field;
2527 /* Compute its layout. */
2528 layout_type (va_list_type);
2530 return va_list_type;
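/* For illustration only: the type built above corresponds to the user-level
   declaration mandated by the AAPCS,

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   which is why the tag name and field name used here must not change.  */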
2533 /* Return an expression of type "void *" pointing to the next
2534 available argument in a variable-argument list. VALIST is the
2535 user-level va_list object, of type __builtin_va_list. */
2536 static tree
2537 arm_extract_valist_ptr (tree valist)
2539 if (TREE_TYPE (valist) == error_mark_node)
2540 return error_mark_node;
2542 /* On an AAPCS target, the pointer is stored within "struct
2543 va_list". */
2544 if (TARGET_AAPCS_BASED)
2546 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2547 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2548 valist, ap_field, NULL_TREE);
2551 return valist;
2554 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2555 static void
2556 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2558 valist = arm_extract_valist_ptr (valist);
2559 std_expand_builtin_va_start (valist, nextarg);
2562 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2563 static tree
2564 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2565 gimple_seq *post_p)
2567 valist = arm_extract_valist_ptr (valist);
2568 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2571 /* Fix up any incompatible options that the user has specified. */
2572 static void
2573 arm_option_override (void)
2575 if (global_options_set.x_arm_arch_option)
2576 arm_selected_arch = &all_architectures[arm_arch_option];
2578 if (global_options_set.x_arm_cpu_option)
2580 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2581 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2584 if (global_options_set.x_arm_tune_option)
2585 arm_selected_tune = &all_cores[(int) arm_tune_option];
2587 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2588 SUBTARGET_OVERRIDE_OPTIONS;
2589 #endif
2591 if (arm_selected_arch)
2593 if (arm_selected_cpu)
2595 /* Check for conflict between mcpu and march. */
2596 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2598 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2599 arm_selected_cpu->name, arm_selected_arch->name);
2600 /* -march wins for code generation.
2601 -mcpu wins for default tuning. */
2602 if (!arm_selected_tune)
2603 arm_selected_tune = arm_selected_cpu;
2605 arm_selected_cpu = arm_selected_arch;
2607 else
2608 /* -mcpu wins. */
2609 arm_selected_arch = NULL;
2611 else
2612 /* Pick a CPU based on the architecture. */
2613 arm_selected_cpu = arm_selected_arch;
2616 /* If the user did not specify a processor, choose one for them. */
2617 if (!arm_selected_cpu)
2619 const struct processors * sel;
2620 unsigned int sought;
2622 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2623 if (!arm_selected_cpu->name)
2625 #ifdef SUBTARGET_CPU_DEFAULT
2626 /* Use the subtarget default CPU if none was specified by
2627 configure. */
2628 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2629 #endif
2630 /* Default to ARM6. */
2631 if (!arm_selected_cpu->name)
2632 arm_selected_cpu = &all_cores[arm6];
2635 sel = arm_selected_cpu;
2636 insn_flags = sel->flags;
2638 /* Now check to see if the user has specified some command line
2639 switches that require certain abilities from the cpu. */
2640 sought = 0;
2642 if (TARGET_INTERWORK || TARGET_THUMB)
2644 sought |= (FL_THUMB | FL_MODE32);
2646 /* There are no ARM processors that support both APCS-26 and
2647 interworking. Therefore we force FL_MODE26 to be removed
2648 from insn_flags here (if it was set), so that the search
2649 below will always be able to find a compatible processor. */
2650 insn_flags &= ~FL_MODE26;
2653 if (sought != 0 && ((sought & insn_flags) != sought))
2655 /* Try to locate a CPU type that supports all of the abilities
2656 of the default CPU, plus the extra abilities requested by
2657 the user. */
2658 for (sel = all_cores; sel->name != NULL; sel++)
2659 if ((sel->flags & sought) == (sought | insn_flags))
2660 break;
2662 if (sel->name == NULL)
2664 unsigned current_bit_count = 0;
2665 const struct processors * best_fit = NULL;
2667 /* Ideally we would like to issue an error message here
2668 saying that it was not possible to find a CPU compatible
2669 with the default CPU, but which also supports the command
2670 line options specified by the programmer, and so they
2671 ought to use the -mcpu=<name> command line option to
2672 override the default CPU type.
2674 If we cannot find a cpu that has both the
2675 characteristics of the default cpu and the given
2676 command line options we scan the array again looking
2677 for a best match. */
2678 for (sel = all_cores; sel->name != NULL; sel++)
2679 if ((sel->flags & sought) == sought)
2681 unsigned count;
2683 count = bit_count (sel->flags & insn_flags);
2685 if (count >= current_bit_count)
2687 best_fit = sel;
2688 current_bit_count = count;
2692 gcc_assert (best_fit);
2693 sel = best_fit;
2696 arm_selected_cpu = sel;
2700 gcc_assert (arm_selected_cpu);
2701 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2702 if (!arm_selected_tune)
2703 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2705 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2706 insn_flags = arm_selected_cpu->flags;
2707 arm_base_arch = arm_selected_cpu->base_arch;
2709 arm_tune = arm_selected_tune->core;
2710 tune_flags = arm_selected_tune->flags;
2711 current_tune = arm_selected_tune->tune;
2713 /* Make sure that the processor choice does not conflict with any of the
2714 other command line choices. */
2715 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2716 error ("target CPU does not support ARM mode");
2718 /* BPABI targets use linker tricks to allow interworking on cores
2719 without thumb support. */
2720 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2722 warning (0, "target CPU does not support interworking" );
2723 target_flags &= ~MASK_INTERWORK;
2726 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2728 warning (0, "target CPU does not support THUMB instructions");
2729 target_flags &= ~MASK_THUMB;
2732 if (TARGET_APCS_FRAME && TARGET_THUMB)
2734 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2735 target_flags &= ~MASK_APCS_FRAME;
2738 /* Callee super interworking implies thumb interworking. Adding
2739 this to the flags here simplifies the logic elsewhere. */
2740 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2741 target_flags |= MASK_INTERWORK;
2743 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2744 from here where no function is being compiled currently. */
2745 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2746 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2748 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2749 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2751 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2753 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2754 target_flags |= MASK_APCS_FRAME;
2757 if (TARGET_POKE_FUNCTION_NAME)
2758 target_flags |= MASK_APCS_FRAME;
2760 if (TARGET_APCS_REENT && flag_pic)
2761 error ("-fpic and -mapcs-reent are incompatible");
2763 if (TARGET_APCS_REENT)
2764 warning (0, "APCS reentrant code not supported. Ignored");
2766 /* If this target is normally configured to use APCS frames, warn if they
2767 are turned off and debugging is turned on. */
2768 if (TARGET_ARM
2769 && write_symbols != NO_DEBUG
2770 && !TARGET_APCS_FRAME
2771 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2772 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2774 if (TARGET_APCS_FLOAT)
2775 warning (0, "passing floating point arguments in fp regs not yet supported");
2777 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2778 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2779 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2780 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2781 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2782 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2783 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2784 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2785 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2786 arm_arch6m = arm_arch6 && !arm_arch_notm;
2787 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2788 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2789 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2790 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2791 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2793 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2794 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2795 thumb_code = TARGET_ARM == 0;
2796 thumb1_code = TARGET_THUMB1 != 0;
2797 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2798 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2799 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2800 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2801 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2802 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2803 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2804 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2805 if (arm_restrict_it == 2)
2806 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2808 if (!TARGET_THUMB2)
2809 arm_restrict_it = 0;
2811 /* If we are not using the default (ARM mode) section anchor offset
2812 ranges, then set the correct ranges now. */
2813 if (TARGET_THUMB1)
2815 /* Thumb-1 LDR instructions cannot have negative offsets.
2816 Permissible positive offset ranges are 5-bit (for byte loads),
2817 6-bit (for halfword loads), or 7-bit (for word loads).
2818 Empirical results suggest a 7-bit anchor range gives the best
2819 overall code size. */
2820 targetm.min_anchor_offset = 0;
2821 targetm.max_anchor_offset = 127;
2823 else if (TARGET_THUMB2)
2825 /* The minimum is set such that the total size of the block
2826 for a particular anchor is 248 + 1 + 4095 bytes, which is
2827 divisible by eight, ensuring natural spacing of anchors. */
2828 targetm.min_anchor_offset = -248;
2829 targetm.max_anchor_offset = 4095;
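/* Arithmetic behind the comment above: 248 + 1 + 4095 = 4344 = 8 * 543,
   so the total block covered by one anchor is indeed a multiple of
   eight bytes.  */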
2832 /* V5 code we generate is completely interworking capable, so we turn off
2833 TARGET_INTERWORK here to avoid many tests later on. */
2835 /* XXX However, we must pass the right pre-processor defines to CPP
2836 or GLD can get confused. This is a hack. */
2837 if (TARGET_INTERWORK)
2838 arm_cpp_interwork = 1;
2840 if (arm_arch5)
2841 target_flags &= ~MASK_INTERWORK;
2843 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2844 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2846 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2847 error ("iwmmxt abi requires an iwmmxt capable cpu");
2849 if (!global_options_set.x_arm_fpu_index)
2851 const char *target_fpu_name;
2852 bool ok;
2854 #ifdef FPUTYPE_DEFAULT
2855 target_fpu_name = FPUTYPE_DEFAULT;
2856 #else
2857 target_fpu_name = "vfp";
2858 #endif
2860 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2861 CL_TARGET);
2862 gcc_assert (ok);
2865 arm_fpu_desc = &all_fpus[arm_fpu_index];
2867 if (TARGET_NEON && !arm_arch7)
2868 error ("target CPU does not support NEON");
2870 switch (arm_fpu_desc->model)
2872 case ARM_FP_MODEL_VFP:
2873 arm_fpu_attr = FPU_VFP;
2874 break;
2876 default:
2877 gcc_unreachable();
2880 if (TARGET_AAPCS_BASED)
2882 if (TARGET_CALLER_INTERWORKING)
2883 error ("AAPCS does not support -mcaller-super-interworking");
2884 else
2885 if (TARGET_CALLEE_INTERWORKING)
2886 error ("AAPCS does not support -mcallee-super-interworking");
2889 /* iWMMXt and NEON are incompatible. */
2890 if (TARGET_IWMMXT && TARGET_NEON)
2891 error ("iWMMXt and NEON are incompatible");
2893 /* iWMMXt unsupported under Thumb mode. */
2894 if (TARGET_THUMB && TARGET_IWMMXT)
2895 error ("iWMMXt unsupported under Thumb mode");
2897 /* __fp16 support currently assumes the core has ldrh. */
2898 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2899 sorry ("__fp16 and no ldrh");
2901 /* If soft-float is specified then don't use FPU. */
2902 if (TARGET_SOFT_FLOAT)
2903 arm_fpu_attr = FPU_NONE;
2905 if (TARGET_AAPCS_BASED)
2907 if (arm_abi == ARM_ABI_IWMMXT)
2908 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2909 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2910 && TARGET_HARD_FLOAT
2911 && TARGET_VFP)
2912 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2913 else
2914 arm_pcs_default = ARM_PCS_AAPCS;
2916 else
2918 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2919 sorry ("-mfloat-abi=hard and VFP");
2921 if (arm_abi == ARM_ABI_APCS)
2922 arm_pcs_default = ARM_PCS_APCS;
2923 else
2924 arm_pcs_default = ARM_PCS_ATPCS;
2927 /* For arm2/3 there is no need to do any scheduling if we are doing
2928 software floating-point. */
2929 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2930 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2932 /* Use the cp15 method if it is available. */
2933 if (target_thread_pointer == TP_AUTO)
2935 if (arm_arch6k && !TARGET_THUMB1)
2936 target_thread_pointer = TP_CP15;
2937 else
2938 target_thread_pointer = TP_SOFT;
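/* Illustrative sketch (an assumption, not spelled out here): with TP_CP15
   the thread pointer is read directly from the CP15 user read-only thread
   ID register, roughly

     mrc     p15, 0, r0, c13, c0, 3   @ r0 = TPIDRURO

   whereas TP_SOFT falls back to calling the __aeabi_read_tp helper.  */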
2941 if (TARGET_HARD_TP && TARGET_THUMB1)
2942 error ("can not use -mtp=cp15 with 16-bit Thumb");
2944 /* Override the default structure alignment for AAPCS ABI. */
2945 if (!global_options_set.x_arm_structure_size_boundary)
2947 if (TARGET_AAPCS_BASED)
2948 arm_structure_size_boundary = 8;
2950 else
2952 if (arm_structure_size_boundary != 8
2953 && arm_structure_size_boundary != 32
2954 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2956 if (ARM_DOUBLEWORD_ALIGN)
2957 warning (0,
2958 "structure size boundary can only be set to 8, 32 or 64");
2959 else
2960 warning (0, "structure size boundary can only be set to 8 or 32");
2961 arm_structure_size_boundary
2962 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2966 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2968 error ("RTP PIC is incompatible with Thumb");
2969 flag_pic = 0;
2972 /* If stack checking is disabled, we can use r10 as the PIC register,
2973 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2974 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2976 if (TARGET_VXWORKS_RTP)
2977 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2978 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2981 if (flag_pic && TARGET_VXWORKS_RTP)
2982 arm_pic_register = 9;
2984 if (arm_pic_register_string != NULL)
2986 int pic_register = decode_reg_name (arm_pic_register_string);
2988 if (!flag_pic)
2989 warning (0, "-mpic-register= is useless without -fpic");
2991 /* Prevent the user from choosing an obviously stupid PIC register. */
2992 else if (pic_register < 0 || call_used_regs[pic_register]
2993 || pic_register == HARD_FRAME_POINTER_REGNUM
2994 || pic_register == STACK_POINTER_REGNUM
2995 || pic_register >= PC_REGNUM
2996 || (TARGET_VXWORKS_RTP
2997 && (unsigned int) pic_register != arm_pic_register))
2998 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2999 else
3000 arm_pic_register = pic_register;
3003 if (TARGET_VXWORKS_RTP
3004 && !global_options_set.x_arm_pic_data_is_text_relative)
3005 arm_pic_data_is_text_relative = 0;
3007 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3008 if (fix_cm3_ldrd == 2)
3010 if (arm_selected_cpu->core == cortexm3)
3011 fix_cm3_ldrd = 1;
3012 else
3013 fix_cm3_ldrd = 0;
3016 /* Enable -munaligned-access by default for
3017 - all ARMv6 architecture-based processors
3018 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3019 - ARMv8 architecture-based processors.
3021 Disable -munaligned-access by default for
3022 - all pre-ARMv6 architecture-based processors
3023 - ARMv6-M architecture-based processors. */
3025 if (unaligned_access == 2)
3027 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3028 unaligned_access = 1;
3029 else
3030 unaligned_access = 0;
3032 else if (unaligned_access == 1
3033 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3035 warning (0, "target CPU does not support unaligned accesses");
3036 unaligned_access = 0;
3039 if (TARGET_THUMB1 && flag_schedule_insns)
3041 /* Don't warn since it's on by default in -O2. */
3042 flag_schedule_insns = 0;
3045 if (optimize_size)
3047 /* If optimizing for size, bump the number of instructions that we
3048 are prepared to conditionally execute (even on a StrongARM). */
3049 max_insns_skipped = 6;
3051 /* For THUMB2, we limit the conditional sequence to one IT block. */
3052 if (TARGET_THUMB2)
3053 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3055 else
3056 max_insns_skipped = current_tune->max_insns_skipped;
3058 /* Hot/Cold partitioning is not currently supported, since we can't
3059 handle literal pool placement in that case. */
3060 if (flag_reorder_blocks_and_partition)
3062 inform (input_location,
3063 "-freorder-blocks-and-partition not supported on this architecture");
3064 flag_reorder_blocks_and_partition = 0;
3065 flag_reorder_blocks = 1;
3068 if (flag_pic)
3069 /* Hoisting PIC address calculations more aggressively provides a small,
3070 but measurable, size reduction for PIC code. Therefore, we decrease
3071 the bar for unrestricted expression hoisting to the cost of PIC address
3072 calculation, which is 2 instructions. */
3073 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3074 global_options.x_param_values,
3075 global_options_set.x_param_values);
3077 /* ARM EABI defaults to strict volatile bitfields. */
3078 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3079 && abi_version_at_least(2))
3080 flag_strict_volatile_bitfields = 1;
3082 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3083 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3084 if (flag_prefetch_loop_arrays < 0
3085 && HAVE_prefetch
3086 && optimize >= 3
3087 && current_tune->num_prefetch_slots > 0)
3088 flag_prefetch_loop_arrays = 1;
3090 /* Set up parameters to be used in prefetching algorithm. Do not override the
3091 defaults unless we are tuning for a core we have researched values for. */
3092 if (current_tune->num_prefetch_slots > 0)
3093 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3094 current_tune->num_prefetch_slots,
3095 global_options.x_param_values,
3096 global_options_set.x_param_values);
3097 if (current_tune->l1_cache_line_size >= 0)
3098 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3099 current_tune->l1_cache_line_size,
3100 global_options.x_param_values,
3101 global_options_set.x_param_values);
3102 if (current_tune->l1_cache_size >= 0)
3103 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3104 current_tune->l1_cache_size,
3105 global_options.x_param_values,
3106 global_options_set.x_param_values);
3108 /* Use Neon rather than core registers to perform 64-bit
3109 operations. */
3110 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3111 if (use_neon_for_64bits == 1)
3112 prefer_neon_for_64bits = true;
3114 /* Use the alternative scheduling-pressure algorithm by default. */
3115 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3116 global_options.x_param_values,
3117 global_options_set.x_param_values);
3119 /* Disable shrink-wrap when optimizing function for size, since it tends to
3120 generate additional returns. */
3121 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3122 flag_shrink_wrap = false;
3123 /* TBD: Dwarf info for apcs frame is not handled yet. */
3124 if (TARGET_APCS_FRAME)
3125 flag_shrink_wrap = false;
3127 /* We only support -mslow-flash-data on armv7-m targets. */
3128 if (target_slow_flash_data
3129 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3130 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3131 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3133 /* Currently, for slow flash data, we just disable literal pools. */
3134 if (target_slow_flash_data)
3135 arm_disable_literal_pool = true;
3137 /* Register global variables with the garbage collector. */
3138 arm_add_gc_roots ();
3141 static void
3142 arm_add_gc_roots (void)
3144 gcc_obstack_init(&minipool_obstack);
3145 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3148 /* A table of known ARM exception types.
3149 For use with the interrupt function attribute. */
3151 typedef struct
3153 const char *const arg;
3154 const unsigned long return_value;
3156 isr_attribute_arg;
3158 static const isr_attribute_arg isr_attribute_args [] =
3160 { "IRQ", ARM_FT_ISR },
3161 { "irq", ARM_FT_ISR },
3162 { "FIQ", ARM_FT_FIQ },
3163 { "fiq", ARM_FT_FIQ },
3164 { "ABORT", ARM_FT_ISR },
3165 { "abort", ARM_FT_ISR },
3166 { "ABORT", ARM_FT_ISR },
3167 { "abort", ARM_FT_ISR },
3168 { "UNDEF", ARM_FT_EXCEPTION },
3169 { "undef", ARM_FT_EXCEPTION },
3170 { "SWI", ARM_FT_EXCEPTION },
3171 { "swi", ARM_FT_EXCEPTION },
3172 { NULL, ARM_FT_NORMAL }
3175 /* Returns the (interrupt) function type of the current
3176 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3178 static unsigned long
3179 arm_isr_value (tree argument)
3181 const isr_attribute_arg * ptr;
3182 const char * arg;
3184 if (!arm_arch_notm)
3185 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3187 /* No argument - default to IRQ. */
3188 if (argument == NULL_TREE)
3189 return ARM_FT_ISR;
3191 /* Get the value of the argument. */
3192 if (TREE_VALUE (argument) == NULL_TREE
3193 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3194 return ARM_FT_UNKNOWN;
3196 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3198 /* Check it against the list of known arguments. */
3199 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3200 if (streq (arg, ptr->arg))
3201 return ptr->return_value;
3203 /* An unrecognized interrupt type. */
3204 return ARM_FT_UNKNOWN;
3207 /* Computes the type of the current function. */
3209 static unsigned long
3210 arm_compute_func_type (void)
3212 unsigned long type = ARM_FT_UNKNOWN;
3213 tree a;
3214 tree attr;
3216 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3218 /* Decide if the current function is volatile. Such functions
3219 never return, and many memory cycles can be saved by not storing
3220 register values that will never be needed again. This optimization
3221 was added to speed up context switching in a kernel application. */
3222 if (optimize > 0
3223 && (TREE_NOTHROW (current_function_decl)
3224 || !(flag_unwind_tables
3225 || (flag_exceptions
3226 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3227 && TREE_THIS_VOLATILE (current_function_decl))
3228 type |= ARM_FT_VOLATILE;
3230 if (cfun->static_chain_decl != NULL)
3231 type |= ARM_FT_NESTED;
3233 attr = DECL_ATTRIBUTES (current_function_decl);
3235 a = lookup_attribute ("naked", attr);
3236 if (a != NULL_TREE)
3237 type |= ARM_FT_NAKED;
3239 a = lookup_attribute ("isr", attr);
3240 if (a == NULL_TREE)
3241 a = lookup_attribute ("interrupt", attr);
3243 if (a == NULL_TREE)
3244 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3245 else
3246 type |= arm_isr_value (TREE_VALUE (a));
3248 return type;
3251 /* Returns the type of the current function. */
3253 unsigned long
3254 arm_current_func_type (void)
3256 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3257 cfun->machine->func_type = arm_compute_func_type ();
3259 return cfun->machine->func_type;
3262 bool
3263 arm_allocate_stack_slots_for_args (void)
3265 /* Naked functions should not allocate stack slots for arguments. */
3266 return !IS_NAKED (arm_current_func_type ());
3269 static bool
3270 arm_warn_func_return (tree decl)
3272 /* Naked functions are implemented entirely in assembly, including the
3273 return sequence, so suppress warnings about this. */
3274 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3278 /* Output assembler code for a block containing the constant parts
3279 of a trampoline, leaving space for the variable parts.
3281 On the ARM, (if r8 is the static chain regnum, and remembering that
3282 referencing pc adds an offset of 8) the trampoline looks like:
3283 ldr r8, [pc, #0]
3284 ldr pc, [pc]
3285 .word static chain value
3286 .word function's address
3287 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3289 static void
3290 arm_asm_trampoline_template (FILE *f)
3292 if (TARGET_ARM)
3294 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3295 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3297 else if (TARGET_THUMB2)
3299 /* The Thumb-2 trampoline is similar to the arm implementation.
3300 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3301 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3302 STATIC_CHAIN_REGNUM, PC_REGNUM);
3303 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3305 else
3307 ASM_OUTPUT_ALIGN (f, 2);
3308 fprintf (f, "\t.code\t16\n");
3309 fprintf (f, ".Ltrampoline_start:\n");
3310 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3311 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3312 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3313 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3314 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3315 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3317 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3318 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3321 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3323 static void
3324 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3326 rtx fnaddr, mem, a_tramp;
3328 emit_block_move (m_tramp, assemble_trampoline_template (),
3329 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3331 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3332 emit_move_insn (mem, chain_value);
3334 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3335 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3336 emit_move_insn (mem, fnaddr);
3338 a_tramp = XEXP (m_tramp, 0);
3339 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3340 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3341 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
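/* Resulting layout for the 32-bit (ARM/Thumb-2) case, for illustration:

     offset  0:  first ldr    (loads the static chain register)
     offset  4:  second ldr   (loads pc)
     offset  8:  .word  static chain value
     offset 12:  .word  target function address

   hence the offsets 8 and 12 passed to adjust_address above; the Thumb-1
   variant keeps its two data words at offsets 12 and 16 instead.  */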
3344 /* Thumb trampolines should be entered in thumb mode, so set
3345 the bottom bit of the address. */
3347 static rtx
3348 arm_trampoline_adjust_address (rtx addr)
3350 if (TARGET_THUMB)
3351 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3352 NULL, 0, OPTAB_LIB_WIDEN);
3353 return addr;
3356 /* Return 1 if it is possible to return using a single instruction.
3357 If SIBLING is non-null, this is a test for a return before a sibling
3358 call. SIBLING is the call insn, so we can examine its register usage. */
3361 use_return_insn (int iscond, rtx sibling)
3363 int regno;
3364 unsigned int func_type;
3365 unsigned long saved_int_regs;
3366 unsigned HOST_WIDE_INT stack_adjust;
3367 arm_stack_offsets *offsets;
3369 /* Never use a return instruction before reload has run. */
3370 if (!reload_completed)
3371 return 0;
3373 func_type = arm_current_func_type ();
3375 /* Naked, volatile and stack alignment functions need special
3376 consideration. */
3377 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3378 return 0;
3380 /* So do interrupt functions that use the frame pointer and Thumb
3381 interrupt functions. */
3382 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3383 return 0;
3385 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3386 && !optimize_function_for_size_p (cfun))
3387 return 0;
3389 offsets = arm_get_frame_offsets ();
3390 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3392 /* As do variadic functions. */
3393 if (crtl->args.pretend_args_size
3394 || cfun->machine->uses_anonymous_args
3395 /* Or if the function calls __builtin_eh_return () */
3396 || crtl->calls_eh_return
3397 /* Or if the function calls alloca */
3398 || cfun->calls_alloca
3399 /* Or if there is a stack adjustment. However, if the stack pointer
3400 is saved on the stack, we can use a pre-incrementing stack load. */
3401 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3402 && stack_adjust == 4)))
3403 return 0;
3405 saved_int_regs = offsets->saved_regs_mask;
3407 /* Unfortunately, the insn
3409 ldmib sp, {..., sp, ...}
3411 triggers a bug on most SA-110 based devices, such that the stack
3412 pointer won't be correctly restored if the instruction takes a
3413 page fault. We work around this problem by popping r3 along with
3414 the other registers, since that is never slower than executing
3415 another instruction.
3417 We test for !arm_arch5 here, because code for any architecture
3418 less than this could potentially be run on one of the buggy
3419 chips. */
3420 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3422 /* Validate that r3 is a call-clobbered register (always true in
3423 the default abi) ... */
3424 if (!call_used_regs[3])
3425 return 0;
3427 /* ... that it isn't being used for a return value ... */
3428 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3429 return 0;
3431 /* ... or for a tail-call argument ... */
3432 if (sibling)
3434 gcc_assert (CALL_P (sibling));
3436 if (find_regno_fusage (sibling, USE, 3))
3437 return 0;
3440 /* ... and that there are no call-saved registers in r0-r2
3441 (always true in the default ABI). */
3442 if (saved_int_regs & 0x7)
3443 return 0;
3446 /* Can't be done if interworking with Thumb, and any registers have been
3447 stacked. */
3448 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3449 return 0;
3451 /* On StrongARM, conditional returns are expensive if they aren't
3452 taken and multiple registers have been stacked. */
3453 if (iscond && arm_tune_strongarm)
3455 /* Conditional return when just the LR is stored is a simple
3456 conditional-load instruction, that's not expensive. */
3457 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3458 return 0;
3460 if (flag_pic
3461 && arm_pic_register != INVALID_REGNUM
3462 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3463 return 0;
3466 /* If there are saved registers but the LR isn't saved, then we need
3467 two instructions for the return. */
3468 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3469 return 0;
3471 /* Can't be done if any of the VFP regs are pushed,
3472 since this also requires an insn. */
3473 if (TARGET_HARD_FLOAT && TARGET_VFP)
3474 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3475 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3476 return 0;
3478 if (TARGET_REALLY_IWMMXT)
3479 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3480 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3481 return 0;
3483 return 1;
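/* A rough illustration of the conditions above: a simple leaf function can
   return with a single "bx lr", and a function that pushed only core
   registers including lr can return with one "ldm"/"pop {..., pc}", so both
   get 1 here.  A function that must first adjust the stack pointer, that
   saved registers without saving lr, or that pushed call-saved VFP or
   iWMMXt registers needs at least one extra instruction and gets 0.  */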
3486 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3487 shrink-wrapping if possible. This is the case if we need to emit a
3488 prologue, which we can test by looking at the offsets. */
3489 bool
3490 use_simple_return_p (void)
3492 arm_stack_offsets *offsets;
3494 offsets = arm_get_frame_offsets ();
3495 return offsets->outgoing_args != 0;
3498 /* Return TRUE if int I is a valid immediate ARM constant. */
3501 const_ok_for_arm (HOST_WIDE_INT i)
3503 int lowbit;
3505 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3506 be all zero, or all one. */
3507 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3508 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3509 != ((~(unsigned HOST_WIDE_INT) 0)
3510 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3511 return FALSE;
3513 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3515 /* Fast return for 0 and small values. We must do this for zero, since
3516 the code below can't handle that one case. */
3517 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3518 return TRUE;
3520 /* Get the number of trailing zeros. */
3521 lowbit = ffs((int) i) - 1;
3523 /* Only even shifts are allowed in ARM mode so round down to the
3524 nearest even number. */
3525 if (TARGET_ARM)
3526 lowbit &= ~1;
3528 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3529 return TRUE;
3531 if (TARGET_ARM)
3533 /* Allow rotated constants in ARM mode. */
3534 if (lowbit <= 4
3535 && ((i & ~0xc000003f) == 0
3536 || (i & ~0xf000000f) == 0
3537 || (i & ~0xfc000003) == 0))
3538 return TRUE;
3540 else
3542 HOST_WIDE_INT v;
3544 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3545 v = i & 0xff;
3546 v |= v << 16;
3547 if (i == v || i == (v | (v << 8)))
3548 return TRUE;
3550 /* Allow repeated pattern 0xXY00XY00. */
3551 v = i & 0xff00;
3552 v |= v << 16;
3553 if (i == v)
3554 return TRUE;
3557 return FALSE;
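/* Illustrative examples (not exhaustive) of what the test above accepts:

       0x000000ff   valid everywhere (fits in 8 bits)
       0x00ff0000   valid everywhere (0xff shifted/rotated to an even position)
       0xf000000f   valid in ARM mode only (8-bit value whose rotation wraps
                    around bit 0; Thumb-2 modified immediates cannot wrap)
       0x00ff00ff   valid in Thumb-2 only (replicated-halfword pattern)
       0x00000101   not a valid immediate for either encoding, so it must be
                    synthesized some other way (e.g. movw where available, or
                    two data-processing insns via arm_gen_constant).  */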
3560 /* Return true if I is a valid constant for the operation CODE. */
3562 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3564 if (const_ok_for_arm (i))
3565 return 1;
3567 switch (code)
3569 case SET:
3570 /* See if we can use movw. */
3571 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3572 return 1;
3573 else
3574 /* Otherwise, try mvn. */
3575 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3577 case PLUS:
3578 /* See if we can use addw or subw. */
3579 if (TARGET_THUMB2
3580 && ((i & 0xfffff000) == 0
3581 || ((-i) & 0xfffff000) == 0))
3582 return 1;
3583 /* else fall through. */
3585 case COMPARE:
3586 case EQ:
3587 case NE:
3588 case GT:
3589 case LE:
3590 case LT:
3591 case GE:
3592 case GEU:
3593 case LTU:
3594 case GTU:
3595 case LEU:
3596 case UNORDERED:
3597 case ORDERED:
3598 case UNEQ:
3599 case UNGE:
3600 case UNLT:
3601 case UNGT:
3602 case UNLE:
3603 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3605 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3606 case XOR:
3607 return 0;
3609 case IOR:
3610 if (TARGET_THUMB2)
3611 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3612 return 0;
3614 case AND:
3615 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3617 default:
3618 gcc_unreachable ();
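/* Illustrative examples (not exhaustive) of the per-operation fallbacks
   checked above, assuming SImode operands:

       SET  0x00001234  on an architecture with movw (arm_arch_thumb2):
                        accepted even though it is not a rotated 8-bit value.
       SET  0xffffff00  : ~0xffffff00 == 0xff is valid, so one mvn suffices.
       PLUS 0xfffffffe  (i.e. -2): the negation 2 is valid, so the addition
                        can be emitted as "sub ..., #2".
       AND  0xffffff00  : ~mask == 0xff is valid, so one bic suffices.
       IOR  0xfffffff0  on Thumb-2: ~0xfffffff0 == 0xf is valid, so orn works.

   The caller still decides which insn pattern actually gets emitted.  */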
3622 /* Return true if I is a valid di mode constant for the operation CODE. */
3624 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3626 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3627 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3628 rtx hi = GEN_INT (hi_val);
3629 rtx lo = GEN_INT (lo_val);
3631 if (TARGET_THUMB1)
3632 return 0;
3634 switch (code)
3636 case AND:
3637 case IOR:
3638 case XOR:
3639 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3640 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3641 case PLUS:
3642 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3644 default:
3645 return 0;
3649 /* Emit a sequence of insns to handle a large constant.
3650 CODE is the code of the operation required, it can be any of SET, PLUS,
3651 IOR, AND, XOR, MINUS;
3652 MODE is the mode in which the operation is being performed;
3653 VAL is the integer to operate on;
3654 SOURCE is the other operand (a register, or a null-pointer for SET);
3655 SUBTARGETS means it is safe to create scratch registers if that will
3656 either produce a simpler sequence, or we will want to cse the values.
3657 Return value is the number of insns emitted. */
3659 /* ??? Tweak this for thumb2. */
3661 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3662 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3664 rtx cond;
3666 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3667 cond = COND_EXEC_TEST (PATTERN (insn));
3668 else
3669 cond = NULL_RTX;
3671 if (subtargets || code == SET
3672 || (REG_P (target) && REG_P (source)
3673 && REGNO (target) != REGNO (source)))
3675 /* After arm_reorg has been called, we can't fix up expensive
3676 constants by pushing them into memory so we must synthesize
3677 them in-line, regardless of the cost. This is only likely to
3678 be more costly on chips that have load delay slots and we are
3679 compiling without running the scheduler (so no splitting
3680 occurred before the final instruction emission).
3682 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3684 if (!cfun->machine->after_arm_reorg
3685 && !cond
3686 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3687 1, 0)
3688 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3689 + (code != SET))))
3691 if (code == SET)
3693 /* Currently SET is the only monadic value for CODE; all
3694 the rest are dyadic. */
3695 if (TARGET_USE_MOVT)
3696 arm_emit_movpair (target, GEN_INT (val));
3697 else
3698 emit_set_insn (target, GEN_INT (val));
3700 return 1;
3702 else
3704 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3706 if (TARGET_USE_MOVT)
3707 arm_emit_movpair (temp, GEN_INT (val));
3708 else
3709 emit_set_insn (temp, GEN_INT (val));
3711 /* For MINUS, the constant is the minuend (VAL - source), since
3712 (source - VAL) is always passed as (source + (-VAL)). */
3713 if (code == MINUS)
3714 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3715 else
3716 emit_set_insn (target,
3717 gen_rtx_fmt_ee (code, mode, source, temp));
3718 return 2;
3723 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3727 /* Return a sequence of integers in RETURN_SEQUENCE that fit into
3728 ARM/THUMB2 immediates and add up to VAL.
3729 The function return value gives the number of insns required. */
3730 static int
3731 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3732 struct four_ints *return_sequence)
3734 int best_consecutive_zeros = 0;
3735 int i;
3736 int best_start = 0;
3737 int insns1, insns2;
3738 struct four_ints tmp_sequence;
3740 /* If we aren't targeting ARM, the best place to start is always at
3741 the bottom, otherwise look more closely. */
3742 if (TARGET_ARM)
3744 for (i = 0; i < 32; i += 2)
3746 int consecutive_zeros = 0;
3748 if (!(val & (3 << i)))
3750 while ((i < 32) && !(val & (3 << i)))
3752 consecutive_zeros += 2;
3753 i += 2;
3755 if (consecutive_zeros > best_consecutive_zeros)
3757 best_consecutive_zeros = consecutive_zeros;
3758 best_start = i - consecutive_zeros;
3760 i -= 2;
3765 /* So long as it won't require any more insns to do so, it's
3766 desirable to emit a small constant (in bits 0...9) in the last
3767 insn. This way there is more chance that it can be combined with
3768 a later addressing insn to form a pre-indexed load or store
3769 operation. Consider:
3771 *((volatile int *)0xe0000100) = 1;
3772 *((volatile int *)0xe0000110) = 2;
3774 We want this to wind up as:
3776 mov rA, #0xe0000000
3777 mov rB, #1
3778 str rB, [rA, #0x100]
3779 mov rB, #2
3780 str rB, [rA, #0x110]
3782 rather than having to synthesize both large constants from scratch.
3784 Therefore, we calculate how many insns would be required to emit
3785 the constant starting from `best_start', and also starting from
3786 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3787 yield a shorter sequence, we may as well use zero. */
3788 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3789 if (best_start != 0
3790 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3792 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3793 if (insns2 <= insns1)
3795 *return_sequence = tmp_sequence;
3796 insns1 = insns2;
3800 return insns1;
3803 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3804 static int
3805 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3806 struct four_ints *return_sequence, int i)
3808 int remainder = val & 0xffffffff;
3809 int insns = 0;
3811 /* Try and find a way of doing the job in either two or three
3812 instructions.
3814 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3815 location. We start at position I. This may be the MSB, or
3816 optimal_immediate_sequence may have positioned it at the largest block
3817 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3818 wrapping around to the top of the word when we drop off the bottom.
3819 In the worst case this code should produce no more than four insns.
3821 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3822 constants, shifted to any arbitrary location. We should always start
3823 at the MSB. */
3826 int end;
3827 unsigned int b1, b2, b3, b4;
3828 unsigned HOST_WIDE_INT result;
3829 int loc;
3831 gcc_assert (insns < 4);
3833 if (i <= 0)
3834 i += 32;
3836 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3837 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3839 loc = i;
3840 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3841 /* We can use addw/subw for the last 12 bits. */
3842 result = remainder;
3843 else
3845 /* Use an 8-bit shifted/rotated immediate. */
3846 end = i - 8;
3847 if (end < 0)
3848 end += 32;
3849 result = remainder & ((0x0ff << end)
3850 | ((i < end) ? (0xff >> (32 - end))
3851 : 0));
3852 i -= 8;
3855 else
3857 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3858 arbitrary shifts. */
3859 i -= TARGET_ARM ? 2 : 1;
3860 continue;
3863 /* Next, see if we can do a better job with a thumb2 replicated
3864 constant.
3866 We do it this way around to catch the cases like 0x01F001E0 where
3867 two 8-bit immediates would work, but a replicated constant would
3868 make it worse.
3870 TODO: 16-bit constants that don't clear all the bits, but still win.
3871 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3872 if (TARGET_THUMB2)
3874 b1 = (remainder & 0xff000000) >> 24;
3875 b2 = (remainder & 0x00ff0000) >> 16;
3876 b3 = (remainder & 0x0000ff00) >> 8;
3877 b4 = remainder & 0xff;
3879 if (loc > 24)
3881 /* The 8-bit immediate already found clears b1 (and maybe b2),
3882 but must leave b3 and b4 alone. */
3884 /* First try to find a 32-bit replicated constant that clears
3885 almost everything. We can assume that we can't do it in one,
3886 or else we wouldn't be here. */
3887 unsigned int tmp = b1 & b2 & b3 & b4;
3888 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3889 + (tmp << 24);
3890 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3891 + (tmp == b3) + (tmp == b4);
3892 if (tmp
3893 && (matching_bytes >= 3
3894 || (matching_bytes == 2
3895 && const_ok_for_op (remainder & ~tmp2, code))))
3897 /* At least 3 of the bytes match, and the fourth has at
3898 least as many bits set, or two of the bytes match
3899 and it will only require one more insn to finish. */
3900 result = tmp2;
3901 i = tmp != b1 ? 32
3902 : tmp != b2 ? 24
3903 : tmp != b3 ? 16
3904 : 8;
3907 /* Second, try to find a 16-bit replicated constant that can
3908 leave three of the bytes clear. If b2 or b4 is already
3909 zero, then we can. If the 8-bit from above would not
3910 clear b2 anyway, then we still win. */
3911 else if (b1 == b3 && (!b2 || !b4
3912 || (remainder & 0x00ff0000 & ~result)))
3914 result = remainder & 0xff00ff00;
3915 i = 24;
3918 else if (loc > 16)
3920 /* The 8-bit immediate already found clears b2 (and maybe b3)
3921 and we don't get here unless b1 is already clear, but it will
3922 leave b4 unchanged. */
3924 /* If we can clear b2 and b4 at once, then we win, since the
3925 8-bits couldn't possibly reach that far. */
3926 if (b2 == b4)
3928 result = remainder & 0x00ff00ff;
3929 i = 16;
3934 return_sequence->i[insns++] = result;
3935 remainder &= ~result;
3937 if (code == SET || code == MINUS)
3938 code = PLUS;
3940 while (remainder);
3942 return insns;
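/* A worked example of the loop above, for ARM mode and code == SET with
   VAL == 0x12340000 (not itself a valid immediate): the first iteration
   peels off 0x12000000 (0x12 rotated to bits 24..31) and the second peels
   off 0x00340000 (0x34 rotated to bits 16..23), so RETURN_SEQUENCE holds
   { 0x12000000, 0x00340000 } and two insns are reported, which
   arm_gen_constant typically emits as a mov of the first value followed by
   an add of the second.  The exact split can differ depending on where
   optimal_immediate_sequence chose to start.  */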
3945 /* Emit an instruction with the indicated PATTERN. If COND is
3946 non-NULL, conditionalize the execution of the instruction on COND
3947 being true. */
3949 static void
3950 emit_constant_insn (rtx cond, rtx pattern)
3952 if (cond)
3953 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3954 emit_insn (pattern);
3957 /* As above, but extra parameter GENERATE which, if clear, suppresses
3958 RTL generation. */
3960 static int
3961 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3962 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3963 int generate)
3965 int can_invert = 0;
3966 int can_negate = 0;
3967 int final_invert = 0;
3968 int i;
3969 int set_sign_bit_copies = 0;
3970 int clear_sign_bit_copies = 0;
3971 int clear_zero_bit_copies = 0;
3972 int set_zero_bit_copies = 0;
3973 int insns = 0, neg_insns, inv_insns;
3974 unsigned HOST_WIDE_INT temp1, temp2;
3975 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3976 struct four_ints *immediates;
3977 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3979 /* Find out which operations are safe for a given CODE. Also do a quick
3980 check for degenerate cases; these can occur when DImode operations
3981 are split. */
3982 switch (code)
3984 case SET:
3985 can_invert = 1;
3986 break;
3988 case PLUS:
3989 can_negate = 1;
3990 break;
3992 case IOR:
3993 if (remainder == 0xffffffff)
3995 if (generate)
3996 emit_constant_insn (cond,
3997 gen_rtx_SET (VOIDmode, target,
3998 GEN_INT (ARM_SIGN_EXTEND (val))));
3999 return 1;
4002 if (remainder == 0)
4004 if (reload_completed && rtx_equal_p (target, source))
4005 return 0;
4007 if (generate)
4008 emit_constant_insn (cond,
4009 gen_rtx_SET (VOIDmode, target, source));
4010 return 1;
4012 break;
4014 case AND:
4015 if (remainder == 0)
4017 if (generate)
4018 emit_constant_insn (cond,
4019 gen_rtx_SET (VOIDmode, target, const0_rtx));
4020 return 1;
4022 if (remainder == 0xffffffff)
4024 if (reload_completed && rtx_equal_p (target, source))
4025 return 0;
4026 if (generate)
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, target, source));
4029 return 1;
4031 can_invert = 1;
4032 break;
4034 case XOR:
4035 if (remainder == 0)
4037 if (reload_completed && rtx_equal_p (target, source))
4038 return 0;
4039 if (generate)
4040 emit_constant_insn (cond,
4041 gen_rtx_SET (VOIDmode, target, source));
4042 return 1;
4045 if (remainder == 0xffffffff)
4047 if (generate)
4048 emit_constant_insn (cond,
4049 gen_rtx_SET (VOIDmode, target,
4050 gen_rtx_NOT (mode, source)));
4051 return 1;
4053 final_invert = 1;
4054 break;
4056 case MINUS:
4057 /* We treat MINUS as (val - source), since (source - val) is always
4058 passed as (source + (-val)). */
4059 if (remainder == 0)
4061 if (generate)
4062 emit_constant_insn (cond,
4063 gen_rtx_SET (VOIDmode, target,
4064 gen_rtx_NEG (mode, source)));
4065 return 1;
4067 if (const_ok_for_arm (val))
4069 if (generate)
4070 emit_constant_insn (cond,
4071 gen_rtx_SET (VOIDmode, target,
4072 gen_rtx_MINUS (mode, GEN_INT (val),
4073 source)));
4074 return 1;
4077 break;
4079 default:
4080 gcc_unreachable ();
4083 /* If we can do it in one insn get out quickly. */
4084 if (const_ok_for_op (val, code))
4086 if (generate)
4087 emit_constant_insn (cond,
4088 gen_rtx_SET (VOIDmode, target,
4089 (source
4090 ? gen_rtx_fmt_ee (code, mode, source,
4091 GEN_INT (val))
4092 : GEN_INT (val))));
4093 return 1;
4096 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4097 insn. */
4098 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4099 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4101 if (generate)
4103 if (mode == SImode && i == 16)
4104 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4105 smaller insn. */
4106 emit_constant_insn (cond,
4107 gen_zero_extendhisi2
4108 (target, gen_lowpart (HImode, source)));
4109 else
4110 /* Extz only supports SImode, but we can coerce the operands
4111 into that mode. */
4112 emit_constant_insn (cond,
4113 gen_extzv_t2 (gen_lowpart (SImode, target),
4114 gen_lowpart (SImode, source),
4115 GEN_INT (i), const0_rtx));
4118 return 1;
4121 /* Calculate a few attributes that may be useful for specific
4122 optimizations. */
4123 /* Count number of leading zeros. */
4124 for (i = 31; i >= 0; i--)
4126 if ((remainder & (1 << i)) == 0)
4127 clear_sign_bit_copies++;
4128 else
4129 break;
4132 /* Count number of leading 1's. */
4133 for (i = 31; i >= 0; i--)
4135 if ((remainder & (1 << i)) != 0)
4136 set_sign_bit_copies++;
4137 else
4138 break;
4141 /* Count number of trailing zeros. */
4142 for (i = 0; i <= 31; i++)
4144 if ((remainder & (1 << i)) == 0)
4145 clear_zero_bit_copies++;
4146 else
4147 break;
4150 /* Count number of trailing 1's. */
4151 for (i = 0; i <= 31; i++)
4153 if ((remainder & (1 << i)) != 0)
4154 set_zero_bit_copies++;
4155 else
4156 break;
4159 switch (code)
4161 case SET:
4162 /* See if we can do this by sign_extending a constant that is known
4163 to be negative. This is a good way of doing it, since the shift
4164 may well merge into a subsequent insn. */
4165 if (set_sign_bit_copies > 1)
4167 if (const_ok_for_arm
4168 (temp1 = ARM_SIGN_EXTEND (remainder
4169 << (set_sign_bit_copies - 1))))
4171 if (generate)
4173 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4174 emit_constant_insn (cond,
4175 gen_rtx_SET (VOIDmode, new_src,
4176 GEN_INT (temp1)));
4177 emit_constant_insn (cond,
4178 gen_ashrsi3 (target, new_src,
4179 GEN_INT (set_sign_bit_copies - 1)));
4181 return 2;
4183 /* For an inverted constant, we will need to set the low bits,
4184 these will be shifted out of harm's way. */
4185 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4186 if (const_ok_for_arm (~temp1))
4188 if (generate)
4190 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4191 emit_constant_insn (cond,
4192 gen_rtx_SET (VOIDmode, new_src,
4193 GEN_INT (temp1)));
4194 emit_constant_insn (cond,
4195 gen_ashrsi3 (target, new_src,
4196 GEN_INT (set_sign_bit_copies - 1)));
4198 return 2;
4202 /* See if we can calculate the value as the difference between two
4203 valid immediates. */
4204 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4206 int topshift = clear_sign_bit_copies & ~1;
4208 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4209 & (0xff000000 >> topshift));
4211 /* If temp1 is zero, then that means the 9 most significant
4212 bits of remainder were 1 and we've caused it to overflow.
4213 When topshift is 0 we don't need to do anything since we
4214 can borrow from 'bit 32'. */
4215 if (temp1 == 0 && topshift != 0)
4216 temp1 = 0x80000000 >> (topshift - 1);
4218 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4220 if (const_ok_for_arm (temp2))
4222 if (generate)
4224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (VOIDmode, new_src,
4227 GEN_INT (temp1)));
4228 emit_constant_insn (cond,
4229 gen_addsi3 (target, new_src,
4230 GEN_INT (-temp2)));
4233 return 2;
4237 /* See if we can generate this by setting the bottom (or the top)
4238 16 bits, and then shifting these into the other half of the
4239 word. We only look for the simplest cases, to do more would cost
4240 too much. Be careful, however, not to generate this when the
4241 alternative would take fewer insns. */
4242 if (val & 0xffff0000)
4244 temp1 = remainder & 0xffff0000;
4245 temp2 = remainder & 0x0000ffff;
4247 /* Overlaps outside this range are best done using other methods. */
4248 for (i = 9; i < 24; i++)
4250 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4251 && !const_ok_for_arm (temp2))
4253 rtx new_src = (subtargets
4254 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4255 : target);
4256 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4257 source, subtargets, generate);
4258 source = new_src;
4259 if (generate)
4260 emit_constant_insn
4261 (cond,
4262 gen_rtx_SET
4263 (VOIDmode, target,
4264 gen_rtx_IOR (mode,
4265 gen_rtx_ASHIFT (mode, source,
4266 GEN_INT (i)),
4267 source)));
4268 return insns + 1;
4272 /* Don't duplicate cases already considered. */
4273 for (i = 17; i < 24; i++)
4275 if (((temp1 | (temp1 >> i)) == remainder)
4276 && !const_ok_for_arm (temp1))
4278 rtx new_src = (subtargets
4279 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4280 : target);
4281 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4282 source, subtargets, generate);
4283 source = new_src;
4284 if (generate)
4285 emit_constant_insn
4286 (cond,
4287 gen_rtx_SET (VOIDmode, target,
4288 gen_rtx_IOR
4289 (mode,
4290 gen_rtx_LSHIFTRT (mode, source,
4291 GEN_INT (i)),
4292 source)));
4293 return insns + 1;
4297 break;
4299 case IOR:
4300 case XOR:
4301 /* If we have IOR or XOR, and the constant can be loaded in a
4302 single instruction, and we can find a temporary to put it in,
4303 then this can be done in two instructions instead of 3-4. */
4304 if (subtargets
4305 /* TARGET can't be NULL if SUBTARGETS is 0 */
4306 || (reload_completed && !reg_mentioned_p (target, source)))
4308 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4310 if (generate)
4312 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4314 emit_constant_insn (cond,
4315 gen_rtx_SET (VOIDmode, sub,
4316 GEN_INT (val)));
4317 emit_constant_insn (cond,
4318 gen_rtx_SET (VOIDmode, target,
4319 gen_rtx_fmt_ee (code, mode,
4320 source, sub)));
4322 return 2;
4326 if (code == XOR)
4327 break;
4329 /* Convert
4330 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4331 and the remainder 0s, e.g. 0xfff00000) into
4332 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4334 This can be done in 2 instructions by using shifts with mov or mvn.
4335 E.g. for
4336 x = x | 0xfff00000;
4337 we generate:
4338 mvn r0, r0, asl #12
4339 mvn r0, r0, lsr #12 */
4340 if (set_sign_bit_copies > 8
4341 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4343 if (generate)
4345 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4346 rtx shift = GEN_INT (set_sign_bit_copies);
4348 emit_constant_insn
4349 (cond,
4350 gen_rtx_SET (VOIDmode, sub,
4351 gen_rtx_NOT (mode,
4352 gen_rtx_ASHIFT (mode,
4353 source,
4354 shift))));
4355 emit_constant_insn
4356 (cond,
4357 gen_rtx_SET (VOIDmode, target,
4358 gen_rtx_NOT (mode,
4359 gen_rtx_LSHIFTRT (mode, sub,
4360 shift))));
4362 return 2;
4365 /* Convert
4366 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4368 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4370 E.g. for r0 = r0 | 0xfff we generate:
4371 mvn r0, r0, lsr #12
4372 mvn r0, r0, asl #12
4375 if (set_zero_bit_copies > 8
4376 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4378 if (generate)
4380 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4381 rtx shift = GEN_INT (set_zero_bit_copies);
4383 emit_constant_insn
4384 (cond,
4385 gen_rtx_SET (VOIDmode, sub,
4386 gen_rtx_NOT (mode,
4387 gen_rtx_LSHIFTRT (mode,
4388 source,
4389 shift))));
4390 emit_constant_insn
4391 (cond,
4392 gen_rtx_SET (VOIDmode, target,
4393 gen_rtx_NOT (mode,
4394 gen_rtx_ASHIFT (mode, sub,
4395 shift))));
4397 return 2;
4400 /* This will never be reached for Thumb2 because orn is a valid
4401 instruction. This is for Thumb1 and the ARM 32 bit cases.
4403 x = y | constant (such that ~constant is a valid constant)
4404 Transform this to
4405 x = ~(~y & ~constant).
4407 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4409 if (generate)
4411 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4412 emit_constant_insn (cond,
4413 gen_rtx_SET (VOIDmode, sub,
4414 gen_rtx_NOT (mode, source)));
4415 source = sub;
4416 if (subtargets)
4417 sub = gen_reg_rtx (mode);
4418 emit_constant_insn (cond,
4419 gen_rtx_SET (VOIDmode, sub,
4420 gen_rtx_AND (mode, source,
4421 GEN_INT (temp1))));
4422 emit_constant_insn (cond,
4423 gen_rtx_SET (VOIDmode, target,
4424 gen_rtx_NOT (mode, sub)));
4426 return 3;
4428 break;
4430 case AND:
4431 /* See if two shifts will do 2 or more insn's worth of work. */
4432 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4434 HOST_WIDE_INT shift_mask = ((0xffffffff
4435 << (32 - clear_sign_bit_copies))
4436 & 0xffffffff);
4438 if ((remainder | shift_mask) != 0xffffffff)
4440 if (generate)
4442 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4443 insns = arm_gen_constant (AND, mode, cond,
4444 remainder | shift_mask,
4445 new_src, source, subtargets, 1);
4446 source = new_src;
4448 else
4450 rtx targ = subtargets ? NULL_RTX : target;
4451 insns = arm_gen_constant (AND, mode, cond,
4452 remainder | shift_mask,
4453 targ, source, subtargets, 0);
4457 if (generate)
4459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4460 rtx shift = GEN_INT (clear_sign_bit_copies);
4462 emit_insn (gen_ashlsi3 (new_src, source, shift));
4463 emit_insn (gen_lshrsi3 (target, new_src, shift));
4466 return insns + 2;
4469 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4471 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4473 if ((remainder | shift_mask) != 0xffffffff)
4475 if (generate)
4477 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4479 insns = arm_gen_constant (AND, mode, cond,
4480 remainder | shift_mask,
4481 new_src, source, subtargets, 1);
4482 source = new_src;
4484 else
4486 rtx targ = subtargets ? NULL_RTX : target;
4488 insns = arm_gen_constant (AND, mode, cond,
4489 remainder | shift_mask,
4490 targ, source, subtargets, 0);
4494 if (generate)
4496 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4497 rtx shift = GEN_INT (clear_zero_bit_copies);
4499 emit_insn (gen_lshrsi3 (new_src, source, shift));
4500 emit_insn (gen_ashlsi3 (target, new_src, shift));
4503 return insns + 2;
4506 break;
4508 default:
4509 break;
4512 /* Calculate what the instruction sequences would be if we generated it
4513 normally, negated, or inverted. */
4514 if (code == AND)
4515 /* AND cannot be split into multiple insns, so invert and use BIC. */
4516 insns = 99;
4517 else
4518 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4520 if (can_negate)
4521 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4522 &neg_immediates);
4523 else
4524 neg_insns = 99;
4526 if (can_invert || final_invert)
4527 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4528 &inv_immediates);
4529 else
4530 inv_insns = 99;
4532 immediates = &pos_immediates;
4534 /* Is the negated immediate sequence more efficient? */
4535 if (neg_insns < insns && neg_insns <= inv_insns)
4537 insns = neg_insns;
4538 immediates = &neg_immediates;
4540 else
4541 can_negate = 0;
4543 /* Is the inverted immediate sequence more efficient?
4544 We must allow for an extra NOT instruction for XOR operations, although
4545 there is some chance that the final 'mvn' will get optimized later. */
4546 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4548 insns = inv_insns;
4549 immediates = &inv_immediates;
4551 else
4553 can_invert = 0;
4554 final_invert = 0;
4557 /* Now output the chosen sequence as instructions. */
4558 if (generate)
4560 for (i = 0; i < insns; i++)
4562 rtx new_src, temp1_rtx;
4564 temp1 = immediates->i[i];
4566 if (code == SET || code == MINUS)
4567 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4568 else if ((final_invert || i < (insns - 1)) && subtargets)
4569 new_src = gen_reg_rtx (mode);
4570 else
4571 new_src = target;
4573 if (can_invert)
4574 temp1 = ~temp1;
4575 else if (can_negate)
4576 temp1 = -temp1;
4578 temp1 = trunc_int_for_mode (temp1, mode);
4579 temp1_rtx = GEN_INT (temp1);
4581 if (code == SET)
4583 else if (code == MINUS)
4584 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4585 else
4586 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4588 emit_constant_insn (cond,
4589 gen_rtx_SET (VOIDmode, new_src,
4590 temp1_rtx));
4591 source = new_src;
4593 if (code == SET)
4595 can_negate = can_invert;
4596 can_invert = 0;
4597 code = PLUS;
4599 else if (code == MINUS)
4600 code = PLUS;
4604 if (final_invert)
4606 if (generate)
4607 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4608 gen_rtx_NOT (mode, source)));
4609 insns++;
4612 return insns;
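/* Two illustrative outcomes of the selection above (ARM mode, SImode):

       r0 &= 0xffe007ff: neither the mask nor its inverse is a single valid
           immediate, so the inverted sequence {0x001fc000, 0x00003800} is
           chosen and the AND is emitted as two BICs:
               bic r0, r0, #0x1fc000
               bic r0, r0, #0x3800

       r0 += 0xfffedfcc (i.e. r0 -= 0x12034): the negated sequence
           {0x00012000, 0x00000034} needs only two insns, so the addition
           becomes:
               sub r0, r0, #0x12000
               sub r0, r0, #0x34

   The exact split can vary with the target and optimization options.  */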
4615 /* Canonicalize a comparison so that we are more likely to recognize it.
4616 This can be done for a few constant compares, where we can make the
4617 immediate value easier to load. */
4619 static void
4620 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4621 bool op0_preserve_value)
4623 machine_mode mode;
4624 unsigned HOST_WIDE_INT i, maxval;
4626 mode = GET_MODE (*op0);
4627 if (mode == VOIDmode)
4628 mode = GET_MODE (*op1);
4630 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4632 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4633 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4634 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4635 for GTU/LEU in Thumb mode. */
4636 if (mode == DImode)
4638 rtx tem;
4640 if (*code == GT || *code == LE
4641 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4643 /* Missing comparison. First try to use an available
4644 comparison. */
4645 if (CONST_INT_P (*op1))
4647 i = INTVAL (*op1);
4648 switch (*code)
4650 case GT:
4651 case LE:
4652 if (i != maxval
4653 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4655 *op1 = GEN_INT (i + 1);
4656 *code = *code == GT ? GE : LT;
4657 return;
4659 break;
4660 case GTU:
4661 case LEU:
4662 if (i != ~((unsigned HOST_WIDE_INT) 0)
4663 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4665 *op1 = GEN_INT (i + 1);
4666 *code = *code == GTU ? GEU : LTU;
4667 return;
4669 break;
4670 default:
4671 gcc_unreachable ();
4675 /* If that did not work, reverse the condition. */
4676 if (!op0_preserve_value)
4678 tem = *op0;
4679 *op0 = *op1;
4680 *op1 = tem;
4681 *code = (int)swap_condition ((enum rtx_code)*code);
4684 return;
4687 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4688 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4689 to facilitate possible combining with a cmp into 'ands'. */
4690 if (mode == SImode
4691 && GET_CODE (*op0) == ZERO_EXTEND
4692 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4693 && GET_MODE (XEXP (*op0, 0)) == QImode
4694 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4695 && subreg_lowpart_p (XEXP (*op0, 0))
4696 && *op1 == const0_rtx)
4697 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4698 GEN_INT (255));
4700 /* Comparisons smaller than DImode. Only adjust comparisons against
4701 an out-of-range constant. */
4702 if (!CONST_INT_P (*op1)
4703 || const_ok_for_arm (INTVAL (*op1))
4704 || const_ok_for_arm (- INTVAL (*op1)))
4705 return;
4707 i = INTVAL (*op1);
4709 switch (*code)
4711 case EQ:
4712 case NE:
4713 return;
4715 case GT:
4716 case LE:
4717 if (i != maxval
4718 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4720 *op1 = GEN_INT (i + 1);
4721 *code = *code == GT ? GE : LT;
4722 return;
4724 break;
4726 case GE:
4727 case LT:
4728 if (i != ~maxval
4729 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4731 *op1 = GEN_INT (i - 1);
4732 *code = *code == GE ? GT : LE;
4733 return;
4735 break;
4737 case GTU:
4738 case LEU:
4739 if (i != ~((unsigned HOST_WIDE_INT) 0)
4740 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4742 *op1 = GEN_INT (i + 1);
4743 *code = *code == GTU ? GEU : LTU;
4744 return;
4746 break;
4748 case GEU:
4749 case LTU:
4750 if (i != 0
4751 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4753 *op1 = GEN_INT (i - 1);
4754 *code = *code == GEU ? GTU : LEU;
4755 return;
4757 break;
4759 default:
4760 gcc_unreachable ();
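/* For illustration: with SImode operands, "x > 0x00ffffff" uses GT against
   a constant that is not a valid immediate (and neither is its negation),
   but 0x01000000 is, so the code above rewrites the comparison as
   "x >= 0x01000000" (GT -> GE with OP1 incremented).  The unsigned and
   lower-bound cases are adjusted the same way in the opposite direction.  */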
4765 /* Define how to find the value returned by a function. */
4767 static rtx
4768 arm_function_value(const_tree type, const_tree func,
4769 bool outgoing ATTRIBUTE_UNUSED)
4771 machine_mode mode;
4772 int unsignedp ATTRIBUTE_UNUSED;
4773 rtx r ATTRIBUTE_UNUSED;
4775 mode = TYPE_MODE (type);
4777 if (TARGET_AAPCS_BASED)
4778 return aapcs_allocate_return_reg (mode, type, func);
4780 /* Promote integer types. */
4781 if (INTEGRAL_TYPE_P (type))
4782 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4784 /* Promote small structs returned in a register to full-word size
4785 for big-endian AAPCS. */
4786 if (arm_return_in_msb (type))
4788 HOST_WIDE_INT size = int_size_in_bytes (type);
4789 if (size % UNITS_PER_WORD != 0)
4791 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4792 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4796 return arm_libcall_value_1 (mode);
4799 /* libcall hashtable helpers. */
4801 struct libcall_hasher : typed_noop_remove <rtx_def>
4803 typedef rtx_def value_type;
4804 typedef rtx_def compare_type;
4805 static inline hashval_t hash (const value_type *);
4806 static inline bool equal (const value_type *, const compare_type *);
4807 static inline void remove (value_type *);
4810 inline bool
4811 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4813 return rtx_equal_p (p1, p2);
4816 inline hashval_t
4817 libcall_hasher::hash (const value_type *p1)
4819 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4822 typedef hash_table<libcall_hasher> libcall_table_type;
4824 static void
4825 add_libcall (libcall_table_type *htab, rtx libcall)
4827 *htab->find_slot (libcall, INSERT) = libcall;
4830 static bool
4831 arm_libcall_uses_aapcs_base (const_rtx libcall)
4833 static bool init_done = false;
4834 static libcall_table_type *libcall_htab = NULL;
4836 if (!init_done)
4838 init_done = true;
4840 libcall_htab = new libcall_table_type (31);
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4850 add_libcall (libcall_htab,
4851 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4852 add_libcall (libcall_htab,
4853 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4854 add_libcall (libcall_htab,
4855 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4856 add_libcall (libcall_htab,
4857 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4859 add_libcall (libcall_htab,
4860 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4861 add_libcall (libcall_htab,
4862 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4863 add_libcall (libcall_htab,
4864 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4865 add_libcall (libcall_htab,
4866 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4867 add_libcall (libcall_htab,
4868 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4869 add_libcall (libcall_htab,
4870 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4871 add_libcall (libcall_htab,
4872 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4873 add_libcall (libcall_htab,
4874 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4876 /* Values from double-precision helper functions are returned in core
4877 registers if the selected core only supports single-precision
4878 arithmetic, even if we are using the hard-float ABI. The same is
4879 true for single-precision helpers, but we will never be using the
4880 hard-float ABI on a CPU which doesn't support single-precision
4881 operations in hardware. */
4882 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4883 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4884 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4885 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4886 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4887 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4888 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4889 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4890 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4891 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4892 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4893 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4894 SFmode));
4895 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4896 DFmode));
4899 return libcall && libcall_htab->find (libcall) != NULL;
4902 static rtx
4903 arm_libcall_value_1 (machine_mode mode)
4905 if (TARGET_AAPCS_BASED)
4906 return aapcs_libcall_value (mode);
4907 else if (TARGET_IWMMXT_ABI
4908 && arm_vector_mode_supported_p (mode))
4909 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4910 else
4911 return gen_rtx_REG (mode, ARG_REGISTER (1));
4914 /* Define how to find the value returned by a library function
4915 assuming the value has mode MODE. */
4917 static rtx
4918 arm_libcall_value (machine_mode mode, const_rtx libcall)
4920 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4921 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4923 /* The following libcalls return their result in integer registers,
4924 even though they return a floating point value. */
4925 if (arm_libcall_uses_aapcs_base (libcall))
4926 return gen_rtx_REG (mode, ARG_REGISTER(1));
4930 return arm_libcall_value_1 (mode);
4933 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4935 static bool
4936 arm_function_value_regno_p (const unsigned int regno)
4938 if (regno == ARG_REGISTER (1)
4939 || (TARGET_32BIT
4940 && TARGET_AAPCS_BASED
4941 && TARGET_VFP
4942 && TARGET_HARD_FLOAT
4943 && regno == FIRST_VFP_REGNUM)
4944 || (TARGET_IWMMXT_ABI
4945 && regno == FIRST_IWMMXT_REGNUM))
4946 return true;
4948 return false;
4951 /* Determine the amount of memory needed to store the possible return
4952 registers of an untyped call. */
4954 arm_apply_result_size (void)
4956 int size = 16;
4958 if (TARGET_32BIT)
4960 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4961 size += 32;
4962 if (TARGET_IWMMXT_ABI)
4963 size += 8;
4966 return size;
4969 /* Decide whether TYPE should be returned in memory (true)
4970 or in a register (false). FNTYPE is the type of the function making
4971 the call. */
4972 static bool
4973 arm_return_in_memory (const_tree type, const_tree fntype)
4975 HOST_WIDE_INT size;
4977 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4979 if (TARGET_AAPCS_BASED)
4981 /* Simple, non-aggregate types (i.e. not including vectors and
4982 complex) are always returned in a register (or registers).
4983 We don't care about which register here, so we can short-cut
4984 some of the detail. */
4985 if (!AGGREGATE_TYPE_P (type)
4986 && TREE_CODE (type) != VECTOR_TYPE
4987 && TREE_CODE (type) != COMPLEX_TYPE)
4988 return false;
4990 /* Any return value that is no larger than one word can be
4991 returned in r0. */
4992 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4993 return false;
4995 /* Check any available co-processors to see if they accept the
4996 type as a register candidate (VFP, for example, can return
4997 some aggregates in consecutive registers). These aren't
4998 available if the call is variadic. */
4999 if (aapcs_select_return_coproc (type, fntype) >= 0)
5000 return false;
5002 /* Vector values should be returned using ARM registers, not
5003 memory (unless they're over 16 bytes, which will break since
5004 we only have four call-clobbered registers to play with). */
5005 if (TREE_CODE (type) == VECTOR_TYPE)
5006 return (size < 0 || size > (4 * UNITS_PER_WORD));
5008 /* The rest go in memory. */
5009 return true;
5012 if (TREE_CODE (type) == VECTOR_TYPE)
5013 return (size < 0 || size > (4 * UNITS_PER_WORD));
5015 if (!AGGREGATE_TYPE_P (type) &&
5016 (TREE_CODE (type) != VECTOR_TYPE))
5017 /* All simple types are returned in registers. */
5018 return false;
5020 if (arm_abi != ARM_ABI_APCS)
5022 /* ATPCS and later return aggregate types in memory only if they are
5023 larger than a word (or are of variable size). */
5024 return (size < 0 || size > UNITS_PER_WORD);
5027 /* For the arm-wince targets we choose to be compatible with Microsoft's
5028 ARM and Thumb compilers, which always return aggregates in memory. */
5029 #ifndef ARM_WINCE
5030 /* All structures/unions bigger than one word are returned in memory.
5031 Also catch the case where int_size_in_bytes returns -1. In this case
5032 the aggregate is either huge or of variable size, and in either case
5033 we will want to return it via memory and not in a register. */
5034 if (size < 0 || size > UNITS_PER_WORD)
5035 return true;
5037 if (TREE_CODE (type) == RECORD_TYPE)
5039 tree field;
5041 /* For a struct the APCS says that we only return in a register
5042 if the type is 'integer like' and every addressable element
5043 has an offset of zero. For practical purposes this means
5044 that the structure can have at most one non bit-field element
5045 and that this element must be the first one in the structure. */
5047 /* Find the first field, ignoring non FIELD_DECL things which will
5048 have been created by C++. */
5049 for (field = TYPE_FIELDS (type);
5050 field && TREE_CODE (field) != FIELD_DECL;
5051 field = DECL_CHAIN (field))
5052 continue;
5054 if (field == NULL)
5055 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5057 /* Check that the first field is valid for returning in a register. */
5059 /* ... Floats are not allowed */
5060 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5061 return true;
5063 /* ... Aggregates that are not themselves valid for returning in
5064 a register are not allowed. */
5065 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5066 return true;
5068 /* Now check the remaining fields, if any. Only bitfields are allowed,
5069 since they are not addressable. */
5070 for (field = DECL_CHAIN (field);
5071 field;
5072 field = DECL_CHAIN (field))
5074 if (TREE_CODE (field) != FIELD_DECL)
5075 continue;
5077 if (!DECL_BIT_FIELD_TYPE (field))
5078 return true;
5081 return false;
5084 if (TREE_CODE (type) == UNION_TYPE)
5086 tree field;
5088 /* Unions can be returned in registers if every element is
5089 integral, or can be returned in an integer register. */
5090 for (field = TYPE_FIELDS (type);
5091 field;
5092 field = DECL_CHAIN (field))
5094 if (TREE_CODE (field) != FIELD_DECL)
5095 continue;
5097 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5098 return true;
5100 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5101 return true;
5104 return false;
5106 #endif /* not ARM_WINCE */
5108 /* Return all other types in memory. */
5109 return true;
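/* Roughly, for the default ABI settings and 32-bit int, the checks above
   give (an illustration, not an exhaustive table):

       int, float, pointer types            -> returned in a register
       struct { int a; }                    -> register (fits in one word)
       struct { int a, b; }                 -> memory (larger than a word and
                                               no co-processor claims it)
       struct { float x; }                  -> register under AAPCS (one
                                               word), memory under old APCS
                                               (float member)
       struct { double a, b; } with the
       AAPCS hard-float (VFP) variant       -> VFP registers via the
                                               co-processor hook, not memory.  */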
5112 const struct pcs_attribute_arg
5114 const char *arg;
5115 enum arm_pcs value;
5116 } pcs_attribute_args[] =
5118 {"aapcs", ARM_PCS_AAPCS},
5119 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5120 #if 0
5121 /* We could recognize these, but changes would be needed elsewhere
5122 * to implement them. */
5123 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5124 {"atpcs", ARM_PCS_ATPCS},
5125 {"apcs", ARM_PCS_APCS},
5126 #endif
5127 {NULL, ARM_PCS_UNKNOWN}
5130 static enum arm_pcs
5131 arm_pcs_from_attribute (tree attr)
5133 const struct pcs_attribute_arg *ptr;
5134 const char *arg;
5136 /* Get the value of the argument. */
5137 if (TREE_VALUE (attr) == NULL_TREE
5138 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5139 return ARM_PCS_UNKNOWN;
5141 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5143 /* Check it against the list of known arguments. */
5144 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5145 if (streq (arg, ptr->arg))
5146 return ptr->value;
5148 /* An unrecognized PCS name. */
5149 return ARM_PCS_UNKNOWN;
5152 /* Get the PCS variant to use for this call. TYPE is the function's type
5153 specification, DECL is the specific declaration. DECL may be null if
5154 the call could be indirect or if this is a library call. */
5155 static enum arm_pcs
5156 arm_get_pcs_model (const_tree type, const_tree decl)
5158 bool user_convention = false;
5159 enum arm_pcs user_pcs = arm_pcs_default;
5160 tree attr;
5162 gcc_assert (type);
5164 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5165 if (attr)
5167 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5168 user_convention = true;
5171 if (TARGET_AAPCS_BASED)
5173 /* Detect varargs functions. These always use the base rules
5174 (no argument is ever a candidate for a co-processor
5175 register). */
5176 bool base_rules = stdarg_p (type);
5178 if (user_convention)
5180 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5181 sorry ("non-AAPCS derived PCS variant");
5182 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5183 error ("variadic functions must use the base AAPCS variant");
5186 if (base_rules)
5187 return ARM_PCS_AAPCS;
5188 else if (user_convention)
5189 return user_pcs;
5190 else if (decl && flag_unit_at_a_time)
5192 /* Local functions never leak outside this compilation unit,
5193 so we are free to use whatever conventions are
5194 appropriate. */
5195 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5196 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5197 if (i && i->local)
5198 return ARM_PCS_AAPCS_LOCAL;
5201 else if (user_convention && user_pcs != arm_pcs_default)
5202 sorry ("PCS variant");
5204 /* For everything else we use the target's default. */
5205 return arm_pcs_default;
5209 static void
5210 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5211 const_tree fntype ATTRIBUTE_UNUSED,
5212 rtx libcall ATTRIBUTE_UNUSED,
5213 const_tree fndecl ATTRIBUTE_UNUSED)
5215 /* Record the unallocated VFP registers. */
5216 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5217 pcum->aapcs_vfp_reg_alloc = 0;
5220 /* Walk down the type tree of TYPE counting consecutive base elements.
5221 If *MODEP is VOIDmode, then set it to the first valid floating point
5222 type. If a non-floating point type is found, or if a floating point
5223 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5224 otherwise return the count in the sub-tree. */
5225 static int
5226 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5228 machine_mode mode;
5229 HOST_WIDE_INT size;
5231 switch (TREE_CODE (type))
5233 case REAL_TYPE:
5234 mode = TYPE_MODE (type);
5235 if (mode != DFmode && mode != SFmode)
5236 return -1;
5238 if (*modep == VOIDmode)
5239 *modep = mode;
5241 if (*modep == mode)
5242 return 1;
5244 break;
5246 case COMPLEX_TYPE:
5247 mode = TYPE_MODE (TREE_TYPE (type));
5248 if (mode != DFmode && mode != SFmode)
5249 return -1;
5251 if (*modep == VOIDmode)
5252 *modep = mode;
5254 if (*modep == mode)
5255 return 2;
5257 break;
5259 case VECTOR_TYPE:
5260 /* Use V2SImode and V4SImode as representatives of all 64-bit
5261 and 128-bit vector types, whether or not those modes are
5262 supported with the present options. */
5263 size = int_size_in_bytes (type);
5264 switch (size)
5266 case 8:
5267 mode = V2SImode;
5268 break;
5269 case 16:
5270 mode = V4SImode;
5271 break;
5272 default:
5273 return -1;
5276 if (*modep == VOIDmode)
5277 *modep = mode;
5279 /* Vector modes are considered to be opaque: two vectors are
5280 equivalent for the purposes of being homogeneous aggregates
5281 if they are the same size. */
5282 if (*modep == mode)
5283 return 1;
5285 break;
5287 case ARRAY_TYPE:
5289 int count;
5290 tree index = TYPE_DOMAIN (type);
5292 /* Can't handle incomplete types nor sizes that are not
5293 fixed. */
5294 if (!COMPLETE_TYPE_P (type)
5295 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5296 return -1;
5298 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5299 if (count == -1
5300 || !index
5301 || !TYPE_MAX_VALUE (index)
5302 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5303 || !TYPE_MIN_VALUE (index)
5304 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5305 || count < 0)
5306 return -1;
5308 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5309 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5311 /* There must be no padding. */
5312 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5313 return -1;
5315 return count;
5318 case RECORD_TYPE:
5320 int count = 0;
5321 int sub_count;
5322 tree field;
5324 /* Can't handle incomplete types nor sizes that are not
5325 fixed. */
5326 if (!COMPLETE_TYPE_P (type)
5327 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5328 return -1;
5330 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5332 if (TREE_CODE (field) != FIELD_DECL)
5333 continue;
5335 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5336 if (sub_count < 0)
5337 return -1;
5338 count += sub_count;
5341 /* There must be no padding. */
5342 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5343 return -1;
5345 return count;
5348 case UNION_TYPE:
5349 case QUAL_UNION_TYPE:
5351 /* These aren't very interesting except in a degenerate case. */
5352 int count = 0;
5353 int sub_count;
5354 tree field;
5356 /* Can't handle incomplete types nor sizes that are not
5357 fixed. */
5358 if (!COMPLETE_TYPE_P (type)
5359 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5360 return -1;
5362 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5364 if (TREE_CODE (field) != FIELD_DECL)
5365 continue;
5367 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5368 if (sub_count < 0)
5369 return -1;
5370 count = count > sub_count ? count : sub_count;
5373 /* There must be no padding. */
5374 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5375 return -1;
5377 return count;
5380 default:
5381 break;
5384 return -1;
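/* Worked examples for the walk above (assuming the completeness and padding
   checks pass):

       struct { double x, y; }          -> count 2, *MODEP == DFmode
       struct { float a; float b[2]; }  -> count 3, *MODEP == SFmode
       double[4]                        -> count 4, *MODEP == DFmode
       struct { float f; double d; }    -> -1 (mixed element modes)
       struct { double d; int i; }      -> -1 (non-floating member)

   The caller (aapcs_vfp_is_call_or_return_candidate) additionally rejects
   counts greater than 4.  */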
5387 /* Return true if PCS_VARIANT should use VFP registers. */
5388 static bool
5389 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5391 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5393 static bool seen_thumb1_vfp = false;
5395 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5397 sorry ("Thumb-1 hard-float VFP ABI");
5398 /* sorry() is not immediately fatal, so only display this once. */
5399 seen_thumb1_vfp = true;
5402 return true;
5405 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5406 return false;
5408 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5409 (TARGET_VFP_DOUBLE || !is_double));
5412 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5413 suitable for passing or returning in VFP registers for the PCS
5414 variant selected. If it is, then *BASE_MODE is updated to contain
5415 a machine mode describing each element of the argument's type and
5416 *COUNT to hold the number of such elements. */
5417 static bool
5418 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5419 machine_mode mode, const_tree type,
5420 machine_mode *base_mode, int *count)
5422 machine_mode new_mode = VOIDmode;
5424 /* If we have the type information, prefer that to working things
5425 out from the mode. */
5426 if (type)
5428 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5430 if (ag_count > 0 && ag_count <= 4)
5431 *count = ag_count;
5432 else
5433 return false;
5435 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5436 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5437 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5439 *count = 1;
5440 new_mode = mode;
5442 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5444 *count = 2;
5445 new_mode = (mode == DCmode ? DFmode : SFmode);
5447 else
5448 return false;
5451 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5452 return false;
5454 *base_mode = new_mode;
5455 return true;
5458 static bool
5459 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5460 machine_mode mode, const_tree type)
5462 int count ATTRIBUTE_UNUSED;
5463 machine_mode ag_mode ATTRIBUTE_UNUSED;
5465 if (!use_vfp_abi (pcs_variant, false))
5466 return false;
5467 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5468 &ag_mode, &count);
5471 static bool
5472 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5473 const_tree type)
5475 if (!use_vfp_abi (pcum->pcs_variant, false))
5476 return false;
5478 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5479 &pcum->aapcs_vfp_rmode,
5480 &pcum->aapcs_vfp_rcount);
5483 static bool
5484 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5485 const_tree type ATTRIBUTE_UNUSED)
5487 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5488 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5489 int regno;
5491 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5492 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5494 pcum->aapcs_vfp_reg_alloc = mask << regno;
5495 if (mode == BLKmode
5496 || (mode == TImode && ! TARGET_NEON)
5497 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5499 int i;
5500 int rcount = pcum->aapcs_vfp_rcount;
5501 int rshift = shift;
5502 machine_mode rmode = pcum->aapcs_vfp_rmode;
5503 rtx par;
5504 if (!TARGET_NEON)
5506 /* Avoid using unsupported vector modes. */
5507 if (rmode == V2SImode)
5508 rmode = DImode;
5509 else if (rmode == V4SImode)
5511 rmode = DImode;
5512 rcount *= 2;
5513 rshift /= 2;
5516 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5517 for (i = 0; i < rcount; i++)
5519 rtx tmp = gen_rtx_REG (rmode,
5520 FIRST_VFP_REGNUM + regno + i * rshift);
5521 tmp = gen_rtx_EXPR_LIST
5522 (VOIDmode, tmp,
5523 GEN_INT (i * GET_MODE_SIZE (rmode)));
5524 XVECEXP (par, 0, i) = tmp;
5527 pcum->aapcs_reg = par;
5529 else
5530 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5531 return true;
5533 return false;
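/* Allocation sketch (illustrative): for a candidate with
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2, shift is 2 and
   mask is 0xf, so the loop above searches aapcs_vfp_regs_free for four
   consecutive free single-precision slots starting at an even S
   register (i.e. a free double-precision pair such as d0/d1, occupying
   s0..s3), and records the choice in aapcs_vfp_reg_alloc.  */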
5536 static rtx
5537 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5538 machine_mode mode,
5539 const_tree type ATTRIBUTE_UNUSED)
5541 if (!use_vfp_abi (pcs_variant, false))
5542 return NULL;
5544 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5546 int count;
5547 machine_mode ag_mode;
5548 int i;
5549 rtx par;
5550 int shift;
5552 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5553 &ag_mode, &count);
5555 if (!TARGET_NEON)
5557 if (ag_mode == V2SImode)
5558 ag_mode = DImode;
5559 else if (ag_mode == V4SImode)
5561 ag_mode = DImode;
5562 count *= 2;
5565 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5566 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5567 for (i = 0; i < count; i++)
5569 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5570 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5571 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5572 XVECEXP (par, 0, i) = tmp;
5575 return par;
5578 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5581 static void
5582 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5583 machine_mode mode ATTRIBUTE_UNUSED,
5584 const_tree type ATTRIBUTE_UNUSED)
5586 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5587 pcum->aapcs_vfp_reg_alloc = 0;
5588 return;
5591 #define AAPCS_CP(X) \
5593 aapcs_ ## X ## _cum_init, \
5594 aapcs_ ## X ## _is_call_candidate, \
5595 aapcs_ ## X ## _allocate, \
5596 aapcs_ ## X ## _is_return_candidate, \
5597 aapcs_ ## X ## _allocate_return_reg, \
5598 aapcs_ ## X ## _advance \
5601 /* Table of co-processors that can be used to pass arguments in
5602 registers. Ideally no argument should be a candidate for more than
5603 one co-processor table entry, but the table is processed in order
5604 and stops after the first match. If that entry then fails to put
5605 the argument into a co-processor register, the argument will go on
5606 the stack. */
5607 static struct
5609 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5610 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5612 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5613 BLKmode) is a candidate for this co-processor's registers; this
5614 function should ignore any position-dependent state in
5615 CUMULATIVE_ARGS and only use call-type dependent information. */
5616 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5618 /* Return true if the argument does get a co-processor register; it
5619 should set aapcs_reg to an RTX of the register allocated as is
5620 required for a return from FUNCTION_ARG. */
5621 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5623 /* Return true if a result of mode MODE (or type TYPE if MODE is
5624 BLKmode) can be returned in this co-processor's registers. */
5625 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5627 /* Allocate and return an RTX element to hold the return value of a
5628 call; this routine must not fail and will only be called if
5629 is_return_candidate returned true with the same parameters. */
5630 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5632 /* Finish processing this argument and prepare to start processing
5633 the next one. */
5634 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5635 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5637 AAPCS_CP(vfp)
5640 #undef AAPCS_CP
5642 static int
5643 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5644 const_tree type)
5646 int i;
5648 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5649 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5650 return i;
5652 return -1;
5655 static int
5656 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5658 /* We aren't passed a decl, so we can't check that a call is local.
5659 However, it isn't clear that that would be a win anyway, since it
5660 might limit some tail-calling opportunities. */
5661 enum arm_pcs pcs_variant;
5663 if (fntype)
5665 const_tree fndecl = NULL_TREE;
5667 if (TREE_CODE (fntype) == FUNCTION_DECL)
5669 fndecl = fntype;
5670 fntype = TREE_TYPE (fntype);
5673 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5675 else
5676 pcs_variant = arm_pcs_default;
5678 if (pcs_variant != ARM_PCS_AAPCS)
5680 int i;
5682 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5683 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5684 TYPE_MODE (type),
5685 type))
5686 return i;
5688 return -1;
5691 static rtx
5692 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5693 const_tree fntype)
5695 /* We aren't passed a decl, so we can't check that a call is local.
5696 However, it isn't clear that that would be a win anyway, since it
5697 might limit some tail-calling opportunities. */
5698 enum arm_pcs pcs_variant;
5699 int unsignedp ATTRIBUTE_UNUSED;
5701 if (fntype)
5703 const_tree fndecl = NULL_TREE;
5705 if (TREE_CODE (fntype) == FUNCTION_DECL)
5707 fndecl = fntype;
5708 fntype = TREE_TYPE (fntype);
5711 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5713 else
5714 pcs_variant = arm_pcs_default;
5716 /* Promote integer types. */
5717 if (type && INTEGRAL_TYPE_P (type))
5718 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5720 if (pcs_variant != ARM_PCS_AAPCS)
5722 int i;
5724 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5725 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5726 type))
5727 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5728 mode, type);
5731 /* Promotes small structs returned in a register to full-word size
5732 for big-endian AAPCS. */
5733 if (type && arm_return_in_msb (type))
5735 HOST_WIDE_INT size = int_size_in_bytes (type);
5736 if (size % UNITS_PER_WORD != 0)
5738 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5739 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5743 return gen_rtx_REG (mode, R0_REGNUM);
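/* For instance (illustrative), on a big-endian AAPCS target a 3-byte
   struct returned in registers has its size rounded up to 4 bytes here,
   so the value comes back as an SImode quantity in r0 with the data in
   the most significant bytes, as arm_return_in_msb requires.  */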
5746 static rtx
5747 aapcs_libcall_value (machine_mode mode)
5749 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5750 && GET_MODE_SIZE (mode) <= 4)
5751 mode = SImode;
5753 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5756 /* Lay out a function argument using the AAPCS rules. The rule
5757 numbers referred to here are those in the AAPCS. */
5758 static void
5759 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5760 const_tree type, bool named)
5762 int nregs, nregs2;
5763 int ncrn;
5765 /* We only need to do this once per argument. */
5766 if (pcum->aapcs_arg_processed)
5767 return;
5769 pcum->aapcs_arg_processed = true;
5771 /* Special case: if named is false then we are handling an incoming
5772 anonymous argument which is on the stack. */
5773 if (!named)
5774 return;
5776 /* Is this a potential co-processor register candidate? */
5777 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5779 int slot = aapcs_select_call_coproc (pcum, mode, type);
5780 pcum->aapcs_cprc_slot = slot;
5782 /* We don't have to apply any of the rules from part B of the
5783 preparation phase, these are handled elsewhere in the
5784 compiler. */
5786 if (slot >= 0)
5788 /* A co-processor register candidate goes either in its own
5789 class of registers or on the stack. */
5790 if (!pcum->aapcs_cprc_failed[slot])
5792 /* C1.cp - Try to allocate the argument to co-processor
5793 registers. */
5794 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5795 return;
5797 /* C2.cp - Put the argument on the stack and note that we
5798 can't assign any more candidates in this slot. We also
5799 need to note that we have allocated stack space, so that
5800 we won't later try to split a non-cprc candidate between
5801 core registers and the stack. */
5802 pcum->aapcs_cprc_failed[slot] = true;
5803 pcum->can_split = false;
5806 /* We didn't get a register, so this argument goes on the
5807 stack. */
5808 gcc_assert (pcum->can_split == false);
5809 return;
5813 /* C3 - For double-word aligned arguments, round the NCRN up to the
5814 next even number. */
5815 ncrn = pcum->aapcs_ncrn;
5816 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5817 ncrn++;
5819 nregs = ARM_NUM_REGS2(mode, type);
5821 /* Sigh, this test should really assert that nregs > 0, but a GCC
5822 extension allows empty structs and then gives them zero size; it
5823 then allows such a structure to be passed by value. For some of
5824 the code below we have to pretend that such an argument has
5825 non-zero size so that we 'locate' it correctly either in
5826 registers or on the stack. */
5827 gcc_assert (nregs >= 0);
5829 nregs2 = nregs ? nregs : 1;
5831 /* C4 - Argument fits entirely in core registers. */
5832 if (ncrn + nregs2 <= NUM_ARG_REGS)
5834 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5835 pcum->aapcs_next_ncrn = ncrn + nregs;
5836 return;
5839 /* C5 - Some core registers left and there are no arguments already
5840 on the stack: split this argument between the remaining core
5841 registers and the stack. */
5842 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5844 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5845 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5846 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5847 return;
5850 /* C6 - NCRN is set to 4. */
5851 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5853 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5854 return;
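/* A worked example of the rules above (illustrative; assumes the base
   AAPCS with no co-processor candidates):

       void f (int a, long long b, int c);

   a is assigned r0 (C4).  b needs double-word alignment, so C3 rounds
   the NCRN up from 1 to 2 and C4 places b in r2/r3; r1 stays unused.
   c then finds NCRN == 4, so C6-C8 send it to the stack.  If instead a
   16-byte structure arrived while NCRN == 2 and nothing was yet on the
   stack, C5 would split it: aapcs_reg = r2 and aapcs_partial = 8, so
   the first 8 bytes travel in r2/r3 and the remainder on the stack.  */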
5857 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5858 for a call to a function whose data type is FNTYPE.
5859 For a library call, FNTYPE is NULL. */
5860 void
5861 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5862 rtx libname,
5863 tree fndecl ATTRIBUTE_UNUSED)
5865 /* Long call handling. */
5866 if (fntype)
5867 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5868 else
5869 pcum->pcs_variant = arm_pcs_default;
5871 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5873 if (arm_libcall_uses_aapcs_base (libname))
5874 pcum->pcs_variant = ARM_PCS_AAPCS;
5876 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5877 pcum->aapcs_reg = NULL_RTX;
5878 pcum->aapcs_partial = 0;
5879 pcum->aapcs_arg_processed = false;
5880 pcum->aapcs_cprc_slot = -1;
5881 pcum->can_split = true;
5883 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5885 int i;
5887 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5889 pcum->aapcs_cprc_failed[i] = false;
5890 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5893 return;
5896 /* Legacy ABIs */
5898 /* On the ARM, the offset starts at 0. */
5899 pcum->nregs = 0;
5900 pcum->iwmmxt_nregs = 0;
5901 pcum->can_split = true;
5903 /* Varargs vectors are treated the same as long long.
5904 named_count avoids having to change the way ARM handles 'named'. */
5905 pcum->named_count = 0;
5906 pcum->nargs = 0;
5908 if (TARGET_REALLY_IWMMXT && fntype)
5910 tree fn_arg;
5912 for (fn_arg = TYPE_ARG_TYPES (fntype);
5913 fn_arg;
5914 fn_arg = TREE_CHAIN (fn_arg))
5915 pcum->named_count += 1;
5917 if (! pcum->named_count)
5918 pcum->named_count = INT_MAX;
5922 /* Return true if we use LRA instead of reload pass. */
5923 static bool
5924 arm_lra_p (void)
5926 return arm_lra_flag;
5929 /* Return true if mode/type need doubleword alignment. */
5930 static bool
5931 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5933 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5934 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
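/* For example (illustrative), DImode and DFmode arguments satisfy this
   test, as does a parameter whose type was declared with something like

       typedef int aligned_int __attribute__ ((aligned (8)));

   since its TYPE_ALIGN (64) exceeds PARM_BOUNDARY (32 on ARM); such
   arguments are then started on an even core register by rule C3
   above.  */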
5938 /* Determine where to put an argument to a function.
5939 Value is zero to push the argument on the stack,
5940 or a hard register in which to store the argument.
5942 MODE is the argument's machine mode.
5943 TYPE is the data type of the argument (as a tree).
5944 This is null for libcalls where that information may
5945 not be available.
5946 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5947 the preceding args and about the function being called.
5948 NAMED is nonzero if this argument is a named parameter
5949 (otherwise it is an extra parameter matching an ellipsis).
5951 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5952 other arguments are passed on the stack. If (NAMED == 0) (which happens
5953 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5954 defined), say it is passed on the stack (function_prologue will
5955 indeed make it be passed on the stack if necessary). */
5957 static rtx
5958 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5959 const_tree type, bool named)
5961 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5962 int nregs;
5964 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5965 a call insn (op3 of a call_value insn). */
5966 if (mode == VOIDmode)
5967 return const0_rtx;
5969 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5971 aapcs_layout_arg (pcum, mode, type, named);
5972 return pcum->aapcs_reg;
5975 /* Varargs vectors are treated the same as long long.
5976 named_count avoids having to change the way ARM handles 'named'. */
5977 if (TARGET_IWMMXT_ABI
5978 && arm_vector_mode_supported_p (mode)
5979 && pcum->named_count > pcum->nargs + 1)
5981 if (pcum->iwmmxt_nregs <= 9)
5982 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5983 else
5985 pcum->can_split = false;
5986 return NULL_RTX;
5990 /* Put doubleword aligned quantities in even register pairs. */
5991 if (pcum->nregs & 1
5992 && ARM_DOUBLEWORD_ALIGN
5993 && arm_needs_doubleword_align (mode, type))
5994 pcum->nregs++;
5996 /* Only allow splitting an arg between regs and memory if all preceding
5997 args were allocated to regs. For args passed by reference we only count
5998 the reference pointer. */
5999 if (pcum->can_split)
6000 nregs = 1;
6001 else
6002 nregs = ARM_NUM_REGS2 (mode, type);
6004 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6005 return NULL_RTX;
6007 return gen_rtx_REG (mode, pcum->nregs);
6010 static unsigned int
6011 arm_function_arg_boundary (machine_mode mode, const_tree type)
6013 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6014 ? DOUBLEWORD_ALIGNMENT
6015 : PARM_BOUNDARY);
6018 static int
6019 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6020 tree type, bool named)
6022 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6023 int nregs = pcum->nregs;
6025 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6027 aapcs_layout_arg (pcum, mode, type, named);
6028 return pcum->aapcs_partial;
6031 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6032 return 0;
6034 if (NUM_ARG_REGS > nregs
6035 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6036 && pcum->can_split)
6037 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6039 return 0;
6042 /* Update the data in PCUM to advance over an argument
6043 of mode MODE and data type TYPE.
6044 (TYPE is null for libcalls where that information may not be available.) */
6046 static void
6047 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6048 const_tree type, bool named)
6050 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6052 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6054 aapcs_layout_arg (pcum, mode, type, named);
6056 if (pcum->aapcs_cprc_slot >= 0)
6058 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6059 type);
6060 pcum->aapcs_cprc_slot = -1;
6063 /* Generic stuff. */
6064 pcum->aapcs_arg_processed = false;
6065 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6066 pcum->aapcs_reg = NULL_RTX;
6067 pcum->aapcs_partial = 0;
6069 else
6071 pcum->nargs += 1;
6072 if (arm_vector_mode_supported_p (mode)
6073 && pcum->named_count > pcum->nargs
6074 && TARGET_IWMMXT_ABI)
6075 pcum->iwmmxt_nregs += 1;
6076 else
6077 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6081 /* Variable sized types are passed by reference. This is a GCC
6082 extension to the ARM ABI. */
6084 static bool
6085 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6086 machine_mode mode ATTRIBUTE_UNUSED,
6087 const_tree type, bool named ATTRIBUTE_UNUSED)
6089 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6092 /* Encode the current state of the #pragma [no_]long_calls. */
6093 typedef enum
6095 OFF, /* No #pragma [no_]long_calls is in effect. */
6096 LONG, /* #pragma long_calls is in effect. */
6097 SHORT /* #pragma no_long_calls is in effect. */
6098 } arm_pragma_enum;
6100 static arm_pragma_enum arm_pragma_long_calls = OFF;
6102 void
6103 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6105 arm_pragma_long_calls = LONG;
6108 void
6109 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6111 arm_pragma_long_calls = SHORT;
6114 void
6115 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6117 arm_pragma_long_calls = OFF;
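/* Illustrative use of these pragmas in user code:

       #pragma long_calls
       void far_func (void);      // receives the long_call attribute
       #pragma no_long_calls
       void near_func (void);     // receives the short_call attribute
       #pragma long_calls_off
       void plain_func (void);    // receives neither attribute

   The attribute is attached to the function type by
   arm_set_default_type_attributes below.  */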
6120 /* Handle an attribute requiring a FUNCTION_DECL;
6121 arguments as in struct attribute_spec.handler. */
6122 static tree
6123 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6124 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6126 if (TREE_CODE (*node) != FUNCTION_DECL)
6128 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6129 name);
6130 *no_add_attrs = true;
6133 return NULL_TREE;
6136 /* Handle an "interrupt" or "isr" attribute;
6137 arguments as in struct attribute_spec.handler. */
6138 static tree
6139 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6140 bool *no_add_attrs)
6142 if (DECL_P (*node))
6144 if (TREE_CODE (*node) != FUNCTION_DECL)
6146 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6147 name);
6148 *no_add_attrs = true;
6150 /* FIXME: the argument, if any, is checked for type attributes;
6151 should it be checked for decl ones? */
6153 else
6155 if (TREE_CODE (*node) == FUNCTION_TYPE
6156 || TREE_CODE (*node) == METHOD_TYPE)
6158 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6160 warning (OPT_Wattributes, "%qE attribute ignored",
6161 name);
6162 *no_add_attrs = true;
6165 else if (TREE_CODE (*node) == POINTER_TYPE
6166 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6167 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6168 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6170 *node = build_variant_type_copy (*node);
6171 TREE_TYPE (*node) = build_type_attribute_variant
6172 (TREE_TYPE (*node),
6173 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6174 *no_add_attrs = true;
6176 else
6178 /* Possibly pass this attribute on from the type to a decl. */
6179 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6180 | (int) ATTR_FLAG_FUNCTION_NEXT
6181 | (int) ATTR_FLAG_ARRAY_NEXT))
6183 *no_add_attrs = true;
6184 return tree_cons (name, args, NULL_TREE);
6186 else
6188 warning (OPT_Wattributes, "%qE attribute ignored",
6189 name);
6194 return NULL_TREE;
6197 /* Handle a "pcs" attribute; arguments as in struct
6198 attribute_spec.handler. */
6199 static tree
6200 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6201 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6203 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6205 warning (OPT_Wattributes, "%qE attribute ignored", name);
6206 *no_add_attrs = true;
6208 return NULL_TREE;
6211 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6212 /* Handle the "notshared" attribute. This attribute is another way of
6213 requesting hidden visibility. ARM's compiler supports
6214 "__declspec(notshared)"; we support the same thing via an
6215 attribute. */
6217 static tree
6218 arm_handle_notshared_attribute (tree *node,
6219 tree name ATTRIBUTE_UNUSED,
6220 tree args ATTRIBUTE_UNUSED,
6221 int flags ATTRIBUTE_UNUSED,
6222 bool *no_add_attrs)
6224 tree decl = TYPE_NAME (*node);
6226 if (decl)
6228 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6229 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6230 *no_add_attrs = false;
6232 return NULL_TREE;
6234 #endif
6236 /* Return 0 if the attributes for two types are incompatible, 1 if they
6237 are compatible, and 2 if they are nearly compatible (which causes a
6238 warning to be generated). */
6239 static int
6240 arm_comp_type_attributes (const_tree type1, const_tree type2)
6242 int l1, l2, s1, s2;
6244 /* Check for mismatch of non-default calling convention. */
6245 if (TREE_CODE (type1) != FUNCTION_TYPE)
6246 return 1;
6248 /* Check for mismatched call attributes. */
6249 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6250 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6251 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6252 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6254 /* Only bother to check if an attribute is defined. */
6255 if (l1 | l2 | s1 | s2)
6257 /* If one type has an attribute, the other must have the same attribute. */
6258 if ((l1 != l2) || (s1 != s2))
6259 return 0;
6261 /* Disallow mixed attributes. */
6262 if ((l1 & s2) || (l2 & s1))
6263 return 0;
6266 /* Check for mismatched ISR attribute. */
6267 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6268 if (! l1)
6269 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6270 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6271 if (! l2)
6272 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6273 if (l1 != l2)
6274 return 0;
6276 return 1;
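/* For example (illustrative), these two function types compare as
   incompatible (0) because only one of them carries a call-type
   attribute:

       typedef void plain_fn (void);
       typedef void far_fn (void) __attribute__ ((long_call));

   Mixing long_call on one type with short_call on the other is
   rejected for the same reason.  */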
6279 /* Assigns default attributes to a newly defined type. This is used to
6280 set short_call/long_call attributes for function types of
6281 functions defined inside corresponding #pragma scopes. */
6282 static void
6283 arm_set_default_type_attributes (tree type)
6285 /* Add __attribute__ ((long_call)) to all functions when inside
6286 #pragma long_calls, or __attribute__ ((short_call)) when inside
6287 #pragma no_long_calls. */
6288 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6290 tree type_attr_list, attr_name;
6291 type_attr_list = TYPE_ATTRIBUTES (type);
6293 if (arm_pragma_long_calls == LONG)
6294 attr_name = get_identifier ("long_call");
6295 else if (arm_pragma_long_calls == SHORT)
6296 attr_name = get_identifier ("short_call");
6297 else
6298 return;
6300 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6301 TYPE_ATTRIBUTES (type) = type_attr_list;
6305 /* Return true if DECL is known to be linked into section SECTION. */
6307 static bool
6308 arm_function_in_section_p (tree decl, section *section)
6310 /* We can only be certain about functions defined in the same
6311 compilation unit. */
6312 if (!TREE_STATIC (decl))
6313 return false;
6315 /* Make sure that SYMBOL always binds to the definition in this
6316 compilation unit. */
6317 if (!targetm.binds_local_p (decl))
6318 return false;
6320 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6321 if (!DECL_SECTION_NAME (decl))
6323 /* Make sure that we will not create a unique section for DECL. */
6324 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6325 return false;
6328 return function_section (decl) == section;
6331 /* Return nonzero if a 32-bit "long_call" should be generated for
6332 a call from the current function to DECL. We generate a long_call
6333 if the function:
6335 a. has an __attribute__ ((long_call))
6336 or b. is within the scope of a #pragma long_calls
6337 or c. the -mlong-calls command line switch has been specified
6339 However we do not generate a long call if the function:
6341 d. has an __attribute__ ((short_call))
6342 or e. is inside the scope of a #pragma no_long_calls
6343 or f. is defined in the same section as the current function. */
6345 bool
6346 arm_is_long_call_p (tree decl)
6348 tree attrs;
6350 if (!decl)
6351 return TARGET_LONG_CALLS;
6353 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6354 if (lookup_attribute ("short_call", attrs))
6355 return false;
6357 /* For "f", be conservative, and only cater for cases in which the
6358 whole of the current function is placed in the same section. */
6359 if (!flag_reorder_blocks_and_partition
6360 && TREE_CODE (decl) == FUNCTION_DECL
6361 && arm_function_in_section_p (decl, current_function_section ()))
6362 return false;
6364 if (lookup_attribute ("long_call", attrs))
6365 return true;
6367 return TARGET_LONG_CALLS;
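/* For instance (illustrative):

       extern void log_msg (const char *) __attribute__ ((long_call));
       void helper (void) __attribute__ ((short_call));

   A call to log_msg is emitted as a long call unless case "f" applies
   (it is known to end up in the same section as the caller), whereas a
   call to helper is never emitted as a long call, regardless of
   -mlong-calls or the pragmas above.  */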
6370 /* Return nonzero if it is ok to make a tail-call to DECL. */
6371 static bool
6372 arm_function_ok_for_sibcall (tree decl, tree exp)
6374 unsigned long func_type;
6376 if (cfun->machine->sibcall_blocked)
6377 return false;
6379 /* Never tailcall something if we are generating code for Thumb-1. */
6380 if (TARGET_THUMB1)
6381 return false;
6383 /* The PIC register is live on entry to VxWorks PLT entries, so we
6384 must make the call before restoring the PIC register. */
6385 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6386 return false;
6388 /* If we are interworking and the function is not declared static
6389 then we can't tail-call it unless we know that it exists in this
6390 compilation unit (since it might be a Thumb routine). */
6391 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6392 && !TREE_ASM_WRITTEN (decl))
6393 return false;
6395 func_type = arm_current_func_type ();
6396 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6397 if (IS_INTERRUPT (func_type))
6398 return false;
6400 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6402 /* Check that the return value locations are the same. For
6403 example that we aren't returning a value from the sibling in
6404 a VFP register but then need to transfer it to a core
6405 register. */
6406 rtx a, b;
6408 a = arm_function_value (TREE_TYPE (exp), decl, false);
6409 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6410 cfun->decl, false);
6411 if (!rtx_equal_p (a, b))
6412 return false;
6415 /* Never tailcall if function may be called with a misaligned SP. */
6416 if (IS_STACKALIGN (func_type))
6417 return false;
6419 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6420 references should become a NOP. Don't convert such calls into
6421 sibling calls. */
6422 if (TARGET_AAPCS_BASED
6423 && arm_abi == ARM_ABI_AAPCS
6424 && decl
6425 && DECL_WEAK (decl))
6426 return false;
6428 /* Everything else is ok. */
6429 return true;
6433 /* Addressing mode support functions. */
6435 /* Return nonzero if X is a legitimate immediate operand when compiling
6436 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6438 legitimate_pic_operand_p (rtx x)
6440 if (GET_CODE (x) == SYMBOL_REF
6441 || (GET_CODE (x) == CONST
6442 && GET_CODE (XEXP (x, 0)) == PLUS
6443 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6444 return 0;
6446 return 1;
6449 /* Record that the current function needs a PIC register. Initialize
6450 cfun->machine->pic_reg if we have not already done so. */
6452 static void
6453 require_pic_register (void)
6455 /* A lot of the logic here is made obscure by the fact that this
6456 routine gets called as part of the rtx cost estimation process.
6457 We don't want those calls to affect any assumptions about the real
6458 function; and further, we can't call entry_of_function() until we
6459 start the real expansion process. */
6460 if (!crtl->uses_pic_offset_table)
6462 gcc_assert (can_create_pseudo_p ());
6463 if (arm_pic_register != INVALID_REGNUM
6464 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6466 if (!cfun->machine->pic_reg)
6467 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6469 /* Play games to avoid marking the function as needing pic
6470 if we are being called as part of the cost-estimation
6471 process. */
6472 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6473 crtl->uses_pic_offset_table = 1;
6475 else
6477 rtx_insn *seq, *insn;
6479 if (!cfun->machine->pic_reg)
6480 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6482 /* Play games to avoid marking the function as needing pic
6483 if we are being called as part of the cost-estimation
6484 process. */
6485 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6487 crtl->uses_pic_offset_table = 1;
6488 start_sequence ();
6490 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6491 && arm_pic_register > LAST_LO_REGNUM)
6492 emit_move_insn (cfun->machine->pic_reg,
6493 gen_rtx_REG (Pmode, arm_pic_register));
6494 else
6495 arm_load_pic_register (0UL);
6497 seq = get_insns ();
6498 end_sequence ();
6500 for (insn = seq; insn; insn = NEXT_INSN (insn))
6501 if (INSN_P (insn))
6502 INSN_LOCATION (insn) = prologue_location;
6504 /* We can be called during expansion of PHI nodes, where
6505 we can't yet emit instructions directly in the final
6506 insn stream. Queue the insns on the entry edge, they will
6507 be committed after everything else is expanded. */
6508 insert_insn_on_edge (seq,
6509 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6516 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6518 if (GET_CODE (orig) == SYMBOL_REF
6519 || GET_CODE (orig) == LABEL_REF)
6521 rtx insn;
6523 if (reg == 0)
6525 gcc_assert (can_create_pseudo_p ());
6526 reg = gen_reg_rtx (Pmode);
6529 /* VxWorks does not impose a fixed gap between segments; the run-time
6530 gap can be different from the object-file gap. We therefore can't
6531 use GOTOFF unless we are absolutely sure that the symbol is in the
6532 same segment as the GOT. Unfortunately, the flexibility of linker
6533 scripts means that we can't be sure of that in general, so assume
6534 that GOTOFF is never valid on VxWorks. */
6535 if ((GET_CODE (orig) == LABEL_REF
6536 || (GET_CODE (orig) == SYMBOL_REF &&
6537 SYMBOL_REF_LOCAL_P (orig)))
6538 && NEED_GOT_RELOC
6539 && arm_pic_data_is_text_relative)
6540 insn = arm_pic_static_addr (orig, reg);
6541 else
6543 rtx pat;
6544 rtx mem;
6546 /* If this function doesn't have a pic register, create one now. */
6547 require_pic_register ();
6549 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6551 /* Make the MEM as close to a constant as possible. */
6552 mem = SET_SRC (pat);
6553 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6554 MEM_READONLY_P (mem) = 1;
6555 MEM_NOTRAP_P (mem) = 1;
6557 insn = emit_insn (pat);
6560 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6561 by the loop optimizer. */
6562 set_unique_reg_note (insn, REG_EQUAL, orig);
6564 return reg;
6566 else if (GET_CODE (orig) == CONST)
6568 rtx base, offset;
6570 if (GET_CODE (XEXP (orig, 0)) == PLUS
6571 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6572 return orig;
6574 /* Handle the case where we have: const (UNSPEC_TLS). */
6575 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6576 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6577 return orig;
6579 /* Handle the case where we have:
6580 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6581 CONST_INT. */
6582 if (GET_CODE (XEXP (orig, 0)) == PLUS
6583 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6584 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6586 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6587 return orig;
6590 if (reg == 0)
6592 gcc_assert (can_create_pseudo_p ());
6593 reg = gen_reg_rtx (Pmode);
6596 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6598 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6599 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6600 base == reg ? 0 : reg);
6602 if (CONST_INT_P (offset))
6604 /* The base register doesn't really matter; we only want to
6605 test the index for the appropriate mode. */
6606 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6608 gcc_assert (can_create_pseudo_p ());
6609 offset = force_reg (Pmode, offset);
6612 if (CONST_INT_P (offset))
6613 return plus_constant (Pmode, base, INTVAL (offset));
6616 if (GET_MODE_SIZE (mode) > 4
6617 && (GET_MODE_CLASS (mode) == MODE_INT
6618 || TARGET_SOFT_FLOAT))
6620 emit_insn (gen_addsi3 (reg, base, offset));
6621 return reg;
6624 return gen_rtx_PLUS (Pmode, base, offset);
6627 return orig;
6631 /* Find a spare register to use during the prolog of a function. */
6633 static int
6634 thumb_find_work_register (unsigned long pushed_regs_mask)
6636 int reg;
6638 /* Check the argument registers first as these are call-used. The
6639 register allocation order means that sometimes r3 might be used
6640 but earlier argument registers might not, so check them all. */
6641 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6642 if (!df_regs_ever_live_p (reg))
6643 return reg;
6645 /* Before going on to check the call-saved registers we can try a couple
6646 more ways of deducing that r3 is available. The first is when we are
6647 pushing anonymous arguments onto the stack and we have fewer than 4
6648 registers' worth of fixed arguments (*). In this case r3 will be part of
6649 the variable argument list and so we can be sure that it will be
6650 pushed right at the start of the function. Hence it will be available
6651 for the rest of the prologue.
6652 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6653 if (cfun->machine->uses_anonymous_args
6654 && crtl->args.pretend_args_size > 0)
6655 return LAST_ARG_REGNUM;
6657 /* The other case is when we have fixed arguments but fewer than 4 registers'
6658 worth. In this case r3 might be used in the body of the function, but
6659 it is not being used to convey an argument into the function. In theory
6660 we could just check crtl->args.size to see how many bytes are
6661 being passed in argument registers, but it seems that it is unreliable.
6662 Sometimes it will have the value 0 when in fact arguments are being
6663 passed. (See testcase execute/20021111-1.c for an example). So we also
6664 check the args_info.nregs field as well. The problem with this field is
6665 that it makes no allowances for arguments that are passed to the
6666 function but which are not used. Hence we could miss an opportunity
6667 when a function has an unused argument in r3. But it is better to be
6668 safe than to be sorry. */
6669 if (! cfun->machine->uses_anonymous_args
6670 && crtl->args.size >= 0
6671 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6672 && (TARGET_AAPCS_BASED
6673 ? crtl->args.info.aapcs_ncrn < 4
6674 : crtl->args.info.nregs < 4))
6675 return LAST_ARG_REGNUM;
6677 /* Otherwise look for a call-saved register that is going to be pushed. */
6678 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6679 if (pushed_regs_mask & (1 << reg))
6680 return reg;
6682 if (TARGET_THUMB2)
6684 /* Thumb-2 can use high regs. */
6685 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6686 if (pushed_regs_mask & (1 << reg))
6687 return reg;
6689 /* Something went wrong - thumb_compute_save_reg_mask()
6690 should have arranged for a suitable register to be pushed. */
6691 gcc_unreachable ();
6694 static GTY(()) int pic_labelno;
6696 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6697 low register. */
6699 void
6700 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6702 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6704 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6705 return;
6707 gcc_assert (flag_pic);
6709 pic_reg = cfun->machine->pic_reg;
6710 if (TARGET_VXWORKS_RTP)
6712 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6713 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6714 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6716 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6718 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6719 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6721 else
6723 /* We use an UNSPEC rather than a LABEL_REF because this label
6724 never appears in the code stream. */
6726 labelno = GEN_INT (pic_labelno++);
6727 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6728 l1 = gen_rtx_CONST (VOIDmode, l1);
6730 /* On the ARM the PC register contains 'dot + 8' at the time of the
6731 addition, on the Thumb it is 'dot + 4'. */
6732 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6733 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6734 UNSPEC_GOTSYM_OFF);
6735 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6737 if (TARGET_32BIT)
6739 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6741 else /* TARGET_THUMB1 */
6743 if (arm_pic_register != INVALID_REGNUM
6744 && REGNO (pic_reg) > LAST_LO_REGNUM)
6746 /* We will have pushed the pic register, so we should always be
6747 able to find a work register. */
6748 pic_tmp = gen_rtx_REG (SImode,
6749 thumb_find_work_register (saved_regs));
6750 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6751 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6752 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6754 else if (arm_pic_register != INVALID_REGNUM
6755 && arm_pic_register > LAST_LO_REGNUM
6756 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6758 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6759 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6760 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6762 else
6763 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6767 /* Need to emit this whether or not we obey regdecls,
6768 since setjmp/longjmp can cause life info to screw up. */
6769 emit_use (pic_reg);
6772 /* Generate code to load the address of a static var when flag_pic is set. */
6773 static rtx
6774 arm_pic_static_addr (rtx orig, rtx reg)
6776 rtx l1, labelno, offset_rtx, insn;
6778 gcc_assert (flag_pic);
6780 /* We use an UNSPEC rather than a LABEL_REF because this label
6781 never appears in the code stream. */
6782 labelno = GEN_INT (pic_labelno++);
6783 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6784 l1 = gen_rtx_CONST (VOIDmode, l1);
6786 /* On the ARM the PC register contains 'dot + 8' at the time of the
6787 addition, on the Thumb it is 'dot + 4'. */
6788 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6789 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6790 UNSPEC_SYMBOL_OFFSET);
6791 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6793 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6794 return insn;
6797 /* Return nonzero if X is valid as an ARM state addressing register. */
6798 static int
6799 arm_address_register_rtx_p (rtx x, int strict_p)
6801 int regno;
6803 if (!REG_P (x))
6804 return 0;
6806 regno = REGNO (x);
6808 if (strict_p)
6809 return ARM_REGNO_OK_FOR_BASE_P (regno);
6811 return (regno <= LAST_ARM_REGNUM
6812 || regno >= FIRST_PSEUDO_REGISTER
6813 || regno == FRAME_POINTER_REGNUM
6814 || regno == ARG_POINTER_REGNUM);
6817 /* Return TRUE if this rtx is the difference of a symbol and a label,
6818 and will reduce to a PC-relative relocation in the object file.
6819 Expressions like this can be left alone when generating PIC, rather
6820 than forced through the GOT. */
6821 static int
6822 pcrel_constant_p (rtx x)
6824 if (GET_CODE (x) == MINUS)
6825 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6827 return FALSE;
6830 /* Return true if X will surely end up in an index register after the next
6831 splitting pass. */
6832 static bool
6833 will_be_in_index_register (const_rtx x)
6835 /* arm.md: calculate_pic_address will split this into a register. */
6836 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6839 /* Return nonzero if X is a valid ARM state address operand. */
6841 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6842 int strict_p)
6844 bool use_ldrd;
6845 enum rtx_code code = GET_CODE (x);
6847 if (arm_address_register_rtx_p (x, strict_p))
6848 return 1;
6850 use_ldrd = (TARGET_LDRD
6851 && (mode == DImode
6852 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6854 if (code == POST_INC || code == PRE_DEC
6855 || ((code == PRE_INC || code == POST_DEC)
6856 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6857 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6859 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6860 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6861 && GET_CODE (XEXP (x, 1)) == PLUS
6862 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6864 rtx addend = XEXP (XEXP (x, 1), 1);
6866 /* Don't allow ldrd post-increment by register because it's hard
6867 to fix up invalid register choices. */
6868 if (use_ldrd
6869 && GET_CODE (x) == POST_MODIFY
6870 && REG_P (addend))
6871 return 0;
6873 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6874 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6877 /* After reload constants split into minipools will have addresses
6878 from a LABEL_REF. */
6879 else if (reload_completed
6880 && (code == LABEL_REF
6881 || (code == CONST
6882 && GET_CODE (XEXP (x, 0)) == PLUS
6883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6884 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6885 return 1;
6887 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6888 return 0;
6890 else if (code == PLUS)
6892 rtx xop0 = XEXP (x, 0);
6893 rtx xop1 = XEXP (x, 1);
6895 return ((arm_address_register_rtx_p (xop0, strict_p)
6896 && ((CONST_INT_P (xop1)
6897 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6898 || (!strict_p && will_be_in_index_register (xop1))))
6899 || (arm_address_register_rtx_p (xop1, strict_p)
6900 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6903 #if 0
6904 /* Reload currently can't handle MINUS, so disable this for now */
6905 else if (GET_CODE (x) == MINUS)
6907 rtx xop0 = XEXP (x, 0);
6908 rtx xop1 = XEXP (x, 1);
6910 return (arm_address_register_rtx_p (xop0, strict_p)
6911 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6913 #endif
6915 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6916 && code == SYMBOL_REF
6917 && CONSTANT_POOL_ADDRESS_P (x)
6918 && ! (flag_pic
6919 && symbol_mentioned_p (get_pool_constant (x))
6920 && ! pcrel_constant_p (get_pool_constant (x))))
6921 return 1;
6923 return 0;
6926 /* Return nonzero if X is a valid Thumb-2 address operand. */
6927 static int
6928 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6930 bool use_ldrd;
6931 enum rtx_code code = GET_CODE (x);
6933 if (arm_address_register_rtx_p (x, strict_p))
6934 return 1;
6936 use_ldrd = (TARGET_LDRD
6937 && (mode == DImode
6938 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6940 if (code == POST_INC || code == PRE_DEC
6941 || ((code == PRE_INC || code == POST_DEC)
6942 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6943 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6945 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6946 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6947 && GET_CODE (XEXP (x, 1)) == PLUS
6948 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6950 /* Thumb-2 only has autoincrement by constant. */
6951 rtx addend = XEXP (XEXP (x, 1), 1);
6952 HOST_WIDE_INT offset;
6954 if (!CONST_INT_P (addend))
6955 return 0;
6957 offset = INTVAL(addend);
6958 if (GET_MODE_SIZE (mode) <= 4)
6959 return (offset > -256 && offset < 256);
6961 return (use_ldrd && offset > -1024 && offset < 1024
6962 && (offset & 3) == 0);
6965 /* After reload constants split into minipools will have addresses
6966 from a LABEL_REF. */
6967 else if (reload_completed
6968 && (code == LABEL_REF
6969 || (code == CONST
6970 && GET_CODE (XEXP (x, 0)) == PLUS
6971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6972 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6973 return 1;
6975 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6976 return 0;
6978 else if (code == PLUS)
6980 rtx xop0 = XEXP (x, 0);
6981 rtx xop1 = XEXP (x, 1);
6983 return ((arm_address_register_rtx_p (xop0, strict_p)
6984 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6985 || (!strict_p && will_be_in_index_register (xop1))))
6986 || (arm_address_register_rtx_p (xop1, strict_p)
6987 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6990 /* Normally we can assign constant values to target registers without
6991 the help of the constant pool. But there are cases where we have to
6992 use the constant pool, such as:
6993 1) assigning a label to a register.
6994 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6996 Constant pool access in format:
6997 (set (reg r0) (mem (symbol_ref (".LC0"))))
6998 will cause the use of the literal pool (later in function arm_reorg).
6999 So here we mark such a format as invalid, and the compiler will then
7000 adjust it into:
7001 (set (reg r0) (symbol_ref (".LC0")))
7002 (set (reg r0) (mem (reg r0))).
7003 No extra register is required, and (mem (reg r0)) won't cause the use
7004 of literal pools. */
7005 else if (arm_disable_literal_pool && code == SYMBOL_REF
7006 && CONSTANT_POOL_ADDRESS_P (x))
7007 return 0;
7009 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7010 && code == SYMBOL_REF
7011 && CONSTANT_POOL_ADDRESS_P (x)
7012 && ! (flag_pic
7013 && symbol_mentioned_p (get_pool_constant (x))
7014 && ! pcrel_constant_p (get_pool_constant (x))))
7015 return 1;
7017 return 0;
7020 /* Return nonzero if INDEX is valid for an address index operand in
7021 ARM state. */
7022 static int
7023 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7024 int strict_p)
7026 HOST_WIDE_INT range;
7027 enum rtx_code code = GET_CODE (index);
7029 /* Standard coprocessor addressing modes. */
7030 if (TARGET_HARD_FLOAT
7031 && TARGET_VFP
7032 && (mode == SFmode || mode == DFmode))
7033 return (code == CONST_INT && INTVAL (index) < 1024
7034 && INTVAL (index) > -1024
7035 && (INTVAL (index) & 3) == 0);
7037 /* For quad modes, we restrict the constant offset to be slightly less
7038 than what the instruction format permits. We do this because for
7039 quad mode moves, we will actually decompose them into two separate
7040 double-mode reads or writes. INDEX must therefore be a valid
7041 (double-mode) offset and so should INDEX+8. */
7042 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7043 return (code == CONST_INT
7044 && INTVAL (index) < 1016
7045 && INTVAL (index) > -1024
7046 && (INTVAL (index) & 3) == 0);
7048 /* We have no such constraint on double mode offsets, so we permit the
7049 full range of the instruction format. */
7050 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7051 return (code == CONST_INT
7052 && INTVAL (index) < 1024
7053 && INTVAL (index) > -1024
7054 && (INTVAL (index) & 3) == 0);
7056 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7057 return (code == CONST_INT
7058 && INTVAL (index) < 1024
7059 && INTVAL (index) > -1024
7060 && (INTVAL (index) & 3) == 0);
7062 if (arm_address_register_rtx_p (index, strict_p)
7063 && (GET_MODE_SIZE (mode) <= 4))
7064 return 1;
7066 if (mode == DImode || mode == DFmode)
7068 if (code == CONST_INT)
7070 HOST_WIDE_INT val = INTVAL (index);
7072 if (TARGET_LDRD)
7073 return val > -256 && val < 256;
7074 else
7075 return val > -4096 && val < 4092;
7078 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7081 if (GET_MODE_SIZE (mode) <= 4
7082 && ! (arm_arch4
7083 && (mode == HImode
7084 || mode == HFmode
7085 || (mode == QImode && outer == SIGN_EXTEND))))
7087 if (code == MULT)
7089 rtx xiop0 = XEXP (index, 0);
7090 rtx xiop1 = XEXP (index, 1);
7092 return ((arm_address_register_rtx_p (xiop0, strict_p)
7093 && power_of_two_operand (xiop1, SImode))
7094 || (arm_address_register_rtx_p (xiop1, strict_p)
7095 && power_of_two_operand (xiop0, SImode)));
7097 else if (code == LSHIFTRT || code == ASHIFTRT
7098 || code == ASHIFT || code == ROTATERT)
7100 rtx op = XEXP (index, 1);
7102 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7103 && CONST_INT_P (op)
7104 && INTVAL (op) > 0
7105 && INTVAL (op) <= 31);
7109 /* For ARM v4 we may be doing a sign-extend operation during the
7110 load. */
7111 if (arm_arch4)
7113 if (mode == HImode
7114 || mode == HFmode
7115 || (outer == SIGN_EXTEND && mode == QImode))
7116 range = 256;
7117 else
7118 range = 4096;
7120 else
7121 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7123 return (code == CONST_INT
7124 && INTVAL (index) < range
7125 && INTVAL (index) > -range);
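/* Illustrative index operands accepted here in ARM state (shown with a
   possible matching instruction):

       (const_int 2040)                    ; ldr  r0, [r1, #2040]
       (mult (reg r2) (const_int 4))       ; ldr  r0, [r1, r2, lsl #2]
       (ashiftrt (reg r2) (const_int 3))   ; ldr  r0, [r1, r2, asr #3]

   whereas a DImode/DFmode access with TARGET_LDRD is restricted to the
   LDRD immediate range, e.g. (const_int 252).  */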
7128 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7129 index operand, i.e. 1, 2, 4 or 8. */
7130 static bool
7131 thumb2_index_mul_operand (rtx op)
7133 HOST_WIDE_INT val;
7135 if (!CONST_INT_P (op))
7136 return false;
7138 val = INTVAL(op);
7139 return (val == 1 || val == 2 || val == 4 || val == 8);
7142 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7143 static int
7144 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7146 enum rtx_code code = GET_CODE (index);
7148 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7149 /* Standard coprocessor addressing modes. */
7150 if (TARGET_HARD_FLOAT
7151 && TARGET_VFP
7152 && (mode == SFmode || mode == DFmode))
7153 return (code == CONST_INT && INTVAL (index) < 1024
7154 /* Thumb-2 allows only > -256 index range for its core register
7155 load/stores. Since we allow SF/DF in core registers, we have
7156 to use the intersection between -256~4096 (core) and -1024~1024
7157 (coprocessor). */
7158 && INTVAL (index) > -256
7159 && (INTVAL (index) & 3) == 0);
7161 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7163 /* For DImode assume values will usually live in core regs
7164 and only allow LDRD addressing modes. */
7165 if (!TARGET_LDRD || mode != DImode)
7166 return (code == CONST_INT
7167 && INTVAL (index) < 1024
7168 && INTVAL (index) > -1024
7169 && (INTVAL (index) & 3) == 0);
7172 /* For quad modes, we restrict the constant offset to be slightly less
7173 than what the instruction format permits. We do this because for
7174 quad mode moves, we will actually decompose them into two separate
7175 double-mode reads or writes. INDEX must therefore be a valid
7176 (double-mode) offset and so should INDEX+8. */
7177 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7178 return (code == CONST_INT
7179 && INTVAL (index) < 1016
7180 && INTVAL (index) > -1024
7181 && (INTVAL (index) & 3) == 0);
7183 /* We have no such constraint on double mode offsets, so we permit the
7184 full range of the instruction format. */
7185 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7186 return (code == CONST_INT
7187 && INTVAL (index) < 1024
7188 && INTVAL (index) > -1024
7189 && (INTVAL (index) & 3) == 0);
7191 if (arm_address_register_rtx_p (index, strict_p)
7192 && (GET_MODE_SIZE (mode) <= 4))
7193 return 1;
7195 if (mode == DImode || mode == DFmode)
7197 if (code == CONST_INT)
7199 HOST_WIDE_INT val = INTVAL (index);
7200 /* ??? Can we assume ldrd for thumb2? */
7201 /* Thumb-2 ldrd only has reg+const addressing modes. */
7202 /* ldrd supports offsets of +-1020.
7203 However the ldr fallback does not. */
7204 return val > -256 && val < 256 && (val & 3) == 0;
7206 else
7207 return 0;
7210 if (code == MULT)
7212 rtx xiop0 = XEXP (index, 0);
7213 rtx xiop1 = XEXP (index, 1);
7215 return ((arm_address_register_rtx_p (xiop0, strict_p)
7216 && thumb2_index_mul_operand (xiop1))
7217 || (arm_address_register_rtx_p (xiop1, strict_p)
7218 && thumb2_index_mul_operand (xiop0)));
7220 else if (code == ASHIFT)
7222 rtx op = XEXP (index, 1);
7224 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7225 && CONST_INT_P (op)
7226 && INTVAL (op) > 0
7227 && INTVAL (op) <= 3);
7230 return (code == CONST_INT
7231 && INTVAL (index) < 4096
7232 && INTVAL (index) > -256);
7235 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7236 static int
7237 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7239 int regno;
7241 if (!REG_P (x))
7242 return 0;
7244 regno = REGNO (x);
7246 if (strict_p)
7247 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7249 return (regno <= LAST_LO_REGNUM
7250 || regno > LAST_VIRTUAL_REGISTER
7251 || regno == FRAME_POINTER_REGNUM
7252 || (GET_MODE_SIZE (mode) >= 4
7253 && (regno == STACK_POINTER_REGNUM
7254 || regno >= FIRST_PSEUDO_REGISTER
7255 || x == hard_frame_pointer_rtx
7256 || x == arg_pointer_rtx)));
7259 /* Return nonzero if x is a legitimate index register. This is the case
7260 for any base register that can access a QImode object. */
7261 inline static int
7262 thumb1_index_register_rtx_p (rtx x, int strict_p)
7264 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7267 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7269 The AP may be eliminated to either the SP or the FP, so we use the
7270 least common denominator, e.g. SImode, and offsets from 0 to 64.
7272 ??? Verify whether the above is the right approach.
7274 ??? Also, the FP may be eliminated to the SP, so perhaps that
7275 needs special handling also.
7277 ??? Look at how the mips16 port solves this problem. It probably uses
7278 better ways to solve some of these problems.
7280 Although it is not incorrect, we don't accept QImode and HImode
7281 addresses based on the frame pointer or arg pointer until the
7282 reload pass starts. This is so that eliminating such addresses
7283 into stack based ones won't produce impossible code. */
7285 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7287 /* ??? Not clear if this is right. Experiment. */
7288 if (GET_MODE_SIZE (mode) < 4
7289 && !(reload_in_progress || reload_completed)
7290 && (reg_mentioned_p (frame_pointer_rtx, x)
7291 || reg_mentioned_p (arg_pointer_rtx, x)
7292 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7293 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7294 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7295 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7296 return 0;
7298 /* Accept any base register. SP only in SImode or larger. */
7299 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7300 return 1;
7302 /* This is PC relative data before arm_reorg runs. */
7303 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7304 && GET_CODE (x) == SYMBOL_REF
7305 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7306 return 1;
7308 /* This is PC relative data after arm_reorg runs. */
7309 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7310 && reload_completed
7311 && (GET_CODE (x) == LABEL_REF
7312 || (GET_CODE (x) == CONST
7313 && GET_CODE (XEXP (x, 0)) == PLUS
7314 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7315 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7316 return 1;
7318 /* Post-inc indexing only supported for SImode and larger. */
7319 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7320 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7321 return 1;
7323 else if (GET_CODE (x) == PLUS)
7325 /* REG+REG address can be any two index registers. */
7326 /* We disallow FRAME+REG addressing since we know that FRAME
7327 will be replaced with STACK, and SP relative addressing only
7328 permits SP+OFFSET. */
7329 if (GET_MODE_SIZE (mode) <= 4
7330 && XEXP (x, 0) != frame_pointer_rtx
7331 && XEXP (x, 1) != frame_pointer_rtx
7332 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7333 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7334 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7335 return 1;
7337 /* REG+const has 5-7 bit offset for non-SP registers. */
7338 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7339 || XEXP (x, 0) == arg_pointer_rtx)
7340 && CONST_INT_P (XEXP (x, 1))
7341 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7342 return 1;
7344 /* REG+const has 10-bit offset for SP, but only SImode and
7345 larger are supported. */
7346 /* ??? Should probably check for DI/DFmode overflow here
7347 just like GO_IF_LEGITIMATE_OFFSET does. */
7348 else if (REG_P (XEXP (x, 0))
7349 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7350 && GET_MODE_SIZE (mode) >= 4
7351 && CONST_INT_P (XEXP (x, 1))
7352 && INTVAL (XEXP (x, 1)) >= 0
7353 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7354 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7355 return 1;
7357 else if (REG_P (XEXP (x, 0))
7358 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7359 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7360 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7361 && REGNO (XEXP (x, 0))
7362 <= LAST_VIRTUAL_POINTER_REGISTER))
7363 && GET_MODE_SIZE (mode) >= 4
7364 && CONST_INT_P (XEXP (x, 1))
7365 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7366 return 1;
7369 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7370 && GET_MODE_SIZE (mode) == 4
7371 && GET_CODE (x) == SYMBOL_REF
7372 && CONSTANT_POOL_ADDRESS_P (x)
7373 && ! (flag_pic
7374 && symbol_mentioned_p (get_pool_constant (x))
7375 && ! pcrel_constant_p (get_pool_constant (x))))
7376 return 1;
7378 return 0;
7381 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7382 instruction of mode MODE. */
7384 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7386 switch (GET_MODE_SIZE (mode))
7388 case 1:
7389 return val >= 0 && val < 32;
7391 case 2:
7392 return val >= 0 && val < 64 && (val & 1) == 0;
7394 default:
7395 return (val >= 0
7396 && (val + GET_MODE_SIZE (mode)) <= 128
7397 && (val & 3) == 0);
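/* Illustrative summary of the ranges accepted above: QImode accepts
   offsets 0..31, HImode accepts even offsets 0..62, and word-sized or
   larger modes accept multiples of 4 from 0 up to 128 - GET_MODE_SIZE
   (0..124 for SImode, 0..120 for DImode).  */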
7401 bool
7402 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7404 if (TARGET_ARM)
7405 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7406 else if (TARGET_THUMB2)
7407 return thumb2_legitimate_address_p (mode, x, strict_p);
7408 else /* if (TARGET_THUMB1) */
7409 return thumb1_legitimate_address_p (mode, x, strict_p);
7412 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7414 Given an rtx X being reloaded into a reg required to be
7415 in class CLASS, return the class of reg to actually use.
7416 In general this is just CLASS, but for the Thumb core registers and
7417 immediate constants we prefer a LO_REGS class or a subset. */
7419 static reg_class_t
7420 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7422 if (TARGET_32BIT)
7423 return rclass;
7424 else
7426 if (rclass == GENERAL_REGS)
7427 return LO_REGS;
7428 else
7429 return rclass;
7433 /* Build the SYMBOL_REF for __tls_get_addr. */
7435 static GTY(()) rtx tls_get_addr_libfunc;
7437 static rtx
7438 get_tls_get_addr (void)
7440 if (!tls_get_addr_libfunc)
7441 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7442 return tls_get_addr_libfunc;
7446 arm_load_tp (rtx target)
7448 if (!target)
7449 target = gen_reg_rtx (SImode);
7451 if (TARGET_HARD_TP)
7453 /* Can return in any reg. */
7454 emit_insn (gen_load_tp_hard (target));
7456 else
7458 /* Always returned in r0. Immediately copy the result into a pseudo,
7459 otherwise other uses of r0 (e.g. setting up function arguments) may
7460 clobber the value. */
7462 rtx tmp;
7464 emit_insn (gen_load_tp_soft ());
7466 tmp = gen_rtx_REG (SImode, 0);
7467 emit_move_insn (target, tmp);
7469 return target;
7472 static rtx
7473 load_tls_operand (rtx x, rtx reg)
7475 rtx tmp;
7477 if (reg == NULL_RTX)
7478 reg = gen_reg_rtx (SImode);
7480 tmp = gen_rtx_CONST (SImode, x);
7482 emit_move_insn (reg, tmp);
7484 return reg;
7487 static rtx
7488 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7490 rtx insns, label, labelno, sum;
7492 gcc_assert (reloc != TLS_DESCSEQ);
7493 start_sequence ();
7495 labelno = GEN_INT (pic_labelno++);
7496 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7497 label = gen_rtx_CONST (VOIDmode, label);
7499 sum = gen_rtx_UNSPEC (Pmode,
7500 gen_rtvec (4, x, GEN_INT (reloc), label,
7501 GEN_INT (TARGET_ARM ? 8 : 4)),
7502 UNSPEC_TLS);
7503 reg = load_tls_operand (sum, reg);
7505 if (TARGET_ARM)
7506 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7507 else
7508 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7510 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7511 LCT_PURE, /* LCT_CONST? */
7512 Pmode, 1, reg, Pmode);
7514 insns = get_insns ();
7515 end_sequence ();
7517 return insns;
7520 static rtx
7521 arm_tls_descseq_addr (rtx x, rtx reg)
7523 rtx labelno = GEN_INT (pic_labelno++);
7524 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7525 rtx sum = gen_rtx_UNSPEC (Pmode,
7526 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7527 gen_rtx_CONST (VOIDmode, label),
7528 GEN_INT (!TARGET_ARM)),
7529 UNSPEC_TLS);
7530 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7532 emit_insn (gen_tlscall (x, labelno));
7533 if (!reg)
7534 reg = gen_reg_rtx (SImode);
7535 else
7536 gcc_assert (REGNO (reg) != 0);
7538 emit_move_insn (reg, reg0);
7540 return reg;
7544 legitimize_tls_address (rtx x, rtx reg)
7546 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7547 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7549 switch (model)
7551 case TLS_MODEL_GLOBAL_DYNAMIC:
7552 if (TARGET_GNU2_TLS)
7554 reg = arm_tls_descseq_addr (x, reg);
7556 tp = arm_load_tp (NULL_RTX);
7558 dest = gen_rtx_PLUS (Pmode, tp, reg);
7560 else
7562 /* Original scheme */
7563 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7564 dest = gen_reg_rtx (Pmode);
7565 emit_libcall_block (insns, dest, ret, x);
7567 return dest;
7569 case TLS_MODEL_LOCAL_DYNAMIC:
7570 if (TARGET_GNU2_TLS)
7572 reg = arm_tls_descseq_addr (x, reg);
7574 tp = arm_load_tp (NULL_RTX);
7576 dest = gen_rtx_PLUS (Pmode, tp, reg);
7578 else
7580 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7582 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7583 share the LDM result with other LD model accesses. */
7584 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7585 UNSPEC_TLS);
7586 dest = gen_reg_rtx (Pmode);
7587 emit_libcall_block (insns, dest, ret, eqv);
7589 /* Load the addend. */
7590 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7591 GEN_INT (TLS_LDO32)),
7592 UNSPEC_TLS);
7593 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7594 dest = gen_rtx_PLUS (Pmode, dest, addend);
7596 return dest;
7598 case TLS_MODEL_INITIAL_EXEC:
7599 labelno = GEN_INT (pic_labelno++);
7600 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7601 label = gen_rtx_CONST (VOIDmode, label);
7602 sum = gen_rtx_UNSPEC (Pmode,
7603 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7604 GEN_INT (TARGET_ARM ? 8 : 4)),
7605 UNSPEC_TLS);
7606 reg = load_tls_operand (sum, reg);
7608 if (TARGET_ARM)
7609 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7610 else if (TARGET_THUMB2)
7611 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7612 else
7614 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7615 emit_move_insn (reg, gen_const_mem (SImode, reg));
7618 tp = arm_load_tp (NULL_RTX);
7620 return gen_rtx_PLUS (Pmode, tp, reg);
7622 case TLS_MODEL_LOCAL_EXEC:
7623 tp = arm_load_tp (NULL_RTX);
7625 reg = gen_rtx_UNSPEC (Pmode,
7626 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7627 UNSPEC_TLS);
7628 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7630 return gen_rtx_PLUS (Pmode, tp, reg);
7632 default:
7633 abort ();
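/* A concrete reading of the local-exec case above (illustrative only):
   for TLS_MODEL_LOCAL_EXEC the function returns (plus (reg tp) (reg tmp)),
   where tmp has been loaded with (const (unspec [x TLS_LE32] UNSPEC_TLS)),
   i.e. the thread-pointer-relative offset of X added to the thread
   pointer obtained from arm_load_tp.  */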
7637 /* Try machine-dependent ways of modifying an illegitimate address
7638 to be legitimate. If we find one, return the new, valid address. */
7640 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7642 if (arm_tls_referenced_p (x))
7644 rtx addend = NULL;
7646 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7648 addend = XEXP (XEXP (x, 0), 1);
7649 x = XEXP (XEXP (x, 0), 0);
7652 if (GET_CODE (x) != SYMBOL_REF)
7653 return x;
7655 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7657 x = legitimize_tls_address (x, NULL_RTX);
7659 if (addend)
7661 x = gen_rtx_PLUS (SImode, x, addend);
7662 orig_x = x;
7664 else
7665 return x;
7668 if (!TARGET_ARM)
7670 /* TODO: legitimize_address for Thumb2. */
7671 if (TARGET_THUMB2)
7672 return x;
7673 return thumb_legitimize_address (x, orig_x, mode);
7676 if (GET_CODE (x) == PLUS)
7678 rtx xop0 = XEXP (x, 0);
7679 rtx xop1 = XEXP (x, 1);
7681 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7682 xop0 = force_reg (SImode, xop0);
7684 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7685 && !symbol_mentioned_p (xop1))
7686 xop1 = force_reg (SImode, xop1);
7688 if (ARM_BASE_REGISTER_RTX_P (xop0)
7689 && CONST_INT_P (xop1))
7691 HOST_WIDE_INT n, low_n;
7692 rtx base_reg, val;
7693 n = INTVAL (xop1);
7695 /* VFP addressing modes actually allow greater offsets, but for
7696 now we just stick with the lowest common denominator. */
7697 if (mode == DImode
7698 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7700 low_n = n & 0x0f;
7701 n &= ~0x0f;
7702 if (low_n > 4)
7704 n += 16;
7705 low_n -= 16;
7708 else
7710 low_n = ((mode) == TImode ? 0
7711 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7712 n -= low_n;
7715 base_reg = gen_reg_rtx (SImode);
7716 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7717 emit_move_insn (base_reg, val);
7718 x = plus_constant (Pmode, base_reg, low_n);
7720 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7721 x = gen_rtx_PLUS (SImode, xop0, xop1);
7724 /* XXX We don't allow MINUS any more -- see comment in
7725 arm_legitimate_address_outer_p (). */
7726 else if (GET_CODE (x) == MINUS)
7728 rtx xop0 = XEXP (x, 0);
7729 rtx xop1 = XEXP (x, 1);
7731 if (CONSTANT_P (xop0))
7732 xop0 = force_reg (SImode, xop0);
7734 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7735 xop1 = force_reg (SImode, xop1);
7737 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7738 x = gen_rtx_MINUS (SImode, xop0, xop1);
7741 /* Make sure to take full advantage of the pre-indexed addressing mode
7742 with absolute addresses which often allows for the base register to
7743 be factorized for multiple adjacent memory references, and it might
7744 even allow for the mini pool to be avoided entirely. */
7745 else if (CONST_INT_P (x) && optimize > 0)
7747 unsigned int bits;
7748 HOST_WIDE_INT mask, base, index;
7749 rtx base_reg;
7751 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7752 use an 8-bit index. So let's use a 12-bit index for SImode only and
7753 hope that arm_gen_constant will enable ldrb to use more bits. */
7754 bits = (mode == SImode) ? 12 : 8;
7755 mask = (1 << bits) - 1;
7756 base = INTVAL (x) & ~mask;
7757 index = INTVAL (x) & mask;
7758 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7760 /* It'll most probably be more efficient to generate the base
7761 with more bits set and use a negative index instead. */
7762 base |= mask;
7763 index -= mask;
7765 base_reg = force_reg (SImode, GEN_INT (base));
7766 x = plus_constant (Pmode, base_reg, index);
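/* Worked example for the constant-address split above (illustrative
   values): a SImode access to absolute address 0x3FFD gives bits = 12,
   mask = 0xFFF, base = 0x3000 and index = 0xFFD.  bit_count (0x3000) is 2,
   which does not exceed (32 - 12)/2, so the address becomes
   (base_reg + 0xFFD) with base_reg loaded with 0x3000; the base is a
   single valid ARM immediate and the index fits the 12-bit ldr offset.
   When the base has many bits set, the branch above instead ORs the mask
   into the base and uses a negative index.  */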
7769 if (flag_pic)
7771 /* We need to find and carefully transform any SYMBOL and LABEL
7772 references; so go back to the original address expression. */
7773 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7775 if (new_x != orig_x)
7776 x = new_x;
7779 return x;
7783 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7784 to be legitimate. If we find one, return the new, valid address. */
7786 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7788 if (GET_CODE (x) == PLUS
7789 && CONST_INT_P (XEXP (x, 1))
7790 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7791 || INTVAL (XEXP (x, 1)) < 0))
7793 rtx xop0 = XEXP (x, 0);
7794 rtx xop1 = XEXP (x, 1);
7795 HOST_WIDE_INT offset = INTVAL (xop1);
7797 /* Try and fold the offset into a biasing of the base register and
7798 then offsetting that. Don't do this when optimizing for space
7799 since it can cause too many CSEs. */
7800 if (optimize_size && offset >= 0
7801 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7803 HOST_WIDE_INT delta;
7805 if (offset >= 256)
7806 delta = offset - (256 - GET_MODE_SIZE (mode));
7807 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7808 delta = 31 * GET_MODE_SIZE (mode);
7809 else
7810 delta = offset & (~31 * GET_MODE_SIZE (mode));
7812 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7813 NULL_RTX);
7814 x = plus_constant (Pmode, xop0, delta);
7816 else if (offset < 0 && offset > -256)
7817 /* Small negative offsets are best done with a subtract before the
7818 dereference, since forcing these into a register normally takes two
7819 instructions. */
7820 x = force_operand (x, NULL_RTX);
7821 else
7823 /* For the remaining cases, force the constant into a register. */
7824 xop1 = force_reg (SImode, xop1);
7825 x = gen_rtx_PLUS (SImode, xop0, xop1);
7828 else if (GET_CODE (x) == PLUS
7829 && s_register_operand (XEXP (x, 1), SImode)
7830 && !s_register_operand (XEXP (x, 0), SImode))
7832 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7834 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7837 if (flag_pic)
7839 /* We need to find and carefully transform any SYMBOL and LABEL
7840 references; so go back to the original address expression. */
7841 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7843 if (new_x != orig_x)
7844 x = new_x;
7847 return x;
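/* Worked example for the optimize_size biasing in thumb_legitimize_address
   above (illustrative values): for a SImode access at base + 300, the
   offset is >= 256, so delta = 300 - (256 - 4) = 48.  The base is biased
   by 252 with an add and the access becomes [biased_base, #48]; 252 fits
   a Thumb add immediate and 48 is a legal word-aligned load offset.  */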
7850 bool
7851 arm_legitimize_reload_address (rtx *p,
7852 machine_mode mode,
7853 int opnum, int type,
7854 int ind_levels ATTRIBUTE_UNUSED)
7856 /* We must recognize output that we have already generated ourselves. */
7857 if (GET_CODE (*p) == PLUS
7858 && GET_CODE (XEXP (*p, 0)) == PLUS
7859 && REG_P (XEXP (XEXP (*p, 0), 0))
7860 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7861 && CONST_INT_P (XEXP (*p, 1)))
7863 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7864 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7865 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7866 return true;
7869 if (GET_CODE (*p) == PLUS
7870 && REG_P (XEXP (*p, 0))
7871 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7872 /* If the base register is equivalent to a constant, let the generic
7873 code handle it. Otherwise we will run into problems if a future
7874 reload pass decides to rematerialize the constant. */
7875 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7876 && CONST_INT_P (XEXP (*p, 1)))
7878 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7879 HOST_WIDE_INT low, high;
7881 /* Detect coprocessor load/stores. */
7882 bool coproc_p = ((TARGET_HARD_FLOAT
7883 && TARGET_VFP
7884 && (mode == SFmode || mode == DFmode))
7885 || (TARGET_REALLY_IWMMXT
7886 && VALID_IWMMXT_REG_MODE (mode))
7887 || (TARGET_NEON
7888 && (VALID_NEON_DREG_MODE (mode)
7889 || VALID_NEON_QREG_MODE (mode))));
7891 /* For some conditions, bail out when lower two bits are unaligned. */
7892 if ((val & 0x3) != 0
7893 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7894 && (coproc_p
7895 /* For DI, and DF under soft-float: */
7896 || ((mode == DImode || mode == DFmode)
7897 /* Without ldrd, we use stm/ldm, which does not
7898 fare well with unaligned bits. */
7899 && (! TARGET_LDRD
7900 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7901 || TARGET_THUMB2))))
7902 return false;
7904 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7905 where the (reg+high) part gets turned into a reload add insn,
7906 we try to decompose the index into high/low values that can often
7907 also lead to better reload CSE.
7908 For example:
7909 ldr r0, [r2, #4100] // Offset too large
7910 ldr r1, [r2, #4104] // Offset too large
7912 is best reloaded as:
7913 add t1, r2, #4096
7914 ldr r0, [t1, #4]
7915 add t2, r2, #4096
7916 ldr r1, [t2, #8]
7918 which post-reload CSE can simplify in most cases to eliminate the
7919 second add instruction:
7920 add t1, r2, #4096
7921 ldr r0, [t1, #4]
7922 ldr r1, [t1, #8]
7924 The idea here is that we want to split out the bits of the constant
7925 as a mask, rather than by subtracting the maximum offset that the
7926 respective type of load/store instruction can handle.
7928 A negative low-part offset can still be useful even when the overall
7929 offset is positive; sometimes this leads to an immediate
7930 that can be constructed with fewer instructions.
7931 For example:
7932 ldr r0, [r2, #0x3FFFFC]
7934 This is best reloaded as:
7935 add t1, r2, #0x400000
7936 ldr r0, [t1, #-4]
7938 The trick for spotting this for a load insn with N bits of offset
7939 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7940 negative offset that is going to make bit N and all the bits below
7941 it become zero in the remainder part.
7943 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7944 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7945 used in most cases of ARM load/store instructions. */
7947 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7948 (((VAL) & ((1 << (N)) - 1)) \
7949 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7950 : 0)
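/* Worked example (illustrative): SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10),
   as used in the coprocessor case below, evaluates to
   ((0x3FFFFC & 0x7FF) ^ 0x400) - 0x400 = 0x3FC - 0x400 = -4, so the
   reload becomes add t1, r2, #0x400000 followed by a [t1, #-4] access,
   matching the second example in the comment above.  */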
7952 if (coproc_p)
7954 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7956 /* NEON quad-word load/stores are made of two double-word accesses,
7957 so the valid index range is reduced by 8. Treat as 9-bit range if
7958 we go over it. */
7959 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7960 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7962 else if (GET_MODE_SIZE (mode) == 8)
7964 if (TARGET_LDRD)
7965 low = (TARGET_THUMB2
7966 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7967 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7968 else
7969 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7970 to access doublewords. The supported load/store offsets are
7971 -8, -4, and 4, which we try to produce here. */
7972 low = ((val & 0xf) ^ 0x8) - 0x8;
7974 else if (GET_MODE_SIZE (mode) < 8)
7976 /* NEON element load/stores do not have an offset. */
7977 if (TARGET_NEON_FP16 && mode == HFmode)
7978 return false;
7980 if (TARGET_THUMB2)
7982 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7983 Try the wider 12-bit range first, and re-try if the result
7984 is out of range. */
7985 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7986 if (low < -255)
7987 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7989 else
7991 if (mode == HImode || mode == HFmode)
7993 if (arm_arch4)
7994 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7995 else
7997 /* The storehi/movhi_bytes fallbacks can use only
7998 [-4094,+4094] of the full ldrb/strb index range. */
7999 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8000 if (low == 4095 || low == -4095)
8001 return false;
8004 else
8005 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8008 else
8009 return false;
8011 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8012 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8013 - (unsigned HOST_WIDE_INT) 0x80000000);
8014 /* Check for overflow or zero. */
8015 if (low == 0 || high == 0 || (high + low != val))
8016 return false;
8018 /* Reload the high part into a base reg; leave the low part
8019 in the mem.
8020 Note that replacing this gen_rtx_PLUS with plus_constant is
8021 wrong in this case because we rely on the
8022 (plus (plus reg c1) c2) structure being preserved so that
8023 XEXP (*p, 0) in push_reload below uses the correct term. */
8024 *p = gen_rtx_PLUS (GET_MODE (*p),
8025 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8026 GEN_INT (high)),
8027 GEN_INT (low));
8028 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8029 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8030 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8031 return true;
8034 return false;
8038 thumb_legitimize_reload_address (rtx *x_p,
8039 machine_mode mode,
8040 int opnum, int type,
8041 int ind_levels ATTRIBUTE_UNUSED)
8043 rtx x = *x_p;
8045 if (GET_CODE (x) == PLUS
8046 && GET_MODE_SIZE (mode) < 4
8047 && REG_P (XEXP (x, 0))
8048 && XEXP (x, 0) == stack_pointer_rtx
8049 && CONST_INT_P (XEXP (x, 1))
8050 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8052 rtx orig_x = x;
8054 x = copy_rtx (x);
8055 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8056 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8057 return x;
8060 /* If both registers are hi-regs, then it's better to reload the
8061 entire expression rather than each register individually. That
8062 only requires one reload register rather than two. */
8063 if (GET_CODE (x) == PLUS
8064 && REG_P (XEXP (x, 0))
8065 && REG_P (XEXP (x, 1))
8066 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8067 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8069 rtx orig_x = x;
8071 x = copy_rtx (x);
8072 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8073 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8074 return x;
8077 return NULL;
8080 /* Test for various thread-local symbols. */
8082 /* Helper for arm_tls_referenced_p. */
8084 static int
8085 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
8087 if (GET_CODE (*x) == SYMBOL_REF)
8088 return SYMBOL_REF_TLS_MODEL (*x) != 0;
8090 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8091 TLS offsets, not real symbol references. */
8092 if (GET_CODE (*x) == UNSPEC
8093 && XINT (*x, 1) == UNSPEC_TLS)
8094 return -1;
8096 return 0;
8099 /* Return TRUE if X contains any TLS symbol references. */
8101 bool
8102 arm_tls_referenced_p (rtx x)
8104 if (! TARGET_HAVE_TLS)
8105 return false;
8107 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
8110 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8112 On the ARM, allow any integer (invalid ones are removed later by insn
8113 patterns), nice doubles and symbol_refs which refer to the function's
8114 constant pool XXX.
8116 When generating PIC, allow anything. */
8118 static bool
8119 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8121 /* At present, we have no support for Neon structure constants, so forbid
8122 them here. It might be possible to handle simple cases like 0 and -1
8123 in future. */
8124 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8125 return false;
8127 return flag_pic || !label_mentioned_p (x);
8130 static bool
8131 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8133 return (CONST_INT_P (x)
8134 || CONST_DOUBLE_P (x)
8135 || CONSTANT_ADDRESS_P (x)
8136 || flag_pic);
8139 static bool
8140 arm_legitimate_constant_p (machine_mode mode, rtx x)
8142 return (!arm_cannot_force_const_mem (mode, x)
8143 && (TARGET_32BIT
8144 ? arm_legitimate_constant_p_1 (mode, x)
8145 : thumb_legitimate_constant_p (mode, x)));
8148 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8150 static bool
8151 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8153 rtx base, offset;
8155 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8157 split_const (x, &base, &offset);
8158 if (GET_CODE (base) == SYMBOL_REF
8159 && !offset_within_block_p (base, INTVAL (offset)))
8160 return true;
8162 return arm_tls_referenced_p (x);
8165 #define REG_OR_SUBREG_REG(X) \
8166 (REG_P (X) \
8167 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8169 #define REG_OR_SUBREG_RTX(X) \
8170 (REG_P (X) ? (X) : SUBREG_REG (X))
8172 static inline int
8173 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8175 machine_mode mode = GET_MODE (x);
8176 int total, words;
8178 switch (code)
8180 case ASHIFT:
8181 case ASHIFTRT:
8182 case LSHIFTRT:
8183 case ROTATERT:
8184 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8186 case PLUS:
8187 case MINUS:
8188 case COMPARE:
8189 case NEG:
8190 case NOT:
8191 return COSTS_N_INSNS (1);
8193 case MULT:
8194 if (CONST_INT_P (XEXP (x, 1)))
8196 int cycles = 0;
8197 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8199 while (i)
8201 i >>= 2;
8202 cycles++;
8204 return COSTS_N_INSNS (2) + cycles;
8206 return COSTS_N_INSNS (1) + 16;
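/* Illustrative evaluation of the loop above: for a constant multiplier of
   100 the value is shifted right by two 4 times before reaching zero
   (100 -> 25 -> 6 -> 1 -> 0), so the estimate is COSTS_N_INSNS (2) + 4.  */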
8208 case SET:
8209 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8210 the mode. */
8211 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8212 return (COSTS_N_INSNS (words)
8213 + 4 * ((MEM_P (SET_SRC (x)))
8214 + MEM_P (SET_DEST (x))));
8216 case CONST_INT:
8217 if (outer == SET)
8219 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8220 return 0;
8221 if (thumb_shiftable_const (INTVAL (x)))
8222 return COSTS_N_INSNS (2);
8223 return COSTS_N_INSNS (3);
8225 else if ((outer == PLUS || outer == COMPARE)
8226 && INTVAL (x) < 256 && INTVAL (x) > -256)
8227 return 0;
8228 else if ((outer == IOR || outer == XOR || outer == AND)
8229 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8230 return COSTS_N_INSNS (1);
8231 else if (outer == AND)
8233 int i;
8234 /* This duplicates the tests in the andsi3 expander. */
8235 for (i = 9; i <= 31; i++)
8236 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8237 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8238 return COSTS_N_INSNS (2);
8240 else if (outer == ASHIFT || outer == ASHIFTRT
8241 || outer == LSHIFTRT)
8242 return 0;
8243 return COSTS_N_INSNS (2);
8245 case CONST:
8246 case CONST_DOUBLE:
8247 case LABEL_REF:
8248 case SYMBOL_REF:
8249 return COSTS_N_INSNS (3);
8251 case UDIV:
8252 case UMOD:
8253 case DIV:
8254 case MOD:
8255 return 100;
8257 case TRUNCATE:
8258 return 99;
8260 case AND:
8261 case XOR:
8262 case IOR:
8263 /* XXX guess. */
8264 return 8;
8266 case MEM:
8267 /* XXX another guess. */
8268 /* Memory costs quite a lot for the first word, but subsequent words
8269 load at the equivalent of a single insn each. */
8270 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8271 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8272 ? 4 : 0));
8274 case IF_THEN_ELSE:
8275 /* XXX a guess. */
8276 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8277 return 14;
8278 return 2;
8280 case SIGN_EXTEND:
8281 case ZERO_EXTEND:
8282 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8283 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8285 if (mode == SImode)
8286 return total;
8288 if (arm_arch6)
8289 return total + COSTS_N_INSNS (1);
8291 /* Assume a two-shift sequence. Increase the cost slightly so
8292 we prefer actual shifts over an extend operation. */
8293 return total + 1 + COSTS_N_INSNS (2);
8295 default:
8296 return 99;
8300 static inline bool
8301 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8303 machine_mode mode = GET_MODE (x);
8304 enum rtx_code subcode;
8305 rtx operand;
8306 enum rtx_code code = GET_CODE (x);
8307 *total = 0;
8309 switch (code)
8311 case MEM:
8312 /* Memory costs quite a lot for the first word, but subsequent words
8313 load at the equivalent of a single insn each. */
8314 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8315 return true;
8317 case DIV:
8318 case MOD:
8319 case UDIV:
8320 case UMOD:
8321 if (TARGET_HARD_FLOAT && mode == SFmode)
8322 *total = COSTS_N_INSNS (2);
8323 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8324 *total = COSTS_N_INSNS (4);
8325 else
8326 *total = COSTS_N_INSNS (20);
8327 return false;
8329 case ROTATE:
8330 if (REG_P (XEXP (x, 1)))
8331 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8332 else if (!CONST_INT_P (XEXP (x, 1)))
8333 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8335 /* Fall through */
8336 case ROTATERT:
8337 if (mode != SImode)
8339 *total += COSTS_N_INSNS (4);
8340 return true;
8343 /* Fall through */
8344 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8345 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8346 if (mode == DImode)
8348 *total += COSTS_N_INSNS (3);
8349 return true;
8352 *total += COSTS_N_INSNS (1);
8353 /* Increase the cost of complex shifts because they aren't any faster,
8354 and they reduce dual issue opportunities. */
8355 if (arm_tune_cortex_a9
8356 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8357 ++*total;
8359 return true;
8361 case MINUS:
8362 if (mode == DImode)
8364 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8365 if (CONST_INT_P (XEXP (x, 0))
8366 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8368 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8369 return true;
8372 if (CONST_INT_P (XEXP (x, 1))
8373 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8375 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8376 return true;
8379 return false;
8382 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8384 if (TARGET_HARD_FLOAT
8385 && (mode == SFmode
8386 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8388 *total = COSTS_N_INSNS (1);
8389 if (CONST_DOUBLE_P (XEXP (x, 0))
8390 && arm_const_double_rtx (XEXP (x, 0)))
8392 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8393 return true;
8396 if (CONST_DOUBLE_P (XEXP (x, 1))
8397 && arm_const_double_rtx (XEXP (x, 1)))
8399 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8400 return true;
8403 return false;
8405 *total = COSTS_N_INSNS (20);
8406 return false;
8409 *total = COSTS_N_INSNS (1);
8410 if (CONST_INT_P (XEXP (x, 0))
8411 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8413 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8414 return true;
8417 subcode = GET_CODE (XEXP (x, 1));
8418 if (subcode == ASHIFT || subcode == ASHIFTRT
8419 || subcode == LSHIFTRT
8420 || subcode == ROTATE || subcode == ROTATERT)
8422 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8423 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8424 return true;
8427 /* A shift as a part of RSB costs no more than RSB itself. */
8428 if (GET_CODE (XEXP (x, 0)) == MULT
8429 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8431 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8432 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8433 return true;
8436 if (subcode == MULT
8437 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8439 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8440 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8441 return true;
8444 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8445 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8447 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8448 if (REG_P (XEXP (XEXP (x, 1), 0))
8449 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8450 *total += COSTS_N_INSNS (1);
8452 return true;
8455 /* Fall through */
8457 case PLUS:
8458 if (code == PLUS && arm_arch6 && mode == SImode
8459 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8460 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8462 *total = COSTS_N_INSNS (1);
8463 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8464 0, speed);
8465 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8466 return true;
8469 /* MLA: All arguments must be registers. We filter out
8470 multiplication by a power of two, so that we fall down into
8471 the code below. */
8472 if (GET_CODE (XEXP (x, 0)) == MULT
8473 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8475 /* The cost comes from the cost of the multiply. */
8476 return false;
8479 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8481 if (TARGET_HARD_FLOAT
8482 && (mode == SFmode
8483 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8485 *total = COSTS_N_INSNS (1);
8486 if (CONST_DOUBLE_P (XEXP (x, 1))
8487 && arm_const_double_rtx (XEXP (x, 1)))
8489 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8490 return true;
8493 return false;
8496 *total = COSTS_N_INSNS (20);
8497 return false;
8500 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8501 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8503 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8504 if (REG_P (XEXP (XEXP (x, 0), 0))
8505 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8506 *total += COSTS_N_INSNS (1);
8507 return true;
8510 /* Fall through */
8512 case AND: case XOR: case IOR:
8514 /* Normally the frame registers will be split into reg+const during
8515 reload, so it is a bad idea to combine them with other instructions,
8516 since then they might not be moved outside of loops. As a compromise
8517 we allow integration with ops that have a constant as their second
8518 operand. */
8519 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8520 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8521 && !CONST_INT_P (XEXP (x, 1)))
8522 *total = COSTS_N_INSNS (1);
8524 if (mode == DImode)
8526 *total += COSTS_N_INSNS (2);
8527 if (CONST_INT_P (XEXP (x, 1))
8528 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8530 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8531 return true;
8534 return false;
8537 *total += COSTS_N_INSNS (1);
8538 if (CONST_INT_P (XEXP (x, 1))
8539 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8541 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8542 return true;
8544 subcode = GET_CODE (XEXP (x, 0));
8545 if (subcode == ASHIFT || subcode == ASHIFTRT
8546 || subcode == LSHIFTRT
8547 || subcode == ROTATE || subcode == ROTATERT)
8549 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8550 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8551 return true;
8554 if (subcode == MULT
8555 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8557 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8558 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8559 return true;
8562 if (subcode == UMIN || subcode == UMAX
8563 || subcode == SMIN || subcode == SMAX)
8565 *total = COSTS_N_INSNS (3);
8566 return true;
8569 return false;
8571 case MULT:
8572 /* This should have been handled by the CPU specific routines. */
8573 gcc_unreachable ();
8575 case TRUNCATE:
8576 if (arm_arch3m && mode == SImode
8577 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8578 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8579 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8580 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8581 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8582 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8584 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8585 return true;
8587 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8588 return false;
8590 case NEG:
8591 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8593 if (TARGET_HARD_FLOAT
8594 && (mode == SFmode
8595 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8597 *total = COSTS_N_INSNS (1);
8598 return false;
8600 *total = COSTS_N_INSNS (2);
8601 return false;
8604 /* Fall through */
8605 case NOT:
8606 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8607 if (mode == SImode && code == NOT)
8609 subcode = GET_CODE (XEXP (x, 0));
8610 if (subcode == ASHIFT || subcode == ASHIFTRT
8611 || subcode == LSHIFTRT
8612 || subcode == ROTATE || subcode == ROTATERT
8613 || (subcode == MULT
8614 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8616 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8617 /* Register shifts cost an extra cycle. */
8618 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8619 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8620 subcode, 1, speed);
8621 return true;
8625 return false;
8627 case IF_THEN_ELSE:
8628 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8630 *total = COSTS_N_INSNS (4);
8631 return true;
8634 operand = XEXP (x, 0);
8636 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8637 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8638 && REG_P (XEXP (operand, 0))
8639 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8640 *total += COSTS_N_INSNS (1);
8641 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8642 + rtx_cost (XEXP (x, 2), code, 2, speed));
8643 return true;
8645 case NE:
8646 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8648 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8649 return true;
8651 goto scc_insn;
8653 case GE:
8654 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8655 && mode == SImode && XEXP (x, 1) == const0_rtx)
8657 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8658 return true;
8660 goto scc_insn;
8662 case LT:
8663 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8664 && mode == SImode && XEXP (x, 1) == const0_rtx)
8666 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8667 return true;
8669 goto scc_insn;
8671 case EQ:
8672 case GT:
8673 case LE:
8674 case GEU:
8675 case LTU:
8676 case GTU:
8677 case LEU:
8678 case UNORDERED:
8679 case ORDERED:
8680 case UNEQ:
8681 case UNGE:
8682 case UNLT:
8683 case UNGT:
8684 case UNLE:
8685 scc_insn:
8686 /* SCC insns. If the comparison has already been
8687 performed, they cost 2 instructions. Otherwise they need
8688 an additional comparison before them. */
8689 *total = COSTS_N_INSNS (2);
8690 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8692 return true;
8695 /* Fall through */
8696 case COMPARE:
8697 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8699 *total = 0;
8700 return true;
8703 *total += COSTS_N_INSNS (1);
8704 if (CONST_INT_P (XEXP (x, 1))
8705 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8707 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8708 return true;
8711 subcode = GET_CODE (XEXP (x, 0));
8712 if (subcode == ASHIFT || subcode == ASHIFTRT
8713 || subcode == LSHIFTRT
8714 || subcode == ROTATE || subcode == ROTATERT)
8716 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8717 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8718 return true;
8721 if (subcode == MULT
8722 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8724 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8725 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8726 return true;
8729 return false;
8731 case UMIN:
8732 case UMAX:
8733 case SMIN:
8734 case SMAX:
8735 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8736 if (!CONST_INT_P (XEXP (x, 1))
8737 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8738 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8739 return true;
8741 case ABS:
8742 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8744 if (TARGET_HARD_FLOAT
8745 && (mode == SFmode
8746 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8748 *total = COSTS_N_INSNS (1);
8749 return false;
8751 *total = COSTS_N_INSNS (20);
8752 return false;
8754 *total = COSTS_N_INSNS (1);
8755 if (mode == DImode)
8756 *total += COSTS_N_INSNS (3);
8757 return false;
8759 case SIGN_EXTEND:
8760 case ZERO_EXTEND:
8761 *total = 0;
8762 if (GET_MODE_CLASS (mode) == MODE_INT)
8764 rtx op = XEXP (x, 0);
8765 machine_mode opmode = GET_MODE (op);
8767 if (mode == DImode)
8768 *total += COSTS_N_INSNS (1);
8770 if (opmode != SImode)
8772 if (MEM_P (op))
8774 /* If !arm_arch4, we use one of the extendhisi2_mem
8775 or movhi_bytes patterns for HImode. For a QImode
8776 sign extension, we first zero-extend from memory
8777 and then perform a shift sequence. */
8778 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8779 *total += COSTS_N_INSNS (2);
8781 else if (arm_arch6)
8782 *total += COSTS_N_INSNS (1);
8784 /* We don't have the necessary insn, so we need to perform some
8785 other operation. */
8786 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8787 /* An and with constant 255. */
8788 *total += COSTS_N_INSNS (1);
8789 else
8790 /* A shift sequence. Increase costs slightly to avoid
8791 combining two shifts into an extend operation. */
8792 *total += COSTS_N_INSNS (2) + 1;
8795 return false;
8798 switch (GET_MODE (XEXP (x, 0)))
8800 case V8QImode:
8801 case V4HImode:
8802 case V2SImode:
8803 case V4QImode:
8804 case V2HImode:
8805 *total = COSTS_N_INSNS (1);
8806 return false;
8808 default:
8809 gcc_unreachable ();
8811 gcc_unreachable ();
8813 case ZERO_EXTRACT:
8814 case SIGN_EXTRACT:
8815 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8816 return true;
8818 case CONST_INT:
8819 if (const_ok_for_arm (INTVAL (x))
8820 || const_ok_for_arm (~INTVAL (x)))
8821 *total = COSTS_N_INSNS (1);
8822 else
8823 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8824 INTVAL (x), NULL_RTX,
8825 NULL_RTX, 0, 0));
8826 return true;
8828 case CONST:
8829 case LABEL_REF:
8830 case SYMBOL_REF:
8831 *total = COSTS_N_INSNS (3);
8832 return true;
8834 case HIGH:
8835 *total = COSTS_N_INSNS (1);
8836 return true;
8838 case LO_SUM:
8839 *total = COSTS_N_INSNS (1);
8840 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8841 return true;
8843 case CONST_DOUBLE:
8844 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8845 && (mode == SFmode || !TARGET_VFP_SINGLE))
8846 *total = COSTS_N_INSNS (1);
8847 else
8848 *total = COSTS_N_INSNS (4);
8849 return true;
8851 case SET:
8852 /* The vec_extract patterns accept memory operands that require an
8853 address reload. Account for the cost of that reload to give the
8854 auto-inc-dec pass an incentive to try to replace them. */
8855 if (TARGET_NEON && MEM_P (SET_DEST (x))
8856 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8858 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8859 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8860 *total += COSTS_N_INSNS (1);
8861 return true;
8863 /* Likewise for the vec_set patterns. */
8864 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8865 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8866 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8868 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8869 *total = rtx_cost (mem, code, 0, speed);
8870 if (!neon_vector_mem_operand (mem, 2, true))
8871 *total += COSTS_N_INSNS (1);
8872 return true;
8874 return false;
8876 case UNSPEC:
8877 /* We cost this as high as our memory costs to allow this to
8878 be hoisted from loops. */
8879 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8881 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8883 return true;
8885 case CONST_VECTOR:
8886 if (TARGET_NEON
8887 && TARGET_HARD_FLOAT
8888 && outer == SET
8889 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8890 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8891 *total = COSTS_N_INSNS (1);
8892 else
8893 *total = COSTS_N_INSNS (4);
8894 return true;
8896 default:
8897 *total = COSTS_N_INSNS (4);
8898 return false;
8902 /* Estimates the size cost of thumb1 instructions.
8903 For now most of the code is copied from thumb1_rtx_costs. We need more
8904 fine-grained tuning when we have more related test cases. */
8905 static inline int
8906 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8908 machine_mode mode = GET_MODE (x);
8909 int words;
8911 switch (code)
8913 case ASHIFT:
8914 case ASHIFTRT:
8915 case LSHIFTRT:
8916 case ROTATERT:
8917 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8919 case PLUS:
8920 case MINUS:
8921 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8922 defined by RTL expansion, especially for the expansion of
8923 multiplication. */
8924 if ((GET_CODE (XEXP (x, 0)) == MULT
8925 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8926 || (GET_CODE (XEXP (x, 1)) == MULT
8927 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8928 return COSTS_N_INSNS (2);
8929 /* Deliberately fall through for normal RTX. */
8930 case COMPARE:
8931 case NEG:
8932 case NOT:
8933 return COSTS_N_INSNS (1);
8935 case MULT:
8936 if (CONST_INT_P (XEXP (x, 1)))
8938 /* Thumb1 mul instruction can't operate on const. We must load it
8939 into a register first. */
8940 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8941 return COSTS_N_INSNS (1) + const_size;
8943 return COSTS_N_INSNS (1);
8945 case SET:
8946 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8947 the mode. */
8948 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8949 return COSTS_N_INSNS (words)
8950 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8951 || satisfies_constraint_K (SET_SRC (x))
8952 /* thumb1_movdi_insn. */
8953 || ((words > 1) && MEM_P (SET_SRC (x))));
8955 case CONST_INT:
8956 if (outer == SET)
8958 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8959 return COSTS_N_INSNS (1);
8960 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8961 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8962 return COSTS_N_INSNS (2);
8963 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8964 if (thumb_shiftable_const (INTVAL (x)))
8965 return COSTS_N_INSNS (2);
8966 return COSTS_N_INSNS (3);
8968 else if ((outer == PLUS || outer == COMPARE)
8969 && INTVAL (x) < 256 && INTVAL (x) > -256)
8970 return 0;
8971 else if ((outer == IOR || outer == XOR || outer == AND)
8972 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8973 return COSTS_N_INSNS (1);
8974 else if (outer == AND)
8976 int i;
8977 /* This duplicates the tests in the andsi3 expander. */
8978 for (i = 9; i <= 31; i++)
8979 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8980 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8981 return COSTS_N_INSNS (2);
8983 else if (outer == ASHIFT || outer == ASHIFTRT
8984 || outer == LSHIFTRT)
8985 return 0;
8986 return COSTS_N_INSNS (2);
8988 case CONST:
8989 case CONST_DOUBLE:
8990 case LABEL_REF:
8991 case SYMBOL_REF:
8992 return COSTS_N_INSNS (3);
8994 case UDIV:
8995 case UMOD:
8996 case DIV:
8997 case MOD:
8998 return 100;
9000 case TRUNCATE:
9001 return 99;
9003 case AND:
9004 case XOR:
9005 case IOR:
9006 return COSTS_N_INSNS (1);
9008 case MEM:
9009 return (COSTS_N_INSNS (1)
9010 + COSTS_N_INSNS (1)
9011 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9012 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9013 ? COSTS_N_INSNS (1) : 0));
9015 case IF_THEN_ELSE:
9016 /* XXX a guess. */
9017 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9018 return 14;
9019 return 2;
9021 case ZERO_EXTEND:
9022 /* XXX still guessing. */
9023 switch (GET_MODE (XEXP (x, 0)))
9025 case QImode:
9026 return (1 + (mode == DImode ? 4 : 0)
9027 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9029 case HImode:
9030 return (4 + (mode == DImode ? 4 : 0)
9031 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9033 case SImode:
9034 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9036 default:
9037 return 99;
9040 default:
9041 return 99;
9045 /* RTX costs when optimizing for size. */
9046 static bool
9047 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9048 int *total)
9050 machine_mode mode = GET_MODE (x);
9051 if (TARGET_THUMB1)
9053 *total = thumb1_size_rtx_costs (x, code, outer_code);
9054 return true;
9057 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9058 switch (code)
9060 case MEM:
9061 /* A memory access costs 1 insn if the mode is small, or the address is
9062 a single register, otherwise it costs one insn per word. */
9063 if (REG_P (XEXP (x, 0)))
9064 *total = COSTS_N_INSNS (1);
9065 else if (flag_pic
9066 && GET_CODE (XEXP (x, 0)) == PLUS
9067 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9068 /* This will be split into two instructions.
9069 See arm.md:calculate_pic_address. */
9070 *total = COSTS_N_INSNS (2);
9071 else
9072 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9073 return true;
9075 case DIV:
9076 case MOD:
9077 case UDIV:
9078 case UMOD:
9079 /* Needs a libcall, so it costs about this. */
9080 *total = COSTS_N_INSNS (2);
9081 return false;
9083 case ROTATE:
9084 if (mode == SImode && REG_P (XEXP (x, 1)))
9086 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9087 return true;
9089 /* Fall through */
9090 case ROTATERT:
9091 case ASHIFT:
9092 case LSHIFTRT:
9093 case ASHIFTRT:
9094 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9096 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9097 return true;
9099 else if (mode == SImode)
9101 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9102 /* Slightly disparage register shifts, but not by much. */
9103 if (!CONST_INT_P (XEXP (x, 1)))
9104 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9105 return true;
9108 /* Needs a libcall. */
9109 *total = COSTS_N_INSNS (2);
9110 return false;
9112 case MINUS:
9113 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9114 && (mode == SFmode || !TARGET_VFP_SINGLE))
9116 *total = COSTS_N_INSNS (1);
9117 return false;
9120 if (mode == SImode)
9122 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9123 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9125 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9126 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9127 || subcode1 == ROTATE || subcode1 == ROTATERT
9128 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9129 || subcode1 == ASHIFTRT)
9131 /* It's just the cost of the two operands. */
9132 *total = 0;
9133 return false;
9136 *total = COSTS_N_INSNS (1);
9137 return false;
9140 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9141 return false;
9143 case PLUS:
9144 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9145 && (mode == SFmode || !TARGET_VFP_SINGLE))
9147 *total = COSTS_N_INSNS (1);
9148 return false;
9151 /* A shift as a part of ADD costs nothing. */
9152 if (GET_CODE (XEXP (x, 0)) == MULT
9153 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9155 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9156 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9157 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9158 return true;
9161 /* Fall through */
9162 case AND: case XOR: case IOR:
9163 if (mode == SImode)
9165 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9167 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9168 || subcode == LSHIFTRT || subcode == ASHIFTRT
9169 || (code == AND && subcode == NOT))
9171 /* It's just the cost of the two operands. */
9172 *total = 0;
9173 return false;
9177 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9178 return false;
9180 case MULT:
9181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9182 return false;
9184 case NEG:
9185 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9186 && (mode == SFmode || !TARGET_VFP_SINGLE))
9188 *total = COSTS_N_INSNS (1);
9189 return false;
9192 /* Fall through */
9193 case NOT:
9194 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9196 return false;
9198 case IF_THEN_ELSE:
9199 *total = 0;
9200 return false;
9202 case COMPARE:
9203 if (cc_register (XEXP (x, 0), VOIDmode))
9204 *total = 0;
9205 else
9206 *total = COSTS_N_INSNS (1);
9207 return false;
9209 case ABS:
9210 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9211 && (mode == SFmode || !TARGET_VFP_SINGLE))
9212 *total = COSTS_N_INSNS (1);
9213 else
9214 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9215 return false;
9217 case SIGN_EXTEND:
9218 case ZERO_EXTEND:
9219 return arm_rtx_costs_1 (x, outer_code, total, 0);
9221 case CONST_INT:
9222 if (const_ok_for_arm (INTVAL (x)))
9223 /* A multiplication by a constant requires another instruction
9224 to load the constant to a register. */
9225 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9226 ? 1 : 0);
9227 else if (const_ok_for_arm (~INTVAL (x)))
9228 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9229 else if (const_ok_for_arm (-INTVAL (x)))
9231 if (outer_code == COMPARE || outer_code == PLUS
9232 || outer_code == MINUS)
9233 *total = 0;
9234 else
9235 *total = COSTS_N_INSNS (1);
9237 else
9238 *total = COSTS_N_INSNS (2);
9239 return true;
9241 case CONST:
9242 case LABEL_REF:
9243 case SYMBOL_REF:
9244 *total = COSTS_N_INSNS (2);
9245 return true;
9247 case CONST_DOUBLE:
9248 *total = COSTS_N_INSNS (4);
9249 return true;
9251 case CONST_VECTOR:
9252 if (TARGET_NEON
9253 && TARGET_HARD_FLOAT
9254 && outer_code == SET
9255 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9256 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9257 *total = COSTS_N_INSNS (1);
9258 else
9259 *total = COSTS_N_INSNS (4);
9260 return true;
9262 case HIGH:
9263 case LO_SUM:
9264 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9265 cost of these slightly. */
9266 *total = COSTS_N_INSNS (1) + 1;
9267 return true;
9269 case SET:
9270 return false;
9272 default:
9273 if (mode != VOIDmode)
9274 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9275 else
9276 *total = COSTS_N_INSNS (4); /* Who knows? */
9277 return false;
9281 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9282 operand, then return the operand that is being shifted. If the shift
9283 is not by a constant, then set SHIFT_REG to point to the operand.
9284 Return NULL if OP is not a shifter operand. */
9285 static rtx
9286 shifter_op_p (rtx op, rtx *shift_reg)
9288 enum rtx_code code = GET_CODE (op);
9290 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9291 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9292 return XEXP (op, 0);
9293 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9294 return XEXP (op, 0);
9295 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9296 || code == ASHIFTRT)
9298 if (!CONST_INT_P (XEXP (op, 1)))
9299 *shift_reg = XEXP (op, 1);
9300 return XEXP (op, 0);
9303 return NULL;
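/* Usage examples (illustrative): for (mult (reg r1) (const_int 4)) the
   function returns r1 and leaves *SHIFT_REG untouched, since 4 is a power
   of two; for (ashift (reg r1) (reg r2)) it returns r1 and sets *SHIFT_REG
   to r2; for (mult (reg r1) (const_int 5)) it returns NULL because 5 is
   not a power of two.  */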
9306 static bool
9307 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9309 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9310 gcc_assert (GET_CODE (x) == UNSPEC);
9312 switch (XINT (x, 1))
9314 case UNSPEC_UNALIGNED_LOAD:
9315 /* We can only do unaligned loads into the integer unit, and we can't
9316 use LDM or LDRD. */
9317 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9318 if (speed_p)
9319 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9320 + extra_cost->ldst.load_unaligned);
9322 #ifdef NOT_YET
9323 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9324 ADDR_SPACE_GENERIC, speed_p);
9325 #endif
9326 return true;
9328 case UNSPEC_UNALIGNED_STORE:
9329 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9330 if (speed_p)
9331 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9332 + extra_cost->ldst.store_unaligned);
9334 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9335 #ifdef NOT_YET
9336 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9337 ADDR_SPACE_GENERIC, speed_p);
9338 #endif
9339 return true;
9341 case UNSPEC_VRINTZ:
9342 case UNSPEC_VRINTP:
9343 case UNSPEC_VRINTM:
9344 case UNSPEC_VRINTR:
9345 case UNSPEC_VRINTX:
9346 case UNSPEC_VRINTA:
9347 *cost = COSTS_N_INSNS (1);
9348 if (speed_p)
9349 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9351 return true;
9352 default:
9353 *cost = COSTS_N_INSNS (2);
9354 break;
9356 return false;
9359 /* Cost of a libcall. We assume one insn per argument, an amount for the
9360 call (one insn for -Os) and then one for processing the result. */
9361 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
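/* Illustrative expansion: LIBCALL_COST (2) is COSTS_N_INSNS (20) when
   optimizing for speed (two argument insns plus the assumed call and
   result overhead) and COSTS_N_INSNS (4) when optimizing for size.  */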
9363 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9364 do \
9366 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9367 if (shift_op != NULL \
9368 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9370 if (shift_reg) \
9372 if (speed_p) \
9373 *cost += extra_cost->alu.arith_shift_reg; \
9374 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9376 else if (speed_p) \
9377 *cost += extra_cost->alu.arith_shift; \
9379 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9380 + rtx_cost (XEXP (x, 1 - IDX), \
9381 OP, 1, speed_p)); \
9382 return true; \
9385 while (0);
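/* Illustrative use of the macro above (assuming arm_rtx_shift_left_p
   accepts the operand as a left shift): with X = (plus (ashift (reg r1)
   (const_int 2)) (reg r2)) and IDX = 0, shifter_op_p returns r1 and leaves
   shift_reg as NULL, so for a speed estimate the cost is bumped by
   extra_cost->alu.arith_shift and the costs of r1 and r2 are added.  */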
9387 /* RTX costs. Make an estimate of the cost of executing the operation
9388 X, which is contained within an operation with code OUTER_CODE.
9389 SPEED_P indicates whether the cost desired is the performance cost,
9390 or the size cost. The estimate is stored in COST and the return
9391 value is TRUE if the cost calculation is final, or FALSE if the
9392 caller should recurse through the operands of X to add additional
9393 costs.
9395 We currently make no attempt to model the size savings of Thumb-2
9396 16-bit instructions. At the normal points in compilation where
9397 this code is called we have no measure of whether the condition
9398 flags are live or not, and thus no realistic way to determine what
9399 the size will eventually be. */
9400 static bool
9401 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9402 const struct cpu_cost_table *extra_cost,
9403 int *cost, bool speed_p)
9405 machine_mode mode = GET_MODE (x);
9407 if (TARGET_THUMB1)
9409 if (speed_p)
9410 *cost = thumb1_rtx_costs (x, code, outer_code);
9411 else
9412 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9413 return true;
9416 switch (code)
9418 case SET:
9419 *cost = 0;
9420 /* SET RTXs don't have a mode so we get it from the destination. */
9421 mode = GET_MODE (SET_DEST (x));
9423 if (REG_P (SET_SRC (x))
9424 && REG_P (SET_DEST (x)))
9426 /* Assume that most copies can be done with a single insn,
9427 unless we don't have HW FP, in which case everything
9428 larger than word mode will require two insns. */
9429 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9430 && GET_MODE_SIZE (mode) > 4)
9431 || mode == DImode)
9432 ? 2 : 1);
9433 /* Conditional register moves can be encoded
9434 in 16 bits in Thumb mode. */
9435 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9436 *cost >>= 1;
9438 return true;
9441 if (CONST_INT_P (SET_SRC (x)))
9443 /* Handle CONST_INT here, since the value doesn't have a mode
9444 and we would otherwise be unable to work out the true cost. */
9445 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9446 outer_code = SET;
9447 /* Slightly lower the cost of setting a core reg to a constant.
9448 This helps break up chains and allows for better scheduling. */
9449 if (REG_P (SET_DEST (x))
9450 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9451 *cost -= 1;
9452 x = SET_SRC (x);
9453 /* Immediate moves with an immediate in the range [0, 255] can be
9454 encoded in 16 bits in Thumb mode. */
9455 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9456 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9457 *cost >>= 1;
9458 goto const_int_cost;
9461 return false;
9463 case MEM:
9464 /* A memory access costs 1 insn if the mode is small or the address is
9465 a single register; otherwise it costs one insn per word. */
9466 if (REG_P (XEXP (x, 0)))
9467 *cost = COSTS_N_INSNS (1);
9468 else if (flag_pic
9469 && GET_CODE (XEXP (x, 0)) == PLUS
9470 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9471 /* This will be split into two instructions.
9472 See arm.md:calculate_pic_address. */
9473 *cost = COSTS_N_INSNS (2);
9474 else
9475 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9477 /* For speed optimizations, add the costs of the address and
9478 accessing memory. */
9479 if (speed_p)
9480 #ifdef NOT_YET
9481 *cost += (extra_cost->ldst.load
9482 + arm_address_cost (XEXP (x, 0), mode,
9483 ADDR_SPACE_GENERIC, speed_p));
9484 #else
9485 *cost += extra_cost->ldst.load;
9486 #endif
9487 return true;
9489 case PARALLEL:
9491 /* Calculations of LDM costs are complex. We assume an initial cost
9492 (ldm_1st) which will load the number of registers mentioned in
9493 ldm_regs_per_insn_1st registers; then each additional
9494 ldm_regs_per_insn_subsequent registers cost one more insn. The
9495 formula for N regs is thus:
9497 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9498 + ldm_regs_per_insn_subsequent - 1)
9499 / ldm_regs_per_insn_subsequent).
9501 Additional costs may also be added for addressing. A similar
9502 formula is used for STM. */
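/* As an illustration, on a core where ldm_regs_per_insn_1st == 1 and
   ldm_regs_per_insn_subsequent == 2, a five-register LDM would cost
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 1, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */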
9504 bool is_ldm = load_multiple_operation (x, SImode);
9505 bool is_stm = store_multiple_operation (x, SImode);
9507 *cost = COSTS_N_INSNS (1);
9509 if (is_ldm || is_stm)
9511 if (speed_p)
9513 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9514 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9515 ? extra_cost->ldst.ldm_regs_per_insn_1st
9516 : extra_cost->ldst.stm_regs_per_insn_1st;
9517 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9518 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9519 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9521 *cost += regs_per_insn_1st
9522 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9523 + regs_per_insn_sub - 1)
9524 / regs_per_insn_sub);
9525 return true;
9529 return false;
9531 case DIV:
9532 case UDIV:
9533 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9534 && (mode == SFmode || !TARGET_VFP_SINGLE))
9535 *cost = COSTS_N_INSNS (speed_p
9536 ? extra_cost->fp[mode != SFmode].div : 1);
9537 else if (mode == SImode && TARGET_IDIV)
9538 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9539 else
9540 *cost = LIBCALL_COST (2);
9541 return false; /* All arguments must be in registers. */
9543 case MOD:
9544 case UMOD:
9545 *cost = LIBCALL_COST (2);
9546 return false; /* All arguments must be in registers. */
9548 case ROTATE:
9549 if (mode == SImode && REG_P (XEXP (x, 1)))
9551 *cost = (COSTS_N_INSNS (2)
9552 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9553 if (speed_p)
9554 *cost += extra_cost->alu.shift_reg;
9555 return true;
9557 /* Fall through */
9558 case ROTATERT:
9559 case ASHIFT:
9560 case LSHIFTRT:
9561 case ASHIFTRT:
9562 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9564 *cost = (COSTS_N_INSNS (3)
9565 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9566 if (speed_p)
9567 *cost += 2 * extra_cost->alu.shift;
9568 return true;
9570 else if (mode == SImode)
9572 *cost = (COSTS_N_INSNS (1)
9573 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9574 /* Slightly disparage register shifts at -Os, but not by much. */
9575 if (!CONST_INT_P (XEXP (x, 1)))
9576 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9577 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9578 return true;
9580 else if (GET_MODE_CLASS (mode) == MODE_INT
9581 && GET_MODE_SIZE (mode) < 4)
9583 if (code == ASHIFT)
9585 *cost = (COSTS_N_INSNS (1)
9586 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9587 /* Slightly disparage register shifts at -Os, but not by
9588 much. */
9589 if (!CONST_INT_P (XEXP (x, 1)))
9590 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9591 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9593 else if (code == LSHIFTRT || code == ASHIFTRT)
9595 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9597 /* Can use SBFX/UBFX. */
9598 *cost = COSTS_N_INSNS (1);
9599 if (speed_p)
9600 *cost += extra_cost->alu.bfx;
9601 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9603 else
9605 *cost = COSTS_N_INSNS (2);
9606 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9607 if (speed_p)
9609 if (CONST_INT_P (XEXP (x, 1)))
9610 *cost += 2 * extra_cost->alu.shift;
9611 else
9612 *cost += (extra_cost->alu.shift
9613 + extra_cost->alu.shift_reg);
9615 else
9616 /* Slightly disparage register shifts. */
9617 *cost += !CONST_INT_P (XEXP (x, 1));
9620 else /* Rotates. */
9622 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9623 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9624 if (speed_p)
9626 if (CONST_INT_P (XEXP (x, 1)))
9627 *cost += (2 * extra_cost->alu.shift
9628 + extra_cost->alu.log_shift);
9629 else
9630 *cost += (extra_cost->alu.shift
9631 + extra_cost->alu.shift_reg
9632 + extra_cost->alu.log_shift_reg);
9635 return true;
9638 *cost = LIBCALL_COST (2);
9639 return false;
9641 case BSWAP:
9642 if (arm_arch6)
9644 if (mode == SImode)
9646 *cost = COSTS_N_INSNS (1);
9647 if (speed_p)
9648 *cost += extra_cost->alu.rev;
9650 return false;
9653 else
9655 /* No rev instruction available. Look at arm_legacy_rev
9656 and thumb_legacy_rev for the form of RTL used then. */
9657 if (TARGET_THUMB)
9659 *cost = COSTS_N_INSNS (10);
9661 if (speed_p)
9663 *cost += 6 * extra_cost->alu.shift;
9664 *cost += 3 * extra_cost->alu.logical;
9667 else
9669 *cost = COSTS_N_INSNS (5);
9671 if (speed_p)
9673 *cost += 2 * extra_cost->alu.shift;
9674 *cost += extra_cost->alu.arith_shift;
9675 *cost += 2 * extra_cost->alu.logical;
9678 return true;
9680 return false;
9682 case MINUS:
9683 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9684 && (mode == SFmode || !TARGET_VFP_SINGLE))
9686 *cost = COSTS_N_INSNS (1);
9687 if (GET_CODE (XEXP (x, 0)) == MULT
9688 || GET_CODE (XEXP (x, 1)) == MULT)
9690 rtx mul_op0, mul_op1, sub_op;
9692 if (speed_p)
9693 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9695 if (GET_CODE (XEXP (x, 0)) == MULT)
9697 mul_op0 = XEXP (XEXP (x, 0), 0);
9698 mul_op1 = XEXP (XEXP (x, 0), 1);
9699 sub_op = XEXP (x, 1);
9701 else
9703 mul_op0 = XEXP (XEXP (x, 1), 0);
9704 mul_op1 = XEXP (XEXP (x, 1), 1);
9705 sub_op = XEXP (x, 0);
9708 /* The first operand of the multiply may be optionally
9709 negated. */
9710 if (GET_CODE (mul_op0) == NEG)
9711 mul_op0 = XEXP (mul_op0, 0);
9713 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9714 + rtx_cost (mul_op1, code, 0, speed_p)
9715 + rtx_cost (sub_op, code, 0, speed_p));
9717 return true;
9720 if (speed_p)
9721 *cost += extra_cost->fp[mode != SFmode].addsub;
9722 return false;
9725 if (mode == SImode)
9727 rtx shift_by_reg = NULL;
9728 rtx shift_op;
9729 rtx non_shift_op;
9731 *cost = COSTS_N_INSNS (1);
9733 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9734 if (shift_op == NULL)
9736 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9737 non_shift_op = XEXP (x, 0);
9739 else
9740 non_shift_op = XEXP (x, 1);
9742 if (shift_op != NULL)
9744 if (shift_by_reg != NULL)
9746 if (speed_p)
9747 *cost += extra_cost->alu.arith_shift_reg;
9748 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9750 else if (speed_p)
9751 *cost += extra_cost->alu.arith_shift;
9753 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9754 + rtx_cost (non_shift_op, code, 0, speed_p));
9755 return true;
9758 if (arm_arch_thumb2
9759 && GET_CODE (XEXP (x, 1)) == MULT)
9761 /* MLS. */
9762 if (speed_p)
9763 *cost += extra_cost->mult[0].add;
9764 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9765 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9766 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9767 return true;
9770 if (CONST_INT_P (XEXP (x, 0)))
9772 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9773 INTVAL (XEXP (x, 0)), NULL_RTX,
9774 NULL_RTX, 1, 0);
9775 *cost = COSTS_N_INSNS (insns);
9776 if (speed_p)
9777 *cost += insns * extra_cost->alu.arith;
9778 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9779 return true;
9782 return false;
9785 if (GET_MODE_CLASS (mode) == MODE_INT
9786 && GET_MODE_SIZE (mode) < 4)
9788 rtx shift_op, shift_reg;
9789 shift_reg = NULL;
9791 /* We check both sides of the MINUS for shifter operands since,
9792 unlike PLUS, it's not commutative. */
9794 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9795 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9797 /* Slightly disparage, as we might need to widen the result. */
9798 *cost = 1 + COSTS_N_INSNS (1);
9799 if (speed_p)
9800 *cost += extra_cost->alu.arith;
9802 if (CONST_INT_P (XEXP (x, 0)))
9804 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9805 return true;
9808 return false;
9811 if (mode == DImode)
9813 *cost = COSTS_N_INSNS (2);
9815 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9817 rtx op1 = XEXP (x, 1);
9819 if (speed_p)
9820 *cost += 2 * extra_cost->alu.arith;
9822 if (GET_CODE (op1) == ZERO_EXTEND)
9823 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9824 else
9825 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9826 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9827 0, speed_p);
9828 return true;
9830 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9832 if (speed_p)
9833 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9834 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9835 0, speed_p)
9836 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9837 return true;
9839 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9840 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9842 if (speed_p)
9843 *cost += (extra_cost->alu.arith
9844 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9845 ? extra_cost->alu.arith
9846 : extra_cost->alu.arith_shift));
9847 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9848 + rtx_cost (XEXP (XEXP (x, 1), 0),
9849 GET_CODE (XEXP (x, 1)), 0, speed_p));
9850 return true;
9853 if (speed_p)
9854 *cost += 2 * extra_cost->alu.arith;
9855 return false;
9858 /* Vector mode? */
9860 *cost = LIBCALL_COST (2);
9861 return false;
9863 case PLUS:
9864 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9865 && (mode == SFmode || !TARGET_VFP_SINGLE))
9867 *cost = COSTS_N_INSNS (1);
9868 if (GET_CODE (XEXP (x, 0)) == MULT)
9870 rtx mul_op0, mul_op1, add_op;
9872 if (speed_p)
9873 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9875 mul_op0 = XEXP (XEXP (x, 0), 0);
9876 mul_op1 = XEXP (XEXP (x, 0), 1);
9877 add_op = XEXP (x, 1);
9879 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9880 + rtx_cost (mul_op1, code, 0, speed_p)
9881 + rtx_cost (add_op, code, 0, speed_p));
9883 return true;
9886 if (speed_p)
9887 *cost += extra_cost->fp[mode != SFmode].addsub;
9888 return false;
9890 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9892 *cost = LIBCALL_COST (2);
9893 return false;
9896 /* Narrow modes can be synthesized in SImode, but the range
9897 of useful sub-operations is limited. Check for shift operations
9898 on one of the operands. Only left shifts can be used in the
9899 narrow modes. */
9900 if (GET_MODE_CLASS (mode) == MODE_INT
9901 && GET_MODE_SIZE (mode) < 4)
9903 rtx shift_op, shift_reg;
9904 shift_reg = NULL;
9906 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9908 if (CONST_INT_P (XEXP (x, 1)))
9910 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9911 INTVAL (XEXP (x, 1)), NULL_RTX,
9912 NULL_RTX, 1, 0);
9913 *cost = COSTS_N_INSNS (insns);
9914 if (speed_p)
9915 *cost += insns * extra_cost->alu.arith;
9916 /* Slightly penalize a narrow operation as the result may
9917 need widening. */
9918 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9919 return true;
9922 /* Slightly penalize a narrow operation as the result may
9923 need widening. */
9924 *cost = 1 + COSTS_N_INSNS (1);
9925 if (speed_p)
9926 *cost += extra_cost->alu.arith;
9928 return false;
9931 if (mode == SImode)
9933 rtx shift_op, shift_reg;
9935 *cost = COSTS_N_INSNS (1);
9936 if (TARGET_INT_SIMD
9937 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9938 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9940 /* UXTA[BH] or SXTA[BH]. */
9941 if (speed_p)
9942 *cost += extra_cost->alu.extend_arith;
9943 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9944 speed_p)
9945 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9946 return true;
9949 shift_reg = NULL;
9950 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9951 if (shift_op != NULL)
9953 if (shift_reg)
9955 if (speed_p)
9956 *cost += extra_cost->alu.arith_shift_reg;
9957 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9959 else if (speed_p)
9960 *cost += extra_cost->alu.arith_shift;
9962 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9963 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9964 return true;
9966 if (GET_CODE (XEXP (x, 0)) == MULT)
9968 rtx mul_op = XEXP (x, 0);
9970 *cost = COSTS_N_INSNS (1);
9972 if (TARGET_DSP_MULTIPLY
9973 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9974 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9975 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9976 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9977 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9978 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9979 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9980 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9981 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9982 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9983 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9984 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9985 == 16))))))
9987 /* SMLA[BT][BT]. */
9988 if (speed_p)
9989 *cost += extra_cost->mult[0].extend_add;
9990 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9991 SIGN_EXTEND, 0, speed_p)
9992 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9993 SIGN_EXTEND, 0, speed_p)
9994 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9995 return true;
9998 if (speed_p)
9999 *cost += extra_cost->mult[0].add;
10000 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10001 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10002 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10003 return true;
10005 if (CONST_INT_P (XEXP (x, 1)))
10007 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10008 INTVAL (XEXP (x, 1)), NULL_RTX,
10009 NULL_RTX, 1, 0);
10010 *cost = COSTS_N_INSNS (insns);
10011 if (speed_p)
10012 *cost += insns * extra_cost->alu.arith;
10013 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10014 return true;
10016 return false;
10019 if (mode == DImode)
10021 if (arm_arch3m
10022 && GET_CODE (XEXP (x, 0)) == MULT
10023 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10024 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10025 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10026 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10028 *cost = COSTS_N_INSNS (1);
10029 if (speed_p)
10030 *cost += extra_cost->mult[1].extend_add;
10031 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10032 ZERO_EXTEND, 0, speed_p)
10033 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10034 ZERO_EXTEND, 0, speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10039 *cost = COSTS_N_INSNS (2);
10041 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10042 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10044 if (speed_p)
10045 *cost += (extra_cost->alu.arith
10046 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10047 ? extra_cost->alu.arith
10048 : extra_cost->alu.arith_shift));
10050 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10051 speed_p)
10052 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10053 return true;
10056 if (speed_p)
10057 *cost += 2 * extra_cost->alu.arith;
10058 return false;
10061 /* Vector mode? */
10062 *cost = LIBCALL_COST (2);
10063 return false;
10064 case IOR:
10065 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10067 *cost = COSTS_N_INSNS (1);
10068 if (speed_p)
10069 *cost += extra_cost->alu.rev;
10071 return true;
10073 /* Fall through. */
10074 case AND: case XOR:
10075 if (mode == SImode)
10077 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10078 rtx op0 = XEXP (x, 0);
10079 rtx shift_op, shift_reg;
10081 *cost = COSTS_N_INSNS (1);
10083 if (subcode == NOT
10084 && (code == AND
10085 || (code == IOR && TARGET_THUMB2)))
10086 op0 = XEXP (op0, 0);
10088 shift_reg = NULL;
10089 shift_op = shifter_op_p (op0, &shift_reg);
10090 if (shift_op != NULL)
10092 if (shift_reg)
10094 if (speed_p)
10095 *cost += extra_cost->alu.log_shift_reg;
10096 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10098 else if (speed_p)
10099 *cost += extra_cost->alu.log_shift;
10101 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10102 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10103 return true;
10106 if (CONST_INT_P (XEXP (x, 1)))
10108 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10109 INTVAL (XEXP (x, 1)), NULL_RTX,
10110 NULL_RTX, 1, 0);
10112 *cost = COSTS_N_INSNS (insns);
10113 if (speed_p)
10114 *cost += insns * extra_cost->alu.logical;
10115 *cost += rtx_cost (op0, code, 0, speed_p);
10116 return true;
10119 if (speed_p)
10120 *cost += extra_cost->alu.logical;
10121 *cost += (rtx_cost (op0, code, 0, speed_p)
10122 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10123 return true;
10126 if (mode == DImode)
10128 rtx op0 = XEXP (x, 0);
10129 enum rtx_code subcode = GET_CODE (op0);
10131 *cost = COSTS_N_INSNS (2);
10133 if (subcode == NOT
10134 && (code == AND
10135 || (code == IOR && TARGET_THUMB2)))
10136 op0 = XEXP (op0, 0);
10138 if (GET_CODE (op0) == ZERO_EXTEND)
10140 if (speed_p)
10141 *cost += 2 * extra_cost->alu.logical;
10143 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10144 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10145 return true;
10147 else if (GET_CODE (op0) == SIGN_EXTEND)
10149 if (speed_p)
10150 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10152 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10153 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10154 return true;
10157 if (speed_p)
10158 *cost += 2 * extra_cost->alu.logical;
10160 return true;
10162 /* Vector mode? */
10164 *cost = LIBCALL_COST (2);
10165 return false;
10167 case MULT:
10168 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10169 && (mode == SFmode || !TARGET_VFP_SINGLE))
10171 rtx op0 = XEXP (x, 0);
10173 *cost = COSTS_N_INSNS (1);
10175 if (GET_CODE (op0) == NEG)
10176 op0 = XEXP (op0, 0);
10178 if (speed_p)
10179 *cost += extra_cost->fp[mode != SFmode].mult;
10181 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10182 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10183 return true;
10185 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10187 *cost = LIBCALL_COST (2);
10188 return false;
10191 if (mode == SImode)
10193 *cost = COSTS_N_INSNS (1);
10194 if (TARGET_DSP_MULTIPLY
10195 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10196 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10197 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10198 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10199 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10200 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10201 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10202 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10203 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10204 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10206 && (INTVAL (XEXP (XEXP (x, 1), 1))
10207 == 16))))))
10209 /* SMUL[TB][TB]. */
10210 if (speed_p)
10211 *cost += extra_cost->mult[0].extend;
10212 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10213 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10214 return true;
10216 if (speed_p)
10217 *cost += extra_cost->mult[0].simple;
10218 return false;
10221 if (mode == DImode)
10223 if (arm_arch3m
10224 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10225 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10226 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10227 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10229 *cost = COSTS_N_INSNS (1);
10230 if (speed_p)
10231 *cost += extra_cost->mult[1].extend;
10232 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10233 ZERO_EXTEND, 0, speed_p)
10234 + rtx_cost (XEXP (XEXP (x, 1), 0),
10235 ZERO_EXTEND, 0, speed_p));
10236 return true;
10239 *cost = LIBCALL_COST (2);
10240 return false;
10243 /* Vector mode? */
10244 *cost = LIBCALL_COST (2);
10245 return false;
10247 case NEG:
10248 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10249 && (mode == SFmode || !TARGET_VFP_SINGLE))
10251 *cost = COSTS_N_INSNS (1);
10252 if (speed_p)
10253 *cost += extra_cost->fp[mode != SFmode].neg;
10255 return false;
10257 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10259 *cost = LIBCALL_COST (1);
10260 return false;
10263 if (mode == SImode)
10265 if (GET_CODE (XEXP (x, 0)) == ABS)
10267 *cost = COSTS_N_INSNS (2);
10268 /* Assume the non-flag-changing variant. */
10269 if (speed_p)
10270 *cost += (extra_cost->alu.log_shift
10271 + extra_cost->alu.arith_shift);
10272 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10273 return true;
10276 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10277 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10279 *cost = COSTS_N_INSNS (2);
10280 /* No extra cost for MOV imm and MVN imm. */
10281 /* If the comparison op is using the flags, there's no further
10282 cost, otherwise we need to add the cost of the comparison. */
10283 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10284 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10285 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10287 *cost += (COSTS_N_INSNS (1)
10288 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10289 speed_p)
10290 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10291 speed_p));
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10295 return true;
10297 *cost = COSTS_N_INSNS (1);
10298 if (speed_p)
10299 *cost += extra_cost->alu.arith;
10300 return false;
10303 if (GET_MODE_CLASS (mode) == MODE_INT
10304 && GET_MODE_SIZE (mode) < 4)
10306 /* Slightly disparage, as we might need an extend operation. */
10307 *cost = 1 + COSTS_N_INSNS (1);
10308 if (speed_p)
10309 *cost += extra_cost->alu.arith;
10310 return false;
10313 if (mode == DImode)
10315 *cost = COSTS_N_INSNS (2);
10316 if (speed_p)
10317 *cost += 2 * extra_cost->alu.arith;
10318 return false;
10321 /* Vector mode? */
10322 *cost = LIBCALL_COST (1);
10323 return false;
10325 case NOT:
10326 if (mode == SImode)
10328 rtx shift_op;
10329 rtx shift_reg = NULL;
10331 *cost = COSTS_N_INSNS (1);
10332 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10334 if (shift_op)
10336 if (shift_reg != NULL)
10338 if (speed_p)
10339 *cost += extra_cost->alu.log_shift_reg;
10340 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10342 else if (speed_p)
10343 *cost += extra_cost->alu.log_shift;
10344 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10345 return true;
10348 if (speed_p)
10349 *cost += extra_cost->alu.logical;
10350 return false;
10352 if (mode == DImode)
10354 *cost = COSTS_N_INSNS (2);
10355 return false;
10358 /* Vector mode? */
10360 *cost += LIBCALL_COST (1);
10361 return false;
10363 case IF_THEN_ELSE:
10365 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10367 *cost = COSTS_N_INSNS (4);
10368 return true;
10370 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10371 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10373 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10374 /* Assume that if one arm of the if_then_else is a register,
10375 it will be tied with the result, eliminating the
10376 conditional insn. */
10377 if (REG_P (XEXP (x, 1)))
10378 *cost += op2cost;
10379 else if (REG_P (XEXP (x, 2)))
10380 *cost += op1cost;
10381 else
10383 if (speed_p)
10385 if (extra_cost->alu.non_exec_costs_exec)
10386 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10387 else
10388 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10390 else
10391 *cost += op1cost + op2cost;
10394 return true;
10396 case COMPARE:
10397 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10398 *cost = 0;
10399 else
10401 machine_mode op0mode;
10402 /* We'll mostly assume that the cost of a compare is the cost of the
10403 LHS. However, there are some notable exceptions. */
10405 /* Floating point compares are never done as side-effects. */
10406 op0mode = GET_MODE (XEXP (x, 0));
10407 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10408 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10410 *cost = COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->fp[op0mode != SFmode].compare;
10414 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10416 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10417 return true;
10420 return false;
10422 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10424 *cost = LIBCALL_COST (2);
10425 return false;
10428 /* DImode compares normally take two insns. */
10429 if (op0mode == DImode)
10431 *cost = COSTS_N_INSNS (2);
10432 if (speed_p)
10433 *cost += 2 * extra_cost->alu.arith;
10434 return false;
10437 if (op0mode == SImode)
10439 rtx shift_op;
10440 rtx shift_reg;
10442 if (XEXP (x, 1) == const0_rtx
10443 && !(REG_P (XEXP (x, 0))
10444 || (GET_CODE (XEXP (x, 0)) == SUBREG
10445 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10447 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10449 /* Multiply operations that set the flags are often
10450 significantly more expensive. */
10451 if (speed_p
10452 && GET_CODE (XEXP (x, 0)) == MULT
10453 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10454 *cost += extra_cost->mult[0].flag_setting;
10456 if (speed_p
10457 && GET_CODE (XEXP (x, 0)) == PLUS
10458 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10459 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10460 0), 1), mode))
10461 *cost += extra_cost->mult[0].flag_setting;
10462 return true;
10465 shift_reg = NULL;
10466 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10467 if (shift_op != NULL)
10469 *cost = COSTS_N_INSNS (1);
10470 if (shift_reg != NULL)
10472 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10473 if (speed_p)
10474 *cost += extra_cost->alu.arith_shift_reg;
10476 else if (speed_p)
10477 *cost += extra_cost->alu.arith_shift;
10478 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10479 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10480 return true;
10483 *cost = COSTS_N_INSNS (1);
10484 if (speed_p)
10485 *cost += extra_cost->alu.arith;
10486 if (CONST_INT_P (XEXP (x, 1))
10487 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10489 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10490 return true;
10492 return false;
10495 /* Vector mode? */
10497 *cost = LIBCALL_COST (2);
10498 return false;
10500 return true;
10502 case EQ:
10503 case NE:
10504 case LT:
10505 case LE:
10506 case GT:
10507 case GE:
10508 case LTU:
10509 case LEU:
10510 case GEU:
10511 case GTU:
10512 case ORDERED:
10513 case UNORDERED:
10514 case UNEQ:
10515 case UNLE:
10516 case UNLT:
10517 case UNGE:
10518 case UNGT:
10519 case LTGT:
10520 if (outer_code == SET)
10522 /* Is it a store-flag operation? */
10523 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10524 && XEXP (x, 1) == const0_rtx)
10526 /* Thumb also needs an IT insn. */
10527 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10528 return true;
10530 if (XEXP (x, 1) == const0_rtx)
10532 switch (code)
10534 case LT:
10535 /* LSR Rd, Rn, #31. */
10536 *cost = COSTS_N_INSNS (1);
10537 if (speed_p)
10538 *cost += extra_cost->alu.shift;
10539 break;
10541 case EQ:
10542 /* RSBS T1, Rn, #0
10543 ADC Rd, Rn, T1. */
10545 case NE:
10546 /* SUBS T1, Rn, #1
10547 SBC Rd, Rn, T1. */
10548 *cost = COSTS_N_INSNS (2);
10549 break;
10551 case LE:
10552 /* RSBS T1, Rn, Rn, LSR #31
10553 ADC Rd, Rn, T1. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += extra_cost->alu.arith_shift;
10557 break;
10559 case GT:
10560 /* RSB Rd, Rn, Rn, ASR #1
10561 LSR Rd, Rd, #31. */
10562 *cost = COSTS_N_INSNS (2);
10563 if (speed_p)
10564 *cost += (extra_cost->alu.arith_shift
10565 + extra_cost->alu.shift);
10566 break;
10568 case GE:
10569 /* ASR Rd, Rn, #31
10570 ADD Rd, Rn, #1. */
10571 *cost = COSTS_N_INSNS (2);
10572 if (speed_p)
10573 *cost += extra_cost->alu.shift;
10574 break;
10576 default:
10577 /* Remaining cases are either meaningless or would take
10578 three insns anyway. */
10579 *cost = COSTS_N_INSNS (3);
10580 break;
10582 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10583 return true;
10585 else
10587 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10588 if (CONST_INT_P (XEXP (x, 1))
10589 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10591 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10592 return true;
10595 return false;
10598 /* Not directly inside a set. If it involves the condition code
10599 register it must be the condition for a branch, cond_exec or
10600 I_T_E operation. Since the comparison is performed elsewhere
10601 this is just the control part which has no additional
10602 cost. */
10603 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10604 && XEXP (x, 1) == const0_rtx)
10606 *cost = 0;
10607 return true;
10609 return false;
10611 case ABS:
10612 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10613 && (mode == SFmode || !TARGET_VFP_SINGLE))
10615 *cost = COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->fp[mode != SFmode].neg;
10619 return false;
10621 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10623 *cost = LIBCALL_COST (1);
10624 return false;
10627 if (mode == SImode)
10629 *cost = COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10632 return false;
10634 /* Vector mode? */
10635 *cost = LIBCALL_COST (1);
10636 return false;
10638 case SIGN_EXTEND:
10639 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10640 && MEM_P (XEXP (x, 0)))
10642 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10644 if (mode == DImode)
10645 *cost += COSTS_N_INSNS (1);
10647 if (!speed_p)
10648 return true;
10650 if (GET_MODE (XEXP (x, 0)) == SImode)
10651 *cost += extra_cost->ldst.load;
10652 else
10653 *cost += extra_cost->ldst.load_sign_extend;
10655 if (mode == DImode)
10656 *cost += extra_cost->alu.shift;
10658 return true;
10661 /* Widening from less than 32-bits requires an extend operation. */
10662 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10664 /* We have SXTB/SXTH. */
10665 *cost = COSTS_N_INSNS (1);
10666 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10667 if (speed_p)
10668 *cost += extra_cost->alu.extend;
10670 else if (GET_MODE (XEXP (x, 0)) != SImode)
10672 /* Needs two shifts. */
10673 *cost = COSTS_N_INSNS (2);
10674 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10675 if (speed_p)
10676 *cost += 2 * extra_cost->alu.shift;
10679 /* Widening beyond 32-bits requires one more insn. */
10680 if (mode == DImode)
10682 *cost += COSTS_N_INSNS (1);
10683 if (speed_p)
10684 *cost += extra_cost->alu.shift;
10687 return true;
10689 case ZERO_EXTEND:
10690 if ((arm_arch4
10691 || GET_MODE (XEXP (x, 0)) == SImode
10692 || GET_MODE (XEXP (x, 0)) == QImode)
10693 && MEM_P (XEXP (x, 0)))
10695 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10697 if (mode == DImode)
10698 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10700 return true;
10703 /* Widening from less than 32-bits requires an extend operation. */
10704 if (GET_MODE (XEXP (x, 0)) == QImode)
10706 /* UXTB can be a shorter instruction in Thumb2, but it might
10707 be slower than the AND Rd, Rn, #255 alternative. When
10708 optimizing for speed it should never be slower to use
10709 AND, and we don't really model 16-bit vs 32-bit insns
10710 here. */
10711 *cost = COSTS_N_INSNS (1);
10712 if (speed_p)
10713 *cost += extra_cost->alu.logical;
10715 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10717 /* We have UXTB/UXTH. */
10718 *cost = COSTS_N_INSNS (1);
10719 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10720 if (speed_p)
10721 *cost += extra_cost->alu.extend;
10723 else if (GET_MODE (XEXP (x, 0)) != SImode)
10725 /* Needs two shifts. It's marginally preferable to use
10726 shifts rather than two BIC instructions as the second
10727 shift may merge with a subsequent insn as a shifter
10728 op. */
10729 *cost = COSTS_N_INSNS (2);
10730 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10731 if (speed_p)
10732 *cost += 2 * extra_cost->alu.shift;
10734 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10735 *cost = COSTS_N_INSNS (1);
10737 /* Widening beyond 32-bits requires one more insn. */
10738 if (mode == DImode)
10740 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10743 return true;
10745 case CONST_INT:
10746 *cost = 0;
10747 /* CONST_INT has no mode, so we cannot tell for sure how many
10748 insns are really going to be needed. The best we can do is
10749 look at the value passed. If it fits in SImode, then assume
10750 that's the mode it will be used for. Otherwise assume it
10751 will be used in DImode. */
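/* For example, 0x12345678 truncates to itself in SImode and is
   costed as a single 32-bit constant, whereas a value such as
   ((HOST_WIDE_INT) 1 << 32), which does not, is costed below as two
   independent 32-bit halves.  */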
10752 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10753 mode = SImode;
10754 else
10755 mode = DImode;
10757 /* Avoid blowing up in arm_gen_constant (). */
10758 if (!(outer_code == PLUS
10759 || outer_code == AND
10760 || outer_code == IOR
10761 || outer_code == XOR
10762 || outer_code == MINUS))
10763 outer_code = SET;
10765 const_int_cost:
10766 if (mode == SImode)
10768 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10769 INTVAL (x), NULL, NULL,
10770 0, 0));
10771 /* Extra costs? */
10773 else
10775 *cost += COSTS_N_INSNS (arm_gen_constant
10776 (outer_code, SImode, NULL,
10777 trunc_int_for_mode (INTVAL (x), SImode),
10778 NULL, NULL, 0, 0)
10779 + arm_gen_constant (outer_code, SImode, NULL,
10780 INTVAL (x) >> 32, NULL,
10781 NULL, 0, 0));
10782 /* Extra costs? */
10785 return true;
10787 case CONST:
10788 case LABEL_REF:
10789 case SYMBOL_REF:
10790 if (speed_p)
10792 if (arm_arch_thumb2 && !flag_pic)
10793 *cost = COSTS_N_INSNS (2);
10794 else
10795 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10797 else
10798 *cost = COSTS_N_INSNS (2);
10800 if (flag_pic)
10802 *cost += COSTS_N_INSNS (1);
10803 if (speed_p)
10804 *cost += extra_cost->alu.arith;
10807 return true;
10809 case CONST_FIXED:
10810 *cost = COSTS_N_INSNS (4);
10811 /* Fixme. */
10812 return true;
10814 case CONST_DOUBLE:
10815 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10816 && (mode == SFmode || !TARGET_VFP_SINGLE))
10818 if (vfp3_const_double_rtx (x))
10820 *cost = COSTS_N_INSNS (1);
10821 if (speed_p)
10822 *cost += extra_cost->fp[mode == DFmode].fpconst;
10823 return true;
10826 if (speed_p)
10828 *cost = COSTS_N_INSNS (1);
10829 if (mode == DFmode)
10830 *cost += extra_cost->ldst.loadd;
10831 else
10832 *cost += extra_cost->ldst.loadf;
10834 else
10835 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10837 return true;
10839 *cost = COSTS_N_INSNS (4);
10840 return true;
10842 case CONST_VECTOR:
10843 /* Fixme. */
10844 if (TARGET_NEON
10845 && TARGET_HARD_FLOAT
10846 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10847 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10848 *cost = COSTS_N_INSNS (1);
10849 else
10850 *cost = COSTS_N_INSNS (4);
10851 return true;
10853 case HIGH:
10854 case LO_SUM:
10855 *cost = COSTS_N_INSNS (1);
10856 /* When optimizing for size, we prefer constant pool entries to
10857 MOVW/MOVT pairs, so bump the cost of these slightly. */
10858 if (!speed_p)
10859 *cost += 1;
10860 return true;
10862 case CLZ:
10863 *cost = COSTS_N_INSNS (1);
10864 if (speed_p)
10865 *cost += extra_cost->alu.clz;
10866 return false;
10868 case SMIN:
10869 if (XEXP (x, 1) == const0_rtx)
10871 *cost = COSTS_N_INSNS (1);
10872 if (speed_p)
10873 *cost += extra_cost->alu.log_shift;
10874 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10875 return true;
10877 /* Fall through. */
10878 case SMAX:
10879 case UMIN:
10880 case UMAX:
10881 *cost = COSTS_N_INSNS (2);
10882 return false;
10884 case TRUNCATE:
10885 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10886 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10887 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10888 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10889 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10890 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10891 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10892 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10893 == ZERO_EXTEND))))
10895 *cost = COSTS_N_INSNS (1);
10896 if (speed_p)
10897 *cost += extra_cost->mult[1].extend;
10898 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10899 speed_p)
10900 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10901 0, speed_p));
10902 return true;
10904 *cost = LIBCALL_COST (1);
10905 return false;
10907 case UNSPEC:
10908 return arm_unspec_cost (x, outer_code, speed_p, cost);
10910 case PC:
10911 /* Reading the PC is like reading any other register. Writing it
10912 is more expensive, but we take that into account elsewhere. */
10913 *cost = 0;
10914 return true;
10916 case ZERO_EXTRACT:
10917 /* TODO: Simple zero_extract of bottom bits using AND. */
10918 /* Fall through. */
10919 case SIGN_EXTRACT:
10920 if (arm_arch6
10921 && mode == SImode
10922 && CONST_INT_P (XEXP (x, 1))
10923 && CONST_INT_P (XEXP (x, 2)))
10925 *cost = COSTS_N_INSNS (1);
10926 if (speed_p)
10927 *cost += extra_cost->alu.bfx;
10928 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10929 return true;
10931 /* Without UBFX/SBFX, need to resort to shift operations. */
10932 *cost = COSTS_N_INSNS (2);
10933 if (speed_p)
10934 *cost += 2 * extra_cost->alu.shift;
10935 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10936 return true;
10938 case FLOAT_EXTEND:
10939 if (TARGET_HARD_FLOAT)
10941 *cost = COSTS_N_INSNS (1);
10942 if (speed_p)
10943 *cost += extra_cost->fp[mode == DFmode].widen;
10944 if (!TARGET_FPU_ARMV8
10945 && GET_MODE (XEXP (x, 0)) == HFmode)
10947 /* Pre v8, widening HF->DF is a two-step process, first
10948 widening to SFmode. */
10949 *cost += COSTS_N_INSNS (1);
10950 if (speed_p)
10951 *cost += extra_cost->fp[0].widen;
10953 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10954 return true;
10957 *cost = LIBCALL_COST (1);
10958 return false;
10960 case FLOAT_TRUNCATE:
10961 if (TARGET_HARD_FLOAT)
10963 *cost = COSTS_N_INSNS (1);
10964 if (speed_p)
10965 *cost += extra_cost->fp[mode == DFmode].narrow;
10966 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10967 return true;
10968 /* Vector modes? */
10970 *cost = LIBCALL_COST (1);
10971 return false;
10973 case FMA:
10974 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10976 rtx op0 = XEXP (x, 0);
10977 rtx op1 = XEXP (x, 1);
10978 rtx op2 = XEXP (x, 2);
10980 *cost = COSTS_N_INSNS (1);
10982 /* vfms or vfnma. */
10983 if (GET_CODE (op0) == NEG)
10984 op0 = XEXP (op0, 0);
10986 /* vfnms or vfnma. */
10987 if (GET_CODE (op2) == NEG)
10988 op2 = XEXP (op2, 0);
10990 *cost += rtx_cost (op0, FMA, 0, speed_p);
10991 *cost += rtx_cost (op1, FMA, 1, speed_p);
10992 *cost += rtx_cost (op2, FMA, 2, speed_p);
10994 if (speed_p)
10995 *cost += extra_cost->fp[mode == DFmode].fma;
10997 return true;
11000 *cost = LIBCALL_COST (3);
11001 return false;
11003 case FIX:
11004 case UNSIGNED_FIX:
11005 if (TARGET_HARD_FLOAT)
11007 if (GET_MODE_CLASS (mode) == MODE_INT)
11009 *cost = COSTS_N_INSNS (1);
11010 if (speed_p)
11011 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11012 /* Strip off the 'cost' of rounding towards zero. */
11013 if (GET_CODE (XEXP (x, 0)) == FIX)
11014 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11015 else
11016 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11017 /* ??? Increase the cost to deal with transferring from
11018 FP -> CORE registers? */
11019 return true;
11021 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11022 && TARGET_FPU_ARMV8)
11024 *cost = COSTS_N_INSNS (1);
11025 if (speed_p)
11026 *cost += extra_cost->fp[mode == DFmode].roundint;
11027 return false;
11029 /* Vector costs? */
11031 *cost = LIBCALL_COST (1);
11032 return false;
11034 case FLOAT:
11035 case UNSIGNED_FLOAT:
11036 if (TARGET_HARD_FLOAT)
11038 /* ??? Increase the cost to deal with transferring from CORE
11039 -> FP registers? */
11040 *cost = COSTS_N_INSNS (1);
11041 if (speed_p)
11042 *cost += extra_cost->fp[mode == DFmode].fromint;
11043 return false;
11045 *cost = LIBCALL_COST (1);
11046 return false;
11048 case CALL:
11049 *cost = COSTS_N_INSNS (1);
11050 return true;
11052 case ASM_OPERANDS:
11054 /* Just a guess: count the instructions in the asm template
11055 plus one insn per input operand. Always a minimum of COSTS_N_INSNS (1),
11056 though (see PR60663). */
11057 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11058 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11060 *cost = COSTS_N_INSNS (asm_length + num_operands);
11061 return true;
11063 default:
11064 if (mode != VOIDmode)
11065 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11066 else
11067 *cost = COSTS_N_INSNS (4); /* Who knows? */
11068 return false;
11072 #undef HANDLE_NARROW_SHIFT_ARITH
11074 /* Top-level RTX cost function: dispatch between the old per-core cost functions and the new table-driven costs. */
11075 static bool
11076 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11077 int *total, bool speed)
11079 bool result;
11081 if (TARGET_OLD_RTX_COSTS
11082 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11084 /* Old way. (Deprecated.) */
11085 if (!speed)
11086 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11087 (enum rtx_code) outer_code, total);
11088 else
11089 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11090 (enum rtx_code) outer_code, total,
11091 speed);
11093 else
11095 /* New way. */
11096 if (current_tune->insn_extra_cost)
11097 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11098 (enum rtx_code) outer_code,
11099 current_tune->insn_extra_cost,
11100 total, speed);
11101 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11102 && current_tune->insn_extra_cost == NULL */
11103 else
11104 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11105 (enum rtx_code) outer_code,
11106 &generic_extra_costs, total, speed);
11109 if (dump_file && (dump_flags & TDF_DETAILS))
11111 print_rtl_single (dump_file, x);
11112 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11113 *total, result ? "final" : "partial");
11115 return result;
11118 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11119 supported on any "slowmul" cores, so it can be ignored. */
11121 static bool
11122 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11123 int *total, bool speed)
11125 machine_mode mode = GET_MODE (x);
11127 if (TARGET_THUMB)
11129 *total = thumb1_rtx_costs (x, code, outer_code);
11130 return true;
11133 switch (code)
11135 case MULT:
11136 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11137 || mode == DImode)
11139 *total = COSTS_N_INSNS (20);
11140 return false;
11143 if (CONST_INT_P (XEXP (x, 1)))
11145 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11146 & (unsigned HOST_WIDE_INT) 0xffffffff);
11147 int cost, const_ok = const_ok_for_arm (i);
11148 int j, booth_unit_size;
11150 /* Tune as appropriate. */
11151 cost = const_ok ? 4 : 8;
11152 booth_unit_size = 2;
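/* Illustrative example: for a constant such as 0xff (valid as an ARM
   immediate) the base cost is 4, and the Booth loop below consumes
   two bits per iteration, so four iterations bring the total to
   COSTS_N_INSNS (8) before the cost of operand 0 is added.  */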
11153 for (j = 0; i && j < 32; j += booth_unit_size)
11155 i >>= booth_unit_size;
11156 cost++;
11159 *total = COSTS_N_INSNS (cost);
11160 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11161 return true;
11164 *total = COSTS_N_INSNS (20);
11165 return false;
11167 default:
11168 return arm_rtx_costs_1 (x, outer_code, total, speed);
11173 /* RTX cost for cores with a fast multiply unit (M variants). */
11175 static bool
11176 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11177 int *total, bool speed)
11179 machine_mode mode = GET_MODE (x);
11181 if (TARGET_THUMB1)
11183 *total = thumb1_rtx_costs (x, code, outer_code);
11184 return true;
11187 /* ??? Should Thumb-2 use different costs? */
11188 switch (code)
11190 case MULT:
11191 /* There is no point basing this on the tuning, since it is always the
11192 fast variant if it exists at all. */
11193 if (mode == DImode
11194 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11195 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11196 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11198 *total = COSTS_N_INSNS (2);
11199 return false;
11203 if (mode == DImode)
11205 *total = COSTS_N_INSNS (5);
11206 return false;
11209 if (CONST_INT_P (XEXP (x, 1)))
11211 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11212 & (unsigned HOST_WIDE_INT) 0xffffffff);
11213 int cost, const_ok = const_ok_for_arm (i);
11214 int j, booth_unit_size;
11216 /* Tune as appropriate. */
11217 cost = const_ok ? 4 : 8;
11218 booth_unit_size = 8;
11219 for (j = 0; i && j < 32; j += booth_unit_size)
11221 i >>= booth_unit_size;
11222 cost++;
11225 *total = COSTS_N_INSNS (cost);
11226 return false;
11229 if (mode == SImode)
11231 *total = COSTS_N_INSNS (4);
11232 return false;
11235 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11237 if (TARGET_HARD_FLOAT
11238 && (mode == SFmode
11239 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11241 *total = COSTS_N_INSNS (1);
11242 return false;
11247 /* Requires a lib call. */
11247 *total = COSTS_N_INSNS (20);
11248 return false;
11250 default:
11251 return arm_rtx_costs_1 (x, outer_code, total, speed);
11256 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11257 so it can be ignored. */
11259 static bool
11260 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11261 int *total, bool speed)
11263 machine_mode mode = GET_MODE (x);
11265 if (TARGET_THUMB)
11267 *total = thumb1_rtx_costs (x, code, outer_code);
11268 return true;
11271 switch (code)
11273 case COMPARE:
11274 if (GET_CODE (XEXP (x, 0)) != MULT)
11275 return arm_rtx_costs_1 (x, outer_code, total, speed);
11277 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11278 will stall until the multiplication is complete. */
11279 *total = COSTS_N_INSNS (3);
11280 return false;
11282 case MULT:
11283 /* There is no point basing this on the tuning, since it is always the
11284 fast variant if it exists at all. */
11285 if (mode == DImode
11286 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11287 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11288 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11290 *total = COSTS_N_INSNS (2);
11291 return false;
11295 if (mode == DImode)
11297 *total = COSTS_N_INSNS (5);
11298 return false;
11301 if (CONST_INT_P (XEXP (x, 1)))
11303 /* If operand 1 is a constant we can more accurately
11304 calculate the cost of the multiply. The multiplier can
11305 retire 15 bits on the first cycle and a further 12 on the
11306 second. We do, of course, have to load the constant into
11307 a register first. */
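/* Illustrative example: a multiplier of 0x1234 leaves both masks
   below clear, so the cost stays at COSTS_N_INSNS (1); 0x12345678
   sets bits in both the 0xffff8000 and the 0xf8000000 masks, giving
   COSTS_N_INSNS (3).  Negative constants are first inverted so that
   the same bounds apply.  */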
11308 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11309 /* There's a general overhead of one cycle. */
11310 int cost = 1;
11311 unsigned HOST_WIDE_INT masked_const;
11313 if (i & 0x80000000)
11314 i = ~i;
11316 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11318 masked_const = i & 0xffff8000;
11319 if (masked_const != 0)
11321 cost++;
11322 masked_const = i & 0xf8000000;
11323 if (masked_const != 0)
11324 cost++;
11326 *total = COSTS_N_INSNS (cost);
11327 return false;
11330 if (mode == SImode)
11332 *total = COSTS_N_INSNS (3);
11333 return false;
11336 /* Requires a lib call. */
11337 *total = COSTS_N_INSNS (20);
11338 return false;
11340 default:
11341 return arm_rtx_costs_1 (x, outer_code, total, speed);
11346 /* RTX costs for 9e (and later) cores. */
11348 static bool
11349 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11350 int *total, bool speed)
11352 machine_mode mode = GET_MODE (x);
11354 if (TARGET_THUMB1)
11356 switch (code)
11358 case MULT:
11359 *total = COSTS_N_INSNS (3);
11360 return true;
11362 default:
11363 *total = thumb1_rtx_costs (x, code, outer_code);
11364 return true;
11368 switch (code)
11370 case MULT:
11371 /* There is no point basing this on the tuning, since it is always the
11372 fast variant if it exists at all. */
11373 if (mode == DImode
11374 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11375 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11376 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11378 *total = COSTS_N_INSNS (2);
11379 return false;
11383 if (mode == DImode)
11385 *total = COSTS_N_INSNS (5);
11386 return false;
11389 if (mode == SImode)
11391 *total = COSTS_N_INSNS (2);
11392 return false;
11395 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11397 if (TARGET_HARD_FLOAT
11398 && (mode == SFmode
11399 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11401 *total = COSTS_N_INSNS (1);
11402 return false;
11406 *total = COSTS_N_INSNS (20);
11407 return false;
11409 default:
11410 return arm_rtx_costs_1 (x, outer_code, total, speed);
11413 /* All address computations that can be done are free, but rtx cost returns
11414 the same for practically all of them. So we weight the different types
11415 of address here in the order (most pref first):
11416 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11417 static inline int
11418 arm_arm_address_cost (rtx x)
11420 enum rtx_code c = GET_CODE (x);
11422 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11423 return 0;
11424 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11425 return 10;
11427 if (c == PLUS)
11429 if (CONST_INT_P (XEXP (x, 1)))
11430 return 2;
11432 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11433 return 3;
11435 return 4;
11438 return 6;
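/* For instance: a pre/post-increment address costs 0, [rN, #imm]
   costs 2, a shifted-index address such as [rN, rM, LSL #2] costs 3,
   a bare register costs 6 and a label or symbol reference costs 10.  */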
11441 static inline int
11442 arm_thumb_address_cost (rtx x)
11444 enum rtx_code c = GET_CODE (x);
11446 if (c == REG)
11447 return 1;
11448 if (c == PLUS
11449 && REG_P (XEXP (x, 0))
11450 && CONST_INT_P (XEXP (x, 1)))
11451 return 1;
11453 return 2;
11456 static int
11457 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11458 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11460 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11463 /* Adjust cost hook for XScale. */
11464 static bool
11465 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11467 /* Some true dependencies can have a higher cost depending
11468 on precisely how certain input operands are used. */
11469 if (REG_NOTE_KIND(link) == 0
11470 && recog_memoized (insn) >= 0
11471 && recog_memoized (dep) >= 0)
11473 int shift_opnum = get_attr_shift (insn);
11474 enum attr_type attr_type = get_attr_type (dep);
11476 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11477 operand for INSN. If we have a shifted input operand and the
11478 instruction we depend on is another ALU instruction, then we may
11479 have to account for an additional stall. */
11480 if (shift_opnum != 0
11481 && (attr_type == TYPE_ALU_SHIFT_IMM
11482 || attr_type == TYPE_ALUS_SHIFT_IMM
11483 || attr_type == TYPE_LOGIC_SHIFT_IMM
11484 || attr_type == TYPE_LOGICS_SHIFT_IMM
11485 || attr_type == TYPE_ALU_SHIFT_REG
11486 || attr_type == TYPE_ALUS_SHIFT_REG
11487 || attr_type == TYPE_LOGIC_SHIFT_REG
11488 || attr_type == TYPE_LOGICS_SHIFT_REG
11489 || attr_type == TYPE_MOV_SHIFT
11490 || attr_type == TYPE_MVN_SHIFT
11491 || attr_type == TYPE_MOV_SHIFT_REG
11492 || attr_type == TYPE_MVN_SHIFT_REG))
11494 rtx shifted_operand;
11495 int opno;
11497 /* Get the shifted operand. */
11498 extract_insn (insn);
11499 shifted_operand = recog_data.operand[shift_opnum];
11501 /* Iterate over all the operands in DEP. If we write an operand
11502 that overlaps with SHIFTED_OPERAND, then we have to increase the
11503 cost of this dependency. */
11504 extract_insn (dep);
11505 preprocess_constraints (dep);
11506 for (opno = 0; opno < recog_data.n_operands; opno++)
11508 /* We can ignore strict inputs. */
11509 if (recog_data.operand_type[opno] == OP_IN)
11510 continue;
11512 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11513 shifted_operand))
11515 *cost = 2;
11516 return false;
11521 return true;
11524 /* Adjust cost hook for Cortex A9. */
11525 static bool
11526 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11528 switch (REG_NOTE_KIND (link))
11530 case REG_DEP_ANTI:
11531 *cost = 0;
11532 return false;
11534 case REG_DEP_TRUE:
11535 case REG_DEP_OUTPUT:
11536 if (recog_memoized (insn) >= 0
11537 && recog_memoized (dep) >= 0)
11539 if (GET_CODE (PATTERN (insn)) == SET)
11541 if (GET_MODE_CLASS
11542 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11543 || GET_MODE_CLASS
11544 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11546 enum attr_type attr_type_insn = get_attr_type (insn);
11547 enum attr_type attr_type_dep = get_attr_type (dep);
11549 /* By default all dependencies of the form
11550 s0 = s0 <op> s1
11551 s0 = s0 <op> s2
11552 have an extra latency of 1 cycle because
11553 of the input and output dependency in this
11554 case. However, this gets modeled as a true
11555 dependency and hence all these checks. */
11556 if (REG_P (SET_DEST (PATTERN (insn)))
11557 && REG_P (SET_DEST (PATTERN (dep)))
11558 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11559 SET_DEST (PATTERN (dep))))
11561 /* FMACS is a special case where the dependent
11562 instruction can be issued 3 cycles before
11563 the normal latency in case of an output
11564 dependency. */
11565 if ((attr_type_insn == TYPE_FMACS
11566 || attr_type_insn == TYPE_FMACD)
11567 && (attr_type_dep == TYPE_FMACS
11568 || attr_type_dep == TYPE_FMACD))
11570 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11571 *cost = insn_default_latency (dep) - 3;
11572 else
11573 *cost = insn_default_latency (dep);
11574 return false;
11576 else
11578 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11579 *cost = insn_default_latency (dep) + 1;
11580 else
11581 *cost = insn_default_latency (dep);
11583 return false;
11588 break;
11590 default:
11591 gcc_unreachable ();
11594 return true;
11597 /* Adjust cost hook for FA726TE. */
11598 static bool
11599 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11601 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11602 by a predicated one) has a penalty of 3. */
11603 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11604 && recog_memoized (insn) >= 0
11605 && recog_memoized (dep) >= 0
11606 && get_attr_conds (dep) == CONDS_SET)
11608 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11609 if (get_attr_conds (insn) == CONDS_USE
11610 && get_attr_type (insn) != TYPE_BRANCH)
11612 *cost = 3;
11613 return false;
11616 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11617 || get_attr_conds (insn) == CONDS_USE)
11619 *cost = 0;
11620 return false;
11624 return true;
11627 /* Implement TARGET_REGISTER_MOVE_COST.
11629 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11630 such a move is typically more expensive than a single memory access. We set
11631 the cost to less than two memory accesses so that floating
11632 point to integer conversion does not go through memory. */
11635 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11636 reg_class_t from, reg_class_t to)
11638 if (TARGET_32BIT)
11640 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11641 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11642 return 15;
11643 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11644 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11645 return 4;
11646 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11647 return 20;
11648 else
11649 return 2;
11651 else
11653 if (from == HI_REGS || to == HI_REGS)
11654 return 4;
11655 else
11656 return 2;
11660 /* Implement TARGET_MEMORY_MOVE_COST. */
11663 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11664 bool in ATTRIBUTE_UNUSED)
11666 if (TARGET_32BIT)
11667 return 10;
11668 else
11670 if (GET_MODE_SIZE (mode) < 4)
11671 return 8;
11672 else
11673 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
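/* For example: 32-bit targets always return 10; on Thumb-1 an SImode
   value costs (2 * 4) * 1 == 8 in LO_REGS and 16 elsewhere, while
   anything narrower than a word costs 8 regardless of class.  */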
11677 /* Vectorizer cost model implementation. */
11679 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11680 static int
11681 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11682 tree vectype,
11683 int misalign ATTRIBUTE_UNUSED)
11685 unsigned elements;
11687 switch (type_of_cost)
11689 case scalar_stmt:
11690 return current_tune->vec_costs->scalar_stmt_cost;
11692 case scalar_load:
11693 return current_tune->vec_costs->scalar_load_cost;
11695 case scalar_store:
11696 return current_tune->vec_costs->scalar_store_cost;
11698 case vector_stmt:
11699 return current_tune->vec_costs->vec_stmt_cost;
11701 case vector_load:
11702 return current_tune->vec_costs->vec_align_load_cost;
11704 case vector_store:
11705 return current_tune->vec_costs->vec_store_cost;
11707 case vec_to_scalar:
11708 return current_tune->vec_costs->vec_to_scalar_cost;
11710 case scalar_to_vec:
11711 return current_tune->vec_costs->scalar_to_vec_cost;
11713 case unaligned_load:
11714 return current_tune->vec_costs->vec_unalign_load_cost;
11716 case unaligned_store:
11717 return current_tune->vec_costs->vec_unalign_store_cost;
11719 case cond_branch_taken:
11720 return current_tune->vec_costs->cond_taken_branch_cost;
11722 case cond_branch_not_taken:
11723 return current_tune->vec_costs->cond_not_taken_branch_cost;
11725 case vec_perm:
11726 case vec_promote_demote:
11727 return current_tune->vec_costs->vec_stmt_cost;
11729 case vec_construct:
11730 elements = TYPE_VECTOR_SUBPARTS (vectype);
11731 return elements / 2 + 1;
11733 default:
11734 gcc_unreachable ();
11738 /* Implement targetm.vectorize.add_stmt_cost. */
11740 static unsigned
11741 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11742 struct _stmt_vec_info *stmt_info, int misalign,
11743 enum vect_cost_model_location where)
11745 unsigned *cost = (unsigned *) data;
11746 unsigned retval = 0;
11748 if (flag_vect_cost_model)
11750 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11751 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11753 /* Statements in an inner loop relative to the loop being
11754 vectorized are weighted more heavily. The value here is
11755 arbitrary and could potentially be improved with analysis. */
11756 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11757 count *= 50; /* FIXME. */
11759 retval = (unsigned) (count * stmt_cost);
11760 cost[where] += retval;
11763 return retval;
11766 /* Return true if and only if this insn can dual-issue only as older. */
11767 static bool
11768 cortexa7_older_only (rtx_insn *insn)
11770 if (recog_memoized (insn) < 0)
11771 return false;
11773 switch (get_attr_type (insn))
11775 case TYPE_ALU_DSP_REG:
11776 case TYPE_ALU_SREG:
11777 case TYPE_ALUS_SREG:
11778 case TYPE_LOGIC_REG:
11779 case TYPE_LOGICS_REG:
11780 case TYPE_ADC_REG:
11781 case TYPE_ADCS_REG:
11782 case TYPE_ADR:
11783 case TYPE_BFM:
11784 case TYPE_REV:
11785 case TYPE_MVN_REG:
11786 case TYPE_SHIFT_IMM:
11787 case TYPE_SHIFT_REG:
11788 case TYPE_LOAD_BYTE:
11789 case TYPE_LOAD1:
11790 case TYPE_STORE1:
11791 case TYPE_FFARITHS:
11792 case TYPE_FADDS:
11793 case TYPE_FFARITHD:
11794 case TYPE_FADDD:
11795 case TYPE_FMOV:
11796 case TYPE_F_CVT:
11797 case TYPE_FCMPS:
11798 case TYPE_FCMPD:
11799 case TYPE_FCONSTS:
11800 case TYPE_FCONSTD:
11801 case TYPE_FMULS:
11802 case TYPE_FMACS:
11803 case TYPE_FMULD:
11804 case TYPE_FMACD:
11805 case TYPE_FDIVS:
11806 case TYPE_FDIVD:
11807 case TYPE_F_MRC:
11808 case TYPE_F_MRRC:
11809 case TYPE_F_FLAG:
11810 case TYPE_F_LOADS:
11811 case TYPE_F_STORES:
11812 return true;
11813 default:
11814 return false;
11818 /* Return true if and only if this insn can dual-issue as younger. */
11819 static bool
11820 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11822 if (recog_memoized (insn) < 0)
11824 if (verbose > 5)
11825 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11826 return false;
11829 switch (get_attr_type (insn))
11831 case TYPE_ALU_IMM:
11832 case TYPE_ALUS_IMM:
11833 case TYPE_LOGIC_IMM:
11834 case TYPE_LOGICS_IMM:
11835 case TYPE_EXTEND:
11836 case TYPE_MVN_IMM:
11837 case TYPE_MOV_IMM:
11838 case TYPE_MOV_REG:
11839 case TYPE_MOV_SHIFT:
11840 case TYPE_MOV_SHIFT_REG:
11841 case TYPE_BRANCH:
11842 case TYPE_CALL:
11843 return true;
11844 default:
11845 return false;
11850 /* Look for an instruction that can dual issue only as an older
11851 instruction, and move it in front of any instructions that can
11852 dual-issue as younger, while preserving the relative order of all
11853 other instructions in the ready list. This is a heuristic to help
11854 dual-issue in later cycles, by postponing issue of more flexible
11855 instructions. This heuristic may affect dual issue opportunities
11856 in the current cycle. */
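/* Illustration (added, not from the original sources): with a three-entry
   ready list {C, B, A}, where A sits at the head (index *n_readyp - 1) and
   can issue as younger while B can only issue as older, the scan below finds
   first_younger at A and first_older_only at B; the rotation then yields
   {C, A, B}, so the older-only insn B is issued ahead of the more flexible
   insn A.  */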
11857 static void
11858 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11859 int *n_readyp, int clock)
11861 int i;
11862 int first_older_only = -1, first_younger = -1;
11864 if (verbose > 5)
11865 fprintf (file,
11866 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11867 clock,
11868 *n_readyp);
11870 /* Traverse the ready list from the head (the instruction to issue
11871 first), looking for the first instruction that can issue as
11872 younger and the first instruction that can dual-issue only as
11873 older. */
11874 for (i = *n_readyp - 1; i >= 0; i--)
11876 rtx_insn *insn = ready[i];
11877 if (cortexa7_older_only (insn))
11879 first_older_only = i;
11880 if (verbose > 5)
11881 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11882 break;
11884 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11885 first_younger = i;
11888 /* Nothing to reorder because either no younger insn was found or an insn
11889 that can dual-issue only as older appears before any insn that
11890 can dual-issue as younger. */
11891 if (first_younger == -1)
11893 if (verbose > 5)
11894 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11895 return;
11898 /* Nothing to reorder because no older-only insn in the ready list. */
11899 if (first_older_only == -1)
11901 if (verbose > 5)
11902 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11903 return;
11906 /* Move first_older_only insn before first_younger. */
11907 if (verbose > 5)
11908 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11909 INSN_UID(ready [first_older_only]),
11910 INSN_UID(ready [first_younger]));
11911 rtx_insn *first_older_only_insn = ready [first_older_only];
11912 for (i = first_older_only; i < first_younger; i++)
11914 ready[i] = ready[i+1];
11917 ready[i] = first_older_only_insn;
11918 return;
11921 /* Implement TARGET_SCHED_REORDER. */
11922 static int
11923 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11924 int clock)
11926 switch (arm_tune)
11928 case cortexa7:
11929 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11930 break;
11931 default:
11932 /* Do nothing for other cores. */
11933 break;
11936 return arm_issue_rate ();
11939 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11940 It corrects the value of COST based on the relationship between
11941 INSN and DEP through the dependence LINK. It returns the new
11942 value. There is a per-core adjust_cost hook to adjust scheduler costs
11943 and the per-core hook can choose to completely override the generic
11944 adjust_cost function. Only put bits of code into arm_adjust_cost that
11945 are common across all cores. */
11946 static int
11947 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11949 rtx i_pat, d_pat;
11951 /* When generating Thumb-1 code, we want to place flag-setting operations
11952 close to a conditional branch which depends on them, so that we can
11953 omit the comparison. */
11954 if (TARGET_THUMB1
11955 && REG_NOTE_KIND (link) == 0
11956 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11957 && recog_memoized (dep) >= 0
11958 && get_attr_conds (dep) == CONDS_SET)
11959 return 0;
11961 if (current_tune->sched_adjust_cost != NULL)
11963 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11964 return cost;
11967 /* XXX Is this strictly true? */
11968 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11969 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11970 return 0;
11972 /* Call insns don't incur a stall, even if they follow a load. */
11973 if (REG_NOTE_KIND (link) == 0
11974 && CALL_P (insn))
11975 return 1;
11977 if ((i_pat = single_set (insn)) != NULL
11978 && MEM_P (SET_SRC (i_pat))
11979 && (d_pat = single_set (dep)) != NULL
11980 && MEM_P (SET_DEST (d_pat)))
11982 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11983 /* This is a load after a store; there is no conflict if the load reads
11984 from a cached area. Assume that loads from the stack and from the
11985 constant pool are cached, and that others will miss. This is a
11986 hack. */
11988 if ((GET_CODE (src_mem) == SYMBOL_REF
11989 && CONSTANT_POOL_ADDRESS_P (src_mem))
11990 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11991 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11992 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11993 return 1;
11996 return cost;
12000 arm_max_conditional_execute (void)
12002 return max_insns_skipped;
12005 static int
12006 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12008 if (TARGET_32BIT)
12009 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12010 else
12011 return (optimize > 0) ? 2 : 0;
12014 static int
12015 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12017 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12020 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12021 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12022 sequences of non-executed instructions in IT blocks probably take the same
12023 amount of time as executed instructions (and the IT instruction itself takes
12024 space in icache). This function was experimentally determined to give good
12025 results on a popular embedded benchmark. */
12027 static int
12028 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12030 return (TARGET_32BIT && speed_p) ? 1
12031 : arm_default_branch_cost (speed_p, predictable_p);
12034 static bool fp_consts_inited = false;
12036 static REAL_VALUE_TYPE value_fp0;
12038 static void
12039 init_fp_table (void)
12041 REAL_VALUE_TYPE r;
12043 r = REAL_VALUE_ATOF ("0", DFmode);
12044 value_fp0 = r;
12045 fp_consts_inited = true;
12048 /* Return TRUE if rtx X is a valid immediate FP constant. */
12050 arm_const_double_rtx (rtx x)
12052 REAL_VALUE_TYPE r;
12054 if (!fp_consts_inited)
12055 init_fp_table ();
12057 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12058 if (REAL_VALUE_MINUS_ZERO (r))
12059 return 0;
12061 if (REAL_VALUES_EQUAL (r, value_fp0))
12062 return 1;
12064 return 0;
12067 /* VFPv3 has a fairly wide range of representable immediates, formed from
12068 "quarter-precision" floating-point values. These can be evaluated using this
12069 formula (with ^ for exponentiation):
12071 (-1)^s * n * 2^(-r)
12073 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12074 16 <= n <= 31 and 0 <= r <= 7.
12076 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12078 - A (most-significant) is the sign bit.
12079 - BCD are the exponent (encoded as r XOR 3).
12080 - EFGH are the mantissa (encoded as n - 16).  */
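/* Worked example (added for illustration, not part of the original comment):
   1.0 = 16 * 2^(-4), i.e. s = 0, n = 16, r = 4, which encodes as A = 0,
   BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000, giving 0x70; likewise
   0.5 = 16 * 2^(-5) encodes as 0x60.  A minimal sketch of the packing step,
   mirroring the return statement of vfp3_const_double_index below (the
   helper name is hypothetical and the range checks are assumed done by the
   caller):

     static int
     vfp3_pack_example (int s, int n, int r)
     {
       return (s << 7) | ((r ^ 3) << 4) | (n - 16);
     }
*/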
12083 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12084 fconst[sd] instruction, or -1 if X isn't suitable. */
12085 static int
12086 vfp3_const_double_index (rtx x)
12088 REAL_VALUE_TYPE r, m;
12089 int sign, exponent;
12090 unsigned HOST_WIDE_INT mantissa, mant_hi;
12091 unsigned HOST_WIDE_INT mask;
12092 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12093 bool fail;
12095 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12096 return -1;
12098 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12100 /* We can't represent these things, so detect them first. */
12101 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12102 return -1;
12104 /* Extract sign, exponent and mantissa. */
12105 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12106 r = real_value_abs (&r);
12107 exponent = REAL_EXP (&r);
12108 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12109 highest (sign) bit, with a fixed binary point at bit point_pos.
12110 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12111 bits for the mantissa, this may fail (low bits would be lost). */
12112 real_ldexp (&m, &r, point_pos - exponent);
12113 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12114 mantissa = w.elt (0);
12115 mant_hi = w.elt (1);
12117 /* If there are bits set in the low part of the mantissa, we can't
12118 represent this value. */
12119 if (mantissa != 0)
12120 return -1;
12122 /* Now make it so that mantissa contains the most-significant bits, and move
12123 the point_pos to indicate that the least-significant bits have been
12124 discarded. */
12125 point_pos -= HOST_BITS_PER_WIDE_INT;
12126 mantissa = mant_hi;
12128 /* We can permit four significant bits of mantissa only, plus a high bit
12129 which is always 1. */
12130 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12131 if ((mantissa & mask) != 0)
12132 return -1;
12134 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12135 mantissa >>= point_pos - 5;
12137 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12138 floating-point immediate zero with Neon using an integer-zero load, but
12139 that case is handled elsewhere.) */
12140 if (mantissa == 0)
12141 return -1;
12143 gcc_assert (mantissa >= 16 && mantissa <= 31);
12145 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12146 normalized significands are in the range [1, 2). (Our mantissa is shifted
12147 left 4 places at this point relative to normalized IEEE754 values). GCC
12148 internally uses [0.5, 1) (see real.c), so the exponent returned from
12149 REAL_EXP must be altered. */
12150 exponent = 5 - exponent;
12152 if (exponent < 0 || exponent > 7)
12153 return -1;
12155 /* Sign, mantissa and exponent are now in the correct form to plug into the
12156 formula described in the comment above. */
12157 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12160 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12162 vfp3_const_double_rtx (rtx x)
12164 if (!TARGET_VFP3)
12165 return 0;
12167 return vfp3_const_double_index (x) != -1;
12170 /* Recognize immediates which can be used in various Neon instructions. Legal
12171 immediates are described by the following table (for VMVN variants, the
12172 bitwise inverse of the constant shown is recognized. In either case, VMOV
12173 is output and the correct instruction to use for a given constant is chosen
12174 by the assembler). The constant shown is replicated across all elements of
12175 the destination vector.
12177 insn elems variant constant (binary)
12178 ---- ----- ------- -----------------
12179 vmov i32 0 00000000 00000000 00000000 abcdefgh
12180 vmov i32 1 00000000 00000000 abcdefgh 00000000
12181 vmov i32 2 00000000 abcdefgh 00000000 00000000
12182 vmov i32 3 abcdefgh 00000000 00000000 00000000
12183 vmov i16 4 00000000 abcdefgh
12184 vmov i16 5 abcdefgh 00000000
12185 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12186 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12187 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12188 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12189 vmvn i16 10 00000000 abcdefgh
12190 vmvn i16 11 abcdefgh 00000000
12191 vmov i32 12 00000000 00000000 abcdefgh 11111111
12192 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12193 vmov i32 14 00000000 abcdefgh 11111111 11111111
12194 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12195 vmov i8 16 abcdefgh
12196 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12197 eeeeeeee ffffffff gggggggg hhhhhhhh
12198 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12199 vmov f32 19 00000000 00000000 00000000 00000000
12201 For case 18, B = !b. Representable values are exactly those accepted by
12202 vfp3_const_double_index, but are output as floating-point numbers rather
12203 than indices.
12205 For case 19, we will change it to vmov.i32 when assembling.
12207 Variants 0-5 (inclusive) may also be used as immediates for the second
12208 operand of VORR/VBIC instructions.
12210 The INVERSE argument causes the bitwise inverse of the given operand to be
12211 recognized instead (used for recognizing legal immediates for the VAND/VORN
12212 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12213 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12214 output, rather than the real insns vbic/vorr).
12216 INVERSE makes no difference to the recognition of float vectors.
12218 The return value is the variant of immediate as shown in the above table, or
12219 -1 if the given value doesn't match any of the listed patterns.  */
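/* Worked example (added for illustration, not in the original sources): with
   INVERSE zero, a V4SImode CONST_VECTOR whose four elements are all
   0x00005500 splats to the byte pattern 00, 55, 00, 00 repeated, which
   matches variant 1 in the table above; the function returns 1 with
   *elementwidth = 32 and *modconst = (const_int 0x5500).  */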
12221 static int
12222 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12223 rtx *modconst, int *elementwidth)
12225 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12226 matches = 1; \
12227 for (i = 0; i < idx; i += (STRIDE)) \
12228 if (!(TEST)) \
12229 matches = 0; \
12230 if (matches) \
12232 immtype = (CLASS); \
12233 elsize = (ELSIZE); \
12234 break; \
12237 unsigned int i, elsize = 0, idx = 0, n_elts;
12238 unsigned int innersize;
12239 unsigned char bytes[16];
12240 int immtype = -1, matches;
12241 unsigned int invmask = inverse ? 0xff : 0;
12242 bool vector = GET_CODE (op) == CONST_VECTOR;
12244 if (vector)
12246 n_elts = CONST_VECTOR_NUNITS (op);
12247 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12249 else
12251 n_elts = 1;
12252 if (mode == VOIDmode)
12253 mode = DImode;
12254 innersize = GET_MODE_SIZE (mode);
12257 /* Vectors of float constants. */
12258 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12260 rtx el0 = CONST_VECTOR_ELT (op, 0);
12261 REAL_VALUE_TYPE r0;
12263 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12264 return -1;
12266 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12268 for (i = 1; i < n_elts; i++)
12270 rtx elt = CONST_VECTOR_ELT (op, i);
12271 REAL_VALUE_TYPE re;
12273 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12275 if (!REAL_VALUES_EQUAL (r0, re))
12276 return -1;
12279 if (modconst)
12280 *modconst = CONST_VECTOR_ELT (op, 0);
12282 if (elementwidth)
12283 *elementwidth = 0;
12285 if (el0 == CONST0_RTX (GET_MODE (el0)))
12286 return 19;
12287 else
12288 return 18;
12291 /* Splat vector constant out into a byte vector. */
12292 for (i = 0; i < n_elts; i++)
12294 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12295 unsigned HOST_WIDE_INT elpart;
12296 unsigned int part, parts;
12298 if (CONST_INT_P (el))
12300 elpart = INTVAL (el);
12301 parts = 1;
12303 else if (CONST_DOUBLE_P (el))
12305 elpart = CONST_DOUBLE_LOW (el);
12306 parts = 2;
12308 else
12309 gcc_unreachable ();
12311 for (part = 0; part < parts; part++)
12313 unsigned int byte;
12314 for (byte = 0; byte < innersize; byte++)
12316 bytes[idx++] = (elpart & 0xff) ^ invmask;
12317 elpart >>= BITS_PER_UNIT;
12319 if (CONST_DOUBLE_P (el))
12320 elpart = CONST_DOUBLE_HIGH (el);
12324 /* Sanity check. */
12325 gcc_assert (idx == GET_MODE_SIZE (mode));
12329 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12330 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12332 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12333 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12335 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12336 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12338 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12339 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12341 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12343 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12345 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12346 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12348 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12351 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12352 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12354 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12355 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12357 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12359 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12361 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12362 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12364 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12365 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12367 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12368 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12370 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12371 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12373 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12375 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12376 && bytes[i] == bytes[(i + 8) % idx]);
12378 while (0);
12380 if (immtype == -1)
12381 return -1;
12383 if (elementwidth)
12384 *elementwidth = elsize;
12386 if (modconst)
12388 unsigned HOST_WIDE_INT imm = 0;
12390 /* Un-invert bytes of recognized vector, if necessary. */
12391 if (invmask != 0)
12392 for (i = 0; i < idx; i++)
12393 bytes[i] ^= invmask;
12395 if (immtype == 17)
12397 /* FIXME: Broken on 32-bit H_W_I hosts. */
12398 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12400 for (i = 0; i < 8; i++)
12401 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12402 << (i * BITS_PER_UNIT);
12404 *modconst = GEN_INT (imm);
12406 else
12408 unsigned HOST_WIDE_INT imm = 0;
12410 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12411 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12413 *modconst = GEN_INT (imm);
12417 return immtype;
12418 #undef CHECK
12421 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12422 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12423 float elements), and a modified constant (whatever should be output for a
12424 VMOV) in *MODCONST. */
12427 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12428 rtx *modconst, int *elementwidth)
12430 rtx tmpconst;
12431 int tmpwidth;
12432 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12434 if (retval == -1)
12435 return 0;
12437 if (modconst)
12438 *modconst = tmpconst;
12440 if (elementwidth)
12441 *elementwidth = tmpwidth;
12443 return 1;
12446 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12447 the immediate is valid, write a constant suitable for using as an operand
12448 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12449 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12452 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12453 rtx *modconst, int *elementwidth)
12455 rtx tmpconst;
12456 int tmpwidth;
12457 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12459 if (retval < 0 || retval > 5)
12460 return 0;
12462 if (modconst)
12463 *modconst = tmpconst;
12465 if (elementwidth)
12466 *elementwidth = tmpwidth;
12468 return 1;
12471 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12472 the immediate is valid, write a constant suitable for using as an operand
12473 to VSHR/VSHL to *MODCONST and the corresponding element width to
12474 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left shift or a right shift,
12475 because they have different limitations. */
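/* For example (illustration only): with V8QImode (8-bit elements) a VSHL
   immediate must be in the range 0..7 and a VSHR immediate in 1..8; an
   out-of-range count, or a vector whose shift counts are not all equal,
   makes this function return 0.  */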
12478 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12479 rtx *modconst, int *elementwidth,
12480 bool isleftshift)
12482 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12483 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12484 unsigned HOST_WIDE_INT last_elt = 0;
12485 unsigned HOST_WIDE_INT maxshift;
12487 /* All elements of the vector constant must be identical; extract the value. */
12488 for (i = 0; i < n_elts; i++)
12490 rtx el = CONST_VECTOR_ELT (op, i);
12491 unsigned HOST_WIDE_INT elpart;
12493 if (CONST_INT_P (el))
12494 elpart = INTVAL (el);
12495 else if (CONST_DOUBLE_P (el))
12496 return 0;
12497 else
12498 gcc_unreachable ();
12500 if (i != 0 && elpart != last_elt)
12501 return 0;
12503 last_elt = elpart;
12506 /* Shift less than element size. */
12507 maxshift = innersize * 8;
12509 if (isleftshift)
12511 /* Left shift immediate value can be from 0 to <size>-1. */
12512 if (last_elt >= maxshift)
12513 return 0;
12515 else
12517 /* Right shift immediate value can be from 1 to <size>. */
12518 if (last_elt == 0 || last_elt > maxshift)
12519 return 0;
12522 if (elementwidth)
12523 *elementwidth = innersize * 8;
12525 if (modconst)
12526 *modconst = CONST_VECTOR_ELT (op, 0);
12528 return 1;
12531 /* Return a string suitable for output of Neon immediate logic operation
12532 MNEM. */
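/* For instance (illustrative, derived from the sprintf formats below):
   MNEM = "vorr", a valid 32-bit immediate and QUAD = 1 produce the template
   "vorr.i32\t%q0, %2"; with QUAD = 0 the D-register form
   "vorr.i32\t%P0, %2" is produced instead.  */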
12534 char *
12535 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12536 int inverse, int quad)
12538 int width, is_valid;
12539 static char templ[40];
12541 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12543 gcc_assert (is_valid != 0);
12545 if (quad)
12546 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12547 else
12548 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12550 return templ;
12553 /* Return a string suitable for output of Neon immediate shift operation
12554 (VSHR or VSHL) MNEM. */
12556 char *
12557 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12558 machine_mode mode, int quad,
12559 bool isleftshift)
12561 int width, is_valid;
12562 static char templ[40];
12564 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12565 gcc_assert (is_valid != 0);
12567 if (quad)
12568 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12569 else
12570 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12572 return templ;
12575 /* Output a sequence of pairwise operations to implement a reduction.
12576 NOTE: We do "too much work" here, because pairwise operations work on two
12577 registers-worth of operands in one go. Unfortunately, I don't think we can
12578 exploit those extra calculations to do the full operation in fewer steps.
12579 Although all vector elements of the result but the first are ignored, we
12580 actually calculate the same result in each of the elements. An alternative
12581 such as initially loading a vector with zero to use as each of the second
12582 operands would use up an additional register and take an extra instruction,
12583 for no particular gain. */
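/* Sketch of the emitted sequence (added for illustration): for a
   four-element mode, PARTS is 4, so the loop below runs with i = 2 and then
   i = 1, emitting two pairwise REDUC operations; the second one writes OP0,
   all of whose elements (in particular element 0, the one that is used)
   hold the reduced value.  */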
12585 void
12586 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12587 rtx (*reduc) (rtx, rtx, rtx))
12589 machine_mode inner = GET_MODE_INNER (mode);
12590 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12591 rtx tmpsum = op1;
12593 for (i = parts / 2; i >= 1; i /= 2)
12595 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12596 emit_insn (reduc (dest, tmpsum, tmpsum));
12597 tmpsum = dest;
12601 /* If VALS is a vector constant that can be loaded into a register
12602 using VDUP, generate instructions to do so and return an RTX to
12603 assign to the register. Otherwise return NULL_RTX. */
12605 static rtx
12606 neon_vdup_constant (rtx vals)
12608 machine_mode mode = GET_MODE (vals);
12609 machine_mode inner_mode = GET_MODE_INNER (mode);
12610 int n_elts = GET_MODE_NUNITS (mode);
12611 bool all_same = true;
12612 rtx x;
12613 int i;
12615 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12616 return NULL_RTX;
12618 for (i = 0; i < n_elts; ++i)
12620 x = XVECEXP (vals, 0, i);
12621 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12622 all_same = false;
12625 if (!all_same)
12626 /* The elements are not all the same. We could handle repeating
12627 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12628 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12629 vdup.i16). */
12630 return NULL_RTX;
12632 /* We can load this constant by using VDUP and a constant in a
12633 single ARM register. This will be cheaper than a vector
12634 load. */
12636 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12637 return gen_rtx_VEC_DUPLICATE (mode, x);
12640 /* Generate code to load VALS, which is a PARALLEL containing only
12641 constants (for vec_init) or CONST_VECTOR, efficiently into a
12642 register. Returns an RTX to copy into the register, or NULL_RTX
12643 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12646 neon_make_constant (rtx vals)
12648 machine_mode mode = GET_MODE (vals);
12649 rtx target;
12650 rtx const_vec = NULL_RTX;
12651 int n_elts = GET_MODE_NUNITS (mode);
12652 int n_const = 0;
12653 int i;
12655 if (GET_CODE (vals) == CONST_VECTOR)
12656 const_vec = vals;
12657 else if (GET_CODE (vals) == PARALLEL)
12659 /* A CONST_VECTOR must contain only CONST_INTs and
12660 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12661 Only store valid constants in a CONST_VECTOR. */
12662 for (i = 0; i < n_elts; ++i)
12664 rtx x = XVECEXP (vals, 0, i);
12665 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12666 n_const++;
12668 if (n_const == n_elts)
12669 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12671 else
12672 gcc_unreachable ();
12674 if (const_vec != NULL
12675 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12676 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12677 return const_vec;
12678 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12679 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12680 pipeline cycle; creating the constant takes one or two ARM
12681 pipeline cycles. */
12682 return target;
12683 else if (const_vec != NULL_RTX)
12684 /* Load from constant pool. On Cortex-A8 this takes two cycles
12685 (for either double or quad vectors). We can not take advantage
12686 of single-cycle VLD1 because we need a PC-relative addressing
12687 mode. */
12688 return const_vec;
12689 else
12690 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12691 We can not construct an initializer. */
12692 return NULL_RTX;
12695 /* Initialize vector TARGET to VALS. */
12697 void
12698 neon_expand_vector_init (rtx target, rtx vals)
12700 machine_mode mode = GET_MODE (target);
12701 machine_mode inner_mode = GET_MODE_INNER (mode);
12702 int n_elts = GET_MODE_NUNITS (mode);
12703 int n_var = 0, one_var = -1;
12704 bool all_same = true;
12705 rtx x, mem;
12706 int i;
12708 for (i = 0; i < n_elts; ++i)
12710 x = XVECEXP (vals, 0, i);
12711 if (!CONSTANT_P (x))
12712 ++n_var, one_var = i;
12714 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12715 all_same = false;
12718 if (n_var == 0)
12720 rtx constant = neon_make_constant (vals);
12721 if (constant != NULL_RTX)
12723 emit_move_insn (target, constant);
12724 return;
12728 /* Splat a single non-constant element if we can. */
12729 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12731 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12732 emit_insn (gen_rtx_SET (VOIDmode, target,
12733 gen_rtx_VEC_DUPLICATE (mode, x)));
12734 return;
12737 /* One field is non-constant. Load constant then overwrite varying
12738 field. This is more efficient than using the stack. */
12739 if (n_var == 1)
12741 rtx copy = copy_rtx (vals);
12742 rtx index = GEN_INT (one_var);
12744 /* Load constant part of vector, substitute neighboring value for
12745 varying element. */
12746 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12747 neon_expand_vector_init (target, copy);
12749 /* Insert variable. */
12750 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12751 switch (mode)
12753 case V8QImode:
12754 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12755 break;
12756 case V16QImode:
12757 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12758 break;
12759 case V4HImode:
12760 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12761 break;
12762 case V8HImode:
12763 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12764 break;
12765 case V2SImode:
12766 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12767 break;
12768 case V4SImode:
12769 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12770 break;
12771 case V2SFmode:
12772 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12773 break;
12774 case V4SFmode:
12775 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12776 break;
12777 case V2DImode:
12778 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12779 break;
12780 default:
12781 gcc_unreachable ();
12783 return;
12786 /* Construct the vector in memory one field at a time
12787 and load the whole vector. */
12788 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12789 for (i = 0; i < n_elts; i++)
12790 emit_move_insn (adjust_address_nv (mem, inner_mode,
12791 i * GET_MODE_SIZE (inner_mode)),
12792 XVECEXP (vals, 0, i));
12793 emit_move_insn (target, mem);
12796 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12797 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12798 reported source locations are bogus. */
12800 static void
12801 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12802 const char *err)
12804 HOST_WIDE_INT lane;
12806 gcc_assert (CONST_INT_P (operand));
12808 lane = INTVAL (operand);
12810 if (lane < low || lane >= high)
12811 error (err);
12814 /* Bounds-check lanes. */
12816 void
12817 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12819 bounds_check (operand, low, high, "lane out of range");
12822 /* Bounds-check constants. */
12824 void
12825 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12827 bounds_check (operand, low, high, "constant out of range");
12830 HOST_WIDE_INT
12831 neon_element_bits (machine_mode mode)
12833 if (mode == DImode)
12834 return GET_MODE_BITSIZE (mode);
12835 else
12836 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12840 /* Predicates for `match_operand' and `match_operator'. */
12842 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12843 WB is true if full writeback address modes are allowed and is false
12844 if limited writeback address modes (POST_INC and PRE_DEC) are
12845 allowed. */
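/* For example (illustration only, register names chosen for readability):
   (mem (reg r4)) and (mem (plus (reg r4) (const_int 8))) are accepted,
   whereas an offset of 1024 (out of range) or 6 (not a multiple of 4) is
   rejected by the PLUS check at the end of this function.  */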
12848 arm_coproc_mem_operand (rtx op, bool wb)
12850 rtx ind;
12852 /* Reject eliminable registers. */
12853 if (! (reload_in_progress || reload_completed || lra_in_progress)
12854 && ( reg_mentioned_p (frame_pointer_rtx, op)
12855 || reg_mentioned_p (arg_pointer_rtx, op)
12856 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12857 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12858 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12859 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12860 return FALSE;
12862 /* Constants are converted into offsets from labels. */
12863 if (!MEM_P (op))
12864 return FALSE;
12866 ind = XEXP (op, 0);
12868 if (reload_completed
12869 && (GET_CODE (ind) == LABEL_REF
12870 || (GET_CODE (ind) == CONST
12871 && GET_CODE (XEXP (ind, 0)) == PLUS
12872 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12873 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12874 return TRUE;
12876 /* Match: (mem (reg)). */
12877 if (REG_P (ind))
12878 return arm_address_register_rtx_p (ind, 0);
12880 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12881 acceptable in any case (subject to verification by
12882 arm_address_register_rtx_p). We need WB to be true to accept
12883 PRE_INC and POST_DEC. */
12884 if (GET_CODE (ind) == POST_INC
12885 || GET_CODE (ind) == PRE_DEC
12886 || (wb
12887 && (GET_CODE (ind) == PRE_INC
12888 || GET_CODE (ind) == POST_DEC)))
12889 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12891 if (wb
12892 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12893 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12894 && GET_CODE (XEXP (ind, 1)) == PLUS
12895 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12896 ind = XEXP (ind, 1);
12898 /* Match:
12899 (plus (reg)
12900 (const)). */
12901 if (GET_CODE (ind) == PLUS
12902 && REG_P (XEXP (ind, 0))
12903 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12904 && CONST_INT_P (XEXP (ind, 1))
12905 && INTVAL (XEXP (ind, 1)) > -1024
12906 && INTVAL (XEXP (ind, 1)) < 1024
12907 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12908 return TRUE;
12910 return FALSE;
12913 /* Return TRUE if OP is a memory operand to or from which we can load or
12914 store a vector. TYPE is one of the following values:
12915 0 - Vector load/store (vldr)
12916 1 - Core registers (ldm)
12917 2 - Element/structure loads (vld1)
12920 neon_vector_mem_operand (rtx op, int type, bool strict)
12922 rtx ind;
12924 /* Reject eliminable registers. */
12925 if (! (reload_in_progress || reload_completed)
12926 && ( reg_mentioned_p (frame_pointer_rtx, op)
12927 || reg_mentioned_p (arg_pointer_rtx, op)
12928 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12929 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12930 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12931 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12932 return !strict;
12934 /* Constants are converted into offsets from labels. */
12935 if (!MEM_P (op))
12936 return FALSE;
12938 ind = XEXP (op, 0);
12940 if (reload_completed
12941 && (GET_CODE (ind) == LABEL_REF
12942 || (GET_CODE (ind) == CONST
12943 && GET_CODE (XEXP (ind, 0)) == PLUS
12944 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12945 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12946 return TRUE;
12948 /* Match: (mem (reg)). */
12949 if (REG_P (ind))
12950 return arm_address_register_rtx_p (ind, 0);
12952 /* Allow post-increment with Neon registers. */
12953 if ((type != 1 && GET_CODE (ind) == POST_INC)
12954 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12955 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12957 /* Allow post-increment by register for VLDn */
12958 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12959 && GET_CODE (XEXP (ind, 1)) == PLUS
12960 && REG_P (XEXP (XEXP (ind, 1), 1)))
12961 return true;
12963 /* Match:
12964 (plus (reg)
12965 (const)). */
12966 if (type == 0
12967 && GET_CODE (ind) == PLUS
12968 && REG_P (XEXP (ind, 0))
12969 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12970 && CONST_INT_P (XEXP (ind, 1))
12971 && INTVAL (XEXP (ind, 1)) > -1024
12972 /* For quad modes, we restrict the constant offset to be slightly less
12973 than what the instruction format permits. We have no such constraint
12974 on double mode offsets. (This must match arm_legitimate_index_p.) */
12975 && (INTVAL (XEXP (ind, 1))
12976 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12977 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12978 return TRUE;
12980 return FALSE;
12983 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12984 type. */
12986 neon_struct_mem_operand (rtx op)
12988 rtx ind;
12990 /* Reject eliminable registers. */
12991 if (! (reload_in_progress || reload_completed)
12992 && ( reg_mentioned_p (frame_pointer_rtx, op)
12993 || reg_mentioned_p (arg_pointer_rtx, op)
12994 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12995 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12996 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12997 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12998 return FALSE;
13000 /* Constants are converted into offsets from labels. */
13001 if (!MEM_P (op))
13002 return FALSE;
13004 ind = XEXP (op, 0);
13006 if (reload_completed
13007 && (GET_CODE (ind) == LABEL_REF
13008 || (GET_CODE (ind) == CONST
13009 && GET_CODE (XEXP (ind, 0)) == PLUS
13010 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13011 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13012 return TRUE;
13014 /* Match: (mem (reg)). */
13015 if (REG_P (ind))
13016 return arm_address_register_rtx_p (ind, 0);
13018 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13019 if (GET_CODE (ind) == POST_INC
13020 || GET_CODE (ind) == PRE_DEC)
13021 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13023 return FALSE;
13026 /* Return true if X is a register that will be eliminated later on. */
13028 arm_eliminable_register (rtx x)
13030 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13031 || REGNO (x) == ARG_POINTER_REGNUM
13032 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13033 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13036 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13037 coprocessor registers. Otherwise return NO_REGS. */
13039 enum reg_class
13040 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13042 if (mode == HFmode)
13044 if (!TARGET_NEON_FP16)
13045 return GENERAL_REGS;
13046 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13047 return NO_REGS;
13048 return GENERAL_REGS;
13051 /* The neon move patterns handle all legitimate vector and struct
13052 addresses. */
13053 if (TARGET_NEON
13054 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13055 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13056 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13057 || VALID_NEON_STRUCT_MODE (mode)))
13058 return NO_REGS;
13060 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13061 return NO_REGS;
13063 return GENERAL_REGS;
13066 /* Values which must be returned in the most-significant end of the return
13067 register. */
13069 static bool
13070 arm_return_in_msb (const_tree valtype)
13072 return (TARGET_AAPCS_BASED
13073 && BYTES_BIG_ENDIAN
13074 && (AGGREGATE_TYPE_P (valtype)
13075 || TREE_CODE (valtype) == COMPLEX_TYPE
13076 || FIXED_POINT_TYPE_P (valtype)));
13079 /* Return TRUE if X references a SYMBOL_REF. */
13081 symbol_mentioned_p (rtx x)
13083 const char * fmt;
13084 int i;
13086 if (GET_CODE (x) == SYMBOL_REF)
13087 return 1;
13089 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13090 are constant offsets, not symbols. */
13091 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13092 return 0;
13094 fmt = GET_RTX_FORMAT (GET_CODE (x));
13096 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13098 if (fmt[i] == 'E')
13100 int j;
13102 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13103 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13104 return 1;
13106 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13107 return 1;
13110 return 0;
13113 /* Return TRUE if X references a LABEL_REF. */
13115 label_mentioned_p (rtx x)
13117 const char * fmt;
13118 int i;
13120 if (GET_CODE (x) == LABEL_REF)
13121 return 1;
13123 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13124 instruction, but they are constant offsets, not symbols. */
13125 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13126 return 0;
13128 fmt = GET_RTX_FORMAT (GET_CODE (x));
13129 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13131 if (fmt[i] == 'E')
13133 int j;
13135 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13136 if (label_mentioned_p (XVECEXP (x, i, j)))
13137 return 1;
13139 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13140 return 1;
13143 return 0;
13147 tls_mentioned_p (rtx x)
13149 switch (GET_CODE (x))
13151 case CONST:
13152 return tls_mentioned_p (XEXP (x, 0));
13154 case UNSPEC:
13155 if (XINT (x, 1) == UNSPEC_TLS)
13156 return 1;
13158 default:
13159 return 0;
13163 /* Must not copy any rtx that uses a pc-relative address. */
13165 static int
13166 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13168 if (GET_CODE (*x) == UNSPEC
13169 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13170 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13171 return 1;
13172 return 0;
13175 static bool
13176 arm_cannot_copy_insn_p (rtx_insn *insn)
13178 /* The tls call insn cannot be copied, as it is paired with a data
13179 word. */
13180 if (recog_memoized (insn) == CODE_FOR_tlscall)
13181 return true;
13183 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13186 enum rtx_code
13187 minmax_code (rtx x)
13189 enum rtx_code code = GET_CODE (x);
13191 switch (code)
13193 case SMAX:
13194 return GE;
13195 case SMIN:
13196 return LE;
13197 case UMIN:
13198 return LEU;
13199 case UMAX:
13200 return GEU;
13201 default:
13202 gcc_unreachable ();
13206 /* Match pair of min/max operators that can be implemented via usat/ssat. */
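/* Worked example (added for illustration): bounds [0, 255] give
   exact_log2 (256) = 8, so *MASK = 8 and *SIGNED_SAT = false (a usat #8
   pattern); bounds [-128, 127] give exact_log2 (128) = 7, the low bound
   matches -127 - 1, and the result is *MASK = 8 with *SIGNED_SAT = true
   (an ssat #8 pattern).  */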
13208 bool
13209 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13210 int *mask, bool *signed_sat)
13212 /* The high bound must be a power of two minus one. */
13213 int log = exact_log2 (INTVAL (hi_bound) + 1);
13214 if (log == -1)
13215 return false;
13217 /* The low bound is either zero (for usat) or one less than the
13218 negation of the high bound (for ssat). */
13219 if (INTVAL (lo_bound) == 0)
13221 if (mask)
13222 *mask = log;
13223 if (signed_sat)
13224 *signed_sat = false;
13226 return true;
13229 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13231 if (mask)
13232 *mask = log + 1;
13233 if (signed_sat)
13234 *signed_sat = true;
13236 return true;
13239 return false;
13242 /* Return 1 if memory locations are adjacent. */
13244 adjacent_mem_locations (rtx a, rtx b)
13246 /* We don't guarantee to preserve the order of these memory refs. */
13247 if (volatile_refs_p (a) || volatile_refs_p (b))
13248 return 0;
13250 if ((REG_P (XEXP (a, 0))
13251 || (GET_CODE (XEXP (a, 0)) == PLUS
13252 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13253 && (REG_P (XEXP (b, 0))
13254 || (GET_CODE (XEXP (b, 0)) == PLUS
13255 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13257 HOST_WIDE_INT val0 = 0, val1 = 0;
13258 rtx reg0, reg1;
13259 int val_diff;
13261 if (GET_CODE (XEXP (a, 0)) == PLUS)
13263 reg0 = XEXP (XEXP (a, 0), 0);
13264 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13266 else
13267 reg0 = XEXP (a, 0);
13269 if (GET_CODE (XEXP (b, 0)) == PLUS)
13271 reg1 = XEXP (XEXP (b, 0), 0);
13272 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13274 else
13275 reg1 = XEXP (b, 0);
13277 /* Don't accept any offset that will require multiple
13278 instructions to handle, since this would cause the
13279 arith_adjacentmem pattern to output an overlong sequence. */
13280 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13281 return 0;
13283 /* Don't allow an eliminable register: register elimination can make
13284 the offset too large. */
13285 if (arm_eliminable_register (reg0))
13286 return 0;
13288 val_diff = val1 - val0;
13290 if (arm_ld_sched)
13292 /* If the target has load delay slots, then there's no benefit
13293 to using an ldm instruction unless the offset is zero and
13294 we are optimizing for size. */
13295 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13296 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13297 && (val_diff == 4 || val_diff == -4));
13300 return ((REGNO (reg0) == REGNO (reg1))
13301 && (val_diff == 4 || val_diff == -4));
13304 return 0;
13307 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13308 for load operations, false for store operations. CONSECUTIVE is true
13309 if the register numbers in the operation must be consecutive in the register
13310 bank. RETURN_PC is true if a value is to be loaded into the PC.
13311 The pattern we are trying to match for load is:
13312 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13313 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13316 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13318 where
13319 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13320 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13321 3. If consecutive is TRUE, then for kth register being loaded,
13322 REGNO (R_dk) = REGNO (R_d0) + k.
13323 The pattern for store is similar. */
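/* Concrete example (added for illustration; register names used instead of
   register numbers for readability): the two-register load
   "ldmia r0, {r4, r5}" corresponds to the parallel
   [(set (reg:SI r4) (mem:SI (reg:SI r0)))
    (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))],
   which this function accepts with LOAD true and MODE SImode.  */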
13324 bool
13325 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13326 bool consecutive, bool return_pc)
13328 HOST_WIDE_INT count = XVECLEN (op, 0);
13329 rtx reg, mem, addr;
13330 unsigned regno;
13331 unsigned first_regno;
13332 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13333 rtx elt;
13334 bool addr_reg_in_reglist = false;
13335 bool update = false;
13336 int reg_increment;
13337 int offset_adj;
13338 int regs_per_val;
13340 /* If not in SImode, then registers must be consecutive
13341 (e.g., VLDM instructions for DFmode). */
13342 gcc_assert ((mode == SImode) || consecutive);
13343 /* Setting return_pc for stores is illegal. */
13344 gcc_assert (!return_pc || load);
13346 /* Set up the increments and the regs per val based on the mode. */
13347 reg_increment = GET_MODE_SIZE (mode);
13348 regs_per_val = reg_increment / 4;
13349 offset_adj = return_pc ? 1 : 0;
13351 if (count <= 1
13352 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13353 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13354 return false;
13356 /* Check if this is a write-back. */
13357 elt = XVECEXP (op, 0, offset_adj);
13358 if (GET_CODE (SET_SRC (elt)) == PLUS)
13360 i++;
13361 base = 1;
13362 update = true;
13364 /* The offset adjustment must be the number of registers being
13365 popped times the size of a single register. */
13366 if (!REG_P (SET_DEST (elt))
13367 || !REG_P (XEXP (SET_SRC (elt), 0))
13368 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13369 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13370 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13371 ((count - 1 - offset_adj) * reg_increment))
13372 return false;
13375 i = i + offset_adj;
13376 base = base + offset_adj;
13377 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13378 success depends on the type: VLDM can do just one reg,
13379 LDM must do at least two. */
13380 if ((count <= i) && (mode == SImode))
13381 return false;
13383 elt = XVECEXP (op, 0, i - 1);
13384 if (GET_CODE (elt) != SET)
13385 return false;
13387 if (load)
13389 reg = SET_DEST (elt);
13390 mem = SET_SRC (elt);
13392 else
13394 reg = SET_SRC (elt);
13395 mem = SET_DEST (elt);
13398 if (!REG_P (reg) || !MEM_P (mem))
13399 return false;
13401 regno = REGNO (reg);
13402 first_regno = regno;
13403 addr = XEXP (mem, 0);
13404 if (GET_CODE (addr) == PLUS)
13406 if (!CONST_INT_P (XEXP (addr, 1)))
13407 return false;
13409 offset = INTVAL (XEXP (addr, 1));
13410 addr = XEXP (addr, 0);
13413 if (!REG_P (addr))
13414 return false;
13416 /* Don't allow SP to be loaded unless it is also the base register. It
13417 guarantees that SP is reset correctly when an LDM instruction
13418 is interrupted. Otherwise, we might end up with a corrupt stack. */
13419 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13420 return false;
13422 for (; i < count; i++)
13424 elt = XVECEXP (op, 0, i);
13425 if (GET_CODE (elt) != SET)
13426 return false;
13428 if (load)
13430 reg = SET_DEST (elt);
13431 mem = SET_SRC (elt);
13433 else
13435 reg = SET_SRC (elt);
13436 mem = SET_DEST (elt);
13439 if (!REG_P (reg)
13440 || GET_MODE (reg) != mode
13441 || REGNO (reg) <= regno
13442 || (consecutive
13443 && (REGNO (reg) !=
13444 (unsigned int) (first_regno + regs_per_val * (i - base))))
13445 /* Don't allow SP to be loaded unless it is also the base register. It
13446 guarantees that SP is reset correctly when an LDM instruction
13447 is interrupted. Otherwise, we might end up with a corrupt stack. */
13448 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13449 || !MEM_P (mem)
13450 || GET_MODE (mem) != mode
13451 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13452 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13453 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13454 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13455 offset + (i - base) * reg_increment))
13456 && (!REG_P (XEXP (mem, 0))
13457 || offset + (i - base) * reg_increment != 0)))
13458 return false;
13460 regno = REGNO (reg);
13461 if (regno == REGNO (addr))
13462 addr_reg_in_reglist = true;
13465 if (load)
13467 if (update && addr_reg_in_reglist)
13468 return false;
13470 /* For Thumb-1, the address register is always modified - either by write-back
13471 or by explicit load. If the pattern does not describe an update,
13472 then the address register must be in the list of loaded registers. */
13473 if (TARGET_THUMB1)
13474 return update || addr_reg_in_reglist;
13477 return true;
13480 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13481 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13482 instruction. ADD_OFFSET is nonzero if the base address register needs
13483 to be modified with an add instruction before we can use it. */
13485 static bool
13486 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13487 int nops, HOST_WIDE_INT add_offset)
13489 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13490 if the offset isn't small enough. The reason 2 ldrs are faster
13491 is because these ARMs are able to do more than one cache access
13492 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13493 whilst the ARM8 has a double bandwidth cache. This means that
13494 these cores can do both an instruction fetch and a data fetch in
13495 a single cycle, so the trick of calculating the address into a
13496 scratch register (one of the result regs) and then doing a load
13497 multiple actually becomes slower (and no smaller in code size).
13498 That is the transformation
13500 ldr rd1, [rbase + offset]
13501 ldr rd2, [rbase + offset + 4]
13503 to
13505 add rd1, rbase, offset
13506 ldmia rd1, {rd1, rd2}
13508 produces worse code -- '3 cycles + any stalls on rd2' instead of
13509 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13510 access per cycle, the first sequence could never complete in less
13511 than 6 cycles, whereas the ldm sequence would only take 5 and
13512 would make better use of sequential accesses if not hitting the
13513 cache.
13515 We cheat here and test 'arm_ld_sched' which we currently know to
13516 only be true for the ARM8, ARM9 and StrongARM. If this ever
13517 changes, then the test below needs to be reworked. */
13518 if (nops == 2 && arm_ld_sched && add_offset != 0)
13519 return false;
13521 /* XScale has load-store double instructions, but they have stricter
13522 alignment requirements than load-store multiple, so we cannot
13523 use them.
13525 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13526 the pipeline until completion.
13528 NREGS CYCLES
13529 1 3
13530 2 4
13531 3 5
13532 4 6
13534 An ldr instruction takes 1-3 cycles, but does not block the
13535 pipeline.
13537 NREGS CYCLES
13538 1 1-3
13539 2 2-6
13540 3 3-9
13541 4 4-12
13543 Best case ldr will always win. However, the more ldr instructions
13544 we issue, the less likely we are to be able to schedule them well.
13545 Using ldr instructions also increases code size.
13547 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13548 for counts of 3 or 4 regs. */
13549 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13550 return false;
13551 return true;
13554 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13555 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13556 an array ORDER which describes the sequence to use when accessing the
13557 offsets that produces an ascending order. In this sequence, each
13558 offset must be larger by exactly 4 than the previous one. ORDER[0]
13559 must have been filled in with the lowest offset by the caller.
13560 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13561 we use to verify that ORDER produces an ascending order of registers.
13562 Return true if it was possible to construct such an order, false if
13563 not. */
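/* Worked example (added for illustration): with NOPS = 4,
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of the
   lowest offset, as filled in by the caller), the loop below produces
   ORDER = {1, 2, 0, 3}.  Offsets {8, 0, 4, 6} would fail: after 0, 4 and 8
   are placed, no remaining offset is exactly 4 above 8, so the function
   returns false.  */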
13565 static bool
13566 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13567 int *unsorted_regs)
13569 int i;
13570 for (i = 1; i < nops; i++)
13572 int j;
13574 order[i] = order[i - 1];
13575 for (j = 0; j < nops; j++)
13576 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13578 /* We must find exactly one offset that is higher than the
13579 previous one by 4. */
13580 if (order[i] != order[i - 1])
13581 return false;
13582 order[i] = j;
13584 if (order[i] == order[i - 1])
13585 return false;
13586 /* The register numbers must be ascending. */
13587 if (unsorted_regs != NULL
13588 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13589 return false;
13591 return true;
13594 /* Used to determine in a peephole whether a sequence of load
13595 instructions can be changed into a load-multiple instruction.
13596 NOPS is the number of separate load instructions we are examining. The
13597 first NOPS entries in OPERANDS are the destination registers, the
13598 next NOPS entries are memory operands. If this function is
13599 successful, *BASE is set to the common base register of the memory
13600 accesses; *LOAD_OFFSET is set to the first memory location's offset
13601 from that base register.
13602 REGS is an array filled in with the destination register numbers.
13603 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13604 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13605 the sequence of registers in REGS matches the loads from ascending memory
13606 locations, and the function verifies that the register numbers are
13607 themselves ascending. If CHECK_REGS is false, the register numbers
13608 are stored in the order they are found in the operands. */
13609 static int
13610 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13611 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13613 int unsorted_regs[MAX_LDM_STM_OPS];
13614 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13615 int order[MAX_LDM_STM_OPS];
13616 rtx base_reg_rtx = NULL;
13617 int base_reg = -1;
13618 int i, ldm_case;
13620 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13621 easily extended if required. */
13622 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13624 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13626 /* Loop over the operands and check that the memory references are
13627 suitable (i.e. immediate offsets from the same base register). At
13628 the same time, extract the target register, and the memory
13629 offsets. */
13630 for (i = 0; i < nops; i++)
13632 rtx reg;
13633 rtx offset;
13635 /* Convert a subreg of a mem into the mem itself. */
13636 if (GET_CODE (operands[nops + i]) == SUBREG)
13637 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13639 gcc_assert (MEM_P (operands[nops + i]));
13641 /* Don't reorder volatile memory references; it doesn't seem worth
13642 looking for the case where the order is ok anyway. */
13643 if (MEM_VOLATILE_P (operands[nops + i]))
13644 return 0;
13646 offset = const0_rtx;
13648 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13649 || (GET_CODE (reg) == SUBREG
13650 && REG_P (reg = SUBREG_REG (reg))))
13651 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13652 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13653 || (GET_CODE (reg) == SUBREG
13654 && REG_P (reg = SUBREG_REG (reg))))
13655 && (CONST_INT_P (offset
13656 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13658 if (i == 0)
13660 base_reg = REGNO (reg);
13661 base_reg_rtx = reg;
13662 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13663 return 0;
13665 else if (base_reg != (int) REGNO (reg))
13666 /* Not addressed from the same base register. */
13667 return 0;
13669 unsorted_regs[i] = (REG_P (operands[i])
13670 ? REGNO (operands[i])
13671 : REGNO (SUBREG_REG (operands[i])));
13673 /* If it isn't an integer register, or if it overwrites the
13674 base register but isn't the last insn in the list, then
13675 we can't do this. */
13676 if (unsorted_regs[i] < 0
13677 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13678 || unsorted_regs[i] > 14
13679 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13680 return 0;
13682 /* Don't allow SP to be loaded unless it is also the base
13683 register. It guarantees that SP is reset correctly when
13684 an LDM instruction is interrupted. Otherwise, we might
13685 end up with a corrupt stack. */
13686 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13687 return 0;
13689 unsorted_offsets[i] = INTVAL (offset);
13690 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13691 order[0] = i;
13693 else
13694 /* Not a suitable memory address. */
13695 return 0;
13698 /* All the useful information has now been extracted from the
13699 operands into unsorted_regs and unsorted_offsets; additionally,
13700 order[0] has been set to the lowest offset in the list. Sort
13701 the offsets into order, verifying that they are adjacent, and
13702 check that the register numbers are ascending. */
13703 if (!compute_offset_order (nops, unsorted_offsets, order,
13704 check_regs ? unsorted_regs : NULL))
13705 return 0;
13707 if (saved_order)
13708 memcpy (saved_order, order, sizeof order);
13710 if (base)
13712 *base = base_reg;
13714 for (i = 0; i < nops; i++)
13715 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13717 *load_offset = unsorted_offsets[order[0]];
13720 if (TARGET_THUMB1
13721 && !peep2_reg_dead_p (nops, base_reg_rtx))
13722 return 0;
13724 if (unsorted_offsets[order[0]] == 0)
13725 ldm_case = 1; /* ldmia */
13726 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13727 ldm_case = 2; /* ldmib */
13728 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13729 ldm_case = 3; /* ldmda */
13730 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13731 ldm_case = 4; /* ldmdb */
13732 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13733 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13734 ldm_case = 5;
13735 else
13736 return 0;
13738 if (!multiple_operation_profitable_p (false, nops,
13739 ldm_case == 5
13740 ? unsorted_offsets[order[0]] : 0))
13741 return 0;
13743 return ldm_case;
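/* Illustrative sketch (not part of arm.c): how the lowest and highest
   offsets select the addressing variant in the ldm_case assignments above.
   classify_ldm is a hypothetical helper for exposition only; the TARGET_*
   and const_ok_for_arm checks are omitted.  */
#include <stdio.h>

static const char *
classify_ldm (long long first_offset, long long last_offset)
{
  if (first_offset == 0)
    return "ldmia";                 /* increment after:  base+0, +4, ...  */
  if (first_offset == 4)
    return "ldmib";                 /* increment before: base+4, +8, ...  */
  if (last_offset == 0)
    return "ldmda";                 /* decrement after:  ..., -4, base+0  */
  if (last_offset == -4)
    return "ldmdb";                 /* decrement before: ..., -8, base-4  */
  return "add base, offset; ldmia"; /* the ldm_case == 5 fallback         */
}

int
main (void)
{
  printf ("%s\n", classify_ldm (0, 12));    /* ldmia */
  printf ("%s\n", classify_ldm (4, 16));    /* ldmib */
  printf ("%s\n", classify_ldm (-12, 0));   /* ldmda */
  printf ("%s\n", classify_ldm (-16, -4));  /* ldmdb */
  printf ("%s\n", classify_ldm (64, 76));   /* case 5 */
  return 0;
}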
13746 /* Used to determine in a peephole whether a sequence of store instructions can
13747 be changed into a store-multiple instruction.
13748 NOPS is the number of separate store instructions we are examining.
13749 NOPS_TOTAL is the total number of instructions recognized by the peephole
13750 pattern.
13751 The first NOPS entries in OPERANDS are the source registers, the next
13752 NOPS entries are memory operands. If this function is successful, *BASE is
13753 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13754 to the first memory location's offset from that base register. REGS is an
13755 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13756 likewise filled with the corresponding rtx's.
13757 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13758 numbers to an ascending order of stores.
13759 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13760 from ascending memory locations, and the function verifies that the register
13761 numbers are themselves ascending. If CHECK_REGS is false, the register
13762 numbers are stored in the order they are found in the operands. */
13763 static int
13764 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13765 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13766 HOST_WIDE_INT *load_offset, bool check_regs)
13768 int unsorted_regs[MAX_LDM_STM_OPS];
13769 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13770 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13771 int order[MAX_LDM_STM_OPS];
13772 int base_reg = -1;
13773 rtx base_reg_rtx = NULL;
13774 int i, stm_case;
13776 /* Write back of base register is currently only supported for Thumb 1. */
13777 int base_writeback = TARGET_THUMB1;
13779 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13780 easily extended if required. */
13781 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13783 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13785 /* Loop over the operands and check that the memory references are
13786 suitable (i.e. immediate offsets from the same base register). At
13787 the same time, extract the target register, and the memory
13788 offsets. */
13789 for (i = 0; i < nops; i++)
13791 rtx reg;
13792 rtx offset;
13794 /* Convert a subreg of a mem into the mem itself. */
13795 if (GET_CODE (operands[nops + i]) == SUBREG)
13796 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13798 gcc_assert (MEM_P (operands[nops + i]));
13800 /* Don't reorder volatile memory references; it doesn't seem worth
13801 looking for the case where the order is ok anyway. */
13802 if (MEM_VOLATILE_P (operands[nops + i]))
13803 return 0;
13805 offset = const0_rtx;
13807 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13808 || (GET_CODE (reg) == SUBREG
13809 && REG_P (reg = SUBREG_REG (reg))))
13810 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13811 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13812 || (GET_CODE (reg) == SUBREG
13813 && REG_P (reg = SUBREG_REG (reg))))
13814 && (CONST_INT_P (offset
13815 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13817 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13818 ? operands[i] : SUBREG_REG (operands[i]));
13819 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13821 if (i == 0)
13823 base_reg = REGNO (reg);
13824 base_reg_rtx = reg;
13825 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13826 return 0;
13828 else if (base_reg != (int) REGNO (reg))
13829 /* Not addressed from the same base register. */
13830 return 0;
13832 /* If it isn't an integer register, then we can't do this. */
13833 if (unsorted_regs[i] < 0
13834 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13835 /* The effects are unpredictable if the base register is
13836 both updated and stored. */
13837 || (base_writeback && unsorted_regs[i] == base_reg)
13838 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13839 || unsorted_regs[i] > 14)
13840 return 0;
13842 unsorted_offsets[i] = INTVAL (offset);
13843 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13844 order[0] = i;
13846 else
13847 /* Not a suitable memory address. */
13848 return 0;
13851 /* All the useful information has now been extracted from the
13852 operands into unsorted_regs and unsorted_offsets; additionally,
13853 order[0] has been set to the lowest offset in the list. Sort
13854 the offsets into order, verifying that they are adjacent, and
13855 check that the register numbers are ascending. */
13856 if (!compute_offset_order (nops, unsorted_offsets, order,
13857 check_regs ? unsorted_regs : NULL))
13858 return 0;
13860 if (saved_order)
13861 memcpy (saved_order, order, sizeof order);
13863 if (base)
13865 *base = base_reg;
13867 for (i = 0; i < nops; i++)
13869 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13870 if (reg_rtxs)
13871 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13874 *load_offset = unsorted_offsets[order[0]];
13877 if (TARGET_THUMB1
13878 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13879 return 0;
13881 if (unsorted_offsets[order[0]] == 0)
13882 stm_case = 1; /* stmia */
13883 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13884 stm_case = 2; /* stmib */
13885 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13886 stm_case = 3; /* stmda */
13887 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13888 stm_case = 4; /* stmdb */
13889 else
13890 return 0;
13892 if (!multiple_operation_profitable_p (false, nops, 0))
13893 return 0;
13895 return stm_case;
13898 /* Routines for use in generating RTL. */
13900 /* Generate a load-multiple instruction. COUNT is the number of loads in
13901 the instruction; REGS and MEMS are arrays containing the operands.
13902 BASEREG is the base register to be used in addressing the memory operands.
13903 WBACK_OFFSET is nonzero if the instruction should update the base
13904 register. */
13906 static rtx
13907 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13908 HOST_WIDE_INT wback_offset)
13910 int i = 0, j;
13911 rtx result;
13913 if (!multiple_operation_profitable_p (false, count, 0))
13915 rtx seq;
13917 start_sequence ();
13919 for (i = 0; i < count; i++)
13920 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13922 if (wback_offset != 0)
13923 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13925 seq = get_insns ();
13926 end_sequence ();
13928 return seq;
13931 result = gen_rtx_PARALLEL (VOIDmode,
13932 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13933 if (wback_offset != 0)
13935 XVECEXP (result, 0, 0)
13936 = gen_rtx_SET (VOIDmode, basereg,
13937 plus_constant (Pmode, basereg, wback_offset));
13938 i = 1;
13939 count++;
13942 for (j = 0; i < count; i++, j++)
13943 XVECEXP (result, 0, i)
13944 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13946 return result;
13949 /* Generate a store-multiple instruction. COUNT is the number of stores in
13950 the instruction; REGS and MEMS are arrays containing the operands.
13951 BASEREG is the base register to be used in addressing the memory operands.
13952 WBACK_OFFSET is nonzero if the instruction should update the base
13953 register. */
13955 static rtx
13956 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13957 HOST_WIDE_INT wback_offset)
13959 int i = 0, j;
13960 rtx result;
13962 if (GET_CODE (basereg) == PLUS)
13963 basereg = XEXP (basereg, 0);
13965 if (!multiple_operation_profitable_p (false, count, 0))
13967 rtx seq;
13969 start_sequence ();
13971 for (i = 0; i < count; i++)
13972 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13974 if (wback_offset != 0)
13975 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13977 seq = get_insns ();
13978 end_sequence ();
13980 return seq;
13983 result = gen_rtx_PARALLEL (VOIDmode,
13984 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13985 if (wback_offset != 0)
13987 XVECEXP (result, 0, 0)
13988 = gen_rtx_SET (VOIDmode, basereg,
13989 plus_constant (Pmode, basereg, wback_offset));
13990 i = 1;
13991 count++;
13994 for (j = 0; i < count; i++, j++)
13995 XVECEXP (result, 0, i)
13996 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13998 return result;
14001 /* Generate either a load-multiple or a store-multiple instruction. This
14002 function can be used in situations where we can start with a single MEM
14003 rtx and adjust its address upwards.
14004 COUNT is the number of operations in the instruction, not counting a
14005 possible update of the base register. REGS is an array containing the
14006 register operands.
14007 BASEREG is the base register to be used in addressing the memory operands,
14008 which are constructed from BASEMEM.
14009 WRITE_BACK specifies whether the generated instruction should include an
14010 update of the base register.
14011 OFFSETP is used to pass an offset to and from this function; this offset
14012 is not used when constructing the address (instead BASEMEM should have an
14013 appropriate offset in its address), it is used only for setting
14014 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14016 static rtx
14017 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14018 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14020 rtx mems[MAX_LDM_STM_OPS];
14021 HOST_WIDE_INT offset = *offsetp;
14022 int i;
14024 gcc_assert (count <= MAX_LDM_STM_OPS);
14026 if (GET_CODE (basereg) == PLUS)
14027 basereg = XEXP (basereg, 0);
14029 for (i = 0; i < count; i++)
14031 rtx addr = plus_constant (Pmode, basereg, i * 4);
14032 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14033 offset += 4;
14036 if (write_back)
14037 *offsetp = offset;
14039 if (is_load)
14040 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14041 write_back ? 4 * count : 0);
14042 else
14043 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14044 write_back ? 4 * count : 0);
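/* Illustrative sketch (not part of arm.c): the address/offset bookkeeping
   in arm_gen_multiple_op above -- the i-th mem sits at basereg + 4*i, the
   MEM_OFFSET passed back through *offsetp advances by 4 per operand, and
   with write-back the base register moves by 4*count.  */
#include <stdio.h>

int
main (void)
{
  long long offset = 0;  /* plays the role of *offsetp */
  int count = 3;
  for (int i = 0; i < count; i++)
    {
      printf ("mem[%d]: address = base + %d, MEM_OFFSET = %lld\n",
              i, i * 4, offset);
      offset += 4;
    }
  printf ("write-back would advance the base register by %d bytes\n",
          4 * count);
  return 0;
}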
14048 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14049 rtx basemem, HOST_WIDE_INT *offsetp)
14051 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14052 offsetp);
14056 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14057 rtx basemem, HOST_WIDE_INT *offsetp)
14059 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14060 offsetp);
14063 /* Called from a peephole2 expander to turn a sequence of loads into an
14064 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14065 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14066 is true if we can reorder the registers because their subsequent uses
14067 are commutative.
14068 Returns true iff we could generate a new instruction. */
14070 bool
14071 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14073 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14074 rtx mems[MAX_LDM_STM_OPS];
14075 int i, j, base_reg;
14076 rtx base_reg_rtx;
14077 HOST_WIDE_INT offset;
14078 int write_back = FALSE;
14079 int ldm_case;
14080 rtx addr;
14082 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14083 &base_reg, &offset, !sort_regs);
14085 if (ldm_case == 0)
14086 return false;
14088 if (sort_regs)
14089 for (i = 0; i < nops - 1; i++)
14090 for (j = i + 1; j < nops; j++)
14091 if (regs[i] > regs[j])
14093 int t = regs[i];
14094 regs[i] = regs[j];
14095 regs[j] = t;
14097 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14099 if (TARGET_THUMB1)
14101 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14102 gcc_assert (ldm_case == 1 || ldm_case == 5);
14103 write_back = TRUE;
14106 if (ldm_case == 5)
14108 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14109 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14110 offset = 0;
14111 if (!TARGET_THUMB1)
14113 base_reg = regs[0];
14114 base_reg_rtx = newbase;
14118 for (i = 0; i < nops; i++)
14120 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14121 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14122 SImode, addr, 0);
14124 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14125 write_back ? offset + i * 4 : 0));
14126 return true;
14129 /* Called from a peephole2 expander to turn a sequence of stores into an
14130 STM instruction. OPERANDS are the operands found by the peephole matcher;
14131 NOPS indicates how many separate stores we are trying to combine.
14132 Returns true iff we could generate a new instruction. */
14134 bool
14135 gen_stm_seq (rtx *operands, int nops)
14137 int i;
14138 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14139 rtx mems[MAX_LDM_STM_OPS];
14140 int base_reg;
14141 rtx base_reg_rtx;
14142 HOST_WIDE_INT offset;
14143 int write_back = FALSE;
14144 int stm_case;
14145 rtx addr;
14146 bool base_reg_dies;
14148 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14149 mem_order, &base_reg, &offset, true);
14151 if (stm_case == 0)
14152 return false;
14154 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14156 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14157 if (TARGET_THUMB1)
14159 gcc_assert (base_reg_dies);
14160 write_back = TRUE;
14163 if (stm_case == 5)
14165 gcc_assert (base_reg_dies);
14166 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14167 offset = 0;
14170 addr = plus_constant (Pmode, base_reg_rtx, offset);
14172 for (i = 0; i < nops; i++)
14174 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14175 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14176 SImode, addr, 0);
14178 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14179 write_back ? offset + i * 4 : 0));
14180 return true;
14183 /* Called from a peephole2 expander to turn a sequence of stores that are
14184 preceded by constant loads into an STM instruction. OPERANDS are the
14185 operands found by the peephole matcher; NOPS indicates how many
14186 separate stores we are trying to combine; there are 2 * NOPS
14187 instructions in the peephole.
14188 Returns true iff we could generate a new instruction. */
14190 bool
14191 gen_const_stm_seq (rtx *operands, int nops)
14193 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14194 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14195 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14196 rtx mems[MAX_LDM_STM_OPS];
14197 int base_reg;
14198 rtx base_reg_rtx;
14199 HOST_WIDE_INT offset;
14200 int write_back = FALSE;
14201 int stm_case;
14202 rtx addr;
14203 bool base_reg_dies;
14204 int i, j;
14205 HARD_REG_SET allocated;
14207 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14208 mem_order, &base_reg, &offset, false);
14210 if (stm_case == 0)
14211 return false;
14213 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14215 /* If the same register is used more than once, try to find a free
14216 register. */
14217 CLEAR_HARD_REG_SET (allocated);
14218 for (i = 0; i < nops; i++)
14220 for (j = i + 1; j < nops; j++)
14221 if (regs[i] == regs[j])
14223 rtx t = peep2_find_free_register (0, nops * 2,
14224 TARGET_THUMB1 ? "l" : "r",
14225 SImode, &allocated);
14226 if (t == NULL_RTX)
14227 return false;
14228 reg_rtxs[i] = t;
14229 regs[i] = REGNO (t);
14233 /* Compute an ordering that maps the register numbers to an ascending
14234 sequence. */
14235 reg_order[0] = 0;
14236 for (i = 0; i < nops; i++)
14237 if (regs[i] < regs[reg_order[0]])
14238 reg_order[0] = i;
14240 for (i = 1; i < nops; i++)
14242 int this_order = reg_order[i - 1];
14243 for (j = 0; j < nops; j++)
14244 if (regs[j] > regs[reg_order[i - 1]]
14245 && (this_order == reg_order[i - 1]
14246 || regs[j] < regs[this_order]))
14247 this_order = j;
14248 reg_order[i] = this_order;
14251 /* Ensure that registers that must be live after the instruction end
14252 up with the correct value. */
14253 for (i = 0; i < nops; i++)
14255 int this_order = reg_order[i];
14256 if ((this_order != mem_order[i]
14257 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14258 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14259 return false;
14262 /* Load the constants. */
14263 for (i = 0; i < nops; i++)
14265 rtx op = operands[2 * nops + mem_order[i]];
14266 sorted_regs[i] = regs[reg_order[i]];
14267 emit_move_insn (reg_rtxs[reg_order[i]], op);
14270 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14272 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14273 if (TARGET_THUMB1)
14275 gcc_assert (base_reg_dies);
14276 write_back = TRUE;
14279 if (stm_case == 5)
14281 gcc_assert (base_reg_dies);
14282 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14283 offset = 0;
14286 addr = plus_constant (Pmode, base_reg_rtx, offset);
14288 for (i = 0; i < nops; i++)
14290 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14291 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14292 SImode, addr, 0);
14294 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14295 write_back ? offset + i * 4 : 0));
14296 return true;
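/* Illustrative sketch (not part of arm.c): the reg_order computation in
   gen_const_stm_seq above, i.e. a selection of indices that visits the
   register numbers in ascending order.  ascending_order is a hypothetical
   standalone helper on plain ints, for exposition only.  */
#include <assert.h>

static void
ascending_order (int n, const int *regs, int *order)
{
  order[0] = 0;
  for (int i = 0; i < n; i++)
    if (regs[i] < regs[order[0]])
      order[0] = i;
  for (int i = 1; i < n; i++)
    {
      /* Pick the smallest register strictly greater than the previous one.  */
      int this_order = order[i - 1];
      for (int j = 0; j < n; j++)
        if (regs[j] > regs[order[i - 1]]
            && (this_order == order[i - 1] || regs[j] < regs[this_order]))
          this_order = j;
      order[i] = this_order;
    }
}

int
main (void)
{
  int regs[] = { 3, 1, 4, 2 };
  int order[4];
  ascending_order (4, regs, order);
  /* Visiting regs[1]=1, regs[3]=2, regs[0]=3, regs[2]=4 is ascending.  */
  assert (order[0] == 1 && order[1] == 3 && order[2] == 0 && order[3] == 2);
  return 0;
}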
14299 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14300 unaligned copies on processors which support unaligned semantics for those
14301 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14302 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14303 An interleave factor of 1 (the minimum) will perform no interleaving.
14304 Load/store multiple are used for aligned addresses where possible. */
14306 static void
14307 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14308 HOST_WIDE_INT length,
14309 unsigned int interleave_factor)
14311 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14312 int *regnos = XALLOCAVEC (int, interleave_factor);
14313 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14314 HOST_WIDE_INT i, j;
14315 HOST_WIDE_INT remaining = length, words;
14316 rtx halfword_tmp = NULL, byte_tmp = NULL;
14317 rtx dst, src;
14318 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14319 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14320 HOST_WIDE_INT srcoffset, dstoffset;
14321 HOST_WIDE_INT src_autoinc, dst_autoinc;
14322 rtx mem, addr;
14324 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14326 /* Use hard registers if we have aligned source or destination so we can use
14327 load/store multiple with contiguous registers. */
14328 if (dst_aligned || src_aligned)
14329 for (i = 0; i < interleave_factor; i++)
14330 regs[i] = gen_rtx_REG (SImode, i);
14331 else
14332 for (i = 0; i < interleave_factor; i++)
14333 regs[i] = gen_reg_rtx (SImode);
14335 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14336 src = copy_addr_to_reg (XEXP (srcbase, 0));
14338 srcoffset = dstoffset = 0;
14340 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14341 For copying the last bytes we want to subtract this offset again. */
14342 src_autoinc = dst_autoinc = 0;
14344 for (i = 0; i < interleave_factor; i++)
14345 regnos[i] = i;
14347 /* Copy BLOCK_SIZE_BYTES chunks. */
14349 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14351 /* Load words. */
14352 if (src_aligned && interleave_factor > 1)
14354 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14355 TRUE, srcbase, &srcoffset));
14356 src_autoinc += UNITS_PER_WORD * interleave_factor;
14358 else
14360 for (j = 0; j < interleave_factor; j++)
14362 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14363 - src_autoinc));
14364 mem = adjust_automodify_address (srcbase, SImode, addr,
14365 srcoffset + j * UNITS_PER_WORD);
14366 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14368 srcoffset += block_size_bytes;
14371 /* Store words. */
14372 if (dst_aligned && interleave_factor > 1)
14374 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14375 TRUE, dstbase, &dstoffset));
14376 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14378 else
14380 for (j = 0; j < interleave_factor; j++)
14382 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14383 - dst_autoinc));
14384 mem = adjust_automodify_address (dstbase, SImode, addr,
14385 dstoffset + j * UNITS_PER_WORD);
14386 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14388 dstoffset += block_size_bytes;
14391 remaining -= block_size_bytes;
14394 /* Copy any whole words left (note these aren't interleaved with any
14395 subsequent halfword/byte load/stores in the interests of simplicity). */
14397 words = remaining / UNITS_PER_WORD;
14399 gcc_assert (words < interleave_factor);
14401 if (src_aligned && words > 1)
14403 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14404 &srcoffset));
14405 src_autoinc += UNITS_PER_WORD * words;
14407 else
14409 for (j = 0; j < words; j++)
14411 addr = plus_constant (Pmode, src,
14412 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14413 mem = adjust_automodify_address (srcbase, SImode, addr,
14414 srcoffset + j * UNITS_PER_WORD);
14415 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14417 srcoffset += words * UNITS_PER_WORD;
14420 if (dst_aligned && words > 1)
14422 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14423 &dstoffset));
14424 dst_autoinc += words * UNITS_PER_WORD;
14426 else
14428 for (j = 0; j < words; j++)
14430 addr = plus_constant (Pmode, dst,
14431 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14432 mem = adjust_automodify_address (dstbase, SImode, addr,
14433 dstoffset + j * UNITS_PER_WORD);
14434 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14436 dstoffset += words * UNITS_PER_WORD;
14439 remaining -= words * UNITS_PER_WORD;
14441 gcc_assert (remaining < 4);
14443 /* Copy a halfword if necessary. */
14445 if (remaining >= 2)
14447 halfword_tmp = gen_reg_rtx (SImode);
14449 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14450 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14451 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14453 /* Either write out immediately, or delay until we've loaded the last
14454 byte, depending on interleave factor. */
14455 if (interleave_factor == 1)
14457 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14458 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14459 emit_insn (gen_unaligned_storehi (mem,
14460 gen_lowpart (HImode, halfword_tmp)));
14461 halfword_tmp = NULL;
14462 dstoffset += 2;
14465 remaining -= 2;
14466 srcoffset += 2;
14469 gcc_assert (remaining < 2);
14471 /* Copy last byte. */
14473 if ((remaining & 1) != 0)
14475 byte_tmp = gen_reg_rtx (SImode);
14477 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14478 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14479 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14481 if (interleave_factor == 1)
14483 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14484 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14485 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14486 byte_tmp = NULL;
14487 dstoffset++;
14490 remaining--;
14491 srcoffset++;
14494 /* Store last halfword if we haven't done so already. */
14496 if (halfword_tmp)
14498 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14499 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14500 emit_insn (gen_unaligned_storehi (mem,
14501 gen_lowpart (HImode, halfword_tmp)));
14502 dstoffset += 2;
14505 /* Likewise for last byte. */
14507 if (byte_tmp)
14509 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14510 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14511 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14512 dstoffset++;
14515 gcc_assert (remaining == 0 && srcoffset == dstoffset);
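/* Illustrative sketch (not part of arm.c): the shape of the main copy loop
   above with interleave_factor == 2 -- two loads are issued before the two
   stores, which is the load/load/store/store pattern used to hide load
   latency; a word/halfword/byte tail finishes the copy.  Plain memcpy
   stands in for the ldr/str instructions here.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned char src[20], dst[20];
  for (int i = 0; i < 20; i++)
    src[i] = (unsigned char) i;

  long long remaining = 20, off = 0;
  uint32_t words[2];                        /* the two scratch "registers" */
  while (remaining >= 8)                    /* two words per iteration */
    {
      memcpy (&words[0], src + off, 4);     /* load, load */
      memcpy (&words[1], src + off + 4, 4);
      memcpy (dst + off, &words[0], 4);     /* store, store */
      memcpy (dst + off + 4, &words[1], 4);
      off += 8;
      remaining -= 8;
    }
  memcpy (dst + off, src + off, remaining); /* tail (word/halfword/byte) */
  printf ("tail bytes: %lld, copies equal: %d\n",
          remaining, memcmp (src, dst, 20) == 0);
  return 0;
}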
14518 /* From mips_adjust_block_mem:
14520 Helper function for doing a loop-based block operation on memory
14521 reference MEM. Each iteration of the loop will operate on LENGTH
14522 bytes of MEM.
14524 Create a new base register for use within the loop and point it to
14525 the start of MEM. Create a new memory reference that uses this
14526 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14528 static void
14529 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14530 rtx *loop_mem)
14532 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14534 /* Although the new mem does not refer to a known location,
14535 it does keep up to LENGTH bytes of alignment. */
14536 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14537 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14540 /* From mips_block_move_loop:
14542 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14543 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14544 the memory regions do not overlap. */
14546 static void
14547 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14548 unsigned int interleave_factor,
14549 HOST_WIDE_INT bytes_per_iter)
14551 rtx src_reg, dest_reg, final_src, test;
14552 HOST_WIDE_INT leftover;
14554 leftover = length % bytes_per_iter;
14555 length -= leftover;
14557 /* Create registers and memory references for use within the loop. */
14558 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14559 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14561 /* Calculate the value that SRC_REG should have after the last iteration of
14562 the loop. */
14563 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14564 0, 0, OPTAB_WIDEN);
14566 /* Emit the start of the loop. */
14567 rtx_code_label *label = gen_label_rtx ();
14568 emit_label (label);
14570 /* Emit the loop body. */
14571 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14572 interleave_factor);
14574 /* Move on to the next block. */
14575 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14576 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14578 /* Emit the loop condition. */
14579 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14580 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14582 /* Mop up any left-over bytes. */
14583 if (leftover)
14584 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
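/* Illustrative sketch (not part of arm.c): the loop/leftover split used by
   arm_block_move_unaligned_loop above -- whole BYTES_PER_ITER chunks go
   through the loop, and whatever is left is handled by one final straight
   (non-looping) copy.  */
#include <stdio.h>

int
main (void)
{
  long long length = 100, bytes_per_iter = 16;
  long long leftover = length % bytes_per_iter;  /* 4 bytes */
  long long looped = length - leftover;          /* 96 bytes */
  printf ("loop: %lld bytes in %lld iterations; straight copy: %lld bytes\n",
          looped, looped / bytes_per_iter, leftover);
  return 0;
}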
14587 /* Emit a block move when either the source or destination is unaligned (not
14588 aligned to a four-byte boundary). This may need further tuning depending on
14589 core type, optimize_size setting, etc. */
14591 static int
14592 arm_movmemqi_unaligned (rtx *operands)
14594 HOST_WIDE_INT length = INTVAL (operands[2]);
14596 if (optimize_size)
14598 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14599 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14600 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14601 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14602 or dst_aligned though: allow more interleaving in those cases since the
14603 resulting code can be smaller. */
14604 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14605 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14607 if (length > 12)
14608 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14609 interleave_factor, bytes_per_iter);
14610 else
14611 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14612 interleave_factor);
14614 else
14616 /* Note that the loop created by arm_block_move_unaligned_loop may be
14617 subject to loop unrolling, which makes tuning this condition a little
14618 redundant. */
14619 if (length > 32)
14620 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14621 else
14622 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14625 return 1;
14629 arm_gen_movmemqi (rtx *operands)
14631 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14632 HOST_WIDE_INT srcoffset, dstoffset;
14633 int i;
14634 rtx src, dst, srcbase, dstbase;
14635 rtx part_bytes_reg = NULL;
14636 rtx mem;
14638 if (!CONST_INT_P (operands[2])
14639 || !CONST_INT_P (operands[3])
14640 || INTVAL (operands[2]) > 64)
14641 return 0;
14643 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14644 return arm_movmemqi_unaligned (operands);
14646 if (INTVAL (operands[3]) & 3)
14647 return 0;
14649 dstbase = operands[0];
14650 srcbase = operands[1];
14652 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14653 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14655 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14656 out_words_to_go = INTVAL (operands[2]) / 4;
14657 last_bytes = INTVAL (operands[2]) & 3;
14658 dstoffset = srcoffset = 0;
14660 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14661 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14663 for (i = 0; in_words_to_go >= 2; i+=4)
14665 if (in_words_to_go > 4)
14666 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14667 TRUE, srcbase, &srcoffset));
14668 else
14669 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14670 src, FALSE, srcbase,
14671 &srcoffset));
14673 if (out_words_to_go)
14675 if (out_words_to_go > 4)
14676 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14677 TRUE, dstbase, &dstoffset));
14678 else if (out_words_to_go != 1)
14679 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14680 out_words_to_go, dst,
14681 (last_bytes == 0
14682 ? FALSE : TRUE),
14683 dstbase, &dstoffset));
14684 else
14686 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14687 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14688 if (last_bytes != 0)
14690 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14691 dstoffset += 4;
14696 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14697 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14700 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14701 if (out_words_to_go)
14703 rtx sreg;
14705 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14706 sreg = copy_to_reg (mem);
14708 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14709 emit_move_insn (mem, sreg);
14710 in_words_to_go--;
14712 gcc_assert (!in_words_to_go); /* Sanity check */
14715 if (in_words_to_go)
14717 gcc_assert (in_words_to_go > 0);
14719 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14720 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14723 gcc_assert (!last_bytes || part_bytes_reg);
14725 if (BYTES_BIG_ENDIAN && last_bytes)
14727 rtx tmp = gen_reg_rtx (SImode);
14729 /* The bytes we want are in the top end of the word. */
14730 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14731 GEN_INT (8 * (4 - last_bytes))));
14732 part_bytes_reg = tmp;
14734 while (last_bytes)
14736 mem = adjust_automodify_address (dstbase, QImode,
14737 plus_constant (Pmode, dst,
14738 last_bytes - 1),
14739 dstoffset + last_bytes - 1);
14740 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14742 if (--last_bytes)
14744 tmp = gen_reg_rtx (SImode);
14745 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14746 part_bytes_reg = tmp;
14751 else
14753 if (last_bytes > 1)
14755 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14756 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14757 last_bytes -= 2;
14758 if (last_bytes)
14760 rtx tmp = gen_reg_rtx (SImode);
14761 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14762 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14763 part_bytes_reg = tmp;
14764 dstoffset += 2;
14768 if (last_bytes)
14770 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14771 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14775 return 1;
14778 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14779 by mode size. */
14780 inline static rtx
14781 next_consecutive_mem (rtx mem)
14783 machine_mode mode = GET_MODE (mem);
14784 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14785 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14787 return adjust_automodify_address (mem, mode, addr, offset);
14790 /* Copy using LDRD/STRD instructions whenever possible.
14791 Returns true upon success. */
14792 bool
14793 gen_movmem_ldrd_strd (rtx *operands)
14795 unsigned HOST_WIDE_INT len;
14796 HOST_WIDE_INT align;
14797 rtx src, dst, base;
14798 rtx reg0;
14799 bool src_aligned, dst_aligned;
14800 bool src_volatile, dst_volatile;
14802 gcc_assert (CONST_INT_P (operands[2]));
14803 gcc_assert (CONST_INT_P (operands[3]));
14805 len = UINTVAL (operands[2]);
14806 if (len > 64)
14807 return false;
14809 /* Maximum alignment we can assume for both src and dst buffers. */
14810 align = INTVAL (operands[3]);
14812 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14813 return false;
14815 /* Place src and dst addresses in registers
14816 and update the corresponding mem rtx. */
14817 dst = operands[0];
14818 dst_volatile = MEM_VOLATILE_P (dst);
14819 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14820 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14821 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14823 src = operands[1];
14824 src_volatile = MEM_VOLATILE_P (src);
14825 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14826 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14827 src = adjust_automodify_address (src, VOIDmode, base, 0);
14829 if (!unaligned_access && !(src_aligned && dst_aligned))
14830 return false;
14832 if (src_volatile || dst_volatile)
14833 return false;
14835 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14836 if (!(dst_aligned || src_aligned))
14837 return arm_gen_movmemqi (operands);
14839 src = adjust_address (src, DImode, 0);
14840 dst = adjust_address (dst, DImode, 0);
14841 while (len >= 8)
14843 len -= 8;
14844 reg0 = gen_reg_rtx (DImode);
14845 if (src_aligned)
14846 emit_move_insn (reg0, src);
14847 else
14848 emit_insn (gen_unaligned_loaddi (reg0, src));
14850 if (dst_aligned)
14851 emit_move_insn (dst, reg0);
14852 else
14853 emit_insn (gen_unaligned_storedi (dst, reg0));
14855 src = next_consecutive_mem (src);
14856 dst = next_consecutive_mem (dst);
14859 gcc_assert (len < 8);
14860 if (len >= 4)
14862 /* More than a word but less than a double-word to copy. Copy a word. */
14863 reg0 = gen_reg_rtx (SImode);
14864 src = adjust_address (src, SImode, 0);
14865 dst = adjust_address (dst, SImode, 0);
14866 if (src_aligned)
14867 emit_move_insn (reg0, src);
14868 else
14869 emit_insn (gen_unaligned_loadsi (reg0, src));
14871 if (dst_aligned)
14872 emit_move_insn (dst, reg0);
14873 else
14874 emit_insn (gen_unaligned_storesi (dst, reg0));
14876 src = next_consecutive_mem (src);
14877 dst = next_consecutive_mem (dst);
14878 len -= 4;
14881 if (len == 0)
14882 return true;
14884 /* Copy the remaining bytes. */
14885 if (len >= 2)
14887 dst = adjust_address (dst, HImode, 0);
14888 src = adjust_address (src, HImode, 0);
14889 reg0 = gen_reg_rtx (SImode);
14890 if (src_aligned)
14891 emit_insn (gen_zero_extendhisi2 (reg0, src));
14892 else
14893 emit_insn (gen_unaligned_loadhiu (reg0, src));
14895 if (dst_aligned)
14896 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14897 else
14898 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14900 src = next_consecutive_mem (src);
14901 dst = next_consecutive_mem (dst);
14902 if (len == 2)
14903 return true;
14906 dst = adjust_address (dst, QImode, 0);
14907 src = adjust_address (src, QImode, 0);
14908 reg0 = gen_reg_rtx (QImode);
14909 emit_move_insn (reg0, src);
14910 emit_move_insn (dst, reg0);
14911 return true;
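/* Illustrative sketch (not part of arm.c): the size decomposition used by
   gen_movmem_ldrd_strd above -- double-words while at least 8 bytes remain,
   then at most one word, one halfword and one byte.  */
#include <stdio.h>

int
main (void)
{
  unsigned long long len = 23;
  printf ("%llu double-word(s)", len / 8);
  len %= 8;
  if (len >= 4) { printf (", 1 word");     len -= 4; }
  if (len >= 2) { printf (", 1 halfword"); len -= 2; }
  if (len >= 1) printf (", 1 byte");
  printf ("\n");  /* 23 bytes -> 2 double-words, 1 word, 1 halfword, 1 byte */
  return 0;
}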
14914 /* Select a dominance comparison mode if possible for a test of the general
14915 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14916 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14917 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14918 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14919 In all cases OP will be either EQ or NE, but we don't need to know which
14920 here. If we are unable to support a dominance comparison we return
14921 CC mode. This will then fail to match for the RTL expressions that
14922 generate this call. */
14923 machine_mode
14924 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14926 enum rtx_code cond1, cond2;
14927 int swapped = 0;
14929 /* Currently we will probably get the wrong result if the individual
14930 comparisons are not simple. This also ensures that it is safe to
14931 reverse a comparison if necessary. */
14932 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14933 != CCmode)
14934 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14935 != CCmode))
14936 return CCmode;
14938 /* The if_then_else variant of this tests the second condition if the
14939 first passes, but is true if the first fails. Reverse the first
14940 condition to get a true "inclusive-or" expression. */
14941 if (cond_or == DOM_CC_NX_OR_Y)
14942 cond1 = reverse_condition (cond1);
14944 /* If the comparisons are not equal, and one doesn't dominate the other,
14945 then we can't do this. */
14946 if (cond1 != cond2
14947 && !comparison_dominates_p (cond1, cond2)
14948 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14949 return CCmode;
14951 if (swapped)
14953 enum rtx_code temp = cond1;
14954 cond1 = cond2;
14955 cond2 = temp;
14958 switch (cond1)
14960 case EQ:
14961 if (cond_or == DOM_CC_X_AND_Y)
14962 return CC_DEQmode;
14964 switch (cond2)
14966 case EQ: return CC_DEQmode;
14967 case LE: return CC_DLEmode;
14968 case LEU: return CC_DLEUmode;
14969 case GE: return CC_DGEmode;
14970 case GEU: return CC_DGEUmode;
14971 default: gcc_unreachable ();
14974 case LT:
14975 if (cond_or == DOM_CC_X_AND_Y)
14976 return CC_DLTmode;
14978 switch (cond2)
14980 case LT:
14981 return CC_DLTmode;
14982 case LE:
14983 return CC_DLEmode;
14984 case NE:
14985 return CC_DNEmode;
14986 default:
14987 gcc_unreachable ();
14990 case GT:
14991 if (cond_or == DOM_CC_X_AND_Y)
14992 return CC_DGTmode;
14994 switch (cond2)
14996 case GT:
14997 return CC_DGTmode;
14998 case GE:
14999 return CC_DGEmode;
15000 case NE:
15001 return CC_DNEmode;
15002 default:
15003 gcc_unreachable ();
15006 case LTU:
15007 if (cond_or == DOM_CC_X_AND_Y)
15008 return CC_DLTUmode;
15010 switch (cond2)
15012 case LTU:
15013 return CC_DLTUmode;
15014 case LEU:
15015 return CC_DLEUmode;
15016 case NE:
15017 return CC_DNEmode;
15018 default:
15019 gcc_unreachable ();
15022 case GTU:
15023 if (cond_or == DOM_CC_X_AND_Y)
15024 return CC_DGTUmode;
15026 switch (cond2)
15028 case GTU:
15029 return CC_DGTUmode;
15030 case GEU:
15031 return CC_DGEUmode;
15032 case NE:
15033 return CC_DNEmode;
15034 default:
15035 gcc_unreachable ();
15038 /* The remaining cases only occur when both comparisons are the
15039 same. */
15040 case NE:
15041 gcc_assert (cond1 == cond2);
15042 return CC_DNEmode;
15044 case LE:
15045 gcc_assert (cond1 == cond2);
15046 return CC_DLEmode;
15048 case GE:
15049 gcc_assert (cond1 == cond2);
15050 return CC_DGEmode;
15052 case LEU:
15053 gcc_assert (cond1 == cond2);
15054 return CC_DLEUmode;
15056 case GEU:
15057 gcc_assert (cond1 == cond2);
15058 return CC_DGEUmode;
15060 default:
15061 gcc_unreachable ();
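/* Illustrative sketch (not part of arm.c): an informal scalar analogue of
   the "dominance" idea above -- when one comparison's truth implies
   another's, a combined test can be expressed through a single condition.
   This does not model the CC_D* modes themselves, only the underlying
   implication.  */
#include <assert.h>

int
main (void)
{
  for (int x = -2; x <= 2; x++)
    for (int y = -2; y <= 2; y++)
      {
        /* LT implies LE, so (x < y) || (x == y) collapses to (x <= y).  */
        assert (((x < y) || (x == y)) == (x <= y));
        /* EQ implies both GE and LE.  */
        if (x == y)
          assert (x >= y && x <= y);
      }
  return 0;
}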
15065 machine_mode
15066 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15068 /* All floating point compares return CCFP if it is an equality
15069 comparison, and CCFPE otherwise. */
15070 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15072 switch (op)
15074 case EQ:
15075 case NE:
15076 case UNORDERED:
15077 case ORDERED:
15078 case UNLT:
15079 case UNLE:
15080 case UNGT:
15081 case UNGE:
15082 case UNEQ:
15083 case LTGT:
15084 return CCFPmode;
15086 case LT:
15087 case LE:
15088 case GT:
15089 case GE:
15090 return CCFPEmode;
15092 default:
15093 gcc_unreachable ();
15097 /* A compare with a shifted operand. Because of canonicalization, the
15098 comparison will have to be swapped when we emit the assembler. */
15099 if (GET_MODE (y) == SImode
15100 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15101 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15102 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15103 || GET_CODE (x) == ROTATERT))
15104 return CC_SWPmode;
15106 /* This operation is performed swapped, but since we only rely on the Z
15107 flag we don't need an additional mode. */
15108 if (GET_MODE (y) == SImode
15109 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15110 && GET_CODE (x) == NEG
15111 && (op == EQ || op == NE))
15112 return CC_Zmode;
15114 /* This is a special case that is used by combine to allow a
15115 comparison of a shifted byte load to be split into a zero-extend
15116 followed by a comparison of the shifted integer (only valid for
15117 equalities and unsigned inequalities). */
15118 if (GET_MODE (x) == SImode
15119 && GET_CODE (x) == ASHIFT
15120 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15121 && GET_CODE (XEXP (x, 0)) == SUBREG
15122 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15123 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15124 && (op == EQ || op == NE
15125 || op == GEU || op == GTU || op == LTU || op == LEU)
15126 && CONST_INT_P (y))
15127 return CC_Zmode;
15129 /* A construct for a conditional compare, if the false arm contains
15130 0, then both conditions must be true, otherwise either condition
15131 must be true. Not all conditions are possible, so CCmode is
15132 returned if it can't be done. */
15133 if (GET_CODE (x) == IF_THEN_ELSE
15134 && (XEXP (x, 2) == const0_rtx
15135 || XEXP (x, 2) == const1_rtx)
15136 && COMPARISON_P (XEXP (x, 0))
15137 && COMPARISON_P (XEXP (x, 1)))
15138 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15139 INTVAL (XEXP (x, 2)));
15141 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15142 if (GET_CODE (x) == AND
15143 && (op == EQ || op == NE)
15144 && COMPARISON_P (XEXP (x, 0))
15145 && COMPARISON_P (XEXP (x, 1)))
15146 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15147 DOM_CC_X_AND_Y);
15149 if (GET_CODE (x) == IOR
15150 && (op == EQ || op == NE)
15151 && COMPARISON_P (XEXP (x, 0))
15152 && COMPARISON_P (XEXP (x, 1)))
15153 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15154 DOM_CC_X_OR_Y);
15156 /* An operation (on Thumb) where we want to test for a single bit.
15157 This is done by shifting that bit up into the top bit of a
15158 scratch register; we can then branch on the sign bit. */
15159 if (TARGET_THUMB1
15160 && GET_MODE (x) == SImode
15161 && (op == EQ || op == NE)
15162 && GET_CODE (x) == ZERO_EXTRACT
15163 && XEXP (x, 1) == const1_rtx)
15164 return CC_Nmode;
15166 /* An operation that sets the condition codes as a side-effect, the
15167 V flag is not set correctly, so we can only use comparisons where
15168 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15169 instead.) */
15170 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15171 if (GET_MODE (x) == SImode
15172 && y == const0_rtx
15173 && (op == EQ || op == NE || op == LT || op == GE)
15174 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15175 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15176 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15177 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15178 || GET_CODE (x) == LSHIFTRT
15179 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15180 || GET_CODE (x) == ROTATERT
15181 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15182 return CC_NOOVmode;
15184 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15185 return CC_Zmode;
15187 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15188 && GET_CODE (x) == PLUS
15189 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15190 return CC_Cmode;
15192 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15194 switch (op)
15196 case EQ:
15197 case NE:
15198 /* A DImode comparison against zero can be implemented by
15199 or'ing the two halves together. */
15200 if (y == const0_rtx)
15201 return CC_Zmode;
15203 /* We can do an equality test in three Thumb instructions. */
15204 if (!TARGET_32BIT)
15205 return CC_Zmode;
15207 /* FALLTHROUGH */
15209 case LTU:
15210 case LEU:
15211 case GTU:
15212 case GEU:
15213 /* DImode unsigned comparisons can be implemented by cmp +
15214 cmpeq without a scratch register. Not worth doing in
15215 Thumb-2. */
15216 if (TARGET_32BIT)
15217 return CC_CZmode;
15219 /* FALLTHROUGH */
15221 case LT:
15222 case LE:
15223 case GT:
15224 case GE:
15225 /* DImode signed and unsigned comparisons can be implemented
15226 by cmp + sbcs with a scratch register, but that does not
15227 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15228 gcc_assert (op != EQ && op != NE);
15229 return CC_NCVmode;
15231 default:
15232 gcc_unreachable ();
15236 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15237 return GET_MODE (x);
15239 return CCmode;
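/* Illustrative sketch (not part of arm.c): the CC_Cmode case above matches
   the usual unsigned-overflow idiom, where the comparison (a + b) LTU a is
   true exactly when the addition carried, so only the carry flag needs to
   be valid.  */
#include <assert.h>
#include <limits.h>

int
main (void)
{
  unsigned a = UINT_MAX - 1, b = 5;
  unsigned sum = a + b;        /* wraps around: the addition carried */
  assert (sum < a);            /* (a + b) LTU a detects the carry */
  assert (!(1u + 2u < 1u));    /* no carry, comparison is false */
  return 0;
}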
15242 /* X and Y are two things to compare using CODE. Emit the compare insn and
15243 return the rtx for the CC register in the proper mode. SCRATCH is a
15244 scratch register that may be needed for DImode comparisons. */
15246 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15248 machine_mode mode;
15249 rtx cc_reg;
15250 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15252 /* We might have X as a constant, Y as a register because of the predicates
15253 used for cmpdi. If so, force X to a register here. */
15254 if (dimode_comparison && !REG_P (x))
15255 x = force_reg (DImode, x);
15257 mode = SELECT_CC_MODE (code, x, y);
15258 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15260 if (dimode_comparison
15261 && mode != CC_CZmode)
15263 rtx clobber, set;
15265 /* To compare two non-zero values for equality, XOR them and
15266 then compare against zero. Not used for ARM mode; there
15267 CC_CZmode is cheaper. */
15268 if (mode == CC_Zmode && y != const0_rtx)
15270 gcc_assert (!reload_completed);
15271 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15272 y = const0_rtx;
15275 /* A scratch register is required. */
15276 if (reload_completed)
15277 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15278 else
15279 scratch = gen_rtx_SCRATCH (SImode);
15281 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15282 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15283 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15285 else
15286 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15288 return cc_reg;
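/* Illustrative sketch (not part of arm.c): the XOR trick mentioned above
   for DImode equality -- two values are equal exactly when their XOR is
   zero, so the equality test becomes a compare against zero.  */
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t x = 0x123456789abcdef0ull;
  uint64_t y = x;
  uint64_t z = x + 1;
  assert (((x ^ y) == 0) == (x == y));
  assert (((x ^ z) == 0) == (x == z));
  return 0;
}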
15291 /* Generate a sequence of insns that will generate the correct return
15292 address mask depending on the physical architecture that the program
15293 is running on. */
15295 arm_gen_return_addr_mask (void)
15297 rtx reg = gen_reg_rtx (Pmode);
15299 emit_insn (gen_return_addr_mask (reg));
15300 return reg;
15303 void
15304 arm_reload_in_hi (rtx *operands)
15306 rtx ref = operands[1];
15307 rtx base, scratch;
15308 HOST_WIDE_INT offset = 0;
15310 if (GET_CODE (ref) == SUBREG)
15312 offset = SUBREG_BYTE (ref);
15313 ref = SUBREG_REG (ref);
15316 if (REG_P (ref))
15318 /* We have a pseudo which has been spilt onto the stack; there
15319 are two cases here: the first where there is a simple
15320 stack-slot replacement and a second where the stack-slot is
15321 out of range, or is used as a subreg. */
15322 if (reg_equiv_mem (REGNO (ref)))
15324 ref = reg_equiv_mem (REGNO (ref));
15325 base = find_replacement (&XEXP (ref, 0));
15327 else
15328 /* The slot is out of range, or was dressed up in a SUBREG. */
15329 base = reg_equiv_address (REGNO (ref));
15331 else
15332 base = find_replacement (&XEXP (ref, 0));
15334 /* Handle the case where the address is too complex to be offset by 1. */
15335 if (GET_CODE (base) == MINUS
15336 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15338 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15340 emit_set_insn (base_plus, base);
15341 base = base_plus;
15343 else if (GET_CODE (base) == PLUS)
15345 /* The addend must be CONST_INT, or we would have dealt with it above. */
15346 HOST_WIDE_INT hi, lo;
15348 offset += INTVAL (XEXP (base, 1));
15349 base = XEXP (base, 0);
15351 /* Rework the address into a legal sequence of insns. */
15352 /* Valid range for lo is -4095 -> 4095 */
15353 lo = (offset >= 0
15354 ? (offset & 0xfff)
15355 : -((-offset) & 0xfff));
15357 /* Corner case, if lo is the max offset then we would be out of range
15358 once we have added the additional 1 below, so bump the msb into the
15359 pre-loading insn(s). */
15360 if (lo == 4095)
15361 lo &= 0x7ff;
15363 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15364 ^ (HOST_WIDE_INT) 0x80000000)
15365 - (HOST_WIDE_INT) 0x80000000);
15367 gcc_assert (hi + lo == offset);
15369 if (hi != 0)
15371 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15373 /* Get the base address; addsi3 knows how to handle constants
15374 that require more than one insn. */
15375 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15376 base = base_plus;
15377 offset = lo;
15381 /* Operands[2] may overlap operands[0] (though it won't overlap
15382 operands[1]), that's why we asked for a DImode reg -- so we can
15383 use the bit that does not overlap. */
15384 if (REGNO (operands[2]) == REGNO (operands[0]))
15385 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15386 else
15387 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15389 emit_insn (gen_zero_extendqisi2 (scratch,
15390 gen_rtx_MEM (QImode,
15391 plus_constant (Pmode, base,
15392 offset))));
15393 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15394 gen_rtx_MEM (QImode,
15395 plus_constant (Pmode, base,
15396 offset + 1))));
15397 if (!BYTES_BIG_ENDIAN)
15398 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15399 gen_rtx_IOR (SImode,
15400 gen_rtx_ASHIFT
15401 (SImode,
15402 gen_rtx_SUBREG (SImode, operands[0], 0),
15403 GEN_INT (8)),
15404 scratch));
15405 else
15406 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15407 gen_rtx_IOR (SImode,
15408 gen_rtx_ASHIFT (SImode, scratch,
15409 GEN_INT (8)),
15410 gen_rtx_SUBREG (SImode, operands[0], 0)));
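/* Illustrative sketch (not part of arm.c): the hi/lo offset split used by
   arm_reload_in_hi above (and again in arm_reload_out_hi below).  LO keeps
   the low 12 bits with the sign preserved, HI is the rest sign-extended to
   32 bits, so hi + lo == offset; lo == 4095 is nudged down so that the
   additional "offset + 1" byte access stays in range.  split_offset is a
   hypothetical helper, and offsets are assumed to fit in 32 bits.  */
#include <assert.h>
#include <stdio.h>

static void
split_offset (long long offset, long long *hi, long long *lo)
{
  /* Valid range for lo is -4095 .. 4095.  */
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  /* Corner case: lo == 4095 would go out of range once 1 is added for the
     second byte, so push its top bit into hi.  */
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = ((((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL);
  assert (*hi + *lo == offset);
}

int
main (void)
{
  long long tests[] = { 0, 5, 4095, 4096, -4095, 100000, -100000 };
  for (int i = 0; i < 7; i++)
    {
      long long hi, lo;
      split_offset (tests[i], &hi, &lo);
      printf ("offset=%7lld -> hi=%7lld lo=%5lld\n", tests[i], hi, lo);
    }
  return 0;
}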
15413 /* Handle storing a half-word to memory during reload by synthesizing it as
15414 two byte stores. Take care not to clobber the input values until after we
15415 have moved them somewhere safe. This code assumes that if the DImode
15416 scratch in operands[2] overlaps either the input value or output address
15417 in some way, then that value must die in this insn (we absolutely need
15418 two scratch registers for some corner cases). */
15419 void
15420 arm_reload_out_hi (rtx *operands)
15422 rtx ref = operands[0];
15423 rtx outval = operands[1];
15424 rtx base, scratch;
15425 HOST_WIDE_INT offset = 0;
15427 if (GET_CODE (ref) == SUBREG)
15429 offset = SUBREG_BYTE (ref);
15430 ref = SUBREG_REG (ref);
15433 if (REG_P (ref))
15435 /* We have a pseudo which has been spilt onto the stack; there
15436 are two cases here: the first where there is a simple
15437 stack-slot replacement and a second where the stack-slot is
15438 out of range, or is used as a subreg. */
15439 if (reg_equiv_mem (REGNO (ref)))
15441 ref = reg_equiv_mem (REGNO (ref));
15442 base = find_replacement (&XEXP (ref, 0));
15444 else
15445 /* The slot is out of range, or was dressed up in a SUBREG. */
15446 base = reg_equiv_address (REGNO (ref));
15448 else
15449 base = find_replacement (&XEXP (ref, 0));
15451 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15453 /* Handle the case where the address is too complex to be offset by 1. */
15454 if (GET_CODE (base) == MINUS
15455 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15457 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15459 /* Be careful not to destroy OUTVAL. */
15460 if (reg_overlap_mentioned_p (base_plus, outval))
15462 /* Updating base_plus might destroy outval, see if we can
15463 swap the scratch and base_plus. */
15464 if (!reg_overlap_mentioned_p (scratch, outval))
15466 rtx tmp = scratch;
15467 scratch = base_plus;
15468 base_plus = tmp;
15470 else
15472 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15474 /* Be conservative and copy OUTVAL into the scratch now;
15475 this should only be necessary if outval is a subreg
15476 of something larger than a word. */
15477 /* XXX Might this clobber base? I can't see how it can,
15478 since scratch is known to overlap with OUTVAL, and
15479 must be wider than a word. */
15480 emit_insn (gen_movhi (scratch_hi, outval));
15481 outval = scratch_hi;
15485 emit_set_insn (base_plus, base);
15486 base = base_plus;
15488 else if (GET_CODE (base) == PLUS)
15490 /* The addend must be CONST_INT, or we would have dealt with it above. */
15491 HOST_WIDE_INT hi, lo;
15493 offset += INTVAL (XEXP (base, 1));
15494 base = XEXP (base, 0);
15496 /* Rework the address into a legal sequence of insns. */
15497 /* Valid range for lo is -4095 -> 4095 */
15498 lo = (offset >= 0
15499 ? (offset & 0xfff)
15500 : -((-offset) & 0xfff));
15502 /* Corner case: if lo is the max offset then we would be out of range
15503 once we have added the additional 1 below, so bump the msb into the
15504 pre-loading insn(s). */
15505 if (lo == 4095)
15506 lo &= 0x7ff;
15508 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15509 ^ (HOST_WIDE_INT) 0x80000000)
15510 - (HOST_WIDE_INT) 0x80000000);
15512 gcc_assert (hi + lo == offset);
15514 if (hi != 0)
15516 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15518 /* Be careful not to destroy OUTVAL. */
15519 if (reg_overlap_mentioned_p (base_plus, outval))
15521 /* Updating base_plus might destroy outval, see if we
15522 can swap the scratch and base_plus. */
15523 if (!reg_overlap_mentioned_p (scratch, outval))
15525 rtx tmp = scratch;
15526 scratch = base_plus;
15527 base_plus = tmp;
15529 else
15531 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15533 /* Be conservative and copy outval into scratch now;
15534 this should only be necessary if outval is a
15535 subreg of something larger than a word. */
15536 /* XXX Might this clobber base? I can't see how it
15537 can, since scratch is known to overlap with
15538 outval. */
15539 emit_insn (gen_movhi (scratch_hi, outval));
15540 outval = scratch_hi;
15544 /* Get the base address; addsi3 knows how to handle constants
15545 that require more than one insn. */
15546 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15547 base = base_plus;
15548 offset = lo;
15552 if (BYTES_BIG_ENDIAN)
15554 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15555 plus_constant (Pmode, base,
15556 offset + 1)),
15557 gen_lowpart (QImode, outval)));
15558 emit_insn (gen_lshrsi3 (scratch,
15559 gen_rtx_SUBREG (SImode, outval, 0),
15560 GEN_INT (8)));
15561 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15562 offset)),
15563 gen_lowpart (QImode, scratch)));
15565 else
15567 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15568 offset)),
15569 gen_lowpart (QImode, outval)));
15570 emit_insn (gen_lshrsi3 (scratch,
15571 gen_rtx_SUBREG (SImode, outval, 0),
15572 GEN_INT (8)));
15573 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15574 plus_constant (Pmode, base,
15575 offset + 1)),
15576 gen_lowpart (QImode, scratch)));
15580 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15581 (padded to the size of a word) should be passed in a register. */
15583 static bool
15584 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15586 if (TARGET_AAPCS_BASED)
15587 return must_pass_in_stack_var_size (mode, type);
15588 else
15589 return must_pass_in_stack_var_size_or_pad (mode, type);
15593 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15594 Return true if an argument passed on the stack should be padded upwards,
15595 i.e. if the least-significant byte has useful data.
15596 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15597 aggregate types are placed in the lowest memory address. */
15599 bool
15600 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15602 if (!TARGET_AAPCS_BASED)
15603 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15605 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15606 return false;
15608 return true;
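/* For example: under AAPCS a 3-byte structure passed on the stack is
   padded upward regardless of endianness, while a plain short or int
   argument on a big-endian AAPCS target falls into the
   INTEGRAL_TYPE_P check above and is therefore not padded upward. */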
15612 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15613 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15614 register has useful data, and return the opposite if the most
15615 significant byte does. */
15617 bool
15618 arm_pad_reg_upward (machine_mode mode,
15619 tree type, int first ATTRIBUTE_UNUSED)
15621 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15623 /* For AAPCS, small aggregates, small fixed-point types,
15624 and small complex types are always padded upwards. */
15625 if (type)
15627 if ((AGGREGATE_TYPE_P (type)
15628 || TREE_CODE (type) == COMPLEX_TYPE
15629 || FIXED_POINT_TYPE_P (type))
15630 && int_size_in_bytes (type) <= 4)
15631 return true;
15633 else
15635 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15636 && GET_MODE_SIZE (mode) <= 4)
15637 return true;
15641 /* Otherwise, use default padding. */
15642 return !BYTES_BIG_ENDIAN;
15645 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15646 assuming that the address in the base register is word aligned. */
15647 bool
15648 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15650 HOST_WIDE_INT max_offset;
15652 /* Offset must be a multiple of 4 in Thumb mode. */
15653 if (TARGET_THUMB2 && ((offset & 3) != 0))
15654 return false;
15656 if (TARGET_THUMB2)
15657 max_offset = 1020;
15658 else if (TARGET_ARM)
15659 max_offset = 255;
15660 else
15661 return false;
15663 return ((offset <= max_offset) && (offset >= -max_offset));
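/* A few concrete values accepted or rejected by the check above: in
   Thumb-2 state 0, 4 and 1020 are valid LDRD/STRD offsets, while 2
   (not a multiple of 4) and 1024 (out of range) are not; in ARM
   state any offset in [-255, 255] is accepted, so 250 is valid but
   256 is not. */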
15666 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15667 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15668 Assumes that the address in the base register RN is word aligned. Pattern
15669 guarantees that both memory accesses use the same base register,
15670 the offsets are constants within the range, and the gap between the offsets is 4.
15671 If reload is complete then check that the registers are legal. WBACK indicates whether
15672 address is updated. LOAD indicates whether memory access is load or store. */
15673 bool
15674 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15675 bool wback, bool load)
15677 unsigned int t, t2, n;
15679 if (!reload_completed)
15680 return true;
15682 if (!offset_ok_for_ldrd_strd (offset))
15683 return false;
15685 t = REGNO (rt);
15686 t2 = REGNO (rt2);
15687 n = REGNO (rn);
15689 if ((TARGET_THUMB2)
15690 && ((wback && (n == t || n == t2))
15691 || (t == SP_REGNUM)
15692 || (t == PC_REGNUM)
15693 || (t2 == SP_REGNUM)
15694 || (t2 == PC_REGNUM)
15695 || (!load && (n == PC_REGNUM))
15696 || (load && (t == t2))
15697 /* Triggers Cortex-M3 LDRD errata. */
15698 || (!wback && load && fix_cm3_ldrd && (n == t))))
15699 return false;
15701 if ((TARGET_ARM)
15702 && ((wback && (n == t || n == t2))
15703 || (t2 == PC_REGNUM)
15704 || (t % 2 != 0) /* First destination register is not even. */
15705 || (t2 != t + 1)
15706 /* PC can be used as base register (for offset addressing only),
15707 but it is deprecated. */
15708 || (n == PC_REGNUM)))
15709 return false;
15711 return true;
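/* As a rough illustration of the checks above, after reload: in ARM
   state ldrd/strd with rt = r0, rt2 = r1 and base r2 is accepted,
   whereas rt = r1, rt2 = r2 (odd first register) or rt = r2,
   rt2 = r4 (non-consecutive pair) is rejected. In Thumb-2 state the
   pair is unconstrained, but SP and PC are rejected as data
   registers, as is a load with rt == rt2. */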
15714 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15715 operand MEM's address contains an immediate offset from the base
15716 register and has no side effects, in which case it sets BASE and
15717 OFFSET accordingly. */
15718 static bool
15719 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15721 rtx addr;
15723 gcc_assert (base != NULL && offset != NULL);
15725 /* TODO: Handle more general memory operand patterns, such as
15726 PRE_DEC and PRE_INC. */
15728 if (side_effects_p (mem))
15729 return false;
15731 /* Can't deal with subregs. */
15732 if (GET_CODE (mem) == SUBREG)
15733 return false;
15735 gcc_assert (MEM_P (mem));
15737 *offset = const0_rtx;
15739 addr = XEXP (mem, 0);
15741 /* If addr isn't valid for DImode, then we can't handle it. */
15742 if (!arm_legitimate_address_p (DImode, addr,
15743 reload_in_progress || reload_completed))
15744 return false;
15746 if (REG_P (addr))
15748 *base = addr;
15749 return true;
15751 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15753 *base = XEXP (addr, 0);
15754 *offset = XEXP (addr, 1);
15755 return (REG_P (*base) && CONST_INT_P (*offset));
15758 return false;
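/* For instance, given (mem:SI (plus:SI (reg:SI r4) (const_int 8)))
   this returns true with *base = r4 and *offset = 8, and for a bare
   (mem:SI (reg:SI r4)) it returns true with *offset = const0_rtx;
   auto-increment addresses and SUBREGs of a MEM are rejected above. */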
15761 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15763 /* Called from a peephole2 to replace two word-size accesses with a
15764 single LDRD/STRD instruction. Returns true iff we can generate a
15765 new instruction sequence. That is, both accesses use the same base
15766 register and the gap between constant offsets is 4. This function
15767 may reorder its operands to match ldrd/strd RTL templates.
15768 OPERANDS are the operands found by the peephole matcher;
15769 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15770 corresponding memory operands. LOAD indicates whether the access
15771 is load or store. CONST_STORE indicates a store of constant
15772 integer values held in OPERANDS[4,5] and assumes that the pattern
15773 is 4 insns long, for the purpose of checking dead registers.
15774 COMMUTE indicates that register operands may be reordered. */
15775 bool
15776 gen_operands_ldrd_strd (rtx *operands, bool load,
15777 bool const_store, bool commute)
15779 int nops = 2;
15780 HOST_WIDE_INT offsets[2], offset;
15781 rtx base = NULL_RTX;
15782 rtx cur_base, cur_offset, tmp;
15783 int i, gap;
15784 HARD_REG_SET regset;
15786 gcc_assert (!const_store || !load);
15787 /* Check that the memory references are immediate offsets from the
15788 same base register. Extract the base register, the destination
15789 registers, and the corresponding memory offsets. */
15790 for (i = 0; i < nops; i++)
15792 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15793 return false;
15795 if (i == 0)
15796 base = cur_base;
15797 else if (REGNO (base) != REGNO (cur_base))
15798 return false;
15800 offsets[i] = INTVAL (cur_offset);
15801 if (GET_CODE (operands[i]) == SUBREG)
15803 tmp = SUBREG_REG (operands[i]);
15804 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15805 operands[i] = tmp;
15809 /* Make sure there is no dependency between the individual loads. */
15810 if (load && REGNO (operands[0]) == REGNO (base))
15811 return false; /* RAW */
15813 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15814 return false; /* WAW */
15816 /* If the same input register is used in both stores
15817 when storing different constants, try to find a free register.
15818 For example, the code
15819 mov r0, 0
15820 str r0, [r2]
15821 mov r0, 1
15822 str r0, [r2, #4]
15823 can be transformed into
15824 mov r1, 0
15825 strd r1, r0, [r2]
15826 in Thumb mode assuming that r1 is free. */
15827 if (const_store
15828 && REGNO (operands[0]) == REGNO (operands[1])
15829 && INTVAL (operands[4]) != INTVAL (operands[5]))
15831 if (TARGET_THUMB2)
15833 CLEAR_HARD_REG_SET (regset);
15834 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15835 if (tmp == NULL_RTX)
15836 return false;
15838 /* Use the new register in the first store to ensure that
15839 if the original input register is not dead after the peephole,
15840 then it will have the correct constant value. */
15841 operands[0] = tmp;
15843 else if (TARGET_ARM)
15845 return false;
15846 int regno = REGNO (operands[0]);
15847 if (!peep2_reg_dead_p (4, operands[0]))
15849 /* When the input register is even and is not dead after the
15850 pattern, it has to hold the second constant but we cannot
15851 form a legal STRD in ARM mode with this register as the second
15852 register. */
15853 if (regno % 2 == 0)
15854 return false;
15856 /* Is regno-1 free? */
15857 SET_HARD_REG_SET (regset);
15858 CLEAR_HARD_REG_BIT(regset, regno - 1);
15859 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15860 if (tmp == NULL_RTX)
15861 return false;
15863 operands[0] = tmp;
15865 else
15867 /* Find a DImode register. */
15868 CLEAR_HARD_REG_SET (regset);
15869 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15870 if (tmp != NULL_RTX)
15872 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15873 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15875 else
15877 /* Can we use the input register to form a DI register? */
15878 SET_HARD_REG_SET (regset);
15879 CLEAR_HARD_REG_BIT(regset,
15880 regno % 2 == 0 ? regno + 1 : regno - 1);
15881 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15882 if (tmp == NULL_RTX)
15883 return false;
15884 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15888 gcc_assert (operands[0] != NULL_RTX);
15889 gcc_assert (operands[1] != NULL_RTX);
15890 gcc_assert (REGNO (operands[0]) % 2 == 0);
15891 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15895 /* Make sure the instructions are ordered with lower memory access first. */
15896 if (offsets[0] > offsets[1])
15898 gap = offsets[0] - offsets[1];
15899 offset = offsets[1];
15901 /* Swap the instructions such that lower memory is accessed first. */
15902 SWAP_RTX (operands[0], operands[1]);
15903 SWAP_RTX (operands[2], operands[3]);
15904 if (const_store)
15905 SWAP_RTX (operands[4], operands[5]);
15907 else
15909 gap = offsets[1] - offsets[0];
15910 offset = offsets[0];
15913 /* Make sure accesses are to consecutive memory locations. */
15914 if (gap != 4)
15915 return false;
15917 /* Make sure we generate legal instructions. */
15918 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15919 false, load))
15920 return true;
15922 /* In Thumb state, where registers are almost unconstrained, there
15923 is little hope to fix it. */
15924 if (TARGET_THUMB2)
15925 return false;
15927 if (load && commute)
15929 /* Try reordering registers. */
15930 SWAP_RTX (operands[0], operands[1]);
15931 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15932 false, load))
15933 return true;
15936 if (const_store)
15938 /* If input registers are dead after this pattern, they can be
15939 reordered or replaced by other registers that are free in the
15940 current pattern. */
15941 if (!peep2_reg_dead_p (4, operands[0])
15942 || !peep2_reg_dead_p (4, operands[1]))
15943 return false;
15945 /* Try to reorder the input registers. */
15946 /* For example, the code
15947 mov r0, 0
15948 mov r1, 1
15949 str r1, [r2]
15950 str r0, [r2, #4]
15951 can be transformed into
15952 mov r1, 0
15953 mov r0, 1
15954 strd r0, r1, [r2]
15956 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15957 false, false))
15959 SWAP_RTX (operands[0], operands[1]);
15960 return true;
15963 /* Try to find a free DI register. */
15964 CLEAR_HARD_REG_SET (regset);
15965 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15966 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15967 while (true)
15969 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15970 if (tmp == NULL_RTX)
15971 return false;
15973 /* DREG must be an even-numbered register in DImode.
15974 Split it into SI registers. */
15975 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15976 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15977 gcc_assert (operands[0] != NULL_RTX);
15978 gcc_assert (operands[1] != NULL_RTX);
15979 gcc_assert (REGNO (operands[0]) % 2 == 0);
15980 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15982 return (operands_ok_ldrd_strd (operands[0], operands[1],
15983 base, offset,
15984 false, load));
15988 return false;
15990 #undef SWAP_RTX
15995 /* Print a symbolic form of X to the debug file, F. */
15996 static void
15997 arm_print_value (FILE *f, rtx x)
15999 switch (GET_CODE (x))
16001 case CONST_INT:
16002 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16003 return;
16005 case CONST_DOUBLE:
16006 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16007 return;
16009 case CONST_VECTOR:
16011 int i;
16013 fprintf (f, "<");
16014 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16016 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16017 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16018 fputc (',', f);
16020 fprintf (f, ">");
16022 return;
16024 case CONST_STRING:
16025 fprintf (f, "\"%s\"", XSTR (x, 0));
16026 return;
16028 case SYMBOL_REF:
16029 fprintf (f, "`%s'", XSTR (x, 0));
16030 return;
16032 case LABEL_REF:
16033 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16034 return;
16036 case CONST:
16037 arm_print_value (f, XEXP (x, 0));
16038 return;
16040 case PLUS:
16041 arm_print_value (f, XEXP (x, 0));
16042 fprintf (f, "+");
16043 arm_print_value (f, XEXP (x, 1));
16044 return;
16046 case PC:
16047 fprintf (f, "pc");
16048 return;
16050 default:
16051 fprintf (f, "????");
16052 return;
16056 /* Routines for manipulation of the constant pool. */
16058 /* Arm instructions cannot load a large constant directly into a
16059 register; they have to come from a pc relative load. The constant
16060 must therefore be placed in the addressable range of the pc
16061 relative load. Depending on the precise pc relative load
16062 instruction the range is somewhere between 256 bytes and 4k. This
16063 means that we often have to dump a constant inside a function, and
16064 generate code to branch around it.
16066 It is important to minimize this, since the branches will slow
16067 things down and make the code larger.
16069 Normally we can hide the table after an existing unconditional
16070 branch so that there is no interruption of the flow, but in the
16071 worst case the code looks like this:
16073 ldr rn, L1
16075 b L2
16076 align
16077 L1: .long value
16081 ldr rn, L3
16083 b L4
16084 align
16085 L3: .long value
16089 We fix this by performing a scan after scheduling, which notices
16090 which instructions need to have their operands fetched from the
16091 constant table and builds the table.
16093 The algorithm starts by building a table of all the constants that
16094 need fixing up and all the natural barriers in the function (places
16095 where a constant table can be dropped without breaking the flow).
16096 For each fixup we note how far the pc-relative replacement will be
16097 able to reach and the offset of the instruction into the function.
16099 Having built the table we then group the fixes together to form
16100 tables that are as large as possible (subject to addressing
16101 constraints) and emit each table of constants after the last
16102 barrier that is within range of all the instructions in the group.
16103 If a group does not contain a barrier, then we forcibly create one
16104 by inserting a jump instruction into the flow. Once the table has
16105 been inserted, the insns are then modified to reference the
16106 relevant entry in the pool.
16108 Possible enhancements to the algorithm (not implemented) are:
16110 1) For some processors and object formats, there may be benefit in
16111 aligning the pools to the start of cache lines; this alignment
16112 would need to be taken into account when calculating addressability
16113 of a pool. */
16115 /* These typedefs are located at the start of this file, so that
16116 they can be used in the prototypes there. This comment is to
16117 remind readers of that fact so that the following structures
16118 can be understood more easily.
16120 typedef struct minipool_node Mnode;
16121 typedef struct minipool_fixup Mfix; */
16123 struct minipool_node
16125 /* Doubly linked chain of entries. */
16126 Mnode * next;
16127 Mnode * prev;
16128 /* The maximum offset into the code at which this entry can be placed. While
16129 pushing fixes for forward references, all entries are sorted in order
16130 of increasing max_address. */
16131 HOST_WIDE_INT max_address;
16132 /* Similarly for an entry inserted for a backwards ref. */
16133 HOST_WIDE_INT min_address;
16134 /* The number of fixes referencing this entry. This can become zero
16135 if we "unpush" an entry. In this case we ignore the entry when we
16136 come to emit the code. */
16137 int refcount;
16138 /* The offset from the start of the minipool. */
16139 HOST_WIDE_INT offset;
16140 /* The value in the table. */
16141 rtx value;
16142 /* The mode of value. */
16143 machine_mode mode;
16144 /* The size of the value. With iWMMXt enabled
16145 sizes > 4 also imply an alignment of 8 bytes. */
16146 int fix_size;
16149 struct minipool_fixup
16151 Mfix * next;
16152 rtx_insn * insn;
16153 HOST_WIDE_INT address;
16154 rtx * loc;
16155 machine_mode mode;
16156 int fix_size;
16157 rtx value;
16158 Mnode * minipool;
16159 HOST_WIDE_INT forwards;
16160 HOST_WIDE_INT backwards;
16163 /* Fixes less than a word need padding out to a word boundary. */
16164 #define MINIPOOL_FIX_SIZE(mode) \
16165 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
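/* So, for example, MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) are both 4, while
   MINIPOOL_FIX_SIZE (DImode) is 8 and a 16-byte vector mode keeps
   its natural size of 16. */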
16167 static Mnode * minipool_vector_head;
16168 static Mnode * minipool_vector_tail;
16169 static rtx_code_label *minipool_vector_label;
16170 static int minipool_pad;
16172 /* The linked list of all minipool fixes required for this function. */
16173 Mfix * minipool_fix_head;
16174 Mfix * minipool_fix_tail;
16175 /* The fix entry for the current minipool, once it has been placed. */
16176 Mfix * minipool_barrier;
16178 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16179 #define JUMP_TABLES_IN_TEXT_SECTION 0
16180 #endif
16182 static HOST_WIDE_INT
16183 get_jump_table_size (rtx_jump_table_data *insn)
16185 /* ADDR_VECs only take room if read-only data goes into the text
16186 section. */
16187 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16189 rtx body = PATTERN (insn);
16190 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16191 HOST_WIDE_INT size;
16192 HOST_WIDE_INT modesize;
16194 modesize = GET_MODE_SIZE (GET_MODE (body));
16195 size = modesize * XVECLEN (body, elt);
16196 switch (modesize)
16198 case 1:
16199 /* Round up size of TBB table to a halfword boundary. */
16200 size = (size + 1) & ~(HOST_WIDE_INT)1;
16201 break;
16202 case 2:
16203 /* No padding necessary for TBH. */
16204 break;
16205 case 4:
16206 /* Add two bytes for alignment on Thumb. */
16207 if (TARGET_THUMB)
16208 size += 2;
16209 break;
16210 default:
16211 gcc_unreachable ();
16213 return size;
16216 return 0;
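/* As an example of the sizing above: a TBB-style ADDR_DIFF_VEC in
   QImode with 5 entries occupies 5 bytes and is rounded up to 6, a
   TBH-style HImode table with 5 entries occupies exactly 10 bytes,
   and a 5-entry SImode table on Thumb is counted as 22 bytes (20
   plus 2 for alignment). This only applies when jump tables end up
   in the text section, as tested above. */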
16219 /* Return the maximum amount of padding that will be inserted before
16220 label LABEL. */
16222 static HOST_WIDE_INT
16223 get_label_padding (rtx label)
16225 HOST_WIDE_INT align, min_insn_size;
16227 align = 1 << label_to_alignment (label);
16228 min_insn_size = TARGET_THUMB ? 2 : 4;
16229 return align > min_insn_size ? align - min_insn_size : 0;
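/* For instance, a label aligned to 8 bytes can be preceded by up to
   6 bytes of padding on Thumb (minimum insn size 2) and up to 4
   bytes on ARM (minimum insn size 4); labels with no more than
   instruction alignment contribute nothing. */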
16232 /* Move a minipool fix MP from its current location to before MAX_MP.
16233 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16234 constraints may need updating. */
16235 static Mnode *
16236 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16237 HOST_WIDE_INT max_address)
16239 /* The code below assumes these are different. */
16240 gcc_assert (mp != max_mp);
16242 if (max_mp == NULL)
16244 if (max_address < mp->max_address)
16245 mp->max_address = max_address;
16247 else
16249 if (max_address > max_mp->max_address - mp->fix_size)
16250 mp->max_address = max_mp->max_address - mp->fix_size;
16251 else
16252 mp->max_address = max_address;
16254 /* Unlink MP from its current position. Since max_mp is non-null,
16255 mp->prev must be non-null. */
16256 mp->prev->next = mp->next;
16257 if (mp->next != NULL)
16258 mp->next->prev = mp->prev;
16259 else
16260 minipool_vector_tail = mp->prev;
16262 /* Re-insert it before MAX_MP. */
16263 mp->next = max_mp;
16264 mp->prev = max_mp->prev;
16265 max_mp->prev = mp;
16267 if (mp->prev != NULL)
16268 mp->prev->next = mp;
16269 else
16270 minipool_vector_head = mp;
16273 /* Save the new entry. */
16274 max_mp = mp;
16276 /* Scan over the preceding entries and adjust their addresses as
16277 required. */
16278 while (mp->prev != NULL
16279 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16281 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16282 mp = mp->prev;
16285 return max_mp;
16288 /* Add a constant to the minipool for a forward reference. Returns the
16289 node added or NULL if the constant will not fit in this pool. */
16290 static Mnode *
16291 add_minipool_forward_ref (Mfix *fix)
16293 /* If set, max_mp is the first pool_entry that has a lower
16294 constraint than the one we are trying to add. */
16295 Mnode * max_mp = NULL;
16296 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16297 Mnode * mp;
16299 /* If the minipool starts before the end of FIX->INSN then this FIX
16300 cannot be placed into the current pool. Furthermore, adding the
16301 new constant pool entry may cause the pool to start FIX_SIZE bytes
16302 earlier. */
16303 if (minipool_vector_head &&
16304 (fix->address + get_attr_length (fix->insn)
16305 >= minipool_vector_head->max_address - fix->fix_size))
16306 return NULL;
16308 /* Scan the pool to see if a constant with the same value has
16309 already been added. While we are doing this, also note the
16310 location where we must insert the constant if it doesn't already
16311 exist. */
16312 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16314 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16315 && fix->mode == mp->mode
16316 && (!LABEL_P (fix->value)
16317 || (CODE_LABEL_NUMBER (fix->value)
16318 == CODE_LABEL_NUMBER (mp->value)))
16319 && rtx_equal_p (fix->value, mp->value))
16321 /* More than one fix references this entry. */
16322 mp->refcount++;
16323 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16326 /* Note the insertion point if necessary. */
16327 if (max_mp == NULL
16328 && mp->max_address > max_address)
16329 max_mp = mp;
16331 /* If we are inserting an 8-byte aligned quantity and
16332 we have not already found an insertion point, then
16333 make sure that all such 8-byte aligned quantities are
16334 placed at the start of the pool. */
16335 if (ARM_DOUBLEWORD_ALIGN
16336 && max_mp == NULL
16337 && fix->fix_size >= 8
16338 && mp->fix_size < 8)
16340 max_mp = mp;
16341 max_address = mp->max_address;
16345 /* The value is not currently in the minipool, so we need to create
16346 a new entry for it. If MAX_MP is NULL, the entry will be put on
16347 the end of the list since the placement is less constrained than
16348 any existing entry. Otherwise, we insert the new fix before
16349 MAX_MP and, if necessary, adjust the constraints on the other
16350 entries. */
16351 mp = XNEW (Mnode);
16352 mp->fix_size = fix->fix_size;
16353 mp->mode = fix->mode;
16354 mp->value = fix->value;
16355 mp->refcount = 1;
16356 /* Not yet required for a backwards ref. */
16357 mp->min_address = -65536;
16359 if (max_mp == NULL)
16361 mp->max_address = max_address;
16362 mp->next = NULL;
16363 mp->prev = minipool_vector_tail;
16365 if (mp->prev == NULL)
16367 minipool_vector_head = mp;
16368 minipool_vector_label = gen_label_rtx ();
16370 else
16371 mp->prev->next = mp;
16373 minipool_vector_tail = mp;
16375 else
16377 if (max_address > max_mp->max_address - mp->fix_size)
16378 mp->max_address = max_mp->max_address - mp->fix_size;
16379 else
16380 mp->max_address = max_address;
16382 mp->next = max_mp;
16383 mp->prev = max_mp->prev;
16384 max_mp->prev = mp;
16385 if (mp->prev != NULL)
16386 mp->prev->next = mp;
16387 else
16388 minipool_vector_head = mp;
16391 /* Save the new entry. */
16392 max_mp = mp;
16394 /* Scan over the preceding entries and adjust their addresses as
16395 required. */
16396 while (mp->prev != NULL
16397 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16399 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16400 mp = mp->prev;
16403 return max_mp;
16406 static Mnode *
16407 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16408 HOST_WIDE_INT min_address)
16410 HOST_WIDE_INT offset;
16412 /* The code below assumes these are different. */
16413 gcc_assert (mp != min_mp);
16415 if (min_mp == NULL)
16417 if (min_address > mp->min_address)
16418 mp->min_address = min_address;
16420 else
16422 /* We will adjust this below if it is too loose. */
16423 mp->min_address = min_address;
16425 /* Unlink MP from its current position. Since min_mp is non-null,
16426 mp->next must be non-null. */
16427 mp->next->prev = mp->prev;
16428 if (mp->prev != NULL)
16429 mp->prev->next = mp->next;
16430 else
16431 minipool_vector_head = mp->next;
16433 /* Reinsert it after MIN_MP. */
16434 mp->prev = min_mp;
16435 mp->next = min_mp->next;
16436 min_mp->next = mp;
16437 if (mp->next != NULL)
16438 mp->next->prev = mp;
16439 else
16440 minipool_vector_tail = mp;
16443 min_mp = mp;
16445 offset = 0;
16446 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16448 mp->offset = offset;
16449 if (mp->refcount > 0)
16450 offset += mp->fix_size;
16452 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16453 mp->next->min_address = mp->min_address + mp->fix_size;
16456 return min_mp;
16459 /* Add a constant to the minipool for a backward reference. Returns the
16460 node added or NULL if the constant will not fit in this pool.
16462 Note that the code for inserting a backwards reference can be
16463 somewhat confusing because the calculated offsets for each fix do
16464 not take into account the size of the pool (which is still under
16465 construction). */
16466 static Mnode *
16467 add_minipool_backward_ref (Mfix *fix)
16469 /* If set, min_mp is the last pool_entry that has a lower constraint
16470 than the one we are trying to add. */
16471 Mnode *min_mp = NULL;
16472 /* This can be negative, since it is only a constraint. */
16473 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16474 Mnode *mp;
16476 /* If we can't reach the current pool from this insn, or if we can't
16477 insert this entry at the end of the pool without pushing other
16478 fixes out of range, then we don't try. This ensures that we
16479 can't fail later on. */
16480 if (min_address >= minipool_barrier->address
16481 || (minipool_vector_tail->min_address + fix->fix_size
16482 >= minipool_barrier->address))
16483 return NULL;
16485 /* Scan the pool to see if a constant with the same value has
16486 already been added. While we are doing this, also note the
16487 location where we must insert the constant if it doesn't already
16488 exist. */
16489 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16491 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16492 && fix->mode == mp->mode
16493 && (!LABEL_P (fix->value)
16494 || (CODE_LABEL_NUMBER (fix->value)
16495 == CODE_LABEL_NUMBER (mp->value)))
16496 && rtx_equal_p (fix->value, mp->value)
16497 /* Check that there is enough slack to move this entry to the
16498 end of the table (this is conservative). */
16499 && (mp->max_address
16500 > (minipool_barrier->address
16501 + minipool_vector_tail->offset
16502 + minipool_vector_tail->fix_size)))
16504 mp->refcount++;
16505 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16508 if (min_mp != NULL)
16509 mp->min_address += fix->fix_size;
16510 else
16512 /* Note the insertion point if necessary. */
16513 if (mp->min_address < min_address)
16515 /* For now, we do not allow the insertion of nodes requiring 8-byte
16516 alignment anywhere but at the start of the pool. */
16517 if (ARM_DOUBLEWORD_ALIGN
16518 && fix->fix_size >= 8 && mp->fix_size < 8)
16519 return NULL;
16520 else
16521 min_mp = mp;
16523 else if (mp->max_address
16524 < minipool_barrier->address + mp->offset + fix->fix_size)
16526 /* Inserting before this entry would push the fix beyond
16527 its maximum address (which can happen if we have
16528 re-located a forwards fix); force the new fix to come
16529 after it. */
16530 if (ARM_DOUBLEWORD_ALIGN
16531 && fix->fix_size >= 8 && mp->fix_size < 8)
16532 return NULL;
16533 else
16535 min_mp = mp;
16536 min_address = mp->min_address + fix->fix_size;
16539 /* Do not insert a non-8-byte aligned quantity before 8-byte
16540 aligned quantities. */
16541 else if (ARM_DOUBLEWORD_ALIGN
16542 && fix->fix_size < 8
16543 && mp->fix_size >= 8)
16545 min_mp = mp;
16546 min_address = mp->min_address + fix->fix_size;
16551 /* We need to create a new entry. */
16552 mp = XNEW (Mnode);
16553 mp->fix_size = fix->fix_size;
16554 mp->mode = fix->mode;
16555 mp->value = fix->value;
16556 mp->refcount = 1;
16557 mp->max_address = minipool_barrier->address + 65536;
16559 mp->min_address = min_address;
16561 if (min_mp == NULL)
16563 mp->prev = NULL;
16564 mp->next = minipool_vector_head;
16566 if (mp->next == NULL)
16568 minipool_vector_tail = mp;
16569 minipool_vector_label = gen_label_rtx ();
16571 else
16572 mp->next->prev = mp;
16574 minipool_vector_head = mp;
16576 else
16578 mp->next = min_mp->next;
16579 mp->prev = min_mp;
16580 min_mp->next = mp;
16582 if (mp->next != NULL)
16583 mp->next->prev = mp;
16584 else
16585 minipool_vector_tail = mp;
16588 /* Save the new entry. */
16589 min_mp = mp;
16591 if (mp->prev)
16592 mp = mp->prev;
16593 else
16594 mp->offset = 0;
16596 /* Scan over the following entries and adjust their offsets. */
16597 while (mp->next != NULL)
16599 if (mp->next->min_address < mp->min_address + mp->fix_size)
16600 mp->next->min_address = mp->min_address + mp->fix_size;
16602 if (mp->refcount)
16603 mp->next->offset = mp->offset + mp->fix_size;
16604 else
16605 mp->next->offset = mp->offset;
16607 mp = mp->next;
16610 return min_mp;
16613 static void
16614 assign_minipool_offsets (Mfix *barrier)
16616 HOST_WIDE_INT offset = 0;
16617 Mnode *mp;
16619 minipool_barrier = barrier;
16621 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16623 mp->offset = offset;
16625 if (mp->refcount > 0)
16626 offset += mp->fix_size;
16630 /* Output the literal table. */
16631 static void
16632 dump_minipool (rtx_insn *scan)
16634 Mnode * mp;
16635 Mnode * nmp;
16636 int align64 = 0;
16638 if (ARM_DOUBLEWORD_ALIGN)
16639 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16640 if (mp->refcount > 0 && mp->fix_size >= 8)
16642 align64 = 1;
16643 break;
16646 if (dump_file)
16647 fprintf (dump_file,
16648 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16649 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16651 scan = emit_label_after (gen_label_rtx (), scan);
16652 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16653 scan = emit_label_after (minipool_vector_label, scan);
16655 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16657 if (mp->refcount > 0)
16659 if (dump_file)
16661 fprintf (dump_file,
16662 ";; Offset %u, min %ld, max %ld ",
16663 (unsigned) mp->offset, (unsigned long) mp->min_address,
16664 (unsigned long) mp->max_address);
16665 arm_print_value (dump_file, mp->value);
16666 fputc ('\n', dump_file);
16669 switch (mp->fix_size)
16671 #ifdef HAVE_consttable_1
16672 case 1:
16673 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16674 break;
16676 #endif
16677 #ifdef HAVE_consttable_2
16678 case 2:
16679 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16680 break;
16682 #endif
16683 #ifdef HAVE_consttable_4
16684 case 4:
16685 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16686 break;
16688 #endif
16689 #ifdef HAVE_consttable_8
16690 case 8:
16691 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16692 break;
16694 #endif
16695 #ifdef HAVE_consttable_16
16696 case 16:
16697 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16698 break;
16700 #endif
16701 default:
16702 gcc_unreachable ();
16706 nmp = mp->next;
16707 free (mp);
16710 minipool_vector_head = minipool_vector_tail = NULL;
16711 scan = emit_insn_after (gen_consttable_end (), scan);
16712 scan = emit_barrier_after (scan);
16715 /* Return the cost of forcibly inserting a barrier after INSN. */
16716 static int
16717 arm_barrier_cost (rtx insn)
16719 /* Basing the location of the pool on the loop depth is preferable,
16720 but at the moment, the basic block information seems to be
16721 corrupted by this stage of the compilation. */
16722 int base_cost = 50;
16723 rtx next = next_nonnote_insn (insn);
16725 if (next != NULL && LABEL_P (next))
16726 base_cost -= 20;
16728 switch (GET_CODE (insn))
16730 case CODE_LABEL:
16731 /* It will always be better to place the table before the label, rather
16732 than after it. */
16733 return 50;
16735 case INSN:
16736 case CALL_INSN:
16737 return base_cost;
16739 case JUMP_INSN:
16740 return base_cost - 10;
16742 default:
16743 return base_cost + 10;
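/* Illustrative costs from the rules above: a CODE_LABEL always costs
   50; an ordinary insn or call costs 50, or 30 when the next
   non-note insn is a label; a jump costs 40 (20 before a label), so
   existing unconditional branches are the cheapest places to hide a
   pool. */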
16747 /* Find the best place in the insn stream in the range
16748 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16749 Create the barrier by inserting a jump and add a new fix entry for
16750 it. */
16751 static Mfix *
16752 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16754 HOST_WIDE_INT count = 0;
16755 rtx_barrier *barrier;
16756 rtx_insn *from = fix->insn;
16757 /* The instruction after which we will insert the jump. */
16758 rtx_insn *selected = NULL;
16759 int selected_cost;
16760 /* The address at which the jump instruction will be placed. */
16761 HOST_WIDE_INT selected_address;
16762 Mfix * new_fix;
16763 HOST_WIDE_INT max_count = max_address - fix->address;
16764 rtx_code_label *label = gen_label_rtx ();
16766 selected_cost = arm_barrier_cost (from);
16767 selected_address = fix->address;
16769 while (from && count < max_count)
16771 rtx_jump_table_data *tmp;
16772 int new_cost;
16774 /* This code shouldn't have been called if there was a natural barrier
16775 within range. */
16776 gcc_assert (!BARRIER_P (from));
16778 /* Count the length of this insn. This must stay in sync with the
16779 code that pushes minipool fixes. */
16780 if (LABEL_P (from))
16781 count += get_label_padding (from);
16782 else
16783 count += get_attr_length (from);
16785 /* If there is a jump table, add its length. */
16786 if (tablejump_p (from, NULL, &tmp))
16788 count += get_jump_table_size (tmp);
16790 /* Jump tables aren't in a basic block, so base the cost on
16791 the dispatch insn. If we select this location, we will
16792 still put the pool after the table. */
16793 new_cost = arm_barrier_cost (from);
16795 if (count < max_count
16796 && (!selected || new_cost <= selected_cost))
16798 selected = tmp;
16799 selected_cost = new_cost;
16800 selected_address = fix->address + count;
16803 /* Continue after the dispatch table. */
16804 from = NEXT_INSN (tmp);
16805 continue;
16808 new_cost = arm_barrier_cost (from);
16810 if (count < max_count
16811 && (!selected || new_cost <= selected_cost))
16813 selected = from;
16814 selected_cost = new_cost;
16815 selected_address = fix->address + count;
16818 from = NEXT_INSN (from);
16821 /* Make sure that we found a place to insert the jump. */
16822 gcc_assert (selected);
16824 /* Make sure we do not split a call and its corresponding
16825 CALL_ARG_LOCATION note. */
16826 if (CALL_P (selected))
16828 rtx_insn *next = NEXT_INSN (selected);
16829 if (next && NOTE_P (next)
16830 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16831 selected = next;
16834 /* Create a new JUMP_INSN that branches around a barrier. */
16835 from = emit_jump_insn_after (gen_jump (label), selected);
16836 JUMP_LABEL (from) = label;
16837 barrier = emit_barrier_after (from);
16838 emit_label_after (label, barrier);
16840 /* Create a minipool barrier entry for the new barrier. */
16841 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16842 new_fix->insn = barrier;
16843 new_fix->address = selected_address;
16844 new_fix->next = fix->next;
16845 fix->next = new_fix;
16847 return new_fix;
16850 /* Record that there is a natural barrier in the insn stream at
16851 ADDRESS. */
16852 static void
16853 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16855 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16857 fix->insn = insn;
16858 fix->address = address;
16860 fix->next = NULL;
16861 if (minipool_fix_head != NULL)
16862 minipool_fix_tail->next = fix;
16863 else
16864 minipool_fix_head = fix;
16866 minipool_fix_tail = fix;
16869 /* Record INSN, which will need fixing up to load a value from the
16870 minipool. ADDRESS is the offset of the insn since the start of the
16871 function; LOC is a pointer to the part of the insn which requires
16872 fixing; VALUE is the constant that must be loaded, which is of type
16873 MODE. */
16874 static void
16875 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16876 machine_mode mode, rtx value)
16878 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16880 fix->insn = insn;
16881 fix->address = address;
16882 fix->loc = loc;
16883 fix->mode = mode;
16884 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16885 fix->value = value;
16886 fix->forwards = get_attr_pool_range (insn);
16887 fix->backwards = get_attr_neg_pool_range (insn);
16888 fix->minipool = NULL;
16890 /* If an insn doesn't have a range defined for it, then it isn't
16891 expecting to be reworked by this code. Better to stop now than
16892 to generate duff assembly code. */
16893 gcc_assert (fix->forwards || fix->backwards);
16895 /* If an entry requires 8-byte alignment then assume all constant pools
16896 require 4 bytes of padding. Trying to do this later on a per-pool
16897 basis is awkward because existing pool entries have to be modified. */
16898 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16899 minipool_pad = 4;
16901 if (dump_file)
16903 fprintf (dump_file,
16904 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16905 GET_MODE_NAME (mode),
16906 INSN_UID (insn), (unsigned long) address,
16907 -1 * (long)fix->backwards, (long)fix->forwards);
16908 arm_print_value (dump_file, fix->value);
16909 fprintf (dump_file, "\n");
16912 /* Add it to the chain of fixes. */
16913 fix->next = NULL;
16915 if (minipool_fix_head != NULL)
16916 minipool_fix_tail->next = fix;
16917 else
16918 minipool_fix_head = fix;
16920 minipool_fix_tail = fix;
16923 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16924 Returns the number of insns needed, or 99 if we always want to synthesize
16925 the value. */
16927 arm_max_const_double_inline_cost ()
16929 /* Let the value get synthesized to avoid the use of literal pools. */
16930 if (arm_disable_literal_pool)
16931 return 99;
16933 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16936 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16937 Returns the number of insns needed, or 99 if we don't know how to
16938 do it. */
16940 arm_const_double_inline_cost (rtx val)
16942 rtx lowpart, highpart;
16943 machine_mode mode;
16945 mode = GET_MODE (val);
16947 if (mode == VOIDmode)
16948 mode = DImode;
16950 gcc_assert (GET_MODE_SIZE (mode) == 8);
16952 lowpart = gen_lowpart (SImode, val);
16953 highpart = gen_highpart_mode (SImode, mode, val);
16955 gcc_assert (CONST_INT_P (lowpart));
16956 gcc_assert (CONST_INT_P (highpart));
16958 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16959 NULL_RTX, NULL_RTX, 0, 0)
16960 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16961 NULL_RTX, NULL_RTX, 0, 0));
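/* As a rough example: for the 64-bit constant 0x0000000100000001 both
   halves are 1, each synthesizable with a single MOV, so the cost
   reported here is 2 and the value is cheap enough to build inline
   under the limit returned by arm_max_const_double_inline_cost (3 or
   4 above). A constant whose halves each need several MOV/ORR steps
   exceeds that limit and is then typically loaded from the literal
   pool instead, unless literal pools are disabled, in which case the
   limit is raised to 99 and the value is always synthesized. */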
16964 /* Cost of loading a SImode constant. */
16965 static inline int
16966 arm_const_inline_cost (enum rtx_code code, rtx val)
16968 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16969 NULL_RTX, NULL_RTX, 1, 0);
16972 /* Return true if it is worthwhile to split a 64-bit constant into two
16973 32-bit operations. This is the case if optimizing for size, or
16974 if we have load delay slots, or if one 32-bit part can be done with
16975 a single data operation. */
16976 bool
16977 arm_const_double_by_parts (rtx val)
16979 machine_mode mode = GET_MODE (val);
16980 rtx part;
16982 if (optimize_size || arm_ld_sched)
16983 return true;
16985 if (mode == VOIDmode)
16986 mode = DImode;
16988 part = gen_highpart_mode (SImode, mode, val);
16990 gcc_assert (CONST_INT_P (part));
16992 if (const_ok_for_arm (INTVAL (part))
16993 || const_ok_for_arm (~INTVAL (part)))
16994 return true;
16996 part = gen_lowpart (SImode, val);
16998 gcc_assert (CONST_INT_P (part));
17000 if (const_ok_for_arm (INTVAL (part))
17001 || const_ok_for_arm (~INTVAL (part)))
17002 return true;
17004 return false;
17007 /* Return true if it is possible to inline both the high and low parts
17008 of a 64-bit constant into 32-bit data processing instructions. */
17009 bool
17010 arm_const_double_by_immediates (rtx val)
17012 machine_mode mode = GET_MODE (val);
17013 rtx part;
17015 if (mode == VOIDmode)
17016 mode = DImode;
17018 part = gen_highpart_mode (SImode, mode, val);
17020 gcc_assert (CONST_INT_P (part));
17022 if (!const_ok_for_arm (INTVAL (part)))
17023 return false;
17025 part = gen_lowpart (SImode, val);
17027 gcc_assert (CONST_INT_P (part));
17029 if (!const_ok_for_arm (INTVAL (part)))
17030 return false;
17032 return true;
17035 /* Scan INSN and note any of its operands that need fixing.
17036 If DO_PUSHES is false we do not actually push any of the fixups
17037 needed. */
17038 static void
17039 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17041 int opno;
17043 extract_constrain_insn (insn);
17045 if (recog_data.n_alternatives == 0)
17046 return;
17048 /* Fill in recog_op_alt with information about the constraints of
17049 this insn. */
17050 preprocess_constraints (insn);
17052 const operand_alternative *op_alt = which_op_alt ();
17053 for (opno = 0; opno < recog_data.n_operands; opno++)
17055 /* Things we need to fix can only occur in inputs. */
17056 if (recog_data.operand_type[opno] != OP_IN)
17057 continue;
17059 /* If this alternative is a memory reference, then any mention
17060 of constants in this alternative is really to fool reload
17061 into allowing us to accept one there. We need to fix them up
17062 now so that we output the right code. */
17063 if (op_alt[opno].memory_ok)
17065 rtx op = recog_data.operand[opno];
17067 if (CONSTANT_P (op))
17069 if (do_pushes)
17070 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17071 recog_data.operand_mode[opno], op);
17073 else if (MEM_P (op)
17074 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17075 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17077 if (do_pushes)
17079 rtx cop = avoid_constant_pool_reference (op);
17081 /* Casting the address of something to a mode narrower
17082 than a word can cause avoid_constant_pool_reference()
17083 to return the pool reference itself. That's no good to
17084 us here. Let's just hope that we can use the
17085 constant pool value directly. */
17086 if (op == cop)
17087 cop = get_pool_constant (XEXP (op, 0));
17089 push_minipool_fix (insn, address,
17090 recog_data.operand_loc[opno],
17091 recog_data.operand_mode[opno], cop);
17098 return;
17101 /* Rewrite move insn into subtract of 0 if the condition codes will
17102 be useful in the next conditional jump insn. */
17104 static void
17105 thumb1_reorg (void)
17107 basic_block bb;
17109 FOR_EACH_BB_FN (bb, cfun)
17111 rtx dest, src;
17112 rtx pat, op0, set = NULL;
17113 rtx_insn *prev, *insn = BB_END (bb);
17114 bool insn_clobbered = false;
17116 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17117 insn = PREV_INSN (insn);
17119 /* Find the last cbranchsi4_insn in basic block BB. */
17120 if (insn == BB_HEAD (bb)
17121 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17122 continue;
17124 /* Get the register with which we are comparing. */
17125 pat = PATTERN (insn);
17126 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17128 /* Find the first flag setting insn before INSN in basic block BB. */
17129 gcc_assert (insn != BB_HEAD (bb));
17130 for (prev = PREV_INSN (insn);
17131 (!insn_clobbered
17132 && prev != BB_HEAD (bb)
17133 && (NOTE_P (prev)
17134 || DEBUG_INSN_P (prev)
17135 || ((set = single_set (prev)) != NULL
17136 && get_attr_conds (prev) == CONDS_NOCOND)));
17137 prev = PREV_INSN (prev))
17139 if (reg_set_p (op0, prev))
17140 insn_clobbered = true;
17143 /* Skip if op0 is clobbered by an insn other than prev. */
17144 if (insn_clobbered)
17145 continue;
17147 if (!set)
17148 continue;
17150 dest = SET_DEST (set);
17151 src = SET_SRC (set);
17152 if (!low_register_operand (dest, SImode)
17153 || !low_register_operand (src, SImode))
17154 continue;
17156 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17157 in INSN. Both src and dest of the move insn are checked. */
17158 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17160 dest = copy_rtx (dest);
17161 src = copy_rtx (src);
17162 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17163 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17164 INSN_CODE (prev) = -1;
17165 /* Set test register in INSN to dest. */
17166 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17167 INSN_CODE (insn) = -1;
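/* Roughly, the intent of the transformation above is that a sequence
   such as
        mov   r3, r2
        ...
        cmp   r3, #0
        bne   .L1
   becomes
        subs  r3, r2, #0
        ...
        bne   .L1
   since the flag-setting SUBS already establishes the condition that
   the compare against zero would compute, allowing the redundant
   comparison to be dropped when the conditional branch is output. */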
17172 /* Convert instructions to their cc-clobbering variant if possible, since
17173 that allows us to use smaller encodings. */
17175 static void
17176 thumb2_reorg (void)
17178 basic_block bb;
17179 regset_head live;
17181 INIT_REG_SET (&live);
17183 /* We are freeing block_for_insn in the toplev to keep compatibility
17184 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17185 compute_bb_for_insn ();
17186 df_analyze ();
17188 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17190 FOR_EACH_BB_FN (bb, cfun)
17192 if (current_tune->disparage_flag_setting_t16_encodings
17193 && optimize_bb_for_speed_p (bb))
17194 continue;
17196 rtx_insn *insn;
17197 Convert_Action action = SKIP;
17198 Convert_Action action_for_partial_flag_setting
17199 = (current_tune->disparage_partial_flag_setting_t16_encodings
17200 && optimize_bb_for_speed_p (bb))
17201 ? SKIP : CONV;
17203 COPY_REG_SET (&live, DF_LR_OUT (bb));
17204 df_simulate_initialize_backwards (bb, &live);
17205 FOR_BB_INSNS_REVERSE (bb, insn)
17207 if (NONJUMP_INSN_P (insn)
17208 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17209 && GET_CODE (PATTERN (insn)) == SET)
17211 action = SKIP;
17212 rtx pat = PATTERN (insn);
17213 rtx dst = XEXP (pat, 0);
17214 rtx src = XEXP (pat, 1);
17215 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17217 if (!OBJECT_P (src))
17218 op0 = XEXP (src, 0);
17220 if (BINARY_P (src))
17221 op1 = XEXP (src, 1);
17223 if (low_register_operand (dst, SImode))
17225 switch (GET_CODE (src))
17227 case PLUS:
17228 /* Adding two registers and storing the result
17229 in the first source is already a 16-bit
17230 operation. */
17231 if (rtx_equal_p (dst, op0)
17232 && register_operand (op1, SImode))
17233 break;
17235 if (low_register_operand (op0, SImode))
17237 /* ADDS <Rd>,<Rn>,<Rm> */
17238 if (low_register_operand (op1, SImode))
17239 action = CONV;
17240 /* ADDS <Rdn>,#<imm8> */
17241 /* SUBS <Rdn>,#<imm8> */
17242 else if (rtx_equal_p (dst, op0)
17243 && CONST_INT_P (op1)
17244 && IN_RANGE (INTVAL (op1), -255, 255))
17245 action = CONV;
17246 /* ADDS <Rd>,<Rn>,#<imm3> */
17247 /* SUBS <Rd>,<Rn>,#<imm3> */
17248 else if (CONST_INT_P (op1)
17249 && IN_RANGE (INTVAL (op1), -7, 7))
17250 action = CONV;
17252 /* ADCS <Rd>, <Rn> */
17253 else if (GET_CODE (XEXP (src, 0)) == PLUS
17254 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17255 && low_register_operand (XEXP (XEXP (src, 0), 1),
17256 SImode)
17257 && COMPARISON_P (op1)
17258 && cc_register (XEXP (op1, 0), VOIDmode)
17259 && maybe_get_arm_condition_code (op1) == ARM_CS
17260 && XEXP (op1, 1) == const0_rtx)
17261 action = CONV;
17262 break;
17264 case MINUS:
17265 /* RSBS <Rd>,<Rn>,#0
17266 Not handled here: see NEG below. */
17267 /* SUBS <Rd>,<Rn>,#<imm3>
17268 SUBS <Rdn>,#<imm8>
17269 Not handled here: see PLUS above. */
17270 /* SUBS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op0, SImode)
17272 && low_register_operand (op1, SImode))
17273 action = CONV;
17274 break;
17276 case MULT:
17277 /* MULS <Rdm>,<Rn>,<Rdm>
17278 As an exception to the rule, this is only used
17279 when optimizing for size since MULS is slow on all
17280 known implementations. We do not even want to use
17281 MULS in cold code, if optimizing for speed, so we
17282 test the global flag here. */
17283 if (!optimize_size)
17284 break;
17285 /* else fall through. */
17286 case AND:
17287 case IOR:
17288 case XOR:
17289 /* ANDS <Rdn>,<Rm> */
17290 if (rtx_equal_p (dst, op0)
17291 && low_register_operand (op1, SImode))
17292 action = action_for_partial_flag_setting;
17293 else if (rtx_equal_p (dst, op1)
17294 && low_register_operand (op0, SImode))
17295 action = action_for_partial_flag_setting == SKIP
17296 ? SKIP : SWAP_CONV;
17297 break;
17299 case ASHIFTRT:
17300 case ASHIFT:
17301 case LSHIFTRT:
17302 /* ASRS <Rdn>,<Rm> */
17303 /* LSRS <Rdn>,<Rm> */
17304 /* LSLS <Rdn>,<Rm> */
17305 if (rtx_equal_p (dst, op0)
17306 && low_register_operand (op1, SImode))
17307 action = action_for_partial_flag_setting;
17308 /* ASRS <Rd>,<Rm>,#<imm5> */
17309 /* LSRS <Rd>,<Rm>,#<imm5> */
17310 /* LSLS <Rd>,<Rm>,#<imm5> */
17311 else if (low_register_operand (op0, SImode)
17312 && CONST_INT_P (op1)
17313 && IN_RANGE (INTVAL (op1), 0, 31))
17314 action = action_for_partial_flag_setting;
17315 break;
17317 case ROTATERT:
17318 /* RORS <Rdn>,<Rm> */
17319 if (rtx_equal_p (dst, op0)
17320 && low_register_operand (op1, SImode))
17321 action = action_for_partial_flag_setting;
17322 break;
17324 case NOT:
17325 /* MVNS <Rd>,<Rm> */
17326 if (low_register_operand (op0, SImode))
17327 action = action_for_partial_flag_setting;
17328 break;
17330 case NEG:
17331 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17332 if (low_register_operand (op0, SImode))
17333 action = CONV;
17334 break;
17336 case CONST_INT:
17337 /* MOVS <Rd>,#<imm8> */
17338 if (CONST_INT_P (src)
17339 && IN_RANGE (INTVAL (src), 0, 255))
17340 action = action_for_partial_flag_setting;
17341 break;
17343 case REG:
17344 /* MOVS and MOV<c> with registers have different
17345 encodings, so are not relevant here. */
17346 break;
17348 default:
17349 break;
17353 if (action != SKIP)
17355 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17356 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17357 rtvec vec;
17359 if (action == SWAP_CONV)
17361 src = copy_rtx (src);
17362 XEXP (src, 0) = op1;
17363 XEXP (src, 1) = op0;
17364 pat = gen_rtx_SET (VOIDmode, dst, src);
17365 vec = gen_rtvec (2, pat, clobber);
17367 else /* action == CONV */
17368 vec = gen_rtvec (2, pat, clobber);
17370 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17371 INSN_CODE (insn) = -1;
17375 if (NONDEBUG_INSN_P (insn))
17376 df_simulate_one_insn_backwards (bb, insn, &live);
17380 CLEAR_REG_SET (&live);
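/* For example, when the condition codes are dead after the insn, a
   32-bit "add r0, r1, r2" can be rewritten by the pass above as
   "adds r0, r1, r2", which has a 16-bit Thumb-2 encoding; the same
   idea applies to the SUBS/ANDS/LSLS/MOVS/MVNS/NEGS forms handled in
   the switch above. */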
17383 /* GCC puts the pool in the wrong place for ARM, since we can only
17384 load addresses from within a limited distance of the pc. We do some
17385 special munging to move the constant pool values to the correct
17386 point in the code. */
17387 static void
17388 arm_reorg (void)
17390 rtx_insn *insn;
17391 HOST_WIDE_INT address = 0;
17392 Mfix * fix;
17394 if (TARGET_THUMB1)
17395 thumb1_reorg ();
17396 else if (TARGET_THUMB2)
17397 thumb2_reorg ();
17399 /* Ensure all insns that must be split have been split at this point.
17400 Otherwise, the pool placement code below may compute incorrect
17401 insn lengths. Note that when optimizing, all insns have already
17402 been split at this point. */
17403 if (!optimize)
17404 split_all_insns_noflow ();
17406 minipool_fix_head = minipool_fix_tail = NULL;
17408 /* The first insn must always be a note, or the code below won't
17409 scan it properly. */
17410 insn = get_insns ();
17411 gcc_assert (NOTE_P (insn));
17412 minipool_pad = 0;
17414 /* Scan all the insns and record the operands that will need fixing. */
17415 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17417 if (BARRIER_P (insn))
17418 push_minipool_barrier (insn, address);
17419 else if (INSN_P (insn))
17421 rtx_jump_table_data *table;
17423 note_invalid_constants (insn, address, true);
17424 address += get_attr_length (insn);
17426 /* If the insn is a vector jump, add the size of the table
17427 and skip the table. */
17428 if (tablejump_p (insn, NULL, &table))
17430 address += get_jump_table_size (table);
17431 insn = table;
17434 else if (LABEL_P (insn))
17435 /* Add the worst-case padding due to alignment. We don't add
17436 the _current_ padding because the minipool insertions
17437 themselves might change it. */
17438 address += get_label_padding (insn);
17441 fix = minipool_fix_head;
17443 /* Now scan the fixups and perform the required changes. */
17444 while (fix)
17446 Mfix * ftmp;
17447 Mfix * fdel;
17448 Mfix * last_added_fix;
17449 Mfix * last_barrier = NULL;
17450 Mfix * this_fix;
17452 /* Skip any further barriers before the next fix. */
17453 while (fix && BARRIER_P (fix->insn))
17454 fix = fix->next;
17456 /* No more fixes. */
17457 if (fix == NULL)
17458 break;
17460 last_added_fix = NULL;
17462 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17464 if (BARRIER_P (ftmp->insn))
17466 if (ftmp->address >= minipool_vector_head->max_address)
17467 break;
17469 last_barrier = ftmp;
17471 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17472 break;
17474 last_added_fix = ftmp; /* Keep track of the last fix added. */
17477 /* If we found a barrier, drop back to that; any fixes that we
17478 could have reached but come after the barrier will now go in
17479 the next mini-pool. */
17480 if (last_barrier != NULL)
17482 /* Reduce the refcount for those fixes that won't go into this
17483 pool after all. */
17484 for (fdel = last_barrier->next;
17485 fdel && fdel != ftmp;
17486 fdel = fdel->next)
17488 fdel->minipool->refcount--;
17489 fdel->minipool = NULL;
17492 ftmp = last_barrier;
17494 else
17496 /* ftmp is the first fix that we can't fit into this pool and
17497 there are no natural barriers that we could use. Insert a
17498 new barrier in the code somewhere between the previous
17499 fix and this one, and arrange to jump around it. */
17500 HOST_WIDE_INT max_address;
17502 /* The last item on the list of fixes must be a barrier, so
17503 we can never run off the end of the list of fixes without
17504 last_barrier being set. */
17505 gcc_assert (ftmp);
17507 max_address = minipool_vector_head->max_address;
17508 /* Check that there isn't another fix that is in range that
17509 we couldn't fit into this pool because the pool was
17510 already too large: we need to put the pool before such an
17511 instruction. The pool itself may come just after the
17512 fix because create_fix_barrier also allows space for a
17513 jump instruction. */
17514 if (ftmp->address < max_address)
17515 max_address = ftmp->address + 1;
17517 last_barrier = create_fix_barrier (last_added_fix, max_address);
17520 assign_minipool_offsets (last_barrier);
17522 while (ftmp)
17524 if (!BARRIER_P (ftmp->insn)
17525 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17526 == NULL))
17527 break;
17529 ftmp = ftmp->next;
17532 /* Scan over the fixes we have identified for this pool, fixing them
17533 up and adding the constants to the pool itself. */
17534 for (this_fix = fix; this_fix && ftmp != this_fix;
17535 this_fix = this_fix->next)
17536 if (!BARRIER_P (this_fix->insn))
17538 rtx addr
17539 = plus_constant (Pmode,
17540 gen_rtx_LABEL_REF (VOIDmode,
17541 minipool_vector_label),
17542 this_fix->minipool->offset);
17543 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17546 dump_minipool (last_barrier->insn);
17547 fix = ftmp;
17550 /* From now on we must synthesize any constants that we can't handle
17551 directly. This can happen if the RTL gets split during final
17552 instruction generation. */
17553 cfun->machine->after_arm_reorg = 1;
17555 /* Free the minipool memory. */
17556 obstack_free (&minipool_obstack, minipool_startobj);
17559 /* Routines to output assembly language. */
17561 /* Return the string representation of the passed-in real value. */
17562 static const char *
17563 fp_const_from_val (REAL_VALUE_TYPE *r)
17565 if (!fp_consts_inited)
17566 init_fp_table ();
17568 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17569 return "0";
17572 /* OPERANDS[0] is the entire list of insns that constitute pop,
17573 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17574 is in the list, UPDATE is true iff the list contains explicit
17575 update of base register. */
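/* Illustrative output (not from the original source): popping {r4, r5, pc}
   with SP as the base register and writeback gives

       pop     {r4, r5, pc}        @ unified syntax
       ldmfd   sp!, {r4, r5, pc}   @ divided syntax

   and a "^" is appended when returning from an interrupt handler.  */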
17576 void
17577 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17578 bool update)
17580 int i;
17581 char pattern[100];
17582 int offset;
17583 const char *conditional;
17584 int num_saves = XVECLEN (operands[0], 0);
17585 unsigned int regno;
17586 unsigned int regno_base = REGNO (operands[1]);
17588 offset = 0;
17589 offset += update ? 1 : 0;
17590 offset += return_pc ? 1 : 0;
17592 /* Is the base register in the list? */
17593 for (i = offset; i < num_saves; i++)
17595 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17596 /* If SP is in the list, then the base register must be SP. */
17597 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17598 /* If base register is in the list, there must be no explicit update. */
17599 if (regno == regno_base)
17600 gcc_assert (!update);
17603 conditional = reverse ? "%?%D0" : "%?%d0";
17604 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17606 /* Output pop (not stmfd) because it has a shorter encoding. */
17607 gcc_assert (update);
17608 sprintf (pattern, "pop%s\t{", conditional);
17610 else
17612 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17613 It's just a convention; their semantics are identical. */
17614 if (regno_base == SP_REGNUM)
17615 sprintf (pattern, "ldm%sfd\t", conditional);
17616 else if (TARGET_UNIFIED_ASM)
17617 sprintf (pattern, "ldmia%s\t", conditional);
17618 else
17619 sprintf (pattern, "ldm%sia\t", conditional);
17621 strcat (pattern, reg_names[regno_base]);
17622 if (update)
17623 strcat (pattern, "!, {");
17624 else
17625 strcat (pattern, ", {");
17628 /* Output the first destination register. */
17629 strcat (pattern,
17630 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17632 /* Output the rest of the destination registers. */
17633 for (i = offset + 1; i < num_saves; i++)
17635 strcat (pattern, ", ");
17636 strcat (pattern,
17637 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17640 strcat (pattern, "}");
17642 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17643 strcat (pattern, "^");
17645 output_asm_insn (pattern, &cond);
17649 /* Output the assembly for a store multiple. */
17651 const char *
17652 vfp_output_vstmd (rtx * operands)
17654 char pattern[100];
17655 int p;
17656 int base;
17657 int i;
17658 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17659 ? XEXP (operands[0], 0)
17660 : XEXP (XEXP (operands[0], 0), 0);
17661 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17663 if (push_p)
17664 strcpy (pattern, "vpush%?.64\t{%P1");
17665 else
17666 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17668 p = strlen (pattern);
17670 gcc_assert (REG_P (operands[1]));
17672 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17673 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17675 p += sprintf (&pattern[p], ", d%d", base + i);
17677 strcpy (&pattern[p], "}");
17679 output_asm_insn (pattern, operands);
17680 return "";
17684 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17685 number of bytes pushed. */
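/* For illustration (not from the original source): a call with COUNT == 3
   emits RTL equivalent to a single "vpush {dN, dN+1, dN+2}" and returns 24;
   a COUNT of 20 is split by the recursion below into stores of 4 and 16
   D registers, and a COUNT of exactly 2 on pre-v6 cores is widened to 3 to
   dodge the ARM10 VFPr1 erratum.  */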
17687 static int
17688 vfp_emit_fstmd (int base_reg, int count)
17690 rtx par;
17691 rtx dwarf;
17692 rtx tmp, reg;
17693 int i;
17695 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17696 register pairs are stored by a store multiple insn. We avoid this
17697 by pushing an extra pair. */
17698 if (count == 2 && !arm_arch6)
17700 if (base_reg == LAST_VFP_REGNUM - 3)
17701 base_reg -= 2;
17702 count++;
17705 /* FSTMD may not store more than 16 doubleword registers at once. Split
17706 larger stores into multiple parts (up to a maximum of two, in
17707 practice). */
17708 if (count > 16)
17710 int saved;
17711 /* NOTE: base_reg is an internal register number, so each D register
17712 counts as 2. */
17713 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17714 saved += vfp_emit_fstmd (base_reg, 16);
17715 return saved;
17718 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17719 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17721 reg = gen_rtx_REG (DFmode, base_reg);
17722 base_reg += 2;
17724 XVECEXP (par, 0, 0)
17725 = gen_rtx_SET (VOIDmode,
17726 gen_frame_mem
17727 (BLKmode,
17728 gen_rtx_PRE_MODIFY (Pmode,
17729 stack_pointer_rtx,
17730 plus_constant
17731 (Pmode, stack_pointer_rtx,
17732 - (count * 8)))
17734 gen_rtx_UNSPEC (BLKmode,
17735 gen_rtvec (1, reg),
17736 UNSPEC_PUSH_MULT));
17738 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17739 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17740 RTX_FRAME_RELATED_P (tmp) = 1;
17741 XVECEXP (dwarf, 0, 0) = tmp;
17743 tmp = gen_rtx_SET (VOIDmode,
17744 gen_frame_mem (DFmode, stack_pointer_rtx),
17745 reg);
17746 RTX_FRAME_RELATED_P (tmp) = 1;
17747 XVECEXP (dwarf, 0, 1) = tmp;
17749 for (i = 1; i < count; i++)
17751 reg = gen_rtx_REG (DFmode, base_reg);
17752 base_reg += 2;
17753 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17755 tmp = gen_rtx_SET (VOIDmode,
17756 gen_frame_mem (DFmode,
17757 plus_constant (Pmode,
17758 stack_pointer_rtx,
17759 i * 8)),
17760 reg);
17761 RTX_FRAME_RELATED_P (tmp) = 1;
17762 XVECEXP (dwarf, 0, i + 1) = tmp;
17765 par = emit_insn (par);
17766 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17767 RTX_FRAME_RELATED_P (par) = 1;
17769 return count * 8;
17772 /* Emit a call instruction with pattern PAT. ADDR is the address of
17773 the call target. */
17775 void
17776 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17778 rtx insn;
17780 insn = emit_call_insn (pat);
17782 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17783 If the call might use such an entry, add a use of the PIC register
17784 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17785 if (TARGET_VXWORKS_RTP
17786 && flag_pic
17787 && !sibcall
17788 && GET_CODE (addr) == SYMBOL_REF
17789 && (SYMBOL_REF_DECL (addr)
17790 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17791 : !SYMBOL_REF_LOCAL_P (addr)))
17793 require_pic_register ();
17794 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17797 if (TARGET_AAPCS_BASED)
17799 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17800 linker. We need to add an IP clobber to allow setting
17801 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17802 is not needed since it's a fixed register. */
17803 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17804 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17808 /* Output a 'call' insn. */
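/* Illustrative output (not from the original source, pre-ARMv5 only):
   a call through r2 is emitted as

       mov     lr, pc
       bx      r2          @ "mov pc, r2" without interworking or ARMv4T

   and a call through lr first copies lr into ip.  */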
17809 const char *
17810 output_call (rtx *operands)
17812 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17814 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17815 if (REGNO (operands[0]) == LR_REGNUM)
17817 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17818 output_asm_insn ("mov%?\t%0, %|lr", operands);
17821 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17823 if (TARGET_INTERWORK || arm_arch4t)
17824 output_asm_insn ("bx%?\t%0", operands);
17825 else
17826 output_asm_insn ("mov%?\t%|pc, %0", operands);
17828 return "";
17831 /* Output a 'call' insn whose target is a reference in memory. This is
17832 disabled for ARMv5 and later, where we prefer a blx instead, because
17833 going through memory otherwise carries a significant performance overhead. */
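/* Illustrative output (not from the original source): with interworking
   enabled, a call through a memory operand such as [r3] is emitted as

       ldr     ip, [r3]
       mov     lr, pc
       bx      ip                                                       */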
17834 const char *
17835 output_call_mem (rtx *operands)
17837 gcc_assert (!arm_arch5);
17838 if (TARGET_INTERWORK)
17840 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17841 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17842 output_asm_insn ("bx%?\t%|ip", operands);
17844 else if (regno_use_in (LR_REGNUM, operands[0]))
17846 /* LR is used in the memory address. We load the address in the
17847 first instruction. It's safe to use IP as the target of the
17848 load since the call will kill it anyway. */
17849 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17850 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17851 if (arm_arch4t)
17852 output_asm_insn ("bx%?\t%|ip", operands);
17853 else
17854 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17856 else
17858 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17859 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17862 return "";
17866 /* Output a move from arm registers to arm registers of a long double.
17867 OPERANDS[0] is the destination.
17868 OPERANDS[1] is the source. */
17869 const char *
17870 output_mov_long_double_arm_from_arm (rtx *operands)
17872 /* We have to be careful here because the two might overlap. */
17873 int dest_start = REGNO (operands[0]);
17874 int src_start = REGNO (operands[1]);
17875 rtx ops[2];
17876 int i;
17878 if (dest_start < src_start)
17880 for (i = 0; i < 3; i++)
17882 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17883 ops[1] = gen_rtx_REG (SImode, src_start + i);
17884 output_asm_insn ("mov%?\t%0, %1", ops);
17887 else
17889 for (i = 2; i >= 0; i--)
17891 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17892 ops[1] = gen_rtx_REG (SImode, src_start + i);
17893 output_asm_insn ("mov%?\t%0, %1", ops);
17897 return "";
17900 void
17901 arm_emit_movpair (rtx dest, rtx src)
17903 /* If the src is an immediate, simplify it. */
17904 if (CONST_INT_P (src))
17906 HOST_WIDE_INT val = INTVAL (src);
17907 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17908 if ((val >> 16) & 0x0000ffff)
17909 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17910 GEN_INT (16)),
17911 GEN_INT ((val >> 16) & 0x0000ffff));
17912 return;
17914 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17915 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
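/* Worked example for arm_emit_movpair (illustrative, not from the original
   source): for SRC == 0x12345678 the code above emits a set of the low half,
   0x5678 (a movw), followed by a zero_extract set of the high half, 0x1234
   (a movt); the second insn is skipped when the high half is zero.  */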
17918 /* Output a move between double words. It must be REG<-MEM
17919 or MEM<-REG. */
17920 const char *
17921 output_move_double (rtx *operands, bool emit, int *count)
17923 enum rtx_code code0 = GET_CODE (operands[0]);
17924 enum rtx_code code1 = GET_CODE (operands[1]);
17925 rtx otherops[3];
17926 if (count)
17927 *count = 1;
17929 /* The only case when this might happen is when
17930 you are looking at the length of a DImode instruction
17931 that has an invalid constant in it. */
17932 if (code0 == REG && code1 != MEM)
17934 gcc_assert (!emit);
17935 *count = 2;
17936 return "";
17939 if (code0 == REG)
17941 unsigned int reg0 = REGNO (operands[0]);
17943 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17945 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17947 switch (GET_CODE (XEXP (operands[1], 0)))
17949 case REG:
17951 if (emit)
17953 if (TARGET_LDRD
17954 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17956 else
17957 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17959 break;
17961 case PRE_INC:
17962 gcc_assert (TARGET_LDRD);
17963 if (emit)
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17965 break;
17967 case PRE_DEC:
17968 if (emit)
17970 if (TARGET_LDRD)
17971 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17972 else
17973 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17975 break;
17977 case POST_INC:
17978 if (emit)
17980 if (TARGET_LDRD)
17981 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17982 else
17983 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17985 break;
17987 case POST_DEC:
17988 gcc_assert (TARGET_LDRD);
17989 if (emit)
17990 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17991 break;
17993 case PRE_MODIFY:
17994 case POST_MODIFY:
17995 /* Autoincrement addressing modes should never have overlapping
17996 base and destination registers, and overlapping index registers
17997 are already prohibited, so this doesn't need to worry about
17998 fix_cm3_ldrd. */
17999 otherops[0] = operands[0];
18000 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18001 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18003 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18005 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18007 /* Registers overlap so split out the increment. */
18008 if (emit)
18010 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18011 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18013 if (count)
18014 *count = 2;
18016 else
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can
18020 handle, fix these up with a pair of ldr. */
18021 if (TARGET_THUMB2
18022 || !CONST_INT_P (otherops[2])
18023 || (INTVAL (otherops[2]) > -256
18024 && INTVAL (otherops[2]) < 256))
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18029 else
18031 if (emit)
18033 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18034 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18036 if (count)
18037 *count = 2;
18042 else
18044 /* Use a single insn if we can.
18045 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18046 fix these up with a pair of ldr. */
18047 if (TARGET_THUMB2
18048 || !CONST_INT_P (otherops[2])
18049 || (INTVAL (otherops[2]) > -256
18050 && INTVAL (otherops[2]) < 256))
18052 if (emit)
18053 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18055 else
18057 if (emit)
18059 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18060 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18062 if (count)
18063 *count = 2;
18066 break;
18068 case LABEL_REF:
18069 case CONST:
18070 /* We might be able to use ldrd %0, %1 here. However, the range is
18071 different from that of ldr/adr, and it is broken on some ARMv7-M
18072 implementations. */
18073 /* Use the second register of the pair to avoid problematic
18074 overlap. */
18075 otherops[1] = operands[1];
18076 if (emit)
18077 output_asm_insn ("adr%?\t%0, %1", otherops);
18078 operands[1] = otherops[0];
18079 if (emit)
18081 if (TARGET_LDRD)
18082 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18083 else
18084 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18087 if (count)
18088 *count = 2;
18089 break;
18091 /* ??? This needs checking for thumb2. */
18092 default:
18093 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18094 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18096 otherops[0] = operands[0];
18097 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18098 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18100 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18102 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18104 switch ((int) INTVAL (otherops[2]))
18106 case -8:
18107 if (emit)
18108 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18109 return "";
18110 case -4:
18111 if (TARGET_THUMB2)
18112 break;
18113 if (emit)
18114 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18115 return "";
18116 case 4:
18117 if (TARGET_THUMB2)
18118 break;
18119 if (emit)
18120 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18121 return "";
18124 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18125 operands[1] = otherops[0];
18126 if (TARGET_LDRD
18127 && (REG_P (otherops[2])
18128 || TARGET_THUMB2
18129 || (CONST_INT_P (otherops[2])
18130 && INTVAL (otherops[2]) > -256
18131 && INTVAL (otherops[2]) < 256)))
18133 if (reg_overlap_mentioned_p (operands[0],
18134 otherops[2]))
18136 rtx tmp;
18137 /* Swap base and index registers over to
18138 avoid a conflict. */
18139 tmp = otherops[1];
18140 otherops[1] = otherops[2];
18141 otherops[2] = tmp;
18143 /* If both registers conflict, it will usually
18144 have been fixed by a splitter. */
18145 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18146 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18148 if (emit)
18150 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18151 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18153 if (count)
18154 *count = 2;
18156 else
18158 otherops[0] = operands[0];
18159 if (emit)
18160 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18162 return "";
18165 if (CONST_INT_P (otherops[2]))
18167 if (emit)
18169 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18170 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18171 else
18172 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18175 else
18177 if (emit)
18178 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18181 else
18183 if (emit)
18184 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18187 if (count)
18188 *count = 2;
18190 if (TARGET_LDRD)
18191 return "ldr%(d%)\t%0, [%1]";
18193 return "ldm%(ia%)\t%1, %M0";
18195 else
18197 otherops[1] = adjust_address (operands[1], SImode, 4);
18198 /* Take care of overlapping base/data reg. */
18199 if (reg_mentioned_p (operands[0], operands[1]))
18201 if (emit)
18203 output_asm_insn ("ldr%?\t%0, %1", otherops);
18204 output_asm_insn ("ldr%?\t%0, %1", operands);
18206 if (count)
18207 *count = 2;
18210 else
18212 if (emit)
18214 output_asm_insn ("ldr%?\t%0, %1", operands);
18215 output_asm_insn ("ldr%?\t%0, %1", otherops);
18217 if (count)
18218 *count = 2;
18223 else
18225 /* Constraints should ensure this. */
18226 gcc_assert (code0 == MEM && code1 == REG);
18227 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18228 || (TARGET_ARM && TARGET_LDRD));
18230 switch (GET_CODE (XEXP (operands[0], 0)))
18232 case REG:
18233 if (emit)
18235 if (TARGET_LDRD)
18236 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18237 else
18238 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18240 break;
18242 case PRE_INC:
18243 gcc_assert (TARGET_LDRD);
18244 if (emit)
18245 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18246 break;
18248 case PRE_DEC:
18249 if (emit)
18251 if (TARGET_LDRD)
18252 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18253 else
18254 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18256 break;
18258 case POST_INC:
18259 if (emit)
18261 if (TARGET_LDRD)
18262 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18263 else
18264 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18266 break;
18268 case POST_DEC:
18269 gcc_assert (TARGET_LDRD);
18270 if (emit)
18271 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18272 break;
18274 case PRE_MODIFY:
18275 case POST_MODIFY:
18276 otherops[0] = operands[1];
18277 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18278 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18280 /* IWMMXT allows offsets larger than ldrd can handle,
18281 fix these up with a pair of ldr. */
18282 if (!TARGET_THUMB2
18283 && CONST_INT_P (otherops[2])
18284 && (INTVAL(otherops[2]) <= -256
18285 || INTVAL(otherops[2]) >= 256))
18287 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18289 if (emit)
18291 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18292 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18294 if (count)
18295 *count = 2;
18297 else
18299 if (emit)
18301 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18302 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18304 if (count)
18305 *count = 2;
18308 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18310 if (emit)
18311 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18313 else
18315 if (emit)
18316 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18318 break;
18320 case PLUS:
18321 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18322 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18324 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18326 case -8:
18327 if (emit)
18328 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18329 return "";
18331 case -4:
18332 if (TARGET_THUMB2)
18333 break;
18334 if (emit)
18335 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18336 return "";
18338 case 4:
18339 if (TARGET_THUMB2)
18340 break;
18341 if (emit)
18342 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18343 return "";
18346 if (TARGET_LDRD
18347 && (REG_P (otherops[2])
18348 || TARGET_THUMB2
18349 || (CONST_INT_P (otherops[2])
18350 && INTVAL (otherops[2]) > -256
18351 && INTVAL (otherops[2]) < 256)))
18353 otherops[0] = operands[1];
18354 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18355 if (emit)
18356 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18357 return "";
18359 /* Fall through */
18361 default:
18362 otherops[0] = adjust_address (operands[0], SImode, 4);
18363 otherops[1] = operands[1];
18364 if (emit)
18366 output_asm_insn ("str%?\t%1, %0", operands);
18367 output_asm_insn ("str%?\t%H1, %0", otherops);
18369 if (count)
18370 *count = 2;
18374 return "";
18377 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18378 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
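/* Illustrative behaviour (not from the original source): a quad-word load
   whose address is a plain register is emitted as a single ldmia of four
   consecutive core registers, while a register-to-register move falls back
   to four mov instructions ordered so that overlapping source and
   destination ranges are not clobbered.  */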
18380 const char *
18381 output_move_quad (rtx *operands)
18383 if (REG_P (operands[0]))
18385 /* Load, or reg->reg move. */
18387 if (MEM_P (operands[1]))
18389 switch (GET_CODE (XEXP (operands[1], 0)))
18391 case REG:
18392 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18393 break;
18395 case LABEL_REF:
18396 case CONST:
18397 output_asm_insn ("adr%?\t%0, %1", operands);
18398 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18399 break;
18401 default:
18402 gcc_unreachable ();
18405 else
18407 rtx ops[2];
18408 int dest, src, i;
18410 gcc_assert (REG_P (operands[1]));
18412 dest = REGNO (operands[0]);
18413 src = REGNO (operands[1]);
18415 /* This seems pretty dumb, but hopefully GCC won't try to do it
18416 very often. */
18417 if (dest < src)
18418 for (i = 0; i < 4; i++)
18420 ops[0] = gen_rtx_REG (SImode, dest + i);
18421 ops[1] = gen_rtx_REG (SImode, src + i);
18422 output_asm_insn ("mov%?\t%0, %1", ops);
18424 else
18425 for (i = 3; i >= 0; i--)
18427 ops[0] = gen_rtx_REG (SImode, dest + i);
18428 ops[1] = gen_rtx_REG (SImode, src + i);
18429 output_asm_insn ("mov%?\t%0, %1", ops);
18433 else
18435 gcc_assert (MEM_P (operands[0]));
18436 gcc_assert (REG_P (operands[1]));
18437 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18439 switch (GET_CODE (XEXP (operands[0], 0)))
18441 case REG:
18442 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18443 break;
18445 default:
18446 gcc_unreachable ();
18450 return "";
18453 /* Output a VFP load or store instruction. */
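/* Illustrative outputs (not from the original source): a DFmode load from
   [r0, #8] becomes "vldr.64 dN, [r0, #8]", a post-increment load becomes
   "vldmia.64 r0!, {dN}", and SFmode operands use the ".32" forms instead.  */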
18455 const char *
18456 output_move_vfp (rtx *operands)
18458 rtx reg, mem, addr, ops[2];
18459 int load = REG_P (operands[0]);
18460 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18461 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18462 const char *templ;
18463 char buff[50];
18464 machine_mode mode;
18466 reg = operands[!load];
18467 mem = operands[load];
18469 mode = GET_MODE (reg);
18471 gcc_assert (REG_P (reg));
18472 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18473 gcc_assert (mode == SFmode
18474 || mode == DFmode
18475 || mode == SImode
18476 || mode == DImode
18477 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18478 gcc_assert (MEM_P (mem));
18480 addr = XEXP (mem, 0);
18482 switch (GET_CODE (addr))
18484 case PRE_DEC:
18485 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18486 ops[0] = XEXP (addr, 0);
18487 ops[1] = reg;
18488 break;
18490 case POST_INC:
18491 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18492 ops[0] = XEXP (addr, 0);
18493 ops[1] = reg;
18494 break;
18496 default:
18497 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18498 ops[0] = reg;
18499 ops[1] = mem;
18500 break;
18503 sprintf (buff, templ,
18504 load ? "ld" : "st",
18505 dp ? "64" : "32",
18506 dp ? "P" : "",
18507 integer_p ? "\t%@ int" : "");
18508 output_asm_insn (buff, ops);
18510 return "";
18513 /* Output a Neon double-word or quad-word load or store, or a load
18514 or store for larger structure modes.
18516 WARNING: The ordering of elements is weird in big-endian mode,
18517 because the EABI requires that vectors stored in memory appear
18518 as though they were stored by a VSTM instruction.
18519 GCC RTL defines element ordering based on in-memory order.
18520 This can be different from the architectural ordering of elements
18521 within a NEON register. The intrinsics defined in arm_neon.h use the
18522 NEON register element ordering, not the GCC RTL element ordering.
18524 For example, the in-memory ordering of a big-endian quadword
18525 vector with 16-bit elements when stored from register pair {d0,d1}
18526 will be (lowest address first, d0[N] is NEON register element N):
18528 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18530 When necessary, quadword registers (dN, dN+1) are moved to ARM
18531 registers from rN in the order:
18533 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18535 So that STM/LDM can be used on vectors in ARM registers, and the
18536 same memory layout will result as if VSTM/VLDM were used.
18538 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18539 possible, which allows use of appropriate alignment tags.
18540 Note that the choice of "64" is independent of the actual vector
18541 element size; this size simply ensures that the behavior is
18542 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18544 Due to limitations of those instructions, use of VST1.64/VLD1.64
18545 is not possible if:
18546 - the address contains PRE_DEC, or
18547 - the mode refers to more than 4 double-word registers
18549 In those cases, it would be possible to replace VSTM/VLDM by a
18550 sequence of instructions; this is not currently implemented since
18551 this is not certain to actually improve performance. */
18553 const char *
18554 output_move_neon (rtx *operands)
18556 rtx reg, mem, addr, ops[2];
18557 int regno, nregs, load = REG_P (operands[0]);
18558 const char *templ;
18559 char buff[50];
18560 machine_mode mode;
18562 reg = operands[!load];
18563 mem = operands[load];
18565 mode = GET_MODE (reg);
18567 gcc_assert (REG_P (reg));
18568 regno = REGNO (reg);
18569 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18570 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18571 || NEON_REGNO_OK_FOR_QUAD (regno));
18572 gcc_assert (VALID_NEON_DREG_MODE (mode)
18573 || VALID_NEON_QREG_MODE (mode)
18574 || VALID_NEON_STRUCT_MODE (mode));
18575 gcc_assert (MEM_P (mem));
18577 addr = XEXP (mem, 0);
18579 /* Strip off const from addresses like (const (plus (...))). */
18580 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18581 addr = XEXP (addr, 0);
18583 switch (GET_CODE (addr))
18585 case POST_INC:
18586 /* We have to use vldm / vstm for too-large modes. */
18587 if (nregs > 4)
18589 templ = "v%smia%%?\t%%0!, %%h1";
18590 ops[0] = XEXP (addr, 0);
18592 else
18594 templ = "v%s1.64\t%%h1, %%A0";
18595 ops[0] = mem;
18597 ops[1] = reg;
18598 break;
18600 case PRE_DEC:
18601 /* We have to use vldm / vstm in this case, since there is no
18602 pre-decrement form of the vld1 / vst1 instructions. */
18603 templ = "v%smdb%%?\t%%0!, %%h1";
18604 ops[0] = XEXP (addr, 0);
18605 ops[1] = reg;
18606 break;
18608 case POST_MODIFY:
18609 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18610 gcc_unreachable ();
18612 case REG:
18613 /* We have to use vldm / vstm for too-large modes. */
18614 if (nregs > 1)
18616 if (nregs > 4)
18617 templ = "v%smia%%?\t%%m0, %%h1";
18618 else
18619 templ = "v%s1.64\t%%h1, %%A0";
18621 ops[0] = mem;
18622 ops[1] = reg;
18623 break;
18625 /* Fall through. */
18626 case LABEL_REF:
18627 case PLUS:
18629 int i;
18630 int overlap = -1;
18631 for (i = 0; i < nregs; i++)
18633 /* We're only using DImode here because it's a convenient size. */
18634 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18635 ops[1] = adjust_address (mem, DImode, 8 * i);
18636 if (reg_overlap_mentioned_p (ops[0], mem))
18638 gcc_assert (overlap == -1);
18639 overlap = i;
18641 else
18643 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18644 output_asm_insn (buff, ops);
18647 if (overlap != -1)
18649 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18650 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18651 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18652 output_asm_insn (buff, ops);
18655 return "";
18658 default:
18659 gcc_unreachable ();
18662 sprintf (buff, templ, load ? "ld" : "st");
18663 output_asm_insn (buff, ops);
18665 return "";
18668 /* Compute and return the length of neon_mov<mode>, where <mode> is
18669 one of VSTRUCT modes: EI, OI, CI or XI. */
18671 arm_attr_length_move_neon (rtx_insn *insn)
18673 rtx reg, mem, addr;
18674 int load;
18675 machine_mode mode;
18677 extract_insn_cached (insn);
18679 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18681 mode = GET_MODE (recog_data.operand[0]);
18682 switch (mode)
18684 case EImode:
18685 case OImode:
18686 return 8;
18687 case CImode:
18688 return 12;
18689 case XImode:
18690 return 16;
18691 default:
18692 gcc_unreachable ();
18696 load = REG_P (recog_data.operand[0]);
18697 reg = recog_data.operand[!load];
18698 mem = recog_data.operand[load];
18700 gcc_assert (MEM_P (mem));
18702 mode = GET_MODE (reg);
18703 addr = XEXP (mem, 0);
18705 /* Strip off const from addresses like (const (plus (...))). */
18706 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18707 addr = XEXP (addr, 0);
18709 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18711 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18712 return insns * 4;
18714 else
18715 return 4;
18718 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18719 return zero. */
18722 arm_address_offset_is_imm (rtx_insn *insn)
18724 rtx mem, addr;
18726 extract_insn_cached (insn);
18728 if (REG_P (recog_data.operand[0]))
18729 return 0;
18731 mem = recog_data.operand[0];
18733 gcc_assert (MEM_P (mem));
18735 addr = XEXP (mem, 0);
18737 if (REG_P (addr)
18738 || (GET_CODE (addr) == PLUS
18739 && REG_P (XEXP (addr, 0))
18740 && CONST_INT_P (XEXP (addr, 1))))
18741 return 1;
18742 else
18743 return 0;
18746 /* Output an ADD r, s, #n where n may be too big for one instruction.
18747 If adding zero to one register, output nothing. */
18748 const char *
18749 output_add_immediate (rtx *operands)
18751 HOST_WIDE_INT n = INTVAL (operands[2]);
18753 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18755 if (n < 0)
18756 output_multi_immediate (operands,
18757 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18758 -n);
18759 else
18760 output_multi_immediate (operands,
18761 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18765 return "";
18768 /* Output a multiple immediate operation.
18769 OPERANDS is the vector of operands referred to in the output patterns.
18770 INSTR1 is the output pattern to use for the first constant.
18771 INSTR2 is the output pattern to use for subsequent constants.
18772 IMMED_OP is the index of the constant slot in OPERANDS.
18773 N is the constant value. */
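/* Worked example (illustrative, not from the original source): with
   INSTR1 == "add%?\t%0, %1, %2", INSTR2 == "add%?\t%0, %0, %2" and
   N == 0x1001, the loop below finds the 8-bit chunks 0x001 and 0x1000
   and emits

       add     r0, r1, #1
       add     r0, r0, #4096                                            */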
18774 static const char *
18775 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18776 int immed_op, HOST_WIDE_INT n)
18778 #if HOST_BITS_PER_WIDE_INT > 32
18779 n &= 0xffffffff;
18780 #endif
18782 if (n == 0)
18784 /* Quick and easy output. */
18785 operands[immed_op] = const0_rtx;
18786 output_asm_insn (instr1, operands);
18788 else
18790 int i;
18791 const char * instr = instr1;
18793 /* Note that n is never zero here (which would give no output). */
18794 for (i = 0; i < 32; i += 2)
18796 if (n & (3 << i))
18798 operands[immed_op] = GEN_INT (n & (255 << i));
18799 output_asm_insn (instr, operands);
18800 instr = instr2;
18801 i += 6;
18806 return "";
18809 /* Return the name of a shifter operation. */
18810 static const char *
18811 arm_shift_nmem(enum rtx_code code)
18813 switch (code)
18815 case ASHIFT:
18816 return ARM_LSL_NAME;
18818 case ASHIFTRT:
18819 return "asr";
18821 case LSHIFTRT:
18822 return "lsr";
18824 case ROTATERT:
18825 return "ror";
18827 default:
18828 abort();
18832 /* Return the appropriate ARM instruction for the operation code.
18833 The returned result should not be overwritten. OP is the rtx of the
18834 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18835 was shifted. */
18836 const char *
18837 arithmetic_instr (rtx op, int shift_first_arg)
18839 switch (GET_CODE (op))
18841 case PLUS:
18842 return "add";
18844 case MINUS:
18845 return shift_first_arg ? "rsb" : "sub";
18847 case IOR:
18848 return "orr";
18850 case XOR:
18851 return "eor";
18853 case AND:
18854 return "and";
18856 case ASHIFT:
18857 case ASHIFTRT:
18858 case LSHIFTRT:
18859 case ROTATERT:
18860 return arm_shift_nmem(GET_CODE(op));
18862 default:
18863 gcc_unreachable ();
18867 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18868 for the operation code. The returned result should not be overwritten.
18869 OP is the rtx code of the shift.
18870 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18871 will hold the constant shift amount. */
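/* Examples (illustrative, not from the original source): for (ashift x 3)
   shift_op returns "lsl" with *AMOUNTP == 3; for (mult x 8) it also returns
   "lsl" with *AMOUNTP == 3; for (rotate x 24) it returns "ror" with
   *AMOUNTP == 8; and for a shift by a register it returns the mnemonic with
   *AMOUNTP == -1.  */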
18872 static const char *
18873 shift_op (rtx op, HOST_WIDE_INT *amountp)
18875 const char * mnem;
18876 enum rtx_code code = GET_CODE (op);
18878 switch (code)
18880 case ROTATE:
18881 if (!CONST_INT_P (XEXP (op, 1)))
18883 output_operand_lossage ("invalid shift operand");
18884 return NULL;
18887 code = ROTATERT;
18888 *amountp = 32 - INTVAL (XEXP (op, 1));
18889 mnem = "ror";
18890 break;
18892 case ASHIFT:
18893 case ASHIFTRT:
18894 case LSHIFTRT:
18895 case ROTATERT:
18896 mnem = arm_shift_nmem(code);
18897 if (CONST_INT_P (XEXP (op, 1)))
18899 *amountp = INTVAL (XEXP (op, 1));
18901 else if (REG_P (XEXP (op, 1)))
18903 *amountp = -1;
18904 return mnem;
18906 else
18908 output_operand_lossage ("invalid shift operand");
18909 return NULL;
18911 break;
18913 case MULT:
18914 /* We never have to worry about the amount being other than a
18915 power of 2, since this case can never be reloaded from a reg. */
18916 if (!CONST_INT_P (XEXP (op, 1)))
18918 output_operand_lossage ("invalid shift operand");
18919 return NULL;
18922 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18924 /* Amount must be a power of two. */
18925 if (*amountp & (*amountp - 1))
18927 output_operand_lossage ("invalid shift operand");
18928 return NULL;
18931 *amountp = int_log2 (*amountp);
18932 return ARM_LSL_NAME;
18934 default:
18935 output_operand_lossage ("invalid shift operand");
18936 return NULL;
18939 /* This is not 100% correct, but follows from the desire to merge
18940 multiplication by a power of 2 with the recognizer for a
18941 shift. >=32 is not a valid shift for "lsl", so we must try to
18942 output a shift that produces the correct arithmetical result.
18943 Using lsr #32 is identical except for the fact that the carry bit
18944 is not set correctly if we set the flags; but we never use the
18945 carry bit from such an operation, so we can ignore that. */
18946 if (code == ROTATERT)
18947 /* Rotate is just modulo 32. */
18948 *amountp &= 31;
18949 else if (*amountp != (*amountp & 31))
18951 if (code == ASHIFT)
18952 mnem = "lsr";
18953 *amountp = 32;
18956 /* Shifts of 0 are no-ops. */
18957 if (*amountp == 0)
18958 return NULL;
18960 return mnem;
18963 /* Obtain the shift from the POWER of two. */
18965 static HOST_WIDE_INT
18966 int_log2 (HOST_WIDE_INT power)
18968 HOST_WIDE_INT shift = 0;
18970 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18972 gcc_assert (shift <= 31);
18973 shift++;
18976 return shift;
18979 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18980 because /bin/as is horribly restrictive. The judgement about
18981 whether or not each character is 'printable' (and can be output as
18982 is) or not (and must be printed with an octal escape) must be made
18983 with reference to the *host* character set -- the situation is
18984 similar to that discussed in the comments above pp_c_char in
18985 c-pretty-print.c. */
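/* For example (illustrative, not from the original source): the bytes
   { 'H', 'i', '"', 0x07, 0 } are emitted as

       .ascii  "Hi\"\007\000"

   with a new .ascii directive started every MAX_ASCII_LEN characters.  */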
18987 #define MAX_ASCII_LEN 51
18989 void
18990 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18992 int i;
18993 int len_so_far = 0;
18995 fputs ("\t.ascii\t\"", stream);
18997 for (i = 0; i < len; i++)
18999 int c = p[i];
19001 if (len_so_far >= MAX_ASCII_LEN)
19003 fputs ("\"\n\t.ascii\t\"", stream);
19004 len_so_far = 0;
19007 if (ISPRINT (c))
19009 if (c == '\\' || c == '\"')
19011 putc ('\\', stream);
19012 len_so_far++;
19014 putc (c, stream);
19015 len_so_far++;
19017 else
19019 fprintf (stream, "\\%03o", c);
19020 len_so_far += 4;
19024 fputs ("\"\n", stream);
19027 /* Compute the register save mask for registers 0 through 12
19028 inclusive. This code is used by arm_compute_save_reg_mask. */
19030 static unsigned long
19031 arm_compute_save_reg0_reg12_mask (void)
19033 unsigned long func_type = arm_current_func_type ();
19034 unsigned long save_reg_mask = 0;
19035 unsigned int reg;
19037 if (IS_INTERRUPT (func_type))
19039 unsigned int max_reg;
19040 /* Interrupt functions must not corrupt any registers,
19041 even call clobbered ones. If this is a leaf function
19042 we can just examine the registers used by the RTL, but
19043 otherwise we have to assume that whatever function is
19044 called might clobber anything, and so we have to save
19045 all the call-clobbered registers as well. */
19046 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19047 /* FIQ handlers have registers r8 - r12 banked, so
19048 we only need to check r0 - r7. Normal ISRs only
19049 bank r14 and r15, so we must check up to r12.
19050 r13 is the stack pointer which is always preserved,
19051 so we do not need to consider it here. */
19052 max_reg = 7;
19053 else
19054 max_reg = 12;
19056 for (reg = 0; reg <= max_reg; reg++)
19057 if (df_regs_ever_live_p (reg)
19058 || (! crtl->is_leaf && call_used_regs[reg]))
19059 save_reg_mask |= (1 << reg);
19061 /* Also save the pic base register if necessary. */
19062 if (flag_pic
19063 && !TARGET_SINGLE_PIC_BASE
19064 && arm_pic_register != INVALID_REGNUM
19065 && crtl->uses_pic_offset_table)
19066 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19068 else if (IS_VOLATILE(func_type))
19070 /* For noreturn functions we historically omitted register saves
19071 altogether. However this really messes up debugging. As a
19072 compromise save just the frame pointers. Combined with the link
19073 register saved elsewhere this should be sufficient to get
19074 a backtrace. */
19075 if (frame_pointer_needed)
19076 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19077 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19078 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19079 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19080 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19082 else
19084 /* In the normal case we only need to save those registers
19085 which are call saved and which are used by this function. */
19086 for (reg = 0; reg <= 11; reg++)
19087 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19088 save_reg_mask |= (1 << reg);
19090 /* Handle the frame pointer as a special case. */
19091 if (frame_pointer_needed)
19092 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19094 /* If we aren't loading the PIC register,
19095 don't stack it even though it may be live. */
19096 if (flag_pic
19097 && !TARGET_SINGLE_PIC_BASE
19098 && arm_pic_register != INVALID_REGNUM
19099 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19100 || crtl->uses_pic_offset_table))
19101 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19103 /* The prologue will copy SP into R0, so save it. */
19104 if (IS_STACKALIGN (func_type))
19105 save_reg_mask |= 1;
19108 /* Save registers so the exception handler can modify them. */
19109 if (crtl->calls_eh_return)
19111 unsigned int i;
19113 for (i = 0; ; i++)
19115 reg = EH_RETURN_DATA_REGNO (i);
19116 if (reg == INVALID_REGNUM)
19117 break;
19118 save_reg_mask |= 1 << reg;
19122 return save_reg_mask;
19125 /* Return true if r3 is live at the start of the function. */
19127 static bool
19128 arm_r3_live_at_start_p (void)
19130 /* Just look at cfg info, which is still close enough to correct at this
19131 point. This gives false positives for broken functions that might use
19132 uninitialized data that happens to be allocated in r3, but who cares? */
19133 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19136 /* Compute the number of bytes used to store the static chain register on the
19137 stack, above the stack frame. We need to know this accurately to get the
19138 alignment of the rest of the stack frame correct. */
19140 static int
19141 arm_compute_static_chain_stack_bytes (void)
19143 /* See the defining assertion in arm_expand_prologue. */
19144 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19145 && IS_NESTED (arm_current_func_type ())
19146 && arm_r3_live_at_start_p ()
19147 && crtl->args.pretend_args_size == 0)
19148 return 4;
19150 return 0;
19153 /* Compute a bit mask of which registers need to be
19154 saved on the stack for the current function.
19155 This is used by arm_get_frame_offsets, which may add extra registers. */
19157 static unsigned long
19158 arm_compute_save_reg_mask (void)
19160 unsigned int save_reg_mask = 0;
19161 unsigned long func_type = arm_current_func_type ();
19162 unsigned int reg;
19164 if (IS_NAKED (func_type))
19165 /* This should never really happen. */
19166 return 0;
19168 /* If we are creating a stack frame, then we must save the frame pointer,
19169 IP (which will hold the old stack pointer), LR and the PC. */
19170 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19171 save_reg_mask |=
19172 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19173 | (1 << IP_REGNUM)
19174 | (1 << LR_REGNUM)
19175 | (1 << PC_REGNUM);
19177 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19179 /* Decide if we need to save the link register.
19180 Interrupt routines have their own banked link register,
19181 so they never need to save it.
19182 Otherwise if we do not use the link register we do not need to save
19183 it. If we are pushing other registers onto the stack however, we
19184 can save an instruction in the epilogue by pushing the link register
19185 now and then popping it back into the PC. This incurs extra memory
19186 accesses though, so we only do it when optimizing for size, and only
19187 if we know that we will not need a fancy return sequence. */
19188 if (df_regs_ever_live_p (LR_REGNUM)
19189 || (save_reg_mask
19190 && optimize_size
19191 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19192 && !crtl->calls_eh_return))
19193 save_reg_mask |= 1 << LR_REGNUM;
19195 if (cfun->machine->lr_save_eliminated)
19196 save_reg_mask &= ~ (1 << LR_REGNUM);
19198 if (TARGET_REALLY_IWMMXT
19199 && ((bit_count (save_reg_mask)
19200 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19201 arm_compute_static_chain_stack_bytes())
19202 ) % 2) != 0)
19204 /* The total number of registers that are going to be pushed
19205 onto the stack is odd. We need to ensure that the stack
19206 is 64-bit aligned before we start to save iWMMXt registers,
19207 and also before we start to create locals. (A local variable
19208 might be a double or long long which we will load/store using
19209 an iWMMXt instruction). Therefore we need to push another
19210 ARM register, so that the stack will be 64-bit aligned. We
19211 try to avoid using the arg registers (r0 -r3) as they might be
19212 used to pass values in a tail call. */
19213 for (reg = 4; reg <= 12; reg++)
19214 if ((save_reg_mask & (1 << reg)) == 0)
19215 break;
19217 if (reg <= 12)
19218 save_reg_mask |= (1 << reg);
19219 else
19221 cfun->machine->sibcall_blocked = 1;
19222 save_reg_mask |= (1 << 3);
19226 /* We may need to push an additional register for use initializing the
19227 PIC base register. */
19228 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19229 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19231 reg = thumb_find_work_register (1 << 4);
19232 if (!call_used_regs[reg])
19233 save_reg_mask |= (1 << reg);
19236 return save_reg_mask;
19240 /* Compute a bit mask of which registers need to be
19241 saved on the stack for the current function. */
19242 static unsigned long
19243 thumb1_compute_save_reg_mask (void)
19245 unsigned long mask;
19246 unsigned reg;
19248 mask = 0;
19249 for (reg = 0; reg < 12; reg ++)
19250 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19251 mask |= 1 << reg;
19253 if (flag_pic
19254 && !TARGET_SINGLE_PIC_BASE
19255 && arm_pic_register != INVALID_REGNUM
19256 && crtl->uses_pic_offset_table)
19257 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19259 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19260 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19261 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19263 /* LR will also be pushed if any lo regs are pushed. */
19264 if (mask & 0xff || thumb_force_lr_save ())
19265 mask |= (1 << LR_REGNUM);
19267 /* Make sure we have a low work register if we need one.
19268 We will need one if we are going to push a high register,
19269 but we are not currently intending to push a low register. */
19270 if ((mask & 0xff) == 0
19271 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19273 /* Use thumb_find_work_register to choose which register
19274 we will use. If the register is live then we will
19275 have to push it. Use LAST_LO_REGNUM as our fallback
19276 choice for the register to select. */
19277 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19278 /* Make sure the register returned by thumb_find_work_register is
19279 not part of the return value. */
19280 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19281 reg = LAST_LO_REGNUM;
19283 if (! call_used_regs[reg])
19284 mask |= 1 << reg;
19287 /* The 504 below is 8 bytes less than 512 because there are two possible
19288 alignment words. We can't tell here if they will be present or not, so we
19289 have to play it safe and assume that they are. */
19290 if ((CALLER_INTERWORKING_SLOT_SIZE +
19291 ROUND_UP_WORD (get_frame_size ()) +
19292 crtl->outgoing_args_size) >= 504)
19294 /* This is the same as the code in thumb1_expand_prologue() which
19295 determines which register to use for stack decrement. */
19296 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19297 if (mask & (1 << reg))
19298 break;
19300 if (reg > LAST_LO_REGNUM)
19302 /* Make sure we have a register available for stack decrement. */
19303 mask |= 1 << LAST_LO_REGNUM;
19307 return mask;
19311 /* Return the number of bytes required to save VFP registers. */
19312 static int
19313 arm_get_vfp_saved_size (void)
19315 unsigned int regno;
19316 int count;
19317 int saved;
19319 saved = 0;
19320 /* Space for saved VFP registers. */
19321 if (TARGET_HARD_FLOAT && TARGET_VFP)
19323 count = 0;
19324 for (regno = FIRST_VFP_REGNUM;
19325 regno < LAST_VFP_REGNUM;
19326 regno += 2)
19328 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19329 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19331 if (count > 0)
19333 /* Work around the ARM10 VFPr1 bug. */
19334 if (count == 2 && !arm_arch6)
19335 count++;
19336 saved += count * 8;
19338 count = 0;
19340 else
19341 count++;
19343 if (count > 0)
19345 if (count == 2 && !arm_arch6)
19346 count++;
19347 saved += count * 8;
19350 return saved;
19354 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19355 everything bar the final return instruction. If SIMPLE_RETURN is true,
19356 then do not output the epilogue, because it has already been emitted in RTL. */
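/* Illustrative output (not from the original source): for a function that
   saved {r4, r5, lr} and has no special exit requirements, a real return is
   emitted as

       ldmfd   sp!, {r4, r5, pc}   @ "pop {r4, r5, pc}" with unified syntax

   loading the saved return address straight into the PC.  */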
19357 const char *
19358 output_return_instruction (rtx operand, bool really_return, bool reverse,
19359 bool simple_return)
19361 char conditional[10];
19362 char instr[100];
19363 unsigned reg;
19364 unsigned long live_regs_mask;
19365 unsigned long func_type;
19366 arm_stack_offsets *offsets;
19368 func_type = arm_current_func_type ();
19370 if (IS_NAKED (func_type))
19371 return "";
19373 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19375 /* If this function was declared non-returning, and we have
19376 found a tail call, then we have to trust that the called
19377 function won't return. */
19378 if (really_return)
19380 rtx ops[2];
19382 /* Otherwise, trap an attempted return by aborting. */
19383 ops[0] = operand;
19384 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19385 : "abort");
19386 assemble_external_libcall (ops[1]);
19387 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19390 return "";
19393 gcc_assert (!cfun->calls_alloca || really_return);
19395 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19397 cfun->machine->return_used_this_function = 1;
19399 offsets = arm_get_frame_offsets ();
19400 live_regs_mask = offsets->saved_regs_mask;
19402 if (!simple_return && live_regs_mask)
19404 const char * return_reg;
19406 /* If we do not have any special requirements for function exit
19407 (e.g. interworking) then we can load the return address
19408 directly into the PC. Otherwise we must load it into LR. */
19409 if (really_return
19410 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19411 return_reg = reg_names[PC_REGNUM];
19412 else
19413 return_reg = reg_names[LR_REGNUM];
19415 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19417 /* There are three possible reasons for the IP register
19418 being saved: 1) a stack frame was created, in which case
19419 IP contains the old stack pointer, or 2) an ISR routine
19420 corrupted it, or 3) it was saved to align the stack on
19421 iWMMXt. In case 1, restore IP into SP, otherwise just
19422 restore IP. */
19423 if (frame_pointer_needed)
19425 live_regs_mask &= ~ (1 << IP_REGNUM);
19426 live_regs_mask |= (1 << SP_REGNUM);
19428 else
19429 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19432 /* On some ARM architectures it is faster to use LDR rather than
19433 LDM to load a single register. On other architectures, the
19434 cost is the same. In 26 bit mode, or for exception handlers,
19435 we have to use LDM to load the PC so that the CPSR is also
19436 restored. */
19437 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19438 if (live_regs_mask == (1U << reg))
19439 break;
19441 if (reg <= LAST_ARM_REGNUM
19442 && (reg != LR_REGNUM
19443 || ! really_return
19444 || ! IS_INTERRUPT (func_type)))
19446 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19447 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19449 else
19451 char *p;
19452 int first = 1;
19454 /* Generate the load multiple instruction to restore the
19455 registers. Note we can get here, even if
19456 frame_pointer_needed is true, but only if sp already
19457 points to the base of the saved core registers. */
19458 if (live_regs_mask & (1 << SP_REGNUM))
19460 unsigned HOST_WIDE_INT stack_adjust;
19462 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19463 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19465 if (stack_adjust && arm_arch5 && TARGET_ARM)
19466 if (TARGET_UNIFIED_ASM)
19467 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19468 else
19469 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19470 else
19472 /* If we can't use ldmib (SA110 bug),
19473 then try to pop r3 instead. */
19474 if (stack_adjust)
19475 live_regs_mask |= 1 << 3;
19477 if (TARGET_UNIFIED_ASM)
19478 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19479 else
19480 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19483 else
19484 if (TARGET_UNIFIED_ASM)
19485 sprintf (instr, "pop%s\t{", conditional);
19486 else
19487 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19489 p = instr + strlen (instr);
19491 for (reg = 0; reg <= SP_REGNUM; reg++)
19492 if (live_regs_mask & (1 << reg))
19494 int l = strlen (reg_names[reg]);
19496 if (first)
19497 first = 0;
19498 else
19500 memcpy (p, ", ", 2);
19501 p += 2;
19504 memcpy (p, "%|", 2);
19505 memcpy (p + 2, reg_names[reg], l);
19506 p += l + 2;
19509 if (live_regs_mask & (1 << LR_REGNUM))
19511 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19512 /* If returning from an interrupt, restore the CPSR. */
19513 if (IS_INTERRUPT (func_type))
19514 strcat (p, "^");
19516 else
19517 strcpy (p, "}");
19520 output_asm_insn (instr, & operand);
19522 /* See if we need to generate an extra instruction to
19523 perform the actual function return. */
19524 if (really_return
19525 && func_type != ARM_FT_INTERWORKED
19526 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19528 /* The return has already been handled
19529 by loading the LR into the PC. */
19530 return "";
19534 if (really_return)
19536 switch ((int) ARM_FUNC_TYPE (func_type))
19538 case ARM_FT_ISR:
19539 case ARM_FT_FIQ:
19540 /* ??? This is wrong for unified assembly syntax. */
19541 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19542 break;
19544 case ARM_FT_INTERWORKED:
19545 sprintf (instr, "bx%s\t%%|lr", conditional);
19546 break;
19548 case ARM_FT_EXCEPTION:
19549 /* ??? This is wrong for unified assembly syntax. */
19550 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19551 break;
19553 default:
19554 /* Use bx if it's available. */
19555 if (arm_arch5 || arm_arch4t)
19556 sprintf (instr, "bx%s\t%%|lr", conditional);
19557 else
19558 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19559 break;
19562 output_asm_insn (instr, & operand);
19565 return "";
19568 /* Write the function name into the code section, directly preceding
19569 the function prologue.
19571 Code will be output similar to this:
19573 .ascii "arm_poke_function_name", 0
19574 .align
19576 .word 0xff000000 + (t1 - t0)
19577 arm_poke_function_name
19578 mov ip, sp
19579 stmfd sp!, {fp, ip, lr, pc}
19580 sub fp, ip, #4
19582 When performing a stack backtrace, code can inspect the value
19583 of 'pc' stored at 'fp' + 0. If the trace function then looks
19584 at location pc - 12 and the top 8 bits are set, then we know
19585 that there is a function name embedded immediately preceding this
19586 location, and that its length is ((pc[-3]) & ~0xff000000).
19588 We assume that pc is declared as a pointer to an unsigned long.
19590 It is of no benefit to output the function name if we are assembling
19591 a leaf function. These function types will not contain a stack
19592 backtrace structure, therefore it is not possible to determine the
19593 function name. */
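/* A minimal, editorial sketch (not part of GCC) of how a backtracing
   routine might recover the embedded name.  The helper name is
   hypothetical; SAVED_PC is the 'pc' value described above, read from
   the frame and treated as a pointer to 32-bit unsigned longs.  If the
   top eight bits of the marker word are not all set there is no name;
   otherwise its low 24 bits give the word-aligned length of the name
   block, which ends immediately before the marker:

     static const char *
     arm_find_poked_name (const unsigned long *saved_pc)
     {
       unsigned long marker = saved_pc[-3];

       if ((marker & 0xff000000) != 0xff000000)
         return 0;

       return (const char *) (saved_pc - 3) - (marker & 0x00ffffff);
     }
*/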
19594 void
19595 arm_poke_function_name (FILE *stream, const char *name)
19597 unsigned long alignlength;
19598 unsigned long length;
19599 rtx x;
19601 length = strlen (name) + 1;
19602 alignlength = ROUND_UP_WORD (length);
19604 ASM_OUTPUT_ASCII (stream, name, length);
19605 ASM_OUTPUT_ALIGN (stream, 2);
19606 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19607 assemble_aligned_integer (UNITS_PER_WORD, x);
19610 /* Place some comments into the assembler stream
19611 describing the current function. */
19612 static void
19613 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19615 unsigned long func_type;
19617 /* ??? Do we want to print some of the below anyway? */
19618 if (TARGET_THUMB1)
19619 return;
19621 /* Sanity check. */
19622 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19624 func_type = arm_current_func_type ();
19626 switch ((int) ARM_FUNC_TYPE (func_type))
19628 default:
19629 case ARM_FT_NORMAL:
19630 break;
19631 case ARM_FT_INTERWORKED:
19632 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19633 break;
19634 case ARM_FT_ISR:
19635 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19636 break;
19637 case ARM_FT_FIQ:
19638 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19639 break;
19640 case ARM_FT_EXCEPTION:
19641 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19642 break;
19645 if (IS_NAKED (func_type))
19646 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19648 if (IS_VOLATILE (func_type))
19649 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19651 if (IS_NESTED (func_type))
19652 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19653 if (IS_STACKALIGN (func_type))
19654 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19656 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19657 crtl->args.size,
19658 crtl->args.pretend_args_size, frame_size);
19660 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19661 frame_pointer_needed,
19662 cfun->machine->uses_anonymous_args);
19664 if (cfun->machine->lr_save_eliminated)
19665 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19667 if (crtl->calls_eh_return)
19668 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19672 static void
19673 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19674 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19676 arm_stack_offsets *offsets;
19678 if (TARGET_THUMB1)
19680 int regno;
19682 /* Emit any call-via-reg trampolines that are needed for v4t support
19683 of call_reg and call_value_reg type insns. */
19684 for (regno = 0; regno < LR_REGNUM; regno++)
19686 rtx label = cfun->machine->call_via[regno];
19688 if (label != NULL)
19690 switch_to_section (function_section (current_function_decl));
19691 targetm.asm_out.internal_label (asm_out_file, "L",
19692 CODE_LABEL_NUMBER (label));
19693 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19697 /* ??? Probably not safe to set this here, since it assumes that a
19698 function will be emitted as assembly immediately after we generate
19699 RTL for it. This does not happen for inline functions. */
19700 cfun->machine->return_used_this_function = 0;
19702 else /* TARGET_32BIT */
19704 /* We need to take into account any stack-frame rounding. */
19705 offsets = arm_get_frame_offsets ();
19707 gcc_assert (!use_return_insn (FALSE, NULL)
19708 || (cfun->machine->return_used_this_function != 0)
19709 || offsets->saved_regs == offsets->outgoing_args
19710 || frame_pointer_needed);
19714 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19715 STR and STRD. If an even number of registers is being pushed, one
19716 STRD pattern is created for each register pair. If an
19717 odd number of registers is pushed, emit an initial STR followed by
19718 as many STRD instructions as are needed. This works best when the
19719 stack is initially 64-bit aligned (the normal case), since it
19720 ensures that each STRD is also 64-bit aligned. */
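/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r6, r7, lr} the code below
   emits roughly

       str   r4, [sp, #-20]!
       strd  r5, r6, [sp, #4]
       strd  r7, lr, [sp, #12]

   i.e. the odd register goes out first with writeback, and the pairs
   (which need not be consecutive registers in Thumb-2) follow at fixed
   offsets.  */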
19721 static void
19722 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19724 int num_regs = 0;
19725 int i;
19726 int regno;
19727 rtx par = NULL_RTX;
19728 rtx dwarf = NULL_RTX;
19729 rtx tmp;
19730 bool first = true;
19732 num_regs = bit_count (saved_regs_mask);
19734 /* Must be at least one register to save, and can't save SP or PC. */
19735 gcc_assert (num_regs > 0 && num_regs <= 14);
19736 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19737 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19739 /* Create sequence for DWARF info. All the frame-related data for
19740 debugging is held in this wrapper. */
19741 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19743 /* Describe the stack adjustment. */
19744 tmp = gen_rtx_SET (VOIDmode,
19745 stack_pointer_rtx,
19746 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 XVECEXP (dwarf, 0, 0) = tmp;
19750 /* Find the first register. */
19751 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19754 i = 0;
19756 /* If there's an odd number of registers to push, start off by
19757 pushing a single register. This ensures that subsequent strd
19758 operations are dword aligned (assuming that SP was originally
19759 64-bit aligned). */
19760 if ((num_regs & 1) != 0)
19762 rtx reg, mem, insn;
19764 reg = gen_rtx_REG (SImode, regno);
19765 if (num_regs == 1)
19766 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19767 stack_pointer_rtx));
19768 else
19769 mem = gen_frame_mem (Pmode,
19770 gen_rtx_PRE_MODIFY
19771 (Pmode, stack_pointer_rtx,
19772 plus_constant (Pmode, stack_pointer_rtx,
19773 -4 * num_regs)));
19775 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19776 RTX_FRAME_RELATED_P (tmp) = 1;
19777 insn = emit_insn (tmp);
19778 RTX_FRAME_RELATED_P (insn) = 1;
19779 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19780 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19781 reg);
19782 RTX_FRAME_RELATED_P (tmp) = 1;
19783 i++;
19784 regno++;
19785 XVECEXP (dwarf, 0, i) = tmp;
19786 first = false;
19789 while (i < num_regs)
19790 if (saved_regs_mask & (1 << regno))
19792 rtx reg1, reg2, mem1, mem2;
19793 rtx tmp0, tmp1, tmp2;
19794 int regno2;
19796 /* Find the register to pair with this one. */
19797 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19798 regno2++)
19801 reg1 = gen_rtx_REG (SImode, regno);
19802 reg2 = gen_rtx_REG (SImode, regno2);
19804 if (first)
19806 rtx insn;
19808 first = false;
19809 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19810 stack_pointer_rtx,
19811 -4 * num_regs));
19812 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19813 stack_pointer_rtx,
19814 -4 * (num_regs - 1)));
19815 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19816 plus_constant (Pmode, stack_pointer_rtx,
19817 -4 * (num_regs)));
19818 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19819 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19820 RTX_FRAME_RELATED_P (tmp0) = 1;
19821 RTX_FRAME_RELATED_P (tmp1) = 1;
19822 RTX_FRAME_RELATED_P (tmp2) = 1;
19823 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19824 XVECEXP (par, 0, 0) = tmp0;
19825 XVECEXP (par, 0, 1) = tmp1;
19826 XVECEXP (par, 0, 2) = tmp2;
19827 insn = emit_insn (par);
19828 RTX_FRAME_RELATED_P (insn) = 1;
19829 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19831 else
19833 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19834 stack_pointer_rtx,
19835 4 * i));
19836 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19837 stack_pointer_rtx,
19838 4 * (i + 1)));
19839 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19840 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19841 RTX_FRAME_RELATED_P (tmp1) = 1;
19842 RTX_FRAME_RELATED_P (tmp2) = 1;
19843 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19844 XVECEXP (par, 0, 0) = tmp1;
19845 XVECEXP (par, 0, 1) = tmp2;
19846 emit_insn (par);
19849 /* Create unwind information. This is an approximation. */
19850 tmp1 = gen_rtx_SET (VOIDmode,
19851 gen_frame_mem (Pmode,
19852 plus_constant (Pmode,
19853 stack_pointer_rtx,
19854 4 * i)),
19855 reg1);
19856 tmp2 = gen_rtx_SET (VOIDmode,
19857 gen_frame_mem (Pmode,
19858 plus_constant (Pmode,
19859 stack_pointer_rtx,
19860 4 * (i + 1))),
19861 reg2);
19863 RTX_FRAME_RELATED_P (tmp1) = 1;
19864 RTX_FRAME_RELATED_P (tmp2) = 1;
19865 XVECEXP (dwarf, 0, i + 1) = tmp1;
19866 XVECEXP (dwarf, 0, i + 2) = tmp2;
19867 i += 2;
19868 regno = regno2 + 1;
19870 else
19871 regno++;
19873 return;
19876 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19877 whenever possible, otherwise it emits single-word stores. The first store
19878 also allocates stack space for all saved registers, using pre-indexed
19879 addressing with writeback. All other stores use offset addressing. If no STRD
19880 can be emitted, this function emits a sequence of single-word stores,
19881 and not an STM as before, because single-word stores provide more freedom
19882 for scheduling and can be turned into an STM by peephole optimizations. */
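/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r7} the code below emits roughly

       strd  r4, r5, [sp, #-12]!
       str   r7, [sp, #8]

   while a mask such as {r5, r7}, which contains no even/odd pair, falls
   back to single-word stores only.  */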
19883 static void
19884 arm_emit_strd_push (unsigned long saved_regs_mask)
19886 int num_regs = 0;
19887 int i, j, dwarf_index = 0;
19888 int offset = 0;
19889 rtx dwarf = NULL_RTX;
19890 rtx insn = NULL_RTX;
19891 rtx tmp, mem;
19893 /* TODO: More efficient code could be emitted by changing the
19894 layout, e.g., first push all pairs that can use STRD to keep the
19895 stack aligned, and then push all other registers. */
19896 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19897 if (saved_regs_mask & (1 << i))
19898 num_regs++;
19900 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19901 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19902 gcc_assert (num_regs > 0);
19904 /* Create sequence for DWARF info. */
19905 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19907 /* For dwarf info, we generate explicit stack update. */
19908 tmp = gen_rtx_SET (VOIDmode,
19909 stack_pointer_rtx,
19910 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19911 RTX_FRAME_RELATED_P (tmp) = 1;
19912 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19914 /* Save registers. */
19915 offset = - 4 * num_regs;
19916 j = 0;
19917 while (j <= LAST_ARM_REGNUM)
19918 if (saved_regs_mask & (1 << j))
19920 if ((j % 2 == 0)
19921 && (saved_regs_mask & (1 << (j + 1))))
19923 /* The current register and the next register form a register pair for
19924 which STRD can be generated. */
19925 if (offset < 0)
19927 /* Allocate stack space for all saved registers. */
19928 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19929 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19930 mem = gen_frame_mem (DImode, tmp);
19931 offset = 0;
19933 else if (offset > 0)
19934 mem = gen_frame_mem (DImode,
19935 plus_constant (Pmode,
19936 stack_pointer_rtx,
19937 offset));
19938 else
19939 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19941 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19942 RTX_FRAME_RELATED_P (tmp) = 1;
19943 tmp = emit_insn (tmp);
19945 /* Record the first store insn. */
19946 if (dwarf_index == 1)
19947 insn = tmp;
19949 /* Generate dwarf info. */
19950 mem = gen_frame_mem (SImode,
19951 plus_constant (Pmode,
19952 stack_pointer_rtx,
19953 offset));
19954 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19955 RTX_FRAME_RELATED_P (tmp) = 1;
19956 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19958 mem = gen_frame_mem (SImode,
19959 plus_constant (Pmode,
19960 stack_pointer_rtx,
19961 offset + 4));
19962 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19963 RTX_FRAME_RELATED_P (tmp) = 1;
19964 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19966 offset += 8;
19967 j += 2;
19969 else
19971 /* Emit a single word store. */
19972 if (offset < 0)
19974 /* Allocate stack space for all saved registers. */
19975 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19976 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19977 mem = gen_frame_mem (SImode, tmp);
19978 offset = 0;
19980 else if (offset > 0)
19981 mem = gen_frame_mem (SImode,
19982 plus_constant (Pmode,
19983 stack_pointer_rtx,
19984 offset));
19985 else
19986 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19988 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19989 RTX_FRAME_RELATED_P (tmp) = 1;
19990 tmp = emit_insn (tmp);
19992 /* Record the first store insn. */
19993 if (dwarf_index == 1)
19994 insn = tmp;
19996 /* Generate dwarf info. */
19997 mem = gen_frame_mem (SImode,
19998 plus_constant(Pmode,
19999 stack_pointer_rtx,
20000 offset));
20001 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20002 RTX_FRAME_RELATED_P (tmp) = 1;
20003 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20005 offset += 4;
20006 j += 1;
20009 else
20010 j++;
20012 /* Attach dwarf info to the first insn we generate. */
20013 gcc_assert (insn != NULL_RTX);
20014 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20015 RTX_FRAME_RELATED_P (insn) = 1;
20018 /* Generate and emit an insn that we will recognize as a push_multi.
20019 Unfortunately, since this insn does not reflect very well the actual
20020 semantics of the operation, we need to annotate the insn for the benefit
20021 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20022 MASK for registers that should be annotated for DWARF2 frame unwind
20023 information. */
20024 static rtx
20025 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20027 int num_regs = 0;
20028 int num_dwarf_regs = 0;
20029 int i, j;
20030 rtx par;
20031 rtx dwarf;
20032 int dwarf_par_index;
20033 rtx tmp, reg;
20035 /* We don't record the PC in the dwarf frame information. */
20036 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20038 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20040 if (mask & (1 << i))
20041 num_regs++;
20042 if (dwarf_regs_mask & (1 << i))
20043 num_dwarf_regs++;
20046 gcc_assert (num_regs && num_regs <= 16);
20047 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20049 /* For the body of the insn we are going to generate an UNSPEC in
20050 parallel with several USEs. This allows the insn to be recognized
20051 by the push_multi pattern in the arm.md file.
20053 The body of the insn looks something like this:
20055 (parallel [
20056 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20057 (const_int:SI <num>)))
20058 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20059 (use (reg:SI XX))
20060 (use (reg:SI YY))
20064 For the frame note however, we try to be more explicit and actually
20065 show each register being stored into the stack frame, plus a (single)
20066 decrement of the stack pointer. We do it this way in order to be
20067 friendly to the stack unwinding code, which only wants to see a single
20068 stack decrement per instruction. The RTL we generate for the note looks
20069 something like this:
20071 (sequence [
20072 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20073 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20074 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20075 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20079 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20080 instead we'd have a parallel expression detailing all
20081 the stores to the various memory addresses so that debug
20082 information is more up-to-date. Remember however while writing
20083 this to take care of the constraints with the push instruction.
20085 Note also that this has to be taken care of for the VFP registers.
20087 For more see PR43399. */
20089 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20090 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20091 dwarf_par_index = 1;
20093 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20095 if (mask & (1 << i))
20097 reg = gen_rtx_REG (SImode, i);
20099 XVECEXP (par, 0, 0)
20100 = gen_rtx_SET (VOIDmode,
20101 gen_frame_mem
20102 (BLKmode,
20103 gen_rtx_PRE_MODIFY (Pmode,
20104 stack_pointer_rtx,
20105 plus_constant
20106 (Pmode, stack_pointer_rtx,
20107 -4 * num_regs))
20109 gen_rtx_UNSPEC (BLKmode,
20110 gen_rtvec (1, reg),
20111 UNSPEC_PUSH_MULT));
20113 if (dwarf_regs_mask & (1 << i))
20115 tmp = gen_rtx_SET (VOIDmode,
20116 gen_frame_mem (SImode, stack_pointer_rtx),
20117 reg);
20118 RTX_FRAME_RELATED_P (tmp) = 1;
20119 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20122 break;
20126 for (j = 1, i++; j < num_regs; i++)
20128 if (mask & (1 << i))
20130 reg = gen_rtx_REG (SImode, i);
20132 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20134 if (dwarf_regs_mask & (1 << i))
20137 = gen_rtx_SET (VOIDmode,
20138 gen_frame_mem
20139 (SImode,
20140 plus_constant (Pmode, stack_pointer_rtx,
20141 4 * j)),
20142 reg);
20143 RTX_FRAME_RELATED_P (tmp) = 1;
20144 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20147 j++;
20151 par = emit_insn (par);
20153 tmp = gen_rtx_SET (VOIDmode,
20154 stack_pointer_rtx,
20155 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20156 RTX_FRAME_RELATED_P (tmp) = 1;
20157 XVECEXP (dwarf, 0, 0) = tmp;
20159 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20161 return par;
20164 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20165 SIZE is the offset to be adjusted.
20166 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20167 static void
20168 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20170 rtx dwarf;
20172 RTX_FRAME_RELATED_P (insn) = 1;
20173 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20174 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20177 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20178 SAVED_REGS_MASK shows which registers need to be restored.
20180 Unfortunately, since this insn does not reflect very well the actual
20181 semantics of the operation, we need to annotate the insn for the benefit
20182 of DWARF2 frame unwind information. */
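/* For illustration (a sketch, not normative): with SAVED_REGS_MASK
   covering {r4, r5, pc}, the PARALLEL built below holds a return, the
   SP adjustment by 12 and one SET per register, and is printed by the
   pop_multi pattern as something like "pop {r4, r5, pc}".  */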
20183 static void
20184 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20186 int num_regs = 0;
20187 int i, j;
20188 rtx par;
20189 rtx dwarf = NULL_RTX;
20190 rtx tmp, reg;
20191 bool return_in_pc;
20192 int offset_adj;
20193 int emit_update;
20195 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20196 offset_adj = return_in_pc ? 1 : 0;
20197 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20198 if (saved_regs_mask & (1 << i))
20199 num_regs++;
20201 gcc_assert (num_regs && num_regs <= 16);
20203 /* If SP is in reglist, then we don't emit SP update insn. */
20204 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20206 /* The parallel needs to hold num_regs SETs
20207 and one SET for the stack update. */
20208 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20210 if (return_in_pc)
20212 tmp = ret_rtx;
20213 XVECEXP (par, 0, 0) = tmp;
20216 if (emit_update)
20218 /* Increment the stack pointer, based on there being
20219 num_regs 4-byte registers to restore. */
20220 tmp = gen_rtx_SET (VOIDmode,
20221 stack_pointer_rtx,
20222 plus_constant (Pmode,
20223 stack_pointer_rtx,
20224 4 * num_regs));
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (par, 0, offset_adj) = tmp;
20229 /* Now restore every reg, which may include PC. */
20230 for (j = 0, i = 0; j < num_regs; i++)
20231 if (saved_regs_mask & (1 << i))
20233 reg = gen_rtx_REG (SImode, i);
20234 if ((num_regs == 1) && emit_update && !return_in_pc)
20236 /* Emit single load with writeback. */
20237 tmp = gen_frame_mem (SImode,
20238 gen_rtx_POST_INC (Pmode,
20239 stack_pointer_rtx));
20240 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20241 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20242 return;
20245 tmp = gen_rtx_SET (VOIDmode,
20246 reg,
20247 gen_frame_mem
20248 (SImode,
20249 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20250 RTX_FRAME_RELATED_P (tmp) = 1;
20251 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20253 /* We need to maintain a sequence for DWARF info too. As dwarf info
20254 should not have PC, skip PC. */
20255 if (i != PC_REGNUM)
20256 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20258 j++;
20261 if (return_in_pc)
20262 par = emit_jump_insn (par);
20263 else
20264 par = emit_insn (par);
20266 REG_NOTES (par) = dwarf;
20267 if (!return_in_pc)
20268 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20269 stack_pointer_rtx, stack_pointer_rtx);
20272 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20273 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20275 Unfortunately, since this insn does not reflect very well the actual
20276 semantics of the operation, we need to annotate the insn for the benefit
20277 of DWARF2 frame unwind information. */
20278 static void
20279 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20281 int i, j;
20282 rtx par;
20283 rtx dwarf = NULL_RTX;
20284 rtx tmp, reg;
20286 gcc_assert (num_regs && num_regs <= 32);
20288 /* Workaround ARM10 VFPr1 bug. */
20289 if (num_regs == 2 && !arm_arch6)
20291 if (first_reg == 15)
20292 first_reg--;
20294 num_regs++;
20297 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20298 there could be up to 32 D-registers to restore.
20299 If there are more than 16 D-registers, make two recursive calls,
20300 each of which emits one pop_multi instruction. */
20301 if (num_regs > 16)
20303 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20304 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20305 return;
20308 /* The parallel needs to hold num_regs SETs
20309 and one SET for the stack update. */
20310 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20312 /* Increment the stack pointer, based on there being
20313 num_regs 8-byte registers to restore. */
20314 tmp = gen_rtx_SET (VOIDmode,
20315 base_reg,
20316 plus_constant (Pmode, base_reg, 8 * num_regs));
20317 RTX_FRAME_RELATED_P (tmp) = 1;
20318 XVECEXP (par, 0, 0) = tmp;
20320 /* Now show every reg that will be restored, using a SET for each. */
20321 for (j = 0, i=first_reg; j < num_regs; i += 2)
20323 reg = gen_rtx_REG (DFmode, i);
20325 tmp = gen_rtx_SET (VOIDmode,
20326 reg,
20327 gen_frame_mem
20328 (DFmode,
20329 plus_constant (Pmode, base_reg, 8 * j)));
20330 RTX_FRAME_RELATED_P (tmp) = 1;
20331 XVECEXP (par, 0, j + 1) = tmp;
20333 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20335 j++;
20338 par = emit_insn (par);
20339 REG_NOTES (par) = dwarf;
20341 /* Make sure the CFA doesn't remain based on IP_REGNUM, to allow unwinding from FP. */
20342 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20344 RTX_FRAME_RELATED_P (par) = 1;
20345 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20347 else
20348 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20349 base_reg, base_reg);
20352 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20353 even number of registers is being popped, multiple LDRD patterns are created
20354 for all register pairs. If an odd number of registers is popped, the last
20355 register is loaded using an LDR pattern. */
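/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r6, r7, pc} the code below
   emits roughly

       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]
       add   sp, sp, #16
       ldr   pc, [sp], #4

   with the final load doubling as the function return.  */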
20356 static void
20357 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20359 int num_regs = 0;
20360 int i, j;
20361 rtx par = NULL_RTX;
20362 rtx dwarf = NULL_RTX;
20363 rtx tmp, reg, tmp1;
20364 bool return_in_pc;
20366 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20367 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20368 if (saved_regs_mask & (1 << i))
20369 num_regs++;
20371 gcc_assert (num_regs && num_regs <= 16);
20373 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20374 to be popped. So, if num_regs is even, now it will become odd,
20375 and we can generate pop with PC. If num_regs is odd, it will be
20376 even now, and ldr with return can be generated for PC. */
20377 if (return_in_pc)
20378 num_regs--;
20380 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20382 /* Var j iterates over all the registers to gather all the registers in
20383 saved_regs_mask. Var i gives index of saved registers in stack frame.
20384 A PARALLEL RTX of register-pair is created here, so that pattern for
20385 LDRD can be matched. As PC is always last register to be popped, and
20386 we have already decremented num_regs if PC, we don't have to worry
20387 about PC in this loop. */
20388 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20389 if (saved_regs_mask & (1 << j))
20391 /* Create RTX for memory load. */
20392 reg = gen_rtx_REG (SImode, j);
20393 tmp = gen_rtx_SET (SImode,
20394 reg,
20395 gen_frame_mem (SImode,
20396 plus_constant (Pmode,
20397 stack_pointer_rtx, 4 * i)));
20398 RTX_FRAME_RELATED_P (tmp) = 1;
20400 if (i % 2 == 0)
20402 /* When saved-register index (i) is even, the RTX to be emitted is
20403 yet to be created. Hence create it first. The LDRD pattern we
20404 are generating is :
20405 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20406 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20407 where target registers need not be consecutive. */
20408 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20409 dwarf = NULL_RTX;
20412 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20413 added as 0th element and if i is odd, reg_i is added as 1st element
20414 of LDRD pattern shown above. */
20415 XVECEXP (par, 0, (i % 2)) = tmp;
20416 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20418 if ((i % 2) == 1)
20420 /* When saved-register index (i) is odd, RTXs for both the registers
20421 to be loaded are generated in above given LDRD pattern, and the
20422 pattern can be emitted now. */
20423 par = emit_insn (par);
20424 REG_NOTES (par) = dwarf;
20425 RTX_FRAME_RELATED_P (par) = 1;
20428 i++;
20431 /* If the number of registers popped is odd AND return_in_pc is false, OR the
20432 number of registers is even AND return_in_pc is true, the last register is
20433 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20434 then LDR with post increment. */
20436 /* Increment the stack pointer, based on there being
20437 num_regs 4-byte registers to restore. */
20438 tmp = gen_rtx_SET (VOIDmode,
20439 stack_pointer_rtx,
20440 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20441 RTX_FRAME_RELATED_P (tmp) = 1;
20442 tmp = emit_insn (tmp);
20443 if (!return_in_pc)
20445 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20446 stack_pointer_rtx, stack_pointer_rtx);
20449 dwarf = NULL_RTX;
20451 if (((num_regs % 2) == 1 && !return_in_pc)
20452 || ((num_regs % 2) == 0 && return_in_pc))
20454 /* Scan for the single register to be popped. Skip until the saved
20455 register is found. */
20456 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20458 /* Gen LDR with post increment here. */
20459 tmp1 = gen_rtx_MEM (SImode,
20460 gen_rtx_POST_INC (SImode,
20461 stack_pointer_rtx));
20462 set_mem_alias_set (tmp1, get_frame_alias_set ());
20464 reg = gen_rtx_REG (SImode, j);
20465 tmp = gen_rtx_SET (SImode, reg, tmp1);
20466 RTX_FRAME_RELATED_P (tmp) = 1;
20467 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20469 if (return_in_pc)
20471 /* If return_in_pc, j must be PC_REGNUM. */
20472 gcc_assert (j == PC_REGNUM);
20473 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20474 XVECEXP (par, 0, 0) = ret_rtx;
20475 XVECEXP (par, 0, 1) = tmp;
20476 par = emit_jump_insn (par);
20478 else
20480 par = emit_insn (tmp);
20481 REG_NOTES (par) = dwarf;
20482 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20483 stack_pointer_rtx, stack_pointer_rtx);
20487 else if ((num_regs % 2) == 1 && return_in_pc)
20489 /* There are 2 registers to be popped. So, generate the pattern
20490 pop_multiple_with_stack_update_and_return to pop in PC. */
20491 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20494 return;
20497 /* LDRD in ARM mode needs consecutive registers as operands. This function
20498 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20499 offset addressing and then generates one separate stack update. This provides
20500 more scheduling freedom, compared to writeback on every load. However,
20501 if the function returns using load into PC directly
20502 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20503 before the last load. TODO: Add a peephole optimization to recognize
20504 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20505 peephole optimization to merge the load at stack-offset zero
20506 with the stack update instruction using load with writeback
20507 in post-index addressing mode. */
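/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r6, pc} the code below emits
   roughly

       ldrd  r4, r5, [sp]
       ldr   r6, [sp, #8]
       add   sp, sp, #12
       ldr   pc, [sp], #4

   since ARM-mode LDRD needs an even/odd pair of consecutive registers,
   r6 is loaded on its own.  */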
20508 static void
20509 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20511 int j = 0;
20512 int offset = 0;
20513 rtx par = NULL_RTX;
20514 rtx dwarf = NULL_RTX;
20515 rtx tmp, mem;
20517 /* Restore saved registers. */
20518 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20519 j = 0;
20520 while (j <= LAST_ARM_REGNUM)
20521 if (saved_regs_mask & (1 << j))
20523 if ((j % 2) == 0
20524 && (saved_regs_mask & (1 << (j + 1)))
20525 && (j + 1) != PC_REGNUM)
20527 /* Current register and next register form register pair for which
20528 LDRD can be generated. PC is always the last register popped, and
20529 we handle it separately. */
20530 if (offset > 0)
20531 mem = gen_frame_mem (DImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx,
20534 offset));
20535 else
20536 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20538 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20539 tmp = emit_insn (tmp);
20540 RTX_FRAME_RELATED_P (tmp) = 1;
20542 /* Generate dwarf info. */
20544 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20545 gen_rtx_REG (SImode, j),
20546 NULL_RTX);
20547 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20548 gen_rtx_REG (SImode, j + 1),
20549 dwarf);
20551 REG_NOTES (tmp) = dwarf;
20553 offset += 8;
20554 j += 2;
20556 else if (j != PC_REGNUM)
20558 /* Emit a single word load. */
20559 if (offset > 0)
20560 mem = gen_frame_mem (SImode,
20561 plus_constant (Pmode,
20562 stack_pointer_rtx,
20563 offset));
20564 else
20565 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20567 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20568 tmp = emit_insn (tmp);
20569 RTX_FRAME_RELATED_P (tmp) = 1;
20571 /* Generate dwarf info. */
20572 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20573 gen_rtx_REG (SImode, j),
20574 NULL_RTX);
20576 offset += 4;
20577 j += 1;
20579 else /* j == PC_REGNUM */
20580 j++;
20582 else
20583 j++;
20585 /* Update the stack. */
20586 if (offset > 0)
20588 tmp = gen_rtx_SET (Pmode,
20589 stack_pointer_rtx,
20590 plus_constant (Pmode,
20591 stack_pointer_rtx,
20592 offset));
20593 tmp = emit_insn (tmp);
20594 arm_add_cfa_adjust_cfa_note (tmp, offset,
20595 stack_pointer_rtx, stack_pointer_rtx);
20596 offset = 0;
20599 if (saved_regs_mask & (1 << PC_REGNUM))
20601 /* Only PC is to be popped. */
20602 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20603 XVECEXP (par, 0, 0) = ret_rtx;
20604 tmp = gen_rtx_SET (SImode,
20605 gen_rtx_REG (SImode, PC_REGNUM),
20606 gen_frame_mem (SImode,
20607 gen_rtx_POST_INC (SImode,
20608 stack_pointer_rtx)));
20609 RTX_FRAME_RELATED_P (tmp) = 1;
20610 XVECEXP (par, 0, 1) = tmp;
20611 par = emit_jump_insn (par);
20613 /* Generate dwarf info. */
20614 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20615 gen_rtx_REG (SImode, PC_REGNUM),
20616 NULL_RTX);
20617 REG_NOTES (par) = dwarf;
20618 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20619 stack_pointer_rtx, stack_pointer_rtx);
20623 /* Calculate the size of the return value that is passed in registers. */
20624 static unsigned
20625 arm_size_return_regs (void)
20627 machine_mode mode;
20629 if (crtl->return_rtx != 0)
20630 mode = GET_MODE (crtl->return_rtx);
20631 else
20632 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20634 return GET_MODE_SIZE (mode);
20637 /* Return true if the current function needs to save/restore LR. */
20638 static bool
20639 thumb_force_lr_save (void)
20641 return !cfun->machine->lr_save_eliminated
20642 && (!leaf_function_p ()
20643 || thumb_far_jump_used_p ()
20644 || df_regs_ever_live_p (LR_REGNUM));
20647 /* Return true if CALL is an indirect tail call. If it is, we do
20648 not know whether r3 will be available, because an indirect tail
20649 call may need r3 to hold its target address. */
20650 static bool
20651 is_indirect_tailcall_p (rtx call)
20653 rtx pat = PATTERN (call);
20655 /* Indirect tail call. */
20656 pat = XVECEXP (pat, 0, 0);
20657 if (GET_CODE (pat) == SET)
20658 pat = SET_SRC (pat);
20660 pat = XEXP (XEXP (pat, 0), 0);
20661 return REG_P (pat);
20664 /* Return true if r3 is used by any of the tail call insns in the
20665 current function. */
20666 static bool
20667 any_sibcall_could_use_r3 (void)
20669 edge_iterator ei;
20670 edge e;
20672 if (!crtl->tail_call_emit)
20673 return false;
20674 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20675 if (e->flags & EDGE_SIBCALL)
20677 rtx call = BB_END (e->src);
20678 if (!CALL_P (call))
20679 call = prev_nonnote_nondebug_insn (call);
20680 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20681 if (find_regno_fusage (call, USE, 3)
20682 || is_indirect_tailcall_p (call))
20683 return true;
20685 return false;
20689 /* Compute the distance from register FROM to register TO.
20690 These can be the arg pointer (26), the soft frame pointer (25),
20691 the stack pointer (13) or the hard frame pointer (11).
20692 In Thumb mode r7 is used as the hard frame pointer, if needed.
20693 Typical stack layout looks like this:
20695 old stack pointer -> | |
20696 ----
20697 | | \
20698 | | saved arguments for
20699 | | vararg functions
20700 | | /
20702 hard FP & arg pointer -> | | \
20703 | | stack
20704 | | frame
20705 | | /
20707 | | \
20708 | | call saved
20709 | | registers
20710 soft frame pointer -> | | /
20712 | | \
20713 | | local
20714 | | variables
20715 locals base pointer -> | | /
20717 | | \
20718 | | outgoing
20719 | | arguments
20720 current stack pointer -> | | /
20723 For a given function some or all of these stack components
20724 may not be needed, giving rise to the possibility of
20725 eliminating some of the registers.
20727 The values returned by this function must reflect the behavior
20728 of arm_expand_prologue() and arm_compute_save_reg_mask().
20730 The sign of the number returned reflects the direction of stack
20731 growth, so the values are positive for all eliminations except
20732 from the soft frame pointer to the hard frame pointer.
20734 SFP may point just inside the local variables block to ensure correct
20735 alignment. */
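/* As an editorial illustration (the switch in
   arm_compute_initial_elimination_offset below is the authority), the
   eliminations reduce to simple differences of the cached offsets:

     ARG_POINTER   -> FRAME_POINTER (soft FP) : soft_frame - saved_args
     ARG_POINTER   -> ARM hard FP             : frame - saved_args - 4
     ARG_POINTER   -> STACK_POINTER           : outgoing_args - (saved_args + 4)
     FRAME_POINTER -> ARM hard FP             : frame - soft_frame
     FRAME_POINTER -> STACK_POINTER           : outgoing_args - soft_frame  */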
20738 /* Calculate stack offsets. These are used to calculate register elimination
20739 offsets and in prologue/epilogue code. Also calculates which registers
20740 should be saved. */
20742 static arm_stack_offsets *
20743 arm_get_frame_offsets (void)
20745 struct arm_stack_offsets *offsets;
20746 unsigned long func_type;
20747 int leaf;
20748 int saved;
20749 int core_saved;
20750 HOST_WIDE_INT frame_size;
20751 int i;
20753 offsets = &cfun->machine->stack_offsets;
20755 /* We need to know if we are a leaf function. Unfortunately, it
20756 is possible to be called after start_sequence has been called,
20757 which causes get_insns to return the insns for the sequence,
20758 not the function, which will cause leaf_function_p to return
20759 the incorrect result.
20761 However, we only need to know about leaf functions once reload has completed, and the
20762 frame size cannot be changed after that time, so we can safely
20763 use the cached value. */
20765 if (reload_completed)
20766 return offsets;
20768 /* Initially this is the size of the local variables. It will be translated
20769 into an offset once we have determined the size of preceding data. */
20770 frame_size = ROUND_UP_WORD (get_frame_size ());
20772 leaf = leaf_function_p ();
20774 /* Space for variadic functions. */
20775 offsets->saved_args = crtl->args.pretend_args_size;
20777 /* In Thumb mode this is incorrect, but never used. */
20778 offsets->frame
20779 = (offsets->saved_args
20780 + arm_compute_static_chain_stack_bytes ()
20781 + (frame_pointer_needed ? 4 : 0));
20783 if (TARGET_32BIT)
20785 unsigned int regno;
20787 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20788 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20789 saved = core_saved;
20791 /* We know that SP will be doubleword aligned on entry, and we must
20792 preserve that condition at any subroutine call. We also require the
20793 soft frame pointer to be doubleword aligned. */
20795 if (TARGET_REALLY_IWMMXT)
20797 /* Check for the call-saved iWMMXt registers. */
20798 for (regno = FIRST_IWMMXT_REGNUM;
20799 regno <= LAST_IWMMXT_REGNUM;
20800 regno++)
20801 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20802 saved += 8;
20805 func_type = arm_current_func_type ();
20806 /* Space for saved VFP registers. */
20807 if (! IS_VOLATILE (func_type)
20808 && TARGET_HARD_FLOAT && TARGET_VFP)
20809 saved += arm_get_vfp_saved_size ();
20811 else /* TARGET_THUMB1 */
20813 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20814 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20815 saved = core_saved;
20816 if (TARGET_BACKTRACE)
20817 saved += 16;
20820 /* Saved registers include the stack frame. */
20821 offsets->saved_regs
20822 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20823 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20825 /* A leaf function does not need any stack alignment if it has nothing
20826 on the stack. */
20827 if (leaf && frame_size == 0
20828 /* However if it calls alloca(), we have a dynamically allocated
20829 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20830 && ! cfun->calls_alloca)
20832 offsets->outgoing_args = offsets->soft_frame;
20833 offsets->locals_base = offsets->soft_frame;
20834 return offsets;
20837 /* Ensure SFP has the correct alignment. */
20838 if (ARM_DOUBLEWORD_ALIGN
20839 && (offsets->soft_frame & 7))
20841 offsets->soft_frame += 4;
20842 /* Try to align stack by pushing an extra reg. Don't bother doing this
20843 when there is a stack frame as the alignment will be rolled into
20844 the normal stack adjustment. */
20845 if (frame_size + crtl->outgoing_args_size == 0)
20847 int reg = -1;
20849 /* Register r3 is caller-saved. Normally it does not need to be
20850 saved on entry by the prologue. However if we choose to save
20851 it for padding then we may confuse the compiler into thinking
20852 a prologue sequence is required when in fact it is not. This
20853 will occur when shrink-wrapping if r3 is used as a scratch
20854 register and there are no other callee-saved writes.
20856 This situation can be avoided when other callee-saved registers
20857 are available: r3 is not mandatory, and we can choose a
20858 callee-saved register for the padding instead. */
20859 bool prefer_callee_reg_p = false;
20861 /* If it is safe to use r3, then do so. This sometimes
20862 generates better code on Thumb-2 by avoiding the need to
20863 use 32-bit push/pop instructions. */
20864 if (! any_sibcall_could_use_r3 ()
20865 && arm_size_return_regs () <= 12
20866 && (offsets->saved_regs_mask & (1 << 3)) == 0
20867 && (TARGET_THUMB2
20868 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20870 reg = 3;
20871 if (!TARGET_THUMB2)
20872 prefer_callee_reg_p = true;
20874 if (reg == -1
20875 || prefer_callee_reg_p)
20877 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20879 /* Avoid fixed registers; they may be changed at
20880 arbitrary times so it's unsafe to restore them
20881 during the epilogue. */
20882 if (!fixed_regs[i]
20883 && (offsets->saved_regs_mask & (1 << i)) == 0)
20885 reg = i;
20886 break;
20891 if (reg != -1)
20893 offsets->saved_regs += 4;
20894 offsets->saved_regs_mask |= (1 << reg);
20899 offsets->locals_base = offsets->soft_frame + frame_size;
20900 offsets->outgoing_args = (offsets->locals_base
20901 + crtl->outgoing_args_size);
20903 if (ARM_DOUBLEWORD_ALIGN)
20905 /* Ensure SP remains doubleword aligned. */
20906 if (offsets->outgoing_args & 7)
20907 offsets->outgoing_args += 4;
20908 gcc_assert (!(offsets->outgoing_args & 7));
20911 return offsets;
20915 /* Calculate the relative offsets for the different stack pointers. Positive
20916 offsets are in the direction of stack growth. */
20918 HOST_WIDE_INT
20919 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20921 arm_stack_offsets *offsets;
20923 offsets = arm_get_frame_offsets ();
20925 /* OK, now we have enough information to compute the distances.
20926 There must be an entry in these switch tables for each pair
20927 of registers in ELIMINABLE_REGS, even if some of the entries
20928 seem to be redundant or useless. */
20929 switch (from)
20931 case ARG_POINTER_REGNUM:
20932 switch (to)
20934 case THUMB_HARD_FRAME_POINTER_REGNUM:
20935 return 0;
20937 case FRAME_POINTER_REGNUM:
20938 /* This is the reverse of the soft frame pointer
20939 to hard frame pointer elimination below. */
20940 return offsets->soft_frame - offsets->saved_args;
20942 case ARM_HARD_FRAME_POINTER_REGNUM:
20943 /* This is only non-zero in the case where the static chain register
20944 is stored above the frame. */
20945 return offsets->frame - offsets->saved_args - 4;
20947 case STACK_POINTER_REGNUM:
20948 /* If nothing has been pushed on the stack at all
20949 then this will return -4. This *is* correct! */
20950 return offsets->outgoing_args - (offsets->saved_args + 4);
20952 default:
20953 gcc_unreachable ();
20955 gcc_unreachable ();
20957 case FRAME_POINTER_REGNUM:
20958 switch (to)
20960 case THUMB_HARD_FRAME_POINTER_REGNUM:
20961 return 0;
20963 case ARM_HARD_FRAME_POINTER_REGNUM:
20964 /* The hard frame pointer points to the top entry in the
20965 stack frame. The soft frame pointer to the bottom entry
20966 in the stack frame. If there is no stack frame at all,
20967 then they are identical. */
20969 return offsets->frame - offsets->soft_frame;
20971 case STACK_POINTER_REGNUM:
20972 return offsets->outgoing_args - offsets->soft_frame;
20974 default:
20975 gcc_unreachable ();
20977 gcc_unreachable ();
20979 default:
20980 /* You cannot eliminate from the stack pointer.
20981 In theory you could eliminate from the hard frame
20982 pointer to the stack pointer, but this will never
20983 happen, since if a stack frame is not needed the
20984 hard frame pointer will never be used. */
20985 gcc_unreachable ();
20989 /* Given FROM and TO register numbers, say whether this elimination is
20990 allowed. Frame pointer elimination is automatically handled.
20992 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20993 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20994 pointer, we must eliminate FRAME_POINTER_REGNUM into
20995 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20996 ARG_POINTER_REGNUM. */
20998 bool
20999 arm_can_eliminate (const int from, const int to)
21001 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21002 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21003 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21004 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21005 true);
21008 /* Emit RTL to save coprocessor registers on function entry. Returns the
21009 number of bytes pushed. */
21011 static int
21012 arm_save_coproc_regs(void)
21014 int saved_size = 0;
21015 unsigned reg;
21016 unsigned start_reg;
21017 rtx insn;
21019 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21020 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21022 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21023 insn = gen_rtx_MEM (V2SImode, insn);
21024 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21025 RTX_FRAME_RELATED_P (insn) = 1;
21026 saved_size += 8;
21029 if (TARGET_HARD_FLOAT && TARGET_VFP)
21031 start_reg = FIRST_VFP_REGNUM;
21033 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21035 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21036 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21038 if (start_reg != reg)
21039 saved_size += vfp_emit_fstmd (start_reg,
21040 (reg - start_reg) / 2);
21041 start_reg = reg + 2;
21044 if (start_reg != reg)
21045 saved_size += vfp_emit_fstmd (start_reg,
21046 (reg - start_reg) / 2);
21048 return saved_size;
21052 /* Set the Thumb frame pointer from the stack pointer. */
21054 static void
21055 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21057 HOST_WIDE_INT amount;
21058 rtx insn, dwarf;
21060 amount = offsets->outgoing_args - offsets->locals_base;
21061 if (amount < 1024)
21062 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21063 stack_pointer_rtx, GEN_INT (amount)));
21064 else
21066 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21067 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21068 expects the first two operands to be the same. */
21069 if (TARGET_THUMB2)
21071 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21072 stack_pointer_rtx,
21073 hard_frame_pointer_rtx));
21075 else
21077 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21078 hard_frame_pointer_rtx,
21079 stack_pointer_rtx));
21081 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21082 plus_constant (Pmode, stack_pointer_rtx, amount));
21083 RTX_FRAME_RELATED_P (dwarf) = 1;
21084 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21087 RTX_FRAME_RELATED_P (insn) = 1;
21090 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21091 function. */
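/* An editorial outline of the code below (not a specification): handle a
   mis-aligned SP for IS_STACKALIGN functions, stash IP when an APCS frame
   is being built, push the pretend args, push the core registers (as a
   push_multi or as STR/STRD sequences), save the coprocessor registers,
   establish the frame pointer, and finally drop SP to the outgoing-args
   area.  */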
21092 void
21093 arm_expand_prologue (void)
21095 rtx amount;
21096 rtx insn;
21097 rtx ip_rtx;
21098 unsigned long live_regs_mask;
21099 unsigned long func_type;
21100 int fp_offset = 0;
21101 int saved_pretend_args = 0;
21102 int saved_regs = 0;
21103 unsigned HOST_WIDE_INT args_to_push;
21104 arm_stack_offsets *offsets;
21106 func_type = arm_current_func_type ();
21108 /* Naked functions don't have prologues. */
21109 if (IS_NAKED (func_type))
21110 return;
21112 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21113 args_to_push = crtl->args.pretend_args_size;
21115 /* Compute which register we will have to save onto the stack. */
21116 offsets = arm_get_frame_offsets ();
21117 live_regs_mask = offsets->saved_regs_mask;
21119 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21121 if (IS_STACKALIGN (func_type))
21123 rtx r0, r1;
21125 /* Handle a word-aligned stack pointer. We generate the following:
21127 mov r0, sp
21128 bic r1, r0, #7
21129 mov sp, r1
21130 <save and restore r0 in normal prologue/epilogue>
21131 mov sp, r0
21132 bx lr
21134 The unwinder doesn't need to know about the stack realignment.
21135 Just tell it we saved SP in r0. */
21136 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21138 r0 = gen_rtx_REG (SImode, 0);
21139 r1 = gen_rtx_REG (SImode, 1);
21141 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21142 RTX_FRAME_RELATED_P (insn) = 1;
21143 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21145 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21147 /* ??? The CFA changes here, which may cause GDB to conclude that it
21148 has entered a different function. That said, the unwind info is
21149 correct, individually, before and after this instruction because
21150 we've described the save of SP, which will override the default
21151 handling of SP as restoring from the CFA. */
21152 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21155 /* For APCS frames, if IP register is clobbered
21156 when creating frame, save that register in a special
21157 way. */
21158 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21160 if (IS_INTERRUPT (func_type))
21162 /* Interrupt functions must not corrupt any registers.
21163 Creating a frame pointer however, corrupts the IP
21164 register, so we must push it first. */
21165 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21167 /* Do not set RTX_FRAME_RELATED_P on this insn.
21168 The dwarf stack unwinding code only wants to see one
21169 stack decrement per function, and this is not it. If
21170 this instruction is labeled as being part of the frame
21171 creation sequence then dwarf2out_frame_debug_expr will
21172 die when it encounters the assignment of IP to FP
21173 later on, since the use of SP here establishes SP as
21174 the CFA register and not IP.
21176 Anyway this instruction is not really part of the stack
21177 frame creation although it is part of the prologue. */
21179 else if (IS_NESTED (func_type))
21181 /* The static chain register is the same as the IP register
21182 used as a scratch register during stack frame creation.
21183 To get around this need to find somewhere to store IP
21184 whilst the frame is being created. We try the following
21185 places in order:
21187 1. The last argument register r3 if it is available.
21188 2. A slot on the stack above the frame if there are no
21189 arguments to push onto the stack.
21190 3. Register r3 again, after pushing the argument registers
21191 onto the stack, if this is a varargs function.
21192 4. The last slot on the stack created for the arguments to
21193 push, if this isn't a varargs function.
21195 Note - we only need to tell the dwarf2 backend about the SP
21196 adjustment in the second variant; the static chain register
21197 doesn't need to be unwound, as it doesn't contain a value
21198 inherited from the caller. */
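/* For reference (illustrative arithmetic only): the expression
   (0xf0 >> (args_to_push / 4)) & 0xf used below selects the highest
   argument registers, e.g. args_to_push == 4 gives 0x8 ({r3}),
   8 gives 0xc ({r2, r3}), 12 gives 0xe ({r1-r3}) and 16 gives 0xf
   ({r0-r3}).  */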
21200 if (!arm_r3_live_at_start_p ())
21201 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21202 else if (args_to_push == 0)
21204 rtx addr, dwarf;
21206 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21207 saved_regs += 4;
21209 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21210 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21211 fp_offset = 4;
21213 /* Just tell the dwarf backend that we adjusted SP. */
21214 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21215 plus_constant (Pmode, stack_pointer_rtx,
21216 -fp_offset));
21217 RTX_FRAME_RELATED_P (insn) = 1;
21218 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21220 else
21222 /* Store the args on the stack. */
21223 if (cfun->machine->uses_anonymous_args)
21225 insn
21226 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21227 (0xf0 >> (args_to_push / 4)) & 0xf);
21228 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21229 saved_pretend_args = 1;
21231 else
21233 rtx addr, dwarf;
21235 if (args_to_push == 4)
21236 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21237 else
21238 addr
21239 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21240 plus_constant (Pmode,
21241 stack_pointer_rtx,
21242 -args_to_push));
21244 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21246 /* Just tell the dwarf backend that we adjusted SP. */
21247 dwarf
21248 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21249 plus_constant (Pmode, stack_pointer_rtx,
21250 -args_to_push));
21251 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21254 RTX_FRAME_RELATED_P (insn) = 1;
21255 fp_offset = args_to_push;
21256 args_to_push = 0;
21260 insn = emit_set_insn (ip_rtx,
21261 plus_constant (Pmode, stack_pointer_rtx,
21262 fp_offset));
21263 RTX_FRAME_RELATED_P (insn) = 1;
21266 if (args_to_push)
21268 /* Push the argument registers, or reserve space for them. */
21269 if (cfun->machine->uses_anonymous_args)
21270 insn = emit_multi_reg_push
21271 ((0xf0 >> (args_to_push / 4)) & 0xf,
21272 (0xf0 >> (args_to_push / 4)) & 0xf);
21273 else
21274 insn = emit_insn
21275 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21276 GEN_INT (- args_to_push)));
21277 RTX_FRAME_RELATED_P (insn) = 1;
21280 /* If this is an interrupt service routine, and the link register
21281 is going to be pushed, and we're not generating extra
21282 push of IP (needed when a frame is needed and the frame layout is APCS),
21283 subtracting four from LR now will mean that the function return
21284 can be done with a single instruction. */
21285 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21286 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21287 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21288 && TARGET_ARM)
21290 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21292 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21295 if (live_regs_mask)
21297 unsigned long dwarf_regs_mask = live_regs_mask;
21299 saved_regs += bit_count (live_regs_mask) * 4;
21300 if (optimize_size && !frame_pointer_needed
21301 && saved_regs == offsets->saved_regs - offsets->saved_args)
21303 /* If no coprocessor registers are being pushed and we don't have
21304 to worry about a frame pointer then push extra registers to
21305 create the stack frame. This is done in a way that does not
21306 alter the frame layout, so is independent of the epilogue. */
21307 int n;
21308 int frame;
21309 n = 0;
21310 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21311 n++;
21312 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21313 if (frame && n * 4 >= frame)
21315 n = frame / 4;
21316 live_regs_mask |= (1 << n) - 1;
21317 saved_regs += frame;
21321 if (TARGET_LDRD
21322 && current_tune->prefer_ldrd_strd
21323 && !optimize_function_for_size_p (cfun))
21325 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21326 if (TARGET_THUMB2)
21327 thumb2_emit_strd_push (live_regs_mask);
21328 else if (TARGET_ARM
21329 && !TARGET_APCS_FRAME
21330 && !IS_INTERRUPT (func_type))
21331 arm_emit_strd_push (live_regs_mask);
21332 else
21334 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21335 RTX_FRAME_RELATED_P (insn) = 1;
21338 else
21340 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21341 RTX_FRAME_RELATED_P (insn) = 1;
21345 if (! IS_VOLATILE (func_type))
21346 saved_regs += arm_save_coproc_regs ();
21348 if (frame_pointer_needed && TARGET_ARM)
21350 /* Create the new frame pointer. */
21351 if (TARGET_APCS_FRAME)
21353 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21354 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21355 RTX_FRAME_RELATED_P (insn) = 1;
21357 if (IS_NESTED (func_type))
21359 /* Recover the static chain register. */
21360 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21361 insn = gen_rtx_REG (SImode, 3);
21362 else
21364 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21365 insn = gen_frame_mem (SImode, insn);
21367 emit_set_insn (ip_rtx, insn);
21368 /* Add a USE to stop propagate_one_insn() from barfing. */
21369 emit_insn (gen_force_register_use (ip_rtx));
21372 else
21374 insn = GEN_INT (saved_regs - 4);
21375 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21376 stack_pointer_rtx, insn));
21377 RTX_FRAME_RELATED_P (insn) = 1;
21381 if (flag_stack_usage_info)
21382 current_function_static_stack_size
21383 = offsets->outgoing_args - offsets->saved_args;
21385 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21387 /* This add can produce multiple insns for a large constant, so we
21388 need to get tricky. */
21389 rtx_insn *last = get_last_insn ();
21391 amount = GEN_INT (offsets->saved_args + saved_regs
21392 - offsets->outgoing_args);
21394 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21395 amount));
21398 last = last ? NEXT_INSN (last) : get_insns ();
21399 RTX_FRAME_RELATED_P (last) = 1;
21401 while (last != insn);
21403 /* If the frame pointer is needed, emit a special barrier that
21404 will prevent the scheduler from moving stores to the frame
21405 before the stack adjustment. */
21406 if (frame_pointer_needed)
21407 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21408 hard_frame_pointer_rtx));
21412 if (frame_pointer_needed && TARGET_THUMB2)
21413 thumb_set_frame_pointer (offsets);
21415 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21417 unsigned long mask;
21419 mask = live_regs_mask;
21420 mask &= THUMB2_WORK_REGS;
21421 if (!IS_NESTED (func_type))
21422 mask |= (1 << IP_REGNUM);
21423 arm_load_pic_register (mask);
21426 /* If we are profiling, make sure no instructions are scheduled before
21427 the call to mcount. Similarly if the user has requested no
21428 scheduling in the prolog. Similarly if we want non-call exceptions
21429 using the EABI unwinder, to prevent faulting instructions from being
21430 swapped with a stack adjustment. */
21431 if (crtl->profile || !TARGET_SCHED_PROLOG
21432 || (arm_except_unwind_info (&global_options) == UI_TARGET
21433 && cfun->can_throw_non_call_exceptions))
21434 emit_insn (gen_blockage ());
21436 /* If the link register is being kept alive, with the return address in it,
21437 then make sure that it does not get reused by the ce2 pass. */
21438 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21439 cfun->machine->lr_save_eliminated = 1;
21442 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21443 static void
21444 arm_print_condition (FILE *stream)
21446 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21448 /* Branch conversion is not implemented for Thumb-2. */
21449 if (TARGET_THUMB)
21451 output_operand_lossage ("predicated Thumb instruction");
21452 return;
21454 if (current_insn_predicate != NULL)
21456 output_operand_lossage
21457 ("predicated instruction in conditional sequence");
21458 return;
21461 fputs (arm_condition_codes[arm_current_cc], stream);
21463 else if (current_insn_predicate)
21465 enum arm_cond_code code;
21467 if (TARGET_THUMB1)
21469 output_operand_lossage ("predicated Thumb instruction");
21470 return;
21473 code = get_arm_condition_code (current_insn_predicate);
21474 fputs (arm_condition_codes[code], stream);
21479 /* Globally reserved letters: acln
21480 Punctuation letters currently used: @_|?().!#
21481 Lower case letters currently used: bcdefhimpqtvwxyz
21482 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21483 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21485 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21487 If CODE is 'd', then the X is a condition operand and the instruction
21488 should only be executed if the condition is true.
21489 If CODE is 'D', then the X is a condition operand and the instruction
21490 should only be executed if the condition is false: however, if the mode
21491 of the comparison is CCFPEmode, then always execute the instruction -- we
21492 do this because in these circumstances !GE does not necessarily imply LT;
21493 in these cases the instruction pattern will take care to make sure that
21494 an instruction containing %d will follow, thereby undoing the effects of
21495 doing this instruction unconditionally.
21496 If CODE is 'N' then X is a floating point operand that must be negated
21497 before output.
21498 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21499 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
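/* A worked illustration (the template is hypothetical, not taken from
   arm.md): if operand 3 is the comparison (eq (reg:CC CC_REGNUM)
   (const_int 0)), then "%d3" prints "eq" and "%D3" prints the inverse
   "ne", so an output template such as "mov%D3\t%0, #0" would emit
   "movne r0, #0" when operand 0 is r0.  */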
21500 static void
21501 arm_print_operand (FILE *stream, rtx x, int code)
21503 switch (code)
21505 case '@':
21506 fputs (ASM_COMMENT_START, stream);
21507 return;
21509 case '_':
21510 fputs (user_label_prefix, stream);
21511 return;
21513 case '|':
21514 fputs (REGISTER_PREFIX, stream);
21515 return;
21517 case '?':
21518 arm_print_condition (stream);
21519 return;
21521 case '(':
21522 /* Nothing in unified syntax, otherwise the current condition code. */
21523 if (!TARGET_UNIFIED_ASM)
21524 arm_print_condition (stream);
21525 break;
21527 case ')':
21528 /* The current condition code in unified syntax, otherwise nothing. */
21529 if (TARGET_UNIFIED_ASM)
21530 arm_print_condition (stream);
21531 break;
21533 case '.':
21534 /* The current condition code for a condition code setting instruction.
21535 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21536 if (TARGET_UNIFIED_ASM)
21538 fputc('s', stream);
21539 arm_print_condition (stream);
21541 else
21543 arm_print_condition (stream);
21544 fputc('s', stream);
21546 return;
21548 case '!':
21549 /* If the instruction is conditionally executed then print
21550 the current condition code, otherwise print 's'. */
21551 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21552 if (current_insn_predicate)
21553 arm_print_condition (stream);
21554 else
21555 fputc('s', stream);
21556 break;
21558 /* %# is a "break" sequence. It doesn't output anything, but is used to
21559 separate e.g. operand numbers from following text, if that text consists
21560 of further digits which we don't want to be part of the operand
21561 number. */
21562 case '#':
21563 return;
21565 case 'N':
21567 REAL_VALUE_TYPE r;
21568 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21569 r = real_value_negate (&r);
21570 fprintf (stream, "%s", fp_const_from_val (&r));
21572 return;
21574 /* An integer or symbol address without a preceding # sign. */
21575 case 'c':
21576 switch (GET_CODE (x))
21578 case CONST_INT:
21579 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21580 break;
21582 case SYMBOL_REF:
21583 output_addr_const (stream, x);
21584 break;
21586 case CONST:
21587 if (GET_CODE (XEXP (x, 0)) == PLUS
21588 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21590 output_addr_const (stream, x);
21591 break;
21593 /* Fall through. */
21595 default:
21596 output_operand_lossage ("Unsupported operand for code '%c'", code);
21598 return;
21600 /* An integer that we want to print in HEX. */
21601 case 'x':
21602 switch (GET_CODE (x))
21604 case CONST_INT:
21605 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21606 break;
21608 default:
21609 output_operand_lossage ("Unsupported operand for code '%c'", code);
21611 return;
21613 case 'B':
21614 if (CONST_INT_P (x))
21616 HOST_WIDE_INT val;
21617 val = ARM_SIGN_EXTEND (~INTVAL (x));
21618 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21620 else
21622 putc ('~', stream);
21623 output_addr_const (stream, x);
21625 return;
21627 case 'b':
21628 /* Print the log2 of a CONST_INT. */
21630 HOST_WIDE_INT val;
21632 if (!CONST_INT_P (x)
21633 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21634 output_operand_lossage ("Unsupported operand for code '%c'", code);
21635 else
21636 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21638 return;
21640 case 'L':
21641 /* The low 16 bits of an immediate constant. */
21642 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21643 return;
21645 case 'i':
21646 fprintf (stream, "%s", arithmetic_instr (x, 1));
21647 return;
21649 case 'I':
21650 fprintf (stream, "%s", arithmetic_instr (x, 0));
21651 return;
21653 case 'S':
21655 HOST_WIDE_INT val;
21656 const char *shift;
21658 shift = shift_op (x, &val);
21660 if (shift)
21662 fprintf (stream, ", %s ", shift);
21663 if (val == -1)
21664 arm_print_operand (stream, XEXP (x, 1), 0);
21665 else
21666 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21669 return;
21671 /* An explanation of the 'Q', 'R' and 'H' register operands:
21673 In a pair of registers containing a DI or DF value the 'Q'
21674 operand returns the register number of the register containing
21675 the least significant part of the value. The 'R' operand returns
21676 the register number of the register containing the most
21677 significant part of the value.
21679 The 'H' operand returns the higher of the two register numbers.
21680 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21681 same as the 'Q' operand, since the most significant part of the
21682 value is held in the lower number register. The reverse is true
21683 on systems where WORDS_BIG_ENDIAN is false.
21685 The purpose of these operands is to distinguish between cases
21686 where the endian-ness of the values is important (for example
21687 when they are added together), and cases where the endian-ness
21688 is irrelevant, but the order of register operations is important.
21689 For example when loading a value from memory into a register
21690 pair, the endian-ness does not matter. Provided that the value
21691 from the lower memory address is put into the lower numbered
21692 register, and the value from the higher address is put into the
21693 higher numbered register, the load will work regardless of whether
21694 the value being loaded is big-wordian or little-wordian. The
21695 order of the two register loads can matter however, if the address
21696 of the memory location is actually held in one of the registers
21697 being overwritten by the load.
21699 The 'Q' and 'R' constraints are also available for 64-bit
21700 constants. */
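/* A concrete illustration (register pair chosen arbitrarily): for a
   DImode value held in r4/r5, %Q prints r4 and %R prints r5 when
   WORDS_BIG_ENDIAN is false, while %Q prints r5 and %R prints r4 when
   it is true; %H prints r5 in both cases.  */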
21701 case 'Q':
21702 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21704 rtx part = gen_lowpart (SImode, x);
21705 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21706 return;
21709 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21711 output_operand_lossage ("invalid operand for code '%c'", code);
21712 return;
21715 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21716 return;
21718 case 'R':
21719 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21721 machine_mode mode = GET_MODE (x);
21722 rtx part;
21724 if (mode == VOIDmode)
21725 mode = DImode;
21726 part = gen_highpart_mode (SImode, mode, x);
21727 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21728 return;
21731 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21733 output_operand_lossage ("invalid operand for code '%c'", code);
21734 return;
21737 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21738 return;
21740 case 'H':
21741 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21743 output_operand_lossage ("invalid operand for code '%c'", code);
21744 return;
21747 asm_fprintf (stream, "%r", REGNO (x) + 1);
21748 return;
21750 case 'J':
21751 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21753 output_operand_lossage ("invalid operand for code '%c'", code);
21754 return;
21757 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21758 return;
21760 case 'K':
21761 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21763 output_operand_lossage ("invalid operand for code '%c'", code);
21764 return;
21767 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21768 return;
21770 case 'm':
21771 asm_fprintf (stream, "%r",
21772 REG_P (XEXP (x, 0))
21773 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21774 return;
21776 case 'M':
21777 asm_fprintf (stream, "{%r-%r}",
21778 REGNO (x),
21779 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21780 return;
21782 /* Like 'M', but writing doubleword vector registers, for use by Neon
21783 insns. */
21784 case 'h':
21786 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21787 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21788 if (numregs == 1)
21789 asm_fprintf (stream, "{d%d}", regno);
21790 else
21791 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21793 return;
21795 case 'd':
21796 /* CONST_TRUE_RTX means always -- that's the default. */
21797 if (x == const_true_rtx)
21798 return;
21800 if (!COMPARISON_P (x))
21802 output_operand_lossage ("invalid operand for code '%c'", code);
21803 return;
21806 fputs (arm_condition_codes[get_arm_condition_code (x)],
21807 stream);
21808 return;
21810 case 'D':
21811 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21812 want to do that. */
21813 if (x == const_true_rtx)
21815 output_operand_lossage ("instruction never executed");
21816 return;
21818 if (!COMPARISON_P (x))
21820 output_operand_lossage ("invalid operand for code '%c'", code);
21821 return;
21824 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21825 (get_arm_condition_code (x))],
21826 stream);
21827 return;
21829 case 's':
21830 case 'V':
21831 case 'W':
21832 case 'X':
21833 case 'Y':
21834 case 'Z':
21835 /* Former Maverick support, removed after GCC-4.7. */
21836 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21837 return;
21839 case 'U':
21840 if (!REG_P (x)
21841 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21842 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21843 /* Bad value for wCG register number. */
21845 output_operand_lossage ("invalid operand for code '%c'", code);
21846 return;
21849 else
21850 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21851 return;
21853 /* Print an iWMMXt control register name. */
21854 case 'w':
21855 if (!CONST_INT_P (x)
21856 || INTVAL (x) < 0
21857 || INTVAL (x) >= 16)
21858 /* Bad value for wC register number. */
21860 output_operand_lossage ("invalid operand for code '%c'", code);
21861 return;
21864 else
21866 static const char * wc_reg_names [16] =
21868 "wCID", "wCon", "wCSSF", "wCASF",
21869 "wC4", "wC5", "wC6", "wC7",
21870 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21871 "wC12", "wC13", "wC14", "wC15"
21874 fputs (wc_reg_names [INTVAL (x)], stream);
21876 return;
21878 /* Print the high single-precision register of a VFP double-precision
21879 register. */
21880 case 'p':
21882 machine_mode mode = GET_MODE (x);
21883 int regno;
21885 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21887 output_operand_lossage ("invalid operand for code '%c'", code);
21888 return;
21891 regno = REGNO (x);
21892 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21894 output_operand_lossage ("invalid operand for code '%c'", code);
21895 return;
21898 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21900 return;
21902 /* Print a VFP/Neon double precision or quad precision register name. */
21903 case 'P':
21904 case 'q':
21906 machine_mode mode = GET_MODE (x);
21907 int is_quad = (code == 'q');
21908 int regno;
21910 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21912 output_operand_lossage ("invalid operand for code '%c'", code);
21913 return;
21916 if (!REG_P (x)
21917 || !IS_VFP_REGNUM (REGNO (x)))
21919 output_operand_lossage ("invalid operand for code '%c'", code);
21920 return;
21923 regno = REGNO (x);
21924 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21925 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21927 output_operand_lossage ("invalid operand for code '%c'", code);
21928 return;
21931 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21932 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21934 return;
21936 /* These two codes print the low/high doubleword register of a Neon quad
21937 register, respectively. For pair-structure types, can also print
21938 low/high quadword registers. */
21939 case 'e':
21940 case 'f':
21942 machine_mode mode = GET_MODE (x);
21943 int regno;
21945 if ((GET_MODE_SIZE (mode) != 16
21946 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21948 output_operand_lossage ("invalid operand for code '%c'", code);
21949 return;
21952 regno = REGNO (x);
21953 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21955 output_operand_lossage ("invalid operand for code '%c'", code);
21956 return;
21959 if (GET_MODE_SIZE (mode) == 16)
21960 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21961 + (code == 'f' ? 1 : 0));
21962 else
21963 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21964 + (code == 'f' ? 1 : 0));
21966 return;
21968 /* Print a VFPv3 floating-point constant, represented as an integer
21969 index. */
21970 case 'G':
21972 int index = vfp3_const_double_index (x);
21973 gcc_assert (index != -1);
21974 fprintf (stream, "%d", index);
21976 return;
21978 /* Print bits representing opcode features for Neon.
21980 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21981 and polynomials as unsigned.
21983 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21985 Bit 2 is 1 for rounding functions, 0 otherwise. */
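/* A worked example of the encoding (value chosen for illustration):
   for bits = 5 (binary 101), bit 0 says signed, bit 1 says ordinary
   integer and bit 2 says rounding, so %T prints 's', %F prints 'i',
   %t prints 's' and %O prints 'r'.  */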
21987 /* Identify the type as 's', 'u', 'p' or 'f'. */
21988 case 'T':
21990 HOST_WIDE_INT bits = INTVAL (x);
21991 fputc ("uspf"[bits & 3], stream);
21993 return;
21995 /* Likewise, but signed and unsigned integers are both 'i'. */
21996 case 'F':
21998 HOST_WIDE_INT bits = INTVAL (x);
21999 fputc ("iipf"[bits & 3], stream);
22001 return;
22003 /* As for 'T', but emit 'u' instead of 'p'. */
22004 case 't':
22006 HOST_WIDE_INT bits = INTVAL (x);
22007 fputc ("usuf"[bits & 3], stream);
22009 return;
22011 /* Bit 2: rounding (vs none). */
22012 case 'O':
22014 HOST_WIDE_INT bits = INTVAL (x);
22015 fputs ((bits & 4) != 0 ? "r" : "", stream);
22017 return;
22019 /* Memory operand for vld1/vst1 instruction. */
22020 case 'A':
22022 rtx addr;
22023 bool postinc = FALSE;
22024 rtx postinc_reg = NULL;
22025 unsigned align, memsize, align_bits;
22027 gcc_assert (MEM_P (x));
22028 addr = XEXP (x, 0);
22029 if (GET_CODE (addr) == POST_INC)
22031 postinc = 1;
22032 addr = XEXP (addr, 0);
22034 if (GET_CODE (addr) == POST_MODIFY)
22036 postinc_reg = XEXP( XEXP (addr, 1), 1);
22037 addr = XEXP (addr, 0);
22039 asm_fprintf (stream, "[%r", REGNO (addr));
22041 /* We know the alignment of this access, so we can emit a hint in the
22042 instruction (for some alignments) as an aid to the memory subsystem
22043 of the target. */
22044 align = MEM_ALIGN (x) >> 3;
22045 memsize = MEM_SIZE (x);
22047 /* Only certain alignment specifiers are supported by the hardware. */
22048 if (memsize == 32 && (align % 32) == 0)
22049 align_bits = 256;
22050 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22051 align_bits = 128;
22052 else if (memsize >= 8 && (align % 8) == 0)
22053 align_bits = 64;
22054 else
22055 align_bits = 0;
22057 if (align_bits != 0)
22058 asm_fprintf (stream, ":%d", align_bits);
22060 asm_fprintf (stream, "]");
22062 if (postinc)
22063 fputs("!", stream);
22064 if (postinc_reg)
22065 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22067 return;
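/* In the 'A' case above, for example, a 16-byte access whose address is
   known to be 16-byte aligned is printed with a 128-bit alignment hint,
   giving an operand such as "[r0:128]" (register chosen arbitrarily).  */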
22069 case 'C':
22071 rtx addr;
22073 gcc_assert (MEM_P (x));
22074 addr = XEXP (x, 0);
22075 gcc_assert (REG_P (addr));
22076 asm_fprintf (stream, "[%r]", REGNO (addr));
22078 return;
22080 /* Translate an S register number into a D register number and element index. */
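/* For example, the single-precision register s5 occupies the upper half
   of d2, so it is printed here as "d2[1]".  */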
22081 case 'y':
22083 machine_mode mode = GET_MODE (x);
22084 int regno;
22086 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22088 output_operand_lossage ("invalid operand for code '%c'", code);
22089 return;
22092 regno = REGNO (x);
22093 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22095 output_operand_lossage ("invalid operand for code '%c'", code);
22096 return;
22099 regno = regno - FIRST_VFP_REGNUM;
22100 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22102 return;
22104 case 'v':
22105 gcc_assert (CONST_DOUBLE_P (x));
22106 int result;
22107 result = vfp3_const_double_for_fract_bits (x);
22108 if (result == 0)
22109 result = vfp3_const_double_for_bits (x);
22110 fprintf (stream, "#%d", result);
22111 return;
22113 /* Register specifier for vld1.16/vst1.16. Translate the S register
22114 number into a D register number and element index. */
22115 case 'z':
22117 machine_mode mode = GET_MODE (x);
22118 int regno;
22120 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22122 output_operand_lossage ("invalid operand for code '%c'", code);
22123 return;
22126 regno = REGNO (x);
22127 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22129 output_operand_lossage ("invalid operand for code '%c'", code);
22130 return;
22133 regno = regno - FIRST_VFP_REGNUM;
22134 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22136 return;
22138 default:
22139 if (x == 0)
22141 output_operand_lossage ("missing operand");
22142 return;
22145 switch (GET_CODE (x))
22147 case REG:
22148 asm_fprintf (stream, "%r", REGNO (x));
22149 break;
22151 case MEM:
22152 output_memory_reference_mode = GET_MODE (x);
22153 output_address (XEXP (x, 0));
22154 break;
22156 case CONST_DOUBLE:
22158 char fpstr[20];
22159 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22160 sizeof (fpstr), 0, 1);
22161 fprintf (stream, "#%s", fpstr);
22163 break;
22165 default:
22166 gcc_assert (GET_CODE (x) != NEG);
22167 fputc ('#', stream);
22168 if (GET_CODE (x) == HIGH)
22170 fputs (":lower16:", stream);
22171 x = XEXP (x, 0);
22174 output_addr_const (stream, x);
22175 break;
22180 /* Target hook for printing a memory address. */
22181 static void
22182 arm_print_operand_address (FILE *stream, rtx x)
22184 if (TARGET_32BIT)
22186 int is_minus = GET_CODE (x) == MINUS;
22188 if (REG_P (x))
22189 asm_fprintf (stream, "[%r]", REGNO (x));
22190 else if (GET_CODE (x) == PLUS || is_minus)
22192 rtx base = XEXP (x, 0);
22193 rtx index = XEXP (x, 1);
22194 HOST_WIDE_INT offset = 0;
22195 if (!REG_P (base)
22196 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22198 /* Ensure that BASE is a register. */
22199 /* (one of them must be). */
22200 /* Also ensure the SP is not used as an index register. */
22201 rtx temp = base;
22202 base = index;
22203 index = temp;
22205 switch (GET_CODE (index))
22207 case CONST_INT:
22208 offset = INTVAL (index);
22209 if (is_minus)
22210 offset = -offset;
22211 asm_fprintf (stream, "[%r, #%wd]",
22212 REGNO (base), offset);
22213 break;
22215 case REG:
22216 asm_fprintf (stream, "[%r, %s%r]",
22217 REGNO (base), is_minus ? "-" : "",
22218 REGNO (index));
22219 break;
22221 case MULT:
22222 case ASHIFTRT:
22223 case LSHIFTRT:
22224 case ASHIFT:
22225 case ROTATERT:
22227 asm_fprintf (stream, "[%r, %s%r",
22228 REGNO (base), is_minus ? "-" : "",
22229 REGNO (XEXP (index, 0)));
22230 arm_print_operand (stream, index, 'S');
22231 fputs ("]", stream);
22232 break;
22235 default:
22236 gcc_unreachable ();
22239 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22240 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22242 extern machine_mode output_memory_reference_mode;
22244 gcc_assert (REG_P (XEXP (x, 0)));
22246 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22247 asm_fprintf (stream, "[%r, #%s%d]!",
22248 REGNO (XEXP (x, 0)),
22249 GET_CODE (x) == PRE_DEC ? "-" : "",
22250 GET_MODE_SIZE (output_memory_reference_mode));
22251 else
22252 asm_fprintf (stream, "[%r], #%s%d",
22253 REGNO (XEXP (x, 0)),
22254 GET_CODE (x) == POST_DEC ? "-" : "",
22255 GET_MODE_SIZE (output_memory_reference_mode));
22257 else if (GET_CODE (x) == PRE_MODIFY)
22259 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22260 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22261 asm_fprintf (stream, "#%wd]!",
22262 INTVAL (XEXP (XEXP (x, 1), 1)));
22263 else
22264 asm_fprintf (stream, "%r]!",
22265 REGNO (XEXP (XEXP (x, 1), 1)));
22267 else if (GET_CODE (x) == POST_MODIFY)
22269 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22270 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22271 asm_fprintf (stream, "#%wd",
22272 INTVAL (XEXP (XEXP (x, 1), 1)));
22273 else
22274 asm_fprintf (stream, "%r",
22275 REGNO (XEXP (XEXP (x, 1), 1)));
22277 else output_addr_const (stream, x);
22279 else
22281 if (REG_P (x))
22282 asm_fprintf (stream, "[%r]", REGNO (x));
22283 else if (GET_CODE (x) == POST_INC)
22284 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22285 else if (GET_CODE (x) == PLUS)
22287 gcc_assert (REG_P (XEXP (x, 0)));
22288 if (CONST_INT_P (XEXP (x, 1)))
22289 asm_fprintf (stream, "[%r, #%wd]",
22290 REGNO (XEXP (x, 0)),
22291 INTVAL (XEXP (x, 1)));
22292 else
22293 asm_fprintf (stream, "[%r, %r]",
22294 REGNO (XEXP (x, 0)),
22295 REGNO (XEXP (x, 1)));
22297 else
22298 output_addr_const (stream, x);
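/* Some illustrative 32-bit outputs of this hook (register numbers and
   offsets invented): a bare register prints as "[r0]", register plus
   constant as "[r0, #4]", register plus register as "[r0, r1]",
   PRE_MODIFY by a constant as "[r0, #8]!" and POST_INC of an SImode
   reference as "[r0], #4"; a shifted index additionally prints the
   shift operator and amount via the 'S' operand code.  */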
22302 /* Target hook for indicating whether a punctuation character for
22303 TARGET_PRINT_OPERAND is valid. */
22304 static bool
22305 arm_print_operand_punct_valid_p (unsigned char code)
22307 return (code == '@' || code == '|' || code == '.'
22308 || code == '(' || code == ')' || code == '#'
22309 || (TARGET_32BIT && (code == '?'))
22310 || (TARGET_THUMB2 && (code == '!'))
22311 || (TARGET_THUMB && (code == '_')));
22314 /* Target hook for assembling integer objects. The ARM version needs to
22315 handle word-sized values specially. */
22316 static bool
22317 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22319 machine_mode mode;
22321 if (size == UNITS_PER_WORD && aligned_p)
22323 fputs ("\t.word\t", asm_out_file);
22324 output_addr_const (asm_out_file, x);
22326 /* Mark symbols as position independent. We only do this in the
22327 .text segment, not in the .data segment. */
22328 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22329 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22331 /* See legitimize_pic_address for an explanation of the
22332 TARGET_VXWORKS_RTP check. */
22333 if (!arm_pic_data_is_text_relative
22334 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22335 fputs ("(GOT)", asm_out_file);
22336 else
22337 fputs ("(GOTOFF)", asm_out_file);
22339 fputc ('\n', asm_out_file);
22340 return true;
22343 mode = GET_MODE (x);
22345 if (arm_vector_mode_supported_p (mode))
22347 int i, units;
22349 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22351 units = CONST_VECTOR_NUNITS (x);
22352 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22354 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22355 for (i = 0; i < units; i++)
22357 rtx elt = CONST_VECTOR_ELT (x, i);
22358 assemble_integer
22359 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22361 else
22362 for (i = 0; i < units; i++)
22364 rtx elt = CONST_VECTOR_ELT (x, i);
22365 REAL_VALUE_TYPE rval;
22367 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22369 assemble_real
22370 (rval, GET_MODE_INNER (mode),
22371 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22374 return true;
22377 return default_assemble_integer (x, size, aligned_p);
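/* For the word-sized PIC case handled above (symbol name invented): a
   constant-table reference to a global symbol "foo" that is not known
   to be local is emitted as "\t.word\tfoo(GOT)", whereas a local,
   text-relative symbol is emitted as "\t.word\tfoo(GOTOFF)".  */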
22380 static void
22381 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22383 section *s;
22385 if (!TARGET_AAPCS_BASED)
22387 (is_ctor ?
22388 default_named_section_asm_out_constructor
22389 : default_named_section_asm_out_destructor) (symbol, priority);
22390 return;
22393 /* Put these in the .init_array section, using a special relocation. */
22394 if (priority != DEFAULT_INIT_PRIORITY)
22396 char buf[18];
22397 sprintf (buf, "%s.%.5u",
22398 is_ctor ? ".init_array" : ".fini_array",
22399 priority);
22400 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22402 else if (is_ctor)
22403 s = ctors_section;
22404 else
22405 s = dtors_section;
22407 switch_to_section (s);
22408 assemble_align (POINTER_SIZE);
22409 fputs ("\t.word\t", asm_out_file);
22410 output_addr_const (asm_out_file, symbol);
22411 fputs ("(target1)\n", asm_out_file);
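/* For illustration (symbol and priority invented): on an AAPCS target a
   constructor "foo" registered with priority 101 is placed in a section
   named ".init_array.00101" and emitted as "\t.word\tfoo(target1)", the
   (target1) annotation selecting the R_ARM_TARGET1 relocation used for
   these tables.  */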
22414 /* Add a function to the list of static constructors. */
22416 static void
22417 arm_elf_asm_constructor (rtx symbol, int priority)
22419 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22422 /* Add a function to the list of static destructors. */
22424 static void
22425 arm_elf_asm_destructor (rtx symbol, int priority)
22427 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22430 /* A finite state machine takes care of noticing whether or not instructions
22431 can be conditionally executed, and thus decrease execution time and code
22432 size by deleting branch instructions. The fsm is controlled by
22433 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22435 /* The state of the fsm controlling condition codes is:
22436 0: normal, do nothing special
22437 1: make ASM_OUTPUT_OPCODE not output this instruction
22438 2: make ASM_OUTPUT_OPCODE not output this instruction
22439 3: make instructions conditional
22440 4: make instructions conditional
22442 State transitions (state->state by whom under condition):
22443 0 -> 1 final_prescan_insn if the `target' is a label
22444 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22445 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22446 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22447 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22448 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22449 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22450 (the target insn is arm_target_insn).
22452 If the jump clobbers the conditions then we use states 2 and 4.
22454 A similar thing can be done with conditional return insns.
22456 XXX In case the `target' is an unconditional branch, this conditionalising
22457 of the instructions always reduces code size, but not always execution
22458 time. But then, I want to reduce the code size to somewhere near what
22459 /bin/cc produces. */
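/* An illustrative transformation (registers and label invented): the
   sequence

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
   .L1:

   becomes

       cmp   r0, #0
       addne r1, r1, #1

   The branch is deleted and the skipped instruction is executed only
   when the inverse of the branch condition holds.  */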
22461 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22462 instructions. When a COND_EXEC instruction is seen the subsequent
22463 instructions are scanned so that multiple conditional instructions can be
22464 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22465 specify the length and true/false mask for the IT block. These will be
22466 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
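/* As a Thumb-2 illustration: three consecutive COND_EXEC instructions
   with arm_condexec_mask equal to 3 (binary 011) and a base condition
   of EQ cause thumb2_asm_output_opcode to emit "itte eq", so the first
   two instructions execute when EQ holds and the third when it does
   not.  */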
22468 /* Returns the index of the ARM condition code string in
22469 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22470 COMPARISON should be an rtx like `(eq (...) (...))'. */
22472 enum arm_cond_code
22473 maybe_get_arm_condition_code (rtx comparison)
22475 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22476 enum arm_cond_code code;
22477 enum rtx_code comp_code = GET_CODE (comparison);
22479 if (GET_MODE_CLASS (mode) != MODE_CC)
22480 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22481 XEXP (comparison, 1));
22483 switch (mode)
22485 case CC_DNEmode: code = ARM_NE; goto dominance;
22486 case CC_DEQmode: code = ARM_EQ; goto dominance;
22487 case CC_DGEmode: code = ARM_GE; goto dominance;
22488 case CC_DGTmode: code = ARM_GT; goto dominance;
22489 case CC_DLEmode: code = ARM_LE; goto dominance;
22490 case CC_DLTmode: code = ARM_LT; goto dominance;
22491 case CC_DGEUmode: code = ARM_CS; goto dominance;
22492 case CC_DGTUmode: code = ARM_HI; goto dominance;
22493 case CC_DLEUmode: code = ARM_LS; goto dominance;
22494 case CC_DLTUmode: code = ARM_CC;
22496 dominance:
22497 if (comp_code == EQ)
22498 return ARM_INVERSE_CONDITION_CODE (code);
22499 if (comp_code == NE)
22500 return code;
22501 return ARM_NV;
22503 case CC_NOOVmode:
22504 switch (comp_code)
22506 case NE: return ARM_NE;
22507 case EQ: return ARM_EQ;
22508 case GE: return ARM_PL;
22509 case LT: return ARM_MI;
22510 default: return ARM_NV;
22513 case CC_Zmode:
22514 switch (comp_code)
22516 case NE: return ARM_NE;
22517 case EQ: return ARM_EQ;
22518 default: return ARM_NV;
22521 case CC_Nmode:
22522 switch (comp_code)
22524 case NE: return ARM_MI;
22525 case EQ: return ARM_PL;
22526 default: return ARM_NV;
22529 case CCFPEmode:
22530 case CCFPmode:
22531 /* We can handle all cases except UNEQ and LTGT. */
22532 switch (comp_code)
22534 case GE: return ARM_GE;
22535 case GT: return ARM_GT;
22536 case LE: return ARM_LS;
22537 case LT: return ARM_MI;
22538 case NE: return ARM_NE;
22539 case EQ: return ARM_EQ;
22540 case ORDERED: return ARM_VC;
22541 case UNORDERED: return ARM_VS;
22542 case UNLT: return ARM_LT;
22543 case UNLE: return ARM_LE;
22544 case UNGT: return ARM_HI;
22545 case UNGE: return ARM_PL;
22546 /* UNEQ and LTGT do not have a representation. */
22547 case UNEQ: /* Fall through. */
22548 case LTGT: /* Fall through. */
22549 default: return ARM_NV;
22552 case CC_SWPmode:
22553 switch (comp_code)
22555 case NE: return ARM_NE;
22556 case EQ: return ARM_EQ;
22557 case GE: return ARM_LE;
22558 case GT: return ARM_LT;
22559 case LE: return ARM_GE;
22560 case LT: return ARM_GT;
22561 case GEU: return ARM_LS;
22562 case GTU: return ARM_CC;
22563 case LEU: return ARM_CS;
22564 case LTU: return ARM_HI;
22565 default: return ARM_NV;
22568 case CC_Cmode:
22569 switch (comp_code)
22571 case LTU: return ARM_CS;
22572 case GEU: return ARM_CC;
22573 default: return ARM_NV;
22576 case CC_CZmode:
22577 switch (comp_code)
22579 case NE: return ARM_NE;
22580 case EQ: return ARM_EQ;
22581 case GEU: return ARM_CS;
22582 case GTU: return ARM_HI;
22583 case LEU: return ARM_LS;
22584 case LTU: return ARM_CC;
22585 default: return ARM_NV;
22588 case CC_NCVmode:
22589 switch (comp_code)
22591 case GE: return ARM_GE;
22592 case LT: return ARM_LT;
22593 case GEU: return ARM_CS;
22594 case LTU: return ARM_CC;
22595 default: return ARM_NV;
22598 case CCmode:
22599 switch (comp_code)
22601 case NE: return ARM_NE;
22602 case EQ: return ARM_EQ;
22603 case GE: return ARM_GE;
22604 case GT: return ARM_GT;
22605 case LE: return ARM_LE;
22606 case LT: return ARM_LT;
22607 case GEU: return ARM_CS;
22608 case GTU: return ARM_HI;
22609 case LEU: return ARM_LS;
22610 case LTU: return ARM_CC;
22611 default: return ARM_NV;
22614 default: gcc_unreachable ();
22618 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22619 static enum arm_cond_code
22620 get_arm_condition_code (rtx comparison)
22622 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22623 gcc_assert (code != ARM_NV);
22624 return code;
22627 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22628 instructions. */
22629 void
22630 thumb2_final_prescan_insn (rtx_insn *insn)
22632 rtx_insn *first_insn = insn;
22633 rtx body = PATTERN (insn);
22634 rtx predicate;
22635 enum arm_cond_code code;
22636 int n;
22637 int mask;
22638 int max;
22640 /* max_insns_skipped in the tune was already taken into account in the
22641 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22642 just emit the IT blocks as we can. It does not make sense to split
22643 the IT blocks. */
22644 max = MAX_INSN_PER_IT_BLOCK;
22646 /* Remove the previous insn from the count of insns to be output. */
22647 if (arm_condexec_count)
22648 arm_condexec_count--;
22650 /* Nothing to do if we are already inside a conditional block. */
22651 if (arm_condexec_count)
22652 return;
22654 if (GET_CODE (body) != COND_EXEC)
22655 return;
22657 /* Conditional jumps are implemented directly. */
22658 if (JUMP_P (insn))
22659 return;
22661 predicate = COND_EXEC_TEST (body);
22662 arm_current_cc = get_arm_condition_code (predicate);
22664 n = get_attr_ce_count (insn);
22665 arm_condexec_count = 1;
22666 arm_condexec_mask = (1 << n) - 1;
22667 arm_condexec_masklen = n;
22668 /* See if subsequent instructions can be combined into the same block. */
22669 for (;;)
22671 insn = next_nonnote_insn (insn);
22673 /* Jumping into the middle of an IT block is illegal, so a label or
22674 barrier terminates the block. */
22675 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22676 break;
22678 body = PATTERN (insn);
22679 /* USE and CLOBBER aren't really insns, so just skip them. */
22680 if (GET_CODE (body) == USE
22681 || GET_CODE (body) == CLOBBER)
22682 continue;
22684 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22685 if (GET_CODE (body) != COND_EXEC)
22686 break;
22687 /* Maximum number of conditionally executed instructions in a block. */
22688 n = get_attr_ce_count (insn);
22689 if (arm_condexec_masklen + n > max)
22690 break;
22692 predicate = COND_EXEC_TEST (body);
22693 code = get_arm_condition_code (predicate);
22694 mask = (1 << n) - 1;
22695 if (arm_current_cc == code)
22696 arm_condexec_mask |= (mask << arm_condexec_masklen);
22697 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22698 break;
22700 arm_condexec_count++;
22701 arm_condexec_masklen += n;
22703 /* A jump must be the last instruction in a conditional block. */
22704 if (JUMP_P (insn))
22705 break;
22707 /* Restore recog_data (getting the attributes of other insns can
22708 destroy this array, but final.c assumes that it remains intact
22709 across this call). */
22710 extract_constrain_insn_cached (first_insn);
22713 void
22714 arm_final_prescan_insn (rtx_insn *insn)
22716 /* BODY will hold the body of INSN. */
22717 rtx body = PATTERN (insn);
22719 /* This will be 1 if trying to repeat the trick, and things need to be
22720 reversed if it appears to fail. */
22721 int reverse = 0;
22723 /* If we start with a return insn, we only succeed if we find another one. */
22724 int seeking_return = 0;
22725 enum rtx_code return_code = UNKNOWN;
22727 /* START_INSN will hold the insn from where we start looking. This is the
22728 first insn after the following code_label if REVERSE is true. */
22729 rtx_insn *start_insn = insn;
22731 /* If in state 4, check if the target branch is reached, in order to
22732 change back to state 0. */
22733 if (arm_ccfsm_state == 4)
22735 if (insn == arm_target_insn)
22737 arm_target_insn = NULL;
22738 arm_ccfsm_state = 0;
22740 return;
22743 /* If in state 3, it is possible to repeat the trick, if this insn is an
22744 unconditional branch to a label, and immediately following this branch
22745 is the previous target label which is only used once, and the label this
22746 branch jumps to is not too far off. */
22747 if (arm_ccfsm_state == 3)
22749 if (simplejump_p (insn))
22751 start_insn = next_nonnote_insn (start_insn);
22752 if (BARRIER_P (start_insn))
22754 /* XXX Isn't this always a barrier? */
22755 start_insn = next_nonnote_insn (start_insn);
22757 if (LABEL_P (start_insn)
22758 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22759 && LABEL_NUSES (start_insn) == 1)
22760 reverse = TRUE;
22761 else
22762 return;
22764 else if (ANY_RETURN_P (body))
22766 start_insn = next_nonnote_insn (start_insn);
22767 if (BARRIER_P (start_insn))
22768 start_insn = next_nonnote_insn (start_insn);
22769 if (LABEL_P (start_insn)
22770 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22771 && LABEL_NUSES (start_insn) == 1)
22773 reverse = TRUE;
22774 seeking_return = 1;
22775 return_code = GET_CODE (body);
22777 else
22778 return;
22780 else
22781 return;
22784 gcc_assert (!arm_ccfsm_state || reverse);
22785 if (!JUMP_P (insn))
22786 return;
22788 /* This jump might be paralleled with a clobber of the condition codes;
22789 the jump should always come first.  */
22790 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22791 body = XVECEXP (body, 0, 0);
22793 if (reverse
22794 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22795 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22797 int insns_skipped;
22798 int fail = FALSE, succeed = FALSE;
22799 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22800 int then_not_else = TRUE;
22801 rtx_insn *this_insn = start_insn;
22802 rtx label = 0;
22804 /* Register the insn jumped to. */
22805 if (reverse)
22807 if (!seeking_return)
22808 label = XEXP (SET_SRC (body), 0);
22810 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22811 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22812 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22814 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22815 then_not_else = FALSE;
22817 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22819 seeking_return = 1;
22820 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22822 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22824 seeking_return = 1;
22825 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22826 then_not_else = FALSE;
22828 else
22829 gcc_unreachable ();
22831 /* See how many insns this branch skips, and what kind of insns. If all
22832 insns are okay, and the label or unconditional branch to the same
22833 label is not too far away, succeed. */
22834 for (insns_skipped = 0;
22835 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22837 rtx scanbody;
22839 this_insn = next_nonnote_insn (this_insn);
22840 if (!this_insn)
22841 break;
22843 switch (GET_CODE (this_insn))
22845 case CODE_LABEL:
22846 /* Succeed if it is the target label, otherwise fail since
22847 control falls in from somewhere else. */
22848 if (this_insn == label)
22850 arm_ccfsm_state = 1;
22851 succeed = TRUE;
22853 else
22854 fail = TRUE;
22855 break;
22857 case BARRIER:
22858 /* Succeed if the following insn is the target label.
22859 Otherwise fail.
22860 If return insns are used then the last insn in a function
22861 will be a barrier. */
22862 this_insn = next_nonnote_insn (this_insn);
22863 if (this_insn && this_insn == label)
22865 arm_ccfsm_state = 1;
22866 succeed = TRUE;
22868 else
22869 fail = TRUE;
22870 break;
22872 case CALL_INSN:
22873 /* The AAPCS says that conditional calls should not be
22874 used since they make interworking inefficient (the
22875 linker can't transform BL<cond> into BLX). That's
22876 only a problem if the machine has BLX. */
22877 if (arm_arch5)
22879 fail = TRUE;
22880 break;
22883 /* Succeed if the following insn is the target label, or
22884 if the following two insns are a barrier and the
22885 target label. */
22886 this_insn = next_nonnote_insn (this_insn);
22887 if (this_insn && BARRIER_P (this_insn))
22888 this_insn = next_nonnote_insn (this_insn);
22890 if (this_insn && this_insn == label
22891 && insns_skipped < max_insns_skipped)
22893 arm_ccfsm_state = 1;
22894 succeed = TRUE;
22896 else
22897 fail = TRUE;
22898 break;
22900 case JUMP_INSN:
22901 /* If this is an unconditional branch to the same label, succeed.
22902 If it is to another label, do nothing. If it is conditional,
22903 fail. */
22904 /* XXX Probably, the tests for SET and the PC are
22905 unnecessary. */
22907 scanbody = PATTERN (this_insn);
22908 if (GET_CODE (scanbody) == SET
22909 && GET_CODE (SET_DEST (scanbody)) == PC)
22911 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22912 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22914 arm_ccfsm_state = 2;
22915 succeed = TRUE;
22917 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22918 fail = TRUE;
22920 /* Fail if a conditional return is undesirable (e.g. on a
22921 StrongARM), but still allow this if optimizing for size. */
22922 else if (GET_CODE (scanbody) == return_code
22923 && !use_return_insn (TRUE, NULL)
22924 && !optimize_size)
22925 fail = TRUE;
22926 else if (GET_CODE (scanbody) == return_code)
22928 arm_ccfsm_state = 2;
22929 succeed = TRUE;
22931 else if (GET_CODE (scanbody) == PARALLEL)
22933 switch (get_attr_conds (this_insn))
22935 case CONDS_NOCOND:
22936 break;
22937 default:
22938 fail = TRUE;
22939 break;
22942 else
22943 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22945 break;
22947 case INSN:
22948 /* Instructions using or affecting the condition codes make it
22949 fail. */
22950 scanbody = PATTERN (this_insn);
22951 if (!(GET_CODE (scanbody) == SET
22952 || GET_CODE (scanbody) == PARALLEL)
22953 || get_attr_conds (this_insn) != CONDS_NOCOND)
22954 fail = TRUE;
22955 break;
22957 default:
22958 break;
22961 if (succeed)
22963 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22964 arm_target_label = CODE_LABEL_NUMBER (label);
22965 else
22967 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22969 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22971 this_insn = next_nonnote_insn (this_insn);
22972 gcc_assert (!this_insn
22973 || (!BARRIER_P (this_insn)
22974 && !LABEL_P (this_insn)));
22976 if (!this_insn)
22978 /* Oh, dear! We ran off the end... give up. */
22979 extract_constrain_insn_cached (insn);
22980 arm_ccfsm_state = 0;
22981 arm_target_insn = NULL;
22982 return;
22984 arm_target_insn = this_insn;
22987 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22988 what it was. */
22989 if (!reverse)
22990 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22992 if (reverse || then_not_else)
22993 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22996 /* Restore recog_data (getting the attributes of other insns can
22997 destroy this array, but final.c assumes that it remains intact
22998 across this call).  */
22999 extract_constrain_insn_cached (insn);
23003 /* Output IT instructions. */
23004 void
23005 thumb2_asm_output_opcode (FILE * stream)
23007 char buff[5];
23008 int n;
23010 if (arm_condexec_mask)
23012 for (n = 0; n < arm_condexec_masklen; n++)
23013 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23014 buff[n] = 0;
23015 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23016 arm_condition_codes[arm_current_cc]);
23017 arm_condexec_mask = 0;
23021 /* Returns true if REGNO is a valid register
23022 for holding a quantity of type MODE. */
23023 int
23024 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23026 if (GET_MODE_CLASS (mode) == MODE_CC)
23027 return (regno == CC_REGNUM
23028 || (TARGET_HARD_FLOAT && TARGET_VFP
23029 && regno == VFPCC_REGNUM));
23031 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23032 return false;
23034 if (TARGET_THUMB1)
23035 /* For the Thumb we only allow values bigger than SImode in
23036 registers 0 - 6, so that there is always a second low
23037 register available to hold the upper part of the value.
23038 We probably ought to ensure that the register is the
23039 start of an even numbered register pair. */
23040 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23042 if (TARGET_HARD_FLOAT && TARGET_VFP
23043 && IS_VFP_REGNUM (regno))
23045 if (mode == SFmode || mode == SImode)
23046 return VFP_REGNO_OK_FOR_SINGLE (regno);
23048 if (mode == DFmode)
23049 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23051 /* VFP registers can hold HFmode values, but there is no point in
23052 putting them there unless we have hardware conversion insns. */
23053 if (mode == HFmode)
23054 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23056 if (TARGET_NEON)
23057 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23058 || (VALID_NEON_QREG_MODE (mode)
23059 && NEON_REGNO_OK_FOR_QUAD (regno))
23060 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23061 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23062 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23063 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23064 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23066 return FALSE;
23069 if (TARGET_REALLY_IWMMXT)
23071 if (IS_IWMMXT_GR_REGNUM (regno))
23072 return mode == SImode;
23074 if (IS_IWMMXT_REGNUM (regno))
23075 return VALID_IWMMXT_REG_MODE (mode);
23078 /* We allow almost any value to be stored in the general registers.
23079 Restrict doubleword quantities to even register pairs in ARM state
23080 so that we can use ldrd. Do not allow very large Neon structure
23081 opaque modes in general registers; they would use too many. */
23082 if (regno <= LAST_ARM_REGNUM)
23084 if (ARM_NUM_REGS (mode) > 4)
23085 return FALSE;
23087 if (TARGET_THUMB2)
23088 return TRUE;
23090 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23093 if (regno == FRAME_POINTER_REGNUM
23094 || regno == ARG_POINTER_REGNUM)
23095 /* We only allow integers in the fake hard registers. */
23096 return GET_MODE_CLASS (mode) == MODE_INT;
23098 return FALSE;
23101 /* Implement MODES_TIEABLE_P. */
23103 bool
23104 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23106 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23107 return true;
23109 /* We specifically want to allow elements of "structure" modes to
23110 be tieable to the structure. This more general condition allows
23111 other rarer situations too. */
23112 if (TARGET_NEON
23113 && (VALID_NEON_DREG_MODE (mode1)
23114 || VALID_NEON_QREG_MODE (mode1)
23115 || VALID_NEON_STRUCT_MODE (mode1))
23116 && (VALID_NEON_DREG_MODE (mode2)
23117 || VALID_NEON_QREG_MODE (mode2)
23118 || VALID_NEON_STRUCT_MODE (mode2)))
23119 return true;
23121 return false;
23124 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23125 not used in arm mode. */
23127 enum reg_class
23128 arm_regno_class (int regno)
23130 if (regno == PC_REGNUM)
23131 return NO_REGS;
23133 if (TARGET_THUMB1)
23135 if (regno == STACK_POINTER_REGNUM)
23136 return STACK_REG;
23137 if (regno == CC_REGNUM)
23138 return CC_REG;
23139 if (regno < 8)
23140 return LO_REGS;
23141 return HI_REGS;
23144 if (TARGET_THUMB2 && regno < 8)
23145 return LO_REGS;
23147 if ( regno <= LAST_ARM_REGNUM
23148 || regno == FRAME_POINTER_REGNUM
23149 || regno == ARG_POINTER_REGNUM)
23150 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23152 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23153 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23155 if (IS_VFP_REGNUM (regno))
23157 if (regno <= D7_VFP_REGNUM)
23158 return VFP_D0_D7_REGS;
23159 else if (regno <= LAST_LO_VFP_REGNUM)
23160 return VFP_LO_REGS;
23161 else
23162 return VFP_HI_REGS;
23165 if (IS_IWMMXT_REGNUM (regno))
23166 return IWMMXT_REGS;
23168 if (IS_IWMMXT_GR_REGNUM (regno))
23169 return IWMMXT_GR_REGS;
23171 return NO_REGS;
23174 /* Handle a special case when computing the offset
23175 of an argument from the frame pointer. */
23176 int
23177 arm_debugger_arg_offset (int value, rtx addr)
23179 rtx_insn *insn;
23181 /* We are only interested if dbxout_parms() failed to compute the offset. */
23182 if (value != 0)
23183 return 0;
23185 /* We can only cope with the case where the address is held in a register. */
23186 if (!REG_P (addr))
23187 return 0;
23189 /* If we are using the frame pointer to point at the argument, then
23190 an offset of 0 is correct. */
23191 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23192 return 0;
23194 /* If we are using the stack pointer to point at the
23195 argument, then an offset of 0 is correct. */
23196 /* ??? Check this is consistent with thumb2 frame layout. */
23197 if ((TARGET_THUMB || !frame_pointer_needed)
23198 && REGNO (addr) == SP_REGNUM)
23199 return 0;
23201 /* Oh dear. The argument is pointed to by a register rather
23202 than being held in a register, or being stored at a known
23203 offset from the frame pointer. Since GDB only understands
23204 those two kinds of argument we must translate the address
23205 held in the register into an offset from the frame pointer.
23206 We do this by searching through the insns for the function
23207 looking to see where this register gets its value. If the
23208 register is initialized from the frame pointer plus an offset
23209 then we are in luck and we can continue, otherwise we give up.
23211 This code is exercised by producing debugging information
23212 for a function with arguments like this:
23214 double func (double a, double b, int c, double d) {return d;}
23216 Without this code the stab for parameter 'd' will be set to
23217 an offset of 0 from the frame pointer, rather than 8. */
23219 /* The if() statement says:
23221 If the insn is a normal instruction
23222 and if the insn is setting the value in a register
23223 and if the register being set is the register holding the address of the argument
23224 and if the address is computed by an addition
23225 that involves adding to a register
23226 which is the frame pointer
23227 a constant integer
23229 then... */
23231 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23233 if ( NONJUMP_INSN_P (insn)
23234 && GET_CODE (PATTERN (insn)) == SET
23235 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23236 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23237 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23238 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23239 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23242 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23244 break;
23248 if (value == 0)
23250 debug_rtx (addr);
23251 warning (0, "unable to compute real location of stacked parameter");
23252 value = 8; /* XXX magic hack */
23255 return value;
23258 typedef enum {
23259 T_V8QI,
23260 T_V4HI,
23261 T_V4HF,
23262 T_V2SI,
23263 T_V2SF,
23264 T_DI,
23265 T_V16QI,
23266 T_V8HI,
23267 T_V4SI,
23268 T_V4SF,
23269 T_V2DI,
23270 T_TI,
23271 T_EI,
23272 T_OI,
23273 T_MAX /* Size of enum. Keep last. */
23274 } neon_builtin_type_mode;
23276 #define TYPE_MODE_BIT(X) (1 << (X))
23278 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23279 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23280 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23281 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23282 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23283 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23285 #define v8qi_UP T_V8QI
23286 #define v4hi_UP T_V4HI
23287 #define v4hf_UP T_V4HF
23288 #define v2si_UP T_V2SI
23289 #define v2sf_UP T_V2SF
23290 #define di_UP T_DI
23291 #define v16qi_UP T_V16QI
23292 #define v8hi_UP T_V8HI
23293 #define v4si_UP T_V4SI
23294 #define v4sf_UP T_V4SF
23295 #define v2di_UP T_V2DI
23296 #define ti_UP T_TI
23297 #define ei_UP T_EI
23298 #define oi_UP T_OI
23300 #define UP(X) X##_UP
23302 typedef enum {
23303 NEON_BINOP,
23304 NEON_TERNOP,
23305 NEON_UNOP,
23306 NEON_BSWAP,
23307 NEON_GETLANE,
23308 NEON_SETLANE,
23309 NEON_CREATE,
23310 NEON_RINT,
23311 NEON_COPYSIGNF,
23312 NEON_DUP,
23313 NEON_DUPLANE,
23314 NEON_COMBINE,
23315 NEON_SPLIT,
23316 NEON_LANEMUL,
23317 NEON_LANEMULL,
23318 NEON_LANEMULH,
23319 NEON_LANEMAC,
23320 NEON_SCALARMUL,
23321 NEON_SCALARMULL,
23322 NEON_SCALARMULH,
23323 NEON_SCALARMAC,
23324 NEON_CONVERT,
23325 NEON_FLOAT_WIDEN,
23326 NEON_FLOAT_NARROW,
23327 NEON_FIXCONV,
23328 NEON_SELECT,
23329 NEON_REINTERP,
23330 NEON_VTBL,
23331 NEON_VTBX,
23332 NEON_LOAD1,
23333 NEON_LOAD1LANE,
23334 NEON_STORE1,
23335 NEON_STORE1LANE,
23336 NEON_LOADSTRUCT,
23337 NEON_LOADSTRUCTLANE,
23338 NEON_STORESTRUCT,
23339 NEON_STORESTRUCTLANE,
23340 NEON_LOGICBINOP,
23341 NEON_SHIFTINSERT,
23342 NEON_SHIFTIMM,
23343 NEON_SHIFTACC
23344 } neon_itype;
23346 typedef struct {
23347 const char *name;
23348 const neon_itype itype;
23349 const neon_builtin_type_mode mode;
23350 const enum insn_code code;
23351 unsigned int fcode;
23352 } neon_builtin_datum;
23354 #define CF(N,X) CODE_FOR_neon_##N##X
23356 #define VAR1(T, N, A) \
23357 {#N, NEON_##T, UP (A), CF (N, A), 0}
23358 #define VAR2(T, N, A, B) \
23359 VAR1 (T, N, A), \
23360 {#N, NEON_##T, UP (B), CF (N, B), 0}
23361 #define VAR3(T, N, A, B, C) \
23362 VAR2 (T, N, A, B), \
23363 {#N, NEON_##T, UP (C), CF (N, C), 0}
23364 #define VAR4(T, N, A, B, C, D) \
23365 VAR3 (T, N, A, B, C), \
23366 {#N, NEON_##T, UP (D), CF (N, D), 0}
23367 #define VAR5(T, N, A, B, C, D, E) \
23368 VAR4 (T, N, A, B, C, D), \
23369 {#N, NEON_##T, UP (E), CF (N, E), 0}
23370 #define VAR6(T, N, A, B, C, D, E, F) \
23371 VAR5 (T, N, A, B, C, D, E), \
23372 {#N, NEON_##T, UP (F), CF (N, F), 0}
23373 #define VAR7(T, N, A, B, C, D, E, F, G) \
23374 VAR6 (T, N, A, B, C, D, E, F), \
23375 {#N, NEON_##T, UP (G), CF (N, G), 0}
23376 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23377 VAR7 (T, N, A, B, C, D, E, F, G), \
23378 {#N, NEON_##T, UP (H), CF (N, H), 0}
23379 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23380 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23381 {#N, NEON_##T, UP (I), CF (N, I), 0}
23382 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23383 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23384 {#N, NEON_##T, UP (J), CF (N, J), 0}
23386 /* The NEON builtin data can be found in arm_neon_builtins.def.
23387 The mode entries in the following table correspond to the "key" type of the
23388 instruction variant, i.e. equivalent to that which would be specified after
23389 the assembler mnemonic, which usually refers to the last vector operand.
23390 (Signed/unsigned/polynomial types are not differentiated between though, and
23391 are all mapped onto the same mode for a given element size.) The modes
23392 listed per instruction should be the same as those defined for that
23393 instruction's pattern in neon.md. */
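/* As an illustration of the VARn/CF/UP macros above (the entry is chosen
   for the example; the real entries live in arm_neon_builtins.def):
   VAR2 (BINOP, vadd, v2sf, v4sf) expands to
   { "vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0 } and
   { "vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0 }, the final
   0 initializing the fcode field.  */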
23395 static neon_builtin_datum neon_builtin_data[] =
23397 #include "arm_neon_builtins.def"
23400 #undef CF
23401 #undef VAR1
23402 #undef VAR2
23403 #undef VAR3
23404 #undef VAR4
23405 #undef VAR5
23406 #undef VAR6
23407 #undef VAR7
23408 #undef VAR8
23409 #undef VAR9
23410 #undef VAR10
23412 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23413 #define VAR1(T, N, A) \
23414 CF (N, A)
23415 #define VAR2(T, N, A, B) \
23416 VAR1 (T, N, A), \
23417 CF (N, B)
23418 #define VAR3(T, N, A, B, C) \
23419 VAR2 (T, N, A, B), \
23420 CF (N, C)
23421 #define VAR4(T, N, A, B, C, D) \
23422 VAR3 (T, N, A, B, C), \
23423 CF (N, D)
23424 #define VAR5(T, N, A, B, C, D, E) \
23425 VAR4 (T, N, A, B, C, D), \
23426 CF (N, E)
23427 #define VAR6(T, N, A, B, C, D, E, F) \
23428 VAR5 (T, N, A, B, C, D, E), \
23429 CF (N, F)
23430 #define VAR7(T, N, A, B, C, D, E, F, G) \
23431 VAR6 (T, N, A, B, C, D, E, F), \
23432 CF (N, G)
23433 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23434 VAR7 (T, N, A, B, C, D, E, F, G), \
23435 CF (N, H)
23436 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23437 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23438 CF (N, I)
23439 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23440 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23441 CF (N, J)
23442 enum arm_builtins
23444 ARM_BUILTIN_GETWCGR0,
23445 ARM_BUILTIN_GETWCGR1,
23446 ARM_BUILTIN_GETWCGR2,
23447 ARM_BUILTIN_GETWCGR3,
23449 ARM_BUILTIN_SETWCGR0,
23450 ARM_BUILTIN_SETWCGR1,
23451 ARM_BUILTIN_SETWCGR2,
23452 ARM_BUILTIN_SETWCGR3,
23454 ARM_BUILTIN_WZERO,
23456 ARM_BUILTIN_WAVG2BR,
23457 ARM_BUILTIN_WAVG2HR,
23458 ARM_BUILTIN_WAVG2B,
23459 ARM_BUILTIN_WAVG2H,
23461 ARM_BUILTIN_WACCB,
23462 ARM_BUILTIN_WACCH,
23463 ARM_BUILTIN_WACCW,
23465 ARM_BUILTIN_WMACS,
23466 ARM_BUILTIN_WMACSZ,
23467 ARM_BUILTIN_WMACU,
23468 ARM_BUILTIN_WMACUZ,
23470 ARM_BUILTIN_WSADB,
23471 ARM_BUILTIN_WSADBZ,
23472 ARM_BUILTIN_WSADH,
23473 ARM_BUILTIN_WSADHZ,
23475 ARM_BUILTIN_WALIGNI,
23476 ARM_BUILTIN_WALIGNR0,
23477 ARM_BUILTIN_WALIGNR1,
23478 ARM_BUILTIN_WALIGNR2,
23479 ARM_BUILTIN_WALIGNR3,
23481 ARM_BUILTIN_TMIA,
23482 ARM_BUILTIN_TMIAPH,
23483 ARM_BUILTIN_TMIABB,
23484 ARM_BUILTIN_TMIABT,
23485 ARM_BUILTIN_TMIATB,
23486 ARM_BUILTIN_TMIATT,
23488 ARM_BUILTIN_TMOVMSKB,
23489 ARM_BUILTIN_TMOVMSKH,
23490 ARM_BUILTIN_TMOVMSKW,
23492 ARM_BUILTIN_TBCSTB,
23493 ARM_BUILTIN_TBCSTH,
23494 ARM_BUILTIN_TBCSTW,
23496 ARM_BUILTIN_WMADDS,
23497 ARM_BUILTIN_WMADDU,
23499 ARM_BUILTIN_WPACKHSS,
23500 ARM_BUILTIN_WPACKWSS,
23501 ARM_BUILTIN_WPACKDSS,
23502 ARM_BUILTIN_WPACKHUS,
23503 ARM_BUILTIN_WPACKWUS,
23504 ARM_BUILTIN_WPACKDUS,
23506 ARM_BUILTIN_WADDB,
23507 ARM_BUILTIN_WADDH,
23508 ARM_BUILTIN_WADDW,
23509 ARM_BUILTIN_WADDSSB,
23510 ARM_BUILTIN_WADDSSH,
23511 ARM_BUILTIN_WADDSSW,
23512 ARM_BUILTIN_WADDUSB,
23513 ARM_BUILTIN_WADDUSH,
23514 ARM_BUILTIN_WADDUSW,
23515 ARM_BUILTIN_WSUBB,
23516 ARM_BUILTIN_WSUBH,
23517 ARM_BUILTIN_WSUBW,
23518 ARM_BUILTIN_WSUBSSB,
23519 ARM_BUILTIN_WSUBSSH,
23520 ARM_BUILTIN_WSUBSSW,
23521 ARM_BUILTIN_WSUBUSB,
23522 ARM_BUILTIN_WSUBUSH,
23523 ARM_BUILTIN_WSUBUSW,
23525 ARM_BUILTIN_WAND,
23526 ARM_BUILTIN_WANDN,
23527 ARM_BUILTIN_WOR,
23528 ARM_BUILTIN_WXOR,
23530 ARM_BUILTIN_WCMPEQB,
23531 ARM_BUILTIN_WCMPEQH,
23532 ARM_BUILTIN_WCMPEQW,
23533 ARM_BUILTIN_WCMPGTUB,
23534 ARM_BUILTIN_WCMPGTUH,
23535 ARM_BUILTIN_WCMPGTUW,
23536 ARM_BUILTIN_WCMPGTSB,
23537 ARM_BUILTIN_WCMPGTSH,
23538 ARM_BUILTIN_WCMPGTSW,
23540 ARM_BUILTIN_TEXTRMSB,
23541 ARM_BUILTIN_TEXTRMSH,
23542 ARM_BUILTIN_TEXTRMSW,
23543 ARM_BUILTIN_TEXTRMUB,
23544 ARM_BUILTIN_TEXTRMUH,
23545 ARM_BUILTIN_TEXTRMUW,
23546 ARM_BUILTIN_TINSRB,
23547 ARM_BUILTIN_TINSRH,
23548 ARM_BUILTIN_TINSRW,
23550 ARM_BUILTIN_WMAXSW,
23551 ARM_BUILTIN_WMAXSH,
23552 ARM_BUILTIN_WMAXSB,
23553 ARM_BUILTIN_WMAXUW,
23554 ARM_BUILTIN_WMAXUH,
23555 ARM_BUILTIN_WMAXUB,
23556 ARM_BUILTIN_WMINSW,
23557 ARM_BUILTIN_WMINSH,
23558 ARM_BUILTIN_WMINSB,
23559 ARM_BUILTIN_WMINUW,
23560 ARM_BUILTIN_WMINUH,
23561 ARM_BUILTIN_WMINUB,
23563 ARM_BUILTIN_WMULUM,
23564 ARM_BUILTIN_WMULSM,
23565 ARM_BUILTIN_WMULUL,
23567 ARM_BUILTIN_PSADBH,
23568 ARM_BUILTIN_WSHUFH,
23570 ARM_BUILTIN_WSLLH,
23571 ARM_BUILTIN_WSLLW,
23572 ARM_BUILTIN_WSLLD,
23573 ARM_BUILTIN_WSRAH,
23574 ARM_BUILTIN_WSRAW,
23575 ARM_BUILTIN_WSRAD,
23576 ARM_BUILTIN_WSRLH,
23577 ARM_BUILTIN_WSRLW,
23578 ARM_BUILTIN_WSRLD,
23579 ARM_BUILTIN_WRORH,
23580 ARM_BUILTIN_WRORW,
23581 ARM_BUILTIN_WRORD,
23582 ARM_BUILTIN_WSLLHI,
23583 ARM_BUILTIN_WSLLWI,
23584 ARM_BUILTIN_WSLLDI,
23585 ARM_BUILTIN_WSRAHI,
23586 ARM_BUILTIN_WSRAWI,
23587 ARM_BUILTIN_WSRADI,
23588 ARM_BUILTIN_WSRLHI,
23589 ARM_BUILTIN_WSRLWI,
23590 ARM_BUILTIN_WSRLDI,
23591 ARM_BUILTIN_WRORHI,
23592 ARM_BUILTIN_WRORWI,
23593 ARM_BUILTIN_WRORDI,
23595 ARM_BUILTIN_WUNPCKIHB,
23596 ARM_BUILTIN_WUNPCKIHH,
23597 ARM_BUILTIN_WUNPCKIHW,
23598 ARM_BUILTIN_WUNPCKILB,
23599 ARM_BUILTIN_WUNPCKILH,
23600 ARM_BUILTIN_WUNPCKILW,
23602 ARM_BUILTIN_WUNPCKEHSB,
23603 ARM_BUILTIN_WUNPCKEHSH,
23604 ARM_BUILTIN_WUNPCKEHSW,
23605 ARM_BUILTIN_WUNPCKEHUB,
23606 ARM_BUILTIN_WUNPCKEHUH,
23607 ARM_BUILTIN_WUNPCKEHUW,
23608 ARM_BUILTIN_WUNPCKELSB,
23609 ARM_BUILTIN_WUNPCKELSH,
23610 ARM_BUILTIN_WUNPCKELSW,
23611 ARM_BUILTIN_WUNPCKELUB,
23612 ARM_BUILTIN_WUNPCKELUH,
23613 ARM_BUILTIN_WUNPCKELUW,
23615 ARM_BUILTIN_WABSB,
23616 ARM_BUILTIN_WABSH,
23617 ARM_BUILTIN_WABSW,
23619 ARM_BUILTIN_WADDSUBHX,
23620 ARM_BUILTIN_WSUBADDHX,
23622 ARM_BUILTIN_WABSDIFFB,
23623 ARM_BUILTIN_WABSDIFFH,
23624 ARM_BUILTIN_WABSDIFFW,
23626 ARM_BUILTIN_WADDCH,
23627 ARM_BUILTIN_WADDCW,
23629 ARM_BUILTIN_WAVG4,
23630 ARM_BUILTIN_WAVG4R,
23632 ARM_BUILTIN_WMADDSX,
23633 ARM_BUILTIN_WMADDUX,
23635 ARM_BUILTIN_WMADDSN,
23636 ARM_BUILTIN_WMADDUN,
23638 ARM_BUILTIN_WMULWSM,
23639 ARM_BUILTIN_WMULWUM,
23641 ARM_BUILTIN_WMULWSMR,
23642 ARM_BUILTIN_WMULWUMR,
23644 ARM_BUILTIN_WMULWL,
23646 ARM_BUILTIN_WMULSMR,
23647 ARM_BUILTIN_WMULUMR,
23649 ARM_BUILTIN_WQMULM,
23650 ARM_BUILTIN_WQMULMR,
23652 ARM_BUILTIN_WQMULWM,
23653 ARM_BUILTIN_WQMULWMR,
23655 ARM_BUILTIN_WADDBHUSM,
23656 ARM_BUILTIN_WADDBHUSL,
23658 ARM_BUILTIN_WQMIABB,
23659 ARM_BUILTIN_WQMIABT,
23660 ARM_BUILTIN_WQMIATB,
23661 ARM_BUILTIN_WQMIATT,
23663 ARM_BUILTIN_WQMIABBN,
23664 ARM_BUILTIN_WQMIABTN,
23665 ARM_BUILTIN_WQMIATBN,
23666 ARM_BUILTIN_WQMIATTN,
23668 ARM_BUILTIN_WMIABB,
23669 ARM_BUILTIN_WMIABT,
23670 ARM_BUILTIN_WMIATB,
23671 ARM_BUILTIN_WMIATT,
23673 ARM_BUILTIN_WMIABBN,
23674 ARM_BUILTIN_WMIABTN,
23675 ARM_BUILTIN_WMIATBN,
23676 ARM_BUILTIN_WMIATTN,
23678 ARM_BUILTIN_WMIAWBB,
23679 ARM_BUILTIN_WMIAWBT,
23680 ARM_BUILTIN_WMIAWTB,
23681 ARM_BUILTIN_WMIAWTT,
23683 ARM_BUILTIN_WMIAWBBN,
23684 ARM_BUILTIN_WMIAWBTN,
23685 ARM_BUILTIN_WMIAWTBN,
23686 ARM_BUILTIN_WMIAWTTN,
23688 ARM_BUILTIN_WMERGE,
23690 ARM_BUILTIN_CRC32B,
23691 ARM_BUILTIN_CRC32H,
23692 ARM_BUILTIN_CRC32W,
23693 ARM_BUILTIN_CRC32CB,
23694 ARM_BUILTIN_CRC32CH,
23695 ARM_BUILTIN_CRC32CW,
23697 ARM_BUILTIN_GET_FPSCR,
23698 ARM_BUILTIN_SET_FPSCR,
23700 #undef CRYPTO1
23701 #undef CRYPTO2
23702 #undef CRYPTO3
23704 #define CRYPTO1(L, U, M1, M2) \
23705 ARM_BUILTIN_CRYPTO_##U,
23706 #define CRYPTO2(L, U, M1, M2, M3) \
23707 ARM_BUILTIN_CRYPTO_##U,
23708 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23709 ARM_BUILTIN_CRYPTO_##U,
23711 #include "crypto.def"
23713 #undef CRYPTO1
23714 #undef CRYPTO2
23715 #undef CRYPTO3
23717 #include "arm_neon_builtins.def"
23719 ,ARM_BUILTIN_MAX
23722 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23724 #undef CF
23725 #undef VAR1
23726 #undef VAR2
23727 #undef VAR3
23728 #undef VAR4
23729 #undef VAR5
23730 #undef VAR6
23731 #undef VAR7
23732 #undef VAR8
23733 #undef VAR9
23734 #undef VAR10
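/* Layout sketch: arm_neon_builtins.def populates both neon_builtin_data and
   the tail of enum arm_builtins via the two VARn/CF macro sets above, so
   entry i of the table is assigned function code ARM_BUILTIN_NEON_BASE + i
   when the builtins are registered in arm_init_neon_builtins below; the NEON
   codes therefore occupy the last ARRAY_SIZE (neon_builtin_data) slots
   before ARM_BUILTIN_MAX.  */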
23736 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23738 #define NUM_DREG_TYPES 5
23739 #define NUM_QREG_TYPES 6
23741 static void
23742 arm_init_neon_builtins (void)
23744 unsigned int i, fcode;
23745 tree decl;
23747 tree neon_intQI_type_node;
23748 tree neon_intHI_type_node;
23749 tree neon_floatHF_type_node;
23750 tree neon_polyQI_type_node;
23751 tree neon_polyHI_type_node;
23752 tree neon_intSI_type_node;
23753 tree neon_intDI_type_node;
23754 tree neon_intUTI_type_node;
23755 tree neon_float_type_node;
23757 tree intQI_pointer_node;
23758 tree intHI_pointer_node;
23759 tree intSI_pointer_node;
23760 tree intDI_pointer_node;
23761 tree float_pointer_node;
23763 tree const_intQI_node;
23764 tree const_intHI_node;
23765 tree const_intSI_node;
23766 tree const_intDI_node;
23767 tree const_float_node;
23769 tree const_intQI_pointer_node;
23770 tree const_intHI_pointer_node;
23771 tree const_intSI_pointer_node;
23772 tree const_intDI_pointer_node;
23773 tree const_float_pointer_node;
23775 tree V8QI_type_node;
23776 tree V4HI_type_node;
23777 tree V4UHI_type_node;
23778 tree V4HF_type_node;
23779 tree V2SI_type_node;
23780 tree V2USI_type_node;
23781 tree V2SF_type_node;
23782 tree V16QI_type_node;
23783 tree V8HI_type_node;
23784 tree V8UHI_type_node;
23785 tree V4SI_type_node;
23786 tree V4USI_type_node;
23787 tree V4SF_type_node;
23788 tree V2DI_type_node;
23789 tree V2UDI_type_node;
23791 tree intUQI_type_node;
23792 tree intUHI_type_node;
23793 tree intUSI_type_node;
23794 tree intUDI_type_node;
23796 tree intEI_type_node;
23797 tree intOI_type_node;
23798 tree intCI_type_node;
23799 tree intXI_type_node;
23801 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23802 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23803 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23805 /* Create distinguished type nodes for NEON vector element types,
23806 and pointers to values of such types, so we can detect them later. */
23807 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23808 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23809 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23810 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23811 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23812 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23813 neon_float_type_node = make_node (REAL_TYPE);
23814 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23815 layout_type (neon_float_type_node);
23816 neon_floatHF_type_node = make_node (REAL_TYPE);
23817 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23818 layout_type (neon_floatHF_type_node);
23820 /* Define typedefs which exactly correspond to the modes we are basing vector
23821 types on. If you change these names you'll need to change
23822 the table used by arm_mangle_type too. */
23823 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23824 "__builtin_neon_qi");
23825 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23826 "__builtin_neon_hi");
23827 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23828 "__builtin_neon_hf");
23829 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23830 "__builtin_neon_si");
23831 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23832 "__builtin_neon_sf");
23833 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23834 "__builtin_neon_di");
23835 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23836 "__builtin_neon_poly8");
23837 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23838 "__builtin_neon_poly16");
23840 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23841 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23842 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23843 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23844 float_pointer_node = build_pointer_type (neon_float_type_node);
23846 /* Next create constant-qualified versions of the above types. */
23847 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23848 TYPE_QUAL_CONST);
23849 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23850 TYPE_QUAL_CONST);
23851 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23852 TYPE_QUAL_CONST);
23853 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23854 TYPE_QUAL_CONST);
23855 const_float_node = build_qualified_type (neon_float_type_node,
23856 TYPE_QUAL_CONST);
23858 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23859 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23860 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23861 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23862 const_float_pointer_node = build_pointer_type (const_float_node);
23864 /* Unsigned integer types for various mode sizes. */
23865 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23866 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23867 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23868 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23869 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23870 /* Now create vector types based on our NEON element types. */
23871 /* 64-bit vectors. */
23872 V8QI_type_node =
23873 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23874 V4HI_type_node =
23875 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23876 V4UHI_type_node =
23877 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23878 V4HF_type_node =
23879 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23880 V2SI_type_node =
23881 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23882 V2USI_type_node =
23883 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23884 V2SF_type_node =
23885 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23886 /* 128-bit vectors. */
23887 V16QI_type_node =
23888 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23889 V8HI_type_node =
23890 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23891 V8UHI_type_node =
23892 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23893 V4SI_type_node =
23894 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23895 V4USI_type_node =
23896 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23897 V4SF_type_node =
23898 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23899 V2DI_type_node =
23900 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23901 V2UDI_type_node =
23902 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23905 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23906 "__builtin_neon_uqi");
23907 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23908 "__builtin_neon_uhi");
23909 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23910 "__builtin_neon_usi");
23911 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23912 "__builtin_neon_udi");
23913 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23914 "__builtin_neon_poly64");
23915 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23916 "__builtin_neon_poly128");
23918 /* Opaque integer types for structures of vectors. */
23919 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23920 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23921 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23922 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23924 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23925 "__builtin_neon_ti");
23926 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23927 "__builtin_neon_ei");
23928 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23929 "__builtin_neon_oi");
23930 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23931 "__builtin_neon_ci");
23932 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23933 "__builtin_neon_xi");
23935 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23938 tree V16UQI_type_node =
23939 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23941 tree v16uqi_ftype_v16uqi
23942 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23944 tree v16uqi_ftype_v16uqi_v16uqi
23945 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23946 V16UQI_type_node, NULL_TREE);
23948 tree v4usi_ftype_v4usi
23949 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23951 tree v4usi_ftype_v4usi_v4usi
23952 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23953 V4USI_type_node, NULL_TREE);
23955 tree v4usi_ftype_v4usi_v4usi_v4usi
23956 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23957 V4USI_type_node, V4USI_type_node, NULL_TREE);
23959 tree uti_ftype_udi_udi
23960 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23961 intUDI_type_node, NULL_TREE);
23963 #undef CRYPTO1
23964 #undef CRYPTO2
23965 #undef CRYPTO3
23966 #undef C
23967 #undef N
23968 #undef CF
23969 #undef FT1
23970 #undef FT2
23971 #undef FT3
23973 #define C(U) \
23974 ARM_BUILTIN_CRYPTO_##U
23975 #define N(L) \
23976 "__builtin_arm_crypto_"#L
23977 #define FT1(R, A) \
23978 R##_ftype_##A
23979 #define FT2(R, A1, A2) \
23980 R##_ftype_##A1##_##A2
23981 #define FT3(R, A1, A2, A3) \
23982 R##_ftype_##A1##_##A2##_##A3
23983 #define CRYPTO1(L, U, R, A) \
23984 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23985 C (U), BUILT_IN_MD, \
23986 NULL, NULL_TREE);
23987 #define CRYPTO2(L, U, R, A1, A2) \
23988 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23989 C (U), BUILT_IN_MD, \
23990 NULL, NULL_TREE);
23992 #define CRYPTO3(L, U, R, A1, A2, A3) \
23993 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23994 C (U), BUILT_IN_MD, \
23995 NULL, NULL_TREE);
23996 #include "crypto.def"
23998 #undef CRYPTO1
23999 #undef CRYPTO2
24000 #undef CRYPTO3
24001 #undef C
24002 #undef N
24003 #undef FT1
24004 #undef FT2
24005 #undef FT3
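/* Expansion sketch: a hypothetical crypto.def entry such as
     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)
   would be expanded by the C/N/FT2/CRYPTO2 definitions above into
     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);
   so each crypto builtin is registered under a "__builtin_arm_crypto_" name
   with a function type assembled from the *_ftype_* nodes built above.  */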
24007 dreg_types[0] = V8QI_type_node;
24008 dreg_types[1] = V4HI_type_node;
24009 dreg_types[2] = V2SI_type_node;
24010 dreg_types[3] = V2SF_type_node;
24011 dreg_types[4] = neon_intDI_type_node;
24013 qreg_types[0] = V16QI_type_node;
24014 qreg_types[1] = V8HI_type_node;
24015 qreg_types[2] = V4SI_type_node;
24016 qreg_types[3] = V4SF_type_node;
24017 qreg_types[4] = V2DI_type_node;
24018 qreg_types[5] = neon_intUTI_type_node;
24020 for (i = 0; i < NUM_QREG_TYPES; i++)
24022 int j;
24023 for (j = 0; j < NUM_QREG_TYPES; j++)
24025 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24026 reinterp_ftype_dreg[i][j]
24027 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24029 reinterp_ftype_qreg[i][j]
24030 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24034 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24035 i < ARRAY_SIZE (neon_builtin_data);
24036 i++, fcode++)
24038 neon_builtin_datum *d = &neon_builtin_data[i];
24040 const char* const modenames[] = {
24041 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24042 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24043 "ti", "ei", "oi"
24045 char namebuf[60];
24046 tree ftype = NULL;
24047 int is_load = 0, is_store = 0;
24049 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24051 d->fcode = fcode;
24053 switch (d->itype)
24055 case NEON_LOAD1:
24056 case NEON_LOAD1LANE:
24057 case NEON_LOADSTRUCT:
24058 case NEON_LOADSTRUCTLANE:
24059 is_load = 1;
24060 /* Fall through. */
24061 case NEON_STORE1:
24062 case NEON_STORE1LANE:
24063 case NEON_STORESTRUCT:
24064 case NEON_STORESTRUCTLANE:
24065 if (!is_load)
24066 is_store = 1;
24067 /* Fall through. */
24068 case NEON_UNOP:
24069 case NEON_RINT:
24070 case NEON_BINOP:
24071 case NEON_LOGICBINOP:
24072 case NEON_SHIFTINSERT:
24073 case NEON_TERNOP:
24074 case NEON_GETLANE:
24075 case NEON_SETLANE:
24076 case NEON_CREATE:
24077 case NEON_DUP:
24078 case NEON_DUPLANE:
24079 case NEON_SHIFTIMM:
24080 case NEON_SHIFTACC:
24081 case NEON_COMBINE:
24082 case NEON_SPLIT:
24083 case NEON_CONVERT:
24084 case NEON_FIXCONV:
24085 case NEON_LANEMUL:
24086 case NEON_LANEMULL:
24087 case NEON_LANEMULH:
24088 case NEON_LANEMAC:
24089 case NEON_SCALARMUL:
24090 case NEON_SCALARMULL:
24091 case NEON_SCALARMULH:
24092 case NEON_SCALARMAC:
24093 case NEON_SELECT:
24094 case NEON_VTBL:
24095 case NEON_VTBX:
24097 int k;
24098 tree return_type = void_type_node, args = void_list_node;
24100 /* Build a function type directly from the insn_data for
24101 this builtin. The build_function_type() function takes
24102 care of removing duplicates for us. */
24103 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24105 tree eltype;
24107 if (is_load && k == 1)
24109 /* Neon load patterns always have the memory
24110 operand in the operand 1 position. */
24111 gcc_assert (insn_data[d->code].operand[k].predicate
24112 == neon_struct_operand);
24114 switch (d->mode)
24116 case T_V8QI:
24117 case T_V16QI:
24118 eltype = const_intQI_pointer_node;
24119 break;
24121 case T_V4HI:
24122 case T_V8HI:
24123 eltype = const_intHI_pointer_node;
24124 break;
24126 case T_V2SI:
24127 case T_V4SI:
24128 eltype = const_intSI_pointer_node;
24129 break;
24131 case T_V2SF:
24132 case T_V4SF:
24133 eltype = const_float_pointer_node;
24134 break;
24136 case T_DI:
24137 case T_V2DI:
24138 eltype = const_intDI_pointer_node;
24139 break;
24141 default: gcc_unreachable ();
24144 else if (is_store && k == 0)
24146 /* Similarly, Neon store patterns use operand 0 as
24147 the memory location to store to. */
24148 gcc_assert (insn_data[d->code].operand[k].predicate
24149 == neon_struct_operand);
24151 switch (d->mode)
24153 case T_V8QI:
24154 case T_V16QI:
24155 eltype = intQI_pointer_node;
24156 break;
24158 case T_V4HI:
24159 case T_V8HI:
24160 eltype = intHI_pointer_node;
24161 break;
24163 case T_V2SI:
24164 case T_V4SI:
24165 eltype = intSI_pointer_node;
24166 break;
24168 case T_V2SF:
24169 case T_V4SF:
24170 eltype = float_pointer_node;
24171 break;
24173 case T_DI:
24174 case T_V2DI:
24175 eltype = intDI_pointer_node;
24176 break;
24178 default: gcc_unreachable ();
24181 else
24183 switch (insn_data[d->code].operand[k].mode)
24185 case VOIDmode: eltype = void_type_node; break;
24186 /* Scalars. */
24187 case QImode: eltype = neon_intQI_type_node; break;
24188 case HImode: eltype = neon_intHI_type_node; break;
24189 case SImode: eltype = neon_intSI_type_node; break;
24190 case SFmode: eltype = neon_float_type_node; break;
24191 case DImode: eltype = neon_intDI_type_node; break;
24192 case TImode: eltype = intTI_type_node; break;
24193 case EImode: eltype = intEI_type_node; break;
24194 case OImode: eltype = intOI_type_node; break;
24195 case CImode: eltype = intCI_type_node; break;
24196 case XImode: eltype = intXI_type_node; break;
24197 /* 64-bit vectors. */
24198 case V8QImode: eltype = V8QI_type_node; break;
24199 case V4HImode: eltype = V4HI_type_node; break;
24200 case V2SImode: eltype = V2SI_type_node; break;
24201 case V2SFmode: eltype = V2SF_type_node; break;
24202 /* 128-bit vectors. */
24203 case V16QImode: eltype = V16QI_type_node; break;
24204 case V8HImode: eltype = V8HI_type_node; break;
24205 case V4SImode: eltype = V4SI_type_node; break;
24206 case V4SFmode: eltype = V4SF_type_node; break;
24207 case V2DImode: eltype = V2DI_type_node; break;
24208 default: gcc_unreachable ();
24212 if (k == 0 && !is_store)
24213 return_type = eltype;
24214 else
24215 args = tree_cons (NULL_TREE, eltype, args);
24218 ftype = build_function_type (return_type, args);
24220 break;
24222 case NEON_REINTERP:
24224 /* We iterate over NUM_DREG_TYPES doubleword types,
24225 then NUM_QREG_TYPES quadword types.
24226 V4HF is not a type used in reinterpret, so we translate
24227 d->mode to the correct index in reinterp_ftype_dreg. */
24228 bool qreg_p
24229 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24230 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24231 % NUM_QREG_TYPES;
24232 switch (insn_data[d->code].operand[0].mode)
24234 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24235 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24236 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24237 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24238 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24239 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24240 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24241 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24242 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24243 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24244 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24245 default: gcc_unreachable ();
24248 break;
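/* Worked example of the index translation above, assuming the T_* values
   mirror the order of modenames[] earlier in this loop (v8qi, v4hi, v4hf,
   v2si, v2sf, di, v16qi, ..., so T_V2SF == 4 and T_V4SF == 9): for a
   doubleword variant with d->mode == T_V2SF, qreg_p is false and
   T_V2SF > T_V4HF, giving rhs = (4 - 1) % NUM_QREG_TYPES == 3, i.e.
   dreg_types[3] == V2SF_type_node; for a quadword variant with
   d->mode == T_V4SF, rhs = 9 % 6 == 3, i.e. qreg_types[3] == V4SF_type_node.  */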
24249 case NEON_FLOAT_WIDEN:
24251 tree eltype = NULL_TREE;
24252 tree return_type = NULL_TREE;
24254 switch (insn_data[d->code].operand[1].mode)
24256 case V4HFmode:
24257 eltype = V4HF_type_node;
24258 return_type = V4SF_type_node;
24259 break;
24260 default: gcc_unreachable ();
24262 ftype = build_function_type_list (return_type, eltype, NULL);
24263 break;
24265 case NEON_FLOAT_NARROW:
24267 tree eltype = NULL_TREE;
24268 tree return_type = NULL_TREE;
24270 switch (insn_data[d->code].operand[1].mode)
24272 case V4SFmode:
24273 eltype = V4SF_type_node;
24274 return_type = V4HF_type_node;
24275 break;
24276 default: gcc_unreachable ();
24278 ftype = build_function_type_list (return_type, eltype, NULL);
24279 break;
24281 case NEON_BSWAP:
24283 tree eltype = NULL_TREE;
24284 switch (insn_data[d->code].operand[1].mode)
24286 case V4HImode:
24287 eltype = V4UHI_type_node;
24288 break;
24289 case V8HImode:
24290 eltype = V8UHI_type_node;
24291 break;
24292 case V2SImode:
24293 eltype = V2USI_type_node;
24294 break;
24295 case V4SImode:
24296 eltype = V4USI_type_node;
24297 break;
24298 case V2DImode:
24299 eltype = V2UDI_type_node;
24300 break;
24301 default: gcc_unreachable ();
24303 ftype = build_function_type_list (eltype, eltype, NULL);
24304 break;
24306 case NEON_COPYSIGNF:
24308 tree eltype = NULL_TREE;
24309 switch (insn_data[d->code].operand[1].mode)
24311 case V2SFmode:
24312 eltype = V2SF_type_node;
24313 break;
24314 case V4SFmode:
24315 eltype = V4SF_type_node;
24316 break;
24317 default: gcc_unreachable ();
24319 ftype = build_function_type_list (eltype, eltype, NULL);
24320 break;
24322 default:
24323 gcc_unreachable ();
24326 gcc_assert (ftype != NULL);
24328 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24330 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24331 NULL_TREE);
24332 arm_builtin_decls[fcode] = decl;
24336 #undef NUM_DREG_TYPES
24337 #undef NUM_QREG_TYPES
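/* Naming sketch: the sprintf in the loop above forms each builtin name as
   "__builtin_neon_" + d->name + modenames[d->mode]; for example a table
   entry named "vadd" with key mode T_V8QI (a hypothetical entry here) would
   be registered as __builtin_neon_vaddv8qi, so the mode suffix of the
   builtin always matches the neon.md pattern name selected via the CF
   macro.  */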
24339 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24340 do \
24342 if ((MASK) & insn_flags) \
24344 tree bdecl; \
24345 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24346 BUILT_IN_MD, NULL, NULL_TREE); \
24347 arm_builtin_decls[CODE] = bdecl; \
24350 while (0)
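/* Expansion sketch: the iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO) use
   further below becomes
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
   so a builtin is registered (and recorded in arm_builtin_decls) only when
   its FL_* mask bit is present in insn_flags for the selected CPU.  */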
24352 struct builtin_description
24354 const unsigned int mask;
24355 const enum insn_code icode;
24356 const char * const name;
24357 const enum arm_builtins code;
24358 const enum rtx_code comparison;
24359 const unsigned int flag;
24362 static const struct builtin_description bdesc_2arg[] =
24364 #define IWMMXT_BUILTIN(code, string, builtin) \
24365 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24366 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24368 #define IWMMXT2_BUILTIN(code, string, builtin) \
24369 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24370 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
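/* Expansion sketch: the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   yields the initializer
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   i.e. each row ties an insn pattern to a builtin name and enum code.  */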
24372 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24373 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24374 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24375 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24376 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24377 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24378 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24379 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24380 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24381 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24382 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24383 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24384 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24385 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24386 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24387 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24388 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24389 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24390 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24391 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24392 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24393 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24394 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24395 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24396 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24397 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24398 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24399 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24400 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24401 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24402 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24403 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24404 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24405 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24406 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24407 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24408 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24409 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24410 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24411 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24412 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24413 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24414 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24415 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24416 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24417 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24418 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24419 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24420 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24421 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24422 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24423 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24424 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24425 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24426 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24427 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24428 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24429 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24430 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24431 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24432 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24433 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24434 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24435 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24436 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24437 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24438 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24439 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24440 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24441 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24442 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24443 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24444 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24445 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24446 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24447 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24448 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24449 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24451 #define IWMMXT_BUILTIN2(code, builtin) \
24452 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24454 #define IWMMXT2_BUILTIN2(code, builtin) \
24455 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24457 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24458 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24459 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24460 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24461 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24462 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24463 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24464 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24465 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24466 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24469 #define FP_BUILTIN(L, U) \
24470 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24471 UNKNOWN, 0},
24473 FP_BUILTIN (get_fpscr, GET_FPSCR)
24474 FP_BUILTIN (set_fpscr, SET_FPSCR)
24475 #undef FP_BUILTIN
24477 #define CRC32_BUILTIN(L, U) \
24478 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24479 UNKNOWN, 0},
24480 CRC32_BUILTIN (crc32b, CRC32B)
24481 CRC32_BUILTIN (crc32h, CRC32H)
24482 CRC32_BUILTIN (crc32w, CRC32W)
24483 CRC32_BUILTIN (crc32cb, CRC32CB)
24484 CRC32_BUILTIN (crc32ch, CRC32CH)
24485 CRC32_BUILTIN (crc32cw, CRC32CW)
24486 #undef CRC32_BUILTIN
24489 #define CRYPTO_BUILTIN(L, U) \
24490 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24491 UNKNOWN, 0},
24492 #undef CRYPTO1
24493 #undef CRYPTO2
24494 #undef CRYPTO3
24495 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24496 #define CRYPTO1(L, U, R, A)
24497 #define CRYPTO3(L, U, R, A1, A2, A3)
24498 #include "crypto.def"
24499 #undef CRYPTO1
24500 #undef CRYPTO2
24501 #undef CRYPTO3
24505 static const struct builtin_description bdesc_1arg[] =
24507 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24508 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24509 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24510 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24511 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24512 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24513 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24514 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24515 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24516 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24517 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24518 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24519 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24520 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24521 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24522 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24523 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24524 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24525 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24526 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24527 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24528 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24529 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24530 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24532 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24533 #define CRYPTO2(L, U, R, A1, A2)
24534 #define CRYPTO3(L, U, R, A1, A2, A3)
24535 #include "crypto.def"
24536 #undef CRYPTO1
24537 #undef CRYPTO2
24538 #undef CRYPTO3
24541 static const struct builtin_description bdesc_3arg[] =
24543 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24544 #define CRYPTO1(L, U, R, A)
24545 #define CRYPTO2(L, U, R, A1, A2)
24546 #include "crypto.def"
24547 #undef CRYPTO1
24548 #undef CRYPTO2
24549 #undef CRYPTO3
24551 #undef CRYPTO_BUILTIN
24553 /* Set up all the iWMMXt builtins. This is not called if
24554 TARGET_IWMMXT is zero. */
24556 static void
24557 arm_init_iwmmxt_builtins (void)
24559 const struct builtin_description * d;
24560 size_t i;
24562 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24563 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24564 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24566 tree v8qi_ftype_v8qi_v8qi_int
24567 = build_function_type_list (V8QI_type_node,
24568 V8QI_type_node, V8QI_type_node,
24569 integer_type_node, NULL_TREE);
24570 tree v4hi_ftype_v4hi_int
24571 = build_function_type_list (V4HI_type_node,
24572 V4HI_type_node, integer_type_node, NULL_TREE);
24573 tree v2si_ftype_v2si_int
24574 = build_function_type_list (V2SI_type_node,
24575 V2SI_type_node, integer_type_node, NULL_TREE);
24576 tree v2si_ftype_di_di
24577 = build_function_type_list (V2SI_type_node,
24578 long_long_integer_type_node,
24579 long_long_integer_type_node,
24580 NULL_TREE);
24581 tree di_ftype_di_int
24582 = build_function_type_list (long_long_integer_type_node,
24583 long_long_integer_type_node,
24584 integer_type_node, NULL_TREE);
24585 tree di_ftype_di_int_int
24586 = build_function_type_list (long_long_integer_type_node,
24587 long_long_integer_type_node,
24588 integer_type_node,
24589 integer_type_node, NULL_TREE);
24590 tree int_ftype_v8qi
24591 = build_function_type_list (integer_type_node,
24592 V8QI_type_node, NULL_TREE);
24593 tree int_ftype_v4hi
24594 = build_function_type_list (integer_type_node,
24595 V4HI_type_node, NULL_TREE);
24596 tree int_ftype_v2si
24597 = build_function_type_list (integer_type_node,
24598 V2SI_type_node, NULL_TREE);
24599 tree int_ftype_v8qi_int
24600 = build_function_type_list (integer_type_node,
24601 V8QI_type_node, integer_type_node, NULL_TREE);
24602 tree int_ftype_v4hi_int
24603 = build_function_type_list (integer_type_node,
24604 V4HI_type_node, integer_type_node, NULL_TREE);
24605 tree int_ftype_v2si_int
24606 = build_function_type_list (integer_type_node,
24607 V2SI_type_node, integer_type_node, NULL_TREE);
24608 tree v8qi_ftype_v8qi_int_int
24609 = build_function_type_list (V8QI_type_node,
24610 V8QI_type_node, integer_type_node,
24611 integer_type_node, NULL_TREE);
24612 tree v4hi_ftype_v4hi_int_int
24613 = build_function_type_list (V4HI_type_node,
24614 V4HI_type_node, integer_type_node,
24615 integer_type_node, NULL_TREE);
24616 tree v2si_ftype_v2si_int_int
24617 = build_function_type_list (V2SI_type_node,
24618 V2SI_type_node, integer_type_node,
24619 integer_type_node, NULL_TREE);
24620 /* Miscellaneous. */
24621 tree v8qi_ftype_v4hi_v4hi
24622 = build_function_type_list (V8QI_type_node,
24623 V4HI_type_node, V4HI_type_node, NULL_TREE);
24624 tree v4hi_ftype_v2si_v2si
24625 = build_function_type_list (V4HI_type_node,
24626 V2SI_type_node, V2SI_type_node, NULL_TREE);
24627 tree v8qi_ftype_v4hi_v8qi
24628 = build_function_type_list (V8QI_type_node,
24629 V4HI_type_node, V8QI_type_node, NULL_TREE);
24630 tree v2si_ftype_v4hi_v4hi
24631 = build_function_type_list (V2SI_type_node,
24632 V4HI_type_node, V4HI_type_node, NULL_TREE);
24633 tree v2si_ftype_v8qi_v8qi
24634 = build_function_type_list (V2SI_type_node,
24635 V8QI_type_node, V8QI_type_node, NULL_TREE);
24636 tree v4hi_ftype_v4hi_di
24637 = build_function_type_list (V4HI_type_node,
24638 V4HI_type_node, long_long_integer_type_node,
24639 NULL_TREE);
24640 tree v2si_ftype_v2si_di
24641 = build_function_type_list (V2SI_type_node,
24642 V2SI_type_node, long_long_integer_type_node,
24643 NULL_TREE);
24644 tree di_ftype_void
24645 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24646 tree int_ftype_void
24647 = build_function_type_list (integer_type_node, NULL_TREE);
24648 tree di_ftype_v8qi
24649 = build_function_type_list (long_long_integer_type_node,
24650 V8QI_type_node, NULL_TREE);
24651 tree di_ftype_v4hi
24652 = build_function_type_list (long_long_integer_type_node,
24653 V4HI_type_node, NULL_TREE);
24654 tree di_ftype_v2si
24655 = build_function_type_list (long_long_integer_type_node,
24656 V2SI_type_node, NULL_TREE);
24657 tree v2si_ftype_v4hi
24658 = build_function_type_list (V2SI_type_node,
24659 V4HI_type_node, NULL_TREE);
24660 tree v4hi_ftype_v8qi
24661 = build_function_type_list (V4HI_type_node,
24662 V8QI_type_node, NULL_TREE);
24663 tree v8qi_ftype_v8qi
24664 = build_function_type_list (V8QI_type_node,
24665 V8QI_type_node, NULL_TREE);
24666 tree v4hi_ftype_v4hi
24667 = build_function_type_list (V4HI_type_node,
24668 V4HI_type_node, NULL_TREE);
24669 tree v2si_ftype_v2si
24670 = build_function_type_list (V2SI_type_node,
24671 V2SI_type_node, NULL_TREE);
24673 tree di_ftype_di_v4hi_v4hi
24674 = build_function_type_list (long_long_unsigned_type_node,
24675 long_long_unsigned_type_node,
24676 V4HI_type_node, V4HI_type_node,
24677 NULL_TREE);
24679 tree di_ftype_v4hi_v4hi
24680 = build_function_type_list (long_long_unsigned_type_node,
24681 V4HI_type_node,V4HI_type_node,
24682 NULL_TREE);
24684 tree v2si_ftype_v2si_v4hi_v4hi
24685 = build_function_type_list (V2SI_type_node,
24686 V2SI_type_node, V4HI_type_node,
24687 V4HI_type_node, NULL_TREE);
24689 tree v2si_ftype_v2si_v8qi_v8qi
24690 = build_function_type_list (V2SI_type_node,
24691 V2SI_type_node, V8QI_type_node,
24692 V8QI_type_node, NULL_TREE);
24694 tree di_ftype_di_v2si_v2si
24695 = build_function_type_list (long_long_unsigned_type_node,
24696 long_long_unsigned_type_node,
24697 V2SI_type_node, V2SI_type_node,
24698 NULL_TREE);
24700 tree di_ftype_di_di_int
24701 = build_function_type_list (long_long_unsigned_type_node,
24702 long_long_unsigned_type_node,
24703 long_long_unsigned_type_node,
24704 integer_type_node, NULL_TREE);
24706 tree void_ftype_int
24707 = build_function_type_list (void_type_node,
24708 integer_type_node, NULL_TREE);
24710 tree v8qi_ftype_char
24711 = build_function_type_list (V8QI_type_node,
24712 signed_char_type_node, NULL_TREE);
24714 tree v4hi_ftype_short
24715 = build_function_type_list (V4HI_type_node,
24716 short_integer_type_node, NULL_TREE);
24718 tree v2si_ftype_int
24719 = build_function_type_list (V2SI_type_node,
24720 integer_type_node, NULL_TREE);
24722 /* Normal vector binops. */
24723 tree v8qi_ftype_v8qi_v8qi
24724 = build_function_type_list (V8QI_type_node,
24725 V8QI_type_node, V8QI_type_node, NULL_TREE);
24726 tree v4hi_ftype_v4hi_v4hi
24727 = build_function_type_list (V4HI_type_node,
24728 V4HI_type_node,V4HI_type_node, NULL_TREE);
24729 tree v2si_ftype_v2si_v2si
24730 = build_function_type_list (V2SI_type_node,
24731 V2SI_type_node, V2SI_type_node, NULL_TREE);
24732 tree di_ftype_di_di
24733 = build_function_type_list (long_long_unsigned_type_node,
24734 long_long_unsigned_type_node,
24735 long_long_unsigned_type_node,
24736 NULL_TREE);
24738 /* Add all builtins that are more or less simple operations on two
24739 operands. */
24740 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24742 /* Use one of the operands; the target can have a different mode for
24743 mask-generating compares. */
24744 machine_mode mode;
24745 tree type;
24747 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24748 continue;
24750 mode = insn_data[d->icode].operand[1].mode;
24752 switch (mode)
24754 case V8QImode:
24755 type = v8qi_ftype_v8qi_v8qi;
24756 break;
24757 case V4HImode:
24758 type = v4hi_ftype_v4hi_v4hi;
24759 break;
24760 case V2SImode:
24761 type = v2si_ftype_v2si_v2si;
24762 break;
24763 case DImode:
24764 type = di_ftype_di_di;
24765 break;
24767 default:
24768 gcc_unreachable ();
24771 def_mbuiltin (d->mask, d->name, type, d->code);
24774 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24775 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24776 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24777 ARM_BUILTIN_ ## CODE)
24779 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24780 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24781 ARM_BUILTIN_ ## CODE)
24783 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24784 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24785 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24786 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24787 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24788 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24789 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24790 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24791 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24793 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24794 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24795 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24796 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24797 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24798 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24800 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24801 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24802 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24803 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24804 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24805 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24807 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24808 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24809 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24810 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24811 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24812 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24814 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24815 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24816 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24817 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24818 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24819 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24821 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24823 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24824 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24825 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24826 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24827 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24828 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24829 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24830 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24831 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24832 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24834 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24835 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24836 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24837 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24838 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24839 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24840 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24841 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24842 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24844 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24845 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24846 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24848 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24849 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24850 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24852 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24853 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24855 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24856 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24857 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24858 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24859 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24860 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24862 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24863 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24864 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24865 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24866 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24867 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24868 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24869 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24870 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24871 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24872 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24873 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24875 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24876 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24877 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24878 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24880 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24881 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24882 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24883 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24884 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24885 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24886 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24888 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24889 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24890 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24892 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24893 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24894 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24895 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24897 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24898 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24899 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24900 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24902 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24903 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24904 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24905 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24907 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24908 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24909 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24910 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24912 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24913 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24914 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24915 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24917 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24918 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24919 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24920 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24922 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24924 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24925 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24926 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24928 #undef iwmmx_mbuiltin
24929 #undef iwmmx2_mbuiltin
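/* Minimal usage sketch, assuming a toolchain and CPU for which the iWMMXt
   registrations above actually run; the builtin names and signatures follow
   the def_mbuiltin calls above.  */
unsigned long long
iwmmxt_demo (int scalar)
{
  __builtin_arm_setwcgr0 (scalar);                 /* void (int)  */
  int g = __builtin_arm_getwcgr0 ();               /* int (void)  */
  /* wzero has type di_ftype_void: it returns a zeroed 64-bit value.  */
  return __builtin_arm_wzero () + (unsigned long long) g;
}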
24932 static void
24933 arm_init_fp16_builtins (void)
24935 tree fp16_type = make_node (REAL_TYPE);
24936 TYPE_PRECISION (fp16_type) = 16;
24937 layout_type (fp16_type);
24938 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24941 static void
24942 arm_init_crc32_builtins ()
24944 tree si_ftype_si_qi
24945 = build_function_type_list (unsigned_intSI_type_node,
24946 unsigned_intSI_type_node,
24947 unsigned_intQI_type_node, NULL_TREE);
24948 tree si_ftype_si_hi
24949 = build_function_type_list (unsigned_intSI_type_node,
24950 unsigned_intSI_type_node,
24951 unsigned_intHI_type_node, NULL_TREE);
24952 tree si_ftype_si_si
24953 = build_function_type_list (unsigned_intSI_type_node,
24954 unsigned_intSI_type_node,
24955 unsigned_intSI_type_node, NULL_TREE);
24957 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24958 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24959 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24960 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24961 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24962 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24963 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24964 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24965 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24966 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24967 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24968 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24969 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24970 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24971 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24972 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24973 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24974 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
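/* Minimal usage sketch, assuming a target where TARGET_CRC32 holds (for
   example, compiling with -march=armv8-a+crc); the builtin names and
   argument types mirror the registrations above.  */
static unsigned int
crc32_update (unsigned int crc, const unsigned char *buf, unsigned int len)
{
  while (len--)
    crc = __builtin_arm_crc32b (crc, *buf++);   /* unsigned SI <- (SI, QI)  */
  return crc;
}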
24977 static void
24978 arm_init_builtins (void)
24980 if (TARGET_REALLY_IWMMXT)
24981 arm_init_iwmmxt_builtins ();
24983 if (TARGET_NEON)
24984 arm_init_neon_builtins ();
24986 if (arm_fp16_format)
24987 arm_init_fp16_builtins ();
24989 if (TARGET_CRC32)
24990 arm_init_crc32_builtins ();
24992 if (TARGET_VFP && TARGET_HARD_FLOAT)
24994 tree ftype_set_fpscr
24995 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24996 tree ftype_get_fpscr
24997 = build_function_type_list (unsigned_type_node, NULL);
24999 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
25000 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
25001 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25002 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
25003 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
25004 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
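/* Usage sketch for the two FPSCR builtins registered above (available when
   TARGET_VFP && TARGET_HARD_FLOAT): a read-modify-write of the FPSCR.  The
   FZ (flush-to-zero) bit used here, bit 24, is shown purely for
   illustration.  */
static void
enable_flush_to_zero (void)
{
  unsigned int fpscr = __builtin_arm_ldfscr ();   /* unsigned (void)   */
  fpscr |= 1u << 24;
  __builtin_arm_stfscr (fpscr);                   /* void (unsigned)   */
}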
25008 /* Return the ARM builtin for CODE. */
25010 static tree
25011 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25013 if (code >= ARM_BUILTIN_MAX)
25014 return error_mark_node;
25016 return arm_builtin_decls[code];
25019 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25021 static const char *
25022 arm_invalid_parameter_type (const_tree t)
25024 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25025 return N_("function parameters cannot have __fp16 type");
25026 return NULL;
25029 /* Implement TARGET_INVALID_RETURN_TYPE. */
25031 static const char *
25032 arm_invalid_return_type (const_tree t)
25034 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25035 return N_("functions cannot return __fp16 type");
25036 return NULL;
25039 /* Implement TARGET_PROMOTED_TYPE. */
25041 static tree
25042 arm_promoted_type (const_tree t)
25044 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25045 return float_type_node;
25046 return NULL_TREE;
25049 /* Implement TARGET_CONVERT_TO_TYPE.
25050 Specifically, this hook implements the peculiarity of the ARM
25051 half-precision floating-point C semantics that requires conversions between
25052 __fp16 and double to go through an intermediate conversion to float. */
25054 static tree
25055 arm_convert_to_type (tree type, tree expr)
25057 tree fromtype = TREE_TYPE (expr);
25058 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25059 return NULL_TREE;
25060 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25061 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25062 return convert (type, convert (float_type_node, expr));
25063 return NULL_TREE;
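/* Illustration of the __fp16 semantics implemented by the hooks above,
   assuming the type is enabled (arm_fp16_format set, e.g. with
   -mfp16-format=ieee):  */
static double
fp16_demo (void)
{
  __fp16 h = (__fp16) 1.5f;
  double d = h;       /* performed as (double)(float) h (arm_convert_to_type)  */
  h = (__fp16) d;     /* performed as (__fp16)(float) d                        */
  return d + h;       /* h is promoted to float first (arm_promoted_type)      */
}
/* Note that __fp16 cannot be used as a parameter or return type; see
   arm_invalid_parameter_type and arm_invalid_return_type above.  */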
25066 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25067 This simply adds HFmode as a supported mode; even though we don't
25068 implement arithmetic on this type directly, it's supported by
25069 optabs conversions, much the way the double-word arithmetic is
25070 special-cased in the default hook. */
25072 static bool
25073 arm_scalar_mode_supported_p (machine_mode mode)
25075 if (mode == HFmode)
25076 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25077 else if (ALL_FIXED_POINT_MODE_P (mode))
25078 return true;
25079 else
25080 return default_scalar_mode_supported_p (mode);
25083 /* Errors in the source file can cause expand_expr to return const0_rtx
25084 where we expect a vector. To avoid crashing, use one of the vector
25085 clear instructions. */
25087 static rtx
25088 safe_vector_operand (rtx x, machine_mode mode)
25090 if (x != const0_rtx)
25091 return x;
25092 x = gen_reg_rtx (mode);
25094 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25095 : gen_rtx_SUBREG (DImode, x, 0)));
25096 return x;
25099 /* Function to expand ternary builtins. */
25100 static rtx
25101 arm_expand_ternop_builtin (enum insn_code icode,
25102 tree exp, rtx target)
25104 rtx pat;
25105 tree arg0 = CALL_EXPR_ARG (exp, 0);
25106 tree arg1 = CALL_EXPR_ARG (exp, 1);
25107 tree arg2 = CALL_EXPR_ARG (exp, 2);
25109 rtx op0 = expand_normal (arg0);
25110 rtx op1 = expand_normal (arg1);
25111 rtx op2 = expand_normal (arg2);
25112 rtx op3 = NULL_RTX;
25114 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25115 lane operand depending on endianness. */
25116 bool builtin_sha1cpm_p = false;
25118 if (insn_data[icode].n_operands == 5)
25120 gcc_assert (icode == CODE_FOR_crypto_sha1c
25121 || icode == CODE_FOR_crypto_sha1p
25122 || icode == CODE_FOR_crypto_sha1m);
25123 builtin_sha1cpm_p = true;
25125 machine_mode tmode = insn_data[icode].operand[0].mode;
25126 machine_mode mode0 = insn_data[icode].operand[1].mode;
25127 machine_mode mode1 = insn_data[icode].operand[2].mode;
25128 machine_mode mode2 = insn_data[icode].operand[3].mode;
25131 if (VECTOR_MODE_P (mode0))
25132 op0 = safe_vector_operand (op0, mode0);
25133 if (VECTOR_MODE_P (mode1))
25134 op1 = safe_vector_operand (op1, mode1);
25135 if (VECTOR_MODE_P (mode2))
25136 op2 = safe_vector_operand (op2, mode2);
25138 if (! target
25139 || GET_MODE (target) != tmode
25140 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25141 target = gen_reg_rtx (tmode);
25143 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25144 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25145 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25147 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25148 op0 = copy_to_mode_reg (mode0, op0);
25149 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25150 op1 = copy_to_mode_reg (mode1, op1);
25151 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25152 op2 = copy_to_mode_reg (mode2, op2);
25153 if (builtin_sha1cpm_p)
25154 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25156 if (builtin_sha1cpm_p)
25157 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25158 else
25159 pat = GEN_FCN (icode) (target, op0, op1, op2);
25160 if (! pat)
25161 return 0;
25162 emit_insn (pat);
25163 return target;
25166 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25168 static rtx
25169 arm_expand_binop_builtin (enum insn_code icode,
25170 tree exp, rtx target)
25172 rtx pat;
25173 tree arg0 = CALL_EXPR_ARG (exp, 0);
25174 tree arg1 = CALL_EXPR_ARG (exp, 1);
25175 rtx op0 = expand_normal (arg0);
25176 rtx op1 = expand_normal (arg1);
25177 machine_mode tmode = insn_data[icode].operand[0].mode;
25178 machine_mode mode0 = insn_data[icode].operand[1].mode;
25179 machine_mode mode1 = insn_data[icode].operand[2].mode;
25181 if (VECTOR_MODE_P (mode0))
25182 op0 = safe_vector_operand (op0, mode0);
25183 if (VECTOR_MODE_P (mode1))
25184 op1 = safe_vector_operand (op1, mode1);
25186 if (! target
25187 || GET_MODE (target) != tmode
25188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25189 target = gen_reg_rtx (tmode);
25191 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25192 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25194 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25195 op0 = copy_to_mode_reg (mode0, op0);
25196 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25197 op1 = copy_to_mode_reg (mode1, op1);
25199 pat = GEN_FCN (icode) (target, op0, op1);
25200 if (! pat)
25201 return 0;
25202 emit_insn (pat);
25203 return target;
25206 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25208 static rtx
25209 arm_expand_unop_builtin (enum insn_code icode,
25210 tree exp, rtx target, int do_load)
25212 rtx pat;
25213 tree arg0 = CALL_EXPR_ARG (exp, 0);
25214 rtx op0 = expand_normal (arg0);
25215 rtx op1 = NULL_RTX;
25216 machine_mode tmode = insn_data[icode].operand[0].mode;
25217 machine_mode mode0 = insn_data[icode].operand[1].mode;
25218 bool builtin_sha1h_p = false;
25220 if (insn_data[icode].n_operands == 3)
25222 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25223 builtin_sha1h_p = true;
25226 if (! target
25227 || GET_MODE (target) != tmode
25228 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25229 target = gen_reg_rtx (tmode);
25230 if (do_load)
25231 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25232 else
25234 if (VECTOR_MODE_P (mode0))
25235 op0 = safe_vector_operand (op0, mode0);
25237 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25238 op0 = copy_to_mode_reg (mode0, op0);
25240 if (builtin_sha1h_p)
25241 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25243 if (builtin_sha1h_p)
25244 pat = GEN_FCN (icode) (target, op0, op1);
25245 else
25246 pat = GEN_FCN (icode) (target, op0);
25247 if (! pat)
25248 return 0;
25249 emit_insn (pat);
25250 return target;
25253 typedef enum {
25254 NEON_ARG_COPY_TO_REG,
25255 NEON_ARG_CONSTANT,
25256 NEON_ARG_MEMORY,
25257 NEON_ARG_STOP
25258 } builtin_arg;
25260 #define NEON_MAX_BUILTIN_ARGS 5
25262 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25263 and return an expression for the accessed memory.
25265 The intrinsic function operates on a block of registers that has
25266 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25267 function references the memory at EXP of type TYPE and in mode
25268 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25269 available. */
25271 static tree
25272 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25273 machine_mode reg_mode,
25274 neon_builtin_type_mode type_mode)
25276 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25277 tree elem_type, upper_bound, array_type;
25279 /* Work out the size of the register block in bytes. */
25280 reg_size = GET_MODE_SIZE (reg_mode);
25282 /* Work out the size of each vector in bytes. */
25283 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25284 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25286 /* Work out how many vectors there are. */
25287 gcc_assert (reg_size % vector_size == 0);
25288 nvectors = reg_size / vector_size;
25290 /* Work out the type of each element. */
25291 gcc_assert (POINTER_TYPE_P (type));
25292 elem_type = TREE_TYPE (type);
25294 /* Work out how many elements are being loaded or stored.
25295 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25296 and memory elements; anything else implies a lane load or store. */
25297 if (mem_mode == reg_mode)
25298 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25299 else
25300 nelems = nvectors;
25302 /* Create a type that describes the full access. */
25303 upper_bound = build_int_cst (size_type_node, nelems - 1);
25304 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25306 /* Dereference EXP using that type. */
25307 return fold_build2 (MEM_REF, array_type, exp,
25308 build_int_cst (build_pointer_type (array_type), 0));
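/* Worked example (illustrative, assuming a vld2q-style load of 32-bit
   elements): REG_MODE then covers two quad registers, so reg_size is 32
   and vector_size is 16, giving nvectors == 2.  When MEM_MODE equals
   REG_MODE the access spans 32 / 4 == 8 uint32_t elements and the
   MEM_REF built above has type uint32_t[8]; a lane variant instead
   touches only nvectors elements, one per register.  */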
25311 /* Expand a Neon builtin. */
25312 static rtx
25313 arm_expand_neon_args (rtx target, int icode, int have_retval,
25314 neon_builtin_type_mode type_mode,
25315 tree exp, int fcode, ...)
25317 va_list ap;
25318 rtx pat;
25319 tree arg[NEON_MAX_BUILTIN_ARGS];
25320 rtx op[NEON_MAX_BUILTIN_ARGS];
25321 tree arg_type;
25322 tree formals;
25323 machine_mode tmode = insn_data[icode].operand[0].mode;
25324 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25325 machine_mode other_mode;
25326 int argc = 0;
25327 int opno;
25329 if (have_retval
25330 && (!target
25331 || GET_MODE (target) != tmode
25332 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25333 target = gen_reg_rtx (tmode);
25335 va_start (ap, fcode);
25337 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25339 for (;;)
25341 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25343 if (thisarg == NEON_ARG_STOP)
25344 break;
25345 else
25347 opno = argc + have_retval;
25348 mode[argc] = insn_data[icode].operand[opno].mode;
25349 arg[argc] = CALL_EXPR_ARG (exp, argc);
25350 arg_type = TREE_VALUE (formals);
25351 if (thisarg == NEON_ARG_MEMORY)
25353 other_mode = insn_data[icode].operand[1 - opno].mode;
25354 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25355 mode[argc], other_mode,
25356 type_mode);
25359 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
25360 is returned. */
25361 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25362 (thisarg == NEON_ARG_MEMORY
25363 ? EXPAND_MEMORY : EXPAND_NORMAL));
25365 switch (thisarg)
25367 case NEON_ARG_COPY_TO_REG:
25368 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25369 if (!(*insn_data[icode].operand[opno].predicate)
25370 (op[argc], mode[argc]))
25371 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25372 break;
25374 case NEON_ARG_CONSTANT:
25375 /* FIXME: This error message is somewhat unhelpful. */
25376 if (!(*insn_data[icode].operand[opno].predicate)
25377 (op[argc], mode[argc]))
25378 error ("argument must be a constant");
25379 break;
25381 case NEON_ARG_MEMORY:
25382 /* Check if expand failed. */
25383 if (op[argc] == const0_rtx)
25384 return 0;
25385 gcc_assert (MEM_P (op[argc]));
25386 PUT_MODE (op[argc], mode[argc]);
25387 /* ??? arm_neon.h uses the same built-in functions for signed
25388 and unsigned accesses, casting where necessary. This isn't
25389 alias safe. */
25390 set_mem_alias_set (op[argc], 0);
25391 if (!(*insn_data[icode].operand[opno].predicate)
25392 (op[argc], mode[argc]))
25393 op[argc] = (replace_equiv_address
25394 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25395 break;
25397 case NEON_ARG_STOP:
25398 gcc_unreachable ();
25401 argc++;
25402 formals = TREE_CHAIN (formals);
25406 va_end (ap);
25408 if (have_retval)
25409 switch (argc)
25411 case 1:
25412 pat = GEN_FCN (icode) (target, op[0]);
25413 break;
25415 case 2:
25416 pat = GEN_FCN (icode) (target, op[0], op[1]);
25417 break;
25419 case 3:
25420 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25421 break;
25423 case 4:
25424 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25425 break;
25427 case 5:
25428 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25429 break;
25431 default:
25432 gcc_unreachable ();
25434 else
25435 switch (argc)
25437 case 1:
25438 pat = GEN_FCN (icode) (op[0]);
25439 break;
25441 case 2:
25442 pat = GEN_FCN (icode) (op[0], op[1]);
25443 break;
25445 case 3:
25446 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25447 break;
25449 case 4:
25450 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25451 break;
25453 case 5:
25454 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25455 break;
25457 default:
25458 gcc_unreachable ();
25461 if (!pat)
25462 return 0;
25464 emit_insn (pat);
25466 return target;
25469 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25470 constants defined per-instruction or per instruction-variant. Instead, the
25471 required info is looked up in the table neon_builtin_data. */
25472 static rtx
25473 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25475 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25476 neon_itype itype = d->itype;
25477 enum insn_code icode = d->code;
25478 neon_builtin_type_mode type_mode = d->mode;
25480 switch (itype)
25482 case NEON_UNOP:
25483 case NEON_CONVERT:
25484 case NEON_DUPLANE:
25485 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25486 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25488 case NEON_BINOP:
25489 case NEON_SETLANE:
25490 case NEON_SCALARMUL:
25491 case NEON_SCALARMULL:
25492 case NEON_SCALARMULH:
25493 case NEON_SHIFTINSERT:
25494 case NEON_LOGICBINOP:
25495 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25496 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25497 NEON_ARG_STOP);
25499 case NEON_TERNOP:
25500 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25501 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25502 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25504 case NEON_GETLANE:
25505 case NEON_FIXCONV:
25506 case NEON_SHIFTIMM:
25507 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25508 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25509 NEON_ARG_STOP);
25511 case NEON_CREATE:
25512 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25513 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25515 case NEON_DUP:
25516 case NEON_RINT:
25517 case NEON_SPLIT:
25518 case NEON_FLOAT_WIDEN:
25519 case NEON_FLOAT_NARROW:
25520 case NEON_BSWAP:
25521 case NEON_REINTERP:
25522 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25523 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25525 case NEON_COPYSIGNF:
25526 case NEON_COMBINE:
25527 case NEON_VTBL:
25528 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25529 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25531 case NEON_LANEMUL:
25532 case NEON_LANEMULL:
25533 case NEON_LANEMULH:
25534 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25535 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25536 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25538 case NEON_LANEMAC:
25539 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25540 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25541 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25543 case NEON_SHIFTACC:
25544 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25545 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25546 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25548 case NEON_SCALARMAC:
25549 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25550 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25551 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25553 case NEON_SELECT:
25554 case NEON_VTBX:
25555 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25556 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25557 NEON_ARG_STOP);
25559 case NEON_LOAD1:
25560 case NEON_LOADSTRUCT:
25561 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25562 NEON_ARG_MEMORY, NEON_ARG_STOP);
25564 case NEON_LOAD1LANE:
25565 case NEON_LOADSTRUCTLANE:
25566 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25567 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25568 NEON_ARG_STOP);
25570 case NEON_STORE1:
25571 case NEON_STORESTRUCT:
25572 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25573 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25575 case NEON_STORE1LANE:
25576 case NEON_STORESTRUCTLANE:
25577 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25578 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25579 NEON_ARG_STOP);
25582 gcc_unreachable ();
25585 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25586 void
25587 neon_reinterpret (rtx dest, rtx src)
25589 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25592 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25593 not to early-clobber SRC registers in the process.
25595 We assume that the operands described by SRC and DEST represent a
25596 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25597 number of components into which the copy has been decomposed. */
25598 void
25599 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25601 unsigned int i;
25603 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25604 || REGNO (operands[0]) < REGNO (operands[1]))
25606 for (i = 0; i < count; i++)
25608 operands[2 * i] = dest[i];
25609 operands[2 * i + 1] = src[i];
25612 else
25614 for (i = 0; i < count; i++)
25616 operands[2 * i] = dest[count - i - 1];
25617 operands[2 * i + 1] = src[count - i - 1];
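/* Illustrative example (not from the original source): for a copy of
   {d1, d2} from {d0, d1}, emitting d1 := d0 first would clobber d1
   before it is read for the second component, so the REGNO test above
   selects the reversed order (d2 := d1, then d1 := d0); when there is
   no overlap, or the destination starts below the source, the forward
   order is safe.  */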
25622 /* Split operands into moves from op[1] + op[2] into op[0]. */
25624 void
25625 neon_split_vcombine (rtx operands[3])
25627 unsigned int dest = REGNO (operands[0]);
25628 unsigned int src1 = REGNO (operands[1]);
25629 unsigned int src2 = REGNO (operands[2]);
25630 machine_mode halfmode = GET_MODE (operands[1]);
25631 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25632 rtx destlo, desthi;
25634 if (src1 == dest && src2 == dest + halfregs)
25636 /* No-op move. Can't split to nothing; emit something. */
25637 emit_note (NOTE_INSN_DELETED);
25638 return;
25641 /* Preserve register attributes for variable tracking. */
25642 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25643 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25644 GET_MODE_SIZE (halfmode));
25646 /* Special case of reversed high/low parts. Use VSWP. */
25647 if (src2 == dest && src1 == dest + halfregs)
25649 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25650 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25651 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25652 return;
25655 if (!reg_overlap_mentioned_p (operands[2], destlo))
25657 /* Try to avoid unnecessary moves if part of the result
25658 is in the right place already. */
25659 if (src1 != dest)
25660 emit_move_insn (destlo, operands[1]);
25661 if (src2 != dest + halfregs)
25662 emit_move_insn (desthi, operands[2]);
25664 else
25666 if (src2 != dest + halfregs)
25667 emit_move_insn (desthi, operands[2]);
25668 if (src1 != dest)
25669 emit_move_insn (destlo, operands[1]);
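/* Illustrative note (hedged): the PARALLEL of two SETs built above is
   intended to be matched by the D-register swap pattern in neon.md, so
   a vcombine whose halves are already present but exchanged should cost
   a single VSWP rather than moves through a scratch register.  */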
25673 /* Expand an expression EXP that calls a built-in function,
25674 with result going to TARGET if that's convenient
25675 (and in mode MODE if that's convenient).
25676 SUBTARGET may be used as the target for computing one of EXP's operands.
25677 IGNORE is nonzero if the value is to be ignored. */
25679 static rtx
25680 arm_expand_builtin (tree exp,
25681 rtx target,
25682 rtx subtarget ATTRIBUTE_UNUSED,
25683 machine_mode mode ATTRIBUTE_UNUSED,
25684 int ignore ATTRIBUTE_UNUSED)
25686 const struct builtin_description * d;
25687 enum insn_code icode;
25688 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25689 tree arg0;
25690 tree arg1;
25691 tree arg2;
25692 rtx op0;
25693 rtx op1;
25694 rtx op2;
25695 rtx pat;
25696 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25697 size_t i;
25698 machine_mode tmode;
25699 machine_mode mode0;
25700 machine_mode mode1;
25701 machine_mode mode2;
25702 int opint;
25703 int selector;
25704 int mask;
25705 int imm;
25707 if (fcode >= ARM_BUILTIN_NEON_BASE)
25708 return arm_expand_neon_builtin (fcode, exp, target);
25710 switch (fcode)
25712 case ARM_BUILTIN_GET_FPSCR:
25713 case ARM_BUILTIN_SET_FPSCR:
25714 if (fcode == ARM_BUILTIN_GET_FPSCR)
25716 icode = CODE_FOR_get_fpscr;
25717 target = gen_reg_rtx (SImode);
25718 pat = GEN_FCN (icode) (target);
25720 else
25722 target = NULL_RTX;
25723 icode = CODE_FOR_set_fpscr;
25724 arg0 = CALL_EXPR_ARG (exp, 0);
25725 op0 = expand_normal (arg0);
25726 pat = GEN_FCN (icode) (op0);
25728 emit_insn (pat);
25729 return target;
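/* Illustrative note (names assumed from the builtin enumerators): these
   two cases presumably back the __builtin_arm_get_fpscr and
   __builtin_arm_set_fpscr built-ins; the "get" form always materialises
   its result in a fresh SImode register, while the "set" form emits its
   insn for side effects only and returns no value.  */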
25731 case ARM_BUILTIN_TEXTRMSB:
25732 case ARM_BUILTIN_TEXTRMUB:
25733 case ARM_BUILTIN_TEXTRMSH:
25734 case ARM_BUILTIN_TEXTRMUH:
25735 case ARM_BUILTIN_TEXTRMSW:
25736 case ARM_BUILTIN_TEXTRMUW:
25737 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25738 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25739 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25740 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25741 : CODE_FOR_iwmmxt_textrmw);
25743 arg0 = CALL_EXPR_ARG (exp, 0);
25744 arg1 = CALL_EXPR_ARG (exp, 1);
25745 op0 = expand_normal (arg0);
25746 op1 = expand_normal (arg1);
25747 tmode = insn_data[icode].operand[0].mode;
25748 mode0 = insn_data[icode].operand[1].mode;
25749 mode1 = insn_data[icode].operand[2].mode;
25751 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25752 op0 = copy_to_mode_reg (mode0, op0);
25753 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25755 /* @@@ better error message */
25756 error ("selector must be an immediate");
25757 return gen_reg_rtx (tmode);
25760 opint = INTVAL (op1);
25761 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25763 if (opint > 7 || opint < 0)
25764 error ("the selector should be in the range 0 to 7");
25766 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25768 if (opint > 3 || opint < 0)
25769 error ("the selector should be in the range 0 to 3");
25771 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25773 if (opint > 1 || opint < 0)
25774 error ("the selector should be in the range 0 to 1");
25777 if (target == 0
25778 || GET_MODE (target) != tmode
25779 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25780 target = gen_reg_rtx (tmode);
25781 pat = GEN_FCN (icode) (target, op0, op1);
25782 if (! pat)
25783 return 0;
25784 emit_insn (pat);
25785 return target;
25787 case ARM_BUILTIN_WALIGNI:
25788 /* If op2 is immediate, call waligni, else call walignr. */
25789 arg0 = CALL_EXPR_ARG (exp, 0);
25790 arg1 = CALL_EXPR_ARG (exp, 1);
25791 arg2 = CALL_EXPR_ARG (exp, 2);
25792 op0 = expand_normal (arg0);
25793 op1 = expand_normal (arg1);
25794 op2 = expand_normal (arg2);
25795 if (CONST_INT_P (op2))
25797 icode = CODE_FOR_iwmmxt_waligni;
25798 tmode = insn_data[icode].operand[0].mode;
25799 mode0 = insn_data[icode].operand[1].mode;
25800 mode1 = insn_data[icode].operand[2].mode;
25801 mode2 = insn_data[icode].operand[3].mode;
25802 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25803 op0 = copy_to_mode_reg (mode0, op0);
25804 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25805 op1 = copy_to_mode_reg (mode1, op1);
25806 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25807 selector = INTVAL (op2);
25808 if (selector > 7 || selector < 0)
25809 error ("the selector should be in the range 0 to 7");
25811 else
25813 icode = CODE_FOR_iwmmxt_walignr;
25814 tmode = insn_data[icode].operand[0].mode;
25815 mode0 = insn_data[icode].operand[1].mode;
25816 mode1 = insn_data[icode].operand[2].mode;
25817 mode2 = insn_data[icode].operand[3].mode;
25818 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25819 op0 = copy_to_mode_reg (mode0, op0);
25820 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25821 op1 = copy_to_mode_reg (mode1, op1);
25822 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25823 op2 = copy_to_mode_reg (mode2, op2);
25825 if (target == 0
25826 || GET_MODE (target) != tmode
25827 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25828 target = gen_reg_rtx (tmode);
25829 pat = GEN_FCN (icode) (target, op0, op1, op2);
25830 if (!pat)
25831 return 0;
25832 emit_insn (pat);
25833 return target;
25835 case ARM_BUILTIN_TINSRB:
25836 case ARM_BUILTIN_TINSRH:
25837 case ARM_BUILTIN_TINSRW:
25838 case ARM_BUILTIN_WMERGE:
25839 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25840 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25841 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25842 : CODE_FOR_iwmmxt_tinsrw);
25843 arg0 = CALL_EXPR_ARG (exp, 0);
25844 arg1 = CALL_EXPR_ARG (exp, 1);
25845 arg2 = CALL_EXPR_ARG (exp, 2);
25846 op0 = expand_normal (arg0);
25847 op1 = expand_normal (arg1);
25848 op2 = expand_normal (arg2);
25849 tmode = insn_data[icode].operand[0].mode;
25850 mode0 = insn_data[icode].operand[1].mode;
25851 mode1 = insn_data[icode].operand[2].mode;
25852 mode2 = insn_data[icode].operand[3].mode;
25854 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25855 op0 = copy_to_mode_reg (mode0, op0);
25856 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25857 op1 = copy_to_mode_reg (mode1, op1);
25858 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25860 error ("selector must be an immediate");
25861 return const0_rtx;
25863 if (icode == CODE_FOR_iwmmxt_wmerge)
25865 selector = INTVAL (op2);
25866 if (selector > 7 || selector < 0)
25867 error ("the selector should be in the range 0 to 7");
25869 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25870 || (icode == CODE_FOR_iwmmxt_tinsrh)
25871 || (icode == CODE_FOR_iwmmxt_tinsrw))
25873 mask = 0x01;
25874 selector = INTVAL (op2);
25875 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25876 error ("the selector should be in the range 0 to 7");
25877 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25878 error ("the selector should be in the range 0 to 3");
25879 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25880 error ("the selector should be in the range 0 to 1");
25881 mask <<= selector;
25882 op2 = GEN_INT (mask);
25884 if (target == 0
25885 || GET_MODE (target) != tmode
25886 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25887 target = gen_reg_rtx (tmode);
25888 pat = GEN_FCN (icode) (target, op0, op1, op2);
25889 if (! pat)
25890 return 0;
25891 emit_insn (pat);
25892 return target;
25894 case ARM_BUILTIN_SETWCGR0:
25895 case ARM_BUILTIN_SETWCGR1:
25896 case ARM_BUILTIN_SETWCGR2:
25897 case ARM_BUILTIN_SETWCGR3:
25898 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25899 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25900 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25901 : CODE_FOR_iwmmxt_setwcgr3);
25902 arg0 = CALL_EXPR_ARG (exp, 0);
25903 op0 = expand_normal (arg0);
25904 mode0 = insn_data[icode].operand[0].mode;
25905 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25906 op0 = copy_to_mode_reg (mode0, op0);
25907 pat = GEN_FCN (icode) (op0);
25908 if (!pat)
25909 return 0;
25910 emit_insn (pat);
25911 return 0;
25913 case ARM_BUILTIN_GETWCGR0:
25914 case ARM_BUILTIN_GETWCGR1:
25915 case ARM_BUILTIN_GETWCGR2:
25916 case ARM_BUILTIN_GETWCGR3:
25917 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25918 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25919 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25920 : CODE_FOR_iwmmxt_getwcgr3);
25921 tmode = insn_data[icode].operand[0].mode;
25922 if (target == 0
25923 || GET_MODE (target) != tmode
25924 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25925 target = gen_reg_rtx (tmode);
25926 pat = GEN_FCN (icode) (target);
25927 if (!pat)
25928 return 0;
25929 emit_insn (pat);
25930 return target;
25932 case ARM_BUILTIN_WSHUFH:
25933 icode = CODE_FOR_iwmmxt_wshufh;
25934 arg0 = CALL_EXPR_ARG (exp, 0);
25935 arg1 = CALL_EXPR_ARG (exp, 1);
25936 op0 = expand_normal (arg0);
25937 op1 = expand_normal (arg1);
25938 tmode = insn_data[icode].operand[0].mode;
25939 mode1 = insn_data[icode].operand[1].mode;
25940 mode2 = insn_data[icode].operand[2].mode;
25942 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25943 op0 = copy_to_mode_reg (mode1, op0);
25944 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25946 error ("mask must be an immediate");
25947 return const0_rtx;
25949 selector = INTVAL (op1);
25950 if (selector < 0 || selector > 255)
25951 error ("the mask should be in the range 0 to 255");
25952 if (target == 0
25953 || GET_MODE (target) != tmode
25954 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25955 target = gen_reg_rtx (tmode);
25956 pat = GEN_FCN (icode) (target, op0, op1);
25957 if (! pat)
25958 return 0;
25959 emit_insn (pat);
25960 return target;
25962 case ARM_BUILTIN_WMADDS:
25963 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25964 case ARM_BUILTIN_WMADDSX:
25965 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25966 case ARM_BUILTIN_WMADDSN:
25967 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25968 case ARM_BUILTIN_WMADDU:
25969 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25970 case ARM_BUILTIN_WMADDUX:
25971 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25972 case ARM_BUILTIN_WMADDUN:
25973 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25974 case ARM_BUILTIN_WSADBZ:
25975 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25976 case ARM_BUILTIN_WSADHZ:
25977 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25979 /* Several three-argument builtins. */
25980 case ARM_BUILTIN_WMACS:
25981 case ARM_BUILTIN_WMACU:
25982 case ARM_BUILTIN_TMIA:
25983 case ARM_BUILTIN_TMIAPH:
25984 case ARM_BUILTIN_TMIATT:
25985 case ARM_BUILTIN_TMIATB:
25986 case ARM_BUILTIN_TMIABT:
25987 case ARM_BUILTIN_TMIABB:
25988 case ARM_BUILTIN_WQMIABB:
25989 case ARM_BUILTIN_WQMIABT:
25990 case ARM_BUILTIN_WQMIATB:
25991 case ARM_BUILTIN_WQMIATT:
25992 case ARM_BUILTIN_WQMIABBN:
25993 case ARM_BUILTIN_WQMIABTN:
25994 case ARM_BUILTIN_WQMIATBN:
25995 case ARM_BUILTIN_WQMIATTN:
25996 case ARM_BUILTIN_WMIABB:
25997 case ARM_BUILTIN_WMIABT:
25998 case ARM_BUILTIN_WMIATB:
25999 case ARM_BUILTIN_WMIATT:
26000 case ARM_BUILTIN_WMIABBN:
26001 case ARM_BUILTIN_WMIABTN:
26002 case ARM_BUILTIN_WMIATBN:
26003 case ARM_BUILTIN_WMIATTN:
26004 case ARM_BUILTIN_WMIAWBB:
26005 case ARM_BUILTIN_WMIAWBT:
26006 case ARM_BUILTIN_WMIAWTB:
26007 case ARM_BUILTIN_WMIAWTT:
26008 case ARM_BUILTIN_WMIAWBBN:
26009 case ARM_BUILTIN_WMIAWBTN:
26010 case ARM_BUILTIN_WMIAWTBN:
26011 case ARM_BUILTIN_WMIAWTTN:
26012 case ARM_BUILTIN_WSADB:
26013 case ARM_BUILTIN_WSADH:
26014 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26015 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26016 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26017 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26018 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26019 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26020 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26021 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26022 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26023 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26024 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26025 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26026 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26027 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26028 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26029 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26030 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26031 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26032 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26033 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26034 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26035 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26036 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26037 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26038 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26039 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26040 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26041 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26042 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26043 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26044 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26045 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26046 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26047 : CODE_FOR_iwmmxt_wsadh);
26048 arg0 = CALL_EXPR_ARG (exp, 0);
26049 arg1 = CALL_EXPR_ARG (exp, 1);
26050 arg2 = CALL_EXPR_ARG (exp, 2);
26051 op0 = expand_normal (arg0);
26052 op1 = expand_normal (arg1);
26053 op2 = expand_normal (arg2);
26054 tmode = insn_data[icode].operand[0].mode;
26055 mode0 = insn_data[icode].operand[1].mode;
26056 mode1 = insn_data[icode].operand[2].mode;
26057 mode2 = insn_data[icode].operand[3].mode;
26059 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26060 op0 = copy_to_mode_reg (mode0, op0);
26061 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26062 op1 = copy_to_mode_reg (mode1, op1);
26063 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26064 op2 = copy_to_mode_reg (mode2, op2);
26065 if (target == 0
26066 || GET_MODE (target) != tmode
26067 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26068 target = gen_reg_rtx (tmode);
26069 pat = GEN_FCN (icode) (target, op0, op1, op2);
26070 if (! pat)
26071 return 0;
26072 emit_insn (pat);
26073 return target;
26075 case ARM_BUILTIN_WZERO:
26076 target = gen_reg_rtx (DImode);
26077 emit_insn (gen_iwmmxt_clrdi (target));
26078 return target;
26080 case ARM_BUILTIN_WSRLHI:
26081 case ARM_BUILTIN_WSRLWI:
26082 case ARM_BUILTIN_WSRLDI:
26083 case ARM_BUILTIN_WSLLHI:
26084 case ARM_BUILTIN_WSLLWI:
26085 case ARM_BUILTIN_WSLLDI:
26086 case ARM_BUILTIN_WSRAHI:
26087 case ARM_BUILTIN_WSRAWI:
26088 case ARM_BUILTIN_WSRADI:
26089 case ARM_BUILTIN_WRORHI:
26090 case ARM_BUILTIN_WRORWI:
26091 case ARM_BUILTIN_WRORDI:
26092 case ARM_BUILTIN_WSRLH:
26093 case ARM_BUILTIN_WSRLW:
26094 case ARM_BUILTIN_WSRLD:
26095 case ARM_BUILTIN_WSLLH:
26096 case ARM_BUILTIN_WSLLW:
26097 case ARM_BUILTIN_WSLLD:
26098 case ARM_BUILTIN_WSRAH:
26099 case ARM_BUILTIN_WSRAW:
26100 case ARM_BUILTIN_WSRAD:
26101 case ARM_BUILTIN_WRORH:
26102 case ARM_BUILTIN_WRORW:
26103 case ARM_BUILTIN_WRORD:
26104 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26105 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26106 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26107 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26108 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26109 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26110 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26111 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26112 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26113 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26114 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26115 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26116 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26117 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26118 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26119 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26120 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26121 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26122 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26123 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26124 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26125 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26126 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26127 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26128 : CODE_FOR_nothing);
26129 arg1 = CALL_EXPR_ARG (exp, 1);
26130 op1 = expand_normal (arg1);
26131 if (GET_MODE (op1) == VOIDmode)
26133 imm = INTVAL (op1);
26134 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26135 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26136 && (imm < 0 || imm > 32))
26138 if (fcode == ARM_BUILTIN_WRORHI)
26139 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26140 else if (fcode == ARM_BUILTIN_WRORWI)
26141 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26142 else if (fcode == ARM_BUILTIN_WRORH)
26143 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26144 else
26145 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26147 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26148 && (imm < 0 || imm > 64))
26150 if (fcode == ARM_BUILTIN_WRORDI)
26151 error ("the count should be in the range 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26152 else
26153 error ("the count should be in the range 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26155 else if (imm < 0)
26157 if (fcode == ARM_BUILTIN_WSRLHI)
26158 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26159 else if (fcode == ARM_BUILTIN_WSRLWI)
26160 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26161 else if (fcode == ARM_BUILTIN_WSRLDI)
26162 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26163 else if (fcode == ARM_BUILTIN_WSLLHI)
26164 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26165 else if (fcode == ARM_BUILTIN_WSLLWI)
26166 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26167 else if (fcode == ARM_BUILTIN_WSLLDI)
26168 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26169 else if (fcode == ARM_BUILTIN_WSRAHI)
26170 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26171 else if (fcode == ARM_BUILTIN_WSRAWI)
26172 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26173 else if (fcode == ARM_BUILTIN_WSRADI)
26174 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26175 else if (fcode == ARM_BUILTIN_WSRLH)
26176 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26177 else if (fcode == ARM_BUILTIN_WSRLW)
26178 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26179 else if (fcode == ARM_BUILTIN_WSRLD)
26180 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26181 else if (fcode == ARM_BUILTIN_WSLLH)
26182 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26183 else if (fcode == ARM_BUILTIN_WSLLW)
26184 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26185 else if (fcode == ARM_BUILTIN_WSLLD)
26186 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26187 else if (fcode == ARM_BUILTIN_WSRAH)
26188 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26189 else if (fcode == ARM_BUILTIN_WSRAW)
26190 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26191 else
26192 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26195 return arm_expand_binop_builtin (icode, exp, target);
26197 default:
26198 break;
26201 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26202 if (d->code == (const enum arm_builtins) fcode)
26203 return arm_expand_binop_builtin (d->icode, exp, target);
26205 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26206 if (d->code == (const enum arm_builtins) fcode)
26207 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26209 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26210 if (d->code == (const enum arm_builtins) fcode)
26211 return arm_expand_ternop_builtin (d->icode, exp, target);
26213 /* @@@ Should really do something sensible here. */
26214 return NULL_RTX;
26217 /* Return the number (counting from 0) of
26218 the least significant set bit in MASK. */
26220 inline static int
26221 number_of_first_bit_set (unsigned mask)
26223 return ctz_hwi (mask);
26226 /* Like emit_multi_reg_push, but allowing for a different set of
26227 registers to be described as saved. MASK is the set of registers
26228 to be saved; REAL_REGS is the set of registers to be described as
26229 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26231 static rtx_insn *
26232 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26234 unsigned long regno;
26235 rtx par[10], tmp, reg;
26236 rtx_insn *insn;
26237 int i, j;
26239 /* Build the parallel of the registers actually being stored. */
26240 for (i = 0; mask; ++i, mask &= mask - 1)
26242 regno = ctz_hwi (mask);
26243 reg = gen_rtx_REG (SImode, regno);
26245 if (i == 0)
26246 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26247 else
26248 tmp = gen_rtx_USE (VOIDmode, reg);
26250 par[i] = tmp;
26253 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26254 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26255 tmp = gen_frame_mem (BLKmode, tmp);
26256 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26257 par[0] = tmp;
26259 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26260 insn = emit_insn (tmp);
26262 /* Always build the stack adjustment note for unwind info. */
26263 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26264 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26265 par[0] = tmp;
26267 /* Build the parallel of the registers recorded as saved for unwind. */
26268 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26270 regno = ctz_hwi (real_regs);
26271 reg = gen_rtx_REG (SImode, regno);
26273 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26274 tmp = gen_frame_mem (SImode, tmp);
26275 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26276 RTX_FRAME_RELATED_P (tmp) = 1;
26277 par[j + 1] = tmp;
26280 if (j == 0)
26281 tmp = par[0];
26282 else
26284 RTX_FRAME_RELATED_P (par[0]) = 1;
26285 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26288 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26290 return insn;
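/* Illustrative example (hedged, mirroring the epilogue comment later in
   this file): when the prologue has copied r8 and r9 into r6 and r7 and
   pushes the latter pair, MASK describes {r6, r7} (the registers
   actually stored) while REAL_REGS describes {r8, r9}; the
   REG_FRAME_RELATED_EXPR note attached above then records the 8-byte
   stack adjustment as saves of r8 and r9, keeping the unwind
   information consistent with what the saved values really are.  */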
26293 /* Emit code to push or pop registers to or from the stack. F is the
26294 assembly file. MASK is the registers to pop. */
26295 static void
26296 thumb_pop (FILE *f, unsigned long mask)
26298 int regno;
26299 int lo_mask = mask & 0xFF;
26300 int pushed_words = 0;
26302 gcc_assert (mask);
26304 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26306 /* Special case. Do not generate a POP PC statement here, do it in
26307 thumb_exit (). */
26308 thumb_exit (f, -1);
26309 return;
26312 fprintf (f, "\tpop\t{");
26314 /* Look at the low registers first. */
26315 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26317 if (lo_mask & 1)
26319 asm_fprintf (f, "%r", regno);
26321 if ((lo_mask & ~1) != 0)
26322 fprintf (f, ", ");
26324 pushed_words++;
26328 if (mask & (1 << PC_REGNUM))
26330 /* Catch popping the PC. */
26331 if (TARGET_INTERWORK || TARGET_BACKTRACE
26332 || crtl->calls_eh_return)
26334 /* The PC is never popped directly; instead
26335 it is popped into r3 and then BX is used. */
26336 fprintf (f, "}\n");
26338 thumb_exit (f, -1);
26340 return;
26342 else
26344 if (mask & 0xFF)
26345 fprintf (f, ", ");
26347 asm_fprintf (f, "%r", PC_REGNUM);
26351 fprintf (f, "}\n");
26354 /* Generate code to return from a thumb function.
26355 If 'reg_containing_return_addr' is -1, then the return address is
26356 actually on the stack, at the stack pointer. */
26357 static void
26358 thumb_exit (FILE *f, int reg_containing_return_addr)
26360 unsigned regs_available_for_popping;
26361 unsigned regs_to_pop;
26362 int pops_needed;
26363 unsigned available;
26364 unsigned required;
26365 machine_mode mode;
26366 int size;
26367 int restore_a4 = FALSE;
26369 /* Compute the registers we need to pop. */
26370 regs_to_pop = 0;
26371 pops_needed = 0;
26373 if (reg_containing_return_addr == -1)
26375 regs_to_pop |= 1 << LR_REGNUM;
26376 ++pops_needed;
26379 if (TARGET_BACKTRACE)
26381 /* Restore the (ARM) frame pointer and stack pointer. */
26382 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26383 pops_needed += 2;
26386 /* If there is nothing to pop then just emit the BX instruction and
26387 return. */
26388 if (pops_needed == 0)
26390 if (crtl->calls_eh_return)
26391 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26393 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26394 return;
26396 /* Otherwise if we are not supporting interworking and we have not created
26397 a backtrace structure and the function was not entered in ARM mode then
26398 just pop the return address straight into the PC. */
26399 else if (!TARGET_INTERWORK
26400 && !TARGET_BACKTRACE
26401 && !is_called_in_ARM_mode (current_function_decl)
26402 && !crtl->calls_eh_return)
26404 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26405 return;
26408 /* Find out how many of the (return) argument registers we can corrupt. */
26409 regs_available_for_popping = 0;
26411 /* If returning via __builtin_eh_return, the bottom three registers
26412 all contain information needed for the return. */
26413 if (crtl->calls_eh_return)
26414 size = 12;
26415 else
26417 /* We can deduce the registers used from the function's
26418 return value. This is more reliable than examining
26419 df_regs_ever_live_p () because that will be set if the register is
26420 ever used in the function, not just if the register is used
26421 to hold a return value. */
26423 if (crtl->return_rtx != 0)
26424 mode = GET_MODE (crtl->return_rtx);
26425 else
26426 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26428 size = GET_MODE_SIZE (mode);
26430 if (size == 0)
26432 /* In a void function we can use any argument register.
26433 In a function that returns a structure on the stack
26434 we can use the second and third argument registers. */
26435 if (mode == VOIDmode)
26436 regs_available_for_popping =
26437 (1 << ARG_REGISTER (1))
26438 | (1 << ARG_REGISTER (2))
26439 | (1 << ARG_REGISTER (3));
26440 else
26441 regs_available_for_popping =
26442 (1 << ARG_REGISTER (2))
26443 | (1 << ARG_REGISTER (3));
26445 else if (size <= 4)
26446 regs_available_for_popping =
26447 (1 << ARG_REGISTER (2))
26448 | (1 << ARG_REGISTER (3));
26449 else if (size <= 8)
26450 regs_available_for_popping =
26451 (1 << ARG_REGISTER (3));
26454 /* Match registers to be popped with registers into which we pop them. */
26455 for (available = regs_available_for_popping,
26456 required = regs_to_pop;
26457 required != 0 && available != 0;
26458 available &= ~(available & - available),
26459 required &= ~(required & - required))
26460 -- pops_needed;
26462 /* If we have any popping registers left over, remove them. */
26463 if (available > 0)
26464 regs_available_for_popping &= ~available;
26466 /* Otherwise if we need another popping register we can use
26467 the fourth argument register. */
26468 else if (pops_needed)
26470 /* If we have not found any free argument registers and
26471 reg a4 contains the return address, we must move it. */
26472 if (regs_available_for_popping == 0
26473 && reg_containing_return_addr == LAST_ARG_REGNUM)
26475 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26476 reg_containing_return_addr = LR_REGNUM;
26478 else if (size > 12)
26480 /* Register a4 is being used to hold part of the return value,
26481 but we have dire need of a free, low register. */
26482 restore_a4 = TRUE;
26484 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26487 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26489 /* The fourth argument register is available. */
26490 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26492 --pops_needed;
26496 /* Pop as many registers as we can. */
26497 thumb_pop (f, regs_available_for_popping);
26499 /* Process the registers we popped. */
26500 if (reg_containing_return_addr == -1)
26502 /* The return address was popped into the lowest numbered register. */
26503 regs_to_pop &= ~(1 << LR_REGNUM);
26505 reg_containing_return_addr =
26506 number_of_first_bit_set (regs_available_for_popping);
26508 /* Remove this register from the mask of available registers, so that
26509 the return address will not be corrupted by further pops. */
26510 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26513 /* If we popped other registers then handle them here. */
26514 if (regs_available_for_popping)
26516 int frame_pointer;
26518 /* Work out which register currently contains the frame pointer. */
26519 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26521 /* Move it into the correct place. */
26522 asm_fprintf (f, "\tmov\t%r, %r\n",
26523 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26525 /* (Temporarily) remove it from the mask of popped registers. */
26526 regs_available_for_popping &= ~(1 << frame_pointer);
26527 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26529 if (regs_available_for_popping)
26531 int stack_pointer;
26533 /* We popped the stack pointer as well,
26534 find the register that contains it. */
26535 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26537 /* Move it into the stack register. */
26538 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26540 /* At this point we have popped all necessary registers, so
26541 do not worry about restoring regs_available_for_popping
26542 to its correct value:
26544 assert (pops_needed == 0)
26545 assert (regs_available_for_popping == (1 << frame_pointer))
26546 assert (regs_to_pop == (1 << STACK_POINTER)) */
26548 else
26550 /* Since we have just moved the popped value into the frame
26551 pointer, the popping register is available for reuse, and
26552 we know that we still have the stack pointer left to pop. */
26553 regs_available_for_popping |= (1 << frame_pointer);
26557 /* If we still have registers left on the stack, but we no longer have
26558 any registers into which we can pop them, then we must move the return
26559 address into the link register and make available the register that
26560 contained it. */
26561 if (regs_available_for_popping == 0 && pops_needed > 0)
26563 regs_available_for_popping |= 1 << reg_containing_return_addr;
26565 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26566 reg_containing_return_addr);
26568 reg_containing_return_addr = LR_REGNUM;
26571 /* If we have registers left on the stack then pop some more.
26572 We know that at most we will want to pop FP and SP. */
26573 if (pops_needed > 0)
26575 int popped_into;
26576 int move_to;
26578 thumb_pop (f, regs_available_for_popping);
26580 /* We have popped either FP or SP.
26581 Move whichever one it is into the correct register. */
26582 popped_into = number_of_first_bit_set (regs_available_for_popping);
26583 move_to = number_of_first_bit_set (regs_to_pop);
26585 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26587 regs_to_pop &= ~(1 << move_to);
26589 --pops_needed;
26592 /* If we still have not popped everything then we must have only
26593 had one register available to us and we are now popping the SP. */
26594 if (pops_needed > 0)
26596 int popped_into;
26598 thumb_pop (f, regs_available_for_popping);
26600 popped_into = number_of_first_bit_set (regs_available_for_popping);
26602 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26604 /* assert (regs_to_pop == (1 << STACK_POINTER))
26605 assert (pops_needed == 1) */
26609 /* If necessary restore the a4 register. */
26610 if (restore_a4)
26612 if (reg_containing_return_addr != LR_REGNUM)
26614 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26615 reg_containing_return_addr = LR_REGNUM;
26618 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26621 if (crtl->calls_eh_return)
26622 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26624 /* Return to caller. */
26625 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26628 /* Scan INSN just before assembler is output for it.
26629 For Thumb-1, we track the status of the condition codes; this
26630 information is used in the cbranchsi4_insn pattern. */
26631 void
26632 thumb1_final_prescan_insn (rtx_insn *insn)
26634 if (flag_print_asm_name)
26635 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26636 INSN_ADDRESSES (INSN_UID (insn)));
26637 /* Don't overwrite the previous setter when we get to a cbranch. */
26638 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26640 enum attr_conds conds;
26642 if (cfun->machine->thumb1_cc_insn)
26644 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26645 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26646 CC_STATUS_INIT;
26648 conds = get_attr_conds (insn);
26649 if (conds == CONDS_SET)
26651 rtx set = single_set (insn);
26652 cfun->machine->thumb1_cc_insn = insn;
26653 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26654 cfun->machine->thumb1_cc_op1 = const0_rtx;
26655 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26656 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26658 rtx src1 = XEXP (SET_SRC (set), 1);
26659 if (src1 == const0_rtx)
26660 cfun->machine->thumb1_cc_mode = CCmode;
26662 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26664 /* Record the src register operand instead of dest because
26665 cprop_hardreg pass propagates src. */
26666 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26669 else if (conds != CONDS_NOCOND)
26670 cfun->machine->thumb1_cc_insn = NULL_RTX;
26673 /* Check if unexpected far jump is used. */
26674 if (cfun->machine->lr_save_eliminated
26675 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26676 internal_error ("Unexpected thumb1 far jump");
26680 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26682 unsigned HOST_WIDE_INT mask = 0xff;
26683 int i;
26685 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26686 if (val == 0) /* XXX */
26687 return 0;
26689 for (i = 0; i < 25; i++)
26690 if ((val & (mask << i)) == val)
26691 return 1;
26693 return 0;
26696 /* Returns nonzero if the current function contains,
26697 or might contain a far jump. */
26698 static int
26699 thumb_far_jump_used_p (void)
26701 rtx_insn *insn;
26702 bool far_jump = false;
26703 unsigned int func_size = 0;
26705 /* This test is only important for leaf functions. */
26706 /* assert (!leaf_function_p ()); */
26708 /* If we have already decided that far jumps may be used,
26709 do not bother checking again, and always return true even if
26710 it turns out that they are not being used. Once we have made
26711 the decision that far jumps are present (and that hence the link
26712 register will be pushed onto the stack) we cannot go back on it. */
26713 if (cfun->machine->far_jump_used)
26714 return 1;
26716 /* If this function is not being called from the prologue/epilogue
26717 generation code then it must be being called from the
26718 INITIAL_ELIMINATION_OFFSET macro. */
26719 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26721 /* In this case we know that we are being asked about the elimination
26722 of the arg pointer register. If that register is not being used,
26723 then there are no arguments on the stack, and we do not have to
26724 worry that a far jump might force the prologue to push the link
26725 register, changing the stack offsets. In this case we can just
26726 return false, since the presence of far jumps in the function will
26727 not affect stack offsets.
26729 If the arg pointer is live (or if it was live, but has now been
26730 eliminated and so set to dead) then we do have to test to see if
26731 the function might contain a far jump. This test can lead to some
26732 false negatives, since before reload is completed, the length of
26733 branch instructions is not known, so gcc defaults to returning their
26734 longest length, which in turn sets the far jump attribute to true.
26736 A false negative will not result in bad code being generated, but it
26737 will result in a needless push and pop of the link register. We
26738 hope that this does not occur too often.
26740 If we need doubleword stack alignment this could affect the other
26741 elimination offsets so we can't risk getting it wrong. */
26742 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26743 cfun->machine->arg_pointer_live = 1;
26744 else if (!cfun->machine->arg_pointer_live)
26745 return 0;
26748 /* We should not change far_jump_used during or after reload, as there is
26749 no chance to change stack frame layout. */
26750 if (reload_in_progress || reload_completed)
26751 return 0;
26753 /* Check to see if the function contains a branch
26754 insn with the far jump attribute set. */
26755 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26757 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26759 far_jump = true;
26761 func_size += get_attr_length (insn);
26764 /* Attribute far_jump will always be true for thumb1 before
26765 shorten_branch pass. So checking far_jump attribute before
26766 shorten_branch isn't very useful.
26768 Following heuristic tries to estimate more accurately if a far jump
26769 may finally be used. The heuristic is very conservative as there is
26770 no chance to roll back the decision not to use a far jump.
26772 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26773 2-byte insn is associated with a 4-byte constant pool entry. Using
26774 function size 2048/3 as the threshold is conservative enough. */
26775 if (far_jump)
26777 if ((func_size * 3) >= 2048)
26779 /* Record the fact that we have decided that
26780 the function does use far jumps. */
26781 cfun->machine->far_jump_used = 1;
26782 return 1;
26786 return 0;
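/* Worked example (illustrative): the heuristic above assumes a worst
   case of one 4-byte literal-pool entry per 2-byte instruction, i.e. a
   total footprint of roughly 3 * func_size bytes.  Once func_size
   reaches about 683 bytes (2048 / 3), that estimate can exceed the
   +/-2KB reach of a Thumb-1 branch, so far_jump_used is latched and the
   link register will be saved in the prologue.  */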
26789 /* Return nonzero if FUNC must be entered in ARM mode. */
26791 is_called_in_ARM_mode (tree func)
26793 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26795 /* Ignore the problem about functions whose address is taken. */
26796 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26797 return TRUE;
26799 #ifdef ARM_PE
26800 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26801 #else
26802 return FALSE;
26803 #endif
26806 /* Given the stack offsets and register mask in OFFSETS, decide how
26807 many additional registers to push instead of subtracting a constant
26808 from SP. For epilogues the principle is the same except we use pop.
26809 FOR_PROLOGUE indicates which we're generating. */
26810 static int
26811 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26813 HOST_WIDE_INT amount;
26814 unsigned long live_regs_mask = offsets->saved_regs_mask;
26815 /* Extract a mask of the ones we can give to the Thumb's push/pop
26816 instruction. */
26817 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26818 /* Then count how many other high registers will need to be pushed. */
26819 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26820 int n_free, reg_base, size;
26822 if (!for_prologue && frame_pointer_needed)
26823 amount = offsets->locals_base - offsets->saved_regs;
26824 else
26825 amount = offsets->outgoing_args - offsets->saved_regs;
26827 /* If the stack frame size is 512 exactly, we can save one load
26828 instruction, which should make this a win even when optimizing
26829 for speed. */
26830 if (!optimize_size && amount != 512)
26831 return 0;
26833 /* Can't do this if there are high registers to push. */
26834 if (high_regs_pushed != 0)
26835 return 0;
26837 /* Shouldn't do it in the prologue if no registers would normally
26838 be pushed at all. In the epilogue, also allow it if we'll have
26839 a pop insn for the PC. */
26840 if (l_mask == 0
26841 && (for_prologue
26842 || TARGET_BACKTRACE
26843 || (live_regs_mask & 1 << LR_REGNUM) == 0
26844 || TARGET_INTERWORK
26845 || crtl->args.pretend_args_size != 0))
26846 return 0;
26848 /* Don't do this if thumb_expand_prologue wants to emit instructions
26849 between the push and the stack frame allocation. */
26850 if (for_prologue
26851 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26852 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26853 return 0;
26855 reg_base = 0;
26856 n_free = 0;
26857 if (!for_prologue)
26859 size = arm_size_return_regs ();
26860 reg_base = ARM_NUM_INTS (size);
26861 live_regs_mask >>= reg_base;
26864 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26865 && (for_prologue || call_used_regs[reg_base + n_free]))
26867 live_regs_mask >>= 1;
26868 n_free++;
26871 if (n_free == 0)
26872 return 0;
26873 gcc_assert (amount / 4 * 4 == amount);
26875 if (amount >= 512 && (amount - n_free * 4) < 512)
26876 return (amount - 508) / 4;
26877 if (amount <= n_free * 4)
26878 return amount / 4;
26879 return 0;
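/* Worked example, for illustration only: with a frame of AMOUNT = 516
   bytes and two free call-clobbered low registers, the code above returns
   (516 - 508) / 4 = 2.  The prologue then pushes two junk registers, the
   remaining adjustment drops to 516 - 8 = 508 bytes, and a single
   "sub sp, #508" suffices.  The values pushed this way are never
   reloaded; they exist purely to move SP cheaply.  */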
26882 /* The bits which aren't usefully expanded as rtl. */
26883 const char *
26884 thumb1_unexpanded_epilogue (void)
26886 arm_stack_offsets *offsets;
26887 int regno;
26888 unsigned long live_regs_mask = 0;
26889 int high_regs_pushed = 0;
26890 int extra_pop;
26891 int had_to_push_lr;
26892 int size;
26894 if (cfun->machine->return_used_this_function != 0)
26895 return "";
26897 if (IS_NAKED (arm_current_func_type ()))
26898 return "";
26900 offsets = arm_get_frame_offsets ();
26901 live_regs_mask = offsets->saved_regs_mask;
26902 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26904 /* We can deduce the registers used from the function's return value.
26905 This is more reliable than examining df_regs_ever_live_p () because that
26906 will be set if the register is ever used in the function, not just if
26907 the register is used to hold a return value. */
26908 size = arm_size_return_regs ();
26910 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26911 if (extra_pop > 0)
26913 unsigned long extra_mask = (1 << extra_pop) - 1;
26914 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26917 /* The prolog may have pushed some high registers to use as
26918 work registers. e.g. the testsuite file:
26919 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26920 compiles to produce:
26921 push {r4, r5, r6, r7, lr}
26922 mov r7, r9
26923 mov r6, r8
26924 push {r6, r7}
26925 as part of the prolog. We have to undo that pushing here. */
26927 if (high_regs_pushed)
26929 unsigned long mask = live_regs_mask & 0xff;
26930 int next_hi_reg;
26932 /* The available low registers depend on the size of the value we are
26933 returning. */
26934 if (size <= 12)
26935 mask |= 1 << 3;
26936 if (size <= 8)
26937 mask |= 1 << 2;
26939 if (mask == 0)
26940 /* Oh dear! We have no low registers into which we can pop
26941 high registers! */
26942 internal_error
26943 ("no low registers available for popping high registers");
26945 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26946 if (live_regs_mask & (1 << next_hi_reg))
26947 break;
26949 while (high_regs_pushed)
26951 /* Find lo register(s) into which the high register(s) can
26952 be popped. */
26953 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26955 if (mask & (1 << regno))
26956 high_regs_pushed--;
26957 if (high_regs_pushed == 0)
26958 break;
26961 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26963 /* Pop the values into the low register(s). */
26964 thumb_pop (asm_out_file, mask);
26966 /* Move the value(s) into the high registers. */
26967 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26969 if (mask & (1 << regno))
26971 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26972 regno);
26974 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26975 if (live_regs_mask & (1 << next_hi_reg))
26976 break;
26980 live_regs_mask &= ~0x0f00;
26983 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26984 live_regs_mask &= 0xff;
26986 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26988 /* Pop the return address into the PC. */
26989 if (had_to_push_lr)
26990 live_regs_mask |= 1 << PC_REGNUM;
26992 /* Either no argument registers were pushed or a backtrace
26993 structure was created which includes an adjusted stack
26994 pointer, so just pop everything. */
26995 if (live_regs_mask)
26996 thumb_pop (asm_out_file, live_regs_mask);
26998 /* We have either just popped the return address into the
26999 PC or it was kept in LR for the entire function.
27000 Note that thumb_pop has already called thumb_exit if the
27001 PC was in the list. */
27002 if (!had_to_push_lr)
27003 thumb_exit (asm_out_file, LR_REGNUM);
27005 else
27007 /* Pop everything but the return address. */
27008 if (live_regs_mask)
27009 thumb_pop (asm_out_file, live_regs_mask);
27011 if (had_to_push_lr)
27013 if (size > 12)
27015 /* We have no free low regs, so save one. */
27016 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27017 LAST_ARG_REGNUM);
27020 /* Get the return address into a temporary register. */
27021 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27023 if (size > 12)
27025 /* Move the return address to lr. */
27026 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27027 LAST_ARG_REGNUM);
27028 /* Restore the low register. */
27029 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27030 IP_REGNUM);
27031 regno = LR_REGNUM;
27033 else
27034 regno = LAST_ARG_REGNUM;
27036 else
27037 regno = LR_REGNUM;
27039 /* Remove the argument registers that were pushed onto the stack. */
27040 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27041 SP_REGNUM, SP_REGNUM,
27042 crtl->args.pretend_args_size);
27044 thumb_exit (asm_out_file, regno);
27047 return "";
27050 /* Functions to save and restore machine-specific function data. */
27051 static struct machine_function *
27052 arm_init_machine_status (void)
27054 struct machine_function *machine;
27055 machine = ggc_cleared_alloc<machine_function> ();
27057 #if ARM_FT_UNKNOWN != 0
27058 machine->func_type = ARM_FT_UNKNOWN;
27059 #endif
27060 return machine;
27063 /* Return an RTX indicating where the return address to the
27064 calling function can be found. */
27066 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27068 if (count != 0)
27069 return NULL_RTX;
27071 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27074 /* Do anything needed before RTL is emitted for each function. */
27075 void
27076 arm_init_expanders (void)
27078 /* Arrange to initialize and mark the machine per-function status. */
27079 init_machine_status = arm_init_machine_status;
27081 /* This is to stop the combine pass optimizing away the alignment
27082 adjustment of va_arg. */
27083 /* ??? It is claimed that this should not be necessary. */
27084 if (cfun)
27085 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27089 /* Like arm_compute_initial_elimination_offset. Simpler because there
27090 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27091 to point at the base of the local variables after static stack
27092 space for a function has been allocated. */
27094 HOST_WIDE_INT
27095 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27097 arm_stack_offsets *offsets;
27099 offsets = arm_get_frame_offsets ();
27101 switch (from)
27103 case ARG_POINTER_REGNUM:
27104 switch (to)
27106 case STACK_POINTER_REGNUM:
27107 return offsets->outgoing_args - offsets->saved_args;
27109 case FRAME_POINTER_REGNUM:
27110 return offsets->soft_frame - offsets->saved_args;
27112 case ARM_HARD_FRAME_POINTER_REGNUM:
27113 return offsets->saved_regs - offsets->saved_args;
27115 case THUMB_HARD_FRAME_POINTER_REGNUM:
27116 return offsets->locals_base - offsets->saved_args;
27118 default:
27119 gcc_unreachable ();
27121 break;
27123 case FRAME_POINTER_REGNUM:
27124 switch (to)
27126 case STACK_POINTER_REGNUM:
27127 return offsets->outgoing_args - offsets->soft_frame;
27129 case ARM_HARD_FRAME_POINTER_REGNUM:
27130 return offsets->saved_regs - offsets->soft_frame;
27132 case THUMB_HARD_FRAME_POINTER_REGNUM:
27133 return offsets->locals_base - offsets->soft_frame;
27135 default:
27136 gcc_unreachable ();
27138 break;
27140 default:
27141 gcc_unreachable ();
27145 /* Generate the function's prologue. */
27147 void
27148 thumb1_expand_prologue (void)
27150 rtx_insn *insn;
27152 HOST_WIDE_INT amount;
27153 arm_stack_offsets *offsets;
27154 unsigned long func_type;
27155 int regno;
27156 unsigned long live_regs_mask;
27157 unsigned long l_mask;
27158 unsigned high_regs_pushed = 0;
27160 func_type = arm_current_func_type ();
27162 /* Naked functions don't have prologues. */
27163 if (IS_NAKED (func_type))
27164 return;
27166 if (IS_INTERRUPT (func_type))
27168 error ("interrupt Service Routines cannot be coded in Thumb mode");
27169 return;
27172 if (is_called_in_ARM_mode (current_function_decl))
27173 emit_insn (gen_prologue_thumb1_interwork ());
27175 offsets = arm_get_frame_offsets ();
27176 live_regs_mask = offsets->saved_regs_mask;
27178 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27179 l_mask = live_regs_mask & 0x40ff;
27180 /* Then count how many other high registers will need to be pushed. */
27181 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27183 if (crtl->args.pretend_args_size)
27185 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27187 if (cfun->machine->uses_anonymous_args)
27189 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27190 unsigned long mask;
27192 mask = 1ul << (LAST_ARG_REGNUM + 1);
27193 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27195 insn = thumb1_emit_multi_reg_push (mask, 0);
27197 else
27199 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27200 stack_pointer_rtx, x));
27202 RTX_FRAME_RELATED_P (insn) = 1;
27205 if (TARGET_BACKTRACE)
27207 HOST_WIDE_INT offset = 0;
27208 unsigned work_register;
27209 rtx work_reg, x, arm_hfp_rtx;
27211 /* We have been asked to create a stack backtrace structure.
27212 The code looks like this:
27214 0 .align 2
27215 0 func:
27216 0 sub SP, #16 Reserve space for 4 registers.
27217 2 push {R7} Push low registers.
27218 4 add R7, SP, #20 Get the stack pointer before the push.
27219 6 str R7, [SP, #8] Store the stack pointer
27220 (before reserving the space).
27221 8 mov R7, PC Get hold of the start of this code + 12.
27222 10 str R7, [SP, #16] Store it.
27223 12 mov R7, FP Get hold of the current frame pointer.
27224 14 str R7, [SP, #4] Store it.
27225 16 mov R7, LR Get hold of the current return address.
27226 18 str R7, [SP, #12] Store it.
27227 20 add R7, SP, #16 Point at the start of the
27228 backtrace structure.
27229 22 mov FP, R7 Put this value into the frame pointer. */
27231 work_register = thumb_find_work_register (live_regs_mask);
27232 work_reg = gen_rtx_REG (SImode, work_register);
27233 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27235 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27236 stack_pointer_rtx, GEN_INT (-16)));
27237 RTX_FRAME_RELATED_P (insn) = 1;
27239 if (l_mask)
27241 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27242 RTX_FRAME_RELATED_P (insn) = 1;
27244 offset = bit_count (l_mask) * UNITS_PER_WORD;
27247 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27248 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27250 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27251 x = gen_frame_mem (SImode, x);
27252 emit_move_insn (x, work_reg);
27254 /* Make sure that the instruction fetching the PC is in the right place
27255 to calculate "start of backtrace creation code + 12". */
27256 /* ??? The stores using the common WORK_REG ought to be enough to
27257 prevent the scheduler from doing anything weird. Failing that
27258 we could always move all of the following into an UNSPEC_VOLATILE. */
27259 if (l_mask)
27261 x = gen_rtx_REG (SImode, PC_REGNUM);
27262 emit_move_insn (work_reg, x);
27264 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27265 x = gen_frame_mem (SImode, x);
27266 emit_move_insn (x, work_reg);
27268 emit_move_insn (work_reg, arm_hfp_rtx);
27270 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27271 x = gen_frame_mem (SImode, x);
27272 emit_move_insn (x, work_reg);
27274 else
27276 emit_move_insn (work_reg, arm_hfp_rtx);
27278 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27279 x = gen_frame_mem (SImode, x);
27280 emit_move_insn (x, work_reg);
27282 x = gen_rtx_REG (SImode, PC_REGNUM);
27283 emit_move_insn (work_reg, x);
27285 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27286 x = gen_frame_mem (SImode, x);
27287 emit_move_insn (x, work_reg);
27290 x = gen_rtx_REG (SImode, LR_REGNUM);
27291 emit_move_insn (work_reg, x);
27293 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27294 x = gen_frame_mem (SImode, x);
27295 emit_move_insn (x, work_reg);
27297 x = GEN_INT (offset + 12);
27298 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27300 emit_move_insn (arm_hfp_rtx, work_reg);
27302 /* Optimization: If we are not pushing any low registers but we are going
27303 to push some high registers then delay our first push. This will just
27304 be a push of LR and we can combine it with the push of the first high
27305 register. */
27306 else if ((l_mask & 0xff) != 0
27307 || (high_regs_pushed == 0 && l_mask))
27309 unsigned long mask = l_mask;
27310 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27311 insn = thumb1_emit_multi_reg_push (mask, mask);
27312 RTX_FRAME_RELATED_P (insn) = 1;
27315 if (high_regs_pushed)
27317 unsigned pushable_regs;
27318 unsigned next_hi_reg;
27319 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27320 : crtl->args.info.nregs;
27321 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27323 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27324 if (live_regs_mask & (1 << next_hi_reg))
27325 break;
27327 /* Here we need to mask out registers used for passing arguments
27328 even if they could be pushed. This is to avoid using them to stash the
27329 high registers, which could clobber argument values that are still live. */
27330 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27332 if (pushable_regs == 0)
27333 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27335 while (high_regs_pushed > 0)
27337 unsigned long real_regs_mask = 0;
27339 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27341 if (pushable_regs & (1 << regno))
27343 emit_move_insn (gen_rtx_REG (SImode, regno),
27344 gen_rtx_REG (SImode, next_hi_reg));
27346 high_regs_pushed --;
27347 real_regs_mask |= (1 << next_hi_reg);
27349 if (high_regs_pushed)
27351 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27352 next_hi_reg --)
27353 if (live_regs_mask & (1 << next_hi_reg))
27354 break;
27356 else
27358 pushable_regs &= ~((1 << regno) - 1);
27359 break;
27364 /* If we had to find a work register and we have not yet
27365 saved the LR then add it to the list of regs to push. */
27366 if (l_mask == (1 << LR_REGNUM))
27368 pushable_regs |= l_mask;
27369 real_regs_mask |= l_mask;
27370 l_mask = 0;
27373 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27374 RTX_FRAME_RELATED_P (insn) = 1;
27378 /* Load the pic register before setting the frame pointer,
27379 so we can use r7 as a temporary work register. */
27380 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27381 arm_load_pic_register (live_regs_mask);
27383 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27384 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27385 stack_pointer_rtx);
27387 if (flag_stack_usage_info)
27388 current_function_static_stack_size
27389 = offsets->outgoing_args - offsets->saved_args;
27391 amount = offsets->outgoing_args - offsets->saved_regs;
27392 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27393 if (amount)
27395 if (amount < 512)
27397 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27398 GEN_INT (- amount)));
27399 RTX_FRAME_RELATED_P (insn) = 1;
27401 else
27403 rtx reg, dwarf;
27405 /* The stack decrement is too big for an immediate value in a single
27406 insn. In theory we could issue multiple subtracts, but after
27407 three of them it becomes more space efficient to place the full
27408 value in the constant pool and load into a register. (Also the
27409 ARM debugger really likes to see only one stack decrement per
27410 function). So instead we look for a scratch register into which
27411 we can load the decrement, and then we subtract this from the
27412 stack pointer. Unfortunately on the thumb the only available
27413 scratch registers are the argument registers, and we cannot use
27414 these as they may hold arguments to the function. Instead we
27415 attempt to locate a call preserved register which is used by this
27416 function. If we can find one, then we know that it will have
27417 been pushed at the start of the prologue and so we can corrupt
27418 it now. */
27419 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27420 if (live_regs_mask & (1 << regno))
27421 break;
27423 gcc_assert(regno <= LAST_LO_REGNUM);
27425 reg = gen_rtx_REG (SImode, regno);
27427 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27429 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27430 stack_pointer_rtx, reg));
27432 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27433 plus_constant (Pmode, stack_pointer_rtx,
27434 -amount));
27435 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27436 RTX_FRAME_RELATED_P (insn) = 1;
27440 if (frame_pointer_needed)
27441 thumb_set_frame_pointer (offsets);
27443 /* If we are profiling, make sure no instructions are scheduled before
27444 the call to mcount. Similarly if the user has requested no
27445 scheduling in the prolog. Similarly if we want non-call exceptions
27446 using the EABI unwinder, to prevent faulting instructions from being
27447 swapped with a stack adjustment. */
27448 if (crtl->profile || !TARGET_SCHED_PROLOG
27449 || (arm_except_unwind_info (&global_options) == UI_TARGET
27450 && cfun->can_throw_non_call_exceptions))
27451 emit_insn (gen_blockage ());
27453 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27454 if (live_regs_mask & 0xff)
27455 cfun->machine->lr_save_eliminated = 0;
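/* Illustrative sketch (register choice and frame size are assumptions,
   not taken from a real function): for a function that saves r4 and lr
   and needs a 1024-byte frame, the "decrement too big for an immediate"
   path above comes out as roughly

	push	{r4, lr}
	ldr	r4, =-1024	@ r4 was pushed above, so it may be corrupted
	add	sp, sp, r4

   plus a REG_FRAME_RELATED_EXPR note spelling out the SP adjustment,
   since the unwinder cannot deduce it from the register-based add.  */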
27458 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27459 POP instruction can be generated. LR should be replaced by PC. All
27460 the checks required are already done by USE_RETURN_INSN (). Hence,
27461 all we really need to check here is if single register is to be
27462 returned, or multiple register return. */
27463 void
27464 thumb2_expand_return (bool simple_return)
27466 int i, num_regs;
27467 unsigned long saved_regs_mask;
27468 arm_stack_offsets *offsets;
27470 offsets = arm_get_frame_offsets ();
27471 saved_regs_mask = offsets->saved_regs_mask;
27473 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27474 if (saved_regs_mask & (1 << i))
27475 num_regs++;
27477 if (!simple_return && saved_regs_mask)
27479 if (num_regs == 1)
27481 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27482 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27483 rtx addr = gen_rtx_MEM (SImode,
27484 gen_rtx_POST_INC (SImode,
27485 stack_pointer_rtx));
27486 set_mem_alias_set (addr, get_frame_alias_set ());
27487 XVECEXP (par, 0, 0) = ret_rtx;
27488 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27489 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27490 emit_jump_insn (par);
27492 else
27494 saved_regs_mask &= ~ (1 << LR_REGNUM);
27495 saved_regs_mask |= (1 << PC_REGNUM);
27496 arm_emit_multi_reg_pop (saved_regs_mask);
27499 else
27501 emit_jump_insn (simple_return_rtx);
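/* For illustration: when LR is the only register saved, the
   single-register path above folds the restore and the return into one
   post-increment load of the PC, roughly "ldr pc, [sp], #4"; with several
   saved registers the LR slot is simply retargeted at the PC and handled
   by the ordinary "pop {..., pc}" multi-register pop.  */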
27505 void
27506 thumb1_expand_epilogue (void)
27508 HOST_WIDE_INT amount;
27509 arm_stack_offsets *offsets;
27510 int regno;
27512 /* Naked functions don't have prologues. */
27513 if (IS_NAKED (arm_current_func_type ()))
27514 return;
27516 offsets = arm_get_frame_offsets ();
27517 amount = offsets->outgoing_args - offsets->saved_regs;
27519 if (frame_pointer_needed)
27521 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27522 amount = offsets->locals_base - offsets->saved_regs;
27524 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27526 gcc_assert (amount >= 0);
27527 if (amount)
27529 emit_insn (gen_blockage ());
27531 if (amount < 512)
27532 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27533 GEN_INT (amount)));
27534 else
27536 /* r3 is always free in the epilogue. */
27537 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27539 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27540 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27544 /* Emit a USE (stack_pointer_rtx), so that
27545 the stack adjustment will not be deleted. */
27546 emit_insn (gen_force_register_use (stack_pointer_rtx));
27548 if (crtl->profile || !TARGET_SCHED_PROLOG)
27549 emit_insn (gen_blockage ());
27551 /* Emit a clobber for each insn that will be restored in the epilogue,
27552 so that flow2 will get register lifetimes correct. */
27553 for (regno = 0; regno < 13; regno++)
27554 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27555 emit_clobber (gen_rtx_REG (SImode, regno));
27557 if (! df_regs_ever_live_p (LR_REGNUM))
27558 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27561 /* Epilogue code for APCS frame. */
27562 static void
27563 arm_expand_epilogue_apcs_frame (bool really_return)
27565 unsigned long func_type;
27566 unsigned long saved_regs_mask;
27567 int num_regs = 0;
27568 int i;
27569 int floats_from_frame = 0;
27570 arm_stack_offsets *offsets;
27572 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27573 func_type = arm_current_func_type ();
27575 /* Get frame offsets for ARM. */
27576 offsets = arm_get_frame_offsets ();
27577 saved_regs_mask = offsets->saved_regs_mask;
27579 /* Find the offset of the floating-point save area in the frame. */
27580 floats_from_frame
27581 = (offsets->saved_args
27582 + arm_compute_static_chain_stack_bytes ()
27583 - offsets->frame);
27585 /* Compute how many core registers saved and how far away the floats are. */
27586 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27587 if (saved_regs_mask & (1 << i))
27589 num_regs++;
27590 floats_from_frame += 4;
27593 if (TARGET_HARD_FLOAT && TARGET_VFP)
27595 int start_reg;
27596 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27598 /* The offset is from IP_REGNUM. */
27599 int saved_size = arm_get_vfp_saved_size ();
27600 if (saved_size > 0)
27602 rtx_insn *insn;
27603 floats_from_frame += saved_size;
27604 insn = emit_insn (gen_addsi3 (ip_rtx,
27605 hard_frame_pointer_rtx,
27606 GEN_INT (-floats_from_frame)));
27607 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27608 ip_rtx, hard_frame_pointer_rtx);
27611 /* Generate VFP register multi-pop. */
27612 start_reg = FIRST_VFP_REGNUM;
27614 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27615 /* Look for a case where a reg does not need restoring. */
27616 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27617 && (!df_regs_ever_live_p (i + 1)
27618 || call_used_regs[i + 1]))
27620 if (start_reg != i)
27621 arm_emit_vfp_multi_reg_pop (start_reg,
27622 (i - start_reg) / 2,
27623 gen_rtx_REG (SImode,
27624 IP_REGNUM));
27625 start_reg = i + 2;
27628 /* Restore the remaining regs that we have discovered (or possibly
27629 even all of them, if the conditional in the for loop never
27630 fired). */
27631 if (start_reg != i)
27632 arm_emit_vfp_multi_reg_pop (start_reg,
27633 (i - start_reg) / 2,
27634 gen_rtx_REG (SImode, IP_REGNUM));
27637 if (TARGET_IWMMXT)
27639 /* The frame pointer is guaranteed to be non-double-word aligned, as
27640 it is set to double-word-aligned old_stack_pointer - 4. */
27641 rtx_insn *insn;
27642 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27644 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27645 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27647 rtx addr = gen_frame_mem (V2SImode,
27648 plus_constant (Pmode, hard_frame_pointer_rtx,
27649 - lrm_count * 4));
27650 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27651 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27652 gen_rtx_REG (V2SImode, i),
27653 NULL_RTX);
27654 lrm_count += 2;
27658 /* saved_regs_mask should contain IP, which holds the old stack pointer
27659 from the time the activation record was created. Since SP and IP are adjacent registers,
27660 we can restore the value directly into SP. */
27661 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27662 saved_regs_mask &= ~(1 << IP_REGNUM);
27663 saved_regs_mask |= (1 << SP_REGNUM);
27665 /* There are two registers left in saved_regs_mask - LR and PC. We
27666 only need to restore LR (the return address), but to
27667 save time we can load it directly into PC, unless we need a
27668 special function exit sequence, or we are not really returning. */
27669 if (really_return
27670 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27671 && !crtl->calls_eh_return)
27672 /* Delete LR from the register mask, so that LR on
27673 the stack is loaded into the PC in the register mask. */
27674 saved_regs_mask &= ~(1 << LR_REGNUM);
27675 else
27676 saved_regs_mask &= ~(1 << PC_REGNUM);
27678 num_regs = bit_count (saved_regs_mask);
27679 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27681 rtx_insn *insn;
27682 emit_insn (gen_blockage ());
27683 /* Unwind the stack to just below the saved registers. */
27684 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27685 hard_frame_pointer_rtx,
27686 GEN_INT (- 4 * num_regs)));
27688 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27689 stack_pointer_rtx, hard_frame_pointer_rtx);
27692 arm_emit_multi_reg_pop (saved_regs_mask);
27694 if (IS_INTERRUPT (func_type))
27696 /* Interrupt handlers will have pushed the
27697 IP onto the stack, so restore it now. */
27698 rtx_insn *insn;
27699 rtx addr = gen_rtx_MEM (SImode,
27700 gen_rtx_POST_INC (SImode,
27701 stack_pointer_rtx));
27702 set_mem_alias_set (addr, get_frame_alias_set ());
27703 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27704 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27705 gen_rtx_REG (SImode, IP_REGNUM),
27706 NULL_RTX);
27709 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27710 return;
27712 if (crtl->calls_eh_return)
27713 emit_insn (gen_addsi3 (stack_pointer_rtx,
27714 stack_pointer_rtx,
27715 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27717 if (IS_STACKALIGN (func_type))
27718 /* Restore the original stack pointer. Before prologue, the stack was
27719 realigned and the original stack pointer saved in r0. For details,
27720 see comment in arm_expand_prologue. */
27721 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27723 emit_jump_insn (simple_return_rtx);
27726 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27727 function is not a sibcall. */
27728 void
27729 arm_expand_epilogue (bool really_return)
27731 unsigned long func_type;
27732 unsigned long saved_regs_mask;
27733 int num_regs = 0;
27734 int i;
27735 int amount;
27736 arm_stack_offsets *offsets;
27738 func_type = arm_current_func_type ();
27740 /* Naked functions don't have an epilogue. Hence, generate the return pattern and
27741 let output_return_instruction take care of instruction emission if any. */
27742 if (IS_NAKED (func_type)
27743 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27745 if (really_return)
27746 emit_jump_insn (simple_return_rtx);
27747 return;
27750 /* If we are throwing an exception, then we really must be doing a
27751 return, so we can't tail-call. */
27752 gcc_assert (!crtl->calls_eh_return || really_return);
27754 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27756 arm_expand_epilogue_apcs_frame (really_return);
27757 return;
27760 /* Get frame offsets for ARM. */
27761 offsets = arm_get_frame_offsets ();
27762 saved_regs_mask = offsets->saved_regs_mask;
27763 num_regs = bit_count (saved_regs_mask);
27765 if (frame_pointer_needed)
27767 rtx_insn *insn;
27768 /* Restore stack pointer if necessary. */
27769 if (TARGET_ARM)
27771 /* In ARM mode, frame pointer points to first saved register.
27772 Restore stack pointer to last saved register. */
27773 amount = offsets->frame - offsets->saved_regs;
27775 /* Force out any pending memory operations that reference stacked data
27776 before stack de-allocation occurs. */
27777 emit_insn (gen_blockage ());
27778 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27779 hard_frame_pointer_rtx,
27780 GEN_INT (amount)));
27781 arm_add_cfa_adjust_cfa_note (insn, amount,
27782 stack_pointer_rtx,
27783 hard_frame_pointer_rtx);
27785 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27786 deleted. */
27787 emit_insn (gen_force_register_use (stack_pointer_rtx));
27789 else
27791 /* In Thumb-2 mode, the frame pointer points to the last saved
27792 register. */
27793 amount = offsets->locals_base - offsets->saved_regs;
27794 if (amount)
27796 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27797 hard_frame_pointer_rtx,
27798 GEN_INT (amount)));
27799 arm_add_cfa_adjust_cfa_note (insn, amount,
27800 hard_frame_pointer_rtx,
27801 hard_frame_pointer_rtx);
27804 /* Force out any pending memory operations that reference stacked data
27805 before stack de-allocation occurs. */
27806 emit_insn (gen_blockage ());
27807 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27808 hard_frame_pointer_rtx));
27809 arm_add_cfa_adjust_cfa_note (insn, 0,
27810 stack_pointer_rtx,
27811 hard_frame_pointer_rtx);
27812 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27813 deleted. */
27814 emit_insn (gen_force_register_use (stack_pointer_rtx));
27817 else
27819 /* Pop off outgoing args and local frame to adjust stack pointer to
27820 last saved register. */
27821 amount = offsets->outgoing_args - offsets->saved_regs;
27822 if (amount)
27824 rtx_insn *tmp;
27825 /* Force out any pending memory operations that reference stacked data
27826 before stack de-allocation occurs. */
27827 emit_insn (gen_blockage ());
27828 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27829 stack_pointer_rtx,
27830 GEN_INT (amount)));
27831 arm_add_cfa_adjust_cfa_note (tmp, amount,
27832 stack_pointer_rtx, stack_pointer_rtx);
27833 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27834 not deleted. */
27835 emit_insn (gen_force_register_use (stack_pointer_rtx));
27839 if (TARGET_HARD_FLOAT && TARGET_VFP)
27841 /* Generate VFP register multi-pop. */
27842 int end_reg = LAST_VFP_REGNUM + 1;
27844 /* Scan the registers in reverse order. We need to match
27845 any groupings made in the prologue and generate matching
27846 vldm operations. The groups must match because, unlike pop,
27847 vldm can only restore consecutive registers. */
27848 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27849 /* Look for a case where a reg does not need restoring. */
27850 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27851 && (!df_regs_ever_live_p (i + 1)
27852 || call_used_regs[i + 1]))
27854 /* Restore the regs discovered so far (from reg+2 to
27855 end_reg). */
27856 if (end_reg > i + 2)
27857 arm_emit_vfp_multi_reg_pop (i + 2,
27858 (end_reg - (i + 2)) / 2,
27859 stack_pointer_rtx);
27860 end_reg = i;
27863 /* Restore the remaining regs that we have discovered (or possibly
27864 even all of them, if the conditional in the for loop never
27865 fired). */
27866 if (end_reg > i + 2)
27867 arm_emit_vfp_multi_reg_pop (i + 2,
27868 (end_reg - (i + 2)) / 2,
27869 stack_pointer_rtx);
27872 if (TARGET_IWMMXT)
27873 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27874 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27876 rtx_insn *insn;
27877 rtx addr = gen_rtx_MEM (V2SImode,
27878 gen_rtx_POST_INC (SImode,
27879 stack_pointer_rtx));
27880 set_mem_alias_set (addr, get_frame_alias_set ());
27881 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27882 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27883 gen_rtx_REG (V2SImode, i),
27884 NULL_RTX);
27885 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27886 stack_pointer_rtx, stack_pointer_rtx);
27889 if (saved_regs_mask)
27891 rtx insn;
27892 bool return_in_pc = false;
27894 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27895 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27896 && !IS_STACKALIGN (func_type)
27897 && really_return
27898 && crtl->args.pretend_args_size == 0
27899 && saved_regs_mask & (1 << LR_REGNUM)
27900 && !crtl->calls_eh_return)
27902 saved_regs_mask &= ~(1 << LR_REGNUM);
27903 saved_regs_mask |= (1 << PC_REGNUM);
27904 return_in_pc = true;
27907 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27909 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27910 if (saved_regs_mask & (1 << i))
27912 rtx addr = gen_rtx_MEM (SImode,
27913 gen_rtx_POST_INC (SImode,
27914 stack_pointer_rtx));
27915 set_mem_alias_set (addr, get_frame_alias_set ());
27917 if (i == PC_REGNUM)
27919 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27920 XVECEXP (insn, 0, 0) = ret_rtx;
27921 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27922 gen_rtx_REG (SImode, i),
27923 addr);
27924 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27925 insn = emit_jump_insn (insn);
27927 else
27929 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27930 addr));
27931 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27932 gen_rtx_REG (SImode, i),
27933 NULL_RTX);
27934 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27935 stack_pointer_rtx,
27936 stack_pointer_rtx);
27940 else
27942 if (TARGET_LDRD
27943 && current_tune->prefer_ldrd_strd
27944 && !optimize_function_for_size_p (cfun))
27946 if (TARGET_THUMB2)
27947 thumb2_emit_ldrd_pop (saved_regs_mask);
27948 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27949 arm_emit_ldrd_pop (saved_regs_mask);
27950 else
27951 arm_emit_multi_reg_pop (saved_regs_mask);
27953 else
27954 arm_emit_multi_reg_pop (saved_regs_mask);
27957 if (return_in_pc == true)
27958 return;
27961 if (crtl->args.pretend_args_size)
27963 int i, j;
27964 rtx dwarf = NULL_RTX;
27965 rtx_insn *tmp =
27966 emit_insn (gen_addsi3 (stack_pointer_rtx,
27967 stack_pointer_rtx,
27968 GEN_INT (crtl->args.pretend_args_size)));
27970 RTX_FRAME_RELATED_P (tmp) = 1;
27972 if (cfun->machine->uses_anonymous_args)
27974 /* Restore pretend args. See arm_expand_prologue for how the
27975 pretend args are saved on the stack. */
27976 int num_regs = crtl->args.pretend_args_size / 4;
27977 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27978 for (j = 0, i = 0; j < num_regs; i++)
27979 if (saved_regs_mask & (1 << i))
27981 rtx reg = gen_rtx_REG (SImode, i);
27982 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27983 j++;
27985 REG_NOTES (tmp) = dwarf;
27987 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27988 stack_pointer_rtx, stack_pointer_rtx);
27991 if (!really_return)
27992 return;
27994 if (crtl->calls_eh_return)
27995 emit_insn (gen_addsi3 (stack_pointer_rtx,
27996 stack_pointer_rtx,
27997 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27999 if (IS_STACKALIGN (func_type))
28000 /* Restore the original stack pointer. Before prologue, the stack was
28001 realigned and the original stack pointer saved in r0. For details,
28002 see comment in arm_expand_prologue. */
28003 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
28005 emit_jump_insn (simple_return_rtx);
28008 /* Implementation of insn prologue_thumb1_interwork. This is the first
28009 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28011 const char *
28012 thumb1_output_interwork (void)
28014 const char * name;
28015 FILE *f = asm_out_file;
28017 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28018 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28019 == SYMBOL_REF);
28020 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28022 /* Generate code sequence to switch us into Thumb mode. */
28023 /* The .code 32 directive has already been emitted by
28024 ASM_DECLARE_FUNCTION_NAME. */
28025 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28026 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28028 /* Generate a label, so that the debugger will notice the
28029 change in instruction sets. This label is also used by
28030 the assembler to bypass the ARM code when this function
28031 is called from a Thumb encoded function elsewhere in the
28032 same file. Hence the definition of STUB_NAME here must
28033 agree with the definition in gas/config/tc-arm.c. */
28035 #define STUB_NAME ".real_start_of"
28037 fprintf (f, "\t.code\t16\n");
28038 #ifdef ARM_PE
28039 if (arm_dllexport_name_p (name))
28040 name = arm_strip_name_encoding (name);
28041 #endif
28042 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28043 fprintf (f, "\t.thumb_func\n");
28044 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28046 return "";
28049 /* Handle the case of a double word load into a low register from
28050 a computed memory address. The computed address may involve a
28051 register which is overwritten by the load. */
28052 const char *
28053 thumb_load_double_from_address (rtx *operands)
28055 rtx addr;
28056 rtx base;
28057 rtx offset;
28058 rtx arg1;
28059 rtx arg2;
28061 gcc_assert (REG_P (operands[0]));
28062 gcc_assert (MEM_P (operands[1]));
28064 /* Get the memory address. */
28065 addr = XEXP (operands[1], 0);
28067 /* Work out how the memory address is computed. */
28068 switch (GET_CODE (addr))
28070 case REG:
28071 operands[2] = adjust_address (operands[1], SImode, 4);
28073 if (REGNO (operands[0]) == REGNO (addr))
28075 output_asm_insn ("ldr\t%H0, %2", operands);
28076 output_asm_insn ("ldr\t%0, %1", operands);
28078 else
28080 output_asm_insn ("ldr\t%0, %1", operands);
28081 output_asm_insn ("ldr\t%H0, %2", operands);
28083 break;
28085 case CONST:
28086 /* Compute <address> + 4 for the high order load. */
28087 operands[2] = adjust_address (operands[1], SImode, 4);
28089 output_asm_insn ("ldr\t%0, %1", operands);
28090 output_asm_insn ("ldr\t%H0, %2", operands);
28091 break;
28093 case PLUS:
28094 arg1 = XEXP (addr, 0);
28095 arg2 = XEXP (addr, 1);
28097 if (CONSTANT_P (arg1))
28098 base = arg2, offset = arg1;
28099 else
28100 base = arg1, offset = arg2;
28102 gcc_assert (REG_P (base));
28104 /* Catch the case of <address> = <reg> + <reg> */
28105 if (REG_P (offset))
28107 int reg_offset = REGNO (offset);
28108 int reg_base = REGNO (base);
28109 int reg_dest = REGNO (operands[0]);
28111 /* Add the base and offset registers together into the
28112 higher destination register. */
28113 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28114 reg_dest + 1, reg_base, reg_offset);
28116 /* Load the lower destination register from the address in
28117 the higher destination register. */
28118 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28119 reg_dest, reg_dest + 1);
28121 /* Load the higher destination register from its own address
28122 plus 4. */
28123 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28124 reg_dest + 1, reg_dest + 1);
28126 else
28128 /* Compute <address> + 4 for the high order load. */
28129 operands[2] = adjust_address (operands[1], SImode, 4);
28131 /* If the computed address is held in the low order register
28132 then load the high order register first, otherwise always
28133 load the low order register first. */
28134 if (REGNO (operands[0]) == REGNO (base))
28136 output_asm_insn ("ldr\t%H0, %2", operands);
28137 output_asm_insn ("ldr\t%0, %1", operands);
28139 else
28141 output_asm_insn ("ldr\t%0, %1", operands);
28142 output_asm_insn ("ldr\t%H0, %2", operands);
28145 break;
28147 case LABEL_REF:
28148 /* With no registers to worry about we can just load the value
28149 directly. */
28150 operands[2] = adjust_address (operands[1], SImode, 4);
28152 output_asm_insn ("ldr\t%H0, %2", operands);
28153 output_asm_insn ("ldr\t%0, %1", operands);
28154 break;
28156 default:
28157 gcc_unreachable ();
28160 return "";
28163 const char *
28164 thumb_output_move_mem_multiple (int n, rtx *operands)
28166 rtx tmp;
28168 switch (n)
28170 case 2:
28171 if (REGNO (operands[4]) > REGNO (operands[5]))
28173 tmp = operands[4];
28174 operands[4] = operands[5];
28175 operands[5] = tmp;
28177 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28178 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28179 break;
28181 case 3:
28182 if (REGNO (operands[4]) > REGNO (operands[5]))
28184 tmp = operands[4];
28185 operands[4] = operands[5];
28186 operands[5] = tmp;
28188 if (REGNO (operands[5]) > REGNO (operands[6]))
28190 tmp = operands[5];
28191 operands[5] = operands[6];
28192 operands[6] = tmp;
28194 if (REGNO (operands[4]) > REGNO (operands[5]))
28196 tmp = operands[4];
28197 operands[4] = operands[5];
28198 operands[5] = tmp;
28201 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28202 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28203 break;
28205 default:
28206 gcc_unreachable ();
28209 return "";
28212 /* Output a call-via instruction for thumb state. */
28213 const char *
28214 thumb_call_via_reg (rtx reg)
28216 int regno = REGNO (reg);
28217 rtx *labelp;
28219 gcc_assert (regno < LR_REGNUM);
28221 /* If we are in the normal text section we can use a single instance
28222 per compilation unit. If we are doing function sections, then we need
28223 an entry per section, since we can't rely on reachability. */
28224 if (in_section == text_section)
28226 thumb_call_reg_needed = 1;
28228 if (thumb_call_via_label[regno] == NULL)
28229 thumb_call_via_label[regno] = gen_label_rtx ();
28230 labelp = thumb_call_via_label + regno;
28232 else
28234 if (cfun->machine->call_via[regno] == NULL)
28235 cfun->machine->call_via[regno] = gen_label_rtx ();
28236 labelp = cfun->machine->call_via + regno;
28239 output_asm_insn ("bl\t%a0", labelp);
28240 return "";
28243 /* Routines for generating rtl. */
28244 void
28245 thumb_expand_movmemqi (rtx *operands)
28247 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28248 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28249 HOST_WIDE_INT len = INTVAL (operands[2]);
28250 HOST_WIDE_INT offset = 0;
28252 while (len >= 12)
28254 emit_insn (gen_movmem12b (out, in, out, in));
28255 len -= 12;
28258 if (len >= 8)
28260 emit_insn (gen_movmem8b (out, in, out, in));
28261 len -= 8;
28264 if (len >= 4)
28266 rtx reg = gen_reg_rtx (SImode);
28267 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28268 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28269 len -= 4;
28270 offset += 4;
28273 if (len >= 2)
28275 rtx reg = gen_reg_rtx (HImode);
28276 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28277 plus_constant (Pmode, in,
28278 offset))));
28279 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28280 offset)),
28281 reg));
28282 len -= 2;
28283 offset += 2;
28286 if (len)
28288 rtx reg = gen_reg_rtx (QImode);
28289 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28290 plus_constant (Pmode, in,
28291 offset))));
28292 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28293 offset)),
28294 reg));
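/* Worked example, illustrative only: a 23-byte copy is emitted by the
   code above as 12 + 8 + 2 + 1, i.e. one movmem12b, one movmem8b, one
   half-word move and one trailing byte move.  A hypothetical helper that
   computes the same schedule:  */
#if 0
static void
example_movmem_schedule (HOST_WIDE_INT len,
			 int *n12, int *n8, int *n4, int *n2, int *n1)
{
  *n12 = len / 12, len %= 12;		/* 12-byte ldmia/stmia chunks.  */
  *n8 = len >= 8, len -= *n8 * 8;	/* At most one 8-byte chunk.  */
  *n4 = len >= 4, len -= *n4 * 4;	/* At most one word.  */
  *n2 = len >= 2, len -= *n2 * 2;	/* At most one half-word.  */
  *n1 = len;				/* 0 or 1 trailing byte.  */
}
#endif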
28298 void
28299 thumb_reload_out_hi (rtx *operands)
28301 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28304 /* Handle reading a half-word from memory during reload. */
28305 void
28306 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28308 gcc_unreachable ();
28311 /* Return the length of a function name prefix
28312 that starts with the character 'c'. */
28313 static int
28314 arm_get_strip_length (int c)
28316 switch (c)
28318 ARM_NAME_ENCODING_LENGTHS
28319 default: return 0;
28323 /* Return a pointer to a function's name with any
28324 and all prefix encodings stripped from it. */
28325 const char *
28326 arm_strip_name_encoding (const char *name)
28328 int skip;
28330 while ((skip = arm_get_strip_length (* name)))
28331 name += skip;
28333 return name;
28336 /* If there is a '*' anywhere in the name's prefix, then
28337 emit the stripped name verbatim, otherwise prepend an
28338 underscore if leading underscores are being used. */
28339 void
28340 arm_asm_output_labelref (FILE *stream, const char *name)
28342 int skip;
28343 int verbatim = 0;
28345 while ((skip = arm_get_strip_length (* name)))
28347 verbatim |= (*name == '*');
28348 name += skip;
28351 if (verbatim)
28352 fputs (name, stream);
28353 else
28354 asm_fprintf (stream, "%U%s", name);
28357 /* This function is used to emit an EABI tag and its associated value.
28358 We emit the numerical value of the tag in case the assembler does not
28359 support textual tags (e.g. gas prior to 2.20). If requested we include
28360 the tag name in a comment so that anyone reading the assembler output
28361 will know which tag is being set.
28363 This function is not static because arm-c.c needs it too. */
28365 void
28366 arm_emit_eabi_attribute (const char *name, int num, int val)
28368 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28369 if (flag_verbose_asm || flag_debug_asm)
28370 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28371 asm_fprintf (asm_out_file, "\n");
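/* Illustrative output, assuming -O2 and -fverbose-asm (the tag number and
   value match the Tag_ABI_optimization_goals use further down):

	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals

   Without -fverbose-asm the trailing comment is omitted and only the
   numeric form is emitted.  */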
28374 static void
28375 arm_file_start (void)
28377 int val;
28379 if (TARGET_UNIFIED_ASM)
28380 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28382 if (TARGET_BPABI)
28384 const char *fpu_name;
28385 if (arm_selected_arch)
28387 /* armv7ve doesn't support any extensions. */
28388 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28390 /* Keep backward compatibility for assemblers
28391 which don't support armv7ve. */
28392 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28393 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28394 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28395 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28396 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28398 else
28400 const char* pos = strchr (arm_selected_arch->name, '+');
28401 if (pos)
28403 char buf[15];
28404 gcc_assert (strlen (arm_selected_arch->name)
28405 <= sizeof (buf) / sizeof (*pos));
28406 strncpy (buf, arm_selected_arch->name,
28407 (pos - arm_selected_arch->name) * sizeof (*pos));
28408 buf[pos - arm_selected_arch->name] = '\0';
28409 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28410 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28412 else
28413 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28416 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28417 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28418 else
28420 const char* truncated_name
28421 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28422 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28425 if (TARGET_SOFT_FLOAT)
28427 fpu_name = "softvfp";
28429 else
28431 fpu_name = arm_fpu_desc->name;
28432 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28434 if (TARGET_HARD_FLOAT)
28435 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28436 if (TARGET_HARD_FLOAT_ABI)
28437 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28440 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28442 /* Some of these attributes only apply when the corresponding features
28443 are used. However we don't have any easy way of figuring this out.
28444 Conservatively record the setting that would have been used. */
28446 if (flag_rounding_math)
28447 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28449 if (!flag_unsafe_math_optimizations)
28451 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28452 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28454 if (flag_signaling_nans)
28455 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28457 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28458 flag_finite_math_only ? 1 : 3);
28460 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28461 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28462 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28463 flag_short_enums ? 1 : 2);
28465 /* Tag_ABI_optimization_goals. */
28466 if (optimize_size)
28467 val = 4;
28468 else if (optimize >= 2)
28469 val = 2;
28470 else if (optimize)
28471 val = 1;
28472 else
28473 val = 6;
28474 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28476 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28477 unaligned_access);
28479 if (arm_fp16_format)
28480 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28481 (int) arm_fp16_format);
28483 if (arm_lang_output_object_attributes_hook)
28484 arm_lang_output_object_attributes_hook();
28487 default_file_start ();
28490 static void
28491 arm_file_end (void)
28493 int regno;
28495 if (NEED_INDICATE_EXEC_STACK)
28496 /* Add .note.GNU-stack. */
28497 file_end_indicate_exec_stack ();
28499 if (! thumb_call_reg_needed)
28500 return;
28502 switch_to_section (text_section);
28503 asm_fprintf (asm_out_file, "\t.code 16\n");
28504 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28506 for (regno = 0; regno < LR_REGNUM; regno++)
28508 rtx label = thumb_call_via_label[regno];
28510 if (label != 0)
28512 targetm.asm_out.internal_label (asm_out_file, "L",
28513 CODE_LABEL_NUMBER (label));
28514 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28519 #ifndef ARM_PE
28520 /* Symbols in the text segment can be accessed without indirecting via the
28521 constant pool; it may take an extra binary operation, but this is still
28522 faster than indirecting via memory. Don't do this when not optimizing,
28523 since we won't be calculating all of the offsets necessary to do this
28524 simplification. */
28526 static void
28527 arm_encode_section_info (tree decl, rtx rtl, int first)
28529 if (optimize > 0 && TREE_CONSTANT (decl))
28530 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28532 default_encode_section_info (decl, rtl, first);
28534 #endif /* !ARM_PE */
28536 static void
28537 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28539 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28540 && !strcmp (prefix, "L"))
28542 arm_ccfsm_state = 0;
28543 arm_target_insn = NULL;
28545 default_internal_label (stream, prefix, labelno);
28548 /* Output code to add DELTA to the first argument, and then jump
28549 to FUNCTION. Used for C++ multiple inheritance. */
28550 static void
28551 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28552 HOST_WIDE_INT delta,
28553 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28554 tree function)
28556 static int thunk_label = 0;
28557 char label[256];
28558 char labelpc[256];
28559 int mi_delta = delta;
28560 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28561 int shift = 0;
28562 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28563 ? 1 : 0);
28564 if (mi_delta < 0)
28565 mi_delta = - mi_delta;
28567 final_start_function (emit_barrier (), file, 1);
28569 if (TARGET_THUMB1)
28571 int labelno = thunk_label++;
28572 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28573 /* Thunks are entered in ARM mode when available. */
28574 if (TARGET_THUMB1_ONLY)
28576 /* push r3 so we can use it as a temporary. */
28577 /* TODO: Omit this save if r3 is not used. */
28578 fputs ("\tpush {r3}\n", file);
28579 fputs ("\tldr\tr3, ", file);
28581 else
28583 fputs ("\tldr\tr12, ", file);
28585 assemble_name (file, label);
28586 fputc ('\n', file);
28587 if (flag_pic)
28589 /* If we are generating PIC, the ldr instruction below loads
28590 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28591 the address of the add + 8, so we have:
28593 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28594 = target + 1.
28596 Note that we have "+ 1" because some versions of GNU ld
28597 don't set the low bit of the result for R_ARM_REL32
28598 relocations against thumb function symbols.
28599 On ARMv6M this is +4, not +8. */
28600 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28601 assemble_name (file, labelpc);
28602 fputs (":\n", file);
28603 if (TARGET_THUMB1_ONLY)
28605 /* This is 2 insns after the start of the thunk, so we know it
28606 is 4-byte aligned. */
28607 fputs ("\tadd\tr3, pc, r3\n", file);
28608 fputs ("\tmov r12, r3\n", file);
28610 else
28611 fputs ("\tadd\tr12, pc, r12\n", file);
28613 else if (TARGET_THUMB1_ONLY)
28614 fputs ("\tmov r12, r3\n", file);
28616 if (TARGET_THUMB1_ONLY)
28618 if (mi_delta > 255)
28620 fputs ("\tldr\tr3, ", file);
28621 assemble_name (file, label);
28622 fputs ("+4\n", file);
28623 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28624 mi_op, this_regno, this_regno);
28626 else if (mi_delta != 0)
28628 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28629 mi_op, this_regno, this_regno,
28630 mi_delta);
28633 else
28635 /* TODO: Use movw/movt for large constants when available. */
28636 while (mi_delta != 0)
28638 if ((mi_delta & (3 << shift)) == 0)
28639 shift += 2;
28640 else
28642 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28643 mi_op, this_regno, this_regno,
28644 mi_delta & (0xff << shift));
28645 mi_delta &= ~(0xff << shift);
28646 shift += 8;
28650 if (TARGET_THUMB1)
28652 if (TARGET_THUMB1_ONLY)
28653 fputs ("\tpop\t{r3}\n", file);
28655 fprintf (file, "\tbx\tr12\n");
28656 ASM_OUTPUT_ALIGN (file, 2);
28657 assemble_name (file, label);
28658 fputs (":\n", file);
28659 if (flag_pic)
28661 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28662 rtx tem = XEXP (DECL_RTL (function), 0);
28663 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28664 pipeline offset is four rather than eight. Adjust the offset
28665 accordingly. */
28666 tem = plus_constant (GET_MODE (tem), tem,
28667 TARGET_THUMB1_ONLY ? -3 : -7);
28668 tem = gen_rtx_MINUS (GET_MODE (tem),
28669 tem,
28670 gen_rtx_SYMBOL_REF (Pmode,
28671 ggc_strdup (labelpc)));
28672 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28674 else
28675 /* Output ".word .LTHUNKn". */
28676 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28678 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28679 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28681 else
28683 fputs ("\tb\t", file);
28684 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28685 if (NEED_PLT_RELOC)
28686 fputs ("(PLT)", file);
28687 fputc ('\n', file);
28690 final_end_function ();
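/* Worked example, illustrative only: in the non-Thumb-1 branch above a
   thunk delta of 0x12345 applied to the "this" pointer (r0 here) is split
   into one add per non-zero 8-bit field:

	add	r0, r0, #0x45
	add	r0, r0, #0x2300
	add	r0, r0, #0x10000

   Each piece is an 8-bit value at an even bit position, so every add uses
   a valid ARM data-processing immediate.  */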
28694 arm_emit_vector_const (FILE *file, rtx x)
28696 int i;
28697 const char * pattern;
28699 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28701 switch (GET_MODE (x))
28703 case V2SImode: pattern = "%08x"; break;
28704 case V4HImode: pattern = "%04x"; break;
28705 case V8QImode: pattern = "%02x"; break;
28706 default: gcc_unreachable ();
28709 fprintf (file, "0x");
28710 for (i = CONST_VECTOR_NUNITS (x); i--;)
28712 rtx element;
28714 element = CONST_VECTOR_ELT (x, i);
28715 fprintf (file, pattern, INTVAL (element));
28718 return 1;
28721 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28722 HFmode constant pool entries are actually loaded with ldr. */
28723 void
28724 arm_emit_fp16_const (rtx c)
28726 REAL_VALUE_TYPE r;
28727 long bits;
28729 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28730 bits = real_to_target (NULL, &r, HFmode);
28731 if (WORDS_BIG_ENDIAN)
28732 assemble_zeros (2);
28733 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28734 if (!WORDS_BIG_ENDIAN)
28735 assemble_zeros (2);
28738 const char *
28739 arm_output_load_gr (rtx *operands)
28741 rtx reg;
28742 rtx offset;
28743 rtx wcgr;
28744 rtx sum;
28746 if (!MEM_P (operands [1])
28747 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28748 || !REG_P (reg = XEXP (sum, 0))
28749 || !CONST_INT_P (offset = XEXP (sum, 1))
28750 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28751 return "wldrw%?\t%0, %1";
28753 /* Fix up an out-of-range load of a GR register. */
28754 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28755 wcgr = operands[0];
28756 operands[0] = reg;
28757 output_asm_insn ("ldr%?\t%0, %1", operands);
28759 operands[0] = wcgr;
28760 operands[1] = reg;
28761 output_asm_insn ("tmcr%?\t%0, %1", operands);
28762 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28764 return "";
28767 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28769 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28770 named arg and all anonymous args onto the stack.
28771 XXX I know the prologue shouldn't be pushing registers, but it is faster
28772 that way. */
28774 static void
28775 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28776 machine_mode mode,
28777 tree type,
28778 int *pretend_size,
28779 int second_time ATTRIBUTE_UNUSED)
28781 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28782 int nregs;
28784 cfun->machine->uses_anonymous_args = 1;
28785 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28787 nregs = pcum->aapcs_ncrn;
28788 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28789 nregs++;
28791 else
28792 nregs = pcum->nregs;
28794 if (nregs < NUM_ARG_REGS)
28795 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
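/* Illustrative arithmetic: with the four core argument registers r0-r3
   (NUM_ARG_REGS == 4) and UNITS_PER_WORD == 4, a variadic call that has
   used only r0 for named arguments gives nregs == 1, so *pretend_size
   becomes (4 - 1) * 4 == 12 and the prologue flushes r1-r3 to the stack
   for va_arg to find.  */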
28798 /* We can't rely on the caller doing the proper promotion when
28799 using APCS or ATPCS. */
28801 static bool
28802 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28804 return !TARGET_AAPCS_BASED;
28807 static machine_mode
28808 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28809 machine_mode mode,
28810 int *punsignedp ATTRIBUTE_UNUSED,
28811 const_tree fntype ATTRIBUTE_UNUSED,
28812 int for_return ATTRIBUTE_UNUSED)
28814 if (GET_MODE_CLASS (mode) == MODE_INT
28815 && GET_MODE_SIZE (mode) < 4)
28816 return SImode;
28818 return mode;
28821 /* AAPCS based ABIs use short enums by default. */
28823 static bool
28824 arm_default_short_enums (void)
28826 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28830 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28832 static bool
28833 arm_align_anon_bitfield (void)
28835 return TARGET_AAPCS_BASED;
28839 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28841 static tree
28842 arm_cxx_guard_type (void)
28844 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28848 /* The EABI says test the least significant bit of a guard variable. */
28850 static bool
28851 arm_cxx_guard_mask_bit (void)
28853 return TARGET_AAPCS_BASED;
28857 /* The EABI specifies that all array cookies are 8 bytes long. */
28859 static tree
28860 arm_get_cookie_size (tree type)
28862 tree size;
28864 if (!TARGET_AAPCS_BASED)
28865 return default_cxx_get_cookie_size (type);
28867 size = build_int_cst (sizetype, 8);
28868 return size;
28872 /* The EABI says that array cookies should also contain the element size. */
28874 static bool
28875 arm_cookie_has_size (void)
28877 return TARGET_AAPCS_BASED;
28881 /* The EABI says constructors and destructors should return a pointer to
28882 the object constructed/destroyed. */
28884 static bool
28885 arm_cxx_cdtor_returns_this (void)
28887 return TARGET_AAPCS_BASED;
28890 /* The EABI says that an inline function may never be the key
28891 method. */
28893 static bool
28894 arm_cxx_key_method_may_be_inline (void)
28896 return !TARGET_AAPCS_BASED;
28899 static void
28900 arm_cxx_determine_class_data_visibility (tree decl)
28902 if (!TARGET_AAPCS_BASED
28903 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28904 return;
28906 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28907 is exported. However, on systems without dynamic vague linkage,
28908 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28909 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28910 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28911 else
28912 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28913 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28916 static bool
28917 arm_cxx_class_data_always_comdat (void)
28919 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28920 vague linkage if the class has no key function. */
28921 return !TARGET_AAPCS_BASED;
28925 /* The EABI says __aeabi_atexit should be used to register static
28926 destructors. */
28928 static bool
28929 arm_cxx_use_aeabi_atexit (void)
28931 return TARGET_AAPCS_BASED;
28935 void
28936 arm_set_return_address (rtx source, rtx scratch)
28938 arm_stack_offsets *offsets;
28939 HOST_WIDE_INT delta;
28940 rtx addr;
28941 unsigned long saved_regs;
28943 offsets = arm_get_frame_offsets ();
28944 saved_regs = offsets->saved_regs_mask;
28946 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28947 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28948 else
28950 if (frame_pointer_needed)
28951 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28952 else
28954 /* LR will be the first saved register. */
28955 delta = offsets->outgoing_args - (offsets->frame + 4);
28958 if (delta >= 4096)
28960 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28961 GEN_INT (delta & ~4095)));
28962 addr = scratch;
28963 delta &= 4095;
28965 else
28966 addr = stack_pointer_rtx;
28968 addr = plus_constant (Pmode, addr, delta);
28970 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28975 void
28976 thumb_set_return_address (rtx source, rtx scratch)
28978 arm_stack_offsets *offsets;
28979 HOST_WIDE_INT delta;
28980 HOST_WIDE_INT limit;
28981 int reg;
28982 rtx addr;
28983 unsigned long mask;
28985 emit_use (source);
28987 offsets = arm_get_frame_offsets ();
28988 mask = offsets->saved_regs_mask;
28989 if (mask & (1 << LR_REGNUM))
28991 limit = 1024;
28992 /* Find the saved regs. */
28993 if (frame_pointer_needed)
28995 delta = offsets->soft_frame - offsets->saved_args;
28996 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28997 if (TARGET_THUMB1)
28998 limit = 128;
29000 else
29002 delta = offsets->outgoing_args - offsets->saved_args;
29003 reg = SP_REGNUM;
29005 /* Allow for the stack frame. */
29006 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29007 delta -= 16;
29008 /* The link register is always the first saved register. */
29009 delta -= 4;
29011 /* Construct the address. */
29012 addr = gen_rtx_REG (SImode, reg);
29013 if (delta > limit)
29015 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29016 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29017 addr = scratch;
29019 else
29020 addr = plus_constant (Pmode, addr, delta);
29022 emit_move_insn (gen_frame_mem (Pmode, addr), source);
29024 else
29025 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29028 /* Implements target hook vector_mode_supported_p. */
29029 bool
29030 arm_vector_mode_supported_p (machine_mode mode)
29032 /* Neon also supports V2SImode, etc. listed in the clause below. */
29033 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29034 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29035 return true;
29037 if ((TARGET_NEON || TARGET_IWMMXT)
29038 && ((mode == V2SImode)
29039 || (mode == V4HImode)
29040 || (mode == V8QImode)))
29041 return true;
29043 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29044 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29045 || mode == V2HAmode))
29046 return true;
29048 return false;
29051 /* Implements target hook array_mode_supported_p. */
29053 static bool
29054 arm_array_mode_supported_p (machine_mode mode,
29055 unsigned HOST_WIDE_INT nelems)
29057 if (TARGET_NEON
29058 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29059 && (nelems >= 2 && nelems <= 4))
29060 return true;
29062 return false;
29065 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29066 registers when autovectorizing for Neon, at least until multiple vector
29067 widths are supported properly by the middle-end. */
29069 static machine_mode
29070 arm_preferred_simd_mode (machine_mode mode)
29072 if (TARGET_NEON)
29073 switch (mode)
29075 case SFmode:
29076 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29077 case SImode:
29078 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29079 case HImode:
29080 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29081 case QImode:
29082 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29083 case DImode:
29084 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29085 return V2DImode;
29086 break;
29088 default:;
29091 if (TARGET_REALLY_IWMMXT)
29092 switch (mode)
29094 case SImode:
29095 return V2SImode;
29096 case HImode:
29097 return V4HImode;
29098 case QImode:
29099 return V8QImode;
29101 default:;
29104 return word_mode;
29107 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29109 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29110 using r0-r4 for function arguments and r7 for the stack frame, leaving too
29111 few registers for doubleword arithmetic. For Thumb-2 all the
29112 potentially problematic instructions accept high registers so this is not
29113 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29114 that require many low registers. */
29115 static bool
29116 arm_class_likely_spilled_p (reg_class_t rclass)
29118 if ((TARGET_THUMB1 && rclass == LO_REGS)
29119 || rclass == CC_REG)
29120 return true;
29122 return false;
29125 /* Implements target hook small_register_classes_for_mode_p. */
29126 bool
29127 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29129 return TARGET_THUMB1;
29132 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29133 ARM insns and therefore guarantee that the shift count is modulo 256.
29134 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29135 guarantee no particular behavior for out-of-range counts. */
29137 static unsigned HOST_WIDE_INT
29138 arm_shift_truncation_mask (machine_mode mode)
29140 return mode == SImode ? 255 : 0;
29144 /* Map internal gcc register numbers to DWARF2 register numbers. */
29146 unsigned int
29147 arm_dbx_register_number (unsigned int regno)
29149 if (regno < 16)
29150 return regno;
29152 if (IS_VFP_REGNUM (regno))
29154 /* See comment in arm_dwarf_register_span. */
29155 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29156 return 64 + regno - FIRST_VFP_REGNUM;
29157 else
29158 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29161 if (IS_IWMMXT_GR_REGNUM (regno))
29162 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29164 if (IS_IWMMXT_REGNUM (regno))
29165 return 112 + regno - FIRST_IWMMXT_REGNUM;
29167 gcc_unreachable ();
29170 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29171 GCC models them as 64 32-bit registers, so we need to describe this to
29172 the DWARF generation code. Other registers can use the default. */
29173 static rtx
29174 arm_dwarf_register_span (rtx rtl)
29176 machine_mode mode;
29177 unsigned regno;
29178 rtx parts[16];
29179 int nregs;
29180 int i;
29182 regno = REGNO (rtl);
29183 if (!IS_VFP_REGNUM (regno))
29184 return NULL_RTX;
29186 /* XXX FIXME: The EABI defines two VFP register ranges:
29187 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29188 256-287: D0-D31
29189 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29190 corresponding D register. Until GDB supports this, we shall use the
29191 legacy encodings. We also use these encodings for D0-D15 for
29192 compatibility with older debuggers. */
29193 mode = GET_MODE (rtl);
29194 if (GET_MODE_SIZE (mode) < 8)
29195 return NULL_RTX;
29197 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29199 nregs = GET_MODE_SIZE (mode) / 4;
29200 for (i = 0; i < nregs; i += 2)
29201 if (TARGET_BIG_END)
29203 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29204 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29206 else
29208 parts[i] = gen_rtx_REG (SImode, regno + i);
29209 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29212 else
29214 nregs = GET_MODE_SIZE (mode) / 8;
29215 for (i = 0; i < nregs; i++)
29216 parts[i] = gen_rtx_REG (DImode, regno + i);
29219 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
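/* An illustrative example: a DFmode value in a D register that is also
   addressable as two S registers (VFP_REGNO_OK_FOR_SINGLE) is described
   as a PARALLEL of its two SImode halves, swapped when TARGET_BIG_END so
   the debugger sees the correct order; for the remaining D registers the
   span is simply the DImode registers themselves.  */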
29222 #if ARM_UNWIND_INFO
29223 /* Emit unwind directives for a store-multiple instruction or stack pointer
29224 push during alignment.
29225 These should only ever be generated by the function prologue code, so
29226 expect them to have a particular form.
29227 The store-multiple instruction sometimes pushes pc as the last register,
29228 although it should not be tracked in the unwind information; for -Os it
29229 sometimes pushes dummy registers before the first register that needs
29230 to be tracked in the unwind information. Such dummy registers are there just
29231 to avoid separate stack adjustment, and will not be restored in the
29232 epilogue. */
29234 static void
29235 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29237 int i;
29238 HOST_WIDE_INT offset;
29239 HOST_WIDE_INT nregs;
29240 int reg_size;
29241 unsigned reg;
29242 unsigned lastreg;
29243 unsigned padfirst = 0, padlast = 0;
29244 rtx e;
29246 e = XVECEXP (p, 0, 0);
29247 gcc_assert (GET_CODE (e) == SET);
29249 /* First insn will adjust the stack pointer. */
29250 gcc_assert (GET_CODE (e) == SET
29251 && REG_P (SET_DEST (e))
29252 && REGNO (SET_DEST (e)) == SP_REGNUM
29253 && GET_CODE (SET_SRC (e)) == PLUS);
29255 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29256 nregs = XVECLEN (p, 0) - 1;
29257 gcc_assert (nregs);
29259 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29260 if (reg < 16)
29262 /* For -Os dummy registers can be pushed at the beginning to
29263 avoid separate stack pointer adjustment. */
29264 e = XVECEXP (p, 0, 1);
29265 e = XEXP (SET_DEST (e), 0);
29266 if (GET_CODE (e) == PLUS)
29267 padfirst = INTVAL (XEXP (e, 1));
29268 gcc_assert (padfirst == 0 || optimize_size);
29269 /* The function prologue may also push pc, but not annotate it as it is
29270 never restored. We turn this into a stack pointer adjustment. */
29271 e = XVECEXP (p, 0, nregs);
29272 e = XEXP (SET_DEST (e), 0);
29273 if (GET_CODE (e) == PLUS)
29274 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29275 else
29276 padlast = offset - 4;
29277 gcc_assert (padlast == 0 || padlast == 4);
29278 if (padlast == 4)
29279 fprintf (asm_out_file, "\t.pad #4\n");
29280 reg_size = 4;
29281 fprintf (asm_out_file, "\t.save {");
29283 else if (IS_VFP_REGNUM (reg))
29285 reg_size = 8;
29286 fprintf (asm_out_file, "\t.vsave {");
29288 else
29289 /* Unknown register type. */
29290 gcc_unreachable ();
29292 /* If the stack increment doesn't match the size of the saved registers,
29293 something has gone horribly wrong. */
29294 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29296 offset = padfirst;
29297 lastreg = 0;
29298 /* The remaining insns will describe the stores. */
29299 for (i = 1; i <= nregs; i++)
29301 /* Expect (set (mem <addr>) (reg)).
29302 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29303 e = XVECEXP (p, 0, i);
29304 gcc_assert (GET_CODE (e) == SET
29305 && MEM_P (SET_DEST (e))
29306 && REG_P (SET_SRC (e)));
29308 reg = REGNO (SET_SRC (e));
29309 gcc_assert (reg >= lastreg);
29311 if (i != 1)
29312 fprintf (asm_out_file, ", ");
29313 /* We can't use %r for vfp because we need to use the
29314 double precision register names. */
29315 if (IS_VFP_REGNUM (reg))
29316 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29317 else
29318 asm_fprintf (asm_out_file, "%r", reg);
29320 #ifdef ENABLE_CHECKING
29321 /* Check that the addresses are consecutive. */
29322 e = XEXP (SET_DEST (e), 0);
29323 if (GET_CODE (e) == PLUS)
29324 gcc_assert (REG_P (XEXP (e, 0))
29325 && REGNO (XEXP (e, 0)) == SP_REGNUM
29326 && CONST_INT_P (XEXP (e, 1))
29327 && offset == INTVAL (XEXP (e, 1)));
29328 else
29329 gcc_assert (i == 1
29330 && REG_P (e)
29331 && REGNO (e) == SP_REGNUM);
29332 offset += reg_size;
29333 #endif
29335 fprintf (asm_out_file, "}\n");
29336 if (padfirst)
29337 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
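/* An illustrative example: a prologue store-multiple such as
   "push {r4, r5, lr}" (sp decremented by 12) is annotated here as

       .save {r4, r5, lr}

   while a VFP save of d8-d9 yields ".vsave {d8, d9}"; a pushed pc or
   -Os padding registers are folded into the ".pad" directives emitted
   above instead of being listed.  */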
29340 /* Emit unwind directives for a SET. */
29342 static void
29343 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29345 rtx e0;
29346 rtx e1;
29347 unsigned reg;
29349 e0 = XEXP (p, 0);
29350 e1 = XEXP (p, 1);
29351 switch (GET_CODE (e0))
29353 case MEM:
29354 /* Pushing a single register. */
29355 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29356 || !REG_P (XEXP (XEXP (e0, 0), 0))
29357 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29358 abort ();
29360 asm_fprintf (asm_out_file, "\t.save ");
29361 if (IS_VFP_REGNUM (REGNO (e1)))
29362 asm_fprintf(asm_out_file, "{d%d}\n",
29363 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29364 else
29365 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29366 break;
29368 case REG:
29369 if (REGNO (e0) == SP_REGNUM)
29371 /* A stack increment. */
29372 if (GET_CODE (e1) != PLUS
29373 || !REG_P (XEXP (e1, 0))
29374 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29375 || !CONST_INT_P (XEXP (e1, 1)))
29376 abort ();
29378 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29379 -INTVAL (XEXP (e1, 1)));
29381 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29383 HOST_WIDE_INT offset;
29385 if (GET_CODE (e1) == PLUS)
29387 if (!REG_P (XEXP (e1, 0))
29388 || !CONST_INT_P (XEXP (e1, 1)))
29389 abort ();
29390 reg = REGNO (XEXP (e1, 0));
29391 offset = INTVAL (XEXP (e1, 1));
29392 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29393 HARD_FRAME_POINTER_REGNUM, reg,
29394 offset);
29396 else if (REG_P (e1))
29398 reg = REGNO (e1);
29399 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29400 HARD_FRAME_POINTER_REGNUM, reg);
29402 else
29403 abort ();
29405 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29407 /* Move from sp to reg. */
29408 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29410 else if (GET_CODE (e1) == PLUS
29411 && REG_P (XEXP (e1, 0))
29412 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29413 && CONST_INT_P (XEXP (e1, 1)))
29415 /* Set reg to offset from sp. */
29416 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29417 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29419 else
29420 abort ();
29421 break;
29423 default:
29424 abort ();
29429 /* Emit unwind directives for the given insn. */
29431 static void
29432 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29434 rtx note, pat;
29435 bool handled_one = false;
29437 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29438 return;
29440 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29441 && (TREE_NOTHROW (current_function_decl)
29442 || crtl->all_throwers_are_sibcalls))
29443 return;
29445 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29446 return;
29448 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29450 switch (REG_NOTE_KIND (note))
29452 case REG_FRAME_RELATED_EXPR:
29453 pat = XEXP (note, 0);
29454 goto found;
29456 case REG_CFA_REGISTER:
29457 pat = XEXP (note, 0);
29458 if (pat == NULL)
29460 pat = PATTERN (insn);
29461 if (GET_CODE (pat) == PARALLEL)
29462 pat = XVECEXP (pat, 0, 0);
29465 /* Only emitted for IS_STACKALIGN re-alignment. */
29467 rtx dest, src;
29468 unsigned reg;
29470 src = SET_SRC (pat);
29471 dest = SET_DEST (pat);
29473 gcc_assert (src == stack_pointer_rtx);
29474 reg = REGNO (dest);
29475 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29476 reg + 0x90, reg);
29478 handled_one = true;
29479 break;
29481 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29482 only to get correct DWARF information for shrink-wrapping. We should not
29483 emit unwind information for it because these notes are used either for
29484 pretend arguments or to adjust sp and restore registers from the
29485 stack. */
29486 case REG_CFA_DEF_CFA:
29487 case REG_CFA_ADJUST_CFA:
29488 case REG_CFA_RESTORE:
29489 return;
29491 case REG_CFA_EXPRESSION:
29492 case REG_CFA_OFFSET:
29493 /* ??? Only handling here what we actually emit. */
29494 gcc_unreachable ();
29496 default:
29497 break;
29500 if (handled_one)
29501 return;
29502 pat = PATTERN (insn);
29503 found:
29505 switch (GET_CODE (pat))
29507 case SET:
29508 arm_unwind_emit_set (asm_out_file, pat);
29509 break;
29511 case SEQUENCE:
29512 /* Store multiple. */
29513 arm_unwind_emit_sequence (asm_out_file, pat);
29514 break;
29516 default:
29517 abort();
29522 /* Output a reference from a function exception table to the type_info
29523 object X. The EABI specifies that the symbol should be relocated by
29524 an R_ARM_TARGET2 relocation. */
29526 static bool
29527 arm_output_ttype (rtx x)
29529 fputs ("\t.word\t", asm_out_file);
29530 output_addr_const (asm_out_file, x);
29531 /* Use special relocations for symbol references. */
29532 if (!CONST_INT_P (x))
29533 fputs ("(TARGET2)", asm_out_file);
29534 fputc ('\n', asm_out_file);
29536 return TRUE;
29539 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29541 static void
29542 arm_asm_emit_except_personality (rtx personality)
29544 fputs ("\t.personality\t", asm_out_file);
29545 output_addr_const (asm_out_file, personality);
29546 fputc ('\n', asm_out_file);
29549 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29551 static void
29552 arm_asm_init_sections (void)
29554 exception_section = get_unnamed_section (0, output_section_asm_op,
29555 "\t.handlerdata");
29557 #endif /* ARM_UNWIND_INFO */
29559 /* Output unwind directives for the start/end of a function. */
29561 void
29562 arm_output_fn_unwind (FILE * f, bool prologue)
29564 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29565 return;
29567 if (prologue)
29568 fputs ("\t.fnstart\n", f);
29569 else
29571 /* If this function will never be unwound, then mark it as such.
29572 The same condition is used in arm_unwind_emit to suppress
29573 the frame annotations. */
29574 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29575 && (TREE_NOTHROW (current_function_decl)
29576 || crtl->all_throwers_are_sibcalls))
29577 fputs("\t.cantunwind\n", f);
29579 fputs ("\t.fnend\n", f);
29583 static bool
29584 arm_emit_tls_decoration (FILE *fp, rtx x)
29586 enum tls_reloc reloc;
29587 rtx val;
29589 val = XVECEXP (x, 0, 0);
29590 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29592 output_addr_const (fp, val);
29594 switch (reloc)
29596 case TLS_GD32:
29597 fputs ("(tlsgd)", fp);
29598 break;
29599 case TLS_LDM32:
29600 fputs ("(tlsldm)", fp);
29601 break;
29602 case TLS_LDO32:
29603 fputs ("(tlsldo)", fp);
29604 break;
29605 case TLS_IE32:
29606 fputs ("(gottpoff)", fp);
29607 break;
29608 case TLS_LE32:
29609 fputs ("(tpoff)", fp);
29610 break;
29611 case TLS_DESCSEQ:
29612 fputs ("(tlsdesc)", fp);
29613 break;
29614 default:
29615 gcc_unreachable ();
29618 switch (reloc)
29620 case TLS_GD32:
29621 case TLS_LDM32:
29622 case TLS_IE32:
29623 case TLS_DESCSEQ:
29624 fputs (" + (. - ", fp);
29625 output_addr_const (fp, XVECEXP (x, 0, 2));
29626 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29627 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29628 output_addr_const (fp, XVECEXP (x, 0, 3));
29629 fputc (')', fp);
29630 break;
29631 default:
29632 break;
29635 return TRUE;
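/* An illustrative example (label name and offset are hypothetical): a
   general-dynamic TLS_GD32 reference to `sym' is printed roughly as

       sym(tlsgd) + (. - .LPIC0 - 8)

   where the label and the constant come from the third and fourth
   operands of the UNSPEC.  */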
29638 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29640 static void
29641 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29643 gcc_assert (size == 4);
29644 fputs ("\t.word\t", file);
29645 output_addr_const (file, x);
29646 fputs ("(tlsldo)", file);
29649 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29651 static bool
29652 arm_output_addr_const_extra (FILE *fp, rtx x)
29654 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29655 return arm_emit_tls_decoration (fp, x);
29656 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29658 char label[256];
29659 int labelno = INTVAL (XVECEXP (x, 0, 0));
29661 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29662 assemble_name_raw (fp, label);
29664 return TRUE;
29666 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29668 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29669 if (GOT_PCREL)
29670 fputs ("+.", fp);
29671 fputs ("-(", fp);
29672 output_addr_const (fp, XVECEXP (x, 0, 0));
29673 fputc (')', fp);
29674 return TRUE;
29676 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29678 output_addr_const (fp, XVECEXP (x, 0, 0));
29679 if (GOT_PCREL)
29680 fputs ("+.", fp);
29681 fputs ("-(", fp);
29682 output_addr_const (fp, XVECEXP (x, 0, 1));
29683 fputc (')', fp);
29684 return TRUE;
29686 else if (GET_CODE (x) == CONST_VECTOR)
29687 return arm_emit_vector_const (fp, x);
29689 return FALSE;
29692 /* Output assembly for a shift instruction.
29693 SET_FLAGS determines how the instruction modifies the condition codes.
29694 0 - Do not set condition codes.
29695 1 - Set condition codes.
29696 2 - Use smallest instruction. */
29697 const char *
29698 arm_output_shift(rtx * operands, int set_flags)
29700 char pattern[100];
29701 static const char flag_chars[3] = {'?', '.', '!'};
29702 const char *shift;
29703 HOST_WIDE_INT val;
29704 char c;
29706 c = flag_chars[set_flags];
29707 if (TARGET_UNIFIED_ASM)
29709 shift = shift_op(operands[3], &val);
29710 if (shift)
29712 if (val != -1)
29713 operands[2] = GEN_INT(val);
29714 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29716 else
29717 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29719 else
29720 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29721 output_asm_insn (pattern, operands);
29722 return "";
29725 /* Output assembly for a WMMX immediate shift instruction. */
29726 const char *
29727 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29729 int shift = INTVAL (operands[2]);
29730 char templ[50];
29731 machine_mode opmode = GET_MODE (operands[0]);
29733 gcc_assert (shift >= 0);
29735 /* Handle a shift value that is out of range for the element width: more than
29736 63 for the D qualifier, 31 for the W qualifier or 15 for the H qualifier. */
29737 if (((opmode == V4HImode) && (shift > 15))
29738 || ((opmode == V2SImode) && (shift > 31))
29739 || ((opmode == DImode) && (shift > 63)))
29741 if (wror_or_wsra)
29743 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29744 output_asm_insn (templ, operands);
29745 if (opmode == DImode)
29747 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29748 output_asm_insn (templ, operands);
29751 else
29753 /* The destination register will contain all zeros. */
29754 sprintf (templ, "wzero\t%%0");
29755 output_asm_insn (templ, operands);
29757 return "";
29760 if ((opmode == DImode) && (shift > 32))
29762 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29763 output_asm_insn (templ, operands);
29764 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29765 output_asm_insn (templ, operands);
29767 else
29769 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29770 output_asm_insn (templ, operands);
29772 return "";
29775 /* Output assembly for a WMMX tinsr instruction. */
29776 const char *
29777 arm_output_iwmmxt_tinsr (rtx *operands)
29779 int mask = INTVAL (operands[3]);
29780 int i;
29781 char templ[50];
29782 int units = mode_nunits[GET_MODE (operands[0])];
29783 gcc_assert ((mask & (mask - 1)) == 0);
29784 for (i = 0; i < units; ++i)
29786 if ((mask & 0x01) == 1)
29788 break;
29790 mask >>= 1;
29792 gcc_assert (i < units);
29794 switch (GET_MODE (operands[0]))
29796 case V8QImode:
29797 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29798 break;
29799 case V4HImode:
29800 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29801 break;
29802 case V2SImode:
29803 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29804 break;
29805 default:
29806 gcc_unreachable ();
29807 break;
29809 output_asm_insn (templ, operands);
29811 return "";
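/* An illustrative example: operand 3 is a one-hot lane mask, so for a
   V4HImode destination with a mask of (1 << 2) the loop above finds
   i == 2 and the template emitted is "tinsrh%?\t%0, %2, #2".  */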
29814 /* Output a Thumb-1 casesi dispatch sequence. */
29815 const char *
29816 thumb1_output_casesi (rtx *operands)
29818 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29820 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29822 switch (GET_MODE(diff_vec))
29824 case QImode:
29825 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29826 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29827 case HImode:
29828 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29829 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29830 case SImode:
29831 return "bl\t%___gnu_thumb1_case_si";
29832 default:
29833 gcc_unreachable ();
29837 /* Output a Thumb-2 casesi instruction. */
29838 const char *
29839 thumb2_output_casesi (rtx *operands)
29841 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29843 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29845 output_asm_insn ("cmp\t%0, %1", operands);
29846 output_asm_insn ("bhi\t%l3", operands);
29847 switch (GET_MODE(diff_vec))
29849 case QImode:
29850 return "tbb\t[%|pc, %0]";
29851 case HImode:
29852 return "tbh\t[%|pc, %0, lsl #1]";
29853 case SImode:
29854 if (flag_pic)
29856 output_asm_insn ("adr\t%4, %l2", operands);
29857 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29858 output_asm_insn ("add\t%4, %4, %5", operands);
29859 return "bx\t%4";
29861 else
29863 output_asm_insn ("adr\t%4, %l2", operands);
29864 return "ldr\t%|pc, [%4, %0, lsl #2]";
29866 default:
29867 gcc_unreachable ();
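/* An illustrative example: for a QImode (byte offset) dispatch table the
   code produced is

       cmp   %0, %1
       bhi   %l3
       tbb   [pc, %0]

   with "tbh\t[pc, %0, lsl #1]" used instead for HImode tables.  */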
29871 /* Most ARM cores are single issue, but some newer ones can issue more than
29872 one instruction per cycle. The scheduler descriptions rely on this being correct. */
29873 static int
29874 arm_issue_rate (void)
29876 switch (arm_tune)
29878 case cortexa15:
29879 case cortexa57:
29880 return 3;
29882 case cortexr4:
29883 case cortexr4f:
29884 case cortexr5:
29885 case genericv7a:
29886 case cortexa5:
29887 case cortexa7:
29888 case cortexa8:
29889 case cortexa9:
29890 case cortexa12:
29891 case cortexa53:
29892 case fa726te:
29893 case marvell_pj4:
29894 return 2;
29896 default:
29897 return 1;
29901 /* A table and a function to perform ARM-specific name mangling for
29902 NEON vector types in order to conform to the AAPCS (see "Procedure
29903 Call Standard for the ARM Architecture", Appendix A). To qualify
29904 for emission with the mangled names defined in that document, a
29905 vector type must not only be of the correct mode but also be
29906 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29907 typedef struct
29909 machine_mode mode;
29910 const char *element_type_name;
29911 const char *aapcs_name;
29912 } arm_mangle_map_entry;
29914 static arm_mangle_map_entry arm_mangle_map[] = {
29915 /* 64-bit containerized types. */
29916 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29917 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29918 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29919 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29920 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29921 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29922 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29923 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29924 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29925 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29927 /* 128-bit containerized types. */
29928 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29929 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29930 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29931 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29932 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29933 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29934 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29935 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29936 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29937 { VOIDmode, NULL, NULL }
29940 const char *
29941 arm_mangle_type (const_tree type)
29943 arm_mangle_map_entry *pos = arm_mangle_map;
29945 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29946 has to be mangled as if it is in the "std" namespace. */
29947 if (TARGET_AAPCS_BASED
29948 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29949 return "St9__va_list";
29951 /* Half-precision float. */
29952 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29953 return "Dh";
29955 if (TREE_CODE (type) != VECTOR_TYPE)
29956 return NULL;
29958 /* Check the mode of the vector type, and the name of the vector
29959 element type, against the table. */
29960 while (pos->mode != VOIDmode)
29962 tree elt_type = TREE_TYPE (type);
29964 if (pos->mode == TYPE_MODE (type)
29965 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29966 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29967 pos->element_type_name))
29968 return pos->aapcs_name;
29970 pos++;
29973 /* Use the default mangling for unrecognized (possibly user-defined)
29974 vector types. */
29975 return NULL;
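/* An illustrative (hypothetical) example: int16x4_t is a V4HImode vector
   of __builtin_neon_hi elements, so by the table above it mangles as
   "16__simd64_int16_t"; a declaration such as "void f (int16x4_t)" would
   therefore be emitted as _Z1f16__simd64_int16_t.  */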
29978 /* Order of allocation of core registers for Thumb: this allocation is
29979 written over the corresponding initial entries of the array
29980 initialized with REG_ALLOC_ORDER. We allocate all low registers
29981 first. Saving and restoring a low register is usually cheaper than
29982 using a call-clobbered high register. */
29984 static const int thumb_core_reg_alloc_order[] =
29986 3, 2, 1, 0, 4, 5, 6, 7,
29987 14, 12, 8, 9, 10, 11
29990 /* Adjust register allocation order when compiling for Thumb. */
29992 void
29993 arm_order_regs_for_local_alloc (void)
29995 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29996 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29997 if (TARGET_THUMB)
29998 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29999 sizeof (thumb_core_reg_alloc_order));
30002 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30004 bool
30005 arm_frame_pointer_required (void)
30007 return (cfun->has_nonlocal_label
30008 || SUBTARGET_FRAME_POINTER_REQUIRED
30009 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30012 /* Only Thumb-1 lacks conditional execution, so return true if
30013 the target is not Thumb-1. */
30014 static bool
30015 arm_have_conditional_execution (void)
30017 return !TARGET_THUMB1;
30020 tree
30021 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30023 machine_mode in_mode, out_mode;
30024 int in_n, out_n;
30025 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30027 if (TREE_CODE (type_out) != VECTOR_TYPE
30028 || TREE_CODE (type_in) != VECTOR_TYPE)
30029 return NULL_TREE;
30031 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30032 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30033 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30034 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30036 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30037 decl of the vectorized builtin for the appropriate vector mode.
30038 NULL_TREE is returned if no such builtin is available. */
30039 #undef ARM_CHECK_BUILTIN_MODE
30040 #define ARM_CHECK_BUILTIN_MODE(C) \
30041 (TARGET_NEON && TARGET_FPU_ARMV8 \
30042 && flag_unsafe_math_optimizations \
30043 && ARM_CHECK_BUILTIN_MODE_1 (C))
30045 #undef ARM_CHECK_BUILTIN_MODE_1
30046 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30047 (out_mode == SFmode && out_n == C \
30048 && in_mode == SFmode && in_n == C)
30050 #undef ARM_FIND_VRINT_VARIANT
30051 #define ARM_FIND_VRINT_VARIANT(N) \
30052 (ARM_CHECK_BUILTIN_MODE (2) \
30053 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30054 : (ARM_CHECK_BUILTIN_MODE (4) \
30055 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30056 : NULL_TREE))
30058 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30060 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30061 switch (fn)
30063 case BUILT_IN_FLOORF:
30064 return ARM_FIND_VRINT_VARIANT (vrintm);
30065 case BUILT_IN_CEILF:
30066 return ARM_FIND_VRINT_VARIANT (vrintp);
30067 case BUILT_IN_TRUNCF:
30068 return ARM_FIND_VRINT_VARIANT (vrintz);
30069 case BUILT_IN_ROUNDF:
30070 return ARM_FIND_VRINT_VARIANT (vrinta);
30071 #undef ARM_CHECK_BUILTIN_MODE_1
30072 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30073 (out_mode == SImode && out_n == C \
30074 && in_mode == SFmode && in_n == C)
30076 #define ARM_FIND_VCVT_VARIANT(N) \
30077 (ARM_CHECK_BUILTIN_MODE (2) \
30078 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30079 : (ARM_CHECK_BUILTIN_MODE (4) \
30080 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30081 : NULL_TREE))
30083 #define ARM_FIND_VCVTU_VARIANT(N) \
30084 (ARM_CHECK_BUILTIN_MODE (2) \
30085 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30086 : (ARM_CHECK_BUILTIN_MODE (4) \
30087 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30088 : NULL_TREE))
30089 case BUILT_IN_LROUNDF:
30090 return out_unsigned_p
30091 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30092 : ARM_FIND_VCVT_VARIANT (vcvta);
30093 case BUILT_IN_LCEILF:
30094 return out_unsigned_p
30095 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30096 : ARM_FIND_VCVT_VARIANT (vcvtp);
30097 case BUILT_IN_LFLOORF:
30098 return out_unsigned_p
30099 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30100 : ARM_FIND_VCVT_VARIANT (vcvtm);
30101 #undef ARM_CHECK_BUILTIN_MODE
30102 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30103 (out_mode == N##mode && out_n == C \
30104 && in_mode == N##mode && in_n == C)
30105 case BUILT_IN_BSWAP16:
30106 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30107 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30108 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30109 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30110 else
30111 return NULL_TREE;
30112 case BUILT_IN_BSWAP32:
30113 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30114 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30115 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30116 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30117 else
30118 return NULL_TREE;
30119 case BUILT_IN_BSWAP64:
30120 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30121 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30122 else
30123 return NULL_TREE;
30124 case BUILT_IN_COPYSIGNF:
30125 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30126 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30127 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30128 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30129 else
30130 return NULL_TREE;
30132 default:
30133 return NULL_TREE;
30136 return NULL_TREE;
30138 #undef ARM_FIND_VCVT_VARIANT
30139 #undef ARM_FIND_VCVTU_VARIANT
30140 #undef ARM_CHECK_BUILTIN_MODE
30141 #undef ARM_FIND_VRINT_VARIANT
30144 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30145 static HOST_WIDE_INT
30146 arm_vector_alignment (const_tree type)
30148 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30150 if (TARGET_AAPCS_BASED)
30151 align = MIN (align, 64);
30153 return align;
30156 static unsigned int
30157 arm_autovectorize_vector_sizes (void)
30159 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30162 static bool
30163 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30165 /* Vectors which aren't in packed structures will not be less aligned than
30166 the natural alignment of their element type, so this is safe. */
30167 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30168 return !is_packed;
30170 return default_builtin_vector_alignment_reachable (type, is_packed);
30173 static bool
30174 arm_builtin_support_vector_misalignment (machine_mode mode,
30175 const_tree type, int misalignment,
30176 bool is_packed)
30178 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30180 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30182 if (is_packed)
30183 return align == 1;
30185 /* If the misalignment is unknown, we should be able to handle the access
30186 so long as it is not to a member of a packed data structure. */
30187 if (misalignment == -1)
30188 return true;
30190 /* Return true if the misalignment is a multiple of the natural alignment
30191 of the vector's element type. This is probably always going to be
30192 true in practice, since we've already established that this isn't a
30193 packed access. */
30194 return ((misalignment % align) == 0);
30197 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30198 is_packed);
30201 static void
30202 arm_conditional_register_usage (void)
30204 int regno;
30206 if (TARGET_THUMB1 && optimize_size)
30208 /* When optimizing for size on Thumb-1, it's better not
30209 to use the HI regs, because of the overhead of
30210 stacking them. */
30211 for (regno = FIRST_HI_REGNUM;
30212 regno <= LAST_HI_REGNUM; ++regno)
30213 fixed_regs[regno] = call_used_regs[regno] = 1;
30216 /* The link register can be clobbered by any branch insn,
30217 but we have no way to track that at present, so mark
30218 it as unavailable. */
30219 if (TARGET_THUMB1)
30220 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30222 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30224 /* VFPv3 registers are disabled when earlier VFP
30225 versions are selected due to the definition of
30226 LAST_VFP_REGNUM. */
30227 for (regno = FIRST_VFP_REGNUM;
30228 regno <= LAST_VFP_REGNUM; ++ regno)
30230 fixed_regs[regno] = 0;
30231 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30232 || regno >= FIRST_VFP_REGNUM + 32;
30236 if (TARGET_REALLY_IWMMXT)
30238 regno = FIRST_IWMMXT_GR_REGNUM;
30239 /* The 2002/10/09 revision of the XScale ABI has wCG0
30240 and wCG1 as call-preserved registers. The 2002/11/21
30241 revision changed this so that all wCG registers are
30242 scratch registers. */
30243 for (regno = FIRST_IWMMXT_GR_REGNUM;
30244 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30245 fixed_regs[regno] = 0;
30246 /* The XScale ABI has wR0 - wR9 as scratch registers,
30247 the rest as call-preserved registers. */
30248 for (regno = FIRST_IWMMXT_REGNUM;
30249 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30251 fixed_regs[regno] = 0;
30252 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30256 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30258 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30259 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30261 else if (TARGET_APCS_STACK)
30263 fixed_regs[10] = 1;
30264 call_used_regs[10] = 1;
30266 /* -mcaller-super-interworking reserves r11 for calls to
30267 _interwork_r11_call_via_rN(). Making the register global
30268 is an easy way of ensuring that it remains valid for all
30269 calls. */
30270 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30271 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30273 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30274 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30275 if (TARGET_CALLER_INTERWORKING)
30276 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30278 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30281 static reg_class_t
30282 arm_preferred_rename_class (reg_class_t rclass)
30284 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30285 using GENERAL_REGS. During the register renaming pass, we prefer LO_REGS,
30286 and code size can be reduced. */
30287 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30288 return LO_REGS;
30289 else
30290 return NO_REGS;
30293 /* Compute the attribute "length" of insn "*push_multi".
30294 So this function MUST be kept in sync with that insn pattern. */
30296 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30298 int i, regno, hi_reg;
30299 int num_saves = XVECLEN (parallel_op, 0);
30301 /* ARM mode. */
30302 if (TARGET_ARM)
30303 return 4;
30304 /* Thumb1 mode. */
30305 if (TARGET_THUMB1)
30306 return 2;
30308 /* Thumb2 mode. */
30309 regno = REGNO (first_op);
30310 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30311 for (i = 1; i < num_saves && !hi_reg; i++)
30313 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30314 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30317 if (!hi_reg)
30318 return 2;
30319 return 4;
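/* An illustrative example: in Thumb-2 a list such as "push {r4, r5, lr}"
   contains no high register other than lr, so the 16-bit encoding is
   usable and the length is 2; adding, say, r8 to the list forces the
   32-bit encoding and a length of 4.  */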
30322 /* Compute the number of instructions emitted by output_move_double. */
30324 arm_count_output_move_double_insns (rtx *operands)
30326 int count;
30327 rtx ops[2];
30328 /* output_move_double may modify the operands array, so call it
30329 here on a copy of the array. */
30330 ops[0] = operands[0];
30331 ops[1] = operands[1];
30332 output_move_double (ops, false, &count);
30333 return count;
30337 vfp3_const_double_for_fract_bits (rtx operand)
30339 REAL_VALUE_TYPE r0;
30341 if (!CONST_DOUBLE_P (operand))
30342 return 0;
30344 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30345 if (exact_real_inverse (DFmode, &r0))
30347 if (exact_real_truncate (DFmode, &r0))
30349 HOST_WIDE_INT value = real_to_integer (&r0);
30350 value = value & 0xffffffff;
30351 if ((value != 0) && ( (value & (value - 1)) == 0))
30352 return int_log2 (value);
30355 return 0;
30359 vfp3_const_double_for_bits (rtx operand)
30361 REAL_VALUE_TYPE r0;
30363 if (!CONST_DOUBLE_P (operand))
30364 return 0;
30366 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30367 if (exact_real_truncate (DFmode, &r0))
30369 HOST_WIDE_INT value = real_to_integer (&r0);
30370 value = value & 0xffffffff;
30371 if ((value != 0) && ( (value & (value - 1)) == 0))
30372 return int_log2 (value);
30375 return 0;
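/* Illustrative arithmetic: for the constant 0.125 the exact reciprocal
   is 8.0, a power of two, so vfp3_const_double_for_fract_bits returns
   int_log2 (8) == 3; this lets a multiply by such a constant be folded
   into a fixed-point VCVT using 3 fraction bits.  */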
30378 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30380 static void
30381 arm_pre_atomic_barrier (enum memmodel model)
30383 if (need_atomic_barrier_p (model, true))
30384 emit_insn (gen_memory_barrier ());
30387 static void
30388 arm_post_atomic_barrier (enum memmodel model)
30390 if (need_atomic_barrier_p (model, false))
30391 emit_insn (gen_memory_barrier ());
30394 /* Emit the load-exclusive and store-exclusive instructions.
30395 Use acquire and release versions if necessary. */
30397 static void
30398 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30400 rtx (*gen) (rtx, rtx);
30402 if (acq)
30404 switch (mode)
30406 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30407 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30408 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30409 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30410 default:
30411 gcc_unreachable ();
30414 else
30416 switch (mode)
30418 case QImode: gen = gen_arm_load_exclusiveqi; break;
30419 case HImode: gen = gen_arm_load_exclusivehi; break;
30420 case SImode: gen = gen_arm_load_exclusivesi; break;
30421 case DImode: gen = gen_arm_load_exclusivedi; break;
30422 default:
30423 gcc_unreachable ();
30427 emit_insn (gen (rval, mem));
30430 static void
30431 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30432 rtx mem, bool rel)
30434 rtx (*gen) (rtx, rtx, rtx);
30436 if (rel)
30438 switch (mode)
30440 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30441 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30442 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30443 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30444 default:
30445 gcc_unreachable ();
30448 else
30450 switch (mode)
30452 case QImode: gen = gen_arm_store_exclusiveqi; break;
30453 case HImode: gen = gen_arm_store_exclusivehi; break;
30454 case SImode: gen = gen_arm_store_exclusivesi; break;
30455 case DImode: gen = gen_arm_store_exclusivedi; break;
30456 default:
30457 gcc_unreachable ();
30461 emit_insn (gen (bval, rval, mem));
30464 /* Emit the jump INSN and mark it as very unlikely to be taken. */
30466 static void
30467 emit_unlikely_jump (rtx insn)
30469 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30471 insn = emit_jump_insn (insn);
30472 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
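/* Illustrative arithmetic: with REG_BR_PROB_BASE == 10000 the note value
   computed above is 10000 / 100 - 1 == 99, i.e. the branch is annotated
   as taken roughly 1% of the time.  */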
30475 /* Expand a compare and swap pattern. */
30477 void
30478 arm_expand_compare_and_swap (rtx operands[])
30480 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30481 machine_mode mode;
30482 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30484 bval = operands[0];
30485 rval = operands[1];
30486 mem = operands[2];
30487 oldval = operands[3];
30488 newval = operands[4];
30489 is_weak = operands[5];
30490 mod_s = operands[6];
30491 mod_f = operands[7];
30492 mode = GET_MODE (mem);
30494 /* Normally the succ memory model must be stronger than fail, but in the
30495 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30496 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30498 if (TARGET_HAVE_LDACQ
30499 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30500 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30501 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30503 switch (mode)
30505 case QImode:
30506 case HImode:
30507 /* For narrow modes, we're going to perform the comparison in SImode,
30508 so do the zero-extension now. */
30509 rval = gen_reg_rtx (SImode);
30510 oldval = convert_modes (SImode, mode, oldval, true);
30511 /* FALLTHRU */
30513 case SImode:
30514 /* Force the value into a register if needed. We waited until after
30515 the zero-extension above to do this properly. */
30516 if (!arm_add_operand (oldval, SImode))
30517 oldval = force_reg (SImode, oldval);
30518 break;
30520 case DImode:
30521 if (!cmpdi_operand (oldval, mode))
30522 oldval = force_reg (mode, oldval);
30523 break;
30525 default:
30526 gcc_unreachable ();
30529 switch (mode)
30531 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30532 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30533 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30534 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30535 default:
30536 gcc_unreachable ();
30539 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30541 if (mode == QImode || mode == HImode)
30542 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30544 /* In all cases, we arrange for success to be signaled by Z set.
30545 This arrangement allows for the boolean result to be used directly
30546 in a subsequent branch, post optimization. */
30547 x = gen_rtx_REG (CCmode, CC_REGNUM);
30548 x = gen_rtx_EQ (SImode, x, const0_rtx);
30549 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30552 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30553 another memory store between the load-exclusive and store-exclusive can
30554 reset the monitor from Exclusive to Open state. This means we must wait
30555 until after reload to split the pattern, lest we get a register spill in
30556 the middle of the atomic sequence. */
30558 void
30559 arm_split_compare_and_swap (rtx operands[])
30561 rtx rval, mem, oldval, newval, scratch;
30562 machine_mode mode;
30563 enum memmodel mod_s, mod_f;
30564 bool is_weak;
30565 rtx_code_label *label1, *label2;
30566 rtx x, cond;
30568 rval = operands[0];
30569 mem = operands[1];
30570 oldval = operands[2];
30571 newval = operands[3];
30572 is_weak = (operands[4] != const0_rtx);
30573 mod_s = (enum memmodel) INTVAL (operands[5]);
30574 mod_f = (enum memmodel) INTVAL (operands[6]);
30575 scratch = operands[7];
30576 mode = GET_MODE (mem);
30578 bool use_acquire = TARGET_HAVE_LDACQ
30579 && !(mod_s == MEMMODEL_RELAXED
30580 || mod_s == MEMMODEL_CONSUME
30581 || mod_s == MEMMODEL_RELEASE);
30583 bool use_release = TARGET_HAVE_LDACQ
30584 && !(mod_s == MEMMODEL_RELAXED
30585 || mod_s == MEMMODEL_CONSUME
30586 || mod_s == MEMMODEL_ACQUIRE);
30588 /* Checks whether a barrier is needed and emits one accordingly. */
30589 if (!(use_acquire || use_release))
30590 arm_pre_atomic_barrier (mod_s);
30592 label1 = NULL;
30593 if (!is_weak)
30595 label1 = gen_label_rtx ();
30596 emit_label (label1);
30598 label2 = gen_label_rtx ();
30600 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30602 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30603 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30604 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30605 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30606 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30608 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30610 /* Weak or strong, we want EQ to be true for success, so that we
30611 match the flags that we got from the compare above. */
30612 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30613 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30614 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30616 if (!is_weak)
30618 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30619 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30620 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30621 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30624 if (mod_f != MEMMODEL_RELAXED)
30625 emit_label (label2);
30627 /* Checks whether a barrier is needed and emits one accordingly. */
30628 if (!(use_acquire || use_release))
30629 arm_post_atomic_barrier (mod_s);
30631 if (mod_f == MEMMODEL_RELAXED)
30632 emit_label (label2);
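/* An illustrative sketch (register names are placeholders): a strong
   SImode compare-and-swap with sequentially consistent ordering, on a
   target without acquire/release loads, is split into roughly

       dmb                      @ pre barrier
   1:  ldrex   rR, [rM]
       cmp     rR, rOld
       bne     2f
       strex   rS, rNew, [rM]
       cmp     rS, #0
       bne     1b
   2:  dmb                      @ post barrier
   */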
30635 void
30636 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30637 rtx value, rtx model_rtx, rtx cond)
30639 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30640 machine_mode mode = GET_MODE (mem);
30641 machine_mode wmode = (mode == DImode ? DImode : SImode);
30642 rtx_code_label *label;
30643 rtx x;
30645 bool use_acquire = TARGET_HAVE_LDACQ
30646 && !(model == MEMMODEL_RELAXED
30647 || model == MEMMODEL_CONSUME
30648 || model == MEMMODEL_RELEASE);
30650 bool use_release = TARGET_HAVE_LDACQ
30651 && !(model == MEMMODEL_RELAXED
30652 || model == MEMMODEL_CONSUME
30653 || model == MEMMODEL_ACQUIRE);
30655 /* Checks whether a barrier is needed and emits one accordingly. */
30656 if (!(use_acquire || use_release))
30657 arm_pre_atomic_barrier (model);
30659 label = gen_label_rtx ();
30660 emit_label (label);
30662 if (new_out)
30663 new_out = gen_lowpart (wmode, new_out);
30664 if (old_out)
30665 old_out = gen_lowpart (wmode, old_out);
30666 else
30667 old_out = new_out;
30668 value = simplify_gen_subreg (wmode, value, mode, 0);
30670 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30672 switch (code)
30674 case SET:
30675 new_out = value;
30676 break;
30678 case NOT:
30679 x = gen_rtx_AND (wmode, old_out, value);
30680 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30681 x = gen_rtx_NOT (wmode, new_out);
30682 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30683 break;
30685 case MINUS:
30686 if (CONST_INT_P (value))
30688 value = GEN_INT (-INTVAL (value));
30689 code = PLUS;
30691 /* FALLTHRU */
30693 case PLUS:
30694 if (mode == DImode)
30696 /* DImode plus/minus need to clobber flags. */
30697 /* The adddi3 and subdi3 patterns are incorrectly written so that
30698 they require matching operands, even when we could easily support
30699 three operands. Thankfully, this can be fixed up post-splitting,
30700 as the individual add+adc patterns do accept three operands and
30701 post-reload cprop can make these moves go away. */
30702 emit_move_insn (new_out, old_out);
30703 if (code == PLUS)
30704 x = gen_adddi3 (new_out, new_out, value);
30705 else
30706 x = gen_subdi3 (new_out, new_out, value);
30707 emit_insn (x);
30708 break;
30710 /* FALLTHRU */
30712 default:
30713 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30714 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30715 break;
30718 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30719 use_release);
30721 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30722 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30724 /* Checks whether a barrier is needed and emits one accordingly. */
30725 if (!(use_acquire || use_release))
30726 arm_post_atomic_barrier (model);
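/* An illustrative sketch (register names are placeholders): an SImode
   atomic fetch-and-add expands along the lines of

   1:  ldrex   rOld, [rM]
       add     rNew, rOld, rVal
       strex   rC, rNew, [rM]
       cmp     rC, #0
       bne     1b

   with barriers or acquire/release accesses added around the loop
   according to MODEL, as selected above.  */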
30729 #define MAX_VECT_LEN 16
30731 struct expand_vec_perm_d
30733 rtx target, op0, op1;
30734 unsigned char perm[MAX_VECT_LEN];
30735 machine_mode vmode;
30736 unsigned char nelt;
30737 bool one_vector_p;
30738 bool testing_p;
30741 /* Generate a variable permutation. */
30743 static void
30744 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30746 machine_mode vmode = GET_MODE (target);
30747 bool one_vector_p = rtx_equal_p (op0, op1);
30749 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30750 gcc_checking_assert (GET_MODE (op0) == vmode);
30751 gcc_checking_assert (GET_MODE (op1) == vmode);
30752 gcc_checking_assert (GET_MODE (sel) == vmode);
30753 gcc_checking_assert (TARGET_NEON);
30755 if (one_vector_p)
30757 if (vmode == V8QImode)
30758 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30759 else
30760 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30762 else
30764 rtx pair;
30766 if (vmode == V8QImode)
30768 pair = gen_reg_rtx (V16QImode);
30769 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30770 pair = gen_lowpart (TImode, pair);
30771 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30773 else
30775 pair = gen_reg_rtx (OImode);
30776 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30777 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30782 void
30783 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30785 machine_mode vmode = GET_MODE (target);
30786 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30787 bool one_vector_p = rtx_equal_p (op0, op1);
30788 rtx rmask[MAX_VECT_LEN], mask;
30790 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30791 numbering of elements for big-endian, we must reverse the order. */
30792 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30794 /* The VTBL instruction does not use a modulo index, so we must take care
30795 of that ourselves. */
30796 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30797 for (i = 0; i < nelt; ++i)
30798 rmask[i] = mask;
30799 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30800 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30802 arm_expand_vec_perm_1 (target, op0, op1, sel);
30805 /* Generate or test for an insn that supports a constant permutation. */
30807 /* Recognize patterns for the VUZP insns. */
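/* Example selectors (added for illustration): for two V8QImode operands,
   the first result of VUZP corresponds to
   perm = { 0, 2, 4, 6, 8, 10, 12, 14 } (even-indexed elements) and the
   second to { 1, 3, 5, 7, 9, 11, 13, 15 } (odd-indexed elements).  */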
30809 static bool
30810 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30812 unsigned int i, odd, mask, nelt = d->nelt;
30813 rtx out0, out1, in0, in1, x;
30814 rtx (*gen)(rtx, rtx, rtx, rtx);
30816 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30817 return false;
30819 /* Note that these are little-endian tests. Adjust for big-endian later. */
30820 if (d->perm[0] == 0)
30821 odd = 0;
30822 else if (d->perm[0] == 1)
30823 odd = 1;
30824 else
30825 return false;
30826 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30828 for (i = 0; i < nelt; i++)
30830 unsigned elt = (i * 2 + odd) & mask;
30831 if (d->perm[i] != elt)
30832 return false;
30835 /* Success! */
30836 if (d->testing_p)
30837 return true;
30839 switch (d->vmode)
30841 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30842 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30843 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30844 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30845 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30846 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30847 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30848 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30849 default:
30850 gcc_unreachable ();
30853 in0 = d->op0;
30854 in1 = d->op1;
30855 if (BYTES_BIG_ENDIAN)
30857 x = in0, in0 = in1, in1 = x;
30858 odd = !odd;
30861 out0 = d->target;
30862 out1 = gen_reg_rtx (d->vmode);
30863 if (odd)
30864 x = out0, out0 = out1, out1 = x;
30866 emit_insn (gen (out0, in0, in1, out1));
30867 return true;
30870 /* Recognize patterns for the VZIP insns. */
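/* Example selectors (added for illustration): for two V8QImode operands,
   the first result of VZIP interleaves the low halves,
   perm = { 0, 8, 1, 9, 2, 10, 3, 11 }, and the second interleaves the
   high halves, perm = { 4, 12, 5, 13, 6, 14, 7, 15 }.  */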
30872 static bool
30873 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30875 unsigned int i, high, mask, nelt = d->nelt;
30876 rtx out0, out1, in0, in1, x;
30877 rtx (*gen)(rtx, rtx, rtx, rtx);
30879 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30880 return false;
30882 /* Note that these are little-endian tests. Adjust for big-endian later. */
30883 high = nelt / 2;
30884 if (d->perm[0] == high)
30886 else if (d->perm[0] == 0)
30887 high = 0;
30888 else
30889 return false;
30890 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30892 for (i = 0; i < nelt / 2; i++)
30894 unsigned elt = (i + high) & mask;
30895 if (d->perm[i * 2] != elt)
30896 return false;
30897 elt = (elt + nelt) & mask;
30898 if (d->perm[i * 2 + 1] != elt)
30899 return false;
30902 /* Success! */
30903 if (d->testing_p)
30904 return true;
30906 switch (d->vmode)
30908 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30909 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30910 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30911 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30912 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30913 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30914 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30915 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30916 default:
30917 gcc_unreachable ();
30920 in0 = d->op0;
30921 in1 = d->op1;
30922 if (BYTES_BIG_ENDIAN)
30924 x = in0, in0 = in1, in1 = x;
30925 high = !high;
30928 out0 = d->target;
30929 out1 = gen_reg_rtx (d->vmode);
30930 if (high)
30931 x = out0, out0 = out1, out1 = x;
30933 emit_insn (gen (out0, in0, in1, out1));
30934 return true;
30937 /* Recognize patterns for the VREV insns. */
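/* Example selectors (added for illustration): with QImode elements,
   VREV64 corresponds to perm = { 7, 6, 5, 4, 3, 2, 1, 0, ... } (diff 7),
   VREV32 to { 3, 2, 1, 0, 7, 6, 5, 4, ... } (diff 3) and VREV16 to
   { 1, 0, 3, 2, 5, 4, 7, 6, ... } (diff 1), i.e. the elements are
   reversed within each 64-, 32- or 16-bit group.  */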
30939 static bool
30940 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30942 unsigned int i, j, diff, nelt = d->nelt;
30943 rtx (*gen)(rtx, rtx, rtx);
30945 if (!d->one_vector_p)
30946 return false;
30948 diff = d->perm[0];
30949 switch (diff)
30951 case 7:
30952 switch (d->vmode)
30954 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30955 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30956 default:
30957 return false;
30959 break;
30960 case 3:
30961 switch (d->vmode)
30963 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30964 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30965 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30966 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30967 default:
30968 return false;
30970 break;
30971 case 1:
30972 switch (d->vmode)
30974 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30975 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30976 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30977 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30978 case V4SImode: gen = gen_neon_vrev64v4si; break;
30979 case V2SImode: gen = gen_neon_vrev64v2si; break;
30980 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30981 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30982 default:
30983 return false;
30985 break;
30986 default:
30987 return false;
30990 for (i = 0; i < nelt ; i += diff + 1)
30991 for (j = 0; j <= diff; j += 1)
30993 /* This is guaranteed to hold, since diff is 7, 3 or 1
30994 and there are always enough elements remaining to
30995 cover the group. A vector mask yielding any other
30996 value of diff implies that something has gone wrong
30997 by the time we get here. */
30998 gcc_assert (i + j < nelt);
30999 if (d->perm[i + j] != i + diff - j)
31000 return false;
31003 /* Success! */
31004 if (d->testing_p)
31005 return true;
31007 /* ??? The third operand is an artifact of the builtin infrastructure
31008 and is ignored by the actual instruction. */
31009 emit_insn (gen (d->target, d->op0, const0_rtx));
31010 return true;
31013 /* Recognize patterns for the VTRN insns. */
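/* Example selectors (added for illustration): for two V8QImode operands,
   the first result of VTRN is perm = { 0, 8, 2, 10, 4, 12, 6, 14 } and
   the second is { 1, 9, 3, 11, 5, 13, 7, 15 }, i.e. a transpose of the
   2x2 blocks formed by the two inputs.  */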
31015 static bool
31016 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31018 unsigned int i, odd, mask, nelt = d->nelt;
31019 rtx out0, out1, in0, in1, x;
31020 rtx (*gen)(rtx, rtx, rtx, rtx);
31022 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31023 return false;
31025 /* Note that these are little-endian tests. Adjust for big-endian later. */
31026 if (d->perm[0] == 0)
31027 odd = 0;
31028 else if (d->perm[0] == 1)
31029 odd = 1;
31030 else
31031 return false;
31032 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31034 for (i = 0; i < nelt; i += 2)
31036 if (d->perm[i] != i + odd)
31037 return false;
31038 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31039 return false;
31042 /* Success! */
31043 if (d->testing_p)
31044 return true;
31046 switch (d->vmode)
31048 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31049 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31050 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31051 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31052 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31053 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31054 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31055 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31056 default:
31057 gcc_unreachable ();
31060 in0 = d->op0;
31061 in1 = d->op1;
31062 if (BYTES_BIG_ENDIAN)
31064 x = in0, in0 = in1, in1 = x;
31065 odd = !odd;
31068 out0 = d->target;
31069 out1 = gen_reg_rtx (d->vmode);
31070 if (odd)
31071 x = out0, out0 = out1, out1 = x;
31073 emit_insn (gen (out0, in0, in1, out1));
31074 return true;
31077 /* Recognize patterns for the VEXT insns. */
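/* Example selector (added for illustration): for V8QImode operands,
   VEXT with an offset of 3 extracts bytes 3..10 of the concatenated
   pair, i.e. perm = { 3, 4, 5, 6, 7, 8, 9, 10 }.  */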
31079 static bool
31080 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31082 unsigned int i, nelt = d->nelt;
31083 rtx (*gen) (rtx, rtx, rtx, rtx);
31084 rtx offset;
31086 unsigned int location;
31088 unsigned int next = d->perm[0] + 1;
31090 /* TODO: Handle GCC's numbering of elements for big-endian. */
31091 if (BYTES_BIG_ENDIAN)
31092 return false;
31094 /* Check if the extracted indexes are increasing by one. */
31095 for (i = 1; i < nelt; next++, i++)
31097 /* If we hit the most significant element of the 2nd vector in
31098 the previous iteration, no need to test further. */
31099 if (next == 2 * nelt)
31100 return false;
31102 /* If we are operating on only one vector: it could be a
31103 rotation. If there are only two elements of size < 64, let
31104 arm_evpc_neon_vrev catch it. */
31105 if (d->one_vector_p && (next == nelt))
31107 if ((nelt == 2) && (d->vmode != V2DImode))
31108 return false;
31109 else
31110 next = 0;
31113 if (d->perm[i] != next)
31114 return false;
31117 location = d->perm[0];
31119 switch (d->vmode)
31121 case V16QImode: gen = gen_neon_vextv16qi; break;
31122 case V8QImode: gen = gen_neon_vextv8qi; break;
31123 case V4HImode: gen = gen_neon_vextv4hi; break;
31124 case V8HImode: gen = gen_neon_vextv8hi; break;
31125 case V2SImode: gen = gen_neon_vextv2si; break;
31126 case V4SImode: gen = gen_neon_vextv4si; break;
31127 case V2SFmode: gen = gen_neon_vextv2sf; break;
31128 case V4SFmode: gen = gen_neon_vextv4sf; break;
31129 case V2DImode: gen = gen_neon_vextv2di; break;
31130 default:
31131 return false;
31134 /* Success! */
31135 if (d->testing_p)
31136 return true;
31138 offset = GEN_INT (location);
31139 emit_insn (gen (d->target, d->op0, d->op1, offset));
31140 return true;
31143 /* The NEON VTBL instruction is a fully variable permutation that's even
31144 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31145 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31146 can do slightly better by expanding this as a constant where we don't
31147 have to apply a mask. */
31149 static bool
31150 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31152 rtx rperm[MAX_VECT_LEN], sel;
31153 machine_mode vmode = d->vmode;
31154 unsigned int i, nelt = d->nelt;
31156 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31157 numbering of elements for big-endian, we must reverse the order. */
31158 if (BYTES_BIG_ENDIAN)
31159 return false;
31161 if (d->testing_p)
31162 return true;
31164 /* Generic code will try constant permutation twice. Once with the
31165 original mode and again with the elements lowered to QImode.
31166 So wait and don't do the selector expansion ourselves. */
31167 if (vmode != V8QImode && vmode != V16QImode)
31168 return false;
31170 for (i = 0; i < nelt; ++i)
31171 rperm[i] = GEN_INT (d->perm[i]);
31172 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31173 sel = force_reg (vmode, sel);
31175 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31176 return true;
31179 static bool
31180 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31182 /* Check if the input mask matches vext before reordering the
31183 operands. */
31184 if (TARGET_NEON)
31185 if (arm_evpc_neon_vext (d))
31186 return true;
31188 /* The pattern matching functions above are written to look for a small
31189 number to begin the sequence (0, 1, N/2). If we begin with an index
31190 from the second operand, we can swap the operands. */
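/* Example (added for illustration): perm = { 8, 0, 9, 1, 10, 2, 11, 3 }
   on V8QImode starts with an index from the second operand; after
   swapping op0 and op1 it becomes { 0, 8, 1, 9, 2, 10, 3, 11 }, which
   the VZIP recognizer below can match.  */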
31191 if (d->perm[0] >= d->nelt)
31193 unsigned i, nelt = d->nelt;
31194 rtx x;
31196 for (i = 0; i < nelt; ++i)
31197 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31199 x = d->op0;
31200 d->op0 = d->op1;
31201 d->op1 = x;
31204 if (TARGET_NEON)
31206 if (arm_evpc_neon_vuzp (d))
31207 return true;
31208 if (arm_evpc_neon_vzip (d))
31209 return true;
31210 if (arm_evpc_neon_vrev (d))
31211 return true;
31212 if (arm_evpc_neon_vtrn (d))
31213 return true;
31214 return arm_evpc_neon_vtbl (d);
31216 return false;
31219 /* Expand a vec_perm_const pattern. */
31221 bool
31222 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31224 struct expand_vec_perm_d d;
31225 int i, nelt, which;
31227 d.target = target;
31228 d.op0 = op0;
31229 d.op1 = op1;
31231 d.vmode = GET_MODE (target);
31232 gcc_assert (VECTOR_MODE_P (d.vmode));
31233 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31234 d.testing_p = false;
31236 for (i = which = 0; i < nelt; ++i)
31238 rtx e = XVECEXP (sel, 0, i);
31239 int ei = INTVAL (e) & (2 * nelt - 1);
31240 which |= (ei < nelt ? 1 : 2);
31241 d.perm[i] = ei;
31244 switch (which)
31246 default:
31247 gcc_unreachable();
31249 case 3:
31250 d.one_vector_p = false;
31251 if (!rtx_equal_p (op0, op1))
31252 break;
31254 /* The elements of PERM do not suggest that only the first operand
31255 is used, but both operands are identical. Allow easier matching
31256 of the permutation by folding the permutation into the single
31257 input vector. */
31258 /* FALLTHRU */
31259 case 2:
31260 for (i = 0; i < nelt; ++i)
31261 d.perm[i] &= nelt - 1;
31262 d.op0 = op1;
31263 d.one_vector_p = true;
31264 break;
31266 case 1:
31267 d.op1 = op0;
31268 d.one_vector_p = true;
31269 break;
31272 return arm_expand_vec_perm_const_1 (&d);
31275 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31277 static bool
31278 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31279 const unsigned char *sel)
31281 struct expand_vec_perm_d d;
31282 unsigned int i, nelt, which;
31283 bool ret;
31285 d.vmode = vmode;
31286 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31287 d.testing_p = true;
31288 memcpy (d.perm, sel, nelt);
31290 /* Categorize the set of elements in the selector. */
31291 for (i = which = 0; i < nelt; ++i)
31293 unsigned char e = d.perm[i];
31294 gcc_assert (e < 2 * nelt);
31295 which |= (e < nelt ? 1 : 2);
31298 /* For all elements from second vector, fold the elements to first. */
31299 if (which == 2)
31300 for (i = 0; i < nelt; ++i)
31301 d.perm[i] -= nelt;
31303 /* Check whether the mask can be applied to the vector type. */
31304 d.one_vector_p = (which != 3);
31306 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31307 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31308 if (!d.one_vector_p)
31309 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31311 start_sequence ();
31312 ret = arm_expand_vec_perm_const_1 (&d);
31313 end_sequence ();
31315 return ret;
31318 bool
31319 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31321 /* If we are soft float and we do not have ldrd
31322 then all auto increment forms are ok. */
31323 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31324 return true;
31326 switch (code)
31328 /* Post increment and Pre Decrement are supported for all
31329 instruction forms except for vector forms. */
31330 case ARM_POST_INC:
31331 case ARM_PRE_DEC:
31332 if (VECTOR_MODE_P (mode))
31334 if (code != ARM_PRE_DEC)
31335 return true;
31336 else
31337 return false;
31340 return true;
31342 case ARM_POST_DEC:
31343 case ARM_PRE_INC:
31344 /* Without LDRD and mode size greater than
31345 word size, there is no point in auto-incrementing
31346 because ldm and stm will not have these forms. */
31347 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31348 return false;
31350 /* Vector and floating point modes do not support
31351 these auto increment forms. */
31352 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31353 return false;
31355 return true;
31357 default:
31358 return false;
31362 return false;
31365 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31366 on ARM, since we know that shifts by negative amounts are no-ops.
31367 Additionally, the default expansion code is not available or suitable
31368 for post-reload insn splits (this can occur when the register allocator
31369 chooses not to do a shift in NEON).
31371 This function is used in both initial expand and post-reload splits, and
31372 handles all kinds of 64-bit shifts.
31374 Input requirements:
31375 - It is safe for the input and output to be the same register, but
31376 early-clobber rules apply for the shift amount and scratch registers.
31377 - Shift by register requires both scratch registers. In all other cases
31378 the scratch registers may be NULL.
31379 - Ashiftrt by a register also clobbers the CC register. */
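/* Illustrative sketch (added, not from the original source): for a
   constant left shift by 5 the code below emits the equivalent of
	lsl	out_high, in_high, #5
	orr	out_high, out_high, in_low, lsr #27
	lsl	out_low, in_low, #5
   with the "up"/"down" naming explained in the function body.  */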
31380 void
31381 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31382 rtx amount, rtx scratch1, rtx scratch2)
31384 rtx out_high = gen_highpart (SImode, out);
31385 rtx out_low = gen_lowpart (SImode, out);
31386 rtx in_high = gen_highpart (SImode, in);
31387 rtx in_low = gen_lowpart (SImode, in);
31389 /* Terminology:
31390 in = the register pair containing the input value.
31391 out = the destination register pair.
31392 up = the high- or low-part of each pair.
31393 down = the opposite part to "up".
31394 In a shift, we can consider bits to shift from "up"-stream to
31395 "down"-stream, so in a left-shift "up" is the low-part and "down"
31396 is the high-part of each register pair. */
31398 rtx out_up = code == ASHIFT ? out_low : out_high;
31399 rtx out_down = code == ASHIFT ? out_high : out_low;
31400 rtx in_up = code == ASHIFT ? in_low : in_high;
31401 rtx in_down = code == ASHIFT ? in_high : in_low;
31403 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31404 gcc_assert (out
31405 && (REG_P (out) || GET_CODE (out) == SUBREG)
31406 && GET_MODE (out) == DImode);
31407 gcc_assert (in
31408 && (REG_P (in) || GET_CODE (in) == SUBREG)
31409 && GET_MODE (in) == DImode);
31410 gcc_assert (amount
31411 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31412 && GET_MODE (amount) == SImode)
31413 || CONST_INT_P (amount)));
31414 gcc_assert (scratch1 == NULL
31415 || (GET_CODE (scratch1) == SCRATCH)
31416 || (GET_MODE (scratch1) == SImode
31417 && REG_P (scratch1)));
31418 gcc_assert (scratch2 == NULL
31419 || (GET_CODE (scratch2) == SCRATCH)
31420 || (GET_MODE (scratch2) == SImode
31421 && REG_P (scratch2)));
31422 gcc_assert (!REG_P (out) || !REG_P (amount)
31423 || !HARD_REGISTER_P (out)
31424 || (REGNO (out) != REGNO (amount)
31425 && REGNO (out) + 1 != REGNO (amount)));
31427 /* Macros to make following code more readable. */
31428 #define SUB_32(DEST,SRC) \
31429 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31430 #define RSB_32(DEST,SRC) \
31431 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31432 #define SUB_S_32(DEST,SRC) \
31433 gen_addsi3_compare0 ((DEST), (SRC), \
31434 GEN_INT (-32))
31435 #define SET(DEST,SRC) \
31436 gen_rtx_SET (SImode, (DEST), (SRC))
31437 #define SHIFT(CODE,SRC,AMOUNT) \
31438 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31439 #define LSHIFT(CODE,SRC,AMOUNT) \
31440 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31441 SImode, (SRC), (AMOUNT))
31442 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31443 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31444 SImode, (SRC), (AMOUNT))
31445 #define ORR(A,B) \
31446 gen_rtx_IOR (SImode, (A), (B))
31447 #define BRANCH(COND,LABEL) \
31448 gen_arm_cond_branch ((LABEL), \
31449 gen_rtx_ ## COND (CCmode, cc_reg, \
31450 const0_rtx), \
31451 cc_reg)
31453 /* Shifts by register and shifts by constant are handled separately. */
31454 if (CONST_INT_P (amount))
31456 /* We have a shift-by-constant. */
31458 /* First, handle out-of-range shift amounts.
31459 In both cases we try to match the result that an ARM instruction in a
31460 shift-by-register would give. This helps reduce execution
31461 differences between optimization levels, but it won't stop other
31462 parts of the compiler doing different things. This is "undefined
31463 behaviour", in any case. */
31464 if (INTVAL (amount) <= 0)
31465 emit_insn (gen_movdi (out, in));
31466 else if (INTVAL (amount) >= 64)
31468 if (code == ASHIFTRT)
31470 rtx const31_rtx = GEN_INT (31);
31471 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31472 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31474 else
31475 emit_insn (gen_movdi (out, const0_rtx));
31478 /* Now handle valid shifts. */
31479 else if (INTVAL (amount) < 32)
31481 /* Shifts by a constant less than 32. */
31482 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31484 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31485 emit_insn (SET (out_down,
31486 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31487 out_down)));
31488 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31490 else
31492 /* Shifts by a constant greater than 31. */
31493 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31495 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31496 if (code == ASHIFTRT)
31497 emit_insn (gen_ashrsi3 (out_up, in_up,
31498 GEN_INT (31)));
31499 else
31500 emit_insn (SET (out_up, const0_rtx));
31503 else
31505 /* We have a shift-by-register. */
31506 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31508 /* This alternative requires the scratch registers. */
31509 gcc_assert (scratch1 && REG_P (scratch1));
31510 gcc_assert (scratch2 && REG_P (scratch2));
31512 /* We will need the values "amount-32" and "32-amount" later.
31513 Swapping them around now allows the later code to be more general. */
31514 switch (code)
31516 case ASHIFT:
31517 emit_insn (SUB_32 (scratch1, amount));
31518 emit_insn (RSB_32 (scratch2, amount));
31519 break;
31520 case ASHIFTRT:
31521 emit_insn (RSB_32 (scratch1, amount));
31522 /* Also set CC = amount > 32. */
31523 emit_insn (SUB_S_32 (scratch2, amount));
31524 break;
31525 case LSHIFTRT:
31526 emit_insn (RSB_32 (scratch1, amount));
31527 emit_insn (SUB_32 (scratch2, amount));
31528 break;
31529 default:
31530 gcc_unreachable ();
31533 /* Emit code like this:
31535 arithmetic-left:
31536 out_down = in_down << amount;
31537 out_down = (in_up << (amount - 32)) | out_down;
31538 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31539 out_up = in_up << amount;
31541 arithmetic-right:
31542 out_down = in_down >> amount;
31543 out_down = (in_up << (32 - amount)) | out_down;
31544 if (amount < 32)
31545 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31546 out_up = in_up << amount;
31548 logical-right:
31549 out_down = in_down >> amount;
31550 out_down = (in_up << (32 - amount)) | out_down;
31551 if (amount < 32)
31552 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31553 out_up = in_up << amount;
31555 The ARM and Thumb2 variants are the same but implemented slightly
31556 differently. If this were only called during expand we could just
31557 use the Thumb2 case and let combine do the right thing, but this
31558 can also be called from post-reload splitters. */
31560 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31562 if (!TARGET_THUMB2)
31564 /* Emit code for ARM mode. */
31565 emit_insn (SET (out_down,
31566 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31567 if (code == ASHIFTRT)
31569 rtx_code_label *done_label = gen_label_rtx ();
31570 emit_jump_insn (BRANCH (LT, done_label));
31571 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31572 out_down)));
31573 emit_label (done_label);
31575 else
31576 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31577 out_down)));
31579 else
31581 /* Emit code for Thumb2 mode.
31582 Thumb2 can't do shift and or in one insn. */
31583 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31584 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31586 if (code == ASHIFTRT)
31588 rtx_code_label *done_label = gen_label_rtx ();
31589 emit_jump_insn (BRANCH (LT, done_label));
31590 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31591 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31592 emit_label (done_label);
31594 else
31596 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31597 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31601 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31604 #undef SUB_32
31605 #undef RSB_32
31606 #undef SUB_S_32
31607 #undef SET
31608 #undef SHIFT
31609 #undef LSHIFT
31610 #undef REV_LSHIFT
31611 #undef ORR
31612 #undef BRANCH
31616 /* Return TRUE if the comparison in *COMPARISON can be handled, forcing
31617 the operands into a form that is valid for it. */
31618 bool
31619 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31621 enum rtx_code code = GET_CODE (*comparison);
31622 int code_int;
31623 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31624 ? GET_MODE (*op2) : GET_MODE (*op1);
31626 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31628 if (code == UNEQ || code == LTGT)
31629 return false;
31631 code_int = (int)code;
31632 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31633 PUT_CODE (*comparison, (enum rtx_code)code_int);
31635 switch (mode)
31637 case SImode:
31638 if (!arm_add_operand (*op1, mode))
31639 *op1 = force_reg (mode, *op1);
31640 if (!arm_add_operand (*op2, mode))
31641 *op2 = force_reg (mode, *op2);
31642 return true;
31644 case DImode:
31645 if (!cmpdi_operand (*op1, mode))
31646 *op1 = force_reg (mode, *op1);
31647 if (!cmpdi_operand (*op2, mode))
31648 *op2 = force_reg (mode, *op2);
31649 return true;
31651 case SFmode:
31652 case DFmode:
31653 if (!arm_float_compare_operand (*op1, mode))
31654 *op1 = force_reg (mode, *op1);
31655 if (!arm_float_compare_operand (*op2, mode))
31656 *op2 = force_reg (mode, *op2);
31657 return true;
31658 default:
31659 break;
31662 return false;
31666 /* Return the maximum number of instructions to use for setting a block of memory. */
31667 static int
31668 arm_block_set_max_insns (void)
31670 if (optimize_function_for_size_p (cfun))
31671 return 4;
31672 else
31673 return current_tune->max_insns_inline_memset;
31676 /* Return TRUE if it's profitable to set block of memory for
31677 non-vectorized case. VAL is the value to set the memory
31678 with. LENGTH is the number of bytes to set. ALIGN is the
31679 alignment of the destination memory in bytes. UNALIGNED_P
31680 is TRUE if we can only set the memory with instructions
31681 meeting alignment requirements. USE_STRD_P is TRUE if we
31682 can use strd to set the memory. */
31683 static bool
31684 arm_block_set_non_vect_profit_p (rtx val,
31685 unsigned HOST_WIDE_INT length,
31686 unsigned HOST_WIDE_INT align,
31687 bool unaligned_p, bool use_strd_p)
31689 int num = 0;
31690 /* For a leftover of 0-7 bytes, this table gives the minimum number of
31691 strb/strh/str instructions needed to set them. */
31692 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
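/* Example (added for illustration): 7 leftover bytes take str + strh + strb
   = 3 instructions, hence leftover[7] == 3; 3 leftover bytes take
   strh + strb = 2 instructions, hence leftover[3] == 2.  */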
31694 if (unaligned_p)
31696 num = arm_const_inline_cost (SET, val);
31697 num += length / align + length % align;
31699 else if (use_strd_p)
31701 num = arm_const_double_inline_cost (val);
31702 num += (length >> 3) + leftover[length & 7];
31704 else
31706 num = arm_const_inline_cost (SET, val);
31707 num += (length >> 2) + leftover[length & 3];
31710 /* We may be able to combine last pair STRH/STRB into a single STR
31711 by shifting one byte back. */
31712 if (unaligned_access && length > 3 && (length & 3) == 3)
31713 num--;
31715 return (num <= arm_block_set_max_insns ());
31718 /* Return TRUE if it's profitable to set block of memory for
31719 vectorized case. LENGTH is the number of bytes to set.
31720 ALIGN is the alignment of destination memory in bytes.
31721 MODE is the vector mode used to set the memory. */
31722 static bool
31723 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31724 unsigned HOST_WIDE_INT align,
31725 machine_mode mode)
31727 int num;
31728 bool unaligned_p = ((align & 3) != 0);
31729 unsigned int nelt = GET_MODE_NUNITS (mode);
31731 /* Instruction loading constant value. */
31732 num = 1;
31733 /* Instructions storing the memory. */
31734 num += (length + nelt - 1) / nelt;
31735 /* Instructions adjusting the address expression. We only need to
31736 adjust the address expression if it's 4-byte aligned and the leftover
31737 bytes can only be stored by a misaligned store instruction. */
31738 if (!unaligned_p && (length & 3) != 0)
31739 num++;
31741 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31742 if (!unaligned_p && mode == V16QImode)
31743 num--;
31745 return (num <= arm_block_set_max_insns ());
31748 /* Set a block of memory using vectorization instructions for the
31749 unaligned case. We fill the first LENGTH bytes of the memory
31750 area starting from DSTBASE with byte constant VALUE. ALIGN is
31751 the alignment requirement of memory. Return TRUE if succeeded. */
31752 static bool
31753 arm_block_set_unaligned_vect (rtx dstbase,
31754 unsigned HOST_WIDE_INT length,
31755 unsigned HOST_WIDE_INT value,
31756 unsigned HOST_WIDE_INT align)
31758 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31759 rtx dst, mem;
31760 rtx val_elt, val_vec, reg;
31761 rtx rval[MAX_VECT_LEN];
31762 rtx (*gen_func) (rtx, rtx);
31763 machine_mode mode;
31764 unsigned HOST_WIDE_INT v = value;
31766 gcc_assert ((align & 0x3) != 0);
31767 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31768 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31769 if (length >= nelt_v16)
31771 mode = V16QImode;
31772 gen_func = gen_movmisalignv16qi;
31774 else
31776 mode = V8QImode;
31777 gen_func = gen_movmisalignv8qi;
31779 nelt_mode = GET_MODE_NUNITS (mode);
31780 gcc_assert (length >= nelt_mode);
31781 /* Skip if it isn't profitable. */
31782 if (!arm_block_set_vect_profit_p (length, align, mode))
31783 return false;
31785 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31786 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31788 v = sext_hwi (v, BITS_PER_WORD);
31789 val_elt = GEN_INT (v);
31790 for (j = 0; j < nelt_mode; j++)
31791 rval[j] = val_elt;
31793 reg = gen_reg_rtx (mode);
31794 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31795 /* Emit instruction loading the constant value. */
31796 emit_move_insn (reg, val_vec);
31798 /* Handle nelt_mode bytes in a vector. */
31799 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31801 emit_insn ((*gen_func) (mem, reg));
31802 if (i + 2 * nelt_mode <= length)
31803 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31806 /* If at least nelt_v8 bytes are left over, we must be operating in
31807 V16QImode. */
31808 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31810 /* Handle (8, 16) bytes leftover. */
31811 if (i + nelt_v8 < length)
31813 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31814 /* We are shifting bytes back, set the alignment accordingly. */
31815 if ((length & 1) != 0 && align >= 2)
31816 set_mem_align (mem, BITS_PER_UNIT);
31818 emit_insn (gen_movmisalignv16qi (mem, reg));
31820 /* Handle (0, 8] bytes leftover. */
31821 else if (i < length && i + nelt_v8 >= length)
31823 if (mode == V16QImode)
31825 reg = gen_lowpart (V8QImode, reg);
31826 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31828 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31829 + (nelt_mode - nelt_v8))));
31830 /* We are shifting bytes back, set the alignment accordingly. */
31831 if ((length & 1) != 0 && align >= 2)
31832 set_mem_align (mem, BITS_PER_UNIT);
31834 emit_insn (gen_movmisalignv8qi (mem, reg));
31837 return true;
31840 /* Set a block of memory using vectorization instructions for the
31841 aligned case. We fill the first LENGTH bytes of the memory area
31842 starting from DSTBASE with byte constant VALUE. ALIGN is the
31843 alignment requirement of memory. Return TRUE if succeeded. */
31844 static bool
31845 arm_block_set_aligned_vect (rtx dstbase,
31846 unsigned HOST_WIDE_INT length,
31847 unsigned HOST_WIDE_INT value,
31848 unsigned HOST_WIDE_INT align)
31850 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31851 rtx dst, addr, mem;
31852 rtx val_elt, val_vec, reg;
31853 rtx rval[MAX_VECT_LEN];
31854 machine_mode mode;
31855 unsigned HOST_WIDE_INT v = value;
31857 gcc_assert ((align & 0x3) == 0);
31858 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31859 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31860 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31861 mode = V16QImode;
31862 else
31863 mode = V8QImode;
31865 nelt_mode = GET_MODE_NUNITS (mode);
31866 gcc_assert (length >= nelt_mode);
31867 /* Skip if it isn't profitable. */
31868 if (!arm_block_set_vect_profit_p (length, align, mode))
31869 return false;
31871 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31873 v = sext_hwi (v, BITS_PER_WORD);
31874 val_elt = GEN_INT (v);
31875 for (j = 0; j < nelt_mode; j++)
31876 rval[j] = val_elt;
31878 reg = gen_reg_rtx (mode);
31879 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31880 /* Emit instruction loading the constant value. */
31881 emit_move_insn (reg, val_vec);
31883 i = 0;
31884 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31885 if (mode == V16QImode)
31887 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31888 emit_insn (gen_movmisalignv16qi (mem, reg));
31889 i += nelt_mode;
31890 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31891 if (i + nelt_v8 < length && i + nelt_v16 > length)
31893 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31894 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31895 /* We are shifting bytes back, set the alignment accordingly. */
31896 if ((length & 0x3) == 0)
31897 set_mem_align (mem, BITS_PER_UNIT * 4);
31898 else if ((length & 0x1) == 0)
31899 set_mem_align (mem, BITS_PER_UNIT * 2);
31900 else
31901 set_mem_align (mem, BITS_PER_UNIT);
31903 emit_insn (gen_movmisalignv16qi (mem, reg));
31904 return true;
31906 /* Fall through for bytes leftover. */
31907 mode = V8QImode;
31908 nelt_mode = GET_MODE_NUNITS (mode);
31909 reg = gen_lowpart (V8QImode, reg);
31912 /* Handle 8 bytes in a vector. */
31913 for (; (i + nelt_mode <= length); i += nelt_mode)
31915 addr = plus_constant (Pmode, dst, i);
31916 mem = adjust_automodify_address (dstbase, mode, addr, i);
31917 emit_move_insn (mem, reg);
31920 /* Handle single word leftover by shifting 4 bytes back. We can
31921 use aligned access for this case. */
31922 if (i + UNITS_PER_WORD == length)
31924 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31925 mem = adjust_automodify_address (dstbase, mode,
31926 addr, i - UNITS_PER_WORD);
31927 /* We are shifting 4 bytes back, set the alignment accordingly. */
31928 if (align > UNITS_PER_WORD)
31929 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31931 emit_move_insn (mem, reg);
31933 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31934 We have to use unaligned access for this case. */
31935 else if (i < length)
31937 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31938 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31939 /* We are shifting bytes back, set the alignment accordingly. */
31940 if ((length & 1) == 0)
31941 set_mem_align (mem, BITS_PER_UNIT * 2);
31942 else
31943 set_mem_align (mem, BITS_PER_UNIT);
31945 emit_insn (gen_movmisalignv8qi (mem, reg));
31948 return true;
31951 /* Set a block of memory using plain strh/strb instructions, only
31952 using instructions allowed by ALIGN on the processor. We fill the
31953 first LENGTH bytes of the memory area starting from DSTBASE
31954 with byte constant VALUE. ALIGN is the alignment requirement
31955 of memory. */
31956 static bool
31957 arm_block_set_unaligned_non_vect (rtx dstbase,
31958 unsigned HOST_WIDE_INT length,
31959 unsigned HOST_WIDE_INT value,
31960 unsigned HOST_WIDE_INT align)
31962 unsigned int i;
31963 rtx dst, addr, mem;
31964 rtx val_exp, val_reg, reg;
31965 machine_mode mode;
31966 HOST_WIDE_INT v = value;
31968 gcc_assert (align == 1 || align == 2);
31970 if (align == 2)
31971 v |= (value << BITS_PER_UNIT);
31973 v = sext_hwi (v, BITS_PER_WORD);
31974 val_exp = GEN_INT (v);
31975 /* Skip if it isn't profitable. */
31976 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31977 align, true, false))
31978 return false;
31980 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31981 mode = (align == 2 ? HImode : QImode);
31982 val_reg = force_reg (SImode, val_exp);
31983 reg = gen_lowpart (mode, val_reg);
31985 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31987 addr = plus_constant (Pmode, dst, i);
31988 mem = adjust_automodify_address (dstbase, mode, addr, i);
31989 emit_move_insn (mem, reg);
31992 /* Handle single byte leftover. */
31993 if (i + 1 == length)
31995 reg = gen_lowpart (QImode, val_reg);
31996 addr = plus_constant (Pmode, dst, i);
31997 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31998 emit_move_insn (mem, reg);
31999 i++;
32002 gcc_assert (i == length);
32003 return true;
32006 /* Set a block of memory using plain strd/str/strh/strb instructions,
32007 to permit unaligned copies on processors which support unaligned
32008 semantics for those instructions. We fill the first LENGTH bytes
32009 of the memory area starting from DSTBASE with byte constant VALUE.
32010 ALIGN is the alignment requirement of memory. */
32011 static bool
32012 arm_block_set_aligned_non_vect (rtx dstbase,
32013 unsigned HOST_WIDE_INT length,
32014 unsigned HOST_WIDE_INT value,
32015 unsigned HOST_WIDE_INT align)
32017 unsigned int i;
32018 rtx dst, addr, mem;
32019 rtx val_exp, val_reg, reg;
32020 unsigned HOST_WIDE_INT v;
32021 bool use_strd_p;
32023 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32024 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32026 v = (value | (value << 8) | (value << 16) | (value << 24));
32027 if (length < UNITS_PER_WORD)
32028 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32030 if (use_strd_p)
32031 v |= (v << BITS_PER_WORD);
32032 else
32033 v = sext_hwi (v, BITS_PER_WORD);
32035 val_exp = GEN_INT (v);
32036 /* Skip if it isn't profitable. */
32037 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32038 align, false, use_strd_p))
32040 if (!use_strd_p)
32041 return false;
32043 /* Try without strd. */
32044 v = (v >> BITS_PER_WORD);
32045 v = sext_hwi (v, BITS_PER_WORD);
32046 val_exp = GEN_INT (v);
32047 use_strd_p = false;
32048 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32049 align, false, use_strd_p))
32050 return false;
32053 i = 0;
32054 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32055 /* Handle double words using strd if possible. */
32056 if (use_strd_p)
32058 val_reg = force_reg (DImode, val_exp);
32059 reg = val_reg;
32060 for (; (i + 8 <= length); i += 8)
32062 addr = plus_constant (Pmode, dst, i);
32063 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32064 emit_move_insn (mem, reg);
32067 else
32068 val_reg = force_reg (SImode, val_exp);
32070 /* Handle words. */
32071 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32072 for (; (i + 4 <= length); i += 4)
32074 addr = plus_constant (Pmode, dst, i);
32075 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32076 if ((align & 3) == 0)
32077 emit_move_insn (mem, reg);
32078 else
32079 emit_insn (gen_unaligned_storesi (mem, reg));
32082 /* Merge last pair of STRH and STRB into a STR if possible. */
32083 if (unaligned_access && i > 0 && (i + 3) == length)
32085 addr = plus_constant (Pmode, dst, i - 1);
32086 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32087 /* We are shifting one byte back, set the alignment accordingly. */
32088 if ((align & 1) == 0)
32089 set_mem_align (mem, BITS_PER_UNIT);
32091 /* Most likely this is an unaligned access, and we can't tell at
32092 compilation time. */
32093 emit_insn (gen_unaligned_storesi (mem, reg));
32094 return true;
32097 /* Handle half word leftover. */
32098 if (i + 2 <= length)
32100 reg = gen_lowpart (HImode, val_reg);
32101 addr = plus_constant (Pmode, dst, i);
32102 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32103 if ((align & 1) == 0)
32104 emit_move_insn (mem, reg);
32105 else
32106 emit_insn (gen_unaligned_storehi (mem, reg));
32108 i += 2;
32111 /* Handle single byte leftover. */
32112 if (i + 1 == length)
32114 reg = gen_lowpart (QImode, val_reg);
32115 addr = plus_constant (Pmode, dst, i);
32116 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32117 emit_move_insn (mem, reg);
32120 return true;
32123 /* Set a block of memory using vectorization instructions for both
32124 aligned and unaligned cases. We fill the first LENGTH bytes of
32125 the memory area starting from DSTBASE with byte constant VALUE.
32126 ALIGN is the alignment requirement of memory. */
32127 static bool
32128 arm_block_set_vect (rtx dstbase,
32129 unsigned HOST_WIDE_INT length,
32130 unsigned HOST_WIDE_INT value,
32131 unsigned HOST_WIDE_INT align)
32133 /* Check whether we need to use unaligned store instruction. */
32134 if (((align & 3) != 0 || (length & 3) != 0)
32135 /* Check whether unaligned store instruction is available. */
32136 && (!unaligned_access || BYTES_BIG_ENDIAN))
32137 return false;
32139 if ((align & 3) == 0)
32140 return arm_block_set_aligned_vect (dstbase, length, value, align);
32141 else
32142 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32145 /* Expand a string store (memset) operation. First we try to do it
32146 using vectorization instructions, then fall back to ARM unaligned
32147 access and double-word stores if profitable. OPERANDS[0] is the
32148 destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
32149 value to initialize the memory with, OPERANDS[3] is the known
32150 alignment of the destination. */
32151 bool
32152 arm_gen_setmem (rtx *operands)
32154 rtx dstbase = operands[0];
32155 unsigned HOST_WIDE_INT length;
32156 unsigned HOST_WIDE_INT value;
32157 unsigned HOST_WIDE_INT align;
32159 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32160 return false;
32162 length = UINTVAL (operands[1]);
32163 if (length > 64)
32164 return false;
32166 value = (UINTVAL (operands[2]) & 0xFF);
32167 align = UINTVAL (operands[3]);
32168 if (TARGET_NEON && length >= 8
32169 && current_tune->string_ops_prefer_neon
32170 && arm_block_set_vect (dstbase, length, value, align))
32171 return true;
32173 if (!unaligned_access && (align & 3) != 0)
32174 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32176 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32179 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
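/* Added note (an assumption for illustration): with the usual 1:8 shadow
   mapping used by AddressSanitizer, the shadow byte for address ADDR lives
   at (ADDR >> 3) + offset, and 1 << 29 is 0x20000000, the customary
   offset for 32-bit targets.  */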
32181 static unsigned HOST_WIDE_INT
32182 arm_asan_shadow_offset (void)
32184 return (unsigned HOST_WIDE_INT) 1 << 29;
32188 /* This is a temporary fix for PR60655. Ideally we need
32189 to handle most of these cases in the generic part but
32190 currently we reject minus (..) (sym_ref). We try to
32191 ameliorate the case with minus (sym_ref1) (sym_ref2)
32192 where they are in the same section. */
32194 static bool
32195 arm_const_not_ok_for_debug_p (rtx p)
32197 tree decl_op0 = NULL;
32198 tree decl_op1 = NULL;
32200 if (GET_CODE (p) == MINUS)
32202 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32204 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32205 if (decl_op1
32206 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32207 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32209 if ((TREE_CODE (decl_op1) == VAR_DECL
32210 || TREE_CODE (decl_op1) == CONST_DECL)
32211 && (TREE_CODE (decl_op0) == VAR_DECL
32212 || TREE_CODE (decl_op0) == CONST_DECL))
32213 return (get_variable_section (decl_op1, false)
32214 != get_variable_section (decl_op0, false));
32216 if (TREE_CODE (decl_op1) == LABEL_DECL
32217 && TREE_CODE (decl_op0) == LABEL_DECL)
32218 return (DECL_CONTEXT (decl_op1)
32219 != DECL_CONTEXT (decl_op0));
32222 return true;
32226 return false;
32229 static void
32230 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32232 const unsigned ARM_FE_INVALID = 1;
32233 const unsigned ARM_FE_DIVBYZERO = 2;
32234 const unsigned ARM_FE_OVERFLOW = 4;
32235 const unsigned ARM_FE_UNDERFLOW = 8;
32236 const unsigned ARM_FE_INEXACT = 16;
32237 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32238 | ARM_FE_DIVBYZERO
32239 | ARM_FE_OVERFLOW
32240 | ARM_FE_UNDERFLOW
32241 | ARM_FE_INEXACT);
32242 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
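/* Added note (not in the original source): in the FPSCR the cumulative
   exception flag bits (IOC, DZC, OFC, UFC, IXC) occupy bits 0-4 and the
   corresponding trap-enable bits occupy bits 8-12, hence the shift by 8;
   the mask built below clears both groups.  */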
32243 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32244 tree new_fenv_var, reload_fenv, restore_fnenv;
32245 tree update_call, atomic_feraiseexcept, hold_fnclex;
32247 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32248 return;
32250 /* Generate the equivalent of:
32251 unsigned int fenv_var;
32252 fenv_var = __builtin_arm_get_fpscr ();
32254 unsigned int masked_fenv;
32255 masked_fenv = fenv_var & mask;
32257 __builtin_arm_set_fpscr (masked_fenv); */
32259 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32260 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32261 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32262 mask = build_int_cst (unsigned_type_node,
32263 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32264 | ARM_FE_ALL_EXCEPT));
32265 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32266 fenv_var, build_call_expr (get_fpscr, 0));
32267 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32268 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32269 *hold = build2 (COMPOUND_EXPR, void_type_node,
32270 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32271 hold_fnclex);
32273 /* Store the value of masked_fenv to clear the exceptions:
32274 __builtin_arm_set_fpscr (masked_fenv); */
32276 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32278 /* Generate the equivalent of:
32279 unsigned int new_fenv_var;
32280 new_fenv_var = __builtin_arm_get_fpscr ();
32282 __builtin_arm_set_fpscr (fenv_var);
32284 __atomic_feraiseexcept (new_fenv_var); */
32286 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32287 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32288 build_call_expr (get_fpscr, 0));
32289 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32290 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32291 update_call = build_call_expr (atomic_feraiseexcept, 1,
32292 fold_convert (integer_type_node, new_fenv_var));
32293 *update = build2 (COMPOUND_EXPR, void_type_node,
32294 build2 (COMPOUND_EXPR, void_type_node,
32295 reload_fenv, restore_fnenv), update_call);
32298 /* Return TRUE if X is a reference to a value in a constant pool. */
32299 extern bool
32300 arm_is_constant_pool_ref (rtx x)
32302 return (MEM_P (x)
32303 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32304 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32307 #include "gt-arm.h"