arm.c (neon_vector_mem_operand): Allow register POST_MODIFY for neon loads and stores.
gcc/config/arm/arm.c (official-gcc.git, blob ffe8e2105ab9b200b9553e38a9f6003f4cbbd12b)
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "gimple-expr.h"
64 #include "builtins.h"
66 /* Forward definitions of types. */
67 typedef struct minipool_node Mnode;
68 typedef struct minipool_fixup Mfix;
70 void (*arm_lang_output_object_attributes_hook)(void);
72 struct four_ints
74 int i[4];
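/* (Editorial note: judging from the forward declarations below, this small
   struct is the return buffer used by optimal_immediate_sequence and
   optimal_immediate_sequence_1 to hand back the up-to-four immediates that
   build a constant.)  */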
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets *arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
85 HOST_WIDE_INT, rtx, rtx, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx, int);
88 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
89 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
90 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
91 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
92 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
93 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
94 inline static int thumb1_index_register_rtx_p (rtx, int);
95 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
96 static int thumb_far_jump_used_p (void);
97 static bool thumb_force_lr_save (void);
98 static unsigned arm_size_return_regs (void);
99 static bool arm_assemble_integer (rtx, unsigned int, int);
100 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
101 static void arm_print_operand (FILE *, rtx, int);
102 static void arm_print_operand_address (FILE *, rtx);
103 static bool arm_print_operand_punct_valid_p (unsigned char code);
104 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
105 static arm_cc get_arm_condition_code (rtx);
106 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
107 static const char *output_multi_immediate (rtx *, const char *, const char *,
108 int, HOST_WIDE_INT);
109 static const char *shift_op (rtx, HOST_WIDE_INT *);
110 static struct machine_function *arm_init_machine_status (void);
111 static void thumb_exit (FILE *, int);
112 static HOST_WIDE_INT get_jump_table_size (rtx);
113 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
114 static Mnode *add_minipool_forward_ref (Mfix *);
115 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
116 static Mnode *add_minipool_backward_ref (Mfix *);
117 static void assign_minipool_offsets (Mfix *);
118 static void arm_print_value (FILE *, rtx);
119 static void dump_minipool (rtx);
120 static int arm_barrier_cost (rtx);
121 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
122 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
123 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
124 rtx);
125 static void arm_reorg (void);
126 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
127 static unsigned long arm_compute_save_reg0_reg12_mask (void);
128 static unsigned long arm_compute_save_reg_mask (void);
129 static unsigned long arm_isr_value (tree);
130 static unsigned long arm_compute_func_type (void);
131 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
135 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
136 #endif
137 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
138 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
139 static int arm_comp_type_attributes (const_tree, const_tree);
140 static void arm_set_default_type_attributes (tree);
141 static int arm_adjust_cost (rtx, rtx, rtx, int);
142 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
143 static int optimal_immediate_sequence (enum rtx_code code,
144 unsigned HOST_WIDE_INT val,
145 struct four_ints *return_sequence);
146 static int optimal_immediate_sequence_1 (enum rtx_code code,
147 unsigned HOST_WIDE_INT val,
148 struct four_ints *return_sequence,
149 int i);
150 static int arm_get_strip_length (int);
151 static bool arm_function_ok_for_sibcall (tree, tree);
152 static enum machine_mode arm_promote_function_mode (const_tree,
153 enum machine_mode, int *,
154 const_tree, int);
155 static bool arm_return_in_memory (const_tree, const_tree);
156 static rtx arm_function_value (const_tree, const_tree, bool);
157 static rtx arm_libcall_value_1 (enum machine_mode);
158 static rtx arm_libcall_value (enum machine_mode, const_rtx);
159 static bool arm_function_value_regno_p (const unsigned int);
160 static void arm_internal_label (FILE *, const char *, unsigned long);
161 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
162 tree);
163 static bool arm_have_conditional_execution (void);
164 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
165 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
166 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
167 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
168 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
173 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
174 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
175 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
176 static void arm_init_builtins (void);
177 static void arm_init_iwmmxt_builtins (void);
178 static rtx safe_vector_operand (rtx, enum machine_mode);
179 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
180 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
181 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
182 static tree arm_builtin_decl (unsigned, bool);
183 static void emit_constant_insn (rtx cond, rtx pattern);
184 static rtx emit_set_insn (rtx, rtx);
185 static rtx emit_multi_reg_push (unsigned long, unsigned long);
186 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
187 tree, bool);
188 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
189 const_tree, bool);
190 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
191 const_tree, bool);
192 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
193 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
194 const_tree);
195 static rtx aapcs_libcall_value (enum machine_mode);
196 static int aapcs_select_return_coproc (const_tree, const_tree);
198 #ifdef OBJECT_FORMAT_ELF
199 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
200 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
201 #endif
202 #ifndef ARM_PE
203 static void arm_encode_section_info (tree, rtx, int);
204 #endif
206 static void arm_file_end (void);
207 static void arm_file_start (void);
209 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
210 tree, int *, int);
211 static bool arm_pass_by_reference (cumulative_args_t,
212 enum machine_mode, const_tree, bool);
213 static bool arm_promote_prototypes (const_tree);
214 static bool arm_default_short_enums (void);
215 static bool arm_align_anon_bitfield (void);
216 static bool arm_return_in_msb (const_tree);
217 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
218 static bool arm_return_in_memory (const_tree, const_tree);
219 #if ARM_UNWIND_INFO
220 static void arm_unwind_emit (FILE *, rtx);
221 static bool arm_output_ttype (rtx);
222 static void arm_asm_emit_except_personality (rtx);
223 static void arm_asm_init_sections (void);
224 #endif
225 static rtx arm_dwarf_register_span (rtx);
227 static tree arm_cxx_guard_type (void);
228 static bool arm_cxx_guard_mask_bit (void);
229 static tree arm_get_cookie_size (tree);
230 static bool arm_cookie_has_size (void);
231 static bool arm_cxx_cdtor_returns_this (void);
232 static bool arm_cxx_key_method_may_be_inline (void);
233 static void arm_cxx_determine_class_data_visibility (tree);
234 static bool arm_cxx_class_data_always_comdat (void);
235 static bool arm_cxx_use_aeabi_atexit (void);
236 static void arm_init_libfuncs (void);
237 static tree arm_build_builtin_va_list (void);
238 static void arm_expand_builtin_va_start (tree, rtx);
239 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
240 static void arm_option_override (void);
241 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
242 static bool arm_cannot_copy_insn_p (rtx);
243 static int arm_issue_rate (void);
244 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
245 static bool arm_output_addr_const_extra (FILE *, rtx);
246 static bool arm_allocate_stack_slots_for_args (void);
247 static bool arm_warn_func_return (tree);
248 static const char *arm_invalid_parameter_type (const_tree t);
249 static const char *arm_invalid_return_type (const_tree t);
250 static tree arm_promoted_type (const_tree t);
251 static tree arm_convert_to_type (tree type, tree expr);
252 static bool arm_scalar_mode_supported_p (enum machine_mode);
253 static bool arm_frame_pointer_required (void);
254 static bool arm_can_eliminate (const int, const int);
255 static void arm_asm_trampoline_template (FILE *);
256 static void arm_trampoline_init (rtx, tree, rtx);
257 static rtx arm_trampoline_adjust_address (rtx);
258 static rtx arm_pic_static_addr (rtx orig, rtx reg);
259 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
260 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
261 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
262 static bool arm_array_mode_supported_p (enum machine_mode,
263 unsigned HOST_WIDE_INT);
264 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
265 static bool arm_class_likely_spilled_p (reg_class_t);
266 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
267 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
268 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
269 const_tree type,
270 int misalignment,
271 bool is_packed);
272 static void arm_conditional_register_usage (void);
273 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
274 static unsigned int arm_autovectorize_vector_sizes (void);
275 static int arm_default_branch_cost (bool, bool);
276 static int arm_cortex_a5_branch_cost (bool, bool);
277 static int arm_cortex_m_branch_cost (bool, bool);
279 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
280 const unsigned char *sel);
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
283 tree vectype,
284 int misalign ATTRIBUTE_UNUSED);
285 static unsigned arm_add_stmt_cost (void *data, int count,
286 enum vect_cost_for_stmt kind,
287 struct _stmt_vec_info *stmt_info,
288 int misalign,
289 enum vect_cost_model_location where);
291 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
292 bool op0_preserve_value);
293 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
295 /* Table of machine attributes. */
296 static const struct attribute_spec arm_attribute_table[] =
298 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
299 affects_type_identity } */
300 /* Function calls made to this symbol must be done indirectly, because
301 it may lie outside of the 26 bit addressing range of a normal function
302 call. */
303 { "long_call", 0, 0, false, true, true, NULL, false },
304 /* Whereas these functions are always known to reside within the 26 bit
305 addressing range. */
306 { "short_call", 0, 0, false, true, true, NULL, false },
307 /* Specify the procedure call conventions for a function. */
308 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
309 false },
310 /* Interrupt Service Routines have special prologue and epilogue requirements. */
311 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
312 false },
313 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
314 false },
315 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
316 false },
317 #ifdef ARM_PE
318 /* ARM/PE has three new attributes:
319 interfacearm - ?
320 dllexport - for exporting a function/variable that will live in a dll
321 dllimport - for importing a function/variable from a dll
323 Microsoft allows multiple declspecs in one __declspec, separating
324 them with spaces. We do NOT support this. Instead, use __declspec
 325      multiple times.  */
327 { "dllimport", 0, 0, true, false, false, NULL, false },
328 { "dllexport", 0, 0, true, false, false, NULL, false },
329 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
332 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
333 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
334 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
335 false },
336 #endif
337 { NULL, 0, 0, false, false, false, NULL, false }
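/* Illustrative only (not part of this file): the attributes registered in the
   table above are written in user code roughly as follows; the arm_handle_*
   functions named above are the handlers that validate such uses.  */
#if 0
void far_helper (void) __attribute__ ((long_call));        /* Always called indirectly.  */
void near_helper (void) __attribute__ ((short_call));      /* Always within BL range.  */
void uart_isr (void) __attribute__ ((interrupt ("IRQ")));  /* ISR prologue/epilogue.  */
double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp"))); /* VFP calling convention.  */
void asm_stub (void) __attribute__ ((naked));               /* No compiler-generated prologue.  */
#endif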
340 /* Initialize the GCC target structure. */
341 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
342 #undef TARGET_MERGE_DECL_ATTRIBUTES
343 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
344 #endif
346 #undef TARGET_LEGITIMIZE_ADDRESS
347 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
349 #undef TARGET_LRA_P
350 #define TARGET_LRA_P arm_lra_p
352 #undef TARGET_ATTRIBUTE_TABLE
353 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
355 #undef TARGET_ASM_FILE_START
356 #define TARGET_ASM_FILE_START arm_file_start
357 #undef TARGET_ASM_FILE_END
358 #define TARGET_ASM_FILE_END arm_file_end
360 #undef TARGET_ASM_ALIGNED_SI_OP
361 #define TARGET_ASM_ALIGNED_SI_OP NULL
362 #undef TARGET_ASM_INTEGER
363 #define TARGET_ASM_INTEGER arm_assemble_integer
365 #undef TARGET_PRINT_OPERAND
366 #define TARGET_PRINT_OPERAND arm_print_operand
367 #undef TARGET_PRINT_OPERAND_ADDRESS
368 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
369 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
370 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
372 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
373 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
375 #undef TARGET_ASM_FUNCTION_PROLOGUE
376 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
378 #undef TARGET_ASM_FUNCTION_EPILOGUE
379 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
381 #undef TARGET_OPTION_OVERRIDE
382 #define TARGET_OPTION_OVERRIDE arm_option_override
384 #undef TARGET_COMP_TYPE_ATTRIBUTES
385 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
387 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
388 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
390 #undef TARGET_SCHED_ADJUST_COST
391 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER arm_sched_reorder
396 #undef TARGET_REGISTER_MOVE_COST
397 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
399 #undef TARGET_MEMORY_MOVE_COST
400 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
402 #undef TARGET_ENCODE_SECTION_INFO
403 #ifdef ARM_PE
404 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
405 #else
406 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
407 #endif
409 #undef TARGET_STRIP_NAME_ENCODING
410 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
412 #undef TARGET_ASM_INTERNAL_LABEL
413 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
415 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
416 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
418 #undef TARGET_FUNCTION_VALUE
419 #define TARGET_FUNCTION_VALUE arm_function_value
421 #undef TARGET_LIBCALL_VALUE
422 #define TARGET_LIBCALL_VALUE arm_libcall_value
424 #undef TARGET_FUNCTION_VALUE_REGNO_P
425 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
427 #undef TARGET_ASM_OUTPUT_MI_THUNK
428 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
429 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
430 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
432 #undef TARGET_RTX_COSTS
433 #define TARGET_RTX_COSTS arm_rtx_costs
434 #undef TARGET_ADDRESS_COST
435 #define TARGET_ADDRESS_COST arm_address_cost
437 #undef TARGET_SHIFT_TRUNCATION_MASK
438 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
439 #undef TARGET_VECTOR_MODE_SUPPORTED_P
440 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
441 #undef TARGET_ARRAY_MODE_SUPPORTED_P
442 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
443 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
444 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
445 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
446 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
447 arm_autovectorize_vector_sizes
449 #undef TARGET_MACHINE_DEPENDENT_REORG
450 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
452 #undef TARGET_INIT_BUILTINS
453 #define TARGET_INIT_BUILTINS arm_init_builtins
454 #undef TARGET_EXPAND_BUILTIN
455 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
456 #undef TARGET_BUILTIN_DECL
457 #define TARGET_BUILTIN_DECL arm_builtin_decl
459 #undef TARGET_INIT_LIBFUNCS
460 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
462 #undef TARGET_PROMOTE_FUNCTION_MODE
463 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
464 #undef TARGET_PROMOTE_PROTOTYPES
465 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
466 #undef TARGET_PASS_BY_REFERENCE
467 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
468 #undef TARGET_ARG_PARTIAL_BYTES
469 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
470 #undef TARGET_FUNCTION_ARG
471 #define TARGET_FUNCTION_ARG arm_function_arg
472 #undef TARGET_FUNCTION_ARG_ADVANCE
473 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
474 #undef TARGET_FUNCTION_ARG_BOUNDARY
475 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
477 #undef TARGET_SETUP_INCOMING_VARARGS
478 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
480 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
481 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
483 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
484 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
485 #undef TARGET_TRAMPOLINE_INIT
486 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
487 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
488 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
490 #undef TARGET_WARN_FUNC_RETURN
491 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
493 #undef TARGET_DEFAULT_SHORT_ENUMS
494 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
496 #undef TARGET_ALIGN_ANON_BITFIELD
497 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
499 #undef TARGET_NARROW_VOLATILE_BITFIELD
500 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
502 #undef TARGET_CXX_GUARD_TYPE
503 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
505 #undef TARGET_CXX_GUARD_MASK_BIT
506 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
508 #undef TARGET_CXX_GET_COOKIE_SIZE
509 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
511 #undef TARGET_CXX_COOKIE_HAS_SIZE
512 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
514 #undef TARGET_CXX_CDTOR_RETURNS_THIS
515 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
517 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
518 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
520 #undef TARGET_CXX_USE_AEABI_ATEXIT
521 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
523 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
524 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
525 arm_cxx_determine_class_data_visibility
527 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
528 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
530 #undef TARGET_RETURN_IN_MSB
531 #define TARGET_RETURN_IN_MSB arm_return_in_msb
533 #undef TARGET_RETURN_IN_MEMORY
534 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
536 #undef TARGET_MUST_PASS_IN_STACK
537 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
539 #if ARM_UNWIND_INFO
540 #undef TARGET_ASM_UNWIND_EMIT
541 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
543 /* EABI unwinding tables use a different format for the typeinfo tables. */
544 #undef TARGET_ASM_TTYPE
545 #define TARGET_ASM_TTYPE arm_output_ttype
547 #undef TARGET_ARM_EABI_UNWINDER
548 #define TARGET_ARM_EABI_UNWINDER true
550 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
551 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
553 #undef TARGET_ASM_INIT_SECTIONS
554 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
555 #endif /* ARM_UNWIND_INFO */
557 #undef TARGET_DWARF_REGISTER_SPAN
558 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
560 #undef TARGET_CANNOT_COPY_INSN_P
561 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
563 #ifdef HAVE_AS_TLS
564 #undef TARGET_HAVE_TLS
565 #define TARGET_HAVE_TLS true
566 #endif
568 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
569 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
571 #undef TARGET_LEGITIMATE_CONSTANT_P
572 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
574 #undef TARGET_CANNOT_FORCE_CONST_MEM
575 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
577 #undef TARGET_MAX_ANCHOR_OFFSET
578 #define TARGET_MAX_ANCHOR_OFFSET 4095
580 /* The minimum is set such that the total size of the block
581 for a particular anchor is -4088 + 1 + 4095 bytes, which is
582 divisible by eight, ensuring natural spacing of anchors. */
583 #undef TARGET_MIN_ANCHOR_OFFSET
584 #define TARGET_MIN_ANCHOR_OFFSET -4088
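/* For reference: the two limits above give each anchor a reach of
   4088 + 1 + 4095 = 8184 bytes, and 8184 == 8 * 1023, so the block size is a
   multiple of eight as the comment above requires.  */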
586 #undef TARGET_SCHED_ISSUE_RATE
587 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
589 #undef TARGET_MANGLE_TYPE
590 #define TARGET_MANGLE_TYPE arm_mangle_type
592 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
593 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
595 #undef TARGET_BUILD_BUILTIN_VA_LIST
596 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
597 #undef TARGET_EXPAND_BUILTIN_VA_START
598 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
599 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
600 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
602 #ifdef HAVE_AS_TLS
603 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
604 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
605 #endif
607 #undef TARGET_LEGITIMATE_ADDRESS_P
608 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
610 #undef TARGET_PREFERRED_RELOAD_CLASS
611 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
613 #undef TARGET_INVALID_PARAMETER_TYPE
614 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
616 #undef TARGET_INVALID_RETURN_TYPE
617 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
619 #undef TARGET_PROMOTED_TYPE
620 #define TARGET_PROMOTED_TYPE arm_promoted_type
622 #undef TARGET_CONVERT_TO_TYPE
623 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
625 #undef TARGET_SCALAR_MODE_SUPPORTED_P
626 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
628 #undef TARGET_FRAME_POINTER_REQUIRED
629 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
631 #undef TARGET_CAN_ELIMINATE
632 #define TARGET_CAN_ELIMINATE arm_can_eliminate
634 #undef TARGET_CONDITIONAL_REGISTER_USAGE
635 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
637 #undef TARGET_CLASS_LIKELY_SPILLED_P
638 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
640 #undef TARGET_VECTORIZE_BUILTINS
641 #define TARGET_VECTORIZE_BUILTINS
643 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
644 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
645 arm_builtin_vectorized_function
647 #undef TARGET_VECTOR_ALIGNMENT
648 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
650 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
651 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
652 arm_vector_alignment_reachable
654 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
655 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
656 arm_builtin_support_vector_misalignment
658 #undef TARGET_PREFERRED_RENAME_CLASS
659 #define TARGET_PREFERRED_RENAME_CLASS \
660 arm_preferred_rename_class
662 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
663 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
664 arm_vectorize_vec_perm_const_ok
666 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
667 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
668 arm_builtin_vectorization_cost
669 #undef TARGET_VECTORIZE_ADD_STMT_COST
670 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
672 #undef TARGET_CANONICALIZE_COMPARISON
673 #define TARGET_CANONICALIZE_COMPARISON \
674 arm_canonicalize_comparison
676 #undef TARGET_ASAN_SHADOW_OFFSET
677 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
679 #undef MAX_INSN_PER_IT_BLOCK
680 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
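/* (Editorial note: when -mrestrict-it is in effect (arm_restrict_it), only a
   single conditional instruction is allowed per IT block, matching the ARMv8
   deprecation of multi-instruction IT blocks; otherwise up to four
   instructions may share one IT block, the Thumb-2 architectural maximum.)  */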
682 #undef TARGET_CAN_USE_DOLOOP_P
683 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
685 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
686 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
688 struct gcc_target targetm = TARGET_INITIALIZER;
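/* (Editorial note: TARGET_INITIALIZER comes from target-def.h and collects
   every TARGET_* hook macro (re)defined above into this single structure of
   function pointers; the middle end then reaches this back end largely
   through calls such as targetm.rtx_costs (...) or
   targetm.legitimate_constant_p (...).)  */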
690 /* Obstack for minipool constant handling. */
691 static struct obstack minipool_obstack;
692 static char * minipool_startobj;
694 /* The maximum number of insns skipped which
695 will be conditionalised if possible. */
696 static int max_insns_skipped = 5;
698 extern FILE * asm_out_file;
700 /* True if we are currently building a constant table. */
701 int making_const_table;
703 /* The processor for which instructions should be scheduled. */
704 enum processor_type arm_tune = arm_none;
706 /* The current tuning set. */
707 const struct tune_params *current_tune;
709 /* Which floating point hardware to schedule for. */
710 int arm_fpu_attr;
 712 /* Which floating point hardware to use. */
713 const struct arm_fpu_desc *arm_fpu_desc;
715 /* Used for Thumb call_via trampolines. */
716 rtx thumb_call_via_label[14];
717 static int thumb_call_reg_needed;
719 /* Bit values used to identify processor capabilities. */
720 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
721 #define FL_ARCH3M (1 << 1) /* Extended multiply */
722 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
723 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
724 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
725 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
726 #define FL_THUMB (1 << 6) /* Thumb aware */
727 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
728 #define FL_STRONG (1 << 8) /* StrongARM */
729 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
730 #define FL_XSCALE (1 << 10) /* XScale */
731 /* spare (1 << 11) */
732 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
733 media instructions. */
734 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
735 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
736 Note: ARM6 & 7 derivatives only. */
737 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
738 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
739 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
740 profile. */
741 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
742 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
743 #define FL_NEON (1 << 20) /* Neon instructions. */
744 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
745 architecture. */
746 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
747 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
748 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
749 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
751 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
752 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
 754 /* Flags that only affect tuning, not available instructions. */
755 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
756 | FL_CO_PROC)
758 #define FL_FOR_ARCH2 FL_NOTM
759 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
760 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
761 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
762 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
763 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
764 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
765 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
766 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
767 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
768 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
769 #define FL_FOR_ARCH6J FL_FOR_ARCH6
770 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
771 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
772 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
773 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
774 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
775 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
776 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
777 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
778 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
779 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
780 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
781 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
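/* (Editorial note: each FL_FOR_ARCHn value is simply the union of the
   capabilities that architecture guarantees.  These masks are consumed when
   the command line is processed; arm_option_override, later in this file,
   derives the arm_arch* feature variables from them with tests of the form

     arm_arch4 = (insn_flags & FL_ARCH4) != 0;  )  */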
783 /* The bits in this mask specify which
784 instructions we are allowed to generate. */
785 static unsigned long insn_flags = 0;
787 /* The bits in this mask specify which instruction scheduling options should
788 be used. */
789 static unsigned long tune_flags = 0;
791 /* The highest ARM architecture version supported by the
792 target. */
793 enum base_architecture arm_base_arch = BASE_ARCH_0;
795 /* The following are used in the arm.md file as equivalents to bits
796 in the above two flag variables. */
798 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
799 int arm_arch3m = 0;
801 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
802 int arm_arch4 = 0;
804 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
805 int arm_arch4t = 0;
807 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
808 int arm_arch5 = 0;
810 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
811 int arm_arch5e = 0;
813 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
814 int arm_arch6 = 0;
816 /* Nonzero if this chip supports the ARM 6K extensions. */
817 int arm_arch6k = 0;
819 /* Nonzero if instructions present in ARMv6-M can be used. */
820 int arm_arch6m = 0;
822 /* Nonzero if this chip supports the ARM 7 extensions. */
823 int arm_arch7 = 0;
825 /* Nonzero if instructions not present in the 'M' profile can be used. */
826 int arm_arch_notm = 0;
828 /* Nonzero if instructions present in ARMv7E-M can be used. */
829 int arm_arch7em = 0;
831 /* Nonzero if instructions present in ARMv8 can be used. */
832 int arm_arch8 = 0;
834 /* Nonzero if this chip can benefit from load scheduling. */
835 int arm_ld_sched = 0;
837 /* Nonzero if this chip is a StrongARM. */
838 int arm_tune_strongarm = 0;
840 /* Nonzero if this chip supports Intel Wireless MMX technology. */
841 int arm_arch_iwmmxt = 0;
843 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
844 int arm_arch_iwmmxt2 = 0;
846 /* Nonzero if this chip is an XScale. */
847 int arm_arch_xscale = 0;
849 /* Nonzero if tuning for XScale */
850 int arm_tune_xscale = 0;
852 /* Nonzero if we want to tune for stores that access the write-buffer.
853 This typically means an ARM6 or ARM7 with MMU or MPU. */
854 int arm_tune_wbuf = 0;
856 /* Nonzero if tuning for Cortex-A9. */
857 int arm_tune_cortex_a9 = 0;
859 /* Nonzero if generating Thumb instructions. */
860 int thumb_code = 0;
862 /* Nonzero if generating Thumb-1 instructions. */
863 int thumb1_code = 0;
865 /* Nonzero if we should define __THUMB_INTERWORK__ in the
866 preprocessor.
 867 XXX This is a bit of a hack; it's intended to help work around
 868 problems in GLD, which doesn't understand that armv5t code is
869 interworking clean. */
870 int arm_cpp_interwork = 0;
872 /* Nonzero if chip supports Thumb 2. */
873 int arm_arch_thumb2;
875 /* Nonzero if chip supports integer division instruction. */
876 int arm_arch_arm_hwdiv;
877 int arm_arch_thumb_hwdiv;
 879 /* Nonzero if we should use Neon to handle 64-bit operations rather
880 than core registers. */
881 int prefer_neon_for_64bits = 0;
883 /* Nonzero if we shouldn't use literal pools. */
884 bool arm_disable_literal_pool = false;
886 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
887 we must report the mode of the memory reference from
888 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
889 enum machine_mode output_memory_reference_mode;
891 /* The register number to be used for the PIC offset register. */
892 unsigned arm_pic_register = INVALID_REGNUM;
894 enum arm_pcs arm_pcs_default;
896 /* For an explanation of these variables, see final_prescan_insn below. */
897 int arm_ccfsm_state;
898 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
899 enum arm_cond_code arm_current_cc;
901 rtx arm_target_insn;
902 int arm_target_label;
903 /* The number of conditionally executed insns, including the current insn. */
904 int arm_condexec_count = 0;
905 /* A bitmask specifying the patterns for the IT block.
906 Zero means do not output an IT block before this insn. */
907 int arm_condexec_mask = 0;
908 /* The number of bits used in arm_condexec_mask. */
909 int arm_condexec_masklen = 0;
911 /* Nonzero if chip supports the ARMv8 CRC instructions. */
912 int arm_arch_crc = 0;
914 /* The condition codes of the ARM, and the inverse function. */
915 static const char * const arm_condition_codes[] =
917 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
918 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
921 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
922 int arm_regs_in_sequence[] =
924 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
927 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
928 #define streq(string1, string2) (strcmp (string1, string2) == 0)
930 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
931 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
932 | (1 << PIC_OFFSET_TABLE_REGNUM)))
934 /* Initialization code. */
936 struct processors
938 const char *const name;
939 enum processor_type core;
940 const char *arch;
941 enum base_architecture base_arch;
942 const unsigned long flags;
943 const struct tune_params *const tune;
947 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
948 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
949 prefetch_slots, \
950 l1_size, \
951 l1_line_size
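/* (Editorial note: these macros just splice the three prefetch-related
   initializer fields of struct tune_params.  For example, a hypothetical
   ARM_PREFETCH_BENEFICIAL (4, 32768, 64) expands to "4, 32768, 64", while
   ARM_PREFETCH_NOT_BENEFICIAL supplies the "don't know / don't care" values
   0, -1, -1.)  */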
953 /* arm generic vectorizer costs. */
954 static const
955 struct cpu_vec_costs arm_default_vec_cost = {
956 1, /* scalar_stmt_cost. */
 957 1, /* scalar_load_cost. */
958 1, /* scalar_store_cost. */
959 1, /* vec_stmt_cost. */
960 1, /* vec_to_scalar_cost. */
961 1, /* scalar_to_vec_cost. */
962 1, /* vec_align_load_cost. */
963 1, /* vec_unalign_load_cost. */
964 1, /* vec_unalign_store_cost. */
965 1, /* vec_store_cost. */
966 3, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
970 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
971 #include "aarch-cost-tables.h"
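/* (Editorial note: the unit in the tables below is GCC's rtx-cost scale,
   where COSTS_N_INSNS (n) denotes the cost of n "simple" instructions
   (rtl.h defines it as n * 4); in these per-CPU "extra cost" tables a plain
   0 means no cost beyond the baseline single-instruction operation.)  */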
975 const struct cpu_cost_table cortexa9_extra_costs =
977 /* ALU */
979 0, /* arith. */
980 0, /* logical. */
981 0, /* shift. */
982 COSTS_N_INSNS (1), /* shift_reg. */
983 COSTS_N_INSNS (1), /* arith_shift. */
984 COSTS_N_INSNS (2), /* arith_shift_reg. */
985 0, /* log_shift. */
986 COSTS_N_INSNS (1), /* log_shift_reg. */
987 COSTS_N_INSNS (1), /* extend. */
988 COSTS_N_INSNS (2), /* extend_arith. */
989 COSTS_N_INSNS (1), /* bfi. */
990 COSTS_N_INSNS (1), /* bfx. */
991 0, /* clz. */
992 0, /* rev. */
993 0, /* non_exec. */
994 true /* non_exec_costs_exec. */
997 /* MULT SImode */
999 COSTS_N_INSNS (3), /* simple. */
1000 COSTS_N_INSNS (3), /* flag_setting. */
1001 COSTS_N_INSNS (2), /* extend. */
1002 COSTS_N_INSNS (3), /* add. */
1003 COSTS_N_INSNS (2), /* extend_add. */
1004 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1006 /* MULT DImode */
1008 0, /* simple (N/A). */
1009 0, /* flag_setting (N/A). */
1010 COSTS_N_INSNS (4), /* extend. */
1011 0, /* add (N/A). */
1012 COSTS_N_INSNS (4), /* extend_add. */
1013 0 /* idiv (N/A). */
1016 /* LD/ST */
1018 COSTS_N_INSNS (2), /* load. */
1019 COSTS_N_INSNS (2), /* load_sign_extend. */
1020 COSTS_N_INSNS (2), /* ldrd. */
1021 COSTS_N_INSNS (2), /* ldm_1st. */
1022 1, /* ldm_regs_per_insn_1st. */
1023 2, /* ldm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (5), /* loadf. */
1025 COSTS_N_INSNS (5), /* loadd. */
1026 COSTS_N_INSNS (1), /* load_unaligned. */
1027 COSTS_N_INSNS (2), /* store. */
1028 COSTS_N_INSNS (2), /* strd. */
1029 COSTS_N_INSNS (2), /* stm_1st. */
1030 1, /* stm_regs_per_insn_1st. */
1031 2, /* stm_regs_per_insn_subsequent. */
1032 COSTS_N_INSNS (1), /* storef. */
1033 COSTS_N_INSNS (1), /* stored. */
1034 COSTS_N_INSNS (1) /* store_unaligned. */
1037 /* FP SFmode */
1039 COSTS_N_INSNS (14), /* div. */
1040 COSTS_N_INSNS (4), /* mult. */
1041 COSTS_N_INSNS (7), /* mult_addsub. */
1042 COSTS_N_INSNS (30), /* fma. */
1043 COSTS_N_INSNS (3), /* addsub. */
1044 COSTS_N_INSNS (1), /* fpconst. */
1045 COSTS_N_INSNS (1), /* neg. */
1046 COSTS_N_INSNS (3), /* compare. */
1047 COSTS_N_INSNS (3), /* widen. */
1048 COSTS_N_INSNS (3), /* narrow. */
1049 COSTS_N_INSNS (3), /* toint. */
1050 COSTS_N_INSNS (3), /* fromint. */
1051 COSTS_N_INSNS (3) /* roundint. */
1053 /* FP DFmode */
1055 COSTS_N_INSNS (24), /* div. */
1056 COSTS_N_INSNS (5), /* mult. */
1057 COSTS_N_INSNS (8), /* mult_addsub. */
1058 COSTS_N_INSNS (30), /* fma. */
1059 COSTS_N_INSNS (3), /* addsub. */
1060 COSTS_N_INSNS (1), /* fpconst. */
1061 COSTS_N_INSNS (1), /* neg. */
1062 COSTS_N_INSNS (3), /* compare. */
1063 COSTS_N_INSNS (3), /* widen. */
1064 COSTS_N_INSNS (3), /* narrow. */
1065 COSTS_N_INSNS (3), /* toint. */
1066 COSTS_N_INSNS (3), /* fromint. */
1067 COSTS_N_INSNS (3) /* roundint. */
1070 /* Vector */
1072 COSTS_N_INSNS (1) /* alu. */
1076 const struct cpu_cost_table cortexa8_extra_costs =
1078 /* ALU */
1080 0, /* arith. */
1081 0, /* logical. */
1082 COSTS_N_INSNS (1), /* shift. */
1083 0, /* shift_reg. */
1084 COSTS_N_INSNS (1), /* arith_shift. */
1085 0, /* arith_shift_reg. */
1086 COSTS_N_INSNS (1), /* log_shift. */
1087 0, /* log_shift_reg. */
1088 0, /* extend. */
1089 0, /* extend_arith. */
1090 0, /* bfi. */
1091 0, /* bfx. */
1092 0, /* clz. */
1093 0, /* rev. */
1094 0, /* non_exec. */
1095 true /* non_exec_costs_exec. */
1098 /* MULT SImode */
1100 COSTS_N_INSNS (1), /* simple. */
1101 COSTS_N_INSNS (1), /* flag_setting. */
1102 COSTS_N_INSNS (1), /* extend. */
1103 COSTS_N_INSNS (1), /* add. */
1104 COSTS_N_INSNS (1), /* extend_add. */
1105 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1107 /* MULT DImode */
1109 0, /* simple (N/A). */
1110 0, /* flag_setting (N/A). */
1111 COSTS_N_INSNS (2), /* extend. */
1112 0, /* add (N/A). */
1113 COSTS_N_INSNS (2), /* extend_add. */
1114 0 /* idiv (N/A). */
1117 /* LD/ST */
1119 COSTS_N_INSNS (1), /* load. */
1120 COSTS_N_INSNS (1), /* load_sign_extend. */
1121 COSTS_N_INSNS (1), /* ldrd. */
1122 COSTS_N_INSNS (1), /* ldm_1st. */
1123 1, /* ldm_regs_per_insn_1st. */
1124 2, /* ldm_regs_per_insn_subsequent. */
1125 COSTS_N_INSNS (1), /* loadf. */
1126 COSTS_N_INSNS (1), /* loadd. */
1127 COSTS_N_INSNS (1), /* load_unaligned. */
1128 COSTS_N_INSNS (1), /* store. */
1129 COSTS_N_INSNS (1), /* strd. */
1130 COSTS_N_INSNS (1), /* stm_1st. */
1131 1, /* stm_regs_per_insn_1st. */
1132 2, /* stm_regs_per_insn_subsequent. */
1133 COSTS_N_INSNS (1), /* storef. */
1134 COSTS_N_INSNS (1), /* stored. */
1135 COSTS_N_INSNS (1) /* store_unaligned. */
1138 /* FP SFmode */
1140 COSTS_N_INSNS (36), /* div. */
1141 COSTS_N_INSNS (11), /* mult. */
1142 COSTS_N_INSNS (20), /* mult_addsub. */
1143 COSTS_N_INSNS (30), /* fma. */
1144 COSTS_N_INSNS (9), /* addsub. */
1145 COSTS_N_INSNS (3), /* fpconst. */
1146 COSTS_N_INSNS (3), /* neg. */
1147 COSTS_N_INSNS (6), /* compare. */
1148 COSTS_N_INSNS (4), /* widen. */
1149 COSTS_N_INSNS (4), /* narrow. */
1150 COSTS_N_INSNS (8), /* toint. */
1151 COSTS_N_INSNS (8), /* fromint. */
1152 COSTS_N_INSNS (8) /* roundint. */
1154 /* FP DFmode */
1156 COSTS_N_INSNS (64), /* div. */
1157 COSTS_N_INSNS (16), /* mult. */
1158 COSTS_N_INSNS (25), /* mult_addsub. */
1159 COSTS_N_INSNS (30), /* fma. */
1160 COSTS_N_INSNS (9), /* addsub. */
1161 COSTS_N_INSNS (3), /* fpconst. */
1162 COSTS_N_INSNS (3), /* neg. */
1163 COSTS_N_INSNS (6), /* compare. */
1164 COSTS_N_INSNS (6), /* widen. */
1165 COSTS_N_INSNS (6), /* narrow. */
1166 COSTS_N_INSNS (8), /* toint. */
1167 COSTS_N_INSNS (8), /* fromint. */
1168 COSTS_N_INSNS (8) /* roundint. */
1171 /* Vector */
1173 COSTS_N_INSNS (1) /* alu. */
1179 const struct cpu_cost_table cortexa7_extra_costs =
1181 /* ALU */
1183 0, /* arith. */
1184 0, /* logical. */
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1197 0, /* non_exec. */
1198 true /* non_exec_costs_exec. */
1202 /* MULT SImode */
1204 0, /* simple. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1211 /* MULT DImode */
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1216 0, /* add. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1218 0 /* idiv (N/A). */
1221 /* LD/ST */
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (3), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (2), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1242 /* FP SFmode */
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1258 /* FP DFmode */
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1275 /* Vector */
1277 COSTS_N_INSNS (1) /* alu. */
1281 const struct cpu_cost_table cortexa12_extra_costs =
1283 /* ALU */
1285 0, /* arith. */
1286 0, /* logical. */
1287 0, /* shift. */
1288 COSTS_N_INSNS (1), /* shift_reg. */
1289 COSTS_N_INSNS (1), /* arith_shift. */
1290 COSTS_N_INSNS (1), /* arith_shift_reg. */
1291 COSTS_N_INSNS (1), /* log_shift. */
1292 COSTS_N_INSNS (1), /* log_shift_reg. */
1293 0, /* extend. */
1294 COSTS_N_INSNS (1), /* extend_arith. */
1295 0, /* bfi. */
1296 COSTS_N_INSNS (1), /* bfx. */
1297 COSTS_N_INSNS (1), /* clz. */
1298 COSTS_N_INSNS (1), /* rev. */
1299 0, /* non_exec. */
1300 true /* non_exec_costs_exec. */
1302 /* MULT SImode */
1305 COSTS_N_INSNS (2), /* simple. */
1306 COSTS_N_INSNS (3), /* flag_setting. */
1307 COSTS_N_INSNS (2), /* extend. */
1308 COSTS_N_INSNS (3), /* add. */
1309 COSTS_N_INSNS (2), /* extend_add. */
1310 COSTS_N_INSNS (18) /* idiv. */
1312 /* MULT DImode */
1314 0, /* simple (N/A). */
1315 0, /* flag_setting (N/A). */
1316 COSTS_N_INSNS (3), /* extend. */
1317 0, /* add (N/A). */
1318 COSTS_N_INSNS (3), /* extend_add. */
1319 0 /* idiv (N/A). */
1322 /* LD/ST */
1324 COSTS_N_INSNS (3), /* load. */
1325 COSTS_N_INSNS (3), /* load_sign_extend. */
1326 COSTS_N_INSNS (3), /* ldrd. */
1327 COSTS_N_INSNS (3), /* ldm_1st. */
1328 1, /* ldm_regs_per_insn_1st. */
1329 2, /* ldm_regs_per_insn_subsequent. */
1330 COSTS_N_INSNS (3), /* loadf. */
1331 COSTS_N_INSNS (3), /* loadd. */
1332 0, /* load_unaligned. */
1333 0, /* store. */
1334 0, /* strd. */
1335 0, /* stm_1st. */
1336 1, /* stm_regs_per_insn_1st. */
1337 2, /* stm_regs_per_insn_subsequent. */
1338 COSTS_N_INSNS (2), /* storef. */
1339 COSTS_N_INSNS (2), /* stored. */
1340 0 /* store_unaligned. */
1343 /* FP SFmode */
1345 COSTS_N_INSNS (17), /* div. */
1346 COSTS_N_INSNS (4), /* mult. */
1347 COSTS_N_INSNS (8), /* mult_addsub. */
1348 COSTS_N_INSNS (8), /* fma. */
1349 COSTS_N_INSNS (4), /* addsub. */
1350 COSTS_N_INSNS (2), /* fpconst. */
1351 COSTS_N_INSNS (2), /* neg. */
1352 COSTS_N_INSNS (2), /* compare. */
1353 COSTS_N_INSNS (4), /* widen. */
1354 COSTS_N_INSNS (4), /* narrow. */
1355 COSTS_N_INSNS (4), /* toint. */
1356 COSTS_N_INSNS (4), /* fromint. */
1357 COSTS_N_INSNS (4) /* roundint. */
1359 /* FP DFmode */
1361 COSTS_N_INSNS (31), /* div. */
1362 COSTS_N_INSNS (4), /* mult. */
1363 COSTS_N_INSNS (8), /* mult_addsub. */
1364 COSTS_N_INSNS (8), /* fma. */
1365 COSTS_N_INSNS (4), /* addsub. */
1366 COSTS_N_INSNS (2), /* fpconst. */
1367 COSTS_N_INSNS (2), /* neg. */
1368 COSTS_N_INSNS (2), /* compare. */
1369 COSTS_N_INSNS (4), /* widen. */
1370 COSTS_N_INSNS (4), /* narrow. */
1371 COSTS_N_INSNS (4), /* toint. */
1372 COSTS_N_INSNS (4), /* fromint. */
1373 COSTS_N_INSNS (4) /* roundint. */
1376 /* Vector */
1378 COSTS_N_INSNS (1) /* alu. */
1382 const struct cpu_cost_table cortexa15_extra_costs =
1384 /* ALU */
1386 0, /* arith. */
1387 0, /* logical. */
1388 0, /* shift. */
1389 0, /* shift_reg. */
1390 COSTS_N_INSNS (1), /* arith_shift. */
1391 COSTS_N_INSNS (1), /* arith_shift_reg. */
1392 COSTS_N_INSNS (1), /* log_shift. */
1393 COSTS_N_INSNS (1), /* log_shift_reg. */
1394 0, /* extend. */
1395 COSTS_N_INSNS (1), /* extend_arith. */
1396 COSTS_N_INSNS (1), /* bfi. */
1397 0, /* bfx. */
1398 0, /* clz. */
1399 0, /* rev. */
1400 0, /* non_exec. */
1401 true /* non_exec_costs_exec. */
1403 /* MULT SImode */
1406 COSTS_N_INSNS (2), /* simple. */
1407 COSTS_N_INSNS (3), /* flag_setting. */
1408 COSTS_N_INSNS (2), /* extend. */
1409 COSTS_N_INSNS (2), /* add. */
1410 COSTS_N_INSNS (2), /* extend_add. */
1411 COSTS_N_INSNS (18) /* idiv. */
1413 /* MULT DImode */
1415 0, /* simple (N/A). */
1416 0, /* flag_setting (N/A). */
1417 COSTS_N_INSNS (3), /* extend. */
1418 0, /* add (N/A). */
1419 COSTS_N_INSNS (3), /* extend_add. */
1420 0 /* idiv (N/A). */
1423 /* LD/ST */
1425 COSTS_N_INSNS (3), /* load. */
1426 COSTS_N_INSNS (3), /* load_sign_extend. */
1427 COSTS_N_INSNS (3), /* ldrd. */
1428 COSTS_N_INSNS (4), /* ldm_1st. */
1429 1, /* ldm_regs_per_insn_1st. */
1430 2, /* ldm_regs_per_insn_subsequent. */
1431 COSTS_N_INSNS (4), /* loadf. */
1432 COSTS_N_INSNS (4), /* loadd. */
1433 0, /* load_unaligned. */
1434 0, /* store. */
1435 0, /* strd. */
1436 COSTS_N_INSNS (1), /* stm_1st. */
1437 1, /* stm_regs_per_insn_1st. */
1438 2, /* stm_regs_per_insn_subsequent. */
1439 0, /* storef. */
1440 0, /* stored. */
1441 0 /* store_unaligned. */
1444 /* FP SFmode */
1446 COSTS_N_INSNS (17), /* div. */
1447 COSTS_N_INSNS (4), /* mult. */
1448 COSTS_N_INSNS (8), /* mult_addsub. */
1449 COSTS_N_INSNS (8), /* fma. */
1450 COSTS_N_INSNS (4), /* addsub. */
1451 COSTS_N_INSNS (2), /* fpconst. */
1452 COSTS_N_INSNS (2), /* neg. */
1453 COSTS_N_INSNS (5), /* compare. */
1454 COSTS_N_INSNS (4), /* widen. */
1455 COSTS_N_INSNS (4), /* narrow. */
1456 COSTS_N_INSNS (4), /* toint. */
1457 COSTS_N_INSNS (4), /* fromint. */
1458 COSTS_N_INSNS (4) /* roundint. */
1460 /* FP DFmode */
1462 COSTS_N_INSNS (31), /* div. */
1463 COSTS_N_INSNS (4), /* mult. */
1464 COSTS_N_INSNS (8), /* mult_addsub. */
1465 COSTS_N_INSNS (8), /* fma. */
1466 COSTS_N_INSNS (4), /* addsub. */
1467 COSTS_N_INSNS (2), /* fpconst. */
1468 COSTS_N_INSNS (2), /* neg. */
1469 COSTS_N_INSNS (2), /* compare. */
1470 COSTS_N_INSNS (4), /* widen. */
1471 COSTS_N_INSNS (4), /* narrow. */
1472 COSTS_N_INSNS (4), /* toint. */
1473 COSTS_N_INSNS (4), /* fromint. */
1474 COSTS_N_INSNS (4) /* roundint. */
1477 /* Vector */
1479 COSTS_N_INSNS (1) /* alu. */
1483 const struct cpu_cost_table v7m_extra_costs =
1485 /* ALU */
1487 0, /* arith. */
1488 0, /* logical. */
1489 0, /* shift. */
1490 0, /* shift_reg. */
1491 0, /* arith_shift. */
1492 COSTS_N_INSNS (1), /* arith_shift_reg. */
1493 0, /* log_shift. */
1494 COSTS_N_INSNS (1), /* log_shift_reg. */
1495 0, /* extend. */
1496 COSTS_N_INSNS (1), /* extend_arith. */
1497 0, /* bfi. */
1498 0, /* bfx. */
1499 0, /* clz. */
1500 0, /* rev. */
1501 COSTS_N_INSNS (1), /* non_exec. */
1502 false /* non_exec_costs_exec. */
1505 /* MULT SImode */
1507 COSTS_N_INSNS (1), /* simple. */
1508 COSTS_N_INSNS (1), /* flag_setting. */
1509 COSTS_N_INSNS (2), /* extend. */
1510 COSTS_N_INSNS (1), /* add. */
1511 COSTS_N_INSNS (3), /* extend_add. */
1512 COSTS_N_INSNS (8) /* idiv. */
1514 /* MULT DImode */
1516 0, /* simple (N/A). */
1517 0, /* flag_setting (N/A). */
1518 COSTS_N_INSNS (2), /* extend. */
1519 0, /* add (N/A). */
1520 COSTS_N_INSNS (3), /* extend_add. */
1521 0 /* idiv (N/A). */
1524 /* LD/ST */
1526 COSTS_N_INSNS (2), /* load. */
1527 0, /* load_sign_extend. */
1528 COSTS_N_INSNS (3), /* ldrd. */
1529 COSTS_N_INSNS (2), /* ldm_1st. */
1530 1, /* ldm_regs_per_insn_1st. */
1531 1, /* ldm_regs_per_insn_subsequent. */
1532 COSTS_N_INSNS (2), /* loadf. */
1533 COSTS_N_INSNS (3), /* loadd. */
1534 COSTS_N_INSNS (1), /* load_unaligned. */
1535 COSTS_N_INSNS (2), /* store. */
1536 COSTS_N_INSNS (3), /* strd. */
1537 COSTS_N_INSNS (2), /* stm_1st. */
1538 1, /* stm_regs_per_insn_1st. */
1539 1, /* stm_regs_per_insn_subsequent. */
1540 COSTS_N_INSNS (2), /* storef. */
1541 COSTS_N_INSNS (3), /* stored. */
1542 COSTS_N_INSNS (1) /* store_unaligned. */
1545 /* FP SFmode */
1547 COSTS_N_INSNS (7), /* div. */
1548 COSTS_N_INSNS (2), /* mult. */
1549 COSTS_N_INSNS (5), /* mult_addsub. */
1550 COSTS_N_INSNS (3), /* fma. */
1551 COSTS_N_INSNS (1), /* addsub. */
1552 0, /* fpconst. */
1553 0, /* neg. */
1554 0, /* compare. */
1555 0, /* widen. */
1556 0, /* narrow. */
1557 0, /* toint. */
1558 0, /* fromint. */
1559 0 /* roundint. */
1561 /* FP DFmode */
1563 COSTS_N_INSNS (15), /* div. */
1564 COSTS_N_INSNS (5), /* mult. */
1565 COSTS_N_INSNS (7), /* mult_addsub. */
1566 COSTS_N_INSNS (7), /* fma. */
1567 COSTS_N_INSNS (3), /* addsub. */
1568 0, /* fpconst. */
1569 0, /* neg. */
1570 0, /* compare. */
1571 0, /* widen. */
1572 0, /* narrow. */
1573 0, /* toint. */
1574 0, /* fromint. */
1575 0 /* roundint. */
1578 /* Vector */
1580 COSTS_N_INSNS (1) /* alu. */
1584 const struct tune_params arm_slowmul_tune =
1586 arm_slowmul_rtx_costs,
1587 NULL,
1588 NULL, /* Sched adj cost. */
1589 3, /* Constant limit. */
1590 5, /* Max cond insns. */
1591 ARM_PREFETCH_NOT_BENEFICIAL,
1592 true, /* Prefer constant pool. */
1593 arm_default_branch_cost,
1594 false, /* Prefer LDRD/STRD. */
1595 {true, true}, /* Prefer non short circuit. */
1596 &arm_default_vec_cost, /* Vectorizer costs. */
1597 false, /* Prefer Neon for 64-bits bitops. */
1598 false, false /* Prefer 32-bit encodings. */
1601 const struct tune_params arm_fastmul_tune =
1603 arm_fastmul_rtx_costs,
1604 NULL,
1605 NULL, /* Sched adj cost. */
1606 1, /* Constant limit. */
1607 5, /* Max cond insns. */
1608 ARM_PREFETCH_NOT_BENEFICIAL,
1609 true, /* Prefer constant pool. */
1610 arm_default_branch_cost,
1611 false, /* Prefer LDRD/STRD. */
1612 {true, true}, /* Prefer non short circuit. */
1613 &arm_default_vec_cost, /* Vectorizer costs. */
1614 false, /* Prefer Neon for 64-bits bitops. */
1615 false, false /* Prefer 32-bit encodings. */
1618 /* StrongARM has early execution of branches, so a sequence that is worth
1619 skipping is shorter. Set max_insns_skipped to a lower value. */
1621 const struct tune_params arm_strongarm_tune =
1623 arm_fastmul_rtx_costs,
1624 NULL,
1625 NULL, /* Sched adj cost. */
1626 1, /* Constant limit. */
1627 3, /* Max cond insns. */
1628 ARM_PREFETCH_NOT_BENEFICIAL,
1629 true, /* Prefer constant pool. */
1630 arm_default_branch_cost,
1631 false, /* Prefer LDRD/STRD. */
1632 {true, true}, /* Prefer non short circuit. */
1633 &arm_default_vec_cost, /* Vectorizer costs. */
1634 false, /* Prefer Neon for 64-bits bitops. */
1635 false, false /* Prefer 32-bit encodings. */
1638 const struct tune_params arm_xscale_tune =
1640 arm_xscale_rtx_costs,
1641 NULL,
1642 xscale_sched_adjust_cost,
1643 2, /* Constant limit. */
1644 3, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL,
1646 true, /* Prefer constant pool. */
1647 arm_default_branch_cost,
1648 false, /* Prefer LDRD/STRD. */
1649 {true, true}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost, /* Vectorizer costs. */
1651 false, /* Prefer Neon for 64-bits bitops. */
1652 false, false /* Prefer 32-bit encodings. */
1655 const struct tune_params arm_9e_tune =
1657 arm_9e_rtx_costs,
1658 NULL,
1659 NULL, /* Sched adj cost. */
1660 1, /* Constant limit. */
1661 5, /* Max cond insns. */
1662 ARM_PREFETCH_NOT_BENEFICIAL,
1663 true, /* Prefer constant pool. */
1664 arm_default_branch_cost,
1665 false, /* Prefer LDRD/STRD. */
1666 {true, true}, /* Prefer non short circuit. */
1667 &arm_default_vec_cost, /* Vectorizer costs. */
1668 false, /* Prefer Neon for 64-bits bitops. */
1669 false, false /* Prefer 32-bit encodings. */
1672 const struct tune_params arm_v6t2_tune =
1674 arm_9e_rtx_costs,
1675 NULL,
1676 NULL, /* Sched adj cost. */
1677 1, /* Constant limit. */
1678 5, /* Max cond insns. */
1679 ARM_PREFETCH_NOT_BENEFICIAL,
1680 false, /* Prefer constant pool. */
1681 arm_default_branch_cost,
1682 false, /* Prefer LDRD/STRD. */
1683 {true, true}, /* Prefer non short circuit. */
1684 &arm_default_vec_cost, /* Vectorizer costs. */
1685 false, /* Prefer Neon for 64-bits bitops. */
1686 false, false /* Prefer 32-bit encodings. */
1689 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1690 const struct tune_params arm_cortex_tune =
1692 arm_9e_rtx_costs,
1693 &generic_extra_costs,
1694 NULL, /* Sched adj cost. */
1695 1, /* Constant limit. */
1696 5, /* Max cond insns. */
1697 ARM_PREFETCH_NOT_BENEFICIAL,
1698 false, /* Prefer constant pool. */
1699 arm_default_branch_cost,
1700 false, /* Prefer LDRD/STRD. */
1701 {true, true}, /* Prefer non short circuit. */
1702 &arm_default_vec_cost, /* Vectorizer costs. */
1703 false, /* Prefer Neon for 64-bits bitops. */
1704 false, false /* Prefer 32-bit encodings. */
1707 const struct tune_params arm_cortex_a8_tune =
1709 arm_9e_rtx_costs,
1710 &cortexa8_extra_costs,
1711 NULL, /* Sched adj cost. */
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 ARM_PREFETCH_NOT_BENEFICIAL,
1715 false, /* Prefer constant pool. */
1716 arm_default_branch_cost,
1717 false, /* Prefer LDRD/STRD. */
1718 {true, true}, /* Prefer non short circuit. */
1719 &arm_default_vec_cost, /* Vectorizer costs. */
1720 false, /* Prefer Neon for 64-bits bitops. */
1721 false, false /* Prefer 32-bit encodings. */
1724 const struct tune_params arm_cortex_a7_tune =
1726 arm_9e_rtx_costs,
1727 &cortexa7_extra_costs,
1728 NULL,
1729 1, /* Constant limit. */
1730 5, /* Max cond insns. */
1731 ARM_PREFETCH_NOT_BENEFICIAL,
1732 false, /* Prefer constant pool. */
1733 arm_default_branch_cost,
1734 false, /* Prefer LDRD/STRD. */
1735 {true, true}, /* Prefer non short circuit. */
1736 &arm_default_vec_cost, /* Vectorizer costs. */
1737 false, /* Prefer Neon for 64-bits bitops. */
1738 false, false /* Prefer 32-bit encodings. */
1741 const struct tune_params arm_cortex_a15_tune =
1743 arm_9e_rtx_costs,
1744 &cortexa15_extra_costs,
1745 NULL, /* Sched adj cost. */
1746 1, /* Constant limit. */
1747 2, /* Max cond insns. */
1748 ARM_PREFETCH_NOT_BENEFICIAL,
1749 false, /* Prefer constant pool. */
1750 arm_default_branch_cost,
1751 true, /* Prefer LDRD/STRD. */
1752 {true, true}, /* Prefer non short circuit. */
1753 &arm_default_vec_cost, /* Vectorizer costs. */
1754 false, /* Prefer Neon for 64-bits bitops. */
1755 true, true /* Prefer 32-bit encodings. */
1758 const struct tune_params arm_cortex_a53_tune =
1760 arm_9e_rtx_costs,
1761 &cortexa53_extra_costs,
1762 NULL, /* Scheduler cost adjustment. */
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 ARM_PREFETCH_NOT_BENEFICIAL,
1766 false, /* Prefer constant pool. */
1767 arm_default_branch_cost,
1768 false, /* Prefer LDRD/STRD. */
1769 {true, true}, /* Prefer non short circuit. */
1770 &arm_default_vec_cost, /* Vectorizer costs. */
1771 false, /* Prefer Neon for 64-bits bitops. */
1772 false, false /* Prefer 32-bit encodings. */
1775 const struct tune_params arm_cortex_a57_tune =
1777 arm_9e_rtx_costs,
1778 &cortexa57_extra_costs,
1779 NULL, /* Scheduler cost adjustment. */
1780 1, /* Constant limit. */
1781 2, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 false, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 true, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 true, true /* Prefer 32-bit encodings. */
1792 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1793 less appealing. Set max_insns_skipped to a low value. */
1795 const struct tune_params arm_cortex_a5_tune =
1797 arm_9e_rtx_costs,
1798 NULL,
1799 NULL, /* Sched adj cost. */
1800 1, /* Constant limit. */
1801 1, /* Max cond insns. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 false, /* Prefer constant pool. */
1804 arm_cortex_a5_branch_cost,
1805 false, /* Prefer LDRD/STRD. */
1806 {false, false}, /* Prefer non short circuit. */
1807 &arm_default_vec_cost, /* Vectorizer costs. */
1808 false, /* Prefer Neon for 64-bits bitops. */
1809 false, false /* Prefer 32-bit encodings. */
1812 const struct tune_params arm_cortex_a9_tune =
1814 arm_9e_rtx_costs,
1815 &cortexa9_extra_costs,
1816 cortex_a9_sched_adjust_cost,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 ARM_PREFETCH_BENEFICIAL(4,32,32),
1820 false, /* Prefer constant pool. */
1821 arm_default_branch_cost,
1822 false, /* Prefer LDRD/STRD. */
1823 {true, true}, /* Prefer non short circuit. */
1824 &arm_default_vec_cost, /* Vectorizer costs. */
1825 false, /* Prefer Neon for 64-bits bitops. */
1826 false, false /* Prefer 32-bit encodings. */
1829 const struct tune_params arm_cortex_a12_tune =
1831 arm_9e_rtx_costs,
1832 &cortexa12_extra_costs,
1833 NULL,
1834 1, /* Constant limit. */
1835 5, /* Max cond insns. */
1836 ARM_PREFETCH_BENEFICIAL(4,32,32),
1837 false, /* Prefer constant pool. */
1838 arm_default_branch_cost,
1839 true, /* Prefer LDRD/STRD. */
1840 {true, true}, /* Prefer non short circuit. */
1841 &arm_default_vec_cost, /* Vectorizer costs. */
1842 false, /* Prefer Neon for 64-bits bitops. */
1843 false, false /* Prefer 32-bit encodings. */
1846 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
1847 single cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR
1848 from the constant pool also takes two cycles to execute, but mildly
1849 increases pipelining opportunity (consecutive loads/stores can be
1850 pipelined together, saving one cycle), and may also improve icache
1851 utilisation. Hence we prefer the constant pool for such processors. */
1853 const struct tune_params arm_v7m_tune =
1855 arm_9e_rtx_costs,
1856 &v7m_extra_costs,
1857 NULL, /* Sched adj cost. */
1858 1, /* Constant limit. */
1859 2, /* Max cond insns. */
1860 ARM_PREFETCH_NOT_BENEFICIAL,
1861 true, /* Prefer constant pool. */
1862 arm_cortex_m_branch_cost,
1863 false, /* Prefer LDRD/STRD. */
1864 {false, false}, /* Prefer non short circuit. */
1865 &arm_default_vec_cost, /* Vectorizer costs. */
1866 false, /* Prefer Neon for 64-bits bitops. */
1867 false, false /* Prefer 32-bit encodings. */
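/* Editorial illustration of the trade-off described above (a sketch, not
part of the build): materialising a 32-bit constant either as

movw r0, #:lower16:sym    @ 1 cycle
movt r0, #:upper16:sym    @ 1 cycle

or as

ldr r0, .Lpool_entry      @ 2 cycles, but consecutive literal loads
                          @ can be pipelined together

costs the same number of cycles on such cores, so the constant pool is
preferred for its pipelining and icache benefits. */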
1870 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1871 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1872 const struct tune_params arm_v6m_tune =
1874 arm_9e_rtx_costs,
1875 NULL,
1876 NULL, /* Sched adj cost. */
1877 1, /* Constant limit. */
1878 5, /* Max cond insns. */
1879 ARM_PREFETCH_NOT_BENEFICIAL,
1880 false, /* Prefer constant pool. */
1881 arm_default_branch_cost,
1882 false, /* Prefer LDRD/STRD. */
1883 {false, false}, /* Prefer non short circuit. */
1884 &arm_default_vec_cost, /* Vectorizer costs. */
1885 false, /* Prefer Neon for 64-bits bitops. */
1886 false, false /* Prefer 32-bit encodings. */
1889 const struct tune_params arm_fa726te_tune =
1891 arm_9e_rtx_costs,
1892 NULL,
1893 fa726te_sched_adjust_cost,
1894 1, /* Constant limit. */
1895 5, /* Max cond insns. */
1896 ARM_PREFETCH_NOT_BENEFICIAL,
1897 true, /* Prefer constant pool. */
1898 arm_default_branch_cost,
1899 false, /* Prefer LDRD/STRD. */
1900 {true, true}, /* Prefer non short circuit. */
1901 &arm_default_vec_cost, /* Vectorizer costs. */
1902 false, /* Prefer Neon for 64-bits bitops. */
1903 false, false /* Prefer 32-bit encodings. */
1907 /* Not all of these give usefully different compilation alternatives,
1908 but there is no simple way of generalizing them. */
1909 static const struct processors all_cores[] =
1911 /* ARM Cores */
1912 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1913 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1914 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1915 #include "arm-cores.def"
1916 #undef ARM_CORE
1917 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
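/* For illustration (a sketch of the X-macro expansion above; the exact
arm-cores.def entry shown here is an assumption): an entry such as

ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)

would expand to

{"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
 FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a8_tune},  */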
1920 static const struct processors all_architectures[] =
1922 /* ARM Architectures */
1923 /* We don't specify tuning costs here as it will be figured out
1924 from the core. */
1926 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1927 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1928 #include "arm-arches.def"
1929 #undef ARM_ARCH
1930 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1934 /* These are populated as commandline arguments are processed, or NULL
1935 if not specified. */
1936 static const struct processors *arm_selected_arch;
1937 static const struct processors *arm_selected_cpu;
1938 static const struct processors *arm_selected_tune;
1940 /* The name of the preprocessor macro to define for this architecture. */
1942 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1944 /* Available values for -mfpu=. */
1946 static const struct arm_fpu_desc all_fpus[] =
1948 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1949 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1950 #include "arm-fpus.def"
1951 #undef ARM_FPU
1955 /* Supported TLS relocations. */
1957 enum tls_reloc {
1958 TLS_GD32,
1959 TLS_LDM32,
1960 TLS_LDO32,
1961 TLS_IE32,
1962 TLS_LE32,
1963 TLS_DESCSEQ /* GNU scheme */
1966 /* The maximum number of insns to be used when loading a constant. */
1967 inline static int
1968 arm_constant_limit (bool size_p)
1970 return size_p ? 1 : current_tune->constant_limit;
1973 /* Emit an insn that's a simple single-set. Both the operands must be known
1974 to be valid. */
1975 inline static rtx
1976 emit_set_insn (rtx x, rtx y)
1978 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1981 /* Return the number of bits set in VALUE. */
1982 static unsigned
1983 bit_count (unsigned long value)
1985 unsigned long count = 0;
1987 while (value)
1989 count++;
1990 value &= value - 1; /* Clear the least-significant set bit. */
1993 return count;
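/* For illustration (not part of the build): value &= value - 1 clears the
lowest set bit on each iteration, so counting the bits of 0x29 (binary
101001) takes exactly three trips around the loop above:

0x29 -> 0x28 -> 0x20 -> 0x0.  */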
1996 typedef struct
1998 enum machine_mode mode;
1999 const char *name;
2000 } arm_fixed_mode_set;
2002 /* A small helper for setting fixed-point library libfuncs. */
2004 static void
2005 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2006 const char *funcname, const char *modename,
2007 int num_suffix)
2009 char buffer[50];
2011 if (num_suffix == 0)
2012 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2013 else
2014 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2016 set_optab_libfunc (optable, mode, buffer);
2019 static void
2020 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2021 enum machine_mode from, const char *funcname,
2022 const char *toname, const char *fromname)
2024 char buffer[50];
2025 const char *maybe_suffix_2 = "";
2027 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2028 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2029 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2030 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2031 maybe_suffix_2 = "2";
2033 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2034 maybe_suffix_2);
2036 set_conv_libfunc (optable, to, from, buffer);
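/* Illustrative examples of the names built by the two helpers above (an
editorial sketch, not part of the build):

arm_set_fixed_optab_libfunc (ssadd_optab, HQmode, "ssadd", "hq", 3)
  registers "__gnu_ssaddhq3";

arm_set_fixed_conv_libfunc (fract_optab, DQmode, QQmode,
                            "fract", "dq", "qq")
  registers "__gnu_fractqqdq2", the trailing "2" because both modes are
  signed fract modes.  */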
2039 /* Set up library functions unique to ARM. */
2041 static void
2042 arm_init_libfuncs (void)
2044 /* For Linux, we have access to kernel support for atomic operations. */
2045 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2046 init_sync_libfuncs (2 * UNITS_PER_WORD);
2048 /* There are no special library functions unless we are using the
2049 ARM BPABI. */
2050 if (!TARGET_BPABI)
2051 return;
2053 /* The functions below are described in Section 4 of the "Run-Time
2054 ABI for the ARM architecture", Version 1.0. */
2056 /* Double-precision floating-point arithmetic. Table 2. */
2057 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2058 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2059 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2060 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2061 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2063 /* Double-precision comparisons. Table 3. */
2064 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2065 set_optab_libfunc (ne_optab, DFmode, NULL);
2066 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2067 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2068 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2069 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2070 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2072 /* Single-precision floating-point arithmetic. Table 4. */
2073 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2074 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2075 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2076 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2077 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2079 /* Single-precision comparisons. Table 5. */
2080 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2081 set_optab_libfunc (ne_optab, SFmode, NULL);
2082 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2083 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2084 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2085 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2086 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2088 /* Floating-point to integer conversions. Table 6. */
2089 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2090 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2091 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2092 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2093 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2094 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2095 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2096 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2098 /* Conversions between floating types. Table 7. */
2099 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2100 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2102 /* Integer to floating-point conversions. Table 8. */
2103 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2104 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2105 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2106 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2107 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2108 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2109 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2110 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2112 /* Long long. Table 9. */
2113 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2114 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2115 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2116 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2117 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2118 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2119 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2120 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2122 /* Integer (32/32->32) division. \S 4.3.1. */
2123 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2124 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2126 /* The divmod functions are designed so that they can be used for
2127 plain division, even though they return both the quotient and the
2128 remainder. The quotient is returned in the usual location (i.e.,
2129 r0 for SImode, {r0, r1} for DImode), just as would be expected
2130 for an ordinary division routine. Because the AAPCS calling
2131 conventions specify that all of { r0, r1, r2, r3 } are
2132 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2133 explicitly that those registers are clobbered by these
2134 routines. */
2135 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2136 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2138 /* For SImode division the ABI provides div-without-mod routines,
2139 which are faster. */
2140 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2141 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2143 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2144 divmod libcalls instead. */
2145 set_optab_libfunc (smod_optab, DImode, NULL);
2146 set_optab_libfunc (umod_optab, DImode, NULL);
2147 set_optab_libfunc (smod_optab, SImode, NULL);
2148 set_optab_libfunc (umod_optab, SImode, NULL);
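/* Illustrative consequence of the mappings above (an assumption about
typical output, not part of this file): for C source such as

int q = a / b;
int r = a % b;

the division alone may become a call to __aeabi_idiv, while computing
both quotient and remainder can share a single call to __aeabi_idivmod,
which returns the quotient in r0 and the remainder in r1; plain
division simply ignores r1.  */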
2150 /* Half-precision float operations. The compiler handles all operations
2151 with NULL libfuncs by converting to SFmode. */
2152 switch (arm_fp16_format)
2154 case ARM_FP16_FORMAT_IEEE:
2155 case ARM_FP16_FORMAT_ALTERNATIVE:
2157 /* Conversions. */
2158 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2159 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2160 ? "__gnu_f2h_ieee"
2161 : "__gnu_f2h_alternative"));
2162 set_conv_libfunc (sext_optab, SFmode, HFmode,
2163 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2164 ? "__gnu_h2f_ieee"
2165 : "__gnu_h2f_alternative"));
2167 /* Arithmetic. */
2168 set_optab_libfunc (add_optab, HFmode, NULL);
2169 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2170 set_optab_libfunc (smul_optab, HFmode, NULL);
2171 set_optab_libfunc (neg_optab, HFmode, NULL);
2172 set_optab_libfunc (sub_optab, HFmode, NULL);
2174 /* Comparisons. */
2175 set_optab_libfunc (eq_optab, HFmode, NULL);
2176 set_optab_libfunc (ne_optab, HFmode, NULL);
2177 set_optab_libfunc (lt_optab, HFmode, NULL);
2178 set_optab_libfunc (le_optab, HFmode, NULL);
2179 set_optab_libfunc (ge_optab, HFmode, NULL);
2180 set_optab_libfunc (gt_optab, HFmode, NULL);
2181 set_optab_libfunc (unord_optab, HFmode, NULL);
2182 break;
2184 default:
2185 break;
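/* Illustrative consequence of the conversions registered above (an
assumption about typical output, not part of this file): with
-mfp16-format=ieee, assigning a float f to an __fp16 variable

__fp16 h = f;

is lowered to a call to __gnu_f2h_ieee, and widening back to float to a
call to __gnu_h2f_ieee; arithmetic on __fp16 values is performed in
SFmode because the HFmode optabs above are NULL.  */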
2188 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2190 const arm_fixed_mode_set fixed_arith_modes[] =
2192 { QQmode, "qq" },
2193 { UQQmode, "uqq" },
2194 { HQmode, "hq" },
2195 { UHQmode, "uhq" },
2196 { SQmode, "sq" },
2197 { USQmode, "usq" },
2198 { DQmode, "dq" },
2199 { UDQmode, "udq" },
2200 { TQmode, "tq" },
2201 { UTQmode, "utq" },
2202 { HAmode, "ha" },
2203 { UHAmode, "uha" },
2204 { SAmode, "sa" },
2205 { USAmode, "usa" },
2206 { DAmode, "da" },
2207 { UDAmode, "uda" },
2208 { TAmode, "ta" },
2209 { UTAmode, "uta" }
2211 const arm_fixed_mode_set fixed_conv_modes[] =
2213 { QQmode, "qq" },
2214 { UQQmode, "uqq" },
2215 { HQmode, "hq" },
2216 { UHQmode, "uhq" },
2217 { SQmode, "sq" },
2218 { USQmode, "usq" },
2219 { DQmode, "dq" },
2220 { UDQmode, "udq" },
2221 { TQmode, "tq" },
2222 { UTQmode, "utq" },
2223 { HAmode, "ha" },
2224 { UHAmode, "uha" },
2225 { SAmode, "sa" },
2226 { USAmode, "usa" },
2227 { DAmode, "da" },
2228 { UDAmode, "uda" },
2229 { TAmode, "ta" },
2230 { UTAmode, "uta" },
2231 { QImode, "qi" },
2232 { HImode, "hi" },
2233 { SImode, "si" },
2234 { DImode, "di" },
2235 { TImode, "ti" },
2236 { SFmode, "sf" },
2237 { DFmode, "df" }
2239 unsigned int i, j;
2241 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2243 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2244 "add", fixed_arith_modes[i].name, 3);
2245 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2246 "ssadd", fixed_arith_modes[i].name, 3);
2247 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2248 "usadd", fixed_arith_modes[i].name, 3);
2249 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2250 "sub", fixed_arith_modes[i].name, 3);
2251 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2252 "sssub", fixed_arith_modes[i].name, 3);
2253 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2254 "ussub", fixed_arith_modes[i].name, 3);
2255 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2256 "mul", fixed_arith_modes[i].name, 3);
2257 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2258 "ssmul", fixed_arith_modes[i].name, 3);
2259 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2260 "usmul", fixed_arith_modes[i].name, 3);
2261 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2262 "div", fixed_arith_modes[i].name, 3);
2263 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2264 "udiv", fixed_arith_modes[i].name, 3);
2265 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2266 "ssdiv", fixed_arith_modes[i].name, 3);
2267 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2268 "usdiv", fixed_arith_modes[i].name, 3);
2269 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2270 "neg", fixed_arith_modes[i].name, 2);
2271 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2272 "ssneg", fixed_arith_modes[i].name, 2);
2273 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2274 "usneg", fixed_arith_modes[i].name, 2);
2275 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2276 "ashl", fixed_arith_modes[i].name, 3);
2277 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2278 "ashr", fixed_arith_modes[i].name, 3);
2279 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2280 "lshr", fixed_arith_modes[i].name, 3);
2281 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2282 "ssashl", fixed_arith_modes[i].name, 3);
2283 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2284 "usashl", fixed_arith_modes[i].name, 3);
2285 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2286 "cmp", fixed_arith_modes[i].name, 2);
2289 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2290 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2292 if (i == j
2293 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2294 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2295 continue;
2297 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2298 fixed_conv_modes[j].mode, "fract",
2299 fixed_conv_modes[i].name,
2300 fixed_conv_modes[j].name);
2301 arm_set_fixed_conv_libfunc (satfract_optab,
2302 fixed_conv_modes[i].mode,
2303 fixed_conv_modes[j].mode, "satfract",
2304 fixed_conv_modes[i].name,
2305 fixed_conv_modes[j].name);
2306 arm_set_fixed_conv_libfunc (fractuns_optab,
2307 fixed_conv_modes[i].mode,
2308 fixed_conv_modes[j].mode, "fractuns",
2309 fixed_conv_modes[i].name,
2310 fixed_conv_modes[j].name);
2311 arm_set_fixed_conv_libfunc (satfractuns_optab,
2312 fixed_conv_modes[i].mode,
2313 fixed_conv_modes[j].mode, "satfractuns",
2314 fixed_conv_modes[i].name,
2315 fixed_conv_modes[j].name);
2319 if (TARGET_AAPCS_BASED)
2320 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2323 /* On AAPCS systems, this is the "struct __va_list". */
2324 static GTY(()) tree va_list_type;
2326 /* Return the type to use as __builtin_va_list. */
2327 static tree
2328 arm_build_builtin_va_list (void)
2330 tree va_list_name;
2331 tree ap_field;
2333 if (!TARGET_AAPCS_BASED)
2334 return std_build_builtin_va_list ();
2336 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2337 defined as:
2339 struct __va_list
2341 void *__ap;
2344 The C Library ABI further reinforces this definition in \S
2345 4.1.
2347 We must follow this definition exactly. The structure tag
2348 name is visible in C++ mangled names, and thus forms a part
2349 of the ABI. The field name may be used by people who
2350 #include <stdarg.h>. */
2351 /* Create the type. */
2352 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2353 /* Give it the required name. */
2354 va_list_name = build_decl (BUILTINS_LOCATION,
2355 TYPE_DECL,
2356 get_identifier ("__va_list"),
2357 va_list_type);
2358 DECL_ARTIFICIAL (va_list_name) = 1;
2359 TYPE_NAME (va_list_type) = va_list_name;
2360 TYPE_STUB_DECL (va_list_type) = va_list_name;
2361 /* Create the __ap field. */
2362 ap_field = build_decl (BUILTINS_LOCATION,
2363 FIELD_DECL,
2364 get_identifier ("__ap"),
2365 ptr_type_node);
2366 DECL_ARTIFICIAL (ap_field) = 1;
2367 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2368 TYPE_FIELDS (va_list_type) = ap_field;
2369 /* Compute its layout. */
2370 layout_type (va_list_type);
2372 return va_list_type;
2375 /* Return an expression of type "void *" pointing to the next
2376 available argument in a variable-argument list. VALIST is the
2377 user-level va_list object, of type __builtin_va_list. */
2378 static tree
2379 arm_extract_valist_ptr (tree valist)
2381 if (TREE_TYPE (valist) == error_mark_node)
2382 return error_mark_node;
2384 /* On an AAPCS target, the pointer is stored within "struct
2385 va_list". */
2386 if (TARGET_AAPCS_BASED)
2388 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2389 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2390 valist, ap_field, NULL_TREE);
2393 return valist;
2396 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2397 static void
2398 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2400 valist = arm_extract_valist_ptr (valist);
2401 std_expand_builtin_va_start (valist, nextarg);
2404 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2405 static tree
2406 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2407 gimple_seq *post_p)
2409 valist = arm_extract_valist_ptr (valist);
2410 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2413 /* Fix up any incompatible options that the user has specified. */
2414 static void
2415 arm_option_override (void)
2417 if (global_options_set.x_arm_arch_option)
2418 arm_selected_arch = &all_architectures[arm_arch_option];
2420 if (global_options_set.x_arm_cpu_option)
2422 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2423 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2426 if (global_options_set.x_arm_tune_option)
2427 arm_selected_tune = &all_cores[(int) arm_tune_option];
2429 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2430 SUBTARGET_OVERRIDE_OPTIONS;
2431 #endif
2433 if (arm_selected_arch)
2435 if (arm_selected_cpu)
2437 /* Check for conflict between mcpu and march. */
2438 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2440 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2441 arm_selected_cpu->name, arm_selected_arch->name);
2442 /* -march wins for code generation.
2443 -mcpu wins for default tuning. */
2444 if (!arm_selected_tune)
2445 arm_selected_tune = arm_selected_cpu;
2447 arm_selected_cpu = arm_selected_arch;
2449 else
2450 /* -mcpu wins. */
2451 arm_selected_arch = NULL;
2453 else
2454 /* Pick a CPU based on the architecture. */
2455 arm_selected_cpu = arm_selected_arch;
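/* Worked example of the precedence above (editorial sketch): given
-mcpu=arm7tdmi -march=armv7-a, the two flag sets differ, so a warning is
issued, code is generated for the armv7-a architecture, and tuning
defaults to the arm7tdmi core unless -mtune says otherwise.  */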
2458 /* If the user did not specify a processor, choose one for them. */
2459 if (!arm_selected_cpu)
2461 const struct processors * sel;
2462 unsigned int sought;
2464 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2465 if (!arm_selected_cpu->name)
2467 #ifdef SUBTARGET_CPU_DEFAULT
2468 /* Use the subtarget default CPU if none was specified by
2469 configure. */
2470 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2471 #endif
2472 /* Default to ARM6. */
2473 if (!arm_selected_cpu->name)
2474 arm_selected_cpu = &all_cores[arm6];
2477 sel = arm_selected_cpu;
2478 insn_flags = sel->flags;
2480 /* Now check to see if the user has specified a command line
2481 switch that requires certain abilities from the cpu. */
2482 sought = 0;
2484 if (TARGET_INTERWORK || TARGET_THUMB)
2486 sought |= (FL_THUMB | FL_MODE32);
2488 /* There are no ARM processors that support both APCS-26 and
2489 interworking. Therefore we force FL_MODE26 to be removed
2490 from insn_flags here (if it was set), so that the search
2491 below will always be able to find a compatible processor. */
2492 insn_flags &= ~FL_MODE26;
2495 if (sought != 0 && ((sought & insn_flags) != sought))
2497 /* Try to locate a CPU type that supports all of the abilities
2498 of the default CPU, plus the extra abilities requested by
2499 the user. */
2500 for (sel = all_cores; sel->name != NULL; sel++)
2501 if ((sel->flags & sought) == (sought | insn_flags))
2502 break;
2504 if (sel->name == NULL)
2506 unsigned current_bit_count = 0;
2507 const struct processors * best_fit = NULL;
2509 /* Ideally we would like to issue an error message here
2510 saying that it was not possible to find a CPU compatible
2511 with the default CPU, but which also supports the command
2512 line options specified by the programmer, and so they
2513 ought to use the -mcpu=<name> command line option to
2514 override the default CPU type.
2516 If we cannot find a cpu that has both the
2517 characteristics of the default cpu and the given
2518 command line options we scan the array again looking
2519 for a best match. */
2520 for (sel = all_cores; sel->name != NULL; sel++)
2521 if ((sel->flags & sought) == sought)
2523 unsigned count;
2525 count = bit_count (sel->flags & insn_flags);
2527 if (count >= current_bit_count)
2529 best_fit = sel;
2530 current_bit_count = count;
2534 gcc_assert (best_fit);
2535 sel = best_fit;
2538 arm_selected_cpu = sel;
2542 gcc_assert (arm_selected_cpu);
2543 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2544 if (!arm_selected_tune)
2545 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2547 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2548 insn_flags = arm_selected_cpu->flags;
2549 arm_base_arch = arm_selected_cpu->base_arch;
2551 arm_tune = arm_selected_tune->core;
2552 tune_flags = arm_selected_tune->flags;
2553 current_tune = arm_selected_tune->tune;
2555 /* Make sure that the processor choice does not conflict with any of the
2556 other command line choices. */
2557 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2558 error ("target CPU does not support ARM mode");
2560 /* BPABI targets use linker tricks to allow interworking on cores
2561 without thumb support. */
2562 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2564 warning (0, "target CPU does not support interworking" );
2565 target_flags &= ~MASK_INTERWORK;
2568 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2570 warning (0, "target CPU does not support THUMB instructions");
2571 target_flags &= ~MASK_THUMB;
2574 if (TARGET_APCS_FRAME && TARGET_THUMB)
2576 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2577 target_flags &= ~MASK_APCS_FRAME;
2580 /* Callee super interworking implies thumb interworking. Adding
2581 this to the flags here simplifies the logic elsewhere. */
2582 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2583 target_flags |= MASK_INTERWORK;
2585 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2586 from here where no function is being compiled currently. */
2587 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2588 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2590 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2591 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2593 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2595 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2596 target_flags |= MASK_APCS_FRAME;
2599 if (TARGET_POKE_FUNCTION_NAME)
2600 target_flags |= MASK_APCS_FRAME;
2602 if (TARGET_APCS_REENT && flag_pic)
2603 error ("-fpic and -mapcs-reent are incompatible");
2605 if (TARGET_APCS_REENT)
2606 warning (0, "APCS reentrant code not supported. Ignored");
2608 /* If this target is normally configured to use APCS frames, warn if they
2609 are turned off and debugging is turned on. */
2610 if (TARGET_ARM
2611 && write_symbols != NO_DEBUG
2612 && !TARGET_APCS_FRAME
2613 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2614 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2616 if (TARGET_APCS_FLOAT)
2617 warning (0, "passing floating point arguments in fp regs not yet supported");
2619 if (TARGET_LITTLE_WORDS)
2620 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2621 "will be removed in a future release");
2623 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2624 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2625 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2626 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2627 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2628 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2629 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2630 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2631 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2632 arm_arch6m = arm_arch6 && !arm_arch_notm;
2633 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2634 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2635 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2636 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2637 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2639 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2640 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2641 thumb_code = TARGET_ARM == 0;
2642 thumb1_code = TARGET_THUMB1 != 0;
2643 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2644 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2645 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2646 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2647 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2648 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2649 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2650 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2651 if (arm_restrict_it == 2)
2652 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2654 if (!TARGET_THUMB2)
2655 arm_restrict_it = 0;
2657 /* If we are not using the default (ARM mode) section anchor offset
2658 ranges, then set the correct ranges now. */
2659 if (TARGET_THUMB1)
2661 /* Thumb-1 LDR instructions cannot have negative offsets.
2662 Permissible positive offset ranges are 5-bit (for byte loads),
2663 6-bit (for halfword loads), or 7-bit (for word loads).
2664 Empirical results suggest a 7-bit anchor range gives the best
2665 overall code size. */
2666 targetm.min_anchor_offset = 0;
2667 targetm.max_anchor_offset = 127;
2669 else if (TARGET_THUMB2)
2671 /* The minimum is set such that the total size of the block
2672 for a particular anchor is 248 + 1 + 4095 bytes, which is
2673 divisible by eight, ensuring natural spacing of anchors. */
2674 targetm.min_anchor_offset = -248;
2675 targetm.max_anchor_offset = 4095;
2678 /* V5 code we generate is completely interworking capable, so we turn off
2679 TARGET_INTERWORK here to avoid many tests later on. */
2681 /* XXX However, we must pass the right pre-processor defines to CPP
2682 or GLD can get confused. This is a hack. */
2683 if (TARGET_INTERWORK)
2684 arm_cpp_interwork = 1;
2686 if (arm_arch5)
2687 target_flags &= ~MASK_INTERWORK;
2689 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2690 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2692 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2693 error ("iwmmxt abi requires an iwmmxt capable cpu");
2695 if (!global_options_set.x_arm_fpu_index)
2697 const char *target_fpu_name;
2698 bool ok;
2700 #ifdef FPUTYPE_DEFAULT
2701 target_fpu_name = FPUTYPE_DEFAULT;
2702 #else
2703 target_fpu_name = "vfp";
2704 #endif
2706 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2707 CL_TARGET);
2708 gcc_assert (ok);
2711 arm_fpu_desc = &all_fpus[arm_fpu_index];
2713 switch (arm_fpu_desc->model)
2715 case ARM_FP_MODEL_VFP:
2716 arm_fpu_attr = FPU_VFP;
2717 break;
2719 default:
2720 gcc_unreachable();
2723 if (TARGET_AAPCS_BASED)
2725 if (TARGET_CALLER_INTERWORKING)
2726 error ("AAPCS does not support -mcaller-super-interworking");
2727 else
2728 if (TARGET_CALLEE_INTERWORKING)
2729 error ("AAPCS does not support -mcallee-super-interworking");
2732 /* iWMMXt and NEON are incompatible. */
2733 if (TARGET_IWMMXT && TARGET_NEON)
2734 error ("iWMMXt and NEON are incompatible");
2736 /* iWMMXt unsupported under Thumb mode. */
2737 if (TARGET_THUMB && TARGET_IWMMXT)
2738 error ("iWMMXt unsupported under Thumb mode");
2740 /* __fp16 support currently assumes the core has ldrh. */
2741 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2742 sorry ("__fp16 and no ldrh");
2744 /* If soft-float is specified then don't use FPU. */
2745 if (TARGET_SOFT_FLOAT)
2746 arm_fpu_attr = FPU_NONE;
2748 if (TARGET_AAPCS_BASED)
2750 if (arm_abi == ARM_ABI_IWMMXT)
2751 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2752 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2753 && TARGET_HARD_FLOAT
2754 && TARGET_VFP)
2755 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2756 else
2757 arm_pcs_default = ARM_PCS_AAPCS;
2759 else
2761 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2762 sorry ("-mfloat-abi=hard and VFP");
2764 if (arm_abi == ARM_ABI_APCS)
2765 arm_pcs_default = ARM_PCS_APCS;
2766 else
2767 arm_pcs_default = ARM_PCS_ATPCS;
2770 /* For arm2/3 there is no need to do any scheduling if we are doing
2771 software floating-point. */
2772 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2773 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2775 /* Use the cp15 method if it is available. */
2776 if (target_thread_pointer == TP_AUTO)
2778 if (arm_arch6k && !TARGET_THUMB1)
2779 target_thread_pointer = TP_CP15;
2780 else
2781 target_thread_pointer = TP_SOFT;
2784 if (TARGET_HARD_TP && TARGET_THUMB1)
2785 error ("can not use -mtp=cp15 with 16-bit Thumb");
2787 /* Override the default structure alignment for AAPCS ABI. */
2788 if (!global_options_set.x_arm_structure_size_boundary)
2790 if (TARGET_AAPCS_BASED)
2791 arm_structure_size_boundary = 8;
2793 else
2795 if (arm_structure_size_boundary != 8
2796 && arm_structure_size_boundary != 32
2797 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2799 if (ARM_DOUBLEWORD_ALIGN)
2800 warning (0,
2801 "structure size boundary can only be set to 8, 32 or 64");
2802 else
2803 warning (0, "structure size boundary can only be set to 8 or 32");
2804 arm_structure_size_boundary
2805 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2809 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2811 error ("RTP PIC is incompatible with Thumb");
2812 flag_pic = 0;
2815 /* If stack checking is disabled, we can use r10 as the PIC register,
2816 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2817 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2819 if (TARGET_VXWORKS_RTP)
2820 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2821 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2824 if (flag_pic && TARGET_VXWORKS_RTP)
2825 arm_pic_register = 9;
2827 if (arm_pic_register_string != NULL)
2829 int pic_register = decode_reg_name (arm_pic_register_string);
2831 if (!flag_pic)
2832 warning (0, "-mpic-register= is useless without -fpic");
2834 /* Prevent the user from choosing an obviously stupid PIC register. */
2835 else if (pic_register < 0 || call_used_regs[pic_register]
2836 || pic_register == HARD_FRAME_POINTER_REGNUM
2837 || pic_register == STACK_POINTER_REGNUM
2838 || pic_register >= PC_REGNUM
2839 || (TARGET_VXWORKS_RTP
2840 && (unsigned int) pic_register != arm_pic_register))
2841 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2842 else
2843 arm_pic_register = pic_register;
2846 if (TARGET_VXWORKS_RTP
2847 && !global_options_set.x_arm_pic_data_is_text_relative)
2848 arm_pic_data_is_text_relative = 0;
2850 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2851 if (fix_cm3_ldrd == 2)
2853 if (arm_selected_cpu->core == cortexm3)
2854 fix_cm3_ldrd = 1;
2855 else
2856 fix_cm3_ldrd = 0;
2859 /* Enable -munaligned-access by default for
2860 - all ARMv6 architecture-based processors
2861 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2862 - ARMv8 architecture-based processors.
2864 Disable -munaligned-access by default for
2865 - all pre-ARMv6 architecture-based processors
2866 - ARMv6-M architecture-based processors. */
2868 if (unaligned_access == 2)
2870 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2871 unaligned_access = 1;
2872 else
2873 unaligned_access = 0;
2875 else if (unaligned_access == 1
2876 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2878 warning (0, "target CPU does not support unaligned accesses");
2879 unaligned_access = 0;
2882 if (TARGET_THUMB1 && flag_schedule_insns)
2884 /* Don't warn since it's on by default in -O2. */
2885 flag_schedule_insns = 0;
2888 if (optimize_size)
2890 /* If optimizing for size, bump the number of instructions that we
2891 are prepared to conditionally execute (even on a StrongARM). */
2892 max_insns_skipped = 6;
2894 else
2895 max_insns_skipped = current_tune->max_insns_skipped;
2897 /* Hot/Cold partitioning is not currently supported, since we can't
2898 handle literal pool placement in that case. */
2899 if (flag_reorder_blocks_and_partition)
2901 inform (input_location,
2902 "-freorder-blocks-and-partition not supported on this architecture");
2903 flag_reorder_blocks_and_partition = 0;
2904 flag_reorder_blocks = 1;
2907 if (flag_pic)
2908 /* Hoisting PIC address calculations more aggressively provides a small,
2909 but measurable, size reduction for PIC code. Therefore, we decrease
2910 the bar for unrestricted expression hoisting to the cost of PIC address
2911 calculation, which is 2 instructions. */
2912 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2913 global_options.x_param_values,
2914 global_options_set.x_param_values);
2916 /* ARM EABI defaults to strict volatile bitfields. */
2917 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2918 && abi_version_at_least(2))
2919 flag_strict_volatile_bitfields = 1;
2921 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
2922 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2923 if (flag_prefetch_loop_arrays < 0
2924 && HAVE_prefetch
2925 && optimize >= 3
2926 && current_tune->num_prefetch_slots > 0)
2927 flag_prefetch_loop_arrays = 1;
2929 /* Set up parameters to be used in prefetching algorithm. Do not override the
2930 defaults unless we are tuning for a core we have researched values for. */
2931 if (current_tune->num_prefetch_slots > 0)
2932 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2933 current_tune->num_prefetch_slots,
2934 global_options.x_param_values,
2935 global_options_set.x_param_values);
2936 if (current_tune->l1_cache_line_size >= 0)
2937 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2938 current_tune->l1_cache_line_size,
2939 global_options.x_param_values,
2940 global_options_set.x_param_values);
2941 if (current_tune->l1_cache_size >= 0)
2942 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2943 current_tune->l1_cache_size,
2944 global_options.x_param_values,
2945 global_options_set.x_param_values);
2947 /* Use Neon to perform 64-bits operations rather than core
2948 registers. */
2949 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2950 if (use_neon_for_64bits == 1)
2951 prefer_neon_for_64bits = true;
2953 /* Use the alternative scheduling-pressure algorithm by default. */
2954 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
2955 global_options.x_param_values,
2956 global_options_set.x_param_values);
2958 /* Disable shrink-wrap when optimizing function for size, since it tends to
2959 generate additional returns. */
2960 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2961 flag_shrink_wrap = false;
2962 /* TBD: Dwarf info for apcs frame is not handled yet. */
2963 if (TARGET_APCS_FRAME)
2964 flag_shrink_wrap = false;
2966 /* We only support -mslow-flash-data on armv7-m targets. */
2967 if (target_slow_flash_data
2968 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2969 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2970 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2972 /* Currently, for slow flash data, we just disable literal pools. */
2973 if (target_slow_flash_data)
2974 arm_disable_literal_pool = true;
2976 /* Register global variables with the garbage collector. */
2977 arm_add_gc_roots ();
2980 static void
2981 arm_add_gc_roots (void)
2983 gcc_obstack_init(&minipool_obstack);
2984 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2987 /* A table of known ARM exception types.
2988 For use with the interrupt function attribute. */
2990 typedef struct
2992 const char *const arg;
2993 const unsigned long return_value;
2995 isr_attribute_arg;
2997 static const isr_attribute_arg isr_attribute_args [] =
2999 { "IRQ", ARM_FT_ISR },
3000 { "irq", ARM_FT_ISR },
3001 { "FIQ", ARM_FT_FIQ },
3002 { "fiq", ARM_FT_FIQ },
3003 { "ABORT", ARM_FT_ISR },
3004 { "abort", ARM_FT_ISR },
3005 { "ABORT", ARM_FT_ISR },
3006 { "abort", ARM_FT_ISR },
3007 { "UNDEF", ARM_FT_EXCEPTION },
3008 { "undef", ARM_FT_EXCEPTION },
3009 { "SWI", ARM_FT_EXCEPTION },
3010 { "swi", ARM_FT_EXCEPTION },
3011 { NULL, ARM_FT_NORMAL }
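/* For illustration: these are the strings accepted by the "interrupt"
(or "isr") function attribute, e.g.

void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

(a sketch of typical usage; the handler name is hypothetical).  */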
3014 /* Returns the (interrupt) function type of the current
3015 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3017 static unsigned long
3018 arm_isr_value (tree argument)
3020 const isr_attribute_arg * ptr;
3021 const char * arg;
3023 if (!arm_arch_notm)
3024 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3026 /* No argument - default to IRQ. */
3027 if (argument == NULL_TREE)
3028 return ARM_FT_ISR;
3030 /* Get the value of the argument. */
3031 if (TREE_VALUE (argument) == NULL_TREE
3032 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3033 return ARM_FT_UNKNOWN;
3035 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3037 /* Check it against the list of known arguments. */
3038 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3039 if (streq (arg, ptr->arg))
3040 return ptr->return_value;
3042 /* An unrecognized interrupt type. */
3043 return ARM_FT_UNKNOWN;
3046 /* Computes the type of the current function. */
3048 static unsigned long
3049 arm_compute_func_type (void)
3051 unsigned long type = ARM_FT_UNKNOWN;
3052 tree a;
3053 tree attr;
3055 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3057 /* Decide if the current function is volatile. Such functions
3058 never return, and many memory cycles can be saved by not storing
3059 register values that will never be needed again. This optimization
3060 was added to speed up context switching in a kernel application. */
3061 if (optimize > 0
3062 && (TREE_NOTHROW (current_function_decl)
3063 || !(flag_unwind_tables
3064 || (flag_exceptions
3065 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3066 && TREE_THIS_VOLATILE (current_function_decl))
3067 type |= ARM_FT_VOLATILE;
3069 if (cfun->static_chain_decl != NULL)
3070 type |= ARM_FT_NESTED;
3072 attr = DECL_ATTRIBUTES (current_function_decl);
3074 a = lookup_attribute ("naked", attr);
3075 if (a != NULL_TREE)
3076 type |= ARM_FT_NAKED;
3078 a = lookup_attribute ("isr", attr);
3079 if (a == NULL_TREE)
3080 a = lookup_attribute ("interrupt", attr);
3082 if (a == NULL_TREE)
3083 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3084 else
3085 type |= arm_isr_value (TREE_VALUE (a));
3087 return type;
3090 /* Returns the type of the current function. */
3092 unsigned long
3093 arm_current_func_type (void)
3095 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3096 cfun->machine->func_type = arm_compute_func_type ();
3098 return cfun->machine->func_type;
3101 bool
3102 arm_allocate_stack_slots_for_args (void)
3104 /* Naked functions should not allocate stack slots for arguments. */
3105 return !IS_NAKED (arm_current_func_type ());
3108 static bool
3109 arm_warn_func_return (tree decl)
3111 /* Naked functions are implemented entirely in assembly, including the
3112 return sequence, so suppress warnings about this. */
3113 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3117 /* Output assembler code for a block containing the constant parts
3118 of a trampoline, leaving space for the variable parts.
3120 On the ARM, (if r8 is the static chain regnum, and remembering that
3121 referencing pc adds an offset of 8) the trampoline looks like:
3122 ldr r8, [pc, #0]
3123 ldr pc, [pc]
3124 .word static chain value
3125 .word function's address
3126 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3128 static void
3129 arm_asm_trampoline_template (FILE *f)
3131 if (TARGET_ARM)
3133 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3134 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3136 else if (TARGET_THUMB2)
3138 /* The Thumb-2 trampoline is similar to the arm implementation.
3139 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3140 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3141 STATIC_CHAIN_REGNUM, PC_REGNUM);
3142 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3144 else
3146 ASM_OUTPUT_ALIGN (f, 2);
3147 fprintf (f, "\t.code\t16\n");
3148 fprintf (f, ".Ltrampoline_start:\n");
3149 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3150 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3151 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3152 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3153 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3154 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3156 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3157 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3160 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3162 static void
3163 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3165 rtx fnaddr, mem, a_tramp;
3167 emit_block_move (m_tramp, assemble_trampoline_template (),
3168 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3170 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3171 emit_move_insn (mem, chain_value);
3173 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3174 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3175 emit_move_insn (mem, fnaddr);
3177 a_tramp = XEXP (m_tramp, 0);
3178 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3179 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3180 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3183 /* Thumb trampolines should be entered in thumb mode, so set
3184 the bottom bit of the address. */
3186 static rtx
3187 arm_trampoline_adjust_address (rtx addr)
3189 if (TARGET_THUMB)
3190 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3191 NULL, 0, OPTAB_LIB_WIDEN);
3192 return addr;
3195 /* Return 1 if it is possible to return using a single instruction.
3196 If SIBLING is non-null, this is a test for a return before a sibling
3197 call. SIBLING is the call insn, so we can examine its register usage. */
3200 use_return_insn (int iscond, rtx sibling)
3202 int regno;
3203 unsigned int func_type;
3204 unsigned long saved_int_regs;
3205 unsigned HOST_WIDE_INT stack_adjust;
3206 arm_stack_offsets *offsets;
3208 /* Never use a return instruction before reload has run. */
3209 if (!reload_completed)
3210 return 0;
3212 func_type = arm_current_func_type ();
3214 /* Naked, volatile and stack alignment functions need special
3215 consideration. */
3216 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3217 return 0;
3219 /* So do interrupt functions that use the frame pointer and Thumb
3220 interrupt functions. */
3221 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3222 return 0;
3224 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3225 && !optimize_function_for_size_p (cfun))
3226 return 0;
3228 offsets = arm_get_frame_offsets ();
3229 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3231 /* As do variadic functions. */
3232 if (crtl->args.pretend_args_size
3233 || cfun->machine->uses_anonymous_args
3234 /* Or if the function calls __builtin_eh_return () */
3235 || crtl->calls_eh_return
3236 /* Or if the function calls alloca */
3237 || cfun->calls_alloca
3238 /* Or if there is a stack adjustment. However, if the stack pointer
3239 is saved on the stack, we can use a pre-incrementing stack load. */
3240 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3241 && stack_adjust == 4)))
3242 return 0;
3244 saved_int_regs = offsets->saved_regs_mask;
3246 /* Unfortunately, the insn
3248 ldmib sp, {..., sp, ...}
3250 triggers a bug on most SA-110 based devices, such that the stack
3251 pointer won't be correctly restored if the instruction takes a
3252 page fault. We work around this problem by popping r3 along with
3253 the other registers, since that is never slower than executing
3254 another instruction.
3256 We test for !arm_arch5 here, because code for any architecture
3257 less than this could potentially be run on one of the buggy
3258 chips. */
3259 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3261 /* Validate that r3 is a call-clobbered register (always true in
3262 the default abi) ... */
3263 if (!call_used_regs[3])
3264 return 0;
3266 /* ... that it isn't being used for a return value ... */
3267 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3268 return 0;
3270 /* ... or for a tail-call argument ... */
3271 if (sibling)
3273 gcc_assert (CALL_P (sibling));
3275 if (find_regno_fusage (sibling, USE, 3))
3276 return 0;
3279 /* ... and that there are no call-saved registers in r0-r2
3280 (always true in the default ABI). */
3281 if (saved_int_regs & 0x7)
3282 return 0;
3285 /* Can't be done if interworking with Thumb, and any registers have been
3286 stacked. */
3287 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3288 return 0;
3290 /* On StrongARM, conditional returns are expensive if they aren't
3291 taken and multiple registers have been stacked. */
3292 if (iscond && arm_tune_strongarm)
3294 /* Conditional return when just the LR is stored is a simple
3295 conditional-load instruction, that's not expensive. */
3296 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3297 return 0;
3299 if (flag_pic
3300 && arm_pic_register != INVALID_REGNUM
3301 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3302 return 0;
3305 /* If there are saved registers but the LR isn't saved, then we need
3306 two instructions for the return. */
3307 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3308 return 0;
3310 /* Can't be done if any of the VFP regs are pushed,
3311 since this also requires an insn. */
3312 if (TARGET_HARD_FLOAT && TARGET_VFP)
3313 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3314 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3315 return 0;
3317 if (TARGET_REALLY_IWMMXT)
3318 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3319 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3320 return 0;
3322 return 1;
3325 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3326 shrink-wrapping if possible. This is the case if we need to emit a
3327 prologue, which we can test by looking at the offsets. */
3328 bool
3329 use_simple_return_p (void)
3331 arm_stack_offsets *offsets;
3333 offsets = arm_get_frame_offsets ();
3334 return offsets->outgoing_args != 0;
3337 /* Return TRUE if int I is a valid immediate ARM constant. */
3340 const_ok_for_arm (HOST_WIDE_INT i)
3342 int lowbit;
3344 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3345 be all zero, or all one. */
3346 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3347 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3348 != ((~(unsigned HOST_WIDE_INT) 0)
3349 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3350 return FALSE;
3352 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3354 /* Fast return for 0 and small values. We must do this for zero, since
3355 the code below can't handle that one case. */
3356 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3357 return TRUE;
3359 /* Get the number of trailing zeros. */
3360 lowbit = ffs((int) i) - 1;
3362 /* Only even shifts are allowed in ARM mode so round down to the
3363 nearest even number. */
3364 if (TARGET_ARM)
3365 lowbit &= ~1;
3367 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3368 return TRUE;
3370 if (TARGET_ARM)
3372 /* Allow rotated constants in ARM mode. */
3373 if (lowbit <= 4
3374 && ((i & ~0xc000003f) == 0
3375 || (i & ~0xf000000f) == 0
3376 || (i & ~0xfc000003) == 0))
3377 return TRUE;
3379 else
3381 HOST_WIDE_INT v;
3383 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3384 v = i & 0xff;
3385 v |= v << 16;
3386 if (i == v || i == (v | (v << 8)))
3387 return TRUE;
3389 /* Allow repeated pattern 0xXY00XY00. */
3390 v = i & 0xff00;
3391 v |= v << 16;
3392 if (i == v)
3393 return TRUE;
3396 return FALSE;
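/* Illustrative sketch, not part of arm.c: a stand-alone model of the
   ARM-mode rule above -- a constant is encodable when it is an 8-bit
   value rotated right by an even amount.  The function name and the use
   of plain `unsigned int' are illustrative assumptions; the real routine
   above additionally handles wide HOST_WIDE_INTs and the Thumb-2
   replicated byte patterns.  */
static int
example_arm_mode_immediate_p (unsigned int x)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the result fits in 8 bits, then X is
         that 8-bit value rotated right by ROT.  */
      unsigned int r = (x << rot) | (rot ? x >> (32 - rot) : 0);
      if ((r & ~0xffu) == 0)
        return 1;
    }
  return 0;
}

/* For example, 0xff000000 and 0x000003fc are encodable, while 0x00000101
   and 0x0000ff01 are not and must be synthesized or loaded from memory.  */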
3399 /* Return true if I is a valid constant for the operation CODE. */
3401 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3403 if (const_ok_for_arm (i))
3404 return 1;
3406 switch (code)
3408 case SET:
3409 /* See if we can use movw. */
3410 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3411 return 1;
3412 else
3413 /* Otherwise, try mvn. */
3414 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3416 case PLUS:
3417 /* See if we can use addw or subw. */
3418 if (TARGET_THUMB2
3419 && ((i & 0xfffff000) == 0
3420 || ((-i) & 0xfffff000) == 0))
3421 return 1;
3422 /* else fall through. */
3424 case COMPARE:
3425 case EQ:
3426 case NE:
3427 case GT:
3428 case LE:
3429 case LT:
3430 case GE:
3431 case GEU:
3432 case LTU:
3433 case GTU:
3434 case LEU:
3435 case UNORDERED:
3436 case ORDERED:
3437 case UNEQ:
3438 case UNGE:
3439 case UNLT:
3440 case UNGT:
3441 case UNLE:
3442 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3444 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3445 case XOR:
3446 return 0;
3448 case IOR:
3449 if (TARGET_THUMB2)
3450 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3451 return 0;
3453 case AND:
3454 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3456 default:
3457 gcc_unreachable ();
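/* Illustrative sketch, not part of arm.c: the SET strategy above in
   miniature.  With Thumb-2, a constant whose top 16 bits are zero can be
   loaded with a single `movw'; failing that, a constant whose bitwise
   inverse is encodable can be loaded with a single `mvn'.  The enum, the
   function name and the simplified (unrotated 8-bit) encodability test are
   illustrative assumptions.  */
enum example_load_kind { EXAMPLE_MOV, EXAMPLE_MOVW, EXAMPLE_MVN, EXAMPLE_SPLIT };

static enum example_load_kind
example_classify_set (unsigned int val, int thumb2)
{
  if ((val & ~0xffu) == 0)
    return EXAMPLE_MOV;			/* mov  rD, #val  */
  if (thumb2 && (val & 0xffff0000u) == 0)
    return EXAMPLE_MOVW;		/* movw rD, #val  */
  if ((~val & ~0xffu) == 0)
    return EXAMPLE_MVN;			/* mvn  rD, #(~val)  */
  return EXAMPLE_SPLIT;			/* Needs a multi-insn sequence.  */
}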
3461 /* Return true if I is a valid di mode constant for the operation CODE. */
3463 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3465 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3466 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3467 rtx hi = GEN_INT (hi_val);
3468 rtx lo = GEN_INT (lo_val);
3470 if (TARGET_THUMB1)
3471 return 0;
3473 switch (code)
3475 case AND:
3476 case IOR:
3477 case XOR:
3478 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3479 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3480 case PLUS:
3481 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3483 default:
3484 return 0;
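/* Illustrative sketch, not part of arm.c: the DImode test above simply
   splits the 64-bit constant into two 32-bit halves and asks whether each
   half is usable on its own; an all-ones half is also fine for the logical
   ops, since that word of the operation degenerates to a simple move.  The
   function name, the use of `unsigned long long' and the simplified
   (unrotated 8-bit) immediate test are illustrative assumptions.  */
static int
example_dimode_and_ok (unsigned long long v)
{
  unsigned int hi = (unsigned int) (v >> 32);
  unsigned int lo = (unsigned int) v;

  /* E.g. 0xffffffff000000ffULL qualifies: the high word of the AND is a
     plain register move and the low word is itself a valid immediate.  */
  return (hi == 0xffffffffu || (hi & ~0xffu) == 0)
	 && (lo == 0xffffffffu || (lo & ~0xffu) == 0);
}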
3488 /* Emit a sequence of insns to handle a large constant.
3489 CODE is the code of the operation required, it can be any of SET, PLUS,
3490 IOR, AND, XOR, MINUS;
3491 MODE is the mode in which the operation is being performed;
3492 VAL is the integer to operate on;
3493 SOURCE is the other operand (a register, or a null-pointer for SET);
3494 SUBTARGETS means it is safe to create scratch registers if that will
3495 either produce a simpler sequence, or we will want to cse the values.
3496 Return value is the number of insns emitted. */
3498 /* ??? Tweak this for thumb2. */
3500 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3501 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3503 rtx cond;
3505 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3506 cond = COND_EXEC_TEST (PATTERN (insn));
3507 else
3508 cond = NULL_RTX;
3510 if (subtargets || code == SET
3511 || (REG_P (target) && REG_P (source)
3512 && REGNO (target) != REGNO (source)))
3514 /* After arm_reorg has been called, we can't fix up expensive
3515 constants by pushing them into memory so we must synthesize
3516 them in-line, regardless of the cost. This is only likely to
3517 be more costly on chips that have load delay slots and we are
3518 compiling without running the scheduler (so no splitting
3519 occurred before the final instruction emission).
3521 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3523 if (!cfun->machine->after_arm_reorg
3524 && !cond
3525 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3526 1, 0)
3527 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3528 + (code != SET))))
3530 if (code == SET)
3532 /* Currently SET is the only monadic value for CODE, all
3533 the rest are dyadic. */
3534 if (TARGET_USE_MOVT)
3535 arm_emit_movpair (target, GEN_INT (val));
3536 else
3537 emit_set_insn (target, GEN_INT (val));
3539 return 1;
3541 else
3543 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3545 if (TARGET_USE_MOVT)
3546 arm_emit_movpair (temp, GEN_INT (val));
3547 else
3548 emit_set_insn (temp, GEN_INT (val));
3550 /* For MINUS, the value is subtracted from, since we never
3551 have subtraction of a constant. */
3552 if (code == MINUS)
3553 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3554 else
3555 emit_set_insn (target,
3556 gen_rtx_fmt_ee (code, mode, source, temp));
3557 return 2;
3562 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3566 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3567 ARM/THUMB2 immediates, and add up to VAL.
3568 The function's return value gives the number of insns required. */
3569 static int
3570 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3571 struct four_ints *return_sequence)
3573 int best_consecutive_zeros = 0;
3574 int i;
3575 int best_start = 0;
3576 int insns1, insns2;
3577 struct four_ints tmp_sequence;
3579 /* If we aren't targeting ARM, the best place to start is always at
3580 the bottom, otherwise look more closely. */
3581 if (TARGET_ARM)
3583 for (i = 0; i < 32; i += 2)
3585 int consecutive_zeros = 0;
3587 if (!(val & (3 << i)))
3589 while ((i < 32) && !(val & (3 << i)))
3591 consecutive_zeros += 2;
3592 i += 2;
3594 if (consecutive_zeros > best_consecutive_zeros)
3596 best_consecutive_zeros = consecutive_zeros;
3597 best_start = i - consecutive_zeros;
3599 i -= 2;
3604 /* So long as it won't require any more insns to do so, it's
3605 desirable to emit a small constant (in bits 0...9) in the last
3606 insn. This way there is more chance that it can be combined with
3607 a later addressing insn to form a pre-indexed load or store
3608 operation. Consider:
3610 *((volatile int *)0xe0000100) = 1;
3611 *((volatile int *)0xe0000110) = 2;
3613 We want this to wind up as:
3615 mov rA, #0xe0000000
3616 mov rB, #1
3617 str rB, [rA, #0x100]
3618 mov rB, #2
3619 str rB, [rA, #0x110]
3621 rather than having to synthesize both large constants from scratch.
3623 Therefore, we calculate how many insns would be required to emit
3624 the constant starting from `best_start', and also starting from
3625 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3626 yield a shorter sequence, we may as well use zero. */
3627 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3628 if (best_start != 0
3629 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3631 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3632 if (insns2 <= insns1)
3634 *return_sequence = tmp_sequence;
3635 insns1 = insns2;
3639 return insns1;
3642 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3643 static int
3644 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3645 struct four_ints *return_sequence, int i)
3647 int remainder = val & 0xffffffff;
3648 int insns = 0;
3650 /* Try and find a way of doing the job in either two or three
3651 instructions.
3653 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3654 location. We start at position I. This may be the MSB, or
3655 optimal_immediate_sequence may have positioned it at the largest block
3656 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3657 wrapping around to the top of the word when we drop off the bottom.
3658 In the worst case this code should produce no more than four insns.
3660 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3661 constants, shifted to any arbitrary location. We should always start
3662 at the MSB. */
3665 int end;
3666 unsigned int b1, b2, b3, b4;
3667 unsigned HOST_WIDE_INT result;
3668 int loc;
3670 gcc_assert (insns < 4);
3672 if (i <= 0)
3673 i += 32;
3675 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3676 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3678 loc = i;
3679 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3680 /* We can use addw/subw for the last 12 bits. */
3681 result = remainder;
3682 else
3684 /* Use an 8-bit shifted/rotated immediate. */
3685 end = i - 8;
3686 if (end < 0)
3687 end += 32;
3688 result = remainder & ((0x0ff << end)
3689 | ((i < end) ? (0xff >> (32 - end))
3690 : 0));
3691 i -= 8;
3694 else
3696 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3697 arbitrary shifts. */
3698 i -= TARGET_ARM ? 2 : 1;
3699 continue;
3702 /* Next, see if we can do a better job with a thumb2 replicated
3703 constant.
3705 We do it this way around to catch the cases like 0x01F001E0 where
3706 two 8-bit immediates would work, but a replicated constant would
3707 make it worse.
3709 TODO: 16-bit constants that don't clear all the bits, but still win.
3710 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3711 if (TARGET_THUMB2)
3713 b1 = (remainder & 0xff000000) >> 24;
3714 b2 = (remainder & 0x00ff0000) >> 16;
3715 b3 = (remainder & 0x0000ff00) >> 8;
3716 b4 = remainder & 0xff;
3718 if (loc > 24)
3720 /* The 8-bit immediate already found clears b1 (and maybe b2),
3721 but must leave b3 and b4 alone. */
3723 /* First try to find a 32-bit replicated constant that clears
3724 almost everything. We can assume that we can't do it in one,
3725 or else we wouldn't be here. */
3726 unsigned int tmp = b1 & b2 & b3 & b4;
3727 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3728 + (tmp << 24);
3729 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3730 + (tmp == b3) + (tmp == b4);
3731 if (tmp
3732 && (matching_bytes >= 3
3733 || (matching_bytes == 2
3734 && const_ok_for_op (remainder & ~tmp2, code))))
3736 /* At least 3 of the bytes match, and the fourth has at
3737 least as many bits set, or two of the bytes match
3738 and it will only require one more insn to finish. */
3739 result = tmp2;
3740 i = tmp != b1 ? 32
3741 : tmp != b2 ? 24
3742 : tmp != b3 ? 16
3743 : 8;
3746 /* Second, try to find a 16-bit replicated constant that can
3747 leave three of the bytes clear. If b2 or b4 is already
3748 zero, then we can. If the 8-bit from above would not
3749 clear b2 anyway, then we still win. */
3750 else if (b1 == b3 && (!b2 || !b4
3751 || (remainder & 0x00ff0000 & ~result)))
3753 result = remainder & 0xff00ff00;
3754 i = 24;
3757 else if (loc > 16)
3759 /* The 8-bit immediate already found clears b2 (and maybe b3)
3760 and we don't get here unless b1 is already clear, but it will
3761 leave b4 unchanged. */
3763 /* If we can clear b2 and b4 at once, then we win, since the
3764 8-bits couldn't possibly reach that far. */
3765 if (b2 == b4)
3767 result = remainder & 0x00ff00ff;
3768 i = 16;
3773 return_sequence->i[insns++] = result;
3774 remainder &= ~result;
3776 if (code == SET || code == MINUS)
3777 code = PLUS;
3779 while (remainder);
3781 return insns;
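/* Illustrative sketch, not part of arm.c: a much-simplified greedy version
   of the decomposition above, for ARM mode only.  It repeatedly peels off
   the highest set bit together with the seven bits below it (the window is
   kept at an even offset), so e.g. 0xe0000104 splits into 0xe0000000 and
   0x104 -- two insns.  The real code also weighs the Thumb-2 replicated
   forms and the choice of starting position.  The function name and the
   use of `unsigned int' are illustrative assumptions.  */
static int
example_greedy_chunks (unsigned int val, unsigned int chunks[4])
{
  int n = 0;

  while (val)
    {
      int top = 31;
      unsigned int chunk;

      while (!(val & (1u << top)))
	top--;
      /* An ARM immediate covers eight consecutive bits at an even offset;
	 keep the window's top bit position odd so the window still
	 contains the highest set bit.  */
      top |= 1;
      chunk = val & (top >= 7 ? 0xffu << (top - 7) : 0xffu);
      chunks[n++] = chunk;
      val &= ~chunk;
    }
  return n;
}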
3784 /* Emit an instruction with the indicated PATTERN. If COND is
3785 non-NULL, conditionalize the execution of the instruction on COND
3786 being true. */
3788 static void
3789 emit_constant_insn (rtx cond, rtx pattern)
3791 if (cond)
3792 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3793 emit_insn (pattern);
3796 /* As above, but extra parameter GENERATE which, if clear, suppresses
3797 RTL generation. */
3799 static int
3800 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3801 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3802 int generate)
3804 int can_invert = 0;
3805 int can_negate = 0;
3806 int final_invert = 0;
3807 int i;
3808 int set_sign_bit_copies = 0;
3809 int clear_sign_bit_copies = 0;
3810 int clear_zero_bit_copies = 0;
3811 int set_zero_bit_copies = 0;
3812 int insns = 0, neg_insns, inv_insns;
3813 unsigned HOST_WIDE_INT temp1, temp2;
3814 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3815 struct four_ints *immediates;
3816 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3818 /* Find out which operations are safe for a given CODE. Also do a quick
3819 check for degenerate cases; these can occur when DImode operations
3820 are split. */
3821 switch (code)
3823 case SET:
3824 can_invert = 1;
3825 break;
3827 case PLUS:
3828 can_negate = 1;
3829 break;
3831 case IOR:
3832 if (remainder == 0xffffffff)
3834 if (generate)
3835 emit_constant_insn (cond,
3836 gen_rtx_SET (VOIDmode, target,
3837 GEN_INT (ARM_SIGN_EXTEND (val))));
3838 return 1;
3841 if (remainder == 0)
3843 if (reload_completed && rtx_equal_p (target, source))
3844 return 0;
3846 if (generate)
3847 emit_constant_insn (cond,
3848 gen_rtx_SET (VOIDmode, target, source));
3849 return 1;
3851 break;
3853 case AND:
3854 if (remainder == 0)
3856 if (generate)
3857 emit_constant_insn (cond,
3858 gen_rtx_SET (VOIDmode, target, const0_rtx));
3859 return 1;
3861 if (remainder == 0xffffffff)
3863 if (reload_completed && rtx_equal_p (target, source))
3864 return 0;
3865 if (generate)
3866 emit_constant_insn (cond,
3867 gen_rtx_SET (VOIDmode, target, source));
3868 return 1;
3870 can_invert = 1;
3871 break;
3873 case XOR:
3874 if (remainder == 0)
3876 if (reload_completed && rtx_equal_p (target, source))
3877 return 0;
3878 if (generate)
3879 emit_constant_insn (cond,
3880 gen_rtx_SET (VOIDmode, target, source));
3881 return 1;
3884 if (remainder == 0xffffffff)
3886 if (generate)
3887 emit_constant_insn (cond,
3888 gen_rtx_SET (VOIDmode, target,
3889 gen_rtx_NOT (mode, source)));
3890 return 1;
3892 final_invert = 1;
3893 break;
3895 case MINUS:
3896 /* We treat MINUS as (val - source), since (source - val) is always
3897 passed as (source + (-val)). */
3898 if (remainder == 0)
3900 if (generate)
3901 emit_constant_insn (cond,
3902 gen_rtx_SET (VOIDmode, target,
3903 gen_rtx_NEG (mode, source)));
3904 return 1;
3906 if (const_ok_for_arm (val))
3908 if (generate)
3909 emit_constant_insn (cond,
3910 gen_rtx_SET (VOIDmode, target,
3911 gen_rtx_MINUS (mode, GEN_INT (val),
3912 source)));
3913 return 1;
3916 break;
3918 default:
3919 gcc_unreachable ();
3922 /* If we can do it in one insn get out quickly. */
3923 if (const_ok_for_op (val, code))
3925 if (generate)
3926 emit_constant_insn (cond,
3927 gen_rtx_SET (VOIDmode, target,
3928 (source
3929 ? gen_rtx_fmt_ee (code, mode, source,
3930 GEN_INT (val))
3931 : GEN_INT (val))));
3932 return 1;
3935 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3936 insn. */
3937 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3938 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3940 if (generate)
3942 if (mode == SImode && i == 16)
3943 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3944 smaller insn. */
3945 emit_constant_insn (cond,
3946 gen_zero_extendhisi2
3947 (target, gen_lowpart (HImode, source)));
3948 else
3949 /* extzv only supports SImode, but we can coerce the operands
3950 into that mode. */
3951 emit_constant_insn (cond,
3952 gen_extzv_t2 (gen_lowpart (SImode, target),
3953 gen_lowpart (SImode, source),
3954 GEN_INT (i), const0_rtx));
3957 return 1;
3960 /* Calculate a few attributes that may be useful for specific
3961 optimizations. */
3962 /* Count number of leading zeros. */
3963 for (i = 31; i >= 0; i--)
3965 if ((remainder & (1 << i)) == 0)
3966 clear_sign_bit_copies++;
3967 else
3968 break;
3971 /* Count number of leading 1's. */
3972 for (i = 31; i >= 0; i--)
3974 if ((remainder & (1 << i)) != 0)
3975 set_sign_bit_copies++;
3976 else
3977 break;
3980 /* Count number of trailing zero's. */
3981 for (i = 0; i <= 31; i++)
3983 if ((remainder & (1 << i)) == 0)
3984 clear_zero_bit_copies++;
3985 else
3986 break;
3989 /* Count number of trailing 1's. */
3990 for (i = 0; i <= 31; i++)
3992 if ((remainder & (1 << i)) != 0)
3993 set_zero_bit_copies++;
3994 else
3995 break;
3998 switch (code)
4000 case SET:
4001 /* See if we can do this by sign_extending a constant that is known
4002 to be negative. This is a good way of doing it, since the shift
4003 may well merge into a subsequent insn. */
4004 if (set_sign_bit_copies > 1)
4006 if (const_ok_for_arm
4007 (temp1 = ARM_SIGN_EXTEND (remainder
4008 << (set_sign_bit_copies - 1))))
4010 if (generate)
4012 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4013 emit_constant_insn (cond,
4014 gen_rtx_SET (VOIDmode, new_src,
4015 GEN_INT (temp1)));
4016 emit_constant_insn (cond,
4017 gen_ashrsi3 (target, new_src,
4018 GEN_INT (set_sign_bit_copies - 1)));
4020 return 2;
4022 /* For an inverted constant, we will need to set the low bits,
4023 these will be shifted out of harm's way. */
4024 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4025 if (const_ok_for_arm (~temp1))
4027 if (generate)
4029 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4030 emit_constant_insn (cond,
4031 gen_rtx_SET (VOIDmode, new_src,
4032 GEN_INT (temp1)));
4033 emit_constant_insn (cond,
4034 gen_ashrsi3 (target, new_src,
4035 GEN_INT (set_sign_bit_copies - 1)));
4037 return 2;
4041 /* See if we can calculate the value as the difference between two
4042 valid immediates. */
4043 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4045 int topshift = clear_sign_bit_copies & ~1;
4047 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4048 & (0xff000000 >> topshift));
4050 /* If temp1 is zero, then that means the 9 most significant
4051 bits of remainder were 1 and we've caused it to overflow.
4052 When topshift is 0 we don't need to do anything since we
4053 can borrow from 'bit 32'. */
4054 if (temp1 == 0 && topshift != 0)
4055 temp1 = 0x80000000 >> (topshift - 1);
4057 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4059 if (const_ok_for_arm (temp2))
4061 if (generate)
4063 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4064 emit_constant_insn (cond,
4065 gen_rtx_SET (VOIDmode, new_src,
4066 GEN_INT (temp1)));
4067 emit_constant_insn (cond,
4068 gen_addsi3 (target, new_src,
4069 GEN_INT (-temp2)));
4072 return 2;
4076 /* See if we can generate this by setting the bottom (or the top)
4077 16 bits, and then shifting these into the other half of the
4078 word. We only look for the simplest cases; to do more would cost
4079 too much. Be careful, however, not to generate this when the
4080 alternative would take fewer insns. */
4081 if (val & 0xffff0000)
4083 temp1 = remainder & 0xffff0000;
4084 temp2 = remainder & 0x0000ffff;
4086 /* Overlaps outside this range are best done using other methods. */
4087 for (i = 9; i < 24; i++)
4089 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4090 && !const_ok_for_arm (temp2))
4092 rtx new_src = (subtargets
4093 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4094 : target);
4095 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4096 source, subtargets, generate);
4097 source = new_src;
4098 if (generate)
4099 emit_constant_insn
4100 (cond,
4101 gen_rtx_SET
4102 (VOIDmode, target,
4103 gen_rtx_IOR (mode,
4104 gen_rtx_ASHIFT (mode, source,
4105 GEN_INT (i)),
4106 source)));
4107 return insns + 1;
4111 /* Don't duplicate cases already considered. */
4112 for (i = 17; i < 24; i++)
4114 if (((temp1 | (temp1 >> i)) == remainder)
4115 && !const_ok_for_arm (temp1))
4117 rtx new_src = (subtargets
4118 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4119 : target);
4120 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4121 source, subtargets, generate);
4122 source = new_src;
4123 if (generate)
4124 emit_constant_insn
4125 (cond,
4126 gen_rtx_SET (VOIDmode, target,
4127 gen_rtx_IOR
4128 (mode,
4129 gen_rtx_LSHIFTRT (mode, source,
4130 GEN_INT (i)),
4131 source)));
4132 return insns + 1;
4136 break;
4138 case IOR:
4139 case XOR:
4140 /* If we have IOR or XOR, and the constant can be loaded in a
4141 single instruction, and we can find a temporary to put it in,
4142 then this can be done in two instructions instead of 3-4. */
4143 if (subtargets
4144 /* TARGET can't be NULL if SUBTARGETS is 0 */
4145 || (reload_completed && !reg_mentioned_p (target, source)))
4147 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4149 if (generate)
4151 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4153 emit_constant_insn (cond,
4154 gen_rtx_SET (VOIDmode, sub,
4155 GEN_INT (val)));
4156 emit_constant_insn (cond,
4157 gen_rtx_SET (VOIDmode, target,
4158 gen_rtx_fmt_ee (code, mode,
4159 source, sub)));
4161 return 2;
4165 if (code == XOR)
4166 break;
4168 /* Convert.
4169 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4170 and the remainder 0s for e.g. 0xfff00000)
4171 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4173 This can be done in 2 instructions by using shifts with mov or mvn.
4174 e.g. for
4175 x = x | 0xfff00000;
4176 we generate.
4177 mvn r0, r0, asl #12
4178 mvn r0, r0, lsr #12 */
4179 if (set_sign_bit_copies > 8
4180 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4182 if (generate)
4184 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4185 rtx shift = GEN_INT (set_sign_bit_copies);
4187 emit_constant_insn
4188 (cond,
4189 gen_rtx_SET (VOIDmode, sub,
4190 gen_rtx_NOT (mode,
4191 gen_rtx_ASHIFT (mode,
4192 source,
4193 shift))));
4194 emit_constant_insn
4195 (cond,
4196 gen_rtx_SET (VOIDmode, target,
4197 gen_rtx_NOT (mode,
4198 gen_rtx_LSHIFTRT (mode, sub,
4199 shift))));
4201 return 2;
4204 /* Convert
4205 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4207 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4209 E.g. for r0 = r0 | 0xfff
4210 mvn r0, r0, lsr #12
4211 mvn r0, r0, asl #12
4214 if (set_zero_bit_copies > 8
4215 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4217 if (generate)
4219 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4220 rtx shift = GEN_INT (set_zero_bit_copies);
4222 emit_constant_insn
4223 (cond,
4224 gen_rtx_SET (VOIDmode, sub,
4225 gen_rtx_NOT (mode,
4226 gen_rtx_LSHIFTRT (mode,
4227 source,
4228 shift))));
4229 emit_constant_insn
4230 (cond,
4231 gen_rtx_SET (VOIDmode, target,
4232 gen_rtx_NOT (mode,
4233 gen_rtx_ASHIFT (mode, sub,
4234 shift))));
4236 return 2;
4239 /* This will never be reached for Thumb2 because orn is a valid
4240 instruction. This is for Thumb1 and the ARM 32 bit cases.
4242 x = y | constant (such that ~constant is a valid constant)
4243 Transform this to
4244 x = ~(~y & ~constant).
4246 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4248 if (generate)
4250 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4251 emit_constant_insn (cond,
4252 gen_rtx_SET (VOIDmode, sub,
4253 gen_rtx_NOT (mode, source)));
4254 source = sub;
4255 if (subtargets)
4256 sub = gen_reg_rtx (mode);
4257 emit_constant_insn (cond,
4258 gen_rtx_SET (VOIDmode, sub,
4259 gen_rtx_AND (mode, source,
4260 GEN_INT (temp1))));
4261 emit_constant_insn (cond,
4262 gen_rtx_SET (VOIDmode, target,
4263 gen_rtx_NOT (mode, sub)));
4265 return 3;
4267 break;
4269 case AND:
4270 /* See if two shifts will do two or more insns' worth of work. */
4271 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4273 HOST_WIDE_INT shift_mask = ((0xffffffff
4274 << (32 - clear_sign_bit_copies))
4275 & 0xffffffff);
4277 if ((remainder | shift_mask) != 0xffffffff)
4279 if (generate)
4281 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4282 insns = arm_gen_constant (AND, mode, cond,
4283 remainder | shift_mask,
4284 new_src, source, subtargets, 1);
4285 source = new_src;
4287 else
4289 rtx targ = subtargets ? NULL_RTX : target;
4290 insns = arm_gen_constant (AND, mode, cond,
4291 remainder | shift_mask,
4292 targ, source, subtargets, 0);
4296 if (generate)
4298 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4299 rtx shift = GEN_INT (clear_sign_bit_copies);
4301 emit_insn (gen_ashlsi3 (new_src, source, shift));
4302 emit_insn (gen_lshrsi3 (target, new_src, shift));
4305 return insns + 2;
4308 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4310 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4312 if ((remainder | shift_mask) != 0xffffffff)
4314 if (generate)
4316 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4318 insns = arm_gen_constant (AND, mode, cond,
4319 remainder | shift_mask,
4320 new_src, source, subtargets, 1);
4321 source = new_src;
4323 else
4325 rtx targ = subtargets ? NULL_RTX : target;
4327 insns = arm_gen_constant (AND, mode, cond,
4328 remainder | shift_mask,
4329 targ, source, subtargets, 0);
4333 if (generate)
4335 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4336 rtx shift = GEN_INT (clear_zero_bit_copies);
4338 emit_insn (gen_lshrsi3 (new_src, source, shift));
4339 emit_insn (gen_ashlsi3 (target, new_src, shift));
4342 return insns + 2;
4345 break;
4347 default:
4348 break;
4351 /* Calculate what the instruction sequences would be if we generated it
4352 normally, negated, or inverted. */
4353 if (code == AND)
4354 /* AND cannot be split into multiple insns, so invert and use BIC. */
4355 insns = 99;
4356 else
4357 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4359 if (can_negate)
4360 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4361 &neg_immediates);
4362 else
4363 neg_insns = 99;
4365 if (can_invert || final_invert)
4366 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4367 &inv_immediates);
4368 else
4369 inv_insns = 99;
4371 immediates = &pos_immediates;
4373 /* Is the negated immediate sequence more efficient? */
4374 if (neg_insns < insns && neg_insns <= inv_insns)
4376 insns = neg_insns;
4377 immediates = &neg_immediates;
4379 else
4380 can_negate = 0;
4382 /* Is the inverted immediate sequence more efficient?
4383 We must allow for an extra NOT instruction for XOR operations, although
4384 there is some chance that the final 'mvn' will get optimized later. */
4385 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4387 insns = inv_insns;
4388 immediates = &inv_immediates;
4390 else
4392 can_invert = 0;
4393 final_invert = 0;
4396 /* Now output the chosen sequence as instructions. */
4397 if (generate)
4399 for (i = 0; i < insns; i++)
4401 rtx new_src, temp1_rtx;
4403 temp1 = immediates->i[i];
4405 if (code == SET || code == MINUS)
4406 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4407 else if ((final_invert || i < (insns - 1)) && subtargets)
4408 new_src = gen_reg_rtx (mode);
4409 else
4410 new_src = target;
4412 if (can_invert)
4413 temp1 = ~temp1;
4414 else if (can_negate)
4415 temp1 = -temp1;
4417 temp1 = trunc_int_for_mode (temp1, mode);
4418 temp1_rtx = GEN_INT (temp1);
4420 if (code == SET)
4422 else if (code == MINUS)
4423 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4424 else
4425 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4427 emit_constant_insn (cond,
4428 gen_rtx_SET (VOIDmode, new_src,
4429 temp1_rtx));
4430 source = new_src;
4432 if (code == SET)
4434 can_negate = can_invert;
4435 can_invert = 0;
4436 code = PLUS;
4438 else if (code == MINUS)
4439 code = PLUS;
4443 if (final_invert)
4445 if (generate)
4446 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4447 gen_rtx_NOT (mode, source)));
4448 insns++;
4451 return insns;
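/* Illustrative sketch, not part of arm.c: the final selection above in
   miniature.  Having counted how many insns the positive, negated and
   inverted forms of the constant would need, pick the cheapest, remembering
   that an inverted form chosen for XOR costs one extra trailing MVN.  The
   enum and function name are illustrative assumptions, and the XOR
   adjustment is assumed to have been folded into INV_INSNS already.  */
enum example_form { EXAMPLE_POS, EXAMPLE_NEG, EXAMPLE_INV };

static enum example_form
example_pick_form (int pos_insns, int neg_insns, int inv_insns)
{
  if (neg_insns < pos_insns && neg_insns <= inv_insns)
    return EXAMPLE_NEG;		/* E.g. PLUS becomes a subtraction.  */
  if (inv_insns < pos_insns)
    return EXAMPLE_INV;		/* E.g. AND becomes BIC of ~constant.  */
  return EXAMPLE_POS;
}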
4454 /* Canonicalize a comparison so that we are more likely to recognize it.
4455 This can be done for a few constant compares, where we can make the
4456 immediate value easier to load. */
4458 static void
4459 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4460 bool op0_preserve_value)
4462 enum machine_mode mode;
4463 unsigned HOST_WIDE_INT i, maxval;
4465 mode = GET_MODE (*op0);
4466 if (mode == VOIDmode)
4467 mode = GET_MODE (*op1);
4469 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4471 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4472 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4473 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4474 for GTU/LEU in Thumb mode. */
4475 if (mode == DImode)
4477 rtx tem;
4479 if (*code == GT || *code == LE
4480 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4482 /* Missing comparison. First try to use an available
4483 comparison. */
4484 if (CONST_INT_P (*op1))
4486 i = INTVAL (*op1);
4487 switch (*code)
4489 case GT:
4490 case LE:
4491 if (i != maxval
4492 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4494 *op1 = GEN_INT (i + 1);
4495 *code = *code == GT ? GE : LT;
4496 return;
4498 break;
4499 case GTU:
4500 case LEU:
4501 if (i != ~((unsigned HOST_WIDE_INT) 0)
4502 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4504 *op1 = GEN_INT (i + 1);
4505 *code = *code == GTU ? GEU : LTU;
4506 return;
4508 break;
4509 default:
4510 gcc_unreachable ();
4514 /* If that did not work, reverse the condition. */
4515 if (!op0_preserve_value)
4517 tem = *op0;
4518 *op0 = *op1;
4519 *op1 = tem;
4520 *code = (int)swap_condition ((enum rtx_code)*code);
4523 return;
4526 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4527 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4528 to facilitate possible combining with a cmp into 'ands'. */
4529 if (mode == SImode
4530 && GET_CODE (*op0) == ZERO_EXTEND
4531 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4532 && GET_MODE (XEXP (*op0, 0)) == QImode
4533 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4534 && subreg_lowpart_p (XEXP (*op0, 0))
4535 && *op1 == const0_rtx)
4536 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4537 GEN_INT (255));
4539 /* Comparisons smaller than DImode. Only adjust comparisons against
4540 an out-of-range constant. */
4541 if (!CONST_INT_P (*op1)
4542 || const_ok_for_arm (INTVAL (*op1))
4543 || const_ok_for_arm (- INTVAL (*op1)))
4544 return;
4546 i = INTVAL (*op1);
4548 switch (*code)
4550 case EQ:
4551 case NE:
4552 return;
4554 case GT:
4555 case LE:
4556 if (i != maxval
4557 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4559 *op1 = GEN_INT (i + 1);
4560 *code = *code == GT ? GE : LT;
4561 return;
4563 break;
4565 case GE:
4566 case LT:
4567 if (i != ~maxval
4568 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4570 *op1 = GEN_INT (i - 1);
4571 *code = *code == GE ? GT : LE;
4572 return;
4574 break;
4576 case GTU:
4577 case LEU:
4578 if (i != ~((unsigned HOST_WIDE_INT) 0)
4579 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4581 *op1 = GEN_INT (i + 1);
4582 *code = *code == GTU ? GEU : LTU;
4583 return;
4585 break;
4587 case GEU:
4588 case LTU:
4589 if (i != 0
4590 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4592 *op1 = GEN_INT (i - 1);
4593 *code = *code == GEU ? GTU : LEU;
4594 return;
4596 break;
4598 default:
4599 gcc_unreachable ();
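/* Illustrative sketch, not part of arm.c: the constant adjustment above in
   isolation.  A signed `x > C' is the same test as `x >= C + 1' provided C
   is not the maximum value, so when C is awkward to load but C + 1 is a
   valid immediate the comparison is rewritten; e.g. `x > 0xfff' becomes
   `x >= 0x1000'.  The struct and function names are illustrative
   assumptions.  */
struct example_cmp { int use_ge; int op1; };

static struct example_cmp
example_canonicalize_gt (int op1)
{
  struct example_cmp c;

  if (op1 != 0x7fffffff)	/* x > INT_MAX can never be true.  */
    {
      c.use_ge = 1;
      c.op1 = op1 + 1;		/* x > op1  <==>  x >= op1 + 1.  */
    }
  else
    {
      c.use_ge = 0;
      c.op1 = op1;
    }
  return c;
}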
4604 /* Define how to find the value returned by a function. */
4606 static rtx
4607 arm_function_value(const_tree type, const_tree func,
4608 bool outgoing ATTRIBUTE_UNUSED)
4610 enum machine_mode mode;
4611 int unsignedp ATTRIBUTE_UNUSED;
4612 rtx r ATTRIBUTE_UNUSED;
4614 mode = TYPE_MODE (type);
4616 if (TARGET_AAPCS_BASED)
4617 return aapcs_allocate_return_reg (mode, type, func);
4619 /* Promote integer types. */
4620 if (INTEGRAL_TYPE_P (type))
4621 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4623 /* Promotes small structs returned in a register to full-word size
4624 for big-endian AAPCS. */
4625 if (arm_return_in_msb (type))
4627 HOST_WIDE_INT size = int_size_in_bytes (type);
4628 if (size % UNITS_PER_WORD != 0)
4630 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4631 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4635 return arm_libcall_value_1 (mode);
4638 /* libcall hashtable helpers. */
4640 struct libcall_hasher : typed_noop_remove <rtx_def>
4642 typedef rtx_def value_type;
4643 typedef rtx_def compare_type;
4644 static inline hashval_t hash (const value_type *);
4645 static inline bool equal (const value_type *, const compare_type *);
4646 static inline void remove (value_type *);
4649 inline bool
4650 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4652 return rtx_equal_p (p1, p2);
4655 inline hashval_t
4656 libcall_hasher::hash (const value_type *p1)
4658 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4661 typedef hash_table <libcall_hasher> libcall_table_type;
4663 static void
4664 add_libcall (libcall_table_type htab, rtx libcall)
4666 *htab.find_slot (libcall, INSERT) = libcall;
4669 static bool
4670 arm_libcall_uses_aapcs_base (const_rtx libcall)
4672 static bool init_done = false;
4673 static libcall_table_type libcall_htab;
4675 if (!init_done)
4677 init_done = true;
4679 libcall_htab.create (31);
4680 add_libcall (libcall_htab,
4681 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4682 add_libcall (libcall_htab,
4683 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4684 add_libcall (libcall_htab,
4685 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4686 add_libcall (libcall_htab,
4687 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4689 add_libcall (libcall_htab,
4690 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4691 add_libcall (libcall_htab,
4692 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4693 add_libcall (libcall_htab,
4694 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4695 add_libcall (libcall_htab,
4696 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4698 add_libcall (libcall_htab,
4699 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4700 add_libcall (libcall_htab,
4701 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4702 add_libcall (libcall_htab,
4703 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4704 add_libcall (libcall_htab,
4705 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4706 add_libcall (libcall_htab,
4707 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4708 add_libcall (libcall_htab,
4709 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4710 add_libcall (libcall_htab,
4711 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4712 add_libcall (libcall_htab,
4713 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4715 /* Values from double-precision helper functions are returned in core
4716 registers if the selected core only supports single-precision
4717 arithmetic, even if we are using the hard-float ABI. The same is
4718 true for single-precision helpers, but we will never be using the
4719 hard-float ABI on a CPU which doesn't support single-precision
4720 operations in hardware. */
4721 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4722 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4723 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4724 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4725 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4726 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4727 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4728 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4729 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4730 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4731 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4732 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4733 SFmode));
4734 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4735 DFmode));
4738 return libcall && libcall_htab.find (libcall) != NULL;
4741 static rtx
4742 arm_libcall_value_1 (enum machine_mode mode)
4744 if (TARGET_AAPCS_BASED)
4745 return aapcs_libcall_value (mode);
4746 else if (TARGET_IWMMXT_ABI
4747 && arm_vector_mode_supported_p (mode))
4748 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4749 else
4750 return gen_rtx_REG (mode, ARG_REGISTER (1));
4753 /* Define how to find the value returned by a library function
4754 assuming the value has mode MODE. */
4756 static rtx
4757 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4759 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4760 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4762 /* The following libcalls return their result in integer registers,
4763 even though they return a floating point value. */
4764 if (arm_libcall_uses_aapcs_base (libcall))
4765 return gen_rtx_REG (mode, ARG_REGISTER(1));
4769 return arm_libcall_value_1 (mode);
4772 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4774 static bool
4775 arm_function_value_regno_p (const unsigned int regno)
4777 if (regno == ARG_REGISTER (1)
4778 || (TARGET_32BIT
4779 && TARGET_AAPCS_BASED
4780 && TARGET_VFP
4781 && TARGET_HARD_FLOAT
4782 && regno == FIRST_VFP_REGNUM)
4783 || (TARGET_IWMMXT_ABI
4784 && regno == FIRST_IWMMXT_REGNUM))
4785 return true;
4787 return false;
4790 /* Determine the amount of memory needed to store the possible return
4791 registers of an untyped call. */
4793 arm_apply_result_size (void)
4795 int size = 16;
4797 if (TARGET_32BIT)
4799 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4800 size += 32;
4801 if (TARGET_IWMMXT_ABI)
4802 size += 8;
4805 return size;
4808 /* Decide whether TYPE should be returned in memory (true)
4809 or in a register (false). FNTYPE is the type of the function making
4810 the call. */
4811 static bool
4812 arm_return_in_memory (const_tree type, const_tree fntype)
4814 HOST_WIDE_INT size;
4816 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4818 if (TARGET_AAPCS_BASED)
4820 /* Simple, non-aggregate types (i.e. not including vectors and
4821 complex) are always returned in a register (or registers).
4822 We don't care about which register here, so we can short-cut
4823 some of the detail. */
4824 if (!AGGREGATE_TYPE_P (type)
4825 && TREE_CODE (type) != VECTOR_TYPE
4826 && TREE_CODE (type) != COMPLEX_TYPE)
4827 return false;
4829 /* Any return value that is no larger than one word can be
4830 returned in r0. */
4831 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4832 return false;
4834 /* Check any available co-processors to see if they accept the
4835 type as a register candidate (VFP, for example, can return
4836 some aggregates in consecutive registers). These aren't
4837 available if the call is variadic. */
4838 if (aapcs_select_return_coproc (type, fntype) >= 0)
4839 return false;
4841 /* Vector values should be returned using ARM registers, not
4842 memory (unless they're over 16 bytes, which will break since
4843 we only have four call-clobbered registers to play with). */
4844 if (TREE_CODE (type) == VECTOR_TYPE)
4845 return (size < 0 || size > (4 * UNITS_PER_WORD));
4847 /* The rest go in memory. */
4848 return true;
4851 if (TREE_CODE (type) == VECTOR_TYPE)
4852 return (size < 0 || size > (4 * UNITS_PER_WORD));
4854 if (!AGGREGATE_TYPE_P (type) &&
4855 (TREE_CODE (type) != VECTOR_TYPE))
4856 /* All simple types are returned in registers. */
4857 return false;
4859 if (arm_abi != ARM_ABI_APCS)
4861 /* ATPCS and later return aggregate types in memory only if they are
4862 larger than a word (or are variable size). */
4863 return (size < 0 || size > UNITS_PER_WORD);
4866 /* For the arm-wince targets we choose to be compatible with Microsoft's
4867 ARM and Thumb compilers, which always return aggregates in memory. */
4868 #ifndef ARM_WINCE
4869 /* All structures/unions bigger than one word are returned in memory.
4870 Also catch the case where int_size_in_bytes returns -1. In this case
4871 the aggregate is either huge or of variable size, and in either case
4872 we will want to return it via memory and not in a register. */
4873 if (size < 0 || size > UNITS_PER_WORD)
4874 return true;
4876 if (TREE_CODE (type) == RECORD_TYPE)
4878 tree field;
4880 /* For a struct the APCS says that we only return in a register
4881 if the type is 'integer like' and every addressable element
4882 has an offset of zero. For practical purposes this means
4883 that the structure can have at most one non bit-field element
4884 and that this element must be the first one in the structure. */
4886 /* Find the first field, ignoring non FIELD_DECL things which will
4887 have been created by C++. */
4888 for (field = TYPE_FIELDS (type);
4889 field && TREE_CODE (field) != FIELD_DECL;
4890 field = DECL_CHAIN (field))
4891 continue;
4893 if (field == NULL)
4894 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4896 /* Check that the first field is valid for returning in a register. */
4898 /* ... Floats are not allowed */
4899 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4900 return true;
4902 /* ... Aggregates that are not themselves valid for returning in
4903 a register are not allowed. */
4904 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4905 return true;
4907 /* Now check the remaining fields, if any. Only bitfields are allowed,
4908 since they are not addressable. */
4909 for (field = DECL_CHAIN (field);
4910 field;
4911 field = DECL_CHAIN (field))
4913 if (TREE_CODE (field) != FIELD_DECL)
4914 continue;
4916 if (!DECL_BIT_FIELD_TYPE (field))
4917 return true;
4920 return false;
4923 if (TREE_CODE (type) == UNION_TYPE)
4925 tree field;
4927 /* Unions can be returned in registers if every element is
4928 integral, or can be returned in an integer register. */
4929 for (field = TYPE_FIELDS (type);
4930 field;
4931 field = DECL_CHAIN (field))
4933 if (TREE_CODE (field) != FIELD_DECL)
4934 continue;
4936 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4937 return true;
4939 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4940 return true;
4943 return false;
4945 #endif /* not ARM_WINCE */
4947 /* Return all other types in memory. */
4948 return true;
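/* Illustrative sketch, not part of arm.c: some concrete inputs to the
   decision above under the AAPCS base rules (the hard-float cases also
   consult the co-processor hook).  The type names are illustrative
   assumptions.  */
struct example_word { int a; };		/* 4 bytes: returned in r0.  */
struct example_big  { int a, b, c; };	/* 12 bytes: returned in memory.  */
struct example_hfa  { double d[2]; };	/* Homogeneous FP aggregate: VFP
					   registers when a VFP PCS variant
					   accepts it, otherwise memory.  */
typedef int example_v4si __attribute__ ((vector_size (16)));
					/* 16-byte vector: not forced to
					   memory (16 <= 4 words).  */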
4951 const struct pcs_attribute_arg
4953 const char *arg;
4954 enum arm_pcs value;
4955 } pcs_attribute_args[] =
4957 {"aapcs", ARM_PCS_AAPCS},
4958 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4959 #if 0
4960 /* We could recognize these, but changes would be needed elsewhere
4961 * to implement them. */
4962 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4963 {"atpcs", ARM_PCS_ATPCS},
4964 {"apcs", ARM_PCS_APCS},
4965 #endif
4966 {NULL, ARM_PCS_UNKNOWN}
4969 static enum arm_pcs
4970 arm_pcs_from_attribute (tree attr)
4972 const struct pcs_attribute_arg *ptr;
4973 const char *arg;
4975 /* Get the value of the argument. */
4976 if (TREE_VALUE (attr) == NULL_TREE
4977 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4978 return ARM_PCS_UNKNOWN;
4980 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4982 /* Check it against the list of known arguments. */
4983 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4984 if (streq (arg, ptr->arg))
4985 return ptr->value;
4987 /* An unrecognized PCS name. */
4988 return ARM_PCS_UNKNOWN;
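/* Illustrative sketch, not part of arm.c: the source-level spelling that
   this parser accepts.  The declarations below are illustrative
   assumptions; on an AAPCS target the first requests the base variant and
   the second the VFP variant, so the double argument and result of the
   second travel in VFP registers.  */
extern double example_base_pcs (double) __attribute__ ((pcs ("aapcs")));
extern double example_vfp_pcs (double) __attribute__ ((pcs ("aapcs-vfp")));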
4991 /* Get the PCS variant to use for this call. TYPE is the function's type
4992 specification, DECL is the specific declaration. DECL may be null if
4993 the call could be indirect or if this is a library call. */
4994 static enum arm_pcs
4995 arm_get_pcs_model (const_tree type, const_tree decl)
4997 bool user_convention = false;
4998 enum arm_pcs user_pcs = arm_pcs_default;
4999 tree attr;
5001 gcc_assert (type);
5003 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5004 if (attr)
5006 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5007 user_convention = true;
5010 if (TARGET_AAPCS_BASED)
5012 /* Detect varargs functions. These always use the base rules
5013 (no argument is ever a candidate for a co-processor
5014 register). */
5015 bool base_rules = stdarg_p (type);
5017 if (user_convention)
5019 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5020 sorry ("non-AAPCS derived PCS variant");
5021 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5022 error ("variadic functions must use the base AAPCS variant");
5025 if (base_rules)
5026 return ARM_PCS_AAPCS;
5027 else if (user_convention)
5028 return user_pcs;
5029 else if (decl && flag_unit_at_a_time)
5031 /* Local functions never leak outside this compilation unit,
5032 so we are free to use whatever conventions are
5033 appropriate. */
5034 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5035 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5036 if (i && i->local)
5037 return ARM_PCS_AAPCS_LOCAL;
5040 else if (user_convention && user_pcs != arm_pcs_default)
5041 sorry ("PCS variant");
5043 /* For everything else we use the target's default. */
5044 return arm_pcs_default;
5048 static void
5049 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5050 const_tree fntype ATTRIBUTE_UNUSED,
5051 rtx libcall ATTRIBUTE_UNUSED,
5052 const_tree fndecl ATTRIBUTE_UNUSED)
5054 /* Record the unallocated VFP registers. */
5055 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5056 pcum->aapcs_vfp_reg_alloc = 0;
5059 /* Walk down the type tree of TYPE counting consecutive base elements.
5060 If *MODEP is VOIDmode, then set it to the first valid floating point
5061 type. If a non-floating point type is found, or if a floating point
5062 type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
5063 otherwise return the count in the sub-tree. */
5064 static int
5065 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5067 enum machine_mode mode;
5068 HOST_WIDE_INT size;
5070 switch (TREE_CODE (type))
5072 case REAL_TYPE:
5073 mode = TYPE_MODE (type);
5074 if (mode != DFmode && mode != SFmode)
5075 return -1;
5077 if (*modep == VOIDmode)
5078 *modep = mode;
5080 if (*modep == mode)
5081 return 1;
5083 break;
5085 case COMPLEX_TYPE:
5086 mode = TYPE_MODE (TREE_TYPE (type));
5087 if (mode != DFmode && mode != SFmode)
5088 return -1;
5090 if (*modep == VOIDmode)
5091 *modep = mode;
5093 if (*modep == mode)
5094 return 2;
5096 break;
5098 case VECTOR_TYPE:
5099 /* Use V2SImode and V4SImode as representatives of all 64-bit
5100 and 128-bit vector types, whether or not those modes are
5101 supported with the present options. */
5102 size = int_size_in_bytes (type);
5103 switch (size)
5105 case 8:
5106 mode = V2SImode;
5107 break;
5108 case 16:
5109 mode = V4SImode;
5110 break;
5111 default:
5112 return -1;
5115 if (*modep == VOIDmode)
5116 *modep = mode;
5118 /* Vector modes are considered to be opaque: two vectors are
5119 equivalent for the purposes of being homogeneous aggregates
5120 if they are the same size. */
5121 if (*modep == mode)
5122 return 1;
5124 break;
5126 case ARRAY_TYPE:
5128 int count;
5129 tree index = TYPE_DOMAIN (type);
5131 /* Can't handle incomplete types nor sizes that are not
5132 fixed. */
5133 if (!COMPLETE_TYPE_P (type)
5134 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5135 return -1;
5137 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5138 if (count == -1
5139 || !index
5140 || !TYPE_MAX_VALUE (index)
5141 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5142 || !TYPE_MIN_VALUE (index)
5143 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5144 || count < 0)
5145 return -1;
5147 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5148 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5150 /* There must be no padding. */
5151 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5152 return -1;
5154 return count;
5157 case RECORD_TYPE:
5159 int count = 0;
5160 int sub_count;
5161 tree field;
5163 /* Can't handle incomplete types nor sizes that are not
5164 fixed. */
5165 if (!COMPLETE_TYPE_P (type)
5166 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5167 return -1;
5169 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5171 if (TREE_CODE (field) != FIELD_DECL)
5172 continue;
5174 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5175 if (sub_count < 0)
5176 return -1;
5177 count += sub_count;
5180 /* There must be no padding. */
5181 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5182 return -1;
5184 return count;
5187 case UNION_TYPE:
5188 case QUAL_UNION_TYPE:
5190 /* These aren't very interesting except in a degenerate case. */
5191 int count = 0;
5192 int sub_count;
5193 tree field;
5195 /* Can't handle incomplete types nor sizes that are not
5196 fixed. */
5197 if (!COMPLETE_TYPE_P (type)
5198 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5199 return -1;
5201 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5203 if (TREE_CODE (field) != FIELD_DECL)
5204 continue;
5206 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5207 if (sub_count < 0)
5208 return -1;
5209 count = count > sub_count ? count : sub_count;
5212 /* There must be no padding. */
5213 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5214 return -1;
5216 return count;
5219 default:
5220 break;
5223 return -1;
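/* Illustrative sketch, not part of arm.c: types the walk above accepts or
   rejects as homogeneous floating-point candidates.  The type names are
   illustrative assumptions.  */
struct example_hfa_sf  { float x, y, z, w; };	/* 4 x SFmode: candidate.  */
struct example_hfa_df  { double re, im; };	/* 2 x DFmode: candidate.  */
struct example_mixed   { float f; double d; };	/* Mixed modes: returns -1.  */
struct example_non_fp  { float f; int i; };	/* Integer member: returns -1.  */
struct example_too_big { float v[5]; };		/* Count 5: rejected later by
						   the caller's limit of 4.  */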
5226 /* Return true if PCS_VARIANT should use VFP registers. */
5227 static bool
5228 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5230 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5232 static bool seen_thumb1_vfp = false;
5234 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5236 sorry ("Thumb-1 hard-float VFP ABI");
5237 /* sorry() is not immediately fatal, so only display this once. */
5238 seen_thumb1_vfp = true;
5241 return true;
5244 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5245 return false;
5247 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5248 (TARGET_VFP_DOUBLE || !is_double));
5251 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5252 suitable for passing or returning in VFP registers for the PCS
5253 variant selected. If it is, then *BASE_MODE is updated to contain
5254 a machine mode describing each element of the argument's type and
5255 *COUNT to hold the number of such elements. */
5256 static bool
5257 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5258 enum machine_mode mode, const_tree type,
5259 enum machine_mode *base_mode, int *count)
5261 enum machine_mode new_mode = VOIDmode;
5263 /* If we have the type information, prefer that to working things
5264 out from the mode. */
5265 if (type)
5267 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5269 if (ag_count > 0 && ag_count <= 4)
5270 *count = ag_count;
5271 else
5272 return false;
5274 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5275 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5276 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5278 *count = 1;
5279 new_mode = mode;
5281 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5283 *count = 2;
5284 new_mode = (mode == DCmode ? DFmode : SFmode);
5286 else
5287 return false;
5290 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5291 return false;
5293 *base_mode = new_mode;
5294 return true;
5297 static bool
5298 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5299 enum machine_mode mode, const_tree type)
5301 int count ATTRIBUTE_UNUSED;
5302 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5304 if (!use_vfp_abi (pcs_variant, false))
5305 return false;
5306 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5307 &ag_mode, &count);
5310 static bool
5311 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5312 const_tree type)
5314 if (!use_vfp_abi (pcum->pcs_variant, false))
5315 return false;
5317 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5318 &pcum->aapcs_vfp_rmode,
5319 &pcum->aapcs_vfp_rcount);
5322 static bool
5323 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5324 const_tree type ATTRIBUTE_UNUSED)
5326 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5327 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5328 int regno;
5330 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5331 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5333 pcum->aapcs_vfp_reg_alloc = mask << regno;
5334 if (mode == BLKmode
5335 || (mode == TImode && ! TARGET_NEON)
5336 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5338 int i;
5339 int rcount = pcum->aapcs_vfp_rcount;
5340 int rshift = shift;
5341 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5342 rtx par;
5343 if (!TARGET_NEON)
5345 /* Avoid using unsupported vector modes. */
5346 if (rmode == V2SImode)
5347 rmode = DImode;
5348 else if (rmode == V4SImode)
5350 rmode = DImode;
5351 rcount *= 2;
5352 rshift /= 2;
5355 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5356 for (i = 0; i < rcount; i++)
5358 rtx tmp = gen_rtx_REG (rmode,
5359 FIRST_VFP_REGNUM + regno + i * rshift);
5360 tmp = gen_rtx_EXPR_LIST
5361 (VOIDmode, tmp,
5362 GEN_INT (i * GET_MODE_SIZE (rmode)));
5363 XVECEXP (par, 0, i) = tmp;
5366 pcum->aapcs_reg = par;
5368 else
5369 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5370 return true;
5372 return false;
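/* Illustrative sketch, not part of arm.c: the register-mask scan above in
   miniature.  The free VFP argument registers are tracked as a bitmask of
   single-precision slots (s0-s15); a candidate needing COUNT elements of
   SHIFT slots each takes the first suitably aligned run of free slots.
   The function name is an illustrative assumption.  */
static int
example_first_free_vfp_block (unsigned int regs_free, int shift, int count)
{
  unsigned int mask = (1u << (shift * count)) - 1;
  int regno;

  for (regno = 0; regno < 16; regno += shift)
    if (((regs_free >> regno) & mask) == mask)
      return regno;	/* E.g. two doubles (shift 2, count 2) with s0-s3
			   already taken (regs_free == 0xfff0) land in
			   s4-s7, i.e. d2 and d3.  */
  return -1;
}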
5375 static rtx
5376 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5377 enum machine_mode mode,
5378 const_tree type ATTRIBUTE_UNUSED)
5380 if (!use_vfp_abi (pcs_variant, false))
5381 return NULL;
5383 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5385 int count;
5386 enum machine_mode ag_mode;
5387 int i;
5388 rtx par;
5389 int shift;
5391 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5392 &ag_mode, &count);
5394 if (!TARGET_NEON)
5396 if (ag_mode == V2SImode)
5397 ag_mode = DImode;
5398 else if (ag_mode == V4SImode)
5400 ag_mode = DImode;
5401 count *= 2;
5404 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5405 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5406 for (i = 0; i < count; i++)
5408 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5409 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5410 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5411 XVECEXP (par, 0, i) = tmp;
5414 return par;
5417 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5420 static void
5421 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5422 enum machine_mode mode ATTRIBUTE_UNUSED,
5423 const_tree type ATTRIBUTE_UNUSED)
5425 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5426 pcum->aapcs_vfp_reg_alloc = 0;
5427 return;
5430 #define AAPCS_CP(X) \
5432 aapcs_ ## X ## _cum_init, \
5433 aapcs_ ## X ## _is_call_candidate, \
5434 aapcs_ ## X ## _allocate, \
5435 aapcs_ ## X ## _is_return_candidate, \
5436 aapcs_ ## X ## _allocate_return_reg, \
5437 aapcs_ ## X ## _advance \
5440 /* Table of co-processors that can be used to pass arguments in
5441 registers. Ideally no argument should be a candidate for more than
5442 one co-processor table entry, but the table is processed in order
5443 and stops after the first match. If that entry then fails to put
5444 the argument into a co-processor register, the argument will go on
5445 the stack. */
5446 static struct
5448 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5449 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5451 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5452 BLKmode) is a candidate for this co-processor's registers; this
5453 function should ignore any position-dependent state in
5454 CUMULATIVE_ARGS and only use call-type dependent information. */
5455 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5457 /* Return true if the argument does get a co-processor register; it
5458 should set aapcs_reg to an RTX of the register allocated as is
5459 required for a return from FUNCTION_ARG. */
5460 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5462 /* Return true if a result of mode MODE (or type TYPE if MODE is
5463 BLKmode) can be returned in this co-processor's registers. */
5464 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5466 /* Allocate and return an RTX element to hold the return type of a
5467 call; this routine must not fail and will only be called if
5468 is_return_candidate returned true with the same parameters. */
5469 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5471 /* Finish processing this argument and prepare to start processing
5472 the next one. */
5473 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5474 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5476 AAPCS_CP(vfp)
5479 #undef AAPCS_CP
5481 static int
5482 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5483 const_tree type)
5485 int i;
5487 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5488 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5489 return i;
5491 return -1;
5494 static int
5495 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5497 /* We aren't passed a decl, so we can't check that a call is local.
5498 However, it isn't clear that that would be a win anyway, since it
5499 might limit some tail-calling opportunities. */
5500 enum arm_pcs pcs_variant;
5502 if (fntype)
5504 const_tree fndecl = NULL_TREE;
5506 if (TREE_CODE (fntype) == FUNCTION_DECL)
5508 fndecl = fntype;
5509 fntype = TREE_TYPE (fntype);
5512 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5514 else
5515 pcs_variant = arm_pcs_default;
5517 if (pcs_variant != ARM_PCS_AAPCS)
5519 int i;
5521 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5522 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5523 TYPE_MODE (type),
5524 type))
5525 return i;
5527 return -1;
5530 static rtx
5531 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5532 const_tree fntype)
5534 /* We aren't passed a decl, so we can't check that a call is local.
5535 However, it isn't clear that that would be a win anyway, since it
5536 might limit some tail-calling opportunities. */
5537 enum arm_pcs pcs_variant;
5538 int unsignedp ATTRIBUTE_UNUSED;
5540 if (fntype)
5542 const_tree fndecl = NULL_TREE;
5544 if (TREE_CODE (fntype) == FUNCTION_DECL)
5546 fndecl = fntype;
5547 fntype = TREE_TYPE (fntype);
5550 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5552 else
5553 pcs_variant = arm_pcs_default;
5555 /* Promote integer types. */
5556 if (type && INTEGRAL_TYPE_P (type))
5557 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5559 if (pcs_variant != ARM_PCS_AAPCS)
5561 int i;
5563 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5564 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5565 type))
5566 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5567 mode, type);
5570 /* Promotes small structs returned in a register to full-word size
5571 for big-endian AAPCS. */
5572 if (type && arm_return_in_msb (type))
5574 HOST_WIDE_INT size = int_size_in_bytes (type);
5575 if (size % UNITS_PER_WORD != 0)
5577 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5578 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5582 return gen_rtx_REG (mode, R0_REGNUM);
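/* Illustrative example (editor's note, not part of the upstream source):
   on a big-endian AAPCS target a 3-byte struct returned by value is
   widened here to SImode, so it is returned in r0 with, as
   arm_return_in_msb implies, the data occupying the most significant
   end of the register.  */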
5585 static rtx
5586 aapcs_libcall_value (enum machine_mode mode)
5588 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5589 && GET_MODE_SIZE (mode) <= 4)
5590 mode = SImode;
5592 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5595 /* Lay out a function argument using the AAPCS rules. The rule
5596 numbers referred to here are those in the AAPCS. */
5597 static void
5598 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5599 const_tree type, bool named)
5601 int nregs, nregs2;
5602 int ncrn;
5604 /* We only need to do this once per argument. */
5605 if (pcum->aapcs_arg_processed)
5606 return;
5608 pcum->aapcs_arg_processed = true;
5610 /* Special case: if named is false then we are handling an incoming
5611 anonymous argument which is on the stack. */
5612 if (!named)
5613 return;
5615 /* Is this a potential co-processor register candidate? */
5616 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5618 int slot = aapcs_select_call_coproc (pcum, mode, type);
5619 pcum->aapcs_cprc_slot = slot;
5621 /* We don't have to apply any of the rules from part B of the
5622 preparation phase, these are handled elsewhere in the
5623 compiler. */
5625 if (slot >= 0)
5627 /* A Co-processor register candidate goes either in its own
5628 class of registers or on the stack. */
5629 if (!pcum->aapcs_cprc_failed[slot])
5631 /* C1.cp - Try to allocate the argument to co-processor
5632 registers. */
5633 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5634 return;
5636 /* C2.cp - Put the argument on the stack and note that we
5637 can't assign any more candidates in this slot. We also
5638 need to note that we have allocated stack space, so that
5639 we won't later try to split a non-cprc candidate between
5640 core registers and the stack. */
5641 pcum->aapcs_cprc_failed[slot] = true;
5642 pcum->can_split = false;
5645 /* We didn't get a register, so this argument goes on the
5646 stack. */
5647 gcc_assert (pcum->can_split == false);
5648 return;
5652 /* C3 - For double-word aligned arguments, round the NCRN up to the
5653 next even number. */
5654 ncrn = pcum->aapcs_ncrn;
5655 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5656 ncrn++;
5658 nregs = ARM_NUM_REGS2(mode, type);
5660 /* Sigh, this test should really assert that nregs > 0, but a GCC
5661 extension allows empty structs and then gives them zero size; it
5662 then allows such a structure to be passed by value. For some of
5663 the code below we have to pretend that such an argument has
5664 non-zero size so that we 'locate' it correctly either in
5665 registers or on the stack. */
5666 gcc_assert (nregs >= 0);
5668 nregs2 = nregs ? nregs : 1;
5670 /* C4 - Argument fits entirely in core registers. */
5671 if (ncrn + nregs2 <= NUM_ARG_REGS)
5673 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5674 pcum->aapcs_next_ncrn = ncrn + nregs;
5675 return;
5678 /* C5 - Some core registers left and there are no arguments already
5679 on the stack: split this argument between the remaining core
5680 registers and the stack. */
5681 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5683 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5684 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5685 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5686 return;
5689 /* C6 - NCRN is set to 4. */
5690 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5692 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5693 return;
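/* Worked example (editor's note, not part of the upstream source) of the
   rules above for a call such as

     void f (int a, long long b, int c);

   A is allocated to r0 (C4, NCRN becomes 1).  B needs doubleword
   alignment, so C3 rounds the NCRN up to 2 and C4 places it in r2/r3
   (NCRN becomes 4).  For C no core registers remain, so C6 leaves the
   NCRN at 4 and C7/C8 put it on the stack.  */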
5696 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5697 for a call to a function whose data type is FNTYPE.
5698 For a library call, FNTYPE is NULL. */
5699 void
5700 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5701 rtx libname,
5702 tree fndecl ATTRIBUTE_UNUSED)
5704 /* Long call handling. */
5705 if (fntype)
5706 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5707 else
5708 pcum->pcs_variant = arm_pcs_default;
5710 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5712 if (arm_libcall_uses_aapcs_base (libname))
5713 pcum->pcs_variant = ARM_PCS_AAPCS;
5715 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5716 pcum->aapcs_reg = NULL_RTX;
5717 pcum->aapcs_partial = 0;
5718 pcum->aapcs_arg_processed = false;
5719 pcum->aapcs_cprc_slot = -1;
5720 pcum->can_split = true;
5722 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5724 int i;
5726 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5728 pcum->aapcs_cprc_failed[i] = false;
5729 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5732 return;
5735 /* Legacy ABIs */
5737 /* On the ARM, the offset starts at 0. */
5738 pcum->nregs = 0;
5739 pcum->iwmmxt_nregs = 0;
5740 pcum->can_split = true;
5742 /* Varargs vectors are treated the same as long long.
5743 named_count avoids having to change the way arm handles 'named'. */
5744 pcum->named_count = 0;
5745 pcum->nargs = 0;
5747 if (TARGET_REALLY_IWMMXT && fntype)
5749 tree fn_arg;
5751 for (fn_arg = TYPE_ARG_TYPES (fntype);
5752 fn_arg;
5753 fn_arg = TREE_CHAIN (fn_arg))
5754 pcum->named_count += 1;
5756 if (! pcum->named_count)
5757 pcum->named_count = INT_MAX;
5761 /* Return true if we use LRA instead of reload pass. */
5762 static bool
5763 arm_lra_p (void)
5765 return arm_lra_flag;
5768 /* Return true if mode/type need doubleword alignment. */
5769 static bool
5770 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5772 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5773 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
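/* Editor's note (illustrative, not part of the upstream source): with
   PARM_BOUNDARY of 32 on ARM this is true for 64-bit quantities such as
   DImode and DFmode (long long, double), and for any type whose declared
   alignment exceeds 32 bits, e.g. a struct carrying
   __attribute__ ((aligned (8))).  */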
5777 /* Determine where to put an argument to a function.
5778 Value is zero to push the argument on the stack,
5779 or a hard register in which to store the argument.
5781 MODE is the argument's machine mode.
5782 TYPE is the data type of the argument (as a tree).
5783 This is null for libcalls where that information may
5784 not be available.
5785 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5786 the preceding args and about the function being called.
5787 NAMED is nonzero if this argument is a named parameter
5788 (otherwise it is an extra parameter matching an ellipsis).
5790 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5791 other arguments are passed on the stack. If (NAMED == 0) (which happens
5792 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5793 defined), say it is passed on the stack (function_prologue will
5794 indeed make it be passed on the stack if necessary). */
5796 static rtx
5797 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5798 const_tree type, bool named)
5800 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5801 int nregs;
5803 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5804 a call insn (op3 of a call_value insn). */
5805 if (mode == VOIDmode)
5806 return const0_rtx;
5808 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5810 aapcs_layout_arg (pcum, mode, type, named);
5811 return pcum->aapcs_reg;
5814 /* Varargs vectors are treated the same as long long.
5815 named_count avoids having to change the way arm handles 'named'. */
5816 if (TARGET_IWMMXT_ABI
5817 && arm_vector_mode_supported_p (mode)
5818 && pcum->named_count > pcum->nargs + 1)
5820 if (pcum->iwmmxt_nregs <= 9)
5821 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5822 else
5824 pcum->can_split = false;
5825 return NULL_RTX;
5829 /* Put doubleword aligned quantities in even register pairs. */
5830 if (pcum->nregs & 1
5831 && ARM_DOUBLEWORD_ALIGN
5832 && arm_needs_doubleword_align (mode, type))
5833 pcum->nregs++;
5835 /* Only allow splitting an arg between regs and memory if all preceding
5836 args were allocated to regs. For args passed by reference we only count
5837 the reference pointer. */
5838 if (pcum->can_split)
5839 nregs = 1;
5840 else
5841 nregs = ARM_NUM_REGS2 (mode, type);
5843 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5844 return NULL_RTX;
5846 return gen_rtx_REG (mode, pcum->nregs);
5849 static unsigned int
5850 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5852 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5853 ? DOUBLEWORD_ALIGNMENT
5854 : PARM_BOUNDARY);
5857 static int
5858 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5859 tree type, bool named)
5861 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5862 int nregs = pcum->nregs;
5864 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5866 aapcs_layout_arg (pcum, mode, type, named);
5867 return pcum->aapcs_partial;
5870 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5871 return 0;
5873 if (NUM_ARG_REGS > nregs
5874 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5875 && pcum->can_split)
5876 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5878 return 0;
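/* Illustrative example (editor's note, not part of the upstream source):
   on the non-AAPCS fall-through path above, if earlier arguments have
   already consumed r0 and r1 (nregs == 2) and the next argument is a
   16-byte structure, ARM_NUM_REGS2 yields 4, so the argument is split:
   r2/r3 take the first 8 bytes and the function returns
   (4 - 2) * UNITS_PER_WORD == 8 as the number of bytes passed on the
   stack.  */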
5881 /* Update the data in PCUM to advance over an argument
5882 of mode MODE and data type TYPE.
5883 (TYPE is null for libcalls where that information may not be available.) */
5885 static void
5886 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5887 const_tree type, bool named)
5889 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5891 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5893 aapcs_layout_arg (pcum, mode, type, named);
5895 if (pcum->aapcs_cprc_slot >= 0)
5897 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5898 type);
5899 pcum->aapcs_cprc_slot = -1;
5902 /* Generic stuff. */
5903 pcum->aapcs_arg_processed = false;
5904 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5905 pcum->aapcs_reg = NULL_RTX;
5906 pcum->aapcs_partial = 0;
5908 else
5910 pcum->nargs += 1;
5911 if (arm_vector_mode_supported_p (mode)
5912 && pcum->named_count > pcum->nargs
5913 && TARGET_IWMMXT_ABI)
5914 pcum->iwmmxt_nregs += 1;
5915 else
5916 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5920 /* Variable sized types are passed by reference. This is a GCC
5921 extension to the ARM ABI. */
5923 static bool
5924 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5925 enum machine_mode mode ATTRIBUTE_UNUSED,
5926 const_tree type, bool named ATTRIBUTE_UNUSED)
5928 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5931 /* Encode the current state of the #pragma [no_]long_calls. */
5932 typedef enum
5934 OFF, /* No #pragma [no_]long_calls is in effect. */
5935 LONG, /* #pragma long_calls is in effect. */
5936 SHORT /* #pragma no_long_calls is in effect. */
5937 } arm_pragma_enum;
5939 static arm_pragma_enum arm_pragma_long_calls = OFF;
5941 void
5942 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5944 arm_pragma_long_calls = LONG;
5947 void
5948 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5950 arm_pragma_long_calls = SHORT;
5953 void
5954 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5956 arm_pragma_long_calls = OFF;
5959 /* Handle an attribute requiring a FUNCTION_DECL;
5960 arguments as in struct attribute_spec.handler. */
5961 static tree
5962 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5963 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5965 if (TREE_CODE (*node) != FUNCTION_DECL)
5967 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5968 name);
5969 *no_add_attrs = true;
5972 return NULL_TREE;
5975 /* Handle an "interrupt" or "isr" attribute;
5976 arguments as in struct attribute_spec.handler. */
5977 static tree
5978 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5979 bool *no_add_attrs)
5981 if (DECL_P (*node))
5983 if (TREE_CODE (*node) != FUNCTION_DECL)
5985 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5986 name);
5987 *no_add_attrs = true;
5989 /* FIXME: the argument if any is checked for type attributes;
5990 should it be checked for decl ones? */
5992 else
5994 if (TREE_CODE (*node) == FUNCTION_TYPE
5995 || TREE_CODE (*node) == METHOD_TYPE)
5997 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5999 warning (OPT_Wattributes, "%qE attribute ignored",
6000 name);
6001 *no_add_attrs = true;
6004 else if (TREE_CODE (*node) == POINTER_TYPE
6005 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6006 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6007 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6009 *node = build_variant_type_copy (*node);
6010 TREE_TYPE (*node) = build_type_attribute_variant
6011 (TREE_TYPE (*node),
6012 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6013 *no_add_attrs = true;
6015 else
6017 /* Possibly pass this attribute on from the type to a decl. */
6018 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6019 | (int) ATTR_FLAG_FUNCTION_NEXT
6020 | (int) ATTR_FLAG_ARRAY_NEXT))
6022 *no_add_attrs = true;
6023 return tree_cons (name, args, NULL_TREE);
6025 else
6027 warning (OPT_Wattributes, "%qE attribute ignored",
6028 name);
6033 return NULL_TREE;
6036 /* Handle a "pcs" attribute; arguments as in struct
6037 attribute_spec.handler. */
6038 static tree
6039 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6040 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6042 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6044 warning (OPT_Wattributes, "%qE attribute ignored", name);
6045 *no_add_attrs = true;
6047 return NULL_TREE;
6050 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6051 /* Handle the "notshared" attribute. This attribute is another way of
6052 requesting hidden visibility. ARM's compiler supports
6053 "__declspec(notshared)"; we support the same thing via an
6054 attribute. */
6056 static tree
6057 arm_handle_notshared_attribute (tree *node,
6058 tree name ATTRIBUTE_UNUSED,
6059 tree args ATTRIBUTE_UNUSED,
6060 int flags ATTRIBUTE_UNUSED,
6061 bool *no_add_attrs)
6063 tree decl = TYPE_NAME (*node);
6065 if (decl)
6067 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6068 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6069 *no_add_attrs = false;
6071 return NULL_TREE;
6073 #endif
6075 /* Return 0 if the attributes for two types are incompatible, 1 if they
6076 are compatible, and 2 if they are nearly compatible (which causes a
6077 warning to be generated). */
6078 static int
6079 arm_comp_type_attributes (const_tree type1, const_tree type2)
6081 int l1, l2, s1, s2;
6083 /* Check for mismatch of non-default calling convention. */
6084 if (TREE_CODE (type1) != FUNCTION_TYPE)
6085 return 1;
6087 /* Check for mismatched call attributes. */
6088 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6089 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6090 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6091 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6093 /* Only bother to check if an attribute is defined. */
6094 if (l1 | l2 | s1 | s2)
6096 /* If one type has an attribute, the other must have the same attribute. */
6097 if ((l1 != l2) || (s1 != s2))
6098 return 0;
6100 /* Disallow mixed attributes. */
6101 if ((l1 & s2) || (l2 & s1))
6102 return 0;
6105 /* Check for mismatched ISR attribute. */
6106 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6107 if (! l1)
6108 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6109 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6110 if (! l2)
6111 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6112 if (l1 != l2)
6113 return 0;
6115 return 1;
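/* Illustrative example (editor's note, not part of the upstream source):
   given

     void (*p) (void) __attribute__ ((long_call));
     void (*q) (void) __attribute__ ((short_call));

   the two pointer target types are treated as incompatible, because one
   carries long_call and the other short_call, so the mixed-attribute
   check above returns 0.  */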
6118 /* Assigns default attributes to newly defined type. This is used to
6119 set short_call/long_call attributes for function types of
6120 functions defined inside corresponding #pragma scopes. */
6121 static void
6122 arm_set_default_type_attributes (tree type)
6124 /* Add __attribute__ ((long_call)) to all functions when
6125 inside #pragma long_calls, or __attribute__ ((short_call)) when
6126 inside #pragma no_long_calls. */
6127 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6129 tree type_attr_list, attr_name;
6130 type_attr_list = TYPE_ATTRIBUTES (type);
6132 if (arm_pragma_long_calls == LONG)
6133 attr_name = get_identifier ("long_call");
6134 else if (arm_pragma_long_calls == SHORT)
6135 attr_name = get_identifier ("short_call");
6136 else
6137 return;
6139 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6140 TYPE_ATTRIBUTES (type) = type_attr_list;
6144 /* Return true if DECL is known to be linked into section SECTION. */
6146 static bool
6147 arm_function_in_section_p (tree decl, section *section)
6149 /* We can only be certain about functions defined in the same
6150 compilation unit. */
6151 if (!TREE_STATIC (decl))
6152 return false;
6154 /* Make sure that SYMBOL always binds to the definition in this
6155 compilation unit. */
6156 if (!targetm.binds_local_p (decl))
6157 return false;
6159 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6160 if (!DECL_SECTION_NAME (decl))
6162 /* Make sure that we will not create a unique section for DECL. */
6163 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6164 return false;
6167 return function_section (decl) == section;
6170 /* Return nonzero if a 32-bit "long_call" should be generated for
6171 a call from the current function to DECL. We generate a long_call
6172 if the function:
6174 a. has an __attribute__ ((long_call))
6175 or b. is within the scope of a #pragma long_calls
6176 or c. the -mlong-calls command line switch has been specified
6178 However we do not generate a long call if the function:
6180 d. has an __attribute__ ((short_call))
6181 or e. is inside the scope of a #pragma no_long_calls
6182 or f. is defined in the same section as the current function. */
6184 bool
6185 arm_is_long_call_p (tree decl)
6187 tree attrs;
6189 if (!decl)
6190 return TARGET_LONG_CALLS;
6192 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6193 if (lookup_attribute ("short_call", attrs))
6194 return false;
6196 /* For "f", be conservative, and only cater for cases in which the
6197 whole of the current function is placed in the same section. */
6198 if (!flag_reorder_blocks_and_partition
6199 && TREE_CODE (decl) == FUNCTION_DECL
6200 && arm_function_in_section_p (decl, current_function_section ()))
6201 return false;
6203 if (lookup_attribute ("long_call", attrs))
6204 return true;
6206 return TARGET_LONG_CALLS;
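/* Illustrative example (editor's note, not part of the upstream source):
   declaring

     extern void far_away (void) __attribute__ ((long_call));

   makes arm_is_long_call_p return true for calls to far_away, so the
   back end loads the function's address into a register and branches
   indirectly instead of relying on the limited range of a single BL
   instruction.  */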
6209 /* Return nonzero if it is ok to make a tail-call to DECL. */
6210 static bool
6211 arm_function_ok_for_sibcall (tree decl, tree exp)
6213 unsigned long func_type;
6215 if (cfun->machine->sibcall_blocked)
6216 return false;
6218 /* Never tailcall something if we are generating code for Thumb-1. */
6219 if (TARGET_THUMB1)
6220 return false;
6222 /* The PIC register is live on entry to VxWorks PLT entries, so we
6223 must make the call before restoring the PIC register. */
6224 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6225 return false;
6227 /* If we are interworking and the function is not declared static
6228 then we can't tail-call it unless we know that it exists in this
6229 compilation unit (since it might be a Thumb routine). */
6230 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6231 && !TREE_ASM_WRITTEN (decl))
6232 return false;
6234 func_type = arm_current_func_type ();
6235 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6236 if (IS_INTERRUPT (func_type))
6237 return false;
6239 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6241 /* Check that the return value locations are the same. For
6242 example that we aren't returning a value from the sibling in
6243 a VFP register but then need to transfer it to a core
6244 register. */
6245 rtx a, b;
6247 a = arm_function_value (TREE_TYPE (exp), decl, false);
6248 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6249 cfun->decl, false);
6250 if (!rtx_equal_p (a, b))
6251 return false;
6254 /* Never tailcall if function may be called with a misaligned SP. */
6255 if (IS_STACKALIGN (func_type))
6256 return false;
6258 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6259 references should become a NOP. Don't convert such calls into
6260 sibling calls. */
6261 if (TARGET_AAPCS_BASED
6262 && arm_abi == ARM_ABI_AAPCS
6263 && decl
6264 && DECL_WEAK (decl))
6265 return false;
6267 /* Everything else is ok. */
6268 return true;
6272 /* Addressing mode support functions. */
6274 /* Return nonzero if X is a legitimate immediate operand when compiling
6275 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6277 legitimate_pic_operand_p (rtx x)
6279 if (GET_CODE (x) == SYMBOL_REF
6280 || (GET_CODE (x) == CONST
6281 && GET_CODE (XEXP (x, 0)) == PLUS
6282 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6283 return 0;
6285 return 1;
6288 /* Record that the current function needs a PIC register. Initialize
6289 cfun->machine->pic_reg if we have not already done so. */
6291 static void
6292 require_pic_register (void)
6294 /* A lot of the logic here is made obscure by the fact that this
6295 routine gets called as part of the rtx cost estimation process.
6296 We don't want those calls to affect any assumptions about the real
6297 function; and further, we can't call entry_of_function() until we
6298 start the real expansion process. */
6299 if (!crtl->uses_pic_offset_table)
6301 gcc_assert (can_create_pseudo_p ());
6302 if (arm_pic_register != INVALID_REGNUM
6303 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6305 if (!cfun->machine->pic_reg)
6306 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6308 /* Play games to avoid marking the function as needing pic
6309 if we are being called as part of the cost-estimation
6310 process. */
6311 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6312 crtl->uses_pic_offset_table = 1;
6314 else
6316 rtx seq, insn;
6318 if (!cfun->machine->pic_reg)
6319 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6321 /* Play games to avoid marking the function as needing pic
6322 if we are being called as part of the cost-estimation
6323 process. */
6324 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6326 crtl->uses_pic_offset_table = 1;
6327 start_sequence ();
6329 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6330 && arm_pic_register > LAST_LO_REGNUM)
6331 emit_move_insn (cfun->machine->pic_reg,
6332 gen_rtx_REG (Pmode, arm_pic_register));
6333 else
6334 arm_load_pic_register (0UL);
6336 seq = get_insns ();
6337 end_sequence ();
6339 for (insn = seq; insn; insn = NEXT_INSN (insn))
6340 if (INSN_P (insn))
6341 INSN_LOCATION (insn) = prologue_location;
6343 /* We can be called during expansion of PHI nodes, where
6344 we can't yet emit instructions directly in the final
6345 insn stream. Queue the insns on the entry edge, they will
6346 be committed after everything else is expanded. */
6347 insert_insn_on_edge (seq,
6348 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6355 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6357 if (GET_CODE (orig) == SYMBOL_REF
6358 || GET_CODE (orig) == LABEL_REF)
6360 rtx insn;
6362 if (reg == 0)
6364 gcc_assert (can_create_pseudo_p ());
6365 reg = gen_reg_rtx (Pmode);
6368 /* VxWorks does not impose a fixed gap between segments; the run-time
6369 gap can be different from the object-file gap. We therefore can't
6370 use GOTOFF unless we are absolutely sure that the symbol is in the
6371 same segment as the GOT. Unfortunately, the flexibility of linker
6372 scripts means that we can't be sure of that in general, so assume
6373 that GOTOFF is never valid on VxWorks. */
6374 if ((GET_CODE (orig) == LABEL_REF
6375 || (GET_CODE (orig) == SYMBOL_REF &&
6376 SYMBOL_REF_LOCAL_P (orig)))
6377 && NEED_GOT_RELOC
6378 && arm_pic_data_is_text_relative)
6379 insn = arm_pic_static_addr (orig, reg);
6380 else
6382 rtx pat;
6383 rtx mem;
6385 /* If this function doesn't have a pic register, create one now. */
6386 require_pic_register ();
6388 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6390 /* Make the MEM as close to a constant as possible. */
6391 mem = SET_SRC (pat);
6392 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6393 MEM_READONLY_P (mem) = 1;
6394 MEM_NOTRAP_P (mem) = 1;
6396 insn = emit_insn (pat);
6399 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6400 by loop. */
6401 set_unique_reg_note (insn, REG_EQUAL, orig);
6403 return reg;
6405 else if (GET_CODE (orig) == CONST)
6407 rtx base, offset;
6409 if (GET_CODE (XEXP (orig, 0)) == PLUS
6410 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6411 return orig;
6413 /* Handle the case where we have: const (UNSPEC_TLS). */
6414 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6415 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6416 return orig;
6418 /* Handle the case where we have:
6419 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6420 CONST_INT. */
6421 if (GET_CODE (XEXP (orig, 0)) == PLUS
6422 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6423 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6425 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6426 return orig;
6429 if (reg == 0)
6431 gcc_assert (can_create_pseudo_p ());
6432 reg = gen_reg_rtx (Pmode);
6435 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6437 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6438 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6439 base == reg ? 0 : reg);
6441 if (CONST_INT_P (offset))
6443 /* The base register doesn't really matter, we only want to
6444 test the index for the appropriate mode. */
6445 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6447 gcc_assert (can_create_pseudo_p ());
6448 offset = force_reg (Pmode, offset);
6451 if (CONST_INT_P (offset))
6452 return plus_constant (Pmode, base, INTVAL (offset));
6455 if (GET_MODE_SIZE (mode) > 4
6456 && (GET_MODE_CLASS (mode) == MODE_INT
6457 || TARGET_SOFT_FLOAT))
6459 emit_insn (gen_addsi3 (reg, base, offset));
6460 return reg;
6463 return gen_rtx_PLUS (Pmode, base, offset);
6466 return orig;
6470 /* Find a spare register to use during the prolog of a function. */
6472 static int
6473 thumb_find_work_register (unsigned long pushed_regs_mask)
6475 int reg;
6477 /* Check the argument registers first as these are call-used. The
6478 register allocation order means that sometimes r3 might be used
6479 but earlier argument registers might not, so check them all. */
6480 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6481 if (!df_regs_ever_live_p (reg))
6482 return reg;
6484 /* Before going on to check the call-saved registers we can try a couple
6485 more ways of deducing that r3 is available. The first is when we are
6486 pushing anonymous arguments onto the stack and we have less than 4
6487 registers worth of fixed arguments(*). In this case r3 will be part of
6488 the variable argument list and so we can be sure that it will be
6489 pushed right at the start of the function. Hence it will be available
6490 for the rest of the prologue.
6491 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6492 if (cfun->machine->uses_anonymous_args
6493 && crtl->args.pretend_args_size > 0)
6494 return LAST_ARG_REGNUM;
6496 /* The other case is when we have fixed arguments but less than 4 registers
6497 worth. In this case r3 might be used in the body of the function, but
6498 it is not being used to convey an argument into the function. In theory
6499 we could just check crtl->args.size to see how many bytes are
6500 being passed in argument registers, but it seems that it is unreliable.
6501 Sometimes it will have the value 0 when in fact arguments are being
6502 passed. (See testcase execute/20021111-1.c for an example). So we also
6503 check the args_info.nregs field as well. The problem with this field is
6504 that it makes no allowances for arguments that are passed to the
6505 function but which are not used. Hence we could miss an opportunity
6506 when a function has an unused argument in r3. But it is better to be
6507 safe than to be sorry. */
6508 if (! cfun->machine->uses_anonymous_args
6509 && crtl->args.size >= 0
6510 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6511 && (TARGET_AAPCS_BASED
6512 ? crtl->args.info.aapcs_ncrn < 4
6513 : crtl->args.info.nregs < 4))
6514 return LAST_ARG_REGNUM;
6516 /* Otherwise look for a call-saved register that is going to be pushed. */
6517 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6518 if (pushed_regs_mask & (1 << reg))
6519 return reg;
6521 if (TARGET_THUMB2)
6523 /* Thumb-2 can use high regs. */
6524 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6525 if (pushed_regs_mask & (1 << reg))
6526 return reg;
6528 /* Something went wrong - thumb_compute_save_reg_mask()
6529 should have arranged for a suitable register to be pushed. */
6530 gcc_unreachable ();
6533 static GTY(()) int pic_labelno;
6535 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6536 low register. */
6538 void
6539 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6541 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6543 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6544 return;
6546 gcc_assert (flag_pic);
6548 pic_reg = cfun->machine->pic_reg;
6549 if (TARGET_VXWORKS_RTP)
6551 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6552 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6553 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6555 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6557 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6558 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6560 else
6562 /* We use an UNSPEC rather than a LABEL_REF because this label
6563 never appears in the code stream. */
6565 labelno = GEN_INT (pic_labelno++);
6566 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6567 l1 = gen_rtx_CONST (VOIDmode, l1);
6569 /* On the ARM the PC register contains 'dot + 8' at the time of the
6570 addition, on the Thumb it is 'dot + 4'. */
6571 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6572 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6573 UNSPEC_GOTSYM_OFF);
6574 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6576 if (TARGET_32BIT)
6578 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6580 else /* TARGET_THUMB1 */
6582 if (arm_pic_register != INVALID_REGNUM
6583 && REGNO (pic_reg) > LAST_LO_REGNUM)
6585 /* We will have pushed the pic register, so we should always be
6586 able to find a work register. */
6587 pic_tmp = gen_rtx_REG (SImode,
6588 thumb_find_work_register (saved_regs));
6589 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6590 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6591 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6593 else if (arm_pic_register != INVALID_REGNUM
6594 && arm_pic_register > LAST_LO_REGNUM
6595 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6597 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6598 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6599 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6601 else
6602 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6606 /* Need to emit this whether or not we obey regdecls,
6607 since setjmp/longjmp can cause life info to screw up. */
6608 emit_use (pic_reg);
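/* Editor's sketch (illustrative only, not part of the upstream source)
   of the classic ARM-state sequence that the code above expands to;
   register and label names are arbitrary and the exact instructions
   depend on the target:

            ldr     r4, .LC0          @ .LC0: _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
    .LPIC0: add     r4, pc, r4        @ pc reads as .LPIC0 + 8 here

   hence the 'dot + 8' (ARM) / 'dot + 4' (Thumb) adjustment above.  */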
6611 /* Generate code to load the address of a static var when flag_pic is set. */
6612 static rtx
6613 arm_pic_static_addr (rtx orig, rtx reg)
6615 rtx l1, labelno, offset_rtx, insn;
6617 gcc_assert (flag_pic);
6619 /* We use an UNSPEC rather than a LABEL_REF because this label
6620 never appears in the code stream. */
6621 labelno = GEN_INT (pic_labelno++);
6622 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6623 l1 = gen_rtx_CONST (VOIDmode, l1);
6625 /* On the ARM the PC register contains 'dot + 8' at the time of the
6626 addition, on the Thumb it is 'dot + 4'. */
6627 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6628 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6629 UNSPEC_SYMBOL_OFFSET);
6630 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6632 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6633 return insn;
6636 /* Return nonzero if X is valid as an ARM state addressing register. */
6637 static int
6638 arm_address_register_rtx_p (rtx x, int strict_p)
6640 int regno;
6642 if (!REG_P (x))
6643 return 0;
6645 regno = REGNO (x);
6647 if (strict_p)
6648 return ARM_REGNO_OK_FOR_BASE_P (regno);
6650 return (regno <= LAST_ARM_REGNUM
6651 || regno >= FIRST_PSEUDO_REGISTER
6652 || regno == FRAME_POINTER_REGNUM
6653 || regno == ARG_POINTER_REGNUM);
6656 /* Return TRUE if this rtx is the difference of a symbol and a label,
6657 and will reduce to a PC-relative relocation in the object file.
6658 Expressions like this can be left alone when generating PIC, rather
6659 than forced through the GOT. */
6660 static int
6661 pcrel_constant_p (rtx x)
6663 if (GET_CODE (x) == MINUS)
6664 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6666 return FALSE;
6669 /* Return true if X will surely end up in an index register after next
6670 splitting pass. */
6671 static bool
6672 will_be_in_index_register (const_rtx x)
6674 /* arm.md: calculate_pic_address will split this into a register. */
6675 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6678 /* Return nonzero if X is a valid ARM state address operand. */
6680 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6681 int strict_p)
6683 bool use_ldrd;
6684 enum rtx_code code = GET_CODE (x);
6686 if (arm_address_register_rtx_p (x, strict_p))
6687 return 1;
6689 use_ldrd = (TARGET_LDRD
6690 && (mode == DImode
6691 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6693 if (code == POST_INC || code == PRE_DEC
6694 || ((code == PRE_INC || code == POST_DEC)
6695 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6696 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6698 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6699 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6700 && GET_CODE (XEXP (x, 1)) == PLUS
6701 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6703 rtx addend = XEXP (XEXP (x, 1), 1);
6705 /* Don't allow ldrd post increment by register because it's hard
6706 to fixup invalid register choices. */
6707 if (use_ldrd
6708 && GET_CODE (x) == POST_MODIFY
6709 && REG_P (addend))
6710 return 0;
6712 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6713 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6716 /* After reload constants split into minipools will have addresses
6717 from a LABEL_REF. */
6718 else if (reload_completed
6719 && (code == LABEL_REF
6720 || (code == CONST
6721 && GET_CODE (XEXP (x, 0)) == PLUS
6722 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6723 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6724 return 1;
6726 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6727 return 0;
6729 else if (code == PLUS)
6731 rtx xop0 = XEXP (x, 0);
6732 rtx xop1 = XEXP (x, 1);
6734 return ((arm_address_register_rtx_p (xop0, strict_p)
6735 && ((CONST_INT_P (xop1)
6736 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6737 || (!strict_p && will_be_in_index_register (xop1))))
6738 || (arm_address_register_rtx_p (xop1, strict_p)
6739 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6742 #if 0
6743 /* Reload currently can't handle MINUS, so disable this for now */
6744 else if (GET_CODE (x) == MINUS)
6746 rtx xop0 = XEXP (x, 0);
6747 rtx xop1 = XEXP (x, 1);
6749 return (arm_address_register_rtx_p (xop0, strict_p)
6750 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6752 #endif
6754 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6755 && code == SYMBOL_REF
6756 && CONSTANT_POOL_ADDRESS_P (x)
6757 && ! (flag_pic
6758 && symbol_mentioned_p (get_pool_constant (x))
6759 && ! pcrel_constant_p (get_pool_constant (x))))
6760 return 1;
6762 return 0;
6765 /* Return nonzero if X is a valid Thumb-2 address operand. */
6766 static int
6767 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6769 bool use_ldrd;
6770 enum rtx_code code = GET_CODE (x);
6772 if (arm_address_register_rtx_p (x, strict_p))
6773 return 1;
6775 use_ldrd = (TARGET_LDRD
6776 && (mode == DImode
6777 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6779 if (code == POST_INC || code == PRE_DEC
6780 || ((code == PRE_INC || code == POST_DEC)
6781 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6782 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6784 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6785 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6786 && GET_CODE (XEXP (x, 1)) == PLUS
6787 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6789 /* Thumb-2 only has autoincrement by constant. */
6790 rtx addend = XEXP (XEXP (x, 1), 1);
6791 HOST_WIDE_INT offset;
6793 if (!CONST_INT_P (addend))
6794 return 0;
6796 offset = INTVAL(addend);
6797 if (GET_MODE_SIZE (mode) <= 4)
6798 return (offset > -256 && offset < 256);
6800 return (use_ldrd && offset > -1024 && offset < 1024
6801 && (offset & 3) == 0);
6804 /* After reload constants split into minipools will have addresses
6805 from a LABEL_REF. */
6806 else if (reload_completed
6807 && (code == LABEL_REF
6808 || (code == CONST
6809 && GET_CODE (XEXP (x, 0)) == PLUS
6810 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6811 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6812 return 1;
6814 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6815 return 0;
6817 else if (code == PLUS)
6819 rtx xop0 = XEXP (x, 0);
6820 rtx xop1 = XEXP (x, 1);
6822 return ((arm_address_register_rtx_p (xop0, strict_p)
6823 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6824 || (!strict_p && will_be_in_index_register (xop1))))
6825 || (arm_address_register_rtx_p (xop1, strict_p)
6826 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6829 /* Normally we can assign constant values to target registers without
6830 the help of the constant pool. But there are cases where we have to use
6831 the constant pool, such as:
6832 1) assigning a label to a register.
6833 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6835 Constant pool access in format:
6836 (set (reg r0) (mem (symbol_ref (".LC0"))))
6837 will cause the use of the literal pool (later, in function arm_reorg).
6838 So here we mark such a form as invalid; the compiler will then
6839 adjust it into:
6840 (set (reg r0) (symbol_ref (".LC0")))
6841 (set (reg r0) (mem (reg r0))).
6842 No extra register is required, and (mem (reg r0)) won't cause the use
6843 of literal pools. */
6844 else if (arm_disable_literal_pool && code == SYMBOL_REF
6845 && CONSTANT_POOL_ADDRESS_P (x))
6846 return 0;
6848 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6849 && code == SYMBOL_REF
6850 && CONSTANT_POOL_ADDRESS_P (x)
6851 && ! (flag_pic
6852 && symbol_mentioned_p (get_pool_constant (x))
6853 && ! pcrel_constant_p (get_pool_constant (x))))
6854 return 1;
6856 return 0;
6859 /* Return nonzero if INDEX is valid for an address index operand in
6860 ARM state. */
6861 static int
6862 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6863 int strict_p)
6865 HOST_WIDE_INT range;
6866 enum rtx_code code = GET_CODE (index);
6868 /* Standard coprocessor addressing modes. */
6869 if (TARGET_HARD_FLOAT
6870 && TARGET_VFP
6871 && (mode == SFmode || mode == DFmode))
6872 return (code == CONST_INT && INTVAL (index) < 1024
6873 && INTVAL (index) > -1024
6874 && (INTVAL (index) & 3) == 0);
6876 /* For quad modes, we restrict the constant offset to be slightly less
6877 than what the instruction format permits. We do this because for
6878 quad mode moves, we will actually decompose them into two separate
6879 double-mode reads or writes. INDEX must therefore be a valid
6880 (double-mode) offset and so should INDEX+8. */
6881 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6882 return (code == CONST_INT
6883 && INTVAL (index) < 1016
6884 && INTVAL (index) > -1024
6885 && (INTVAL (index) & 3) == 0);
6887 /* We have no such constraint on double mode offsets, so we permit the
6888 full range of the instruction format. */
6889 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6890 return (code == CONST_INT
6891 && INTVAL (index) < 1024
6892 && INTVAL (index) > -1024
6893 && (INTVAL (index) & 3) == 0);
6895 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6896 return (code == CONST_INT
6897 && INTVAL (index) < 1024
6898 && INTVAL (index) > -1024
6899 && (INTVAL (index) & 3) == 0);
6901 if (arm_address_register_rtx_p (index, strict_p)
6902 && (GET_MODE_SIZE (mode) <= 4))
6903 return 1;
6905 if (mode == DImode || mode == DFmode)
6907 if (code == CONST_INT)
6909 HOST_WIDE_INT val = INTVAL (index);
6911 if (TARGET_LDRD)
6912 return val > -256 && val < 256;
6913 else
6914 return val > -4096 && val < 4092;
6917 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6920 if (GET_MODE_SIZE (mode) <= 4
6921 && ! (arm_arch4
6922 && (mode == HImode
6923 || mode == HFmode
6924 || (mode == QImode && outer == SIGN_EXTEND))))
6926 if (code == MULT)
6928 rtx xiop0 = XEXP (index, 0);
6929 rtx xiop1 = XEXP (index, 1);
6931 return ((arm_address_register_rtx_p (xiop0, strict_p)
6932 && power_of_two_operand (xiop1, SImode))
6933 || (arm_address_register_rtx_p (xiop1, strict_p)
6934 && power_of_two_operand (xiop0, SImode)));
6936 else if (code == LSHIFTRT || code == ASHIFTRT
6937 || code == ASHIFT || code == ROTATERT)
6939 rtx op = XEXP (index, 1);
6941 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6942 && CONST_INT_P (op)
6943 && INTVAL (op) > 0
6944 && INTVAL (op) <= 31);
6948 /* For ARM v4 we may be doing a sign-extend operation during the
6949 load. */
6950 if (arm_arch4)
6952 if (mode == HImode
6953 || mode == HFmode
6954 || (outer == SIGN_EXTEND && mode == QImode))
6955 range = 256;
6956 else
6957 range = 4096;
6959 else
6960 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6962 return (code == CONST_INT
6963 && INTVAL (index) < range
6964 && INTVAL (index) > -range);
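/* Illustrative examples (editor's note, not part of the upstream source)
   of addresses accepted above in ARM state: for SImode, [r0, r1],
   [r0, r1, lsl #2] and [r0, #4095] are all valid index forms, while for
   DImode on a TARGET_LDRD core the immediate is limited to the LDRD
   range, so [r0, #260] is rejected but [r0, #-252] is accepted.  */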
6967 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6968 index operand. i.e. 1, 2, 4 or 8. */
6969 static bool
6970 thumb2_index_mul_operand (rtx op)
6972 HOST_WIDE_INT val;
6974 if (!CONST_INT_P (op))
6975 return false;
6977 val = INTVAL(op);
6978 return (val == 1 || val == 2 || val == 4 || val == 8);
6981 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6982 static int
6983 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6985 enum rtx_code code = GET_CODE (index);
6987 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6988 /* Standard coprocessor addressing modes. */
6989 if (TARGET_HARD_FLOAT
6990 && TARGET_VFP
6991 && (mode == SFmode || mode == DFmode))
6992 return (code == CONST_INT && INTVAL (index) < 1024
6993 /* Thumb-2 allows only > -256 index range for its core register
6994 load/stores. Since we allow SF/DF in core registers, we have
6995 to use the intersection between -256~4096 (core) and -1024~1024
6996 (coprocessor). */
6997 && INTVAL (index) > -256
6998 && (INTVAL (index) & 3) == 0);
7000 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7002 /* For DImode assume values will usually live in core regs
7003 and only allow LDRD addressing modes. */
7004 if (!TARGET_LDRD || mode != DImode)
7005 return (code == CONST_INT
7006 && INTVAL (index) < 1024
7007 && INTVAL (index) > -1024
7008 && (INTVAL (index) & 3) == 0);
7011 /* For quad modes, we restrict the constant offset to be slightly less
7012 than what the instruction format permits. We do this because for
7013 quad mode moves, we will actually decompose them into two separate
7014 double-mode reads or writes. INDEX must therefore be a valid
7015 (double-mode) offset and so should INDEX+8. */
7016 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7017 return (code == CONST_INT
7018 && INTVAL (index) < 1016
7019 && INTVAL (index) > -1024
7020 && (INTVAL (index) & 3) == 0);
7022 /* We have no such constraint on double mode offsets, so we permit the
7023 full range of the instruction format. */
7024 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7025 return (code == CONST_INT
7026 && INTVAL (index) < 1024
7027 && INTVAL (index) > -1024
7028 && (INTVAL (index) & 3) == 0);
7030 if (arm_address_register_rtx_p (index, strict_p)
7031 && (GET_MODE_SIZE (mode) <= 4))
7032 return 1;
7034 if (mode == DImode || mode == DFmode)
7036 if (code == CONST_INT)
7038 HOST_WIDE_INT val = INTVAL (index);
7039 /* ??? Can we assume ldrd for thumb2? */
7040 /* Thumb-2 ldrd only has reg+const addressing modes. */
7041 /* ldrd supports offsets of +-1020.
7042 However the ldr fallback does not. */
7043 return val > -256 && val < 256 && (val & 3) == 0;
7045 else
7046 return 0;
7049 if (code == MULT)
7051 rtx xiop0 = XEXP (index, 0);
7052 rtx xiop1 = XEXP (index, 1);
7054 return ((arm_address_register_rtx_p (xiop0, strict_p)
7055 && thumb2_index_mul_operand (xiop1))
7056 || (arm_address_register_rtx_p (xiop1, strict_p)
7057 && thumb2_index_mul_operand (xiop0)));
7059 else if (code == ASHIFT)
7061 rtx op = XEXP (index, 1);
7063 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7064 && CONST_INT_P (op)
7065 && INTVAL (op) > 0
7066 && INTVAL (op) <= 3);
7069 return (code == CONST_INT
7070 && INTVAL (index) < 4096
7071 && INTVAL (index) > -256);
7074 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7075 static int
7076 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7078 int regno;
7080 if (!REG_P (x))
7081 return 0;
7083 regno = REGNO (x);
7085 if (strict_p)
7086 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7088 return (regno <= LAST_LO_REGNUM
7089 || regno > LAST_VIRTUAL_REGISTER
7090 || regno == FRAME_POINTER_REGNUM
7091 || (GET_MODE_SIZE (mode) >= 4
7092 && (regno == STACK_POINTER_REGNUM
7093 || regno >= FIRST_PSEUDO_REGISTER
7094 || x == hard_frame_pointer_rtx
7095 || x == arg_pointer_rtx)));
7098 /* Return nonzero if x is a legitimate index register. This is the case
7099 for any base register that can access a QImode object. */
7100 inline static int
7101 thumb1_index_register_rtx_p (rtx x, int strict_p)
7103 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7106 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7108 The AP may be eliminated to either the SP or the FP, so we use the
7109 least common denominator, e.g. SImode, and offsets from 0 to 64.
7111 ??? Verify whether the above is the right approach.
7113 ??? Also, the FP may be eliminated to the SP, so perhaps that
7114 needs special handling also.
7116 ??? Look at how the mips16 port solves this problem. It probably uses
7117 better ways to solve some of these problems.
7119 Although it is not incorrect, we don't accept QImode and HImode
7120 addresses based on the frame pointer or arg pointer until the
7121 reload pass starts. This is so that eliminating such addresses
7122 into stack based ones won't produce impossible code. */
7124 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7126 /* ??? Not clear if this is right. Experiment. */
7127 if (GET_MODE_SIZE (mode) < 4
7128 && !(reload_in_progress || reload_completed)
7129 && (reg_mentioned_p (frame_pointer_rtx, x)
7130 || reg_mentioned_p (arg_pointer_rtx, x)
7131 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7132 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7133 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7134 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7135 return 0;
7137 /* Accept any base register. SP only in SImode or larger. */
7138 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7139 return 1;
7141 /* This is PC relative data before arm_reorg runs. */
7142 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7143 && GET_CODE (x) == SYMBOL_REF
7144 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7145 return 1;
7147 /* This is PC relative data after arm_reorg runs. */
7148 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7149 && reload_completed
7150 && (GET_CODE (x) == LABEL_REF
7151 || (GET_CODE (x) == CONST
7152 && GET_CODE (XEXP (x, 0)) == PLUS
7153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7154 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7155 return 1;
7157 /* Post-inc indexing only supported for SImode and larger. */
7158 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7159 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7160 return 1;
7162 else if (GET_CODE (x) == PLUS)
7164 /* REG+REG address can be any two index registers. */
7165 /* We disallow FRAME+REG addressing since we know that FRAME
7166 will be replaced with STACK, and SP relative addressing only
7167 permits SP+OFFSET. */
7168 if (GET_MODE_SIZE (mode) <= 4
7169 && XEXP (x, 0) != frame_pointer_rtx
7170 && XEXP (x, 1) != frame_pointer_rtx
7171 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7172 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7173 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7174 return 1;
7176 /* REG+const has 5-7 bit offset for non-SP registers. */
7177 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7178 || XEXP (x, 0) == arg_pointer_rtx)
7179 && CONST_INT_P (XEXP (x, 1))
7180 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7181 return 1;
7183 /* REG+const has 10-bit offset for SP, but only SImode and
7184 larger is supported. */
7185 /* ??? Should probably check for DI/DFmode overflow here
7186 just like GO_IF_LEGITIMATE_OFFSET does. */
7187 else if (REG_P (XEXP (x, 0))
7188 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7189 && GET_MODE_SIZE (mode) >= 4
7190 && CONST_INT_P (XEXP (x, 1))
7191 && INTVAL (XEXP (x, 1)) >= 0
7192 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7193 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7194 return 1;
7196 else if (REG_P (XEXP (x, 0))
7197 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7198 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7199 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7200 && REGNO (XEXP (x, 0))
7201 <= LAST_VIRTUAL_POINTER_REGISTER))
7202 && GET_MODE_SIZE (mode) >= 4
7203 && CONST_INT_P (XEXP (x, 1))
7204 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7205 return 1;
7208 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7209 && GET_MODE_SIZE (mode) == 4
7210 && GET_CODE (x) == SYMBOL_REF
7211 && CONSTANT_POOL_ADDRESS_P (x)
7212 && ! (flag_pic
7213 && symbol_mentioned_p (get_pool_constant (x))
7214 && ! pcrel_constant_p (get_pool_constant (x))))
7215 return 1;
7217 return 0;
7220 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7221 instruction of mode MODE. */
7223 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7225 switch (GET_MODE_SIZE (mode))
7227 case 1:
7228 return val >= 0 && val < 32;
7230 case 2:
7231 return val >= 0 && val < 64 && (val & 1) == 0;
7233 default:
7234 return (val >= 0
7235 && (val + GET_MODE_SIZE (mode)) <= 128
7236 && (val & 3) == 0);
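/* Illustrative summary (editor's note, not part of the upstream source):
   this accepts the Thumb-1 immediate-offset forms, i.e. byte accesses
   with offsets 0-31 (e.g. ldrb r0, [r1, #31]), halfword accesses with
   even offsets 0-62, and word or larger accesses with word-aligned
   offsets such that offset + size <= 128 (0-124 for SImode).  */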
7240 bool
7241 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7243 if (TARGET_ARM)
7244 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7245 else if (TARGET_THUMB2)
7246 return thumb2_legitimate_address_p (mode, x, strict_p);
7247 else /* if (TARGET_THUMB1) */
7248 return thumb1_legitimate_address_p (mode, x, strict_p);
7251 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7253 Given an rtx X being reloaded into a reg required to be
7254 in class CLASS, return the class of reg to actually use.
7255 In general this is just CLASS, but for the Thumb core registers and
7256 immediate constants we prefer a LO_REGS class or a subset. */
7258 static reg_class_t
7259 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7261 if (TARGET_32BIT)
7262 return rclass;
7263 else
7265 if (rclass == GENERAL_REGS)
7266 return LO_REGS;
7267 else
7268 return rclass;
7272 /* Build the SYMBOL_REF for __tls_get_addr. */
7274 static GTY(()) rtx tls_get_addr_libfunc;
7276 static rtx
7277 get_tls_get_addr (void)
7279 if (!tls_get_addr_libfunc)
7280 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7281 return tls_get_addr_libfunc;
7285 arm_load_tp (rtx target)
7287 if (!target)
7288 target = gen_reg_rtx (SImode);
7290 if (TARGET_HARD_TP)
7292 /* Can return in any reg. */
7293 emit_insn (gen_load_tp_hard (target));
7295 else
7297 /* Always returned in r0. Immediately copy the result into a pseudo,
7298 otherwise other uses of r0 (e.g. setting up function arguments) may
7299 clobber the value. */
7301 rtx tmp;
7303 emit_insn (gen_load_tp_soft ());
7305 tmp = gen_rtx_REG (SImode, 0);
7306 emit_move_insn (target, tmp);
7308 return target;
7311 static rtx
7312 load_tls_operand (rtx x, rtx reg)
7314 rtx tmp;
7316 if (reg == NULL_RTX)
7317 reg = gen_reg_rtx (SImode);
7319 tmp = gen_rtx_CONST (SImode, x);
7321 emit_move_insn (reg, tmp);
7323 return reg;
7326 static rtx
7327 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7329 rtx insns, label, labelno, sum;
7331 gcc_assert (reloc != TLS_DESCSEQ);
7332 start_sequence ();
7334 labelno = GEN_INT (pic_labelno++);
7335 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7336 label = gen_rtx_CONST (VOIDmode, label);
7338 sum = gen_rtx_UNSPEC (Pmode,
7339 gen_rtvec (4, x, GEN_INT (reloc), label,
7340 GEN_INT (TARGET_ARM ? 8 : 4)),
7341 UNSPEC_TLS);
7342 reg = load_tls_operand (sum, reg);
7344 if (TARGET_ARM)
7345 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7346 else
7347 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7349 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7350 LCT_PURE, /* LCT_CONST? */
7351 Pmode, 1, reg, Pmode);
7353 insns = get_insns ();
7354 end_sequence ();
7356 return insns;
7359 static rtx
7360 arm_tls_descseq_addr (rtx x, rtx reg)
7362 rtx labelno = GEN_INT (pic_labelno++);
7363 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7364 rtx sum = gen_rtx_UNSPEC (Pmode,
7365 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7366 gen_rtx_CONST (VOIDmode, label),
7367 GEN_INT (!TARGET_ARM)),
7368 UNSPEC_TLS);
7369 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7371 emit_insn (gen_tlscall (x, labelno));
7372 if (!reg)
7373 reg = gen_reg_rtx (SImode);
7374 else
7375 gcc_assert (REGNO (reg) != 0);
7377 emit_move_insn (reg, reg0);
7379 return reg;
7383 legitimize_tls_address (rtx x, rtx reg)
7385 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7386 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7388 switch (model)
7390 case TLS_MODEL_GLOBAL_DYNAMIC:
7391 if (TARGET_GNU2_TLS)
7393 reg = arm_tls_descseq_addr (x, reg);
7395 tp = arm_load_tp (NULL_RTX);
7397 dest = gen_rtx_PLUS (Pmode, tp, reg);
7399 else
7401 /* Original scheme */
7402 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7403 dest = gen_reg_rtx (Pmode);
7404 emit_libcall_block (insns, dest, ret, x);
7406 return dest;
7408 case TLS_MODEL_LOCAL_DYNAMIC:
7409 if (TARGET_GNU2_TLS)
7411 reg = arm_tls_descseq_addr (x, reg);
7413 tp = arm_load_tp (NULL_RTX);
7415 dest = gen_rtx_PLUS (Pmode, tp, reg);
7417 else
7419 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7421 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7422 share the LDM result with other LD model accesses. */
7423 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7424 UNSPEC_TLS);
7425 dest = gen_reg_rtx (Pmode);
7426 emit_libcall_block (insns, dest, ret, eqv);
7428 /* Load the addend. */
7429 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7430 GEN_INT (TLS_LDO32)),
7431 UNSPEC_TLS);
7432 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7433 dest = gen_rtx_PLUS (Pmode, dest, addend);
7435 return dest;
7437 case TLS_MODEL_INITIAL_EXEC:
7438 labelno = GEN_INT (pic_labelno++);
7439 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7440 label = gen_rtx_CONST (VOIDmode, label);
7441 sum = gen_rtx_UNSPEC (Pmode,
7442 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7443 GEN_INT (TARGET_ARM ? 8 : 4)),
7444 UNSPEC_TLS);
7445 reg = load_tls_operand (sum, reg);
7447 if (TARGET_ARM)
7448 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7449 else if (TARGET_THUMB2)
7450 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7451 else
7453 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7454 emit_move_insn (reg, gen_const_mem (SImode, reg));
7457 tp = arm_load_tp (NULL_RTX);
7459 return gen_rtx_PLUS (Pmode, tp, reg);
7461 case TLS_MODEL_LOCAL_EXEC:
7462 tp = arm_load_tp (NULL_RTX);
7464 reg = gen_rtx_UNSPEC (Pmode,
7465 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7466 UNSPEC_TLS);
7467 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7469 return gen_rtx_PLUS (Pmode, tp, reg);
7471 default:
7472 abort ();
7476 /* Try machine-dependent ways of modifying an illegitimate address
7477 to be legitimate. If we find one, return the new, valid address. */
7479 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7481 if (arm_tls_referenced_p (x))
7483 rtx addend = NULL;
7485 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7487 addend = XEXP (XEXP (x, 0), 1);
7488 x = XEXP (XEXP (x, 0), 0);
7491 if (GET_CODE (x) != SYMBOL_REF)
7492 return x;
7494 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7496 x = legitimize_tls_address (x, NULL_RTX);
7498 if (addend)
7500 x = gen_rtx_PLUS (SImode, x, addend);
7501 orig_x = x;
7503 else
7504 return x;
7507 if (!TARGET_ARM)
7509 /* TODO: legitimize_address for Thumb2. */
7510 if (TARGET_THUMB2)
7511 return x;
7512 return thumb_legitimize_address (x, orig_x, mode);
7515 if (GET_CODE (x) == PLUS)
7517 rtx xop0 = XEXP (x, 0);
7518 rtx xop1 = XEXP (x, 1);
7520 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7521 xop0 = force_reg (SImode, xop0);
7523 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7524 && !symbol_mentioned_p (xop1))
7525 xop1 = force_reg (SImode, xop1);
7527 if (ARM_BASE_REGISTER_RTX_P (xop0)
7528 && CONST_INT_P (xop1))
7530 HOST_WIDE_INT n, low_n;
7531 rtx base_reg, val;
7532 n = INTVAL (xop1);
7534 /* VFP addressing modes actually allow greater offsets, but for
7535 now we just stick with the lowest common denominator. */
7536 if (mode == DImode
7537 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7539 low_n = n & 0x0f;
7540 n &= ~0x0f;
7541 if (low_n > 4)
7543 n += 16;
7544 low_n -= 16;
7547 else
7549 low_n = ((mode) == TImode ? 0
7550 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7551 n -= low_n;
7554 base_reg = gen_reg_rtx (SImode);
7555 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7556 emit_move_insn (base_reg, val);
7557 x = plus_constant (Pmode, base_reg, low_n);
7559 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7560 x = gen_rtx_PLUS (SImode, xop0, xop1);
7563 /* XXX We don't allow MINUS any more -- see comment in
7564 arm_legitimate_address_outer_p (). */
7565 else if (GET_CODE (x) == MINUS)
7567 rtx xop0 = XEXP (x, 0);
7568 rtx xop1 = XEXP (x, 1);
7570 if (CONSTANT_P (xop0))
7571 xop0 = force_reg (SImode, xop0);
7573 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7574 xop1 = force_reg (SImode, xop1);
7576 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7577 x = gen_rtx_MINUS (SImode, xop0, xop1);
7580 /* Make sure to take full advantage of the pre-indexed addressing mode
7581 with absolute addresses which often allows for the base register to
7582 be factorized for multiple adjacent memory references, and it might
7583 even allow for the minipool to be avoided entirely. */
7584 else if (CONST_INT_P (x) && optimize > 0)
7586 unsigned int bits;
7587 HOST_WIDE_INT mask, base, index;
7588 rtx base_reg;
7590 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7591 use an 8-bit index. So let's use a 12-bit index for SImode only and
7592 hope that arm_gen_constant will enable ldrb to use more bits. */
7593 bits = (mode == SImode) ? 12 : 8;
7594 mask = (1 << bits) - 1;
7595 base = INTVAL (x) & ~mask;
7596 index = INTVAL (x) & mask;
7597 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7599 /* It'll most probably be more efficient to generate the base
7600 with more bits set and use a negative index instead. */
7601 base |= mask;
7602 index -= mask;
7604 base_reg = force_reg (SImode, GEN_INT (base));
7605 x = plus_constant (Pmode, base_reg, index);
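/* Illustrative example (not part of the original code): for an SImode
   access to the absolute address 0x8004 we get mask == 0xfff,
   base == 0x8000 and index == 4, so 0x8000 is materialized in a register
   and the access becomes [reg, #4]; bit_count (0x8000) == 1 is well under
   (32 - 12)/2, so no negative-index adjustment is made.  */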
7608 if (flag_pic)
7610 /* We need to find and carefully transform any SYMBOL and LABEL
7611 references; so go back to the original address expression. */
7612 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7614 if (new_x != orig_x)
7615 x = new_x;
7618 return x;
7622 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7623 to be legitimate. If we find one, return the new, valid address. */
7625 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7627 if (GET_CODE (x) == PLUS
7628 && CONST_INT_P (XEXP (x, 1))
7629 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7630 || INTVAL (XEXP (x, 1)) < 0))
7632 rtx xop0 = XEXP (x, 0);
7633 rtx xop1 = XEXP (x, 1);
7634 HOST_WIDE_INT offset = INTVAL (xop1);
7636 /* Try and fold the offset into a biasing of the base register and
7637 then offsetting that. Don't do this when optimizing for space
7638 since it can cause too many CSEs. */
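/* Illustrative example (not part of the original code): when optimizing
   for size, an SImode access at offset 260 takes the branch below with
   delta == 260 - (256 - 4) == 8, so the base register is biased by 252
   and the load itself uses the small offset #8.  */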
7639 if (optimize_size && offset >= 0
7640 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7642 HOST_WIDE_INT delta;
7644 if (offset >= 256)
7645 delta = offset - (256 - GET_MODE_SIZE (mode));
7646 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7647 delta = 31 * GET_MODE_SIZE (mode);
7648 else
7649 delta = offset & (~31 * GET_MODE_SIZE (mode));
7651 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7652 NULL_RTX);
7653 x = plus_constant (Pmode, xop0, delta);
7655 else if (offset < 0 && offset > -256)
7656 /* Small negative offsets are best done with a subtract before the
7657 dereference, since forcing these into a register normally takes two
7658 instructions. */
7659 x = force_operand (x, NULL_RTX);
7660 else
7662 /* For the remaining cases, force the constant into a register. */
7663 xop1 = force_reg (SImode, xop1);
7664 x = gen_rtx_PLUS (SImode, xop0, xop1);
7667 else if (GET_CODE (x) == PLUS
7668 && s_register_operand (XEXP (x, 1), SImode)
7669 && !s_register_operand (XEXP (x, 0), SImode))
7671 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7673 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7676 if (flag_pic)
7678 /* We need to find and carefully transform any SYMBOL and LABEL
7679 references; so go back to the original address expression. */
7680 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7682 if (new_x != orig_x)
7683 x = new_x;
7686 return x;
7689 bool
7690 arm_legitimize_reload_address (rtx *p,
7691 enum machine_mode mode,
7692 int opnum, int type,
7693 int ind_levels ATTRIBUTE_UNUSED)
7695 /* We must recognize output that we have already generated ourselves. */
7696 if (GET_CODE (*p) == PLUS
7697 && GET_CODE (XEXP (*p, 0)) == PLUS
7698 && REG_P (XEXP (XEXP (*p, 0), 0))
7699 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7700 && CONST_INT_P (XEXP (*p, 1)))
7702 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7703 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7704 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7705 return true;
7708 if (GET_CODE (*p) == PLUS
7709 && REG_P (XEXP (*p, 0))
7710 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7711 /* If the base register is equivalent to a constant, let the generic
7712 code handle it. Otherwise we will run into problems if a future
7713 reload pass decides to rematerialize the constant. */
7714 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7715 && CONST_INT_P (XEXP (*p, 1)))
7717 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7718 HOST_WIDE_INT low, high;
7720 /* Detect coprocessor load/stores. */
7721 bool coproc_p = ((TARGET_HARD_FLOAT
7722 && TARGET_VFP
7723 && (mode == SFmode || mode == DFmode))
7724 || (TARGET_REALLY_IWMMXT
7725 && VALID_IWMMXT_REG_MODE (mode))
7726 || (TARGET_NEON
7727 && (VALID_NEON_DREG_MODE (mode)
7728 || VALID_NEON_QREG_MODE (mode))));
7730 /* For some conditions, bail out when the low two bits of the offset are nonzero. */
7731 if ((val & 0x3) != 0
7732 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7733 && (coproc_p
7734 /* For DI, and DF under soft-float: */
7735 || ((mode == DImode || mode == DFmode)
7736 /* Without ldrd, we use stm/ldm, which does not
7737 fare well with unaligned offsets.
7738 && (! TARGET_LDRD
7739 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7740 || TARGET_THUMB2))))
7741 return false;
7743 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7744 where the (reg+high) part gets turned into a reload add insn,
7745 we try to decompose the index into high/low values that can often
7746 also lead to better reload CSE.
7747 For example:
7748 ldr r0, [r2, #4100] // Offset too large
7749 ldr r1, [r2, #4104] // Offset too large
7751 is best reloaded as:
7752 add t1, r2, #4096
7753 ldr r0, [t1, #4]
7754 add t2, r2, #4096
7755 ldr r1, [t2, #8]
7757 which post-reload CSE can simplify in most cases to eliminate the
7758 second add instruction:
7759 add t1, r2, #4096
7760 ldr r0, [t1, #4]
7761 ldr r1, [t1, #8]
7763 The idea here is that we want to split out the bits of the constant
7764 as a mask, rather than by subtracting the maximum offset that the
7765 respective type of load/store can handle.
7767 A negative low part can still be utilized even if
7768 the overall offset is positive; sometimes this may lead to an immediate
7769 that can be constructed with fewer instructions.
7770 For example:
7771 ldr r0, [r2, #0x3FFFFC]
7773 This is best reloaded as:
7774 add t1, r2, #0x400000
7775 ldr r0, [t1, #-4]
7777 The trick for spotting this for a load insn with N bits of offset
7778 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7779 negative offset that is going to make bit N and all the bits below
7780 it become zero in the remainder part.
7782 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7783 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7784 used in most cases of ARM load/store instructions. */
7786 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7787 (((VAL) & ((1 << (N)) - 1)) \
7788 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7789 : 0)
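/* Illustrative worked example (not part of the original code): for the
   plain-ldr case above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) evaluates as
   ((0x3FFFFC & 0x1fff) ^ 0x1000) - 0x1000 = (0x1ffc ^ 0x1000) - 0x1000 = -4,
   so high = 0x3FFFFC - (-4) = 0x400000, reproducing the
   add t1, r2, #0x400000 / ldr r0, [t1, #-4] sequence shown above.  */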
7791 if (coproc_p)
7793 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7795 /* NEON quad-word load/stores are made of two double-word accesses,
7796 so the valid index range is reduced by 8. Treat as 9-bit range if
7797 we go over it. */
7798 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7799 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7801 else if (GET_MODE_SIZE (mode) == 8)
7803 if (TARGET_LDRD)
7804 low = (TARGET_THUMB2
7805 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7806 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7807 else
7808 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7809 to access doublewords. The supported load/store offsets are
7810 -8, -4, and 4, which we try to produce here. */
7811 low = ((val & 0xf) ^ 0x8) - 0x8;
7813 else if (GET_MODE_SIZE (mode) < 8)
7815 /* NEON element load/stores do not have an offset. */
7816 if (TARGET_NEON_FP16 && mode == HFmode)
7817 return false;
7819 if (TARGET_THUMB2)
7821 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7822 Try the wider 12-bit range first, and re-try if the result
7823 is out of range. */
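/* Illustrative example (not from the original source): for val == 4100,
   SIGN_MAG_LOW_ADDR_BITS (4100, 12) is -4092, which is below -255, so we
   retry with 8 bits and get low == 4, leaving high == 4096 for the
   reload add insn.  */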
7824 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7825 if (low < -255)
7826 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7828 else
7830 if (mode == HImode || mode == HFmode)
7832 if (arm_arch4)
7833 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7834 else
7836 /* The storehi/movhi_bytes fallbacks can use only
7837 [-4094,+4094] of the full ldrb/strb index range. */
7838 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7839 if (low == 4095 || low == -4095)
7840 return false;
7843 else
7844 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7847 else
7848 return false;
7850 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7851 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7852 - (unsigned HOST_WIDE_INT) 0x80000000);
7853 /* Check for overflow or zero */
7854 if (low == 0 || high == 0 || (high + low != val))
7855 return false;
7857 /* Reload the high part into a base reg; leave the low part
7858 in the mem.
7859 Note that replacing this gen_rtx_PLUS with plus_constant is
7860 wrong in this case because we rely on the
7861 (plus (plus reg c1) c2) structure being preserved so that
7862 XEXP (*p, 0) in push_reload below uses the correct term. */
7863 *p = gen_rtx_PLUS (GET_MODE (*p),
7864 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7865 GEN_INT (high)),
7866 GEN_INT (low));
7867 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7868 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7869 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7870 return true;
7873 return false;
7877 thumb_legitimize_reload_address (rtx *x_p,
7878 enum machine_mode mode,
7879 int opnum, int type,
7880 int ind_levels ATTRIBUTE_UNUSED)
7882 rtx x = *x_p;
7884 if (GET_CODE (x) == PLUS
7885 && GET_MODE_SIZE (mode) < 4
7886 && REG_P (XEXP (x, 0))
7887 && XEXP (x, 0) == stack_pointer_rtx
7888 && CONST_INT_P (XEXP (x, 1))
7889 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7891 rtx orig_x = x;
7893 x = copy_rtx (x);
7894 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7895 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7896 return x;
7899 /* If both registers are hi-regs, then it's better to reload the
7900 entire expression rather than each register individually. That
7901 only requires one reload register rather than two. */
7902 if (GET_CODE (x) == PLUS
7903 && REG_P (XEXP (x, 0))
7904 && REG_P (XEXP (x, 1))
7905 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7906 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7908 rtx orig_x = x;
7910 x = copy_rtx (x);
7911 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7912 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7913 return x;
7916 return NULL;
7919 /* Test for various thread-local symbols. */
7921 /* Helper for arm_tls_referenced_p. */
7923 static int
7924 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7926 if (GET_CODE (*x) == SYMBOL_REF)
7927 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7929 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7930 TLS offsets, not real symbol references. */
7931 if (GET_CODE (*x) == UNSPEC
7932 && XINT (*x, 1) == UNSPEC_TLS)
7933 return -1;
7935 return 0;
7938 /* Return TRUE if X contains any TLS symbol references. */
7940 bool
7941 arm_tls_referenced_p (rtx x)
7943 if (! TARGET_HAVE_TLS)
7944 return false;
7946 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7949 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7951 On the ARM, allow any integer (invalid ones are removed later by insn
7952 patterns), nice doubles and symbol_refs which refer to the function's
7953 constant pool XXX.
7955 When generating pic allow anything. */
7957 static bool
7958 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7960 /* At present, we have no support for Neon structure constants, so forbid
7961 them here. It might be possible to handle simple cases like 0 and -1
7962 in future. */
7963 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7964 return false;
7966 return flag_pic || !label_mentioned_p (x);
7969 static bool
7970 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7972 return (CONST_INT_P (x)
7973 || CONST_DOUBLE_P (x)
7974 || CONSTANT_ADDRESS_P (x)
7975 || flag_pic);
7978 static bool
7979 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7981 return (!arm_cannot_force_const_mem (mode, x)
7982 && (TARGET_32BIT
7983 ? arm_legitimate_constant_p_1 (mode, x)
7984 : thumb_legitimate_constant_p (mode, x)));
7987 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7989 static bool
7990 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7992 rtx base, offset;
7994 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7996 split_const (x, &base, &offset);
7997 if (GET_CODE (base) == SYMBOL_REF
7998 && !offset_within_block_p (base, INTVAL (offset)))
7999 return true;
8001 return arm_tls_referenced_p (x);
8004 #define REG_OR_SUBREG_REG(X) \
8005 (REG_P (X) \
8006 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8008 #define REG_OR_SUBREG_RTX(X) \
8009 (REG_P (X) ? (X) : SUBREG_REG (X))
8011 static inline int
8012 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8014 enum machine_mode mode = GET_MODE (x);
8015 int total, words;
8017 switch (code)
8019 case ASHIFT:
8020 case ASHIFTRT:
8021 case LSHIFTRT:
8022 case ROTATERT:
8023 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8025 case PLUS:
8026 case MINUS:
8027 case COMPARE:
8028 case NEG:
8029 case NOT:
8030 return COSTS_N_INSNS (1);
8032 case MULT:
8033 if (CONST_INT_P (XEXP (x, 1)))
8035 int cycles = 0;
8036 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8038 while (i)
8040 i >>= 2;
8041 cycles++;
8043 return COSTS_N_INSNS (2) + cycles;
8045 return COSTS_N_INSNS (1) + 16;
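/* Illustrative example (not part of the original code): for
   (mult (reg) (const_int 0x55)) the loop above runs four times, giving
   COSTS_N_INSNS (2) + 4; a multiply by a non-constant falls through to
   COSTS_N_INSNS (1) + 16.  */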
8047 case SET:
8048 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8049 the mode. */
8050 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8051 return (COSTS_N_INSNS (words)
8052 + 4 * ((MEM_P (SET_SRC (x)))
8053 + MEM_P (SET_DEST (x))));
8055 case CONST_INT:
8056 if (outer == SET)
8058 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8059 return 0;
8060 if (thumb_shiftable_const (INTVAL (x)))
8061 return COSTS_N_INSNS (2);
8062 return COSTS_N_INSNS (3);
8064 else if ((outer == PLUS || outer == COMPARE)
8065 && INTVAL (x) < 256 && INTVAL (x) > -256)
8066 return 0;
8067 else if ((outer == IOR || outer == XOR || outer == AND)
8068 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8069 return COSTS_N_INSNS (1);
8070 else if (outer == AND)
8072 int i;
8073 /* This duplicates the tests in the andsi3 expander. */
8074 for (i = 9; i <= 31; i++)
8075 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8076 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8077 return COSTS_N_INSNS (2);
8079 else if (outer == ASHIFT || outer == ASHIFTRT
8080 || outer == LSHIFTRT)
8081 return 0;
8082 return COSTS_N_INSNS (2);
8084 case CONST:
8085 case CONST_DOUBLE:
8086 case LABEL_REF:
8087 case SYMBOL_REF:
8088 return COSTS_N_INSNS (3);
8090 case UDIV:
8091 case UMOD:
8092 case DIV:
8093 case MOD:
8094 return 100;
8096 case TRUNCATE:
8097 return 99;
8099 case AND:
8100 case XOR:
8101 case IOR:
8102 /* XXX guess. */
8103 return 8;
8105 case MEM:
8106 /* XXX another guess. */
8107 /* Memory costs quite a lot for the first word, but subsequent words
8108 load at the equivalent of a single insn each. */
8109 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8110 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8111 ? 4 : 0));
8113 case IF_THEN_ELSE:
8114 /* XXX a guess. */
8115 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8116 return 14;
8117 return 2;
8119 case SIGN_EXTEND:
8120 case ZERO_EXTEND:
8121 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8122 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8124 if (mode == SImode)
8125 return total;
8127 if (arm_arch6)
8128 return total + COSTS_N_INSNS (1);
8130 /* Assume a two-shift sequence. Increase the cost slightly so
8131 we prefer actual shifts over an extend operation. */
8132 return total + 1 + COSTS_N_INSNS (2);
8134 default:
8135 return 99;
8139 static inline bool
8140 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8142 enum machine_mode mode = GET_MODE (x);
8143 enum rtx_code subcode;
8144 rtx operand;
8145 enum rtx_code code = GET_CODE (x);
8146 *total = 0;
8148 switch (code)
8150 case MEM:
8151 /* Memory costs quite a lot for the first word, but subsequent words
8152 load at the equivalent of a single insn each. */
8153 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8154 return true;
8156 case DIV:
8157 case MOD:
8158 case UDIV:
8159 case UMOD:
8160 if (TARGET_HARD_FLOAT && mode == SFmode)
8161 *total = COSTS_N_INSNS (2);
8162 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8163 *total = COSTS_N_INSNS (4);
8164 else
8165 *total = COSTS_N_INSNS (20);
8166 return false;
8168 case ROTATE:
8169 if (REG_P (XEXP (x, 1)))
8170 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8171 else if (!CONST_INT_P (XEXP (x, 1)))
8172 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8174 /* Fall through */
8175 case ROTATERT:
8176 if (mode != SImode)
8178 *total += COSTS_N_INSNS (4);
8179 return true;
8182 /* Fall through */
8183 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8184 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8185 if (mode == DImode)
8187 *total += COSTS_N_INSNS (3);
8188 return true;
8191 *total += COSTS_N_INSNS (1);
8192 /* Increase the cost of complex shifts because they aren't any faster,
8193 and reduce dual issue opportunities. */
8194 if (arm_tune_cortex_a9
8195 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8196 ++*total;
8198 return true;
8200 case MINUS:
8201 if (mode == DImode)
8203 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8204 if (CONST_INT_P (XEXP (x, 0))
8205 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8207 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8208 return true;
8211 if (CONST_INT_P (XEXP (x, 1))
8212 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8214 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8215 return true;
8218 return false;
8221 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8223 if (TARGET_HARD_FLOAT
8224 && (mode == SFmode
8225 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8227 *total = COSTS_N_INSNS (1);
8228 if (CONST_DOUBLE_P (XEXP (x, 0))
8229 && arm_const_double_rtx (XEXP (x, 0)))
8231 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8232 return true;
8235 if (CONST_DOUBLE_P (XEXP (x, 1))
8236 && arm_const_double_rtx (XEXP (x, 1)))
8238 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8239 return true;
8242 return false;
8244 *total = COSTS_N_INSNS (20);
8245 return false;
8248 *total = COSTS_N_INSNS (1);
8249 if (CONST_INT_P (XEXP (x, 0))
8250 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8252 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8253 return true;
8256 subcode = GET_CODE (XEXP (x, 1));
8257 if (subcode == ASHIFT || subcode == ASHIFTRT
8258 || subcode == LSHIFTRT
8259 || subcode == ROTATE || subcode == ROTATERT)
8261 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8262 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8263 return true;
8266 /* A shift as a part of RSB costs no more than RSB itself. */
8267 if (GET_CODE (XEXP (x, 0)) == MULT
8268 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8270 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8271 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8272 return true;
8275 if (subcode == MULT
8276 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8278 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8279 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8280 return true;
8283 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8284 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8286 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8287 if (REG_P (XEXP (XEXP (x, 1), 0))
8288 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8289 *total += COSTS_N_INSNS (1);
8291 return true;
8294 /* Fall through */
8296 case PLUS:
8297 if (code == PLUS && arm_arch6 && mode == SImode
8298 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8299 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8301 *total = COSTS_N_INSNS (1);
8302 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8303 0, speed);
8304 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8305 return true;
8308 /* MLA: All arguments must be registers. We filter out
8309 multiplication by a power of two, so that we fall down into
8310 the code below. */
8311 if (GET_CODE (XEXP (x, 0)) == MULT
8312 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8314 /* The cost comes from the cost of the multiply. */
8315 return false;
8318 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8320 if (TARGET_HARD_FLOAT
8321 && (mode == SFmode
8322 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8324 *total = COSTS_N_INSNS (1);
8325 if (CONST_DOUBLE_P (XEXP (x, 1))
8326 && arm_const_double_rtx (XEXP (x, 1)))
8328 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8329 return true;
8332 return false;
8335 *total = COSTS_N_INSNS (20);
8336 return false;
8339 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8340 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8342 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8343 if (REG_P (XEXP (XEXP (x, 0), 0))
8344 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8345 *total += COSTS_N_INSNS (1);
8346 return true;
8349 /* Fall through */
8351 case AND: case XOR: case IOR:
8353 /* Normally the frame registers will be spilt into reg+const during
8354 reload, so it is a bad idea to combine them with other instructions,
8355 since then they might not be moved outside of loops. As a compromise
8356 we allow integration with ops that have a constant as their second
8357 operand. */
8358 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8359 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8360 && !CONST_INT_P (XEXP (x, 1)))
8361 *total = COSTS_N_INSNS (1);
8363 if (mode == DImode)
8365 *total += COSTS_N_INSNS (2);
8366 if (CONST_INT_P (XEXP (x, 1))
8367 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8369 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8370 return true;
8373 return false;
8376 *total += COSTS_N_INSNS (1);
8377 if (CONST_INT_P (XEXP (x, 1))
8378 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8380 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8381 return true;
8383 subcode = GET_CODE (XEXP (x, 0));
8384 if (subcode == ASHIFT || subcode == ASHIFTRT
8385 || subcode == LSHIFTRT
8386 || subcode == ROTATE || subcode == ROTATERT)
8388 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8389 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8390 return true;
8393 if (subcode == MULT
8394 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8396 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8397 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8398 return true;
8401 if (subcode == UMIN || subcode == UMAX
8402 || subcode == SMIN || subcode == SMAX)
8404 *total = COSTS_N_INSNS (3);
8405 return true;
8408 return false;
8410 case MULT:
8411 /* This should have been handled by the CPU specific routines. */
8412 gcc_unreachable ();
8414 case TRUNCATE:
8415 if (arm_arch3m && mode == SImode
8416 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8418 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8419 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8420 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8421 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8423 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8424 return true;
8426 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8427 return false;
8429 case NEG:
8430 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8432 if (TARGET_HARD_FLOAT
8433 && (mode == SFmode
8434 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8436 *total = COSTS_N_INSNS (1);
8437 return false;
8439 *total = COSTS_N_INSNS (2);
8440 return false;
8443 /* Fall through */
8444 case NOT:
8445 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8446 if (mode == SImode && code == NOT)
8448 subcode = GET_CODE (XEXP (x, 0));
8449 if (subcode == ASHIFT || subcode == ASHIFTRT
8450 || subcode == LSHIFTRT
8451 || subcode == ROTATE || subcode == ROTATERT
8452 || (subcode == MULT
8453 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8455 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8456 /* Register shifts cost an extra cycle. */
8457 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8458 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8459 subcode, 1, speed);
8460 return true;
8464 return false;
8466 case IF_THEN_ELSE:
8467 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8469 *total = COSTS_N_INSNS (4);
8470 return true;
8473 operand = XEXP (x, 0);
8475 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8476 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8477 && REG_P (XEXP (operand, 0))
8478 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8479 *total += COSTS_N_INSNS (1);
8480 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8481 + rtx_cost (XEXP (x, 2), code, 2, speed));
8482 return true;
8484 case NE:
8485 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8487 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8488 return true;
8490 goto scc_insn;
8492 case GE:
8493 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8494 && mode == SImode && XEXP (x, 1) == const0_rtx)
8496 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8497 return true;
8499 goto scc_insn;
8501 case LT:
8502 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8503 && mode == SImode && XEXP (x, 1) == const0_rtx)
8505 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8506 return true;
8508 goto scc_insn;
8510 case EQ:
8511 case GT:
8512 case LE:
8513 case GEU:
8514 case LTU:
8515 case GTU:
8516 case LEU:
8517 case UNORDERED:
8518 case ORDERED:
8519 case UNEQ:
8520 case UNGE:
8521 case UNLT:
8522 case UNGT:
8523 case UNLE:
8524 scc_insn:
8525 /* SCC insns. If the comparison has already been
8526 performed, they cost 2 instructions. Otherwise they need
8527 an additional comparison before them. */
8528 *total = COSTS_N_INSNS (2);
8529 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8531 return true;
8534 /* Fall through */
8535 case COMPARE:
8536 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8538 *total = 0;
8539 return true;
8542 *total += COSTS_N_INSNS (1);
8543 if (CONST_INT_P (XEXP (x, 1))
8544 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8546 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8547 return true;
8550 subcode = GET_CODE (XEXP (x, 0));
8551 if (subcode == ASHIFT || subcode == ASHIFTRT
8552 || subcode == LSHIFTRT
8553 || subcode == ROTATE || subcode == ROTATERT)
8555 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8556 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8557 return true;
8560 if (subcode == MULT
8561 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8563 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8564 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8565 return true;
8568 return false;
8570 case UMIN:
8571 case UMAX:
8572 case SMIN:
8573 case SMAX:
8574 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8575 if (!CONST_INT_P (XEXP (x, 1))
8576 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8577 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8578 return true;
8580 case ABS:
8581 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8583 if (TARGET_HARD_FLOAT
8584 && (mode == SFmode
8585 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8587 *total = COSTS_N_INSNS (1);
8588 return false;
8590 *total = COSTS_N_INSNS (20);
8591 return false;
8593 *total = COSTS_N_INSNS (1);
8594 if (mode == DImode)
8595 *total += COSTS_N_INSNS (3);
8596 return false;
8598 case SIGN_EXTEND:
8599 case ZERO_EXTEND:
8600 *total = 0;
8601 if (GET_MODE_CLASS (mode) == MODE_INT)
8603 rtx op = XEXP (x, 0);
8604 enum machine_mode opmode = GET_MODE (op);
8606 if (mode == DImode)
8607 *total += COSTS_N_INSNS (1);
8609 if (opmode != SImode)
8611 if (MEM_P (op))
8613 /* If !arm_arch4, we use one of the extendhisi2_mem
8614 or movhi_bytes patterns for HImode. For a QImode
8615 sign extension, we first zero-extend from memory
8616 and then perform a shift sequence. */
8617 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8618 *total += COSTS_N_INSNS (2);
8620 else if (arm_arch6)
8621 *total += COSTS_N_INSNS (1);
8623 /* We don't have the necessary insn, so we need to perform some
8624 other operation. */
8625 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8626 /* An and with constant 255. */
8627 *total += COSTS_N_INSNS (1);
8628 else
8629 /* A shift sequence. Increase costs slightly to avoid
8630 combining two shifts into an extend operation. */
8631 *total += COSTS_N_INSNS (2) + 1;
8634 return false;
8637 switch (GET_MODE (XEXP (x, 0)))
8639 case V8QImode:
8640 case V4HImode:
8641 case V2SImode:
8642 case V4QImode:
8643 case V2HImode:
8644 *total = COSTS_N_INSNS (1);
8645 return false;
8647 default:
8648 gcc_unreachable ();
8650 gcc_unreachable ();
8652 case ZERO_EXTRACT:
8653 case SIGN_EXTRACT:
8654 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8655 return true;
8657 case CONST_INT:
8658 if (const_ok_for_arm (INTVAL (x))
8659 || const_ok_for_arm (~INTVAL (x)))
8660 *total = COSTS_N_INSNS (1);
8661 else
8662 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8663 INTVAL (x), NULL_RTX,
8664 NULL_RTX, 0, 0));
8665 return true;
8667 case CONST:
8668 case LABEL_REF:
8669 case SYMBOL_REF:
8670 *total = COSTS_N_INSNS (3);
8671 return true;
8673 case HIGH:
8674 *total = COSTS_N_INSNS (1);
8675 return true;
8677 case LO_SUM:
8678 *total = COSTS_N_INSNS (1);
8679 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8680 return true;
8682 case CONST_DOUBLE:
8683 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8684 && (mode == SFmode || !TARGET_VFP_SINGLE))
8685 *total = COSTS_N_INSNS (1);
8686 else
8687 *total = COSTS_N_INSNS (4);
8688 return true;
8690 case SET:
8691 /* The vec_extract patterns accept memory operands that require an
8692 address reload. Account for the cost of that reload to give the
8693 auto-inc-dec pass an incentive to try to replace them. */
8694 if (TARGET_NEON && MEM_P (SET_DEST (x))
8695 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8697 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8698 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8699 *total += COSTS_N_INSNS (1);
8700 return true;
8702 /* Likewise for the vec_set patterns. */
8703 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8704 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8705 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8707 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8708 *total = rtx_cost (mem, code, 0, speed);
8709 if (!neon_vector_mem_operand (mem, 2, true))
8710 *total += COSTS_N_INSNS (1);
8711 return true;
8713 return false;
8715 case UNSPEC:
8716 /* We cost this as high as our memory costs to allow this to
8717 be hoisted from loops. */
8718 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8720 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8722 return true;
8724 case CONST_VECTOR:
8725 if (TARGET_NEON
8726 && TARGET_HARD_FLOAT
8727 && outer == SET
8728 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8729 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8730 *total = COSTS_N_INSNS (1);
8731 else
8732 *total = COSTS_N_INSNS (4);
8733 return true;
8735 default:
8736 *total = COSTS_N_INSNS (4);
8737 return false;
8741 /* Estimates the size cost of thumb1 instructions.
8742 For now most of the code is copied from thumb1_rtx_costs. We need more
8743 fine-grained tuning when we have more related test cases. */
8744 static inline int
8745 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8747 enum machine_mode mode = GET_MODE (x);
8748 int words;
8750 switch (code)
8752 case ASHIFT:
8753 case ASHIFTRT:
8754 case LSHIFTRT:
8755 case ROTATERT:
8756 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8758 case PLUS:
8759 case MINUS:
8760 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8761 defined by RTL expansion, especially for the expansion of
8762 multiplication. */
8763 if ((GET_CODE (XEXP (x, 0)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8765 || (GET_CODE (XEXP (x, 1)) == MULT
8766 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8767 return COSTS_N_INSNS (2);
8768 /* On purpose fall through for normal RTX. */
8769 case COMPARE:
8770 case NEG:
8771 case NOT:
8772 return COSTS_N_INSNS (1);
8774 case MULT:
8775 if (CONST_INT_P (XEXP (x, 1)))
8777 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8778 into a register first. */
8779 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8780 return COSTS_N_INSNS (1) + const_size;
8782 return COSTS_N_INSNS (1);
8784 case SET:
8785 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8786 the mode. */
8787 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8788 return (COSTS_N_INSNS (words)
8789 + 4 * ((MEM_P (SET_SRC (x)))
8790 + MEM_P (SET_DEST (x))));
8792 case CONST_INT:
8793 if (outer == SET)
8795 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8796 return COSTS_N_INSNS (1);
8797 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8798 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8799 return COSTS_N_INSNS (2);
8800 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8801 if (thumb_shiftable_const (INTVAL (x)))
8802 return COSTS_N_INSNS (2);
8803 return COSTS_N_INSNS (3);
8805 else if ((outer == PLUS || outer == COMPARE)
8806 && INTVAL (x) < 256 && INTVAL (x) > -256)
8807 return 0;
8808 else if ((outer == IOR || outer == XOR || outer == AND)
8809 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8810 return COSTS_N_INSNS (1);
8811 else if (outer == AND)
8813 int i;
8814 /* This duplicates the tests in the andsi3 expander. */
8815 for (i = 9; i <= 31; i++)
8816 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8817 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8818 return COSTS_N_INSNS (2);
8820 else if (outer == ASHIFT || outer == ASHIFTRT
8821 || outer == LSHIFTRT)
8822 return 0;
8823 return COSTS_N_INSNS (2);
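/* Illustrative examples for the SET cases above (not part of the original
   code): 200 fits in a single move, so it costs COSTS_N_INSNS (1);
   -200 falls under the constraint_J split (presumably a move plus a
   negate) and 1024 under the constraint_K split (presumably a move plus
   a shift), each costing COSTS_N_INSNS (2); anything else costs
   COSTS_N_INSNS (3).  */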
8825 case CONST:
8826 case CONST_DOUBLE:
8827 case LABEL_REF:
8828 case SYMBOL_REF:
8829 return COSTS_N_INSNS (3);
8831 case UDIV:
8832 case UMOD:
8833 case DIV:
8834 case MOD:
8835 return 100;
8837 case TRUNCATE:
8838 return 99;
8840 case AND:
8841 case XOR:
8842 case IOR:
8843 /* XXX guess. */
8844 return 8;
8846 case MEM:
8847 /* XXX another guess. */
8848 /* Memory costs quite a lot for the first word, but subsequent words
8849 load at the equivalent of a single insn each. */
8850 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8851 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8852 ? 4 : 0));
8854 case IF_THEN_ELSE:
8855 /* XXX a guess. */
8856 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8857 return 14;
8858 return 2;
8860 case ZERO_EXTEND:
8861 /* XXX still guessing. */
8862 switch (GET_MODE (XEXP (x, 0)))
8864 case QImode:
8865 return (1 + (mode == DImode ? 4 : 0)
8866 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8868 case HImode:
8869 return (4 + (mode == DImode ? 4 : 0)
8870 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8872 case SImode:
8873 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8875 default:
8876 return 99;
8879 default:
8880 return 99;
8884 /* RTX costs when optimizing for size. */
8885 static bool
8886 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8887 int *total)
8889 enum machine_mode mode = GET_MODE (x);
8890 if (TARGET_THUMB1)
8892 *total = thumb1_size_rtx_costs (x, code, outer_code);
8893 return true;
8896 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8897 switch (code)
8899 case MEM:
8900 /* A memory access costs 1 insn if the mode is small, or the address is
8901 a single register, otherwise it costs one insn per word. */
8902 if (REG_P (XEXP (x, 0)))
8903 *total = COSTS_N_INSNS (1);
8904 else if (flag_pic
8905 && GET_CODE (XEXP (x, 0)) == PLUS
8906 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8907 /* This will be split into two instructions.
8908 See arm.md:calculate_pic_address. */
8909 *total = COSTS_N_INSNS (2);
8910 else
8911 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8912 return true;
8914 case DIV:
8915 case MOD:
8916 case UDIV:
8917 case UMOD:
8918 /* Needs a libcall, so it costs about this. */
8919 *total = COSTS_N_INSNS (2);
8920 return false;
8922 case ROTATE:
8923 if (mode == SImode && REG_P (XEXP (x, 1)))
8925 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8926 return true;
8928 /* Fall through */
8929 case ROTATERT:
8930 case ASHIFT:
8931 case LSHIFTRT:
8932 case ASHIFTRT:
8933 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8935 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8936 return true;
8938 else if (mode == SImode)
8940 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8941 /* Slightly disparage register shifts, but not by much. */
8942 if (!CONST_INT_P (XEXP (x, 1)))
8943 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8944 return true;
8947 /* Needs a libcall. */
8948 *total = COSTS_N_INSNS (2);
8949 return false;
8951 case MINUS:
8952 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8953 && (mode == SFmode || !TARGET_VFP_SINGLE))
8955 *total = COSTS_N_INSNS (1);
8956 return false;
8959 if (mode == SImode)
8961 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8962 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8964 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8965 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8966 || subcode1 == ROTATE || subcode1 == ROTATERT
8967 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8968 || subcode1 == ASHIFTRT)
8970 /* It's just the cost of the two operands. */
8971 *total = 0;
8972 return false;
8975 *total = COSTS_N_INSNS (1);
8976 return false;
8979 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8980 return false;
8982 case PLUS:
8983 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8984 && (mode == SFmode || !TARGET_VFP_SINGLE))
8986 *total = COSTS_N_INSNS (1);
8987 return false;
8990 /* A shift as a part of ADD costs nothing. */
8991 if (GET_CODE (XEXP (x, 0)) == MULT
8992 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8994 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8995 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8996 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8997 return true;
9000 /* Fall through */
9001 case AND: case XOR: case IOR:
9002 if (mode == SImode)
9004 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9006 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9007 || subcode == LSHIFTRT || subcode == ASHIFTRT
9008 || (code == AND && subcode == NOT))
9010 /* It's just the cost of the two operands. */
9011 *total = 0;
9012 return false;
9016 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9017 return false;
9019 case MULT:
9020 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9021 return false;
9023 case NEG:
9024 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9025 && (mode == SFmode || !TARGET_VFP_SINGLE))
9027 *total = COSTS_N_INSNS (1);
9028 return false;
9031 /* Fall through */
9032 case NOT:
9033 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9035 return false;
9037 case IF_THEN_ELSE:
9038 *total = 0;
9039 return false;
9041 case COMPARE:
9042 if (cc_register (XEXP (x, 0), VOIDmode))
9043 * total = 0;
9044 else
9045 *total = COSTS_N_INSNS (1);
9046 return false;
9048 case ABS:
9049 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9050 && (mode == SFmode || !TARGET_VFP_SINGLE))
9051 *total = COSTS_N_INSNS (1);
9052 else
9053 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9054 return false;
9056 case SIGN_EXTEND:
9057 case ZERO_EXTEND:
9058 return arm_rtx_costs_1 (x, outer_code, total, 0);
9060 case CONST_INT:
9061 if (const_ok_for_arm (INTVAL (x)))
9062 /* A multiplication by a constant requires another instruction
9063 to load the constant to a register. */
9064 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9065 ? 1 : 0);
9066 else if (const_ok_for_arm (~INTVAL (x)))
9067 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9068 else if (const_ok_for_arm (-INTVAL (x)))
9070 if (outer_code == COMPARE || outer_code == PLUS
9071 || outer_code == MINUS)
9072 *total = 0;
9073 else
9074 *total = COSTS_N_INSNS (1);
9076 else
9077 *total = COSTS_N_INSNS (2);
9078 return true;
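/* Illustrative examples (not part of the original code): 0xff000000 is a
   rotatable 8-bit immediate, so it costs one insn as a SET or MULT
   operand and nothing otherwise; for 0x12345678 neither the value, its
   complement nor its negation is encodable, so it costs
   COSTS_N_INSNS (2).  */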
9080 case CONST:
9081 case LABEL_REF:
9082 case SYMBOL_REF:
9083 *total = COSTS_N_INSNS (2);
9084 return true;
9086 case CONST_DOUBLE:
9087 *total = COSTS_N_INSNS (4);
9088 return true;
9090 case CONST_VECTOR:
9091 if (TARGET_NEON
9092 && TARGET_HARD_FLOAT
9093 && outer_code == SET
9094 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9095 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9096 *total = COSTS_N_INSNS (1);
9097 else
9098 *total = COSTS_N_INSNS (4);
9099 return true;
9101 case HIGH:
9102 case LO_SUM:
9103 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9104 cost of these slightly. */
9105 *total = COSTS_N_INSNS (1) + 1;
9106 return true;
9108 case SET:
9109 return false;
9111 default:
9112 if (mode != VOIDmode)
9113 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9114 else
9115 *total = COSTS_N_INSNS (4); /* Who knows? */
9116 return false;
9120 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9121 operand, then return the operand that is being shifted. If the shift
9122 is not by a constant, then set SHIFT_REG to point to the operand.
9123 Return NULL if OP is not a shifter operand. */
9124 static rtx
9125 shifter_op_p (rtx op, rtx *shift_reg)
9127 enum rtx_code code = GET_CODE (op);
9129 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9130 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9131 return XEXP (op, 0);
9132 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9133 return XEXP (op, 0);
9134 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9135 || code == ASHIFTRT)
9137 if (!CONST_INT_P (XEXP (op, 1)))
9138 *shift_reg = XEXP (op, 1);
9139 return XEXP (op, 0);
9142 return NULL;
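/* Illustrative examples (not part of the original code):
   (mult (reg A) (const_int 8)) is a shift left by 3, so reg A is
   returned and *SHIFT_REG is untouched; (lshiftrt (reg A) (reg B))
   returns reg A and sets *SHIFT_REG to reg B.  */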
9145 static bool
9146 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9148 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9149 gcc_assert (GET_CODE (x) == UNSPEC);
9151 switch (XINT (x, 1))
9153 case UNSPEC_UNALIGNED_LOAD:
9154 /* We can only do unaligned loads into the integer unit, and we can't
9155 use LDM or LDRD. */
9156 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9157 if (speed_p)
9158 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9159 + extra_cost->ldst.load_unaligned);
9161 #ifdef NOT_YET
9162 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9163 ADDR_SPACE_GENERIC, speed_p);
9164 #endif
9165 return true;
9167 case UNSPEC_UNALIGNED_STORE:
9168 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9169 if (speed_p)
9170 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9171 + extra_cost->ldst.store_unaligned);
9173 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9174 #ifdef NOT_YET
9175 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9176 ADDR_SPACE_GENERIC, speed_p);
9177 #endif
9178 return true;
9180 case UNSPEC_VRINTZ:
9181 case UNSPEC_VRINTP:
9182 case UNSPEC_VRINTM:
9183 case UNSPEC_VRINTR:
9184 case UNSPEC_VRINTX:
9185 case UNSPEC_VRINTA:
9186 *cost = COSTS_N_INSNS (1);
9187 if (speed_p)
9188 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9190 return true;
9191 default:
9192 *cost = COSTS_N_INSNS (2);
9193 break;
9195 return false;
9198 /* Cost of a libcall. We assume one insn per argument, an amount for the
9199 call (one insn for -Os) and then one for processing the result. */
9200 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
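/* Worked example (illustrative): LIBCALL_COST (2) is COSTS_N_INSNS (20)
   when optimizing for speed and COSTS_N_INSNS (4) when optimizing for
   size.  */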
9202 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9203 do \
9205 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9206 if (shift_op != NULL \
9207 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9209 if (shift_reg) \
9211 if (speed_p) \
9212 *cost += extra_cost->alu.arith_shift_reg; \
9213 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9215 else if (speed_p) \
9216 *cost += extra_cost->alu.arith_shift; \
9218 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9220 OP, 1, speed_p)); \
9221 return true; \
9224 while (0);
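/* Illustrative use (a sketch, not taken from the original code):
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) matches e.g.
   (plus:HI (ashift:HI (reg) (const_int 2)) (reg)) and costs it as a
   single arithmetic-with-shift operation, letting execution continue to
   the generic PLUS handling otherwise.  */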
9226 /* RTX costs. Make an estimate of the cost of executing the operation
9227 X, which is contained within an operation with code OUTER_CODE.
9228 SPEED_P indicates whether the cost desired is the performance cost,
9229 or the size cost. The estimate is stored in COST and the return
9230 value is TRUE if the cost calculation is final, or FALSE if the
9231 caller should recurse through the operands of X to add additional
9232 costs.
9234 We currently make no attempt to model the size savings of Thumb-2
9235 16-bit instructions. At the normal points in compilation where
9236 this code is called we have no measure of whether the condition
9237 flags are live or not, and thus no realistic way to determine what
9238 the size will eventually be. */
9239 static bool
9240 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9241 const struct cpu_cost_table *extra_cost,
9242 int *cost, bool speed_p)
9244 enum machine_mode mode = GET_MODE (x);
9246 if (TARGET_THUMB1)
9248 if (speed_p)
9249 *cost = thumb1_rtx_costs (x, code, outer_code);
9250 else
9251 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9252 return true;
9255 switch (code)
9257 case SET:
9258 *cost = 0;
9259 /* SET RTXs don't have a mode so we get it from the destination. */
9260 mode = GET_MODE (SET_DEST (x));
9262 if (REG_P (SET_SRC (x))
9263 && REG_P (SET_DEST (x)))
9265 /* Assume that most copies can be done with a single insn,
9266 unless we don't have HW FP, in which case everything
9267 larger than word mode will require two insns. */
9268 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9269 && GET_MODE_SIZE (mode) > 4)
9270 || mode == DImode)
9271 ? 2 : 1);
9272 /* Conditional register moves can be encoded
9273 in 16 bits in Thumb mode. */
9274 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9275 *cost >>= 1;
9277 return true;
9280 if (CONST_INT_P (SET_SRC (x)))
9282 /* Handle CONST_INT here, since the value doesn't have a mode
9283 and we would otherwise be unable to work out the true cost. */
9284 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9285 outer_code = SET;
9286 /* Slightly lower the cost of setting a core reg to a constant.
9287 This helps break up chains and allows for better scheduling. */
9288 if (REG_P (SET_DEST (x))
9289 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9290 *cost -= 1;
9291 x = SET_SRC (x);
9292 /* Immediate moves with an immediate in the range [0, 255] can be
9293 encoded in 16 bits in Thumb mode. */
9294 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9295 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9296 *cost >>= 1;
9297 goto const_int_cost;
9300 return false;
9302 case MEM:
9303 /* A memory access costs 1 insn if the mode is small, or the address is
9304 a single register, otherwise it costs one insn per word. */
9305 if (REG_P (XEXP (x, 0)))
9306 *cost = COSTS_N_INSNS (1);
9307 else if (flag_pic
9308 && GET_CODE (XEXP (x, 0)) == PLUS
9309 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9310 /* This will be split into two instructions.
9311 See arm.md:calculate_pic_address. */
9312 *cost = COSTS_N_INSNS (2);
9313 else
9314 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9316 /* For speed optimizations, add the costs of the address and
9317 accessing memory. */
9318 if (speed_p)
9319 #ifdef NOT_YET
9320 *cost += (extra_cost->ldst.load
9321 + arm_address_cost (XEXP (x, 0), mode,
9322 ADDR_SPACE_GENERIC, speed_p));
9323 #else
9324 *cost += extra_cost->ldst.load;
9325 #endif
9326 return true;
9328 case PARALLEL:
9330 /* Calculations of LDM costs are complex. We assume an initial cost
9331 (ldm_1st) which will load the number of registers mentioned in
9332 ldm_regs_per_insn_1st registers; then each additional
9333 ldm_regs_per_insn_subsequent registers cost one more insn. The
9334 formula for N regs is thus:
9336 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9337 + ldm_regs_per_insn_subsequent - 1)
9338 / ldm_regs_per_insn_subsequent).
9340 Additional costs may also be added for addressing. A similar
9341 formula is used for STM. */
9343 bool is_ldm = load_multiple_operation (x, SImode);
9344 bool is_stm = store_multiple_operation (x, SImode);
9346 *cost = COSTS_N_INSNS (1);
9348 if (is_ldm || is_stm)
9350 if (speed_p)
9352 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9353 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9354 ? extra_cost->ldst.ldm_regs_per_insn_1st
9355 : extra_cost->ldst.stm_regs_per_insn_1st;
9356 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9357 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9358 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9360 *cost += regs_per_insn_1st
9361 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9362 + regs_per_insn_sub - 1)
9363 / regs_per_insn_sub);
9364 return true;
9368 return false;
9370 case DIV:
9371 case UDIV:
9372 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9373 && (mode == SFmode || !TARGET_VFP_SINGLE))
9374 *cost = COSTS_N_INSNS (speed_p
9375 ? extra_cost->fp[mode != SFmode].div : 1);
9376 else if (mode == SImode && TARGET_IDIV)
9377 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9378 else
9379 *cost = LIBCALL_COST (2);
9380 return false; /* All arguments must be in registers. */
9382 case MOD:
9383 case UMOD:
9384 *cost = LIBCALL_COST (2);
9385 return false; /* All arguments must be in registers. */
9387 case ROTATE:
9388 if (mode == SImode && REG_P (XEXP (x, 1)))
9390 *cost = (COSTS_N_INSNS (2)
9391 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9392 if (speed_p)
9393 *cost += extra_cost->alu.shift_reg;
9394 return true;
9396 /* Fall through */
9397 case ROTATERT:
9398 case ASHIFT:
9399 case LSHIFTRT:
9400 case ASHIFTRT:
9401 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9403 *cost = (COSTS_N_INSNS (3)
9404 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9405 if (speed_p)
9406 *cost += 2 * extra_cost->alu.shift;
9407 return true;
9409 else if (mode == SImode)
9411 *cost = (COSTS_N_INSNS (1)
9412 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9413 /* Slightly disparage register shifts at -Os, but not by much. */
9414 if (!CONST_INT_P (XEXP (x, 1)))
9415 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9416 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9417 return true;
9419 else if (GET_MODE_CLASS (mode) == MODE_INT
9420 && GET_MODE_SIZE (mode) < 4)
9422 if (code == ASHIFT)
9424 *cost = (COSTS_N_INSNS (1)
9425 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9426 /* Slightly disparage register shifts at -Os, but not by
9427 much. */
9428 if (!CONST_INT_P (XEXP (x, 1)))
9429 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9430 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9432 else if (code == LSHIFTRT || code == ASHIFTRT)
9434 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9436 /* Can use SBFX/UBFX. */
9437 *cost = COSTS_N_INSNS (1);
9438 if (speed_p)
9439 *cost += extra_cost->alu.bfx;
9440 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9442 else
9444 *cost = COSTS_N_INSNS (2);
9445 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9446 if (speed_p)
9448 if (CONST_INT_P (XEXP (x, 1)))
9449 *cost += 2 * extra_cost->alu.shift;
9450 else
9451 *cost += (extra_cost->alu.shift
9452 + extra_cost->alu.shift_reg);
9454 else
9455 /* Slightly disparage register shifts. */
9456 *cost += !CONST_INT_P (XEXP (x, 1));
9459 else /* Rotates. */
9461 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9462 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9463 if (speed_p)
9465 if (CONST_INT_P (XEXP (x, 1)))
9466 *cost += (2 * extra_cost->alu.shift
9467 + extra_cost->alu.log_shift);
9468 else
9469 *cost += (extra_cost->alu.shift
9470 + extra_cost->alu.shift_reg
9471 + extra_cost->alu.log_shift_reg);
9474 return true;
9477 *cost = LIBCALL_COST (2);
9478 return false;
9480 case BSWAP:
9481 if (arm_arch6)
9483 if (mode == SImode)
9485 *cost = COSTS_N_INSNS (1);
9486 if (speed_p)
9487 *cost += extra_cost->alu.rev;
9489 return false;
9492 else
9494 /* No rev instruction available. Look at arm_legacy_rev
9495 and thumb_legacy_rev for the form of RTL used then. */
9496 if (TARGET_THUMB)
9498 *cost = COSTS_N_INSNS (10);
9500 if (speed_p)
9502 *cost += 6 * extra_cost->alu.shift;
9503 *cost += 3 * extra_cost->alu.logical;
9506 else
9508 *cost = COSTS_N_INSNS (5);
9510 if (speed_p)
9512 *cost += 2 * extra_cost->alu.shift;
9513 *cost += extra_cost->alu.arith_shift;
9514 *cost += 2 * extra_cost->alu.logical;
9517 return true;
9519 return false;
9521 case MINUS:
9522 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9523 && (mode == SFmode || !TARGET_VFP_SINGLE))
9525 *cost = COSTS_N_INSNS (1);
9526 if (GET_CODE (XEXP (x, 0)) == MULT
9527 || GET_CODE (XEXP (x, 1)) == MULT)
9529 rtx mul_op0, mul_op1, sub_op;
9531 if (speed_p)
9532 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9534 if (GET_CODE (XEXP (x, 0)) == MULT)
9536 mul_op0 = XEXP (XEXP (x, 0), 0);
9537 mul_op1 = XEXP (XEXP (x, 0), 1);
9538 sub_op = XEXP (x, 1);
9540 else
9542 mul_op0 = XEXP (XEXP (x, 1), 0);
9543 mul_op1 = XEXP (XEXP (x, 1), 1);
9544 sub_op = XEXP (x, 0);
9547 /* The first operand of the multiply may be optionally
9548 negated. */
9549 if (GET_CODE (mul_op0) == NEG)
9550 mul_op0 = XEXP (mul_op0, 0);
9552 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9553 + rtx_cost (mul_op1, code, 0, speed_p)
9554 + rtx_cost (sub_op, code, 0, speed_p));
9556 return true;
9559 if (speed_p)
9560 *cost += extra_cost->fp[mode != SFmode].addsub;
9561 return false;
9564 if (mode == SImode)
9566 rtx shift_by_reg = NULL;
9567 rtx shift_op;
9568 rtx non_shift_op;
9570 *cost = COSTS_N_INSNS (1);
9572 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9573 if (shift_op == NULL)
9575 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9576 non_shift_op = XEXP (x, 0);
9578 else
9579 non_shift_op = XEXP (x, 1);
9581 if (shift_op != NULL)
9583 if (shift_by_reg != NULL)
9585 if (speed_p)
9586 *cost += extra_cost->alu.arith_shift_reg;
9587 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9589 else if (speed_p)
9590 *cost += extra_cost->alu.arith_shift;
9592 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9593 + rtx_cost (non_shift_op, code, 0, speed_p));
9594 return true;
9597 if (arm_arch_thumb2
9598 && GET_CODE (XEXP (x, 1)) == MULT)
9600 /* MLS. */
9601 if (speed_p)
9602 *cost += extra_cost->mult[0].add;
9603 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9604 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9605 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9606 return true;
9609 if (CONST_INT_P (XEXP (x, 0)))
9611 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9612 INTVAL (XEXP (x, 0)), NULL_RTX,
9613 NULL_RTX, 1, 0);
9614 *cost = COSTS_N_INSNS (insns);
9615 if (speed_p)
9616 *cost += insns * extra_cost->alu.arith;
9617 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9618 return true;
9621 return false;
9624 if (GET_MODE_CLASS (mode) == MODE_INT
9625 && GET_MODE_SIZE (mode) < 4)
9627 rtx shift_op, shift_reg;
9628 shift_reg = NULL;
9630 /* We check both sides of the MINUS for shifter operands since,
9631 unlike PLUS, it's not commutative. */
9633 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9636 /* Slightly disparage, as we might need to widen the result. */
9637 *cost = 1 + COSTS_N_INSNS (1);
9638 if (speed_p)
9639 *cost += extra_cost->alu.arith;
9641 if (CONST_INT_P (XEXP (x, 0)))
9643 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9644 return true;
9647 return false;
9650 if (mode == DImode)
9652 *cost = COSTS_N_INSNS (2);
9654 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9656 rtx op1 = XEXP (x, 1);
9658 if (speed_p)
9659 *cost += 2 * extra_cost->alu.arith;
9661 if (GET_CODE (op1) == ZERO_EXTEND)
9662 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9663 else
9664 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9665 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9666 0, speed_p);
9667 return true;
9669 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9671 if (speed_p)
9672 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9673 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9674 0, speed_p)
9675 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9676 return true;
9678 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9679 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9681 if (speed_p)
9682 *cost += (extra_cost->alu.arith
9683 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9684 ? extra_cost->alu.arith
9685 : extra_cost->alu.arith_shift));
9686 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9687 + rtx_cost (XEXP (XEXP (x, 1), 0),
9688 GET_CODE (XEXP (x, 1)), 0, speed_p));
9689 return true;
9692 if (speed_p)
9693 *cost += 2 * extra_cost->alu.arith;
9694 return false;
9697 /* Vector mode? */
9699 *cost = LIBCALL_COST (2);
9700 return false;
9702 case PLUS:
9703 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9704 && (mode == SFmode || !TARGET_VFP_SINGLE))
9706 *cost = COSTS_N_INSNS (1);
9707 if (GET_CODE (XEXP (x, 0)) == MULT)
9709 rtx mul_op0, mul_op1, add_op;
9711 if (speed_p)
9712 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9714 mul_op0 = XEXP (XEXP (x, 0), 0);
9715 mul_op1 = XEXP (XEXP (x, 0), 1);
9716 add_op = XEXP (x, 1);
9718 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9719 + rtx_cost (mul_op1, code, 0, speed_p)
9720 + rtx_cost (add_op, code, 0, speed_p));
9722 return true;
9725 if (speed_p)
9726 *cost += extra_cost->fp[mode != SFmode].addsub;
9727 return false;
9729 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9731 *cost = LIBCALL_COST (2);
9732 return false;
9735 /* Narrow modes can be synthesized in SImode, but the range
9736 of useful sub-operations is limited. Check for shift operations
9737 on one of the operands. Only left shifts can be used in the
9738 narrow modes. */
9739 if (GET_MODE_CLASS (mode) == MODE_INT
9740 && GET_MODE_SIZE (mode) < 4)
9742 rtx shift_op, shift_reg;
9743 shift_reg = NULL;
9745 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9747 if (CONST_INT_P (XEXP (x, 1)))
9749 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9750 INTVAL (XEXP (x, 1)), NULL_RTX,
9751 NULL_RTX, 1, 0);
9752 *cost = COSTS_N_INSNS (insns);
9753 if (speed_p)
9754 *cost += insns * extra_cost->alu.arith;
9755 /* Slightly penalize a narrow operation as the result may
9756 need widening. */
9757 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9758 return true;
9761 /* Slightly penalize a narrow operation as the result may
9762 need widening. */
9763 *cost = 1 + COSTS_N_INSNS (1);
9764 if (speed_p)
9765 *cost += extra_cost->alu.arith;
9767 return false;
9770 if (mode == SImode)
9772 rtx shift_op, shift_reg;
9774 *cost = COSTS_N_INSNS (1);
9775 if (TARGET_INT_SIMD
9776 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9779 /* UXTA[BH] or SXTA[BH]. */
9780 if (speed_p)
9781 *cost += extra_cost->alu.extend_arith;
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9783 speed_p)
9784 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9785 return true;
9788 shift_reg = NULL;
9789 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9790 if (shift_op != NULL)
9792 if (shift_reg)
9794 if (speed_p)
9795 *cost += extra_cost->alu.arith_shift_reg;
9796 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9798 else if (speed_p)
9799 *cost += extra_cost->alu.arith_shift;
9801 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9802 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9803 return true;
9805 if (GET_CODE (XEXP (x, 0)) == MULT)
9807 rtx mul_op = XEXP (x, 0);
9809 *cost = COSTS_N_INSNS (1);
9811 if (TARGET_DSP_MULTIPLY
9812 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9813 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9814 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9815 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9816 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9817 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9818 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9819 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9820 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9821 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9822 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9823 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9824 == 16))))))
9826 /* SMLA[BT][BT]. */
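		  /* For illustration (a hand-written sketch, not an RTL dump
		     from the compiler): one shape accepted by the test above
		     is roughly
			(plus:SI (mult:SI (sign_extend:SI (reg:HI a))
					  (ashiftrt:SI (reg:SI b)
						       (const_int 16)))
				 (reg:SI c))
		     i.e. bottom half of A times top half of B plus C, which
		     a DSP multiply-accumulate such as SMLABT can do in one
		     insn.  */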
9827 if (speed_p)
9828 *cost += extra_cost->mult[0].extend_add;
9829 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9832 SIGN_EXTEND, 0, speed_p)
9833 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9834 return true;
9837 if (speed_p)
9838 *cost += extra_cost->mult[0].add;
9839 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9840 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9841 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9842 return true;
9844 if (CONST_INT_P (XEXP (x, 1)))
9846 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9847 INTVAL (XEXP (x, 1)), NULL_RTX,
9848 NULL_RTX, 1, 0);
9849 *cost = COSTS_N_INSNS (insns);
9850 if (speed_p)
9851 *cost += insns * extra_cost->alu.arith;
9852 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9853 return true;
9855 return false;
9858 if (mode == DImode)
9860 if (arm_arch3m
9861 && GET_CODE (XEXP (x, 0)) == MULT
9862 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9864 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9865 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9867 *cost = COSTS_N_INSNS (1);
9868 if (speed_p)
9869 *cost += extra_cost->mult[1].extend_add;
9870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9873 ZERO_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9875 return true;
9878 *cost = COSTS_N_INSNS (2);
9880 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9883 if (speed_p)
9884 *cost += (extra_cost->alu.arith
9885 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9886 ? extra_cost->alu.arith
9887 : extra_cost->alu.arith_shift));
9889 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9890 speed_p)
9891 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9892 return true;
9895 if (speed_p)
9896 *cost += 2 * extra_cost->alu.arith;
9897 return false;
9900 /* Vector mode? */
9901 *cost = LIBCALL_COST (2);
9902 return false;
9903 case IOR:
9904 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9906 *cost = COSTS_N_INSNS (1);
9907 if (speed_p)
9908 *cost += extra_cost->alu.rev;
9910 return true;
9912 /* Fall through. */
9913 case AND: case XOR:
9914 if (mode == SImode)
9916 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9917 rtx op0 = XEXP (x, 0);
9918 rtx shift_op, shift_reg;
9920 *cost = COSTS_N_INSNS (1);
9922 if (subcode == NOT
9923 && (code == AND
9924 || (code == IOR && TARGET_THUMB2)))
9925 op0 = XEXP (op0, 0);
9927 shift_reg = NULL;
9928 shift_op = shifter_op_p (op0, &shift_reg);
9929 if (shift_op != NULL)
9931 if (shift_reg)
9933 if (speed_p)
9934 *cost += extra_cost->alu.log_shift_reg;
9935 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9937 else if (speed_p)
9938 *cost += extra_cost->alu.log_shift;
9940 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9941 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9942 return true;
9945 if (CONST_INT_P (XEXP (x, 1)))
9947 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9948 INTVAL (XEXP (x, 1)), NULL_RTX,
9949 NULL_RTX, 1, 0);
9951 *cost = COSTS_N_INSNS (insns);
9952 if (speed_p)
9953 *cost += insns * extra_cost->alu.logical;
9954 *cost += rtx_cost (op0, code, 0, speed_p);
9955 return true;
9958 if (speed_p)
9959 *cost += extra_cost->alu.logical;
9960 *cost += (rtx_cost (op0, code, 0, speed_p)
9961 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9962 return true;
9965 if (mode == DImode)
9967 rtx op0 = XEXP (x, 0);
9968 enum rtx_code subcode = GET_CODE (op0);
9970 *cost = COSTS_N_INSNS (2);
9972 if (subcode == NOT
9973 && (code == AND
9974 || (code == IOR && TARGET_THUMB2)))
9975 op0 = XEXP (op0, 0);
9977 if (GET_CODE (op0) == ZERO_EXTEND)
9979 if (speed_p)
9980 *cost += 2 * extra_cost->alu.logical;
9982 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9983 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9984 return true;
9986 else if (GET_CODE (op0) == SIGN_EXTEND)
9988 if (speed_p)
9989 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9991 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9992 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9993 return true;
9996 if (speed_p)
9997 *cost += 2 * extra_cost->alu.logical;
9999 return true;
10001 /* Vector mode? */
10003 *cost = LIBCALL_COST (2);
10004 return false;
10006 case MULT:
10007 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10008 && (mode == SFmode || !TARGET_VFP_SINGLE))
10010 rtx op0 = XEXP (x, 0);
10012 *cost = COSTS_N_INSNS (1);
10014 if (GET_CODE (op0) == NEG)
10015 op0 = XEXP (op0, 0);
10017 if (speed_p)
10018 *cost += extra_cost->fp[mode != SFmode].mult;
10020 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10022 return true;
10024 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10026 *cost = LIBCALL_COST (2);
10027 return false;
10030 if (mode == SImode)
10032 *cost = COSTS_N_INSNS (1);
10033 if (TARGET_DSP_MULTIPLY
10034 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10035 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10038 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10039 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10041 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10042 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10045 && (INTVAL (XEXP (XEXP (x, 1), 1))
10046 == 16))))))
10048 /* SMUL[TB][TB]. */
10049 if (speed_p)
10050 *cost += extra_cost->mult[0].extend;
10051 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10052 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10053 return true;
10055 if (speed_p)
10056 *cost += extra_cost->mult[0].simple;
10057 return false;
10060 if (mode == DImode)
10062 if (arm_arch3m
10063 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10065 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10066 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10068 *cost = COSTS_N_INSNS (1);
10069 if (speed_p)
10070 *cost += extra_cost->mult[1].extend;
10071 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10072 ZERO_EXTEND, 0, speed_p)
10073 + rtx_cost (XEXP (XEXP (x, 1), 0),
10074 ZERO_EXTEND, 0, speed_p));
10075 return true;
10078 *cost = LIBCALL_COST (2);
10079 return false;
10082 /* Vector mode? */
10083 *cost = LIBCALL_COST (2);
10084 return false;
10086 case NEG:
10087 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10088 && (mode == SFmode || !TARGET_VFP_SINGLE))
10090 *cost = COSTS_N_INSNS (1);
10091 if (speed_p)
10092 *cost += extra_cost->fp[mode != SFmode].neg;
10094 return false;
10096 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10098 *cost = LIBCALL_COST (1);
10099 return false;
10102 if (mode == SImode)
10104 if (GET_CODE (XEXP (x, 0)) == ABS)
10106 *cost = COSTS_N_INSNS (2);
10107 /* Assume the non-flag-changing variant. */
10108 if (speed_p)
10109 *cost += (extra_cost->alu.log_shift
10110 + extra_cost->alu.arith_shift);
10111 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10112 return true;
10115 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10116 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10118 *cost = COSTS_N_INSNS (2);
10119 /* No extra cost for MOV imm and MVN imm. */
10120 /* If the comparison op is using the flags, there's no further
 10121 	      cost; otherwise we need to add the cost of the comparison.  */
10122 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10123 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10124 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10126 *cost += (COSTS_N_INSNS (1)
10127 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10128 speed_p)
10129 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10130 speed_p));
10131 if (speed_p)
10132 *cost += extra_cost->alu.arith;
10134 return true;
10136 *cost = COSTS_N_INSNS (1);
10137 if (speed_p)
10138 *cost += extra_cost->alu.arith;
10139 return false;
10142 if (GET_MODE_CLASS (mode) == MODE_INT
10143 && GET_MODE_SIZE (mode) < 4)
10145 /* Slightly disparage, as we might need an extend operation. */
10146 *cost = 1 + COSTS_N_INSNS (1);
10147 if (speed_p)
10148 *cost += extra_cost->alu.arith;
10149 return false;
10152 if (mode == DImode)
10154 *cost = COSTS_N_INSNS (2);
10155 if (speed_p)
10156 *cost += 2 * extra_cost->alu.arith;
10157 return false;
10160 /* Vector mode? */
10161 *cost = LIBCALL_COST (1);
10162 return false;
10164 case NOT:
10165 if (mode == SImode)
10167 rtx shift_op;
10168 rtx shift_reg = NULL;
10170 *cost = COSTS_N_INSNS (1);
10171 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10173 if (shift_op)
10175 if (shift_reg != NULL)
10177 if (speed_p)
10178 *cost += extra_cost->alu.log_shift_reg;
10179 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10181 else if (speed_p)
10182 *cost += extra_cost->alu.log_shift;
10183 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10184 return true;
10187 if (speed_p)
10188 *cost += extra_cost->alu.logical;
10189 return false;
10191 if (mode == DImode)
10193 *cost = COSTS_N_INSNS (2);
10194 return false;
10197 /* Vector mode? */
10199 *cost += LIBCALL_COST (1);
10200 return false;
10202 case IF_THEN_ELSE:
10204 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10206 *cost = COSTS_N_INSNS (4);
10207 return true;
10209 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10210 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10212 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10213 /* Assume that if one arm of the if_then_else is a register,
10214 that it will be tied with the result and eliminate the
10215 conditional insn. */
10216 if (REG_P (XEXP (x, 1)))
10217 *cost += op2cost;
10218 else if (REG_P (XEXP (x, 2)))
10219 *cost += op1cost;
10220 else
10222 if (speed_p)
10224 if (extra_cost->alu.non_exec_costs_exec)
10225 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10226 else
10227 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10229 else
10230 *cost += op1cost + op2cost;
10233 return true;
10235 case COMPARE:
10236 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10237 *cost = 0;
10238 else
10240 enum machine_mode op0mode;
10241 /* We'll mostly assume that the cost of a compare is the cost of the
10242 LHS. However, there are some notable exceptions. */
10244 /* Floating point compares are never done as side-effects. */
10245 op0mode = GET_MODE (XEXP (x, 0));
10246 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10247 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10249 *cost = COSTS_N_INSNS (1);
10250 if (speed_p)
10251 *cost += extra_cost->fp[op0mode != SFmode].compare;
10253 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10255 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10256 return true;
10259 return false;
10261 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10263 *cost = LIBCALL_COST (2);
10264 return false;
10267 /* DImode compares normally take two insns. */
10268 if (op0mode == DImode)
10270 *cost = COSTS_N_INSNS (2);
10271 if (speed_p)
10272 *cost += 2 * extra_cost->alu.arith;
10273 return false;
10276 if (op0mode == SImode)
10278 rtx shift_op;
10279 rtx shift_reg;
10281 if (XEXP (x, 1) == const0_rtx
10282 && !(REG_P (XEXP (x, 0))
10283 || (GET_CODE (XEXP (x, 0)) == SUBREG
10284 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10286 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10288 /* Multiply operations that set the flags are often
10289 significantly more expensive. */
10290 if (speed_p
10291 && GET_CODE (XEXP (x, 0)) == MULT
10292 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10293 *cost += extra_cost->mult[0].flag_setting;
10295 if (speed_p
10296 && GET_CODE (XEXP (x, 0)) == PLUS
10297 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10298 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10299 0), 1), mode))
10300 *cost += extra_cost->mult[0].flag_setting;
10301 return true;
10304 shift_reg = NULL;
10305 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10306 if (shift_op != NULL)
10308 *cost = COSTS_N_INSNS (1);
10309 if (shift_reg != NULL)
10311 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10312 if (speed_p)
10313 *cost += extra_cost->alu.arith_shift_reg;
10315 else if (speed_p)
10316 *cost += extra_cost->alu.arith_shift;
10317 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10318 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10319 return true;
10322 *cost = COSTS_N_INSNS (1);
10323 if (speed_p)
10324 *cost += extra_cost->alu.arith;
10325 if (CONST_INT_P (XEXP (x, 1))
10326 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10328 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10329 return true;
10331 return false;
10334 /* Vector mode? */
10336 *cost = LIBCALL_COST (2);
10337 return false;
10339 return true;
10341 case EQ:
10342 case NE:
10343 case LT:
10344 case LE:
10345 case GT:
10346 case GE:
10347 case LTU:
10348 case LEU:
10349 case GEU:
10350 case GTU:
10351 case ORDERED:
10352 case UNORDERED:
10353 case UNEQ:
10354 case UNLE:
10355 case UNLT:
10356 case UNGE:
10357 case UNGT:
10358 case LTGT:
10359 if (outer_code == SET)
10361 /* Is it a store-flag operation? */
10362 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10363 && XEXP (x, 1) == const0_rtx)
10365 /* Thumb also needs an IT insn. */
10366 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10367 return true;
10369 if (XEXP (x, 1) == const0_rtx)
10371 switch (code)
10373 case LT:
10374 /* LSR Rd, Rn, #31. */
10375 *cost = COSTS_N_INSNS (1);
10376 if (speed_p)
10377 *cost += extra_cost->alu.shift;
10378 break;
10380 case EQ:
10381 /* RSBS T1, Rn, #0
10382 ADC Rd, Rn, T1. */
10384 case NE:
10385 /* SUBS T1, Rn, #1
10386 SBC Rd, Rn, T1. */
10387 *cost = COSTS_N_INSNS (2);
10388 break;
10390 case LE:
10391 /* RSBS T1, Rn, Rn, LSR #31
10392 ADC Rd, Rn, T1. */
10393 *cost = COSTS_N_INSNS (2);
10394 if (speed_p)
10395 *cost += extra_cost->alu.arith_shift;
10396 break;
10398 case GT:
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost = COSTS_N_INSNS (2);
10402 if (speed_p)
10403 *cost += (extra_cost->alu.arith_shift
10404 + extra_cost->alu.shift);
10405 break;
10407 case GE:
10408 /* ASR Rd, Rn, #31
10409 ADD Rd, Rn, #1. */
10410 *cost = COSTS_N_INSNS (2);
10411 if (speed_p)
10412 *cost += extra_cost->alu.shift;
10413 break;
10415 default:
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost = COSTS_N_INSNS (3);
10419 break;
10421 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10422 return true;
10424 else
10426 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10427 if (CONST_INT_P (XEXP (x, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10430 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10431 return true;
10434 return false;
10437 /* Not directly inside a set. If it involves the condition code
10438 register it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere
10440 this is just the control part which has no additional
10441 cost. */
10442 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10443 && XEXP (x, 1) == const0_rtx)
10445 *cost = 0;
10446 return true;
10448 return false;
10450 case ABS:
10451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10452 && (mode == SFmode || !TARGET_VFP_SINGLE))
10454 *cost = COSTS_N_INSNS (1);
10455 if (speed_p)
10456 *cost += extra_cost->fp[mode != SFmode].neg;
10458 return false;
10460 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10462 *cost = LIBCALL_COST (1);
10463 return false;
10466 if (mode == SImode)
10468 *cost = COSTS_N_INSNS (1);
10469 if (speed_p)
10470 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10471 return false;
10473 /* Vector mode? */
10474 *cost = LIBCALL_COST (1);
10475 return false;
10477 case SIGN_EXTEND:
10478 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10479 && MEM_P (XEXP (x, 0)))
10481 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10483 if (mode == DImode)
10484 *cost += COSTS_N_INSNS (1);
10486 if (!speed_p)
10487 return true;
10489 if (GET_MODE (XEXP (x, 0)) == SImode)
10490 *cost += extra_cost->ldst.load;
10491 else
10492 *cost += extra_cost->ldst.load_sign_extend;
10494 if (mode == DImode)
10495 *cost += extra_cost->alu.shift;
10497 return true;
10500 /* Widening from less than 32-bits requires an extend operation. */
10501 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10503 /* We have SXTB/SXTH. */
10504 *cost = COSTS_N_INSNS (1);
10505 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.extend;
10509 else if (GET_MODE (XEXP (x, 0)) != SImode)
10511 /* Needs two shifts. */
10512 *cost = COSTS_N_INSNS (2);
10513 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10514 if (speed_p)
10515 *cost += 2 * extra_cost->alu.shift;
10518 /* Widening beyond 32-bits requires one more insn. */
10519 if (mode == DImode)
10521 *cost += COSTS_N_INSNS (1);
10522 if (speed_p)
10523 *cost += extra_cost->alu.shift;
10526 return true;
10528 case ZERO_EXTEND:
10529 if ((arm_arch4
10530 || GET_MODE (XEXP (x, 0)) == SImode
10531 || GET_MODE (XEXP (x, 0)) == QImode)
10532 && MEM_P (XEXP (x, 0)))
10534 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10536 if (mode == DImode)
10537 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10539 return true;
10542 /* Widening from less than 32-bits requires an extend operation. */
10543 if (GET_MODE (XEXP (x, 0)) == QImode)
10545 /* UXTB can be a shorter instruction in Thumb2, but it might
10546 be slower than the AND Rd, Rn, #255 alternative. When
10547 optimizing for speed it should never be slower to use
10548 AND, and we don't really model 16-bit vs 32-bit insns
10549 here. */
10550 *cost = COSTS_N_INSNS (1);
10551 if (speed_p)
10552 *cost += extra_cost->alu.logical;
10554 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10556 /* We have UXTB/UXTH. */
10557 *cost = COSTS_N_INSNS (1);
10558 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10559 if (speed_p)
10560 *cost += extra_cost->alu.extend;
10562 else if (GET_MODE (XEXP (x, 0)) != SImode)
10564 /* Needs two shifts. It's marginally preferable to use
10565 shifts rather than two BIC instructions as the second
10566 shift may merge with a subsequent insn as a shifter
10567 op. */
10568 *cost = COSTS_N_INSNS (2);
10569 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10570 if (speed_p)
10571 *cost += 2 * extra_cost->alu.shift;
10573 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10574 *cost = COSTS_N_INSNS (1);
10576 /* Widening beyond 32-bits requires one more insn. */
10577 if (mode == DImode)
10579 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10582 return true;
10584 case CONST_INT:
10585 *cost = 0;
10586 /* CONST_INT has no mode, so we cannot tell for sure how many
10587 insns are really going to be needed. The best we can do is
10588 look at the value passed. If it fits in SImode, then assume
10589 that's the mode it will be used for. Otherwise assume it
10590 will be used in DImode. */
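      /* Worked example (values chosen for illustration): 0x12345678
	 survives trunc_int_for_mode (..., SImode) unchanged, so it is
	 costed as an SImode constant; 0x100000003 does not, so it is
	 costed as DImode, i.e. as the two halves 0x3 and 0x1 in the code
	 below (assuming each half is a valid immediate, two insns in
	 total).  */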
10591 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10592 mode = SImode;
10593 else
10594 mode = DImode;
10596 /* Avoid blowing up in arm_gen_constant (). */
10597 if (!(outer_code == PLUS
10598 || outer_code == AND
10599 || outer_code == IOR
10600 || outer_code == XOR
10601 || outer_code == MINUS))
10602 outer_code = SET;
10604 const_int_cost:
10605 if (mode == SImode)
10607 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10608 INTVAL (x), NULL, NULL,
10609 0, 0));
10610 /* Extra costs? */
10612 else
10614 *cost += COSTS_N_INSNS (arm_gen_constant
10615 (outer_code, SImode, NULL,
10616 trunc_int_for_mode (INTVAL (x), SImode),
10617 NULL, NULL, 0, 0)
10618 + arm_gen_constant (outer_code, SImode, NULL,
10619 INTVAL (x) >> 32, NULL,
10620 NULL, 0, 0));
10621 /* Extra costs? */
10624 return true;
10626 case CONST:
10627 case LABEL_REF:
10628 case SYMBOL_REF:
10629 if (speed_p)
10631 if (arm_arch_thumb2 && !flag_pic)
10632 *cost = COSTS_N_INSNS (2);
10633 else
10634 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10636 else
10637 *cost = COSTS_N_INSNS (2);
10639 if (flag_pic)
10641 *cost += COSTS_N_INSNS (1);
10642 if (speed_p)
10643 *cost += extra_cost->alu.arith;
10646 return true;
10648 case CONST_FIXED:
10649 *cost = COSTS_N_INSNS (4);
10650 /* Fixme. */
10651 return true;
10653 case CONST_DOUBLE:
10654 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10655 && (mode == SFmode || !TARGET_VFP_SINGLE))
10657 if (vfp3_const_double_rtx (x))
10659 *cost = COSTS_N_INSNS (1);
10660 if (speed_p)
10661 *cost += extra_cost->fp[mode == DFmode].fpconst;
10662 return true;
10665 if (speed_p)
10667 *cost = COSTS_N_INSNS (1);
10668 if (mode == DFmode)
10669 *cost += extra_cost->ldst.loadd;
10670 else
10671 *cost += extra_cost->ldst.loadf;
10673 else
10674 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10676 return true;
10678 *cost = COSTS_N_INSNS (4);
10679 return true;
10681 case CONST_VECTOR:
10682 /* Fixme. */
10683 if (TARGET_NEON
10684 && TARGET_HARD_FLOAT
10685 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10686 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10687 *cost = COSTS_N_INSNS (1);
10688 else
10689 *cost = COSTS_N_INSNS (4);
10690 return true;
10692 case HIGH:
10693 case LO_SUM:
10694 *cost = COSTS_N_INSNS (1);
10695 /* When optimizing for size, we prefer constant pool entries to
10696 MOVW/MOVT pairs, so bump the cost of these slightly. */
10697 if (!speed_p)
10698 *cost += 1;
10699 return true;
10701 case CLZ:
10702 *cost = COSTS_N_INSNS (1);
10703 if (speed_p)
10704 *cost += extra_cost->alu.clz;
10705 return false;
10707 case SMIN:
10708 if (XEXP (x, 1) == const0_rtx)
10710 *cost = COSTS_N_INSNS (1);
10711 if (speed_p)
10712 *cost += extra_cost->alu.log_shift;
10713 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10714 return true;
10716 /* Fall through. */
10717 case SMAX:
10718 case UMIN:
10719 case UMAX:
10720 *cost = COSTS_N_INSNS (2);
10721 return false;
10723 case TRUNCATE:
10724 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10725 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10726 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10728 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10729 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10730 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10731 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10732 == ZERO_EXTEND))))
10734 *cost = COSTS_N_INSNS (1);
10735 if (speed_p)
10736 *cost += extra_cost->mult[1].extend;
10737 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10738 speed_p)
10739 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10740 0, speed_p));
10741 return true;
10743 *cost = LIBCALL_COST (1);
10744 return false;
10746 case UNSPEC:
10747 return arm_unspec_cost (x, outer_code, speed_p, cost);
10749 case PC:
10750 /* Reading the PC is like reading any other register. Writing it
10751 is more expensive, but we take that into account elsewhere. */
10752 *cost = 0;
10753 return true;
10755 case ZERO_EXTRACT:
10756 /* TODO: Simple zero_extract of bottom bits using AND. */
10757 /* Fall through. */
10758 case SIGN_EXTRACT:
10759 if (arm_arch6
10760 && mode == SImode
10761 && CONST_INT_P (XEXP (x, 1))
10762 && CONST_INT_P (XEXP (x, 2)))
10764 *cost = COSTS_N_INSNS (1);
10765 if (speed_p)
10766 *cost += extra_cost->alu.bfx;
10767 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10768 return true;
10770 /* Without UBFX/SBFX, need to resort to shift operations. */
10771 *cost = COSTS_N_INSNS (2);
10772 if (speed_p)
10773 *cost += 2 * extra_cost->alu.shift;
10774 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10775 return true;
10777 case FLOAT_EXTEND:
10778 if (TARGET_HARD_FLOAT)
10780 *cost = COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->fp[mode == DFmode].widen;
10783 if (!TARGET_FPU_ARMV8
10784 && GET_MODE (XEXP (x, 0)) == HFmode)
10786 /* Pre v8, widening HF->DF is a two-step process, first
10787 widening to SFmode. */
10788 *cost += COSTS_N_INSNS (1);
10789 if (speed_p)
10790 *cost += extra_cost->fp[0].widen;
10792 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10793 return true;
10796 *cost = LIBCALL_COST (1);
10797 return false;
10799 case FLOAT_TRUNCATE:
10800 if (TARGET_HARD_FLOAT)
10802 *cost = COSTS_N_INSNS (1);
10803 if (speed_p)
10804 *cost += extra_cost->fp[mode == DFmode].narrow;
10805 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10806 return true;
10807 /* Vector modes? */
10809 *cost = LIBCALL_COST (1);
10810 return false;
10812 case FMA:
10813 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10815 rtx op0 = XEXP (x, 0);
10816 rtx op1 = XEXP (x, 1);
10817 rtx op2 = XEXP (x, 2);
10819 *cost = COSTS_N_INSNS (1);
10821 /* vfms or vfnma. */
10822 if (GET_CODE (op0) == NEG)
10823 op0 = XEXP (op0, 0);
10825 /* vfnms or vfnma. */
10826 if (GET_CODE (op2) == NEG)
10827 op2 = XEXP (op2, 0);
10829 *cost += rtx_cost (op0, FMA, 0, speed_p);
10830 *cost += rtx_cost (op1, FMA, 1, speed_p);
10831 *cost += rtx_cost (op2, FMA, 2, speed_p);
10833 if (speed_p)
 10834 	    *cost += extra_cost->fp[mode == DFmode].fma;
10836 return true;
10839 *cost = LIBCALL_COST (3);
10840 return false;
10842 case FIX:
10843 case UNSIGNED_FIX:
10844 if (TARGET_HARD_FLOAT)
10846 if (GET_MODE_CLASS (mode) == MODE_INT)
10848 *cost = COSTS_N_INSNS (1);
10849 if (speed_p)
10850 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
 10851 	  /* Strip off the 'cost' of rounding towards zero.  */
10852 if (GET_CODE (XEXP (x, 0)) == FIX)
10853 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10854 else
10855 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10858 return true;
10860 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10861 && TARGET_FPU_ARMV8)
10863 *cost = COSTS_N_INSNS (1);
10864 if (speed_p)
10865 *cost += extra_cost->fp[mode == DFmode].roundint;
10866 return false;
10868 /* Vector costs? */
10870 *cost = LIBCALL_COST (1);
10871 return false;
10873 case FLOAT:
10874 case UNSIGNED_FLOAT:
10875 if (TARGET_HARD_FLOAT)
10877 /* ??? Increase the cost to deal with transferring from CORE
10878 -> FP registers? */
10879 *cost = COSTS_N_INSNS (1);
10880 if (speed_p)
10881 *cost += extra_cost->fp[mode == DFmode].fromint;
10882 return false;
10884 *cost = LIBCALL_COST (1);
10885 return false;
10887 case CALL:
10888 *cost = COSTS_N_INSNS (1);
10889 return true;
10891 case ASM_OPERANDS:
 10893       /* Just a guess: assume one insn per instruction in the asm
10894 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10895 though (see PR60663). */
10896 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10897 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10899 *cost = COSTS_N_INSNS (asm_length + num_operands);
10900 return true;
10902 default:
10903 if (mode != VOIDmode)
10904 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10905 else
10906 *cost = COSTS_N_INSNS (4); /* Who knows? */
10907 return false;
10911 #undef HANDLE_NARROW_SHIFT_ARITH
 10913 /* RTX costs.  Dispatches between the size costs and the tuning-specific
	speed costs.  */
10914 static bool
10915 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10916 int *total, bool speed)
10918 bool result;
10920 if (TARGET_OLD_RTX_COSTS
10921 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10923 /* Old way. (Deprecated.) */
10924 if (!speed)
10925 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10926 (enum rtx_code) outer_code, total);
10927 else
10928 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10929 (enum rtx_code) outer_code, total,
10930 speed);
10932 else
10934 /* New way. */
10935 if (current_tune->insn_extra_cost)
10936 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10937 (enum rtx_code) outer_code,
10938 current_tune->insn_extra_cost,
10939 total, speed);
10940 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
 10941 	 && current_tune->insn_extra_cost == NULL  */
10942 else
10943 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10944 (enum rtx_code) outer_code,
10945 &generic_extra_costs, total, speed);
10948 if (dump_file && (dump_flags & TDF_DETAILS))
10950 print_rtl_single (dump_file, x);
10951 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10952 *total, result ? "final" : "partial");
10954 return result;
10957 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10958 supported on any "slowmul" cores, so it can be ignored. */
10960 static bool
10961 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10962 int *total, bool speed)
10964 enum machine_mode mode = GET_MODE (x);
10966 if (TARGET_THUMB)
10968 *total = thumb1_rtx_costs (x, code, outer_code);
10969 return true;
10972 switch (code)
10974 case MULT:
10975 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10976 || mode == DImode)
10978 *total = COSTS_N_INSNS (20);
10979 return false;
10982 if (CONST_INT_P (XEXP (x, 1)))
10984 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10985 & (unsigned HOST_WIDE_INT) 0xffffffff);
10986 int cost, const_ok = const_ok_for_arm (i);
10987 int j, booth_unit_size;
10989 /* Tune as appropriate. */
10990 cost = const_ok ? 4 : 8;
10991 booth_unit_size = 2;
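	  /* Worked example (illustrative only): for the multiplier 0x64 the
	     loop below strips two bits per iteration, so it runs four times
	     (0x64 -> 0x19 -> 0x6 -> 0x1 -> 0); starting from the const_ok
	     base of 4 that gives *total = COSTS_N_INSNS (8).  */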
10992 for (j = 0; i && j < 32; j += booth_unit_size)
10994 i >>= booth_unit_size;
10995 cost++;
10998 *total = COSTS_N_INSNS (cost);
10999 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11000 return true;
11003 *total = COSTS_N_INSNS (20);
11004 return false;
11006 default:
 11007       return arm_rtx_costs_1 (x, outer_code, total, speed);
11012 /* RTX cost for cores with a fast multiply unit (M variants). */
11014 static bool
11015 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11016 int *total, bool speed)
11018 enum machine_mode mode = GET_MODE (x);
11020 if (TARGET_THUMB1)
11022 *total = thumb1_rtx_costs (x, code, outer_code);
11023 return true;
11026 /* ??? should thumb2 use different costs? */
11027 switch (code)
11029 case MULT:
11030 /* There is no point basing this on the tuning, since it is always the
11031 fast variant if it exists at all. */
11032 if (mode == DImode
11033 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11034 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11035 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
 11037 	  *total = COSTS_N_INSNS (2);
11038 return false;
11042 if (mode == DImode)
11044 *total = COSTS_N_INSNS (5);
11045 return false;
11048 if (CONST_INT_P (XEXP (x, 1)))
11050 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11051 & (unsigned HOST_WIDE_INT) 0xffffffff);
11052 int cost, const_ok = const_ok_for_arm (i);
11053 int j, booth_unit_size;
11055 /* Tune as appropriate. */
11056 cost = const_ok ? 4 : 8;
11057 booth_unit_size = 8;
11058 for (j = 0; i && j < 32; j += booth_unit_size)
11060 i >>= booth_unit_size;
11061 cost++;
 11064 	  *total = COSTS_N_INSNS (cost);
11065 return false;
11068 if (mode == SImode)
11070 *total = COSTS_N_INSNS (4);
11071 return false;
11074 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11076 if (TARGET_HARD_FLOAT
11077 && (mode == SFmode
11078 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11080 *total = COSTS_N_INSNS (1);
11081 return false;
11085 /* Requires a lib call */
11086 *total = COSTS_N_INSNS (20);
11087 return false;
11089 default:
11090 return arm_rtx_costs_1 (x, outer_code, total, speed);
11095 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11096 so it can be ignored. */
11098 static bool
11099 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11100 int *total, bool speed)
11102 enum machine_mode mode = GET_MODE (x);
11104 if (TARGET_THUMB)
11106 *total = thumb1_rtx_costs (x, code, outer_code);
11107 return true;
11110 switch (code)
11112 case COMPARE:
11113 if (GET_CODE (XEXP (x, 0)) != MULT)
11114 return arm_rtx_costs_1 (x, outer_code, total, speed);
11116 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11117 will stall until the multiplication is complete. */
11118 *total = COSTS_N_INSNS (3);
11119 return false;
11121 case MULT:
11122 /* There is no point basing this on the tuning, since it is always the
11123 fast variant if it exists at all. */
11124 if (mode == DImode
11125 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11126 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11127 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11129 *total = COSTS_N_INSNS (2);
11130 return false;
11134 if (mode == DImode)
11136 *total = COSTS_N_INSNS (5);
11137 return false;
11140 if (CONST_INT_P (XEXP (x, 1)))
11142 /* If operand 1 is a constant we can more accurately
11143 calculate the cost of the multiply. The multiplier can
11144 retire 15 bits on the first cycle and a further 12 on the
11145 second. We do, of course, have to load the constant into
11146 a register first. */
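	  /* Worked example (values chosen for illustration): for 0x12345678
	     both masks below are non-zero, so cost becomes 3 and
	     *total = COSTS_N_INSNS (3); a small constant such as 0x4567
	     lies entirely in the low 15 bits and is charged only the single
	     cycle of overhead.  */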
11147 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11148 /* There's a general overhead of one cycle. */
11149 int cost = 1;
11150 unsigned HOST_WIDE_INT masked_const;
11152 if (i & 0x80000000)
11153 i = ~i;
11155 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11157 masked_const = i & 0xffff8000;
11158 if (masked_const != 0)
11160 cost++;
11161 masked_const = i & 0xf8000000;
11162 if (masked_const != 0)
11163 cost++;
11165 *total = COSTS_N_INSNS (cost);
11166 return false;
11169 if (mode == SImode)
11171 *total = COSTS_N_INSNS (3);
11172 return false;
11175 /* Requires a lib call */
11176 *total = COSTS_N_INSNS (20);
11177 return false;
11179 default:
11180 return arm_rtx_costs_1 (x, outer_code, total, speed);
11185 /* RTX costs for 9e (and later) cores. */
11187 static bool
11188 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11189 int *total, bool speed)
11191 enum machine_mode mode = GET_MODE (x);
11193 if (TARGET_THUMB1)
11195 switch (code)
11197 case MULT:
11198 *total = COSTS_N_INSNS (3);
11199 return true;
11201 default:
11202 *total = thumb1_rtx_costs (x, code, outer_code);
11203 return true;
11207 switch (code)
11209 case MULT:
11210 /* There is no point basing this on the tuning, since it is always the
11211 fast variant if it exists at all. */
11212 if (mode == DImode
11213 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11214 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11215 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11217 *total = COSTS_N_INSNS (2);
11218 return false;
11222 if (mode == DImode)
11224 *total = COSTS_N_INSNS (5);
11225 return false;
11228 if (mode == SImode)
11230 *total = COSTS_N_INSNS (2);
11231 return false;
11234 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11236 if (TARGET_HARD_FLOAT
11237 && (mode == SFmode
11238 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11240 *total = COSTS_N_INSNS (1);
11241 return false;
11245 *total = COSTS_N_INSNS (20);
11246 return false;
11248 default:
11249 return arm_rtx_costs_1 (x, outer_code, total, speed);
11252 /* All address computations that can be done are free, but rtx cost returns
11253 the same for practically all of them. So we weight the different types
11254 of address here in the order (most pref first):
11255 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
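   /* For example (illustrative addresses, not taken from real RTL dumps):
      a post-increment such as [r3], #4 costs 0, reg plus constant
      [r3, #8] costs 2, reg plus shifted reg [r3, r4, lsl #2] costs 3,
      reg plus reg costs 4, a plain register [r3] costs 6, and a
      SYMBOL_REF/LABEL_REF or nested MEM costs 10.  */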
11256 static inline int
11257 arm_arm_address_cost (rtx x)
11259 enum rtx_code c = GET_CODE (x);
11261 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11262 return 0;
11263 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11264 return 10;
11266 if (c == PLUS)
11268 if (CONST_INT_P (XEXP (x, 1)))
11269 return 2;
11271 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11272 return 3;
11274 return 4;
11277 return 6;
11280 static inline int
11281 arm_thumb_address_cost (rtx x)
11283 enum rtx_code c = GET_CODE (x);
11285 if (c == REG)
11286 return 1;
11287 if (c == PLUS
11288 && REG_P (XEXP (x, 0))
11289 && CONST_INT_P (XEXP (x, 1)))
11290 return 1;
11292 return 2;
11295 static int
11296 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11297 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11299 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11302 /* Adjust cost hook for XScale. */
11303 static bool
11304 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11306 /* Some true dependencies can have a higher cost depending
11307 on precisely how certain input operands are used. */
11308 if (REG_NOTE_KIND(link) == 0
11309 && recog_memoized (insn) >= 0
11310 && recog_memoized (dep) >= 0)
11312 int shift_opnum = get_attr_shift (insn);
11313 enum attr_type attr_type = get_attr_type (dep);
11315 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11316 operand for INSN. If we have a shifted input operand and the
11317 instruction we depend on is another ALU instruction, then we may
11318 have to account for an additional stall. */
11319 if (shift_opnum != 0
11320 && (attr_type == TYPE_ALU_SHIFT_IMM
11321 || attr_type == TYPE_ALUS_SHIFT_IMM
11322 || attr_type == TYPE_LOGIC_SHIFT_IMM
11323 || attr_type == TYPE_LOGICS_SHIFT_IMM
11324 || attr_type == TYPE_ALU_SHIFT_REG
11325 || attr_type == TYPE_ALUS_SHIFT_REG
11326 || attr_type == TYPE_LOGIC_SHIFT_REG
11327 || attr_type == TYPE_LOGICS_SHIFT_REG
11328 || attr_type == TYPE_MOV_SHIFT
11329 || attr_type == TYPE_MVN_SHIFT
11330 || attr_type == TYPE_MOV_SHIFT_REG
11331 || attr_type == TYPE_MVN_SHIFT_REG))
11333 rtx shifted_operand;
11334 int opno;
11336 /* Get the shifted operand. */
11337 extract_insn (insn);
11338 shifted_operand = recog_data.operand[shift_opnum];
11340 /* Iterate over all the operands in DEP. If we write an operand
 11341 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11342 cost of this dependency. */
11343 extract_insn (dep);
11344 preprocess_constraints (dep);
11345 for (opno = 0; opno < recog_data.n_operands; opno++)
11347 /* We can ignore strict inputs. */
11348 if (recog_data.operand_type[opno] == OP_IN)
11349 continue;
11351 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11352 shifted_operand))
11354 *cost = 2;
11355 return false;
11360 return true;
11363 /* Adjust cost hook for Cortex A9. */
11364 static bool
11365 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11367 switch (REG_NOTE_KIND (link))
11369 case REG_DEP_ANTI:
11370 *cost = 0;
11371 return false;
11373 case REG_DEP_TRUE:
11374 case REG_DEP_OUTPUT:
11375 if (recog_memoized (insn) >= 0
11376 && recog_memoized (dep) >= 0)
11378 if (GET_CODE (PATTERN (insn)) == SET)
11380 if (GET_MODE_CLASS
11381 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11382 || GET_MODE_CLASS
11383 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11385 enum attr_type attr_type_insn = get_attr_type (insn);
11386 enum attr_type attr_type_dep = get_attr_type (dep);
11388 /* By default all dependencies of the form
11389 s0 = s0 <op> s1
11390 s0 = s0 <op> s2
11391 have an extra latency of 1 cycle because
11392 of the input and output dependency in this
 11393 		     case.  However, this gets modeled as a true
11394 dependency and hence all these checks. */
11395 if (REG_P (SET_DEST (PATTERN (insn)))
11396 && REG_P (SET_DEST (PATTERN (dep)))
11397 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11398 SET_DEST (PATTERN (dep))))
11400 /* FMACS is a special case where the dependent
11401 instruction can be issued 3 cycles before
11402 the normal latency in case of an output
11403 dependency. */
11404 if ((attr_type_insn == TYPE_FMACS
11405 || attr_type_insn == TYPE_FMACD)
11406 && (attr_type_dep == TYPE_FMACS
11407 || attr_type_dep == TYPE_FMACD))
11409 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11410 *cost = insn_default_latency (dep) - 3;
11411 else
11412 *cost = insn_default_latency (dep);
11413 return false;
11415 else
11417 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11418 *cost = insn_default_latency (dep) + 1;
11419 else
11420 *cost = insn_default_latency (dep);
11422 return false;
11427 break;
11429 default:
11430 gcc_unreachable ();
11433 return true;
11436 /* Adjust cost hook for FA726TE. */
11437 static bool
11438 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11440 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
 11441      has a penalty of 3.  */
11442 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11443 && recog_memoized (insn) >= 0
11444 && recog_memoized (dep) >= 0
11445 && get_attr_conds (dep) == CONDS_SET)
11447 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11448 if (get_attr_conds (insn) == CONDS_USE
11449 && get_attr_type (insn) != TYPE_BRANCH)
11451 *cost = 3;
11452 return false;
11455 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11456 || get_attr_conds (insn) == CONDS_USE)
11458 *cost = 0;
11459 return false;
11463 return true;
11466 /* Implement TARGET_REGISTER_MOVE_COST.
11468 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11469 it is typically more expensive than a single memory access. We set
11470 the cost to less than two memory accesses so that floating
11471 point to integer conversion does not go through memory. */
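   /* E.g. (using the constants below and the value 10 returned by
      arm_memory_move_cost for TARGET_32BIT): a VFP<->core move costs 15,
      which is more than one memory access (10) but less than a
      store-plus-load round trip (20), so the allocator still prefers the
      direct move over spilling.  */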
11474 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11475 reg_class_t from, reg_class_t to)
11477 if (TARGET_32BIT)
11479 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11480 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11481 return 15;
11482 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11483 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11484 return 4;
11485 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11486 return 20;
11487 else
11488 return 2;
11490 else
11492 if (from == HI_REGS || to == HI_REGS)
11493 return 4;
11494 else
11495 return 2;
11499 /* Implement TARGET_MEMORY_MOVE_COST. */
11502 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11503 bool in ATTRIBUTE_UNUSED)
11505 if (TARGET_32BIT)
11506 return 10;
11507 else
11509 if (GET_MODE_SIZE (mode) < 4)
11510 return 8;
11511 else
11512 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
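   /* Worked example for the Thumb-1 branch above: QImode and HImode always
      cost 8; SImode costs 2 * 4 * 1 == 8 in LO_REGS and 16 otherwise;
      DImode costs 16 in LO_REGS and 32 otherwise.  */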
11516 /* Vectorizer cost model implementation. */
11518 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11519 static int
11520 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11521 tree vectype,
11522 int misalign ATTRIBUTE_UNUSED)
11524 unsigned elements;
11526 switch (type_of_cost)
11528 case scalar_stmt:
11529 return current_tune->vec_costs->scalar_stmt_cost;
11531 case scalar_load:
11532 return current_tune->vec_costs->scalar_load_cost;
11534 case scalar_store:
11535 return current_tune->vec_costs->scalar_store_cost;
11537 case vector_stmt:
11538 return current_tune->vec_costs->vec_stmt_cost;
11540 case vector_load:
11541 return current_tune->vec_costs->vec_align_load_cost;
11543 case vector_store:
11544 return current_tune->vec_costs->vec_store_cost;
11546 case vec_to_scalar:
11547 return current_tune->vec_costs->vec_to_scalar_cost;
11549 case scalar_to_vec:
11550 return current_tune->vec_costs->scalar_to_vec_cost;
11552 case unaligned_load:
11553 return current_tune->vec_costs->vec_unalign_load_cost;
11555 case unaligned_store:
11556 return current_tune->vec_costs->vec_unalign_store_cost;
11558 case cond_branch_taken:
11559 return current_tune->vec_costs->cond_taken_branch_cost;
11561 case cond_branch_not_taken:
11562 return current_tune->vec_costs->cond_not_taken_branch_cost;
11564 case vec_perm:
11565 case vec_promote_demote:
11566 return current_tune->vec_costs->vec_stmt_cost;
11568 case vec_construct:
11569 elements = TYPE_VECTOR_SUBPARTS (vectype);
11570 return elements / 2 + 1;
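      /* E.g. building a V4SI vector element by element is costed as
	 4 / 2 + 1 == 3, and a V16QI vector as 16 / 2 + 1 == 9.  */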
11572 default:
11573 gcc_unreachable ();
11577 /* Implement targetm.vectorize.add_stmt_cost. */
11579 static unsigned
11580 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11581 struct _stmt_vec_info *stmt_info, int misalign,
11582 enum vect_cost_model_location where)
11584 unsigned *cost = (unsigned *) data;
11585 unsigned retval = 0;
11587 if (flag_vect_cost_model)
11589 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11590 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11592 /* Statements in an inner loop relative to the loop being
11593 vectorized are weighted more heavily. The value here is
11594 arbitrary and could potentially be improved with analysis. */
11595 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11596 count *= 50; /* FIXME. */
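      /* E.g. (with an assumed stmt_cost of 1): a statement counted twice in
	 the body of a loop nested inside the loop being vectorized
	 contributes 2 * 50 * 1 == 100 to cost[vect_body], versus just 2 if
	 it were in the outer loop body.  */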
11598 retval = (unsigned) (count * stmt_cost);
11599 cost[where] += retval;
11602 return retval;
11605 /* Return true if and only if this insn can dual-issue only as older. */
11606 static bool
11607 cortexa7_older_only (rtx insn)
11609 if (recog_memoized (insn) < 0)
11610 return false;
11612 switch (get_attr_type (insn))
11614 case TYPE_ALU_REG:
11615 case TYPE_ALUS_REG:
11616 case TYPE_LOGIC_REG:
11617 case TYPE_LOGICS_REG:
11618 case TYPE_ADC_REG:
11619 case TYPE_ADCS_REG:
11620 case TYPE_ADR:
11621 case TYPE_BFM:
11622 case TYPE_REV:
11623 case TYPE_MVN_REG:
11624 case TYPE_SHIFT_IMM:
11625 case TYPE_SHIFT_REG:
11626 case TYPE_LOAD_BYTE:
11627 case TYPE_LOAD1:
11628 case TYPE_STORE1:
11629 case TYPE_FFARITHS:
11630 case TYPE_FADDS:
11631 case TYPE_FFARITHD:
11632 case TYPE_FADDD:
11633 case TYPE_FMOV:
11634 case TYPE_F_CVT:
11635 case TYPE_FCMPS:
11636 case TYPE_FCMPD:
11637 case TYPE_FCONSTS:
11638 case TYPE_FCONSTD:
11639 case TYPE_FMULS:
11640 case TYPE_FMACS:
11641 case TYPE_FMULD:
11642 case TYPE_FMACD:
11643 case TYPE_FDIVS:
11644 case TYPE_FDIVD:
11645 case TYPE_F_MRC:
11646 case TYPE_F_MRRC:
11647 case TYPE_F_FLAG:
11648 case TYPE_F_LOADS:
11649 case TYPE_F_STORES:
11650 return true;
11651 default:
11652 return false;
11656 /* Return true if and only if this insn can dual-issue as younger. */
11657 static bool
11658 cortexa7_younger (FILE *file, int verbose, rtx insn)
11660 if (recog_memoized (insn) < 0)
11662 if (verbose > 5)
11663 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11664 return false;
11667 switch (get_attr_type (insn))
11669 case TYPE_ALU_IMM:
11670 case TYPE_ALUS_IMM:
11671 case TYPE_LOGIC_IMM:
11672 case TYPE_LOGICS_IMM:
11673 case TYPE_EXTEND:
11674 case TYPE_MVN_IMM:
11675 case TYPE_MOV_IMM:
11676 case TYPE_MOV_REG:
11677 case TYPE_MOV_SHIFT:
11678 case TYPE_MOV_SHIFT_REG:
11679 case TYPE_BRANCH:
11680 case TYPE_CALL:
11681 return true;
11682 default:
11683 return false;
11688 /* Look for an instruction that can dual issue only as an older
11689 instruction, and move it in front of any instructions that can
11690 dual-issue as younger, while preserving the relative order of all
 11691    other instructions in the ready list.  This is a heuristic to help
11692 dual-issue in later cycles, by postponing issue of more flexible
11693 instructions. This heuristic may affect dual issue opportunities
11694 in the current cycle. */
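/* E.g. (hypothetical ready list, head first): { mov r1, #1 ; ldr r2, [r3] }.
   The mov is "younger"-capable and the load is older-only, so the load is
   hoisted in front of the mov; the flexible mov then remains available to
   pair as the younger insn of a later dual-issue cycle.  */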
11695 static void
11696 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11697 int clock)
11699 int i;
11700 int first_older_only = -1, first_younger = -1;
11702 if (verbose > 5)
11703 fprintf (file,
11704 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11705 clock,
11706 *n_readyp);
11708 /* Traverse the ready list from the head (the instruction to issue
 11709      first), looking for the first instruction that can issue as
11710 younger and the first instruction that can dual-issue only as
11711 older. */
11712 for (i = *n_readyp - 1; i >= 0; i--)
11714 rtx insn = ready[i];
11715 if (cortexa7_older_only (insn))
11717 first_older_only = i;
11718 if (verbose > 5)
11719 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11720 break;
11722 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11723 first_younger = i;
11726 /* Nothing to reorder because either no younger insn found or insn
11727 that can dual-issue only as older appears before any insn that
11728 can dual-issue as younger. */
11729 if (first_younger == -1)
11731 if (verbose > 5)
11732 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11733 return;
11736 /* Nothing to reorder because no older-only insn in the ready list. */
11737 if (first_older_only == -1)
11739 if (verbose > 5)
11740 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11741 return;
11744 /* Move first_older_only insn before first_younger. */
11745 if (verbose > 5)
11746 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11747 INSN_UID(ready [first_older_only]),
11748 INSN_UID(ready [first_younger]));
11749 rtx first_older_only_insn = ready [first_older_only];
11750 for (i = first_older_only; i < first_younger; i++)
11752 ready[i] = ready[i+1];
11755 ready[i] = first_older_only_insn;
11756 return;
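/* A minimal standalone sketch of the rotation performed above, using a
   plain int array instead of the scheduler's rtx ready list (hypothetical
   helper for illustration only): the element at index FROM is moved up to
   index TO (FROM < TO), sliding the intervening elements down by one so
   that their relative order is preserved.  */
static void
cortexa7_rotate_example (int *arr, int from, int to)
{
  int saved = arr[from];
  int i;

  for (i = from; i < to; i++)
    arr[i] = arr[i + 1];
  arr[to] = saved;
}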
11759 /* Implement TARGET_SCHED_REORDER. */
11760 static int
11761 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11762 int clock)
11764 switch (arm_tune)
11766 case cortexa7:
11767 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11768 break;
11769 default:
11770 /* Do nothing for other cores. */
11771 break;
11774 return arm_issue_rate ();
11777 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11778 It corrects the value of COST based on the relationship between
11779 INSN and DEP through the dependence LINK. It returns the new
11780 value. There is a per-core adjust_cost hook to adjust scheduler costs
11781 and the per-core hook can choose to completely override the generic
11782 adjust_cost function. Only put bits of code into arm_adjust_cost that
11783 are common across all cores. */
11784 static int
11785 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11787 rtx i_pat, d_pat;
11789 /* When generating Thumb-1 code, we want to place flag-setting operations
11790 close to a conditional branch which depends on them, so that we can
11791 omit the comparison. */
11792 if (TARGET_THUMB1
11793 && REG_NOTE_KIND (link) == 0
11794 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11795 && recog_memoized (dep) >= 0
11796 && get_attr_conds (dep) == CONDS_SET)
11797 return 0;
11799 if (current_tune->sched_adjust_cost != NULL)
11801 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11802 return cost;
11805 /* XXX Is this strictly true? */
11806 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11807 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11808 return 0;
11810 /* Call insns don't incur a stall, even if they follow a load. */
11811 if (REG_NOTE_KIND (link) == 0
11812 && CALL_P (insn))
11813 return 1;
11815 if ((i_pat = single_set (insn)) != NULL
11816 && MEM_P (SET_SRC (i_pat))
11817 && (d_pat = single_set (dep)) != NULL
11818 && MEM_P (SET_DEST (d_pat)))
11820 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11821 /* This is a load after a store, there is no conflict if the load reads
11822 from a cached area. Assume that loads from the stack, and from the
11823 constant pool are cached, and that others will miss. This is a
11824 hack. */
11826 if ((GET_CODE (src_mem) == SYMBOL_REF
11827 && CONSTANT_POOL_ADDRESS_P (src_mem))
11828 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11829 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11830 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11831 return 1;
11834 return cost;
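/* Illustrative example for the generic adjustment above (assuming no
   per-core sched_adjust_cost hook overrides it): a load whose address
   mentions the stack pointer, e.g. ldr r0, [sp, #8], that depends on an
   earlier store is assumed to hit the cache, so the dependence cost is
   reduced to 1; a load through an arbitrary pointer register keeps the
   original COST, modelling a likely store-to-load miss.  */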
11838 arm_max_conditional_execute (void)
11840 return max_insns_skipped;
11843 static int
11844 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11846 if (TARGET_32BIT)
11847 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11848 else
11849 return (optimize > 0) ? 2 : 0;
11852 static int
11853 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11855 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11858 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11859 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11860 sequences of non-executed instructions in IT blocks probably take the same
11861 amount of time as executed instructions (and the IT instruction itself takes
11862 space in icache). This function was experimentally determined to give good
11863 results on a popular embedded benchmark. */
11865 static int
11866 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11868 return (TARGET_32BIT && speed_p) ? 1
11869 : arm_default_branch_cost (speed_p, predictable_p);
11872 static bool fp_consts_inited = false;
11874 static REAL_VALUE_TYPE value_fp0;
11876 static void
11877 init_fp_table (void)
11879 REAL_VALUE_TYPE r;
11881 r = REAL_VALUE_ATOF ("0", DFmode);
11882 value_fp0 = r;
11883 fp_consts_inited = true;
11886 /* Return TRUE if rtx X is a valid immediate FP constant. */
11888 arm_const_double_rtx (rtx x)
11890 REAL_VALUE_TYPE r;
11892 if (!fp_consts_inited)
11893 init_fp_table ();
11895 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11896 if (REAL_VALUE_MINUS_ZERO (r))
11897 return 0;
11899 if (REAL_VALUES_EQUAL (r, value_fp0))
11900 return 1;
11902 return 0;
11905 /* VFPv3 has a fairly wide range of representable immediates, formed from
11906 "quarter-precision" floating-point values. These can be evaluated using this
11907 formula (with ^ for exponentiation):
11909 -1^s * n * 2^-r
11911 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11912 16 <= n <= 31 and 0 <= r <= 7.
11914 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11916 - A (most-significant) is the sign bit.
11917 - BCD are the exponent (encoded as r XOR 3).
11918 - EFGH are the mantissa (encoded as n - 16).
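/* Worked example of the encoding just described (the helper below is a
   hypothetical illustration and is not used by the recognizer that
   follows): 1.0 decomposes as (-1)^0 * 16 * 2^-4, i.e. s = 0, n = 16 and
   r = 4, which encodes to (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */
static unsigned char
vfp3_quarter_precision_encode_example (int s, int n, int r)
{
  /* Assumes 0 <= s <= 1, 16 <= n <= 31 and 0 <= r <= 7, as above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}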
11921 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11922 fconst[sd] instruction, or -1 if X isn't suitable. */
11923 static int
11924 vfp3_const_double_index (rtx x)
11926 REAL_VALUE_TYPE r, m;
11927 int sign, exponent;
11928 unsigned HOST_WIDE_INT mantissa, mant_hi;
11929 unsigned HOST_WIDE_INT mask;
11930 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11931 bool fail;
11933 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11934 return -1;
11936 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11938 /* We can't represent these things, so detect them first. */
11939 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11940 return -1;
11942 /* Extract sign, exponent and mantissa. */
11943 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11944 r = real_value_abs (&r);
11945 exponent = REAL_EXP (&r);
11946 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11947 highest (sign) bit, with a fixed binary point at bit point_pos.
11948 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11949 bits for the mantissa, this may fail (low bits would be lost). */
11950 real_ldexp (&m, &r, point_pos - exponent);
11951 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11952 mantissa = w.elt (0);
11953 mant_hi = w.elt (1);
11955 /* If there are bits set in the low part of the mantissa, we can't
11956 represent this value. */
11957 if (mantissa != 0)
11958 return -1;
11960 /* Now make it so that mantissa contains the most-significant bits, and move
11961 the point_pos to indicate that the least-significant bits have been
11962 discarded. */
11963 point_pos -= HOST_BITS_PER_WIDE_INT;
11964 mantissa = mant_hi;
11966 /* We can permit four significant bits of mantissa only, plus a high bit
11967 which is always 1. */
11968 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11969 if ((mantissa & mask) != 0)
11970 return -1;
11972 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11973 mantissa >>= point_pos - 5;
11975 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11976 floating-point immediate zero with Neon using an integer-zero load, but
11977 that case is handled elsewhere.) */
11978 if (mantissa == 0)
11979 return -1;
11981 gcc_assert (mantissa >= 16 && mantissa <= 31);
11983 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11984 normalized significands are in the range [1, 2). (Our mantissa is shifted
11985 left 4 places at this point relative to normalized IEEE754 values). GCC
11986 internally uses [0.5, 1) (see real.c), so the exponent returned from
11987 REAL_EXP must be altered. */
11988 exponent = 5 - exponent;
11990 if (exponent < 0 || exponent > 7)
11991 return -1;
11993 /* Sign, mantissa and exponent are now in the correct form to plug into the
11994 formula described in the comment above. */
11995 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11998 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12000 vfp3_const_double_rtx (rtx x)
12002 if (!TARGET_VFP3)
12003 return 0;
12005 return vfp3_const_double_index (x) != -1;
12008 /* Recognize immediates which can be used in various Neon instructions. Legal
12009 immediates are described by the following table (for VMVN variants, the
12010 bitwise inverse of the constant shown is recognized. In either case, VMOV
12011 is output and the correct instruction to use for a given constant is chosen
12012 by the assembler). The constant shown is replicated across all elements of
12013 the destination vector.
12015 insn elems variant constant (binary)
12016 ---- ----- ------- -----------------
12017 vmov i32 0 00000000 00000000 00000000 abcdefgh
12018 vmov i32 1 00000000 00000000 abcdefgh 00000000
12019 vmov i32 2 00000000 abcdefgh 00000000 00000000
12020 vmov i32 3 abcdefgh 00000000 00000000 00000000
12021 vmov i16 4 00000000 abcdefgh
12022 vmov i16 5 abcdefgh 00000000
12023 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12024 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12025 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12026 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12027 vmvn i16 10 00000000 abcdefgh
12028 vmvn i16 11 abcdefgh 00000000
12029 vmov i32 12 00000000 00000000 abcdefgh 11111111
12030 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12031 vmov i32 14 00000000 abcdefgh 11111111 11111111
12032 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12033 vmov i8 16 abcdefgh
12034 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12035 eeeeeeee ffffffff gggggggg hhhhhhhh
12036 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12037 vmov f32 19 00000000 00000000 00000000 00000000
12039 For case 18, B = !b. Representable values are exactly those accepted by
12040 vfp3_const_double_index, but are output as floating-point numbers rather
12041 than indices.
12043 For case 19, we will change it to vmov.i32 when assembling.
12045 Variants 0-5 (inclusive) may also be used as immediates for the second
12046 operand of VORR/VBIC instructions.
12048 The INVERSE argument causes the bitwise inverse of the given operand to be
12049 recognized instead (used for recognizing legal immediates for the VAND/VORN
12050 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12051 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12052 output, rather than the real insns vbic/vorr).
12054 INVERSE makes no difference to the recognition of float vectors.
12056 The return value is the variant of immediate as shown in the above table, or
12057 -1 if the given value doesn't match any of the listed patterns.
12059 static int
12060 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12061 rtx *modconst, int *elementwidth)
12063 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12064 matches = 1; \
12065 for (i = 0; i < idx; i += (STRIDE)) \
12066 if (!(TEST)) \
12067 matches = 0; \
12068 if (matches) \
12070 immtype = (CLASS); \
12071 elsize = (ELSIZE); \
12072 break; \
12075 unsigned int i, elsize = 0, idx = 0, n_elts;
12076 unsigned int innersize;
12077 unsigned char bytes[16];
12078 int immtype = -1, matches;
12079 unsigned int invmask = inverse ? 0xff : 0;
12080 bool vector = GET_CODE (op) == CONST_VECTOR;
12082 if (vector)
12084 n_elts = CONST_VECTOR_NUNITS (op);
12085 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12087 else
12089 n_elts = 1;
12090 if (mode == VOIDmode)
12091 mode = DImode;
12092 innersize = GET_MODE_SIZE (mode);
12095 /* Vectors of float constants. */
12096 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12098 rtx el0 = CONST_VECTOR_ELT (op, 0);
12099 REAL_VALUE_TYPE r0;
12101 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12102 return -1;
12104 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12106 for (i = 1; i < n_elts; i++)
12108 rtx elt = CONST_VECTOR_ELT (op, i);
12109 REAL_VALUE_TYPE re;
12111 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12113 if (!REAL_VALUES_EQUAL (r0, re))
12114 return -1;
12117 if (modconst)
12118 *modconst = CONST_VECTOR_ELT (op, 0);
12120 if (elementwidth)
12121 *elementwidth = 0;
12123 if (el0 == CONST0_RTX (GET_MODE (el0)))
12124 return 19;
12125 else
12126 return 18;
12129 /* Splat vector constant out into a byte vector. */
12130 for (i = 0; i < n_elts; i++)
12132 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12133 unsigned HOST_WIDE_INT elpart;
12134 unsigned int part, parts;
12136 if (CONST_INT_P (el))
12138 elpart = INTVAL (el);
12139 parts = 1;
12141 else if (CONST_DOUBLE_P (el))
12143 elpart = CONST_DOUBLE_LOW (el);
12144 parts = 2;
12146 else
12147 gcc_unreachable ();
12149 for (part = 0; part < parts; part++)
12151 unsigned int byte;
12152 for (byte = 0; byte < innersize; byte++)
12154 bytes[idx++] = (elpart & 0xff) ^ invmask;
12155 elpart >>= BITS_PER_UNIT;
12157 if (CONST_DOUBLE_P (el))
12158 elpart = CONST_DOUBLE_HIGH (el);
12162 /* Sanity check. */
12163 gcc_assert (idx == GET_MODE_SIZE (mode));
12167 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12168 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12170 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12171 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12173 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12174 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12176 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12177 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12179 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12181 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12183 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12184 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12186 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12187 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12189 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12190 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12192 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12193 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12195 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12197 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12199 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12200 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12202 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12203 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12205 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12206 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12208 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12209 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12211 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12213 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12214 && bytes[i] == bytes[(i + 8) % idx]);
12216 while (0);
12218 if (immtype == -1)
12219 return -1;
12221 if (elementwidth)
12222 *elementwidth = elsize;
12224 if (modconst)
12226 unsigned HOST_WIDE_INT imm = 0;
12228 /* Un-invert bytes of recognized vector, if necessary. */
12229 if (invmask != 0)
12230 for (i = 0; i < idx; i++)
12231 bytes[i] ^= invmask;
12233 if (immtype == 17)
12235 /* FIXME: Broken on 32-bit H_W_I hosts. */
12236 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12238 for (i = 0; i < 8; i++)
12239 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12240 << (i * BITS_PER_UNIT);
12242 *modconst = GEN_INT (imm);
12244 else
12246 unsigned HOST_WIDE_INT imm = 0;
12248 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12249 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12251 *modconst = GEN_INT (imm);
12255 return immtype;
12256 #undef CHECK
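/* Worked example for the recognizer above (illustrative only): a V4SImode
   CONST_VECTOR whose elements are all 0x0000ab00 splats, least-significant
   byte first, into { 0x00, 0xab, 0x00, 0x00 } for each element.  That byte
   pattern satisfies the variant-1 test (vmov.i32 with the abcdefgh byte in
   bits 8..15), so the function returns 1 with *ELEMENTWIDTH set to 32 and
   *MODCONST set to (const_int 0x0000ab00).  */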
12259 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12260 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12261 float elements), and a modified constant (whatever should be output for a
12262 VMOV) in *MODCONST. */
12265 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12266 rtx *modconst, int *elementwidth)
12268 rtx tmpconst;
12269 int tmpwidth;
12270 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12272 if (retval == -1)
12273 return 0;
12275 if (modconst)
12276 *modconst = tmpconst;
12278 if (elementwidth)
12279 *elementwidth = tmpwidth;
12281 return 1;
12284 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12285 the immediate is valid, write a constant suitable for using as an operand
12286 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12287 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12290 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12291 rtx *modconst, int *elementwidth)
12293 rtx tmpconst;
12294 int tmpwidth;
12295 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12297 if (retval < 0 || retval > 5)
12298 return 0;
12300 if (modconst)
12301 *modconst = tmpconst;
12303 if (elementwidth)
12304 *elementwidth = tmpwidth;
12306 return 1;
12309 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12310 the immediate is valid, write a constant suitable for using as an operand
12311 to VSHR/VSHL to *MODCONST and the corresponding element width to
12312 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
12313 because they have different limitations. */
12316 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12317 rtx *modconst, int *elementwidth,
12318 bool isleftshift)
12320 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12321 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12322 unsigned HOST_WIDE_INT last_elt = 0;
12323 unsigned HOST_WIDE_INT maxshift;
12325 /* Check that all elements of the vector hold the same shift count. */
12326 for (i = 0; i < n_elts; i++)
12328 rtx el = CONST_VECTOR_ELT (op, i);
12329 unsigned HOST_WIDE_INT elpart;
12331 if (CONST_INT_P (el))
12332 elpart = INTVAL (el);
12333 else if (CONST_DOUBLE_P (el))
12334 return 0;
12335 else
12336 gcc_unreachable ();
12338 if (i != 0 && elpart != last_elt)
12339 return 0;
12341 last_elt = elpart;
12344 /* Shift less than element size. */
12345 maxshift = innersize * 8;
12347 if (isleftshift)
12349 /* Left shift immediate value can be from 0 to <size>-1. */
12350 if (last_elt >= maxshift)
12351 return 0;
12353 else
12355 /* Right shift immediate value can be from 1 to <size>. */
12356 if (last_elt == 0 || last_elt > maxshift)
12357 return 0;
12360 if (elementwidth)
12361 *elementwidth = innersize * 8;
12363 if (modconst)
12364 *modconst = CONST_VECTOR_ELT (op, 0);
12366 return 1;
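/* Worked example for the shift-immediate check above (illustrative only):
   for a V8QImode constant with every element equal to 3, innersize is 1 so
   maxshift is 8; the value 3 is accepted both as a left-shift count (valid
   range 0..7) and as a right-shift count (valid range 1..8), and
   *ELEMENTWIDTH is set to 8.  A vector with differing elements is rejected
   outright.  */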
12369 /* Return a string suitable for output of Neon immediate logic operation
12370 MNEM. */
12372 char *
12373 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12374 int inverse, int quad)
12376 int width, is_valid;
12377 static char templ[40];
12379 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12381 gcc_assert (is_valid != 0);
12383 if (quad)
12384 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12385 else
12386 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12388 return templ;
12391 /* Return a string suitable for output of Neon immediate shift operation
12392 (VSHR or VSHL) MNEM. */
12394 char *
12395 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12396 enum machine_mode mode, int quad,
12397 bool isleftshift)
12399 int width, is_valid;
12400 static char templ[40];
12402 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12403 gcc_assert (is_valid != 0);
12405 if (quad)
12406 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12407 else
12408 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12410 return templ;
12413 /* Output a sequence of pairwise operations to implement a reduction.
12414 NOTE: We do "too much work" here, because pairwise operations work on two
12415 registers-worth of operands in one go. Unfortunately we don't think we can
12416 exploit those extra calculations to do the full operation in fewer steps.
12417 Although all vector elements of the result but the first are ignored, we
12418 actually calculate the same result in each of the elements. An alternative
12419 such as initially loading a vector with zero to use as each of the second
12420 operands would use up an additional register and take an extra instruction,
12421 for no particular gain. */
12423 void
12424 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12425 rtx (*reduc) (rtx, rtx, rtx))
12427 enum machine_mode inner = GET_MODE_INNER (mode);
12428 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12429 rtx tmpsum = op1;
12431 for (i = parts / 2; i >= 1; i /= 2)
12433 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12434 emit_insn (reduc (dest, tmpsum, tmpsum));
12435 tmpsum = dest;
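/* Worked example for the pairwise reduction above (illustrative only): for
   a V2SImode addition reduction, PARTS is 2, so the loop runs once with
   i == 1 and emits a single pairwise add (REDUC) directly into OP0; both
   lanes of OP0 then hold the sum and callers read lane 0.  For wider modes
   the loop halves the element count each step, emitting log2(PARTS)
   pairwise operations in total.  */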
12439 /* If VALS is a vector constant that can be loaded into a register
12440 using VDUP, generate instructions to do so and return an RTX to
12441 assign to the register. Otherwise return NULL_RTX. */
12443 static rtx
12444 neon_vdup_constant (rtx vals)
12446 enum machine_mode mode = GET_MODE (vals);
12447 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12448 int n_elts = GET_MODE_NUNITS (mode);
12449 bool all_same = true;
12450 rtx x;
12451 int i;
12453 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12454 return NULL_RTX;
12456 for (i = 0; i < n_elts; ++i)
12458 x = XVECEXP (vals, 0, i);
12459 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12460 all_same = false;
12463 if (!all_same)
12464 /* The elements are not all the same. We could handle repeating
12465 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12466 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12467 vdup.i16). */
12468 return NULL_RTX;
12470 /* We can load this constant by using VDUP and a constant in a
12471 single ARM register. This will be cheaper than a vector
12472 load. */
12474 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12475 return gen_rtx_VEC_DUPLICATE (mode, x);
12478 /* Generate code to load VALS, which is a PARALLEL containing only
12479 constants (for vec_init) or CONST_VECTOR, efficiently into a
12480 register. Returns an RTX to copy into the register, or NULL_RTX
12481 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12484 neon_make_constant (rtx vals)
12486 enum machine_mode mode = GET_MODE (vals);
12487 rtx target;
12488 rtx const_vec = NULL_RTX;
12489 int n_elts = GET_MODE_NUNITS (mode);
12490 int n_const = 0;
12491 int i;
12493 if (GET_CODE (vals) == CONST_VECTOR)
12494 const_vec = vals;
12495 else if (GET_CODE (vals) == PARALLEL)
12497 /* A CONST_VECTOR must contain only CONST_INTs and
12498 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12499 Only store valid constants in a CONST_VECTOR. */
12500 for (i = 0; i < n_elts; ++i)
12502 rtx x = XVECEXP (vals, 0, i);
12503 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12504 n_const++;
12506 if (n_const == n_elts)
12507 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12509 else
12510 gcc_unreachable ();
12512 if (const_vec != NULL
12513 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12514 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12515 return const_vec;
12516 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12517 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12518 pipeline cycle; creating the constant takes one or two ARM
12519 pipeline cycles. */
12520 return target;
12521 else if (const_vec != NULL_RTX)
12522 /* Load from constant pool. On Cortex-A8 this takes two cycles
12523 (for either double or quad vectors). We cannot take advantage
12524 of single-cycle VLD1 because we need a PC-relative addressing
12525 mode. */
12526 return const_vec;
12527 else
12528 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12529 We cannot construct an initializer. */
12530 return NULL_RTX;
12533 /* Initialize vector TARGET to VALS. */
12535 void
12536 neon_expand_vector_init (rtx target, rtx vals)
12538 enum machine_mode mode = GET_MODE (target);
12539 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12540 int n_elts = GET_MODE_NUNITS (mode);
12541 int n_var = 0, one_var = -1;
12542 bool all_same = true;
12543 rtx x, mem;
12544 int i;
12546 for (i = 0; i < n_elts; ++i)
12548 x = XVECEXP (vals, 0, i);
12549 if (!CONSTANT_P (x))
12550 ++n_var, one_var = i;
12552 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12553 all_same = false;
12556 if (n_var == 0)
12558 rtx constant = neon_make_constant (vals);
12559 if (constant != NULL_RTX)
12561 emit_move_insn (target, constant);
12562 return;
12566 /* Splat a single non-constant element if we can. */
12567 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12569 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12570 emit_insn (gen_rtx_SET (VOIDmode, target,
12571 gen_rtx_VEC_DUPLICATE (mode, x)));
12572 return;
12575 /* One field is non-constant. Load constant then overwrite varying
12576 field. This is more efficient than using the stack. */
12577 if (n_var == 1)
12579 rtx copy = copy_rtx (vals);
12580 rtx index = GEN_INT (one_var);
12582 /* Load constant part of vector, substitute neighboring value for
12583 varying element. */
12584 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12585 neon_expand_vector_init (target, copy);
12587 /* Insert variable. */
12588 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12589 switch (mode)
12591 case V8QImode:
12592 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12593 break;
12594 case V16QImode:
12595 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12596 break;
12597 case V4HImode:
12598 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12599 break;
12600 case V8HImode:
12601 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12602 break;
12603 case V2SImode:
12604 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12605 break;
12606 case V4SImode:
12607 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12608 break;
12609 case V2SFmode:
12610 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12611 break;
12612 case V4SFmode:
12613 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12614 break;
12615 case V2DImode:
12616 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12617 break;
12618 default:
12619 gcc_unreachable ();
12621 return;
12624 /* Construct the vector in memory one field at a time
12625 and load the whole vector. */
12626 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12627 for (i = 0; i < n_elts; i++)
12628 emit_move_insn (adjust_address_nv (mem, inner_mode,
12629 i * GET_MODE_SIZE (inner_mode)),
12630 XVECEXP (vals, 0, i));
12631 emit_move_insn (target, mem);
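/* Worked example for the expansion above (illustrative only): for a
   V4SImode initializer {x, 1, 2, 3} where only X is non-constant, the
   n_var == 1 path first initializes TARGET from the constant vector
   {1, 1, 2, 3} (the varying slot temporarily takes its neighbour's value)
   and then emits gen_neon_vset_lanev4si to insert X into lane 0.  If every
   element were the same variable X, the all_same path would emit a single
   VEC_DUPLICATE (vdup) instead.  */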
12634 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12635 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12636 reported source locations are bogus. */
12638 static void
12639 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12640 const char *err)
12642 HOST_WIDE_INT lane;
12644 gcc_assert (CONST_INT_P (operand));
12646 lane = INTVAL (operand);
12648 if (lane < low || lane >= high)
12649 error (err);
12652 /* Bounds-check lanes. */
12654 void
12655 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12657 bounds_check (operand, low, high, "lane out of range");
12660 /* Bounds-check constants. */
12662 void
12663 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12665 bounds_check (operand, low, high, "constant out of range");
12668 HOST_WIDE_INT
12669 neon_element_bits (enum machine_mode mode)
12671 if (mode == DImode)
12672 return GET_MODE_BITSIZE (mode);
12673 else
12674 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12678 /* Predicates for `match_operand' and `match_operator'. */
12680 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12681 WB is true if full writeback address modes are allowed and is false
12682 if limited writeback address modes (POST_INC and PRE_DEC) are
12683 allowed. */
12686 arm_coproc_mem_operand (rtx op, bool wb)
12688 rtx ind;
12690 /* Reject eliminable registers. */
12691 if (! (reload_in_progress || reload_completed || lra_in_progress)
12692 && ( reg_mentioned_p (frame_pointer_rtx, op)
12693 || reg_mentioned_p (arg_pointer_rtx, op)
12694 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12695 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12696 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12697 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12698 return FALSE;
12700 /* Constants are converted into offsets from labels. */
12701 if (!MEM_P (op))
12702 return FALSE;
12704 ind = XEXP (op, 0);
12706 if (reload_completed
12707 && (GET_CODE (ind) == LABEL_REF
12708 || (GET_CODE (ind) == CONST
12709 && GET_CODE (XEXP (ind, 0)) == PLUS
12710 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12711 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12712 return TRUE;
12714 /* Match: (mem (reg)). */
12715 if (REG_P (ind))
12716 return arm_address_register_rtx_p (ind, 0);
12718 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12719 acceptable in any case (subject to verification by
12720 arm_address_register_rtx_p). We need WB to be true to accept
12721 PRE_INC and POST_DEC. */
12722 if (GET_CODE (ind) == POST_INC
12723 || GET_CODE (ind) == PRE_DEC
12724 || (wb
12725 && (GET_CODE (ind) == PRE_INC
12726 || GET_CODE (ind) == POST_DEC)))
12727 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12729 if (wb
12730 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12731 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12732 && GET_CODE (XEXP (ind, 1)) == PLUS
12733 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12734 ind = XEXP (ind, 1);
12736 /* Match:
12737 (plus (reg)
12738 (const)). */
12739 if (GET_CODE (ind) == PLUS
12740 && REG_P (XEXP (ind, 0))
12741 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12742 && CONST_INT_P (XEXP (ind, 1))
12743 && INTVAL (XEXP (ind, 1)) > -1024
12744 && INTVAL (XEXP (ind, 1)) < 1024
12745 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12746 return TRUE;
12748 return FALSE;
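/* Illustrative examples for the predicate above (not exhaustive): with WB
   false it accepts (mem (reg r4)), (mem (post_inc (reg r4))),
   (mem (pre_dec (reg r4))) and (mem (plus (reg r4) (const_int 8))), where
   the offset must be a multiple of 4, greater than -1024 and less than
   1024.  PRE_INC, POST_DEC and the PRE_MODIFY/POST_MODIFY forms are only
   accepted when WB is true.  */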
12751 /* Return TRUE if OP is a memory operand which we can load or store a vector
12752 to/from. TYPE is one of the following values:
12753 0 - Vector load/store (vldr)
12754 1 - Core registers (ldm)
12755 2 - Element/structure loads (vld1)
12758 neon_vector_mem_operand (rtx op, int type, bool strict)
12760 rtx ind;
12762 /* Reject eliminable registers. */
12763 if (! (reload_in_progress || reload_completed)
12764 && ( reg_mentioned_p (frame_pointer_rtx, op)
12765 || reg_mentioned_p (arg_pointer_rtx, op)
12766 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12767 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12768 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12769 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12770 return !strict;
12772 /* Constants are converted into offsets from labels. */
12773 if (!MEM_P (op))
12774 return FALSE;
12776 ind = XEXP (op, 0);
12778 if (reload_completed
12779 && (GET_CODE (ind) == LABEL_REF
12780 || (GET_CODE (ind) == CONST
12781 && GET_CODE (XEXP (ind, 0)) == PLUS
12782 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12783 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12784 return TRUE;
12786 /* Match: (mem (reg)). */
12787 if (REG_P (ind))
12788 return arm_address_register_rtx_p (ind, 0);
12790 /* Allow post-increment with Neon registers. */
12791 if ((type != 1 && GET_CODE (ind) == POST_INC)
12792 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12793 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12795 /* Allow post-increment by register for VLDn. */
12796 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12797 && GET_CODE (XEXP (ind, 1)) == PLUS
12798 && REG_P (XEXP (XEXP (ind, 1), 1)))
12799 return true;
12801 /* Match:
12802 (plus (reg)
12803 (const)). */
12804 if (type == 0
12805 && GET_CODE (ind) == PLUS
12806 && REG_P (XEXP (ind, 0))
12807 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12808 && CONST_INT_P (XEXP (ind, 1))
12809 && INTVAL (XEXP (ind, 1)) > -1024
12810 /* For quad modes, we restrict the constant offset to be slightly less
12811 than what the instruction format permits. We have no such constraint
12812 on double mode offsets. (This must match arm_legitimate_index_p.) */
12813 && (INTVAL (XEXP (ind, 1))
12814 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12815 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12816 return TRUE;
12818 return FALSE;
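/* Illustrative examples for the predicate above (not exhaustive): for
   TYPE 0 (vldr) it accepts (mem (reg r4)), (mem (post_inc (reg r4))),
   (mem (pre_dec (reg r4))) and word-aligned offsets such as
   (mem (plus (reg r4) (const_int 8))); for TYPE 2 (vld1) it additionally
   accepts a register post-modify such as
   (mem (post_modify (reg r4) (plus (reg r4) (reg r5)))),
   corresponding to the "vld1 ..., [r4], r5" addressing form.  */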
12821 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12822 type. */
12824 neon_struct_mem_operand (rtx op)
12826 rtx ind;
12828 /* Reject eliminable registers. */
12829 if (! (reload_in_progress || reload_completed)
12830 && ( reg_mentioned_p (frame_pointer_rtx, op)
12831 || reg_mentioned_p (arg_pointer_rtx, op)
12832 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12833 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12834 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12835 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12836 return FALSE;
12838 /* Constants are converted into offsets from labels. */
12839 if (!MEM_P (op))
12840 return FALSE;
12842 ind = XEXP (op, 0);
12844 if (reload_completed
12845 && (GET_CODE (ind) == LABEL_REF
12846 || (GET_CODE (ind) == CONST
12847 && GET_CODE (XEXP (ind, 0)) == PLUS
12848 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12849 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12850 return TRUE;
12852 /* Match: (mem (reg)). */
12853 if (REG_P (ind))
12854 return arm_address_register_rtx_p (ind, 0);
12856 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12857 if (GET_CODE (ind) == POST_INC
12858 || GET_CODE (ind) == PRE_DEC)
12859 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12861 return FALSE;
12864 /* Return true if X is a register that will be eliminated later on. */
12866 arm_eliminable_register (rtx x)
12868 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12869 || REGNO (x) == ARG_POINTER_REGNUM
12870 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12871 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12874 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12875 coprocessor registers. Otherwise return NO_REGS. */
12877 enum reg_class
12878 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12880 if (mode == HFmode)
12882 if (!TARGET_NEON_FP16)
12883 return GENERAL_REGS;
12884 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12885 return NO_REGS;
12886 return GENERAL_REGS;
12889 /* The neon move patterns handle all legitimate vector and struct
12890 addresses. */
12891 if (TARGET_NEON
12892 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12893 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12894 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12895 || VALID_NEON_STRUCT_MODE (mode)))
12896 return NO_REGS;
12898 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12899 return NO_REGS;
12901 return GENERAL_REGS;
12904 /* Values which must be returned in the most-significant end of the return
12905 register. */
12907 static bool
12908 arm_return_in_msb (const_tree valtype)
12910 return (TARGET_AAPCS_BASED
12911 && BYTES_BIG_ENDIAN
12912 && (AGGREGATE_TYPE_P (valtype)
12913 || TREE_CODE (valtype) == COMPLEX_TYPE
12914 || FIXED_POINT_TYPE_P (valtype)));
12917 /* Return TRUE if X references a SYMBOL_REF. */
12919 symbol_mentioned_p (rtx x)
12921 const char * fmt;
12922 int i;
12924 if (GET_CODE (x) == SYMBOL_REF)
12925 return 1;
12927 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12928 are constant offsets, not symbols. */
12929 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12930 return 0;
12932 fmt = GET_RTX_FORMAT (GET_CODE (x));
12934 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12936 if (fmt[i] == 'E')
12938 int j;
12940 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12941 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12942 return 1;
12944 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12945 return 1;
12948 return 0;
12951 /* Return TRUE if X references a LABEL_REF. */
12953 label_mentioned_p (rtx x)
12955 const char * fmt;
12956 int i;
12958 if (GET_CODE (x) == LABEL_REF)
12959 return 1;
12961 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12962 instruction, but they are constant offsets, not symbols. */
12963 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12964 return 0;
12966 fmt = GET_RTX_FORMAT (GET_CODE (x));
12967 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12969 if (fmt[i] == 'E')
12971 int j;
12973 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12974 if (label_mentioned_p (XVECEXP (x, i, j)))
12975 return 1;
12977 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12978 return 1;
12981 return 0;
12985 tls_mentioned_p (rtx x)
12987 switch (GET_CODE (x))
12989 case CONST:
12990 return tls_mentioned_p (XEXP (x, 0));
12992 case UNSPEC:
12993 if (XINT (x, 1) == UNSPEC_TLS)
12994 return 1;
12996 default:
12997 return 0;
13001 /* Must not copy any rtx that uses a pc-relative address. */
13003 static int
13004 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13006 if (GET_CODE (*x) == UNSPEC
13007 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13008 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13009 return 1;
13010 return 0;
13013 static bool
13014 arm_cannot_copy_insn_p (rtx insn)
13016 /* The tls call insn cannot be copied, as it is paired with a data
13017 word. */
13018 if (recog_memoized (insn) == CODE_FOR_tlscall)
13019 return true;
13021 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13024 enum rtx_code
13025 minmax_code (rtx x)
13027 enum rtx_code code = GET_CODE (x);
13029 switch (code)
13031 case SMAX:
13032 return GE;
13033 case SMIN:
13034 return LE;
13035 case UMIN:
13036 return LEU;
13037 case UMAX:
13038 return GEU;
13039 default:
13040 gcc_unreachable ();
13044 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13046 bool
13047 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13048 int *mask, bool *signed_sat)
13050 /* The high bound must be a power of two minus one. */
13051 int log = exact_log2 (INTVAL (hi_bound) + 1);
13052 if (log == -1)
13053 return false;
13055 /* The low bound is either zero (for usat) or one less than the
13056 negation of the high bound (for ssat). */
13057 if (INTVAL (lo_bound) == 0)
13059 if (mask)
13060 *mask = log;
13061 if (signed_sat)
13062 *signed_sat = false;
13064 return true;
13067 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13069 if (mask)
13070 *mask = log + 1;
13071 if (signed_sat)
13072 *signed_sat = true;
13074 return true;
13077 return false;
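/* Worked example for the matcher above (illustrative only): bounds
   [0, 255] give exact_log2 (256) == 8 with a zero low bound, so *MASK is
   set to 8 and *SIGNED_SAT to false (a usat #8 pattern); bounds
   [-128, 127] give exact_log2 (128) == 7 with LO == -HI - 1, so *MASK is
   set to 8 and *SIGNED_SAT to true (an ssat #8 pattern).  */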
13080 /* Return 1 if memory locations are adjacent. */
13082 adjacent_mem_locations (rtx a, rtx b)
13084 /* We don't guarantee to preserve the order of these memory refs. */
13085 if (volatile_refs_p (a) || volatile_refs_p (b))
13086 return 0;
13088 if ((REG_P (XEXP (a, 0))
13089 || (GET_CODE (XEXP (a, 0)) == PLUS
13090 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13091 && (REG_P (XEXP (b, 0))
13092 || (GET_CODE (XEXP (b, 0)) == PLUS
13093 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13095 HOST_WIDE_INT val0 = 0, val1 = 0;
13096 rtx reg0, reg1;
13097 int val_diff;
13099 if (GET_CODE (XEXP (a, 0)) == PLUS)
13101 reg0 = XEXP (XEXP (a, 0), 0);
13102 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13104 else
13105 reg0 = XEXP (a, 0);
13107 if (GET_CODE (XEXP (b, 0)) == PLUS)
13109 reg1 = XEXP (XEXP (b, 0), 0);
13110 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13112 else
13113 reg1 = XEXP (b, 0);
13115 /* Don't accept any offset that will require multiple
13116 instructions to handle, since this would cause the
13117 arith_adjacentmem pattern to output an overlong sequence. */
13118 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13119 return 0;
13121 /* Don't allow an eliminable register: register elimination can make
13122 the offset too large. */
13123 if (arm_eliminable_register (reg0))
13124 return 0;
13126 val_diff = val1 - val0;
13128 if (arm_ld_sched)
13130 /* If the target has load delay slots, then there's no benefit
13131 to using an ldm instruction unless the offset is zero and
13132 we are optimizing for size. */
13133 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13134 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13135 && (val_diff == 4 || val_diff == -4));
13138 return ((REGNO (reg0) == REGNO (reg1))
13139 && (val_diff == 4 || val_diff == -4));
13142 return 0;
13145 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13146 for load operations, false for store operations. CONSECUTIVE is true
13147 if the register numbers in the operation must be consecutive in the register
13148 bank. RETURN_PC is true if the value is to be loaded into the PC.
13149 The pattern we are trying to match for load is:
13150 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13151 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13154 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13156 where
13157 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13158 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13159 3. If consecutive is TRUE, then for kth register being loaded,
13160 REGNO (R_dk) = REGNO (R_d0) + k.
13161 The pattern for store is similar. */
13162 bool
13163 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13164 bool consecutive, bool return_pc)
13166 HOST_WIDE_INT count = XVECLEN (op, 0);
13167 rtx reg, mem, addr;
13168 unsigned regno;
13169 unsigned first_regno;
13170 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13171 rtx elt;
13172 bool addr_reg_in_reglist = false;
13173 bool update = false;
13174 int reg_increment;
13175 int offset_adj;
13176 int regs_per_val;
13178 /* If not in SImode, then registers must be consecutive
13179 (e.g., VLDM instructions for DFmode). */
13180 gcc_assert ((mode == SImode) || consecutive);
13181 /* Setting return_pc for stores is illegal. */
13182 gcc_assert (!return_pc || load);
13184 /* Set up the increments and the regs per val based on the mode. */
13185 reg_increment = GET_MODE_SIZE (mode);
13186 regs_per_val = reg_increment / 4;
13187 offset_adj = return_pc ? 1 : 0;
13189 if (count <= 1
13190 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13191 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13192 return false;
13194 /* Check if this is a write-back. */
13195 elt = XVECEXP (op, 0, offset_adj);
13196 if (GET_CODE (SET_SRC (elt)) == PLUS)
13198 i++;
13199 base = 1;
13200 update = true;
13202 /* The offset adjustment must be the number of registers being
13203 popped times the size of a single register. */
13204 if (!REG_P (SET_DEST (elt))
13205 || !REG_P (XEXP (SET_SRC (elt), 0))
13206 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13207 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13208 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13209 ((count - 1 - offset_adj) * reg_increment))
13210 return false;
13213 i = i + offset_adj;
13214 base = base + offset_adj;
13215 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13216 success depends on the type: VLDM can do just one reg,
13217 LDM must do at least two. */
13218 if ((count <= i) && (mode == SImode))
13219 return false;
13221 elt = XVECEXP (op, 0, i - 1);
13222 if (GET_CODE (elt) != SET)
13223 return false;
13225 if (load)
13227 reg = SET_DEST (elt);
13228 mem = SET_SRC (elt);
13230 else
13232 reg = SET_SRC (elt);
13233 mem = SET_DEST (elt);
13236 if (!REG_P (reg) || !MEM_P (mem))
13237 return false;
13239 regno = REGNO (reg);
13240 first_regno = regno;
13241 addr = XEXP (mem, 0);
13242 if (GET_CODE (addr) == PLUS)
13244 if (!CONST_INT_P (XEXP (addr, 1)))
13245 return false;
13247 offset = INTVAL (XEXP (addr, 1));
13248 addr = XEXP (addr, 0);
13251 if (!REG_P (addr))
13252 return false;
13254 /* Don't allow SP to be loaded unless it is also the base register. It
13255 guarantees that SP is reset correctly when an LDM instruction
13256 is interrupted. Otherwise, we might end up with a corrupt stack. */
13257 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13258 return false;
13260 for (; i < count; i++)
13262 elt = XVECEXP (op, 0, i);
13263 if (GET_CODE (elt) != SET)
13264 return false;
13266 if (load)
13268 reg = SET_DEST (elt);
13269 mem = SET_SRC (elt);
13271 else
13273 reg = SET_SRC (elt);
13274 mem = SET_DEST (elt);
13277 if (!REG_P (reg)
13278 || GET_MODE (reg) != mode
13279 || REGNO (reg) <= regno
13280 || (consecutive
13281 && (REGNO (reg) !=
13282 (unsigned int) (first_regno + regs_per_val * (i - base))))
13283 /* Don't allow SP to be loaded unless it is also the base register. It
13284 guarantees that SP is reset correctly when an LDM instruction
13285 is interrupted. Otherwise, we might end up with a corrupt stack. */
13286 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13287 || !MEM_P (mem)
13288 || GET_MODE (mem) != mode
13289 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13290 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13291 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13292 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13293 offset + (i - base) * reg_increment))
13294 && (!REG_P (XEXP (mem, 0))
13295 || offset + (i - base) * reg_increment != 0)))
13296 return false;
13298 regno = REGNO (reg);
13299 if (regno == REGNO (addr))
13300 addr_reg_in_reglist = true;
13303 if (load)
13305 if (update && addr_reg_in_reglist)
13306 return false;
13308 /* For Thumb-1, the address register is always modified - either by write-back
13309 or by explicit load. If the pattern does not describe an update,
13310 then the address register must be in the list of loaded registers. */
13311 if (TARGET_THUMB1)
13312 return update || addr_reg_in_reglist;
13315 return true;
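/* Illustrative example of a PARALLEL accepted by the check above (LOAD
   true, MODE SImode, CONSECUTIVE false, RETURN_PC false):

     (parallel
       [(set (reg:SI 4) (mem:SI (reg:SI 0)))
        (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   The first element has offset 0, the destination register numbers
   strictly increase, and neither destination is the stack pointer, so the
   function returns true for ARM/Thumb-2 (an ldmia r0, {r4, r5}); Thumb-1
   additionally requires the base register to be written back or to appear
   in the register list.  */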
13318 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13319 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13320 instruction. ADD_OFFSET is nonzero if the base address register needs
13321 to be modified with an add instruction before we can use it. */
13323 static bool
13324 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13325 int nops, HOST_WIDE_INT add_offset)
13327 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13328 if the offset isn't small enough. The reason 2 ldrs are faster
13329 is because these ARMs are able to do more than one cache access
13330 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13331 whilst the ARM8 has a double bandwidth cache. This means that
13332 these cores can do both an instruction fetch and a data fetch in
13333 a single cycle, so the trick of calculating the address into a
13334 scratch register (one of the result regs) and then doing a load
13335 multiple actually becomes slower (and no smaller in code size).
13336 That is the transformation
13338 ldr rd1, [rbase + offset]
13339 ldr rd2, [rbase + offset + 4]
13343 add rd1, rbase, offset
13344 ldmia rd1, {rd1, rd2}
13346 produces worse code -- '3 cycles + any stalls on rd2' instead of
13347 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13348 access per cycle, the first sequence could never complete in less
13349 than 6 cycles, whereas the ldm sequence would only take 5 and
13350 would make better use of sequential accesses if not hitting the
13351 cache.
13353 We cheat here and test 'arm_ld_sched' which we currently know to
13354 only be true for the ARM8, ARM9 and StrongARM. If this ever
13355 changes, then the test below needs to be reworked. */
13356 if (nops == 2 && arm_ld_sched && add_offset != 0)
13357 return false;
13359 /* XScale has load-store double instructions, but they have stricter
13360 alignment requirements than load-store multiple, so we cannot
13361 use them.
13363 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13364 the pipeline until completion.
13366 NREGS CYCLES
13367 1 3
13368 2 4
13369 3 5
13370 4 6
13372 An ldr instruction takes 1-3 cycles, but does not block the
13373 pipeline.
13375 NREGS CYCLES
13376 1 1-3
13377 2 2-6
13378 3 3-9
13379 4 4-12
13381 Best case ldr will always win. However, the more ldr instructions
13382 we issue, the less likely we are to be able to schedule them well.
13383 Using ldr instructions also increases code size.
13385 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13386 for counts of 3 or 4 regs. */
13387 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13388 return false;
13389 return true;
13392 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13393 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13394 an array ORDER which describes the sequence to use when accessing the
13395 offsets that produces an ascending order. In this sequence, each
13396 offset must be larger by exactly 4 than the previous one. ORDER[0]
13397 must have been filled in with the lowest offset by the caller.
13398 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13399 we use to verify that ORDER produces an ascending order of registers.
13400 Return true if it was possible to construct such an order, false if
13401 not. */
13403 static bool
13404 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13405 int *unsorted_regs)
13407 int i;
13408 for (i = 1; i < nops; i++)
13410 int j;
13412 order[i] = order[i - 1];
13413 for (j = 0; j < nops; j++)
13414 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13416 /* We must find exactly one offset that is higher than the
13417 previous one by 4. */
13418 if (order[i] != order[i - 1])
13419 return false;
13420 order[i] = j;
13422 if (order[i] == order[i - 1])
13423 return false;
13424 /* The register numbers must be ascending. */
13425 if (unsorted_regs != NULL
13426 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13427 return false;
13429 return true;
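/* Worked example for the ordering routine above (illustrative only): with
   NOPS == 3, UNSORTED_OFFSETS == {8, 0, 4} and ORDER[0] already set to 1
   (the index of the lowest offset), the loop finds offset 4 at index 2 and
   offset 8 at index 0, producing ORDER == {1, 2, 0}.  If UNSORTED_REGS
   were {3, 1, 2}, the registers visited in that order are 1, 2, 3, which
   is ascending, so the function returns true; {1, 3, 2} would make it
   return false.  */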
13432 /* Used to determine in a peephole whether a sequence of load
13433 instructions can be changed into a load-multiple instruction.
13434 NOPS is the number of separate load instructions we are examining. The
13435 first NOPS entries in OPERANDS are the destination registers, the
13436 next NOPS entries are memory operands. If this function is
13437 successful, *BASE is set to the common base register of the memory
13438 accesses; *LOAD_OFFSET is set to the first memory location's offset
13439 from that base register.
13440 REGS is an array filled in with the destination register numbers.
13441 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13442 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13443 the sequence of registers in REGS matches the loads from ascending memory
13444 locations, and the function verifies that the register numbers are
13445 themselves ascending. If CHECK_REGS is false, the register numbers
13446 are stored in the order they are found in the operands. */
13447 static int
13448 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13449 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13451 int unsorted_regs[MAX_LDM_STM_OPS];
13452 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13453 int order[MAX_LDM_STM_OPS];
13454 rtx base_reg_rtx = NULL;
13455 int base_reg = -1;
13456 int i, ldm_case;
13458 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13459 easily extended if required. */
13460 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13462 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13464 /* Loop over the operands and check that the memory references are
13465 suitable (i.e. immediate offsets from the same base register). At
13466 the same time, extract the target register, and the memory
13467 offsets. */
13468 for (i = 0; i < nops; i++)
13470 rtx reg;
13471 rtx offset;
13473 /* Convert a subreg of a mem into the mem itself. */
13474 if (GET_CODE (operands[nops + i]) == SUBREG)
13475 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13477 gcc_assert (MEM_P (operands[nops + i]));
13479 /* Don't reorder volatile memory references; it doesn't seem worth
13480 looking for the case where the order is ok anyway. */
13481 if (MEM_VOLATILE_P (operands[nops + i]))
13482 return 0;
13484 offset = const0_rtx;
13486 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13487 || (GET_CODE (reg) == SUBREG
13488 && REG_P (reg = SUBREG_REG (reg))))
13489 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13490 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13491 || (GET_CODE (reg) == SUBREG
13492 && REG_P (reg = SUBREG_REG (reg))))
13493 && (CONST_INT_P (offset
13494 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13496 if (i == 0)
13498 base_reg = REGNO (reg);
13499 base_reg_rtx = reg;
13500 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13501 return 0;
13503 else if (base_reg != (int) REGNO (reg))
13504 /* Not addressed from the same base register. */
13505 return 0;
13507 unsorted_regs[i] = (REG_P (operands[i])
13508 ? REGNO (operands[i])
13509 : REGNO (SUBREG_REG (operands[i])));
13511 /* If it isn't an integer register, or if it overwrites the
13512 base register but isn't the last insn in the list, then
13513 we can't do this. */
13514 if (unsorted_regs[i] < 0
13515 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13516 || unsorted_regs[i] > 14
13517 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13518 return 0;
13520 /* Don't allow SP to be loaded unless it is also the base
13521 register. It guarantees that SP is reset correctly when
13522 an LDM instruction is interrupted. Otherwise, we might
13523 end up with a corrupt stack. */
13524 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13525 return 0;
13527 unsorted_offsets[i] = INTVAL (offset);
13528 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13529 order[0] = i;
13531 else
13532 /* Not a suitable memory address. */
13533 return 0;
13536 /* All the useful information has now been extracted from the
13537 operands into unsorted_regs and unsorted_offsets; additionally,
13538 order[0] has been set to the lowest offset in the list. Sort
13539 the offsets into order, verifying that they are adjacent, and
13540 check that the register numbers are ascending. */
13541 if (!compute_offset_order (nops, unsorted_offsets, order,
13542 check_regs ? unsorted_regs : NULL))
13543 return 0;
13545 if (saved_order)
13546 memcpy (saved_order, order, sizeof order);
13548 if (base)
13550 *base = base_reg;
13552 for (i = 0; i < nops; i++)
13553 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13555 *load_offset = unsorted_offsets[order[0]];
13558 if (TARGET_THUMB1
13559 && !peep2_reg_dead_p (nops, base_reg_rtx))
13560 return 0;
13562 if (unsorted_offsets[order[0]] == 0)
13563 ldm_case = 1; /* ldmia */
13564 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13565 ldm_case = 2; /* ldmib */
13566 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13567 ldm_case = 3; /* ldmda */
13568 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13569 ldm_case = 4; /* ldmdb */
13570 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13571 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13572 ldm_case = 5;
13573 else
13574 return 0;
13576 if (!multiple_operation_profitable_p (false, nops,
13577 ldm_case == 5
13578 ? unsorted_offsets[order[0]] : 0))
13579 return 0;
13581 return ldm_case;
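/* Sketch of the classification above (illustrative only, not from the
   original comments); for four loads from the same base register Rb:

     lowest offset   0 -> ldm_case 1, LDMIA
     lowest offset   4 -> ldm_case 2, LDMIB  (ARM state only)
     highest offset  0 -> ldm_case 3, LDMDA  (ARM state only)
     highest offset -4 -> ldm_case 4, LDMDB  (any 32-bit target)
     any other start offset that is a valid ADD/SUB immediate -> ldm_case 5,
     where the caller first adjusts the base register and then uses LDMIA.  */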
13584 /* Used to determine in a peephole whether a sequence of store instructions can
13585 be changed into a store-multiple instruction.
13586 NOPS is the number of separate store instructions we are examining.
13587 NOPS_TOTAL is the total number of instructions recognized by the peephole
13588 pattern.
13589 The first NOPS entries in OPERANDS are the source registers, the next
13590 NOPS entries are memory operands. If this function is successful, *BASE is
13591 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13592 to the first memory location's offset from that base register. REGS is an
13593 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13594 likewise filled with the corresponding rtx's.
13595 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13596 numbers to an ascending order of stores.
13597 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13598 from ascending memory locations, and the function verifies that the register
13599 numbers are themselves ascending. If CHECK_REGS is false, the register
13600 numbers are stored in the order they are found in the operands. */
13601 static int
13602 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13603 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13604 HOST_WIDE_INT *load_offset, bool check_regs)
13606 int unsorted_regs[MAX_LDM_STM_OPS];
13607 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13608 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13609 int order[MAX_LDM_STM_OPS];
13610 int base_reg = -1;
13611 rtx base_reg_rtx = NULL;
13612 int i, stm_case;
13614 /* Write back of base register is currently only supported for Thumb 1. */
13615 int base_writeback = TARGET_THUMB1;
13617 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13618 easily extended if required. */
13619 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13621 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13623 /* Loop over the operands and check that the memory references are
13624 suitable (i.e. immediate offsets from the same base register). At
13625 the same time, extract the target register, and the memory
13626 offsets. */
13627 for (i = 0; i < nops; i++)
13629 rtx reg;
13630 rtx offset;
13632 /* Convert a subreg of a mem into the mem itself. */
13633 if (GET_CODE (operands[nops + i]) == SUBREG)
13634 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13636 gcc_assert (MEM_P (operands[nops + i]));
13638 /* Don't reorder volatile memory references; it doesn't seem worth
13639 looking for the case where the order is ok anyway. */
13640 if (MEM_VOLATILE_P (operands[nops + i]))
13641 return 0;
13643 offset = const0_rtx;
13645 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13646 || (GET_CODE (reg) == SUBREG
13647 && REG_P (reg = SUBREG_REG (reg))))
13648 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13649 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13650 || (GET_CODE (reg) == SUBREG
13651 && REG_P (reg = SUBREG_REG (reg))))
13652 && (CONST_INT_P (offset
13653 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13655 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13656 ? operands[i] : SUBREG_REG (operands[i]));
13657 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13659 if (i == 0)
13661 base_reg = REGNO (reg);
13662 base_reg_rtx = reg;
13663 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13664 return 0;
13666 else if (base_reg != (int) REGNO (reg))
13667 /* Not addressed from the same base register. */
13668 return 0;
13670 /* If it isn't an integer register, then we can't do this. */
13671 if (unsorted_regs[i] < 0
13672 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13673 /* The effects are unpredictable if the base register is
13674 both updated and stored. */
13675 || (base_writeback && unsorted_regs[i] == base_reg)
13676 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13677 || unsorted_regs[i] > 14)
13678 return 0;
13680 unsorted_offsets[i] = INTVAL (offset);
13681 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13682 order[0] = i;
13684 else
13685 /* Not a suitable memory address. */
13686 return 0;
13689 /* All the useful information has now been extracted from the
13690 operands into unsorted_regs and unsorted_offsets; additionally,
13691 order[0] has been set to the lowest offset in the list. Sort
13692 the offsets into order, verifying that they are adjacent, and
13693 check that the register numbers are ascending. */
13694 if (!compute_offset_order (nops, unsorted_offsets, order,
13695 check_regs ? unsorted_regs : NULL))
13696 return 0;
13698 if (saved_order)
13699 memcpy (saved_order, order, sizeof order);
13701 if (base)
13703 *base = base_reg;
13705 for (i = 0; i < nops; i++)
13707 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13708 if (reg_rtxs)
13709 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13712 *load_offset = unsorted_offsets[order[0]];
13715 if (TARGET_THUMB1
13716 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13717 return 0;
13719 if (unsorted_offsets[order[0]] == 0)
13720 stm_case = 1; /* stmia */
13721 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13722 stm_case = 2; /* stmib */
13723 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13724 stm_case = 3; /* stmda */
13725 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13726 stm_case = 4; /* stmdb */
13727 else
13728 return 0;
13730 if (!multiple_operation_profitable_p (false, nops, 0))
13731 return 0;
13733 return stm_case;
13736 /* Routines for use in generating RTL. */
13738 /* Generate a load-multiple instruction. COUNT is the number of loads in
13739 the instruction; REGS and MEMS are arrays containing the operands.
13740 BASEREG is the base register to be used in addressing the memory operands.
13741 WBACK_OFFSET is nonzero if the instruction should update the base
13742 register. */
13744 static rtx
13745 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13746 HOST_WIDE_INT wback_offset)
13748 int i = 0, j;
13749 rtx result;
13751 if (!multiple_operation_profitable_p (false, count, 0))
13753 rtx seq;
13755 start_sequence ();
13757 for (i = 0; i < count; i++)
13758 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13760 if (wback_offset != 0)
13761 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13763 seq = get_insns ();
13764 end_sequence ();
13766 return seq;
13769 result = gen_rtx_PARALLEL (VOIDmode,
13770 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13771 if (wback_offset != 0)
13773 XVECEXP (result, 0, 0)
13774 = gen_rtx_SET (VOIDmode, basereg,
13775 plus_constant (Pmode, basereg, wback_offset));
13776 i = 1;
13777 count++;
13780 for (j = 0; i < count; i++, j++)
13781 XVECEXP (result, 0, i)
13782 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13784 return result;
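/* Rough shape of the RTL built above (an illustration, not part of the
   original comments): for count == 2, regs = {4, 5}, a base register Rb and
   wback_offset == 8, the PARALLEL is

     (parallel [(set (reg Rb) (plus (reg Rb) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   which is intended to match the load-multiple patterns elsewhere in the
   ARM machine description.  */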
13787 /* Generate a store-multiple instruction. COUNT is the number of stores in
13788 the instruction; REGS and MEMS are arrays containing the operands.
13789 BASEREG is the base register to be used in addressing the memory operands.
13790 WBACK_OFFSET is nonzero if the instruction should update the base
13791 register. */
13793 static rtx
13794 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13795 HOST_WIDE_INT wback_offset)
13797 int i = 0, j;
13798 rtx result;
13800 if (GET_CODE (basereg) == PLUS)
13801 basereg = XEXP (basereg, 0);
13803 if (!multiple_operation_profitable_p (false, count, 0))
13805 rtx seq;
13807 start_sequence ();
13809 for (i = 0; i < count; i++)
13810 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13812 if (wback_offset != 0)
13813 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13815 seq = get_insns ();
13816 end_sequence ();
13818 return seq;
13821 result = gen_rtx_PARALLEL (VOIDmode,
13822 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13823 if (wback_offset != 0)
13825 XVECEXP (result, 0, 0)
13826 = gen_rtx_SET (VOIDmode, basereg,
13827 plus_constant (Pmode, basereg, wback_offset));
13828 i = 1;
13829 count++;
13832 for (j = 0; i < count; i++, j++)
13833 XVECEXP (result, 0, i)
13834 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13836 return result;
13839 /* Generate either a load-multiple or a store-multiple instruction. This
13840 function can be used in situations where we can start with a single MEM
13841 rtx and adjust its address upwards.
13842 COUNT is the number of operations in the instruction, not counting a
13843 possible update of the base register. REGS is an array containing the
13844 register operands.
13845 BASEREG is the base register to be used in addressing the memory operands,
13846 which are constructed from BASEMEM.
13847 WRITE_BACK specifies whether the generated instruction should include an
13848 update of the base register.
13849 OFFSETP is used to pass an offset to and from this function; this offset
13850 is not used when constructing the address (instead BASEMEM should have an
13851 appropriate offset in its address); it is used only for setting
13852 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13854 static rtx
13855 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13856 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13858 rtx mems[MAX_LDM_STM_OPS];
13859 HOST_WIDE_INT offset = *offsetp;
13860 int i;
13862 gcc_assert (count <= MAX_LDM_STM_OPS);
13864 if (GET_CODE (basereg) == PLUS)
13865 basereg = XEXP (basereg, 0);
13867 for (i = 0; i < count; i++)
13869 rtx addr = plus_constant (Pmode, basereg, i * 4);
13870 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13871 offset += 4;
13874 if (write_back)
13875 *offsetp = offset;
13877 if (is_load)
13878 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13879 write_back ? 4 * count : 0);
13880 else
13881 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13882 write_back ? 4 * count : 0);
13885 rtx
13886 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13887 rtx basemem, HOST_WIDE_INT *offsetp)
13889 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13890 offsetp);
13893 rtx
13894 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13895 rtx basemem, HOST_WIDE_INT *offsetp)
13897 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13898 offsetp);
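/* Usage sketch (illustrative only; RB_REG and BASEMEM are hypothetical names,
   not taken from the surrounding code).  A caller that wants to load r0-r3
   from the address held in RB_REG and step past the data, much as
   arm_gen_movmemqi does below, might do:

     int regs[4] = { 0, 1, 2, 3 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, rb_reg, TRUE, basemem, &off));

   On return OFF has been advanced to 16 and the MEM_OFFSETs recorded for the
   generated memory operands describe the four words just loaded.  */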
13901 /* Called from a peephole2 expander to turn a sequence of loads into an
13902 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13903 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13904 is true if we can reorder the registers because their subsequent uses are
13905 commutative.
13906 Returns true iff we could generate a new instruction. */
13908 bool
13909 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13911 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13912 rtx mems[MAX_LDM_STM_OPS];
13913 int i, j, base_reg;
13914 rtx base_reg_rtx;
13915 HOST_WIDE_INT offset;
13916 int write_back = FALSE;
13917 int ldm_case;
13918 rtx addr;
13920 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13921 &base_reg, &offset, !sort_regs);
13923 if (ldm_case == 0)
13924 return false;
13926 if (sort_regs)
13927 for (i = 0; i < nops - 1; i++)
13928 for (j = i + 1; j < nops; j++)
13929 if (regs[i] > regs[j])
13931 int t = regs[i];
13932 regs[i] = regs[j];
13933 regs[j] = t;
13935 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13937 if (TARGET_THUMB1)
13939 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13940 gcc_assert (ldm_case == 1 || ldm_case == 5);
13941 write_back = TRUE;
13944 if (ldm_case == 5)
13946 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13947 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13948 offset = 0;
13949 if (!TARGET_THUMB1)
13951 base_reg = regs[0];
13952 base_reg_rtx = newbase;
13956 for (i = 0; i < nops; i++)
13958 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13959 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13960 SImode, addr, 0);
13962 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13963 write_back ? offset + i * 4 : 0));
13964 return true;
13967 /* Called from a peephole2 expander to turn a sequence of stores into an
13968 STM instruction. OPERANDS are the operands found by the peephole matcher;
13969 NOPS indicates how many separate stores we are trying to combine.
13970 Returns true iff we could generate a new instruction. */
13972 bool
13973 gen_stm_seq (rtx *operands, int nops)
13975 int i;
13976 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13977 rtx mems[MAX_LDM_STM_OPS];
13978 int base_reg;
13979 rtx base_reg_rtx;
13980 HOST_WIDE_INT offset;
13981 int write_back = FALSE;
13982 int stm_case;
13983 rtx addr;
13984 bool base_reg_dies;
13986 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13987 mem_order, &base_reg, &offset, true);
13989 if (stm_case == 0)
13990 return false;
13992 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13994 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13995 if (TARGET_THUMB1)
13997 gcc_assert (base_reg_dies);
13998 write_back = TRUE;
14001 if (stm_case == 5)
14003 gcc_assert (base_reg_dies);
14004 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14005 offset = 0;
14008 addr = plus_constant (Pmode, base_reg_rtx, offset);
14010 for (i = 0; i < nops; i++)
14012 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14013 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14014 SImode, addr, 0);
14016 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14017 write_back ? offset + i * 4 : 0));
14018 return true;
14021 /* Called from a peephole2 expander to turn a sequence of stores that are
14022 preceded by constant loads into an STM instruction. OPERANDS are the
14023 operands found by the peephole matcher; NOPS indicates how many
14024 separate stores we are trying to combine; there are 2 * NOPS
14025 instructions in the peephole.
14026 Returns true iff we could generate a new instruction. */
14028 bool
14029 gen_const_stm_seq (rtx *operands, int nops)
14031 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14032 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14033 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14034 rtx mems[MAX_LDM_STM_OPS];
14035 int base_reg;
14036 rtx base_reg_rtx;
14037 HOST_WIDE_INT offset;
14038 int write_back = FALSE;
14039 int stm_case;
14040 rtx addr;
14041 bool base_reg_dies;
14042 int i, j;
14043 HARD_REG_SET allocated;
14045 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14046 mem_order, &base_reg, &offset, false);
14048 if (stm_case == 0)
14049 return false;
14051 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14053 /* If the same register is used more than once, try to find a free
14054 register. */
14055 CLEAR_HARD_REG_SET (allocated);
14056 for (i = 0; i < nops; i++)
14058 for (j = i + 1; j < nops; j++)
14059 if (regs[i] == regs[j])
14061 rtx t = peep2_find_free_register (0, nops * 2,
14062 TARGET_THUMB1 ? "l" : "r",
14063 SImode, &allocated);
14064 if (t == NULL_RTX)
14065 return false;
14066 reg_rtxs[i] = t;
14067 regs[i] = REGNO (t);
14071 /* Compute an ordering that maps the register numbers to an ascending
14072 sequence. */
14073 reg_order[0] = 0;
14074 for (i = 0; i < nops; i++)
14075 if (regs[i] < regs[reg_order[0]])
14076 reg_order[0] = i;
14078 for (i = 1; i < nops; i++)
14080 int this_order = reg_order[i - 1];
14081 for (j = 0; j < nops; j++)
14082 if (regs[j] > regs[reg_order[i - 1]]
14083 && (this_order == reg_order[i - 1]
14084 || regs[j] < regs[this_order]))
14085 this_order = j;
14086 reg_order[i] = this_order;
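/* Worked example of the ordering above (illustrative): with nops == 3 and
   regs = { 3, 1, 2 } the loops yield reg_order = { 1, 2, 0 }, i.e. the index
   of the smallest register number first, so sorted_regs built below becomes
   { 1, 2, 3 }.  */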
14089 /* Ensure that registers that must be live after the instruction end
14090 up with the correct value. */
14091 for (i = 0; i < nops; i++)
14093 int this_order = reg_order[i];
14094 if ((this_order != mem_order[i]
14095 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14096 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14097 return false;
14100 /* Load the constants. */
14101 for (i = 0; i < nops; i++)
14103 rtx op = operands[2 * nops + mem_order[i]];
14104 sorted_regs[i] = regs[reg_order[i]];
14105 emit_move_insn (reg_rtxs[reg_order[i]], op);
14108 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14110 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14111 if (TARGET_THUMB1)
14113 gcc_assert (base_reg_dies);
14114 write_back = TRUE;
14117 if (stm_case == 5)
14119 gcc_assert (base_reg_dies);
14120 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14121 offset = 0;
14124 addr = plus_constant (Pmode, base_reg_rtx, offset);
14126 for (i = 0; i < nops; i++)
14128 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14129 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14130 SImode, addr, 0);
14132 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14133 write_back ? offset + i * 4 : 0));
14134 return true;
14137 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14138 unaligned copies on processors which support unaligned semantics for those
14139 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14140 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14141 An interleave factor of 1 (the minimum) will perform no interleaving.
14142 Load/store multiple are used for aligned addresses where possible. */
14144 static void
14145 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14146 HOST_WIDE_INT length,
14147 unsigned int interleave_factor)
14149 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14150 int *regnos = XALLOCAVEC (int, interleave_factor);
14151 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14152 HOST_WIDE_INT i, j;
14153 HOST_WIDE_INT remaining = length, words;
14154 rtx halfword_tmp = NULL, byte_tmp = NULL;
14155 rtx dst, src;
14156 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14157 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14158 HOST_WIDE_INT srcoffset, dstoffset;
14159 HOST_WIDE_INT src_autoinc, dst_autoinc;
14160 rtx mem, addr;
14162 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14164 /* Use hard registers if we have aligned source or destination so we can use
14165 load/store multiple with contiguous registers. */
14166 if (dst_aligned || src_aligned)
14167 for (i = 0; i < interleave_factor; i++)
14168 regs[i] = gen_rtx_REG (SImode, i);
14169 else
14170 for (i = 0; i < interleave_factor; i++)
14171 regs[i] = gen_reg_rtx (SImode);
14173 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14174 src = copy_addr_to_reg (XEXP (srcbase, 0));
14176 srcoffset = dstoffset = 0;
14178 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14179 For copying the last bytes we want to subtract this offset again. */
14180 src_autoinc = dst_autoinc = 0;
14182 for (i = 0; i < interleave_factor; i++)
14183 regnos[i] = i;
14185 /* Copy BLOCK_SIZE_BYTES chunks. */
14187 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14189 /* Load words. */
14190 if (src_aligned && interleave_factor > 1)
14192 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14193 TRUE, srcbase, &srcoffset));
14194 src_autoinc += UNITS_PER_WORD * interleave_factor;
14196 else
14198 for (j = 0; j < interleave_factor; j++)
14200 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14201 - src_autoinc));
14202 mem = adjust_automodify_address (srcbase, SImode, addr,
14203 srcoffset + j * UNITS_PER_WORD);
14204 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14206 srcoffset += block_size_bytes;
14209 /* Store words. */
14210 if (dst_aligned && interleave_factor > 1)
14212 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14213 TRUE, dstbase, &dstoffset));
14214 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14216 else
14218 for (j = 0; j < interleave_factor; j++)
14220 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14221 - dst_autoinc));
14222 mem = adjust_automodify_address (dstbase, SImode, addr,
14223 dstoffset + j * UNITS_PER_WORD);
14224 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14226 dstoffset += block_size_bytes;
14229 remaining -= block_size_bytes;
14232 /* Copy any whole words left (note these aren't interleaved with any
14233 subsequent halfword/byte load/stores in the interests of simplicity). */
14235 words = remaining / UNITS_PER_WORD;
14237 gcc_assert (words < interleave_factor);
14239 if (src_aligned && words > 1)
14241 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14242 &srcoffset));
14243 src_autoinc += UNITS_PER_WORD * words;
14245 else
14247 for (j = 0; j < words; j++)
14249 addr = plus_constant (Pmode, src,
14250 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14251 mem = adjust_automodify_address (srcbase, SImode, addr,
14252 srcoffset + j * UNITS_PER_WORD);
14253 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14255 srcoffset += words * UNITS_PER_WORD;
14258 if (dst_aligned && words > 1)
14260 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14261 &dstoffset));
14262 dst_autoinc += words * UNITS_PER_WORD;
14264 else
14266 for (j = 0; j < words; j++)
14268 addr = plus_constant (Pmode, dst,
14269 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14270 mem = adjust_automodify_address (dstbase, SImode, addr,
14271 dstoffset + j * UNITS_PER_WORD);
14272 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14274 dstoffset += words * UNITS_PER_WORD;
14277 remaining -= words * UNITS_PER_WORD;
14279 gcc_assert (remaining < 4);
14281 /* Copy a halfword if necessary. */
14283 if (remaining >= 2)
14285 halfword_tmp = gen_reg_rtx (SImode);
14287 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14288 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14289 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14291 /* Either write out immediately, or delay until we've loaded the last
14292 byte, depending on interleave factor. */
14293 if (interleave_factor == 1)
14295 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14296 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14297 emit_insn (gen_unaligned_storehi (mem,
14298 gen_lowpart (HImode, halfword_tmp)));
14299 halfword_tmp = NULL;
14300 dstoffset += 2;
14303 remaining -= 2;
14304 srcoffset += 2;
14307 gcc_assert (remaining < 2);
14309 /* Copy last byte. */
14311 if ((remaining & 1) != 0)
14313 byte_tmp = gen_reg_rtx (SImode);
14315 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14316 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14317 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14319 if (interleave_factor == 1)
14321 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14322 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14323 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14324 byte_tmp = NULL;
14325 dstoffset++;
14328 remaining--;
14329 srcoffset++;
14332 /* Store last halfword if we haven't done so already. */
14334 if (halfword_tmp)
14336 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14337 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14338 emit_insn (gen_unaligned_storehi (mem,
14339 gen_lowpart (HImode, halfword_tmp)));
14340 dstoffset += 2;
14343 /* Likewise for last byte. */
14345 if (byte_tmp)
14347 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14348 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14349 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14350 dstoffset++;
14353 gcc_assert (remaining == 0 && srcoffset == dstoffset);
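/* Worked example (illustrative): with interleave_factor == 2 and an unaligned
   23-byte copy, block_size_bytes is 8, so the main loop copies 16 bytes in
   two iterations, the whole-words code copies one more word (remaining
   7 -> 3), then one halfword (3 -> 1) and finally one byte, so the assertion
   above holds with remaining == 0 and srcoffset == dstoffset == 23.  */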
14356 /* From mips_adjust_block_mem:
14358 Helper function for doing a loop-based block operation on memory
14359 reference MEM. Each iteration of the loop will operate on LENGTH
14360 bytes of MEM.
14362 Create a new base register for use within the loop and point it to
14363 the start of MEM. Create a new memory reference that uses this
14364 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14366 static void
14367 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14368 rtx *loop_mem)
14370 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14372 /* Although the new mem does not refer to a known location,
14373 it does keep up to LENGTH bytes of alignment. */
14374 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14375 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14378 /* From mips_block_move_loop:
14380 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14381 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14382 the memory regions do not overlap. */
14384 static void
14385 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14386 unsigned int interleave_factor,
14387 HOST_WIDE_INT bytes_per_iter)
14389 rtx label, src_reg, dest_reg, final_src, test;
14390 HOST_WIDE_INT leftover;
14392 leftover = length % bytes_per_iter;
14393 length -= leftover;
14395 /* Create registers and memory references for use within the loop. */
14396 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14397 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14399 /* Calculate the value that SRC_REG should have after the last iteration of
14400 the loop. */
14401 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14402 0, 0, OPTAB_WIDEN);
14404 /* Emit the start of the loop. */
14405 label = gen_label_rtx ();
14406 emit_label (label);
14408 /* Emit the loop body. */
14409 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14410 interleave_factor);
14412 /* Move on to the next block. */
14413 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14414 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14416 /* Emit the loop condition. */
14417 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14418 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14420 /* Mop up any left-over bytes. */
14421 if (leftover)
14422 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
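/* Sketch of the code emitted above (illustrative pseudo-C, not literal RTL):

     src_reg = &src; dest_reg = &dest;
     final_src = src_reg + (length - leftover);
   loop:
     copy bytes_per_iter bytes from src_reg/dest_reg (straight-line code);
     src_reg += bytes_per_iter;  dest_reg += bytes_per_iter;
     if (src_reg != final_src) goto loop;
     copy the leftover bytes, if any, with one more straight-line block.  */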
14425 /* Emit a block move when either the source or destination is unaligned (not
14426 aligned to a four-byte boundary). This may need further tuning depending on
14427 core type, optimize_size setting, etc. */
14429 static int
14430 arm_movmemqi_unaligned (rtx *operands)
14432 HOST_WIDE_INT length = INTVAL (operands[2]);
14434 if (optimize_size)
14436 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14437 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14438 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14439 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14440 or dst_aligned though: allow more interleaving in those cases since the
14441 resulting code can be smaller. */
14442 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14443 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14445 if (length > 12)
14446 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14447 interleave_factor, bytes_per_iter);
14448 else
14449 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14450 interleave_factor);
14452 else
14454 /* Note that the loop created by arm_block_move_unaligned_loop may be
14455 subject to loop unrolling, which makes tuning this condition a little
14456 redundant. */
14457 if (length > 32)
14458 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14459 else
14460 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14463 return 1;
14466 int
14467 arm_gen_movmemqi (rtx *operands)
14469 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14470 HOST_WIDE_INT srcoffset, dstoffset;
14471 int i;
14472 rtx src, dst, srcbase, dstbase;
14473 rtx part_bytes_reg = NULL;
14474 rtx mem;
14476 if (!CONST_INT_P (operands[2])
14477 || !CONST_INT_P (operands[3])
14478 || INTVAL (operands[2]) > 64)
14479 return 0;
14481 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14482 return arm_movmemqi_unaligned (operands);
14484 if (INTVAL (operands[3]) & 3)
14485 return 0;
14487 dstbase = operands[0];
14488 srcbase = operands[1];
14490 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14491 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14493 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14494 out_words_to_go = INTVAL (operands[2]) / 4;
14495 last_bytes = INTVAL (operands[2]) & 3;
14496 dstoffset = srcoffset = 0;
14498 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14499 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14501 for (i = 0; in_words_to_go >= 2; i+=4)
14503 if (in_words_to_go > 4)
14504 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14505 TRUE, srcbase, &srcoffset));
14506 else
14507 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14508 src, FALSE, srcbase,
14509 &srcoffset));
14511 if (out_words_to_go)
14513 if (out_words_to_go > 4)
14514 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14515 TRUE, dstbase, &dstoffset));
14516 else if (out_words_to_go != 1)
14517 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14518 out_words_to_go, dst,
14519 (last_bytes == 0
14520 ? FALSE : TRUE),
14521 dstbase, &dstoffset));
14522 else
14524 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14525 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14526 if (last_bytes != 0)
14528 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14529 dstoffset += 4;
14534 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14535 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14538 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14539 if (out_words_to_go)
14541 rtx sreg;
14543 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14544 sreg = copy_to_reg (mem);
14546 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14547 emit_move_insn (mem, sreg);
14548 in_words_to_go--;
14550 gcc_assert (!in_words_to_go); /* Sanity check */
14553 if (in_words_to_go)
14555 gcc_assert (in_words_to_go > 0);
14557 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14558 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14561 gcc_assert (!last_bytes || part_bytes_reg);
14563 if (BYTES_BIG_ENDIAN && last_bytes)
14565 rtx tmp = gen_reg_rtx (SImode);
14567 /* The bytes we want are in the top end of the word. */
14568 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14569 GEN_INT (8 * (4 - last_bytes))));
14570 part_bytes_reg = tmp;
14572 while (last_bytes)
14574 mem = adjust_automodify_address (dstbase, QImode,
14575 plus_constant (Pmode, dst,
14576 last_bytes - 1),
14577 dstoffset + last_bytes - 1);
14578 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14580 if (--last_bytes)
14582 tmp = gen_reg_rtx (SImode);
14583 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14584 part_bytes_reg = tmp;
14589 else
14591 if (last_bytes > 1)
14593 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14594 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14595 last_bytes -= 2;
14596 if (last_bytes)
14598 rtx tmp = gen_reg_rtx (SImode);
14599 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14600 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14601 part_bytes_reg = tmp;
14602 dstoffset += 2;
14606 if (last_bytes)
14608 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14609 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14613 return 1;
14616 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14617 by mode size. */
14618 inline static rtx
14619 next_consecutive_mem (rtx mem)
14621 enum machine_mode mode = GET_MODE (mem);
14622 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14623 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14625 return adjust_automodify_address (mem, mode, addr, offset);
14628 /* Copy using LDRD/STRD instructions whenever possible.
14629 Returns true upon success. */
14630 bool
14631 gen_movmem_ldrd_strd (rtx *operands)
14633 unsigned HOST_WIDE_INT len;
14634 HOST_WIDE_INT align;
14635 rtx src, dst, base;
14636 rtx reg0;
14637 bool src_aligned, dst_aligned;
14638 bool src_volatile, dst_volatile;
14640 gcc_assert (CONST_INT_P (operands[2]));
14641 gcc_assert (CONST_INT_P (operands[3]));
14643 len = UINTVAL (operands[2]);
14644 if (len > 64)
14645 return false;
14647 /* Maximum alignment we can assume for both src and dst buffers. */
14648 align = INTVAL (operands[3]);
14650 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14651 return false;
14653 /* Place src and dst addresses in registers
14654 and update the corresponding mem rtx. */
14655 dst = operands[0];
14656 dst_volatile = MEM_VOLATILE_P (dst);
14657 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14658 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14659 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14661 src = operands[1];
14662 src_volatile = MEM_VOLATILE_P (src);
14663 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14664 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14665 src = adjust_automodify_address (src, VOIDmode, base, 0);
14667 if (!unaligned_access && !(src_aligned && dst_aligned))
14668 return false;
14670 if (src_volatile || dst_volatile)
14671 return false;
14673 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14674 if (!(dst_aligned || src_aligned))
14675 return arm_gen_movmemqi (operands);
14677 src = adjust_address (src, DImode, 0);
14678 dst = adjust_address (dst, DImode, 0);
14679 while (len >= 8)
14681 len -= 8;
14682 reg0 = gen_reg_rtx (DImode);
14683 if (src_aligned)
14684 emit_move_insn (reg0, src);
14685 else
14686 emit_insn (gen_unaligned_loaddi (reg0, src));
14688 if (dst_aligned)
14689 emit_move_insn (dst, reg0);
14690 else
14691 emit_insn (gen_unaligned_storedi (dst, reg0));
14693 src = next_consecutive_mem (src);
14694 dst = next_consecutive_mem (dst);
14697 gcc_assert (len < 8);
14698 if (len >= 4)
14700 /* At least a word but less than a double-word left to copy. Copy one word. */
14701 reg0 = gen_reg_rtx (SImode);
14702 src = adjust_address (src, SImode, 0);
14703 dst = adjust_address (dst, SImode, 0);
14704 if (src_aligned)
14705 emit_move_insn (reg0, src);
14706 else
14707 emit_insn (gen_unaligned_loadsi (reg0, src));
14709 if (dst_aligned)
14710 emit_move_insn (dst, reg0);
14711 else
14712 emit_insn (gen_unaligned_storesi (dst, reg0));
14714 src = next_consecutive_mem (src);
14715 dst = next_consecutive_mem (dst);
14716 len -= 4;
14719 if (len == 0)
14720 return true;
14722 /* Copy the remaining bytes. */
14723 if (len >= 2)
14725 dst = adjust_address (dst, HImode, 0);
14726 src = adjust_address (src, HImode, 0);
14727 reg0 = gen_reg_rtx (SImode);
14728 if (src_aligned)
14729 emit_insn (gen_zero_extendhisi2 (reg0, src));
14730 else
14731 emit_insn (gen_unaligned_loadhiu (reg0, src));
14733 if (dst_aligned)
14734 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14735 else
14736 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14738 src = next_consecutive_mem (src);
14739 dst = next_consecutive_mem (dst);
14740 if (len == 2)
14741 return true;
14744 dst = adjust_address (dst, QImode, 0);
14745 src = adjust_address (src, QImode, 0);
14746 reg0 = gen_reg_rtx (QImode);
14747 emit_move_insn (reg0, src);
14748 emit_move_insn (dst, reg0);
14749 return true;
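/* Worked example (illustrative): for len == 15 with suitable operands the
   code above emits one DImode copy (8 bytes), one SImode copy (4), one
   HImode copy (2) and a final QImode copy (1), i.e. 8 + 4 + 2 + 1 == 15,
   using plain doubleword/word moves when the access is aligned and the
   unaligned load/store patterns otherwise.  */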
14752 /* Select a dominance comparison mode if possible for a test of the general
14753 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14754 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14755 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14756 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14757 In all cases OP will be either EQ or NE, but we don't need to know which
14758 here. If we are unable to support a dominance comparison we return
14759 CC mode. This will then fail to match for the RTL expressions that
14760 generate this call. */
14761 enum machine_mode
14762 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14764 enum rtx_code cond1, cond2;
14765 int swapped = 0;
14767 /* Currently we will probably get the wrong result if the individual
14768 comparisons are not simple. This also ensures that it is safe to
14769 reverse a comparison if necessary. */
14770 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14771 != CCmode)
14772 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14773 != CCmode))
14774 return CCmode;
14776 /* The if_then_else variant of this tests the second condition if the
14777 first passes, but is true if the first fails. Reverse the first
14778 condition to get a true "inclusive-or" expression. */
14779 if (cond_or == DOM_CC_NX_OR_Y)
14780 cond1 = reverse_condition (cond1);
14782 /* If the comparisons are not equal, and one doesn't dominate the other,
14783 then we can't do this. */
14784 if (cond1 != cond2
14785 && !comparison_dominates_p (cond1, cond2)
14786 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14787 return CCmode;
14789 if (swapped)
14791 enum rtx_code temp = cond1;
14792 cond1 = cond2;
14793 cond2 = temp;
14796 switch (cond1)
14798 case EQ:
14799 if (cond_or == DOM_CC_X_AND_Y)
14800 return CC_DEQmode;
14802 switch (cond2)
14804 case EQ: return CC_DEQmode;
14805 case LE: return CC_DLEmode;
14806 case LEU: return CC_DLEUmode;
14807 case GE: return CC_DGEmode;
14808 case GEU: return CC_DGEUmode;
14809 default: gcc_unreachable ();
14812 case LT:
14813 if (cond_or == DOM_CC_X_AND_Y)
14814 return CC_DLTmode;
14816 switch (cond2)
14818 case LT:
14819 return CC_DLTmode;
14820 case LE:
14821 return CC_DLEmode;
14822 case NE:
14823 return CC_DNEmode;
14824 default:
14825 gcc_unreachable ();
14828 case GT:
14829 if (cond_or == DOM_CC_X_AND_Y)
14830 return CC_DGTmode;
14832 switch (cond2)
14834 case GT:
14835 return CC_DGTmode;
14836 case GE:
14837 return CC_DGEmode;
14838 case NE:
14839 return CC_DNEmode;
14840 default:
14841 gcc_unreachable ();
14844 case LTU:
14845 if (cond_or == DOM_CC_X_AND_Y)
14846 return CC_DLTUmode;
14848 switch (cond2)
14850 case LTU:
14851 return CC_DLTUmode;
14852 case LEU:
14853 return CC_DLEUmode;
14854 case NE:
14855 return CC_DNEmode;
14856 default:
14857 gcc_unreachable ();
14860 case GTU:
14861 if (cond_or == DOM_CC_X_AND_Y)
14862 return CC_DGTUmode;
14864 switch (cond2)
14866 case GTU:
14867 return CC_DGTUmode;
14868 case GEU:
14869 return CC_DGEUmode;
14870 case NE:
14871 return CC_DNEmode;
14872 default:
14873 gcc_unreachable ();
14876 /* The remaining cases only occur when both comparisons are the
14877 same. */
14878 case NE:
14879 gcc_assert (cond1 == cond2);
14880 return CC_DNEmode;
14882 case LE:
14883 gcc_assert (cond1 == cond2);
14884 return CC_DLEmode;
14886 case GE:
14887 gcc_assert (cond1 == cond2);
14888 return CC_DGEmode;
14890 case LEU:
14891 gcc_assert (cond1 == cond2);
14892 return CC_DLEUmode;
14894 case GEU:
14895 gcc_assert (cond1 == cond2);
14896 return CC_DGEUmode;
14898 default:
14899 gcc_unreachable ();
14903 enum machine_mode
14904 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14906 /* All floating point compares return CCFP if it is an equality
14907 comparison, and CCFPE otherwise. */
14908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14910 switch (op)
14912 case EQ:
14913 case NE:
14914 case UNORDERED:
14915 case ORDERED:
14916 case UNLT:
14917 case UNLE:
14918 case UNGT:
14919 case UNGE:
14920 case UNEQ:
14921 case LTGT:
14922 return CCFPmode;
14924 case LT:
14925 case LE:
14926 case GT:
14927 case GE:
14928 return CCFPEmode;
14930 default:
14931 gcc_unreachable ();
14935 /* A compare with a shifted operand. Because of canonicalization, the
14936 comparison will have to be swapped when we emit the assembler. */
14937 if (GET_MODE (y) == SImode
14938 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14939 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14940 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14941 || GET_CODE (x) == ROTATERT))
14942 return CC_SWPmode;
14944 /* This operation is performed swapped, but since we only rely on the Z
14945 flag we don't need an additional mode. */
14946 if (GET_MODE (y) == SImode
14947 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14948 && GET_CODE (x) == NEG
14949 && (op == EQ || op == NE))
14950 return CC_Zmode;
14952 /* This is a special case that is used by combine to allow a
14953 comparison of a shifted byte load to be split into a zero-extend
14954 followed by a comparison of the shifted integer (only valid for
14955 equalities and unsigned inequalities). */
14956 if (GET_MODE (x) == SImode
14957 && GET_CODE (x) == ASHIFT
14958 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14959 && GET_CODE (XEXP (x, 0)) == SUBREG
14960 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14961 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14962 && (op == EQ || op == NE
14963 || op == GEU || op == GTU || op == LTU || op == LEU)
14964 && CONST_INT_P (y))
14965 return CC_Zmode;
14967 /* A construct for a conditional compare: if the false arm contains
14968 0, then both conditions must be true; otherwise either condition
14969 must be true. Not all conditions are possible, so CCmode is
14970 returned if it can't be done. */
14971 if (GET_CODE (x) == IF_THEN_ELSE
14972 && (XEXP (x, 2) == const0_rtx
14973 || XEXP (x, 2) == const1_rtx)
14974 && COMPARISON_P (XEXP (x, 0))
14975 && COMPARISON_P (XEXP (x, 1)))
14976 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14977 INTVAL (XEXP (x, 2)));
14979 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14980 if (GET_CODE (x) == AND
14981 && (op == EQ || op == NE)
14982 && COMPARISON_P (XEXP (x, 0))
14983 && COMPARISON_P (XEXP (x, 1)))
14984 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14985 DOM_CC_X_AND_Y);
14987 if (GET_CODE (x) == IOR
14988 && (op == EQ || op == NE)
14989 && COMPARISON_P (XEXP (x, 0))
14990 && COMPARISON_P (XEXP (x, 1)))
14991 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14992 DOM_CC_X_OR_Y);
14994 /* An operation (on Thumb) where we want to test for a single bit.
14995 This is done by shifting that bit up into the top bit of a
14996 scratch register; we can then branch on the sign bit. */
14997 if (TARGET_THUMB1
14998 && GET_MODE (x) == SImode
14999 && (op == EQ || op == NE)
15000 && GET_CODE (x) == ZERO_EXTRACT
15001 && XEXP (x, 1) == const1_rtx)
15002 return CC_Nmode;
15004 /* An operation that sets the condition codes as a side-effect, the
15005 V flag is not set correctly, so we can only use comparisons where
15006 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15007 instead.) */
15008 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15009 if (GET_MODE (x) == SImode
15010 && y == const0_rtx
15011 && (op == EQ || op == NE || op == LT || op == GE)
15012 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15013 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15014 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15015 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15016 || GET_CODE (x) == LSHIFTRT
15017 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15018 || GET_CODE (x) == ROTATERT
15019 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15020 return CC_NOOVmode;
15022 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15023 return CC_Zmode;
15025 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15026 && GET_CODE (x) == PLUS
15027 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15028 return CC_Cmode;
15030 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15032 switch (op)
15034 case EQ:
15035 case NE:
15036 /* A DImode comparison against zero can be implemented by
15037 or'ing the two halves together. */
15038 if (y == const0_rtx)
15039 return CC_Zmode;
15041 /* We can do an equality test in three Thumb instructions. */
15042 if (!TARGET_32BIT)
15043 return CC_Zmode;
15045 /* FALLTHROUGH */
15047 case LTU:
15048 case LEU:
15049 case GTU:
15050 case GEU:
15051 /* DImode unsigned comparisons can be implemented by cmp +
15052 cmpeq without a scratch register. Not worth doing in
15053 Thumb-2. */
15054 if (TARGET_32BIT)
15055 return CC_CZmode;
15057 /* FALLTHROUGH */
15059 case LT:
15060 case LE:
15061 case GT:
15062 case GE:
15063 /* DImode signed and unsigned comparisons can be implemented
15064 by cmp + sbcs with a scratch register, but that does not
15065 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15066 gcc_assert (op != EQ && op != NE);
15067 return CC_NCVmode;
15069 default:
15070 gcc_unreachable ();
15074 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15075 return GET_MODE (x);
15077 return CCmode;
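/* A few concrete cases of the selection above (illustrative; the operand
   modes are assumptions): an SImode (plus a b) compared LTU against a yields
   CC_Cmode (a carry-based overflow test); a DImode value compared EQ against
   const0_rtx yields CC_Zmode (OR the halves and test Z); a signed DImode GT
   comparison on a 32-bit target yields CC_NCVmode.  */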
15080 /* X and Y are two things to compare using CODE. Emit the compare insn and
15081 return the rtx for register 0 in the proper mode. FP means this is a
15082 floating point compare: I don't think that it is needed on the arm. */
15083 rtx
15084 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15086 enum machine_mode mode;
15087 rtx cc_reg;
15088 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15090 /* We might have X as a constant, Y as a register because of the predicates
15091 used for cmpdi. If so, force X to a register here. */
15092 if (dimode_comparison && !REG_P (x))
15093 x = force_reg (DImode, x);
15095 mode = SELECT_CC_MODE (code, x, y);
15096 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15098 if (dimode_comparison
15099 && mode != CC_CZmode)
15101 rtx clobber, set;
15103 /* To compare two non-zero values for equality, XOR them and
15104 then compare against zero. Not used for ARM mode; there
15105 CC_CZmode is cheaper. */
15106 if (mode == CC_Zmode && y != const0_rtx)
15108 gcc_assert (!reload_completed);
15109 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15110 y = const0_rtx;
15113 /* A scratch register is required. */
15114 if (reload_completed)
15115 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15116 else
15117 scratch = gen_rtx_SCRATCH (SImode);
15119 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15120 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15121 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15123 else
15124 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15126 return cc_reg;
15129 /* Generate a sequence of insns that will generate the correct return
15130 address mask depending on the physical architecture that the program
15131 is running on. */
15132 rtx
15133 arm_gen_return_addr_mask (void)
15135 rtx reg = gen_reg_rtx (Pmode);
15137 emit_insn (gen_return_addr_mask (reg));
15138 return reg;
15141 void
15142 arm_reload_in_hi (rtx *operands)
15144 rtx ref = operands[1];
15145 rtx base, scratch;
15146 HOST_WIDE_INT offset = 0;
15148 if (GET_CODE (ref) == SUBREG)
15150 offset = SUBREG_BYTE (ref);
15151 ref = SUBREG_REG (ref);
15154 if (REG_P (ref))
15156 /* We have a pseudo which has been spilt onto the stack; there
15157 are two cases here: the first where there is a simple
15158 stack-slot replacement and a second where the stack-slot is
15159 out of range, or is used as a subreg. */
15160 if (reg_equiv_mem (REGNO (ref)))
15162 ref = reg_equiv_mem (REGNO (ref));
15163 base = find_replacement (&XEXP (ref, 0));
15165 else
15166 /* The slot is out of range, or was dressed up in a SUBREG. */
15167 base = reg_equiv_address (REGNO (ref));
15169 else
15170 base = find_replacement (&XEXP (ref, 0));
15172 /* Handle the case where the address is too complex to be offset by 1. */
15173 if (GET_CODE (base) == MINUS
15174 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15176 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15178 emit_set_insn (base_plus, base);
15179 base = base_plus;
15181 else if (GET_CODE (base) == PLUS)
15183 /* The addend must be CONST_INT, or we would have dealt with it above. */
15184 HOST_WIDE_INT hi, lo;
15186 offset += INTVAL (XEXP (base, 1));
15187 base = XEXP (base, 0);
15189 /* Rework the address into a legal sequence of insns. */
15190 /* Valid range for lo is -4095 -> 4095 */
15191 lo = (offset >= 0
15192 ? (offset & 0xfff)
15193 : -((-offset) & 0xfff));
15195 /* Corner case, if lo is the max offset then we would be out of range
15196 once we have added the additional 1 below, so bump the msb into the
15197 pre-loading insn(s). */
15198 if (lo == 4095)
15199 lo &= 0x7ff;
15201 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15202 ^ (HOST_WIDE_INT) 0x80000000)
15203 - (HOST_WIDE_INT) 0x80000000);
15205 gcc_assert (hi + lo == offset);
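/* Numeric illustration (not from the original comments): offset == 0x1234
   splits into lo == 0x234 and hi == 0x1000.  For the corner case
   offset == 4095, lo is first 0xfff and is reduced to 0x7ff, so hi becomes
   0x800 and the "offset + 1" access below still fits the 12-bit range.  */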
15207 if (hi != 0)
15209 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15211 /* Get the base address; addsi3 knows how to handle constants
15212 that require more than one insn. */
15213 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15214 base = base_plus;
15215 offset = lo;
15219 /* Operands[2] may overlap operands[0] (though it won't overlap
15220 operands[1]), that's why we asked for a DImode reg -- so we can
15221 use the bit that does not overlap. */
15222 if (REGNO (operands[2]) == REGNO (operands[0]))
15223 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15224 else
15225 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15227 emit_insn (gen_zero_extendqisi2 (scratch,
15228 gen_rtx_MEM (QImode,
15229 plus_constant (Pmode, base,
15230 offset))));
15231 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15232 gen_rtx_MEM (QImode,
15233 plus_constant (Pmode, base,
15234 offset + 1))));
15235 if (!BYTES_BIG_ENDIAN)
15236 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15237 gen_rtx_IOR (SImode,
15238 gen_rtx_ASHIFT
15239 (SImode,
15240 gen_rtx_SUBREG (SImode, operands[0], 0),
15241 GEN_INT (8)),
15242 scratch));
15243 else
15244 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15245 gen_rtx_IOR (SImode,
15246 gen_rtx_ASHIFT (SImode, scratch,
15247 GEN_INT (8)),
15248 gen_rtx_SUBREG (SImode, operands[0], 0)));
15251 /* Handle storing a half-word to memory during reload by synthesizing as two
15252 byte stores. Take care not to clobber the input values until after we
15253 have moved them somewhere safe. This code assumes that if the DImode
15254 scratch in operands[2] overlaps either the input value or output address
15255 in some way, then that value must die in this insn (we absolutely need
15256 two scratch registers for some corner cases). */
15257 void
15258 arm_reload_out_hi (rtx *operands)
15260 rtx ref = operands[0];
15261 rtx outval = operands[1];
15262 rtx base, scratch;
15263 HOST_WIDE_INT offset = 0;
15265 if (GET_CODE (ref) == SUBREG)
15267 offset = SUBREG_BYTE (ref);
15268 ref = SUBREG_REG (ref);
15271 if (REG_P (ref))
15273 /* We have a pseudo which has been spilt onto the stack; there
15274 are two cases here: the first where there is a simple
15275 stack-slot replacement and a second where the stack-slot is
15276 out of range, or is used as a subreg. */
15277 if (reg_equiv_mem (REGNO (ref)))
15279 ref = reg_equiv_mem (REGNO (ref));
15280 base = find_replacement (&XEXP (ref, 0));
15282 else
15283 /* The slot is out of range, or was dressed up in a SUBREG. */
15284 base = reg_equiv_address (REGNO (ref));
15286 else
15287 base = find_replacement (&XEXP (ref, 0));
15289 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15291 /* Handle the case where the address is too complex to be offset by 1. */
15292 if (GET_CODE (base) == MINUS
15293 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15295 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15297 /* Be careful not to destroy OUTVAL. */
15298 if (reg_overlap_mentioned_p (base_plus, outval))
15300 /* Updating base_plus might destroy outval, see if we can
15301 swap the scratch and base_plus. */
15302 if (!reg_overlap_mentioned_p (scratch, outval))
15304 rtx tmp = scratch;
15305 scratch = base_plus;
15306 base_plus = tmp;
15308 else
15310 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15312 /* Be conservative and copy OUTVAL into the scratch now,
15313 this should only be necessary if outval is a subreg
15314 of something larger than a word. */
15315 /* XXX Might this clobber base? I can't see how it can,
15316 since scratch is known to overlap with OUTVAL, and
15317 must be wider than a word. */
15318 emit_insn (gen_movhi (scratch_hi, outval));
15319 outval = scratch_hi;
15323 emit_set_insn (base_plus, base);
15324 base = base_plus;
15326 else if (GET_CODE (base) == PLUS)
15328 /* The addend must be CONST_INT, or we would have dealt with it above. */
15329 HOST_WIDE_INT hi, lo;
15331 offset += INTVAL (XEXP (base, 1));
15332 base = XEXP (base, 0);
15334 /* Rework the address into a legal sequence of insns. */
15335 /* Valid range for lo is -4095 -> 4095 */
15336 lo = (offset >= 0
15337 ? (offset & 0xfff)
15338 : -((-offset) & 0xfff));
15340 /* Corner case, if lo is the max offset then we would be out of range
15341 once we have added the additional 1 below, so bump the msb into the
15342 pre-loading insn(s). */
15343 if (lo == 4095)
15344 lo &= 0x7ff;
15346 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15347 ^ (HOST_WIDE_INT) 0x80000000)
15348 - (HOST_WIDE_INT) 0x80000000);
15350 gcc_assert (hi + lo == offset);
15352 if (hi != 0)
15354 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15356 /* Be careful not to destroy OUTVAL. */
15357 if (reg_overlap_mentioned_p (base_plus, outval))
15359 /* Updating base_plus might destroy outval, see if we
15360 can swap the scratch and base_plus. */
15361 if (!reg_overlap_mentioned_p (scratch, outval))
15363 rtx tmp = scratch;
15364 scratch = base_plus;
15365 base_plus = tmp;
15367 else
15369 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15371 /* Be conservative and copy outval into scratch now,
15372 this should only be necessary if outval is a
15373 subreg of something larger than a word. */
15374 /* XXX Might this clobber base? I can't see how it
15375 can, since scratch is known to overlap with
15376 outval. */
15377 emit_insn (gen_movhi (scratch_hi, outval));
15378 outval = scratch_hi;
15382 /* Get the base address; addsi3 knows how to handle constants
15383 that require more than one insn. */
15384 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15385 base = base_plus;
15386 offset = lo;
15390 if (BYTES_BIG_ENDIAN)
15392 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15393 plus_constant (Pmode, base,
15394 offset + 1)),
15395 gen_lowpart (QImode, outval)));
15396 emit_insn (gen_lshrsi3 (scratch,
15397 gen_rtx_SUBREG (SImode, outval, 0),
15398 GEN_INT (8)));
15399 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15400 offset)),
15401 gen_lowpart (QImode, scratch)));
15403 else
15405 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15406 offset)),
15407 gen_lowpart (QImode, outval)));
15408 emit_insn (gen_lshrsi3 (scratch,
15409 gen_rtx_SUBREG (SImode, outval, 0),
15410 GEN_INT (8)));
15411 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15412 plus_constant (Pmode, base,
15413 offset + 1)),
15414 gen_lowpart (QImode, scratch)));
15418 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15419 (padded to the size of a word) should be passed in a register. */
15421 static bool
15422 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15424 if (TARGET_AAPCS_BASED)
15425 return must_pass_in_stack_var_size (mode, type);
15426 else
15427 return must_pass_in_stack_var_size_or_pad (mode, type);
15431 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15432 Return true if an argument passed on the stack should be padded upwards,
15433 i.e. if the least-significant byte has useful data.
15434 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15435 aggregate types are placed in the lowest memory address. */
15437 bool
15438 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15440 if (!TARGET_AAPCS_BASED)
15441 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15443 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15444 return false;
15446 return true;
15450 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15451 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15452 register has useful data, and return the opposite if the most
15453 significant byte does. */
15455 bool
15456 arm_pad_reg_upward (enum machine_mode mode,
15457 tree type, int first ATTRIBUTE_UNUSED)
15459 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15461 /* For AAPCS, small aggregates, small fixed-point types,
15462 and small complex types are always padded upwards. */
15463 if (type)
15465 if ((AGGREGATE_TYPE_P (type)
15466 || TREE_CODE (type) == COMPLEX_TYPE
15467 || FIXED_POINT_TYPE_P (type))
15468 && int_size_in_bytes (type) <= 4)
15469 return true;
15471 else
15473 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15474 && GET_MODE_SIZE (mode) <= 4)
15475 return true;
15479 /* Otherwise, use default padding. */
15480 return !BYTES_BIG_ENDIAN;
15483 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15484 assuming that the address in the base register is word aligned. */
15485 bool
15486 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15488 HOST_WIDE_INT max_offset;
15490 /* Offset must be a multiple of 4 in Thumb mode. */
15491 if (TARGET_THUMB2 && ((offset & 3) != 0))
15492 return false;
15494 if (TARGET_THUMB2)
15495 max_offset = 1020;
15496 else if (TARGET_ARM)
15497 max_offset = 255;
15498 else
15499 return false;
15501 return ((offset <= max_offset) && (offset >= -max_offset));
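/* Illustrative examples (assumed values, not part of the original code):
   in ARM state an offset of 252 is accepted while 260 is rejected
   (limit 255); in Thumb-2 state 1020 is accepted, 1022 is rejected
   because it is not a multiple of 4, and 1024 because it exceeds 1020.  */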
15504 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15505 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15506 Assumes that the address in the base register RN is word aligned. Pattern
15507 guarantees that both memory accesses use the same base register,
15508 the offsets are constants within the range, and the gap between the offsets is 4.
15509 If reload is complete then check that registers are legal. WBACK indicates whether
15510 address is updated. LOAD indicates whether memory access is load or store. */
15511 bool
15512 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15513 bool wback, bool load)
15515 unsigned int t, t2, n;
15517 if (!reload_completed)
15518 return true;
15520 if (!offset_ok_for_ldrd_strd (offset))
15521 return false;
15523 t = REGNO (rt);
15524 t2 = REGNO (rt2);
15525 n = REGNO (rn);
15527 if ((TARGET_THUMB2)
15528 && ((wback && (n == t || n == t2))
15529 || (t == SP_REGNUM)
15530 || (t == PC_REGNUM)
15531 || (t2 == SP_REGNUM)
15532 || (t2 == PC_REGNUM)
15533 || (!load && (n == PC_REGNUM))
15534 || (load && (t == t2))
15535 /* Triggers Cortex-M3 LDRD errata. */
15536 || (!wback && load && fix_cm3_ldrd && (n == t))))
15537 return false;
15539 if ((TARGET_ARM)
15540 && ((wback && (n == t || n == t2))
15541 || (t2 == PC_REGNUM)
15542 || (t % 2 != 0) /* First destination register is not even. */
15543 || (t2 != t + 1)
15544 /* PC can be used as base register (for offset addressing only),
15545 but it is deprecated. */
15546 || (n == PC_REGNUM)))
15547 return false;
15549 return true;
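/* Illustrative examples (assumed register choices, not from the original
   comments): with no write-back, "ldrd r4, r5, [r6]" passes the ARM-state
   checks above (r4 is even and r5 == r4 + 1), whereas a pair starting with
   r5 fails because the first destination register is odd.  In Thumb-2
   state "ldrd r0, r3, [r2]" is acceptable, since the destination registers
   need not be consecutive there.  */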
15552 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15553 operand MEM's address contains an immediate offset from the base
15554 register and has no side effects, in which case it sets BASE and
15555 OFFSET accordingly. */
15556 static bool
15557 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15559 rtx addr;
15561 gcc_assert (base != NULL && offset != NULL);
15563 /* TODO: Handle more general memory operand patterns, such as
15564 PRE_DEC and PRE_INC. */
15566 if (side_effects_p (mem))
15567 return false;
15569 /* Can't deal with subregs. */
15570 if (GET_CODE (mem) == SUBREG)
15571 return false;
15573 gcc_assert (MEM_P (mem));
15575 *offset = const0_rtx;
15577 addr = XEXP (mem, 0);
15579 /* If addr isn't valid for DImode, then we can't handle it. */
15580 if (!arm_legitimate_address_p (DImode, addr,
15581 reload_in_progress || reload_completed))
15582 return false;
15584 if (REG_P (addr))
15586 *base = addr;
15587 return true;
15589 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15591 *base = XEXP (addr, 0);
15592 *offset = XEXP (addr, 1);
15593 return (REG_P (*base) && CONST_INT_P (*offset));
15596 return false;
15599 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15601 /* Called from a peephole2 to replace two word-size accesses with a
15602 single LDRD/STRD instruction. Returns true iff we can generate a
15603 new instruction sequence. That is, both accesses use the same base
15604 register and the gap between constant offsets is 4. This function
15605 may reorder its operands to match ldrd/strd RTL templates.
15606 OPERANDS are the operands found by the peephole matcher;
15607 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15608 corresponding memory operands. LOAD indicates whether the access
15609 is a load or a store. CONST_STORE indicates a store of constant
15610 integer values held in OPERANDS[4,5] and assumes that the pattern
15611 is 4 insns long, for the purpose of checking dead registers.
15612 COMMUTE indicates that register operands may be reordered. */
15613 bool
15614 gen_operands_ldrd_strd (rtx *operands, bool load,
15615 bool const_store, bool commute)
15617 int nops = 2;
15618 HOST_WIDE_INT offsets[2], offset;
15619 rtx base = NULL_RTX;
15620 rtx cur_base, cur_offset, tmp;
15621 int i, gap;
15622 HARD_REG_SET regset;
15624 gcc_assert (!const_store || !load);
15625 /* Check that the memory references are immediate offsets from the
15626 same base register. Extract the base register, the destination
15627 registers, and the corresponding memory offsets. */
15628 for (i = 0; i < nops; i++)
15630 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15631 return false;
15633 if (i == 0)
15634 base = cur_base;
15635 else if (REGNO (base) != REGNO (cur_base))
15636 return false;
15638 offsets[i] = INTVAL (cur_offset);
15639 if (GET_CODE (operands[i]) == SUBREG)
15641 tmp = SUBREG_REG (operands[i]);
15642 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15643 operands[i] = tmp;
15647 /* Make sure there is no dependency between the individual loads. */
15648 if (load && REGNO (operands[0]) == REGNO (base))
15649 return false; /* RAW */
15651 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15652 return false; /* WAW */
15654 /* If the same input register is used in both stores
15655 when storing different constants, try to find a free register.
15656 For example, the code
15657 mov r0, 0
15658 str r0, [r2]
15659 mov r0, 1
15660 str r0, [r2, #4]
15661 can be transformed into
15662 mov r1, 0
15663 strd r1, r0, [r2]
15664 in Thumb mode assuming that r1 is free. */
15665 if (const_store
15666 && REGNO (operands[0]) == REGNO (operands[1])
15667 && INTVAL (operands[4]) != INTVAL (operands[5]))
15669 if (TARGET_THUMB2)
15671 CLEAR_HARD_REG_SET (regset);
15672 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15673 if (tmp == NULL_RTX)
15674 return false;
15676 /* Use the new register in the first load to ensure that
15677 if the original input register is not dead after peephole,
15678 then it will have the correct constant value. */
15679 operands[0] = tmp;
15681 else if (TARGET_ARM)
15683 return false;
15684 int regno = REGNO (operands[0]);
15685 if (!peep2_reg_dead_p (4, operands[0]))
15687 /* When the input register is even and is not dead after the
15688 pattern, it has to hold the second constant but we cannot
15689 form a legal STRD in ARM mode with this register as the second
15690 register. */
15691 if (regno % 2 == 0)
15692 return false;
15694 /* Is regno-1 free? */
15695 SET_HARD_REG_SET (regset);
15696 CLEAR_HARD_REG_BIT(regset, regno - 1);
15697 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15698 if (tmp == NULL_RTX)
15699 return false;
15701 operands[0] = tmp;
15703 else
15705 /* Find a DImode register. */
15706 CLEAR_HARD_REG_SET (regset);
15707 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15708 if (tmp != NULL_RTX)
15710 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15711 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15713 else
15715 /* Can we use the input register to form a DI register? */
15716 SET_HARD_REG_SET (regset);
15717 CLEAR_HARD_REG_BIT(regset,
15718 regno % 2 == 0 ? regno + 1 : regno - 1);
15719 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15720 if (tmp == NULL_RTX)
15721 return false;
15722 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15726 gcc_assert (operands[0] != NULL_RTX);
15727 gcc_assert (operands[1] != NULL_RTX);
15728 gcc_assert (REGNO (operands[0]) % 2 == 0);
15729 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15733 /* Make sure the instructions are ordered with lower memory access first. */
15734 if (offsets[0] > offsets[1])
15736 gap = offsets[0] - offsets[1];
15737 offset = offsets[1];
15739 /* Swap the instructions such that lower memory is accessed first. */
15740 SWAP_RTX (operands[0], operands[1]);
15741 SWAP_RTX (operands[2], operands[3]);
15742 if (const_store)
15743 SWAP_RTX (operands[4], operands[5]);
15745 else
15747 gap = offsets[1] - offsets[0];
15748 offset = offsets[0];
15751 /* Make sure accesses are to consecutive memory locations. */
15752 if (gap != 4)
15753 return false;
15755 /* Make sure we generate legal instructions. */
15756 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15757 false, load))
15758 return true;
15760 /* In Thumb state, where registers are almost unconstrained, there
15761 is little hope to fix it. */
15762 if (TARGET_THUMB2)
15763 return false;
15765 if (load && commute)
15767 /* Try reordering registers. */
15768 SWAP_RTX (operands[0], operands[1]);
15769 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15770 false, load))
15771 return true;
15774 if (const_store)
15776 /* If input registers are dead after this pattern, they can be
15777 reordered or replaced by other registers that are free in the
15778 current pattern. */
15779 if (!peep2_reg_dead_p (4, operands[0])
15780 || !peep2_reg_dead_p (4, operands[1]))
15781 return false;
15783 /* Try to reorder the input registers. */
15784 /* For example, the code
15785 mov r0, 0
15786 mov r1, 1
15787 str r1, [r2]
15788 str r0, [r2, #4]
15789 can be transformed into
15790 mov r1, 0
15791 mov r0, 1
15792 strd r0, r1, [r2]
15794 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15795 false, false))
15797 SWAP_RTX (operands[0], operands[1]);
15798 return true;
15801 /* Try to find a free DI register. */
15802 CLEAR_HARD_REG_SET (regset);
15803 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15804 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15805 while (true)
15807 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15808 if (tmp == NULL_RTX)
15809 return false;
15811 /* DREG must be an even-numbered register in DImode.
15812 Split it into SI registers. */
15813 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15814 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15815 gcc_assert (operands[0] != NULL_RTX);
15816 gcc_assert (operands[1] != NULL_RTX);
15817 gcc_assert (REGNO (operands[0]) % 2 == 0);
15818 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15820 return (operands_ok_ldrd_strd (operands[0], operands[1],
15821 base, offset,
15822 false, load));
15826 return false;
15828 #undef SWAP_RTX
15833 /* Print a symbolic form of X to the debug file, F. */
15834 static void
15835 arm_print_value (FILE *f, rtx x)
15837 switch (GET_CODE (x))
15839 case CONST_INT:
15840 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15841 return;
15843 case CONST_DOUBLE:
15844 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15845 return;
15847 case CONST_VECTOR:
15849 int i;
15851 fprintf (f, "<");
15852 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15854 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15855 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15856 fputc (',', f);
15858 fprintf (f, ">");
15860 return;
15862 case CONST_STRING:
15863 fprintf (f, "\"%s\"", XSTR (x, 0));
15864 return;
15866 case SYMBOL_REF:
15867 fprintf (f, "`%s'", XSTR (x, 0));
15868 return;
15870 case LABEL_REF:
15871 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15872 return;
15874 case CONST:
15875 arm_print_value (f, XEXP (x, 0));
15876 return;
15878 case PLUS:
15879 arm_print_value (f, XEXP (x, 0));
15880 fprintf (f, "+");
15881 arm_print_value (f, XEXP (x, 1));
15882 return;
15884 case PC:
15885 fprintf (f, "pc");
15886 return;
15888 default:
15889 fprintf (f, "????");
15890 return;
15894 /* Routines for manipulation of the constant pool. */
15896 /* Arm instructions cannot load a large constant directly into a
15897 register; they have to come from a pc relative load. The constant
15898 must therefore be placed in the addressable range of the pc
15899 relative load. Depending on the precise pc relative load
15900 instruction the range is somewhere between 256 bytes and 4k. This
15901 means that we often have to dump a constant inside a function, and
15902 generate code to branch around it.
15904 It is important to minimize this, since the branches will slow
15905 things down and make the code larger.
15907 Normally we can hide the table after an existing unconditional
15908 branch so that there is no interruption of the flow, but in the
15909 worst case the code looks like this:
15911 ldr rn, L1
15913 b L2
15914 align
15915 L1: .long value
15919 ldr rn, L3
15921 b L4
15922 align
15923 L3: .long value
15927 We fix this by performing a scan after scheduling, which notices
15928 which instructions need to have their operands fetched from the
15929 constant table and builds the table.
15931 The algorithm starts by building a table of all the constants that
15932 need fixing up and all the natural barriers in the function (places
15933 where a constant table can be dropped without breaking the flow).
15934 For each fixup we note how far the pc-relative replacement will be
15935 able to reach and the offset of the instruction into the function.
15937 Having built the table we then group the fixes together to form
15938 tables that are as large as possible (subject to addressing
15939 constraints) and emit each table of constants after the last
15940 barrier that is within range of all the instructions in the group.
15941 If a group does not contain a barrier, then we forcibly create one
15942 by inserting a jump instruction into the flow. Once the table has
15943 been inserted, the insns are then modified to reference the
15944 relevant entry in the pool.
15946 Possible enhancements to the algorithm (not implemented) are:
15948 1) For some processors and object formats, there may be benefit in
15949 aligning the pools to the start of cache lines; this alignment
15950 would need to be taken into account when calculating addressability
15951 of a pool. */
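/* A rough illustration of the numbers involved (assumed figures; see the
   pool_range/neg_pool_range insn attributes for the real values): an
   ARM-state "ldr rN, .LCPx" can reach roughly 4K bytes forwards, so its
   fix records max_address = address + forwards - minipool_pad; the pool
   entry for .LCPx must be emitted after a barrier that lies below that
   address, otherwise a branch-around barrier is created for it.  */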
15953 /* These typedefs are located at the start of this file, so that
15954 they can be used in the prototypes there. This comment is to
15955 remind readers of that fact so that the following structures
15956 can be understood more easily.
15958 typedef struct minipool_node Mnode;
15959 typedef struct minipool_fixup Mfix; */
15961 struct minipool_node
15963 /* Doubly linked chain of entries. */
15964 Mnode * next;
15965 Mnode * prev;
15966 /* The maximum offset into the code at which this entry can be placed. While
15967 pushing fixes for forward references, all entries are sorted in order
15968 of increasing max_address. */
15969 HOST_WIDE_INT max_address;
15970 /* Similarly for an entry inserted for a backwards ref. */
15971 HOST_WIDE_INT min_address;
15972 /* The number of fixes referencing this entry. This can become zero
15973 if we "unpush" an entry. In this case we ignore the entry when we
15974 come to emit the code. */
15975 int refcount;
15976 /* The offset from the start of the minipool. */
15977 HOST_WIDE_INT offset;
15978 /* The value in the table. */
15979 rtx value;
15980 /* The mode of value. */
15981 enum machine_mode mode;
15982 /* The size of the value. With iWMMXt enabled
15983 sizes > 4 also imply an alignment of 8 bytes. */
15984 int fix_size;
15987 struct minipool_fixup
15989 Mfix * next;
15990 rtx insn;
15991 HOST_WIDE_INT address;
15992 rtx * loc;
15993 enum machine_mode mode;
15994 int fix_size;
15995 rtx value;
15996 Mnode * minipool;
15997 HOST_WIDE_INT forwards;
15998 HOST_WIDE_INT backwards;
16001 /* Fixes less than a word need padding out to a word boundary. */
16002 #define MINIPOOL_FIX_SIZE(mode) \
16003 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
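/* For example (illustrative): a QImode or HImode fix still occupies 4
   bytes in the pool, while DImode and TImode fixes occupy 8 and 16 bytes
   respectively.  */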
16005 static Mnode * minipool_vector_head;
16006 static Mnode * minipool_vector_tail;
16007 static rtx minipool_vector_label;
16008 static int minipool_pad;
16010 /* The linked list of all minipool fixes required for this function. */
16011 Mfix * minipool_fix_head;
16012 Mfix * minipool_fix_tail;
16013 /* The fix entry for the current minipool, once it has been placed. */
16014 Mfix * minipool_barrier;
16016 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16017 #define JUMP_TABLES_IN_TEXT_SECTION 0
16018 #endif
16020 static HOST_WIDE_INT
16021 get_jump_table_size (rtx insn)
16023 /* ADDR_VECs only take room if read-only data goes into the text
16024 section. */
16025 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16027 rtx body = PATTERN (insn);
16028 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16029 HOST_WIDE_INT size;
16030 HOST_WIDE_INT modesize;
16032 modesize = GET_MODE_SIZE (GET_MODE (body));
16033 size = modesize * XVECLEN (body, elt);
16034 switch (modesize)
16036 case 1:
16037 /* Round up size of TBB table to a halfword boundary. */
16038 size = (size + 1) & ~(HOST_WIDE_INT)1;
16039 break;
16040 case 2:
16041 /* No padding necessary for TBH. */
16042 break;
16043 case 4:
16044 /* Add two bytes for alignment on Thumb. */
16045 if (TARGET_THUMB)
16046 size += 2;
16047 break;
16048 default:
16049 gcc_unreachable ();
16051 return size;
16054 return 0;
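/* Worked example (illustrative): a TBB-style ADDR_DIFF_VEC with 5 QImode
   entries occupies 5 bytes, rounded up here to 6 so that the code that
   follows the table starts on a halfword boundary.  */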
16057 /* Return the maximum amount of padding that will be inserted before
16058 label LABEL. */
16060 static HOST_WIDE_INT
16061 get_label_padding (rtx label)
16063 HOST_WIDE_INT align, min_insn_size;
16065 align = 1 << label_to_alignment (label);
16066 min_insn_size = TARGET_THUMB ? 2 : 4;
16067 return align > min_insn_size ? align - min_insn_size : 0;
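/* For instance (illustrative): a label aligned to 8 bytes in Thumb code
   may be preceded by up to 8 - 2 = 6 bytes of padding, since the smallest
   Thumb insn is 2 bytes.  */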
16070 /* Move a minipool fix MP from its current location to before MAX_MP.
16071 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16072 constraints may need updating. */
16073 static Mnode *
16074 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16075 HOST_WIDE_INT max_address)
16077 /* The code below assumes these are different. */
16078 gcc_assert (mp != max_mp);
16080 if (max_mp == NULL)
16082 if (max_address < mp->max_address)
16083 mp->max_address = max_address;
16085 else
16087 if (max_address > max_mp->max_address - mp->fix_size)
16088 mp->max_address = max_mp->max_address - mp->fix_size;
16089 else
16090 mp->max_address = max_address;
16092 /* Unlink MP from its current position. Since max_mp is non-null,
16093 mp->prev must be non-null. */
16094 mp->prev->next = mp->next;
16095 if (mp->next != NULL)
16096 mp->next->prev = mp->prev;
16097 else
16098 minipool_vector_tail = mp->prev;
16100 /* Re-insert it before MAX_MP. */
16101 mp->next = max_mp;
16102 mp->prev = max_mp->prev;
16103 max_mp->prev = mp;
16105 if (mp->prev != NULL)
16106 mp->prev->next = mp;
16107 else
16108 minipool_vector_head = mp;
16111 /* Save the new entry. */
16112 max_mp = mp;
16114 /* Scan over the preceding entries and adjust their addresses as
16115 required. */
16116 while (mp->prev != NULL
16117 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16119 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16120 mp = mp->prev;
16123 return max_mp;
16126 /* Add a constant to the minipool for a forward reference. Returns the
16127 node added or NULL if the constant will not fit in this pool. */
16128 static Mnode *
16129 add_minipool_forward_ref (Mfix *fix)
16131 /* If set, max_mp is the first pool_entry that has a lower
16132 constraint than the one we are trying to add. */
16133 Mnode * max_mp = NULL;
16134 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16135 Mnode * mp;
16137 /* If the minipool starts before the end of FIX->INSN then this FIX
16138 cannot be placed into the current pool. Furthermore, adding the
16139 new constant pool entry may cause the pool to start FIX_SIZE bytes
16140 earlier. */
16141 if (minipool_vector_head &&
16142 (fix->address + get_attr_length (fix->insn)
16143 >= minipool_vector_head->max_address - fix->fix_size))
16144 return NULL;
16146 /* Scan the pool to see if a constant with the same value has
16147 already been added. While we are doing this, also note the
16148 location where we must insert the constant if it doesn't already
16149 exist. */
16150 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16152 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16153 && fix->mode == mp->mode
16154 && (!LABEL_P (fix->value)
16155 || (CODE_LABEL_NUMBER (fix->value)
16156 == CODE_LABEL_NUMBER (mp->value)))
16157 && rtx_equal_p (fix->value, mp->value))
16159 /* More than one fix references this entry. */
16160 mp->refcount++;
16161 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16164 /* Note the insertion point if necessary. */
16165 if (max_mp == NULL
16166 && mp->max_address > max_address)
16167 max_mp = mp;
16169 /* If we are inserting an 8-byte aligned quantity and
16170 we have not already found an insertion point, then
16171 make sure that all such 8-byte aligned quantities are
16172 placed at the start of the pool. */
16173 if (ARM_DOUBLEWORD_ALIGN
16174 && max_mp == NULL
16175 && fix->fix_size >= 8
16176 && mp->fix_size < 8)
16178 max_mp = mp;
16179 max_address = mp->max_address;
16183 /* The value is not currently in the minipool, so we need to create
16184 a new entry for it. If MAX_MP is NULL, the entry will be put on
16185 the end of the list since the placement is less constrained than
16186 any existing entry. Otherwise, we insert the new fix before
16187 MAX_MP and, if necessary, adjust the constraints on the other
16188 entries. */
16189 mp = XNEW (Mnode);
16190 mp->fix_size = fix->fix_size;
16191 mp->mode = fix->mode;
16192 mp->value = fix->value;
16193 mp->refcount = 1;
16194 /* Not yet required for a backwards ref. */
16195 mp->min_address = -65536;
16197 if (max_mp == NULL)
16199 mp->max_address = max_address;
16200 mp->next = NULL;
16201 mp->prev = minipool_vector_tail;
16203 if (mp->prev == NULL)
16205 minipool_vector_head = mp;
16206 minipool_vector_label = gen_label_rtx ();
16208 else
16209 mp->prev->next = mp;
16211 minipool_vector_tail = mp;
16213 else
16215 if (max_address > max_mp->max_address - mp->fix_size)
16216 mp->max_address = max_mp->max_address - mp->fix_size;
16217 else
16218 mp->max_address = max_address;
16220 mp->next = max_mp;
16221 mp->prev = max_mp->prev;
16222 max_mp->prev = mp;
16223 if (mp->prev != NULL)
16224 mp->prev->next = mp;
16225 else
16226 minipool_vector_head = mp;
16229 /* Save the new entry. */
16230 max_mp = mp;
16232 /* Scan over the preceding entries and adjust their addresses as
16233 required. */
16234 while (mp->prev != NULL
16235 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16237 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16238 mp = mp->prev;
16241 return max_mp;
16244 static Mnode *
16245 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16246 HOST_WIDE_INT min_address)
16248 HOST_WIDE_INT offset;
16250 /* The code below assumes these are different. */
16251 gcc_assert (mp != min_mp);
16253 if (min_mp == NULL)
16255 if (min_address > mp->min_address)
16256 mp->min_address = min_address;
16258 else
16260 /* We will adjust this below if it is too loose. */
16261 mp->min_address = min_address;
16263 /* Unlink MP from its current position. Since min_mp is non-null,
16264 mp->next must be non-null. */
16265 mp->next->prev = mp->prev;
16266 if (mp->prev != NULL)
16267 mp->prev->next = mp->next;
16268 else
16269 minipool_vector_head = mp->next;
16271 /* Reinsert it after MIN_MP. */
16272 mp->prev = min_mp;
16273 mp->next = min_mp->next;
16274 min_mp->next = mp;
16275 if (mp->next != NULL)
16276 mp->next->prev = mp;
16277 else
16278 minipool_vector_tail = mp;
16281 min_mp = mp;
16283 offset = 0;
16284 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16286 mp->offset = offset;
16287 if (mp->refcount > 0)
16288 offset += mp->fix_size;
16290 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16291 mp->next->min_address = mp->min_address + mp->fix_size;
16294 return min_mp;
16297 /* Add a constant to the minipool for a backward reference. Returns the
16298 node added or NULL if the constant will not fit in this pool.
16300 Note that the code for insertion for a backwards reference can be
16301 somewhat confusing because the calculated offsets for each fix do
16302 not take into account the size of the pool (which is still under
16303 construction). */
16304 static Mnode *
16305 add_minipool_backward_ref (Mfix *fix)
16307 /* If set, min_mp is the last pool_entry that has a lower constraint
16308 than the one we are trying to add. */
16309 Mnode *min_mp = NULL;
16310 /* This can be negative, since it is only a constraint. */
16311 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16312 Mnode *mp;
16314 /* If we can't reach the current pool from this insn, or if we can't
16315 insert this entry at the end of the pool without pushing other
16316 fixes out of range, then we don't try. This ensures that we
16317 can't fail later on. */
16318 if (min_address >= minipool_barrier->address
16319 || (minipool_vector_tail->min_address + fix->fix_size
16320 >= minipool_barrier->address))
16321 return NULL;
16323 /* Scan the pool to see if a constant with the same value has
16324 already been added. While we are doing this, also note the
16325 location where we must insert the constant if it doesn't already
16326 exist. */
16327 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16329 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16330 && fix->mode == mp->mode
16331 && (!LABEL_P (fix->value)
16332 || (CODE_LABEL_NUMBER (fix->value)
16333 == CODE_LABEL_NUMBER (mp->value)))
16334 && rtx_equal_p (fix->value, mp->value)
16335 /* Check that there is enough slack to move this entry to the
16336 end of the table (this is conservative). */
16337 && (mp->max_address
16338 > (minipool_barrier->address
16339 + minipool_vector_tail->offset
16340 + minipool_vector_tail->fix_size)))
16342 mp->refcount++;
16343 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16346 if (min_mp != NULL)
16347 mp->min_address += fix->fix_size;
16348 else
16350 /* Note the insertion point if necessary. */
16351 if (mp->min_address < min_address)
16353 /* For now, we do not allow the insertion of nodes requiring
16354 8-byte alignment anywhere but at the start of the pool. */
16355 if (ARM_DOUBLEWORD_ALIGN
16356 && fix->fix_size >= 8 && mp->fix_size < 8)
16357 return NULL;
16358 else
16359 min_mp = mp;
16361 else if (mp->max_address
16362 < minipool_barrier->address + mp->offset + fix->fix_size)
16364 /* Inserting before this entry would push the fix beyond
16365 its maximum address (which can happen if we have
16366 re-located a forwards fix); force the new fix to come
16367 after it. */
16368 if (ARM_DOUBLEWORD_ALIGN
16369 && fix->fix_size >= 8 && mp->fix_size < 8)
16370 return NULL;
16371 else
16373 min_mp = mp;
16374 min_address = mp->min_address + fix->fix_size;
16377 /* Do not insert a non-8-byte aligned quantity before 8-byte
16378 aligned quantities. */
16379 else if (ARM_DOUBLEWORD_ALIGN
16380 && fix->fix_size < 8
16381 && mp->fix_size >= 8)
16383 min_mp = mp;
16384 min_address = mp->min_address + fix->fix_size;
16389 /* We need to create a new entry. */
16390 mp = XNEW (Mnode);
16391 mp->fix_size = fix->fix_size;
16392 mp->mode = fix->mode;
16393 mp->value = fix->value;
16394 mp->refcount = 1;
16395 mp->max_address = minipool_barrier->address + 65536;
16397 mp->min_address = min_address;
16399 if (min_mp == NULL)
16401 mp->prev = NULL;
16402 mp->next = minipool_vector_head;
16404 if (mp->next == NULL)
16406 minipool_vector_tail = mp;
16407 minipool_vector_label = gen_label_rtx ();
16409 else
16410 mp->next->prev = mp;
16412 minipool_vector_head = mp;
16414 else
16416 mp->next = min_mp->next;
16417 mp->prev = min_mp;
16418 min_mp->next = mp;
16420 if (mp->next != NULL)
16421 mp->next->prev = mp;
16422 else
16423 minipool_vector_tail = mp;
16426 /* Save the new entry. */
16427 min_mp = mp;
16429 if (mp->prev)
16430 mp = mp->prev;
16431 else
16432 mp->offset = 0;
16434 /* Scan over the following entries and adjust their offsets. */
16435 while (mp->next != NULL)
16437 if (mp->next->min_address < mp->min_address + mp->fix_size)
16438 mp->next->min_address = mp->min_address + mp->fix_size;
16440 if (mp->refcount)
16441 mp->next->offset = mp->offset + mp->fix_size;
16442 else
16443 mp->next->offset = mp->offset;
16445 mp = mp->next;
16448 return min_mp;
16451 static void
16452 assign_minipool_offsets (Mfix *barrier)
16454 HOST_WIDE_INT offset = 0;
16455 Mnode *mp;
16457 minipool_barrier = barrier;
16459 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16461 mp->offset = offset;
16463 if (mp->refcount > 0)
16464 offset += mp->fix_size;
16468 /* Output the literal table. */
16469 static void
16470 dump_minipool (rtx scan)
16472 Mnode * mp;
16473 Mnode * nmp;
16474 int align64 = 0;
16476 if (ARM_DOUBLEWORD_ALIGN)
16477 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16478 if (mp->refcount > 0 && mp->fix_size >= 8)
16480 align64 = 1;
16481 break;
16484 if (dump_file)
16485 fprintf (dump_file,
16486 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16487 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16489 scan = emit_label_after (gen_label_rtx (), scan);
16490 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16491 scan = emit_label_after (minipool_vector_label, scan);
16493 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16495 if (mp->refcount > 0)
16497 if (dump_file)
16499 fprintf (dump_file,
16500 ";; Offset %u, min %ld, max %ld ",
16501 (unsigned) mp->offset, (unsigned long) mp->min_address,
16502 (unsigned long) mp->max_address);
16503 arm_print_value (dump_file, mp->value);
16504 fputc ('\n', dump_file);
16507 switch (mp->fix_size)
16509 #ifdef HAVE_consttable_1
16510 case 1:
16511 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16512 break;
16514 #endif
16515 #ifdef HAVE_consttable_2
16516 case 2:
16517 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16518 break;
16520 #endif
16521 #ifdef HAVE_consttable_4
16522 case 4:
16523 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16524 break;
16526 #endif
16527 #ifdef HAVE_consttable_8
16528 case 8:
16529 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16530 break;
16532 #endif
16533 #ifdef HAVE_consttable_16
16534 case 16:
16535 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16536 break;
16538 #endif
16539 default:
16540 gcc_unreachable ();
16544 nmp = mp->next;
16545 free (mp);
16548 minipool_vector_head = minipool_vector_tail = NULL;
16549 scan = emit_insn_after (gen_consttable_end (), scan);
16550 scan = emit_barrier_after (scan);
16553 /* Return the cost of forcibly inserting a barrier after INSN. */
16554 static int
16555 arm_barrier_cost (rtx insn)
16557 /* Basing the location of the pool on the loop depth is preferable,
16558 but at the moment, the basic block information seems to be
16559 corrupt by this stage of the compilation. */
16560 int base_cost = 50;
16561 rtx next = next_nonnote_insn (insn);
16563 if (next != NULL && LABEL_P (next))
16564 base_cost -= 20;
16566 switch (GET_CODE (insn))
16568 case CODE_LABEL:
16569 /* It will always be better to place the table before the label, rather
16570 than after it. */
16571 return 50;
16573 case INSN:
16574 case CALL_INSN:
16575 return base_cost;
16577 case JUMP_INSN:
16578 return base_cost - 10;
16580 default:
16581 return base_cost + 10;
16585 /* Find the best place in the insn stream in the range
16586 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16587 Create the barrier by inserting a jump and add a new fix entry for
16588 it. */
16589 static Mfix *
16590 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16592 HOST_WIDE_INT count = 0;
16593 rtx barrier;
16594 rtx from = fix->insn;
16595 /* The instruction after which we will insert the jump. */
16596 rtx selected = NULL;
16597 int selected_cost;
16598 /* The address at which the jump instruction will be placed. */
16599 HOST_WIDE_INT selected_address;
16600 Mfix * new_fix;
16601 HOST_WIDE_INT max_count = max_address - fix->address;
16602 rtx label = gen_label_rtx ();
16604 selected_cost = arm_barrier_cost (from);
16605 selected_address = fix->address;
16607 while (from && count < max_count)
16609 rtx tmp;
16610 int new_cost;
16612 /* This code shouldn't have been called if there was a natural barrier
16613 within range. */
16614 gcc_assert (!BARRIER_P (from));
16616 /* Count the length of this insn. This must stay in sync with the
16617 code that pushes minipool fixes. */
16618 if (LABEL_P (from))
16619 count += get_label_padding (from);
16620 else
16621 count += get_attr_length (from);
16623 /* If there is a jump table, add its length. */
16624 if (tablejump_p (from, NULL, &tmp))
16626 count += get_jump_table_size (tmp);
16628 /* Jump tables aren't in a basic block, so base the cost on
16629 the dispatch insn. If we select this location, we will
16630 still put the pool after the table. */
16631 new_cost = arm_barrier_cost (from);
16633 if (count < max_count
16634 && (!selected || new_cost <= selected_cost))
16636 selected = tmp;
16637 selected_cost = new_cost;
16638 selected_address = fix->address + count;
16641 /* Continue after the dispatch table. */
16642 from = NEXT_INSN (tmp);
16643 continue;
16646 new_cost = arm_barrier_cost (from);
16648 if (count < max_count
16649 && (!selected || new_cost <= selected_cost))
16651 selected = from;
16652 selected_cost = new_cost;
16653 selected_address = fix->address + count;
16656 from = NEXT_INSN (from);
16659 /* Make sure that we found a place to insert the jump. */
16660 gcc_assert (selected);
16662 /* Make sure we do not split a call and its corresponding
16663 CALL_ARG_LOCATION note. */
16664 if (CALL_P (selected))
16666 rtx next = NEXT_INSN (selected);
16667 if (next && NOTE_P (next)
16668 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16669 selected = next;
16672 /* Create a new JUMP_INSN that branches around a barrier. */
16673 from = emit_jump_insn_after (gen_jump (label), selected);
16674 JUMP_LABEL (from) = label;
16675 barrier = emit_barrier_after (from);
16676 emit_label_after (label, barrier);
16678 /* Create a minipool barrier entry for the new barrier. */
16679 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16680 new_fix->insn = barrier;
16681 new_fix->address = selected_address;
16682 new_fix->next = fix->next;
16683 fix->next = new_fix;
16685 return new_fix;
16688 /* Record that there is a natural barrier in the insn stream at
16689 ADDRESS. */
16690 static void
16691 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16693 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16695 fix->insn = insn;
16696 fix->address = address;
16698 fix->next = NULL;
16699 if (minipool_fix_head != NULL)
16700 minipool_fix_tail->next = fix;
16701 else
16702 minipool_fix_head = fix;
16704 minipool_fix_tail = fix;
16707 /* Record INSN, which will need fixing up to load a value from the
16708 minipool. ADDRESS is the offset of the insn since the start of the
16709 function; LOC is a pointer to the part of the insn which requires
16710 fixing; VALUE is the constant that must be loaded, which is of type
16711 MODE. */
16712 static void
16713 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16714 enum machine_mode mode, rtx value)
16716 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16718 fix->insn = insn;
16719 fix->address = address;
16720 fix->loc = loc;
16721 fix->mode = mode;
16722 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16723 fix->value = value;
16724 fix->forwards = get_attr_pool_range (insn);
16725 fix->backwards = get_attr_neg_pool_range (insn);
16726 fix->minipool = NULL;
16728 /* If an insn doesn't have a range defined for it, then it isn't
16729 expecting to be reworked by this code. Better to stop now than
16730 to generate duff assembly code. */
16731 gcc_assert (fix->forwards || fix->backwards);
16733 /* If an entry requires 8-byte alignment then assume all constant pools
16734 require 4 bytes of padding. Trying to do this later on a per-pool
16735 basis is awkward because existing pool entries have to be modified. */
16736 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16737 minipool_pad = 4;
16739 if (dump_file)
16741 fprintf (dump_file,
16742 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16743 GET_MODE_NAME (mode),
16744 INSN_UID (insn), (unsigned long) address,
16745 -1 * (long)fix->backwards, (long)fix->forwards);
16746 arm_print_value (dump_file, fix->value);
16747 fprintf (dump_file, "\n");
16750 /* Add it to the chain of fixes. */
16751 fix->next = NULL;
16753 if (minipool_fix_head != NULL)
16754 minipool_fix_tail->next = fix;
16755 else
16756 minipool_fix_head = fix;
16758 minipool_fix_tail = fix;
16761 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16762 Returns the number of insns needed, or 99 if we always want to synthesize
16763 the value. */
16765 arm_max_const_double_inline_cost ()
16767 /* Let the value get synthesized to avoid the use of literal pools. */
16768 if (arm_disable_literal_pool)
16769 return 99;
16771 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16774 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16775 Returns the number of insns needed, or 99 if we don't know how to
16776 do it. */
16778 arm_const_double_inline_cost (rtx val)
16780 rtx lowpart, highpart;
16781 enum machine_mode mode;
16783 mode = GET_MODE (val);
16785 if (mode == VOIDmode)
16786 mode = DImode;
16788 gcc_assert (GET_MODE_SIZE (mode) == 8);
16790 lowpart = gen_lowpart (SImode, val);
16791 highpart = gen_highpart_mode (SImode, mode, val);
16793 gcc_assert (CONST_INT_P (lowpart));
16794 gcc_assert (CONST_INT_P (highpart));
16796 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16797 NULL_RTX, NULL_RTX, 0, 0)
16798 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16799 NULL_RTX, NULL_RTX, 0, 0));
16802 /* Return true if it is worthwhile to split a 64-bit constant into two
16803 32-bit operations. This is the case if optimizing for size, or
16804 if we have load delay slots, or if one 32-bit part can be done with
16805 a single data operation. */
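/* For example (illustrative): for the 64-bit constant 0x000000ff00000001
   the high part (0xff) is a valid ARM immediate on its own, so splitting
   the constant into two 32-bit operations is considered worthwhile even
   when neither optimizing for size nor scheduling loads.  */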
16806 bool
16807 arm_const_double_by_parts (rtx val)
16809 enum machine_mode mode = GET_MODE (val);
16810 rtx part;
16812 if (optimize_size || arm_ld_sched)
16813 return true;
16815 if (mode == VOIDmode)
16816 mode = DImode;
16818 part = gen_highpart_mode (SImode, mode, val);
16820 gcc_assert (CONST_INT_P (part));
16822 if (const_ok_for_arm (INTVAL (part))
16823 || const_ok_for_arm (~INTVAL (part)))
16824 return true;
16826 part = gen_lowpart (SImode, val);
16828 gcc_assert (CONST_INT_P (part));
16830 if (const_ok_for_arm (INTVAL (part))
16831 || const_ok_for_arm (~INTVAL (part)))
16832 return true;
16834 return false;
16837 /* Return true if it is possible to inline both the high and low parts
16838 of a 64-bit constant into 32-bit data processing instructions. */
16839 bool
16840 arm_const_double_by_immediates (rtx val)
16842 enum machine_mode mode = GET_MODE (val);
16843 rtx part;
16845 if (mode == VOIDmode)
16846 mode = DImode;
16848 part = gen_highpart_mode (SImode, mode, val);
16850 gcc_assert (CONST_INT_P (part));
16852 if (!const_ok_for_arm (INTVAL (part)))
16853 return false;
16855 part = gen_lowpart (SImode, val);
16857 gcc_assert (CONST_INT_P (part));
16859 if (!const_ok_for_arm (INTVAL (part)))
16860 return false;
16862 return true;
16865 /* Scan INSN and note any of its operands that need fixing.
16866 If DO_PUSHES is false we do not actually push any of the fixups
16867 needed. */
16868 static void
16869 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16871 int opno;
16873 extract_insn (insn);
16875 if (!constrain_operands (1))
16876 fatal_insn_not_found (insn);
16878 if (recog_data.n_alternatives == 0)
16879 return;
16881 /* Fill in recog_op_alt with information about the constraints of
16882 this insn. */
16883 preprocess_constraints (insn);
16885 const operand_alternative *op_alt = which_op_alt ();
16886 for (opno = 0; opno < recog_data.n_operands; opno++)
16888 /* Things we need to fix can only occur in inputs. */
16889 if (recog_data.operand_type[opno] != OP_IN)
16890 continue;
16892 /* If this alternative is a memory reference, then any mention
16893 of constants in this alternative is really to fool reload
16894 into allowing us to accept one there. We need to fix them up
16895 now so that we output the right code. */
16896 if (op_alt[opno].memory_ok)
16898 rtx op = recog_data.operand[opno];
16900 if (CONSTANT_P (op))
16902 if (do_pushes)
16903 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16904 recog_data.operand_mode[opno], op);
16906 else if (MEM_P (op)
16907 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16908 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16910 if (do_pushes)
16912 rtx cop = avoid_constant_pool_reference (op);
16914 /* Casting the address of something to a mode narrower
16915 than a word can cause avoid_constant_pool_reference()
16916 to return the pool reference itself. That's no good to
16917 us here. Let's just hope that we can use the
16918 constant pool value directly. */
16919 if (op == cop)
16920 cop = get_pool_constant (XEXP (op, 0));
16922 push_minipool_fix (insn, address,
16923 recog_data.operand_loc[opno],
16924 recog_data.operand_mode[opno], cop);
16931 return;
16934 /* Rewrite move insn into subtract of 0 if the condition codes will
16935 be useful in next conditional jump insn. */
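/* For example (illustrative, with assumed register numbers): in a block
   ending with
       mov   r3, r2
       ...
       cmp   r3, #0
       bne   .L1
   the move is rewritten as "subs r3, r2, #0", so the condition codes it
   sets can serve the conditional branch that follows.  */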
16937 static void
16938 thumb1_reorg (void)
16940 basic_block bb;
16942 FOR_EACH_BB_FN (bb, cfun)
16944 rtx dest, src;
16945 rtx pat, op0, set = NULL;
16946 rtx prev, insn = BB_END (bb);
16947 bool insn_clobbered = false;
16949 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16950 insn = PREV_INSN (insn);
16952 /* Find the last cbranchsi4_insn in basic block BB. */
16953 if (insn == BB_HEAD (bb)
16954 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16955 continue;
16957 /* Get the register with which we are comparing. */
16958 pat = PATTERN (insn);
16959 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16961 /* Find the first flag setting insn before INSN in basic block BB. */
16962 gcc_assert (insn != BB_HEAD (bb));
16963 for (prev = PREV_INSN (insn);
16964 (!insn_clobbered
16965 && prev != BB_HEAD (bb)
16966 && (NOTE_P (prev)
16967 || DEBUG_INSN_P (prev)
16968 || ((set = single_set (prev)) != NULL
16969 && get_attr_conds (prev) == CONDS_NOCOND)));
16970 prev = PREV_INSN (prev))
16972 if (reg_set_p (op0, prev))
16973 insn_clobbered = true;
16976 /* Skip if op0 is clobbered by insn other than prev. */
16977 if (insn_clobbered)
16978 continue;
16980 if (!set)
16981 continue;
16983 dest = SET_DEST (set);
16984 src = SET_SRC (set);
16985 if (!low_register_operand (dest, SImode)
16986 || !low_register_operand (src, SImode))
16987 continue;
16989 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16990 in INSN. Both src and dest of the move insn are checked. */
16991 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16993 dest = copy_rtx (dest);
16994 src = copy_rtx (src);
16995 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16996 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16997 INSN_CODE (prev) = -1;
16998 /* Set test register in INSN to dest. */
16999 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17000 INSN_CODE (insn) = -1;
17005 /* Convert instructions to their cc-clobbering variant if possible, since
17006 that allows us to use smaller encodings. */
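/* For example (illustrative): when the condition flags are dead at that
   point, a plain "add r0, r0, r1" can be converted to "adds r0, r0, r1",
   which has a 16-bit Thumb-2 encoding, whereas the non-flag-setting form
   needs a 32-bit encoding outside an IT block.  */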
17008 static void
17009 thumb2_reorg (void)
17011 basic_block bb;
17012 regset_head live;
17014 INIT_REG_SET (&live);
17016 /* We are freeing block_for_insn in the toplev to keep compatibility
17017 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17018 compute_bb_for_insn ();
17019 df_analyze ();
17021 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17023 FOR_EACH_BB_FN (bb, cfun)
17025 if (current_tune->disparage_flag_setting_t16_encodings
17026 && optimize_bb_for_speed_p (bb))
17027 continue;
17029 rtx insn;
17030 Convert_Action action = SKIP;
17031 Convert_Action action_for_partial_flag_setting
17032 = (current_tune->disparage_partial_flag_setting_t16_encodings
17033 && optimize_bb_for_speed_p (bb))
17034 ? SKIP : CONV;
17036 COPY_REG_SET (&live, DF_LR_OUT (bb));
17037 df_simulate_initialize_backwards (bb, &live);
17038 FOR_BB_INSNS_REVERSE (bb, insn)
17040 if (NONJUMP_INSN_P (insn)
17041 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17042 && GET_CODE (PATTERN (insn)) == SET)
17044 action = SKIP;
17045 rtx pat = PATTERN (insn);
17046 rtx dst = XEXP (pat, 0);
17047 rtx src = XEXP (pat, 1);
17048 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17050 if (!OBJECT_P (src))
17051 op0 = XEXP (src, 0);
17053 if (BINARY_P (src))
17054 op1 = XEXP (src, 1);
17056 if (low_register_operand (dst, SImode))
17058 switch (GET_CODE (src))
17060 case PLUS:
17061 /* Adding two registers and storing the result
17062 in the first source is already a 16-bit
17063 operation. */
17064 if (rtx_equal_p (dst, op0)
17065 && register_operand (op1, SImode))
17066 break;
17068 if (low_register_operand (op0, SImode))
17070 /* ADDS <Rd>,<Rn>,<Rm> */
17071 if (low_register_operand (op1, SImode))
17072 action = CONV;
17073 /* ADDS <Rdn>,#<imm8> */
17074 /* SUBS <Rdn>,#<imm8> */
17075 else if (rtx_equal_p (dst, op0)
17076 && CONST_INT_P (op1)
17077 && IN_RANGE (INTVAL (op1), -255, 255))
17078 action = CONV;
17079 /* ADDS <Rd>,<Rn>,#<imm3> */
17080 /* SUBS <Rd>,<Rn>,#<imm3> */
17081 else if (CONST_INT_P (op1)
17082 && IN_RANGE (INTVAL (op1), -7, 7))
17083 action = CONV;
17085 /* ADCS <Rd>, <Rn> */
17086 else if (GET_CODE (XEXP (src, 0)) == PLUS
17087 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17088 && low_register_operand (XEXP (XEXP (src, 0), 1),
17089 SImode)
17090 && COMPARISON_P (op1)
17091 && cc_register (XEXP (op1, 0), VOIDmode)
17092 && maybe_get_arm_condition_code (op1) == ARM_CS
17093 && XEXP (op1, 1) == const0_rtx)
17094 action = CONV;
17095 break;
17097 case MINUS:
17098 /* RSBS <Rd>,<Rn>,#0
17099 Not handled here: see NEG below. */
17100 /* SUBS <Rd>,<Rn>,#<imm3>
17101 SUBS <Rdn>,#<imm8>
17102 Not handled here: see PLUS above. */
17103 /* SUBS <Rd>,<Rn>,<Rm> */
17104 if (low_register_operand (op0, SImode)
17105 && low_register_operand (op1, SImode))
17106 action = CONV;
17107 break;
17109 case MULT:
17110 /* MULS <Rdm>,<Rn>,<Rdm>
17111 As an exception to the rule, this is only used
17112 when optimizing for size since MULS is slow on all
17113 known implementations. We do not even want to use
17114 MULS in cold code, if optimizing for speed, so we
17115 test the global flag here. */
17116 if (!optimize_size)
17117 break;
17118 /* else fall through. */
17119 case AND:
17120 case IOR:
17121 case XOR:
17122 /* ANDS <Rdn>,<Rm> */
17123 if (rtx_equal_p (dst, op0)
17124 && low_register_operand (op1, SImode))
17125 action = action_for_partial_flag_setting;
17126 else if (rtx_equal_p (dst, op1)
17127 && low_register_operand (op0, SImode))
17128 action = action_for_partial_flag_setting == SKIP
17129 ? SKIP : SWAP_CONV;
17130 break;
17132 case ASHIFTRT:
17133 case ASHIFT:
17134 case LSHIFTRT:
17135 /* ASRS <Rdn>,<Rm> */
17136 /* LSRS <Rdn>,<Rm> */
17137 /* LSLS <Rdn>,<Rm> */
17138 if (rtx_equal_p (dst, op0)
17139 && low_register_operand (op1, SImode))
17140 action = action_for_partial_flag_setting;
17141 /* ASRS <Rd>,<Rm>,#<imm5> */
17142 /* LSRS <Rd>,<Rm>,#<imm5> */
17143 /* LSLS <Rd>,<Rm>,#<imm5> */
17144 else if (low_register_operand (op0, SImode)
17145 && CONST_INT_P (op1)
17146 && IN_RANGE (INTVAL (op1), 0, 31))
17147 action = action_for_partial_flag_setting;
17148 break;
17150 case ROTATERT:
17151 /* RORS <Rdn>,<Rm> */
17152 if (rtx_equal_p (dst, op0)
17153 && low_register_operand (op1, SImode))
17154 action = action_for_partial_flag_setting;
17155 break;
17157 case NOT:
17158 /* MVNS <Rd>,<Rm> */
17159 if (low_register_operand (op0, SImode))
17160 action = action_for_partial_flag_setting;
17161 break;
17163 case NEG:
17164 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17165 if (low_register_operand (op0, SImode))
17166 action = CONV;
17167 break;
17169 case CONST_INT:
17170 /* MOVS <Rd>,#<imm8> */
17171 if (CONST_INT_P (src)
17172 && IN_RANGE (INTVAL (src), 0, 255))
17173 action = action_for_partial_flag_setting;
17174 break;
17176 case REG:
17177 /* MOVS and MOV<c> with registers have different
17178 encodings, so are not relevant here. */
17179 break;
17181 default:
17182 break;
17186 if (action != SKIP)
17188 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17189 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17190 rtvec vec;
17192 if (action == SWAP_CONV)
17194 src = copy_rtx (src);
17195 XEXP (src, 0) = op1;
17196 XEXP (src, 1) = op0;
17197 pat = gen_rtx_SET (VOIDmode, dst, src);
17198 vec = gen_rtvec (2, pat, clobber);
17200 else /* action == CONV */
17201 vec = gen_rtvec (2, pat, clobber);
17203 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17204 INSN_CODE (insn) = -1;
17208 if (NONDEBUG_INSN_P (insn))
17209 df_simulate_one_insn_backwards (bb, insn, &live);
17213 CLEAR_REG_SET (&live);
17216 /* Gcc puts the pool in the wrong place for ARM, since we can only
17217 load addresses a limited distance around the pc. We do some
17218 special munging to move the constant pool values to the correct
17219 point in the code. */
17220 static void
17221 arm_reorg (void)
17223 rtx insn;
17224 HOST_WIDE_INT address = 0;
17225 Mfix * fix;
17227 if (TARGET_THUMB1)
17228 thumb1_reorg ();
17229 else if (TARGET_THUMB2)
17230 thumb2_reorg ();
17232 /* Ensure all insns that must be split have been split at this point.
17233 Otherwise, the pool placement code below may compute incorrect
17234 insn lengths. Note that when optimizing, all insns have already
17235 been split at this point. */
17236 if (!optimize)
17237 split_all_insns_noflow ();
17239 minipool_fix_head = minipool_fix_tail = NULL;
17241 /* The first insn must always be a note, or the code below won't
17242 scan it properly. */
17243 insn = get_insns ();
17244 gcc_assert (NOTE_P (insn));
17245 minipool_pad = 0;
17247 /* Scan all the insns and record the operands that will need fixing. */
17248 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17250 if (BARRIER_P (insn))
17251 push_minipool_barrier (insn, address);
17252 else if (INSN_P (insn))
17254 rtx table;
17256 note_invalid_constants (insn, address, true);
17257 address += get_attr_length (insn);
17259 /* If the insn is a vector jump, add the size of the table
17260 and skip the table. */
17261 if (tablejump_p (insn, NULL, &table))
17263 address += get_jump_table_size (table);
17264 insn = table;
17267 else if (LABEL_P (insn))
17268 /* Add the worst-case padding due to alignment. We don't add
17269 the _current_ padding because the minipool insertions
17270 themselves might change it. */
17271 address += get_label_padding (insn);
17274 fix = minipool_fix_head;
17276 /* Now scan the fixups and perform the required changes. */
17277 while (fix)
17279 Mfix * ftmp;
17280 Mfix * fdel;
17281 Mfix * last_added_fix;
17282 Mfix * last_barrier = NULL;
17283 Mfix * this_fix;
17285 /* Skip any further barriers before the next fix. */
17286 while (fix && BARRIER_P (fix->insn))
17287 fix = fix->next;
17289 /* No more fixes. */
17290 if (fix == NULL)
17291 break;
17293 last_added_fix = NULL;
17295 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17297 if (BARRIER_P (ftmp->insn))
17299 if (ftmp->address >= minipool_vector_head->max_address)
17300 break;
17302 last_barrier = ftmp;
17304 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17305 break;
17307 last_added_fix = ftmp; /* Keep track of the last fix added. */
17310 /* If we found a barrier, drop back to that; any fixes that we
17311 could have reached but come after the barrier will now go in
17312 the next mini-pool. */
17313 if (last_barrier != NULL)
17315 /* Reduce the refcount for those fixes that won't go into this
17316 pool after all. */
17317 for (fdel = last_barrier->next;
17318 fdel && fdel != ftmp;
17319 fdel = fdel->next)
17321 fdel->minipool->refcount--;
17322 fdel->minipool = NULL;
17325 ftmp = last_barrier;
17327 else
17329 /* ftmp is the first fix that we can't fit into this pool and
17330 there are no natural barriers that we could use. Insert a
17331 new barrier in the code somewhere between the previous
17332 fix and this one, and arrange to jump around it. */
17333 HOST_WIDE_INT max_address;
17335 /* The last item on the list of fixes must be a barrier, so
17336 we can never run off the end of the list of fixes without
17337 last_barrier being set. */
17338 gcc_assert (ftmp);
17340 max_address = minipool_vector_head->max_address;
17341 /* Check that there isn't another fix that is in range that
17342 we couldn't fit into this pool because the pool was
17343 already too large: we need to put the pool before such an
17344 instruction. The pool itself may come just after the
17345 fix because create_fix_barrier also allows space for a
17346 jump instruction. */
17347 if (ftmp->address < max_address)
17348 max_address = ftmp->address + 1;
17350 last_barrier = create_fix_barrier (last_added_fix, max_address);
17353 assign_minipool_offsets (last_barrier);
17355 while (ftmp)
17357 if (!BARRIER_P (ftmp->insn)
17358 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17359 == NULL))
17360 break;
17362 ftmp = ftmp->next;
17365 /* Scan over the fixes we have identified for this pool, fixing them
17366 up and adding the constants to the pool itself. */
17367 for (this_fix = fix; this_fix && ftmp != this_fix;
17368 this_fix = this_fix->next)
17369 if (!BARRIER_P (this_fix->insn))
17371 rtx addr
17372 = plus_constant (Pmode,
17373 gen_rtx_LABEL_REF (VOIDmode,
17374 minipool_vector_label),
17375 this_fix->minipool->offset);
17376 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17379 dump_minipool (last_barrier->insn);
17380 fix = ftmp;
17383 /* From now on we must synthesize any constants that we can't handle
17384 directly. This can happen if the RTL gets split during final
17385 instruction generation. */
17386 cfun->machine->after_arm_reorg = 1;
17388 /* Free the minipool memory. */
17389 obstack_free (&minipool_obstack, minipool_startobj);
17392 /* Routines to output assembly language. */
17394 /* If the rtx is the correct value then return the string of the number.
17395 In this way we can ensure that valid double constants are generated even
17396 when cross compiling. */
17397 const char *
17398 fp_immediate_constant (rtx x)
17400 REAL_VALUE_TYPE r;
17402 if (!fp_consts_inited)
17403 init_fp_table ();
17405 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17407 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17408 return "0";
17411 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17412 static const char *
17413 fp_const_from_val (REAL_VALUE_TYPE *r)
17415 if (!fp_consts_inited)
17416 init_fp_table ();
17418 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17419 return "0";
17422 /* OPERANDS[0] is the entire list of insns that constitute pop,
17423 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17424 is in the list, UPDATE is true iff the list contains explicit
17425 update of base register. */
17426 void
17427 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17428 bool update)
17430 int i;
17431 char pattern[100];
17432 int offset;
17433 const char *conditional;
17434 int num_saves = XVECLEN (operands[0], 0);
17435 unsigned int regno;
17436 unsigned int regno_base = REGNO (operands[1]);
17438 offset = 0;
17439 offset += update ? 1 : 0;
17440 offset += return_pc ? 1 : 0;
17442 /* Is the base register in the list? */
17443 for (i = offset; i < num_saves; i++)
17445 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17446 /* If SP is in the list, then the base register must be SP. */
17447 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17448 /* If base register is in the list, there must be no explicit update. */
17449 if (regno == regno_base)
17450 gcc_assert (!update);
17453 conditional = reverse ? "%?%D0" : "%?%d0";
17454 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17456 /* Output pop (not ldmfd) because it has a shorter encoding. */
17457 gcc_assert (update);
17458 sprintf (pattern, "pop%s\t{", conditional);
17460 else
17462 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17463 It's just a convention; their semantics are identical. */
17464 if (regno_base == SP_REGNUM)
17465 sprintf (pattern, "ldm%sfd\t", conditional);
17466 else if (TARGET_UNIFIED_ASM)
17467 sprintf (pattern, "ldmia%s\t", conditional);
17468 else
17469 sprintf (pattern, "ldm%sia\t", conditional);
17471 strcat (pattern, reg_names[regno_base]);
17472 if (update)
17473 strcat (pattern, "!, {");
17474 else
17475 strcat (pattern, ", {");
17478 /* Output the first destination register. */
17479 strcat (pattern,
17480 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17482 /* Output the rest of the destination registers. */
17483 for (i = offset + 1; i < num_saves; i++)
17485 strcat (pattern, ", ");
17486 strcat (pattern,
17487 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17490 strcat (pattern, "}");
17492 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17493 strcat (pattern, "^");
17495 output_asm_insn (pattern, &cond);
17499 /* Output the assembly for a store multiple. */
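/* A typical result for three D registers starting at d8 would be
   "fstmfdd sp!, {d8, d9, d10}", with the base register taken from the
   memory operand.  */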
17501 const char *
17502 vfp_output_fstmd (rtx * operands)
17504 char pattern[100];
17505 int p;
17506 int base;
17507 int i;
17509 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17510 p = strlen (pattern);
17512 gcc_assert (REG_P (operands[1]));
17514 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17515 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17517 p += sprintf (&pattern[p], ", d%d", base + i);
17519 strcpy (&pattern[p], "}");
17521 output_asm_insn (pattern, operands);
17522 return "";
17526 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17527 number of bytes pushed. */
17529 static int
17530 vfp_emit_fstmd (int base_reg, int count)
17532 rtx par;
17533 rtx dwarf;
17534 rtx tmp, reg;
17535 int i;
17537 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17538 register pairs are stored by a store multiple insn. We avoid this
17539 by pushing an extra pair. */
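/* For example, a request to store exactly two register pairs is widened
   to three (starting one D register lower if the pair was at the top of
   the bank), so the problematic two-pair case is never emitted.  */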
17540 if (count == 2 && !arm_arch6)
17542 if (base_reg == LAST_VFP_REGNUM - 3)
17543 base_reg -= 2;
17544 count++;
17547 /* FSTMD may not store more than 16 doubleword registers at once. Split
17548 larger stores into multiple parts (up to a maximum of two, in
17549 practice). */
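/* E.g. a 20-register store is emitted as one FSTMD of the top four
   registers followed by one FSTMD of the remaining sixteen.  */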
17550 if (count > 16)
17552 int saved;
17553 /* NOTE: base_reg is an internal register number, so each D register
17554 counts as 2. */
17555 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17556 saved += vfp_emit_fstmd (base_reg, 16);
17557 return saved;
17560 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17561 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17563 reg = gen_rtx_REG (DFmode, base_reg);
17564 base_reg += 2;
17566 XVECEXP (par, 0, 0)
17567 = gen_rtx_SET (VOIDmode,
17568 gen_frame_mem
17569 (BLKmode,
17570 gen_rtx_PRE_MODIFY (Pmode,
17571 stack_pointer_rtx,
17572 plus_constant
17573 (Pmode, stack_pointer_rtx,
17574 - (count * 8)))
17576 gen_rtx_UNSPEC (BLKmode,
17577 gen_rtvec (1, reg),
17578 UNSPEC_PUSH_MULT));
17580 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17581 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17582 RTX_FRAME_RELATED_P (tmp) = 1;
17583 XVECEXP (dwarf, 0, 0) = tmp;
17585 tmp = gen_rtx_SET (VOIDmode,
17586 gen_frame_mem (DFmode, stack_pointer_rtx),
17587 reg);
17588 RTX_FRAME_RELATED_P (tmp) = 1;
17589 XVECEXP (dwarf, 0, 1) = tmp;
17591 for (i = 1; i < count; i++)
17593 reg = gen_rtx_REG (DFmode, base_reg);
17594 base_reg += 2;
17595 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17597 tmp = gen_rtx_SET (VOIDmode,
17598 gen_frame_mem (DFmode,
17599 plus_constant (Pmode,
17600 stack_pointer_rtx,
17601 i * 8)),
17602 reg);
17603 RTX_FRAME_RELATED_P (tmp) = 1;
17604 XVECEXP (dwarf, 0, i + 1) = tmp;
17607 par = emit_insn (par);
17608 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17609 RTX_FRAME_RELATED_P (par) = 1;
17611 return count * 8;
17614 /* Emit a call instruction with pattern PAT. ADDR is the address of
17615 the call target. */
17617 void
17618 arm_emit_call_insn (rtx pat, rtx addr)
17620 rtx insn;
17622 insn = emit_call_insn (pat);
17624 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17625 If the call might use such an entry, add a use of the PIC register
17626 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17627 if (TARGET_VXWORKS_RTP
17628 && flag_pic
17629 && GET_CODE (addr) == SYMBOL_REF
17630 && (SYMBOL_REF_DECL (addr)
17631 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17632 : !SYMBOL_REF_LOCAL_P (addr)))
17634 require_pic_register ();
17635 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17639 /* Output a 'call' insn. */
17640 const char *
17641 output_call (rtx *operands)
17643 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17645 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17646 if (REGNO (operands[0]) == LR_REGNUM)
17648 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17649 output_asm_insn ("mov%?\t%0, %|lr", operands);
17652 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17654 if (TARGET_INTERWORK || arm_arch4t)
17655 output_asm_insn ("bx%?\t%0", operands);
17656 else
17657 output_asm_insn ("mov%?\t%|pc, %0", operands);
17659 return "";
17662 /* Output a 'call' insn whose target is a reference in memory. This is
17663 disabled for ARMv5, where we prefer a blx instead, because otherwise
17664 there's a significant performance overhead. */
17665 const char *
17666 output_call_mem (rtx *operands)
17668 gcc_assert (!arm_arch5);
17669 if (TARGET_INTERWORK)
17671 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17672 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17673 output_asm_insn ("bx%?\t%|ip", operands);
17675 else if (regno_use_in (LR_REGNUM, operands[0]))
17677 /* LR is used in the memory address. We load the address in the
17678 first instruction. It's safe to use IP as the target of the
17679 load since the call will kill it anyway. */
17680 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17681 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17682 if (arm_arch4t)
17683 output_asm_insn ("bx%?\t%|ip", operands);
17684 else
17685 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17687 else
17689 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17690 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17693 return "";
17697 /* Output a move from arm registers to arm registers of a long double
17698 OPERANDS[0] is the destination.
17699 OPERANDS[1] is the source. */
17700 const char *
17701 output_mov_long_double_arm_from_arm (rtx *operands)
17703 /* We have to be careful here because the two might overlap. */
17704 int dest_start = REGNO (operands[0]);
17705 int src_start = REGNO (operands[1]);
17706 rtx ops[2];
17707 int i;
17709 if (dest_start < src_start)
17711 for (i = 0; i < 3; i++)
17713 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17714 ops[1] = gen_rtx_REG (SImode, src_start + i);
17715 output_asm_insn ("mov%?\t%0, %1", ops);
17718 else
17720 for (i = 2; i >= 0; i--)
17722 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17723 ops[1] = gen_rtx_REG (SImode, src_start + i);
17724 output_asm_insn ("mov%?\t%0, %1", ops);
17728 return "";
17731 void
17732 arm_emit_movpair (rtx dest, rtx src)
17734 /* If the src is an immediate, simplify it. */
17735 if (CONST_INT_P (src))
17737 HOST_WIDE_INT val = INTVAL (src);
17738 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17739 if ((val >> 16) & 0x0000ffff)
17740 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17741 GEN_INT (16)),
17742 GEN_INT ((val >> 16) & 0x0000ffff));
17743 return;
17745 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17746 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17749 /* Output a move between double words. It must be REG<-MEM
17750 or MEM<-REG. */
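/* For instance, a DImode load of {r0, r1} from a plain register address
   in r2 would typically be emitted as "ldrd r0, [r2]" when LDRD is
   available, or as an equivalent "ldmia" of the register pair otherwise.  */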
17751 const char *
17752 output_move_double (rtx *operands, bool emit, int *count)
17754 enum rtx_code code0 = GET_CODE (operands[0]);
17755 enum rtx_code code1 = GET_CODE (operands[1]);
17756 rtx otherops[3];
17757 if (count)
17758 *count = 1;
17760 /* The only case when this might happen is when
17761 you are looking at the length of a DImode instruction
17762 that has an invalid constant in it. */
17763 if (code0 == REG && code1 != MEM)
17765 gcc_assert (!emit);
17766 *count = 2;
17767 return "";
17770 if (code0 == REG)
17772 unsigned int reg0 = REGNO (operands[0]);
17774 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17776 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17778 switch (GET_CODE (XEXP (operands[1], 0)))
17780 case REG:
17782 if (emit)
17784 if (TARGET_LDRD
17785 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17786 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17787 else
17788 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17790 break;
17792 case PRE_INC:
17793 gcc_assert (TARGET_LDRD);
17794 if (emit)
17795 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17796 break;
17798 case PRE_DEC:
17799 if (emit)
17801 if (TARGET_LDRD)
17802 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17803 else
17804 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17806 break;
17808 case POST_INC:
17809 if (emit)
17811 if (TARGET_LDRD)
17812 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17813 else
17814 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17816 break;
17818 case POST_DEC:
17819 gcc_assert (TARGET_LDRD);
17820 if (emit)
17821 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17822 break;
17824 case PRE_MODIFY:
17825 case POST_MODIFY:
17826 /* Autoincrement addressing modes should never have overlapping
17827 base and destination registers, and overlapping index registers
17828 are already prohibited, so this doesn't need to worry about
17829 fix_cm3_ldrd. */
17830 otherops[0] = operands[0];
17831 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17832 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17834 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17836 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17838 /* Registers overlap so split out the increment. */
17839 if (emit)
17841 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17842 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17844 if (count)
17845 *count = 2;
17847 else
17849 /* Use a single insn if we can.
17850 FIXME: IWMMXT allows offsets larger than ldrd can
17851 handle, fix these up with a pair of ldr. */
17852 if (TARGET_THUMB2
17853 || !CONST_INT_P (otherops[2])
17854 || (INTVAL (otherops[2]) > -256
17855 && INTVAL (otherops[2]) < 256))
17857 if (emit)
17858 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17860 else
17862 if (emit)
17864 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17865 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17867 if (count)
17868 *count = 2;
17873 else
17875 /* Use a single insn if we can.
17876 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17877 fix these up with a pair of ldr. */
17878 if (TARGET_THUMB2
17879 || !CONST_INT_P (otherops[2])
17880 || (INTVAL (otherops[2]) > -256
17881 && INTVAL (otherops[2]) < 256))
17883 if (emit)
17884 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17886 else
17888 if (emit)
17890 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17891 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17893 if (count)
17894 *count = 2;
17897 break;
17899 case LABEL_REF:
17900 case CONST:
17901 /* We might be able to use ldrd %0, %1 here. However, the range is
17902 different from that of ldr/adr, and it is broken on some ARMv7-M
17903 implementations. */
17904 /* Use the second register of the pair to avoid problematic
17905 overlap. */
17906 otherops[1] = operands[1];
17907 if (emit)
17908 output_asm_insn ("adr%?\t%0, %1", otherops);
17909 operands[1] = otherops[0];
17910 if (emit)
17912 if (TARGET_LDRD)
17913 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17914 else
17915 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17918 if (count)
17919 *count = 2;
17920 break;
17922 /* ??? This needs checking for thumb2. */
17923 default:
17924 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17925 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17927 otherops[0] = operands[0];
17928 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17929 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17931 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17933 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17935 switch ((int) INTVAL (otherops[2]))
17937 case -8:
17938 if (emit)
17939 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17940 return "";
17941 case -4:
17942 if (TARGET_THUMB2)
17943 break;
17944 if (emit)
17945 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17946 return "";
17947 case 4:
17948 if (TARGET_THUMB2)
17949 break;
17950 if (emit)
17951 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17952 return "";
17955 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17956 operands[1] = otherops[0];
17957 if (TARGET_LDRD
17958 && (REG_P (otherops[2])
17959 || TARGET_THUMB2
17960 || (CONST_INT_P (otherops[2])
17961 && INTVAL (otherops[2]) > -256
17962 && INTVAL (otherops[2]) < 256)))
17964 if (reg_overlap_mentioned_p (operands[0],
17965 otherops[2]))
17967 rtx tmp;
17968 /* Swap base and index registers over to
17969 avoid a conflict. */
17970 tmp = otherops[1];
17971 otherops[1] = otherops[2];
17972 otherops[2] = tmp;
17974 /* If both registers conflict, it will usually
17975 have been fixed by a splitter. */
17976 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17977 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17979 if (emit)
17981 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17982 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17984 if (count)
17985 *count = 2;
17987 else
17989 otherops[0] = operands[0];
17990 if (emit)
17991 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17993 return "";
17996 if (CONST_INT_P (otherops[2]))
17998 if (emit)
18000 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18001 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18002 else
18003 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18006 else
18008 if (emit)
18009 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18012 else
18014 if (emit)
18015 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18018 if (count)
18019 *count = 2;
18021 if (TARGET_LDRD)
18022 return "ldr%(d%)\t%0, [%1]";
18024 return "ldm%(ia%)\t%1, %M0";
18026 else
18028 otherops[1] = adjust_address (operands[1], SImode, 4);
18029 /* Take care of overlapping base/data reg. */
18030 if (reg_mentioned_p (operands[0], operands[1]))
18032 if (emit)
18034 output_asm_insn ("ldr%?\t%0, %1", otherops);
18035 output_asm_insn ("ldr%?\t%0, %1", operands);
18037 if (count)
18038 *count = 2;
18041 else
18043 if (emit)
18045 output_asm_insn ("ldr%?\t%0, %1", operands);
18046 output_asm_insn ("ldr%?\t%0, %1", otherops);
18048 if (count)
18049 *count = 2;
18054 else
18056 /* Constraints should ensure this. */
18057 gcc_assert (code0 == MEM && code1 == REG);
18058 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18059 || (TARGET_ARM && TARGET_LDRD));
18061 switch (GET_CODE (XEXP (operands[0], 0)))
18063 case REG:
18064 if (emit)
18066 if (TARGET_LDRD)
18067 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18068 else
18069 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18071 break;
18073 case PRE_INC:
18074 gcc_assert (TARGET_LDRD);
18075 if (emit)
18076 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18077 break;
18079 case PRE_DEC:
18080 if (emit)
18082 if (TARGET_LDRD)
18083 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18084 else
18085 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18087 break;
18089 case POST_INC:
18090 if (emit)
18092 if (TARGET_LDRD)
18093 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18094 else
18095 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18097 break;
18099 case POST_DEC:
18100 gcc_assert (TARGET_LDRD);
18101 if (emit)
18102 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18103 break;
18105 case PRE_MODIFY:
18106 case POST_MODIFY:
18107 otherops[0] = operands[1];
18108 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18109 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18111 /* IWMMXT allows offsets larger than strd can handle;
18112 fix these up with a pair of str. */
18113 if (!TARGET_THUMB2
18114 && CONST_INT_P (otherops[2])
18115 && (INTVAL(otherops[2]) <= -256
18116 || INTVAL(otherops[2]) >= 256))
18118 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18120 if (emit)
18122 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18123 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18125 if (count)
18126 *count = 2;
18128 else
18130 if (emit)
18132 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18133 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18135 if (count)
18136 *count = 2;
18139 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18141 if (emit)
18142 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18144 else
18146 if (emit)
18147 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18149 break;
18151 case PLUS:
18152 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18153 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18155 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18157 case -8:
18158 if (emit)
18159 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18160 return "";
18162 case -4:
18163 if (TARGET_THUMB2)
18164 break;
18165 if (emit)
18166 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18167 return "";
18169 case 4:
18170 if (TARGET_THUMB2)
18171 break;
18172 if (emit)
18173 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18174 return "";
18177 if (TARGET_LDRD
18178 && (REG_P (otherops[2])
18179 || TARGET_THUMB2
18180 || (CONST_INT_P (otherops[2])
18181 && INTVAL (otherops[2]) > -256
18182 && INTVAL (otherops[2]) < 256)))
18184 otherops[0] = operands[1];
18185 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18186 if (emit)
18187 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18188 return "";
18190 /* Fall through */
18192 default:
18193 otherops[0] = adjust_address (operands[0], SImode, 4);
18194 otherops[1] = operands[1];
18195 if (emit)
18197 output_asm_insn ("str%?\t%1, %0", operands);
18198 output_asm_insn ("str%?\t%H1, %0", otherops);
18200 if (count)
18201 *count = 2;
18205 return "";
18208 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18209 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
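/* With a plain register address this reduces to a single "ldmia"/"stmia"
   of the four core registers; a reg->reg move falls back to a sequence of
   four "mov" instructions.  */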
18211 const char *
18212 output_move_quad (rtx *operands)
18214 if (REG_P (operands[0]))
18216 /* Load, or reg->reg move. */
18218 if (MEM_P (operands[1]))
18220 switch (GET_CODE (XEXP (operands[1], 0)))
18222 case REG:
18223 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18224 break;
18226 case LABEL_REF:
18227 case CONST:
18228 output_asm_insn ("adr%?\t%0, %1", operands);
18229 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18230 break;
18232 default:
18233 gcc_unreachable ();
18236 else
18238 rtx ops[2];
18239 int dest, src, i;
18241 gcc_assert (REG_P (operands[1]));
18243 dest = REGNO (operands[0]);
18244 src = REGNO (operands[1]);
18246 /* This seems pretty dumb, but hopefully GCC won't try to do it
18247 very often. */
18248 if (dest < src)
18249 for (i = 0; i < 4; i++)
18251 ops[0] = gen_rtx_REG (SImode, dest + i);
18252 ops[1] = gen_rtx_REG (SImode, src + i);
18253 output_asm_insn ("mov%?\t%0, %1", ops);
18255 else
18256 for (i = 3; i >= 0; i--)
18258 ops[0] = gen_rtx_REG (SImode, dest + i);
18259 ops[1] = gen_rtx_REG (SImode, src + i);
18260 output_asm_insn ("mov%?\t%0, %1", ops);
18264 else
18266 gcc_assert (MEM_P (operands[0]));
18267 gcc_assert (REG_P (operands[1]));
18268 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18270 switch (GET_CODE (XEXP (operands[0], 0)))
18272 case REG:
18273 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18274 break;
18276 default:
18277 gcc_unreachable ();
18281 return "";
18284 /* Output a VFP load or store instruction. */
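/* With a simple base-register address this typically produces, e.g.,
   "flds s0, [r0]" for SFmode or "fldd d1, [r0]" for DFmode; pre-decrement
   and post-increment addresses use the fstm/fldm forms with writeback.  */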
18286 const char *
18287 output_move_vfp (rtx *operands)
18289 rtx reg, mem, addr, ops[2];
18290 int load = REG_P (operands[0]);
18291 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18292 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18293 const char *templ;
18294 char buff[50];
18295 enum machine_mode mode;
18297 reg = operands[!load];
18298 mem = operands[load];
18300 mode = GET_MODE (reg);
18302 gcc_assert (REG_P (reg));
18303 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18304 gcc_assert (mode == SFmode
18305 || mode == DFmode
18306 || mode == SImode
18307 || mode == DImode
18308 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18309 gcc_assert (MEM_P (mem));
18311 addr = XEXP (mem, 0);
18313 switch (GET_CODE (addr))
18315 case PRE_DEC:
18316 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18317 ops[0] = XEXP (addr, 0);
18318 ops[1] = reg;
18319 break;
18321 case POST_INC:
18322 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18323 ops[0] = XEXP (addr, 0);
18324 ops[1] = reg;
18325 break;
18327 default:
18328 templ = "f%s%c%%?\t%%%s0, %%1%s";
18329 ops[0] = reg;
18330 ops[1] = mem;
18331 break;
18334 sprintf (buff, templ,
18335 load ? "ld" : "st",
18336 dp ? 'd' : 's',
18337 dp ? "P" : "",
18338 integer_p ? "\t%@ int" : "");
18339 output_asm_insn (buff, ops);
18341 return "";
18344 /* Output a Neon double-word or quad-word load or store, or a load
18345 or store for larger structure modes.
18347 WARNING: The ordering of elements is weird in big-endian mode,
18348 because the EABI requires that vectors stored in memory appear
18349 as though they were stored by a VSTM instruction.
18350 GCC RTL defines element ordering based on in-memory order.
18351 This can be different from the architectural ordering of elements
18352 within a NEON register. The intrinsics defined in arm_neon.h use the
18353 NEON register element ordering, not the GCC RTL element ordering.
18355 For example, the in-memory ordering of a big-endian quadword
18356 vector with 16-bit elements when stored from register pair {d0,d1}
18357 will be (lowest address first, d0[N] is NEON register element N):
18359 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18361 When necessary, quadword registers (dN, dN+1) are moved to ARM
18362 registers from rN in the order:
18364 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18366 So that STM/LDM can be used on vectors in ARM registers, and the
18367 same memory layout will result as if VSTM/VLDM were used.
18369 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18370 possible, which allows use of appropriate alignment tags.
18371 Note that the choice of "64" is independent of the actual vector
18372 element size; this size simply ensures that the behavior is
18373 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18375 Due to limitations of those instructions, use of VST1.64/VLD1.64
18376 is not possible if:
18377 - the address contains PRE_DEC, or
18378 - the mode refers to more than 4 double-word registers
18380 In those cases, it would be possible to replace VSTM/VLDM by a
18381 sequence of instructions; this is not currently implemented since
18382 this is not certain to actually improve performance. */
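/* In practice a plain register address is typically emitted as a single
   "vld1.64"/"vst1.64" (or "vldmia"/"vstmia" for the larger struct modes),
   a reg-plus-offset or label address as one "vldr"/"vstr" per D register,
   and a PRE_DEC address as "vldmdb"/"vstmdb".  */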
18384 const char *
18385 output_move_neon (rtx *operands)
18387 rtx reg, mem, addr, ops[2];
18388 int regno, nregs, load = REG_P (operands[0]);
18389 const char *templ;
18390 char buff[50];
18391 enum machine_mode mode;
18393 reg = operands[!load];
18394 mem = operands[load];
18396 mode = GET_MODE (reg);
18398 gcc_assert (REG_P (reg));
18399 regno = REGNO (reg);
18400 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18401 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18402 || NEON_REGNO_OK_FOR_QUAD (regno));
18403 gcc_assert (VALID_NEON_DREG_MODE (mode)
18404 || VALID_NEON_QREG_MODE (mode)
18405 || VALID_NEON_STRUCT_MODE (mode));
18406 gcc_assert (MEM_P (mem));
18408 addr = XEXP (mem, 0);
18410 /* Strip off const from addresses like (const (plus (...))). */
18411 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18412 addr = XEXP (addr, 0);
18414 switch (GET_CODE (addr))
18416 case POST_INC:
18417 /* We have to use vldm / vstm for too-large modes. */
18418 if (nregs > 4)
18420 templ = "v%smia%%?\t%%0!, %%h1";
18421 ops[0] = XEXP (addr, 0);
18423 else
18425 templ = "v%s1.64\t%%h1, %%A0";
18426 ops[0] = mem;
18428 ops[1] = reg;
18429 break;
18431 case PRE_DEC:
18432 /* We have to use vldm / vstm in this case, since there is no
18433 pre-decrement form of the vld1 / vst1 instructions. */
18434 templ = "v%smdb%%?\t%%0!, %%h1";
18435 ops[0] = XEXP (addr, 0);
18436 ops[1] = reg;
18437 break;
18439 case POST_MODIFY:
18440 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18441 gcc_unreachable ();
18443 case LABEL_REF:
18444 case PLUS:
18446 int i;
18447 int overlap = -1;
18448 for (i = 0; i < nregs; i++)
18450 /* We're only using DImode here because it's a convenient size. */
18451 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18452 ops[1] = adjust_address (mem, DImode, 8 * i);
18453 if (reg_overlap_mentioned_p (ops[0], mem))
18455 gcc_assert (overlap == -1);
18456 overlap = i;
18458 else
18460 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18461 output_asm_insn (buff, ops);
18464 if (overlap != -1)
18466 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18467 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18468 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18469 output_asm_insn (buff, ops);
18472 return "";
18475 default:
18476 /* We have to use vldm / vstm for too-large modes. */
18477 if (nregs > 4)
18478 templ = "v%smia%%?\t%%m0, %%h1";
18479 else
18480 templ = "v%s1.64\t%%h1, %%A0";
18482 ops[0] = mem;
18483 ops[1] = reg;
18486 sprintf (buff, templ, load ? "ld" : "st");
18487 output_asm_insn (buff, ops);
18489 return "";
18492 /* Compute and return the length of neon_mov<mode>, where <mode> is
18493 one of the VSTRUCT modes: EI, OI, CI or XI. */
18495 arm_attr_length_move_neon (rtx insn)
18497 rtx reg, mem, addr;
18498 int load;
18499 enum machine_mode mode;
18501 extract_insn_cached (insn);
18503 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18505 mode = GET_MODE (recog_data.operand[0]);
18506 switch (mode)
18508 case EImode:
18509 case OImode:
18510 return 8;
18511 case CImode:
18512 return 12;
18513 case XImode:
18514 return 16;
18515 default:
18516 gcc_unreachable ();
18520 load = REG_P (recog_data.operand[0]);
18521 reg = recog_data.operand[!load];
18522 mem = recog_data.operand[load];
18524 gcc_assert (MEM_P (mem));
18526 mode = GET_MODE (reg);
18527 addr = XEXP (mem, 0);
18529 /* Strip off const from addresses like (const (plus (...))). */
18530 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18531 addr = XEXP (addr, 0);
18533 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18535 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18536 return insns * 4;
18538 else
18539 return 4;
18542 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18543 return zero. */
18546 arm_address_offset_is_imm (rtx insn)
18548 rtx mem, addr;
18550 extract_insn_cached (insn);
18552 if (REG_P (recog_data.operand[0]))
18553 return 0;
18555 mem = recog_data.operand[0];
18557 gcc_assert (MEM_P (mem));
18559 addr = XEXP (mem, 0);
18561 if (REG_P (addr)
18562 || (GET_CODE (addr) == PLUS
18563 && REG_P (XEXP (addr, 0))
18564 && CONST_INT_P (XEXP (addr, 1))))
18565 return 1;
18566 else
18567 return 0;
18570 /* Output an ADD r, s, #n where n may be too big for one instruction.
18571 If adding zero to one register, output nothing. */
18572 const char *
18573 output_add_immediate (rtx *operands)
18575 HOST_WIDE_INT n = INTVAL (operands[2]);
18577 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18579 if (n < 0)
18580 output_multi_immediate (operands,
18581 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18582 -n);
18583 else
18584 output_multi_immediate (operands,
18585 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18589 return "";
18592 /* Output a multiple immediate operation.
18593 OPERANDS is the vector of operands referred to in the output patterns.
18594 INSTR1 is the output pattern to use for the first constant.
18595 INSTR2 is the output pattern to use for subsequent constants.
18596 IMMED_OP is the index of the constant slot in OPERANDS.
18597 N is the constant value. */
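/* For example, a value of 0x12345 would be emitted as three instructions
   using the immediates 0x45, 0x2300 and 0x10000, each of which fits an
   ARM 8-bit rotated immediate.  */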
18598 static const char *
18599 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18600 int immed_op, HOST_WIDE_INT n)
18602 #if HOST_BITS_PER_WIDE_INT > 32
18603 n &= 0xffffffff;
18604 #endif
18606 if (n == 0)
18608 /* Quick and easy output. */
18609 operands[immed_op] = const0_rtx;
18610 output_asm_insn (instr1, operands);
18612 else
18614 int i;
18615 const char * instr = instr1;
18617 /* Note that n is never zero here (which would give no output). */
18618 for (i = 0; i < 32; i += 2)
18620 if (n & (3 << i))
18622 operands[immed_op] = GEN_INT (n & (255 << i));
18623 output_asm_insn (instr, operands);
18624 instr = instr2;
18625 i += 6;
18630 return "";
18633 /* Return the name of a shifter operation. */
18634 static const char *
18635 arm_shift_nmem(enum rtx_code code)
18637 switch (code)
18639 case ASHIFT:
18640 return ARM_LSL_NAME;
18642 case ASHIFTRT:
18643 return "asr";
18645 case LSHIFTRT:
18646 return "lsr";
18648 case ROTATERT:
18649 return "ror";
18651 default:
18652 abort();
18656 /* Return the appropriate ARM instruction for the operation code.
18657 The returned result should not be overwritten. OP is the rtx of the
18658 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18659 was shifted. */
18660 const char *
18661 arithmetic_instr (rtx op, int shift_first_arg)
18663 switch (GET_CODE (op))
18665 case PLUS:
18666 return "add";
18668 case MINUS:
18669 return shift_first_arg ? "rsb" : "sub";
18671 case IOR:
18672 return "orr";
18674 case XOR:
18675 return "eor";
18677 case AND:
18678 return "and";
18680 case ASHIFT:
18681 case ASHIFTRT:
18682 case LSHIFTRT:
18683 case ROTATERT:
18684 return arm_shift_nmem(GET_CODE(op));
18686 default:
18687 gcc_unreachable ();
18691 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18692 for the operation code. The returned result should not be overwritten.
18693 OP is the rtx of the shift.
18694 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18695 constant shift amount otherwise. */
18696 static const char *
18697 shift_op (rtx op, HOST_WIDE_INT *amountp)
18699 const char * mnem;
18700 enum rtx_code code = GET_CODE (op);
18702 switch (code)
18704 case ROTATE:
18705 if (!CONST_INT_P (XEXP (op, 1)))
18707 output_operand_lossage ("invalid shift operand");
18708 return NULL;
18711 code = ROTATERT;
18712 *amountp = 32 - INTVAL (XEXP (op, 1));
18713 mnem = "ror";
18714 break;
18716 case ASHIFT:
18717 case ASHIFTRT:
18718 case LSHIFTRT:
18719 case ROTATERT:
18720 mnem = arm_shift_nmem(code);
18721 if (CONST_INT_P (XEXP (op, 1)))
18723 *amountp = INTVAL (XEXP (op, 1));
18725 else if (REG_P (XEXP (op, 1)))
18727 *amountp = -1;
18728 return mnem;
18730 else
18732 output_operand_lossage ("invalid shift operand");
18733 return NULL;
18735 break;
18737 case MULT:
18738 /* We never have to worry about the amount being other than a
18739 power of 2, since this case can never be reloaded from a reg. */
18740 if (!CONST_INT_P (XEXP (op, 1)))
18742 output_operand_lossage ("invalid shift operand");
18743 return NULL;
18746 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18748 /* Amount must be a power of two. */
18749 if (*amountp & (*amountp - 1))
18751 output_operand_lossage ("invalid shift operand");
18752 return NULL;
18755 *amountp = int_log2 (*amountp);
18756 return ARM_LSL_NAME;
18758 default:
18759 output_operand_lossage ("invalid shift operand");
18760 return NULL;
18763 /* This is not 100% correct, but follows from the desire to merge
18764 multiplication by a power of 2 with the recognizer for a
18765 shift. >=32 is not a valid shift for "lsl", so we must try and
18766 output a shift that produces the correct arithmetical result.
18767 Using lsr #32 is identical except for the fact that the carry bit
18768 is not set correctly if we set the flags; but we never use the
18769 carry bit from such an operation, so we can ignore that. */
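/* E.g. a multiply by 8 comes out as "lsl #3", while a left shift by 32
   or more is rewritten as "lsr #32", which also yields zero.  */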
18770 if (code == ROTATERT)
18771 /* Rotate is just modulo 32. */
18772 *amountp &= 31;
18773 else if (*amountp != (*amountp & 31))
18775 if (code == ASHIFT)
18776 mnem = "lsr";
18777 *amountp = 32;
18780 /* Shifts of 0 are no-ops. */
18781 if (*amountp == 0)
18782 return NULL;
18784 return mnem;
18787 /* Obtain the shift from the POWER of two. */
18789 static HOST_WIDE_INT
18790 int_log2 (HOST_WIDE_INT power)
18792 HOST_WIDE_INT shift = 0;
18794 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18796 gcc_assert (shift <= 31);
18797 shift++;
18800 return shift;
18803 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18804 because /bin/as is horribly restrictive. The judgement about
18805 whether or not each character is 'printable' (and can be output as
18806 is) or not (and must be printed with an octal escape) must be made
18807 with reference to the *host* character set -- the situation is
18808 similar to that discussed in the comments above pp_c_char in
18809 c-pretty-print.c. */
18811 #define MAX_ASCII_LEN 51
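/* E.g. the three bytes 'a', '"', 0 would come out as  .ascii "a\"\000",
   with a new .ascii directive started whenever a line grows past
   MAX_ASCII_LEN characters.  */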
18813 void
18814 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18816 int i;
18817 int len_so_far = 0;
18819 fputs ("\t.ascii\t\"", stream);
18821 for (i = 0; i < len; i++)
18823 int c = p[i];
18825 if (len_so_far >= MAX_ASCII_LEN)
18827 fputs ("\"\n\t.ascii\t\"", stream);
18828 len_so_far = 0;
18831 if (ISPRINT (c))
18833 if (c == '\\' || c == '\"')
18835 putc ('\\', stream);
18836 len_so_far++;
18838 putc (c, stream);
18839 len_so_far++;
18841 else
18843 fprintf (stream, "\\%03o", c);
18844 len_so_far += 4;
18848 fputs ("\"\n", stream);
18851 /* Compute the register save mask for registers 0 through 12
18852 inclusive. This code is used by arm_compute_save_reg_mask. */
18854 static unsigned long
18855 arm_compute_save_reg0_reg12_mask (void)
18857 unsigned long func_type = arm_current_func_type ();
18858 unsigned long save_reg_mask = 0;
18859 unsigned int reg;
18861 if (IS_INTERRUPT (func_type))
18863 unsigned int max_reg;
18864 /* Interrupt functions must not corrupt any registers,
18865 even call clobbered ones. If this is a leaf function
18866 we can just examine the registers used by the RTL, but
18867 otherwise we have to assume that whatever function is
18868 called might clobber anything, and so we have to save
18869 all the call-clobbered registers as well. */
18870 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18871 /* FIQ handlers have registers r8 - r12 banked, so
18872 we only need to check r0 - r7. Normal ISRs only
18873 bank r14 and r15, so we must check up to r12.
18874 r13 is the stack pointer which is always preserved,
18875 so we do not need to consider it here. */
18876 max_reg = 7;
18877 else
18878 max_reg = 12;
18880 for (reg = 0; reg <= max_reg; reg++)
18881 if (df_regs_ever_live_p (reg)
18882 || (! crtl->is_leaf && call_used_regs[reg]))
18883 save_reg_mask |= (1 << reg);
18885 /* Also save the pic base register if necessary. */
18886 if (flag_pic
18887 && !TARGET_SINGLE_PIC_BASE
18888 && arm_pic_register != INVALID_REGNUM
18889 && crtl->uses_pic_offset_table)
18890 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18892 else if (IS_VOLATILE(func_type))
18894 /* For noreturn functions we historically omitted register saves
18895 altogether. However this really messes up debugging. As a
18896 compromise save just the frame pointers. Combined with the link
18897 register saved elsewhere this should be sufficient to get
18898 a backtrace. */
18899 if (frame_pointer_needed)
18900 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18901 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18902 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18903 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18904 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18906 else
18908 /* In the normal case we only need to save those registers
18909 which are call saved and which are used by this function. */
18910 for (reg = 0; reg <= 11; reg++)
18911 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18912 save_reg_mask |= (1 << reg);
18914 /* Handle the frame pointer as a special case. */
18915 if (frame_pointer_needed)
18916 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18918 /* If we aren't loading the PIC register,
18919 don't stack it even though it may be live. */
18920 if (flag_pic
18921 && !TARGET_SINGLE_PIC_BASE
18922 && arm_pic_register != INVALID_REGNUM
18923 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18924 || crtl->uses_pic_offset_table))
18925 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18927 /* The prologue will copy SP into R0, so save it. */
18928 if (IS_STACKALIGN (func_type))
18929 save_reg_mask |= 1;
18932 /* Save registers so the exception handler can modify them. */
18933 if (crtl->calls_eh_return)
18935 unsigned int i;
18937 for (i = 0; ; i++)
18939 reg = EH_RETURN_DATA_REGNO (i);
18940 if (reg == INVALID_REGNUM)
18941 break;
18942 save_reg_mask |= 1 << reg;
18946 return save_reg_mask;
18949 /* Return true if r3 is live at the start of the function. */
18951 static bool
18952 arm_r3_live_at_start_p (void)
18954 /* Just look at cfg info, which is still close enough to correct at this
18955 point. This gives false positives for broken functions that might use
18956 uninitialized data that happens to be allocated in r3, but who cares? */
18957 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18960 /* Compute the number of bytes used to store the static chain register on the
18961 stack, above the stack frame. We need to know this accurately to get the
18962 alignment of the rest of the stack frame correct. */
18964 static int
18965 arm_compute_static_chain_stack_bytes (void)
18967 /* See the defining assertion in arm_expand_prologue. */
18968 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18969 && IS_NESTED (arm_current_func_type ())
18970 && arm_r3_live_at_start_p ()
18971 && crtl->args.pretend_args_size == 0)
18972 return 4;
18974 return 0;
18977 /* Compute a bit mask of which registers need to be
18978 saved on the stack for the current function.
18979 This is used by arm_get_frame_offsets, which may add extra registers. */
18981 static unsigned long
18982 arm_compute_save_reg_mask (void)
18984 unsigned int save_reg_mask = 0;
18985 unsigned long func_type = arm_current_func_type ();
18986 unsigned int reg;
18988 if (IS_NAKED (func_type))
18989 /* This should never really happen. */
18990 return 0;
18992 /* If we are creating a stack frame, then we must save the frame pointer,
18993 IP (which will hold the old stack pointer), LR and the PC. */
18994 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18995 save_reg_mask |=
18996 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18997 | (1 << IP_REGNUM)
18998 | (1 << LR_REGNUM)
18999 | (1 << PC_REGNUM);
19001 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19003 /* Decide if we need to save the link register.
19004 Interrupt routines have their own banked link register,
19005 so they never need to save it.
19006 Otherwise if we do not use the link register we do not need to save
19007 it. If we are pushing other registers onto the stack however, we
19008 can save an instruction in the epilogue by pushing the link register
19009 now and then popping it back into the PC. This incurs extra memory
19010 accesses though, so we only do it when optimizing for size, and only
19011 if we know that we will not need a fancy return sequence. */
19012 if (df_regs_ever_live_p (LR_REGNUM)
19013 || (save_reg_mask
19014 && optimize_size
19015 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19016 && !crtl->calls_eh_return))
19017 save_reg_mask |= 1 << LR_REGNUM;
19019 if (cfun->machine->lr_save_eliminated)
19020 save_reg_mask &= ~ (1 << LR_REGNUM);
19022 if (TARGET_REALLY_IWMMXT
19023 && ((bit_count (save_reg_mask)
19024 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19025 arm_compute_static_chain_stack_bytes())
19026 ) % 2) != 0)
19028 /* The total number of registers that are going to be pushed
19029 onto the stack is odd. We need to ensure that the stack
19030 is 64-bit aligned before we start to save iWMMXt registers,
19031 and also before we start to create locals. (A local variable
19032 might be a double or long long which we will load/store using
19033 an iWMMXt instruction). Therefore we need to push another
19034 ARM register, so that the stack will be 64-bit aligned. We
19035 try to avoid using the arg registers (r0 - r3) as they might be
19036 used to pass values in a tail call. */
19037 for (reg = 4; reg <= 12; reg++)
19038 if ((save_reg_mask & (1 << reg)) == 0)
19039 break;
19041 if (reg <= 12)
19042 save_reg_mask |= (1 << reg);
19043 else
19045 cfun->machine->sibcall_blocked = 1;
19046 save_reg_mask |= (1 << 3);
19050 /* We may need to push an additional register for use initializing the
19051 PIC base register. */
19052 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19053 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19055 reg = thumb_find_work_register (1 << 4);
19056 if (!call_used_regs[reg])
19057 save_reg_mask |= (1 << reg);
19060 return save_reg_mask;
19064 /* Compute a bit mask of which registers need to be
19065 saved on the stack for the current function. */
19066 static unsigned long
19067 thumb1_compute_save_reg_mask (void)
19069 unsigned long mask;
19070 unsigned reg;
19072 mask = 0;
19073 for (reg = 0; reg < 12; reg ++)
19074 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19075 mask |= 1 << reg;
19077 if (flag_pic
19078 && !TARGET_SINGLE_PIC_BASE
19079 && arm_pic_register != INVALID_REGNUM
19080 && crtl->uses_pic_offset_table)
19081 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19083 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19084 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19085 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19087 /* LR will also be pushed if any lo regs are pushed. */
19088 if (mask & 0xff || thumb_force_lr_save ())
19089 mask |= (1 << LR_REGNUM);
19091 /* Make sure we have a low work register if we need one.
19092 We will need one if we are going to push a high register,
19093 but we are not currently intending to push a low register. */
19094 if ((mask & 0xff) == 0
19095 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19097 /* Use thumb_find_work_register to choose which register
19098 we will use. If the register is live then we will
19099 have to push it. Use LAST_LO_REGNUM as our fallback
19100 choice for the register to select. */
19101 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19102 /* Make sure the register returned by thumb_find_work_register is
19103 not part of the return value. */
19104 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19105 reg = LAST_LO_REGNUM;
19107 if (! call_used_regs[reg])
19108 mask |= 1 << reg;
19111 /* The 504 below is 8 bytes less than 512 because there are two possible
19112 alignment words. We can't tell here if they will be present or not so we
19113 have to play it safe and assume that they are. */
19114 if ((CALLER_INTERWORKING_SLOT_SIZE +
19115 ROUND_UP_WORD (get_frame_size ()) +
19116 crtl->outgoing_args_size) >= 504)
19118 /* This is the same as the code in thumb1_expand_prologue() which
19119 determines which register to use for stack decrement. */
19120 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19121 if (mask & (1 << reg))
19122 break;
19124 if (reg > LAST_LO_REGNUM)
19126 /* Make sure we have a register available for stack decrement. */
19127 mask |= 1 << LAST_LO_REGNUM;
19131 return mask;
19135 /* Return the number of bytes required to save VFP registers. */
19136 static int
19137 arm_get_vfp_saved_size (void)
19139 unsigned int regno;
19140 int count;
19141 int saved;
19143 saved = 0;
19144 /* Space for saved VFP registers. */
19145 if (TARGET_HARD_FLOAT && TARGET_VFP)
19147 count = 0;
19148 for (regno = FIRST_VFP_REGNUM;
19149 regno < LAST_VFP_REGNUM;
19150 regno += 2)
19152 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19153 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19155 if (count > 0)
19157 /* Workaround ARM10 VFPr1 bug. */
19158 if (count == 2 && !arm_arch6)
19159 count++;
19160 saved += count * 8;
19162 count = 0;
19164 else
19165 count++;
19167 if (count > 0)
19169 if (count == 2 && !arm_arch6)
19170 count++;
19171 saved += count * 8;
19174 return saved;
19178 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19179 everything bar the final return instruction. If SIMPLE_RETURN is true,
19180 then do not output the epilogue, because it has already been emitted in RTL. */
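/* For a simple leaf function with no saved registers this typically
   reduces to a single "bx lr", or "mov pc, lr" on cores without BX.  */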
19181 const char *
19182 output_return_instruction (rtx operand, bool really_return, bool reverse,
19183 bool simple_return)
19185 char conditional[10];
19186 char instr[100];
19187 unsigned reg;
19188 unsigned long live_regs_mask;
19189 unsigned long func_type;
19190 arm_stack_offsets *offsets;
19192 func_type = arm_current_func_type ();
19194 if (IS_NAKED (func_type))
19195 return "";
19197 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19199 /* If this function was declared non-returning, and we have
19200 found a tail call, then we have to trust that the called
19201 function won't return. */
19202 if (really_return)
19204 rtx ops[2];
19206 /* Otherwise, trap an attempted return by aborting. */
19207 ops[0] = operand;
19208 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19209 : "abort");
19210 assemble_external_libcall (ops[1]);
19211 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19214 return "";
19217 gcc_assert (!cfun->calls_alloca || really_return);
19219 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19221 cfun->machine->return_used_this_function = 1;
19223 offsets = arm_get_frame_offsets ();
19224 live_regs_mask = offsets->saved_regs_mask;
19226 if (!simple_return && live_regs_mask)
19228 const char * return_reg;
19230 /* If we do not have any special requirements for function exit
19231 (e.g. interworking) then we can load the return address
19232 directly into the PC. Otherwise we must load it into LR. */
19233 if (really_return
19234 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19235 return_reg = reg_names[PC_REGNUM];
19236 else
19237 return_reg = reg_names[LR_REGNUM];
19239 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19241 /* There are three possible reasons for the IP register
19242 being saved. 1) a stack frame was created, in which case
19243 IP contains the old stack pointer, or 2) an ISR routine
19244 corrupted it, or 3) it was saved to align the stack on
19245 iWMMXt. In case 1, restore IP into SP, otherwise just
19246 restore IP. */
19247 if (frame_pointer_needed)
19249 live_regs_mask &= ~ (1 << IP_REGNUM);
19250 live_regs_mask |= (1 << SP_REGNUM);
19252 else
19253 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19256 /* On some ARM architectures it is faster to use LDR rather than
19257 LDM to load a single register. On other architectures, the
19258 cost is the same. In 26 bit mode, or for exception handlers,
19259 we have to use LDM to load the PC so that the CPSR is also
19260 restored. */
19261 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19262 if (live_regs_mask == (1U << reg))
19263 break;
19265 if (reg <= LAST_ARM_REGNUM
19266 && (reg != LR_REGNUM
19267 || ! really_return
19268 || ! IS_INTERRUPT (func_type)))
19270 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19271 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19273 else
19275 char *p;
19276 int first = 1;
19278 /* Generate the load multiple instruction to restore the
19279 registers. Note we can get here, even if
19280 frame_pointer_needed is true, but only if sp already
19281 points to the base of the saved core registers. */
19282 if (live_regs_mask & (1 << SP_REGNUM))
19284 unsigned HOST_WIDE_INT stack_adjust;
19286 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19287 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19289 if (stack_adjust && arm_arch5 && TARGET_ARM)
19290 if (TARGET_UNIFIED_ASM)
19291 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19292 else
19293 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19294 else
19296 /* If we can't use ldmib (SA110 bug),
19297 then try to pop r3 instead. */
19298 if (stack_adjust)
19299 live_regs_mask |= 1 << 3;
19301 if (TARGET_UNIFIED_ASM)
19302 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19303 else
19304 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19307 else
19308 if (TARGET_UNIFIED_ASM)
19309 sprintf (instr, "pop%s\t{", conditional);
19310 else
19311 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19313 p = instr + strlen (instr);
19315 for (reg = 0; reg <= SP_REGNUM; reg++)
19316 if (live_regs_mask & (1 << reg))
19318 int l = strlen (reg_names[reg]);
19320 if (first)
19321 first = 0;
19322 else
19324 memcpy (p, ", ", 2);
19325 p += 2;
19328 memcpy (p, "%|", 2);
19329 memcpy (p + 2, reg_names[reg], l);
19330 p += l + 2;
19333 if (live_regs_mask & (1 << LR_REGNUM))
19335 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19336 /* If returning from an interrupt, restore the CPSR. */
19337 if (IS_INTERRUPT (func_type))
19338 strcat (p, "^");
19340 else
19341 strcpy (p, "}");
19344 output_asm_insn (instr, & operand);
19346 /* See if we need to generate an extra instruction to
19347 perform the actual function return. */
19348 if (really_return
19349 && func_type != ARM_FT_INTERWORKED
19350 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19352 /* The return has already been handled
19353 by loading the LR into the PC. */
19354 return "";
19358 if (really_return)
19360 switch ((int) ARM_FUNC_TYPE (func_type))
19362 case ARM_FT_ISR:
19363 case ARM_FT_FIQ:
19364 /* ??? This is wrong for unified assembly syntax. */
19365 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19366 break;
19368 case ARM_FT_INTERWORKED:
19369 sprintf (instr, "bx%s\t%%|lr", conditional);
19370 break;
19372 case ARM_FT_EXCEPTION:
19373 /* ??? This is wrong for unified assembly syntax. */
19374 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19375 break;
19377 default:
19378 /* Use bx if it's available. */
19379 if (arm_arch5 || arm_arch4t)
19380 sprintf (instr, "bx%s\t%%|lr", conditional);
19381 else
19382 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19383 break;
19386 output_asm_insn (instr, & operand);
19389 return "";
19392 /* Write the function name into the code section, directly preceding
19393 the function prologue.
19395 Code will be output similar to this:
19397 .ascii "arm_poke_function_name", 0
19398 .align
19400 .word 0xff000000 + (t1 - t0)
19401 arm_poke_function_name
19402 mov ip, sp
19403 stmfd sp!, {fp, ip, lr, pc}
19404 sub fp, ip, #4
19406 When performing a stack backtrace, code can inspect the value
19407 of 'pc' stored at 'fp' + 0. If the trace function then looks
19408 at location pc - 12 and the top 8 bits are set, then we know
19409 that there is a function name embedded immediately preceding this
19410 location, and that its length is ((pc[-3]) & ~0xff000000).
19412 We assume that pc is declared as a pointer to an unsigned long.
19414 It is of no benefit to output the function name if we are assembling
19415 a leaf function. These function types will not contain a stack
19416 backtrace structure, so it is not possible to determine the
19417 function name. */
19418 void
19419 arm_poke_function_name (FILE *stream, const char *name)
19421 unsigned long alignlength;
19422 unsigned long length;
19423 rtx x;
19425 length = strlen (name) + 1;
19426 alignlength = ROUND_UP_WORD (length);
19428 ASM_OUTPUT_ASCII (stream, name, length);
19429 ASM_OUTPUT_ALIGN (stream, 2);
19430 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19431 assemble_aligned_integer (UNITS_PER_WORD, x);
19434 /* Place some comments into the assembler stream
19435 describing the current function. */
19436 static void
19437 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19439 unsigned long func_type;
19441 /* ??? Do we want to print some of the below anyway? */
19442 if (TARGET_THUMB1)
19443 return;
19445 /* Sanity check. */
19446 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19448 func_type = arm_current_func_type ();
19450 switch ((int) ARM_FUNC_TYPE (func_type))
19452 default:
19453 case ARM_FT_NORMAL:
19454 break;
19455 case ARM_FT_INTERWORKED:
19456 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19457 break;
19458 case ARM_FT_ISR:
19459 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19460 break;
19461 case ARM_FT_FIQ:
19462 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19463 break;
19464 case ARM_FT_EXCEPTION:
19465 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19466 break;
19469 if (IS_NAKED (func_type))
19470 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19472 if (IS_VOLATILE (func_type))
19473 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19475 if (IS_NESTED (func_type))
19476 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19477 if (IS_STACKALIGN (func_type))
19478 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19480 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19481 crtl->args.size,
19482 crtl->args.pretend_args_size, frame_size);
19484 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19485 frame_pointer_needed,
19486 cfun->machine->uses_anonymous_args);
19488 if (cfun->machine->lr_save_eliminated)
19489 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19491 if (crtl->calls_eh_return)
19492 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19496 static void
19497 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19498 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19500 arm_stack_offsets *offsets;
19502 if (TARGET_THUMB1)
19504 int regno;
19506 /* Emit any call-via-reg trampolines that are needed for v4t support
19507 of call_reg and call_value_reg type insns. */
19508 for (regno = 0; regno < LR_REGNUM; regno++)
19510 rtx label = cfun->machine->call_via[regno];
19512 if (label != NULL)
19514 switch_to_section (function_section (current_function_decl));
19515 targetm.asm_out.internal_label (asm_out_file, "L",
19516 CODE_LABEL_NUMBER (label));
19517 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19521 /* ??? Probably not safe to set this here, since it assumes that a
19522 function will be emitted as assembly immediately after we generate
19523 RTL for it. This does not happen for inline functions. */
19524 cfun->machine->return_used_this_function = 0;
19526 else /* TARGET_32BIT */
19528 /* We need to take into account any stack-frame rounding. */
19529 offsets = arm_get_frame_offsets ();
19531 gcc_assert (!use_return_insn (FALSE, NULL)
19532 || (cfun->machine->return_used_this_function != 0)
19533 || offsets->saved_regs == offsets->outgoing_args
19534 || frame_pointer_needed);
19538 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19539 STR and STRD. If an even number of registers are being pushed, one
19540 STRD pattern is created for each register pair. If an
19541 odd number of registers are pushed, emit an initial STR followed by
19542 as many STRD instructions as are needed. This works best when the
19543 stack is initially 64-bit aligned (the normal case), since it
19544 ensures that each STRD is also 64-bit aligned. */
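/* E.g. pushing {r4, r5, r6} emits RTL equivalent to
   "str r4, [sp, #-12]!" followed by "strd r5, r6, [sp, #4]".  */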
19545 static void
19546 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19548 int num_regs = 0;
19549 int i;
19550 int regno;
19551 rtx par = NULL_RTX;
19552 rtx dwarf = NULL_RTX;
19553 rtx tmp;
19554 bool first = true;
19556 num_regs = bit_count (saved_regs_mask);
19558 /* Must be at least one register to save, and can't save SP or PC. */
19559 gcc_assert (num_regs > 0 && num_regs <= 14);
19560 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19561 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19563 /* Create sequence for DWARF info. All the frame-related data for
19564 debugging is held in this wrapper. */
19565 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19567 /* Describe the stack adjustment. */
19568 tmp = gen_rtx_SET (VOIDmode,
19569 stack_pointer_rtx,
19570 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19571 RTX_FRAME_RELATED_P (tmp) = 1;
19572 XVECEXP (dwarf, 0, 0) = tmp;
19574 /* Find the first register. */
19575 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19578 i = 0;
19580 /* If there's an odd number of registers to push, start off by
19581 pushing a single register. This ensures that subsequent strd
19582 operations are dword aligned (assuming that SP was originally
19583 64-bit aligned). */
19584 if ((num_regs & 1) != 0)
19586 rtx reg, mem, insn;
19588 reg = gen_rtx_REG (SImode, regno);
19589 if (num_regs == 1)
19590 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19591 stack_pointer_rtx));
19592 else
19593 mem = gen_frame_mem (Pmode,
19594 gen_rtx_PRE_MODIFY
19595 (Pmode, stack_pointer_rtx,
19596 plus_constant (Pmode, stack_pointer_rtx,
19597 -4 * num_regs)));
19599 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19600 RTX_FRAME_RELATED_P (tmp) = 1;
19601 insn = emit_insn (tmp);
19602 RTX_FRAME_RELATED_P (insn) = 1;
19603 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19604 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19605 reg);
19606 RTX_FRAME_RELATED_P (tmp) = 1;
19607 i++;
19608 regno++;
19609 XVECEXP (dwarf, 0, i) = tmp;
19610 first = false;
19613 while (i < num_regs)
19614 if (saved_regs_mask & (1 << regno))
19616 rtx reg1, reg2, mem1, mem2;
19617 rtx tmp0, tmp1, tmp2;
19618 int regno2;
19620 /* Find the register to pair with this one. */
19621 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19622 regno2++)
19625 reg1 = gen_rtx_REG (SImode, regno);
19626 reg2 = gen_rtx_REG (SImode, regno2);
19628 if (first)
19630 rtx insn;
19632 first = false;
19633 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19634 stack_pointer_rtx,
19635 -4 * num_regs));
19636 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19637 stack_pointer_rtx,
19638 -4 * (num_regs - 1)));
19639 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19640 plus_constant (Pmode, stack_pointer_rtx,
19641 -4 * (num_regs)));
19642 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19643 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19644 RTX_FRAME_RELATED_P (tmp0) = 1;
19645 RTX_FRAME_RELATED_P (tmp1) = 1;
19646 RTX_FRAME_RELATED_P (tmp2) = 1;
19647 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19648 XVECEXP (par, 0, 0) = tmp0;
19649 XVECEXP (par, 0, 1) = tmp1;
19650 XVECEXP (par, 0, 2) = tmp2;
19651 insn = emit_insn (par);
19652 RTX_FRAME_RELATED_P (insn) = 1;
19653 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19655 else
19657 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19658 stack_pointer_rtx,
19659 4 * i));
19660 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19661 stack_pointer_rtx,
19662 4 * (i + 1)));
19663 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19664 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19665 RTX_FRAME_RELATED_P (tmp1) = 1;
19666 RTX_FRAME_RELATED_P (tmp2) = 1;
19667 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19668 XVECEXP (par, 0, 0) = tmp1;
19669 XVECEXP (par, 0, 1) = tmp2;
19670 emit_insn (par);
19673 /* Create unwind information. This is an approximation. */
19674 tmp1 = gen_rtx_SET (VOIDmode,
19675 gen_frame_mem (Pmode,
19676 plus_constant (Pmode,
19677 stack_pointer_rtx,
19678 4 * i)),
19679 reg1);
19680 tmp2 = gen_rtx_SET (VOIDmode,
19681 gen_frame_mem (Pmode,
19682 plus_constant (Pmode,
19683 stack_pointer_rtx,
19684 4 * (i + 1))),
19685 reg2);
19687 RTX_FRAME_RELATED_P (tmp1) = 1;
19688 RTX_FRAME_RELATED_P (tmp2) = 1;
19689 XVECEXP (dwarf, 0, i + 1) = tmp1;
19690 XVECEXP (dwarf, 0, i + 2) = tmp2;
19691 i += 2;
19692 regno = regno2 + 1;
19694 else
19695 regno++;
19697 return;
19700 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19701 whenever possible, otherwise it emits single-word stores. The first store
19702 also allocates stack space for all saved registers, using pre-indexed
19703 addressing with writeback. All other stores use offset addressing. If no STRD
19704 can be emitted, this function emits a sequence of single-word stores,
19705 and not an STM as before, because single-word stores give the scheduler more
19706 freedom and can be turned into an STM by peephole optimizations. */
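/* Illustrative sketch (assumed register set, not literal compiler output):
   for a mask containing {r4, r5, r7} this routine would emit roughly

       strd  r4, r5, [sp, #-12]!  @ first store allocates all 12 bytes
       str   r7, [sp, #8]         @ lone register uses offset addressing

   since r4/r5 are consecutive but r7 has no partner.  */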
19707 static void
19708 arm_emit_strd_push (unsigned long saved_regs_mask)
19710 int num_regs = 0;
19711 int i, j, dwarf_index = 0;
19712 int offset = 0;
19713 rtx dwarf = NULL_RTX;
19714 rtx insn = NULL_RTX;
19715 rtx tmp, mem;
19717 /* TODO: More efficient code could be emitted by changing the
19718 layout, e.g., first push all pairs that can use STRD to keep the
19719 stack aligned, and then push all other registers. */
19720 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19721 if (saved_regs_mask & (1 << i))
19722 num_regs++;
19724 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19725 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19726 gcc_assert (num_regs > 0);
19728 /* Create sequence for DWARF info. */
19729 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19731 /* For the dwarf info, we generate an explicit stack update. */
19732 tmp = gen_rtx_SET (VOIDmode,
19733 stack_pointer_rtx,
19734 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19735 RTX_FRAME_RELATED_P (tmp) = 1;
19736 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19738 /* Save registers. */
19739 offset = - 4 * num_regs;
19740 j = 0;
19741 while (j <= LAST_ARM_REGNUM)
19742 if (saved_regs_mask & (1 << j))
19744 if ((j % 2 == 0)
19745 && (saved_regs_mask & (1 << (j + 1))))
19747 /* The current register and the next register form a register pair
19748 for which STRD can be generated. */
19749 if (offset < 0)
19751 /* Allocate stack space for all saved registers. */
19752 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19753 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19754 mem = gen_frame_mem (DImode, tmp);
19755 offset = 0;
19757 else if (offset > 0)
19758 mem = gen_frame_mem (DImode,
19759 plus_constant (Pmode,
19760 stack_pointer_rtx,
19761 offset));
19762 else
19763 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19765 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19766 RTX_FRAME_RELATED_P (tmp) = 1;
19767 tmp = emit_insn (tmp);
19769 /* Record the first store insn. */
19770 if (dwarf_index == 1)
19771 insn = tmp;
19773 /* Generate dwarf info. */
19774 mem = gen_frame_mem (SImode,
19775 plus_constant (Pmode,
19776 stack_pointer_rtx,
19777 offset));
19778 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19779 RTX_FRAME_RELATED_P (tmp) = 1;
19780 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19782 mem = gen_frame_mem (SImode,
19783 plus_constant (Pmode,
19784 stack_pointer_rtx,
19785 offset + 4));
19786 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19787 RTX_FRAME_RELATED_P (tmp) = 1;
19788 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19790 offset += 8;
19791 j += 2;
19793 else
19795 /* Emit a single word store. */
19796 if (offset < 0)
19798 /* Allocate stack space for all saved registers. */
19799 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19800 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19801 mem = gen_frame_mem (SImode, tmp);
19802 offset = 0;
19804 else if (offset > 0)
19805 mem = gen_frame_mem (SImode,
19806 plus_constant (Pmode,
19807 stack_pointer_rtx,
19808 offset));
19809 else
19810 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19812 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19813 RTX_FRAME_RELATED_P (tmp) = 1;
19814 tmp = emit_insn (tmp);
19816 /* Record the first store insn. */
19817 if (dwarf_index == 1)
19818 insn = tmp;
19820 /* Generate dwarf info. */
19821 mem = gen_frame_mem (SImode,
19822 plus_constant(Pmode,
19823 stack_pointer_rtx,
19824 offset));
19825 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19826 RTX_FRAME_RELATED_P (tmp) = 1;
19827 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19829 offset += 4;
19830 j += 1;
19833 else
19834 j++;
19836 /* Attach dwarf info to the first insn we generate. */
19837 gcc_assert (insn != NULL_RTX);
19838 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19839 RTX_FRAME_RELATED_P (insn) = 1;
19842 /* Generate and emit an insn that we will recognize as a push_multi.
19843 Unfortunately, since this insn does not reflect very well the actual
19844 semantics of the operation, we need to annotate the insn for the benefit
19845 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19846 MASK for registers that should be annotated for DWARF2 frame unwind
19847 information. */
19848 static rtx
19849 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19851 int num_regs = 0;
19852 int num_dwarf_regs = 0;
19853 int i, j;
19854 rtx par;
19855 rtx dwarf;
19856 int dwarf_par_index;
19857 rtx tmp, reg;
19859 /* We don't record the PC in the dwarf frame information. */
19860 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19862 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19864 if (mask & (1 << i))
19865 num_regs++;
19866 if (dwarf_regs_mask & (1 << i))
19867 num_dwarf_regs++;
19870 gcc_assert (num_regs && num_regs <= 16);
19871 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19873 /* For the body of the insn we are going to generate an UNSPEC in
19874 parallel with several USEs. This allows the insn to be recognized
19875 by the push_multi pattern in the arm.md file.
19877 The body of the insn looks something like this:
19879 (parallel [
19880 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19881 (const_int:SI <num>)))
19882 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19883 (use (reg:SI XX))
19884 (use (reg:SI YY))
19888 For the frame note however, we try to be more explicit and actually
19889 show each register being stored into the stack frame, plus a (single)
19890 decrement of the stack pointer. We do it this way in order to be
19891 friendly to the stack unwinding code, which only wants to see a single
19892 stack decrement per instruction. The RTL we generate for the note looks
19893 something like this:
19895 (sequence [
19896 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19897 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19898 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19899 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19903 FIXME: In an ideal world the PRE_MODIFY would not exist and
19904 instead we'd have a parallel expression detailing all
19905 the stores to the various memory addresses so that debug
19906 information is more up-to-date. Remember, however, that any such
19907 rewrite must still respect the constraints of the push instruction.
19909 Note also that this has to be taken care of for the VFP registers.
19911 For more see PR43399. */
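/* Roughly speaking, the PARALLEL built below is matched by the push_multi
   pattern and ends up as a single "push {r4, XX, YY, ...}" (stmfd) in the
   output, while the SEQUENCE is only attached as a REG_FRAME_RELATED_EXPR
   note so that the unwinder sees one SP decrement plus per-register
   stores.  */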
19913 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19914 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19915 dwarf_par_index = 1;
19917 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19919 if (mask & (1 << i))
19921 reg = gen_rtx_REG (SImode, i);
19923 XVECEXP (par, 0, 0)
19924 = gen_rtx_SET (VOIDmode,
19925 gen_frame_mem
19926 (BLKmode,
19927 gen_rtx_PRE_MODIFY (Pmode,
19928 stack_pointer_rtx,
19929 plus_constant
19930 (Pmode, stack_pointer_rtx,
19931 -4 * num_regs))
19933 gen_rtx_UNSPEC (BLKmode,
19934 gen_rtvec (1, reg),
19935 UNSPEC_PUSH_MULT));
19937 if (dwarf_regs_mask & (1 << i))
19939 tmp = gen_rtx_SET (VOIDmode,
19940 gen_frame_mem (SImode, stack_pointer_rtx),
19941 reg);
19942 RTX_FRAME_RELATED_P (tmp) = 1;
19943 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19946 break;
19950 for (j = 1, i++; j < num_regs; i++)
19952 if (mask & (1 << i))
19954 reg = gen_rtx_REG (SImode, i);
19956 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19958 if (dwarf_regs_mask & (1 << i))
19961 tmp = gen_rtx_SET (VOIDmode,
19962 gen_frame_mem
19963 (SImode,
19964 plus_constant (Pmode, stack_pointer_rtx,
19965 4 * j)),
19966 reg);
19967 RTX_FRAME_RELATED_P (tmp) = 1;
19968 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19971 j++;
19975 par = emit_insn (par);
19977 tmp = gen_rtx_SET (VOIDmode,
19978 stack_pointer_rtx,
19979 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19980 RTX_FRAME_RELATED_P (tmp) = 1;
19981 XVECEXP (dwarf, 0, 0) = tmp;
19983 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19985 return par;
19988 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19989 SIZE is the offset to be adjusted.
19990 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19991 static void
19992 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19994 rtx dwarf;
19996 RTX_FRAME_RELATED_P (insn) = 1;
19997 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19998 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20001 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20002 SAVED_REGS_MASK shows which registers need to be restored.
20004 Unfortunately, since this insn does not reflect very well the actual
20005 semantics of the operation, we need to annotate the insn for the benefit
20006 of DWARF2 frame unwind information. */
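/* As a rough example (assumed register set): for SAVED_REGS_MASK
   containing {r4, r5, PC}, with SP not in the mask, the PARALLEL built
   below is

       [(return)
        (set sp (plus sp 12))
        (set r4 (mem sp)) (set r5 (mem sp+4)) (set pc (mem sp+8))]

   which is emitted as a jump insn and matches "pop {r4, r5, pc}".  */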
20007 static void
20008 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20010 int num_regs = 0;
20011 int i, j;
20012 rtx par;
20013 rtx dwarf = NULL_RTX;
20014 rtx tmp, reg;
20015 bool return_in_pc;
20016 int offset_adj;
20017 int emit_update;
20019 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20020 offset_adj = return_in_pc ? 1 : 0;
20021 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20022 if (saved_regs_mask & (1 << i))
20023 num_regs++;
20025 gcc_assert (num_regs && num_regs <= 16);
20027 /* If SP is in the register list, then we don't emit an SP update insn. */
20028 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20030 /* The parallel needs to hold num_regs SETs
20031 and one SET for the stack update. */
20032 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20034 if (return_in_pc)
20036 tmp = ret_rtx;
20037 XVECEXP (par, 0, 0) = tmp;
20040 if (emit_update)
20042 /* Increment the stack pointer, based on there being
20043 num_regs 4-byte registers to restore. */
20044 tmp = gen_rtx_SET (VOIDmode,
20045 stack_pointer_rtx,
20046 plus_constant (Pmode,
20047 stack_pointer_rtx,
20048 4 * num_regs));
20049 RTX_FRAME_RELATED_P (tmp) = 1;
20050 XVECEXP (par, 0, offset_adj) = tmp;
20053 /* Now restore every reg, which may include PC. */
20054 for (j = 0, i = 0; j < num_regs; i++)
20055 if (saved_regs_mask & (1 << i))
20057 reg = gen_rtx_REG (SImode, i);
20058 if ((num_regs == 1) && emit_update && !return_in_pc)
20060 /* Emit single load with writeback. */
20061 tmp = gen_frame_mem (SImode,
20062 gen_rtx_POST_INC (Pmode,
20063 stack_pointer_rtx));
20064 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20065 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20066 return;
20069 tmp = gen_rtx_SET (VOIDmode,
20070 reg,
20071 gen_frame_mem
20072 (SImode,
20073 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20074 RTX_FRAME_RELATED_P (tmp) = 1;
20075 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20077 /* We need to maintain a sequence for DWARF info too. As the dwarf
20078 info should not include PC, skip PC. */
20079 if (i != PC_REGNUM)
20080 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20082 j++;
20085 if (return_in_pc)
20086 par = emit_jump_insn (par);
20087 else
20088 par = emit_insn (par);
20090 REG_NOTES (par) = dwarf;
20091 if (!return_in_pc)
20092 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20093 stack_pointer_rtx, stack_pointer_rtx);
20096 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20097 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20099 Unfortunately, since this insn does not reflect very well the actual
20100 semantics of the operation, we need to annotate the insn for the benefit
20101 of DWARF2 frame unwind information. */
20102 static void
20103 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20105 int i, j;
20106 rtx par;
20107 rtx dwarf = NULL_RTX;
20108 rtx tmp, reg;
20110 gcc_assert (num_regs && num_regs <= 32);
20112 /* Workaround ARM10 VFPr1 bug. */
20113 if (num_regs == 2 && !arm_arch6)
20115 if (first_reg == 15)
20116 first_reg--;
20118 num_regs++;
20121 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20122 there could be up to 32 D-registers to restore.
20123 If there are more than 16 D-registers, make two recursive calls,
20124 each of which emits one pop_multi instruction. */
20125 if (num_regs > 16)
20127 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20128 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20129 return;
20132 /* The parallel needs to hold num_regs SETs
20133 and one SET for the stack update. */
20134 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20136 /* Increment the stack pointer, based on there being
20137 num_regs 8-byte registers to restore. */
20138 tmp = gen_rtx_SET (VOIDmode,
20139 base_reg,
20140 plus_constant (Pmode, base_reg, 8 * num_regs));
20141 RTX_FRAME_RELATED_P (tmp) = 1;
20142 XVECEXP (par, 0, 0) = tmp;
20144 /* Now show every reg that will be restored, using a SET for each. */
20145 for (j = 0, i=first_reg; j < num_regs; i += 2)
20147 reg = gen_rtx_REG (DFmode, i);
20149 tmp = gen_rtx_SET (VOIDmode,
20150 reg,
20151 gen_frame_mem
20152 (DFmode,
20153 plus_constant (Pmode, base_reg, 8 * j)));
20154 RTX_FRAME_RELATED_P (tmp) = 1;
20155 XVECEXP (par, 0, j + 1) = tmp;
20157 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20159 j++;
20162 par = emit_insn (par);
20163 REG_NOTES (par) = dwarf;
20165 /* Make sure the CFA doesn't stay based on IP_REGNUM, so that unwinding from FP works. */
20166 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20168 RTX_FRAME_RELATED_P (par) = 1;
20169 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20171 else
20172 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20173 base_reg, base_reg);
20176 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20177 even number of registers is being popped, multiple LDRD patterns are created
20178 for all register pairs. If an odd number of registers is popped, the last
20179 register is loaded using an LDR pattern. */
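/* Sketch (assumed register set, not literal compiler output): popping
   {r4, r5, r6} in Thumb-2 would give roughly

       ldrd  r4, r5, [sp]    @ one LDRD per register pair
       add   sp, sp, #8
       ldr   r6, [sp], #4    @ leftover register via post-increment LDR

   with the stack adjusted before the final single load.  */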
20180 static void
20181 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20183 int num_regs = 0;
20184 int i, j;
20185 rtx par = NULL_RTX;
20186 rtx dwarf = NULL_RTX;
20187 rtx tmp, reg, tmp1;
20188 bool return_in_pc;
20190 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20191 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20192 if (saved_regs_mask & (1 << i))
20193 num_regs++;
20195 gcc_assert (num_regs && num_regs <= 16);
20197 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20198 to be popped. So, if num_regs was even, it now becomes odd,
20199 and we can generate a pop with PC. If num_regs was odd, it is
20200 now even, and an ldr with return can be generated for PC. */
20201 if (return_in_pc)
20202 num_regs--;
20204 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20206 /* Var j iterates over all the registers to gather all the registers in
20207 saved_regs_mask. Var i gives the index of a saved register in the stack
20208 frame. A PARALLEL RTX of a register pair is created here, so that the
20209 pattern for LDRD can be matched. As PC is always the last register to be
20210 popped, and we have already decremented num_regs if PC is present, we
20211 don't have to worry about PC in this loop. */
20212 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20213 if (saved_regs_mask & (1 << j))
20215 /* Create RTX for memory load. */
20216 reg = gen_rtx_REG (SImode, j);
20217 tmp = gen_rtx_SET (SImode,
20218 reg,
20219 gen_frame_mem (SImode,
20220 plus_constant (Pmode,
20221 stack_pointer_rtx, 4 * i)));
20222 RTX_FRAME_RELATED_P (tmp) = 1;
20224 if (i % 2 == 0)
20226 /* When saved-register index (i) is even, the RTX to be emitted is
20227 yet to be created. Hence create it first. The LDRD pattern we
20228 are generating is :
20229 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20230 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20231 where target registers need not be consecutive. */
20232 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20233 dwarf = NULL_RTX;
20236 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20237 added as the 0th element, and if i is odd, reg_i is added as the 1st
20238 element of the LDRD pattern shown above. */
20239 XVECEXP (par, 0, (i % 2)) = tmp;
20240 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20242 if ((i % 2) == 1)
20244 /* When saved-register index (i) is odd, RTXs for both the registers
20245 to be loaded are generated in above given LDRD pattern, and the
20246 pattern can be emitted now. */
20247 par = emit_insn (par);
20248 REG_NOTES (par) = dwarf;
20249 RTX_FRAME_RELATED_P (par) = 1;
20252 i++;
20255 /* If the number of registers pushed is odd and return_in_pc is false, or
20256 the number of registers is even and return_in_pc is true, the last
20257 register is popped using LDR. It can be PC as well. Hence, adjust the
20258 stack first and then use LDR with post-increment. */
20260 /* Increment the stack pointer, based on there being
20261 num_regs 4-byte registers to restore. */
20262 tmp = gen_rtx_SET (VOIDmode,
20263 stack_pointer_rtx,
20264 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20265 RTX_FRAME_RELATED_P (tmp) = 1;
20266 tmp = emit_insn (tmp);
20267 if (!return_in_pc)
20269 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20270 stack_pointer_rtx, stack_pointer_rtx);
20273 dwarf = NULL_RTX;
20275 if (((num_regs % 2) == 1 && !return_in_pc)
20276 || ((num_regs % 2) == 0 && return_in_pc))
20278 /* Scan for the single register to be popped. Skip until the saved
20279 register is found. */
20280 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20282 /* Gen LDR with post increment here. */
20283 tmp1 = gen_rtx_MEM (SImode,
20284 gen_rtx_POST_INC (SImode,
20285 stack_pointer_rtx));
20286 set_mem_alias_set (tmp1, get_frame_alias_set ());
20288 reg = gen_rtx_REG (SImode, j);
20289 tmp = gen_rtx_SET (SImode, reg, tmp1);
20290 RTX_FRAME_RELATED_P (tmp) = 1;
20291 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20293 if (return_in_pc)
20295 /* If return_in_pc, j must be PC_REGNUM. */
20296 gcc_assert (j == PC_REGNUM);
20297 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20298 XVECEXP (par, 0, 0) = ret_rtx;
20299 XVECEXP (par, 0, 1) = tmp;
20300 par = emit_jump_insn (par);
20302 else
20304 par = emit_insn (tmp);
20305 REG_NOTES (par) = dwarf;
20306 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20307 stack_pointer_rtx, stack_pointer_rtx);
20311 else if ((num_regs % 2) == 1 && return_in_pc)
20313 /* There are 2 registers to be popped. So, generate the pattern
20314 pop_multiple_with_stack_update_and_return to pop in PC. */
20315 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20318 return;
20321 /* LDRD in ARM mode needs consecutive registers as operands. This function
20322 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20323 offset addressing and then generates one separate stack update. This provides
20324 more scheduling freedom, compared to writeback on every load. However,
20325 if the function returns using a load into PC directly
20326 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20327 before the last load. TODO: Add a peephole optimization to recognize
20328 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add a
20329 peephole optimization to merge the load at stack-offset zero
20330 with the stack update instruction, using a load with writeback
20331 in post-index addressing mode. */
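/* Sketch (assumed register set, not literal compiler output): popping
   {r4, r5, r6} in ARM mode would give roughly

       ldrd  r4, r5, [sp]     @ consecutive pair via LDRD
       ldr   r6, [sp, #8]     @ leftover register, offset addressing
       add   sp, sp, #12      @ one separate stack update at the end

   which leaves the scheduler free to reorder the loads.  */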
20332 static void
20333 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20335 int j = 0;
20336 int offset = 0;
20337 rtx par = NULL_RTX;
20338 rtx dwarf = NULL_RTX;
20339 rtx tmp, mem;
20341 /* Restore saved registers. */
20342 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20343 j = 0;
20344 while (j <= LAST_ARM_REGNUM)
20345 if (saved_regs_mask & (1 << j))
20347 if ((j % 2) == 0
20348 && (saved_regs_mask & (1 << (j + 1)))
20349 && (j + 1) != PC_REGNUM)
20351 /* Current register and next register form register pair for which
20352 LDRD can be generated. PC is always the last register popped, and
20353 we handle it separately. */
20354 if (offset > 0)
20355 mem = gen_frame_mem (DImode,
20356 plus_constant (Pmode,
20357 stack_pointer_rtx,
20358 offset));
20359 else
20360 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20362 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20363 tmp = emit_insn (tmp);
20364 RTX_FRAME_RELATED_P (tmp) = 1;
20366 /* Generate dwarf info. */
20368 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20369 gen_rtx_REG (SImode, j),
20370 NULL_RTX);
20371 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20372 gen_rtx_REG (SImode, j + 1),
20373 dwarf);
20375 REG_NOTES (tmp) = dwarf;
20377 offset += 8;
20378 j += 2;
20380 else if (j != PC_REGNUM)
20382 /* Emit a single word load. */
20383 if (offset > 0)
20384 mem = gen_frame_mem (SImode,
20385 plus_constant (Pmode,
20386 stack_pointer_rtx,
20387 offset));
20388 else
20389 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20391 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20392 tmp = emit_insn (tmp);
20393 RTX_FRAME_RELATED_P (tmp) = 1;
20395 /* Generate dwarf info. */
20396 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20397 gen_rtx_REG (SImode, j),
20398 NULL_RTX);
20400 offset += 4;
20401 j += 1;
20403 else /* j == PC_REGNUM */
20404 j++;
20406 else
20407 j++;
20409 /* Update the stack. */
20410 if (offset > 0)
20412 tmp = gen_rtx_SET (Pmode,
20413 stack_pointer_rtx,
20414 plus_constant (Pmode,
20415 stack_pointer_rtx,
20416 offset));
20417 tmp = emit_insn (tmp);
20418 arm_add_cfa_adjust_cfa_note (tmp, offset,
20419 stack_pointer_rtx, stack_pointer_rtx);
20420 offset = 0;
20423 if (saved_regs_mask & (1 << PC_REGNUM))
20425 /* Only PC is to be popped. */
20426 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20427 XVECEXP (par, 0, 0) = ret_rtx;
20428 tmp = gen_rtx_SET (SImode,
20429 gen_rtx_REG (SImode, PC_REGNUM),
20430 gen_frame_mem (SImode,
20431 gen_rtx_POST_INC (SImode,
20432 stack_pointer_rtx)));
20433 RTX_FRAME_RELATED_P (tmp) = 1;
20434 XVECEXP (par, 0, 1) = tmp;
20435 par = emit_jump_insn (par);
20437 /* Generate dwarf info. */
20438 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20439 gen_rtx_REG (SImode, PC_REGNUM),
20440 NULL_RTX);
20441 REG_NOTES (par) = dwarf;
20442 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20443 stack_pointer_rtx, stack_pointer_rtx);
20447 /* Calculate the size of the return value that is passed in registers. */
20448 static unsigned
20449 arm_size_return_regs (void)
20451 enum machine_mode mode;
20453 if (crtl->return_rtx != 0)
20454 mode = GET_MODE (crtl->return_rtx);
20455 else
20456 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20458 return GET_MODE_SIZE (mode);
20461 /* Return true if the current function needs to save/restore LR. */
20462 static bool
20463 thumb_force_lr_save (void)
20465 return !cfun->machine->lr_save_eliminated
20466 && (!leaf_function_p ()
20467 || thumb_far_jump_used_p ()
20468 || df_regs_ever_live_p (LR_REGNUM));
20471 /* We do not know whether r3 will be available, because
20472 an indirect tailcall is happening in this
20473 particular case. */
20474 static bool
20475 is_indirect_tailcall_p (rtx call)
20477 rtx pat = PATTERN (call);
20479 /* Indirect tail call. */
20480 pat = XVECEXP (pat, 0, 0);
20481 if (GET_CODE (pat) == SET)
20482 pat = SET_SRC (pat);
20484 pat = XEXP (XEXP (pat, 0), 0);
20485 return REG_P (pat);
20488 /* Return true if r3 is used by any of the tail call insns in the
20489 current function. */
20490 static bool
20491 any_sibcall_could_use_r3 (void)
20493 edge_iterator ei;
20494 edge e;
20496 if (!crtl->tail_call_emit)
20497 return false;
20498 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20499 if (e->flags & EDGE_SIBCALL)
20501 rtx call = BB_END (e->src);
20502 if (!CALL_P (call))
20503 call = prev_nonnote_nondebug_insn (call);
20504 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20505 if (find_regno_fusage (call, USE, 3)
20506 || is_indirect_tailcall_p (call))
20507 return true;
20509 return false;
20513 /* Compute the distance from register FROM to register TO.
20514 These can be the arg pointer (26), the soft frame pointer (25),
20515 the stack pointer (13) or the hard frame pointer (11).
20516 In Thumb mode r7 is used as the hard frame pointer, if needed.
20517 Typical stack layout looks like this:
20519 old stack pointer -> | |
20520 ----
20521 | | \
20522 | | saved arguments for
20523 | | vararg functions
20524 | | /
20526 hard FP & arg pointer -> | | \
20527 | | stack
20528 | | frame
20529 | | /
20531 | | \
20532 | | call saved
20533 | | registers
20534 soft frame pointer -> | | /
20536 | | \
20537 | | local
20538 | | variables
20539 locals base pointer -> | | /
20541 | | \
20542 | | outgoing
20543 | | arguments
20544 current stack pointer -> | | /
20547 For a given function some or all of these stack components
20548 may not be needed, giving rise to the possibility of
20549 eliminating some of the registers.
20551 The values returned by this function must reflect the behavior
20552 of arm_expand_prologue() and arm_compute_save_reg_mask().
20554 The sign of the number returned reflects the direction of stack
20555 growth, so the values are positive for all eliminations except
20556 from the soft frame pointer to the hard frame pointer.
20558 SFP may point just inside the local variables block to ensure correct
20559 alignment. */
20562 /* Calculate stack offsets. These are used to calculate register elimination
20563 offsets and in prologue/epilogue code. Also calculates which registers
20564 should be saved. */
20566 static arm_stack_offsets *
20567 arm_get_frame_offsets (void)
20569 struct arm_stack_offsets *offsets;
20570 unsigned long func_type;
20571 int leaf;
20572 int saved;
20573 int core_saved;
20574 HOST_WIDE_INT frame_size;
20575 int i;
20577 offsets = &cfun->machine->stack_offsets;
20579 /* We need to know if we are a leaf function. Unfortunately, it
20580 is possible to be called after start_sequence has been called,
20581 which causes get_insns to return the insns for the sequence,
20582 not the function, which will cause leaf_function_p to return
20583 the incorrect result. Fortunately, we only need
20585 to know about leaf functions once reload has completed, and the
20586 frame size cannot be changed after that time, so we can safely
20587 use the cached value. */
20589 if (reload_completed)
20590 return offsets;
20592 /* Initially this is the size of the local variables. It will be translated
20593 into an offset once we have determined the size of preceding data. */
20594 frame_size = ROUND_UP_WORD (get_frame_size ());
20596 leaf = leaf_function_p ();
20598 /* Space for variadic functions. */
20599 offsets->saved_args = crtl->args.pretend_args_size;
20601 /* In Thumb mode this is incorrect, but never used. */
20602 offsets->frame
20603 = (offsets->saved_args
20604 + arm_compute_static_chain_stack_bytes ()
20605 + (frame_pointer_needed ? 4 : 0));
20607 if (TARGET_32BIT)
20609 unsigned int regno;
20611 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20612 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20613 saved = core_saved;
20615 /* We know that SP will be doubleword aligned on entry, and we must
20616 preserve that condition at any subroutine call. We also require the
20617 soft frame pointer to be doubleword aligned. */
20619 if (TARGET_REALLY_IWMMXT)
20621 /* Check for the call-saved iWMMXt registers. */
20622 for (regno = FIRST_IWMMXT_REGNUM;
20623 regno <= LAST_IWMMXT_REGNUM;
20624 regno++)
20625 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20626 saved += 8;
20629 func_type = arm_current_func_type ();
20630 /* Space for saved VFP registers. */
20631 if (! IS_VOLATILE (func_type)
20632 && TARGET_HARD_FLOAT && TARGET_VFP)
20633 saved += arm_get_vfp_saved_size ();
20635 else /* TARGET_THUMB1 */
20637 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20638 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20639 saved = core_saved;
20640 if (TARGET_BACKTRACE)
20641 saved += 16;
20644 /* Saved registers include the stack frame. */
20645 offsets->saved_regs
20646 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20647 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20649 /* A leaf function does not need any stack alignment if it has nothing
20650 on the stack. */
20651 if (leaf && frame_size == 0
20652 /* However if it calls alloca(), we have a dynamically allocated
20653 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20654 && ! cfun->calls_alloca)
20656 offsets->outgoing_args = offsets->soft_frame;
20657 offsets->locals_base = offsets->soft_frame;
20658 return offsets;
20661 /* Ensure SFP has the correct alignment. */
20662 if (ARM_DOUBLEWORD_ALIGN
20663 && (offsets->soft_frame & 7))
20665 offsets->soft_frame += 4;
20666 /* Try to align stack by pushing an extra reg. Don't bother doing this
20667 when there is a stack frame as the alignment will be rolled into
20668 the normal stack adjustment. */
20669 if (frame_size + crtl->outgoing_args_size == 0)
20671 int reg = -1;
20673 /* If it is safe to use r3, then do so. This sometimes
20674 generates better code on Thumb-2 by avoiding the need to
20675 use 32-bit push/pop instructions. */
20676 if (! any_sibcall_could_use_r3 ()
20677 && arm_size_return_regs () <= 12
20678 && (offsets->saved_regs_mask & (1 << 3)) == 0
20679 && (TARGET_THUMB2
20680 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20682 reg = 3;
20684 else
20685 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20687 /* Avoid fixed registers; they may be changed at
20688 arbitrary times so it's unsafe to restore them
20689 during the epilogue. */
20690 if (!fixed_regs[i]
20691 && (offsets->saved_regs_mask & (1 << i)) == 0)
20693 reg = i;
20694 break;
20698 if (reg != -1)
20700 offsets->saved_regs += 4;
20701 offsets->saved_regs_mask |= (1 << reg);
20706 offsets->locals_base = offsets->soft_frame + frame_size;
20707 offsets->outgoing_args = (offsets->locals_base
20708 + crtl->outgoing_args_size);
20710 if (ARM_DOUBLEWORD_ALIGN)
20712 /* Ensure SP remains doubleword aligned. */
20713 if (offsets->outgoing_args & 7)
20714 offsets->outgoing_args += 4;
20715 gcc_assert (!(offsets->outgoing_args & 7));
20718 return offsets;
20722 /* Calculate the relative offsets for the different stack pointers. Positive
20723 offsets are in the direction of stack growth. */
20725 HOST_WIDE_INT
20726 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20728 arm_stack_offsets *offsets;
20730 offsets = arm_get_frame_offsets ();
20732 /* OK, now we have enough information to compute the distances.
20733 There must be an entry in these switch tables for each pair
20734 of registers in ELIMINABLE_REGS, even if some of the entries
20735 seem to be redundant or useless. */
20736 switch (from)
20738 case ARG_POINTER_REGNUM:
20739 switch (to)
20741 case THUMB_HARD_FRAME_POINTER_REGNUM:
20742 return 0;
20744 case FRAME_POINTER_REGNUM:
20745 /* This is the reverse of the soft frame pointer
20746 to hard frame pointer elimination below. */
20747 return offsets->soft_frame - offsets->saved_args;
20749 case ARM_HARD_FRAME_POINTER_REGNUM:
20750 /* This is only non-zero in the case where the static chain register
20751 is stored above the frame. */
20752 return offsets->frame - offsets->saved_args - 4;
20754 case STACK_POINTER_REGNUM:
20755 /* If nothing has been pushed on the stack at all
20756 then this will return -4. This *is* correct! */
20757 return offsets->outgoing_args - (offsets->saved_args + 4);
20759 default:
20760 gcc_unreachable ();
20762 gcc_unreachable ();
20764 case FRAME_POINTER_REGNUM:
20765 switch (to)
20767 case THUMB_HARD_FRAME_POINTER_REGNUM:
20768 return 0;
20770 case ARM_HARD_FRAME_POINTER_REGNUM:
20771 /* The hard frame pointer points to the top entry in the
20772 stack frame. The soft frame pointer points to the bottom entry
20773 in the stack frame. If there is no stack frame at all,
20774 then they are identical. */
20776 return offsets->frame - offsets->soft_frame;
20778 case STACK_POINTER_REGNUM:
20779 return offsets->outgoing_args - offsets->soft_frame;
20781 default:
20782 gcc_unreachable ();
20784 gcc_unreachable ();
20786 default:
20787 /* You cannot eliminate from the stack pointer.
20788 In theory you could eliminate from the hard frame
20789 pointer to the stack pointer, but this will never
20790 happen, since if a stack frame is not needed the
20791 hard frame pointer will never be used. */
20792 gcc_unreachable ();
20796 /* Given FROM and TO register numbers, say whether this elimination is
20797 allowed. Frame pointer elimination is automatically handled.
20799 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20800 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20801 pointer, we must eliminate FRAME_POINTER_REGNUM into
20802 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20803 ARG_POINTER_REGNUM. */
20805 bool
20806 arm_can_eliminate (const int from, const int to)
20808 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20809 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20810 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20811 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20812 true);
20815 /* Emit RTL to save coprocessor registers on function entry. Returns the
20816 number of bytes pushed. */
20818 static int
20819 arm_save_coproc_regs(void)
20821 int saved_size = 0;
20822 unsigned reg;
20823 unsigned start_reg;
20824 rtx insn;
20826 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20827 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20829 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20830 insn = gen_rtx_MEM (V2SImode, insn);
20831 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20832 RTX_FRAME_RELATED_P (insn) = 1;
20833 saved_size += 8;
20836 if (TARGET_HARD_FLOAT && TARGET_VFP)
20838 start_reg = FIRST_VFP_REGNUM;
20840 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20842 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20843 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20845 if (start_reg != reg)
20846 saved_size += vfp_emit_fstmd (start_reg,
20847 (reg - start_reg) / 2);
20848 start_reg = reg + 2;
20851 if (start_reg != reg)
20852 saved_size += vfp_emit_fstmd (start_reg,
20853 (reg - start_reg) / 2);
20855 return saved_size;
20859 /* Set the Thumb frame pointer from the stack pointer. */
20861 static void
20862 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20864 HOST_WIDE_INT amount;
20865 rtx insn, dwarf;
20867 amount = offsets->outgoing_args - offsets->locals_base;
20868 if (amount < 1024)
20869 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20870 stack_pointer_rtx, GEN_INT (amount)));
20871 else
20873 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20874 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20875 expects the first two operands to be the same. */
20876 if (TARGET_THUMB2)
20878 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20879 stack_pointer_rtx,
20880 hard_frame_pointer_rtx));
20882 else
20884 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20885 hard_frame_pointer_rtx,
20886 stack_pointer_rtx));
20888 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20889 plus_constant (Pmode, stack_pointer_rtx, amount));
20890 RTX_FRAME_RELATED_P (dwarf) = 1;
20891 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20894 RTX_FRAME_RELATED_P (insn) = 1;
20897 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20898 function. */
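/* For orientation only (a rough sketch, not a guaranteed sequence): on a
   typical ARM EABI target with no frame pointer, the code below boils down
   to something like

       push  {r4, r5, lr}      @ emit_multi_reg_push of the live registers
       sub   sp, sp, #16       @ space for locals and outgoing arguments

   with extra steps for APCS frames, interrupt handlers, stack-aligned
   entry points and anonymous (vararg) arguments.  */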
20899 void
20900 arm_expand_prologue (void)
20902 rtx amount;
20903 rtx insn;
20904 rtx ip_rtx;
20905 unsigned long live_regs_mask;
20906 unsigned long func_type;
20907 int fp_offset = 0;
20908 int saved_pretend_args = 0;
20909 int saved_regs = 0;
20910 unsigned HOST_WIDE_INT args_to_push;
20911 arm_stack_offsets *offsets;
20913 func_type = arm_current_func_type ();
20915 /* Naked functions don't have prologues. */
20916 if (IS_NAKED (func_type))
20917 return;
20919 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
20920 args_to_push = crtl->args.pretend_args_size;
20922 /* Compute which registers we will have to save onto the stack. */
20923 offsets = arm_get_frame_offsets ();
20924 live_regs_mask = offsets->saved_regs_mask;
20926 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20928 if (IS_STACKALIGN (func_type))
20930 rtx r0, r1;
20932 /* Handle a word-aligned stack pointer. We generate the following:
20934 mov r0, sp
20935 bic r1, r0, #7
20936 mov sp, r1
20937 <save and restore r0 in normal prologue/epilogue>
20938 mov sp, r0
20939 bx lr
20941 The unwinder doesn't need to know about the stack realignment.
20942 Just tell it we saved SP in r0. */
20943 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20945 r0 = gen_rtx_REG (SImode, 0);
20946 r1 = gen_rtx_REG (SImode, 1);
20948 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20949 RTX_FRAME_RELATED_P (insn) = 1;
20950 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20952 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20954 /* ??? The CFA changes here, which may cause GDB to conclude that it
20955 has entered a different function. That said, the unwind info is
20956 correct, individually, before and after this instruction because
20957 we've described the save of SP, which will override the default
20958 handling of SP as restoring from the CFA. */
20959 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20962 /* For APCS frames, if the IP register is clobbered
20963 when creating the frame, save that register in a special
20964 way. */
20965 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20967 if (IS_INTERRUPT (func_type))
20969 /* Interrupt functions must not corrupt any registers.
20970 Creating a frame pointer however, corrupts the IP
20971 register, so we must push it first. */
20972 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20974 /* Do not set RTX_FRAME_RELATED_P on this insn.
20975 The dwarf stack unwinding code only wants to see one
20976 stack decrement per function, and this is not it. If
20977 this instruction is labeled as being part of the frame
20978 creation sequence then dwarf2out_frame_debug_expr will
20979 die when it encounters the assignment of IP to FP
20980 later on, since the use of SP here establishes SP as
20981 the CFA register and not IP.
20983 Anyway this instruction is not really part of the stack
20984 frame creation although it is part of the prologue. */
20986 else if (IS_NESTED (func_type))
20988 /* The static chain register is the same as the IP register
20989 used as a scratch register during stack frame creation.
20990 To get around this, we need to find somewhere to store IP
20991 whilst the frame is being created. We try the following
20992 places in order:
20994 1. The last argument register r3 if it is available.
20995 2. A slot on the stack above the frame if there are no
20996 arguments to push onto the stack.
20997 3. Register r3 again, after pushing the argument registers
20998 onto the stack, if this is a varargs function.
20999 4. The last slot on the stack created for the arguments to
21000 push, if this isn't a varargs function.
21002 Note - we only need to tell the dwarf2 backend about the SP
21003 adjustment in the second variant; the static chain register
21004 doesn't need to be unwound, as it doesn't contain a value
21005 inherited from the caller. */
21007 if (!arm_r3_live_at_start_p ())
21008 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21009 else if (args_to_push == 0)
21011 rtx addr, dwarf;
21013 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21014 saved_regs += 4;
21016 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21017 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21018 fp_offset = 4;
21020 /* Just tell the dwarf backend that we adjusted SP. */
21021 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21022 plus_constant (Pmode, stack_pointer_rtx,
21023 -fp_offset));
21024 RTX_FRAME_RELATED_P (insn) = 1;
21025 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21027 else
21029 /* Store the args on the stack. */
21030 if (cfun->machine->uses_anonymous_args)
21032 insn
21033 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21034 (0xf0 >> (args_to_push / 4)) & 0xf);
21035 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21036 saved_pretend_args = 1;
21038 else
21040 rtx addr, dwarf;
21042 if (args_to_push == 4)
21043 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21044 else
21045 addr
21046 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21047 plus_constant (Pmode,
21048 stack_pointer_rtx,
21049 -args_to_push));
21051 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21053 /* Just tell the dwarf backend that we adjusted SP. */
21054 dwarf
21055 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21056 plus_constant (Pmode, stack_pointer_rtx,
21057 -args_to_push));
21058 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21061 RTX_FRAME_RELATED_P (insn) = 1;
21062 fp_offset = args_to_push;
21063 args_to_push = 0;
21067 insn = emit_set_insn (ip_rtx,
21068 plus_constant (Pmode, stack_pointer_rtx,
21069 fp_offset));
21070 RTX_FRAME_RELATED_P (insn) = 1;
21073 if (args_to_push)
21075 /* Push the argument registers, or reserve space for them. */
21076 if (cfun->machine->uses_anonymous_args)
21077 insn = emit_multi_reg_push
21078 ((0xf0 >> (args_to_push / 4)) & 0xf,
21079 (0xf0 >> (args_to_push / 4)) & 0xf);
21080 else
21081 insn = emit_insn
21082 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21083 GEN_INT (- args_to_push)));
21084 RTX_FRAME_RELATED_P (insn) = 1;
21087 /* If this is an interrupt service routine, and the link register
21088 is going to be pushed, and we're not generating the extra
21089 push of IP (needed when a frame is needed and the frame layout is APCS),
21090 then subtracting four from LR now will mean that the function return
21091 can be done with a single instruction. */
21092 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21093 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21094 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21095 && TARGET_ARM)
21097 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21099 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21102 if (live_regs_mask)
21104 unsigned long dwarf_regs_mask = live_regs_mask;
21106 saved_regs += bit_count (live_regs_mask) * 4;
21107 if (optimize_size && !frame_pointer_needed
21108 && saved_regs == offsets->saved_regs - offsets->saved_args)
21110 /* If no coprocessor registers are being pushed and we don't have
21111 to worry about a frame pointer then push extra registers to
21112 create the stack frame. This is done in a way that does not
21113 alter the frame layout, so is independent of the epilogue. */
21114 int n;
21115 int frame;
21116 n = 0;
21117 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21118 n++;
21119 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21120 if (frame && n * 4 >= frame)
21122 n = frame / 4;
21123 live_regs_mask |= (1 << n) - 1;
21124 saved_regs += frame;
21128 if (TARGET_LDRD
21129 && current_tune->prefer_ldrd_strd
21130 && !optimize_function_for_size_p (cfun))
21132 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21133 if (TARGET_THUMB2)
21134 thumb2_emit_strd_push (live_regs_mask);
21135 else if (TARGET_ARM
21136 && !TARGET_APCS_FRAME
21137 && !IS_INTERRUPT (func_type))
21138 arm_emit_strd_push (live_regs_mask);
21139 else
21141 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21142 RTX_FRAME_RELATED_P (insn) = 1;
21145 else
21147 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21148 RTX_FRAME_RELATED_P (insn) = 1;
21152 if (! IS_VOLATILE (func_type))
21153 saved_regs += arm_save_coproc_regs ();
21155 if (frame_pointer_needed && TARGET_ARM)
21157 /* Create the new frame pointer. */
21158 if (TARGET_APCS_FRAME)
21160 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21161 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21162 RTX_FRAME_RELATED_P (insn) = 1;
21164 if (IS_NESTED (func_type))
21166 /* Recover the static chain register. */
21167 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21168 insn = gen_rtx_REG (SImode, 3);
21169 else
21171 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21172 insn = gen_frame_mem (SImode, insn);
21174 emit_set_insn (ip_rtx, insn);
21175 /* Add a USE to stop propagate_one_insn() from barfing. */
21176 emit_insn (gen_force_register_use (ip_rtx));
21179 else
21181 insn = GEN_INT (saved_regs - 4);
21182 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21183 stack_pointer_rtx, insn));
21184 RTX_FRAME_RELATED_P (insn) = 1;
21188 if (flag_stack_usage_info)
21189 current_function_static_stack_size
21190 = offsets->outgoing_args - offsets->saved_args;
21192 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21194 /* This add can produce multiple insns for a large constant, so we
21195 need to get tricky. */
21196 rtx last = get_last_insn ();
21198 amount = GEN_INT (offsets->saved_args + saved_regs
21199 - offsets->outgoing_args);
21201 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21202 amount));
21205 last = last ? NEXT_INSN (last) : get_insns ();
21206 RTX_FRAME_RELATED_P (last) = 1;
21208 while (last != insn);
21210 /* If the frame pointer is needed, emit a special barrier that
21211 will prevent the scheduler from moving stores to the frame
21212 before the stack adjustment. */
21213 if (frame_pointer_needed)
21214 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21215 hard_frame_pointer_rtx));
21219 if (frame_pointer_needed && TARGET_THUMB2)
21220 thumb_set_frame_pointer (offsets);
21222 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21224 unsigned long mask;
21226 mask = live_regs_mask;
21227 mask &= THUMB2_WORK_REGS;
21228 if (!IS_NESTED (func_type))
21229 mask |= (1 << IP_REGNUM);
21230 arm_load_pic_register (mask);
21233 /* If we are profiling, make sure no instructions are scheduled before
21234 the call to mcount. Similarly if the user has requested no
21235 scheduling in the prologue. Similarly if we want non-call exceptions
21236 using the EABI unwinder, to prevent faulting instructions from being
21237 swapped with a stack adjustment. */
21238 if (crtl->profile || !TARGET_SCHED_PROLOG
21239 || (arm_except_unwind_info (&global_options) == UI_TARGET
21240 && cfun->can_throw_non_call_exceptions))
21241 emit_insn (gen_blockage ());
21243 /* If the link register is being kept alive, with the return address in it,
21244 then make sure that it does not get reused by the ce2 pass. */
21245 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21246 cfun->machine->lr_save_eliminated = 1;
21249 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21250 static void
21251 arm_print_condition (FILE *stream)
21253 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21255 /* Branch conversion is not implemented for Thumb-2. */
21256 if (TARGET_THUMB)
21258 output_operand_lossage ("predicated Thumb instruction");
21259 return;
21261 if (current_insn_predicate != NULL)
21263 output_operand_lossage
21264 ("predicated instruction in conditional sequence");
21265 return;
21268 fputs (arm_condition_codes[arm_current_cc], stream);
21270 else if (current_insn_predicate)
21272 enum arm_cond_code code;
21274 if (TARGET_THUMB1)
21276 output_operand_lossage ("predicated Thumb instruction");
21277 return;
21280 code = get_arm_condition_code (current_insn_predicate);
21281 fputs (arm_condition_codes[code], stream);
21286 /* Globally reserved letters: acln
21287 Punctuation letters currently used: @_|?().!#
21288 Lower case letters currently used: bcdefhimpqtvwxyz
21289 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21290 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21292 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21294 If CODE is 'd', then the X is a condition operand and the instruction
21295 should only be executed if the condition is true.
21296 If CODE is 'D', then the X is a condition operand and the instruction
21297 should only be executed if the condition is false: however, if the mode
21298 of the comparison is CCFPEmode, then always execute the instruction -- we
21299 do this because in these circumstances !GE does not necessarily imply LT;
21300 in these cases the instruction pattern will take care to make sure that
21301 an instruction containing %d will follow, thereby undoing the effects of
21302 doing this instruction unconditionally.
21303 If CODE is 'N' then X is a floating point operand that must be negated
21304 before output.
21305 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21306 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
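/* A few illustrative cases (assumed operands): with X = (const_int 8),
   '%x' prints "#0x8", '%b' prints "#3" (the log2) and '%B' prints "-9"
   (the bitwise inverse, sign extended); '%L' of (const_int 0x12345)
   prints "9029" (the low 16 bits in decimal).  */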
21307 static void
21308 arm_print_operand (FILE *stream, rtx x, int code)
21310 switch (code)
21312 case '@':
21313 fputs (ASM_COMMENT_START, stream);
21314 return;
21316 case '_':
21317 fputs (user_label_prefix, stream);
21318 return;
21320 case '|':
21321 fputs (REGISTER_PREFIX, stream);
21322 return;
21324 case '?':
21325 arm_print_condition (stream);
21326 return;
21328 case '(':
21329 /* Nothing in unified syntax, otherwise the current condition code. */
21330 if (!TARGET_UNIFIED_ASM)
21331 arm_print_condition (stream);
21332 break;
21334 case ')':
21335 /* The current condition code in unified syntax, otherwise nothing. */
21336 if (TARGET_UNIFIED_ASM)
21337 arm_print_condition (stream);
21338 break;
21340 case '.':
21341 /* The current condition code for a condition code setting instruction.
21342 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21343 if (TARGET_UNIFIED_ASM)
21345 fputc('s', stream);
21346 arm_print_condition (stream);
21348 else
21350 arm_print_condition (stream);
21351 fputc('s', stream);
21353 return;
21355 case '!':
21356 /* If the instruction is conditionally executed then print
21357 the current condition code, otherwise print 's'. */
21358 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21359 if (current_insn_predicate)
21360 arm_print_condition (stream);
21361 else
21362 fputc('s', stream);
21363 break;
21365 /* %# is a "break" sequence. It doesn't output anything, but is used to
21366 separate e.g. operand numbers from following text, if that text consists
21367 of further digits which we don't want to be part of the operand
21368 number. */
21369 case '#':
21370 return;
21372 case 'N':
21374 REAL_VALUE_TYPE r;
21375 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21376 r = real_value_negate (&r);
21377 fprintf (stream, "%s", fp_const_from_val (&r));
21379 return;
21381 /* An integer or symbol address without a preceding # sign. */
21382 case 'c':
21383 switch (GET_CODE (x))
21385 case CONST_INT:
21386 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21387 break;
21389 case SYMBOL_REF:
21390 output_addr_const (stream, x);
21391 break;
21393 case CONST:
21394 if (GET_CODE (XEXP (x, 0)) == PLUS
21395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21397 output_addr_const (stream, x);
21398 break;
21400 /* Fall through. */
21402 default:
21403 output_operand_lossage ("Unsupported operand for code '%c'", code);
21405 return;
21407 /* An integer that we want to print in HEX. */
21408 case 'x':
21409 switch (GET_CODE (x))
21411 case CONST_INT:
21412 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21413 break;
21415 default:
21416 output_operand_lossage ("Unsupported operand for code '%c'", code);
21418 return;
21420 case 'B':
21421 if (CONST_INT_P (x))
21423 HOST_WIDE_INT val;
21424 val = ARM_SIGN_EXTEND (~INTVAL (x));
21425 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21427 else
21429 putc ('~', stream);
21430 output_addr_const (stream, x);
21432 return;
21434 case 'b':
21435 /* Print the log2 of a CONST_INT. */
21437 HOST_WIDE_INT val;
21439 if (!CONST_INT_P (x)
21440 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21441 output_operand_lossage ("Unsupported operand for code '%c'", code);
21442 else
21443 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21445 return;
21447 case 'L':
21448 /* The low 16 bits of an immediate constant. */
21449 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21450 return;
21452 case 'i':
21453 fprintf (stream, "%s", arithmetic_instr (x, 1));
21454 return;
21456 case 'I':
21457 fprintf (stream, "%s", arithmetic_instr (x, 0));
21458 return;
21460 case 'S':
21462 HOST_WIDE_INT val;
21463 const char *shift;
21465 shift = shift_op (x, &val);
21467 if (shift)
21469 fprintf (stream, ", %s ", shift);
21470 if (val == -1)
21471 arm_print_operand (stream, XEXP (x, 1), 0);
21472 else
21473 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21476 return;
21478 /* An explanation of the 'Q', 'R' and 'H' register operands:
21480 In a pair of registers containing a DI or DF value the 'Q'
21481 operand returns the register number of the register containing
21482 the least significant part of the value. The 'R' operand returns
21483 the register number of the register containing the most
21484 significant part of the value.
21486 The 'H' operand returns the higher of the two register numbers.
21487 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21488 same as the 'Q' operand, since the most significant part of the
21489 value is held in the lower number register. The reverse is true
21490 on systems where WORDS_BIG_ENDIAN is false.
21492 The purpose of these operands is to distinguish between cases
21493 where the endian-ness of the values is important (for example
21494 when they are added together), and cases where the endian-ness
21495 is irrelevant, but the order of register operations is important.
21496 For example when loading a value from memory into a register
21497 pair, the endian-ness does not matter. Provided that the value
21498 from the lower memory address is put into the lower numbered
21499 register, and the value from the higher address is put into the
21500 higher numbered register, the load will work regardless of whether
21501 the value being loaded is big-wordian or little-wordian. The
21502 order of the two register loads can matter however, if the address
21503 of the memory location is actually held in one of the registers
21504 being overwritten by the load.
21506 The 'Q' and 'R' constraints are also available for 64-bit
21507 constants. */
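/* Illustrative example (not part of the original comment): when
   WORDS_BIG_ENDIAN is false (the usual little-endian case) a DImode value
   held in {r0, r1} keeps its least significant word in r0, so %Q prints
   r0 while %R and %H both print r1.  If WORDS_BIG_ENDIAN were true, %Q
   and %H would both print r1 and %R would print r0.  */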
21508 case 'Q':
21509 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21511 rtx part = gen_lowpart (SImode, x);
21512 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21513 return;
21516 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21518 output_operand_lossage ("invalid operand for code '%c'", code);
21519 return;
21522 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21523 return;
21525 case 'R':
21526 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21528 enum machine_mode mode = GET_MODE (x);
21529 rtx part;
21531 if (mode == VOIDmode)
21532 mode = DImode;
21533 part = gen_highpart_mode (SImode, mode, x);
21534 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21535 return;
21538 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21540 output_operand_lossage ("invalid operand for code '%c'", code);
21541 return;
21544 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21545 return;
21547 case 'H':
21548 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21550 output_operand_lossage ("invalid operand for code '%c'", code);
21551 return;
21554 asm_fprintf (stream, "%r", REGNO (x) + 1);
21555 return;
21557 case 'J':
21558 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21560 output_operand_lossage ("invalid operand for code '%c'", code);
21561 return;
21564 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21565 return;
21567 case 'K':
21568 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21570 output_operand_lossage ("invalid operand for code '%c'", code);
21571 return;
21574 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21575 return;
21577 case 'm':
21578 asm_fprintf (stream, "%r",
21579 REG_P (XEXP (x, 0))
21580 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21581 return;
21583 case 'M':
21584 asm_fprintf (stream, "{%r-%r}",
21585 REGNO (x),
21586 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21587 return;
21589 /* Like 'M', but writing doubleword vector registers, for use by Neon
21590 insns. */
21591 case 'h':
21593 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21594 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21595 if (numregs == 1)
21596 asm_fprintf (stream, "{d%d}", regno);
21597 else
21598 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21600 return;
21602 case 'd':
21603 /* CONST_TRUE_RTX means always -- that's the default. */
21604 if (x == const_true_rtx)
21605 return;
21607 if (!COMPARISON_P (x))
21609 output_operand_lossage ("invalid operand for code '%c'", code);
21610 return;
21613 fputs (arm_condition_codes[get_arm_condition_code (x)],
21614 stream);
21615 return;
21617 case 'D':
21618 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21619 want to do that. */
21620 if (x == const_true_rtx)
21622 output_operand_lossage ("instruction never executed");
21623 return;
21625 if (!COMPARISON_P (x))
21627 output_operand_lossage ("invalid operand for code '%c'", code);
21628 return;
21631 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21632 (get_arm_condition_code (x))],
21633 stream);
21634 return;
21636 case 's':
21637 case 'V':
21638 case 'W':
21639 case 'X':
21640 case 'Y':
21641 case 'Z':
21642 /* Former Maverick support, removed after GCC-4.7. */
21643 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21644 return;
21646 case 'U':
21647 if (!REG_P (x)
21648 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21649 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21650 /* Bad value for wCG register number. */
21652 output_operand_lossage ("invalid operand for code '%c'", code);
21653 return;
21656 else
21657 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21658 return;
21660 /* Print an iWMMXt control register name. */
21661 case 'w':
21662 if (!CONST_INT_P (x)
21663 || INTVAL (x) < 0
21664 || INTVAL (x) >= 16)
21665 /* Bad value for wC register number. */
21667 output_operand_lossage ("invalid operand for code '%c'", code);
21668 return;
21671 else
21673 static const char * wc_reg_names [16] =
21675 "wCID", "wCon", "wCSSF", "wCASF",
21676 "wC4", "wC5", "wC6", "wC7",
21677 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21678 "wC12", "wC13", "wC14", "wC15"
21681 fputs (wc_reg_names [INTVAL (x)], stream);
21683 return;
21685 /* Print the high single-precision register of a VFP double-precision
21686 register. */
21687 case 'p':
21689 enum machine_mode mode = GET_MODE (x);
21690 int regno;
21692 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21694 output_operand_lossage ("invalid operand for code '%c'", code);
21695 return;
21698 regno = REGNO (x);
21699 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21701 output_operand_lossage ("invalid operand for code '%c'", code);
21702 return;
21705 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21707 return;
21709 /* Print a VFP/Neon double precision or quad precision register name. */
21710 case 'P':
21711 case 'q':
21713 enum machine_mode mode = GET_MODE (x);
21714 int is_quad = (code == 'q');
21715 int regno;
21717 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21719 output_operand_lossage ("invalid operand for code '%c'", code);
21720 return;
21723 if (!REG_P (x)
21724 || !IS_VFP_REGNUM (REGNO (x)))
21726 output_operand_lossage ("invalid operand for code '%c'", code);
21727 return;
21730 regno = REGNO (x);
21731 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21732 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21734 output_operand_lossage ("invalid operand for code '%c'", code);
21735 return;
21738 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21739 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21741 return;
21743 /* These two codes print the low/high doubleword register of a Neon quad
21744 register, respectively. For pair-structure types, can also print
21745 low/high quadword registers. */
21746 case 'e':
21747 case 'f':
21749 enum machine_mode mode = GET_MODE (x);
21750 int regno;
21752 if ((GET_MODE_SIZE (mode) != 16
21753 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21755 output_operand_lossage ("invalid operand for code '%c'", code);
21756 return;
21759 regno = REGNO (x);
21760 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21762 output_operand_lossage ("invalid operand for code '%c'", code);
21763 return;
21766 if (GET_MODE_SIZE (mode) == 16)
21767 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21768 + (code == 'f' ? 1 : 0));
21769 else
21770 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21771 + (code == 'f' ? 1 : 0));
21773 return;
21775 /* Print a VFPv3 floating-point constant, represented as an integer
21776 index. */
21777 case 'G':
21779 int index = vfp3_const_double_index (x);
21780 gcc_assert (index != -1);
21781 fprintf (stream, "%d", index);
21783 return;
21785 /* Print bits representing opcode features for Neon.
21787 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21788 and polynomials as unsigned.
21790 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21792 Bit 2 is 1 for rounding functions, 0 otherwise. */
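/* Worked example (illustrative, not from the original source): a bits
   value of 5 (binary 101) has bit 0 and bit 2 set, so %T prints 's',
   %F prints 'i', %t prints 's' and %O prints 'r'.  */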
21794 /* Identify the type as 's', 'u', 'p' or 'f'. */
21795 case 'T':
21797 HOST_WIDE_INT bits = INTVAL (x);
21798 fputc ("uspf"[bits & 3], stream);
21800 return;
21802 /* Likewise, but signed and unsigned integers are both 'i'. */
21803 case 'F':
21805 HOST_WIDE_INT bits = INTVAL (x);
21806 fputc ("iipf"[bits & 3], stream);
21808 return;
21810 /* As for 'T', but emit 'u' instead of 'p'. */
21811 case 't':
21813 HOST_WIDE_INT bits = INTVAL (x);
21814 fputc ("usuf"[bits & 3], stream);
21816 return;
21818 /* Bit 2: rounding (vs none). */
21819 case 'O':
21821 HOST_WIDE_INT bits = INTVAL (x);
21822 fputs ((bits & 4) != 0 ? "r" : "", stream);
21824 return;
21826 /* Memory operand for vld1/vst1 instruction. */
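/* Example output (illustrative only): a 16-byte access through r2 whose
   MEM_ALIGN is 128 bits prints as "[r2:128]"; with a POST_INC address it
   becomes "[r2:128]!", and with a register POST_MODIFY the increment
   register is appended, e.g. "[r2:128], r3".  */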
21827 case 'A':
21829 rtx addr;
21830 bool postinc = FALSE;
21831 rtx postinc_reg = NULL;
21832 unsigned align, memsize, align_bits;
21834 gcc_assert (MEM_P (x));
21835 addr = XEXP (x, 0);
21836 if (GET_CODE (addr) == POST_INC)
21838 postinc = 1;
21839 addr = XEXP (addr, 0);
21841 if (GET_CODE (addr) == POST_MODIFY)
21843 postinc_reg = XEXP( XEXP (addr, 1), 1);
21844 addr = XEXP (addr, 0);
21846 asm_fprintf (stream, "[%r", REGNO (addr));
21848 /* We know the alignment of this access, so we can emit a hint in the
21849 instruction (for some alignments) as an aid to the memory subsystem
21850 of the target. */
21851 align = MEM_ALIGN (x) >> 3;
21852 memsize = MEM_SIZE (x);
21854 /* Only certain alignment specifiers are supported by the hardware. */
21855 if (memsize == 32 && (align % 32) == 0)
21856 align_bits = 256;
21857 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21858 align_bits = 128;
21859 else if (memsize >= 8 && (align % 8) == 0)
21860 align_bits = 64;
21861 else
21862 align_bits = 0;
21864 if (align_bits != 0)
21865 asm_fprintf (stream, ":%d", align_bits);
21867 asm_fprintf (stream, "]");
21869 if (postinc)
21870 fputs("!", stream);
21871 if (postinc_reg)
21872 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21874 return;
21876 case 'C':
21878 rtx addr;
21880 gcc_assert (MEM_P (x));
21881 addr = XEXP (x, 0);
21882 gcc_assert (REG_P (addr));
21883 asm_fprintf (stream, "[%r]", REGNO (addr));
21885 return;
21887 /* Translate an S register number into a D register number and element index. */
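/* For instance (illustrative): s1 lives in the odd half of d0 and is
   printed as "d0[1]", while s6 is printed as "d3[0]".  */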
21888 case 'y':
21890 enum machine_mode mode = GET_MODE (x);
21891 int regno;
21893 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21895 output_operand_lossage ("invalid operand for code '%c'", code);
21896 return;
21899 regno = REGNO (x);
21900 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21902 output_operand_lossage ("invalid operand for code '%c'", code);
21903 return;
21906 regno = regno - FIRST_VFP_REGNUM;
21907 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21909 return;
21911 case 'v':
21912 gcc_assert (CONST_DOUBLE_P (x));
21913 int result;
21914 result = vfp3_const_double_for_fract_bits (x);
21915 if (result == 0)
21916 result = vfp3_const_double_for_bits (x);
21917 fprintf (stream, "#%d", result);
21918 return;
21920 /* Register specifier for vld1.16/vst1.16. Translate the S register
21921 number into a D register number and element index. */
21922 case 'z':
21924 enum machine_mode mode = GET_MODE (x);
21925 int regno;
21927 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21929 output_operand_lossage ("invalid operand for code '%c'", code);
21930 return;
21933 regno = REGNO (x);
21934 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21936 output_operand_lossage ("invalid operand for code '%c'", code);
21937 return;
21940 regno = regno - FIRST_VFP_REGNUM;
21941 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21943 return;
21945 default:
21946 if (x == 0)
21948 output_operand_lossage ("missing operand");
21949 return;
21952 switch (GET_CODE (x))
21954 case REG:
21955 asm_fprintf (stream, "%r", REGNO (x));
21956 break;
21958 case MEM:
21959 output_memory_reference_mode = GET_MODE (x);
21960 output_address (XEXP (x, 0));
21961 break;
21963 case CONST_DOUBLE:
21964 if (TARGET_NEON)
21966 char fpstr[20];
21967 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21968 sizeof (fpstr), 0, 1);
21969 fprintf (stream, "#%s", fpstr);
21971 else
21972 fprintf (stream, "#%s", fp_immediate_constant (x));
21973 break;
21975 default:
21976 gcc_assert (GET_CODE (x) != NEG);
21977 fputc ('#', stream);
21978 if (GET_CODE (x) == HIGH)
21980 fputs (":lower16:", stream);
21981 x = XEXP (x, 0);
21984 output_addr_const (stream, x);
21985 break;
21990 /* Target hook for printing a memory address. */
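/* Typical outputs (illustrative, assuming 32-bit ARM/Thumb-2 state): a
   plain register gives "[r0]", register plus constant gives "[r0, #12]",
   a register index gives "[r0, r1]" (or "[r0, -r1]" for MINUS), a scaled
   index gives "[r0, r1, lsl #2]", pre-increment gives "[r0, #4]!" and
   post-increment gives "[r0], #4".  */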
21991 static void
21992 arm_print_operand_address (FILE *stream, rtx x)
21994 if (TARGET_32BIT)
21996 int is_minus = GET_CODE (x) == MINUS;
21998 if (REG_P (x))
21999 asm_fprintf (stream, "[%r]", REGNO (x));
22000 else if (GET_CODE (x) == PLUS || is_minus)
22002 rtx base = XEXP (x, 0);
22003 rtx index = XEXP (x, 1);
22004 HOST_WIDE_INT offset = 0;
22005 if (!REG_P (base)
22006 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22008 /* Ensure that BASE is a register. */
22009 /* (one of them must be). */
22010 /* Also ensure the SP is not used as an index register. */

22011 rtx temp = base;
22012 base = index;
22013 index = temp;
22015 switch (GET_CODE (index))
22017 case CONST_INT:
22018 offset = INTVAL (index);
22019 if (is_minus)
22020 offset = -offset;
22021 asm_fprintf (stream, "[%r, #%wd]",
22022 REGNO (base), offset);
22023 break;
22025 case REG:
22026 asm_fprintf (stream, "[%r, %s%r]",
22027 REGNO (base), is_minus ? "-" : "",
22028 REGNO (index));
22029 break;
22031 case MULT:
22032 case ASHIFTRT:
22033 case LSHIFTRT:
22034 case ASHIFT:
22035 case ROTATERT:
22037 asm_fprintf (stream, "[%r, %s%r",
22038 REGNO (base), is_minus ? "-" : "",
22039 REGNO (XEXP (index, 0)));
22040 arm_print_operand (stream, index, 'S');
22041 fputs ("]", stream);
22042 break;
22045 default:
22046 gcc_unreachable ();
22049 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22050 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22052 extern enum machine_mode output_memory_reference_mode;
22054 gcc_assert (REG_P (XEXP (x, 0)));
22056 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22057 asm_fprintf (stream, "[%r, #%s%d]!",
22058 REGNO (XEXP (x, 0)),
22059 GET_CODE (x) == PRE_DEC ? "-" : "",
22060 GET_MODE_SIZE (output_memory_reference_mode));
22061 else
22062 asm_fprintf (stream, "[%r], #%s%d",
22063 REGNO (XEXP (x, 0)),
22064 GET_CODE (x) == POST_DEC ? "-" : "",
22065 GET_MODE_SIZE (output_memory_reference_mode));
22067 else if (GET_CODE (x) == PRE_MODIFY)
22069 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22070 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22071 asm_fprintf (stream, "#%wd]!",
22072 INTVAL (XEXP (XEXP (x, 1), 1)));
22073 else
22074 asm_fprintf (stream, "%r]!",
22075 REGNO (XEXP (XEXP (x, 1), 1)));
22077 else if (GET_CODE (x) == POST_MODIFY)
22079 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22080 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22081 asm_fprintf (stream, "#%wd",
22082 INTVAL (XEXP (XEXP (x, 1), 1)));
22083 else
22084 asm_fprintf (stream, "%r",
22085 REGNO (XEXP (XEXP (x, 1), 1)));
22087 else output_addr_const (stream, x);
22089 else
22091 if (REG_P (x))
22092 asm_fprintf (stream, "[%r]", REGNO (x));
22093 else if (GET_CODE (x) == POST_INC)
22094 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22095 else if (GET_CODE (x) == PLUS)
22097 gcc_assert (REG_P (XEXP (x, 0)));
22098 if (CONST_INT_P (XEXP (x, 1)))
22099 asm_fprintf (stream, "[%r, #%wd]",
22100 REGNO (XEXP (x, 0)),
22101 INTVAL (XEXP (x, 1)));
22102 else
22103 asm_fprintf (stream, "[%r, %r]",
22104 REGNO (XEXP (x, 0)),
22105 REGNO (XEXP (x, 1)));
22107 else
22108 output_addr_const (stream, x);
22112 /* Target hook for indicating whether a punctuation character for
22113 TARGET_PRINT_OPERAND is valid. */
22114 static bool
22115 arm_print_operand_punct_valid_p (unsigned char code)
22117 return (code == '@' || code == '|' || code == '.'
22118 || code == '(' || code == ')' || code == '#'
22119 || (TARGET_32BIT && (code == '?'))
22120 || (TARGET_THUMB2 && (code == '!'))
22121 || (TARGET_THUMB && (code == '_')));
22124 /* Target hook for assembling integer objects. The ARM version needs to
22125 handle word-sized values specially. */
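/* For example (illustrative, assuming a typical -fpic configuration with
   text-relative PIC data): a constant-pool word referencing a non-local
   symbol is emitted as ".word foo(GOT)", while a local symbol is emitted
   as ".word bar(GOTOFF)".  */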
22126 static bool
22127 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22129 enum machine_mode mode;
22131 if (size == UNITS_PER_WORD && aligned_p)
22133 fputs ("\t.word\t", asm_out_file);
22134 output_addr_const (asm_out_file, x);
22136 /* Mark symbols as position independent. We only do this in the
22137 .text segment, not in the .data segment. */
22138 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22139 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22141 /* See legitimize_pic_address for an explanation of the
22142 TARGET_VXWORKS_RTP check. */
22143 if (!arm_pic_data_is_text_relative
22144 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22145 fputs ("(GOT)", asm_out_file);
22146 else
22147 fputs ("(GOTOFF)", asm_out_file);
22149 fputc ('\n', asm_out_file);
22150 return true;
22153 mode = GET_MODE (x);
22155 if (arm_vector_mode_supported_p (mode))
22157 int i, units;
22159 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22161 units = CONST_VECTOR_NUNITS (x);
22162 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22164 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22165 for (i = 0; i < units; i++)
22167 rtx elt = CONST_VECTOR_ELT (x, i);
22168 assemble_integer
22169 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22171 else
22172 for (i = 0; i < units; i++)
22174 rtx elt = CONST_VECTOR_ELT (x, i);
22175 REAL_VALUE_TYPE rval;
22177 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22179 assemble_real
22180 (rval, GET_MODE_INNER (mode),
22181 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22184 return true;
22187 return default_assemble_integer (x, size, aligned_p);
22190 static void
22191 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22193 section *s;
22195 if (!TARGET_AAPCS_BASED)
22197 (is_ctor ?
22198 default_named_section_asm_out_constructor
22199 : default_named_section_asm_out_destructor) (symbol, priority);
22200 return;
22203 /* Put these in the .init_array section, using a special relocation. */
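  /* For example (illustrative): a constructor with priority 101 is placed
     in a section named ".init_array.00101" and emitted as
	 .word	ctor_symbol(target1)
     so the (target1) operator selects the special TARGET1 relocation.  */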
22204 if (priority != DEFAULT_INIT_PRIORITY)
22206 char buf[18];
22207 sprintf (buf, "%s.%.5u",
22208 is_ctor ? ".init_array" : ".fini_array",
22209 priority);
22210 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22212 else if (is_ctor)
22213 s = ctors_section;
22214 else
22215 s = dtors_section;
22217 switch_to_section (s);
22218 assemble_align (POINTER_SIZE);
22219 fputs ("\t.word\t", asm_out_file);
22220 output_addr_const (asm_out_file, symbol);
22221 fputs ("(target1)\n", asm_out_file);
22224 /* Add a function to the list of static constructors. */
22226 static void
22227 arm_elf_asm_constructor (rtx symbol, int priority)
22229 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22232 /* Add a function to the list of static destructors. */
22234 static void
22235 arm_elf_asm_destructor (rtx symbol, int priority)
22237 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22240 /* A finite state machine takes care of noticing whether or not instructions
22241 can be conditionally executed, and thus decrease execution time and code
22242 size by deleting branch instructions. The fsm is controlled by
22243 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22245 /* The states of the fsm controlling condition codes are:
22246 0: normal, do nothing special
22247 1: make ASM_OUTPUT_OPCODE not output this instruction
22248 2: make ASM_OUTPUT_OPCODE not output this instruction
22249 3: make instructions conditional
22250 4: make instructions conditional
22252 State transitions (state->state by whom under condition):
22253 0 -> 1 final_prescan_insn if the `target' is a label
22254 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22255 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22256 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22257 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22258 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22259 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22260 (the target insn is arm_target_insn).
22262 If the jump clobbers the conditions then we use states 2 and 4.
22264 A similar thing can be done with conditional return insns.
22266 XXX In case the `target' is an unconditional branch, this conditionalising
22267 of the instructions always reduces code size, but not always execution
22268 time. But then, I want to reduce the code size to somewhere near what
22269 /bin/cc produces. */
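/* As an illustration (not part of the original source), in ARM state the
   state machine lets a short forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   be rewritten so that the branch is deleted and the skipped insn is
   conditionalised on the inverse condition:

	cmp	r0, #0
	addne	r1, r1, #1
 */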
22271 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22272 instructions. When a COND_EXEC instruction is seen the subsequent
22273 instructions are scanned so that multiple conditional instructions can be
22274 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22275 specify the length and true/false mask for the IT block. These will be
22276 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22278 /* Returns the index of the ARM condition code string in
22279 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22280 COMPARISON should be an rtx like `(eq (...) (...))'. */
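/* For example (illustrative): given the comparison rtx
   (eq (reg:CC CC_REGNUM) (const_int 0)) this returns ARM_EQ, while
   (gt (reg:CC_SWP CC_REGNUM) (const_int 0)) maps to ARM_LT because the
   operands of the original comparison were swapped.  */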
22282 enum arm_cond_code
22283 maybe_get_arm_condition_code (rtx comparison)
22285 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22286 enum arm_cond_code code;
22287 enum rtx_code comp_code = GET_CODE (comparison);
22289 if (GET_MODE_CLASS (mode) != MODE_CC)
22290 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22291 XEXP (comparison, 1));
22293 switch (mode)
22295 case CC_DNEmode: code = ARM_NE; goto dominance;
22296 case CC_DEQmode: code = ARM_EQ; goto dominance;
22297 case CC_DGEmode: code = ARM_GE; goto dominance;
22298 case CC_DGTmode: code = ARM_GT; goto dominance;
22299 case CC_DLEmode: code = ARM_LE; goto dominance;
22300 case CC_DLTmode: code = ARM_LT; goto dominance;
22301 case CC_DGEUmode: code = ARM_CS; goto dominance;
22302 case CC_DGTUmode: code = ARM_HI; goto dominance;
22303 case CC_DLEUmode: code = ARM_LS; goto dominance;
22304 case CC_DLTUmode: code = ARM_CC;
22306 dominance:
22307 if (comp_code == EQ)
22308 return ARM_INVERSE_CONDITION_CODE (code);
22309 if (comp_code == NE)
22310 return code;
22311 return ARM_NV;
22313 case CC_NOOVmode:
22314 switch (comp_code)
22316 case NE: return ARM_NE;
22317 case EQ: return ARM_EQ;
22318 case GE: return ARM_PL;
22319 case LT: return ARM_MI;
22320 default: return ARM_NV;
22323 case CC_Zmode:
22324 switch (comp_code)
22326 case NE: return ARM_NE;
22327 case EQ: return ARM_EQ;
22328 default: return ARM_NV;
22331 case CC_Nmode:
22332 switch (comp_code)
22334 case NE: return ARM_MI;
22335 case EQ: return ARM_PL;
22336 default: return ARM_NV;
22339 case CCFPEmode:
22340 case CCFPmode:
22341 /* We can handle all cases except UNEQ and LTGT. */
22342 switch (comp_code)
22344 case GE: return ARM_GE;
22345 case GT: return ARM_GT;
22346 case LE: return ARM_LS;
22347 case LT: return ARM_MI;
22348 case NE: return ARM_NE;
22349 case EQ: return ARM_EQ;
22350 case ORDERED: return ARM_VC;
22351 case UNORDERED: return ARM_VS;
22352 case UNLT: return ARM_LT;
22353 case UNLE: return ARM_LE;
22354 case UNGT: return ARM_HI;
22355 case UNGE: return ARM_PL;
22356 /* UNEQ and LTGT do not have a representation. */
22357 case UNEQ: /* Fall through. */
22358 case LTGT: /* Fall through. */
22359 default: return ARM_NV;
22362 case CC_SWPmode:
22363 switch (comp_code)
22365 case NE: return ARM_NE;
22366 case EQ: return ARM_EQ;
22367 case GE: return ARM_LE;
22368 case GT: return ARM_LT;
22369 case LE: return ARM_GE;
22370 case LT: return ARM_GT;
22371 case GEU: return ARM_LS;
22372 case GTU: return ARM_CC;
22373 case LEU: return ARM_CS;
22374 case LTU: return ARM_HI;
22375 default: return ARM_NV;
22378 case CC_Cmode:
22379 switch (comp_code)
22381 case LTU: return ARM_CS;
22382 case GEU: return ARM_CC;
22383 default: return ARM_NV;
22386 case CC_CZmode:
22387 switch (comp_code)
22389 case NE: return ARM_NE;
22390 case EQ: return ARM_EQ;
22391 case GEU: return ARM_CS;
22392 case GTU: return ARM_HI;
22393 case LEU: return ARM_LS;
22394 case LTU: return ARM_CC;
22395 default: return ARM_NV;
22398 case CC_NCVmode:
22399 switch (comp_code)
22401 case GE: return ARM_GE;
22402 case LT: return ARM_LT;
22403 case GEU: return ARM_CS;
22404 case LTU: return ARM_CC;
22405 default: return ARM_NV;
22408 case CCmode:
22409 switch (comp_code)
22411 case NE: return ARM_NE;
22412 case EQ: return ARM_EQ;
22413 case GE: return ARM_GE;
22414 case GT: return ARM_GT;
22415 case LE: return ARM_LE;
22416 case LT: return ARM_LT;
22417 case GEU: return ARM_CS;
22418 case GTU: return ARM_HI;
22419 case LEU: return ARM_LS;
22420 case LTU: return ARM_CC;
22421 default: return ARM_NV;
22424 default: gcc_unreachable ();
22428 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22429 static enum arm_cond_code
22430 get_arm_condition_code (rtx comparison)
22432 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22433 gcc_assert (code != ARM_NV);
22434 return code;
22437 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22438 instructions. */
22439 void
22440 thumb2_final_prescan_insn (rtx insn)
22442 rtx first_insn = insn;
22443 rtx body = PATTERN (insn);
22444 rtx predicate;
22445 enum arm_cond_code code;
22446 int n;
22447 int mask;
22448 int max;
22450 /* max_insns_skipped in the tune was already taken into account in the
22451 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22452 just emit the IT blocks as large as we can. It does not make sense to split
22453 the IT blocks. */
22454 max = MAX_INSN_PER_IT_BLOCK;
22456 /* Remove the previous insn from the count of insns to be output. */
22457 if (arm_condexec_count)
22458 arm_condexec_count--;
22460 /* Nothing to do if we are already inside a conditional block. */
22461 if (arm_condexec_count)
22462 return;
22464 if (GET_CODE (body) != COND_EXEC)
22465 return;
22467 /* Conditional jumps are implemented directly. */
22468 if (JUMP_P (insn))
22469 return;
22471 predicate = COND_EXEC_TEST (body);
22472 arm_current_cc = get_arm_condition_code (predicate);
22474 n = get_attr_ce_count (insn);
22475 arm_condexec_count = 1;
22476 arm_condexec_mask = (1 << n) - 1;
22477 arm_condexec_masklen = n;
22478 /* See if subsequent instructions can be combined into the same block. */
22479 for (;;)
22481 insn = next_nonnote_insn (insn);
22483 /* Jumping into the middle of an IT block is illegal, so a label or
22484 barrier terminates the block. */
22485 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22486 break;
22488 body = PATTERN (insn);
22489 /* USE and CLOBBER aren't really insns, so just skip them. */
22490 if (GET_CODE (body) == USE
22491 || GET_CODE (body) == CLOBBER)
22492 continue;
22494 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22495 if (GET_CODE (body) != COND_EXEC)
22496 break;
22497 /* Maximum number of conditionally executed instructions in a block. */
22498 n = get_attr_ce_count (insn);
22499 if (arm_condexec_masklen + n > max)
22500 break;
22502 predicate = COND_EXEC_TEST (body);
22503 code = get_arm_condition_code (predicate);
22504 mask = (1 << n) - 1;
22505 if (arm_current_cc == code)
22506 arm_condexec_mask |= (mask << arm_condexec_masklen);
22507 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22508 break;
22510 arm_condexec_count++;
22511 arm_condexec_masklen += n;
22513 /* A jump must be the last instruction in a conditional block. */
22514 if (JUMP_P (insn))
22515 break;
22517 /* Restore recog_data (getting the attributes of other insns can
22518 destroy this array, but final.c assumes that it remains intact
22519 across this call). */
22520 extract_constrain_insn_cached (first_insn);
22523 void
22524 arm_final_prescan_insn (rtx insn)
22526 /* BODY will hold the body of INSN. */
22527 rtx body = PATTERN (insn);
22529 /* This will be 1 if trying to repeat the trick, and things need to be
22530 reversed if it appears to fail. */
22531 int reverse = 0;
22533 /* If we start with a return insn, we only succeed if we find another one. */
22534 int seeking_return = 0;
22535 enum rtx_code return_code = UNKNOWN;
22537 /* START_INSN will hold the insn from where we start looking. This is the
22538 first insn after the following code_label if REVERSE is true. */
22539 rtx start_insn = insn;
22541 /* If in state 4, check if the target branch is reached, in order to
22542 change back to state 0. */
22543 if (arm_ccfsm_state == 4)
22545 if (insn == arm_target_insn)
22547 arm_target_insn = NULL;
22548 arm_ccfsm_state = 0;
22550 return;
22553 /* If in state 3, it is possible to repeat the trick, if this insn is an
22554 unconditional branch to a label, and immediately following this branch
22555 is the previous target label which is only used once, and the label this
22556 branch jumps to is not too far off. */
22557 if (arm_ccfsm_state == 3)
22559 if (simplejump_p (insn))
22561 start_insn = next_nonnote_insn (start_insn);
22562 if (BARRIER_P (start_insn))
22564 /* XXX Isn't this always a barrier? */
22565 start_insn = next_nonnote_insn (start_insn);
22567 if (LABEL_P (start_insn)
22568 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22569 && LABEL_NUSES (start_insn) == 1)
22570 reverse = TRUE;
22571 else
22572 return;
22574 else if (ANY_RETURN_P (body))
22576 start_insn = next_nonnote_insn (start_insn);
22577 if (BARRIER_P (start_insn))
22578 start_insn = next_nonnote_insn (start_insn);
22579 if (LABEL_P (start_insn)
22580 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22581 && LABEL_NUSES (start_insn) == 1)
22583 reverse = TRUE;
22584 seeking_return = 1;
22585 return_code = GET_CODE (body);
22587 else
22588 return;
22590 else
22591 return;
22594 gcc_assert (!arm_ccfsm_state || reverse);
22595 if (!JUMP_P (insn))
22596 return;
22598 /* This jump might be paralleled with a clobber of the condition codes;
22599 the jump should always come first. */
22600 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22601 body = XVECEXP (body, 0, 0);
22603 if (reverse
22604 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22605 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22607 int insns_skipped;
22608 int fail = FALSE, succeed = FALSE;
22609 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22610 int then_not_else = TRUE;
22611 rtx this_insn = start_insn, label = 0;
22613 /* Register the insn jumped to. */
22614 if (reverse)
22616 if (!seeking_return)
22617 label = XEXP (SET_SRC (body), 0);
22619 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22620 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22621 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22623 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22624 then_not_else = FALSE;
22626 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22628 seeking_return = 1;
22629 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22631 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22633 seeking_return = 1;
22634 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22635 then_not_else = FALSE;
22637 else
22638 gcc_unreachable ();
22640 /* See how many insns this branch skips, and what kind of insns. If all
22641 insns are okay, and the label or unconditional branch to the same
22642 label is not too far away, succeed. */
22643 for (insns_skipped = 0;
22644 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22646 rtx scanbody;
22648 this_insn = next_nonnote_insn (this_insn);
22649 if (!this_insn)
22650 break;
22652 switch (GET_CODE (this_insn))
22654 case CODE_LABEL:
22655 /* Succeed if it is the target label, otherwise fail since
22656 control falls in from somewhere else. */
22657 if (this_insn == label)
22659 arm_ccfsm_state = 1;
22660 succeed = TRUE;
22662 else
22663 fail = TRUE;
22664 break;
22666 case BARRIER:
22667 /* Succeed if the following insn is the target label.
22668 Otherwise fail.
22669 If return insns are used then the last insn in a function
22670 will be a barrier. */
22671 this_insn = next_nonnote_insn (this_insn);
22672 if (this_insn && this_insn == label)
22674 arm_ccfsm_state = 1;
22675 succeed = TRUE;
22677 else
22678 fail = TRUE;
22679 break;
22681 case CALL_INSN:
22682 /* The AAPCS says that conditional calls should not be
22683 used since they make interworking inefficient (the
22684 linker can't transform BL<cond> into BLX). That's
22685 only a problem if the machine has BLX. */
22686 if (arm_arch5)
22688 fail = TRUE;
22689 break;
22692 /* Succeed if the following insn is the target label, or
22693 if the following two insns are a barrier and the
22694 target label. */
22695 this_insn = next_nonnote_insn (this_insn);
22696 if (this_insn && BARRIER_P (this_insn))
22697 this_insn = next_nonnote_insn (this_insn);
22699 if (this_insn && this_insn == label
22700 && insns_skipped < max_insns_skipped)
22702 arm_ccfsm_state = 1;
22703 succeed = TRUE;
22705 else
22706 fail = TRUE;
22707 break;
22709 case JUMP_INSN:
22710 /* If this is an unconditional branch to the same label, succeed.
22711 If it is to another label, do nothing. If it is conditional,
22712 fail. */
22713 /* XXX Probably, the tests for SET and the PC are
22714 unnecessary. */
22716 scanbody = PATTERN (this_insn);
22717 if (GET_CODE (scanbody) == SET
22718 && GET_CODE (SET_DEST (scanbody)) == PC)
22720 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22721 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22723 arm_ccfsm_state = 2;
22724 succeed = TRUE;
22726 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22727 fail = TRUE;
22729 /* Fail if a conditional return is undesirable (e.g. on a
22730 StrongARM), but still allow this if optimizing for size. */
22731 else if (GET_CODE (scanbody) == return_code
22732 && !use_return_insn (TRUE, NULL)
22733 && !optimize_size)
22734 fail = TRUE;
22735 else if (GET_CODE (scanbody) == return_code)
22737 arm_ccfsm_state = 2;
22738 succeed = TRUE;
22740 else if (GET_CODE (scanbody) == PARALLEL)
22742 switch (get_attr_conds (this_insn))
22744 case CONDS_NOCOND:
22745 break;
22746 default:
22747 fail = TRUE;
22748 break;
22751 else
22752 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22754 break;
22756 case INSN:
22757 /* Instructions using or affecting the condition codes make it
22758 fail. */
22759 scanbody = PATTERN (this_insn);
22760 if (!(GET_CODE (scanbody) == SET
22761 || GET_CODE (scanbody) == PARALLEL)
22762 || get_attr_conds (this_insn) != CONDS_NOCOND)
22763 fail = TRUE;
22764 break;
22766 default:
22767 break;
22770 if (succeed)
22772 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22773 arm_target_label = CODE_LABEL_NUMBER (label);
22774 else
22776 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22778 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22780 this_insn = next_nonnote_insn (this_insn);
22781 gcc_assert (!this_insn
22782 || (!BARRIER_P (this_insn)
22783 && !LABEL_P (this_insn)));
22785 if (!this_insn)
22787 /* Oh, dear! We ran off the end; give up. */
22788 extract_constrain_insn_cached (insn);
22789 arm_ccfsm_state = 0;
22790 arm_target_insn = NULL;
22791 return;
22793 arm_target_insn = this_insn;
22796 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22797 what it was. */
22798 if (!reverse)
22799 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22801 if (reverse || then_not_else)
22802 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22805 /* Restore recog_data (getting the attributes of other insns can
22806 destroy this array, but final.c assumes that it remains intact
22807 across this call). */
22808 extract_constrain_insn_cached (insn);
22812 /* Output IT instructions. */
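/* For instance (illustrative): with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 2 and arm_condexec_mask == 0x3 this emits
   "itt eq"; a mask of 0x1 over the same length would emit "ite eq".  */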
22813 void
22814 thumb2_asm_output_opcode (FILE * stream)
22816 char buff[5];
22817 int n;
22819 if (arm_condexec_mask)
22821 for (n = 0; n < arm_condexec_masklen; n++)
22822 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22823 buff[n] = 0;
22824 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22825 arm_condition_codes[arm_current_cc]);
22826 arm_condexec_mask = 0;
22830 /* Returns true if REGNO is a valid register
22831 for holding a quantity of type MODE. */
22833 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22835 if (GET_MODE_CLASS (mode) == MODE_CC)
22836 return (regno == CC_REGNUM
22837 || (TARGET_HARD_FLOAT && TARGET_VFP
22838 && regno == VFPCC_REGNUM));
22840 if (TARGET_THUMB1)
22841 /* For the Thumb we only allow values bigger than SImode in
22842 registers 0 - 6, so that there is always a second low
22843 register available to hold the upper part of the value.
22844 We probably ought to ensure that the register is the
22845 start of an even numbered register pair. */
22846 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22848 if (TARGET_HARD_FLOAT && TARGET_VFP
22849 && IS_VFP_REGNUM (regno))
22851 if (mode == SFmode || mode == SImode)
22852 return VFP_REGNO_OK_FOR_SINGLE (regno);
22854 if (mode == DFmode)
22855 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22857 /* VFP registers can hold HFmode values, but there is no point in
22858 putting them there unless we have hardware conversion insns. */
22859 if (mode == HFmode)
22860 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22862 if (TARGET_NEON)
22863 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22864 || (VALID_NEON_QREG_MODE (mode)
22865 && NEON_REGNO_OK_FOR_QUAD (regno))
22866 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22867 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22868 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22869 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22870 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22872 return FALSE;
22875 if (TARGET_REALLY_IWMMXT)
22877 if (IS_IWMMXT_GR_REGNUM (regno))
22878 return mode == SImode;
22880 if (IS_IWMMXT_REGNUM (regno))
22881 return VALID_IWMMXT_REG_MODE (mode);
22884 /* We allow almost any value to be stored in the general registers.
22885 Restrict doubleword quantities to even register pairs in ARM state
22886 so that we can use ldrd. Do not allow very large Neon structure
22887 opaque modes in general registers; they would use too many. */
22888 if (regno <= LAST_ARM_REGNUM)
22890 if (ARM_NUM_REGS (mode) > 4)
22891 return FALSE;
22893 if (TARGET_THUMB2)
22894 return TRUE;
22896 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22899 if (regno == FRAME_POINTER_REGNUM
22900 || regno == ARG_POINTER_REGNUM)
22901 /* We only allow integers in the fake hard registers. */
22902 return GET_MODE_CLASS (mode) == MODE_INT;
22904 return FALSE;
22907 /* Implement MODES_TIEABLE_P. */
22909 bool
22910 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22912 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22913 return true;
22915 /* We specifically want to allow elements of "structure" modes to
22916 be tieable to the structure. This more general condition allows
22917 other rarer situations too. */
22918 if (TARGET_NEON
22919 && (VALID_NEON_DREG_MODE (mode1)
22920 || VALID_NEON_QREG_MODE (mode1)
22921 || VALID_NEON_STRUCT_MODE (mode1))
22922 && (VALID_NEON_DREG_MODE (mode2)
22923 || VALID_NEON_QREG_MODE (mode2)
22924 || VALID_NEON_STRUCT_MODE (mode2)))
22925 return true;
22927 return false;
22930 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
22931 not used in ARM mode. */
22933 enum reg_class
22934 arm_regno_class (int regno)
22936 if (TARGET_THUMB1)
22938 if (regno == STACK_POINTER_REGNUM)
22939 return STACK_REG;
22940 if (regno == CC_REGNUM)
22941 return CC_REG;
22942 if (regno < 8)
22943 return LO_REGS;
22944 return HI_REGS;
22947 if (TARGET_THUMB2 && regno < 8)
22948 return LO_REGS;
22950 if ( regno <= LAST_ARM_REGNUM
22951 || regno == FRAME_POINTER_REGNUM
22952 || regno == ARG_POINTER_REGNUM)
22953 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22955 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22956 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22958 if (IS_VFP_REGNUM (regno))
22960 if (regno <= D7_VFP_REGNUM)
22961 return VFP_D0_D7_REGS;
22962 else if (regno <= LAST_LO_VFP_REGNUM)
22963 return VFP_LO_REGS;
22964 else
22965 return VFP_HI_REGS;
22968 if (IS_IWMMXT_REGNUM (regno))
22969 return IWMMXT_REGS;
22971 if (IS_IWMMXT_GR_REGNUM (regno))
22972 return IWMMXT_GR_REGS;
22974 return NO_REGS;
22977 /* Handle a special case when computing the offset
22978 of an argument from the frame pointer. */
22980 arm_debugger_arg_offset (int value, rtx addr)
22982 rtx insn;
22984 /* We are only interested if dbxout_parms() failed to compute the offset. */
22985 if (value != 0)
22986 return 0;
22988 /* We can only cope with the case where the address is held in a register. */
22989 if (!REG_P (addr))
22990 return 0;
22992 /* If we are using the frame pointer to point at the argument, then
22993 an offset of 0 is correct. */
22994 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22995 return 0;
22997 /* If we are using the stack pointer to point at the
22998 argument, then an offset of 0 is correct. */
22999 /* ??? Check this is consistent with thumb2 frame layout. */
23000 if ((TARGET_THUMB || !frame_pointer_needed)
23001 && REGNO (addr) == SP_REGNUM)
23002 return 0;
23004 /* Oh dear. The argument is pointed to by a register rather
23005 than being held in a register, or being stored at a known
23006 offset from the frame pointer. Since GDB only understands
23007 those two kinds of argument we must translate the address
23008 held in the register into an offset from the frame pointer.
23009 We do this by searching through the insns for the function
23010 looking to see where this register gets its value. If the
23011 register is initialized from the frame pointer plus an offset
23012 then we are in luck and we can continue, otherwise we give up.
23014 This code is exercised by producing debugging information
23015 for a function with arguments like this:
23017 double func (double a, double b, int c, double d) {return d;}
23019 Without this code the stab for parameter 'd' will be set to
23020 an offset of 0 from the frame pointer, rather than 8. */
23022 /* The if() statement says:
23024 If the insn is a normal instruction
23025 and if the insn is setting the value in a register
23026 and if the register being set is the register holding the address of the argument
23027 and if the address is computed by an addition
23028 that involves adding to a register
23029 which is the frame pointer
23030 a constant integer
23032 then... */
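  /* Concretely (an illustrative sketch, not from the original source): the
     loop below looks for an insn whose pattern has the shape
	 (set (reg N) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int 8)))
     where register N is the one holding the argument's address; the
     constant 8 then becomes the reported offset.  */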
23034 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23036 if ( NONJUMP_INSN_P (insn)
23037 && GET_CODE (PATTERN (insn)) == SET
23038 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23039 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23040 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23041 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23042 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23045 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23047 break;
23051 if (value == 0)
23053 debug_rtx (addr);
23054 warning (0, "unable to compute real location of stacked parameter");
23055 value = 8; /* XXX magic hack */
23058 return value;
23061 typedef enum {
23062 T_V8QI,
23063 T_V4HI,
23064 T_V4HF,
23065 T_V2SI,
23066 T_V2SF,
23067 T_DI,
23068 T_V16QI,
23069 T_V8HI,
23070 T_V4SI,
23071 T_V4SF,
23072 T_V2DI,
23073 T_TI,
23074 T_EI,
23075 T_OI,
23076 T_MAX /* Size of enum. Keep last. */
23077 } neon_builtin_type_mode;
23079 #define TYPE_MODE_BIT(X) (1 << (X))
23081 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23082 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23083 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23084 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23085 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23086 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23088 #define v8qi_UP T_V8QI
23089 #define v4hi_UP T_V4HI
23090 #define v4hf_UP T_V4HF
23091 #define v2si_UP T_V2SI
23092 #define v2sf_UP T_V2SF
23093 #define di_UP T_DI
23094 #define v16qi_UP T_V16QI
23095 #define v8hi_UP T_V8HI
23096 #define v4si_UP T_V4SI
23097 #define v4sf_UP T_V4SF
23098 #define v2di_UP T_V2DI
23099 #define ti_UP T_TI
23100 #define ei_UP T_EI
23101 #define oi_UP T_OI
23103 #define UP(X) X##_UP
23105 typedef enum {
23106 NEON_BINOP,
23107 NEON_TERNOP,
23108 NEON_UNOP,
23109 NEON_BSWAP,
23110 NEON_GETLANE,
23111 NEON_SETLANE,
23112 NEON_CREATE,
23113 NEON_RINT,
23114 NEON_DUP,
23115 NEON_DUPLANE,
23116 NEON_COMBINE,
23117 NEON_SPLIT,
23118 NEON_LANEMUL,
23119 NEON_LANEMULL,
23120 NEON_LANEMULH,
23121 NEON_LANEMAC,
23122 NEON_SCALARMUL,
23123 NEON_SCALARMULL,
23124 NEON_SCALARMULH,
23125 NEON_SCALARMAC,
23126 NEON_CONVERT,
23127 NEON_FLOAT_WIDEN,
23128 NEON_FLOAT_NARROW,
23129 NEON_FIXCONV,
23130 NEON_SELECT,
23131 NEON_REINTERP,
23132 NEON_VTBL,
23133 NEON_VTBX,
23134 NEON_LOAD1,
23135 NEON_LOAD1LANE,
23136 NEON_STORE1,
23137 NEON_STORE1LANE,
23138 NEON_LOADSTRUCT,
23139 NEON_LOADSTRUCTLANE,
23140 NEON_STORESTRUCT,
23141 NEON_STORESTRUCTLANE,
23142 NEON_LOGICBINOP,
23143 NEON_SHIFTINSERT,
23144 NEON_SHIFTIMM,
23145 NEON_SHIFTACC
23146 } neon_itype;
23148 typedef struct {
23149 const char *name;
23150 const neon_itype itype;
23151 const neon_builtin_type_mode mode;
23152 const enum insn_code code;
23153 unsigned int fcode;
23154 } neon_builtin_datum;
23156 #define CF(N,X) CODE_FOR_neon_##N##X
23158 #define VAR1(T, N, A) \
23159 {#N, NEON_##T, UP (A), CF (N, A), 0}
23160 #define VAR2(T, N, A, B) \
23161 VAR1 (T, N, A), \
23162 {#N, NEON_##T, UP (B), CF (N, B), 0}
23163 #define VAR3(T, N, A, B, C) \
23164 VAR2 (T, N, A, B), \
23165 {#N, NEON_##T, UP (C), CF (N, C), 0}
23166 #define VAR4(T, N, A, B, C, D) \
23167 VAR3 (T, N, A, B, C), \
23168 {#N, NEON_##T, UP (D), CF (N, D), 0}
23169 #define VAR5(T, N, A, B, C, D, E) \
23170 VAR4 (T, N, A, B, C, D), \
23171 {#N, NEON_##T, UP (E), CF (N, E), 0}
23172 #define VAR6(T, N, A, B, C, D, E, F) \
23173 VAR5 (T, N, A, B, C, D, E), \
23174 {#N, NEON_##T, UP (F), CF (N, F), 0}
23175 #define VAR7(T, N, A, B, C, D, E, F, G) \
23176 VAR6 (T, N, A, B, C, D, E, F), \
23177 {#N, NEON_##T, UP (G), CF (N, G), 0}
23178 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23179 VAR7 (T, N, A, B, C, D, E, F, G), \
23180 {#N, NEON_##T, UP (H), CF (N, H), 0}
23181 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23182 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23183 {#N, NEON_##T, UP (I), CF (N, I), 0}
23184 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23185 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23186 {#N, NEON_##T, UP (J), CF (N, J), 0}
23188 /* The NEON builtin data can be found in arm_neon_builtins.def.
23189 The mode entries in the following table correspond to the "key" type of the
23190 instruction variant, i.e. equivalent to that which would be specified after
23191 the assembler mnemonic, which usually refers to the last vector operand.
23192 (Signed/unsigned/polynomial types are not differentiated between though, and
23193 are all mapped onto the same mode for a given element size.) The modes
23194 listed per instruction should be the same as those defined for that
23195 instruction's pattern in neon.md. */
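/* A sketch of how the VAR macros above expand (the builtin name below is
   purely illustrative): an entry such as

     VAR2 (BINOP, vadd, v8qi, v16qi)

   in arm_neon_builtins.def produces two initializers in this table,

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0},

   and the trailing fcode of each entry is filled in when the builtins are
   registered.  */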
23197 static neon_builtin_datum neon_builtin_data[] =
23199 #include "arm_neon_builtins.def"
23202 #undef CF
23203 #undef VAR1
23204 #undef VAR2
23205 #undef VAR3
23206 #undef VAR4
23207 #undef VAR5
23208 #undef VAR6
23209 #undef VAR7
23210 #undef VAR8
23211 #undef VAR9
23212 #undef VAR10
23214 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23215 #define VAR1(T, N, A) \
23216 CF (N, A)
23217 #define VAR2(T, N, A, B) \
23218 VAR1 (T, N, A), \
23219 CF (N, B)
23220 #define VAR3(T, N, A, B, C) \
23221 VAR2 (T, N, A, B), \
23222 CF (N, C)
23223 #define VAR4(T, N, A, B, C, D) \
23224 VAR3 (T, N, A, B, C), \
23225 CF (N, D)
23226 #define VAR5(T, N, A, B, C, D, E) \
23227 VAR4 (T, N, A, B, C, D), \
23228 CF (N, E)
23229 #define VAR6(T, N, A, B, C, D, E, F) \
23230 VAR5 (T, N, A, B, C, D, E), \
23231 CF (N, F)
23232 #define VAR7(T, N, A, B, C, D, E, F, G) \
23233 VAR6 (T, N, A, B, C, D, E, F), \
23234 CF (N, G)
23235 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23236 VAR7 (T, N, A, B, C, D, E, F, G), \
23237 CF (N, H)
23238 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23239 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23240 CF (N, I)
23241 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23242 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23243 CF (N, J)
23244 enum arm_builtins
23246 ARM_BUILTIN_GETWCGR0,
23247 ARM_BUILTIN_GETWCGR1,
23248 ARM_BUILTIN_GETWCGR2,
23249 ARM_BUILTIN_GETWCGR3,
23251 ARM_BUILTIN_SETWCGR0,
23252 ARM_BUILTIN_SETWCGR1,
23253 ARM_BUILTIN_SETWCGR2,
23254 ARM_BUILTIN_SETWCGR3,
23256 ARM_BUILTIN_WZERO,
23258 ARM_BUILTIN_WAVG2BR,
23259 ARM_BUILTIN_WAVG2HR,
23260 ARM_BUILTIN_WAVG2B,
23261 ARM_BUILTIN_WAVG2H,
23263 ARM_BUILTIN_WACCB,
23264 ARM_BUILTIN_WACCH,
23265 ARM_BUILTIN_WACCW,
23267 ARM_BUILTIN_WMACS,
23268 ARM_BUILTIN_WMACSZ,
23269 ARM_BUILTIN_WMACU,
23270 ARM_BUILTIN_WMACUZ,
23272 ARM_BUILTIN_WSADB,
23273 ARM_BUILTIN_WSADBZ,
23274 ARM_BUILTIN_WSADH,
23275 ARM_BUILTIN_WSADHZ,
23277 ARM_BUILTIN_WALIGNI,
23278 ARM_BUILTIN_WALIGNR0,
23279 ARM_BUILTIN_WALIGNR1,
23280 ARM_BUILTIN_WALIGNR2,
23281 ARM_BUILTIN_WALIGNR3,
23283 ARM_BUILTIN_TMIA,
23284 ARM_BUILTIN_TMIAPH,
23285 ARM_BUILTIN_TMIABB,
23286 ARM_BUILTIN_TMIABT,
23287 ARM_BUILTIN_TMIATB,
23288 ARM_BUILTIN_TMIATT,
23290 ARM_BUILTIN_TMOVMSKB,
23291 ARM_BUILTIN_TMOVMSKH,
23292 ARM_BUILTIN_TMOVMSKW,
23294 ARM_BUILTIN_TBCSTB,
23295 ARM_BUILTIN_TBCSTH,
23296 ARM_BUILTIN_TBCSTW,
23298 ARM_BUILTIN_WMADDS,
23299 ARM_BUILTIN_WMADDU,
23301 ARM_BUILTIN_WPACKHSS,
23302 ARM_BUILTIN_WPACKWSS,
23303 ARM_BUILTIN_WPACKDSS,
23304 ARM_BUILTIN_WPACKHUS,
23305 ARM_BUILTIN_WPACKWUS,
23306 ARM_BUILTIN_WPACKDUS,
23308 ARM_BUILTIN_WADDB,
23309 ARM_BUILTIN_WADDH,
23310 ARM_BUILTIN_WADDW,
23311 ARM_BUILTIN_WADDSSB,
23312 ARM_BUILTIN_WADDSSH,
23313 ARM_BUILTIN_WADDSSW,
23314 ARM_BUILTIN_WADDUSB,
23315 ARM_BUILTIN_WADDUSH,
23316 ARM_BUILTIN_WADDUSW,
23317 ARM_BUILTIN_WSUBB,
23318 ARM_BUILTIN_WSUBH,
23319 ARM_BUILTIN_WSUBW,
23320 ARM_BUILTIN_WSUBSSB,
23321 ARM_BUILTIN_WSUBSSH,
23322 ARM_BUILTIN_WSUBSSW,
23323 ARM_BUILTIN_WSUBUSB,
23324 ARM_BUILTIN_WSUBUSH,
23325 ARM_BUILTIN_WSUBUSW,
23327 ARM_BUILTIN_WAND,
23328 ARM_BUILTIN_WANDN,
23329 ARM_BUILTIN_WOR,
23330 ARM_BUILTIN_WXOR,
23332 ARM_BUILTIN_WCMPEQB,
23333 ARM_BUILTIN_WCMPEQH,
23334 ARM_BUILTIN_WCMPEQW,
23335 ARM_BUILTIN_WCMPGTUB,
23336 ARM_BUILTIN_WCMPGTUH,
23337 ARM_BUILTIN_WCMPGTUW,
23338 ARM_BUILTIN_WCMPGTSB,
23339 ARM_BUILTIN_WCMPGTSH,
23340 ARM_BUILTIN_WCMPGTSW,
23342 ARM_BUILTIN_TEXTRMSB,
23343 ARM_BUILTIN_TEXTRMSH,
23344 ARM_BUILTIN_TEXTRMSW,
23345 ARM_BUILTIN_TEXTRMUB,
23346 ARM_BUILTIN_TEXTRMUH,
23347 ARM_BUILTIN_TEXTRMUW,
23348 ARM_BUILTIN_TINSRB,
23349 ARM_BUILTIN_TINSRH,
23350 ARM_BUILTIN_TINSRW,
23352 ARM_BUILTIN_WMAXSW,
23353 ARM_BUILTIN_WMAXSH,
23354 ARM_BUILTIN_WMAXSB,
23355 ARM_BUILTIN_WMAXUW,
23356 ARM_BUILTIN_WMAXUH,
23357 ARM_BUILTIN_WMAXUB,
23358 ARM_BUILTIN_WMINSW,
23359 ARM_BUILTIN_WMINSH,
23360 ARM_BUILTIN_WMINSB,
23361 ARM_BUILTIN_WMINUW,
23362 ARM_BUILTIN_WMINUH,
23363 ARM_BUILTIN_WMINUB,
23365 ARM_BUILTIN_WMULUM,
23366 ARM_BUILTIN_WMULSM,
23367 ARM_BUILTIN_WMULUL,
23369 ARM_BUILTIN_PSADBH,
23370 ARM_BUILTIN_WSHUFH,
23372 ARM_BUILTIN_WSLLH,
23373 ARM_BUILTIN_WSLLW,
23374 ARM_BUILTIN_WSLLD,
23375 ARM_BUILTIN_WSRAH,
23376 ARM_BUILTIN_WSRAW,
23377 ARM_BUILTIN_WSRAD,
23378 ARM_BUILTIN_WSRLH,
23379 ARM_BUILTIN_WSRLW,
23380 ARM_BUILTIN_WSRLD,
23381 ARM_BUILTIN_WRORH,
23382 ARM_BUILTIN_WRORW,
23383 ARM_BUILTIN_WRORD,
23384 ARM_BUILTIN_WSLLHI,
23385 ARM_BUILTIN_WSLLWI,
23386 ARM_BUILTIN_WSLLDI,
23387 ARM_BUILTIN_WSRAHI,
23388 ARM_BUILTIN_WSRAWI,
23389 ARM_BUILTIN_WSRADI,
23390 ARM_BUILTIN_WSRLHI,
23391 ARM_BUILTIN_WSRLWI,
23392 ARM_BUILTIN_WSRLDI,
23393 ARM_BUILTIN_WRORHI,
23394 ARM_BUILTIN_WRORWI,
23395 ARM_BUILTIN_WRORDI,
23397 ARM_BUILTIN_WUNPCKIHB,
23398 ARM_BUILTIN_WUNPCKIHH,
23399 ARM_BUILTIN_WUNPCKIHW,
23400 ARM_BUILTIN_WUNPCKILB,
23401 ARM_BUILTIN_WUNPCKILH,
23402 ARM_BUILTIN_WUNPCKILW,
23404 ARM_BUILTIN_WUNPCKEHSB,
23405 ARM_BUILTIN_WUNPCKEHSH,
23406 ARM_BUILTIN_WUNPCKEHSW,
23407 ARM_BUILTIN_WUNPCKEHUB,
23408 ARM_BUILTIN_WUNPCKEHUH,
23409 ARM_BUILTIN_WUNPCKEHUW,
23410 ARM_BUILTIN_WUNPCKELSB,
23411 ARM_BUILTIN_WUNPCKELSH,
23412 ARM_BUILTIN_WUNPCKELSW,
23413 ARM_BUILTIN_WUNPCKELUB,
23414 ARM_BUILTIN_WUNPCKELUH,
23415 ARM_BUILTIN_WUNPCKELUW,
23417 ARM_BUILTIN_WABSB,
23418 ARM_BUILTIN_WABSH,
23419 ARM_BUILTIN_WABSW,
23421 ARM_BUILTIN_WADDSUBHX,
23422 ARM_BUILTIN_WSUBADDHX,
23424 ARM_BUILTIN_WABSDIFFB,
23425 ARM_BUILTIN_WABSDIFFH,
23426 ARM_BUILTIN_WABSDIFFW,
23428 ARM_BUILTIN_WADDCH,
23429 ARM_BUILTIN_WADDCW,
23431 ARM_BUILTIN_WAVG4,
23432 ARM_BUILTIN_WAVG4R,
23434 ARM_BUILTIN_WMADDSX,
23435 ARM_BUILTIN_WMADDUX,
23437 ARM_BUILTIN_WMADDSN,
23438 ARM_BUILTIN_WMADDUN,
23440 ARM_BUILTIN_WMULWSM,
23441 ARM_BUILTIN_WMULWUM,
23443 ARM_BUILTIN_WMULWSMR,
23444 ARM_BUILTIN_WMULWUMR,
23446 ARM_BUILTIN_WMULWL,
23448 ARM_BUILTIN_WMULSMR,
23449 ARM_BUILTIN_WMULUMR,
23451 ARM_BUILTIN_WQMULM,
23452 ARM_BUILTIN_WQMULMR,
23454 ARM_BUILTIN_WQMULWM,
23455 ARM_BUILTIN_WQMULWMR,
23457 ARM_BUILTIN_WADDBHUSM,
23458 ARM_BUILTIN_WADDBHUSL,
23460 ARM_BUILTIN_WQMIABB,
23461 ARM_BUILTIN_WQMIABT,
23462 ARM_BUILTIN_WQMIATB,
23463 ARM_BUILTIN_WQMIATT,
23465 ARM_BUILTIN_WQMIABBN,
23466 ARM_BUILTIN_WQMIABTN,
23467 ARM_BUILTIN_WQMIATBN,
23468 ARM_BUILTIN_WQMIATTN,
23470 ARM_BUILTIN_WMIABB,
23471 ARM_BUILTIN_WMIABT,
23472 ARM_BUILTIN_WMIATB,
23473 ARM_BUILTIN_WMIATT,
23475 ARM_BUILTIN_WMIABBN,
23476 ARM_BUILTIN_WMIABTN,
23477 ARM_BUILTIN_WMIATBN,
23478 ARM_BUILTIN_WMIATTN,
23480 ARM_BUILTIN_WMIAWBB,
23481 ARM_BUILTIN_WMIAWBT,
23482 ARM_BUILTIN_WMIAWTB,
23483 ARM_BUILTIN_WMIAWTT,
23485 ARM_BUILTIN_WMIAWBBN,
23486 ARM_BUILTIN_WMIAWBTN,
23487 ARM_BUILTIN_WMIAWTBN,
23488 ARM_BUILTIN_WMIAWTTN,
23490 ARM_BUILTIN_WMERGE,
23492 ARM_BUILTIN_CRC32B,
23493 ARM_BUILTIN_CRC32H,
23494 ARM_BUILTIN_CRC32W,
23495 ARM_BUILTIN_CRC32CB,
23496 ARM_BUILTIN_CRC32CH,
23497 ARM_BUILTIN_CRC32CW,
23499 ARM_BUILTIN_GET_FPSCR,
23500 ARM_BUILTIN_SET_FPSCR,
23502 #undef CRYPTO1
23503 #undef CRYPTO2
23504 #undef CRYPTO3
23506 #define CRYPTO1(L, U, M1, M2) \
23507 ARM_BUILTIN_CRYPTO_##U,
23508 #define CRYPTO2(L, U, M1, M2, M3) \
23509 ARM_BUILTIN_CRYPTO_##U,
23510 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23511 ARM_BUILTIN_CRYPTO_##U,
23513 #include "crypto.def"
23515 #undef CRYPTO1
23516 #undef CRYPTO2
23517 #undef CRYPTO3
23519 #include "arm_neon_builtins.def"
23521 ,ARM_BUILTIN_MAX
23524 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23526 #undef CF
23527 #undef VAR1
23528 #undef VAR2
23529 #undef VAR3
23530 #undef VAR4
23531 #undef VAR5
23532 #undef VAR6
23533 #undef VAR7
23534 #undef VAR8
23535 #undef VAR9
23536 #undef VAR10
23538 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23540 #define NUM_DREG_TYPES 5
23541 #define NUM_QREG_TYPES 6
23543 static void
23544 arm_init_neon_builtins (void)
23546 unsigned int i, fcode;
23547 tree decl;
23549 tree neon_intQI_type_node;
23550 tree neon_intHI_type_node;
23551 tree neon_floatHF_type_node;
23552 tree neon_polyQI_type_node;
23553 tree neon_polyHI_type_node;
23554 tree neon_intSI_type_node;
23555 tree neon_intDI_type_node;
23556 tree neon_intUTI_type_node;
23557 tree neon_float_type_node;
23559 tree intQI_pointer_node;
23560 tree intHI_pointer_node;
23561 tree intSI_pointer_node;
23562 tree intDI_pointer_node;
23563 tree float_pointer_node;
23565 tree const_intQI_node;
23566 tree const_intHI_node;
23567 tree const_intSI_node;
23568 tree const_intDI_node;
23569 tree const_float_node;
23571 tree const_intQI_pointer_node;
23572 tree const_intHI_pointer_node;
23573 tree const_intSI_pointer_node;
23574 tree const_intDI_pointer_node;
23575 tree const_float_pointer_node;
23577 tree V8QI_type_node;
23578 tree V4HI_type_node;
23579 tree V4UHI_type_node;
23580 tree V4HF_type_node;
23581 tree V2SI_type_node;
23582 tree V2USI_type_node;
23583 tree V2SF_type_node;
23584 tree V16QI_type_node;
23585 tree V8HI_type_node;
23586 tree V8UHI_type_node;
23587 tree V4SI_type_node;
23588 tree V4USI_type_node;
23589 tree V4SF_type_node;
23590 tree V2DI_type_node;
23591 tree V2UDI_type_node;
23593 tree intUQI_type_node;
23594 tree intUHI_type_node;
23595 tree intUSI_type_node;
23596 tree intUDI_type_node;
23598 tree intEI_type_node;
23599 tree intOI_type_node;
23600 tree intCI_type_node;
23601 tree intXI_type_node;
23603 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23604 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23605 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23607 /* Create distinguished type nodes for NEON vector element types,
23608 and pointers to values of such types, so we can detect them later. */
23609 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23610 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23611 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23612 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23613 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23614 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23615 neon_float_type_node = make_node (REAL_TYPE);
23616 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23617 layout_type (neon_float_type_node);
23618 neon_floatHF_type_node = make_node (REAL_TYPE);
23619 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23620 layout_type (neon_floatHF_type_node);
23622 /* Define typedefs which exactly correspond to the modes we are basing vector
23623 types on. If you change these names you'll need to change
23624 the table used by arm_mangle_type too. */
23625 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23626 "__builtin_neon_qi");
23627 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23628 "__builtin_neon_hi");
23629 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23630 "__builtin_neon_hf");
23631 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23632 "__builtin_neon_si");
23633 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23634 "__builtin_neon_sf");
23635 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23636 "__builtin_neon_di");
23637 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23638 "__builtin_neon_poly8");
23639 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23640 "__builtin_neon_poly16");
23642 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23643 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23644 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23645 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23646 float_pointer_node = build_pointer_type (neon_float_type_node);
23648 /* Next create constant-qualified versions of the above types. */
23649 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23650 TYPE_QUAL_CONST);
23651 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23652 TYPE_QUAL_CONST);
23653 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23654 TYPE_QUAL_CONST);
23655 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23656 TYPE_QUAL_CONST);
23657 const_float_node = build_qualified_type (neon_float_type_node,
23658 TYPE_QUAL_CONST);
23660 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23661 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23662 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23663 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23664 const_float_pointer_node = build_pointer_type (const_float_node);
23666 /* Unsigned integer types for various mode sizes. */
23667 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23668 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23669 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23670 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23671 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23672 /* Now create vector types based on our NEON element types. */
23673 /* 64-bit vectors. */
23674 V8QI_type_node =
23675 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23676 V4HI_type_node =
23677 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23678 V4UHI_type_node =
23679 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23680 V4HF_type_node =
23681 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23682 V2SI_type_node =
23683 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23684 V2USI_type_node =
23685 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23686 V2SF_type_node =
23687 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23688 /* 128-bit vectors. */
23689 V16QI_type_node =
23690 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23691 V8HI_type_node =
23692 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23693 V8UHI_type_node =
23694 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23695 V4SI_type_node =
23696 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23697 V4USI_type_node =
23698 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23699 V4SF_type_node =
23700 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23701 V2DI_type_node =
23702 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23703 V2UDI_type_node =
23704 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23707 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23708 "__builtin_neon_uqi");
23709 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23710 "__builtin_neon_uhi");
23711 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23712 "__builtin_neon_usi");
23713 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23714 "__builtin_neon_udi");
23715 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23716 "__builtin_neon_poly64");
23717 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23718 "__builtin_neon_poly128");
23720 /* Opaque integer types for structures of vectors. */
23721 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23722 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23723 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23724 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23726 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23727 "__builtin_neon_ti");
23728 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23729 "__builtin_neon_ei");
23730 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23731 "__builtin_neon_oi");
23732 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23733 "__builtin_neon_ci");
23734 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23735 "__builtin_neon_xi");
23737 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23740 tree V16UQI_type_node =
23741 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23743 tree v16uqi_ftype_v16uqi
23744 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23746 tree v16uqi_ftype_v16uqi_v16uqi
23747 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23748 V16UQI_type_node, NULL_TREE);
23750 tree v4usi_ftype_v4usi
23751 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23753 tree v4usi_ftype_v4usi_v4usi
23754 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23755 V4USI_type_node, NULL_TREE);
23757 tree v4usi_ftype_v4usi_v4usi_v4usi
23758 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23759 V4USI_type_node, V4USI_type_node, NULL_TREE);
23761 tree uti_ftype_udi_udi
23762 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23763 intUDI_type_node, NULL_TREE);
23765 #undef CRYPTO1
23766 #undef CRYPTO2
23767 #undef CRYPTO3
23768 #undef C
23769 #undef N
23770 #undef CF
23771 #undef FT1
23772 #undef FT2
23773 #undef FT3
23775 #define C(U) \
23776 ARM_BUILTIN_CRYPTO_##U
23777 #define N(L) \
23778 "__builtin_arm_crypto_"#L
23779 #define FT1(R, A) \
23780 R##_ftype_##A
23781 #define FT2(R, A1, A2) \
23782 R##_ftype_##A1##_##A2
23783 #define FT3(R, A1, A2, A3) \
23784 R##_ftype_##A1##_##A2##_##A3
23785 #define CRYPTO1(L, U, R, A) \
23786 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23787 C (U), BUILT_IN_MD, \
23788 NULL, NULL_TREE);
23789 #define CRYPTO2(L, U, R, A1, A2) \
23790 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23791 C (U), BUILT_IN_MD, \
23792 NULL, NULL_TREE);
23794 #define CRYPTO3(L, U, R, A1, A2, A3) \
23795 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23796 C (U), BUILT_IN_MD, \
23797 NULL, NULL_TREE);
23798 #include "crypto.def"
23800 #undef CRYPTO1
23801 #undef CRYPTO2
23802 #undef CRYPTO3
23803 #undef C
23804 #undef N
23805 #undef FT1
23806 #undef FT2
23807 #undef FT3
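/* Sketch (hypothetical entry, for illustration only): a crypto.def line of
   the form

       CRYPTO2 (foo, FOO, v16uqi, v16uqi, v16uqi)

   would expand through the macros above to

       arm_builtin_decls[ARM_BUILTIN_CRYPTO_FOO]
	 = add_builtin_function ("__builtin_arm_crypto_foo",
				 v16uqi_ftype_v16uqi_v16uqi,
				 ARM_BUILTIN_CRYPTO_FOO, BUILT_IN_MD,
				 NULL, NULL_TREE);

   i.e. N supplies the user-visible name, C the function code and FT2 the
   function type built earlier in this block.  */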
23809 dreg_types[0] = V8QI_type_node;
23810 dreg_types[1] = V4HI_type_node;
23811 dreg_types[2] = V2SI_type_node;
23812 dreg_types[3] = V2SF_type_node;
23813 dreg_types[4] = neon_intDI_type_node;
23815 qreg_types[0] = V16QI_type_node;
23816 qreg_types[1] = V8HI_type_node;
23817 qreg_types[2] = V4SI_type_node;
23818 qreg_types[3] = V4SF_type_node;
23819 qreg_types[4] = V2DI_type_node;
23820 qreg_types[5] = neon_intUTI_type_node;
23822 for (i = 0; i < NUM_QREG_TYPES; i++)
23824 int j;
23825 for (j = 0; j < NUM_QREG_TYPES; j++)
23827 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23828 reinterp_ftype_dreg[i][j]
23829 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23831 reinterp_ftype_qreg[i][j]
23832 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23836 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23837 i < ARRAY_SIZE (neon_builtin_data);
23838 i++, fcode++)
23840 neon_builtin_datum *d = &neon_builtin_data[i];
23842 const char* const modenames[] = {
23843 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23844 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23845 "ti", "ei", "oi"
23847 char namebuf[60];
23848 tree ftype = NULL;
23849 int is_load = 0, is_store = 0;
23851 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23853 d->fcode = fcode;
23855 switch (d->itype)
23857 case NEON_LOAD1:
23858 case NEON_LOAD1LANE:
23859 case NEON_LOADSTRUCT:
23860 case NEON_LOADSTRUCTLANE:
23861 is_load = 1;
23862 /* Fall through. */
23863 case NEON_STORE1:
23864 case NEON_STORE1LANE:
23865 case NEON_STORESTRUCT:
23866 case NEON_STORESTRUCTLANE:
23867 if (!is_load)
23868 is_store = 1;
23869 /* Fall through. */
23870 case NEON_UNOP:
23871 case NEON_RINT:
23872 case NEON_BINOP:
23873 case NEON_LOGICBINOP:
23874 case NEON_SHIFTINSERT:
23875 case NEON_TERNOP:
23876 case NEON_GETLANE:
23877 case NEON_SETLANE:
23878 case NEON_CREATE:
23879 case NEON_DUP:
23880 case NEON_DUPLANE:
23881 case NEON_SHIFTIMM:
23882 case NEON_SHIFTACC:
23883 case NEON_COMBINE:
23884 case NEON_SPLIT:
23885 case NEON_CONVERT:
23886 case NEON_FIXCONV:
23887 case NEON_LANEMUL:
23888 case NEON_LANEMULL:
23889 case NEON_LANEMULH:
23890 case NEON_LANEMAC:
23891 case NEON_SCALARMUL:
23892 case NEON_SCALARMULL:
23893 case NEON_SCALARMULH:
23894 case NEON_SCALARMAC:
23895 case NEON_SELECT:
23896 case NEON_VTBL:
23897 case NEON_VTBX:
23899 int k;
23900 tree return_type = void_type_node, args = void_list_node;
23902 /* Build a function type directly from the insn_data for
23903 this builtin. The build_function_type() function takes
23904 care of removing duplicates for us. */
23905 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23907 tree eltype;
23909 if (is_load && k == 1)
23911 /* Neon load patterns always have the memory
23912 operand in the operand 1 position. */
23913 gcc_assert (insn_data[d->code].operand[k].predicate
23914 == neon_struct_operand);
23916 switch (d->mode)
23918 case T_V8QI:
23919 case T_V16QI:
23920 eltype = const_intQI_pointer_node;
23921 break;
23923 case T_V4HI:
23924 case T_V8HI:
23925 eltype = const_intHI_pointer_node;
23926 break;
23928 case T_V2SI:
23929 case T_V4SI:
23930 eltype = const_intSI_pointer_node;
23931 break;
23933 case T_V2SF:
23934 case T_V4SF:
23935 eltype = const_float_pointer_node;
23936 break;
23938 case T_DI:
23939 case T_V2DI:
23940 eltype = const_intDI_pointer_node;
23941 break;
23943 default: gcc_unreachable ();
23946 else if (is_store && k == 0)
23948 /* Similarly, Neon store patterns use operand 0 as
23949 the memory location to store to. */
23950 gcc_assert (insn_data[d->code].operand[k].predicate
23951 == neon_struct_operand);
23953 switch (d->mode)
23955 case T_V8QI:
23956 case T_V16QI:
23957 eltype = intQI_pointer_node;
23958 break;
23960 case T_V4HI:
23961 case T_V8HI:
23962 eltype = intHI_pointer_node;
23963 break;
23965 case T_V2SI:
23966 case T_V4SI:
23967 eltype = intSI_pointer_node;
23968 break;
23970 case T_V2SF:
23971 case T_V4SF:
23972 eltype = float_pointer_node;
23973 break;
23975 case T_DI:
23976 case T_V2DI:
23977 eltype = intDI_pointer_node;
23978 break;
23980 default: gcc_unreachable ();
23983 else
23985 switch (insn_data[d->code].operand[k].mode)
23987 case VOIDmode: eltype = void_type_node; break;
23988 /* Scalars. */
23989 case QImode: eltype = neon_intQI_type_node; break;
23990 case HImode: eltype = neon_intHI_type_node; break;
23991 case SImode: eltype = neon_intSI_type_node; break;
23992 case SFmode: eltype = neon_float_type_node; break;
23993 case DImode: eltype = neon_intDI_type_node; break;
23994 case TImode: eltype = intTI_type_node; break;
23995 case EImode: eltype = intEI_type_node; break;
23996 case OImode: eltype = intOI_type_node; break;
23997 case CImode: eltype = intCI_type_node; break;
23998 case XImode: eltype = intXI_type_node; break;
23999 /* 64-bit vectors. */
24000 case V8QImode: eltype = V8QI_type_node; break;
24001 case V4HImode: eltype = V4HI_type_node; break;
24002 case V2SImode: eltype = V2SI_type_node; break;
24003 case V2SFmode: eltype = V2SF_type_node; break;
24004 /* 128-bit vectors. */
24005 case V16QImode: eltype = V16QI_type_node; break;
24006 case V8HImode: eltype = V8HI_type_node; break;
24007 case V4SImode: eltype = V4SI_type_node; break;
24008 case V4SFmode: eltype = V4SF_type_node; break;
24009 case V2DImode: eltype = V2DI_type_node; break;
24010 default: gcc_unreachable ();
24014 if (k == 0 && !is_store)
24015 return_type = eltype;
24016 else
24017 args = tree_cons (NULL_TREE, eltype, args);
24020 ftype = build_function_type (return_type, args);
24022 break;
24024 case NEON_REINTERP:
24026 /* We iterate over NUM_DREG_TYPES doubleword types,
24027 then NUM_QREG_TYPES quadword types.
24028 V4HF is not a type used in reinterpret, so we translate
24029 d->mode to the correct index in reinterp_ftype_dreg. */
24030 bool qreg_p
24031 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24032 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24033 % NUM_QREG_TYPES;
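	    /* Worked example (illustrative, derived from the modenames table
	       above): for a doubleword reinterpret with d->mode == T_V2SF,
	       "v2sf" is at index 4; since it sits above T_V4HF one is
	       subtracted, so rhs = (4 - 1) % 6 == 3, which selects
	       V2SF_type_node in dreg_types / reinterp_ftype_dreg.  */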
24034 switch (insn_data[d->code].operand[0].mode)
24036 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24037 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24038 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24039 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24040 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24041 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24042 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24043 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24044 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24045 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24046 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24047 default: gcc_unreachable ();
24050 break;
24051 case NEON_FLOAT_WIDEN:
24053 tree eltype = NULL_TREE;
24054 tree return_type = NULL_TREE;
24056 switch (insn_data[d->code].operand[1].mode)
24058 case V4HFmode:
24059 eltype = V4HF_type_node;
24060 return_type = V4SF_type_node;
24061 break;
24062 default: gcc_unreachable ();
24064 ftype = build_function_type_list (return_type, eltype, NULL);
24065 break;
24067 case NEON_FLOAT_NARROW:
24069 tree eltype = NULL_TREE;
24070 tree return_type = NULL_TREE;
24072 switch (insn_data[d->code].operand[1].mode)
24074 case V4SFmode:
24075 eltype = V4SF_type_node;
24076 return_type = V4HF_type_node;
24077 break;
24078 default: gcc_unreachable ();
24080 ftype = build_function_type_list (return_type, eltype, NULL);
24081 break;
24083 case NEON_BSWAP:
24085 tree eltype = NULL_TREE;
24086 switch (insn_data[d->code].operand[1].mode)
24088 case V4HImode:
24089 eltype = V4UHI_type_node;
24090 break;
24091 case V8HImode:
24092 eltype = V8UHI_type_node;
24093 break;
24094 case V2SImode:
24095 eltype = V2USI_type_node;
24096 break;
24097 case V4SImode:
24098 eltype = V4USI_type_node;
24099 break;
24100 case V2DImode:
24101 eltype = V2UDI_type_node;
24102 break;
24103 default: gcc_unreachable ();
24105 ftype = build_function_type_list (eltype, eltype, NULL);
24106 break;
24108 default:
24109 gcc_unreachable ();
24112 gcc_assert (ftype != NULL);
24114 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24116 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24117 NULL_TREE);
24118 arm_builtin_decls[fcode] = decl;
24122 #undef NUM_DREG_TYPES
24123 #undef NUM_QREG_TYPES
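/* Naming sketch (hypothetical entry, for illustration): an
   arm_neon_builtins.def entry whose name field is "vadd", instantiated for
   V8QImode, would have been registered just above via the sprintf on
   d->name and modenames[d->mode] as "__builtin_neon_vaddv8qi"; arm_neon.h
   then wraps such names in the user-level vadd_s8 style intrinsics.  */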
24125 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24126 do \
24128 if ((MASK) & insn_flags) \
24130 tree bdecl; \
24131 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24132 BUILT_IN_MD, NULL, NULL_TREE); \
24133 arm_builtin_decls[CODE] = bdecl; \
24136 while (0)
24138 struct builtin_description
24140 const unsigned int mask;
24141 const enum insn_code icode;
24142 const char * const name;
24143 const enum arm_builtins code;
24144 const enum rtx_code comparison;
24145 const unsigned int flag;
24148 static const struct builtin_description bdesc_2arg[] =
24150 #define IWMMXT_BUILTIN(code, string, builtin) \
24151 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24152 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24154 #define IWMMXT2_BUILTIN(code, string, builtin) \
24155 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24156 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
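/* For reference (macro expansion only, not an additional entry): the first
   line below,

       IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)

   expands to

       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
	 ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   pairing the named insn pattern with its builtin enum value.  */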
24158 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24159 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24160 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24161 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24162 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24163 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24164 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24165 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24166 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24167 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24168 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24169 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24170 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24171 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24172 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24173 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24174 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24175 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24176 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24177 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24178 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24179 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24180 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24181 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24182 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24183 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24184 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24185 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24186 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24187 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24188 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24189 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24190 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24191 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24192 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24193 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24194 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24195 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24196 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24197 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24198 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24199 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24200 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24201 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24202 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24203 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24204 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24205 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24206 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24207 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24208 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24209 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24210 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24211 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24212 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24213 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24214 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24215 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24216 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24217 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24218 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24219 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24220 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24221 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24222 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24223 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24224 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24225 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24226 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24227 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24228 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24229 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24230 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24231 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24232 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24233 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24234 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24235 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24237 #define IWMMXT_BUILTIN2(code, builtin) \
24238 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24240 #define IWMMXT2_BUILTIN2(code, builtin) \
24241 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24243 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24244 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24245 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24246 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24247 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24248 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24249 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24250 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24251 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24252 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24255 #define FP_BUILTIN(L, U) \
24256 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24257 UNKNOWN, 0},
24259   FP_BUILTIN (get_fpscr, GET_FPSCR)
24260   FP_BUILTIN (set_fpscr, SET_FPSCR)
24261 #undef FP_BUILTIN
24263 #define CRC32_BUILTIN(L, U) \
24264 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24265 UNKNOWN, 0},
24266 CRC32_BUILTIN (crc32b, CRC32B)
24267 CRC32_BUILTIN (crc32h, CRC32H)
24268 CRC32_BUILTIN (crc32w, CRC32W)
24269 CRC32_BUILTIN (crc32cb, CRC32CB)
24270 CRC32_BUILTIN (crc32ch, CRC32CH)
24271 CRC32_BUILTIN (crc32cw, CRC32CW)
24272 #undef CRC32_BUILTIN
24275 #define CRYPTO_BUILTIN(L, U) \
24276 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24277 UNKNOWN, 0},
24278 #undef CRYPTO1
24279 #undef CRYPTO2
24280 #undef CRYPTO3
24281 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24282 #define CRYPTO1(L, U, R, A)
24283 #define CRYPTO3(L, U, R, A1, A2, A3)
24284 #include "crypto.def"
24285 #undef CRYPTO1
24286 #undef CRYPTO2
24287 #undef CRYPTO3
24291 static const struct builtin_description bdesc_1arg[] =
24293 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24294 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24295 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24296 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24297 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24298 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24299 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24300 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24301 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24302 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24303 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24304 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24305 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24306 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24307 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24308 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24309 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24310 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24311 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24312 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24313 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24314 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24315 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24316 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24318 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24319 #define CRYPTO2(L, U, R, A1, A2)
24320 #define CRYPTO3(L, U, R, A1, A2, A3)
24321 #include "crypto.def"
24322 #undef CRYPTO1
24323 #undef CRYPTO2
24324 #undef CRYPTO3
24327 static const struct builtin_description bdesc_3arg[] =
24329 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24330 #define CRYPTO1(L, U, R, A)
24331 #define CRYPTO2(L, U, R, A1, A2)
24332 #include "crypto.def"
24333 #undef CRYPTO1
24334 #undef CRYPTO2
24335 #undef CRYPTO3
24337 #undef CRYPTO_BUILTIN
24339 /* Set up all the iWMMXt builtins. This is not called if
24340 TARGET_IWMMXT is zero. */
24342 static void
24343 arm_init_iwmmxt_builtins (void)
24345 const struct builtin_description * d;
24346 size_t i;
24348 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24349 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24350 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24352 tree v8qi_ftype_v8qi_v8qi_int
24353 = build_function_type_list (V8QI_type_node,
24354 V8QI_type_node, V8QI_type_node,
24355 integer_type_node, NULL_TREE);
24356 tree v4hi_ftype_v4hi_int
24357 = build_function_type_list (V4HI_type_node,
24358 V4HI_type_node, integer_type_node, NULL_TREE);
24359 tree v2si_ftype_v2si_int
24360 = build_function_type_list (V2SI_type_node,
24361 V2SI_type_node, integer_type_node, NULL_TREE);
24362 tree v2si_ftype_di_di
24363 = build_function_type_list (V2SI_type_node,
24364 long_long_integer_type_node,
24365 long_long_integer_type_node,
24366 NULL_TREE);
24367 tree di_ftype_di_int
24368 = build_function_type_list (long_long_integer_type_node,
24369 long_long_integer_type_node,
24370 integer_type_node, NULL_TREE);
24371 tree di_ftype_di_int_int
24372 = build_function_type_list (long_long_integer_type_node,
24373 long_long_integer_type_node,
24374 integer_type_node,
24375 integer_type_node, NULL_TREE);
24376 tree int_ftype_v8qi
24377 = build_function_type_list (integer_type_node,
24378 V8QI_type_node, NULL_TREE);
24379 tree int_ftype_v4hi
24380 = build_function_type_list (integer_type_node,
24381 V4HI_type_node, NULL_TREE);
24382 tree int_ftype_v2si
24383 = build_function_type_list (integer_type_node,
24384 V2SI_type_node, NULL_TREE);
24385 tree int_ftype_v8qi_int
24386 = build_function_type_list (integer_type_node,
24387 V8QI_type_node, integer_type_node, NULL_TREE);
24388 tree int_ftype_v4hi_int
24389 = build_function_type_list (integer_type_node,
24390 V4HI_type_node, integer_type_node, NULL_TREE);
24391 tree int_ftype_v2si_int
24392 = build_function_type_list (integer_type_node,
24393 V2SI_type_node, integer_type_node, NULL_TREE);
24394 tree v8qi_ftype_v8qi_int_int
24395 = build_function_type_list (V8QI_type_node,
24396 V8QI_type_node, integer_type_node,
24397 integer_type_node, NULL_TREE);
24398 tree v4hi_ftype_v4hi_int_int
24399 = build_function_type_list (V4HI_type_node,
24400 V4HI_type_node, integer_type_node,
24401 integer_type_node, NULL_TREE);
24402 tree v2si_ftype_v2si_int_int
24403 = build_function_type_list (V2SI_type_node,
24404 V2SI_type_node, integer_type_node,
24405 integer_type_node, NULL_TREE);
24406 /* Miscellaneous. */
24407 tree v8qi_ftype_v4hi_v4hi
24408 = build_function_type_list (V8QI_type_node,
24409 V4HI_type_node, V4HI_type_node, NULL_TREE);
24410 tree v4hi_ftype_v2si_v2si
24411 = build_function_type_list (V4HI_type_node,
24412 V2SI_type_node, V2SI_type_node, NULL_TREE);
24413 tree v8qi_ftype_v4hi_v8qi
24414 = build_function_type_list (V8QI_type_node,
24415 V4HI_type_node, V8QI_type_node, NULL_TREE);
24416 tree v2si_ftype_v4hi_v4hi
24417 = build_function_type_list (V2SI_type_node,
24418 V4HI_type_node, V4HI_type_node, NULL_TREE);
24419 tree v2si_ftype_v8qi_v8qi
24420 = build_function_type_list (V2SI_type_node,
24421 V8QI_type_node, V8QI_type_node, NULL_TREE);
24422 tree v4hi_ftype_v4hi_di
24423 = build_function_type_list (V4HI_type_node,
24424 V4HI_type_node, long_long_integer_type_node,
24425 NULL_TREE);
24426 tree v2si_ftype_v2si_di
24427 = build_function_type_list (V2SI_type_node,
24428 V2SI_type_node, long_long_integer_type_node,
24429 NULL_TREE);
24430 tree di_ftype_void
24431 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24432 tree int_ftype_void
24433 = build_function_type_list (integer_type_node, NULL_TREE);
24434 tree di_ftype_v8qi
24435 = build_function_type_list (long_long_integer_type_node,
24436 V8QI_type_node, NULL_TREE);
24437 tree di_ftype_v4hi
24438 = build_function_type_list (long_long_integer_type_node,
24439 V4HI_type_node, NULL_TREE);
24440 tree di_ftype_v2si
24441 = build_function_type_list (long_long_integer_type_node,
24442 V2SI_type_node, NULL_TREE);
24443 tree v2si_ftype_v4hi
24444 = build_function_type_list (V2SI_type_node,
24445 V4HI_type_node, NULL_TREE);
24446 tree v4hi_ftype_v8qi
24447 = build_function_type_list (V4HI_type_node,
24448 V8QI_type_node, NULL_TREE);
24449 tree v8qi_ftype_v8qi
24450 = build_function_type_list (V8QI_type_node,
24451 V8QI_type_node, NULL_TREE);
24452 tree v4hi_ftype_v4hi
24453 = build_function_type_list (V4HI_type_node,
24454 V4HI_type_node, NULL_TREE);
24455 tree v2si_ftype_v2si
24456 = build_function_type_list (V2SI_type_node,
24457 V2SI_type_node, NULL_TREE);
24459 tree di_ftype_di_v4hi_v4hi
24460 = build_function_type_list (long_long_unsigned_type_node,
24461 long_long_unsigned_type_node,
24462 V4HI_type_node, V4HI_type_node,
24463 NULL_TREE);
24465 tree di_ftype_v4hi_v4hi
24466 = build_function_type_list (long_long_unsigned_type_node,
24467 V4HI_type_node,V4HI_type_node,
24468 NULL_TREE);
24470 tree v2si_ftype_v2si_v4hi_v4hi
24471 = build_function_type_list (V2SI_type_node,
24472 V2SI_type_node, V4HI_type_node,
24473 V4HI_type_node, NULL_TREE);
24475 tree v2si_ftype_v2si_v8qi_v8qi
24476 = build_function_type_list (V2SI_type_node,
24477 V2SI_type_node, V8QI_type_node,
24478 V8QI_type_node, NULL_TREE);
24480 tree di_ftype_di_v2si_v2si
24481 = build_function_type_list (long_long_unsigned_type_node,
24482 long_long_unsigned_type_node,
24483 V2SI_type_node, V2SI_type_node,
24484 NULL_TREE);
24486 tree di_ftype_di_di_int
24487 = build_function_type_list (long_long_unsigned_type_node,
24488 long_long_unsigned_type_node,
24489 long_long_unsigned_type_node,
24490 integer_type_node, NULL_TREE);
24492 tree void_ftype_int
24493 = build_function_type_list (void_type_node,
24494 integer_type_node, NULL_TREE);
24496 tree v8qi_ftype_char
24497 = build_function_type_list (V8QI_type_node,
24498 signed_char_type_node, NULL_TREE);
24500 tree v4hi_ftype_short
24501 = build_function_type_list (V4HI_type_node,
24502 short_integer_type_node, NULL_TREE);
24504 tree v2si_ftype_int
24505 = build_function_type_list (V2SI_type_node,
24506 integer_type_node, NULL_TREE);
24508 /* Normal vector binops. */
24509 tree v8qi_ftype_v8qi_v8qi
24510 = build_function_type_list (V8QI_type_node,
24511 V8QI_type_node, V8QI_type_node, NULL_TREE);
24512 tree v4hi_ftype_v4hi_v4hi
24513 = build_function_type_list (V4HI_type_node,
24514 V4HI_type_node,V4HI_type_node, NULL_TREE);
24515 tree v2si_ftype_v2si_v2si
24516 = build_function_type_list (V2SI_type_node,
24517 V2SI_type_node, V2SI_type_node, NULL_TREE);
24518 tree di_ftype_di_di
24519 = build_function_type_list (long_long_unsigned_type_node,
24520 long_long_unsigned_type_node,
24521 long_long_unsigned_type_node,
24522 NULL_TREE);
24524 /* Add all builtins that are more or less simple operations on two
24525 operands. */
24526 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24528 /* Use one of the operands; the target can have a different mode for
24529 mask-generating compares. */
24530 enum machine_mode mode;
24531 tree type;
24533 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24534 continue;
24536 mode = insn_data[d->icode].operand[1].mode;
24538 switch (mode)
24540 case V8QImode:
24541 type = v8qi_ftype_v8qi_v8qi;
24542 break;
24543 case V4HImode:
24544 type = v4hi_ftype_v4hi_v4hi;
24545 break;
24546 case V2SImode:
24547 type = v2si_ftype_v2si_v2si;
24548 break;
24549 case DImode:
24550 type = di_ftype_di_di;
24551 break;
24553 default:
24554 gcc_unreachable ();
24557 def_mbuiltin (d->mask, d->name, type, d->code);
24560 /* Add the remaining MMX insns with somewhat more complicated types. */
24561 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24562 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24563 ARM_BUILTIN_ ## CODE)
24565 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24566 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24567 ARM_BUILTIN_ ## CODE)
24569 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24570 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24571 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24572 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24573 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24574 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24575 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24576 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24577 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24579 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24580 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24581 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24582 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24583 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24584 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24586 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24587 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24588 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24589 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24590 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24591 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24593 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24594 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24595 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24596 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24597 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24598 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24600 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24601 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24602 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24603 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24604 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24605 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24607 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24609 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24610 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24611 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24612 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24613 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24614 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24615 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24616 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24617 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24618 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24620 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24621 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24622 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24623 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24624 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24625 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24626 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24627 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24628 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24630 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24631 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24632 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24634 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24635 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24636 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24638 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24639 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24641 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24642 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24643 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24644 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24645 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24646 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24648 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24649 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24650 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24651 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24652 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24653 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24654 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24655 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24656 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24657 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24658 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24659 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24661 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24662 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24663 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24664 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24666 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24667 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24668 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24669 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24670 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24671 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24672 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24674 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24675 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24676 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24678 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24679 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24680 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24681 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24683 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24684 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24685 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24686 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24688 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24689 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24690 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24691 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24693 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24694 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24695 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24696 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24698 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24699 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24700 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24701 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24703 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24704 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24705 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24706 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24708 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24710 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24711 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24712 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24714 #undef iwmmx_mbuiltin
24715 #undef iwmmx2_mbuiltin
24718 static void
24719 arm_init_fp16_builtins (void)
24721 tree fp16_type = make_node (REAL_TYPE);
24722 TYPE_PRECISION (fp16_type) = 16;
24723 layout_type (fp16_type);
24724 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
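/* Usage sketch (illustrative, not part of the original source): the __fp16
   type registered here is only available when a half-precision format has
   been selected, e.g. with -mfp16-format=ieee the following is accepted:

       __fp16 half = 1.0f;
       float f = half;	/* widened via the conversion support below */

   whereas the default -mfp16-format=none leaves __fp16 unregistered.  */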
24727 static void
24728 arm_init_crc32_builtins ()
24730 tree si_ftype_si_qi
24731 = build_function_type_list (unsigned_intSI_type_node,
24732 unsigned_intSI_type_node,
24733 unsigned_intQI_type_node, NULL_TREE);
24734 tree si_ftype_si_hi
24735 = build_function_type_list (unsigned_intSI_type_node,
24736 unsigned_intSI_type_node,
24737 unsigned_intHI_type_node, NULL_TREE);
24738 tree si_ftype_si_si
24739 = build_function_type_list (unsigned_intSI_type_node,
24740 unsigned_intSI_type_node,
24741 unsigned_intSI_type_node, NULL_TREE);
24743 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24744 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24745 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24746 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24747 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24748 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24749 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24750 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24751 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24752 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24753 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24754 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24755 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24756 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24757 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24758 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24759 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24760 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
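/* Usage sketch (illustrative only): with a CRC-capable target selected,
   e.g. -march=armv8-a+crc, the declarations above make calls such as

       uint32_t next = __builtin_arm_crc32b (crc, byte);
       uint32_t word = __builtin_arm_crc32w (crc, data);

   available; arm_acle.h wraps them as __crc32b, __crc32w, and so on.  */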
24763 static void
24764 arm_init_builtins (void)
24766 if (TARGET_REALLY_IWMMXT)
24767 arm_init_iwmmxt_builtins ();
24769 if (TARGET_NEON)
24770 arm_init_neon_builtins ();
24772 if (arm_fp16_format)
24773 arm_init_fp16_builtins ();
24775 if (TARGET_CRC32)
24776 arm_init_crc32_builtins ();
24778 if (TARGET_VFP && TARGET_HARD_FLOAT)
24780 tree ftype_set_fpscr
24781 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24782 tree ftype_get_fpscr
24783 = build_function_type_list (unsigned_type_node, NULL);
24785 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24786 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24787 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24788 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24789 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24790 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24794 /* Return the ARM builtin for CODE. */
24796 static tree
24797 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24799 if (code >= ARM_BUILTIN_MAX)
24800 return error_mark_node;
24802 return arm_builtin_decls[code];
24805 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24807 static const char *
24808 arm_invalid_parameter_type (const_tree t)
24810 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24811 return N_("function parameters cannot have __fp16 type");
24812 return NULL;
24815 /* Implement TARGET_INVALID_RETURN_TYPE. */
24817 static const char *
24818 arm_invalid_return_type (const_tree t)
24820 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24821 return N_("functions cannot return __fp16 type");
24822 return NULL;
24825 /* Implement TARGET_PROMOTED_TYPE. */
24827 static tree
24828 arm_promoted_type (const_tree t)
24830 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24831 return float_type_node;
24832 return NULL_TREE;
24835 /* Implement TARGET_CONVERT_TO_TYPE.
24836 Specifically, this hook implements the peculiarity of the ARM
24837 half-precision floating-point C semantics that requires conversions between
24838 __fp16 to or from double to do an intermediate conversion to float. */
24840 static tree
24841 arm_convert_to_type (tree type, tree expr)
24843 tree fromtype = TREE_TYPE (expr);
24844 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24845 return NULL_TREE;
24846 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24847 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24848 return convert (type, convert (float_type_node, expr));
24849 return NULL_TREE;
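/* Worked example (illustrative): given

       __fp16 h = ...;
       double d = h;

   the widening satisfies the first clause above (fromtype precision 16,
   target type precision > 32), so the conversion is rewritten as
   (double) (float) h, i.e. it goes through an intermediate float exactly
   as the half-precision semantics described above require.  */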
24852 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24853 This simply adds HFmode as a supported mode; even though we don't
24854 implement arithmetic on this type directly, it's supported by
24855 optabs conversions, much the way the double-word arithmetic is
24856 special-cased in the default hook. */
24858 static bool
24859 arm_scalar_mode_supported_p (enum machine_mode mode)
24861 if (mode == HFmode)
24862 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24863 else if (ALL_FIXED_POINT_MODE_P (mode))
24864 return true;
24865 else
24866 return default_scalar_mode_supported_p (mode);
24869 /* Errors in the source file can cause expand_expr to return const0_rtx
24870 where we expect a vector. To avoid crashing, use one of the vector
24871 clear instructions. */
24873 static rtx
24874 safe_vector_operand (rtx x, enum machine_mode mode)
24876 if (x != const0_rtx)
24877 return x;
24878 x = gen_reg_rtx (mode);
24880 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24881 : gen_rtx_SUBREG (DImode, x, 0)));
24882 return x;
24885 /* Function to expand ternary builtins. */
24886 static rtx
24887 arm_expand_ternop_builtin (enum insn_code icode,
24888 tree exp, rtx target)
24890 rtx pat;
24891 tree arg0 = CALL_EXPR_ARG (exp, 0);
24892 tree arg1 = CALL_EXPR_ARG (exp, 1);
24893 tree arg2 = CALL_EXPR_ARG (exp, 2);
24895 rtx op0 = expand_normal (arg0);
24896 rtx op1 = expand_normal (arg1);
24897 rtx op2 = expand_normal (arg2);
24898 rtx op3 = NULL_RTX;
24900 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24901 lane operand depending on endianness. */
24902 bool builtin_sha1cpm_p = false;
24904 if (insn_data[icode].n_operands == 5)
24906 gcc_assert (icode == CODE_FOR_crypto_sha1c
24907 || icode == CODE_FOR_crypto_sha1p
24908 || icode == CODE_FOR_crypto_sha1m);
24909 builtin_sha1cpm_p = true;
24911 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24912 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24913 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24914 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24917 if (VECTOR_MODE_P (mode0))
24918 op0 = safe_vector_operand (op0, mode0);
24919 if (VECTOR_MODE_P (mode1))
24920 op1 = safe_vector_operand (op1, mode1);
24921 if (VECTOR_MODE_P (mode2))
24922 op2 = safe_vector_operand (op2, mode2);
24924 if (! target
24925 || GET_MODE (target) != tmode
24926 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24927 target = gen_reg_rtx (tmode);
24929 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24930 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24931 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24933 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24934 op0 = copy_to_mode_reg (mode0, op0);
24935 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24936 op1 = copy_to_mode_reg (mode1, op1);
24937 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24938 op2 = copy_to_mode_reg (mode2, op2);
24939 if (builtin_sha1cpm_p)
24940 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24942 if (builtin_sha1cpm_p)
24943 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24944 else
24945 pat = GEN_FCN (icode) (target, op0, op1, op2);
24946 if (! pat)
24947 return 0;
24948 emit_insn (pat);
24949 return target;
24952 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24954 static rtx
24955 arm_expand_binop_builtin (enum insn_code icode,
24956 tree exp, rtx target)
24958 rtx pat;
24959 tree arg0 = CALL_EXPR_ARG (exp, 0);
24960 tree arg1 = CALL_EXPR_ARG (exp, 1);
24961 rtx op0 = expand_normal (arg0);
24962 rtx op1 = expand_normal (arg1);
24963 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24964 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24965 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24967 if (VECTOR_MODE_P (mode0))
24968 op0 = safe_vector_operand (op0, mode0);
24969 if (VECTOR_MODE_P (mode1))
24970 op1 = safe_vector_operand (op1, mode1);
24972 if (! target
24973 || GET_MODE (target) != tmode
24974 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24975 target = gen_reg_rtx (tmode);
24977 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24978 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24980 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24981 op0 = copy_to_mode_reg (mode0, op0);
24982 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24983 op1 = copy_to_mode_reg (mode1, op1);
24985 pat = GEN_FCN (icode) (target, op0, op1);
24986 if (! pat)
24987 return 0;
24988 emit_insn (pat);
24989 return target;
24992 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24994 static rtx
24995 arm_expand_unop_builtin (enum insn_code icode,
24996 tree exp, rtx target, int do_load)
24998 rtx pat;
24999 tree arg0 = CALL_EXPR_ARG (exp, 0);
25000 rtx op0 = expand_normal (arg0);
25001 rtx op1 = NULL_RTX;
25002 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25003 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25004 bool builtin_sha1h_p = false;
25006 if (insn_data[icode].n_operands == 3)
25008 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25009 builtin_sha1h_p = true;
25012 if (! target
25013 || GET_MODE (target) != tmode
25014 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25015 target = gen_reg_rtx (tmode);
25016 if (do_load)
25017 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25018 else
25020 if (VECTOR_MODE_P (mode0))
25021 op0 = safe_vector_operand (op0, mode0);
25023 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25024 op0 = copy_to_mode_reg (mode0, op0);
25026 if (builtin_sha1h_p)
25027 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25029 if (builtin_sha1h_p)
25030 pat = GEN_FCN (icode) (target, op0, op1);
25031 else
25032 pat = GEN_FCN (icode) (target, op0);
25033 if (! pat)
25034 return 0;
25035 emit_insn (pat);
25036 return target;
25039 typedef enum {
25040 NEON_ARG_COPY_TO_REG,
25041 NEON_ARG_CONSTANT,
25042 NEON_ARG_MEMORY,
25043 NEON_ARG_STOP
25044 } builtin_arg;
25046 #define NEON_MAX_BUILTIN_ARGS 5
25048 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25049 and return an expression for the accessed memory.
25051 The intrinsic function operates on a block of registers that has
25052 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25053 function references the memory at EXP of type TYPE and in mode
25054 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25055 available. */
25057 static tree
25058 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25059 enum machine_mode reg_mode,
25060 neon_builtin_type_mode type_mode)
25062 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25063 tree elem_type, upper_bound, array_type;
25065 /* Work out the size of the register block in bytes. */
25066 reg_size = GET_MODE_SIZE (reg_mode);
25068 /* Work out the size of each vector in bytes. */
25069 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25070 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25072 /* Work out how many vectors there are. */
25073 gcc_assert (reg_size % vector_size == 0);
25074 nvectors = reg_size / vector_size;
25076 /* Work out the type of each element. */
25077 gcc_assert (POINTER_TYPE_P (type));
25078 elem_type = TREE_TYPE (type);
25080 /* Work out how many elements are being loaded or stored.
25081 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25082 and memory elements; anything else implies a lane load or store. */
25083 if (mem_mode == reg_mode)
25084 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25085 else
25086 nelems = nvectors;
25088 /* Create a type that describes the full access. */
25089 upper_bound = build_int_cst (size_type_node, nelems - 1);
25090 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25092 /* Dereference EXP using that type. */
25093 return fold_build2 (MEM_REF, array_type, exp,
25094 build_int_cst (build_pointer_type (array_type), 0));
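/* Worked example (illustrative): for a structure load whose register block
   has OImode (32 bytes) and holds V16QI vectors, reg_size is 32,
   vector_size is 16 and nvectors is 2; when MEM_MODE equals REG_MODE the
   access through an int8_t pointer covers 32 bytes / 1 byte == 32 elements,
   so EXP is dereferenced as an int8_t[32] array, describing the full
   footprint of the two-vector load.  */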
25097 /* Expand a Neon builtin. */
25098 static rtx
25099 arm_expand_neon_args (rtx target, int icode, int have_retval,
25100 neon_builtin_type_mode type_mode,
25101 tree exp, int fcode, ...)
25103 va_list ap;
25104 rtx pat;
25105 tree arg[NEON_MAX_BUILTIN_ARGS];
25106 rtx op[NEON_MAX_BUILTIN_ARGS];
25107 tree arg_type;
25108 tree formals;
25109 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25110 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25111 enum machine_mode other_mode;
25112 int argc = 0;
25113 int opno;
25115 if (have_retval
25116 && (!target
25117 || GET_MODE (target) != tmode
25118 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25119 target = gen_reg_rtx (tmode);
25121 va_start (ap, fcode);
25123 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25125 for (;;)
25127 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25129 if (thisarg == NEON_ARG_STOP)
25130 break;
25131 else
25133 opno = argc + have_retval;
25134 mode[argc] = insn_data[icode].operand[opno].mode;
25135 arg[argc] = CALL_EXPR_ARG (exp, argc);
25136 arg_type = TREE_VALUE (formals);
25137 if (thisarg == NEON_ARG_MEMORY)
25139 other_mode = insn_data[icode].operand[1 - opno].mode;
25140 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25141 mode[argc], other_mode,
25142 type_mode);
25145 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a
25146 MEM_P rtx is returned. */
25147 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25148 (thisarg == NEON_ARG_MEMORY
25149 ? EXPAND_MEMORY : EXPAND_NORMAL));
25151 switch (thisarg)
25153 case NEON_ARG_COPY_TO_REG:
25154 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25155 if (!(*insn_data[icode].operand[opno].predicate)
25156 (op[argc], mode[argc]))
25157 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25158 break;
25160 case NEON_ARG_CONSTANT:
25161 /* FIXME: This error message is somewhat unhelpful. */
25162 if (!(*insn_data[icode].operand[opno].predicate)
25163 (op[argc], mode[argc]))
25164 error ("argument must be a constant");
25165 break;
25167 case NEON_ARG_MEMORY:
25168 /* Check if expand failed. */
25169 if (op[argc] == const0_rtx)
25170 return 0;
25171 gcc_assert (MEM_P (op[argc]));
25172 PUT_MODE (op[argc], mode[argc]);
25173 /* ??? arm_neon.h uses the same built-in functions for signed
25174 and unsigned accesses, casting where necessary. This isn't
25175 alias safe. */
25176 set_mem_alias_set (op[argc], 0);
25177 if (!(*insn_data[icode].operand[opno].predicate)
25178 (op[argc], mode[argc]))
25179 op[argc] = (replace_equiv_address
25180 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25181 break;
25183 case NEON_ARG_STOP:
25184 gcc_unreachable ();
25187 argc++;
25188 formals = TREE_CHAIN (formals);
25192 va_end (ap);
25194 if (have_retval)
25195 switch (argc)
25197 case 1:
25198 pat = GEN_FCN (icode) (target, op[0]);
25199 break;
25201 case 2:
25202 pat = GEN_FCN (icode) (target, op[0], op[1]);
25203 break;
25205 case 3:
25206 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25207 break;
25209 case 4:
25210 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25211 break;
25213 case 5:
25214 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25215 break;
25217 default:
25218 gcc_unreachable ();
25220 else
25221 switch (argc)
25223 case 1:
25224 pat = GEN_FCN (icode) (op[0]);
25225 break;
25227 case 2:
25228 pat = GEN_FCN (icode) (op[0], op[1]);
25229 break;
25231 case 3:
25232 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25233 break;
25235 case 4:
25236 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25237 break;
25239 case 5:
25240 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25241 break;
25243 default:
25244 gcc_unreachable ();
25247 if (!pat)
25248 return 0;
25250 emit_insn (pat);
25252 return target;
25255 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25256 constants defined per-instruction or per instruction-variant. Instead, the
25257 required info is looked up in the table neon_builtin_data. */
25258 static rtx
25259 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25261 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25262 neon_itype itype = d->itype;
25263 enum insn_code icode = d->code;
25264 neon_builtin_type_mode type_mode = d->mode;
25266 switch (itype)
25268 case NEON_UNOP:
25269 case NEON_CONVERT:
25270 case NEON_DUPLANE:
25271 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25272 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25274 case NEON_BINOP:
25275 case NEON_SETLANE:
25276 case NEON_SCALARMUL:
25277 case NEON_SCALARMULL:
25278 case NEON_SCALARMULH:
25279 case NEON_SHIFTINSERT:
25280 case NEON_LOGICBINOP:
25281 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25282 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25283 NEON_ARG_STOP);
25285 case NEON_TERNOP:
25286 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25287 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25288 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25290 case NEON_GETLANE:
25291 case NEON_FIXCONV:
25292 case NEON_SHIFTIMM:
25293 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25294 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25295 NEON_ARG_STOP);
25297 case NEON_CREATE:
25298 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25299 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25301 case NEON_DUP:
25302 case NEON_RINT:
25303 case NEON_SPLIT:
25304 case NEON_FLOAT_WIDEN:
25305 case NEON_FLOAT_NARROW:
25306 case NEON_BSWAP:
25307 case NEON_REINTERP:
25308 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25309 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25311 case NEON_COMBINE:
25312 case NEON_VTBL:
25313 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25314 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25316 case NEON_LANEMUL:
25317 case NEON_LANEMULL:
25318 case NEON_LANEMULH:
25319 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25320 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25321 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25323 case NEON_LANEMAC:
25324 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25325 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25326 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25328 case NEON_SHIFTACC:
25329 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25330 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25331 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25333 case NEON_SCALARMAC:
25334 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25335 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25336 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25338 case NEON_SELECT:
25339 case NEON_VTBX:
25340 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25341 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25342 NEON_ARG_STOP);
25344 case NEON_LOAD1:
25345 case NEON_LOADSTRUCT:
25346 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25347 NEON_ARG_MEMORY, NEON_ARG_STOP);
25349 case NEON_LOAD1LANE:
25350 case NEON_LOADSTRUCTLANE:
25351 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25352 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25353 NEON_ARG_STOP);
25355 case NEON_STORE1:
25356 case NEON_STORESTRUCT:
25357 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25358 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25360 case NEON_STORE1LANE:
25361 case NEON_STORESTRUCTLANE:
25362 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25363 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25364 NEON_ARG_STOP);
25367 gcc_unreachable ();
25370 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25371 void
25372 neon_reinterpret (rtx dest, rtx src)
25374 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25377 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25378 not to early-clobber SRC registers in the process.
25380 We assume that the operands described by SRC and DEST represent a
25381 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25382 number of components into which the copy has been decomposed. */
25383 void
25384 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25386 unsigned int i;
25388 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25389 || REGNO (operands[0]) < REGNO (operands[1]))
25391 for (i = 0; i < count; i++)
25393 operands[2 * i] = dest[i];
25394 operands[2 * i + 1] = src[i];
25397 else
25399 for (i = 0; i < count; i++)
25401 operands[2 * i] = dest[count - i - 1];
25402 operands[2 * i + 1] = src[count - i - 1];
25407 /* Split operands into moves from op[1] + op[2] into op[0]. */
25409 void
25410 neon_split_vcombine (rtx operands[3])
25412 unsigned int dest = REGNO (operands[0]);
25413 unsigned int src1 = REGNO (operands[1]);
25414 unsigned int src2 = REGNO (operands[2]);
25415 enum machine_mode halfmode = GET_MODE (operands[1]);
25416 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25417 rtx destlo, desthi;
25419 if (src1 == dest && src2 == dest + halfregs)
25421 /* No-op move. Can't split to nothing; emit something. */
25422 emit_note (NOTE_INSN_DELETED);
25423 return;
25426 /* Preserve register attributes for variable tracking. */
25427 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25428 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25429 GET_MODE_SIZE (halfmode));
25431 /* Special case of reversed high/low parts. Use VSWP. */
25432 if (src2 == dest && src1 == dest + halfregs)
25434 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25435 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25436 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25437 return;
25440 if (!reg_overlap_mentioned_p (operands[2], destlo))
25442 /* Try to avoid unnecessary moves if part of the result
25443 is in the right place already. */
25444 if (src1 != dest)
25445 emit_move_insn (destlo, operands[1]);
25446 if (src2 != dest + halfregs)
25447 emit_move_insn (desthi, operands[2]);
25449 else
25451 if (src2 != dest + halfregs)
25452 emit_move_insn (desthi, operands[2]);
25453 if (src1 != dest)
25454 emit_move_insn (destlo, operands[1]);
25458 /* Expand an expression EXP that calls a built-in function,
25459 with result going to TARGET if that's convenient
25460 (and in mode MODE if that's convenient).
25461 SUBTARGET may be used as the target for computing one of EXP's operands.
25462 IGNORE is nonzero if the value is to be ignored. */
25464 static rtx
25465 arm_expand_builtin (tree exp,
25466 rtx target,
25467 rtx subtarget ATTRIBUTE_UNUSED,
25468 enum machine_mode mode ATTRIBUTE_UNUSED,
25469 int ignore ATTRIBUTE_UNUSED)
25471 const struct builtin_description * d;
25472 enum insn_code icode;
25473 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25474 tree arg0;
25475 tree arg1;
25476 tree arg2;
25477 rtx op0;
25478 rtx op1;
25479 rtx op2;
25480 rtx pat;
25481 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25482 size_t i;
25483 enum machine_mode tmode;
25484 enum machine_mode mode0;
25485 enum machine_mode mode1;
25486 enum machine_mode mode2;
25487 int opint;
25488 int selector;
25489 int mask;
25490 int imm;
25492 if (fcode >= ARM_BUILTIN_NEON_BASE)
25493 return arm_expand_neon_builtin (fcode, exp, target);
25495 switch (fcode)
25497 case ARM_BUILTIN_GET_FPSCR:
25498 case ARM_BUILTIN_SET_FPSCR:
25499 if (fcode == ARM_BUILTIN_GET_FPSCR)
25501 icode = CODE_FOR_get_fpscr;
25502 target = gen_reg_rtx (SImode);
25503 pat = GEN_FCN (icode) (target);
25505 else
25507 target = NULL_RTX;
25508 icode = CODE_FOR_set_fpscr;
25509 arg0 = CALL_EXPR_ARG (exp, 0);
25510 op0 = expand_normal (arg0);
25511 pat = GEN_FCN (icode) (op0);
25513 emit_insn (pat);
25514 return target;
25516 case ARM_BUILTIN_TEXTRMSB:
25517 case ARM_BUILTIN_TEXTRMUB:
25518 case ARM_BUILTIN_TEXTRMSH:
25519 case ARM_BUILTIN_TEXTRMUH:
25520 case ARM_BUILTIN_TEXTRMSW:
25521 case ARM_BUILTIN_TEXTRMUW:
25522 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25523 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25524 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25525 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25526 : CODE_FOR_iwmmxt_textrmw);
25528 arg0 = CALL_EXPR_ARG (exp, 0);
25529 arg1 = CALL_EXPR_ARG (exp, 1);
25530 op0 = expand_normal (arg0);
25531 op1 = expand_normal (arg1);
25532 tmode = insn_data[icode].operand[0].mode;
25533 mode0 = insn_data[icode].operand[1].mode;
25534 mode1 = insn_data[icode].operand[2].mode;
25536 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25537 op0 = copy_to_mode_reg (mode0, op0);
25538 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25540 /* @@@ better error message */
25541 error ("selector must be an immediate");
25542 return gen_reg_rtx (tmode);
25545 opint = INTVAL (op1);
25546 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25548 if (opint > 7 || opint < 0)
25549 error ("the range of selector should be in 0 to 7");
25551 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25553 if (opint > 3 || opint < 0)
25554 error ("the range of selector should be in 0 to 3");
25556 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25558 if (opint > 1 || opint < 0)
25559 error ("the range of selector should be in 0 to 1");
25562 if (target == 0
25563 || GET_MODE (target) != tmode
25564 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25565 target = gen_reg_rtx (tmode);
25566 pat = GEN_FCN (icode) (target, op0, op1);
25567 if (! pat)
25568 return 0;
25569 emit_insn (pat);
25570 return target;
25572 case ARM_BUILTIN_WALIGNI:
25573 /* If op2 is an immediate, use waligni, else use walignr. */
25574 arg0 = CALL_EXPR_ARG (exp, 0);
25575 arg1 = CALL_EXPR_ARG (exp, 1);
25576 arg2 = CALL_EXPR_ARG (exp, 2);
25577 op0 = expand_normal (arg0);
25578 op1 = expand_normal (arg1);
25579 op2 = expand_normal (arg2);
25580 if (CONST_INT_P (op2))
25582 icode = CODE_FOR_iwmmxt_waligni;
25583 tmode = insn_data[icode].operand[0].mode;
25584 mode0 = insn_data[icode].operand[1].mode;
25585 mode1 = insn_data[icode].operand[2].mode;
25586 mode2 = insn_data[icode].operand[3].mode;
25587 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25588 op0 = copy_to_mode_reg (mode0, op0);
25589 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25590 op1 = copy_to_mode_reg (mode1, op1);
25591 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25592 selector = INTVAL (op2);
25593 if (selector > 7 || selector < 0)
25594 error ("the range of selector should be in 0 to 7");
25596 else
25598 icode = CODE_FOR_iwmmxt_walignr;
25599 tmode = insn_data[icode].operand[0].mode;
25600 mode0 = insn_data[icode].operand[1].mode;
25601 mode1 = insn_data[icode].operand[2].mode;
25602 mode2 = insn_data[icode].operand[3].mode;
25603 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25604 op0 = copy_to_mode_reg (mode0, op0);
25605 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25606 op1 = copy_to_mode_reg (mode1, op1);
25607 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25608 op2 = copy_to_mode_reg (mode2, op2);
25610 if (target == 0
25611 || GET_MODE (target) != tmode
25612 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25613 target = gen_reg_rtx (tmode);
25614 pat = GEN_FCN (icode) (target, op0, op1, op2);
25615 if (!pat)
25616 return 0;
25617 emit_insn (pat);
25618 return target;
25620 case ARM_BUILTIN_TINSRB:
25621 case ARM_BUILTIN_TINSRH:
25622 case ARM_BUILTIN_TINSRW:
25623 case ARM_BUILTIN_WMERGE:
25624 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25625 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25626 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25627 : CODE_FOR_iwmmxt_tinsrw);
25628 arg0 = CALL_EXPR_ARG (exp, 0);
25629 arg1 = CALL_EXPR_ARG (exp, 1);
25630 arg2 = CALL_EXPR_ARG (exp, 2);
25631 op0 = expand_normal (arg0);
25632 op1 = expand_normal (arg1);
25633 op2 = expand_normal (arg2);
25634 tmode = insn_data[icode].operand[0].mode;
25635 mode0 = insn_data[icode].operand[1].mode;
25636 mode1 = insn_data[icode].operand[2].mode;
25637 mode2 = insn_data[icode].operand[3].mode;
25639 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25640 op0 = copy_to_mode_reg (mode0, op0);
25641 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25642 op1 = copy_to_mode_reg (mode1, op1);
25643 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25645 error ("selector must be an immediate");
25646 return const0_rtx;
25648 if (icode == CODE_FOR_iwmmxt_wmerge)
25650 selector = INTVAL (op2);
25651 if (selector > 7 || selector < 0)
25652 error ("the range of selector should be in 0 to 7");
25654 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25655 || (icode == CODE_FOR_iwmmxt_tinsrh)
25656 || (icode == CODE_FOR_iwmmxt_tinsrw))
25658 mask = 0x01;
25659 selector= INTVAL (op2);
25660 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25661 error ("the range of selector should be in 0 to 7");
25662 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25663 error ("the range of selector should be in 0 to 3");
25664 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25665 error ("the range of selector should be in 0 to 1");
25666 mask <<= selector;
25667 op2 = GEN_INT (mask);
25669 if (target == 0
25670 || GET_MODE (target) != tmode
25671 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25672 target = gen_reg_rtx (tmode);
25673 pat = GEN_FCN (icode) (target, op0, op1, op2);
25674 if (! pat)
25675 return 0;
25676 emit_insn (pat);
25677 return target;
25679 case ARM_BUILTIN_SETWCGR0:
25680 case ARM_BUILTIN_SETWCGR1:
25681 case ARM_BUILTIN_SETWCGR2:
25682 case ARM_BUILTIN_SETWCGR3:
25683 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25684 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25685 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25686 : CODE_FOR_iwmmxt_setwcgr3);
25687 arg0 = CALL_EXPR_ARG (exp, 0);
25688 op0 = expand_normal (arg0);
25689 mode0 = insn_data[icode].operand[0].mode;
25690 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25691 op0 = copy_to_mode_reg (mode0, op0);
25692 pat = GEN_FCN (icode) (op0);
25693 if (!pat)
25694 return 0;
25695 emit_insn (pat);
25696 return 0;
25698 case ARM_BUILTIN_GETWCGR0:
25699 case ARM_BUILTIN_GETWCGR1:
25700 case ARM_BUILTIN_GETWCGR2:
25701 case ARM_BUILTIN_GETWCGR3:
25702 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25703 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25704 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25705 : CODE_FOR_iwmmxt_getwcgr3);
25706 tmode = insn_data[icode].operand[0].mode;
25707 if (target == 0
25708 || GET_MODE (target) != tmode
25709 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25710 target = gen_reg_rtx (tmode);
25711 pat = GEN_FCN (icode) (target);
25712 if (!pat)
25713 return 0;
25714 emit_insn (pat);
25715 return target;
25717 case ARM_BUILTIN_WSHUFH:
25718 icode = CODE_FOR_iwmmxt_wshufh;
25719 arg0 = CALL_EXPR_ARG (exp, 0);
25720 arg1 = CALL_EXPR_ARG (exp, 1);
25721 op0 = expand_normal (arg0);
25722 op1 = expand_normal (arg1);
25723 tmode = insn_data[icode].operand[0].mode;
25724 mode1 = insn_data[icode].operand[1].mode;
25725 mode2 = insn_data[icode].operand[2].mode;
25727 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25728 op0 = copy_to_mode_reg (mode1, op0);
25729 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25731 error ("mask must be an immediate");
25732 return const0_rtx;
25734 selector = INTVAL (op1);
25735 if (selector < 0 || selector > 255)
25736 error ("the range of mask should be in 0 to 255");
25737 if (target == 0
25738 || GET_MODE (target) != tmode
25739 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25740 target = gen_reg_rtx (tmode);
25741 pat = GEN_FCN (icode) (target, op0, op1);
25742 if (! pat)
25743 return 0;
25744 emit_insn (pat);
25745 return target;
25747 case ARM_BUILTIN_WMADDS:
25748 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25749 case ARM_BUILTIN_WMADDSX:
25750 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25751 case ARM_BUILTIN_WMADDSN:
25752 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25753 case ARM_BUILTIN_WMADDU:
25754 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25755 case ARM_BUILTIN_WMADDUX:
25756 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25757 case ARM_BUILTIN_WMADDUN:
25758 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25759 case ARM_BUILTIN_WSADBZ:
25760 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25761 case ARM_BUILTIN_WSADHZ:
25762 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25764 /* Several three-argument builtins. */
25765 case ARM_BUILTIN_WMACS:
25766 case ARM_BUILTIN_WMACU:
25767 case ARM_BUILTIN_TMIA:
25768 case ARM_BUILTIN_TMIAPH:
25769 case ARM_BUILTIN_TMIATT:
25770 case ARM_BUILTIN_TMIATB:
25771 case ARM_BUILTIN_TMIABT:
25772 case ARM_BUILTIN_TMIABB:
25773 case ARM_BUILTIN_WQMIABB:
25774 case ARM_BUILTIN_WQMIABT:
25775 case ARM_BUILTIN_WQMIATB:
25776 case ARM_BUILTIN_WQMIATT:
25777 case ARM_BUILTIN_WQMIABBN:
25778 case ARM_BUILTIN_WQMIABTN:
25779 case ARM_BUILTIN_WQMIATBN:
25780 case ARM_BUILTIN_WQMIATTN:
25781 case ARM_BUILTIN_WMIABB:
25782 case ARM_BUILTIN_WMIABT:
25783 case ARM_BUILTIN_WMIATB:
25784 case ARM_BUILTIN_WMIATT:
25785 case ARM_BUILTIN_WMIABBN:
25786 case ARM_BUILTIN_WMIABTN:
25787 case ARM_BUILTIN_WMIATBN:
25788 case ARM_BUILTIN_WMIATTN:
25789 case ARM_BUILTIN_WMIAWBB:
25790 case ARM_BUILTIN_WMIAWBT:
25791 case ARM_BUILTIN_WMIAWTB:
25792 case ARM_BUILTIN_WMIAWTT:
25793 case ARM_BUILTIN_WMIAWBBN:
25794 case ARM_BUILTIN_WMIAWBTN:
25795 case ARM_BUILTIN_WMIAWTBN:
25796 case ARM_BUILTIN_WMIAWTTN:
25797 case ARM_BUILTIN_WSADB:
25798 case ARM_BUILTIN_WSADH:
25799 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25800 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25801 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25802 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25803 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25804 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25805 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25806 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25807 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25808 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25809 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25810 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25811 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25812 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25813 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25814 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25815 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25816 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25817 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25818 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25819 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25820 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25821 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25822 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25823 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25824 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25825 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25826 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25827 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25828 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25829 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25830 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25831 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25832 : CODE_FOR_iwmmxt_wsadh);
25833 arg0 = CALL_EXPR_ARG (exp, 0);
25834 arg1 = CALL_EXPR_ARG (exp, 1);
25835 arg2 = CALL_EXPR_ARG (exp, 2);
25836 op0 = expand_normal (arg0);
25837 op1 = expand_normal (arg1);
25838 op2 = expand_normal (arg2);
25839 tmode = insn_data[icode].operand[0].mode;
25840 mode0 = insn_data[icode].operand[1].mode;
25841 mode1 = insn_data[icode].operand[2].mode;
25842 mode2 = insn_data[icode].operand[3].mode;
25844 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25845 op0 = copy_to_mode_reg (mode0, op0);
25846 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25847 op1 = copy_to_mode_reg (mode1, op1);
25848 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25849 op2 = copy_to_mode_reg (mode2, op2);
25850 if (target == 0
25851 || GET_MODE (target) != tmode
25852 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25853 target = gen_reg_rtx (tmode);
25854 pat = GEN_FCN (icode) (target, op0, op1, op2);
25855 if (! pat)
25856 return 0;
25857 emit_insn (pat);
25858 return target;
25860 case ARM_BUILTIN_WZERO:
25861 target = gen_reg_rtx (DImode);
25862 emit_insn (gen_iwmmxt_clrdi (target));
25863 return target;
25865 case ARM_BUILTIN_WSRLHI:
25866 case ARM_BUILTIN_WSRLWI:
25867 case ARM_BUILTIN_WSRLDI:
25868 case ARM_BUILTIN_WSLLHI:
25869 case ARM_BUILTIN_WSLLWI:
25870 case ARM_BUILTIN_WSLLDI:
25871 case ARM_BUILTIN_WSRAHI:
25872 case ARM_BUILTIN_WSRAWI:
25873 case ARM_BUILTIN_WSRADI:
25874 case ARM_BUILTIN_WRORHI:
25875 case ARM_BUILTIN_WRORWI:
25876 case ARM_BUILTIN_WRORDI:
25877 case ARM_BUILTIN_WSRLH:
25878 case ARM_BUILTIN_WSRLW:
25879 case ARM_BUILTIN_WSRLD:
25880 case ARM_BUILTIN_WSLLH:
25881 case ARM_BUILTIN_WSLLW:
25882 case ARM_BUILTIN_WSLLD:
25883 case ARM_BUILTIN_WSRAH:
25884 case ARM_BUILTIN_WSRAW:
25885 case ARM_BUILTIN_WSRAD:
25886 case ARM_BUILTIN_WRORH:
25887 case ARM_BUILTIN_WRORW:
25888 case ARM_BUILTIN_WRORD:
25889 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25890 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25891 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25892 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25893 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25894 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25895 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25896 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25897 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25898 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25899 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25900 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25901 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25902 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25903 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25904 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25905 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25906 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25907 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25908 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25909 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25910 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25911 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25912 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25913 : CODE_FOR_nothing);
25914 arg1 = CALL_EXPR_ARG (exp, 1);
25915 op1 = expand_normal (arg1);
25916 if (GET_MODE (op1) == VOIDmode)
25918 imm = INTVAL (op1);
25919 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25920 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25921 && (imm < 0 || imm > 32))
25923 if (fcode == ARM_BUILTIN_WRORHI)
25924 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25925 else if (fcode == ARM_BUILTIN_WRORWI)
25926 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25927 else if (fcode == ARM_BUILTIN_WRORH)
25928 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25929 else
25930 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25932 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25933 && (imm < 0 || imm > 64))
25935 if (fcode == ARM_BUILTIN_WRORDI)
25936 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25937 else
25938 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25940 else if (imm < 0)
25942 if (fcode == ARM_BUILTIN_WSRLHI)
25943 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25944 else if (fcode == ARM_BUILTIN_WSRLWI)
25945 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25946 else if (fcode == ARM_BUILTIN_WSRLDI)
25947 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25948 else if (fcode == ARM_BUILTIN_WSLLHI)
25949 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25950 else if (fcode == ARM_BUILTIN_WSLLWI)
25951 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25952 else if (fcode == ARM_BUILTIN_WSLLDI)
25953 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25954 else if (fcode == ARM_BUILTIN_WSRAHI)
25955 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25956 else if (fcode == ARM_BUILTIN_WSRAWI)
25957 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25958 else if (fcode == ARM_BUILTIN_WSRADI)
25959 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25960 else if (fcode == ARM_BUILTIN_WSRLH)
25961 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25962 else if (fcode == ARM_BUILTIN_WSRLW)
25963 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25964 else if (fcode == ARM_BUILTIN_WSRLD)
25965 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25966 else if (fcode == ARM_BUILTIN_WSLLH)
25967 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25968 else if (fcode == ARM_BUILTIN_WSLLW)
25969 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25970 else if (fcode == ARM_BUILTIN_WSLLD)
25971 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25972 else if (fcode == ARM_BUILTIN_WSRAH)
25973 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25974 else if (fcode == ARM_BUILTIN_WSRAW)
25975 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25976 else
25977 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25980 return arm_expand_binop_builtin (icode, exp, target);
25982 default:
25983 break;
25986 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25987 if (d->code == (const enum arm_builtins) fcode)
25988 return arm_expand_binop_builtin (d->icode, exp, target);
25990 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25991 if (d->code == (const enum arm_builtins) fcode)
25992 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25994 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25995 if (d->code == (const enum arm_builtins) fcode)
25996 return arm_expand_ternop_builtin (d->icode, exp, target);
25998 /* @@@ Should really do something sensible here. */
25999 return NULL_RTX;
26002 /* Return the number (counting from 0) of
26003 the least significant set bit in MASK. */
26005 inline static int
26006 number_of_first_bit_set (unsigned mask)
26008 return ctz_hwi (mask);
26011 /* Like emit_multi_reg_push, but allowing for a different set of
26012 registers to be described as saved. MASK is the set of registers
26013 to be saved; REAL_REGS is the set of registers to be described as
26014 saved. If REAL_REGS is 0, only describe the stack adjustment. */
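/* For example, when high registers are staged through low work registers
   before being pushed (as the Thumb-1 prologue does), MASK names the low
   registers actually pushed while REAL_REGS names the high registers whose
   values they hold, so the unwind information records the right saves.  */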
26016 static rtx
26017 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26019 unsigned long regno;
26020 rtx par[10], tmp, reg, insn;
26021 int i, j;
26023 /* Build the parallel of the registers actually being stored. */
26024 for (i = 0; mask; ++i, mask &= mask - 1)
26026 regno = ctz_hwi (mask);
26027 reg = gen_rtx_REG (SImode, regno);
26029 if (i == 0)
26030 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26031 else
26032 tmp = gen_rtx_USE (VOIDmode, reg);
26034 par[i] = tmp;
26037 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26038 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26039 tmp = gen_frame_mem (BLKmode, tmp);
26040 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26041 par[0] = tmp;
26043 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26044 insn = emit_insn (tmp);
26046 /* Always build the stack adjustment note for unwind info. */
26047 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26048 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26049 par[0] = tmp;
26051 /* Build the parallel of the registers recorded as saved for unwind. */
26052 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26054 regno = ctz_hwi (real_regs);
26055 reg = gen_rtx_REG (SImode, regno);
26057 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26058 tmp = gen_frame_mem (SImode, tmp);
26059 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26060 RTX_FRAME_RELATED_P (tmp) = 1;
26061 par[j + 1] = tmp;
26064 if (j == 0)
26065 tmp = par[0];
26066 else
26068 RTX_FRAME_RELATED_P (par[0]) = 1;
26069 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26072 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26074 return insn;
26077 /* Emit code to push or pop registers to or from the stack. F is the
26078 assembly file. MASK is the registers to pop. */
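/* For example, a MASK of 0x7 emits "pop {r0, r1, r2}"; if the PC bit is
   set and no interworking, backtrace or EH-return handling is needed, the
   PC is simply included in the pop.  */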
26079 static void
26080 thumb_pop (FILE *f, unsigned long mask)
26082 int regno;
26083 int lo_mask = mask & 0xFF;
26084 int pushed_words = 0;
26086 gcc_assert (mask);
26088 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26090 /* Special case. Do not generate a POP PC statement here; do it in
26091 thumb_exit (). */
26092 thumb_exit (f, -1);
26093 return;
26096 fprintf (f, "\tpop\t{");
26098 /* Look at the low registers first. */
26099 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26101 if (lo_mask & 1)
26103 asm_fprintf (f, "%r", regno);
26105 if ((lo_mask & ~1) != 0)
26106 fprintf (f, ", ");
26108 pushed_words++;
26112 if (mask & (1 << PC_REGNUM))
26114 /* Catch popping the PC. */
26115 if (TARGET_INTERWORK || TARGET_BACKTRACE
26116 || crtl->calls_eh_return)
26118 /* The PC is never popped directly; instead
26119 it is popped into r3 and then BX is used. */
26120 fprintf (f, "}\n");
26122 thumb_exit (f, -1);
26124 return;
26126 else
26128 if (mask & 0xFF)
26129 fprintf (f, ", ");
26131 asm_fprintf (f, "%r", PC_REGNUM);
26135 fprintf (f, "}\n");
26138 /* Generate code to return from a thumb function.
26139 If 'reg_containing_return_addr' is -1, then the return address is
26140 actually on the stack, at the stack pointer. */
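/* In the simplest case (no interworking, no backtrace structure, not
   entered in ARM mode, no EH return) this reduces to a single "pop {pc}";
   otherwise the return address ends up in a register and a BX is
   emitted.  */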
26141 static void
26142 thumb_exit (FILE *f, int reg_containing_return_addr)
26144 unsigned regs_available_for_popping;
26145 unsigned regs_to_pop;
26146 int pops_needed;
26147 unsigned available;
26148 unsigned required;
26149 enum machine_mode mode;
26150 int size;
26151 int restore_a4 = FALSE;
26153 /* Compute the registers we need to pop. */
26154 regs_to_pop = 0;
26155 pops_needed = 0;
26157 if (reg_containing_return_addr == -1)
26159 regs_to_pop |= 1 << LR_REGNUM;
26160 ++pops_needed;
26163 if (TARGET_BACKTRACE)
26165 /* Restore the (ARM) frame pointer and stack pointer. */
26166 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26167 pops_needed += 2;
26170 /* If there is nothing to pop then just emit the BX instruction and
26171 return. */
26172 if (pops_needed == 0)
26174 if (crtl->calls_eh_return)
26175 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26177 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26178 return;
26180 /* Otherwise if we are not supporting interworking and we have not created
26181 a backtrace structure and the function was not entered in ARM mode then
26182 just pop the return address straight into the PC. */
26183 else if (!TARGET_INTERWORK
26184 && !TARGET_BACKTRACE
26185 && !is_called_in_ARM_mode (current_function_decl)
26186 && !crtl->calls_eh_return)
26188 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26189 return;
26192 /* Find out how many of the (return) argument registers we can corrupt. */
26193 regs_available_for_popping = 0;
26195 /* If returning via __builtin_eh_return, the bottom three registers
26196 all contain information needed for the return. */
26197 if (crtl->calls_eh_return)
26198 size = 12;
26199 else
26201 /* Deduce the registers used from the function's
26202 return value. This is more reliable than examining
26203 df_regs_ever_live_p () because that will be set if the register is
26204 ever used in the function, not just if the register is used
26205 to hold a return value. */
26207 if (crtl->return_rtx != 0)
26208 mode = GET_MODE (crtl->return_rtx);
26209 else
26210 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26212 size = GET_MODE_SIZE (mode);
26214 if (size == 0)
26216 /* In a void function we can use any argument register.
26217 In a function that returns a structure on the stack
26218 we can use the second and third argument registers. */
26219 if (mode == VOIDmode)
26220 regs_available_for_popping =
26221 (1 << ARG_REGISTER (1))
26222 | (1 << ARG_REGISTER (2))
26223 | (1 << ARG_REGISTER (3));
26224 else
26225 regs_available_for_popping =
26226 (1 << ARG_REGISTER (2))
26227 | (1 << ARG_REGISTER (3));
26229 else if (size <= 4)
26230 regs_available_for_popping =
26231 (1 << ARG_REGISTER (2))
26232 | (1 << ARG_REGISTER (3));
26233 else if (size <= 8)
26234 regs_available_for_popping =
26235 (1 << ARG_REGISTER (3));
26238 /* Match registers to be popped with registers into which we pop them. */
26239 for (available = regs_available_for_popping,
26240 required = regs_to_pop;
26241 required != 0 && available != 0;
26242 available &= ~(available & - available),
26243 required &= ~(required & - required))
26244 -- pops_needed;
26246 /* If we have any popping registers left over, remove them. */
26247 if (available > 0)
26248 regs_available_for_popping &= ~available;
26250 /* Otherwise if we need another popping register we can use
26251 the fourth argument register. */
26252 else if (pops_needed)
26254 /* If we have not found any free argument registers and
26255 reg a4 contains the return address, we must move it. */
26256 if (regs_available_for_popping == 0
26257 && reg_containing_return_addr == LAST_ARG_REGNUM)
26259 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26260 reg_containing_return_addr = LR_REGNUM;
26262 else if (size > 12)
26264 /* Register a4 is being used to hold part of the return value,
26265 but we have dire need of a free, low register. */
26266 restore_a4 = TRUE;
26268 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26271 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26273 /* The fourth argument register is available. */
26274 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26276 --pops_needed;
26280 /* Pop as many registers as we can. */
26281 thumb_pop (f, regs_available_for_popping);
26283 /* Process the registers we popped. */
26284 if (reg_containing_return_addr == -1)
26286 /* The return address was popped into the lowest numbered register. */
26287 regs_to_pop &= ~(1 << LR_REGNUM);
26289 reg_containing_return_addr =
26290 number_of_first_bit_set (regs_available_for_popping);
26292 /* Remove this register from the mask of available registers, so that
26293 the return address will not be corrupted by further pops. */
26294 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26297 /* If we popped other registers then handle them here. */
26298 if (regs_available_for_popping)
26300 int frame_pointer;
26302 /* Work out which register currently contains the frame pointer. */
26303 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26305 /* Move it into the correct place. */
26306 asm_fprintf (f, "\tmov\t%r, %r\n",
26307 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26309 /* (Temporarily) remove it from the mask of popped registers. */
26310 regs_available_for_popping &= ~(1 << frame_pointer);
26311 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26313 if (regs_available_for_popping)
26315 int stack_pointer;
26317 /* We popped the stack pointer as well;
26318 find the register that contains it. */
26319 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26321 /* Move it into the stack register. */
26322 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26324 /* At this point we have popped all necessary registers, so
26325 do not worry about restoring regs_available_for_popping
26326 to its correct value:
26328 assert (pops_needed == 0)
26329 assert (regs_available_for_popping == (1 << frame_pointer))
26330 assert (regs_to_pop == (1 << STACK_POINTER)) */
26332 else
26334 /* Since we have just moved the popped value into the frame
26335 pointer, the popping register is available for reuse, and
26336 we know that we still have the stack pointer left to pop. */
26337 regs_available_for_popping |= (1 << frame_pointer);
26341 /* If we still have registers left on the stack, but we no longer have
26342 any registers into which we can pop them, then we must move the return
26343 address into the link register and make available the register that
26344 contained it. */
26345 if (regs_available_for_popping == 0 && pops_needed > 0)
26347 regs_available_for_popping |= 1 << reg_containing_return_addr;
26349 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26350 reg_containing_return_addr);
26352 reg_containing_return_addr = LR_REGNUM;
26355 /* If we have registers left on the stack then pop some more.
26356 We know that at most we will want to pop FP and SP. */
26357 if (pops_needed > 0)
26359 int popped_into;
26360 int move_to;
26362 thumb_pop (f, regs_available_for_popping);
26364 /* We have popped either FP or SP.
26365 Move whichever one it is into the correct register. */
26366 popped_into = number_of_first_bit_set (regs_available_for_popping);
26367 move_to = number_of_first_bit_set (regs_to_pop);
26369 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26371 regs_to_pop &= ~(1 << move_to);
26373 --pops_needed;
26376 /* If we still have not popped everything then we must have only
26377 had one register available to us and we are now popping the SP. */
26378 if (pops_needed > 0)
26380 int popped_into;
26382 thumb_pop (f, regs_available_for_popping);
26384 popped_into = number_of_first_bit_set (regs_available_for_popping);
26386 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26388 /* assert (regs_to_pop == (1 << STACK_POINTER))
26389 assert (pops_needed == 1) */
26393 /* If necessary restore the a4 register. */
26394 if (restore_a4)
26396 if (reg_containing_return_addr != LR_REGNUM)
26398 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26399 reg_containing_return_addr = LR_REGNUM;
26402 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26405 if (crtl->calls_eh_return)
26406 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26408 /* Return to caller. */
26409 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26412 /* Scan INSN just before assembler is output for it.
26413 For Thumb-1, we track the status of the condition codes; this
26414 information is used in the cbranchsi4_insn pattern. */
26415 void
26416 thumb1_final_prescan_insn (rtx insn)
26418 if (flag_print_asm_name)
26419 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26420 INSN_ADDRESSES (INSN_UID (insn)));
26421 /* Don't overwrite the previous setter when we get to a cbranch. */
26422 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26424 enum attr_conds conds;
26426 if (cfun->machine->thumb1_cc_insn)
26428 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26429 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26430 CC_STATUS_INIT;
26432 conds = get_attr_conds (insn);
26433 if (conds == CONDS_SET)
26435 rtx set = single_set (insn);
26436 cfun->machine->thumb1_cc_insn = insn;
26437 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26438 cfun->machine->thumb1_cc_op1 = const0_rtx;
26439 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26440 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26442 rtx src1 = XEXP (SET_SRC (set), 1);
26443 if (src1 == const0_rtx)
26444 cfun->machine->thumb1_cc_mode = CCmode;
26446 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26448 /* Record the src register operand instead of dest because
26449 cprop_hardreg pass propagates src. */
26450 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26453 else if (conds != CONDS_NOCOND)
26454 cfun->machine->thumb1_cc_insn = NULL_RTX;
26457 /* Check if unexpected far jump is used. */
26458 if (cfun->machine->lr_save_eliminated
26459 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26460 internal_error("Unexpected thumb1 far jump");
26464 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26466 unsigned HOST_WIDE_INT mask = 0xff;
26467 int i;
26469 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26470 if (val == 0) /* XXX */
26471 return 0;
26473 for (i = 0; i < 25; i++)
26474 if ((val & (mask << i)) == val)
26475 return 1;
26477 return 0;
26480 /* Return nonzero if the current function contains,
26481 or might contain, a far jump. */
26482 static int
26483 thumb_far_jump_used_p (void)
26485 rtx insn;
26486 bool far_jump = false;
26487 unsigned int func_size = 0;
26489 /* This test is only important for leaf functions. */
26490 /* assert (!leaf_function_p ()); */
26492 /* If we have already decided that far jumps may be used,
26493 do not bother checking again, and always return true even if
26494 it turns out that they are not being used. Once we have made
26495 the decision that far jumps are present (and that hence the link
26496 register will be pushed onto the stack) we cannot go back on it. */
26497 if (cfun->machine->far_jump_used)
26498 return 1;
26500 /* If this function is not being called from the prologue/epilogue
26501 generation code then it must be being called from the
26502 INITIAL_ELIMINATION_OFFSET macro. */
26503 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26505 /* In this case we know that we are being asked about the elimination
26506 of the arg pointer register. If that register is not being used,
26507 then there are no arguments on the stack, and we do not have to
26508 worry that a far jump might force the prologue to push the link
26509 register, changing the stack offsets. In this case we can just
26510 return false, since the presence of far jumps in the function will
26511 not affect stack offsets.
26513 If the arg pointer is live (or if it was live, but has now been
26514 eliminated and so set to dead) then we do have to test to see if
26515 the function might contain a far jump. This test can lead to some
26516 false negatives, since before reload is completed, the length of
26517 branch instructions is not known, so gcc defaults to returning their
26518 longest length, which in turn sets the far jump attribute to true.
26520 A false negative will not result in bad code being generated, but it
26521 will result in a needless push and pop of the link register. We
26522 hope that this does not occur too often.
26524 If we need doubleword stack alignment this could affect the other
26525 elimination offsets so we can't risk getting it wrong. */
26526 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26527 cfun->machine->arg_pointer_live = 1;
26528 else if (!cfun->machine->arg_pointer_live)
26529 return 0;
26532 /* We should not change far_jump_used during or after reload, as there is
26533 no chance to change stack frame layout. */
26534 if (reload_in_progress || reload_completed)
26535 return 0;
26537 /* Check to see if the function contains a branch
26538 insn with the far jump attribute set. */
26539 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26541 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26543 far_jump = true;
26545 func_size += get_attr_length (insn);
26548 /* The far_jump attribute is always true for thumb1 before the
26549 shorten_branch pass, so checking it before that pass
26550 isn't very useful.
26552 The following heuristic tries to estimate more accurately whether a far
26553 jump may finally be used. The heuristic is very conservative, as there
26554 is no chance to roll back the decision not to use far jumps.
26556 Thumb1 long branch offsets range from -2048 to 2046. The worst case is
26557 that each 2-byte insn is associated with a 4-byte constant pool entry.
26558 Using function size 2048/3 as the threshold is conservative enough. */
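/* That is, each 2-byte insn may be followed by a 4-byte pool entry, so a
   body of func_size bytes of insns can span up to about 3 * func_size
   bytes; requiring 3 * func_size < 2048 keeps every branch within the
   +/-2KB reach of a short branch.  */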
26559 if (far_jump)
26561 if ((func_size * 3) >= 2048)
26563 /* Record the fact that we have decided that
26564 the function does use far jumps. */
26565 cfun->machine->far_jump_used = 1;
26566 return 1;
26570 return 0;
26573 /* Return nonzero if FUNC must be entered in ARM mode. */
26575 is_called_in_ARM_mode (tree func)
26577 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26579 /* Ignore the problem of functions whose address is taken. */
26580 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26581 return TRUE;
26583 #ifdef ARM_PE
26584 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26585 #else
26586 return FALSE;
26587 #endif
26590 /* Given the stack offsets and register mask in OFFSETS, decide how
26591 many additional registers to push instead of subtracting a constant
26592 from SP. For epilogues the principle is the same except we use pop.
26593 FOR_PROLOGUE indicates which we're generating. */
26594 static int
26595 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26597 HOST_WIDE_INT amount;
26598 unsigned long live_regs_mask = offsets->saved_regs_mask;
26599 /* Extract a mask of the ones we can give to the Thumb's push/pop
26600 instruction. */
26601 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26602 /* Then count how many other high registers will need to be pushed. */
26603 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26604 int n_free, reg_base, size;
26606 if (!for_prologue && frame_pointer_needed)
26607 amount = offsets->locals_base - offsets->saved_regs;
26608 else
26609 amount = offsets->outgoing_args - offsets->saved_regs;
26611 /* If the stack frame size is 512 exactly, we can save one load
26612 instruction, which should make this a win even when optimizing
26613 for speed. */
26614 if (!optimize_size && amount != 512)
26615 return 0;
26617 /* Can't do this if there are high registers to push. */
26618 if (high_regs_pushed != 0)
26619 return 0;
26621 /* Shouldn't do it in the prologue if no registers would normally
26622 be pushed at all. In the epilogue, also allow it if we'll have
26623 a pop insn for the PC. */
26624 if (l_mask == 0
26625 && (for_prologue
26626 || TARGET_BACKTRACE
26627 || (live_regs_mask & 1 << LR_REGNUM) == 0
26628 || TARGET_INTERWORK
26629 || crtl->args.pretend_args_size != 0))
26630 return 0;
26632 /* Don't do this if thumb_expand_prologue wants to emit instructions
26633 between the push and the stack frame allocation. */
26634 if (for_prologue
26635 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26636 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26637 return 0;
26639 reg_base = 0;
26640 n_free = 0;
26641 if (!for_prologue)
26643 size = arm_size_return_regs ();
26644 reg_base = ARM_NUM_INTS (size);
26645 live_regs_mask >>= reg_base;
26648 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26649 && (for_prologue || call_used_regs[reg_base + n_free]))
26651 live_regs_mask >>= 1;
26652 n_free++;
26655 if (n_free == 0)
26656 return 0;
26657 gcc_assert (amount / 4 * 4 == amount);
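/* A single Thumb-1 SP-adjusting ADD/SUB immediate is limited to 508
   (0x7f * 4), so pushing (AMOUNT - 508) / 4 extra registers leaves exactly
   508 bytes, which still fits in one instruction.  */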
26659 if (amount >= 512 && (amount - n_free * 4) < 512)
26660 return (amount - 508) / 4;
26661 if (amount <= n_free * 4)
26662 return amount / 4;
26663 return 0;
26666 /* The bits which aren't usefully expanded as rtl. */
26667 const char *
26668 thumb1_unexpanded_epilogue (void)
26670 arm_stack_offsets *offsets;
26671 int regno;
26672 unsigned long live_regs_mask = 0;
26673 int high_regs_pushed = 0;
26674 int extra_pop;
26675 int had_to_push_lr;
26676 int size;
26678 if (cfun->machine->return_used_this_function != 0)
26679 return "";
26681 if (IS_NAKED (arm_current_func_type ()))
26682 return "";
26684 offsets = arm_get_frame_offsets ();
26685 live_regs_mask = offsets->saved_regs_mask;
26686 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26688 /* Deduce the registers used from the function's return value.
26689 This is more reliable than examining df_regs_ever_live_p () because that
26690 will be set if the register is ever used in the function, not just if
26691 the register is used to hold a return value. */
26692 size = arm_size_return_regs ();
26694 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26695 if (extra_pop > 0)
26697 unsigned long extra_mask = (1 << extra_pop) - 1;
26698 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26701 /* The prologue may have pushed some high registers to use as
26702 work registers. e.g. the testsuite file:
26703 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26704 compiles to produce:
26705 push {r4, r5, r6, r7, lr}
26706 mov r7, r9
26707 mov r6, r8
26708 push {r6, r7}
26709 as part of the prologue. We have to undo that pushing here. */
26711 if (high_regs_pushed)
26713 unsigned long mask = live_regs_mask & 0xff;
26714 int next_hi_reg;
26716 /* The available low registers depend on the size of the value we are
26717 returning. */
26718 if (size <= 12)
26719 mask |= 1 << 3;
26720 if (size <= 8)
26721 mask |= 1 << 2;
26723 if (mask == 0)
26724 /* Oh dear! We have no low registers into which we can pop
26725 high registers! */
26726 internal_error
26727 ("no low registers available for popping high registers");
26729 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26730 if (live_regs_mask & (1 << next_hi_reg))
26731 break;
26733 while (high_regs_pushed)
26735 /* Find lo register(s) into which the high register(s) can
26736 be popped. */
26737 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26739 if (mask & (1 << regno))
26740 high_regs_pushed--;
26741 if (high_regs_pushed == 0)
26742 break;
26745 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26747 /* Pop the values into the low register(s). */
26748 thumb_pop (asm_out_file, mask);
26750 /* Move the value(s) into the high registers. */
26751 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26753 if (mask & (1 << regno))
26755 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26756 regno);
26758 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26759 if (live_regs_mask & (1 << next_hi_reg))
26760 break;
26764 live_regs_mask &= ~0x0f00;
26767 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26768 live_regs_mask &= 0xff;
26770 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26772 /* Pop the return address into the PC. */
26773 if (had_to_push_lr)
26774 live_regs_mask |= 1 << PC_REGNUM;
26776 /* Either no argument registers were pushed or a backtrace
26777 structure was created which includes an adjusted stack
26778 pointer, so just pop everything. */
26779 if (live_regs_mask)
26780 thumb_pop (asm_out_file, live_regs_mask);
26782 /* We have either just popped the return address into the
26783 PC or it was kept in LR for the entire function.
26784 Note that thumb_pop has already called thumb_exit if the
26785 PC was in the list. */
26786 if (!had_to_push_lr)
26787 thumb_exit (asm_out_file, LR_REGNUM);
26789 else
26791 /* Pop everything but the return address. */
26792 if (live_regs_mask)
26793 thumb_pop (asm_out_file, live_regs_mask);
26795 if (had_to_push_lr)
26797 if (size > 12)
26799 /* We have no free low regs, so save one. */
26800 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26801 LAST_ARG_REGNUM);
26804 /* Get the return address into a temporary register. */
26805 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26807 if (size > 12)
26809 /* Move the return address to lr. */
26810 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26811 LAST_ARG_REGNUM);
26812 /* Restore the low register. */
26813 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26814 IP_REGNUM);
26815 regno = LR_REGNUM;
26817 else
26818 regno = LAST_ARG_REGNUM;
26820 else
26821 regno = LR_REGNUM;
26823 /* Remove the argument registers that were pushed onto the stack. */
26824 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26825 SP_REGNUM, SP_REGNUM,
26826 crtl->args.pretend_args_size);
26828 thumb_exit (asm_out_file, regno);
26831 return "";
26834 /* Functions to save and restore machine-specific function data. */
26835 static struct machine_function *
26836 arm_init_machine_status (void)
26838 struct machine_function *machine;
26839 machine = ggc_cleared_alloc<machine_function> ();
26841 #if ARM_FT_UNKNOWN != 0
26842 machine->func_type = ARM_FT_UNKNOWN;
26843 #endif
26844 return machine;
26847 /* Return an RTX indicating where the return address to the
26848 calling function can be found. */
26850 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26852 if (count != 0)
26853 return NULL_RTX;
26855 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26858 /* Do anything needed before RTL is emitted for each function. */
26859 void
26860 arm_init_expanders (void)
26862 /* Arrange to initialize and mark the machine per-function status. */
26863 init_machine_status = arm_init_machine_status;
26865 /* This is to stop the combine pass optimizing away the alignment
26866 adjustment of va_arg. */
26867 /* ??? It is claimed that this should not be necessary. */
26868 if (cfun)
26869 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26873 /* Like arm_compute_initial_elimination_offset. Simpler because there
26874 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26875 to point at the base of the local variables after static stack
26876 space for a function has been allocated. */
26878 HOST_WIDE_INT
26879 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26881 arm_stack_offsets *offsets;
26883 offsets = arm_get_frame_offsets ();
26885 switch (from)
26887 case ARG_POINTER_REGNUM:
26888 switch (to)
26890 case STACK_POINTER_REGNUM:
26891 return offsets->outgoing_args - offsets->saved_args;
26893 case FRAME_POINTER_REGNUM:
26894 return offsets->soft_frame - offsets->saved_args;
26896 case ARM_HARD_FRAME_POINTER_REGNUM:
26897 return offsets->saved_regs - offsets->saved_args;
26899 case THUMB_HARD_FRAME_POINTER_REGNUM:
26900 return offsets->locals_base - offsets->saved_args;
26902 default:
26903 gcc_unreachable ();
26905 break;
26907 case FRAME_POINTER_REGNUM:
26908 switch (to)
26910 case STACK_POINTER_REGNUM:
26911 return offsets->outgoing_args - offsets->soft_frame;
26913 case ARM_HARD_FRAME_POINTER_REGNUM:
26914 return offsets->saved_regs - offsets->soft_frame;
26916 case THUMB_HARD_FRAME_POINTER_REGNUM:
26917 return offsets->locals_base - offsets->soft_frame;
26919 default:
26920 gcc_unreachable ();
26922 break;
26924 default:
26925 gcc_unreachable ();
26929 /* Generate the function's prologue. */
26931 void
26932 thumb1_expand_prologue (void)
26934 rtx insn;
26936 HOST_WIDE_INT amount;
26937 arm_stack_offsets *offsets;
26938 unsigned long func_type;
26939 int regno;
26940 unsigned long live_regs_mask;
26941 unsigned long l_mask;
26942 unsigned high_regs_pushed = 0;
26944 func_type = arm_current_func_type ();
26946 /* Naked functions don't have prologues. */
26947 if (IS_NAKED (func_type))
26948 return;
26950 if (IS_INTERRUPT (func_type))
26952 error ("interrupt Service Routines cannot be coded in Thumb mode");
26953 return;
26956 if (is_called_in_ARM_mode (current_function_decl))
26957 emit_insn (gen_prologue_thumb1_interwork ());
26959 offsets = arm_get_frame_offsets ();
26960 live_regs_mask = offsets->saved_regs_mask;
26962 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26963 l_mask = live_regs_mask & 0x40ff;
26964 /* Then count how many other high registers will need to be pushed. */
26965 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26967 if (crtl->args.pretend_args_size)
26969 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26971 if (cfun->machine->uses_anonymous_args)
26973 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26974 unsigned long mask;
26976 mask = 1ul << (LAST_ARG_REGNUM + 1);
26977 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
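 /* E.g. 8 bytes of pretend args gives num_pushes == 2, so
    mask == (1 << 4) - (1 << 2) == 0xc, i.e. push {r2, r3}.  */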
26979 insn = thumb1_emit_multi_reg_push (mask, 0);
26981 else
26983 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26984 stack_pointer_rtx, x));
26986 RTX_FRAME_RELATED_P (insn) = 1;
26989 if (TARGET_BACKTRACE)
26991 HOST_WIDE_INT offset = 0;
26992 unsigned work_register;
26993 rtx work_reg, x, arm_hfp_rtx;
26995 /* We have been asked to create a stack backtrace structure.
26996 The code looks like this:
26998 0 .align 2
26999 0 func:
27000 0 sub SP, #16 Reserve space for 4 registers.
27001 2 push {R7} Push low registers.
27002 4 add R7, SP, #20 Get the stack pointer before the push.
27003 6 str R7, [SP, #8] Store the stack pointer
27004 (before reserving the space).
27005 8 mov R7, PC Get hold of the start of this code + 12.
27006 10 str R7, [SP, #16] Store it.
27007 12 mov R7, FP Get hold of the current frame pointer.
27008 14 str R7, [SP, #4] Store it.
27009 16 mov R7, LR Get hold of the current return address.
27010 18 str R7, [SP, #12] Store it.
27011 20 add R7, SP, #16 Point at the start of the
27012 backtrace structure.
27013 22 mov FP, R7 Put this value into the frame pointer. */
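 /* Once that sequence completes the new frame pointer points at the word
    holding the captured code address:
	[FP]      code address
	[FP - 4]  return address (LR)
	[FP - 8]  stack pointer before the frame was built
	[FP - 12] caller's frame pointer
    The RTL emitted below reproduces this layout.  */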
27015 work_register = thumb_find_work_register (live_regs_mask);
27016 work_reg = gen_rtx_REG (SImode, work_register);
27017 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27019 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27020 stack_pointer_rtx, GEN_INT (-16)));
27021 RTX_FRAME_RELATED_P (insn) = 1;
27023 if (l_mask)
27025 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27026 RTX_FRAME_RELATED_P (insn) = 1;
27028 offset = bit_count (l_mask) * UNITS_PER_WORD;
27031 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27032 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27034 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27035 x = gen_frame_mem (SImode, x);
27036 emit_move_insn (x, work_reg);
27038 /* Make sure that the instruction fetching the PC is in the right place
27039 to calculate "start of backtrace creation code + 12". */
27040 /* ??? The stores using the common WORK_REG ought to be enough to
27041 prevent the scheduler from doing anything weird. Failing that
27042 we could always move all of the following into an UNSPEC_VOLATILE. */
27043 if (l_mask)
27045 x = gen_rtx_REG (SImode, PC_REGNUM);
27046 emit_move_insn (work_reg, x);
27048 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27049 x = gen_frame_mem (SImode, x);
27050 emit_move_insn (x, work_reg);
27052 emit_move_insn (work_reg, arm_hfp_rtx);
27054 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27055 x = gen_frame_mem (SImode, x);
27056 emit_move_insn (x, work_reg);
27058 else
27060 emit_move_insn (work_reg, arm_hfp_rtx);
27062 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27063 x = gen_frame_mem (SImode, x);
27064 emit_move_insn (x, work_reg);
27066 x = gen_rtx_REG (SImode, PC_REGNUM);
27067 emit_move_insn (work_reg, x);
27069 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27070 x = gen_frame_mem (SImode, x);
27071 emit_move_insn (x, work_reg);
27074 x = gen_rtx_REG (SImode, LR_REGNUM);
27075 emit_move_insn (work_reg, x);
27077 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27078 x = gen_frame_mem (SImode, x);
27079 emit_move_insn (x, work_reg);
27081 x = GEN_INT (offset + 12);
27082 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27084 emit_move_insn (arm_hfp_rtx, work_reg);
27086 /* Optimization: If we are not pushing any low registers but we are going
27087 to push some high registers then delay our first push. This will just
27088 be a push of LR and we can combine it with the push of the first high
27089 register. */
27090 else if ((l_mask & 0xff) != 0
27091 || (high_regs_pushed == 0 && l_mask))
27093 unsigned long mask = l_mask;
27094 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27095 insn = thumb1_emit_multi_reg_push (mask, mask);
27096 RTX_FRAME_RELATED_P (insn) = 1;
27099 if (high_regs_pushed)
27101 unsigned pushable_regs;
27102 unsigned next_hi_reg;
27103 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27104 : crtl->args.info.nregs;
27105 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27107 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27108 if (live_regs_mask & (1 << next_hi_reg))
27109 break;
27111 /* Here we need to mask out registers used for passing arguments
27112 even if they could otherwise be pushed. This avoids using them to stash
27113 the high registers, since such stashing would clobber the argument values. */
27114 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27116 if (pushable_regs == 0)
27117 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27119 while (high_regs_pushed > 0)
27121 unsigned long real_regs_mask = 0;
27123 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27125 if (pushable_regs & (1 << regno))
27127 emit_move_insn (gen_rtx_REG (SImode, regno),
27128 gen_rtx_REG (SImode, next_hi_reg));
27130 high_regs_pushed --;
27131 real_regs_mask |= (1 << next_hi_reg);
27133 if (high_regs_pushed)
27135 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27136 next_hi_reg --)
27137 if (live_regs_mask & (1 << next_hi_reg))
27138 break;
27140 else
27142 pushable_regs &= ~((1 << regno) - 1);
27143 break;
27148 /* If we had to find a work register and we have not yet
27149 saved the LR then add it to the list of regs to push. */
27150 if (l_mask == (1 << LR_REGNUM))
27152 pushable_regs |= l_mask;
27153 real_regs_mask |= l_mask;
27154 l_mask = 0;
27157 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27158 RTX_FRAME_RELATED_P (insn) = 1;
27162 /* Load the pic register before setting the frame pointer,
27163 so we can use r7 as a temporary work register. */
27164 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27165 arm_load_pic_register (live_regs_mask);
27167 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27168 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27169 stack_pointer_rtx);
27171 if (flag_stack_usage_info)
27172 current_function_static_stack_size
27173 = offsets->outgoing_args - offsets->saved_args;
27175 amount = offsets->outgoing_args - offsets->saved_regs;
27176 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27177 if (amount)
27179 if (amount < 512)
27181 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27182 GEN_INT (- amount)));
27183 RTX_FRAME_RELATED_P (insn) = 1;
27185 else
27187 rtx reg, dwarf;
27189 /* The stack decrement is too big for an immediate value in a single
27190 insn. In theory we could issue multiple subtracts, but after
27191 three of them it becomes more space efficient to place the full
27192 value in the constant pool and load into a register. (Also the
27193 ARM debugger really likes to see only one stack decrement per
27194 function). So instead we look for a scratch register into which
27195 we can load the decrement, and then we subtract this from the
27196 stack pointer. Unfortunately on the thumb the only available
27197 scratch registers are the argument registers, and we cannot use
27198 these as they may hold arguments to the function. Instead we
27199 attempt to locate a call preserved register which is used by this
27200 function. If we can find one, then we know that it will have
27201 been pushed at the start of the prologue and so we can corrupt
27202 it now. */
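 /* A sketch of the resulting sequence for, say, a 1024-byte frame with
    r4 live (the constant would typically come from the literal pool):
	ldr	r4, =-1024
	add	sp, sp, r4
    together with a REG_FRAME_RELATED_EXPR note recording the decrement
    for the unwinder.  */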
27203 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27204 if (live_regs_mask & (1 << regno))
27205 break;
27207 gcc_assert(regno <= LAST_LO_REGNUM);
27209 reg = gen_rtx_REG (SImode, regno);
27211 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27213 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27214 stack_pointer_rtx, reg));
27216 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27217 plus_constant (Pmode, stack_pointer_rtx,
27218 -amount));
27219 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27220 RTX_FRAME_RELATED_P (insn) = 1;
27224 if (frame_pointer_needed)
27225 thumb_set_frame_pointer (offsets);
27227 /* If we are profiling, make sure no instructions are scheduled before
27228 the call to mcount. Similarly if the user has requested no
27229 scheduling in the prolog. Similarly if we want non-call exceptions
27230 using the EABI unwinder, to prevent faulting instructions from being
27231 swapped with a stack adjustment. */
27232 if (crtl->profile || !TARGET_SCHED_PROLOG
27233 || (arm_except_unwind_info (&global_options) == UI_TARGET
27234 && cfun->can_throw_non_call_exceptions))
27235 emit_insn (gen_blockage ());
27237 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27238 if (live_regs_mask & 0xff)
27239 cfun->machine->lr_save_eliminated = 0;
27242 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27243 POP instruction can be generated. LR should be replaced by PC. All
27244 the checks required are already done by USE_RETURN_INSN (). Hence,
27245 all we really need to check here is whether a single register or
27246 multiple registers are being restored. */
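 /* In the single-register case this amounts to one "pop {pc}" (a
    post-incremented load straight into PC); otherwise LR is swapped for
    PC in the mask and a normal multi-register pop performs the return.  */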
27247 void
27248 thumb2_expand_return (bool simple_return)
27250 int i, num_regs;
27251 unsigned long saved_regs_mask;
27252 arm_stack_offsets *offsets;
27254 offsets = arm_get_frame_offsets ();
27255 saved_regs_mask = offsets->saved_regs_mask;
27257 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27258 if (saved_regs_mask & (1 << i))
27259 num_regs++;
27261 if (!simple_return && saved_regs_mask)
27263 if (num_regs == 1)
27265 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27266 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27267 rtx addr = gen_rtx_MEM (SImode,
27268 gen_rtx_POST_INC (SImode,
27269 stack_pointer_rtx));
27270 set_mem_alias_set (addr, get_frame_alias_set ());
27271 XVECEXP (par, 0, 0) = ret_rtx;
27272 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27273 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27274 emit_jump_insn (par);
27276 else
27278 saved_regs_mask &= ~ (1 << LR_REGNUM);
27279 saved_regs_mask |= (1 << PC_REGNUM);
27280 arm_emit_multi_reg_pop (saved_regs_mask);
27283 else
27285 emit_jump_insn (simple_return_rtx);
27289 void
27290 thumb1_expand_epilogue (void)
27292 HOST_WIDE_INT amount;
27293 arm_stack_offsets *offsets;
27294 int regno;
27296 /* Naked functions don't have epilogues. */
27297 if (IS_NAKED (arm_current_func_type ()))
27298 return;
27300 offsets = arm_get_frame_offsets ();
27301 amount = offsets->outgoing_args - offsets->saved_regs;
27303 if (frame_pointer_needed)
27305 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27306 amount = offsets->locals_base - offsets->saved_regs;
27308 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27310 gcc_assert (amount >= 0);
27311 if (amount)
27313 emit_insn (gen_blockage ());
27315 if (amount < 512)
27316 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27317 GEN_INT (amount)));
27318 else
27320 /* r3 is always free in the epilogue. */
27321 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27323 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27324 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27328 /* Emit a USE (stack_pointer_rtx), so that
27329 the stack adjustment will not be deleted. */
27330 emit_insn (gen_force_register_use (stack_pointer_rtx));
27332 if (crtl->profile || !TARGET_SCHED_PROLOG)
27333 emit_insn (gen_blockage ());
27335 /* Emit a clobber for each insn that will be restored in the epilogue,
27336 so that flow2 will get register lifetimes correct. */
27337 for (regno = 0; regno < 13; regno++)
27338 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27339 emit_clobber (gen_rtx_REG (SImode, regno));
27341 if (! df_regs_ever_live_p (LR_REGNUM))
27342 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27345 /* Epilogue code for APCS frame. */
27346 static void
27347 arm_expand_epilogue_apcs_frame (bool really_return)
27349 unsigned long func_type;
27350 unsigned long saved_regs_mask;
27351 int num_regs = 0;
27352 int i;
27353 int floats_from_frame = 0;
27354 arm_stack_offsets *offsets;
27356 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27357 func_type = arm_current_func_type ();
27359 /* Get frame offsets for ARM. */
27360 offsets = arm_get_frame_offsets ();
27361 saved_regs_mask = offsets->saved_regs_mask;
27363 /* Find the offset of the floating-point save area in the frame. */
27364 floats_from_frame
27365 = (offsets->saved_args
27366 + arm_compute_static_chain_stack_bytes ()
27367 - offsets->frame);
27369 /* Compute how many core registers saved and how far away the floats are. */
27370 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27371 if (saved_regs_mask & (1 << i))
27373 num_regs++;
27374 floats_from_frame += 4;
27377 if (TARGET_HARD_FLOAT && TARGET_VFP)
27379 int start_reg;
27380 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27382 /* The offset is from IP_REGNUM. */
27383 int saved_size = arm_get_vfp_saved_size ();
27384 if (saved_size > 0)
27386 rtx insn;
27387 floats_from_frame += saved_size;
27388 insn = emit_insn (gen_addsi3 (ip_rtx,
27389 hard_frame_pointer_rtx,
27390 GEN_INT (-floats_from_frame)));
27391 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27392 ip_rtx, hard_frame_pointer_rtx);
27395 /* Generate VFP register multi-pop. */
27396 start_reg = FIRST_VFP_REGNUM;
27398 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27399 /* Look for a case where a reg does not need restoring. */
27400 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27401 && (!df_regs_ever_live_p (i + 1)
27402 || call_used_regs[i + 1]))
27404 if (start_reg != i)
27405 arm_emit_vfp_multi_reg_pop (start_reg,
27406 (i - start_reg) / 2,
27407 gen_rtx_REG (SImode,
27408 IP_REGNUM));
27409 start_reg = i + 2;
27412 /* Restore the remaining regs that we have discovered (or possibly
27413 even all of them, if the conditional in the for loop never
27414 fired). */
27415 if (start_reg != i)
27416 arm_emit_vfp_multi_reg_pop (start_reg,
27417 (i - start_reg) / 2,
27418 gen_rtx_REG (SImode, IP_REGNUM));
27421 if (TARGET_IWMMXT)
27423 /* The frame pointer is guaranteed to be non-double-word aligned, as
27424 it is set to double-word-aligned old_stack_pointer - 4. */
27425 rtx insn;
27426 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27428 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27429 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27431 rtx addr = gen_frame_mem (V2SImode,
27432 plus_constant (Pmode, hard_frame_pointer_rtx,
27433 - lrm_count * 4));
27434 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27435 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27436 gen_rtx_REG (V2SImode, i),
27437 NULL_RTX);
27438 lrm_count += 2;
27442 /* saved_regs_mask should contain IP, which holds the old stack pointer
27443 from the time the activation record was created. Since SP and IP are
27444 adjacent registers, we can restore the value directly into SP. */
27445 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27446 saved_regs_mask &= ~(1 << IP_REGNUM);
27447 saved_regs_mask |= (1 << SP_REGNUM);
27449 /* There are two registers left in saved_regs_mask - LR and PC. We
27450 only need to restore LR (the return address), but to
27451 save time we can load it directly into PC, unless we need a
27452 special function exit sequence, or we are not really returning. */
27453 if (really_return
27454 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27455 && !crtl->calls_eh_return)
27456 /* Delete LR from the register mask, so that LR on
27457 the stack is loaded into the PC in the register mask. */
27458 saved_regs_mask &= ~(1 << LR_REGNUM);
27459 else
27460 saved_regs_mask &= ~(1 << PC_REGNUM);
27462 num_regs = bit_count (saved_regs_mask);
27463 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27465 rtx insn;
27466 emit_insn (gen_blockage ());
27467 /* Unwind the stack to just below the saved registers. */
27468 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27469 hard_frame_pointer_rtx,
27470 GEN_INT (- 4 * num_regs)));
27472 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27473 stack_pointer_rtx, hard_frame_pointer_rtx);
27476 arm_emit_multi_reg_pop (saved_regs_mask);
27478 if (IS_INTERRUPT (func_type))
27480 /* Interrupt handlers will have pushed the
27481 IP onto the stack, so restore it now. */
27482 rtx insn;
27483 rtx addr = gen_rtx_MEM (SImode,
27484 gen_rtx_POST_INC (SImode,
27485 stack_pointer_rtx));
27486 set_mem_alias_set (addr, get_frame_alias_set ());
27487 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27488 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27489 gen_rtx_REG (SImode, IP_REGNUM),
27490 NULL_RTX);
27493 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27494 return;
27496 if (crtl->calls_eh_return)
27497 emit_insn (gen_addsi3 (stack_pointer_rtx,
27498 stack_pointer_rtx,
27499 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27501 if (IS_STACKALIGN (func_type))
27502 /* Restore the original stack pointer. Before the prologue, the stack was
27503 realigned and the original stack pointer was saved in r0. For details,
27504 see the comment in arm_expand_prologue. */
27505 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27507 emit_jump_insn (simple_return_rtx);
27510 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27511 function is not a sibcall. */
27512 void
27513 arm_expand_epilogue (bool really_return)
27515 unsigned long func_type;
27516 unsigned long saved_regs_mask;
27517 int num_regs = 0;
27518 int i;
27519 int amount;
27520 arm_stack_offsets *offsets;
27522 func_type = arm_current_func_type ();
27524 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27525 let output_return_instruction take care of any instruction emission. */
27526 if (IS_NAKED (func_type)
27527 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27529 if (really_return)
27530 emit_jump_insn (simple_return_rtx);
27531 return;
27534 /* If we are throwing an exception, then we really must be doing a
27535 return, so we can't tail-call. */
27536 gcc_assert (!crtl->calls_eh_return || really_return);
27538 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27540 arm_expand_epilogue_apcs_frame (really_return);
27541 return;
27544 /* Get frame offsets for ARM. */
27545 offsets = arm_get_frame_offsets ();
27546 saved_regs_mask = offsets->saved_regs_mask;
27547 num_regs = bit_count (saved_regs_mask);
27549 if (frame_pointer_needed)
27551 rtx insn;
27552 /* Restore stack pointer if necessary. */
27553 if (TARGET_ARM)
27555 /* In ARM mode, frame pointer points to first saved register.
27556 Restore stack pointer to last saved register. */
27557 amount = offsets->frame - offsets->saved_regs;
27559 /* Force out any pending memory operations that reference stacked data
27560 before stack de-allocation occurs. */
27561 emit_insn (gen_blockage ());
27562 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27563 hard_frame_pointer_rtx,
27564 GEN_INT (amount)));
27565 arm_add_cfa_adjust_cfa_note (insn, amount,
27566 stack_pointer_rtx,
27567 hard_frame_pointer_rtx);
27569 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27570 deleted. */
27571 emit_insn (gen_force_register_use (stack_pointer_rtx));
27573 else
27575 /* In Thumb-2 mode, the frame pointer points to the last saved
27576 register. */
27577 amount = offsets->locals_base - offsets->saved_regs;
27578 if (amount)
27580 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27581 hard_frame_pointer_rtx,
27582 GEN_INT (amount)));
27583 arm_add_cfa_adjust_cfa_note (insn, amount,
27584 hard_frame_pointer_rtx,
27585 hard_frame_pointer_rtx);
27588 /* Force out any pending memory operations that reference stacked data
27589 before stack de-allocation occurs. */
27590 emit_insn (gen_blockage ());
27591 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27592 hard_frame_pointer_rtx));
27593 arm_add_cfa_adjust_cfa_note (insn, 0,
27594 stack_pointer_rtx,
27595 hard_frame_pointer_rtx);
27596 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27597 deleted. */
27598 emit_insn (gen_force_register_use (stack_pointer_rtx));
27601 else
27603 /* Pop off outgoing args and local frame to adjust stack pointer to
27604 last saved register. */
27605 amount = offsets->outgoing_args - offsets->saved_regs;
27606 if (amount)
27608 rtx tmp;
27609 /* Force out any pending memory operations that reference stacked data
27610 before stack de-allocation occurs. */
27611 emit_insn (gen_blockage ());
27612 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27613 stack_pointer_rtx,
27614 GEN_INT (amount)));
27615 arm_add_cfa_adjust_cfa_note (tmp, amount,
27616 stack_pointer_rtx, stack_pointer_rtx);
27617 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27618 not deleted. */
27619 emit_insn (gen_force_register_use (stack_pointer_rtx));
27623 if (TARGET_HARD_FLOAT && TARGET_VFP)
27625 /* Generate VFP register multi-pop. */
27626 int end_reg = LAST_VFP_REGNUM + 1;
27628 /* Scan the registers in reverse order. We need to match
27629 any groupings made in the prologue and generate matching
27630 vldm operations. The need to match groups is because,
27631 unlike pop, vldm can only do consecutive regs. */
27632 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27633 /* Look for a case where a reg does not need restoring. */
27634 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27635 && (!df_regs_ever_live_p (i + 1)
27636 || call_used_regs[i + 1]))
27638 /* Restore the regs discovered so far (from reg+2 to
27639 end_reg). */
27640 if (end_reg > i + 2)
27641 arm_emit_vfp_multi_reg_pop (i + 2,
27642 (end_reg - (i + 2)) / 2,
27643 stack_pointer_rtx);
27644 end_reg = i;
27647 /* Restore the remaining regs that we have discovered (or possibly
27648 even all of them, if the conditional in the for loop never
27649 fired). */
27650 if (end_reg > i + 2)
27651 arm_emit_vfp_multi_reg_pop (i + 2,
27652 (end_reg - (i + 2)) / 2,
27653 stack_pointer_rtx);
27656 if (TARGET_IWMMXT)
27657 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27658 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27660 rtx insn;
27661 rtx addr = gen_rtx_MEM (V2SImode,
27662 gen_rtx_POST_INC (SImode,
27663 stack_pointer_rtx));
27664 set_mem_alias_set (addr, get_frame_alias_set ());
27665 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27666 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27667 gen_rtx_REG (V2SImode, i),
27668 NULL_RTX);
27669 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27670 stack_pointer_rtx, stack_pointer_rtx);
27673 if (saved_regs_mask)
27675 rtx insn;
27676 bool return_in_pc = false;
27678 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27679 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27680 && !IS_STACKALIGN (func_type)
27681 && really_return
27682 && crtl->args.pretend_args_size == 0
27683 && saved_regs_mask & (1 << LR_REGNUM)
27684 && !crtl->calls_eh_return)
27686 saved_regs_mask &= ~(1 << LR_REGNUM);
27687 saved_regs_mask |= (1 << PC_REGNUM);
27688 return_in_pc = true;
27691 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27693 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27694 if (saved_regs_mask & (1 << i))
27696 rtx addr = gen_rtx_MEM (SImode,
27697 gen_rtx_POST_INC (SImode,
27698 stack_pointer_rtx));
27699 set_mem_alias_set (addr, get_frame_alias_set ());
27701 if (i == PC_REGNUM)
27703 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27704 XVECEXP (insn, 0, 0) = ret_rtx;
27705 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27706 gen_rtx_REG (SImode, i),
27707 addr);
27708 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27709 insn = emit_jump_insn (insn);
27711 else
27713 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27714 addr));
27715 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27716 gen_rtx_REG (SImode, i),
27717 NULL_RTX);
27718 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27719 stack_pointer_rtx,
27720 stack_pointer_rtx);
27724 else
27726 if (TARGET_LDRD
27727 && current_tune->prefer_ldrd_strd
27728 && !optimize_function_for_size_p (cfun))
27730 if (TARGET_THUMB2)
27731 thumb2_emit_ldrd_pop (saved_regs_mask);
27732 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27733 arm_emit_ldrd_pop (saved_regs_mask);
27734 else
27735 arm_emit_multi_reg_pop (saved_regs_mask);
27737 else
27738 arm_emit_multi_reg_pop (saved_regs_mask);
27741 if (return_in_pc == true)
27742 return;
27745 if (crtl->args.pretend_args_size)
27747 int i, j;
27748 rtx dwarf = NULL_RTX;
27749 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27750 stack_pointer_rtx,
27751 GEN_INT (crtl->args.pretend_args_size)));
27753 RTX_FRAME_RELATED_P (tmp) = 1;
27755 if (cfun->machine->uses_anonymous_args)
27757 /* Restore pretend args. Refer to arm_expand_prologue for how the
27758 pretend args are saved on the stack. */
27759 int num_regs = crtl->args.pretend_args_size / 4;
27760 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
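 /* E.g. 8 bytes of pretend args means r2 and r3 were saved:
    (0xf0 >> 2) & 0xf == 0xc.  */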
27761 for (j = 0, i = 0; j < num_regs; i++)
27762 if (saved_regs_mask & (1 << i))
27764 rtx reg = gen_rtx_REG (SImode, i);
27765 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27766 j++;
27768 REG_NOTES (tmp) = dwarf;
27770 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27771 stack_pointer_rtx, stack_pointer_rtx);
27774 if (!really_return)
27775 return;
27777 if (crtl->calls_eh_return)
27778 emit_insn (gen_addsi3 (stack_pointer_rtx,
27779 stack_pointer_rtx,
27780 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27782 if (IS_STACKALIGN (func_type))
27783 /* Restore the original stack pointer. Before the prologue, the stack was
27784 realigned and the original stack pointer was saved in r0. For details,
27785 see the comment in arm_expand_prologue. */
27786 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27788 emit_jump_insn (simple_return_rtx);
27791 /* Implementation of insn prologue_thumb1_interwork. This is the first
27792 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27794 const char *
27795 thumb1_output_interwork (void)
27797 const char * name;
27798 FILE *f = asm_out_file;
27800 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27801 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27802 == SYMBOL_REF);
27803 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27805 /* Generate code sequence to switch us into Thumb mode. */
27806 /* The .code 32 directive has already been emitted by
27807 ASM_DECLARE_FUNCTION_NAME. */
27808 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27809 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27811 /* Generate a label, so that the debugger will notice the
27812 change in instruction sets. This label is also used by
27813 the assembler to bypass the ARM code when this function
27814 is called from a Thumb encoded function elsewhere in the
27815 same file. Hence the definition of STUB_NAME here must
27816 agree with the definition in gas/config/tc-arm.c. */
27818 #define STUB_NAME ".real_start_of"
27820 fprintf (f, "\t.code\t16\n");
27821 #ifdef ARM_PE
27822 if (arm_dllexport_name_p (name))
27823 name = arm_strip_name_encoding (name);
27824 #endif
27825 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27826 fprintf (f, "\t.thumb_func\n");
27827 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27829 return "";
27832 /* Handle the case of a double word load into a low register from
27833 a computed memory address. The computed address may involve a
27834 register which is overwritten by the load. */
27835 const char *
27836 thumb_load_double_from_address (rtx *operands)
27838 rtx addr;
27839 rtx base;
27840 rtx offset;
27841 rtx arg1;
27842 rtx arg2;
27844 gcc_assert (REG_P (operands[0]));
27845 gcc_assert (MEM_P (operands[1]));
27847 /* Get the memory address. */
27848 addr = XEXP (operands[1], 0);
27850 /* Work out how the memory address is computed. */
27851 switch (GET_CODE (addr))
27853 case REG:
27854 operands[2] = adjust_address (operands[1], SImode, 4);
27856 if (REGNO (operands[0]) == REGNO (addr))
27858 output_asm_insn ("ldr\t%H0, %2", operands);
27859 output_asm_insn ("ldr\t%0, %1", operands);
27861 else
27863 output_asm_insn ("ldr\t%0, %1", operands);
27864 output_asm_insn ("ldr\t%H0, %2", operands);
27866 break;
27868 case CONST:
27869 /* Compute <address> + 4 for the high order load. */
27870 operands[2] = adjust_address (operands[1], SImode, 4);
27872 output_asm_insn ("ldr\t%0, %1", operands);
27873 output_asm_insn ("ldr\t%H0, %2", operands);
27874 break;
27876 case PLUS:
27877 arg1 = XEXP (addr, 0);
27878 arg2 = XEXP (addr, 1);
27880 if (CONSTANT_P (arg1))
27881 base = arg2, offset = arg1;
27882 else
27883 base = arg1, offset = arg2;
27885 gcc_assert (REG_P (base));
27887 /* Catch the case of <address> = <reg> + <reg> */
27888 if (REG_P (offset))
27890 int reg_offset = REGNO (offset);
27891 int reg_base = REGNO (base);
27892 int reg_dest = REGNO (operands[0]);
27894 /* Add the base and offset registers together into the
27895 higher destination register. */
27896 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27897 reg_dest + 1, reg_base, reg_offset);
27899 /* Load the lower destination register from the address in
27900 the higher destination register. */
27901 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27902 reg_dest, reg_dest + 1);
27904 /* Load the higher destination register from its own address
27905 plus 4. */
27906 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27907 reg_dest + 1, reg_dest + 1);
27909 else
27911 /* Compute <address> + 4 for the high order load. */
27912 operands[2] = adjust_address (operands[1], SImode, 4);
27914 /* If the computed address is held in the low order register
27915 then load the high order register first, otherwise always
27916 load the low order register first. */
27917 if (REGNO (operands[0]) == REGNO (base))
27919 output_asm_insn ("ldr\t%H0, %2", operands);
27920 output_asm_insn ("ldr\t%0, %1", operands);
27922 else
27924 output_asm_insn ("ldr\t%0, %1", operands);
27925 output_asm_insn ("ldr\t%H0, %2", operands);
27928 break;
27930 case LABEL_REF:
27931 /* With no registers to worry about we can just load the value
27932 directly. */
27933 operands[2] = adjust_address (operands[1], SImode, 4);
27935 output_asm_insn ("ldr\t%H0, %2", operands);
27936 output_asm_insn ("ldr\t%0, %1", operands);
27937 break;
27939 default:
27940 gcc_unreachable ();
27943 return "";
27946 const char *
27947 thumb_output_move_mem_multiple (int n, rtx *operands)
27949 rtx tmp;
27951 switch (n)
27953 case 2:
27954 if (REGNO (operands[4]) > REGNO (operands[5]))
27956 tmp = operands[4];
27957 operands[4] = operands[5];
27958 operands[5] = tmp;
27960 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27961 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27962 break;
27964 case 3:
27965 if (REGNO (operands[4]) > REGNO (operands[5]))
27967 tmp = operands[4];
27968 operands[4] = operands[5];
27969 operands[5] = tmp;
27971 if (REGNO (operands[5]) > REGNO (operands[6]))
27973 tmp = operands[5];
27974 operands[5] = operands[6];
27975 operands[6] = tmp;
27977 if (REGNO (operands[4]) > REGNO (operands[5]))
27979 tmp = operands[4];
27980 operands[4] = operands[5];
27981 operands[5] = tmp;
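 /* The compare-and-swaps above are a small bubble sort putting the
    scratch registers into ascending order, since ldmia/stmia always
    transfer the lowest-numbered register to/from the lowest address.  */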
27984 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27985 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27986 break;
27988 default:
27989 gcc_unreachable ();
27992 return "";
27995 /* Output a call-via instruction for thumb state. */
27996 const char *
27997 thumb_call_via_reg (rtx reg)
27999 int regno = REGNO (reg);
28000 rtx *labelp;
28002 gcc_assert (regno < LR_REGNUM);
28004 /* If we are in the normal text section we can use a single instance
28005 per compilation unit. If we are doing function sections, then we need
28006 an entry per section, since we can't rely on reachability. */
28007 if (in_section == text_section)
28009 thumb_call_reg_needed = 1;
28011 if (thumb_call_via_label[regno] == NULL)
28012 thumb_call_via_label[regno] = gen_label_rtx ();
28013 labelp = thumb_call_via_label + regno;
28015 else
28017 if (cfun->machine->call_via[regno] == NULL)
28018 cfun->machine->call_via[regno] = gen_label_rtx ();
28019 labelp = cfun->machine->call_via + regno;
28022 output_asm_insn ("bl\t%a0", labelp);
28023 return "";
28026 /* Routines for generating rtl. */
28027 void
28028 thumb_expand_movmemqi (rtx *operands)
28030 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28031 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28032 HOST_WIDE_INT len = INTVAL (operands[2]);
28033 HOST_WIDE_INT offset = 0;
28035 while (len >= 12)
28037 emit_insn (gen_movmem12b (out, in, out, in));
28038 len -= 12;
28041 if (len >= 8)
28043 emit_insn (gen_movmem8b (out, in, out, in));
28044 len -= 8;
28047 if (len >= 4)
28049 rtx reg = gen_reg_rtx (SImode);
28050 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28051 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28052 len -= 4;
28053 offset += 4;
28056 if (len >= 2)
28058 rtx reg = gen_reg_rtx (HImode);
28059 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28060 plus_constant (Pmode, in,
28061 offset))));
28062 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28063 offset)),
28064 reg));
28065 len -= 2;
28066 offset += 2;
28069 if (len)
28071 rtx reg = gen_reg_rtx (QImode);
28072 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28073 plus_constant (Pmode, in,
28074 offset))));
28075 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28076 offset)),
28077 reg));
28081 void
28082 thumb_reload_out_hi (rtx *operands)
28084 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28087 /* Handle reading a half-word from memory during reload. */
28088 void
28089 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28091 gcc_unreachable ();
28094 /* Return the length of a function name prefix
28095 that starts with the character C. */
28096 static int
28097 arm_get_strip_length (int c)
28099 switch (c)
28101 ARM_NAME_ENCODING_LENGTHS
28102 default: return 0;
28106 /* Return a pointer to a function's name with any
28107 and all prefix encodings stripped from it. */
28108 const char *
28109 arm_strip_name_encoding (const char *name)
28111 int skip;
28113 while ((skip = arm_get_strip_length (* name)))
28114 name += skip;
28116 return name;
28119 /* If there is a '*' anywhere in the name's prefix, then
28120 emit the stripped name verbatim, otherwise prepend an
28121 underscore if leading underscores are being used. */
28122 void
28123 arm_asm_output_labelref (FILE *stream, const char *name)
28125 int skip;
28126 int verbatim = 0;
28128 while ((skip = arm_get_strip_length (* name)))
28130 verbatim |= (*name == '*');
28131 name += skip;
28134 if (verbatim)
28135 fputs (name, stream);
28136 else
28137 asm_fprintf (stream, "%U%s", name);
28140 /* This function is used to emit an EABI tag and its associated value.
28141 We emit the numerical value of the tag in case the assembler does not
28142 support textual tags (e.g. gas prior to 2.20). If requested we include
28143 the tag name in a comment so that anyone reading the assembler output
28144 will know which tag is being set.
28146 This function is not static because arm-c.c needs it too. */
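 /* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
    emits ".eabi_attribute 20, 1", followed by "@ Tag_ABI_FP_denormal"
    when -fverbose-asm is in effect.  */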
28148 void
28149 arm_emit_eabi_attribute (const char *name, int num, int val)
28151 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28152 if (flag_verbose_asm || flag_debug_asm)
28153 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28154 asm_fprintf (asm_out_file, "\n");
28157 static void
28158 arm_file_start (void)
28160 int val;
28162 if (TARGET_UNIFIED_ASM)
28163 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28165 if (TARGET_BPABI)
28167 const char *fpu_name;
28168 if (arm_selected_arch)
28170 /* armv7ve doesn't support any extensions. */
28171 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28173 /* Keep backward compatibility with assemblers
28174 which don't support armv7ve. */
28175 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28176 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28177 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28178 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28179 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28181 else
28183 const char* pos = strchr (arm_selected_arch->name, '+');
28184 if (pos)
28186 char buf[15];
28187 gcc_assert (strlen (arm_selected_arch->name)
28188 <= sizeof (buf) / sizeof (*pos));
28189 strncpy (buf, arm_selected_arch->name,
28190 (pos - arm_selected_arch->name) * sizeof (*pos));
28191 buf[pos - arm_selected_arch->name] = '\0';
28192 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28193 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28195 else
28196 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28199 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28200 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28201 else
28203 const char* truncated_name
28204 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28205 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28208 if (TARGET_SOFT_FLOAT)
28210 fpu_name = "softvfp";
28212 else
28214 fpu_name = arm_fpu_desc->name;
28215 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28217 if (TARGET_HARD_FLOAT)
28218 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28219 if (TARGET_HARD_FLOAT_ABI)
28220 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28223 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28225 /* Some of these attributes only apply when the corresponding features
28226 are used. However we don't have any easy way of figuring this out.
28227 Conservatively record the setting that would have been used. */
28229 if (flag_rounding_math)
28230 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28232 if (!flag_unsafe_math_optimizations)
28234 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28235 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28237 if (flag_signaling_nans)
28238 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28240 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28241 flag_finite_math_only ? 1 : 3);
28243 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28244 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28245 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28246 flag_short_enums ? 1 : 2);
28248 /* Tag_ABI_optimization_goals. */
28249 if (optimize_size)
28250 val = 4;
28251 else if (optimize >= 2)
28252 val = 2;
28253 else if (optimize)
28254 val = 1;
28255 else
28256 val = 6;
28257 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28259 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28260 unaligned_access);
28262 if (arm_fp16_format)
28263 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28264 (int) arm_fp16_format);
28266 if (arm_lang_output_object_attributes_hook)
28267 arm_lang_output_object_attributes_hook();
28270 default_file_start ();
28273 static void
28274 arm_file_end (void)
28276 int regno;
28278 if (NEED_INDICATE_EXEC_STACK)
28279 /* Add .note.GNU-stack. */
28280 file_end_indicate_exec_stack ();
28282 if (! thumb_call_reg_needed)
28283 return;
28285 switch_to_section (text_section);
28286 asm_fprintf (asm_out_file, "\t.code 16\n");
28287 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28289 for (regno = 0; regno < LR_REGNUM; regno++)
28291 rtx label = thumb_call_via_label[regno];
28293 if (label != 0)
28295 targetm.asm_out.internal_label (asm_out_file, "L",
28296 CODE_LABEL_NUMBER (label));
28297 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28302 #ifndef ARM_PE
28303 /* Symbols in the text segment can be accessed without indirecting via the
28304 constant pool; it may take an extra binary operation, but this is still
28305 faster than indirecting via memory. Don't do this when not optimizing,
28306 since we won't be calculating all of the offsets necessary to do this
28307 simplification. */
28309 static void
28310 arm_encode_section_info (tree decl, rtx rtl, int first)
28312 if (optimize > 0 && TREE_CONSTANT (decl))
28313 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28315 default_encode_section_info (decl, rtl, first);
28317 #endif /* !ARM_PE */
28319 static void
28320 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28322 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28323 && !strcmp (prefix, "L"))
28325 arm_ccfsm_state = 0;
28326 arm_target_insn = NULL;
28328 default_internal_label (stream, prefix, labelno);
28331 /* Output code to add DELTA to the first argument, and then jump
28332 to FUNCTION. Used for C++ multiple inheritance. */
28333 static void
28334 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28335 HOST_WIDE_INT delta,
28336 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28337 tree function)
28339 static int thunk_label = 0;
28340 char label[256];
28341 char labelpc[256];
28342 int mi_delta = delta;
28343 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28344 int shift = 0;
28345 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28346 ? 1 : 0);
28347 if (mi_delta < 0)
28348 mi_delta = - mi_delta;
28350 final_start_function (emit_barrier (), file, 1);
28352 if (TARGET_THUMB1)
28354 int labelno = thunk_label++;
28355 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28356 /* Thunks are entered in ARM mode when available. */
28357 if (TARGET_THUMB1_ONLY)
28359 /* push r3 so we can use it as a temporary. */
28360 /* TODO: Omit this save if r3 is not used. */
28361 fputs ("\tpush {r3}\n", file);
28362 fputs ("\tldr\tr3, ", file);
28364 else
28366 fputs ("\tldr\tr12, ", file);
28368 assemble_name (file, label);
28369 fputc ('\n', file);
28370 if (flag_pic)
28372 /* If we are generating PIC, the ldr instruction below loads
28373 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28374 the address of the add + 8, so we have:
28376 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28377 = target + 1.
28379 Note that we have "+ 1" because some versions of GNU ld
28380 don't set the low bit of the result for R_ARM_REL32
28381 relocations against thumb function symbols.
28382 On ARMv6M this is +4, not +8. */
28383 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28384 assemble_name (file, labelpc);
28385 fputs (":\n", file);
28386 if (TARGET_THUMB1_ONLY)
28388 /* This is 2 insns after the start of the thunk, so we know it
28389 is 4-byte aligned. */
28390 fputs ("\tadd\tr3, pc, r3\n", file);
28391 fputs ("\tmov r12, r3\n", file);
28393 else
28394 fputs ("\tadd\tr12, pc, r12\n", file);
28396 else if (TARGET_THUMB1_ONLY)
28397 fputs ("\tmov r12, r3\n", file);
28399 if (TARGET_THUMB1_ONLY)
28401 if (mi_delta > 255)
28403 fputs ("\tldr\tr3, ", file);
28404 assemble_name (file, label);
28405 fputs ("+4\n", file);
28406 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28407 mi_op, this_regno, this_regno);
28409 else if (mi_delta != 0)
28411 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28412 mi_op, this_regno, this_regno,
28413 mi_delta);
28416 else
28418 /* TODO: Use movw/movt for large constants when available. */
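 /* The loop below peels the delta into byte-wide chunks at even bit
    positions so that each piece is a valid ARM immediate; e.g. (a
    sketch, with "this" in r0) a delta of 0x1004 becomes
	add	r0, r0, #4
	add	r0, r0, #4096  */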
28419 while (mi_delta != 0)
28421 if ((mi_delta & (3 << shift)) == 0)
28422 shift += 2;
28423 else
28425 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28426 mi_op, this_regno, this_regno,
28427 mi_delta & (0xff << shift));
28428 mi_delta &= ~(0xff << shift);
28429 shift += 8;
28433 if (TARGET_THUMB1)
28435 if (TARGET_THUMB1_ONLY)
28436 fputs ("\tpop\t{r3}\n", file);
28438 fprintf (file, "\tbx\tr12\n");
28439 ASM_OUTPUT_ALIGN (file, 2);
28440 assemble_name (file, label);
28441 fputs (":\n", file);
28442 if (flag_pic)
28444 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28445 rtx tem = XEXP (DECL_RTL (function), 0);
28446 tem = plus_constant (GET_MODE (tem), tem, -7);
28447 tem = gen_rtx_MINUS (GET_MODE (tem),
28448 tem,
28449 gen_rtx_SYMBOL_REF (Pmode,
28450 ggc_strdup (labelpc)));
28451 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28453 else
28454 /* Output ".word .LTHUNKn". */
28455 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28457 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28458 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28460 else
28462 fputs ("\tb\t", file);
28463 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28464 if (NEED_PLT_RELOC)
28465 fputs ("(PLT)", file);
28466 fputc ('\n', file);
28469 final_end_function ();
28473 arm_emit_vector_const (FILE *file, rtx x)
28475 int i;
28476 const char * pattern;
28478 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28480 switch (GET_MODE (x))
28482 case V2SImode: pattern = "%08x"; break;
28483 case V4HImode: pattern = "%04x"; break;
28484 case V8QImode: pattern = "%02x"; break;
28485 default: gcc_unreachable ();
28488 fprintf (file, "0x");
28489 for (i = CONST_VECTOR_NUNITS (x); i--;)
28491 rtx element;
28493 element = CONST_VECTOR_ELT (x, i);
28494 fprintf (file, pattern, INTVAL (element));
28497 return 1;
28500 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28501 HFmode constant pool entries are actually loaded with ldr. */
28502 void
28503 arm_emit_fp16_const (rtx c)
28505 REAL_VALUE_TYPE r;
28506 long bits;
28508 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28509 bits = real_to_target (NULL, &r, HFmode);
28510 if (WORDS_BIG_ENDIAN)
28511 assemble_zeros (2);
28512 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28513 if (!WORDS_BIG_ENDIAN)
28514 assemble_zeros (2);
28517 const char *
28518 arm_output_load_gr (rtx *operands)
28520 rtx reg;
28521 rtx offset;
28522 rtx wcgr;
28523 rtx sum;
28525 if (!MEM_P (operands [1])
28526 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28527 || !REG_P (reg = XEXP (sum, 0))
28528 || !CONST_INT_P (offset = XEXP (sum, 1))
28529 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28530 return "wldrw%?\t%0, %1";
28532 /* Fix up an out-of-range load of a GR register. */
28533 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28534 wcgr = operands[0];
28535 operands[0] = reg;
28536 output_asm_insn ("ldr%?\t%0, %1", operands);
28538 operands[0] = wcgr;
28539 operands[1] = reg;
28540 output_asm_insn ("tmcr%?\t%0, %1", operands);
28541 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28543 return "";
28546 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28548 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28549 named arg and all anonymous args onto the stack.
28550 XXX I know the prologue shouldn't be pushing registers, but it is faster
28551 that way. */
28553 static void
28554 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28555 enum machine_mode mode,
28556 tree type,
28557 int *pretend_size,
28558 int second_time ATTRIBUTE_UNUSED)
28560 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28561 int nregs;
28563 cfun->machine->uses_anonymous_args = 1;
28564 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28566 nregs = pcum->aapcs_ncrn;
28567 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28568 nregs++;
28570 else
28571 nregs = pcum->nregs;
28573 if (nregs < NUM_ARG_REGS)
28574 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28577 /* We can't rely on the caller doing the proper promotion when
28578 using APCS or ATPCS. */
28580 static bool
28581 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28583 return !TARGET_AAPCS_BASED;
28586 static enum machine_mode
28587 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28588 enum machine_mode mode,
28589 int *punsignedp ATTRIBUTE_UNUSED,
28590 const_tree fntype ATTRIBUTE_UNUSED,
28591 int for_return ATTRIBUTE_UNUSED)
28593 if (GET_MODE_CLASS (mode) == MODE_INT
28594 && GET_MODE_SIZE (mode) < 4)
28595 return SImode;
28597 return mode;
28600 /* AAPCS based ABIs use short enums by default. */
28602 static bool
28603 arm_default_short_enums (void)
28605 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28609 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28611 static bool
28612 arm_align_anon_bitfield (void)
28614 return TARGET_AAPCS_BASED;
28618 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28620 static tree
28621 arm_cxx_guard_type (void)
28623 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28627 /* The EABI says test the least significant bit of a guard variable. */
28629 static bool
28630 arm_cxx_guard_mask_bit (void)
28632 return TARGET_AAPCS_BASED;
28636 /* The EABI specifies that all array cookies are 8 bytes long. */
28638 static tree
28639 arm_get_cookie_size (tree type)
28641 tree size;
28643 if (!TARGET_AAPCS_BASED)
28644 return default_cxx_get_cookie_size (type);
28646 size = build_int_cst (sizetype, 8);
28647 return size;
28651 /* The EABI says that array cookies should also contain the element size. */
28653 static bool
28654 arm_cookie_has_size (void)
28656 return TARGET_AAPCS_BASED;
28660 /* The EABI says constructors and destructors should return a pointer to
28661 the object constructed/destroyed. */
28663 static bool
28664 arm_cxx_cdtor_returns_this (void)
28666 return TARGET_AAPCS_BASED;
28669 /* The EABI says that an inline function may never be the key
28670 method. */
28672 static bool
28673 arm_cxx_key_method_may_be_inline (void)
28675 return !TARGET_AAPCS_BASED;
28678 static void
28679 arm_cxx_determine_class_data_visibility (tree decl)
28681 if (!TARGET_AAPCS_BASED
28682 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28683 return;
28685 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28686 is exported. However, on systems without dynamic vague linkage,
28687 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28688 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28689 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28690 else
28691 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28692 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28695 static bool
28696 arm_cxx_class_data_always_comdat (void)
28698 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28699 vague linkage if the class has no key function. */
28700 return !TARGET_AAPCS_BASED;
28704 /* The EABI says __aeabi_atexit should be used to register static
28705 destructors. */
28707 static bool
28708 arm_cxx_use_aeabi_atexit (void)
28710 return TARGET_AAPCS_BASED;
28714 void
28715 arm_set_return_address (rtx source, rtx scratch)
28717 arm_stack_offsets *offsets;
28718 HOST_WIDE_INT delta;
28719 rtx addr;
28720 unsigned long saved_regs;
28722 offsets = arm_get_frame_offsets ();
28723 saved_regs = offsets->saved_regs_mask;
28725 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28726 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28727 else
28729 if (frame_pointer_needed)
28730 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28731 else
28733 /* LR will be the first saved register. */
28734 delta = offsets->outgoing_args - (offsets->frame + 4);
28737 if (delta >= 4096)
28739 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28740 GEN_INT (delta & ~4095)));
28741 addr = scratch;
28742 delta &= 4095;
28744 else
28745 addr = stack_pointer_rtx;
28747 addr = plus_constant (Pmode, addr, delta);
28749 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28754 void
28755 thumb_set_return_address (rtx source, rtx scratch)
28757 arm_stack_offsets *offsets;
28758 HOST_WIDE_INT delta;
28759 HOST_WIDE_INT limit;
28760 int reg;
28761 rtx addr;
28762 unsigned long mask;
28764 emit_use (source);
28766 offsets = arm_get_frame_offsets ();
28767 mask = offsets->saved_regs_mask;
28768 if (mask & (1 << LR_REGNUM))
28770 limit = 1024;
28771 /* Find the saved regs. */
28772 if (frame_pointer_needed)
28774 delta = offsets->soft_frame - offsets->saved_args;
28775 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28776 if (TARGET_THUMB1)
28777 limit = 128;
28779 else
28781 delta = offsets->outgoing_args - offsets->saved_args;
28782 reg = SP_REGNUM;
28784 /* Allow for the stack frame. */
28785 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28786 delta -= 16;
28787 /* The link register is always the first saved register. */
28788 delta -= 4;
28790 /* Construct the address. */
28791 addr = gen_rtx_REG (SImode, reg);
28792 if (delta > limit)
28794 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28795 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28796 addr = scratch;
28798 else
28799 addr = plus_constant (Pmode, addr, delta);
28801 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28803 else
28804 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28807 /* Implements target hook vector_mode_supported_p. */
28808 bool
28809 arm_vector_mode_supported_p (enum machine_mode mode)
28811 /* Neon also supports V2SImode, etc. listed in the clause below. */
28812 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28813 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28814 return true;
28816 if ((TARGET_NEON || TARGET_IWMMXT)
28817 && ((mode == V2SImode)
28818 || (mode == V4HImode)
28819 || (mode == V8QImode)))
28820 return true;
28822 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28823 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28824 || mode == V2HAmode))
28825 return true;
28827 return false;
28830 /* Implements target hook array_mode_supported_p. */
28832 static bool
28833 arm_array_mode_supported_p (enum machine_mode mode,
28834 unsigned HOST_WIDE_INT nelems)
28836 if (TARGET_NEON
28837 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28838 && (nelems >= 2 && nelems <= 4))
28839 return true;
28841 return false;
28844 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28845 registers when autovectorizing for Neon, at least until multiple vector
28846 widths are supported properly by the middle-end. */
28848 static enum machine_mode
28849 arm_preferred_simd_mode (enum machine_mode mode)
28851 if (TARGET_NEON)
28852 switch (mode)
28854 case SFmode:
28855 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28856 case SImode:
28857 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28858 case HImode:
28859 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28860 case QImode:
28861 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28862 case DImode:
28863 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28864 return V2DImode;
28865 break;
28867 default:;
28870 if (TARGET_REALLY_IWMMXT)
28871 switch (mode)
28873 case SImode:
28874 return V2SImode;
28875 case HImode:
28876 return V4HImode;
28877 case QImode:
28878 return V8QImode;
28880 default:;
28883 return word_mode;
28886 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28888 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28889 using r0-r4 for function arguments, r7 for the stack frame, and not having
28890 enough left over to do doubleword arithmetic. For Thumb-2 all the
28891 potentially problematic instructions accept high registers so this is not
28892 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28893 that require many low registers. */
28894 static bool
28895 arm_class_likely_spilled_p (reg_class_t rclass)
28897 if ((TARGET_THUMB1 && rclass == LO_REGS)
28898 || rclass == CC_REG)
28899 return true;
28901 return false;
28904 /* Implements target hook small_register_classes_for_mode_p. */
28905 bool
28906 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28908 return TARGET_THUMB1;
28911 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28912 ARM insns and therefore guarantee that the shift count is modulo 256.
28913 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28914 guarantee no particular behavior for out-of-range counts. */
28916 static unsigned HOST_WIDE_INT
28917 arm_shift_truncation_mask (enum machine_mode mode)
28919 return mode == SImode ? 255 : 0;
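/* Illustrative note, not part of the compiler: returning 255 tells the
   middle end that an SImode shift instruction truncates its count modulo
   256 in hardware, i.e. it behaves as if written

       result = x << (count & 255);

   so code that synthesizes wider shifts need not mask the count itself.
   DImode shifts (libcalls/optabs) make no such promise, hence the 0.  */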
28923 /* Map internal gcc register numbers to DWARF2 register numbers. */
28925 unsigned int
28926 arm_dbx_register_number (unsigned int regno)
28928 if (regno < 16)
28929 return regno;
28931 if (IS_VFP_REGNUM (regno))
28933 /* See comment in arm_dwarf_register_span. */
28934 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28935 return 64 + regno - FIRST_VFP_REGNUM;
28936 else
28937 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28940 if (IS_IWMMXT_GR_REGNUM (regno))
28941 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28943 if (IS_IWMMXT_REGNUM (regno))
28944 return 112 + regno - FIRST_IWMMXT_REGNUM;
28946 gcc_unreachable ();
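/* Worked examples of the mapping above, assuming the standard AAPCS DWARF
   numbering: core registers r0-r15 map to 0-15 unchanged; s0 (the first
   VFP register) maps to the legacy single-precision number 64; a register
   with no single-precision view such as d16 maps into the 256-287 range,
   i.e. 256 + 32/2 = 272.  */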
28949 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28950 GCC models them as 64 32-bit registers, so we need to describe this to
28951 the DWARF generation code. Other registers can use the default. */
28952 static rtx
28953 arm_dwarf_register_span (rtx rtl)
28955 enum machine_mode mode;
28956 unsigned regno;
28957 rtx parts[16];
28958 int nregs;
28959 int i;
28961 regno = REGNO (rtl);
28962 if (!IS_VFP_REGNUM (regno))
28963 return NULL_RTX;
28965 /* XXX FIXME: The EABI defines two VFP register ranges:
28966 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28967 256-287: D0-D31
28968 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28969 corresponding D register. Until GDB supports this, we shall use the
28970 legacy encodings. We also use these encodings for D0-D15 for
28971 compatibility with older debuggers. */
28972 mode = GET_MODE (rtl);
28973 if (GET_MODE_SIZE (mode) < 8)
28974 return NULL_RTX;
28976 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28978 nregs = GET_MODE_SIZE (mode) / 4;
28979 for (i = 0; i < nregs; i += 2)
28980 if (TARGET_BIG_END)
28982 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28983 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28985 else
28987 parts[i] = gen_rtx_REG (SImode, regno + i);
28988 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28991 else
28993 nregs = GET_MODE_SIZE (mode) / 8;
28994 for (i = 0; i < nregs; i++)
28995 parts[i] = gen_rtx_REG (DImode, regno + i);
28998 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
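/* For example, a DFmode value held in d2 (which has a single-precision
   view) is described to DWARF as the pair of SImode registers
   corresponding to s4 and s5 (the pair is swapped for big-endian),
   while values in d16-d31 are described with whole DImode pieces.  */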
29001 #if ARM_UNWIND_INFO
29002 /* Emit unwind directives for a store-multiple instruction or stack pointer
29003 push during alignment.
29004 These should only ever be generated by the function prologue code, so
29005 expect them to have a particular form.
29006 The store-multiple instruction sometimes pushes pc as the last register,
29007 although it should not be tracked into unwind information, or for -Os
29008 sometimes pushes some dummy registers before the first register that needs
29009 to be tracked in unwind information; such dummy registers are there just
29010 to avoid separate stack adjustment, and will not be restored in the
29011 epilogue. */
29013 static void
29014 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29016 int i;
29017 HOST_WIDE_INT offset;
29018 HOST_WIDE_INT nregs;
29019 int reg_size;
29020 unsigned reg;
29021 unsigned lastreg;
29022 unsigned padfirst = 0, padlast = 0;
29023 rtx e;
29025 e = XVECEXP (p, 0, 0);
29026 gcc_assert (GET_CODE (e) == SET);
29028 /* First insn will adjust the stack pointer. */
29029 gcc_assert (GET_CODE (e) == SET
29030 && REG_P (SET_DEST (e))
29031 && REGNO (SET_DEST (e)) == SP_REGNUM
29032 && GET_CODE (SET_SRC (e)) == PLUS);
29034 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29035 nregs = XVECLEN (p, 0) - 1;
29036 gcc_assert (nregs);
29038 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29039 if (reg < 16)
29041 /* For -Os dummy registers can be pushed at the beginning to
29042 avoid separate stack pointer adjustment. */
29043 e = XVECEXP (p, 0, 1);
29044 e = XEXP (SET_DEST (e), 0);
29045 if (GET_CODE (e) == PLUS)
29046 padfirst = INTVAL (XEXP (e, 1));
29047 gcc_assert (padfirst == 0 || optimize_size);
29048 /* The function prologue may also push pc, but not annotate it as it is
29049 never restored. We turn this into a stack pointer adjustment. */
29050 e = XVECEXP (p, 0, nregs);
29051 e = XEXP (SET_DEST (e), 0);
29052 if (GET_CODE (e) == PLUS)
29053 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29054 else
29055 padlast = offset - 4;
29056 gcc_assert (padlast == 0 || padlast == 4);
29057 if (padlast == 4)
29058 fprintf (asm_out_file, "\t.pad #4\n");
29059 reg_size = 4;
29060 fprintf (asm_out_file, "\t.save {");
29062 else if (IS_VFP_REGNUM (reg))
29064 reg_size = 8;
29065 fprintf (asm_out_file, "\t.vsave {");
29067 else
29068 /* Unknown register type. */
29069 gcc_unreachable ();
29071 /* If the stack increment doesn't match the size of the saved registers,
29072 something has gone horribly wrong. */
29073 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29075 offset = padfirst;
29076 lastreg = 0;
29077 /* The remaining insns will describe the stores. */
29078 for (i = 1; i <= nregs; i++)
29080 /* Expect (set (mem <addr>) (reg)).
29081 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29082 e = XVECEXP (p, 0, i);
29083 gcc_assert (GET_CODE (e) == SET
29084 && MEM_P (SET_DEST (e))
29085 && REG_P (SET_SRC (e)));
29087 reg = REGNO (SET_SRC (e));
29088 gcc_assert (reg >= lastreg);
29090 if (i != 1)
29091 fprintf (asm_out_file, ", ");
29092 /* We can't use %r for vfp because we need to use the
29093 double precision register names. */
29094 if (IS_VFP_REGNUM (reg))
29095 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29096 else
29097 asm_fprintf (asm_out_file, "%r", reg);
29099 #ifdef ENABLE_CHECKING
29100 /* Check that the addresses are consecutive. */
29101 e = XEXP (SET_DEST (e), 0);
29102 if (GET_CODE (e) == PLUS)
29103 gcc_assert (REG_P (XEXP (e, 0))
29104 && REGNO (XEXP (e, 0)) == SP_REGNUM
29105 && CONST_INT_P (XEXP (e, 1))
29106 && offset == INTVAL (XEXP (e, 1)));
29107 else
29108 gcc_assert (i == 1
29109 && REG_P (e)
29110 && REGNO (e) == SP_REGNUM);
29111 offset += reg_size;
29112 #endif
29114 fprintf (asm_out_file, "}\n");
29115 if (padfirst)
29116 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
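/* As a rough illustration of the directives emitted above: a prologue
   "push {r4, r5, lr, pc}" (pc pushed only to avoid a separate stack
   adjustment) would be annotated as

       .pad #4
       .save {r4, r5, lr}

   i.e. the untracked pc slot becomes padding and only the registers that
   must be restored appear in the .save list.  The register choice here is
   purely illustrative.  */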
29119 /* Emit unwind directives for a SET. */
29121 static void
29122 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29124 rtx e0;
29125 rtx e1;
29126 unsigned reg;
29128 e0 = XEXP (p, 0);
29129 e1 = XEXP (p, 1);
29130 switch (GET_CODE (e0))
29132 case MEM:
29133 /* Pushing a single register. */
29134 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29135 || !REG_P (XEXP (XEXP (e0, 0), 0))
29136 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29137 abort ();
29139 asm_fprintf (asm_out_file, "\t.save ");
29140 if (IS_VFP_REGNUM (REGNO (e1)))
29141 asm_fprintf(asm_out_file, "{d%d}\n",
29142 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29143 else
29144 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29145 break;
29147 case REG:
29148 if (REGNO (e0) == SP_REGNUM)
29150 /* A stack increment. */
29151 if (GET_CODE (e1) != PLUS
29152 || !REG_P (XEXP (e1, 0))
29153 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29154 || !CONST_INT_P (XEXP (e1, 1)))
29155 abort ();
29157 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29158 -INTVAL (XEXP (e1, 1)));
29160 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29162 HOST_WIDE_INT offset;
29164 if (GET_CODE (e1) == PLUS)
29166 if (!REG_P (XEXP (e1, 0))
29167 || !CONST_INT_P (XEXP (e1, 1)))
29168 abort ();
29169 reg = REGNO (XEXP (e1, 0));
29170 offset = INTVAL (XEXP (e1, 1));
29171 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29172 HARD_FRAME_POINTER_REGNUM, reg,
29173 offset);
29175 else if (REG_P (e1))
29177 reg = REGNO (e1);
29178 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29179 HARD_FRAME_POINTER_REGNUM, reg);
29181 else
29182 abort ();
29184 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29186 /* Move from sp to reg. */
29187 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29189 else if (GET_CODE (e1) == PLUS
29190 && REG_P (XEXP (e1, 0))
29191 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29192 && CONST_INT_P (XEXP (e1, 1)))
29194 /* Set reg to offset from sp. */
29195 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29196 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29198 else
29199 abort ();
29200 break;
29202 default:
29203 abort ();
29208 /* Emit unwind directives for the given insn. */
29210 static void
29211 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29213 rtx note, pat;
29214 bool handled_one = false;
29216 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29217 return;
29219 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29220 && (TREE_NOTHROW (current_function_decl)
29221 || crtl->all_throwers_are_sibcalls))
29222 return;
29224 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29225 return;
29227 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29229 switch (REG_NOTE_KIND (note))
29231 case REG_FRAME_RELATED_EXPR:
29232 pat = XEXP (note, 0);
29233 goto found;
29235 case REG_CFA_REGISTER:
29236 pat = XEXP (note, 0);
29237 if (pat == NULL)
29239 pat = PATTERN (insn);
29240 if (GET_CODE (pat) == PARALLEL)
29241 pat = XVECEXP (pat, 0, 0);
29244 /* Only emitted for IS_STACKALIGN re-alignment. */
29246 rtx dest, src;
29247 unsigned reg;
29249 src = SET_SRC (pat);
29250 dest = SET_DEST (pat);
29252 gcc_assert (src == stack_pointer_rtx);
29253 reg = REGNO (dest);
29254 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29255 reg + 0x90, reg);
29257 handled_one = true;
29258 break;
29260 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29261 to get correct dwarf information for shrink-wrapping. We should not
29262 emit unwind information for it because these are used either for
29263 pretend arguments or notes to adjust sp and restore registers from
29264 the stack. */
29265 case REG_CFA_DEF_CFA:
29266 case REG_CFA_ADJUST_CFA:
29267 case REG_CFA_RESTORE:
29268 return;
29270 case REG_CFA_EXPRESSION:
29271 case REG_CFA_OFFSET:
29272 /* ??? Only handling here what we actually emit. */
29273 gcc_unreachable ();
29275 default:
29276 break;
29279 if (handled_one)
29280 return;
29281 pat = PATTERN (insn);
29282 found:
29284 switch (GET_CODE (pat))
29286 case SET:
29287 arm_unwind_emit_set (asm_out_file, pat);
29288 break;
29290 case SEQUENCE:
29291 /* Store multiple. */
29292 arm_unwind_emit_sequence (asm_out_file, pat);
29293 break;
29295 default:
29296 abort();
29301 /* Output a reference from a function exception table to the type_info
29302 object X. The EABI specifies that the symbol should be relocated by
29303 an R_ARM_TARGET2 relocation. */
29305 static bool
29306 arm_output_ttype (rtx x)
29308 fputs ("\t.word\t", asm_out_file);
29309 output_addr_const (asm_out_file, x);
29310 /* Use special relocations for symbol references. */
29311 if (!CONST_INT_P (x))
29312 fputs ("(TARGET2)", asm_out_file);
29313 fputc ('\n', asm_out_file);
29315 return TRUE;
29318 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29320 static void
29321 arm_asm_emit_except_personality (rtx personality)
29323 fputs ("\t.personality\t", asm_out_file);
29324 output_addr_const (asm_out_file, personality);
29325 fputc ('\n', asm_out_file);
29328 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29330 static void
29331 arm_asm_init_sections (void)
29333 exception_section = get_unnamed_section (0, output_section_asm_op,
29334 "\t.handlerdata");
29336 #endif /* ARM_UNWIND_INFO */
29338 /* Output unwind directives for the start/end of a function. */
29340 void
29341 arm_output_fn_unwind (FILE * f, bool prologue)
29343 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29344 return;
29346 if (prologue)
29347 fputs ("\t.fnstart\n", f);
29348 else
29350 /* If this function will never be unwound, then mark it as such.
29351 The same condition is used in arm_unwind_emit to suppress
29352 the frame annotations. */
29353 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29354 && (TREE_NOTHROW (current_function_decl)
29355 || crtl->all_throwers_are_sibcalls))
29356 fputs("\t.cantunwind\n", f);
29358 fputs ("\t.fnend\n", f);
29362 static bool
29363 arm_emit_tls_decoration (FILE *fp, rtx x)
29365 enum tls_reloc reloc;
29366 rtx val;
29368 val = XVECEXP (x, 0, 0);
29369 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29371 output_addr_const (fp, val);
29373 switch (reloc)
29375 case TLS_GD32:
29376 fputs ("(tlsgd)", fp);
29377 break;
29378 case TLS_LDM32:
29379 fputs ("(tlsldm)", fp);
29380 break;
29381 case TLS_LDO32:
29382 fputs ("(tlsldo)", fp);
29383 break;
29384 case TLS_IE32:
29385 fputs ("(gottpoff)", fp);
29386 break;
29387 case TLS_LE32:
29388 fputs ("(tpoff)", fp);
29389 break;
29390 case TLS_DESCSEQ:
29391 fputs ("(tlsdesc)", fp);
29392 break;
29393 default:
29394 gcc_unreachable ();
29397 switch (reloc)
29399 case TLS_GD32:
29400 case TLS_LDM32:
29401 case TLS_IE32:
29402 case TLS_DESCSEQ:
29403 fputs (" + (. - ", fp);
29404 output_addr_const (fp, XVECEXP (x, 0, 2));
29405 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29406 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29407 output_addr_const (fp, XVECEXP (x, 0, 3));
29408 fputc (')', fp);
29409 break;
29410 default:
29411 break;
29414 return TRUE;
29417 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29419 static void
29420 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29422 gcc_assert (size == 4);
29423 fputs ("\t.word\t", file);
29424 output_addr_const (file, x);
29425 fputs ("(tlsldo)", file);
29428 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29430 static bool
29431 arm_output_addr_const_extra (FILE *fp, rtx x)
29433 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29434 return arm_emit_tls_decoration (fp, x);
29435 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29437 char label[256];
29438 int labelno = INTVAL (XVECEXP (x, 0, 0));
29440 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29441 assemble_name_raw (fp, label);
29443 return TRUE;
29445 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29447 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29448 if (GOT_PCREL)
29449 fputs ("+.", fp);
29450 fputs ("-(", fp);
29451 output_addr_const (fp, XVECEXP (x, 0, 0));
29452 fputc (')', fp);
29453 return TRUE;
29455 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29457 output_addr_const (fp, XVECEXP (x, 0, 0));
29458 if (GOT_PCREL)
29459 fputs ("+.", fp);
29460 fputs ("-(", fp);
29461 output_addr_const (fp, XVECEXP (x, 0, 1));
29462 fputc (')', fp);
29463 return TRUE;
29465 else if (GET_CODE (x) == CONST_VECTOR)
29466 return arm_emit_vector_const (fp, x);
29468 return FALSE;
29471 /* Output assembly for a shift instruction.
29472 SET_FLAGS determines how the instruction modifies the condition codes.
29473 0 - Do not set condition codes.
29474 1 - Set condition codes.
29475 2 - Use smallest instruction. */
29476 const char *
29477 arm_output_shift(rtx * operands, int set_flags)
29479 char pattern[100];
29480 static const char flag_chars[3] = {'?', '.', '!'};
29481 const char *shift;
29482 HOST_WIDE_INT val;
29483 char c;
29485 c = flag_chars[set_flags];
29486 if (TARGET_UNIFIED_ASM)
29488 shift = shift_op(operands[3], &val);
29489 if (shift)
29491 if (val != -1)
29492 operands[2] = GEN_INT(val);
29493 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29495 else
29496 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29498 else
29499 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29500 output_asm_insn (pattern, operands);
29501 return "";
29504 /* Output assembly for a WMMX immediate shift instruction. */
29505 const char *
29506 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29508 int shift = INTVAL (operands[2]);
29509 char templ[50];
29510 enum machine_mode opmode = GET_MODE (operands[0]);
29512 gcc_assert (shift >= 0);
29514 /* Handle shift values that exceed the maximum for the register forms:
29515 greater than 63 (for the D qualifier), 31 (for W) or 15 (for H). */
29516 if (((opmode == V4HImode) && (shift > 15))
29517 || ((opmode == V2SImode) && (shift > 31))
29518 || ((opmode == DImode) && (shift > 63)))
29520 if (wror_or_wsra)
29522 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29523 output_asm_insn (templ, operands);
29524 if (opmode == DImode)
29526 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29527 output_asm_insn (templ, operands);
29530 else
29532 /* The destination register will contain all zeros. */
29533 sprintf (templ, "wzero\t%%0");
29534 output_asm_insn (templ, operands);
29536 return "";
29539 if ((opmode == DImode) && (shift > 32))
29541 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29542 output_asm_insn (templ, operands);
29543 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29544 output_asm_insn (templ, operands);
29546 else
29548 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29549 output_asm_insn (templ, operands);
29551 return "";
29554 /* Output assembly for a WMMX tinsr instruction. */
29555 const char *
29556 arm_output_iwmmxt_tinsr (rtx *operands)
29558 int mask = INTVAL (operands[3]);
29559 int i;
29560 char templ[50];
29561 int units = mode_nunits[GET_MODE (operands[0])];
29562 gcc_assert ((mask & (mask - 1)) == 0);
29563 for (i = 0; i < units; ++i)
29565 if ((mask & 0x01) == 1)
29567 break;
29569 mask >>= 1;
29571 gcc_assert (i < units);
29573 switch (GET_MODE (operands[0]))
29575 case V8QImode:
29576 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29577 break;
29578 case V4HImode:
29579 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29580 break;
29581 case V2SImode:
29582 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29583 break;
29584 default:
29585 gcc_unreachable ();
29586 break;
29588 output_asm_insn (templ, operands);
29590 return "";
29593 /* Output a Thumb-1 casesi dispatch sequence. */
29594 const char *
29595 thumb1_output_casesi (rtx *operands)
29597 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29599 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29601 switch (GET_MODE(diff_vec))
29603 case QImode:
29604 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29605 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29606 case HImode:
29607 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29608 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29609 case SImode:
29610 return "bl\t%___gnu_thumb1_case_si";
29611 default:
29612 gcc_unreachable ();
29616 /* Output a Thumb-2 casesi instruction. */
29617 const char *
29618 thumb2_output_casesi (rtx *operands)
29620 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29622 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29624 output_asm_insn ("cmp\t%0, %1", operands);
29625 output_asm_insn ("bhi\t%l3", operands);
29626 switch (GET_MODE(diff_vec))
29628 case QImode:
29629 return "tbb\t[%|pc, %0]";
29630 case HImode:
29631 return "tbh\t[%|pc, %0, lsl #1]";
29632 case SImode:
29633 if (flag_pic)
29635 output_asm_insn ("adr\t%4, %l2", operands);
29636 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29637 output_asm_insn ("add\t%4, %4, %5", operands);
29638 return "bx\t%4";
29640 else
29642 output_asm_insn ("adr\t%4, %l2", operands);
29643 return "ldr\t%|pc, [%4, %0, lsl #2]";
29645 default:
29646 gcc_unreachable ();
29650 /* Most ARM cores are single issue, but some newer ones can dual issue.
29651 The scheduler descriptions rely on this being correct. */
29652 static int
29653 arm_issue_rate (void)
29655 switch (arm_tune)
29657 case cortexa15:
29658 case cortexa57:
29659 return 3;
29661 case cortexr4:
29662 case cortexr4f:
29663 case cortexr5:
29664 case genericv7a:
29665 case cortexa5:
29666 case cortexa7:
29667 case cortexa8:
29668 case cortexa9:
29669 case cortexa12:
29670 case cortexa53:
29671 case fa726te:
29672 case marvell_pj4:
29673 return 2;
29675 default:
29676 return 1;
29680 /* A table and a function to perform ARM-specific name mangling for
29681 NEON vector types in order to conform to the AAPCS (see "Procedure
29682 Call Standard for the ARM Architecture", Appendix A). To qualify
29683 for emission with the mangled names defined in that document, a
29684 vector type must not only be of the correct mode but also be
29685 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29686 typedef struct
29688 enum machine_mode mode;
29689 const char *element_type_name;
29690 const char *aapcs_name;
29691 } arm_mangle_map_entry;
29693 static arm_mangle_map_entry arm_mangle_map[] = {
29694 /* 64-bit containerized types. */
29695 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29696 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29697 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29698 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29699 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29700 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29701 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29702 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29703 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29704 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29706 /* 128-bit containerized types. */
29707 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29708 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29709 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29710 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29711 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29712 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29713 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29714 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29715 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29716 { VOIDmode, NULL, NULL }
29719 const char *
29720 arm_mangle_type (const_tree type)
29722 arm_mangle_map_entry *pos = arm_mangle_map;
29724 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29725 has to be mangled as if it is in the "std" namespace. */
29726 if (TARGET_AAPCS_BASED
29727 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29728 return "St9__va_list";
29730 /* Half-precision float. */
29731 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29732 return "Dh";
29734 if (TREE_CODE (type) != VECTOR_TYPE)
29735 return NULL;
29737 /* Check the mode of the vector type, and the name of the vector
29738 element type, against the table. */
29739 while (pos->mode != VOIDmode)
29741 tree elt_type = TREE_TYPE (type);
29743 if (pos->mode == TYPE_MODE (type)
29744 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29745 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29746 pos->element_type_name))
29747 return pos->aapcs_name;
29749 pos++;
29752 /* Use the default mangling for unrecognized (possibly user-defined)
29753 vector types. */
29754 return NULL;
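/* Illustrative example, assuming the usual arm_neon.h typedefs: for
   "void f (int32x4_t);" the parameter is a V4SImode vector of
   __builtin_neon_si elements, so the table above yields
   "17__simd128_int32_t" and f mangles as _Z1f17__simd128_int32_t
   instead of using the generic vector mangling.  */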
29757 /* Order of allocation of core registers for Thumb: this allocation is
29758 written over the corresponding initial entries of the array
29759 initialized with REG_ALLOC_ORDER. We allocate all low registers
29760 first. Saving and restoring a low register is usually cheaper than
29761 using a call-clobbered high register. */
29763 static const int thumb_core_reg_alloc_order[] =
29765 3, 2, 1, 0, 4, 5, 6, 7,
29766 14, 12, 8, 9, 10, 11
29769 /* Adjust register allocation order when compiling for Thumb. */
29771 void
29772 arm_order_regs_for_local_alloc (void)
29774 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29775 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29776 if (TARGET_THUMB)
29777 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29778 sizeof (thumb_core_reg_alloc_order));
29781 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29783 bool
29784 arm_frame_pointer_required (void)
29786 return (cfun->has_nonlocal_label
29787 || SUBTARGET_FRAME_POINTER_REQUIRED
29788 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29791 /* Only thumb1 can't support conditional execution, so return true if
29792 the target is not thumb1. */
29793 static bool
29794 arm_have_conditional_execution (void)
29796 return !TARGET_THUMB1;
29799 tree
29800 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29802 enum machine_mode in_mode, out_mode;
29803 int in_n, out_n;
29805 if (TREE_CODE (type_out) != VECTOR_TYPE
29806 || TREE_CODE (type_in) != VECTOR_TYPE)
29807 return NULL_TREE;
29809 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29810 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29811 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29812 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29814 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29815 decl of the vectorized builtin for the appropriate vector mode.
29816 NULL_TREE is returned if no such builtin is available. */
29817 #undef ARM_CHECK_BUILTIN_MODE
29818 #define ARM_CHECK_BUILTIN_MODE(C) \
29819 (TARGET_NEON && TARGET_FPU_ARMV8 \
29820 && flag_unsafe_math_optimizations \
29821 && ARM_CHECK_BUILTIN_MODE_1 (C))
29823 #undef ARM_CHECK_BUILTIN_MODE_1
29824 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29825 (out_mode == SFmode && out_n == C \
29826 && in_mode == SFmode && in_n == C)
29828 #undef ARM_FIND_VRINT_VARIANT
29829 #define ARM_FIND_VRINT_VARIANT(N) \
29830 (ARM_CHECK_BUILTIN_MODE (2) \
29831 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29832 : (ARM_CHECK_BUILTIN_MODE (4) \
29833 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29834 : NULL_TREE))
29836 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29838 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29839 switch (fn)
29841 case BUILT_IN_FLOORF:
29842 return ARM_FIND_VRINT_VARIANT (vrintm);
29843 case BUILT_IN_CEILF:
29844 return ARM_FIND_VRINT_VARIANT (vrintp);
29845 case BUILT_IN_TRUNCF:
29846 return ARM_FIND_VRINT_VARIANT (vrintz);
29847 case BUILT_IN_ROUNDF:
29848 return ARM_FIND_VRINT_VARIANT (vrinta);
29849 #undef ARM_CHECK_BUILTIN_MODE
29850 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29851 (out_mode == N##Imode && out_n == C \
29852 && in_mode == N##Imode && in_n == C)
29853 case BUILT_IN_BSWAP16:
29854 if (ARM_CHECK_BUILTIN_MODE (4, H))
29855 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
29856 else if (ARM_CHECK_BUILTIN_MODE (8, H))
29857 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
29858 else
29859 return NULL_TREE;
29860 case BUILT_IN_BSWAP32:
29861 if (ARM_CHECK_BUILTIN_MODE (2, S))
29862 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
29863 else if (ARM_CHECK_BUILTIN_MODE (4, S))
29864 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
29865 else
29866 return NULL_TREE;
29867 case BUILT_IN_BSWAP64:
29868 if (ARM_CHECK_BUILTIN_MODE (2, D))
29869 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
29870 else
29871 return NULL_TREE;
29873 default:
29874 return NULL_TREE;
29877 return NULL_TREE;
29879 #undef ARM_CHECK_BUILTIN_MODE
29880 #undef ARM_FIND_VRINT_VARIANT
29882 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29883 static HOST_WIDE_INT
29884 arm_vector_alignment (const_tree type)
29886 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29888 if (TARGET_AAPCS_BASED)
29889 align = MIN (align, 64);
29891 return align;
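/* For example, on an AAPCS-based target a 128-bit NEON vector type such
   as int32x4_t reports an alignment of 64 bits here even though its size
   is 128 bits; non-AAPCS targets keep the natural, size-based alignment.  */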
29894 static unsigned int
29895 arm_autovectorize_vector_sizes (void)
29897 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29900 static bool
29901 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29903 /* Vectors which aren't in packed structures will not be less aligned than
29904 the natural alignment of their element type, so this is safe. */
29905 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29906 return !is_packed;
29908 return default_builtin_vector_alignment_reachable (type, is_packed);
29911 static bool
29912 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29913 const_tree type, int misalignment,
29914 bool is_packed)
29916 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29918 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29920 if (is_packed)
29921 return align == 1;
29923 /* If the misalignment is unknown, we should be able to handle the access
29924 so long as it is not to a member of a packed data structure. */
29925 if (misalignment == -1)
29926 return true;
29928 /* Return true if the misalignment is a multiple of the natural alignment
29929 of the vector's element type. This is probably always going to be
29930 true in practice, since we've already established that this isn't a
29931 packed access. */
29932 return ((misalignment % align) == 0);
29935 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29936 is_packed);
29939 static void
29940 arm_conditional_register_usage (void)
29942 int regno;
29944 if (TARGET_THUMB1 && optimize_size)
29946 /* When optimizing for size on Thumb-1, it's better not
29947 to use the HI regs, because of the overhead of
29948 stacking them. */
29949 for (regno = FIRST_HI_REGNUM;
29950 regno <= LAST_HI_REGNUM; ++regno)
29951 fixed_regs[regno] = call_used_regs[regno] = 1;
29954 /* The link register can be clobbered by any branch insn,
29955 but we have no way to track that at present, so mark
29956 it as unavailable. */
29957 if (TARGET_THUMB1)
29958 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29960 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29962 /* VFPv3 registers are disabled when earlier VFP
29963 versions are selected due to the definition of
29964 LAST_VFP_REGNUM. */
29965 for (regno = FIRST_VFP_REGNUM;
29966 regno <= LAST_VFP_REGNUM; ++ regno)
29968 fixed_regs[regno] = 0;
29969 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29970 || regno >= FIRST_VFP_REGNUM + 32;
29974 if (TARGET_REALLY_IWMMXT)
29976 regno = FIRST_IWMMXT_GR_REGNUM;
29977 /* The 2002/10/09 revision of the XScale ABI has wCG0
29978 and wCG1 as call-preserved registers. The 2002/11/21
29979 revision changed this so that all wCG registers are
29980 scratch registers. */
29981 for (regno = FIRST_IWMMXT_GR_REGNUM;
29982 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29983 fixed_regs[regno] = 0;
29984 /* The XScale ABI has wR0 - wR9 as scratch registers,
29985 the rest as call-preserved registers. */
29986 for (regno = FIRST_IWMMXT_REGNUM;
29987 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29989 fixed_regs[regno] = 0;
29990 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29994 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29996 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29997 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29999 else if (TARGET_APCS_STACK)
30001 fixed_regs[10] = 1;
30002 call_used_regs[10] = 1;
30004 /* -mcaller-super-interworking reserves r11 for calls to
30005 _interwork_r11_call_via_rN(). Making the register global
30006 is an easy way of ensuring that it remains valid for all
30007 calls. */
30008 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30009 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30011 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30012 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30013 if (TARGET_CALLER_INTERWORKING)
30014 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30016 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30019 static reg_class_t
30020 arm_preferred_rename_class (reg_class_t rclass)
30022 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30023 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
30024 so that code size can be reduced. */
30025 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30026 return LO_REGS;
30027 else
30028 return NO_REGS;
30031 /* Compute the attribute "length" of insn "*push_multi".
30032 So this function MUST be kept in sync with that insn pattern. */
30034 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30036 int i, regno, hi_reg;
30037 int num_saves = XVECLEN (parallel_op, 0);
30039 /* ARM mode. */
30040 if (TARGET_ARM)
30041 return 4;
30042 /* Thumb1 mode. */
30043 if (TARGET_THUMB1)
30044 return 2;
30046 /* Thumb2 mode. */
30047 regno = REGNO (first_op);
30048 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30049 for (i = 1; i < num_saves && !hi_reg; i++)
30051 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30052 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30055 if (!hi_reg)
30056 return 2;
30057 return 4;
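/* For example, a Thumb-2 "push {r4-r7, lr}" contains no high register
   other than lr and so gets the 2-byte encoding, whereas "push {r4, r8}"
   needs the 4-byte encoding.  ARM state is always 4 bytes and Thumb-1
   always 2.  */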
30060 /* Compute the number of instructions emitted by output_move_double. */
30062 arm_count_output_move_double_insns (rtx *operands)
30064 int count;
30065 rtx ops[2];
30066 /* output_move_double may modify the operands array, so call it
30067 here on a copy of the array. */
30068 ops[0] = operands[0];
30069 ops[1] = operands[1];
30070 output_move_double (ops, false, &count);
30071 return count;
30075 vfp3_const_double_for_fract_bits (rtx operand)
30077 REAL_VALUE_TYPE r0;
30079 if (!CONST_DOUBLE_P (operand))
30080 return 0;
30082 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30083 if (exact_real_inverse (DFmode, &r0))
30085 if (exact_real_truncate (DFmode, &r0))
30087 HOST_WIDE_INT value = real_to_integer (&r0);
30088 value = value & 0xffffffff;
30089 if ((value != 0) && ( (value & (value - 1)) == 0))
30090 return int_log2 (value);
30093 return 0;
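/* Illustrative example: for the constant 0.25 the exact reciprocal is
   4.0, a power of two, so the function returns 2 -- the number of
   fractional bits for a fixed-point conversion.  A constant such as 0.3
   has no exact power-of-two reciprocal and yields 0.  */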
30097 vfp3_const_double_for_bits (rtx operand)
30099 REAL_VALUE_TYPE r0;
30101 if (!CONST_DOUBLE_P (operand))
30102 return 0;
30104 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30105 if (exact_real_truncate (DFmode, &r0))
30107 HOST_WIDE_INT value = real_to_integer (&r0);
30108 value = value & 0xffffffff;
30109 if ((value != 0) && ( (value & (value - 1)) == 0))
30110 return int_log2 (value);
30113 return 0;
30116 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30118 static void
30119 arm_pre_atomic_barrier (enum memmodel model)
30121 if (need_atomic_barrier_p (model, true))
30122 emit_insn (gen_memory_barrier ());
30125 static void
30126 arm_post_atomic_barrier (enum memmodel model)
30128 if (need_atomic_barrier_p (model, false))
30129 emit_insn (gen_memory_barrier ());
30132 /* Emit the load-exclusive and store-exclusive instructions.
30133 Use acquire and release versions if necessary. */
30135 static void
30136 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30138 rtx (*gen) (rtx, rtx);
30140 if (acq)
30142 switch (mode)
30144 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30145 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30146 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30147 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30148 default:
30149 gcc_unreachable ();
30152 else
30154 switch (mode)
30156 case QImode: gen = gen_arm_load_exclusiveqi; break;
30157 case HImode: gen = gen_arm_load_exclusivehi; break;
30158 case SImode: gen = gen_arm_load_exclusivesi; break;
30159 case DImode: gen = gen_arm_load_exclusivedi; break;
30160 default:
30161 gcc_unreachable ();
30165 emit_insn (gen (rval, mem));
30168 static void
30169 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30170 rtx mem, bool rel)
30172 rtx (*gen) (rtx, rtx, rtx);
30174 if (rel)
30176 switch (mode)
30178 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30179 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30180 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30181 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30182 default:
30183 gcc_unreachable ();
30186 else
30188 switch (mode)
30190 case QImode: gen = gen_arm_store_exclusiveqi; break;
30191 case HImode: gen = gen_arm_store_exclusivehi; break;
30192 case SImode: gen = gen_arm_store_exclusivesi; break;
30193 case DImode: gen = gen_arm_store_exclusivedi; break;
30194 default:
30195 gcc_unreachable ();
30199 emit_insn (gen (bval, rval, mem));
30202 /* Mark the previous jump instruction as unlikely. */
30204 static void
30205 emit_unlikely_jump (rtx insn)
30207 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30209 insn = emit_jump_insn (insn);
30210 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30213 /* Expand a compare and swap pattern. */
30215 void
30216 arm_expand_compare_and_swap (rtx operands[])
30218 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30219 enum machine_mode mode;
30220 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30222 bval = operands[0];
30223 rval = operands[1];
30224 mem = operands[2];
30225 oldval = operands[3];
30226 newval = operands[4];
30227 is_weak = operands[5];
30228 mod_s = operands[6];
30229 mod_f = operands[7];
30230 mode = GET_MODE (mem);
30232 /* Normally the succ memory model must be stronger than fail, but in the
30233 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30234 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30236 if (TARGET_HAVE_LDACQ
30237 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30238 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30239 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30241 switch (mode)
30243 case QImode:
30244 case HImode:
30245 /* For narrow modes, we're going to perform the comparison in SImode,
30246 so do the zero-extension now. */
30247 rval = gen_reg_rtx (SImode);
30248 oldval = convert_modes (SImode, mode, oldval, true);
30249 /* FALLTHRU */
30251 case SImode:
30252 /* Force the value into a register if needed. We waited until after
30253 the zero-extension above to do this properly. */
30254 if (!arm_add_operand (oldval, SImode))
30255 oldval = force_reg (SImode, oldval);
30256 break;
30258 case DImode:
30259 if (!cmpdi_operand (oldval, mode))
30260 oldval = force_reg (mode, oldval);
30261 break;
30263 default:
30264 gcc_unreachable ();
30267 switch (mode)
30269 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30270 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30271 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30272 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30273 default:
30274 gcc_unreachable ();
30277 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30279 if (mode == QImode || mode == HImode)
30280 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30282 /* In all cases, we arrange for success to be signaled by Z set.
30283 This arrangement allows for the boolean result to be used directly
30284 in a subsequent branch, post optimization. */
30285 x = gen_rtx_REG (CCmode, CC_REGNUM);
30286 x = gen_rtx_EQ (SImode, x, const0_rtx);
30287 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30290 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30291 another memory store between the load-exclusive and store-exclusive can
30292 reset the monitor from Exclusive to Open state. This means we must wait
30293 until after reload to split the pattern, lest we get a register spill in
30294 the middle of the atomic sequence. */
30296 void
30297 arm_split_compare_and_swap (rtx operands[])
30299 rtx rval, mem, oldval, newval, scratch;
30300 enum machine_mode mode;
30301 enum memmodel mod_s, mod_f;
30302 bool is_weak;
30303 rtx label1, label2, x, cond;
30305 rval = operands[0];
30306 mem = operands[1];
30307 oldval = operands[2];
30308 newval = operands[3];
30309 is_weak = (operands[4] != const0_rtx);
30310 mod_s = (enum memmodel) INTVAL (operands[5]);
30311 mod_f = (enum memmodel) INTVAL (operands[6]);
30312 scratch = operands[7];
30313 mode = GET_MODE (mem);
30315 bool use_acquire = TARGET_HAVE_LDACQ
30316 && !(mod_s == MEMMODEL_RELAXED
30317 || mod_s == MEMMODEL_CONSUME
30318 || mod_s == MEMMODEL_RELEASE);
30320 bool use_release = TARGET_HAVE_LDACQ
30321 && !(mod_s == MEMMODEL_RELAXED
30322 || mod_s == MEMMODEL_CONSUME
30323 || mod_s == MEMMODEL_ACQUIRE);
30325 /* Checks whether a barrier is needed and emits one accordingly. */
30326 if (!(use_acquire || use_release))
30327 arm_pre_atomic_barrier (mod_s);
30329 label1 = NULL_RTX;
30330 if (!is_weak)
30332 label1 = gen_label_rtx ();
30333 emit_label (label1);
30335 label2 = gen_label_rtx ();
30337 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30339 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30340 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30341 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30342 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30343 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30345 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30347 /* Weak or strong, we want EQ to be true for success, so that we
30348 match the flags that we got from the compare above. */
30349 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30350 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30351 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30353 if (!is_weak)
30355 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30356 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30357 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30358 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30361 if (mod_f != MEMMODEL_RELAXED)
30362 emit_label (label2);
30364 /* Checks whether a barrier is needed and emits one accordingly. */
30365 if (!(use_acquire || use_release))
30366 arm_post_atomic_barrier (mod_s);
30368 if (mod_f == MEMMODEL_RELAXED)
30369 emit_label (label2);
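/* For reference, a strong SImode compare-and-swap split by the function
   above has roughly this shape (register names purely illustrative):

       1:  ldrex   r0, [r2]        @ load the current value
           cmp     r0, r3          @ compare with the expected value
           bne     2f
           strex   r1, r4, [r2]    @ try to store the new value
           cmp     r1, #0
           bne     1b              @ retry if the exclusive store failed
       2:

   with barriers, or ldaex/stlex on TARGET_HAVE_LDACQ targets, providing
   the requested memory ordering.  */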
30372 void
30373 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30374 rtx value, rtx model_rtx, rtx cond)
30376 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30377 enum machine_mode mode = GET_MODE (mem);
30378 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30379 rtx label, x;
30381 bool use_acquire = TARGET_HAVE_LDACQ
30382 && !(model == MEMMODEL_RELAXED
30383 || model == MEMMODEL_CONSUME
30384 || model == MEMMODEL_RELEASE);
30386 bool use_release = TARGET_HAVE_LDACQ
30387 && !(model == MEMMODEL_RELAXED
30388 || model == MEMMODEL_CONSUME
30389 || model == MEMMODEL_ACQUIRE);
30391 /* Checks whether a barrier is needed and emits one accordingly. */
30392 if (!(use_acquire || use_release))
30393 arm_pre_atomic_barrier (model);
30395 label = gen_label_rtx ();
30396 emit_label (label);
30398 if (new_out)
30399 new_out = gen_lowpart (wmode, new_out);
30400 if (old_out)
30401 old_out = gen_lowpart (wmode, old_out);
30402 else
30403 old_out = new_out;
30404 value = simplify_gen_subreg (wmode, value, mode, 0);
30406 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30408 switch (code)
30410 case SET:
30411 new_out = value;
30412 break;
30414 case NOT:
30415 x = gen_rtx_AND (wmode, old_out, value);
30416 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30417 x = gen_rtx_NOT (wmode, new_out);
30418 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30419 break;
30421 case MINUS:
30422 if (CONST_INT_P (value))
30424 value = GEN_INT (-INTVAL (value));
30425 code = PLUS;
30427 /* FALLTHRU */
30429 case PLUS:
30430 if (mode == DImode)
30432 /* DImode plus/minus need to clobber flags. */
30433 /* The adddi3 and subdi3 patterns are incorrectly written so that
30434 they require matching operands, even when we could easily support
30435 three operands. Thankfully, this can be fixed up post-splitting,
30436 as the individual add+adc patterns do accept three operands and
30437 post-reload cprop can make these moves go away. */
30438 emit_move_insn (new_out, old_out);
30439 if (code == PLUS)
30440 x = gen_adddi3 (new_out, new_out, value);
30441 else
30442 x = gen_subdi3 (new_out, new_out, value);
30443 emit_insn (x);
30444 break;
30446 /* FALLTHRU */
30448 default:
30449 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30450 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30451 break;
30454 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30455 use_release);
30457 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30458 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30460 /* Checks whether a barrier is needed and emits one accordingly. */
30461 if (!(use_acquire || use_release))
30462 arm_post_atomic_barrier (model);
30465 #define MAX_VECT_LEN 16
30467 struct expand_vec_perm_d
30469 rtx target, op0, op1;
30470 unsigned char perm[MAX_VECT_LEN];
30471 enum machine_mode vmode;
30472 unsigned char nelt;
30473 bool one_vector_p;
30474 bool testing_p;
30477 /* Generate a variable permutation. */
30479 static void
30480 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30482 enum machine_mode vmode = GET_MODE (target);
30483 bool one_vector_p = rtx_equal_p (op0, op1);
30485 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30486 gcc_checking_assert (GET_MODE (op0) == vmode);
30487 gcc_checking_assert (GET_MODE (op1) == vmode);
30488 gcc_checking_assert (GET_MODE (sel) == vmode);
30489 gcc_checking_assert (TARGET_NEON);
30491 if (one_vector_p)
30493 if (vmode == V8QImode)
30494 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30495 else
30496 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30498 else
30500 rtx pair;
30502 if (vmode == V8QImode)
30504 pair = gen_reg_rtx (V16QImode);
30505 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30506 pair = gen_lowpart (TImode, pair);
30507 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30509 else
30511 pair = gen_reg_rtx (OImode);
30512 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30513 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30518 void
30519 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30521 enum machine_mode vmode = GET_MODE (target);
30522 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30523 bool one_vector_p = rtx_equal_p (op0, op1);
30524 rtx rmask[MAX_VECT_LEN], mask;
30526 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30527 numbering of elements for big-endian, we must reverse the order. */
30528 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30530 /* The VTBL instruction does not use a modulo index, so we must take care
30531 of that ourselves. */
30532 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30533 for (i = 0; i < nelt; ++i)
30534 rmask[i] = mask;
30535 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30536 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30538 arm_expand_vec_perm_1 (target, op0, op1, sel);
30541 /* Generate or test for an insn that supports a constant permutation. */
30543 /* Recognize patterns for the VUZP insns. */
30545 static bool
30546 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30548 unsigned int i, odd, mask, nelt = d->nelt;
30549 rtx out0, out1, in0, in1, x;
30550 rtx (*gen)(rtx, rtx, rtx, rtx);
30552 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30553 return false;
30555 /* Note that these are little-endian tests. Adjust for big-endian later. */
30556 if (d->perm[0] == 0)
30557 odd = 0;
30558 else if (d->perm[0] == 1)
30559 odd = 1;
30560 else
30561 return false;
30562 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30564 for (i = 0; i < nelt; i++)
30566 unsigned elt = (i * 2 + odd) & mask;
30567 if (d->perm[i] != elt)
30568 return false;
30571 /* Success! */
30572 if (d->testing_p)
30573 return true;
30575 switch (d->vmode)
30577 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30578 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30579 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30580 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30581 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30582 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30583 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30584 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30585 default:
30586 gcc_unreachable ();
30589 in0 = d->op0;
30590 in1 = d->op1;
30591 if (BYTES_BIG_ENDIAN)
30593 x = in0, in0 = in1, in1 = x;
30594 odd = !odd;
30597 out0 = d->target;
30598 out1 = gen_reg_rtx (d->vmode);
30599 if (odd)
30600 x = out0, out0 = out1, out1 = x;
30602 emit_insn (gen (out0, in0, in1, out1));
30603 return true;
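/* Example selectors matched above for V4SImode operands: {0, 2, 4, 6}
   (odd == 0) and {1, 3, 5, 7} (odd == 1), the two outputs of a single
   VUZP.32; the unwanted output is directed to a scratch register.  */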
30606 /* Recognize patterns for the VZIP insns. */
30608 static bool
30609 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30611 unsigned int i, high, mask, nelt = d->nelt;
30612 rtx out0, out1, in0, in1, x;
30613 rtx (*gen)(rtx, rtx, rtx, rtx);
30615 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30616 return false;
30618 /* Note that these are little-endian tests. Adjust for big-endian later. */
30619 high = nelt / 2;
30620 if (d->perm[0] == high)
30622 else if (d->perm[0] == 0)
30623 high = 0;
30624 else
30625 return false;
30626 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30628 for (i = 0; i < nelt / 2; i++)
30630 unsigned elt = (i + high) & mask;
30631 if (d->perm[i * 2] != elt)
30632 return false;
30633 elt = (elt + nelt) & mask;
30634 if (d->perm[i * 2 + 1] != elt)
30635 return false;
30638 /* Success! */
30639 if (d->testing_p)
30640 return true;
30642 switch (d->vmode)
30644 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30645 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30646 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30647 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30648 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30649 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30650 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30651 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30652 default:
30653 gcc_unreachable ();
30656 in0 = d->op0;
30657 in1 = d->op1;
30658 if (BYTES_BIG_ENDIAN)
30660 x = in0, in0 = in1, in1 = x;
30661 high = !high;
30664 out0 = d->target;
30665 out1 = gen_reg_rtx (d->vmode);
30666 if (high)
30667 x = out0, out0 = out1, out1 = x;
30669 emit_insn (gen (out0, in0, in1, out1));
30670 return true;
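/* Example selectors matched above for V4SImode operands: {0, 4, 1, 5}
   (interleave the low halves, high == 0) and {2, 6, 3, 7} (interleave
   the high halves), the two outputs of VZIP.32.  */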
30673 /* Recognize patterns for the VREV insns. */
30675 static bool
30676 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30678 unsigned int i, j, diff, nelt = d->nelt;
30679 rtx (*gen)(rtx, rtx, rtx);
30681 if (!d->one_vector_p)
30682 return false;
30684 diff = d->perm[0];
30685 switch (diff)
30687 case 7:
30688 switch (d->vmode)
30690 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30691 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30692 default:
30693 return false;
30695 break;
30696 case 3:
30697 switch (d->vmode)
30699 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30700 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30701 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30702 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30703 default:
30704 return false;
30706 break;
30707 case 1:
30708 switch (d->vmode)
30710 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30711 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30712 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30713 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30714 case V4SImode: gen = gen_neon_vrev64v4si; break;
30715 case V2SImode: gen = gen_neon_vrev64v2si; break;
30716 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30717 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30718 default:
30719 return false;
30721 break;
30722 default:
30723 return false;
30726 for (i = 0; i < nelt ; i += diff + 1)
30727 for (j = 0; j <= diff; j += 1)
30729 /* This is guaranteed to be true as the value of diff
30730 is 7, 3 or 1, and we should have enough elements in the
30731 queue to generate this. Getting a vector mask with a
30732 value of diff other than these values implies that
30733 something is wrong by the time we get here. */
30734 gcc_assert (i + j < nelt);
30735 if (d->perm[i + j] != i + diff - j)
30736 return false;
30739 /* Success! */
30740 if (d->testing_p)
30741 return true;
30743 /* ??? The third operand is an artifact of the builtin infrastructure
30744 and is ignored by the actual instruction. */
30745 emit_insn (gen (d->target, d->op0, const0_rtx));
30746 return true;
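/* Example: for V8QImode the selector {3, 2, 1, 0, 7, 6, 5, 4} has
   diff == 3 and reverses the bytes within each 32-bit chunk (VREV32.8);
   diff == 1 corresponds to the VREV16 variants and diff == 7 to VREV64.  */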
30749 /* Recognize patterns for the VTRN insns. */
30751 static bool
30752 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30754 unsigned int i, odd, mask, nelt = d->nelt;
30755 rtx out0, out1, in0, in1, x;
30756 rtx (*gen)(rtx, rtx, rtx, rtx);
30758 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30759 return false;
30761 /* Note that these are little-endian tests. Adjust for big-endian later. */
30762 if (d->perm[0] == 0)
30763 odd = 0;
30764 else if (d->perm[0] == 1)
30765 odd = 1;
30766 else
30767 return false;
30768 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30770 for (i = 0; i < nelt; i += 2)
30772 if (d->perm[i] != i + odd)
30773 return false;
30774 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30775 return false;
30778 /* Success! */
30779 if (d->testing_p)
30780 return true;
30782 switch (d->vmode)
30784 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30785 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30786 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30787 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30788 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30789 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30790 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30791 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30792 default:
30793 gcc_unreachable ();
30796 in0 = d->op0;
30797 in1 = d->op1;
30798 if (BYTES_BIG_ENDIAN)
30800 x = in0, in0 = in1, in1 = x;
30801 odd = !odd;
30804 out0 = d->target;
30805 out1 = gen_reg_rtx (d->vmode);
30806 if (odd)
30807 x = out0, out0 = out1, out1 = x;
30809 emit_insn (gen (out0, in0, in1, out1));
30810 return true;
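/* Example selectors matched above for V4SImode operands: {0, 4, 2, 6}
   (odd == 0) and {1, 5, 3, 7} (odd == 1), the two outputs of VTRN.32.  */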
30813 /* Recognize patterns for the VEXT insns. */
30815 static bool
30816 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30818 unsigned int i, nelt = d->nelt;
30819 rtx (*gen) (rtx, rtx, rtx, rtx);
30820 rtx offset;
30822 unsigned int location;
30824 unsigned int next = d->perm[0] + 1;
30826 /* TODO: Handle GCC's numbering of elements for big-endian. */
30827 if (BYTES_BIG_ENDIAN)
30828 return false;
30830 /* Check if the extracted indexes are increasing by one. */
30831 for (i = 1; i < nelt; next++, i++)
30833 /* If we hit the most significant element of the 2nd vector in
30834 the previous iteration, no need to test further. */
30835 if (next == 2 * nelt)
30836 return false;
30838 /* If we are operating on only one vector: it could be a
30839 rotation. If there are only two elements of size < 64, let
30840 arm_evpc_neon_vrev catch it. */
30841 if (d->one_vector_p && (next == nelt))
30843 if ((nelt == 2) && (d->vmode != V2DImode))
30844 return false;
30845 else
30846 next = 0;
30849 if (d->perm[i] != next)
30850 return false;
30853 location = d->perm[0];
30855 switch (d->vmode)
30857 case V16QImode: gen = gen_neon_vextv16qi; break;
30858 case V8QImode: gen = gen_neon_vextv8qi; break;
30859 case V4HImode: gen = gen_neon_vextv4hi; break;
30860 case V8HImode: gen = gen_neon_vextv8hi; break;
30861 case V2SImode: gen = gen_neon_vextv2si; break;
30862 case V4SImode: gen = gen_neon_vextv4si; break;
30863 case V2SFmode: gen = gen_neon_vextv2sf; break;
30864 case V4SFmode: gen = gen_neon_vextv4sf; break;
30865 case V2DImode: gen = gen_neon_vextv2di; break;
30866 default:
30867 return false;
30870 /* Success! */
30871 if (d->testing_p)
30872 return true;
30874 offset = GEN_INT (location);
30875 emit_insn (gen (d->target, d->op0, d->op1, offset));
30876 return true;
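/* As an illustration of the check above: for V8QImode the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } is accepted with location == 3 and is
   emitted as a single vext.8, taking the last five bytes of op0
   followed by the first three bytes of op1.  */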
30879 /* The NEON VTBL instruction is a fully variable permutation that's even
30880 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30881 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30882 can do slightly better by expanding this as a constant where we don't
30883 have to apply a mask. */
30885 static bool
30886 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30888 rtx rperm[MAX_VECT_LEN], sel;
30889 enum machine_mode vmode = d->vmode;
30890 unsigned int i, nelt = d->nelt;
30892 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30893 numbering of elements for big-endian, we must reverse the order. */
30894 if (BYTES_BIG_ENDIAN)
30895 return false;
30897 if (d->testing_p)
30898 return true;
30900 /* Generic code will try constant permutation twice: once with the
30901 original mode and again with the elements lowered to QImode.
30902 So wait and don't do the selector expansion ourselves. */
30903 if (vmode != V8QImode && vmode != V16QImode)
30904 return false;
30906 for (i = 0; i < nelt; ++i)
30907 rperm[i] = GEN_INT (d->perm[i]);
30908 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30909 sel = force_reg (vmode, sel);
30911 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30912 return true;
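/* For example (an illustrative selector, not from any test case): a
   V8QImode permutation such as { 0, 7, 2, 5, 4, 3, 6, 1 } matches none
   of the specialised patterns and ends up here; the indices are simply
   materialised as a constant vector in a register and a vtbl-based
   permute is emitted by arm_expand_vec_perm_1, with no masking of the
   index operand because the constant indices are already in range.  */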
30915 static bool
30916 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30918 /* Check if the input mask matches vext before reordering the
30919 operands. */
30920 if (TARGET_NEON)
30921 if (arm_evpc_neon_vext (d))
30922 return true;
30924 /* The pattern matching functions above are written to look for a small
30925 number to begin the sequence (0, 1, N/2). If we begin with an index
30926 from the second operand, we can swap the operands. */
30927 if (d->perm[0] >= d->nelt)
30929 unsigned i, nelt = d->nelt;
30930 rtx x;
30932 for (i = 0; i < nelt; ++i)
30933 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30935 x = d->op0;
30936 d->op0 = d->op1;
30937 d->op1 = x;
30940 if (TARGET_NEON)
30942 if (arm_evpc_neon_vuzp (d))
30943 return true;
30944 if (arm_evpc_neon_vzip (d))
30945 return true;
30946 if (arm_evpc_neon_vrev (d))
30947 return true;
30948 if (arm_evpc_neon_vtrn (d))
30949 return true;
30950 return arm_evpc_neon_vtbl (d);
30952 return false;
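/* For instance (illustrative values): with nelt == 4, a selector of
   { 4, 0, 5, 1 } begins with an index from the second operand, so the
   code above remaps it to { 0, 4, 1, 5 } and swaps op0 and op1; the
   remapped form should then be recognised by arm_evpc_neon_vzip.  */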
30955 /* Expand a vec_perm_const pattern. */
30957 bool
30958 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30960 struct expand_vec_perm_d d;
30961 int i, nelt, which;
30963 d.target = target;
30964 d.op0 = op0;
30965 d.op1 = op1;
30967 d.vmode = GET_MODE (target);
30968 gcc_assert (VECTOR_MODE_P (d.vmode));
30969 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30970 d.testing_p = false;
30972 for (i = which = 0; i < nelt; ++i)
30974 rtx e = XVECEXP (sel, 0, i);
30975 int ei = INTVAL (e) & (2 * nelt - 1);
30976 which |= (ei < nelt ? 1 : 2);
30977 d.perm[i] = ei;
30980 switch (which)
30982 default:
30983 gcc_unreachable();
30985 case 3:
30986 d.one_vector_p = false;
30987 if (!rtx_equal_p (op0, op1))
30988 break;
30990 /* The elements of PERM do not suggest that only the first operand
30991 is used, but both operands are identical. Allow easier matching
30992 of the permutation by folding the permutation into the single
30993 input vector. */
30994 /* FALLTHRU */
30995 case 2:
30996 for (i = 0; i < nelt; ++i)
30997 d.perm[i] &= nelt - 1;
30998 d.op0 = op1;
30999 d.one_vector_p = true;
31000 break;
31002 case 1:
31003 d.op1 = op0;
31004 d.one_vector_p = true;
31005 break;
31008 return arm_expand_vec_perm_const_1 (&d);
31011 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31013 static bool
31014 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31015 const unsigned char *sel)
31017 struct expand_vec_perm_d d;
31018 unsigned int i, nelt, which;
31019 bool ret;
31021 d.vmode = vmode;
31022 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31023 d.testing_p = true;
31024 memcpy (d.perm, sel, nelt);
31026 /* Categorize the set of elements in the selector. */
31027 for (i = which = 0; i < nelt; ++i)
31029 unsigned char e = d.perm[i];
31030 gcc_assert (e < 2 * nelt);
31031 which |= (e < nelt ? 1 : 2);
31034 /* If all elements come from the second vector, fold them onto the first. */
31035 if (which == 2)
31036 for (i = 0; i < nelt; ++i)
31037 d.perm[i] -= nelt;
31039 /* Check whether the mask can be applied to the vector type. */
31040 d.one_vector_p = (which != 3);
31042 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31043 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31044 if (!d.one_vector_p)
31045 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31047 start_sequence ();
31048 ret = arm_expand_vec_perm_const_1 (&d);
31049 end_sequence ();
31051 return ret;
31054 bool
31055 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31057 /* If we are soft float and either we have ldrd or the mode is
31058 no wider than a word, then all auto increment forms are ok. */
31059 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31060 return true;
31062 switch (code)
31064 /* Post increment and pre decrement are supported for all
31065 instruction forms, except that vector modes allow only post increment. */
31066 case ARM_POST_INC:
31067 case ARM_PRE_DEC:
31068 if (VECTOR_MODE_P (mode))
31070 if (code != ARM_PRE_DEC)
31071 return true;
31072 else
31073 return false;
31076 return true;
31078 case ARM_POST_DEC:
31079 case ARM_PRE_INC:
31080 /* Without LDRD, and with a mode wider than a word,
31081 there is no point in auto-incrementing because
31082 ldm and stm will not have these forms. */
31083 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31084 return false;
31086 /* Vector and floating point modes do not support
31087 these auto increment forms. */
31088 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31089 return false;
31091 return true;
31093 default:
31094 return false;
31098 return false;
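/* For example (an illustration of the rules above): on a hard-float
   NEON target all four forms are allowed for SImode, whereas for a
   vector mode such as V4SImode only ARM_POST_INC is accepted;
   ARM_PRE_DEC, ARM_POST_DEC and ARM_PRE_INC all return false for
   vector modes.  */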
31101 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31102 on ARM, since we know that shifts by negative amounts are no-ops.
31103 Additionally, the default expansion code is not available or suitable
31104 for post-reload insn splits (this can occur when the register allocator
31105 chooses not to do a shift in NEON).
31107 This function is used in both initial expand and post-reload splits, and
31108 handles all kinds of 64-bit shifts.
31110 Input requirements:
31111 - It is safe for the input and output to be the same register, but
31112 early-clobber rules apply for the shift amount and scratch registers.
31113 - Shift by register requires both scratch registers. In all other cases
31114 the scratch registers may be NULL.
31115 - Ashiftrt by a register also clobbers the CC register. */
31116 void
31117 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31118 rtx amount, rtx scratch1, rtx scratch2)
31120 rtx out_high = gen_highpart (SImode, out);
31121 rtx out_low = gen_lowpart (SImode, out);
31122 rtx in_high = gen_highpart (SImode, in);
31123 rtx in_low = gen_lowpart (SImode, in);
31125 /* Terminology:
31126 in = the register pair containing the input value.
31127 out = the destination register pair.
31128 up = the high- or low-part of each pair.
31129 down = the opposite part to "up".
31130 In a shift, we can consider bits to shift from "up"-stream to
31131 "down"-stream, so in a left-shift "up" is the low-part and "down"
31132 is the high-part of each register pair. */
31134 rtx out_up = code == ASHIFT ? out_low : out_high;
31135 rtx out_down = code == ASHIFT ? out_high : out_low;
31136 rtx in_up = code == ASHIFT ? in_low : in_high;
31137 rtx in_down = code == ASHIFT ? in_high : in_low;
31139 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31140 gcc_assert (out
31141 && (REG_P (out) || GET_CODE (out) == SUBREG)
31142 && GET_MODE (out) == DImode);
31143 gcc_assert (in
31144 && (REG_P (in) || GET_CODE (in) == SUBREG)
31145 && GET_MODE (in) == DImode);
31146 gcc_assert (amount
31147 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31148 && GET_MODE (amount) == SImode)
31149 || CONST_INT_P (amount)));
31150 gcc_assert (scratch1 == NULL
31151 || (GET_CODE (scratch1) == SCRATCH)
31152 || (GET_MODE (scratch1) == SImode
31153 && REG_P (scratch1)));
31154 gcc_assert (scratch2 == NULL
31155 || (GET_CODE (scratch2) == SCRATCH)
31156 || (GET_MODE (scratch2) == SImode
31157 && REG_P (scratch2)));
31158 gcc_assert (!REG_P (out) || !REG_P (amount)
31159 || !HARD_REGISTER_P (out)
31160 || (REGNO (out) != REGNO (amount)
31161 && REGNO (out) + 1 != REGNO (amount)));
31163 /* Macros to make following code more readable. */
31164 #define SUB_32(DEST,SRC) \
31165 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31166 #define RSB_32(DEST,SRC) \
31167 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31168 #define SUB_S_32(DEST,SRC) \
31169 gen_addsi3_compare0 ((DEST), (SRC), \
31170 GEN_INT (-32))
31171 #define SET(DEST,SRC) \
31172 gen_rtx_SET (SImode, (DEST), (SRC))
31173 #define SHIFT(CODE,SRC,AMOUNT) \
31174 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31175 #define LSHIFT(CODE,SRC,AMOUNT) \
31176 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31177 SImode, (SRC), (AMOUNT))
31178 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31179 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31180 SImode, (SRC), (AMOUNT))
31181 #define ORR(A,B) \
31182 gen_rtx_IOR (SImode, (A), (B))
31183 #define BRANCH(COND,LABEL) \
31184 gen_arm_cond_branch ((LABEL), \
31185 gen_rtx_ ## COND (CCmode, cc_reg, \
31186 const0_rtx), \
31187 cc_reg)
31189 /* Shifts by register and shifts by constant are handled separately. */
31190 if (CONST_INT_P (amount))
31192 /* We have a shift-by-constant. */
31194 /* First, handle out-of-range shift amounts.
31195 In both cases we try to match the result that an ARM instruction
31196 in a shift-by-register would give. This helps reduce execution
31197 differences between optimization levels, but it won't stop other
31198 parts of the compiler doing different things. This is "undefined
31199 behaviour", in any case. */
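/* For instance (illustrative): an arithmetic right shift by a constant
   amount of 70 is emitted below as two "asr #31" operations, so both
   result words become copies of the sign bit, which is also what an
   ARM register shift by 70 would leave behind.  */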
31200 if (INTVAL (amount) <= 0)
31201 emit_insn (gen_movdi (out, in));
31202 else if (INTVAL (amount) >= 64)
31204 if (code == ASHIFTRT)
31206 rtx const31_rtx = GEN_INT (31);
31207 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31208 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31210 else
31211 emit_insn (gen_movdi (out, const0_rtx));
31214 /* Now handle valid shifts. */
31215 else if (INTVAL (amount) < 32)
31217 /* Shifts by a constant less than 32. */
31218 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31220 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31221 emit_insn (SET (out_down,
31222 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31223 out_down)));
31224 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31226 else
31228 /* Shifts by a constant greater than 31. */
31229 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31231 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31232 if (code == ASHIFTRT)
31233 emit_insn (gen_ashrsi3 (out_up, in_up,
31234 GEN_INT (31)));
31235 else
31236 emit_insn (SET (out_up, const0_rtx));
31239 else
31241 /* We have a shift-by-register. */
31242 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31244 /* This alternative requires the scratch registers. */
31245 gcc_assert (scratch1 && REG_P (scratch1));
31246 gcc_assert (scratch2 && REG_P (scratch2));
31248 /* We will need the values "amount-32" and "32-amount" later.
31249 Swapping them around now allows the later code to be more general. */
31250 switch (code)
31252 case ASHIFT:
31253 emit_insn (SUB_32 (scratch1, amount));
31254 emit_insn (RSB_32 (scratch2, amount));
31255 break;
31256 case ASHIFTRT:
31257 emit_insn (RSB_32 (scratch1, amount));
31258 /* Also set CC = amount - 32, so that amount < 32 can be tested below. */
31259 emit_insn (SUB_S_32 (scratch2, amount));
31260 break;
31261 case LSHIFTRT:
31262 emit_insn (RSB_32 (scratch1, amount));
31263 emit_insn (SUB_32 (scratch2, amount));
31264 break;
31265 default:
31266 gcc_unreachable ();
31269 /* Emit code like this:
31271 arithmetic-left:
31272 out_down = in_down << amount;
31273 out_down = (in_up << (amount - 32)) | out_down;
31274 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31275 out_up = in_up << amount;
31277 arithmetic-right:
31278 out_down = in_down >> amount;
31279 out_down = (in_up << (32 - amount)) | out_down;
31280 if (amount >= 32)
31281 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31282 out_up = in_up >> amount;
31284 logical-right:
31285 out_down = in_down >> amount;
31286 out_down = (in_up << (32 - amount)) | out_down;
31287 if (amount >= 32)
31288 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31289 out_up = in_up >> amount;
31291 The ARM and Thumb2 variants are the same but implemented slightly
31292 differently. If this were only called during expand we could just
31293 use the Thumb2 case and let combine do the right thing, but this
31294 can also be called from post-reload splitters. */
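/* As a worked example of the ARM-mode sequence below (values chosen
   purely for illustration): for an arithmetic right shift whose amount
   register happens to contain 40, scratch1 = 32 - 40 = -8 and
   scratch2 = 40 - 32 = 8.  The first two instructions then contribute
   nothing to out_down (register LSR by 40 and LSL by -8 both yield
   zero), the conditional branch is not taken since 40 >= 32, so
   out_down ends up as in_up >> 8, and out_up = in_up >> 40 gives 32
   copies of the sign bit: the correct 64-bit result.  */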
31296 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31298 if (!TARGET_THUMB2)
31300 /* Emit code for ARM mode. */
31301 emit_insn (SET (out_down,
31302 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31303 if (code == ASHIFTRT)
31305 rtx done_label = gen_label_rtx ();
31306 emit_jump_insn (BRANCH (LT, done_label));
31307 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31308 out_down)));
31309 emit_label (done_label);
31311 else
31312 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31313 out_down)));
31315 else
31317 /* Emit code for Thumb2 mode.
31318 Thumb2 can't do shift and or in one insn. */
31319 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31320 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31322 if (code == ASHIFTRT)
31324 rtx done_label = gen_label_rtx ();
31325 emit_jump_insn (BRANCH (LT, done_label));
31326 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31327 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31328 emit_label (done_label);
31330 else
31332 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31333 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31337 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31340 #undef SUB_32
31341 #undef RSB_32
31342 #undef SUB_S_32
31343 #undef SET
31344 #undef SHIFT
31345 #undef LSHIFT
31346 #undef REV_LSHIFT
31347 #undef ORR
31348 #undef BRANCH
31352 /* Returns true if *COMPARISON is a valid comparison operation, and
31353 forces the operands into a form that is valid for it. */
31354 bool
31355 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31357 enum rtx_code code = GET_CODE (*comparison);
31358 int code_int;
31359 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31360 ? GET_MODE (*op2) : GET_MODE (*op1);
31362 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31364 if (code == UNEQ || code == LTGT)
31365 return false;
31367 code_int = (int)code;
31368 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31369 PUT_CODE (*comparison, (enum rtx_code)code_int);
31371 switch (mode)
31373 case SImode:
31374 if (!arm_add_operand (*op1, mode))
31375 *op1 = force_reg (mode, *op1);
31376 if (!arm_add_operand (*op2, mode))
31377 *op2 = force_reg (mode, *op2);
31378 return true;
31380 case DImode:
31381 if (!cmpdi_operand (*op1, mode))
31382 *op1 = force_reg (mode, *op1);
31383 if (!cmpdi_operand (*op2, mode))
31384 *op2 = force_reg (mode, *op2);
31385 return true;
31387 case SFmode:
31388 case DFmode:
31389 if (!arm_float_compare_operand (*op1, mode))
31390 *op1 = force_reg (mode, *op1);
31391 if (!arm_float_compare_operand (*op2, mode))
31392 *op2 = force_reg (mode, *op2);
31393 return true;
31394 default:
31395 break;
31398 return false;
31402 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31404 static unsigned HOST_WIDE_INT
31405 arm_asan_shadow_offset (void)
31407 return (unsigned HOST_WIDE_INT) 1 << 29;
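/* Assuming the usual ASan mapping of one shadow byte per eight bytes
   of application memory, this offset makes the shadow address
     shadow = (address >> 3) + 0x20000000
   i.e. the shadow region starts at the 512MB mark of the 32-bit ARM
   address space.  */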
31411 /* This is a temporary fix for PR60655. Ideally we need
31412 to handle most of these cases in the generic part but
31413 currently we reject minus (..) (sym_ref). We try to
31414 ameliorate the case with minus (sym_ref1) (sym_ref2)
31415 where they are in the same section. */
31417 static bool
31418 arm_const_not_ok_for_debug_p (rtx p)
31420 tree decl_op0 = NULL;
31421 tree decl_op1 = NULL;
31423 if (GET_CODE (p) == MINUS)
31425 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31427 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31428 if (decl_op1
31429 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31430 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31432 if ((TREE_CODE (decl_op1) == VAR_DECL
31433 || TREE_CODE (decl_op1) == CONST_DECL)
31434 && (TREE_CODE (decl_op0) == VAR_DECL
31435 || TREE_CODE (decl_op0) == CONST_DECL))
31436 return (get_variable_section (decl_op1, false)
31437 != get_variable_section (decl_op0, false));
31439 if (TREE_CODE (decl_op1) == LABEL_DECL
31440 && TREE_CODE (decl_op0) == LABEL_DECL)
31441 return (DECL_CONTEXT (decl_op1)
31442 != DECL_CONTEXT (decl_op0));
31445 return true;
31449 return false;
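/* For example (illustrative): given two file-scope variables a and b
   that end up in the same data section, the RTL
   (minus (symbol_ref "b") (symbol_ref "a")) is accepted for debug info
   by the code above, whereas a difference of symbols from different
   sections is rejected.  */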
31452 static void
31453 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
31455 const unsigned ARM_FE_INVALID = 1;
31456 const unsigned ARM_FE_DIVBYZERO = 2;
31457 const unsigned ARM_FE_OVERFLOW = 4;
31458 const unsigned ARM_FE_UNDERFLOW = 8;
31459 const unsigned ARM_FE_INEXACT = 16;
31460 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
31461 | ARM_FE_DIVBYZERO
31462 | ARM_FE_OVERFLOW
31463 | ARM_FE_UNDERFLOW
31464 | ARM_FE_INEXACT);
31465 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
31466 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
31467 tree new_fenv_var, reload_fenv, restore_fnenv;
31468 tree update_call, atomic_feraiseexcept, hold_fnclex;
31470 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
31471 return default_atomic_assign_expand_fenv (hold, clear, update);
31473 /* Generate the equivalent of:
31474 unsigned int fenv_var;
31475 fenv_var = __builtin_arm_get_fpscr ();
31477 unsigned int masked_fenv;
31478 masked_fenv = fenv_var & mask;
31480 __builtin_arm_set_fpscr (masked_fenv); */
31482 fenv_var = create_tmp_var (unsigned_type_node, NULL);
31483 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
31484 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
31485 mask = build_int_cst (unsigned_type_node,
31486 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
31487 | ARM_FE_ALL_EXCEPT));
31488 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
31489 fenv_var, build_call_expr (get_fpscr, 0));
31490 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
31491 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
31492 *hold = build2 (COMPOUND_EXPR, void_type_node,
31493 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
31494 hold_fnclex);
31496 /* Store the value of masked_fenv to clear the exceptions:
31497 __builtin_arm_set_fpscr (masked_fenv); */
31499 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
31501 /* Generate the equivalent of:
31502 unsigned int new_fenv_var;
31503 new_fenv_var = __builtin_arm_get_fpscr ();
31505 __builtin_arm_set_fpscr (fenv_var);
31507 __atomic_feraiseexcept (new_fenv_var); */
31509 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
31510 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
31511 build_call_expr (get_fpscr, 0));
31512 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
31513 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
31514 update_call = build_call_expr (atomic_feraiseexcept, 1,
31515 fold_convert (integer_type_node, new_fenv_var));
31516 *update = build2 (COMPOUND_EXPR, void_type_node,
31517 build2 (COMPOUND_EXPR, void_type_node,
31518 reload_fenv, restore_fnenv), update_call);
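/* Taken together, the trees built above make an atomic compound
   assignment on a VFP target expand roughly as follows (a sketch only,
   reusing the variable names from the comments above):

     fenv_var = __builtin_arm_get_fpscr ();         (*hold)
     __builtin_arm_set_fpscr (fenv_var & mask);
     ... the atomic operation, retried as needed ...
     __builtin_arm_set_fpscr (fenv_var & mask);     (*clear, on a retry)
     new_fenv_var = __builtin_arm_get_fpscr ();     (*update)
     __builtin_arm_set_fpscr (fenv_var);
     __atomic_feraiseexcept ((int) new_fenv_var);  */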
31521 #include "gt-arm.h"